exel 0.0.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/.rspec +2 -1
  4. data/exel.gemspec +9 -2
  5. data/lib/exel/ast_node.rb +30 -0
  6. data/lib/exel/context.rb +79 -0
  7. data/lib/exel/deferred_context_value.rb +18 -0
  8. data/lib/exel/error/job_termination.rb +10 -0
  9. data/lib/exel/execution_worker.rb +13 -0
  10. data/lib/exel/handlers/s3_handler.rb +43 -0
  11. data/lib/exel/handlers/sidekiq_handler.rb +21 -0
  12. data/lib/exel/instruction.rb +17 -0
  13. data/lib/exel/instruction_node.rb +9 -0
  14. data/lib/exel/job.rb +74 -0
  15. data/lib/exel/logging.rb +30 -0
  16. data/lib/exel/null_instruction.rb +6 -0
  17. data/lib/exel/processor_helper.rb +67 -0
  18. data/lib/exel/processors/async_processor.rb +24 -0
  19. data/lib/exel/processors/split_processor.rb +85 -0
  20. data/lib/exel/resource.rb +35 -0
  21. data/lib/exel/sequence_node.rb +14 -0
  22. data/lib/exel/version.rb +1 -1
  23. data/lib/exel.rb +19 -1
  24. data/spec/exel/ast_node_spec.rb +52 -0
  25. data/spec/exel/context_spec.rb +151 -0
  26. data/spec/exel/deferred_context_value_spec.rb +21 -0
  27. data/spec/exel/execution_worker_spec.rb +13 -0
  28. data/spec/exel/handlers/s3_handler_spec.rb +49 -0
  29. data/spec/exel/handlers/sidekiq_handler_spec.rb +54 -0
  30. data/spec/exel/instruction_node_spec.rb +22 -0
  31. data/spec/exel/instruction_spec.rb +58 -0
  32. data/spec/exel/job_spec.rb +215 -0
  33. data/spec/exel/logging_spec.rb +36 -0
  34. data/spec/exel/null_instruction_spec.rb +5 -0
  35. data/spec/exel/processors/async_processor_spec.rb +16 -0
  36. data/spec/exel/processors/split_processor_spec.rb +90 -0
  37. data/spec/exel/resource_spec.rb +51 -0
  38. data/spec/exel/sequence_node_spec.rb +24 -0
  39. data/spec/spec_helper.rb +7 -0
  40. metadata +151 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c87f88078f409a37472e257ec0f70e31d1c19c4
4
- data.tar.gz: 9961dbc04ded3f33b5201e92c276846c26887701
3
+ metadata.gz: c5eccac7b009cd6a4063a36cbb53b70de2efa22b
4
+ data.tar.gz: f6e7dd493f08d09365158095290b0c319907215e
5
5
  SHA512:
6
- metadata.gz: b623b2d623fbe39a0ef54e314339bd1836f6cf21a81b6c860da9955f09cf4b617ce3f8dcba5e521763086246fefebbebbe74913d5cb6ac9fe26c68ca538b9e69
7
- data.tar.gz: d1cd09aa61c0e415fd4e4d15328062065aadff978b5667d7c0217dd49b2303957e84e6630f6bd3512665f336ea6f3db689a8dc3043dd0b284f9d5f8f58364cbf
6
+ metadata.gz: 4135007934f8288843da2852a13cb269214725997069a3704b03ad7f4f8f5df103f7824156f1e959de24ca42d6b1a81e72f3fac1eb9b02e174ec9825b66f3b34
7
+ data.tar.gz: d605c327290de5f1c71f4a4f552b71f7dc8d2469e146a5814e12581188011e6f462a4b08aa8434d2645e92cebaa76720bf0be14656dce1c0feb99c8e5f0e9ed6
data/.gitignore CHANGED
@@ -21,4 +21,5 @@ tmp
21
21
  *.a
22
22
  mkmf.log
23
23
  .idea
24
- *.iml
24
+ *.iml
25
+ .ruby-*
data/.rspec CHANGED
@@ -1 +1,2 @@
1
- --color
1
+ --color
2
+ --require spec_helper
data/exel.gemspec CHANGED
@@ -18,7 +18,14 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ['lib']
20
20
 
21
+ spec.add_dependency 'aws-sdk', '~> 2'
22
+ spec.add_dependency 'sidekiq', '~> 3'
23
+
21
24
  spec.add_development_dependency 'bundler', '~> 1.6'
22
- spec.add_development_dependency 'rake'
23
- spec.add_development_dependency 'rspec'
25
+ spec.add_development_dependency 'rake', '~> 10'
26
+ spec.add_development_dependency 'rspec', '~> 3'
27
+ spec.add_development_dependency 'guard', '~> 2'
28
+ spec.add_development_dependency 'guard-rspec', '~> 4'
29
+ spec.add_development_dependency 'terminal-notifier', '~> 1'
30
+ spec.add_development_dependency 'terminal-notifier-guard', '~> 1'
24
31
  end
@@ -0,0 +1,30 @@
module EXEL
  # Base class for the nodes of a parsed job. A node holds an instruction and
  # a list of child nodes; subclasses supply the actual behaviour in #run.
  class ASTNode
    attr_reader :instruction, :children

    # instruction - the object this node wraps (stored as-is).
    # children    - array of child nodes; defaults to a fresh empty array.
    def initialize(instruction, children = [])
      @instruction = instruction
      @children = children
    end

    # Entry point for executing a tree: runs the node and logs (instead of
    # propagating) an EXEL::Error::JobTermination raised while running.
    def start(context)
      fail_silently { run(context) }
    end

    # Abstract hook. Subclasses must override it; the base implementation
    # raises a RuntimeError naming the concrete class.
    def run(_context)
      # The message previously pointed at "#process", which is not the method
      # subclasses have to implement.
      raise "#{self.class} does not implement #run"
    end

    # Appends +node+ to this node's children.
    def add_child(node)
      @children << node
    end

    private

    # Yields to the block and turns a JobTermination into an error log entry.
    # Any other exception propagates unchanged.
    def fail_silently(&_block)
      yield if block_given?
    rescue EXEL::Error::JobTermination => e
      EXEL.logger.error "JobTerminationError: #{e.message.chomp}"
    end
  end
end
@@ -0,0 +1,79 @@
require 'securerandom'
require 'tempfile'

module EXEL
  # Key/value store that is passed through a job. Values are looked up
  # through EXEL::Resource.localize and deferred values are resolved on read.
  class Context
    attr_reader :table

    # initial_context - hash used directly (not copied) as the backing table.
    def initialize(initial_context = {})
      @table = initial_context
    end

    # Builds a copy of the table with every value passed through
    # EXEL::Resource.remotize, marshals a Context wrapping that copy into a
    # temp file and hands the file to the S3 handler.
    #
    # Returns whatever Handlers::S3Handler#upload returns.
    def serialize
      remotized_table = @table.each_with_object({}) do |(key, value), acc|
        acc[key] = EXEL::Resource.remotize(value)
      end
      upload(serialize_context(remotized_table))
    end

    # Downloads +uri+ through the S3 handler and unmarshals its content.
    # The downloaded file is closed even when unmarshalling fails.
    #
    # NOTE(review): Marshal.load executes whatever object graph the payload
    # describes; this is only safe while the bucket content is trusted.
    def self.deserialize(uri)
      file = Handlers::S3Handler.new.download(uri)
      Marshal.load(file.read)
    ensure
      file.close if file
    end

    # Reads +key+, localizing the stored value and resolving deferred values
    # (also inside arrays and hashes). The resolved value is written back to
    # the table, so the lookup result is cached under +key+.
    def [](key)
      value = get_deferred(EXEL::Resource.localize(@table[key]))
      @table[key] = value
    end

    def []=(key, value)
      @table[key] = value
    end

    # Merges +hash+ into the table in place and returns the context itself.
    def merge!(hash)
      @table.merge!(hash)
      self
    end

    def delete(key)
      @table.delete(key)
    end

    # Two contexts are equal when their tables are equal.
    def ==(other)
      other.kind_of?(EXEL::Context) && table == other.table
    end

    private

    # Marshals a new Context built from +table+ into a binary temp file and
    # returns the file rewound to its start.
    def serialize_context(table)
      file = Tempfile.new(SecureRandom.uuid, encoding: 'ascii-8bit')
      file.write(Marshal.dump(Context.new(table)))
      file.rewind
      file
    end

    def upload(file)
      Handlers::S3Handler.new.upload(file)
    end

    # Resolves DeferredContextValue instances against this context. Arrays
    # and hashes are walked recursively and updated in place; any other value
    # is returned untouched.
    def get_deferred(value)
      case value
      when DeferredContextValue then value.get(self)
      when Array then value.map! { |element| get_deferred(element) }
      when Hash then value.each { |k, v| value[k] = get_deferred(v) }
      else value
      end
    end
  end
end
@@ -0,0 +1,18 @@
module EXEL
  # Records a chain of keys (via #[]) so that the lookup can be replayed
  # later against an actual context with #get.
  class DeferredContextValue
    attr_reader :keys

    def initialize
      @keys = []
    end

    # Remembers +key+ and returns self so further lookups can be chained.
    def [](key)
      tap { keys.push(key) }
    end

    # Replays the recorded keys, starting from +context+ and indexing into
    # each intermediate result in turn.
    def get(context)
      keys.inject(context) { |current, key| current[key] }
    end
  end
end
@@ -0,0 +1,10 @@
module EXEL
  module Error
    # Inherits from Exception rather than StandardError on purpose: a plain
    # `rescue => e` only catches StandardError, so this error passes through
    # such handlers and propagates to the root of the job.
    JobTermination = Class.new(Exception)
  end
end
@@ -0,0 +1,13 @@
require 'sidekiq'

module EXEL
  # Sidekiq worker that restores a serialized context and starts the block
  # stored in it under :_block.
  class ExecutionWorker
    include Sidekiq::Worker

    # context_uri - location understood by Context.deserialize.
    def perform(context_uri)
      restored = Context.deserialize(context_uri)
      restored[:_block].start(restored)
    end
  end
end
@@ -0,0 +1,43 @@
require 'aws-sdk-resources'
require 'tempfile'

module EXEL
  module Handlers
    # Moves files between the local machine and the bucket named by
    # EXEL.configuration[:s3_bucket].
    class S3Handler
      # Uploads +file+ under its base name and closes it afterwards, also
      # when the upload raises, so the handle is never leaked.
      #
      # Returns the "s3://<name>" URI of the uploaded object.
      def upload(file)
        filename = get_filename(file)
        get_object(filename).upload_file(file)

        "s3://#{filename}"
      ensure
        file.close
      end

      # Downloads the object addressed by +uri+ (everything after "://" is
      # used as the object key) into a new Tempfile and returns that file.
      # The file is created in binary mode for the transfer and switched to
      # UTF-8 before it is handed back.
      def download(uri)
        filename = uri.partition('://').last
        obj = get_object(filename)
        file = Tempfile.new(filename, encoding: Encoding::ASCII_8BIT)
        obj.get(response_target: file)
        file.set_encoding(Encoding::UTF_8)
        file
      end

      # Builds the S3 object handle for +filename+ using the credentials in
      # EXEL.configuration[:aws]. The region is fixed to us-east-1.
      def get_object(filename)
        s3 = Aws::S3::Resource.new(
          credentials: Aws::Credentials.new(
            EXEL.configuration[:aws][:access_key_id],
            EXEL.configuration[:aws][:secret_access_key]
          ),
          region: 'us-east-1'
        )
        s3.bucket(EXEL.configuration[:s3_bucket]).object(filename)
      end

      private

      # Last path component of the file's path.
      def get_filename(file)
        File.basename(file.path)
      end
    end
  end
end
@@ -0,0 +1,21 @@
require 'sidekiq'

module EXEL
  module Handlers
    # Schedules a block for asynchronous execution by pushing an
    # ExecutionWorker job that carries the serialized context.
    class SidekiqHandler
      def initialize(context)
        @context = context
      end

      # Stores +block+ in the context under :_block, serializes the context
      # and pushes a job for ExecutionWorker. The context entries :queue and
      # :retry, when set, are forwarded as Sidekiq push options.
      #
      # Returns the result of Sidekiq::Client.push.
      def do_async(block)
        @context[:_block] = block

        push_args = { 'class' => ExecutionWorker, 'args' => [@context.serialize] }

        queue = @context[:queue]
        push_args['queue'] = queue if queue

        # Compare against nil instead of truthiness: `retry: false` is how a
        # job opts out of retries and must not be silently dropped.
        retry_option = @context[:retry]
        push_args['retry'] = retry_option unless retry_option.nil?

        Sidekiq::Client.push(push_args)
      end
    end
  end
end
@@ -0,0 +1,17 @@
module EXEL
  # A named step of a job: a processor class plus the arguments it should
  # see in the context and an optional subtree handed to the processor.
  class Instruction
    attr_reader :name

    # name            - label of the instruction.
    # processor_class - class instantiated with the context on #execute.
    # args            - hash merged into the context; nil means no arguments.
    # subtree         - value passed to the processor's #process (may be nil).
    def initialize(name, processor_class, args, subtree = nil)
      @name = name
      @processor_class = processor_class
      @args = args || {}
      @subtree = subtree
    end

    # Merges the arguments into +context+ (in place), builds the processor
    # with that context and returns the result of its #process call.
    def execute(context)
      context.merge!(@args)
      @processor_class.new(context).process(@subtree)
    end
  end
end
@@ -0,0 +1,9 @@
require_relative './ast_node'

module EXEL
  # AST node whose behaviour is simply to execute the instruction it wraps.
  class InstructionNode < ASTNode
    # Executes the wrapped instruction with +context+ and returns its result.
    def run(context)
      instruction.execute(context)
    end
  end
end
data/lib/exel/job.rb ADDED
@@ -0,0 +1,74 @@
module EXEL
  # Registry and runner for jobs written in the EXEL DSL.
  module Job
    class << self
      # Registers +block+ as the definition of +job_name+.
      # Raises a RuntimeError when the name is already taken.
      def define(job_name, &block)
        raise "Job #{job_name.inspect} is already defined" unless registry[job_name].nil?
        registry[job_name] = block
      end

      # Hash of job name => DSL block.
      def registry
        @registry ||= {}
      end

      # Parses and starts a job.
      #
      # dsl_code_or_name - Symbol naming a registered job, or DSL code that
      #                    Parser.parse accepts directly.
      # context          - Hash (wrapped in an EXEL::Context) or an object
      #                    that is used as the context as-is.
      #
      # Raises a RuntimeError when a named job is not registered.
      def run(dsl_code_or_name, context = {})
        context = EXEL::Context.new(context) if context.is_a?(Hash)
        ast = parse(dsl_code_or_name)
        raise %(Job "#{dsl_code_or_name}" not found) unless ast

        ast.start(context)
      end

      private

      # Returns the AST for the given name or code; nil for an unknown name.
      def parse(dsl_code_or_name)
        return Parser.parse(dsl_code_or_name) unless dsl_code_or_name.is_a?(Symbol)

        job = registry[dsl_code_or_name]
        Parser.parse(job) if job
      end
    end

    # Evaluates DSL code and builds the corresponding tree of nodes. The
    # public instance methods below are the DSL vocabulary.
    class Parser
      attr_reader :ast

      def initialize
        @ast = SequenceNode.new
      end

      # Evaluates +dsl_proc_or_code+ (a Proc or a String of Ruby code) in the
      # scope of a fresh parser and returns the resulting AST.
      def self.parse(dsl_proc_or_code)
        parser = Parser.new
        if dsl_proc_or_code.is_a?(::Proc)
          parser.instance_eval(&dsl_proc_or_code)
        else
          parser.instance_eval(dsl_proc_or_code)
        end
        parser.ast
      end

      # DSL: run the processor class given as +:with+; the remaining options
      # become the instruction's arguments.
      def process(options, &block)
        # Work on a copy so the caller's hash keeps its :with entry.
        options = options.dup
        processor_class = options.delete(:with)
        add_instruction_node('process', processor_class, block, options)
      end

      # DSL: run the nested block through Processors::AsyncProcessor.
      def async(options = {}, &block)
        add_instruction_node('async', Processors::AsyncProcessor, block, options)
      end

      # DSL: run the nested block through Processors::SplitProcessor.
      def split(options = {}, &block)
        add_instruction_node('split', Processors::SplitProcessor, block, options)
      end

      # DSL: placeholder for a context value that is looked up at run time.
      def context
        DeferredContextValue.new
      end

      private

      # Parses the optional nested block into a subtree, wraps everything in
      # an InstructionNode and appends it to the AST.
      def add_instruction_node(name, processor, block, args = {})
        sub_tree = block.nil? ? nil : Parser.parse(block)
        instruction = EXEL::Instruction.new(name, processor, args, sub_tree)
        node = sub_tree.nil? ? InstructionNode.new(instruction) : InstructionNode.new(instruction, [sub_tree])
        @ast.add_child(node)
      end
    end
  end
end
@@ -0,0 +1,30 @@
require 'logger'

module EXEL
  # Holds the logger used by the library and builds a default one from
  # EXEL.configuration on first use.
  module Logging
    DEFAULT_LEVEL = :info

    # Returns the current logger, creating the default one when none is set.
    def self.logger
      @logger || initialize_logger
    end

    # Creates a Logger writing to #log_filename at #log_level, stores it as
    # the current logger and returns it.
    def self.initialize_logger
      @logger = Logger.new(log_filename)
      @logger.level = log_level
      @logger
    end

    # Configured :log_filename, or the platform's null device so that
    # logging is discarded by default (File::NULL instead of a literal
    # '/dev/null', which only exists on Unix-like systems).
    def self.log_filename
      EXEL.configuration[:log_filename] || File::NULL
    end

    # Logger severity constant for the configured :log_level
    # (DEFAULT_LEVEL when unset). The name is upcased and looked up on Logger.
    def self.log_level
      level = EXEL.configuration[:log_level] || DEFAULT_LEVEL
      Logger.const_get(level.to_s.upcase)
    end

    # Replaces the logger; passing nil installs one that discards output.
    def self.logger=(logger)
      @logger = logger || Logger.new(File::NULL)
    end
  end
end
@@ -0,0 +1,6 @@
module EXEL
  # Instruction that does nothing; SequenceNode uses it as its placeholder
  # instruction.
  class NullInstruction
    # Ignores +context+ and returns nil.
    def execute(context)
      nil
    end
  end
end
@@ -0,0 +1,67 @@
module EXEL
  # Formatting, logging and timing helpers mixed into processors. The
  # logging helpers read @context from the including object.
  module ProcessorHelper
    # Helper Methods

    # Wraps every tag in square brackets and concatenates them:
    # tag('a', 'b') => "[a][b]".
    def tag(*tags)
      tags.map { |t| "[#{t}]" }.join('')
    end

    # Current local time formatted as month/day/two-digit-year hour:minute.
    def timestamp
      Time.now.strftime('%m/%e/%y %H:%M')
    end

    # Size of +file+ (anything responding to #size) as "<n.nn> MB".
    # NOTE(review): the divisor 1_024_000 is neither 10^6 nor 2^20 - confirm
    # which unit is intended before relying on the exact figure.
    def file_size_in_mb(file)
      "#{'%.2f' % (file.size.to_f / 1_024_000).round(2)} MB"
    end

    # Logging Helpers

    # Sets this object's log prefix to the context's :log_prefix (if any)
    # followed by +prefix+, and returns the combined prefix.
    def log_prefix_with(prefix)
      @log_prefix = (@context[:log_prefix] || '') + prefix
    end

    def log_prefix
      @log_prefix
    end

    def log_info(message)
      EXEL.logger.info(log(message))
    end

    def log_error(message)
      EXEL.logger.error(log(message))
    end

    # Prepends the log prefix to +message+.
    def log(message)
      "#{log_prefix} #{message}"
    end

    # Logs start and elapsed whole seconds around the block. The block
    # receives the start time as a Float (seconds since the epoch).
    def log_transaction(message = '')
      transaction_start_time = Time.now.to_f
      log_info "Started at #{transaction_start_time}"
      yield(transaction_start_time)
      transaction_end_time = Time.now.to_f
      log_info "Finished in #{(transaction_end_time - transaction_start_time).to_i} seconds #{message}"
    end

    # Runs the block; a StandardError is logged with its backtrace and then
    # re-raised unchanged.
    def log_exception(message = '')
      yield
    rescue => e
      log_error "Exception: #{e.message.chomp} #{message}"
      log_error e.backtrace.join("\n")
      raise
    end

    # Combines #log_exception and #log_transaction around the block.
    def log_process(message = '')
      log_exception(message) { log_transaction(message) { yield } }
    end

    # Sleeps for whatever is left of +duration+ seconds, counted from
    # +start_time+. Does nothing when the duration has already elapsed.
    def ensure_transaction_duration(duration, start_time)
      elapsed_time = Time.now.to_f - start_time.to_f
      # `duration.second` only exists when ActiveSupport is loaded; a plain
      # numeric conversion gives the same number of seconds without it.
      time_to_sleep = duration.to_f - elapsed_time
      sleep(time_to_sleep) if time_to_sleep > 0
    end
  end
end
@@ -0,0 +1,24 @@
require_relative '../processor_helper'

module EXEL
  module Processors
    # Processor that hands a block over to a SidekiqHandler for asynchronous
    # execution.
    class AsyncProcessor
      include EXEL::ProcessorHelper
      attr_reader :handler

      def initialize(context)
        @context = context
        @handler = EXEL::Handlers::SidekiqHandler.new(context)

        log_prefix_with '[AsyncProcessor]'
      end

      # Schedules +block+ through the handler, logging the call as a process.
      def process(block)
        log_process do
          handler.do_async(block)
          log_info 'call to async completed'
        end
      end
    end
  end
end
@@ -0,0 +1,85 @@
require 'csv'
require 'tempfile'
require_relative '../processor_helper'

module EXEL
  module Processors
    # Splits the CSV file stored in the context under :resource into chunk
    # files of at most +chunk_size+ rows and runs a callback for each chunk.
    class SplitProcessor
      include EXEL::ProcessorHelper

      attr_accessor :chunk_size, :file_name, :block

      DEFAULT_CHUNK_SIZE = 1000

      # context - must provide :resource (an object responding to #path);
      #           :csv_options is optional and defaults to {col_sep: ','}.
      def initialize(context)
        @chunk_size = DEFAULT_CHUNK_SIZE
        @buffer = []
        @tempfile_count = 0
        @context = context

        @file = context[:resource]
        @file_name = filename(@file)
        @csv_options = context[:csv_options] || { col_sep: ',' }

        log_prefix_with '[SplitProcessor]'
      end

      # Reads the source file row by row, flushing a chunk to +callback+
      # whenever the buffer fills up and once more at the end. A malformed
      # CSV is logged and ends the reading early; rows buffered so far are
      # still flushed. The source file is deleted afterwards.
      def process(callback)
        log_process do
          begin
            # Options are splatted as keywords: CSV.foreach no longer accepts
            # them as a positional hash on Ruby 3.
            CSV.foreach(@file.path, **@csv_options) do |line|
              process_line(line, callback)
            end
          rescue CSV::MalformedCSVError => e
            log_error "CSV::MalformedCSVError => #{e.message}"
          end
          process_line(:eof, callback)
          File.delete(@file.path)
        end
      end

      # Buffers +line+ (re-encoded as a CSV row), or flushes the buffer when
      # given the :eof marker.
      def process_line(line, callback)
        if line == :eof
          flush_buffer callback
        else
          @buffer << CSV.generate_line(line)

          flush_buffer callback if buffer_full?
        end
      end

      # Writes +content+ to a new Tempfile named after the source file and
      # the running chunk number; returns the file rewound to its start.
      def generate_chunk(content)
        @tempfile_count += 1
        chunk = Tempfile.new([chunk_filename, '.csv'])
        chunk.write(content)
        chunk.rewind

        log_info "Generated chunk # #{@tempfile_count} for file #{@file_name} in #{chunk.path}"
        chunk
      end

      def chunk_filename
        "#{@file_name}_#{@tempfile_count}_"
      end

      # Base name of the file's path up to (not including) its first dot.
      def filename(file)
        file_name_with_extension = file.path.split('/').last
        file_name_with_extension.split('.').first
      end

      private

      # Turns the buffered rows into a chunk file, stores it in the context
      # as :resource and runs the callback with that context. The buffer is
      # reset afterwards; an empty buffer produces no chunk.
      def flush_buffer(callback)
        unless @buffer.empty?
          file = generate_chunk(@buffer.join(''))
          callback.run(@context.merge!(resource: file))
        end
        @buffer = []
      end

      # >= rather than == so a chunk_size lowered below the current buffer
      # length still triggers a flush.
      def buffer_full?
        @buffer.size >= @chunk_size
      end
    end
  end
end
@@ -0,0 +1,35 @@
require 'tempfile'

module EXEL
  # Converts context values between their local form (open files) and their
  # remote form ("s3://..." URI strings).
  class Resource
    # Uploads +value+ when it is a File or Tempfile and returns the upload
    # result; every other value is returned unchanged.
    def self.remotize(value)
      file?(value) ? upload(value) : value
    end

    # Downloads +value+ when it is a String starting with "s3://" and
    # returns the downloaded file; every other value is returned unchanged.
    def self.localize(value)
      serialized?(value) ? deserialize_file(value) : value
    end

    class << self
      private

      def file?(value)
        value.is_a?(File) || value.is_a?(Tempfile)
      end

      # Only strings can be URIs. Checking the type first avoids calling =~
      # on arbitrary objects (not available on newer Rubies), and
      # start_with? anchors at the start of the string rather than at the
      # start of any line.
      def serialized?(value)
        value.is_a?(String) && value.start_with?('s3://')
      end

      def deserialize_file(uri)
        download(uri)
      end

      def download(uri)
        Handlers::S3Handler.new.download(uri)
      end

      def upload(file)
        Handlers::S3Handler.new.upload(file)
      end
    end
  end
end
@@ -0,0 +1,14 @@
require_relative './ast_node'

module EXEL
  # AST node that runs its children one after another. It carries a
  # NullInstruction because it has no work of its own.
  class SequenceNode < ASTNode
    # children - the child nodes, given as individual arguments.
    def initialize(*children)
      @instruction = NullInstruction.new
      @children = children
    end

    # Runs every child with +context+, in the order they were added.
    def run(context)
      @children.each do |node|
        node.run(context)
      end
    end
  end
end
data/lib/exel/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module EXEL
2
- VERSION = '0.0.1'
2
+ VERSION = '0.9.0'
3
3
  end
data/lib/exel.rb CHANGED
@@ -1,5 +1,23 @@
require 'exel/version'
require 'exel/logging'

# Top-level namespace: exposes the shared logger and the configuration hash,
# and loads every file under lib/exel.
module EXEL
  # Logger used throughout the library (delegates to EXEL::Logging).
  def self.logger
    EXEL::Logging.logger
  end

  def self.logger=(logger)
    EXEL::Logging.logger = logger
  end

  # Hash holding the library settings; created empty on first access.
  def self.configuration
    @config ||= {}
  end

  # Yields the configuration hash so callers can set values on it.
  def self.configure
    yield configuration
  end

  root = File.expand_path('../..', __FILE__)
  # Sorted so files load in the same order on every platform; the order
  # returned by Dir[] is not guaranteed on older Rubies.
  Dir[File.join(root, 'lib/exel/**/*.rb')].sort.each { |file| require file }
end