exel 0.0.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +2 -1
  3. data/.rspec +2 -1
  4. data/exel.gemspec +9 -2
  5. data/lib/exel/ast_node.rb +30 -0
  6. data/lib/exel/context.rb +79 -0
  7. data/lib/exel/deferred_context_value.rb +18 -0
  8. data/lib/exel/error/job_termination.rb +10 -0
  9. data/lib/exel/execution_worker.rb +13 -0
  10. data/lib/exel/handlers/s3_handler.rb +43 -0
  11. data/lib/exel/handlers/sidekiq_handler.rb +21 -0
  12. data/lib/exel/instruction.rb +17 -0
  13. data/lib/exel/instruction_node.rb +9 -0
  14. data/lib/exel/job.rb +74 -0
  15. data/lib/exel/logging.rb +30 -0
  16. data/lib/exel/null_instruction.rb +6 -0
  17. data/lib/exel/processor_helper.rb +67 -0
  18. data/lib/exel/processors/async_processor.rb +24 -0
  19. data/lib/exel/processors/split_processor.rb +85 -0
  20. data/lib/exel/resource.rb +35 -0
  21. data/lib/exel/sequence_node.rb +14 -0
  22. data/lib/exel/version.rb +1 -1
  23. data/lib/exel.rb +19 -1
  24. data/spec/exel/ast_node_spec.rb +52 -0
  25. data/spec/exel/context_spec.rb +151 -0
  26. data/spec/exel/deferred_context_value_spec.rb +21 -0
  27. data/spec/exel/execution_worker_spec.rb +13 -0
  28. data/spec/exel/handlers/s3_handler_spec.rb +49 -0
  29. data/spec/exel/handlers/sidekiq_handler_spec.rb +54 -0
  30. data/spec/exel/instruction_node_spec.rb +22 -0
  31. data/spec/exel/instruction_spec.rb +58 -0
  32. data/spec/exel/job_spec.rb +215 -0
  33. data/spec/exel/logging_spec.rb +36 -0
  34. data/spec/exel/null_instruction_spec.rb +5 -0
  35. data/spec/exel/processors/async_processor_spec.rb +16 -0
  36. data/spec/exel/processors/split_processor_spec.rb +90 -0
  37. data/spec/exel/resource_spec.rb +51 -0
  38. data/spec/exel/sequence_node_spec.rb +24 -0
  39. data/spec/spec_helper.rb +7 -0
  40. metadata +151 -18
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0c87f88078f409a37472e257ec0f70e31d1c19c4
4
- data.tar.gz: 9961dbc04ded3f33b5201e92c276846c26887701
3
+ metadata.gz: c5eccac7b009cd6a4063a36cbb53b70de2efa22b
4
+ data.tar.gz: f6e7dd493f08d09365158095290b0c319907215e
5
5
  SHA512:
6
- metadata.gz: b623b2d623fbe39a0ef54e314339bd1836f6cf21a81b6c860da9955f09cf4b617ce3f8dcba5e521763086246fefebbebbe74913d5cb6ac9fe26c68ca538b9e69
7
- data.tar.gz: d1cd09aa61c0e415fd4e4d15328062065aadff978b5667d7c0217dd49b2303957e84e6630f6bd3512665f336ea6f3db689a8dc3043dd0b284f9d5f8f58364cbf
6
+ metadata.gz: 4135007934f8288843da2852a13cb269214725997069a3704b03ad7f4f8f5df103f7824156f1e959de24ca42d6b1a81e72f3fac1eb9b02e174ec9825b66f3b34
7
+ data.tar.gz: d605c327290de5f1c71f4a4f552b71f7dc8d2469e146a5814e12581188011e6f462a4b08aa8434d2645e92cebaa76720bf0be14656dce1c0feb99c8e5f0e9ed6
data/.gitignore CHANGED
@@ -21,4 +21,5 @@ tmp
21
21
  *.a
22
22
  mkmf.log
23
23
  .idea
24
- *.iml
24
+ *.iml
25
+ .ruby-*
data/.rspec CHANGED
@@ -1 +1,2 @@
1
- --color
1
+ --color
2
+ --require spec_helper
data/exel.gemspec CHANGED
@@ -18,7 +18,14 @@ Gem::Specification.new do |spec|
18
18
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
19
  spec.require_paths = ['lib']
20
20
 
21
+ spec.add_dependency 'aws-sdk', '~> 2'
22
+ spec.add_dependency 'sidekiq', '~> 3'
23
+
21
24
  spec.add_development_dependency 'bundler', '~> 1.6'
22
- spec.add_development_dependency 'rake'
23
- spec.add_development_dependency 'rspec'
25
+ spec.add_development_dependency 'rake', '~> 10'
26
+ spec.add_development_dependency 'rspec', '~> 3'
27
+ spec.add_development_dependency 'guard', '~> 2'
28
+ spec.add_development_dependency 'guard-rspec', '~> 4'
29
+ spec.add_development_dependency 'terminal-notifier', '~> 1'
30
+ spec.add_development_dependency 'terminal-notifier-guard', '~> 1'
24
31
  end
@@ -0,0 +1,30 @@
1
module EXEL
  # Base class for the nodes of a job's abstract syntax tree.
  #
  # A node holds an instruction and a list of child nodes. Subclasses must
  # implement #run; #start is the entry point that runs the node while
  # swallowing (and logging) EXEL::Error::JobTermination.
  class ASTNode
    attr_reader :instruction, :children

    # @param instruction the instruction attached to this node
    # @param children [Array] child nodes (a fresh array per call by default)
    def initialize(instruction, children = [])
      @instruction = instruction
      @children = children
    end

    # Runs the node. A JobTermination raised while running is logged through
    # EXEL.logger instead of being propagated.
    #
    # @param context the context handed to #run
    def start(context)
      fail_silently { run(context) }
    end

    # Abstract hook; subclasses override it.
    #
    # @raise [RuntimeError] always, naming the method that is actually missing
    def run(_context)
      raise "#{self.class} does not implement #run"
    end

    # Appends +node+ to this node's children.
    def add_child(node)
      @children << node
    end

    private

    # Yields to the block (if any) and turns a JobTermination into an error
    # log line. Any other exception propagates unchanged.
    def fail_silently
      yield if block_given?
    rescue EXEL::Error::JobTermination => e
      EXEL.logger.error "JobTerminationError: #{e.message.chomp}"
    end
  end
end
@@ -0,0 +1,79 @@
1
require 'securerandom'
require 'tempfile'
2
+
3
module EXEL
  # Key/value store that carries a job's state between processors.
  #
  # Values read through #[] are passed through EXEL::Resource.localize and any
  # DeferredContextValue found in them (directly, or nested in arrays/hashes)
  # is resolved against this context. #serialize passes every value through
  # EXEL::Resource.remotize and uploads the marshalled context through
  # Handlers::S3Handler.
  class Context
    attr_reader :table

    # @param initial_context [Hash] initial entries (stored by reference, not copied)
    def initialize(initial_context = {})
      @table = initial_context
    end

    # Marshals a copy of this context whose values went through
    # EXEL::Resource.remotize and uploads it.
    #
    # @return the value returned by Handlers::S3Handler#upload
    def serialize
      remotized_table = @table.each_with_object({}) do |(key, value), acc|
        acc[key] = EXEL::Resource.remotize(value)
      end
      upload(serialize_context(remotized_table))
    end

    # Downloads the object at +uri+ and unmarshals it.
    #
    # @param uri the location handed to Handlers::S3Handler#download
    # @return the unmarshalled object
    def self.deserialize(uri)
      file = Handlers::S3Handler.new.download(uri)
      begin
        # NOTE(security): Marshal.load can instantiate arbitrary objects; the
        # downloaded data must come from a trusted bucket.
        Marshal.load(file.read)
      ensure
        # Close the handle even when reading or unmarshalling fails.
        file.close
      end
    end

    # Reads +key+, localizing the stored value and resolving deferred values.
    # The resolved value is written back to the table before being returned.
    def [](key)
      value = get_deferred(EXEL::Resource.localize(@table[key]))
      @table[key] = value
      value
    end

    def []=(key, value)
      @table[key] = value
    end

    # Merges +hash+ into the table in place.
    #
    # @return [Context] self
    def merge!(hash)
      @table.merge!(hash)
      self
    end

    def delete(key)
      @table.delete(key)
    end

    # Two contexts are equal when their tables are equal.
    def ==(other)
      other.is_a?(EXEL::Context) && table == other.table
    end

    private

    # Writes a marshalled Context built from +table+ to a binary tempfile and
    # rewinds it so it can be read from the start.
    def serialize_context(table)
      file = Tempfile.new(SecureRandom.uuid, encoding: 'ascii-8bit')
      file.write(Marshal.dump(Context.new(table)))
      file.rewind
      file
    end

    def upload(file)
      Handlers::S3Handler.new.upload(file)
    end

    # Resolves DeferredContextValue instances against this context. Arrays and
    # hashes are updated in place, element by element, and returned.
    def get_deferred(value)
      case value
      when DeferredContextValue then value.get(self)
      when Array then value.map! { |element| get_deferred(element) }
      when Hash then value.each { |k, v| value[k] = get_deferred(v) }
      else value
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
module EXEL
  # Records a chain of lookup keys so that the lookup can be performed later,
  # against a context supplied to #get.
  class DeferredContextValue
    attr_reader :keys

    def initialize
      @keys = []
    end

    # Appends +key+ to the recorded chain.
    #
    # @return [DeferredContextValue] self, so lookups can be chained
    def [](key)
      @keys.push(key)
      self
    end

    # Applies the recorded keys, in order, starting from +context+.
    #
    # @param context any object responding to #[]
    # @return the value found after the last lookup
    def get(context)
      @keys.inject(context) { |current, key| current[key] }
    end
  end
end
@@ -0,0 +1,10 @@
1
+ module EXEL
2
+ module Error
3
+ # Inherit from Exception rather than StandardError
4
+ # because rescue => e will only catch StandardError
5
+ # and allow the Exception to propagate to the root
6
+ # of the job
7
+ class JobTermination < Exception
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,13 @@
1
+ require 'sidekiq'
2
+
3
module EXEL
  # Sidekiq worker that restores a serialized context and starts the block
  # stored in it under the :_block key.
  class ExecutionWorker
    include Sidekiq::Worker

    # @param context_uri location handed to Context.deserialize
    def perform(context_uri)
      restored = Context.deserialize(context_uri)
      restored[:_block].start(restored)
    end
  end
end
@@ -0,0 +1,43 @@
1
+ require 'aws-sdk-resources'
2
+ require 'tempfile'
3
+
4
module EXEL
  module Handlers
    # Uploads files to, and downloads files from, the S3 bucket named by
    # EXEL.configuration[:s3_bucket].
    class S3Handler
      # Uploads +file+ under its base name and closes it.
      #
      # @param file an object responding to #path and #close (e.g. a Tempfile)
      # @return [String] an "s3://<name>" URI for the uploaded object
      def upload(file)
        filename = get_filename(file)
        begin
          get_object(filename).upload_file(file)
        ensure
          # Release the handle even when the upload raises.
          file.close
        end

        "s3://#{filename}"
      end

      # Downloads the object named by +uri+ into a new Tempfile.
      #
      # @param uri [String] a URI; everything after "://" is used as the object name
      # @return [Tempfile] the tempfile the object was written into
      def download(uri)
        filename = uri.partition('://').last
        obj = get_object(filename)
        # Receive the payload as binary, then relabel the handle as UTF-8.
        file = Tempfile.new(filename, encoding: Encoding::ASCII_8BIT)
        obj.get(response_target: file)
        file.set_encoding(Encoding::UTF_8)
        file
      end

      # Builds the S3 object handle for +filename+, using the credentials in
      # EXEL.configuration[:aws] and the fixed region 'us-east-1'.
      def get_object(filename)
        s3 = Aws::S3::Resource.new(
          credentials: Aws::Credentials.new(
            EXEL.configuration[:aws][:access_key_id],
            EXEL.configuration[:aws][:secret_access_key]
          ),
          region: 'us-east-1'
        )
        s3.bucket(EXEL.configuration[:s3_bucket]).object(filename)
      end

      private

      # Last path component of the file's path.
      def get_filename(file)
        File.basename(file.path)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'sidekiq'
2
+
3
module EXEL
  module Handlers
    # Schedules a block for asynchronous execution by pushing an
    # ExecutionWorker job through Sidekiq::Client.
    class SidekiqHandler
      # @param context the context to serialize; read with #[] and written with #[]=
      def initialize(context)
        @context = context
      end

      # Stores +block+ in the context under :_block, serializes the context
      # and pushes a job whose only argument is the serialized context.
      #
      # The context's :queue is forwarded when truthy. :retry is forwarded
      # whenever it is set, including an explicit +false+, so retries can be
      # disabled.
      #
      # @return the value returned by Sidekiq::Client.push
      def do_async(block)
        @context[:_block] = block

        push_args = { 'class' => ExecutionWorker, 'args' => [@context.serialize] }

        queue = @context[:queue]
        push_args['queue'] = queue if queue

        retry_option = @context[:retry]
        # nil means "not configured"; false is a meaningful setting.
        push_args['retry'] = retry_option unless retry_option.nil?

        Sidekiq::Client.push(push_args)
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module EXEL
  # A named instruction: a processor class, the arguments to merge into the
  # context before running it, and an optional subtree handed to the processor.
  class Instruction
    attr_reader :name

    # @param name the instruction's name
    # @param processor_class a class instantiated with the context; its
    #   instances must respond to #process
    # @param args [Hash, nil] entries merged into the context on #execute
    #   (nil is treated as an empty hash)
    # @param subtree passed to the processor's #process (nil when absent)
    def initialize(name, processor_class, args, subtree = nil)
      @name = name
      @processor_class = processor_class
      @args = args || {}
      @subtree = subtree
    end

    # Merges the arguments into +context+ in place, then runs a new processor
    # instance on the subtree.
    #
    # @return the value returned by the processor's #process
    def execute(context)
      context.merge!(@args)
      processor = @processor_class.new(context)
      processor.process(@subtree)
    end
  end
end
@@ -0,0 +1,9 @@
1
+ require_relative './ast_node'
2
+
3
module EXEL
  # AST node whose #run executes the instruction attached to the node.
  class InstructionNode < ASTNode
    # @param context the context handed to the instruction's #execute
    # @return the value returned by the instruction's #execute
    def run(context)
      instruction.execute(context)
    end
  end
end
data/lib/exel/job.rb ADDED
@@ -0,0 +1,74 @@
1
module EXEL
  # Registry and runner for jobs written in the EXEL DSL.
  module Job
    class << self
      # Registers +block+ under +job_name+.
      #
      # @raise [RuntimeError] when a job with that name is already registered
      def define(job_name, &block)
        raise "Job #{job_name.inspect} is already defined" unless registry[job_name].nil?
        registry[job_name] = block
      end

      # @return [Hash] registered job blocks keyed by job name
      def registry
        @registry ||= {}
      end

      # Parses and starts a job.
      #
      # @param dsl_code_or_name [Symbol, String, Proc] a Symbol is looked up
      #   in the registry; anything else is handed to the parser as DSL code
      # @param context [Hash, Object] a Hash is wrapped in an EXEL::Context
      # @raise [RuntimeError] when a Symbol names no registered job
      def run(dsl_code_or_name, context = {})
        context = EXEL::Context.new(context) if context.is_a?(Hash)
        ast = parse(dsl_code_or_name)
        raise %(Job "#{dsl_code_or_name}" not found) unless ast
        ast.start(context)
      end

      private

      # Returns the parsed AST, or nil when a Symbol names no registered job.
      def parse(dsl_code_or_name)
        return Parser.parse(dsl_code_or_name) unless dsl_code_or_name.is_a?(Symbol)

        job = registry[dsl_code_or_name]
        Parser.parse(job) if job
      end
    end

    # Evaluates DSL code and builds the corresponding AST. The DSL methods
    # (#process, #async, #split, #context) are called on a Parser instance.
    class Parser
      attr_reader :ast

      def initialize
        @ast = SequenceNode.new
      end

      # Evaluates +dsl_proc_or_code+ in the scope of a new parser.
      #
      # NOTE(security): a String argument is evaluated as Ruby code via
      # instance_eval; only pass trusted DSL source.
      #
      # @param dsl_proc_or_code [Proc, String]
      # @return the root node of the resulting AST
      def self.parse(dsl_proc_or_code)
        parser = Parser.new
        if dsl_proc_or_code.is_a?(::Proc)
          parser.instance_eval(&dsl_proc_or_code)
        else
          parser.instance_eval(dsl_proc_or_code)
        end
        parser.ast
      end

      # DSL: runs the processor class given as +:with+; the remaining options
      # become the instruction's arguments.
      def process(options, &block)
        # Work on a copy so the caller's hash keeps its :with entry.
        options = options.dup
        processor_class = options.delete(:with)
        add_instruction_node('process', processor_class, block, options)
      end

      # DSL: wraps the block in an AsyncProcessor instruction.
      def async(options = {}, &block)
        add_instruction_node('async', Processors::AsyncProcessor, block, options)
      end

      # DSL: wraps the block in a SplitProcessor instruction.
      def split(options = {}, &block)
        add_instruction_node('split', Processors::SplitProcessor, block, options)
      end

      # DSL: returns a placeholder for a context value to be resolved later.
      def context
        DeferredContextValue.new
      end

      private

      # Builds an instruction (parsing +block+ into a subtree when given) and
      # appends its node to the AST.
      def add_instruction_node(name, processor, block, args = {})
        sub_tree = block.nil? ? nil : Parser.parse(block)
        instruction = EXEL::Instruction.new(name, processor, args, sub_tree)
        node = sub_tree.nil? ? InstructionNode.new(instruction) : InstructionNode.new(instruction, [sub_tree])
        @ast.add_child(node)
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ require 'logger'
2
+
3
module EXEL
  # Holds the logger used by EXEL, built lazily from EXEL.configuration.
  module Logging
    DEFAULT_LEVEL = :info

    # @return the current logger, creating it on first use
    def self.logger
      @logger || initialize_logger
    end

    # Builds a Logger from the configured file name and level and stores it.
    #
    # @return [Logger]
    def self.initialize_logger
      @logger = Logger.new(log_filename)
      @logger.level = log_level
      @logger
    end

    # @return the configured :log_filename, or the platform's null device
    def self.log_filename
      # File::NULL is "/dev/null" on Unix and the equivalent device elsewhere.
      EXEL.configuration[:log_filename] || File::NULL
    end

    # @return [Integer] the Logger severity constant named by the configured
    #   :log_level (default: DEFAULT_LEVEL)
    def self.log_level
      level = EXEL.configuration[:log_level] || DEFAULT_LEVEL
      Logger.const_get(level.to_s.upcase)
    end

    # Replaces the logger; nil installs a logger that writes to the null device.
    def self.logger=(logger)
      @logger = logger || Logger.new(File::NULL)
    end
  end
end
@@ -0,0 +1,6 @@
1
module EXEL
  # Instruction that does nothing when executed.
  class NullInstruction
    # Ignores +context+ and returns nil.
    def execute(context)
      nil
    end
  end
end
@@ -0,0 +1,67 @@
1
module EXEL
  # Formatting and logging helpers meant to be mixed into processors.
  #
  # The logging helpers write through EXEL.logger. #log_prefix_with reads
  # @context[:log_prefix], so the including object must set @context first.
  module ProcessorHelper
    # Helper Methods

    # @return [String] each tag wrapped in brackets, concatenated: "[a][b]"
    def tag(*tags)
      tags.map { |t| "[#{t}]" }.join
    end

    # @return [String] the current time formatted as '%m/%e/%y %H:%M'
    def timestamp
      Time.now.strftime('%m/%e/%y %H:%M')
    end

    # @param file an object responding to #size (a size in bytes)
    # @return [String] the size in megabytes with two decimals, e.g. "1.50 MB"
    def file_size_in_mb(file)
      # 1 MB = 1024 * 1024 bytes (the previous divisor, 1_024_000, was neither
      # 10^6 nor 2^20).
      format('%.2f MB', file.size.to_f / 1_048_576)
    end

    # Logging Helpers

    # Sets this object's log prefix to the context's :log_prefix (if any)
    # followed by +prefix+.
    def log_prefix_with(prefix)
      @log_prefix = (@context[:log_prefix] || '') + prefix
    end

    def log_prefix
      @log_prefix
    end

    def log_info(message)
      EXEL.logger.info(log(message))
    end

    def log_error(message)
      EXEL.logger.error(log(message))
    end

    # @return [String] +message+ preceded by the log prefix and a space
    def log(message)
      "#{log_prefix} #{message}"
    end

    # Logs the start time, yields it to the block, then logs the elapsed
    # whole seconds followed by +message+.
    def log_transaction(message = '')
      transaction_start_time = Time.now.to_f
      log_info "Started at #{transaction_start_time}"
      yield(transaction_start_time)
      transaction_end_time = Time.now.to_f
      log_info "Finished in #{(transaction_end_time - transaction_start_time).to_i} seconds #{message}"
    end

    # Yields; a StandardError raised by the block is logged (message and
    # backtrace) and then re-raised.
    def log_exception(message = '')
      yield
    rescue => e
      log_error "Exception: #{e.message.chomp} #{message}"
      log_error e.backtrace.join("\n")
      raise
    end

    # Runs the block inside #log_transaction, itself inside #log_exception.
    def log_process(message = '')
      log_exception(message) { log_transaction(message) { yield } }
    end

    # Sleeps for whatever remains of +duration+ seconds since +start_time+.
    #
    # @param duration [Numeric] minimum duration in seconds (anything
    #   responding to #to_f)
    # @param start_time [Numeric] epoch seconds at which the work started
    def ensure_transaction_duration(duration, start_time)
      elapsed_time = Time.now.to_f - start_time.to_f
      # Integer#second is not core Ruby (it comes from ActiveSupport, which is
      # not loaded here); #to_f works for plain numbers.
      time_to_sleep = duration.to_f - elapsed_time
      sleep(time_to_sleep) if time_to_sleep > 0
    end
  end
end
@@ -0,0 +1,24 @@
1
+ require_relative '../processor_helper'
2
+
3
module EXEL
  module Processors
    # Processor that hands a block to a SidekiqHandler built from the context.
    class AsyncProcessor
      include EXEL::ProcessorHelper

      attr_reader :handler

      # @param context the context given to the handler and used for the log prefix
      def initialize(context)
        @context = context
        @handler = EXEL::Handlers::SidekiqHandler.new(context)
        log_prefix_with '[AsyncProcessor]'
      end

      # Passes +block+ to the handler's #do_async inside #log_process and
      # logs once the call has returned.
      def process(block)
        log_process do
          handler.do_async(block)
          log_info 'call to async completed'
        end
      end
    end
  end
end
@@ -0,0 +1,85 @@
1
+ require 'csv'
2
+ require 'tempfile'
3
+ require_relative '../processor_helper'
4
+
5
module EXEL
  module Processors
    # Splits the CSV file found in context[:resource] into chunk files of at
    # most +chunk_size+ lines and runs a callback once per chunk, with the
    # chunk stored in the context under :resource.
    class SplitProcessor
      include EXEL::ProcessorHelper

      attr_accessor :chunk_size, :file_name, :block

      DEFAULT_CHUNK_SIZE = 1000

      # @param context the context; :resource must respond to #path, and
      #   :csv_options (optional) holds the options passed to CSV.foreach
      def initialize(context)
        @chunk_size = DEFAULT_CHUNK_SIZE
        @buffer = []
        @tempfile_count = 0
        @context = context

        @file = context[:resource]
        @file_name = filename(@file)
        @csv_options = context[:csv_options] || { col_sep: ',' }

        log_prefix_with '[SplitProcessor]'
      end

      # Reads the file line by line, flushing a chunk to +callback+ whenever
      # the buffer is full and once more at the end, then deletes the file.
      # A malformed CSV is logged; the lines buffered so far are still flushed.
      #
      # @param callback an object responding to #run(context)
      def process(callback)
        log_process do
          begin
            # Options are passed as keywords: a positional hash is rejected
            # by CSV.foreach on Ruby 3.
            CSV.foreach(@file.path, **@csv_options) do |line|
              process_line(line, callback)
            end
          rescue CSV::MalformedCSVError => e
            log_error "CSV::MalformedCSVError => #{e.message}"
          end
          process_line(:eof, callback)
          File.delete(@file.path)
        end
      end

      # Buffers one parsed +line+ (re-encoded with CSV.generate_line), or
      # flushes the buffer when +line+ is :eof or the buffer is full.
      def process_line(line, callback)
        if line == :eof
          flush_buffer callback
        else
          @buffer << CSV.generate_line(line)

          flush_buffer callback if buffer_full?
        end
      end

      # Writes +content+ to a new, rewound Tempfile and logs its location.
      #
      # @return [Tempfile] the chunk file
      def generate_chunk(content)
        @tempfile_count += 1
        chunk = Tempfile.new([chunk_filename, '.csv'])
        chunk.write(content)
        chunk.rewind

        log_info "Generated chunk # #{@tempfile_count} for file #{@file_name} in #{chunk.path}"
        chunk
      end

      def chunk_filename
        "#{@file_name}_#{@tempfile_count}_"
      end

      # @return [String] the last path component of +file+ up to its first dot
      def filename(file)
        file_name_with_extension = file.path.split('/').last
        file_name_with_extension.split('.').first
      end

      private

      # Turns the buffered lines into a chunk file, runs the callback with the
      # chunk as the context's :resource, and empties the buffer.
      def flush_buffer(callback)
        unless @buffer.empty?
          file = generate_chunk(@buffer.join)
          callback.run(@context.merge!(resource: file))
        end
        @buffer = []
      end

      def buffer_full?
        @buffer.size == @chunk_size
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module EXEL
  # Converts values between their local form (files) and their remote form
  # ("s3://..." URIs) using Handlers::S3Handler.
  class Resource
    # @return the handler's upload result when +value+ is a File or Tempfile;
    #   otherwise +value+ itself
    def self.remotize(value)
      file?(value) ? upload(value) : value
    end

    # @return the handler's download result when +value+ is a String starting
    #   with "s3://"; otherwise +value+ itself
    def self.localize(value)
      serialized?(value) ? deserialize_file(value) : value
    end

    class << self
      private

      def file?(value)
        value.is_a?(File) || value.is_a?(Tempfile)
      end

      # Only Strings can be URIs. Checking the type first avoids calling =~ on
      # arbitrary objects (Object#=~ no longer exists on Ruby 3.2+), and
      # start_with? anchors at the start of the string rather than of any line.
      def serialized?(value)
        value.is_a?(String) && value.start_with?('s3://')
      end

      def deserialize_file(uri)
        download(uri)
      end

      def download(uri)
        Handlers::S3Handler.new.download(uri)
      end

      def upload(file)
        Handlers::S3Handler.new.upload(file)
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require_relative './ast_node'
2
+
3
module EXEL
  # AST node that runs its children in order. Its own instruction is a
  # NullInstruction.
  class SequenceNode < ASTNode
    # @param children the child nodes, in execution order
    def initialize(*children)
      @children = children
      @instruction = NullInstruction.new
    end

    # Calls #run on every child with the same +context+.
    def run(context)
      @children.each do |child|
        child.run(context)
      end
    end
  end
end
data/lib/exel/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
module EXEL
  # Gem version. Frozen so the constant cannot be mutated in place.
  VERSION = '0.9.0'.freeze
end
data/lib/exel.rb CHANGED
@@ -1,5 +1,23 @@
1
1
  require 'exel/version'
2
+ require 'exel/logging'
2
3
 
3
4
# Top-level namespace: exposes the shared logger and configuration, and loads
# every file under lib/exel.
module EXEL
  # @return the logger held by EXEL::Logging
  def self.logger
    EXEL::Logging.logger
  end

  # Replaces the logger held by EXEL::Logging.
  def self.logger=(logger)
    EXEL::Logging.logger = logger
  end

  # @return [Hash] the configuration hash (created empty on first use)
  def self.configuration
    @config ||= {}
  end

  # Yields the configuration hash so the caller can fill it in.
  def self.configure
    yield configuration
  end

  root = File.expand_path('../..', __FILE__)
  # Sorted so the load order does not depend on the filesystem's listing order.
  Dir[File.join(root, 'lib/exel/**/*.rb')].sort.each { |file| require file }
end