s3_data_packer 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/CHANGELOG.md +8 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +54 -0
- data/LICENSE.txt +21 -0
- data/README.md +258 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/s3_data_packer/bucket.rb +88 -0
- data/lib/s3_data_packer/configuration.rb +99 -0
- data/lib/s3_data_packer/filename_generator.rb +45 -0
- data/lib/s3_data_packer/json_batch.rb +93 -0
- data/lib/s3_data_packer/packer.rb +105 -0
- data/lib/s3_data_packer/queue.rb +46 -0
- data/lib/s3_data_packer/sources/object.rb +28 -0
- data/lib/s3_data_packer/sources/s3_bucket.rb +21 -0
- data/lib/s3_data_packer/summary.rb +59 -0
- data/lib/s3_data_packer/targets/object.rb +21 -0
- data/lib/s3_data_packer/targets/s3_bucket.rb +16 -0
- data/lib/s3_data_packer/thread_set.rb +98 -0
- data/lib/s3_data_packer/version.rb +3 -0
- data/lib/s3_data_packer.rb +41 -0
- data/s3_data_packer.gemspec +41 -0
- metadata +174 -0
module S3DataPacker
  # Holds every tunable setting for the packer. An instance is created lazily
  # by S3DataPacker.configuration and is normally mutated through
  # S3DataPacker.configure.
  class Configuration
    # Standard logger used for progress and diagnostic output.
    attr_accessor :logger

    # Number of worker threads reading and processing items. Balance this
    # against the speed at which item keys are gathered so the queue is not
    # drained before new keys arrive.
    attr_accessor :thread_count

    # Seconds a worker thread sleeps when the queue has no pending items.
    attr_accessor :thread_sleep_time

    # Seconds a worker thread waits before retrying when locked.
    attr_accessor :thread_lock_wait_time

    # Queue size cap, so the producer does not overflow the queue while
    # workers catch up.
    attr_accessor :max_queue_size

    # Seconds the producer pauses once the queue reaches max_queue_size.
    attr_accessor :max_queue_wait

    # Scratch directory for in-progress batch files. The packer attempts to
    # create this path before use if it does not exist.
    attr_accessor :workdir

    # When true, finalized batch files are deleted after being pushed. Set to
    # false to keep the output files in the workdir.
    attr_accessor :cleanup_batch

    # When true, the finalized batch file is GZip-compressed and the
    # uncompressed file removed.
    attr_accessor :compress_batch

    # Number of items written to each output batch.
    attr_accessor :batch_size

    # AWS credentials and region used when no explicit client is supplied.
    attr_accessor :s3_api_key
    attr_accessor :s3_api_secret
    attr_accessor :s3_region

    # String prefix included in generated batch filenames.
    attr_accessor :output_filename_prefix

    # String suffix included in generated batch filenames.
    attr_accessor :output_filename_suffix

    # Ordered list of parts used to construct output filenames.
    attr_accessor :output_filename_pattern

    # Character used to join filename parts together.
    attr_accessor :output_filename_splitter

    def initialize
      @thread_count             = 2
      @thread_sleep_time        = 1
      @thread_lock_wait_time    = 1
      @max_queue_size           = 10_000
      @max_queue_wait           = 5
      @batch_size               = 100_000
      @workdir                  = 'tmp/s3_data_packer'
      @cleanup_batch            = true
      @compress_batch           = true
      @output_filename_prefix   = nil
      @output_filename_suffix   = 'batch'
      @output_filename_pattern  = %i[timecode_int suffix]
      @output_filename_splitter = '_'
    end

    # True when finalized batches should be gzipped.
    def compress_batch?
      compress_batch == true
    end

    # True when pushed batch files should be removed from the workdir.
    def cleanup_batch?
      cleanup_batch == true
    end

    # Truthy when both the API key and secret have been configured.
    def s3_credentials?
      s3_api_key && s3_api_secret
    end

    # Aws::Credentials built from the configured key pair, or nil when the
    # credentials are not set.
    def default_s3_credentials
      return nil unless s3_credentials?

      Aws::Credentials.new(s3_api_key, s3_api_secret)
    end

    # Options hash consumed by FilenameGenerator as its defaults.
    def filename_generator_defaults
      {
        prefix:   output_filename_prefix,
        suffix:   output_filename_suffix,
        pattern:  output_filename_pattern,
        splitter: output_filename_splitter
      }
    end
  end
end
module S3DataPacker
  # Builds sequential batch filenames from a configurable pattern of parts
  # (timestamps, an incrementing counter, prefix/suffix strings).
  class FilenameGenerator
    # Pattern keys accepted in a filename pattern; each maps to an instance
    # method producing that part of the name.
    VALID_KEYS = %i[timecode_int timecode_dec number timestamp datestamp prefix suffix].freeze

    attr_reader :pattern, :number, :splitter, :prefix, :suffix

    # @param opts [Hash] :start_at (initial counter, default 1), :prefix,
    #   :suffix, :pattern, :splitter — unset options fall back to
    #   S3DataPacker.config.filename_generator_defaults.
    # @raise [ArgumentError] when the pattern holds a non-symbol or an
    #   unsupported key.
    def initialize(opts = {})
      @number   = opts[:start_at] || 1
      @prefix   = opts[:prefix]   || default_options[:prefix]
      @suffix   = opts[:suffix]   || default_options[:suffix]
      @pattern  = opts[:pattern]  || default_options[:pattern]
      @splitter = opts[:splitter] || default_options[:splitter]
      validate_pattern!
    end

    # Epoch seconds / fractional epoch seconds for pattern use.
    def timecode_int; Time.now.to_i; end
    def timecode_dec; Time.now.to_f; end

    # Full 14-character timestamp, e.g. "20240101123059".
    # Fixed: was '%Y%m%d%H%M%s' — lowercase '%s' expands to epoch seconds,
    # whereas '%S' (seconds 00-59) is what a compact timestamp intends.
    def timestamp; Time.now.strftime('%Y%m%d%H%M%S'); end

    # Date-only stamp, e.g. "20240101".
    def datestamp; Time.now.strftime('%Y%m%d'); end

    # Produce the next filename (blank parts dropped) and advance the counter.
    def generate!
      parts = pattern.map { |key| send(key) }
      parts.delete_if { |value| value.nil? || value == '' }
      filename = parts.map(&:to_s).join(splitter)
      @number += 1
      filename
    end

    private

    # Defaults pulled lazily from the global configuration.
    def default_options
      @default_options ||= S3DataPacker.config.filename_generator_defaults
    end

    # Ensure every pattern entry is one of the supported symbol keys.
    def validate_pattern!
      pattern.each do |item|
        raise ArgumentError, "Invalid pattern key, has to be a symbol" unless Symbol === item
        raise ArgumentError, "Invalid pattern key #{item}. Allowed: #{VALID_KEYS.join(', ')}" unless VALID_KEYS.include?(item)
      end
    end
  end
end
module S3DataPacker
  # Accumulates items into a newline-delimited JSON batch file on disk,
  # rolling to the configured size and optionally gzipping on finalize.
  class JSONBatch
    attr_reader :delimitter, :batch, :item_count

    # @param opts [Hash] :workdir (scratch directory), :filename_generator,
    #   :pre_processor (a Proc applied to each item before writing; should
    #   return a String), :size (items per batch). Unset options fall back to
    #   the global configuration.
    def initialize(opts = {})
      @delimitter = "\r\n"
      @workdir = opts[:workdir]
      @filename_generator = opts[:filename_generator]
      @pre_processor = opts[:pre_processor] # Should be a proc
      @size = opts[:size]
      @item_count = 0
      init_workdir!
    end

    # Maximum items per batch file.
    def size
      @size ||= S3DataPacker.config.batch_size
    end

    # Directory batch files are written into; created during initialize.
    def workdir
      @workdir ||= S3DataPacker.config.workdir
    end

    def filename_generator
      @filename_generator ||= S3DataPacker::FilenameGenerator.new
    end

    # True once the current batch holds `size` or more items.
    def full?
      item_count >= size
    end

    # Next on-disk path for a new batch file.
    def generate_filename
      name = filename_generator.generate!
      "#{workdir}/#{name}.json"
    end

    # Open a fresh batch file, closing the previous one if any.
    def new_file!
      close! if @batch
      @batch = File.open(generate_filename, 'w')
    end

    # Pre-process and append one item followed by the delimiter.
    def append_data!(data)
      digested = pre_process_data(data)
      batch << "#{digested}#{delimitter}"
      @item_count += 1
    end

    def path
      batch.path
    end

    def close!
      batch.close
    end

    # Remove the current batch file (if any) and reset counters.
    # Fixed: previously raised NoMethodError on nil when no batch was open
    # (e.g. when called twice, or before new_file!); now a safe no-op.
    def delete!
      return unless @batch
      close! unless @batch.closed?
      File.delete(path) if File.exist?(path)
      reset!
    end

    # Close the batch, optionally gzip it, reset state, and return the final
    # path (the .gz path when compression is enabled).
    def finalize!
      close! unless batch.closed?
      final_path = batch.path
      final_path = compress! if S3DataPacker.config.compress_batch?
      reset!
      final_path
    end

    private

    # Apply the optional pre-processor; expected to return a string.
    # (Renamed from the misspelled `pre_proccess_data`; private, so no
    # external callers are affected.)
    def pre_process_data(raw_data)
      return @pre_processor.call(raw_data) if @pre_processor
      raw_data
    end

    # NOTE(review): Dir.mkdir only creates the last path segment; a nested
    # default like 'tmp/s3_data_packer' fails when 'tmp' is missing —
    # consider FileUtils.mkdir_p.
    def init_workdir!
      Dir.mkdir(workdir) unless Dir.exist?(workdir)
    end

    # Shells out to the system `gzip` binary, which replaces the raw file
    # with "<path>.gz". NOTE(review): fails silently when gzip is
    # unavailable — consider Zlib::GzipWriter to drop the external
    # dependency.
    def compress!
      new_path = "#{batch.path}.gz"
      `gzip #{batch.path}`
      new_path
    end

    # Forget the current file handle and zero the item counter.
    def reset!
      @batch = nil
      @item_count = 0
    end
  end
end
module S3DataPacker
  # Orchestrates the pipeline: streams item keys from a source into a worker
  # queue, workers fetch each item and append it to the output batch, and
  # full batches are finalized and pushed to the target.
  class Packer
    module Error
      # Raised when every worker thread has exited while items remain.
      class DeadWorkers < StandardError; end
    end

    attr_reader :source, :target, :output

    # @param opts [Hash] :source (responds to #name/#each/#fetch),
    #   :target (responds to #name/#save_file), :output (batch writer,
    #   defaults to a new JSONBatch).
    def initialize(opts = {})
      @source = opts[:source]
      @target = opts[:target]
      @output = opts[:output] || S3DataPacker::JSONBatch.new
    end

    def summary
      @summary ||= S3DataPacker::Summary.new
    end

    def logger
      @logger ||= S3DataPacker.logger
    end

    def workers
      @workers ||= S3DataPacker::ThreadSet.new
    end

    # Run the whole packing job: spawn workers, enqueue every source item,
    # then drain and finalize. Worker threads are always killed on the way
    # out, whatever happens.
    def pack!
      log "Packing data from #{source.name} to #{target.name} ..."
      boot_workers!

      @start_time = Time.now
      begin
        each_item { |item| workers.queue.add!(item) }
        finalize_processing!
      rescue Exception => e # broad on purpose: logged then re-raised, ensure still runs
        log "Unexpected error, killing threads", :error
        raise e
      ensure
        workers.kill!
      end
    end

    # Append one fetched item to the output batch, flushing and rolling to a
    # new file when full. Called from worker threads under the ThreadSet lock.
    def process_item(data)
      output.append_data! data
      summary.count_processed
      if output.full?
        flush_batch!
        output.new_file!
      end
    end

    private

    # Signal workers to finish, wait for them, push the last open batch and
    # log the run summary.
    def finalize_processing!
      log "No more items found to enqueue, signaling workers to finish"
      workers.finish!
      workers.wait!
      workers.kill!
      log "Pushing last open batch #{output.path}"
      flush_batch!
      summary.set_time(@start_time, Time.now)
      log "Finished\n#{summary.flush!}"
    end

    # Yield each source item, aborting early if all workers have died.
    # (Log message typo fixed: "Workers diead" -> "Workers died".)
    def each_item(&block)
      source.each do |item|
        if workers.dead?
          log "Workers died", :error
          raise Error::DeadWorkers, 'Workers died'
        end
        summary.count_item
        yield item
      end
    end

    # Finalize the current batch file and hand it to the target.
    def flush_batch!
      summary.count_batch
      final_filename = output.finalize!
      send_file!(final_filename)
    end

    def send_file!(file)
      target.save_file file
    end

    # Open the first batch file and start worker threads that fetch queued
    # items and process them under the shared lock.
    def boot_workers!
      output.new_file!
      workers.spawn_threads! do |item|
        data = source.fetch(item)
        workers.lock.synchronize { process_item(data) }
        post_process_item(item)
      end
    end

    # Hook for subclasses: called with each item key after it is processed.
    def post_process_item(item)
      nil
    end

    def log(message, level = :info)
      logger.send level, "Main process: #{message}"
    end
  end
end
module S3DataPacker
  # Simple work queue with a soft size cap: the producer sleeps when the
  # queue is full so workers can catch up. Relies on MRI's GVL for the
  # safety of single Array operations; compound operations should be guarded
  # externally (ThreadSet exposes a Mutex for this).
  class Queue
    # Total number of items ever enqueued (not reduced by fetch!).
    attr_reader :total_items

    # @param opts [Hash] :max_items (size cap), :wait_time (pause seconds);
    #   both fall back to the global configuration.
    # (Removed the dead `attr_reader :items`, which was immediately shadowed
    # by the memoizing #items method below.)
    def initialize(opts = {})
      @max_items = opts[:max_items]
      @wait_time = opts[:wait_time]
      @total_items = 0
    end

    def max_items
      @max_items ||= S3DataPacker.config.max_queue_size
    end

    def wait_time
      @wait_time ||= S3DataPacker.config.max_queue_wait
    end

    # Backing array, created lazily.
    def items
      @items ||= []
    end

    # Enqueue an item; sleeps wait_time once the cap is reached so consumers
    # can drain the queue.
    def add!(item)
      items << item
      @total_items += 1
      if size >= max_items
        S3DataPacker.logger.info "Queue full, pausing"
        sleep(wait_time)
        S3DataPacker.logger.info "Resuming queue"
      end
    end

    # Pop the oldest item (nil when empty).
    def fetch!
      items.shift
    end

    def size
      items.size
    end

    # Drop all pending items and zero the counters.
    def reset!
      @items = []
      @total_items = 0
    end
  end
end
module S3DataPacker
  module Sources
    # Adapter that lets an arbitrary object act as a packer source by mapping
    # the source interface (#name, #each, #fetch) onto that object's own
    # method names.
    class Object
      # @param object [Object] the wrapped source
      # @param each_method [Symbol] method that yields one item key at a time
      # @param fetch_method [Symbol] method returning the data for a key
      # @param name_method [Symbol] method returning a human-readable name
      def initialize(object:, each_method: :each, fetch_method: :fetch, name_method: :name)
        @object = object
        @each_method = each_method
        @fetch_method = fetch_method
        @name_method = name_method
      end

      # Human-readable name of the wrapped source.
      def name
        @object.send(@name_method)
      end

      # Yield every item key produced by the wrapped object's each-method.
      def each(&block)
        @object.send(@each_method, &block)
      end

      # Fetch the data for a single item key.
      def fetch(item)
        @object.send(@fetch_method, item)
      end
    end
  end
end
module S3DataPacker
  module Sources
    # Packer source backed by an S3 bucket: iterates object keys and
    # downloads each one on fetch. Key listing/download come from the
    # S3DataPacker::Bucket superclass.
    class S3Bucket < S3DataPacker::Bucket
      # "s3://bucket/path" label used in logs.
      def name
        "s3://#{bucket_name}/#{path}"
      end

      # Yield every object key under the configured path.
      def each(&block)
        each_key { |key| yield key }
      end

      # Download the object body for +key+.
      def fetch(key)
        download(key)
      end
    end
  end
end
module S3DataPacker
  # Collects run statistics (items seen, items processed, batches pushed,
  # elapsed time) and renders them as a small text report.
  class Summary
    # Backing hash for all counters, created lazily.
    def stats
      @stats ||= {}
    end

    # Count one item discovered in the source.
    def count_item
      bump(:total_items)
    end

    # Count one item written to a batch.
    def count_processed
      bump(:processed)
    end

    # Count one finalized batch.
    def count_batch
      bump(:batches)
    end

    # Record elapsed wall-clock time between the two Time instances.
    def set_time(start_time, end_time)
      stats[:elapsed] = "#{(end_time.to_i - start_time.to_i)} seconds"
    end

    def total_items
      stats.fetch(:total_items, 0)
    end

    def processed
      stats.fetch(:processed, 0)
    end

    def batches
      stats.fetch(:batches, 0)
    end

    def elapsed
      stats[:elapsed]
    end

    # Render the report, clear all counters, and return the report string.
    def flush!
      report = [
        "Summary:",
        "Total Items: #{stats[:total_items]}",
        "Processed Items: #{stats[:processed]}",
        "Batches: #{stats[:batches]}",
        "Elapsed: #{stats[:elapsed]}"
      ].join("\n")
      reset!
      report
    end

    # Drop every recorded statistic.
    def reset!
      @stats = {}
    end

    private

    # Initialize-and-increment a single counter key.
    def bump(key)
      stats[key] = stats.fetch(key, 0) + 1
    end
  end
end
module S3DataPacker
  module Targets
    # Adapter that lets an arbitrary object act as a packer target by mapping
    # the target interface (#name, #save_file) onto that object's own method
    # names.
    class Object
      # @param object [Object] the wrapped target
      # @param name_method [Symbol] method returning a human-readable name.
      #   Default fixed from :each to :name — the old default was a
      #   copy-paste slip from Sources::Object that made #name invoke the
      #   wrong method.
      # @param save_file_method [Symbol] method accepting a local file path
      def initialize(object:, name_method: :name, save_file_method: :save_file)
        @object = object
        @name_method = name_method
        @save_file_method = save_file_method
      end

      # Human-readable name of the wrapped target.
      def name
        @object.send(@name_method)
      end

      # Hand a finalized batch file path to the wrapped target.
      def save_file(filepath)
        @object.send(@save_file_method, filepath)
      end
    end
  end
end
module S3DataPacker
  module Targets
    # Packer target backed by an S3 bucket: uploads finalized batch files
    # and optionally removes the local copy afterwards. Upload behaviour
    # comes from the S3DataPacker::Bucket superclass.
    class S3Bucket < S3DataPacker::Bucket
      # "s3://bucket/path" label used in logs.
      def name
        "s3://#{bucket_name}/#{path}"
      end

      # Upload the file, then delete the local copy when cleanup is enabled.
      def save_file(filepath)
        upload(filepath)
        return unless S3DataPacker.config.cleanup_batch?
        File.delete(filepath)
      end
    end
  end
end
module S3DataPacker
  # Pool of worker threads that drain a shared Queue. Workers poll in a loop,
  # sleeping when the queue is empty, and exit once finish! has been
  # signalled and the queue is drained. The shared Mutex (#lock) is exposed
  # so callers can serialize work inside the block given to spawn_threads!.
  class ThreadSet
    attr_reader :lock, :workers, :queue

    def initialize opts ={}
      @lock = Mutex.new
      @workers = []
      @finish = false
      @queue = S3DataPacker::Queue.new
    end

    # Seconds a worker sleeps when the queue is empty (from global config).
    def wait_time
      @wait_time ||= S3DataPacker.config.thread_sleep_time
    end

    # Seconds a worker waits before retrying after a ThreadError (from config).
    def lock_wait_time
      @lock_wait_time ||= S3DataPacker.config.thread_lock_wait_time
    end

    # Number of worker threads to spawn (from global config).
    def thread_count
      @thread_count ||= S3DataPacker.config.thread_count
    end

    # True when every worker has terminated. Thread#status is nil after an
    # exception and false after a normal exit.
    # NOTE(review): a mixed pool (some nil, some false) reports NOT dead,
    # since each comparison expects a single-element uniq — confirm intended.
    def dead?
      workers.map(&:status).uniq == [nil] || workers.map(&:status).uniq == [false]
    end

    # Forcefully terminate all worker threads.
    def kill!
      log 'All', "Killing #{workers.size} workers"
      workers.map(&:kill)
    end

    # Clear the finish flag and drop thread handles; silently does nothing
    # unless all workers are already dead.
    def reset!
      return unless dead?
      @finish = false
      @workers = []
    end

    # Signal workers to exit once the queue drains.
    def finish!
      @finish = true
    end

    # True once finish! was called and no work remains in the queue.
    def finished?
      @finish == true && queue.size == 0
    end

    # Log a message tagged with the worker id (or 'All').
    def log id, message, level = :info
      logger.send level, "Thread #{id}: #{message}"
    end

    # Block until every worker thread has exited.
    def wait!
      workers.map(&:join)
    end

    # Start one worker. It repeatedly pulls an item from the queue and yields
    # it to the block; on ThreadError (lock contention) it sleeps
    # lock_wait_time and retries the same item; when the queue is empty it
    # sleeps wait_time and polls again.
    def spawn_thread! id, &block
      @workers << Thread.new do
        log id, "Started!"
        loop do
          if finished?
            log id, "Finish signal up and no more work to pull - Exiting"
            break
          end
          item = queue.fetch!
          if item
            log id, "Processing item #{item}", :debug
            begin
              yield item
            rescue ThreadError
              log id, "Locked, retry in #{lock_wait_time}", :warn
              sleep(lock_wait_time)
              retry
            end
          else
            log id, "No more work found, sleeping for #{wait_time}"
            sleep(wait_time)
          end
        rescue Exception => e
          # NOTE(review): broad rescue, but the exception is re-raised so the
          # thread still dies; it is logged at :info — consider :error.
          log id, 'Unexpected error!'
          raise e
        end
      end
    end

    # Spawn thread_count workers all running the same block.
    def spawn_threads! &block
      logger.info "Spawning #{thread_count} threads"
      thread_count.times do |id|
        spawn_thread!(id, &block)
      end
    end

    private

    def logger
      @logger ||= S3DataPacker.logger
    end

  end
end
require 'csv'
require 'fileutils'
require 'json'
require 'logger'
require 'mime/types/full'
require 'aws-sdk-s3'

require 's3_data_packer/version'
require 's3_data_packer/configuration'
require 's3_data_packer/packer'
require 's3_data_packer/queue'
require 's3_data_packer/thread_set'
require 's3_data_packer/summary'
require 's3_data_packer/json_batch'
require 's3_data_packer/bucket'
require 's3_data_packer/filename_generator'

require 's3_data_packer/sources/object'
require 's3_data_packer/sources/s3_bucket'

require 's3_data_packer/targets/s3_bucket'
require 's3_data_packer/targets/object'

module S3DataPacker
  class << self
    # Lazily-built global Configuration instance.
    # (The redundant `attr_reader :configuration`, which this method
    # immediately shadowed, has been removed.)
    def configuration
      @configuration ||= Configuration.new
    end

    alias config configuration

    # Yield the global configuration for block-style setup:
    #   S3DataPacker.configure { |c| c.thread_count = 4 }
    def configure
      yield configuration
    end

    # Configured logger, or a file logger under log/. The directory is
    # created on demand so Logger.new does not fail on a fresh checkout.
    def logger
      @logger ||= config.logger || begin
        FileUtils.mkdir_p('log')
        Logger.new('log/s3_data_packer.log')
      end
    end
  end
end