s3_data_packer 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/.travis.yml +7 -0
- data/CHANGELOG.md +8 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/Gemfile.lock +54 -0
- data/LICENSE.txt +21 -0
- data/README.md +258 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/s3_data_packer/bucket.rb +88 -0
- data/lib/s3_data_packer/configuration.rb +99 -0
- data/lib/s3_data_packer/filename_generator.rb +45 -0
- data/lib/s3_data_packer/json_batch.rb +93 -0
- data/lib/s3_data_packer/packer.rb +105 -0
- data/lib/s3_data_packer/queue.rb +46 -0
- data/lib/s3_data_packer/sources/object.rb +28 -0
- data/lib/s3_data_packer/sources/s3_bucket.rb +21 -0
- data/lib/s3_data_packer/summary.rb +59 -0
- data/lib/s3_data_packer/targets/object.rb +21 -0
- data/lib/s3_data_packer/targets/s3_bucket.rb +16 -0
- data/lib/s3_data_packer/thread_set.rb +98 -0
- data/lib/s3_data_packer/version.rb +3 -0
- data/lib/s3_data_packer.rb +41 -0
- data/s3_data_packer.gemspec +41 -0
- metadata +174 -0
data/lib/s3_data_packer/configuration.rb
@@ -0,0 +1,99 @@
+module S3DataPacker
+  class Configuration
+    # Standard logger to output information.
+    attr_accessor :logger
+
+    # How many threads to run for reading and processing items. This needs to be
+    # balanced against the speed at which item keys are gathered, to avoid
+    # emptying the queue too early.
+    attr_accessor :thread_count
+
+    # Time in seconds to let a thread sleep when there are no pending items in the queue.
+    attr_accessor :thread_sleep_time
+
+    # Time in seconds for a thread to wait when locked.
+    attr_accessor :thread_lock_wait_time
+
+    # Maximum number of items to keep in the queue so it does not overflow while
+    # workers process items.
+    attr_accessor :max_queue_size
+
+    # Time in seconds to wait, once the queue has reached max_queue_size, before
+    # adding new items.
+    attr_accessor :max_queue_wait
+
+    # Directory to keep working files in. Make sure you have permissions on the
+    # path. If the path does not exist, the packer will try to create it before
+    # using it.
+    attr_accessor :workdir
+
+    # Whether to delete the finalized batch file after it is handed off. Set to
+    # false to keep the output files in the workdir.
+    attr_accessor :cleanup_batch
+
+    # Whether to compress the final batch file. If set to true, the output file
+    # is gzipped and the uncompressed file is removed.
+    attr_accessor :compress_batch
+
+    # Number of items in each final batch.
+    attr_accessor :batch_size
+
+    attr_accessor :s3_api_key
+    attr_accessor :s3_api_secret
+    attr_accessor :s3_region
+
+    # String prefix to include in output filenames for the batches.
+    attr_accessor :output_filename_prefix
+
+    # String suffix to include in output filenames for the batches.
+    attr_accessor :output_filename_suffix
+
+    # Desired pattern used to construct output filenames.
+    attr_accessor :output_filename_pattern
+
+    # Separator character used to join the values that make up a filename.
+    attr_accessor :output_filename_splitter
+
+    def initialize
+      @thread_count = 2
+      @thread_sleep_time = 1
+      @thread_lock_wait_time = 1
+      @max_queue_size = 10000
+      @max_queue_wait = 5
+      @batch_size = 100000
+      @workdir = 'tmp/s3_data_packer'
+      @cleanup_batch = true
+      @compress_batch = true
+      @output_filename_prefix = nil
+      @output_filename_suffix = 'batch'
+      @output_filename_pattern = %i[timecode_int suffix]
+      @output_filename_splitter = '_'
+    end
+
+    def compress_batch?
+      compress_batch == true
+    end
+
+    def cleanup_batch?
+      cleanup_batch == true
+    end
+
+    def s3_credentials?
+      s3_api_key && s3_api_secret
+    end
+
+    def default_s3_credentials
+      return nil unless s3_credentials?
+
+      Aws::Credentials.new(s3_api_key, s3_api_secret)
+    end
+
+    def filename_generator_defaults
+      { prefix: output_filename_prefix,
+        suffix: output_filename_suffix,
+        pattern: output_filename_pattern,
+        splitter: output_filename_splitter }
+    end
+
+  end
+end
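
All of these options can be set in one place through the `S3DataPacker.configure` block defined in `lib/s3_data_packer.rb` (the last file in this diff). A minimal sketch; the values below are illustrative, not the gem's defaults:

```ruby
require 's3_data_packer'

# Illustrative settings only; see Configuration#initialize above for the real defaults.
S3DataPacker.configure do |config|
  config.logger        = Logger.new($stdout)
  config.thread_count  = 4
  config.batch_size    = 50_000
  config.workdir       = 'tmp/packer'
  config.s3_api_key    = ENV['AWS_ACCESS_KEY_ID']
  config.s3_api_secret = ENV['AWS_SECRET_ACCESS_KEY']
  config.s3_region     = 'us-east-1'
  config.output_filename_prefix  = 'export'
  config.output_filename_pattern = %i[prefix datestamp number suffix]
end
```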
data/lib/s3_data_packer/filename_generator.rb
@@ -0,0 +1,45 @@
+module S3DataPacker
+  class FilenameGenerator
+    attr_reader :pattern, :number, :splitter
+
+    def initialize opts = {}
+      @number = opts[:start_at] || 1
+      @prefix = opts[:prefix] || default_options[:prefix]
+      @suffix = opts[:suffix] || default_options[:suffix]
+      @pattern = opts[:pattern] || default_options[:pattern]
+      @splitter = opts[:splitter] || default_options[:splitter]
+      validate_pattern!
+    end
+
+    def timecode_int; Time.now.to_i; end
+    def timecode_dec; Time.now.to_f; end
+    def number; @number; end
+    def timestamp; Time.now.strftime('%Y%m%d%H%M%S'); end
+    def datestamp; Time.now.strftime('%Y%m%d'); end
+    def prefix; @prefix; end
+    def suffix; @suffix; end
+
+    def generate!
+      name = pattern.map { |key| send(key) }
+      name.delete_if { |value| value.nil? || value == '' }
+      name = name.map(&:to_s).join(splitter)
+      @number += 1
+      name
+    end
+
+    private
+
+    def default_options
+      @default_options ||= S3DataPacker.config.filename_generator_defaults
+    end
+
+    def validate_pattern!
+      valid = %i[timecode_int timecode_dec number timestamp datestamp prefix suffix]
+      pattern.each do |item|
+        raise ArgumentError, "Invalid pattern key, has to be a symbol" unless Symbol === item
+        raise ArgumentError, "Invalid pattern key #{item}. Allowed: #{valid.join(', ')}" unless valid.include?(item)
+      end
+    end
+
+  end
+end
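
A sketch of the generator in isolation, assuming the configuration defaults above (`suffix` of `'batch'`, splitter `'_'`); the datestamp shown is an example value:

```ruby
gen = S3DataPacker::FilenameGenerator.new(
  prefix:   'export',
  pattern:  %i[prefix datestamp number suffix],
  start_at: 7
)
gen.generate! # => "export_20240101_7_batch" (nil/empty parts are dropped)
gen.generate! # => "export_20240101_8_batch" (number auto-increments)
```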
data/lib/s3_data_packer/json_batch.rb
@@ -0,0 +1,93 @@
+module S3DataPacker
+  class JSONBatch
+    attr_reader :delimiter, :batch, :item_count
+
+    def initialize opts = {}
+      @delimiter = "\r\n"
+      @workdir = opts[:workdir]
+      @filename_generator = opts[:filename_generator]
+      @pre_processor = opts[:pre_processor] # Should be a proc
+      @size = opts[:size]
+      @item_count = 0
+      init_workdir!
+    end
+
+    def size
+      @size ||= S3DataPacker.config.batch_size
+    end
+
+    def workdir
+      @workdir ||= S3DataPacker.config.workdir
+    end
+
+    def filename_generator
+      @filename_generator ||= S3DataPacker::FilenameGenerator.new
+    end
+
+    def full?
+      item_count >= size
+    end
+
+    def generate_filename
+      name = filename_generator.generate!
+      "#{workdir}/#{name}.json"
+    end
+
+    def new_file!
+      close! if @batch
+      @batch = File.open(generate_filename, 'w')
+    end
+
+    def append_data! data
+      digested = pre_process_data(data)
+      batch << "#{digested}#{delimiter}"
+      @item_count += 1
+    end
+
+    def path
+      batch.path
+    end
+
+    def close!
+      batch.close
+    end
+
+    def delete!
+      close! unless @batch.closed?
+      File.delete(path) if File.exist?(path)
+      reset!
+    end
+
+    def finalize!
+      close! unless batch.closed?
+      final_path = batch.path
+      final_path = compress! if S3DataPacker.config.compress_batch?
+      reset!
+      final_path
+    end
+
+    private
+
+    def pre_process_data(raw_data)
+      # Transformations happen here; must return a string.
+      return @pre_processor.call(raw_data) if @pre_processor
+      raw_data
+    end
+
+    def init_workdir!
+      Dir.mkdir(workdir) unless Dir.exist?(workdir)
+    end
+
+    def compress!
+      new_path = "#{batch.path}.gz"
+      `gzip #{batch.path}`
+      new_path
+    end
+
+    def reset!
+      @batch = nil
+      @item_count = 0
+    end
+
+  end
+end
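
The `pre_processor` hook is where structured records get serialized into the line-delimited output; a minimal sketch with a deliberately tiny batch size (the record shape is hypothetical):

```ruby
batch = S3DataPacker::JSONBatch.new(
  size: 2, # tiny on purpose, for illustration
  pre_processor: ->(record) { JSON.generate(record) }
)
batch.new_file!
batch.append_data!(id: 1, name: 'a')
batch.append_data!(id: 2, name: 'b')
batch.full?     # => true
batch.finalize! # closes the file, gzips it if compress_batch is on, returns the final path
```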
data/lib/s3_data_packer/packer.rb
@@ -0,0 +1,105 @@
+module S3DataPacker
+  class Packer
+    module Error
+      class DeadWorkers < StandardError; end
+    end
+
+    attr_reader :source, :target, :output
+
+    def initialize opts = {}
+      @source = opts[:source]
+      @target = opts[:target]
+      @output = opts[:output] || S3DataPacker::JSONBatch.new
+    end
+
+    def summary
+      @summary ||= S3DataPacker::Summary.new
+    end
+
+    def logger
+      @logger ||= S3DataPacker.logger
+    end
+
+    def workers
+      @workers ||= S3DataPacker::ThreadSet.new
+    end
+
+    def pack!
+      log "Packing data from #{source.name} to #{target.name} ..."
+      boot_workers!
+
+      @start_time = Time.now
+      begin
+        each_item { |item| workers.queue.add!(item) }
+        finalize_processing!
+      rescue Exception => e
+        log "Unexpected error, killing threads", :error
+        raise e
+      ensure
+        workers.kill!
+      end
+    end
+
+    def process_item(data)
+      output.append_data! data
+      summary.count_processed
+      if output.full?
+        flush_batch!
+        output.new_file!
+      end
+    end
+
+    private
+
+    def finalize_processing!
+      log "No more items found to enqueue, signaling workers to finish"
+      workers.finish!
+      workers.wait!
+      workers.kill!
+      log "Pushing last open batch #{output.path}"
+      flush_batch!
+      summary.set_time(@start_time, Time.now)
+      log "Finished\n#{summary.flush!}"
+    end
+
+    def each_item &block
+      source.each do |item|
+        if workers.dead?
+          log "Workers died", :error
+          raise Error::DeadWorkers, 'Workers died'
+        end
+        summary.count_item
+        yield item
+      end
+    end
+
+    def flush_batch!
+      summary.count_batch
+      final_filename = output.finalize!
+      send_file!(final_filename)
+    end
+
+    def send_file!(file)
+      target.save_file file
+    end
+
+    def boot_workers!
+      output.new_file!
+      workers.spawn_threads! do |item|
+        data = source.fetch(item)
+        workers.lock.synchronize { process_item(data) }
+        post_process_item(item)
+      end
+    end
+
+    def post_process_item(item)
+      # Do something with the key after it is processed
+      nil
+    end
+
+    def log(message, level = :info)
+      logger.send level, "Main process: #{message}"
+    end
+
+  end
+end
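
`post_process_item` is a deliberate no-op hook. One way to use it, sketched here and not part of the gem, is to subclass the packer and record every key that finished processing:

```ruby
class AuditedPacker < S3DataPacker::Packer
  private

  # Hypothetical bookkeeping: remember each source key after it is packed.
  def post_process_item(item)
    (@done_keys ||= []) << item
    nil
  end
end
```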
data/lib/s3_data_packer/queue.rb
@@ -0,0 +1,46 @@
+module S3DataPacker
+  class Queue
+    attr_reader :total_items, :items
+
+    def initialize opts = {}
+      @max_items = opts[:max_items]
+      @wait_time = opts[:wait_time]
+      @total_items = 0
+    end
+
+    def max_items
+      @max_items ||= S3DataPacker.config.max_queue_size
+    end
+
+    def wait_time
+      @wait_time ||= S3DataPacker.config.max_queue_wait
+    end
+
+    def items
+      @items ||= []
+    end
+
+    def add!(item)
+      items << item
+      @total_items += 1
+      if size >= max_items
+        S3DataPacker.logger.info "Queue full, pausing"
+        sleep(wait_time)
+        S3DataPacker.logger.info "Resuming queue"
+      end
+    end
+
+    def fetch!
+      items.shift
+    end
+
+    def size
+      items.size
+    end
+
+    def reset!
+      @items = []
+      @total_items = 0
+    end
+  end
+end
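
The queue applies backpressure inside `add!` itself: once `size` reaches `max_items`, the producer thread sleeps, giving workers time to drain. A sketch of it in isolation:

```ruby
queue = S3DataPacker::Queue.new(max_items: 2, wait_time: 1)
queue.add!('key/1')
queue.add!('key/2') # hits max_items: logs "Queue full, pausing" and sleeps 1s
queue.fetch!        # => "key/1" (FIFO via Array#shift)
queue.size          # => 1
queue.total_items   # => 2 (everything ever enqueued, not the current size)
```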
data/lib/s3_data_packer/sources/object.rb
@@ -0,0 +1,28 @@
+module S3DataPacker
+  module Sources
+    class Object
+
+      def initialize(object:, each_method: :each, fetch_method: :fetch, name_method: :name)
+        @object = object
+        @each_method = each_method
+        @fetch_method = fetch_method
+        @name_method = name_method
+      end
+
+      def name
+        @object.send(@name_method)
+      end
+
+      def each &block
+        @object.send(@each_method) do |item|
+          yield item
+        end
+      end
+
+      def fetch(item)
+        @object.send(@fetch_method, item)
+      end
+
+    end
+  end
+end
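
Any object that can enumerate keys and fetch a single item can act as a source through this wrapper; a self-contained sketch with a hypothetical in-memory catalog:

```ruby
# Hypothetical source object, for illustration only.
class Catalog
  def label
    'memory-catalog'
  end

  def each_id
    [1, 2, 3].each { |id| yield id }
  end

  def read(id)
    JSON.generate(id: id)
  end
end

source = S3DataPacker::Sources::Object.new(
  object:       Catalog.new,
  each_method:  :each_id,
  fetch_method: :read,
  name_method:  :label
)
source.name     # => "memory-catalog"
source.fetch(1) # => "{\"id\":1}"
```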
data/lib/s3_data_packer/sources/s3_bucket.rb
@@ -0,0 +1,21 @@
+module S3DataPacker
+  module Sources
+
+    class S3Bucket < S3DataPacker::Bucket
+      def name
+        "s3://#{bucket_name}/#{path}"
+      end
+
+      def each(&block)
+        each_key do |key|
+          yield key
+        end
+      end
+
+      def fetch(key)
+        download(key)
+      end
+    end
+
+  end
+end
data/lib/s3_data_packer/summary.rb
@@ -0,0 +1,59 @@
+module S3DataPacker
+  class Summary
+    def stats
+      @stats ||= {}
+    end
+
+    def count_item
+      stats[:total_items] ||= 0
+      stats[:total_items] += 1
+    end
+
+    def count_processed
+      stats[:processed] ||= 0
+      stats[:processed] += 1
+    end
+
+    def count_batch
+      stats[:batches] ||= 0
+      stats[:batches] += 1
+    end
+
+    def set_time start_time, end_time
+      stats[:elapsed] = "#{(end_time.to_i - start_time.to_i)} seconds"
+    end
+
+    def total_items
+      stats[:total_items] || 0
+    end
+
+    def processed
+      stats[:processed] || 0
+    end
+
+    def batches
+      stats[:batches] || 0
+    end
+
+    def elapsed
+      stats[:elapsed]
+    end
+
+    def flush!
+      output = [
+        "Summary:",
+        "Total Items: #{stats[:total_items]}",
+        "Processed Items: #{stats[:processed]}",
+        "Batches: #{stats[:batches]}",
+        "Elapsed: #{stats[:elapsed]}"
+      ].join("\n")
+      reset!
+      output
+    end
+
+    def reset!
+      @stats = {}
+    end
+
+  end
+end
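
The counters initialize lazily, so the summary can be exercised on its own:

```ruby
summary = S3DataPacker::Summary.new
3.times { summary.count_item }
2.times { summary.count_processed }
summary.count_batch
summary.set_time(Time.now - 10, Time.now)
puts summary.flush! # prints the report ("Total Items: 3" etc.) and resets the stats
```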
data/lib/s3_data_packer/targets/object.rb
@@ -0,0 +1,21 @@
+module S3DataPacker
+  module Targets
+    class Object
+
+      def initialize(object:, name_method: :name, save_file_method: :save_file)
+        @object = object
+        @name_method = name_method
+        @save_file_method = save_file_method
+      end
+
+      def name
+        @object.send(@name_method)
+      end
+
+      def save_file(filepath)
+        @object.send(@save_file_method, filepath)
+      end
+
+    end
+  end
+end
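
The target wrapper mirrors the source one: it only needs a name and a way to save a finished file. A sketch with a hypothetical local-directory target:

```ruby
require 'fileutils'

# Hypothetical target that moves finished batches into a local directory.
class LocalDrop
  def label
    'local-drop'
  end

  def store(filepath)
    FileUtils.mv(filepath, '/var/exports/')
  end
end

target = S3DataPacker::Targets::Object.new(
  object:           LocalDrop.new,
  name_method:      :label,
  save_file_method: :store
)
```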
data/lib/s3_data_packer/targets/s3_bucket.rb
@@ -0,0 +1,16 @@
+module S3DataPacker
+  module Targets
+
+    class S3Bucket < S3DataPacker::Bucket
+      def name
+        "s3://#{bucket_name}/#{path}"
+      end
+
+      def save_file(filepath)
+        upload(filepath)
+        File.delete(filepath) if S3DataPacker.config.cleanup_batch?
+      end
+    end
+
+  end
+end
data/lib/s3_data_packer/thread_set.rb
@@ -0,0 +1,98 @@
+module S3DataPacker
+  class ThreadSet
+    attr_reader :lock, :workers, :queue
+
+    def initialize opts = {}
+      @lock = Mutex.new
+      @workers = []
+      @finish = false
+      @queue = S3DataPacker::Queue.new
+    end
+
+    def wait_time
+      @wait_time ||= S3DataPacker.config.thread_sleep_time
+    end
+
+    def lock_wait_time
+      @lock_wait_time ||= S3DataPacker.config.thread_lock_wait_time
+    end
+
+    def thread_count
+      @thread_count ||= S3DataPacker.config.thread_count
+    end
+
+    def dead?
+      workers.map(&:status).uniq == [nil] || workers.map(&:status).uniq == [false]
+    end
+
+    def kill!
+      log 'All', "Killing #{workers.size} workers"
+      workers.map(&:kill)
+    end
+
+    def reset!
+      return unless dead?
+      @finish = false
+      @workers = []
+    end
+
+    def finish!
+      @finish = true
+    end
+
+    def finished?
+      @finish == true && queue.size == 0
+    end
+
+    def log id, message, level = :info
+      logger.send level, "Thread #{id}: #{message}"
+    end
+
+    def wait!
+      workers.map(&:join)
+    end
+
+    def spawn_thread! id, &block
+      @workers << Thread.new do
+        log id, "Started!"
+        loop do
+          if finished?
+            log id, "Finish signal up and no more work to pull - Exiting"
+            break
+          end
+          item = queue.fetch!
+          if item
+            log id, "Processing item #{item}", :debug
+            begin
+              yield item
+            rescue ThreadError
+              log id, "Locked, retry in #{lock_wait_time}", :warn
+              sleep(lock_wait_time)
+              retry
+            end
+          else
+            log id, "No more work found, sleeping for #{wait_time}"
+            sleep(wait_time)
+          end
+        rescue Exception => e
+          log id, 'Unexpected error!'
+          raise e
+        end
+      end
+    end
+
+    def spawn_threads! &block
+      logger.info "Spawning #{thread_count} threads"
+      thread_count.times do |id|
+        spawn_thread!(id, &block)
+      end
+    end
+
+    private
+
+    def logger
+      @logger ||= S3DataPacker.logger
+    end
+
+  end
+end
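
Outside the packer, the set can be driven directly: spawn workers with a block, feed the shared queue, then raise the finish flag and join. A sketch:

```ruby
set = S3DataPacker::ThreadSet.new
set.spawn_threads! do |item|
  # The block runs on every worker; synchronize around shared state.
  set.lock.synchronize { puts "handled #{item}" }
end
10.times { |i| set.queue.add!(i) }
set.finish! # workers exit once the queue drains
set.wait!   # join all workers
set.kill!   # belt and braces: make sure nothing is left running
```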
data/lib/s3_data_packer.rb
@@ -0,0 +1,41 @@
+require 'csv'
+require 'json'
+require 'logger'
+require 'mime/types/full'
+require 'aws-sdk-s3'
+
+require 's3_data_packer/version'
+require 's3_data_packer/configuration'
+require 's3_data_packer/packer'
+require 's3_data_packer/queue'
+require 's3_data_packer/thread_set'
+require 's3_data_packer/summary'
+require 's3_data_packer/json_batch'
+require 's3_data_packer/bucket'
+require 's3_data_packer/filename_generator'
+
+require 's3_data_packer/sources/object'
+require 's3_data_packer/sources/s3_bucket'
+
+require 's3_data_packer/targets/s3_bucket'
+require 's3_data_packer/targets/object'
+
+module S3DataPacker
+  class << self
+    attr_reader :configuration
+
+    def configuration
+      @configuration ||= Configuration.new
+    end
+
+    alias config configuration
+
+    def configure
+      yield configuration
+    end
+
+    def logger
+      @logger ||= config.logger || Logger.new('log/s3_data_packer.log')
+    end
+  end
+end
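
Putting the pieces together: configure once, wrap a source and a target (for instance the hypothetical `Catalog` and `LocalDrop` wrappers sketched earlier), and let the packer drive the queue and workers:

```ruby
S3DataPacker.configure do |config|
  config.batch_size = 10_000 # illustrative
end

packer = S3DataPacker::Packer.new(source: source, target: target)
packer.pack! # enqueues every key, drains it through the workers, ships each batch
```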