file_worker 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +0 -0
- data/bin/file_worker +17 -0
- data/lib/file_worker/cli.rb +48 -0
- data/lib/file_worker/default_worker.rb +11 -0
- data/lib/file_worker/directory_scanner.rb +100 -0
- data/lib/file_worker/s3_upload_worker.rb +34 -0
- data/lib/file_worker/version.rb +3 -0
- data/lib/file_worker.rb +7 -0
- data/test/directory_scanner_test.rb +126 -0
- data/test/s3_upload_worker_test.rb +35 -0
- data/test/test_helper.rb +37 -0
- metadata +132 -0
data/README.md
ADDED
File without changes
|
data/bin/file_worker
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line entry point: builds a directory scanner from the command-line
# options, registers an error reporter, and starts the scan loop.

require "rubygems"
require "bundler"
Bundler.require

require 'file_worker/cli'

scanner = FileWorker::Cli.build_scanner

# Report every failed file on standard error, followed by the exception and
# its backtrace. The block parameter is named `exception`; referring to it by
# any other name would raise NameError on the first failure.
scanner.on_error do |file_name, exception|
  $stderr.puts("Failed to process #{file_name}")
  $stderr.puts(exception)
  # Array() guards against an exception that carries no backtrace.
  $stderr.puts(Array(exception.backtrace).join("\n"))
end

scanner.start
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module FileWorker
  # Command-line front end: turns ARGV into the options hash handed to
  # DirectoryScanner.
  class Cli
    # Parses the command line and builds the scanner.
    #
    # Recognised switches: -w/--workers, -o/--out, -s/--sleep, -q/--queuesize
    # and -h/--help (prints the usage text and exits). The first remaining
    # positional argument is the directory to scan; it defaults to the current
    # working directory. The output directory defaults to a sibling of the
    # input directory named "done".
    #
    # Numeric switches are coerced by OptionParser, so non-numeric input is
    # rejected by the parser instead of silently becoming 0. A numeric switch
    # given without a value leaves the corresponding default untouched.
    #
    # Returns a DirectoryScanner built from the collected options.
    def self.build_scanner
      options = {
        :workers => 5
      }

      OptionParser.new do |opts|
        opts.banner = "Usage: #{$0} [options]"

        opts.on("-w", "--workers [NUMBER]", Integer, "The number of worker threads") do |workers|
          # The value is optional; keep the default when it is omitted.
          options[:workers] = workers if workers
        end

        opts.on("-o", "--out [DIRECTORY]", "The directory to put the files when they have been processed") do |out|
          options[:out_directory] = out
        end

        opts.on("-s", "--sleep [NUMBER]", Float, "The number of seconds to sleep between scanning the in-directory") do |sleep_time|
          options[:sleep] = sleep_time
        end

        opts.on("-q", "--queuesize [NUMBER]", Integer, "The maximum queue size to keep in memory") do |max_queue_size|
          # A missing value must not turn into 0: the scanner treats 0 as a
          # real limit and would never enqueue anything.
          options[:max_queue_size] = max_queue_size if max_queue_size
        end

        opts.on_tail("-h", "--help", "Show this message") do
          puts opts
          exit
        end
      end.parse!

      # parse! removes the switches from ARGV, so ARGV[0] is the first
      # positional argument, if any.
      root = ARGV[0]
      options[:in_directory] = root ? File.expand_path(root, Dir.pwd) : Dir.pwd

      options[:out_directory] ||= File.expand_path('../done', options[:in_directory])

      puts options.inspect

      DirectoryScanner.new(options)
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'girl_friday'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'jruby/synchronized'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
module FileWorker
  # Repeatedly scans an input directory, pushes the files it finds onto a
  # GirlFriday work queue, runs a worker object on each one and moves the
  # file to the output directory when the worker finishes without raising.
  #
  # Options (all read from the hash given to #initialize):
  #   :in_directory   - directory to scan (required)
  #   :out_directory  - directory processed files are moved to (required)
  #   :sleep          - seconds to sleep between scans (default 1)
  #   :max_queue_size - upper bound on the queue backlog (default 1000)
  #   :workers        - size passed to the work queue (default DEFAULT_WORKERS)
  class DirectoryScanner
    # Queue size used when :workers is absent or not a positive number.
    DEFAULT_WORKERS = 3

    attr_accessor :worker_class
    attr_reader :in_path, :done_path, :state

    def initialize(options)
      @options = options
      @in_path = Pathname.new(@options[:in_directory])
      @done_path = Pathname.new(@options[:out_directory])
      @sleep = @options[:sleep] || 1

      @max_queue_size = @options[:max_queue_size] || 1000

      # nil.to_i is 0, so a missing or non-positive value falls back to the
      # default instead of creating a queue that can never do any work.
      requested_workers = @options[:workers].to_i
      @workers = requested_workers > 0 ? requested_workers : DEFAULT_WORKERS

      @worker_class = DefaultWorker

      # Maps file name => {:time, :status} for files that are enqueued or
      # being worked on; used by #scan to avoid enqueuing a file twice.
      @state = {}
      @state.extend JRuby::Synchronized
      @queue_name = "file_worker"

      @error_handlers = []
    end

    # Runs the worker on one file and moves the file to the output directory.
    # Any exception is passed to the registered error handlers instead of
    # being raised; the file then stays where it is.
    #
    # This method is called by the worker threads, so remember to keep it thread safe
    def process(file_name)
      @state[file_name] = {:time => Time.now, :status => :working}

      @worker_class.new(file_name, @options).process
      FileUtils.mv(file_name, @done_path)
    rescue Exception => e
      handle_error(file_name, e)
    ensure
      # Always forget the file, even when an error handler itself raises;
      # otherwise the entry would keep #scan from ever picking it up again.
      @state.delete(file_name)
    end

    # The lazily created work queue whose jobs call #process.
    def queue
      @queue ||= GirlFriday::WorkQueue.new(@queue_name, :size => @workers) do |file_name|
        process(file_name)
      end
    end

    # Number of jobs the queue reports as backlog.
    def queue_size
      queue.status[@queue_name][:backlog]
    end

    # Records the file as enqueued and pushes it onto the work queue.
    def enqueue(file_name)
      @state[file_name] = {:time => Time.now, :status => :enqueued}

      queue.push(file_name)
    end

    # Blocks until the queue is empty and reports no busy workers.
    def wait_for_empty
      queue.wait_for_empty

      sleep 0.5

      while queue.status[@queue_name][:busy] != 0
        sleep 0.5
      end
    end

    # Enqueues files from the input directory that are not already tracked in
    # #state, without letting the backlog exceed the maximum queue size.
    def scan
      file_names = Dir.glob((@in_path + '*').to_s) - @state.keys

      # Clamp at zero: a negative length would make the slice nil.
      max_items = [@max_queue_size - queue_size, 0].max

      file_names.first(max_items).each do |file_name|
        enqueue(file_name)
      end
    end

    # Scans in a loop, sleeping between scans, until #stop is called.
    def start
      @run = true
      while @run
        scan
        sleep(@sleep)
      end
    end

    # Makes the #start loop exit after its current iteration.
    def stop
      @run = false
    end

    # Registers a block called with (file_name, exception) for every failure.
    def on_error(&block)
      @error_handlers << block
    end

    # Passes a failure to every registered error handler, in registration order.
    def handle_error(file_name, exception)
      @error_handlers.each do |error_handler|
        error_handler.call(file_name, exception)
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require "fog"
|
2
|
+
|
3
|
+
module FileWorker
  # Worker that uploads one file to an S3 bucket through fog.
  #
  # Configuration is read from the environment when the worker is created:
  # S3_BUCKET, AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY. ENV.fetch raises
  # if any of them is missing.
  class S3UploadWorker
    # file_name - path of the file to upload.
    # options   - accepted for interface compatibility with other workers;
    #             not used here.
    def initialize(file_name, options)
      @file_name = file_name
      @bucket_name = ENV.fetch("S3_BUCKET")
      @aws_key_id = ENV.fetch("AWS_ACCESS_KEY_ID")
      @aws_secret = ENV.fetch("AWS_SECRET_ACCESS_KEY")
    end

    # Creates a non-public object in the bucket, keyed by the file's base
    # name, with the file's contents as the body. Returns whatever the
    # bucket's `files.create` returns.
    def process
      body = File.open(@file_name)
      begin
        bucket.files.create(
          :key => File.basename(@file_name),
          :body => body,
          :public => false
        )
      ensure
        # Release the file handle once the upload call has returned or
        # raised. The respond_to? check keeps non-IO bodies (such as test
        # doubles) working.
        body.close if body.respond_to?(:close)
      end
    end

    private

    # Memoized fog storage connection for AWS.
    def connection
      @connection ||= Fog::Storage.new(
        :provider => "AWS",
        :aws_access_key_id => @aws_key_id,
        :aws_secret_access_key => @aws_secret
      )
    end

    # Looks up the configured bucket on the connection.
    def bucket
      connection.directories.get(@bucket_name)
    end
  end
end
|
data/lib/file_worker.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
require File.expand_path("test_helper", File.dirname(__FILE__))
|
2
|
+
require 'jruby/synchronized'
|
3
|
+
|
4
|
+
# Worker double for the scanner specs: #process always raises, using the
# file name it was created with as the exception.
class FailingTestWorker
  # options is accepted to match the worker interface and is ignored.
  def initialize(file_name, options)
    @file_name = file_name
  end

  def process
    fail(@file_name)
  end
end
|
13
|
+
|
14
|
+
# Specs for FileWorker::DirectoryScanner, run against the fixture
# directories prepared by the test helper.
describe "the directory scanner" do
  before do
    prepare_fixture_files

    scanner_options = {
      :in_directory => @fixture_root + 'in',
      :out_directory => @fixture_root + 'done',
      :max_queue_size => 10
    }
    @file_worker = FileWorker::DirectoryScanner.new(scanner_options)

    # Collects the names of the files reported to the error handler.
    @errors = []
    @errors.extend JRuby::Synchronized

    @file_worker.on_error { |file_name, exception| @errors << file_name }
  end

  # Runs one scan and blocks until the queue has drained.
  def scan_and_wait
    @file_worker.scan
    @file_worker.wait_for_empty
  end

  it "should enqueue the files in the in directory" do
    @file_worker.queue.expects(:push).times(5)
    scan_and_wait
  end

  it "should move the files to the out directory" do
    scan_and_wait

    @file_worker.in_path.children.must_be_empty
    @file_worker.done_path.children.size.must_equal(5)
  end

  it "should not enqueue the this same files twice" do
    @file_worker.queue.expects(:push).times(5)
    scan_and_wait
    scan_and_wait
  end

  describe "when the worker queue is empty" do
    before { @file_worker.stubs(:queue_size).returns(0) }

    it "should only enqueue 10 items" do
      prepare_fixture_files(20)
      @file_worker.queue.expects(:push).times(10)
      scan_and_wait
    end
  end

  describe "when the worker queue has some items in it" do
    before { @file_worker.stubs(:queue_size).returns(3) }

    it "should not try to over fill the queue" do
      prepare_fixture_files(20)
      @file_worker.queue.expects(:push).times(7)
      scan_and_wait
    end
  end

  describe "when the worker queue is full" do
    before { @file_worker.stubs(:queue_size).returns(10) }

    it "should not enqueue anything" do
      prepare_fixture_files(20)
      @file_worker.queue.expects(:push).never
      scan_and_wait
    end
  end

  describe "when a file is already enqueued" do
    before do
      # Mark the first fixture file as in progress so the scan skips it.
      tracked_file = Dir.glob(@file_worker.in_path + '*').first
      @file_worker.state[tracked_file] = {:time => Time.now, :status => :working}
    end

    it "should not enqueue the file again" do
      @file_worker.queue.expects(:push).times(4)
      scan_and_wait
    end
  end

  describe "when jobs fail" do
    before do
      @file_worker.worker_class = FailingTestWorker
      scan_and_wait
    end

    it "should call the error handler" do
      @errors.size.must_equal(5)
    end

    it "should not move the files" do
      @file_worker.done_path.children.must_be_empty
      @file_worker.in_path.children.size.must_equal(5)
    end

    it "should remove the files from state" do
      @file_worker.state.must_be_empty
    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.expand_path("test_helper", File.dirname(__FILE__))
|
2
|
+
|
3
|
+
# Specs for FileWorker::S3UploadWorker.
#
# The worker reads its configuration from ENV, so every example starts with
# the three variables unset and the previous values are restored afterwards.
# Without this, the values set by the "instantiated correctly" examples would
# leak into "should fail when not provided auth options" whenever that example
# runs later, and into any other spec in the same process.
describe "an s3 upload worker" do
  env_keys = ["S3_BUCKET", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"]

  before do
    @file_name = "/path/to/file.name"

    # ENV.delete returns the previous value (or nil), giving a snapshot.
    @saved_env = {}
    env_keys.each { |key| @saved_env[key] = ENV.delete(key) }
  end

  after do
    # Assigning nil removes the variable, so unset values stay unset.
    env_keys.each { |key| ENV[key] = @saved_env[key] }
  end

  it "should fail when not provided auth options" do
    assert_raises(IndexError) { FileWorker::S3UploadWorker.new(@file_name, {}) }
  end

  describe "instantiated correctly" do
    before do
      ENV["S3_BUCKET"] = "files"
      ENV["AWS_ACCESS_KEY_ID"] = "some key"
      ENV["AWS_SECRET_ACCESS_KEY"] = "other key"

      @worker = FileWorker::S3UploadWorker.new(@file_name, {})
    end

    describe "process" do
      it "should attempt to transfer to the location" do
        file = stub()
        File.expects(:open).with(@file_name).returns(file)

        files = stub()
        bucket = stub(:files => files)
        files.expects(:create).with(:key => File.basename(@file_name), :body => file, :public => false)

        @worker.expects(:bucket).returns(bucket)
        @worker.process
      end
    end
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
$testing = true
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "bundler"
|
5
|
+
Bundler.require
|
6
|
+
|
7
|
+
require "minitest/autorun"
|
8
|
+
require "mocha"
|
9
|
+
require "pathname"
|
10
|
+
require "fileutils"
|
11
|
+
|
12
|
+
# Fixture support shared by every spec: each example starts with empty
# "in" and "done" directories under tmp/fixtures next to the test directory.
MiniTest::Spec.class_eval do
  before do
    test_dir = Pathname.new(File.dirname(__FILE__))
    @fixture_root = test_dir + "../tmp/fixtures"

    FileUtils.mkdir_p(@fixture_root + "in")
    FileUtils.mkdir_p(@fixture_root + "done")
    clean_fixture_dirs
  end

  # Removes everything inside the "in" and "done" fixture directories.
  def clean_fixture_dirs
    in_entries = Dir.glob(@fixture_root + "in/*")
    FileUtils.rm_rf(in_entries)

    done_entries = Dir.glob(@fixture_root + "done/*")
    FileUtils.rm_rf(done_entries)
  end

  # Creates in_files files in the "in" directory and done_files files in the
  # "done" directory.
  def prepare_fixture_files(in_files = 5, done_files = 0)
    create_fixture_files(@fixture_root + "in", in_files)
    create_fixture_files(@fixture_root + "done", done_files)
  end

  # Writes `number` files into path, named "0", "1", ... and each containing
  # its own name.
  def create_fixture_files(path, number)
    number.times do |index|
      label = index.to_s
      File.open(path + label, "w") { |file| file.write(label) }
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: file_worker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Mick Staugaard
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2012-01-26 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rake
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
type: :development
|
25
|
+
version_requirements: *id001
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: minitest
|
28
|
+
prerelease: false
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: "0"
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id002
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: mocha
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id003
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: jruby-openssl
|
50
|
+
prerelease: false
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
type: :runtime
|
58
|
+
version_requirements: *id004
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: girl_friday
|
61
|
+
prerelease: false
|
62
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: "0"
|
68
|
+
type: :runtime
|
69
|
+
version_requirements: *id005
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: fog
|
72
|
+
prerelease: false
|
73
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: "0"
|
79
|
+
type: :runtime
|
80
|
+
version_requirements: *id006
|
81
|
+
description: If you have files that you some how need to process, file_worker is your friend.
|
82
|
+
email:
|
83
|
+
- mick@staugaard.com
|
84
|
+
executables:
|
85
|
+
- file_worker
|
86
|
+
extensions: []
|
87
|
+
|
88
|
+
extra_rdoc_files: []
|
89
|
+
|
90
|
+
files:
|
91
|
+
- lib/file_worker.rb
|
92
|
+
- lib/file_worker/cli.rb
|
93
|
+
- lib/file_worker/default_worker.rb
|
94
|
+
- lib/file_worker/directory_scanner.rb
|
95
|
+
- lib/file_worker/s3_upload_worker.rb
|
96
|
+
- lib/file_worker/version.rb
|
97
|
+
- README.md
|
98
|
+
- test/directory_scanner_test.rb
|
99
|
+
- test/s3_upload_worker_test.rb
|
100
|
+
- test/test_helper.rb
|
101
|
+
- bin/file_worker
|
102
|
+
homepage: ""
|
103
|
+
licenses: []
|
104
|
+
|
105
|
+
post_install_message:
|
106
|
+
rdoc_options: []
|
107
|
+
|
108
|
+
require_paths:
|
109
|
+
- lib
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
111
|
+
none: false
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: "0"
|
116
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: "0"
|
122
|
+
requirements: []
|
123
|
+
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 1.8.15
|
126
|
+
signing_key:
|
127
|
+
specification_version: 3
|
128
|
+
summary: A multi-threaded worker that takes files as input
|
129
|
+
test_files:
|
130
|
+
- test/directory_scanner_test.rb
|
131
|
+
- test/s3_upload_worker_test.rb
|
132
|
+
- test/test_helper.rb
|