file_worker 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +0 -0
- data/bin/file_worker +17 -0
- data/lib/file_worker/cli.rb +48 -0
- data/lib/file_worker/default_worker.rb +11 -0
- data/lib/file_worker/directory_scanner.rb +100 -0
- data/lib/file_worker/s3_upload_worker.rb +34 -0
- data/lib/file_worker/version.rb +3 -0
- data/lib/file_worker.rb +7 -0
- data/test/directory_scanner_test.rb +126 -0
- data/test/s3_upload_worker_test.rb +35 -0
- data/test/test_helper.rb +37 -0
- metadata +132 -0
data/README.md
ADDED
File without changes
|
data/bin/file_worker
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "bundler"
|
5
|
+
Bundler.require
|
6
|
+
|
7
|
+
require 'file_worker/cli'
|
8
|
+
|
9
|
+
# Build the scanner from the command-line arguments.
scanner = FileWorker::Cli.build_scanner

# Report every failed file on stderr: the file name, the exception itself
# and its backtrace. The block parameter is named `exception`; the body must
# use that same name, otherwise the reporter raises NameError on first use.
scanner.on_error do |file_name, exception|
  $stderr.puts("Failed to process #{file_name}")
  $stderr.puts(exception)
  # backtrace is nil for an exception object that was never raised
  $stderr.puts(Array(exception.backtrace).join("\n"))
end

# Runs the scan loop until the scanner is stopped.
scanner.start
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'optparse'
|
2
|
+
|
3
|
+
module FileWorker
  # Command-line front end: turns ARGV into the options hash handed to
  # DirectoryScanner.
  class Cli
    # Parses ARGV (destructively, via OptionParser#parse!) and returns a new
    # DirectoryScanner built from the resulting options.
    #
    # Recognised switches:
    #   -w, --workers [NUMBER]    number of worker threads (default 5)
    #   -o, --out [DIRECTORY]     directory processed files are moved to
    #   -s, --sleep [NUMBER]      seconds to sleep between scans (Float)
    #   -q, --queuesize [NUMBER]  maximum queue size to keep in memory
    #   -h, --help                print usage and exit
    #
    # The first remaining positional argument is the in-directory; it
    # defaults to the current working directory. The out-directory defaults
    # to a sibling of the in-directory named "done".
    #
    # Integer switches are validated as decimal integers, so a value such as
    # "abc" is rejected by OptionParser instead of silently becoming 0. A
    # switch given without a value leaves the corresponding default in place.
    def self.build_scanner
      options = {
        :workers => 5
      }

      OptionParser.new do |opts|
        opts.banner = "Usage: #{$0} [options]"

        opts.on("-w", "--workers [NUMBER]", OptionParser::DecimalInteger, "The number of worker threads") do |workers|
          # nil means the switch was given without a value: keep the default
          options[:workers] = workers unless workers.nil?
        end

        opts.on("-o", "--out [DIRECTORY]", "The directory to put the files when they have been processed") do |out|
          options[:out_directory] = out
        end

        opts.on("-s", "--sleep [NUMBER]", Float, "The number of seconds to sleep between scanning the in-directory") do |sleep_time|
          options[:sleep] = sleep_time
        end

        opts.on("-q", "--queuesize [NUMBER]", OptionParser::DecimalInteger, "The maximum queue size to keep in memory") do |max_queue_size|
          # a queue size of 0 would stop the scanner from ever enqueueing,
          # so an omitted value must not be coerced to 0
          options[:max_queue_size] = max_queue_size unless max_queue_size.nil?
        end

        opts.on_tail("-h", "--help", "Show this message") do
          puts opts
          exit
        end
      end.parse!

      root = ARGV[0]
      options[:in_directory] = root ? File.expand_path(root, Dir.pwd) : Dir.pwd

      # Also covers "-o" given without a value, which stores nil above.
      options[:out_directory] ||= File.expand_path('../done', options[:in_directory])

      puts options.inspect

      DirectoryScanner.new(options)
    end
  end
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
require 'girl_friday'
|
2
|
+
require 'fileutils'
|
3
|
+
require 'jruby/synchronized'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
module FileWorker
  # Watches an input directory, pushes every file it finds onto a
  # GirlFriday work queue, runs the configured worker class on it and moves
  # the file to the done directory once the worker succeeded.
  class DirectoryScanner
    # Class instantiated as worker_class.new(file_name, options) for each
    # file; its #process method does the actual work.
    attr_accessor :worker_class
    attr_reader :in_path, :done_path, :state

    # options - Hash with:
    #   :in_directory   - directory to scan (required)
    #   :out_directory  - directory processed files are moved to (required)
    #   :sleep          - seconds between scans in #start (default 1)
    #   :max_queue_size - upper bound used by #scan (default 1000)
    #   :workers        - size of the work queue's pool (default 3)
    def initialize(options)
      @options = options
      @in_path = Pathname.new(@options[:in_directory])
      @done_path = Pathname.new(@options[:out_directory])
      @sleep = @options[:sleep] || 1

      @max_queue_size = @options[:max_queue_size] || 1000

      # Previously the pool size was fixed at 3 and :workers was ignored.
      @workers = @options[:workers] || 3

      @worker_class = DefaultWorker

      # file name => {:time, :status}; touched from worker threads as well
      # as the scanning thread, hence the synchronized wrapper.
      @state = {}
      @state.extend JRuby::Synchronized
      @queue_name = "file_worker"

      @error_handlers = []
    end

    # This method is called by the worker threads, so remember to keep it thread safe
    #
    # Runs the worker on file_name and moves the file to the done directory.
    # Any failure is passed to the registered error handlers and the file
    # stays where it is.
    def process(file_name)
      @state[file_name] = {:time => Time.now, :status => :working}

      begin
        @worker_class.new(file_name, @options).process
        FileUtils.mv(file_name, @done_path)
      rescue Exception => e
        # Deliberately broad (unchanged): every worker failure is routed to
        # the registered error handlers.
        handle_error(file_name, e)
      ensure
        # Always forget the file, even when an error handler itself raises;
        # otherwise it would stay in @state and never be picked up again.
        @state.delete(file_name)
      end
    end

    # Lazily created work queue whose jobs call #process.
    def queue
      @queue ||= GirlFriday::WorkQueue.new(@queue_name, :size => @workers) do |file_name|
        process(file_name)
      end
    end

    # Number of jobs waiting in the queue (its :backlog status entry).
    def queue_size
      queue.status[@queue_name][:backlog]
    end

    # Marks file_name as enqueued and pushes it onto the work queue.
    def enqueue(file_name)
      @state[file_name] = {:time => Time.now, :status => :enqueued}

      queue.push(file_name)
    end

    # Blocks until the queue reports empty and no worker is busy any more.
    def wait_for_empty
      queue.wait_for_empty

      sleep 0.5

      while queue.status[@queue_name][:busy] != 0
        sleep 0.5
      end
    end

    # Enqueues files from the in-directory that are not already tracked in
    # @state, never exceeding the remaining queue capacity.
    def scan
      file_names = Dir.glob((@in_path + '*').to_s) - @state.keys

      # Clamp at zero: with a backlog above the limit the old slice
      # file_names[0, negative] returned nil and the loop crashed.
      max_items = [@max_queue_size - queue_size, 0].max

      file_names.first(max_items).each do |file_name|
        enqueue(file_name)
      end
    end

    # Scans in a loop, sleeping @sleep seconds between scans, until #stop.
    def start
      @run = true
      while @run
        scan
        sleep(@sleep)
      end
    end

    # Makes the #start loop exit after its current iteration.
    def stop
      @run = false
    end

    # Registers a block called as block.call(file_name, exception) whenever
    # processing a file fails.
    def on_error(&block)
      @error_handlers << block
    end

    # Invokes every registered error handler, in registration order.
    def handle_error(file_name, exception)
      @error_handlers.each do |error_handler|
        error_handler.call(file_name, exception)
      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
require "fog"
|
2
|
+
|
3
|
+
module FileWorker
  # Worker that uploads a single file to an S3 bucket through fog.
  #
  # Configuration comes from the environment: S3_BUCKET, AWS_ACCESS_KEY_ID
  # and AWS_SECRET_ACCESS_KEY.
  class S3UploadWorker
    # file_name - path of the file to upload
    # options   - accepted for interface compatibility with other workers;
    #             not used here
    #
    # Raises (from ENV.fetch) when any of the required environment variables
    # is missing.
    def initialize(file_name, options)
      @file_name = file_name
      @bucket_name = ENV.fetch("S3_BUCKET")
      @aws_key_id = ENV.fetch("AWS_ACCESS_KEY_ID")
      @aws_secret = ENV.fetch("AWS_SECRET_ACCESS_KEY")
    end

    # Creates a private object in the bucket, keyed by the file's basename,
    # with the file's contents as body. Returns whatever files.create returns.
    def process
      body = File.open(@file_name)

      begin
        bucket.files.create(
          :key    => File.basename(@file_name),
          :body   => body,
          :public => false
        )
      ensure
        # The handle used to be left open, leaking one descriptor per
        # uploaded file. Bodies that cannot be closed (e.g. stand-ins used
        # in tests) are left alone.
        body.close if body.respond_to?(:close)
      end
    end

    private

    # Memoized fog storage connection using the AWS credentials.
    def connection
      @connection ||= Fog::Storage.new(
        :provider              => "AWS",
        :aws_access_key_id     => @aws_key_id,
        :aws_secret_access_key => @aws_secret
      )
    end

    # Looks up the configured bucket on the connection.
    def bucket
      connection.directories.get(@bucket_name)
    end
  end
end
|
data/lib/file_worker.rb
ADDED
@@ -0,0 +1,126 @@
|
|
1
|
+
require File.expand_path("test_helper", File.dirname(__FILE__))
|
2
|
+
require 'jruby/synchronized'
|
3
|
+
|
4
|
+
class FailingTestWorker
|
5
|
+
def initialize(file_name, options)
|
6
|
+
@file_name = file_name
|
7
|
+
end
|
8
|
+
|
9
|
+
def process
|
10
|
+
raise @file_name
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
describe "the directory scanner" do
|
15
|
+
before do
|
16
|
+
prepare_fixture_files
|
17
|
+
@file_worker = FileWorker::DirectoryScanner.new(
|
18
|
+
:in_directory => @fixture_root + 'in',
|
19
|
+
:out_directory => @fixture_root + 'done',
|
20
|
+
:max_queue_size => 10
|
21
|
+
)
|
22
|
+
|
23
|
+
@errors = []
|
24
|
+
@errors.extend JRuby::Synchronized
|
25
|
+
|
26
|
+
@file_worker.on_error do |file_name, exception|
|
27
|
+
@errors << file_name
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should enqueue the files in the in directory" do
|
32
|
+
@file_worker.queue.expects(:push).times(5)
|
33
|
+
@file_worker.scan
|
34
|
+
@file_worker.wait_for_empty
|
35
|
+
end
|
36
|
+
|
37
|
+
it "should move the files to the out directory" do
|
38
|
+
@file_worker.scan
|
39
|
+
@file_worker.wait_for_empty
|
40
|
+
|
41
|
+
@file_worker.in_path.children.must_be_empty
|
42
|
+
@file_worker.done_path.children.size.must_equal(5)
|
43
|
+
end
|
44
|
+
|
45
|
+
it "should not enqueue the this same files twice" do
|
46
|
+
@file_worker.queue.expects(:push).times(5)
|
47
|
+
@file_worker.scan
|
48
|
+
@file_worker.wait_for_empty
|
49
|
+
@file_worker.scan
|
50
|
+
@file_worker.wait_for_empty
|
51
|
+
end
|
52
|
+
|
53
|
+
describe "when the worker queue is empty" do
|
54
|
+
before do
|
55
|
+
@file_worker.stubs(:queue_size).returns(0)
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should only enqueue 10 items" do
|
59
|
+
prepare_fixture_files(20)
|
60
|
+
@file_worker.queue.expects(:push).times(10)
|
61
|
+
@file_worker.scan
|
62
|
+
@file_worker.wait_for_empty
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe "when the worker queue has some items in it" do
|
67
|
+
before do
|
68
|
+
@file_worker.stubs(:queue_size).returns(3)
|
69
|
+
end
|
70
|
+
|
71
|
+
it "should not try to over fill the queue" do
|
72
|
+
prepare_fixture_files(20)
|
73
|
+
@file_worker.queue.expects(:push).times(7)
|
74
|
+
@file_worker.scan
|
75
|
+
@file_worker.wait_for_empty
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
describe "when the worker queue is full" do
|
80
|
+
before do
|
81
|
+
@file_worker.stubs(:queue_size).returns(10)
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should not enqueue anything" do
|
85
|
+
prepare_fixture_files(20)
|
86
|
+
@file_worker.queue.expects(:push).never
|
87
|
+
@file_worker.scan
|
88
|
+
@file_worker.wait_for_empty
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
describe "when a file is already enqueued" do
|
93
|
+
before do
|
94
|
+
file_name = Dir.glob(@file_worker.in_path + '*').first
|
95
|
+
@file_worker.state[file_name] = {:time => Time.now, :status => :working}
|
96
|
+
end
|
97
|
+
|
98
|
+
it "should not enqueue the file again" do
|
99
|
+
@file_worker.queue.expects(:push).times(4)
|
100
|
+
@file_worker.scan
|
101
|
+
@file_worker.wait_for_empty
|
102
|
+
end
|
103
|
+
end
|
104
|
+
|
105
|
+
describe "when jobs fail" do
|
106
|
+
before do
|
107
|
+
@file_worker.worker_class = FailingTestWorker
|
108
|
+
@file_worker.scan
|
109
|
+
@file_worker.wait_for_empty
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should call the error handler" do
|
113
|
+
@errors.size.must_equal(5)
|
114
|
+
end
|
115
|
+
|
116
|
+
it "should not move the files" do
|
117
|
+
@file_worker.done_path.children.must_be_empty
|
118
|
+
@file_worker.in_path.children.size.must_equal(5)
|
119
|
+
end
|
120
|
+
|
121
|
+
it "should remove the files from state" do
|
122
|
+
@file_worker.state.must_be_empty
|
123
|
+
end
|
124
|
+
end
|
125
|
+
|
126
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.expand_path("test_helper", File.dirname(__FILE__))
|
2
|
+
|
3
|
+
describe "an s3 upload worker" do
  # Environment variables read by FileWorker::S3UploadWorker#initialize.
  env_keys = %w[S3_BUCKET AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY]

  before do
    @file_name = "/path/to/file.name"

    # Start every example from an environment without credentials, so the
    # "missing auth" example does not depend on execution order or on what
    # the developer has exported in their shell.
    @original_env = {}
    env_keys.each { |key| @original_env[key] = ENV.delete(key) }
  end

  after do
    # Put back whatever was there before (assigning nil removes the key).
    env_keys.each { |key| ENV[key] = @original_env[key] }
  end

  it "should fail when not provided auth options" do
    assert_raises(IndexError) { FileWorker::S3UploadWorker.new(@file_name, {}) }
  end

  describe "instantiated correctly" do
    before do
      ENV["S3_BUCKET"] = "files"
      ENV["AWS_ACCESS_KEY_ID"] = "some key"
      ENV["AWS_SECRET_ACCESS_KEY"] = "other key"

      @worker = FileWorker::S3UploadWorker.new(@file_name, {})
    end

    describe "process" do
      it "should attempt to transfer to the location" do
        file = stub()
        File.expects(:open).with(@file_name).returns(file)

        files = stub()
        bucket = stub(:files => files)
        files.expects(:create).with(:key => File.basename(@file_name), :body => file, :public => false)

        @worker.expects(:bucket).returns(bucket)
        @worker.process
      end
    end
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
$testing = true
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "bundler"
|
5
|
+
Bundler.require
|
6
|
+
|
7
|
+
require "minitest/autorun"
|
8
|
+
require "mocha"
|
9
|
+
require "pathname"
|
10
|
+
require "fileutils"
|
11
|
+
|
12
|
+
MiniTest::Spec.class_eval do
|
13
|
+
before do
|
14
|
+
@fixture_root = Pathname.new(File.dirname(__FILE__)) + "../tmp/fixtures"
|
15
|
+
FileUtils.mkdir_p(@fixture_root + "in")
|
16
|
+
FileUtils.mkdir_p(@fixture_root + "done")
|
17
|
+
clean_fixture_dirs
|
18
|
+
end
|
19
|
+
|
20
|
+
def clean_fixture_dirs
|
21
|
+
FileUtils.rm_rf(Dir.glob(@fixture_root + "in/*"))
|
22
|
+
FileUtils.rm_rf(Dir.glob(@fixture_root + "done/*"))
|
23
|
+
end
|
24
|
+
|
25
|
+
def prepare_fixture_files(in_files = 5, done_files = 0)
|
26
|
+
create_fixture_files(@fixture_root + "in", in_files)
|
27
|
+
create_fixture_files(@fixture_root + "done", done_files)
|
28
|
+
end
|
29
|
+
|
30
|
+
def create_fixture_files(path, number)
|
31
|
+
number.times do |n|
|
32
|
+
File.open(path + n.to_s, "w") do |file|
|
33
|
+
file.write(n.to_s)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
metadata
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: file_worker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Mick Staugaard
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2012-01-26 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: rake
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
type: :development
|
25
|
+
version_requirements: *id001
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: minitest
|
28
|
+
prerelease: false
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: "0"
|
35
|
+
type: :development
|
36
|
+
version_requirements: *id002
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: mocha
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
type: :development
|
47
|
+
version_requirements: *id003
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: jruby-openssl
|
50
|
+
prerelease: false
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
52
|
+
none: false
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
type: :runtime
|
58
|
+
version_requirements: *id004
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: girl_friday
|
61
|
+
prerelease: false
|
62
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ">="
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: "0"
|
68
|
+
type: :runtime
|
69
|
+
version_requirements: *id005
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: fog
|
72
|
+
prerelease: false
|
73
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
74
|
+
none: false
|
75
|
+
requirements:
|
76
|
+
- - ">="
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: "0"
|
79
|
+
type: :runtime
|
80
|
+
version_requirements: *id006
|
81
|
+
description: If you have files that you somehow need to process, file_worker is your friend.
|
82
|
+
email:
|
83
|
+
- mick@staugaard.com
|
84
|
+
executables:
|
85
|
+
- file_worker
|
86
|
+
extensions: []
|
87
|
+
|
88
|
+
extra_rdoc_files: []
|
89
|
+
|
90
|
+
files:
|
91
|
+
- lib/file_worker.rb
|
92
|
+
- lib/file_worker/cli.rb
|
93
|
+
- lib/file_worker/default_worker.rb
|
94
|
+
- lib/file_worker/directory_scanner.rb
|
95
|
+
- lib/file_worker/s3_upload_worker.rb
|
96
|
+
- lib/file_worker/version.rb
|
97
|
+
- README.md
|
98
|
+
- test/directory_scanner_test.rb
|
99
|
+
- test/s3_upload_worker_test.rb
|
100
|
+
- test/test_helper.rb
|
101
|
+
- bin/file_worker
|
102
|
+
homepage: ""
|
103
|
+
licenses: []
|
104
|
+
|
105
|
+
post_install_message:
|
106
|
+
rdoc_options: []
|
107
|
+
|
108
|
+
require_paths:
|
109
|
+
- lib
|
110
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
111
|
+
none: false
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
version: "0"
|
116
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
117
|
+
none: false
|
118
|
+
requirements:
|
119
|
+
- - ">="
|
120
|
+
- !ruby/object:Gem::Version
|
121
|
+
version: "0"
|
122
|
+
requirements: []
|
123
|
+
|
124
|
+
rubyforge_project:
|
125
|
+
rubygems_version: 1.8.15
|
126
|
+
signing_key:
|
127
|
+
specification_version: 3
|
128
|
+
summary: A multi-threaded worker that takes files as input
|
129
|
+
test_files:
|
130
|
+
- test/directory_scanner_test.rb
|
131
|
+
- test/s3_upload_worker_test.rb
|
132
|
+
- test/test_helper.rb
|