file_worker 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md ADDED
File without changes
data/bin/file_worker ADDED
@@ -0,0 +1,17 @@
1
#!/usr/bin/env ruby

# Command-line entry point: builds a directory scanner from the command-line
# arguments, registers an error reporter and starts the scan loop.

require "rubygems"
require "bundler"
Bundler.require

require 'file_worker/cli'

scanner = FileWorker::Cli.build_scanner

# Report every failed file on stderr. The handler receives the name of the
# file and the exception that was raised while processing it.
scanner.on_error do |file_name, exception|
  $stderr.puts("Failed to process #{file_name}")
  $stderr.puts(exception)
  # Array() guards against an exception that carries no backtrace.
  $stderr.puts(Array(exception.backtrace).join("\n"))
end

scanner.start
@@ -0,0 +1,48 @@
1
+ require 'optparse'
2
+
3
module FileWorker
  # Command-line front end: turns an argument list into a DirectoryScanner.
  class Cli
    # Parses the command line and builds a DirectoryScanner from it.
    #
    # argv - the argument list to parse (defaults to ARGV). Recognised
    #        switches are removed from it in place; the first remaining
    #        argument, if any, is the directory to scan, otherwise the
    #        current working directory is used.
    #
    # The out-directory defaults to a "done" directory next to the
    # in-directory. The resolved options are printed to stdout.
    #
    # Returns a new DirectoryScanner. Exits the process when -h/--help is
    # given. Numeric switches are coerced by OptionParser (Integer/Float), so
    # malformed numbers are no longer silently read as 0.
    def self.build_scanner(argv = ARGV)
      options = {
        :workers => 5
      }

      parser = OptionParser.new do |opts|
        opts.banner = "Usage: #{$0} [options]"

        opts.on("-w", "--workers [NUMBER]", Integer, "The number of worker threads") do |workers|
          # The argument is optional, so nil arrives when it is omitted;
          # keep the default instead of storing a pool size of 0.
          options[:workers] = workers if workers
        end

        opts.on("-o", "--out [DIRECTORY]", "The directory to put the files when they have been processed") do |out|
          options[:out_directory] = out
        end

        opts.on("-s", "--sleep [NUMBER]", Float, "The number of seconds to sleep between scanning the in-directory") do |sleep_time|
          options[:sleep] = sleep_time
        end

        opts.on("-q", "--queuesize [NUMBER]", Integer, "The maximum queue size to keep in memory") do |max_queue_size|
          # Same as --workers: an omitted value must not become 0.
          options[:max_queue_size] = max_queue_size if max_queue_size
        end

        opts.on_tail("-h", "--help", "Show this message") do
          puts opts
          exit
        end
      end

      parser.parse!(argv)

      root = argv[0]
      options[:in_directory] = root ? File.expand_path(root, Dir.pwd) : Dir.pwd

      options[:out_directory] ||= File.expand_path('../done', options[:in_directory])

      puts options.inspect

      DirectoryScanner.new(options)
    end
  end
end
@@ -0,0 +1,11 @@
1
module FileWorker
  # Minimal worker: it only prints the name of the file it was given.
  # DirectoryScanner uses it as its initial worker class.
  class DefaultWorker
    # file_name - the file to process.
    # options   - accepted so the constructor matches the other workers;
    #             not used by this class, hence optional.
    def initialize(file_name, options = {})
      @file_name = file_name
    end

    # Writes the file name to stdout.
    def process
      puts @file_name
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'girl_friday'
2
+ require 'fileutils'
3
+ require 'jruby/synchronized'
4
+ require 'pathname'
5
+
6
module FileWorker
  # Repeatedly scans an input directory and pushes every file it finds onto a
  # GirlFriday work queue. Each queued file is handed to an instance of
  # +worker_class+; when the worker finishes without raising, the file is
  # moved to the "done" directory. Failures are passed to the handlers
  # registered with #on_error and the file is left where it is.
  class DirectoryScanner
    # Pool size used when the options carry no usable :workers value.
    DEFAULT_WORKERS = 3

    attr_accessor :worker_class
    attr_reader :in_path, :done_path, :state

    # options - Hash with:
    #           :in_directory   - directory to scan (required)
    #           :out_directory  - directory processed files are moved to (required)
    #           :sleep          - seconds between scans in #start (default 1)
    #           :max_queue_size - upper bound for the queue backlog (default 1000)
    #           :workers        - size of the worker pool (default 3)
    #           The whole hash is also passed on to every worker instance.
    def initialize(options)
      @options = options
      @in_path = Pathname.new(@options[:in_directory])
      @done_path = Pathname.new(@options[:out_directory])
      @sleep = @options[:sleep] || 1

      @max_queue_size = @options[:max_queue_size] || 1000

      # nil.to_i is 0, so a missing or non-positive value falls back to the default.
      workers = @options[:workers].to_i
      @workers = workers > 0 ? workers : DEFAULT_WORKERS

      @worker_class = DefaultWorker

      # Tracks files that are enqueued or being worked on. It is written by
      # both #enqueue and #process (the latter runs on worker threads), hence
      # the synchronized wrapper.
      @state = {}
      @state.extend JRuby::Synchronized
      @queue_name = "file_worker"

      @error_handlers = []
    end

    # Processes one file: runs the worker, then moves the file to the done
    # directory. Any exception is forwarded to the error handlers.
    #
    # This method is called by the worker threads, so remember to keep it
    # thread safe.
    def process(file_name)
      @state[file_name] = {:time => Time.now, :status => :working}

      @worker_class.new(file_name, @options).process
      FileUtils.mv(file_name, @done_path)
    rescue Exception => e
      # Kept broad so that every kind of worker failure reaches the handlers.
      handle_error(file_name, e)
    ensure
      # Always forget the file, even when an error handler raises; otherwise
      # it would be excluded from every later scan.
      @state.delete(file_name)
    end

    # The lazily created work queue whose jobs call #process.
    def queue
      @queue ||= GirlFriday::WorkQueue.new(@queue_name, :size => @workers) do |file_name|
        process(file_name)
      end
    end

    # Number of jobs waiting in the queue, as reported by the queue status.
    def queue_size
      queue.status[@queue_name][:backlog]
    end

    # Marks the file as enqueued and pushes it onto the work queue.
    def enqueue(file_name)
      @state[file_name] = {:time => Time.now, :status => :enqueued}

      queue.push(file_name)
    end

    # Blocks until the queue is empty and no worker reports being busy.
    def wait_for_empty
      queue.wait_for_empty

      # NOTE(review): the initial pause presumably gives workers time to pick
      # up the last job before the busy count is polled - confirm.
      sleep 0.5

      sleep 0.5 until queue.status[@queue_name][:busy] == 0
    end

    # Enqueues files from the in-directory that are not already tracked in
    # +state+, without letting the backlog exceed the maximum queue size.
    def scan
      candidates = Dir.glob(@in_path.join('*').to_s) - @state.keys

      # Clamp at zero: a backlog above the limit must enqueue nothing
      # (a negative length would make the slice nil).
      free_slots = [@max_queue_size - queue_size, 0].max

      candidates.first(free_slots).each do |file_name|
        enqueue(file_name)
      end
    end

    # Scans in a loop, sleeping between passes, until #stop is called.
    def start
      @run = true
      while @run
        scan
        sleep(@sleep)
      end
    end

    # Makes the #start loop exit after its current pass.
    def stop
      @run = false
    end

    # Registers a block that is called with (file_name, exception) whenever
    # processing a file fails.
    def on_error(&block)
      @error_handlers << block
    end

    # Calls every registered error handler in registration order.
    def handle_error(file_name, exception)
      @error_handlers.each do |error_handler|
        error_handler.call(file_name, exception)
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require "fog"
2
+
3
module FileWorker
  # Worker that uploads a file to an S3 bucket through Fog. The bucket name
  # and the AWS credentials are read from the environment.
  class S3UploadWorker
    # file_name - path of the file to upload.
    # options   - accepted for interface compatibility; not used here.
    #
    # Raises the error produced by ENV.fetch (an IndexError or a subclass)
    # when S3_BUCKET, AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set.
    def initialize(file_name, options)
      @file_name = file_name
      @bucket_name = ENV.fetch("S3_BUCKET")
      @aws_key_id = ENV.fetch("AWS_ACCESS_KEY_ID")
      @aws_secret = ENV.fetch("AWS_SECRET_ACCESS_KEY")
    end

    # Uploads the file as a private object keyed by its base name.
    #
    # Returns whatever the bucket's files.create call returns.
    # Raises ArgumentError when the bucket cannot be found.
    def process
      body = File.open(@file_name)
      begin
        bucket.files.create(
          :key => File.basename(@file_name),
          :body => body,
          :public => false
        )
      ensure
        # Release the descriptor once the upload call has returned or failed;
        # this worker runs for every scanned file, so leaks add up quickly.
        body.close if body.respond_to?(:close)
      end
    end

    private

    # Memoized Fog storage connection for the configured AWS credentials.
    def connection
      @connection ||= Fog::Storage.new(
        :provider => "AWS",
        :aws_access_key_id => @aws_key_id,
        :aws_secret_access_key => @aws_secret
      )
    end

    # Looks up the configured bucket. A nil lookup result is turned into a
    # descriptive error instead of a NoMethodError further down.
    def bucket
      connection.directories.get(@bucket_name) ||
        raise(ArgumentError, "S3 bucket #{@bucket_name.inspect} could not be found")
    end
  end
end
@@ -0,0 +1,3 @@
1
module FileWorker
  # Version of the file_worker gem. Frozen so the shared constant cannot be
  # mutated in place.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,7 @@
1
+ require "file_worker/version"
2
+
3
# Namespace of the gem. The classes are registered for autoloading, so each
# file is only read when its constant is first referenced.
module FileWorker
  autoload :Cli, 'file_worker/cli'
  autoload :DirectoryScanner, 'file_worker/directory_scanner'
  autoload :DefaultWorker, 'file_worker/default_worker'
  autoload :S3UploadWorker, 'file_worker/s3_upload_worker'
end
@@ -0,0 +1,126 @@
1
+ require File.expand_path("test_helper", File.dirname(__FILE__))
2
+ require 'jruby/synchronized'
3
+
4
# Worker double for the failure specs: #process always raises, using the
# file name it was constructed with as the error.
class FailingTestWorker
  def initialize(file_name, options)
    @path = file_name
  end

  def process
    fail @path
  end
end
13
+
14
# Specs for FileWorker::DirectoryScanner. Every example starts with five
# fixture files in the in-directory and a scanner limited to a queue of ten.
describe "the directory scanner" do
  before do
    prepare_fixture_files
    @file_worker = FileWorker::DirectoryScanner.new(
      :in_directory => @fixture_root + 'in',
      :out_directory => @fixture_root + 'done',
      :max_queue_size => 10
    )

    # Error handlers run on worker threads, so the collector is synchronized.
    @errors = []
    @errors.extend JRuby::Synchronized

    @file_worker.on_error do |file_name, exception|
      @errors << file_name
    end
  end

  # Runs one scan pass and blocks until the work queue has drained.
  def scan_and_wait
    @file_worker.scan
    @file_worker.wait_for_empty
  end

  it "should enqueue the files in the in directory" do
    @file_worker.queue.expects(:push).times(5)
    scan_and_wait
  end

  it "should move the files to the out directory" do
    scan_and_wait

    @file_worker.in_path.children.must_be_empty
    @file_worker.done_path.children.size.must_equal(5)
  end

  it "should not enqueue the same files twice" do
    @file_worker.queue.expects(:push).times(5)
    scan_and_wait
    scan_and_wait
  end

  describe "when the worker queue is empty" do
    before do
      @file_worker.stubs(:queue_size).returns(0)
    end

    it "should only enqueue 10 items" do
      prepare_fixture_files(20)
      @file_worker.queue.expects(:push).times(10)
      scan_and_wait
    end
  end

  describe "when the worker queue has some items in it" do
    before do
      @file_worker.stubs(:queue_size).returns(3)
    end

    it "should not try to over fill the queue" do
      prepare_fixture_files(20)
      @file_worker.queue.expects(:push).times(7)
      scan_and_wait
    end
  end

  describe "when the worker queue is full" do
    before do
      @file_worker.stubs(:queue_size).returns(10)
    end

    it "should not enqueue anything" do
      prepare_fixture_files(20)
      @file_worker.queue.expects(:push).never
      scan_and_wait
    end
  end

  describe "when a file is already enqueued" do
    before do
      # Pretend one of the five fixture files is already being worked on.
      file_name = Dir.glob(@file_worker.in_path + '*').first
      @file_worker.state[file_name] = {:time => Time.now, :status => :working}
    end

    it "should not enqueue the file again" do
      @file_worker.queue.expects(:push).times(4)
      scan_and_wait
    end
  end

  describe "when jobs fail" do
    before do
      @file_worker.worker_class = FailingTestWorker
      scan_and_wait
    end

    it "should call the error handler" do
      @errors.size.must_equal(5)
    end

    it "should not move the files" do
      @file_worker.done_path.children.must_be_empty
      @file_worker.in_path.children.size.must_equal(5)
    end

    it "should remove the files from state" do
      @file_worker.state.must_be_empty
    end
  end

end
@@ -0,0 +1,35 @@
1
+ require File.expand_path("test_helper", File.dirname(__FILE__))
2
+
3
# Specs for FileWorker::S3UploadWorker.
describe "an s3 upload worker" do
  # Environment variables the worker reads in its constructor.
  env_keys = %w[S3_BUCKET AWS_ACCESS_KEY_ID AWS_SECRET_ACCESS_KEY]

  before do
    @file_name = "/path/to/file.name"

    # Start every example without credentials so the examples do not depend
    # on execution order or on the developer's shell environment.
    @saved_env = {}
    env_keys.each { |key| @saved_env[key] = ENV.delete(key) }
  end

  after do
    # Assigning nil removes the variable again when it was not set before.
    env_keys.each { |key| ENV[key] = @saved_env[key] }
  end

  it "should fail when not provided auth options" do
    assert_raises(IndexError) { FileWorker::S3UploadWorker.new(@file_name, {}) }
  end

  describe "instantiated correctly" do
    before do
      ENV["S3_BUCKET"] = "files"
      ENV["AWS_ACCESS_KEY_ID"] = "some key"
      ENV["AWS_SECRET_ACCESS_KEY"] = "other key"

      @worker = FileWorker::S3UploadWorker.new(@file_name, {})
    end

    describe "process" do
      it "should attempt to transfer to the location" do
        file = stub()
        File.expects(:open).with(@file_name).returns(file)

        files = stub()
        bucket = stub(:files => files)
        files.expects(:create).with(:key => File.basename(@file_name), :body => file, :public => false)

        @worker.expects(:bucket).returns(bucket)
        @worker.process
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ $testing = true
2
+
3
+ require "rubygems"
4
+ require "bundler"
5
+ Bundler.require
6
+
7
+ require "minitest/autorun"
8
+ require "mocha"
9
+ require "pathname"
10
+ require "fileutils"
11
+
12
# Shared fixture support mixed into every spec.
MiniTest::Spec.class_eval do
  # Make sure tmp/fixtures/in and tmp/fixtures/done exist and are empty
  # before each example.
  before do
    @fixture_root = Pathname.new(File.dirname(__FILE__)) + "../tmp/fixtures"
    %w[in done].each { |dir| FileUtils.mkdir_p(@fixture_root + dir) }
    clean_fixture_dirs
  end

  # Deletes everything inside the two fixture directories.
  def clean_fixture_dirs
    in_entries = Dir.glob(@fixture_root.join("in/*"))
    FileUtils.rm_rf(in_entries)
    done_entries = Dir.glob(@fixture_root.join("done/*"))
    FileUtils.rm_rf(done_entries)
  end

  # Creates the requested number of files in the in- and done-directories.
  def prepare_fixture_files(in_files = 5, done_files = 0)
    create_fixture_files(@fixture_root.join("in"), in_files)
    create_fixture_files(@fixture_root.join("done"), done_files)
  end

  # Writes +number+ files named "0", "1", ... into +path+; each file
  # contains its own name.
  def create_fixture_files(path, number)
    number.times do |index|
      label = index.to_s
      File.open(path + label, "w") { |file| file.write(label) }
    end
  end
end
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: file_worker
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Mick Staugaard
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-01-26 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :development
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: minitest
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ type: :development
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: mocha
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ type: :development
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: jruby-openssl
50
+ prerelease: false
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ type: :runtime
58
+ version_requirements: *id004
59
+ - !ruby/object:Gem::Dependency
60
+ name: girl_friday
61
+ prerelease: false
62
+ requirement: &id005 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ type: :runtime
69
+ version_requirements: *id005
70
+ - !ruby/object:Gem::Dependency
71
+ name: fog
72
+ prerelease: false
73
+ requirement: &id006 !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: "0"
79
+ type: :runtime
80
+ version_requirements: *id006
81
+ description: If you have files that you somehow need to process, file_worker is your friend.
82
+ email:
83
+ - mick@staugaard.com
84
+ executables:
85
+ - file_worker
86
+ extensions: []
87
+
88
+ extra_rdoc_files: []
89
+
90
+ files:
91
+ - lib/file_worker.rb
92
+ - lib/file_worker/cli.rb
93
+ - lib/file_worker/default_worker.rb
94
+ - lib/file_worker/directory_scanner.rb
95
+ - lib/file_worker/s3_upload_worker.rb
96
+ - lib/file_worker/version.rb
97
+ - README.md
98
+ - test/directory_scanner_test.rb
99
+ - test/s3_upload_worker_test.rb
100
+ - test/test_helper.rb
101
+ - bin/file_worker
102
+ homepage: ""
103
+ licenses: []
104
+
105
+ post_install_message:
106
+ rdoc_options: []
107
+
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ none: false
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: "0"
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: "0"
122
+ requirements: []
123
+
124
+ rubyforge_project:
125
+ rubygems_version: 1.8.15
126
+ signing_key:
127
+ specification_version: 3
128
+ summary: A multi-threaded worker that takes files as input
129
+ test_files:
130
+ - test/directory_scanner_test.rb
131
+ - test/s3_upload_worker_test.rb
132
+ - test/test_helper.rb