file_worker 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md ADDED
File without changes
data/bin/file_worker ADDED
@@ -0,0 +1,17 @@
1
#!/usr/bin/env ruby

# Command-line entry point: builds a directory scanner from the command-line
# arguments, registers an error reporter and starts the scan loop.

require "rubygems"
require "bundler"
Bundler.require

require 'file_worker/cli'

scanner = FileWorker::Cli.build_scanner

# Report every failed file on stderr together with the exception and its
# backtrace. The block parameter is named `exception`; referring to it by any
# other name would raise NameError inside the handler and hide the real error.
scanner.on_error do |file_name, exception|
  $stderr.puts("Failed to process #{file_name}")
  $stderr.puts(exception)
  # Array() tolerates an exception object that carries no backtrace.
  $stderr.puts(Array(exception.backtrace).join("\n"))
end

scanner.start
@@ -0,0 +1,48 @@
1
+ require 'optparse'
2
+
3
module FileWorker
  # Command-line front end: turns ARGV into the options hash that is handed to
  # DirectoryScanner.
  class Cli
    # Parses the command line and builds a scanner.
    #
    # Option switches are removed from ARGV while parsing. The first remaining
    # argument, if any, is used as the in-directory (resolved against the
    # current working directory); otherwise the working directory itself is
    # used. When no out-directory is given it defaults to a "done" directory
    # next to the in-directory.
    #
    # Every switch takes an optional value, so a bare switch (for example a
    # trailing "-w") reaches its block with nil. Such switches are ignored and
    # the defaults stay in place; coercing nil with to_i would silently
    # configure 0 workers or a queue of size 0.
    #
    # The resulting options hash is printed to standard output.
    #
    # Returns the DirectoryScanner built from the parsed options.
    def self.build_scanner
      options = {
        :workers => 5
      }

      OptionParser.new do |opts|
        opts.banner = "Usage: #{$0} [options]"

        opts.on("-w", "--workers [NUMBER]", "The number of worker threads") do |workers|
          options[:workers] = workers.to_i if workers
        end

        opts.on("-o", "--out [DIRECTORY]", "The directory to put the files when they have been processed") do |out|
          options[:out_directory] = out if out
        end

        opts.on("-s", "--sleep [NUMBER]", Float, "The number of seconds to sleep between scanning the in-directory") do |sleep_time|
          options[:sleep] = sleep_time if sleep_time
        end

        opts.on("-q", "--queuesize [NUMBER]", "The maximum queue size to keep in memory") do |max_queue_size|
          options[:max_queue_size] = max_queue_size.to_i if max_queue_size
        end

        opts.on_tail("-h", "--help", "Show this message") do
          puts opts
          exit
        end
      end.parse!

      # parse! has consumed the switches, so ARGV[0] is the first positional
      # argument (if there is one).
      root = ARGV[0]
      options[:in_directory] = root ? File.expand_path(root, Dir.pwd) : Dir.pwd

      options[:out_directory] ||= File.expand_path('../done', options[:in_directory])

      puts options.inspect

      DirectoryScanner.new(options)
    end
  end
end
@@ -0,0 +1,11 @@
1
module FileWorker
  # Minimal worker: it only prints the name of the file it was given.
  class DefaultWorker
    # file_name - the name of the file to handle.
    # options   - accepted so all workers share one constructor signature;
    #             not used by this worker.
    def initialize(file_name, options)
      @file_name = file_name
    end

    # Writes the file name to standard output, followed by a newline.
    def process
      $stdout.puts(@file_name)
    end
  end
end
@@ -0,0 +1,100 @@
1
+ require 'girl_friday'
2
+ require 'fileutils'
3
+ require 'jruby/synchronized'
4
+ require 'pathname'
5
+
6
module FileWorker
  # Scans an in-directory, pushes every file found there onto a GirlFriday
  # work queue and moves successfully processed files to the done-directory.
  #
  # Recognised options:
  #   :in_directory   - directory that is scanned for files (required)
  #   :out_directory  - directory processed files are moved to (required)
  #   :sleep          - seconds to sleep between scans (default 1)
  #   :max_queue_size - upper bound for the queue backlog (default 1000)
  #   :workers        - size of the work queue (default DEFAULT_WORKERS)
  #
  # The whole options hash is also passed on to every worker instance.
  class DirectoryScanner
    # Queue size used when options[:workers] is missing or not positive.
    DEFAULT_WORKERS = 3

    attr_accessor :worker_class
    attr_reader :in_path, :done_path, :state

    def initialize(options)
      @options = options
      @in_path = Pathname.new(@options[:in_directory])
      @done_path = Pathname.new(@options[:out_directory])
      @sleep = @options[:sleep] || 1

      @max_queue_size = @options[:max_queue_size] || 1000

      # Honour the configured worker count; fall back to the default when the
      # option is absent, zero or negative so the queue is never built
      # without any workers.
      requested_workers = @options[:workers].to_i
      @workers = requested_workers > 0 ? requested_workers : DEFAULT_WORKERS

      @worker_class = DefaultWorker

      # File name => {:time, :status} for every file that is enqueued or
      # currently being worked on.
      @state = {}
      @state.extend JRuby::Synchronized
      @queue_name = "file_worker"

      @error_handlers = []
    end

    # Runs the worker for one file and moves the file to the done-directory
    # when the worker succeeds. Failures are passed to the registered error
    # handlers and the file is left where it is.
    #
    # This method is called by the worker threads, so remember to keep it
    # thread safe.
    def process(file_name)
      @state[file_name] = {:time => Time.now, :status => :working}

      begin
        @worker_class.new(file_name, @options).process
        FileUtils.mv(file_name, @done_path)
      rescue Exception => e
        # NOTE(review): Exception (not StandardError) is rescued on purpose
        # here to keep the original behaviour; presumably this is needed to
        # see Java exceptions on JRuby - confirm before narrowing it.
        handle_error(file_name, e)
      ensure
        # Always forget the file, even when an error handler itself raises,
        # so that a later scan can pick the file up again.
        @state.delete(file_name)
      end
    end

    # The lazily created work queue; each queued file name is passed to
    # #process.
    def queue
      @queue ||= GirlFriday::WorkQueue.new(@queue_name, :size => @workers) do |file_name|
        process(file_name)
      end
    end

    # Backlog count reported by the queue status.
    def queue_size
      queue.status[@queue_name][:backlog]
    end

    # Marks the file as enqueued and pushes it onto the work queue.
    def enqueue(file_name)
      @state[file_name] = {:time => Time.now, :status => :enqueued}

      queue.push(file_name)
    end

    # Blocks until the queue reports that it is empty and no worker is busy
    # any more. The busy count is polled every half second.
    def wait_for_empty
      queue.wait_for_empty

      sleep 0.5

      while queue.status[@queue_name][:busy] != 0
        sleep 0.5
      end
    end

    # Enqueues files from the in-directory that are not already known, without
    # growing the backlog beyond the maximum queue size.
    #
    # Returns the array of file names that were enqueued by this pass.
    def scan
      # Snapshot the known files BEFORE listing the directory. Taking the
      # snapshot afterwards lets a file that finishes in between appear in
      # the listing but no longer in the state, so it would be enqueued a
      # second time although it has already been moved away.
      known_files = @state.keys
      candidates = Dir.glob((@in_path + '*').to_s) - known_files

      # The backlog can exceed the limit (e.g. after direct #enqueue calls);
      # clamp at zero because slicing with a negative length returns nil.
      free_slots = @max_queue_size - queue_size
      free_slots = 0 if free_slots < 0

      candidates.first(free_slots).each do |file_name|
        enqueue(file_name)
      end
    end

    # Scans repeatedly, sleeping between passes, until #stop is called.
    def start
      @run = true
      while @run
        scan
        sleep(@sleep)
      end
    end

    # Makes the #start loop exit after its current pass.
    def stop
      @run = false
    end

    # Registers a block that is called with (file_name, exception) whenever
    # processing a file fails.
    def on_error(&block)
      @error_handlers << block
    end

    # Calls every registered error handler, in registration order.
    def handle_error(file_name, exception)
      @error_handlers.each do |error_handler|
        error_handler.call(file_name, exception)
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ require "fog"
2
+
3
module FileWorker
  # Worker that uploads a file to an S3 bucket through fog. The object is
  # stored under the file's base name and is not public.
  class S3UploadWorker
    # file_name - path of the file to upload.
    # options   - accepted so all workers share one constructor signature;
    #             not used by this worker.
    #
    # The bucket name and the credentials are read from the S3_BUCKET,
    # AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY environment variables.
    # ENV.fetch raises when any of them is missing.
    def initialize(file_name, options)
      @file_name = file_name
      @bucket_name = ENV.fetch("S3_BUCKET")
      @aws_key_id = ENV.fetch("AWS_ACCESS_KEY_ID")
      @aws_secret = ENV.fetch("AWS_SECRET_ACCESS_KEY")
    end

    # Uploads the file and returns whatever the bucket's files.create call
    # returns. The file handle is closed again afterwards, also when the
    # upload raises, so long-running scanners do not leak descriptors.
    def process
      body = File.open(@file_name)

      begin
        bucket.files.create(
          :key => File.basename(@file_name),
          :body => body,
          :public => false
        )
      ensure
        # Guarded with respond_to? so that a stand-in body without #close
        # (for example a test double returned by a stubbed File.open) is
        # left alone.
        body.close if body.respond_to?(:close) && !body.closed?
      end
    end

    private

    # Memoized fog storage connection for AWS.
    def connection
      @connection ||= Fog::Storage.new(
        :provider => "AWS",
        :aws_access_key_id => @aws_key_id,
        :aws_secret_access_key => @aws_secret
      )
    end

    # Looks the configured bucket up on every call.
    def bucket
      connection.directories.get(@bucket_name)
    end
  end
end
@@ -0,0 +1,3 @@
1
module FileWorker
  # Version string of the file_worker gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,7 @@
1
+ require "file_worker/version"
2
+
3
# Namespace of the file_worker gem. The library classes are registered for
# autoloading, so each file is only loaded when its constant is first
# referenced. Cli is registered as well, which makes FileWorker::Cli available
# after a plain `require "file_worker"`.
module FileWorker
  autoload :Cli,              'file_worker/cli'
  autoload :DirectoryScanner, 'file_worker/directory_scanner'
  autoload :DefaultWorker,    'file_worker/default_worker'
  autoload :S3UploadWorker,   'file_worker/s3_upload_worker'
end
@@ -0,0 +1,126 @@
1
+ require File.expand_path("test_helper", File.dirname(__FILE__))
2
+ require 'jruby/synchronized'
3
+
4
# Test worker whose #process always fails, using the file name it was given as
# the error message.
class FailingTestWorker
  # options is accepted to match the worker constructor signature; unused.
  def initialize(file_name, options)
    @file_name = file_name
  end

  # Always raises; never returns.
  def process
    fail @file_name
  end
end
13
+
14
describe "the directory scanner" do
  # Runs a single scan pass and then blocks until the work queue is idle.
  def scan_and_wait
    @file_worker.scan
    @file_worker.wait_for_empty
  end

  before do
    # Five files in the in-directory, none in the done-directory.
    prepare_fixture_files

    scanner_options = {
      :in_directory => @fixture_root + 'in',
      :out_directory => @fixture_root + 'done',
      :max_queue_size => 10
    }
    @file_worker = FileWorker::DirectoryScanner.new(scanner_options)

    # Collects the names of the files whose processing failed; appended to
    # from the worker threads, hence synchronized.
    @errors = []
    @errors.extend JRuby::Synchronized

    @file_worker.on_error { |file_name, exception| @errors << file_name }
  end

  it "should enqueue the files in the in directory" do
    @file_worker.queue.expects(:push).times(5)
    scan_and_wait
  end

  it "should move the files to the out directory" do
    scan_and_wait

    @file_worker.in_path.children.must_be_empty
    @file_worker.done_path.children.size.must_equal(5)
  end

  it "should not enqueue the this same files twice" do
    # push is mocked, so nothing is processed and all five files stay in the
    # scanner state; the second pass must therefore push nothing.
    @file_worker.queue.expects(:push).times(5)
    2.times { scan_and_wait }
  end

  describe "when the worker queue is empty" do
    before { @file_worker.stubs(:queue_size).returns(0) }

    it "should only enqueue 10 items" do
      prepare_fixture_files(20)
      @file_worker.queue.expects(:push).times(10)
      scan_and_wait
    end
  end

  describe "when the worker queue has some items in it" do
    before { @file_worker.stubs(:queue_size).returns(3) }

    it "should not try to over fill the queue" do
      prepare_fixture_files(20)
      @file_worker.queue.expects(:push).times(7)
      scan_and_wait
    end
  end

  describe "when the worker queue is full" do
    before { @file_worker.stubs(:queue_size).returns(10) }

    it "should not enqueue anything" do
      prepare_fixture_files(20)
      @file_worker.queue.expects(:push).never
      scan_and_wait
    end
  end

  describe "when a file is already enqueued" do
    before do
      # Pretend one of the five fixture files is already being worked on.
      known_file = Dir.glob(@file_worker.in_path + '*').first
      @file_worker.state[known_file] = {:time => Time.now, :status => :working}
    end

    it "should not enqueue the file again" do
      @file_worker.queue.expects(:push).times(4)
      scan_and_wait
    end
  end

  describe "when jobs fail" do
    before do
      @file_worker.worker_class = FailingTestWorker
      scan_and_wait
    end

    it "should call the error handler" do
      @errors.size.must_equal(5)
    end

    it "should not move the files" do
      @file_worker.done_path.children.must_be_empty
      @file_worker.in_path.children.size.must_equal(5)
    end

    it "should remove the files from state" do
      @file_worker.state.must_be_empty
    end
  end

end
@@ -0,0 +1,35 @@
1
+ require File.expand_path("test_helper", File.dirname(__FILE__))
2
+
3
describe "an s3 upload worker" do
  # Environment variables the worker reads its configuration from.
  env_keys = ["S3_BUCKET", "AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY"]

  before do
    @file_name = "/path/to/file.name"

    # Start every example without the S3 settings. Otherwise values set by
    # another example (or by the developer's shell) leak in and the
    # "not provided auth options" example depends on the execution order.
    @saved_env = {}
    env_keys.each { |key| @saved_env[key] = ENV.delete(key) }
  end

  after do
    # Put the environment back; assigning nil removes a variable again.
    @saved_env.each { |key, value| ENV[key] = value }
  end

  it "should fail when not provided auth options" do
    assert_raises(IndexError) { FileWorker::S3UploadWorker.new(@file_name, {}) }
  end

  describe "instantiated correctly" do
    before do
      ENV["S3_BUCKET"] = "files"
      ENV["AWS_ACCESS_KEY_ID"] = "some key"
      ENV["AWS_SECRET_ACCESS_KEY"] = "other key"

      @worker = FileWorker::S3UploadWorker.new(@file_name, {})
    end

    describe "process" do
      it "should attempt to transfer to the location" do
        # File.open and the bucket are replaced by doubles, so neither the
        # file system nor the network is touched.
        file = stub()
        File.expects(:open).with(@file_name).returns(file)

        files = stub()
        bucket = stub(:files => files)
        files.expects(:create).with(:key => File.basename(@file_name), :body => file, :public => false)

        @worker.expects(:bucket).returns(bucket)
        @worker.process
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
# Shared setup for all specs: loads the bundle and the test libraries and adds
# fixture helpers to every MiniTest spec.
$testing = true

require "rubygems"
require "bundler"
Bundler.require

require "minitest/autorun"
require "mocha"
require "pathname"
require "fileutils"

MiniTest::Spec.class_eval do
  # Before every example: make sure the fixture directories exist and are
  # empty.
  before do
    @fixture_root = Pathname.new(File.dirname(__FILE__)) + "../tmp/fixtures"
    ["in", "done"].each { |directory| FileUtils.mkdir_p(@fixture_root + directory) }
    clean_fixture_dirs
  end

  # Removes everything matched by "*" in the in- and done-fixture directories.
  def clean_fixture_dirs
    ["in/*", "done/*"].each do |pattern|
      FileUtils.rm_rf(Dir.glob(@fixture_root + pattern))
    end
  end

  # Creates in_files files in the in-directory and done_files files in the
  # done-directory.
  def prepare_fixture_files(in_files = 5, done_files = 0)
    in_directory = @fixture_root + "in"
    done_directory = @fixture_root + "done"

    create_fixture_files(in_directory, in_files)
    create_fixture_files(done_directory, done_files)
  end

  # Creates files named "0" .. "number - 1" in path; each file contains its
  # own name.
  def create_fixture_files(path, number)
    number.times do |index|
      name = index.to_s
      File.open(path + name, "w") { |file| file.write(name) }
    end
  end
end
metadata ADDED
@@ -0,0 +1,132 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: file_worker
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.0.1
6
+ platform: ruby
7
+ authors:
8
+ - Mick Staugaard
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2012-01-26 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rake
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :development
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: minitest
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ type: :development
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: mocha
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ type: :development
47
+ version_requirements: *id003
48
+ - !ruby/object:Gem::Dependency
49
+ name: jruby-openssl
50
+ prerelease: false
51
+ requirement: &id004 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ type: :runtime
58
+ version_requirements: *id004
59
+ - !ruby/object:Gem::Dependency
60
+ name: girl_friday
61
+ prerelease: false
62
+ requirement: &id005 !ruby/object:Gem::Requirement
63
+ none: false
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: "0"
68
+ type: :runtime
69
+ version_requirements: *id005
70
+ - !ruby/object:Gem::Dependency
71
+ name: fog
72
+ prerelease: false
73
+ requirement: &id006 !ruby/object:Gem::Requirement
74
+ none: false
75
+ requirements:
76
+ - - ">="
77
+ - !ruby/object:Gem::Version
78
+ version: "0"
79
+ type: :runtime
80
+ version_requirements: *id006
81
+ description: If you have files that you somehow need to process, file_worker is your friend.
82
+ email:
83
+ - mick@staugaard.com
84
+ executables:
85
+ - file_worker
86
+ extensions: []
87
+
88
+ extra_rdoc_files: []
89
+
90
+ files:
91
+ - lib/file_worker.rb
92
+ - lib/file_worker/cli.rb
93
+ - lib/file_worker/default_worker.rb
94
+ - lib/file_worker/directory_scanner.rb
95
+ - lib/file_worker/s3_upload_worker.rb
96
+ - lib/file_worker/version.rb
97
+ - README.md
98
+ - test/directory_scanner_test.rb
99
+ - test/s3_upload_worker_test.rb
100
+ - test/test_helper.rb
101
+ - bin/file_worker
102
+ homepage: ""
103
+ licenses: []
104
+
105
+ post_install_message:
106
+ rdoc_options: []
107
+
108
+ require_paths:
109
+ - lib
110
+ required_ruby_version: !ruby/object:Gem::Requirement
111
+ none: false
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ version: "0"
116
+ required_rubygems_version: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ">="
120
+ - !ruby/object:Gem::Version
121
+ version: "0"
122
+ requirements: []
123
+
124
+ rubyforge_project:
125
+ rubygems_version: 1.8.15
126
+ signing_key:
127
+ specification_version: 3
128
+ summary: A multi-threaded worker that takes files as input
129
+ test_files:
130
+ - test/directory_scanner_test.rb
131
+ - test/s3_upload_worker_test.rb
132
+ - test/test_helper.rb