file-processing-job 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 cj2
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
File without changes
data/Rakefile ADDED
@@ -0,0 +1,57 @@
1
# Rakefile for the file-processing-job gem: packaging (Jeweler), tests,
# coverage (RCov) and RDoc generation.
require 'rubygems'
require 'rake'

begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "file-processing-job"
    gem.summary = %Q{Light-weight client / server implementation for processing disk based records}
    gem.description = %Q{file-processing-job allows you to distribute the processing load of large files to clients across the network. It is a thin wrapper on top of the EventMachine library.}
    gem.email = "cjgrimes@gmail.com"
    gem.homepage = "http://github.com/cj2/file-processing-job"
    gem.authors = ["cj2"]
    gem.files = FileList["[A-Z]*.*", "{examples,lib,features,spec}/**/*", "Rakefile", "cucumber.yml"]

    # lib/file-processing-job.rb requires these three libraries at load time,
    # so they must be installed together with the gem (runtime dependencies),
    # not only on development machines.
    gem.add_dependency "aasm", "~> 2.1"
    gem.add_dependency "directory_watcher", "~> 1.3"
    gem.add_dependency "eventmachine", "~> 0.12"

    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# NOTE(review): this task looks for test/**/test_*.rb, while the files shipped
# with the gem are RSpec specs under spec/ -- confirm which runner is intended.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined in this file (presumably it comes from
# Jeweler); only depend on it when it exists so `rake test` still works after
# the LoadError rescue above.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip the trailing newline so it does not end up in the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "file-processing-job #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
@@ -0,0 +1,10 @@
1
+ require 'file-processing-job'
2
+
3
# Example worker: the client instantiates this class for every file it
# receives and calls #receive_file with the file contents.
class FileProcessor
  # Prints a header line followed by the inspected payload.
  def receive_file(data)
    $stdout.puts "received file:"
    p(data)
  end
end

# Connect to a server on localhost and process files with FileProcessor.
FileProcessingJob.connect('127.0.0.1', 11222, FileProcessor)
@@ -0,0 +1,6 @@
1
+ require 'file-processing-job'
2
+
3
# Example server: start on the default host/port, overriding the inbox and
# processed directories through the configuration block.
FileProcessingJob.start_server do |config|
  config.inbox_directory = './data/inbox'
  config.processed_directory = './data/processed'
end
@@ -0,0 +1,23 @@
1
+ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
+
3
+ require 'eventmachine'
4
+ require 'aasm'
5
+ require 'directory_watcher'
6
+
7
module FileProcessingJob
  # Returns the logger shared by the server and the clients.
  #
  # When a top-level Rails constant is defined, Rails.logger is used.
  # Otherwise a Logger writing to log/fileprocessingjob.log (relative to the
  # current working directory) is created, together with the log directory
  # if it is missing. The result is memoized, so later calls return the same
  # object.
  def self.logger
    @@logger ||= begin
      if defined?(::Rails)
        Rails.logger
      else
        # loaded lazily so the stdlib logger is only pulled in when needed
        require 'logger'
        # File.directory? instead of Dir.exists?, which Ruby 3.2 removed
        Dir.mkdir('log') unless File.directory?('log')
        Logger.new('log/fileprocessingjob.log')
      end
    end
  end
end
20
+
21
+ require 'fpj/server'
22
+ require 'fpj/client'
23
+
data/lib/fpj/client.rb ADDED
@@ -0,0 +1,94 @@
1
+ # add special parsers - parse JSON files, XML files into nokogiri, etc
2
+ # go to eventmachine forum - see if anyone has already done this
3
+
4
module FileProcessingJob
  # Runs an EventMachine reactor and connects a worker to the server at
  # host:port. +callback+ is a class; a new instance is created for every
  # downloaded file and its receive_file method is called with the data.
  # This call blocks for as long as the reactor is running.
  def self.connect(host='127.0.0.1',port=11222,callback)
    EM.run do
      handler_class = Client::Connection
      handler_class.callback = callback
      EventMachine.connect(host, port, handler_class)
    end
  end

  module Client
    class Config
      attr_accessor :parse_file
    end

    # Client side of the protocol, driven by a small state machine:
    #
    #   requesting_task -> downloading -> processing -> complete -> requesting_task
    #
    # Whatever the server sends is interpreted according to the current state
    # (a file name, the file contents, or the acknowledgement).
    #
    # NOTE(review): receive_data treats every chunk handed over by
    # EventMachine as one complete message; nothing here reassembles a
    # payload that arrives in several chunks -- confirm against expected
    # file sizes.
    class Connection < EM::Connection
      include AASM

      class << self
        # Class whose instances receive the downloaded file data.
        attr_accessor :callback
      end

      attr_accessor :filename, :data

      aasm_state :requesting_task
      aasm_state :downloading, :enter => :download_file
      aasm_state :processing,  :enter => :process_file
      aasm_state :complete,    :exit  => :request_task

      aasm_event :filename_received do
        transitions :from => [:requesting_task], :to => :downloading
      end

      aasm_event :file_received do
        transitions :from => [:downloading], :to => :processing
      end

      aasm_event :file_processed do
        transitions :from => [:processing], :to => :complete
      end

      aasm_event :next_task do
        transitions :from => [:complete], :to => :requesting_task
      end

      def logger
        FileProcessingJob.logger
      end

      # Forgets the previous task and asks the server for the next file name.
      def request_task
        self.filename = self.data = nil
        send_data "next"
      end

      # The data received last is the file name; ask the server for its contents.
      def download_file
        self.filename = data
        send_data "get #{filename}"
      end

      # Passes the received data to a fresh callback instance and reports the
      # outcome ("processed <name>" or "error <name>") back to the server.
      def process_file
        handler = self.class.callback.new
        handler.send(:receive_file, data)
        send_data "processed #{filename}"
      rescue => e
        logger.error("Error processing #{filename}. The file can be found on the server in the 'error' directory")
        logger.error(e)
        send_data "error #{filename}"
      end

      # EventMachine callback: request the first task right away.
      def post_init
        request_task
      end

      # EventMachine callback: store the payload and advance the state machine.
      def receive_data(data)
        self.data = data
        case aasm_current_state
        when :requesting_task then filename_received
        when :downloading     then file_received
        when :processing
          file_processed
          next_task
        end
      end
    end
  end
end
94
+
data/lib/fpj/server.rb ADDED
@@ -0,0 +1,186 @@
1
+ # add special parsers - parse JSON files, XML files into nokogiri, etc
2
+ # go to eventmachine forum - see if anyone has already done this
3
+
4
+ require 'fileutils'
5
+
6
module FileProcessingJob
  # FileProcessingJob::start_server creates an event machine server
  # on the specified IP address and port. The optional configuration
  # block passed to start_server can be used to point FileProcessingJob
  # to the directories used to manage the files.
  #
  # By default, FileProcessingJob uses the following directories to manage
  # files. If the directories do not exist they will be created
  # automatically at runtime.
  #
  # Files placed in the "inbox" directory will automatically be detected and
  # sent to a worker for processing.
  #
  #   ./data/inbox
  #   ./data/processing
  #   ./data/processed
  #   ./data/error
  #
  # === Logging
  #
  # By default the server and clients will use the Rails logging facility
  # if deployed within a Rails application. Otherwise a logger will be
  # created that logs to ./log/fileprocessingjob.log
  #
  # === Usage example
  #
  # The server monitors the inbox directory for new files and dispatches
  # the contents of each file to the next available worker. The worker
  # then processes the file or raises an exception and the server handles
  # moving the file to either the processed or error directory. Any number
  # of workers can exist anywhere on the network. Note that by default
  # the server and client bind to 127.0.0.1
  #
  #   require 'rubygems'
  #   require 'file-processing-job'
  #
  # Server:
  #
  #   FileProcessingJob::start_server() {|config|
  #     config.inbox_directory = './data/inbox'
  #     config.processing_directory = './data/processing'
  #     config.processed_directory = './data/processed'
  #   }
  #
  # Client:
  #
  #   class FileProcessor
  #     def receive_file data
  #       puts "received file data: #{data}"
  #       # do something interesting here
  #     end
  #   end
  #
  #   FileProcessingJob::connect('127.0.0.1', 11222, FileProcessor)

  # Returns the shared server configuration (see Server.config).
  def self.server_config
    Server.config
  end

  # Stops the EventMachine reactor if it is running, which lets a blocked
  # start_server call return. Does nothing when no reactor is running.
  def self.stop_server
    EM.stop if EM.reactor_running?
  end

  # Configures the server (when a block is given), creates the working
  # directories, starts watching the inbox and runs the reactor on host:port.
  # Blocks until the reactor stops; the inbox watcher is stopped afterwards,
  # also when the reactor terminates with an exception.
  def self.start_server(host='127.0.0.1',port=11222,&block)
    Server.config(&block) if block

    # create the directories if they do not exist
    Server.create_directories

    # watch inbox directory
    watcher = DirectoryWatcher.new Server.config.inbox_directory, :glob => '*'
    watcher.add_observer do |*events|
      events.each do |event|
        next unless event.type == :added
        # the glob is flat ('*'), so the base name is the name relative to the inbox
        filename = File.basename(event.path)
        logger.debug "#{filename} discovered in inbox"
        Server::Connection.push(filename)
      end
    end
    watcher.start

    # run the server
    begin
      EM.run do
        EM.start_server host, port, Server::Connection
        logger.info "server started: #{host}:#{port}"
      end
    ensure
      watcher.stop
    end
  end

  module Server
    # Creates the inbox, processing, processed and error directories
    # (including missing parents) unless they already exist.
    #
    # NOTE(review): Dir.exists? is the spelling used before Ruby 3.2 (where it
    # was removed). It is kept because the specs stub Dir.exists?; switching to
    # Dir.exist? means updating those specs at the same time.
    def self.create_directories
      [config.inbox_directory, config.processing_directory,
       config.processed_directory, config.error_directory].each do |dir|
        FileUtils.mkpath(dir) unless Dir.exists?(dir)
      end
    end

    # Returns the memoized Config, yielding it to the block first when one
    # is given.
    def self.config(&block)
      @@config ||= Config.new
      block.call(@@config) if block
      @@config
    end

    # Directory settings. Each reader falls back to a default under ./data
    # while no value (or nil) has been assigned.
    class Config
      attr_writer :inbox_directory, :processed_directory, :processing_directory, :error_directory

      def inbox_directory
        @inbox_directory || './data/inbox'
      end

      def processed_directory
        @processed_directory || './data/processed'
      end

      def processing_directory
        @processing_directory || './data/processing'
      end

      def error_directory
        @error_directory || './data/error'
      end
    end

    # Server side of the protocol. One instance exists per connected client;
    # the queue of pending file names is shared by all of them.
    #
    # Commands understood (one per received chunk, surrounding whitespace
    # ignored):
    #
    #   next              reply with the next queued file name once available
    #   get <name>        stream the file from the inbox, then move it to processing
    #   processed <name>  move the file to processed and reply "ack"
    #   error <name>      move the file to error and reply "ack"
    #   close             close the connection
    class Connection < EM::Connection
      @@q = EM::Queue.new

      # Queues a file name to be handed to the next client asking for work.
      def self.push(message)
        @@q.push(message)
      end

      def config
        @config ||= FileProcessingJob::Server.config
      end

      def logger
        FileProcessingJob.logger
      end

      # EventMachine callback invoked when a client connects. (EventMachine
      # has no `bind` callback, so the message was never logged under that
      # name.)
      def post_init
        logger.info "client connected to the server"
      end
      # previous name of the hook, kept so existing callers do not break
      alias_method :bind, :post_init

      # inbound client connection
      def receive_data(data)
        case data.strip
        when "next"
          @@q.pop { |filename| send_data filename }
        when /^get (.*)$/
          handle_get($1)
        when /^processed (.*)$/
          handle_processed($1)
        when /^error (.*)$/
          handle_error($1)
        when "close"
          close_connection
        end
      end

      def unbind
        logger.info "client disconnected from the server"
      end

      private

      # Streams the requested inbox file to the client and moves it to the
      # processing directory once streaming has completed.
      # TODO: streaming failures are not handled; the file stays in the inbox.
      def handle_get(requested)
        filename = sanitize_filename(requested)
        return unless filename

        source = File.join(config.inbox_directory, filename)
        streamer = EventMachine::FileStreamer.new(self, source)
        streamer.callback do
          FileUtils.move(source, File.join(config.processing_directory, filename))
        end
      end

      # Moves a successfully processed file out of the processing directory,
      # unless a file of that name is already in the processed directory, and
      # acknowledges so the client can request its next task.
      def handle_processed(reported)
        filename = sanitize_filename(reported)
        return unless filename

        target = File.join(config.processed_directory, filename)
        if File.exist?(target)
          logger.error "file #{filename} already exists in processed directory"
        else
          FileUtils.move(File.join(config.processing_directory, filename), target)
          logger.debug "finished processing #{filename}"
        end
        send_data "ack"
      end

      # Moves a file the client failed to process into the error directory.
      # The client waits for a reply before asking for more work, so the
      # failure is acknowledged exactly like a success.
      def handle_error(reported)
        filename = sanitize_filename(reported)
        return unless filename

        FileUtils.move(File.join(config.processing_directory, filename), File.join(config.error_directory, filename))
        logger.error "Client failed to process #{filename}. The file can be found in the 'error' directory"
        send_data "ack"
      end

      # File names arrive over the network, so they are reduced to a bare
      # base name before being joined onto a managed directory. Returns nil
      # (after logging) for names that cannot denote a file inside it.
      def sanitize_filename(raw)
        base = File.basename(raw.to_s)
        if base.empty? || base == '.' || base == '..' || base.include?(File::SEPARATOR)
          logger.error "rejected unsafe file name #{raw.inspect} from client"
          return nil
        end
        base
      end
    end
  end
end
186
+
@@ -0,0 +1,12 @@
1
# Shared bootstrap for the specs: puts spec/ and lib/ on the load path and
# loads the library together with RSpec.
spec_dir = File.expand_path(File.dirname(__FILE__))
$LOAD_PATH.unshift(spec_dir)
$LOAD_PATH.unshift(File.expand_path(File.join(spec_dir, '..', 'lib')))

require 'rubygems'
require 'aasm'
require 'file-processing-job'
require 'rspec'
require 'rspec/autorun'

# NOTE(review): the module is spelled `Rspec` here; later RSpec releases name
# it `RSpec` -- confirm against the RSpec version in use.
Rspec.configure do |config|

end
@@ -0,0 +1,54 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
2
+
3
# Callback whose receive_file always fails.
class ErrorRaisingProcessor
  def receive_file data
    raise "unknown error"
  end
end

# Callback whose receive_file accepts anything.
class SuccessfulProcessor
  def receive_file data
  end
end


describe FileProcessingJob::Client do
  describe :process_file do
    before do
      # Connection.new runs post_init, which writes to the socket; there is
      # no reactor in these examples, so the low-level write is stubbed.
      EventMachine.stub!(:send_data)
      @connection = FileProcessingJob::Client::Connection.new(11222)
      @filename = 'thefile.dat'
      @connection.filename = @filename
    end

    describe "success" do
      before do
        @handler = SuccessfulProcessor
        FileProcessingJob::Client::Connection.callback = @handler
      end

      it "should send back the success code" do
        @connection.should_receive(:send_data).with("processed #{@filename}")
        @connection.process_file
      end
    end

    describe "exception raised" do
      before do
        @handler = ErrorRaisingProcessor
        FileProcessingJob::Client::Connection.callback = @handler
      end

      it "should trap and log the exception" do
        FileProcessingJob.logger.should_receive(:error).twice # once for the human friendly message, once with the stack dump
        lambda { @connection.process_file }.should_not raise_error
      end

      it "should send back an error message" do
        @connection.should_receive(:send_data).with("error #{@filename}")
        @connection.process_file
      end
    end

  end
end
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
2
+
3
# Minimal stand-in for Rails: only the class-level logger reader exists.
class Rails
  class << self
    def logger
      "rails_logger"
    end
  end
end

describe FileProcessingJob do
  describe "logging" do
    it "should use the Rails logger" do
      FileProcessingJob.logger.should eq('rails_logger')
    end
  end
end
@@ -0,0 +1,41 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
2
+
3
describe FileProcessingJob::Server do
  describe :config do
    describe "defaults" do
      it "should return ./data/inbox for the inbox directory" do
        FileProcessingJob::Server::config.inbox_directory.should == './data/inbox'
      end

      it "should return ./data/processing for the processing directory" do
        FileProcessingJob::Server::config.processing_directory.should == './data/processing'
      end

      it "should return ./data/processed for the processed directory" do
        FileProcessingJob::Server::config.processed_directory.should == './data/processed'
      end
    end

    describe "specified" do
      before do
        FileProcessingJob::Server::config do |config|
          config.inbox_directory = './inbox'
          config.processing_directory = './processing'
          config.processed_directory = './processed'
        end
      end

      # The configuration object is memoized, so the overrides would leak into
      # every example that runs later. Assigning nil makes the readers fall
      # back to their defaults again.
      after do
        FileProcessingJob::Server::config do |config|
          config.inbox_directory = nil
          config.processing_directory = nil
          config.processed_directory = nil
        end
      end

      it "should return ./inbox for the inbox directory" do
        FileProcessingJob::Server::config.inbox_directory.should == './inbox'
      end

      it "should return ./processing for the processing directory" do
        FileProcessingJob::Server::config.processing_directory.should == './processing'
      end

      it "should return ./processed for the processed directory" do
        FileProcessingJob::Server::config.processed_directory.should == './processed'
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
2
+
3
describe FileProcessingJob::Server do
  # Stand-in for EventMachine::FileStreamer that accepts callback/error
  # registration and does nothing.
  def mock_streamer
    o = String.new
    o.stub!(:callback).and_return nil
    o.stub!(:error).and_return nil
    o
  end

  describe :receive_data do
    before do
      @connection = FileProcessingJob::Server::Connection.new(11222)
      # no reactor is running in these examples, so outbound writes are stubbed
      @connection.stub!(:send_data)
      @filename = 'thefile.dat'
      @config = FileProcessingJob::Server.config
      @qualified_filename = File.join(@config.inbox_directory, @filename)
    end

    describe "get" do
      it "should attempt to stream the file" do
        EventMachine::FileStreamer.should_receive(:new).with(@connection, @qualified_filename).and_return(mock_streamer)
        @connection.receive_data("get #{@filename}")
      end
    end

    describe "error" do
      it "should move the file to the error directory" do
        FileUtils.should_receive(:move).with(File.join(@config.processing_directory, @filename), File.join(@config.error_directory, @filename))
        @connection.logger.should_receive(:error)
        @connection.receive_data("error #{@filename}")
      end
    end
  end

  describe :create_directories do
    it "should create the directories if they do not exist" do
      Dir.should_receive(:exists?).exactly(4).times.with(an_instance_of(String)).and_return(false)
      # create_directories calls FileUtils.mkpath; expecting :path never matched
      FileUtils.should_receive(:mkpath).exactly(4).times.with(an_instance_of(String))
      FileProcessingJob::Server.create_directories
    end

    it "should not attempt to create the directories if they exist" do
      Dir.should_receive(:exists?).exactly(4).times.with(an_instance_of(String)).and_return(true)
      FileUtils.should_not_receive(:mkpath)
      FileProcessingJob::Server.create_directories
    end
  end
end
metadata ADDED
@@ -0,0 +1,124 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: file-processing-job
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 1
8
+ - 0
9
+ version: 0.1.0
10
+ platform: ruby
11
+ authors:
12
+ - cj2
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-09-09 00:00:00 -06:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: aasm
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - ~>
27
+ - !ruby/object:Gem::Version
28
+ segments:
29
+ - 2
30
+ - 1
31
+ version: "2.1"
32
+ type: :development
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: directory_watcher
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ~>
41
+ - !ruby/object:Gem::Version
42
+ segments:
43
+ - 1
44
+ - 3
45
+ version: "1.3"
46
+ type: :development
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: eventmachine
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ none: false
53
+ requirements:
54
+ - - ~>
55
+ - !ruby/object:Gem::Version
56
+ segments:
57
+ - 0
58
+ - 12
59
+ version: "0.12"
60
+ type: :development
61
+ version_requirements: *id003
62
+ description: file-processing-job allows you to distribute the processing load of large files to clients across the network. It is a thin wrapper on top of the EventMachine library.
63
+ email: cjgrimes@gmail.com
64
+ executables: []
65
+
66
+ extensions: []
67
+
68
+ extra_rdoc_files:
69
+ - LICENSE
70
+ - README.rdoc
71
+ files:
72
+ - README.rdoc
73
+ - Rakefile
74
+ - examples/test_client.rb
75
+ - examples/test_server.rb
76
+ - lib/file-processing-job.rb
77
+ - lib/fpj/client.rb
78
+ - lib/fpj/server.rb
79
+ - spec/spec_helper.rb
80
+ - spec/unit/client_spec.rb
81
+ - spec/unit/rails_support_spec.rb
82
+ - spec/unit/server_config_spec.rb
83
+ - spec/unit/server_spec.rb
84
+ - LICENSE
85
+ has_rdoc: true
86
+ homepage: http://github.com/cj2/file-processing-job
87
+ licenses: []
88
+
89
+ post_install_message:
90
+ rdoc_options:
91
+ - --charset=UTF-8
92
+ require_paths:
93
+ - lib
94
+ required_ruby_version: !ruby/object:Gem::Requirement
95
+ none: false
96
+ requirements:
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ segments:
100
+ - 0
101
+ version: "0"
102
+ required_rubygems_version: !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ segments:
108
+ - 0
109
+ version: "0"
110
+ requirements: []
111
+
112
+ rubyforge_project:
113
+ rubygems_version: 1.3.7
114
+ signing_key:
115
+ specification_version: 3
116
+ summary: Light-weight client / server implementation for processing disk based records
117
+ test_files:
118
+ - spec/spec_helper.rb
119
+ - spec/unit/client_spec.rb
120
+ - spec/unit/rails_support_spec.rb
121
+ - spec/unit/server_config_spec.rb
122
+ - spec/unit/server_spec.rb
123
+ - examples/test_client.rb
124
+ - examples/test_server.rb