file-processing-job 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -11,7 +11,6 @@ begin
11
11
  gem.homepage = "http://github.com/cj2/file-processing-job"
12
12
  gem.authors = ["cj2"]
13
13
  gem.files = FileList["[A-Z]*.*", "{examples,lib,features,spec}/**/*", "Rakefile", "cucumber.yml"]
14
- gem.add_development_dependency "aasm", "~> 2.1"
15
14
  gem.add_development_dependency "directory_watcher", "~> 1.3"
16
15
  gem.add_development_dependency "eventmachine", "~> 0.12"
17
16
 
@@ -1,3 +1,4 @@
1
+ require 'rubygems'
1
2
  require 'file-processing-job'
2
3
 
3
4
  class FileProcessor
@@ -1,3 +1,4 @@
1
+ require 'rubygems'
1
2
  require 'file-processing-job'
2
3
 
3
4
  FileProcessingJob::start_server() {|config|
@@ -1,10 +1,10 @@
1
1
  $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__))) unless $LOAD_PATH.include?(File.expand_path(File.dirname(__FILE__)))
2
2
 
3
3
  require 'eventmachine'
4
- require 'aasm'
5
4
  require 'directory_watcher'
6
5
 
7
6
  module FileProcessingJob
7
+
8
8
  def self.logger
9
9
  @@logger ||= begin
10
10
  if (const_defined?('Rails'))
@@ -18,6 +18,7 @@ module FileProcessingJob
18
18
  end
19
19
  end
20
20
 
21
+ require 'fpj/connection'
21
22
  require 'fpj/server'
22
23
  require 'fpj/client'
23
24
 
data/lib/fpj/client.rb CHANGED
@@ -3,11 +3,17 @@
3
3
 
4
4
  module FileProcessingJob
5
5
  def self.connect(host='127.0.0.1',port=11222,callback)
6
- EM::run {
6
+ @@running = true
7
+ while (@@running)
7
8
  client_connection = Client::Connection
8
9
  client_connection.callback = callback
9
- EventMachine::connect host, port, client_connection
10
- }
10
+ EM::run { EventMachine::connect host, port, client_connection }
11
+ end
12
+ end
13
+
14
+ def self.disconnect
15
+ @@running = false
16
+ EventMachine::stop_event_loop
11
17
  end
12
18
 
13
19
  module Client
@@ -15,60 +21,31 @@ module FileProcessingJob
15
21
  attr_accessor :parse_file
16
22
  end
17
23
 
18
- class Connection < EM::Connection
19
- include AASM
20
-
24
+ class Connection < FileProcessingJob::Connection
21
25
  class << self
22
26
  attr_accessor :callback
23
27
  end
24
28
 
25
- attr_accessor :filename, :data
26
-
27
- aasm_state :requesting_task
28
- aasm_state :downloading, :enter => :download_file
29
- aasm_state :processing, :enter => :process_file
30
- aasm_state :complete, :exit => :request_task
31
-
32
- aasm_event :filename_received do
33
- transitions :to => :downloading, :from => [:requesting_task]
34
- end
35
-
36
- aasm_event :file_received do
37
- transitions :to => :processing, :from => [:downloading]
38
- end
39
-
40
- aasm_event :file_processed do
41
- transitions :to => :complete, :from => [:processing]
42
- end
43
-
44
- aasm_event :next_task do
45
- transitions :to => :requesting_task, :from => [:complete]
46
- end
47
-
48
29
  def logger
49
30
  FileProcessingJob.logger
50
31
  end
51
-
52
-
32
+
53
33
  def request_task
54
- self.filename = nil
55
- self.data = nil
56
- send_data "next"
34
+ send_message('next')
57
35
  end
58
36
 
59
- def download_file
60
- self.filename = self.data
61
- send_data "get #{self.filename}"
37
+ def error(filename)
38
+ send_message "error #{filename}"
62
39
  end
63
40
 
64
- def process_file
41
+ def process(filename, contents)
65
42
  begin
66
- self.class.callback.new.send(:receive_file, self.filename, self.data)
67
- send_data "processed #{self.filename}"
43
+ self.class.callback.new.send(:receive_file, filename, contents)
44
+ send_message "processed #{filename}"
68
45
  rescue
69
- logger.error("Error processing #{self.filename}. The file can be found on the server in the 'error' directory")
46
+ logger.error("Error processing #{filename}. The file can be found on the server in the 'error' directory")
70
47
  logger.error($!)
71
- send_data "error #{self.filename}"
48
+ error(filename)
72
49
  end
73
50
  end
74
51
 
@@ -76,17 +53,21 @@ module FileProcessingJob
76
53
  request_task
77
54
  end
78
55
 
79
- def receive_data(data)
80
- self.data = data
81
- case aasm_current_state
82
- when :requesting_task
83
- filename_received
84
- when :downloading
85
- file_received
86
- when :processing
87
- file_processed
88
- next_task
89
- end
56
+ def unbind
57
+ EventMachine::stop_event_loop
58
+ end
59
+
60
+ def receive_task(data)
61
+ if (data =~ /^filename: (.*)/)
62
+ @filename = $1
63
+ else
64
+ @contents = data
65
+ end
66
+
67
+ if (@filename && @contents)
68
+ logger.info "Processing #{@filename}"
69
+ process(@filename, @contents)
70
+ end
90
71
  end
91
72
  end
92
73
  end
@@ -0,0 +1,25 @@
1
+ module FileProcessingJob
2
+ class Connection < EM::Connection
3
+ TOKEN = "_-|fpj|-_"
4
+
5
+ def initialize(*args)
6
+ super
7
+ @recv_buffer = BufferedTokenizer.new(TOKEN)
8
+ # set_comm_inactivity_timeout(120)
9
+ end
10
+
11
+ def receive_data(data)
12
+ @recv_buffer.extract(data).each do |msg|
13
+ receive_task(msg)
14
+ end
15
+ end
16
+
17
+ def send_message(data)
18
+ send_data "#{data}#{TOKEN}"
19
+ end
20
+
21
+ def receive_request
22
+ raise "receive_task not overridden by #{self.class.name}"
23
+ end
24
+ end
25
+ end
data/lib/fpj/server.rb CHANGED
@@ -2,6 +2,7 @@
2
2
  # go to eventmachine forum - see if anyone has already done this
3
3
 
4
4
  require 'fileutils'
5
+ require 'timeout'
5
6
 
6
7
  module FileProcessingJob
7
8
  # FileProcessingJob::start_server creates an event machine server
@@ -70,21 +71,31 @@ module FileProcessingJob
70
71
  # create the directories if they do not exist
71
72
  Server.create_directories
72
73
 
73
- # watch inbox directory
74
- dw = DirectoryWatcher.new Server::config.inbox_directory, :glob => '*'
74
+ # manually load existing files in case there is a large backlog
75
+ # the directory_watcher interface does not handle large volumes
76
+ # well on startup
77
+ Dir.entries(Server::config.inbox_directory).each do |filename|
78
+ unless (File.directory?(File.join(Server::config.inbox_directory, filename)))
79
+ Server::Connection.push(filename) unless filename =~ /^\./
80
+ end
81
+ end
82
+
83
+ # watch the inbox directory for additions
84
+ dw = DirectoryWatcher.new Server::config.inbox_directory, :glob => '**.*', :pre_load => true
75
85
  dw.add_observer {|*args|
76
86
  args.each {|event|
77
87
  if (event.type == :added)
78
88
  filename = event.path.sub(Server::config.inbox_directory, '').sub(/^\//, '')
79
- logger.debug "#{filename} discovered in inbox"
80
89
  Server::Connection.push(filename)
81
90
  end
82
91
  }
83
92
  }
84
93
  dw.start
85
-
94
+
86
95
  # run the server
87
96
  EM::run {
97
+
98
+ # start the server
88
99
  EM::start_server host, port, Server::Connection
89
100
  FileProcessingJob.logger.info "server started: #{host}:#{port}"
90
101
  }
@@ -124,10 +135,11 @@ module FileProcessingJob
124
135
  end
125
136
  end
126
137
 
127
- class Connection < EM::Connection
138
+ class Connection < FileProcessingJob::Connection
128
139
  @@q = EM::Queue.new()
129
-
140
+
130
141
  def self.push(message)
142
+ FileProcessingJob.logger.debug "#{message} discovered in inbox"
131
143
  @@q.push(message)
132
144
  end
133
145
 
@@ -138,39 +150,44 @@ module FileProcessingJob
138
150
  def logger
139
151
  FileProcessingJob.logger
140
152
  end
153
+
154
+ def send_file(filename)
155
+ logger.debug "Sending #{filename} to client"
156
+
157
+ send_message("filename: #{filename}")
158
+
159
+ FileUtils.move(File.join(self.config.inbox_directory, filename), File.join(self.config.processing_directory, filename))
160
+ streamer = EventMachine::FileStreamer.new(self, File.join(self.config.processing_directory, filename))
161
+ streamer.callback{
162
+ send_data TOKEN
163
+ }
164
+ end
141
165
 
142
166
  # inbound client connection
143
- def receive_data data
167
+ def receive_task data
144
168
  data = data.strip
169
+ logger.debug("received command: #{data}")
145
170
  case (data)
146
- when "next"
147
- @@q.pop {|filename| send_data filename }
148
- when /^get (.*)$/
149
- filename = $1
150
- streamer = EventMachine::FileStreamer.new(self, File.join(self.config.inbox_directory, filename))
151
- streamer.callback{
152
- FileUtils.move(File.join(self.config.inbox_directory, filename), File.join(self.config.processing_directory, filename))
153
- }
154
- # streamer.error {
155
- # logger.error "Unable to stream file #{filename} to client. The file can be found in the 'error' directory"
156
- # FileUtils.move(File.join(self.config.inbox_directory, filename), File.join(self.config.error_directory, filename))
157
- # }
158
- when /^processed (.*)$/
159
- filename = $1
160
- if (File.exists?(File.join(self.config.processed_directory, filename)))
161
- logger.error "file #{filename} already exists in processed directory"
171
+ when "next"
172
+ @@q.pop {|filename| send_file(filename) }
173
+ when /^processed (.*)$/
174
+ filename = $1
175
+ logger.debug("finished processing #{filename}")
176
+ if (File.exists?(File.join(self.config.processed_directory, filename)))
177
+ logger.error "file #{filename} already exists in processed directory"
178
+ else
179
+ FileUtils.move(File.join(self.config.processing_directory, filename), File.join(self.config.processed_directory, filename))
180
+ logger.debug "finished processing #{filename}"
181
+ end
182
+ close_connection
183
+ when /^error (.*)$/
184
+ filename = $1
185
+ FileUtils.move(File.join(self.config.processing_directory, filename), File.join(self.config.error_directory, filename))
186
+ logger.error "Unable to stream file #{filename} to client. The file can be found in the 'error' directory"
187
+ close_connection
162
188
  else
163
- FileUtils.move(File.join(self.config.processing_directory, filename), File.join(self.config.processed_directory, filename))
164
- logger.debug "finished processing #{filename}"
189
+ puts "unknown command: '#{data}'"
165
190
  end
166
- send_data "ack"
167
- when /^error (.*)$/
168
- filename = $1
169
- FileUtils.move(File.join(self.config.processing_directory, filename), File.join(self.config.error_directory, filename))
170
- logger.error "Unable to stream file #{filename} to client. The file can be found in the 'error' directory"
171
- when "close"
172
- close_connection
173
- end
174
191
  end
175
192
 
176
193
  def bind
data/spec/spec_helper.rb CHANGED
@@ -2,7 +2,6 @@ $LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
2
2
  $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib')))
3
3
 
4
4
  require 'rubygems'
5
- require 'aasm'
6
5
  require 'file-processing-job'
7
6
  require 'rspec'
8
7
  require 'rspec/autorun'
@@ -1,13 +1,13 @@
1
1
  require File.expand_path(File.join(File.dirname(__FILE__), '..', 'spec_helper'))
2
2
 
3
3
  class ErrorRaisingProcessor
4
- def receive_file data
4
+ def receive_file(filename, contents)
5
5
  raise "unknown error"
6
6
  end
7
7
  end
8
8
 
9
9
  class SuccessfulProcessor
10
- def receive_file data
10
+ def receive_file(filename, contents)
11
11
  end
12
12
  end
13
13
 
@@ -18,7 +18,6 @@ describe FileProcessingJob::Client do
18
18
  EventMachine.stub!(:send_data)
19
19
  @connection = FileProcessingJob::Client::Connection.new(11222)
20
20
  @filename = 'thefile.dat'
21
- @connection.filename = @filename
22
21
  end
23
22
 
24
23
  describe "success" do
@@ -28,8 +27,8 @@ describe FileProcessingJob::Client do
28
27
  end
29
28
 
30
29
  it "should send back the success code" do
31
- @connection.should_receive(:send_data).with("processed #{@filename}")
32
- @connection.process_file
30
+ @connection.should_receive(:send_message).with("processed #{@filename}")
31
+ @connection.process(@filename, "ab")
33
32
  end
34
33
  end
35
34
 
@@ -41,12 +40,12 @@ describe FileProcessingJob::Client do
41
40
 
42
41
  it "should trap and log the exception" do
43
42
  FileProcessingJob.logger.should_receive(:error).twice # once for the human friendly message, once with the stack dump
44
- lambda { @connection.process_file }.should_not raise_error
43
+ lambda { @connection.process(@filename, "ab") }.should_not raise_error
45
44
  end
46
45
 
47
46
  it "should send back an error message" do
48
- @connection.should_receive(:send_data).with("error #{@filename}")
49
- @connection.process_file
47
+ @connection.should_receive(:send_message).with("error #{@filename}")
48
+ @connection.process(@filename, "ab")
50
49
  end
51
50
  end
52
51
 
@@ -16,10 +16,13 @@ describe FileProcessingJob::Server do
16
16
  @qualified_filename = File.join(@config.inbox_directory, @filename)
17
17
  end
18
18
 
19
- describe "get" do
20
- it "should attempt to stream the file" do
21
- EventMachine::FileStreamer.should_receive(:new).with(@connection, @qualified_filename).and_return(mock_streamer)
22
- @connection.receive_data("get #{@filename}")
19
+ describe :send_file do
20
+ it "should attempt to send the file" do
21
+ File.should_receive(:read).and_return("ab")
22
+ @connection.should_receive(:send_message).with("filename: thefile.dat, size: 2, contents: ab")
23
+ FileUtils.should_receive(:move).with(File.join(@config.inbox_directory, @filename), File.join(@config.processing_directory, @filename))
24
+
25
+ @connection.send_file("thefile.dat")
23
26
  end
24
27
  end
25
28
 
@@ -27,7 +30,7 @@ describe FileProcessingJob::Server do
27
30
  it "should move the file to the error directory" do
28
31
  FileUtils.should_receive(:move).with(File.join(@config.processing_directory, @filename), File.join(@config.error_directory, @filename))
29
32
  @connection.logger.should_receive(:error)
30
- @connection.receive_data("error #{@filename}")
33
+ @connection.receive_task("error #{@filename}")
31
34
  end
32
35
  end
33
36
  end
@@ -35,7 +38,6 @@ describe FileProcessingJob::Server do
35
38
  describe :create_directories do
36
39
  it "should create the directories if they do not exist" do
37
40
  Dir.should_receive(:exists?).exactly(4).times.with(an_instance_of(String)).and_return(false)
38
- FileUtils.should_receive(:path).exactly(4).times.with(an_instance_of(String))
39
41
  FileProcessingJob::Server.create_directories
40
42
  end
41
43
 
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 1
8
- - 1
9
- version: 0.1.1
8
+ - 2
9
+ version: 0.1.2
10
10
  platform: ruby
11
11
  authors:
12
12
  - cj2
@@ -17,24 +17,10 @@ cert_chain: []
17
17
  date: 2010-09-09 00:00:00 -06:00
18
18
  default_executable:
19
19
  dependencies:
20
- - !ruby/object:Gem::Dependency
21
- name: aasm
22
- prerelease: false
23
- requirement: &id001 !ruby/object:Gem::Requirement
24
- none: false
25
- requirements:
26
- - - ~>
27
- - !ruby/object:Gem::Version
28
- segments:
29
- - 2
30
- - 1
31
- version: "2.1"
32
- type: :development
33
- version_requirements: *id001
34
20
  - !ruby/object:Gem::Dependency
35
21
  name: directory_watcher
36
22
  prerelease: false
37
- requirement: &id002 !ruby/object:Gem::Requirement
23
+ requirement: &id001 !ruby/object:Gem::Requirement
38
24
  none: false
39
25
  requirements:
40
26
  - - ~>
@@ -44,11 +30,11 @@ dependencies:
44
30
  - 3
45
31
  version: "1.3"
46
32
  type: :development
47
- version_requirements: *id002
33
+ version_requirements: *id001
48
34
  - !ruby/object:Gem::Dependency
49
35
  name: eventmachine
50
36
  prerelease: false
51
- requirement: &id003 !ruby/object:Gem::Requirement
37
+ requirement: &id002 !ruby/object:Gem::Requirement
52
38
  none: false
53
39
  requirements:
54
40
  - - ~>
@@ -58,7 +44,7 @@ dependencies:
58
44
  - 12
59
45
  version: "0.12"
60
46
  type: :development
61
- version_requirements: *id003
47
+ version_requirements: *id002
62
48
  description: file-processing-job allows you to distribute the processing load of large files to clients across the network. It is a thin wrapper on top of the EventMachine library.
63
49
  email: cjgrimes@gmail.com
64
50
  executables: []
@@ -75,6 +61,7 @@ files:
75
61
  - examples/test_server.rb
76
62
  - lib/file-processing-job.rb
77
63
  - lib/fpj/client.rb
64
+ - lib/fpj/connection.rb
78
65
  - lib/fpj/server.rb
79
66
  - spec/spec_helper.rb
80
67
  - spec/unit/client_spec.rb