turbot-runner 0.0.8 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YzFlNTliNjE3MjU2NTExNzU2ZDA0OWJjZDcyZTE5ZmNlZWMyNWQ2ZQ==
5
+ data.tar.gz: !binary |-
6
+ MDhlOWNhZjNkMWJlZjVkOTdkMWI4MzYxZDc0NDFiOTNlNDJlNmQwYg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YzIzNjRmZWVjZjY1ZThkMTY3NmRkNzE3NDg2MjExYjFhMjRhMDZkMjBhMGE1
10
+ ZDk5OGE3ZWFkNDJhNDhjOTY4ODBjYWRkMjEzNjA1OTkzMjliYTc3YWI1ZDZi
11
+ OTBmMzEzM2I5OWYxZWQyYzQ2YTQyYTAwNjk2NWQxOGJmOWU1ZTM=
12
+ data.tar.gz: !binary |-
13
+ YjA3ZmYzOTA3ZTZhYzAwMGFhNjEzZmZjMzg2NGFhNjRhMTgwMTk3Yjc4ZmQw
14
+ MDkzNTZlNTEyZmZkZWYzMGUyMDJmODA2NDE0NjU3MWIyMWJjODI5Y2I1N2Yx
15
+ MjBkMDgzMDVjNzVhNzNiYjNkNDFjZGRhMzAzZjI5M2I1YzdmZDM=
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.0.8'
2
+ VERSION = '0.0.9'
3
3
  end
data/lib/turbot_runner.rb CHANGED
@@ -31,8 +31,21 @@ module TurbotRunner
31
31
 
32
32
  command = "#{interpreter_for(scraper_file)} #{scraper_file}"
33
33
  data_type = @config['data_type']
34
+
35
+ scraper_runner = CommandRunner.new(command)
36
+
37
+ transformers.each do |config|
38
+ file = File.join(@bot_directory, config['file'])
39
+ command = "#{interpreter_for(file)} #{file}"
40
+ transformer_runner = CommandRunner.new(command)
41
+ config['runner'] = transformer_runner
42
+ end
43
+
34
44
  begin
35
- run_script_each_line(command) do |line|
45
+ until @interrupted do
46
+ line = scraper_runner.get_next_line
47
+ break if line.nil?
48
+
36
49
  begin
37
50
  record = JSON.parse(line)
38
51
  rescue JSON::ParserError
@@ -41,29 +54,30 @@ module TurbotRunner
41
54
  end
42
55
 
43
56
  errors = validate(record, data_type)
57
+
44
58
  if errors.empty?
45
59
  handle_valid_record(record, data_type)
46
60
 
47
61
  transformers.each do |transformer|
48
- file = File.join(@bot_directory, transformer['file'])
49
- command1 = "#{interpreter_for(file)} #{file}"
50
62
  data_type1 = transformer['data_type']
51
63
 
52
- run_script_each_line(command1, :input => line) do |line1|
53
- begin
54
- record1 = JSON.parse(line1)
55
- rescue JSON::ParserError
56
- handle_non_json_output(line1)
57
- next
58
- end
59
-
60
- errors = validate(record1, data_type1)
61
-
62
- if errors.empty?
63
- handle_valid_record(record1, data_type1)
64
- else
65
- handle_invalid_record(record1, data_type1, errors)
66
- end
64
+ runner = transformer['runner']
65
+ runner.send_line(line)
66
+ line1 = runner.get_next_line
67
+
68
+ begin
69
+ record1 = JSON.parse(line1)
70
+ rescue JSON::ParserError
71
+ handle_non_json_output(line1)
72
+ next
73
+ end
74
+
75
+ errors = validate(record1, data_type1)
76
+
77
+ if errors.empty?
78
+ handle_valid_record(record1, data_type1)
79
+ else
80
+ handle_invalid_record(record1, data_type1, errors)
67
81
  end
68
82
  end
69
83
  else
@@ -87,6 +101,11 @@ module TurbotRunner
87
101
  handle_failed_run
88
102
  end
89
103
  end
104
+ ensure
105
+ scraper_runner.close unless scraper_runner.nil?
106
+ transformers.each do |transformer|
107
+ transformer['runner'].close unless transformer['runner'].nil?
108
+ end
90
109
  end
91
110
 
92
111
  def successful?
@@ -146,32 +165,6 @@ module TurbotRunner
146
165
  raise NotImplementedError
147
166
  end
148
167
 
149
- def run_script_each_line(command, options={})
150
- # TODO: handle timeouts, errors
151
- Open3::popen2(command) do |stdin, stdout, wait_thread|
152
- @wait_thread = wait_thread
153
- if options[:input]
154
- stdin.puts(options[:input])
155
- stdin.close
156
- end
157
-
158
- timeout = options[:timeout] || 3600
159
-
160
- while !@interrupted do
161
- begin
162
- result = stdout.readline.strip
163
- yield result unless result.empty?
164
- rescue EOFError
165
- break
166
- end
167
- end
168
-
169
- if !wait_thread.value.success?
170
- raise ScriptError
171
- end
172
- end
173
- end
174
-
175
168
  def scraper_file
176
169
  candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))
177
170
  case candidates.size
@@ -196,4 +189,33 @@ module TurbotRunner
196
189
  end
197
190
  end
198
191
  end
192
+
193
# Runs an external command as a child process and exchanges data with it one
# line at a time over the child's stdin and stdout pipes.
#
# The process is started with Open3.popen2 as soon as the runner is created
# and its pipes stay open until #close is called.
class CommandRunner
  # Seconds to wait for a line of output when no :timeout option is given.
  DEFAULT_TIMEOUT = 3600

  # command - the command line to execute, as a String.
  # opts    - optional Hash; :timeout is the maximum number of seconds
  #           #get_next_line waits for output (defaults to DEFAULT_TIMEOUT).
  def initialize(command, opts={})
    @command = command
    # Plain || rather than ||= so the caller's hash is never written to.
    @timeout = opts[:timeout] || DEFAULT_TIMEOUT
    @stdin, @stdout, @wait_thread = Open3.popen2(command)
  end

  # Reads the next line the command wrote to its stdout.
  #
  # Returns the line as a String (including its line terminator), or nil once
  # the command has finished successfully and there is no more output.
  #
  # Raises ScriptError if no line arrives within the timeout, or if the
  # output ended and the command exited with a non-zero status.
  def get_next_line
    line = Timeout::timeout(@timeout) { @stdout.gets }
    return line unless line.nil?

    # IO#gets reports end-of-file by returning nil; it never raises EOFError.
    # The exit status therefore has to be checked here, otherwise a command
    # that crashed would look like one that simply ran out of output.
    raise ScriptError unless @wait_thread.value.success?
    nil
  rescue Timeout::Error
    STDOUT.puts("#{@command} produced no output for #{@timeout} seconds")
    raise ScriptError
  end

  # Writes a single line to the command's stdin.
  def send_line(line)
    @stdin.puts(line)
  end

  # Closes both pipes to the command. Safe to call more than once.
  def close
    # Guarded because IO#close raises IOError on an already-closed stream on
    # older Ruby versions.
    @stdin.close unless @stdin.closed?
    @stdout.close unless @stdout.closed?
  end
end
199
221
  end
@@ -3,9 +3,6 @@ require 'turbot_runner'
3
3
 
4
4
  describe TurbotRunner::BaseRunner do
5
5
  it 'can run a bot' do
6
- # This test runs slowly - there seems to be some delay in subprocesses
7
- # reading from their stdins, but this is not observed when the code is run
8
- # outside of rspec.
9
6
 
10
7
  class SpecRunner < TurbotRunner::BaseRunner
11
8
  def validate(record, data_type)
metadata CHANGED
@@ -1,20 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
5
- prerelease:
4
+ version: 0.0.9
6
5
  platform: ruby
7
6
  authors:
8
7
  - OpenCorporates
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-07-08 00:00:00.000000000 Z
11
+ date: 2014-07-09 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: json-schema
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - '='
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - '='
28
25
  - !ruby/object:Gem::Version
@@ -37,10 +34,6 @@ files:
37
34
  - lib/prerun.rb
38
35
  - lib/turbot_runner.rb
39
36
  - lib/turbot_runner/version.rb
40
- - spec/dummy-bot/manifest.json
41
- - spec/dummy-bot/scraper.rb
42
- - spec/dummy-bot/transformer.rb
43
- - spec/turbot_runner_spec.rb
44
37
  - schema/schemas/company-schema.json
45
38
  - schema/schemas/includes/address.json
46
39
  - schema/schemas/includes/base-statement.json
@@ -58,29 +51,33 @@ files:
58
51
  - schema/schemas/share-parcel-schema.json
59
52
  - schema/schemas/simple-licence-schema.json
60
53
  - schema/schemas/subsidiary-relationship-schema.json
54
+ - spec/dummy-bot/manifest.json
55
+ - spec/dummy-bot/scraper.rb
56
+ - spec/dummy-bot/transformer.rb
57
+ - spec/turbot_runner_spec.rb
61
58
  homepage: http://turbot.opencorporates.com/
62
59
  licenses:
63
60
  - MIT
61
+ metadata: {}
64
62
  post_install_message:
65
63
  rdoc_options: []
66
64
  require_paths:
67
65
  - lib
68
66
  required_ruby_version: !ruby/object:Gem::Requirement
69
- none: false
70
67
  requirements:
71
68
  - - ! '>='
72
69
  - !ruby/object:Gem::Version
73
70
  version: 1.9.2
74
71
  required_rubygems_version: !ruby/object:Gem::Requirement
75
- none: false
76
72
  requirements:
77
73
  - - ! '>='
78
74
  - !ruby/object:Gem::Version
79
75
  version: '0'
80
76
  requirements: []
81
77
  rubyforge_project:
82
- rubygems_version: 1.8.23
78
+ rubygems_version: 2.2.2
83
79
  signing_key:
84
- specification_version: 3
80
+ specification_version: 4
85
81
  summary: Utilities for running bots with Turbot
86
82
  test_files: []
83
+ has_rdoc: