turbot-runner 0.0.8 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YzFlNTliNjE3MjU2NTExNzU2ZDA0OWJjZDcyZTE5ZmNlZWMyNWQ2ZQ==
5
+ data.tar.gz: !binary |-
6
+ MDhlOWNhZjNkMWJlZjVkOTdkMWI4MzYxZDc0NDFiOTNlNDJlNmQwYg==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ YzIzNjRmZWVjZjY1ZThkMTY3NmRkNzE3NDg2MjExYjFhMjRhMDZkMjBhMGE1
10
+ ZDk5OGE3ZWFkNDJhNDhjOTY4ODBjYWRkMjEzNjA1OTkzMjliYTc3YWI1ZDZi
11
+ OTBmMzEzM2I5OWYxZWQyYzQ2YTQyYTAwNjk2NWQxOGJmOWU1ZTM=
12
+ data.tar.gz: !binary |-
13
+ YjA3ZmYzOTA3ZTZhYzAwMGFhNjEzZmZjMzg2NGFhNjRhMTgwMTk3Yjc4ZmQw
14
+ MDkzNTZlNTEyZmZkZWYzMGUyMDJmODA2NDE0NjU3MWIyMWJjODI5Y2I1N2Yx
15
+ MjBkMDgzMDVjNzVhNzNiYjNkNDFjZGRhMzAzZjI5M2I1YzdmZDM=
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.0.8'
2
+ VERSION = '0.0.9'
3
3
  end
data/lib/turbot_runner.rb CHANGED
@@ -31,8 +31,21 @@ module TurbotRunner
31
31
 
32
32
  command = "#{interpreter_for(scraper_file)} #{scraper_file}"
33
33
  data_type = @config['data_type']
34
+
35
+ scraper_runner = CommandRunner.new(command)
36
+
37
+ transformers.each do |config|
38
+ file = File.join(@bot_directory, config['file'])
39
+ command = "#{interpreter_for(file)} #{file}"
40
+ transformer_runner = CommandRunner.new(command)
41
+ config['runner'] = transformer_runner
42
+ end
43
+
34
44
  begin
35
- run_script_each_line(command) do |line|
45
+ until @interrupted do
46
+ line = scraper_runner.get_next_line
47
+ break if line.nil?
48
+
36
49
  begin
37
50
  record = JSON.parse(line)
38
51
  rescue JSON::ParserError
@@ -41,29 +54,30 @@ module TurbotRunner
41
54
  end
42
55
 
43
56
  errors = validate(record, data_type)
57
+
44
58
  if errors.empty?
45
59
  handle_valid_record(record, data_type)
46
60
 
47
61
  transformers.each do |transformer|
48
- file = File.join(@bot_directory, transformer['file'])
49
- command1 = "#{interpreter_for(file)} #{file}"
50
62
  data_type1 = transformer['data_type']
51
63
 
52
- run_script_each_line(command1, :input => line) do |line1|
53
- begin
54
- record1 = JSON.parse(line1)
55
- rescue JSON::ParserError
56
- handle_non_json_output(line1)
57
- next
58
- end
59
-
60
- errors = validate(record1, data_type1)
61
-
62
- if errors.empty?
63
- handle_valid_record(record1, data_type1)
64
- else
65
- handle_invalid_record(record1, data_type1, errors)
66
- end
64
+ runner = transformer['runner']
65
+ runner.send_line(line)
66
+ line1 = runner.get_next_line
67
+
68
+ begin
69
+ record1 = JSON.parse(line1)
70
+ rescue JSON::ParserError
71
+ handle_non_json_output(line1)
72
+ next
73
+ end
74
+
75
+ errors = validate(record1, data_type1)
76
+
77
+ if errors.empty?
78
+ handle_valid_record(record1, data_type1)
79
+ else
80
+ handle_invalid_record(record1, data_type1, errors)
67
81
  end
68
82
  end
69
83
  else
@@ -87,6 +101,11 @@ module TurbotRunner
87
101
  handle_failed_run
88
102
  end
89
103
  end
104
+ ensure
105
+ scraper_runner.close unless scraper_runner.nil?
106
+ transformers.each do |transformer|
107
+ transformer['runner'].close unless transformer['runner'].nil?
108
+ end
90
109
  end
91
110
 
92
111
  def successful?
@@ -146,32 +165,6 @@ module TurbotRunner
146
165
  raise NotImplementedError
147
166
  end
148
167
 
149
- def run_script_each_line(command, options={})
150
- # TODO: handle timeouts, errors
151
- Open3::popen2(command) do |stdin, stdout, wait_thread|
152
- @wait_thread = wait_thread
153
- if options[:input]
154
- stdin.puts(options[:input])
155
- stdin.close
156
- end
157
-
158
- timeout = options[:timeout] || 3600
159
-
160
- while !@interrupted do
161
- begin
162
- result = stdout.readline.strip
163
- yield result unless result.empty?
164
- rescue EOFError
165
- break
166
- end
167
- end
168
-
169
- if !wait_thread.value.success?
170
- raise ScriptError
171
- end
172
- end
173
- end
174
-
175
168
  def scraper_file
176
169
  candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))
177
170
  case candidates.size
@@ -196,4 +189,33 @@ module TurbotRunner
196
189
  end
197
190
  end
198
191
  end
192
+
193
+ class CommandRunner
194
+ def initialize(command, opts={})
195
+ @command = command
196
+ @timeout = opts[:timeout] ||= 3600
197
+ @stdin, @stdout, @wait_thread = Open3.popen2(command)
198
+ end
199
+
200
+ def get_next_line
201
+ begin
202
+ Timeout::timeout(@timeout) { @stdout.gets }
203
+ rescue Timeout::Error
204
+ STDOUT.puts("#{@command} produced no output for #{@timeout} seconds")
205
+ raise ScriptError
206
+ rescue EOFError
207
+ raise ScriptError unless @wait_thread.value.success?
208
+ return nil
209
+ end
210
+ end
211
+
212
+ def send_line(line)
213
+ @stdin.puts(line)
214
+ end
215
+
216
+ def close
217
+ @stdin.close
218
+ @stdout.close
219
+ end
220
+ end
199
221
  end
@@ -3,9 +3,6 @@ require 'turbot_runner'
3
3
 
4
4
  describe TurbotRunner::BaseRunner do
5
5
  it 'can run a bot' do
6
- # This test runs slowly - there seems to be some delay in subprocesses
7
- # reading from their stdins, but this is not observed when the code is run
8
- # outside of rspec.
9
6
 
10
7
  class SpecRunner < TurbotRunner::BaseRunner
11
8
  def validate(record, data_type)
metadata CHANGED
@@ -1,20 +1,18 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.8
5
- prerelease:
4
+ version: 0.0.9
6
5
  platform: ruby
7
6
  authors:
8
7
  - OpenCorporates
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2014-07-08 00:00:00.000000000 Z
11
+ date: 2014-07-09 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: json-schema
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
17
  - - '='
20
18
  - !ruby/object:Gem::Version
@@ -22,7 +20,6 @@ dependencies:
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
24
  - - '='
28
25
  - !ruby/object:Gem::Version
@@ -37,10 +34,6 @@ files:
37
34
  - lib/prerun.rb
38
35
  - lib/turbot_runner.rb
39
36
  - lib/turbot_runner/version.rb
40
- - spec/dummy-bot/manifest.json
41
- - spec/dummy-bot/scraper.rb
42
- - spec/dummy-bot/transformer.rb
43
- - spec/turbot_runner_spec.rb
44
37
  - schema/schemas/company-schema.json
45
38
  - schema/schemas/includes/address.json
46
39
  - schema/schemas/includes/base-statement.json
@@ -58,29 +51,33 @@ files:
58
51
  - schema/schemas/share-parcel-schema.json
59
52
  - schema/schemas/simple-licence-schema.json
60
53
  - schema/schemas/subsidiary-relationship-schema.json
54
+ - spec/dummy-bot/manifest.json
55
+ - spec/dummy-bot/scraper.rb
56
+ - spec/dummy-bot/transformer.rb
57
+ - spec/turbot_runner_spec.rb
61
58
  homepage: http://turbot.opencorporates.com/
62
59
  licenses:
63
60
  - MIT
61
+ metadata: {}
64
62
  post_install_message:
65
63
  rdoc_options: []
66
64
  require_paths:
67
65
  - lib
68
66
  required_ruby_version: !ruby/object:Gem::Requirement
69
- none: false
70
67
  requirements:
71
68
  - - ! '>='
72
69
  - !ruby/object:Gem::Version
73
70
  version: 1.9.2
74
71
  required_rubygems_version: !ruby/object:Gem::Requirement
75
- none: false
76
72
  requirements:
77
73
  - - ! '>='
78
74
  - !ruby/object:Gem::Version
79
75
  version: '0'
80
76
  requirements: []
81
77
  rubyforge_project:
82
- rubygems_version: 1.8.23
78
+ rubygems_version: 2.2.2
83
79
  signing_key:
84
- specification_version: 3
80
+ specification_version: 4
85
81
  summary: Utilities for running bots with Turbot
86
82
  test_files: []
83
+ has_rdoc: