turbot-runner 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/lib/turbot_runner.rb +66 -44
- data/spec/turbot_runner_spec.rb +0 -3
- metadata +10 -13
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YzFlNTliNjE3MjU2NTExNzU2ZDA0OWJjZDcyZTE5ZmNlZWMyNWQ2ZQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MDhlOWNhZjNkMWJlZjVkOTdkMWI4MzYxZDc0NDFiOTNlNDJlNmQwYg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YzIzNjRmZWVjZjY1ZThkMTY3NmRkNzE3NDg2MjExYjFhMjRhMDZkMjBhMGE1
|
10
|
+
ZDk5OGE3ZWFkNDJhNDhjOTY4ODBjYWRkMjEzNjA1OTkzMjliYTc3YWI1ZDZi
|
11
|
+
OTBmMzEzM2I5OWYxZWQyYzQ2YTQyYTAwNjk2NWQxOGJmOWU1ZTM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YjA3ZmYzOTA3ZTZhYzAwMGFhNjEzZmZjMzg2NGFhNjRhMTgwMTk3Yjc4ZmQw
|
14
|
+
MDkzNTZlNTEyZmZkZWYzMGUyMDJmODA2NDE0NjU3MWIyMWJjODI5Y2I1N2Yx
|
15
|
+
MjBkMDgzMDVjNzVhNzNiYjNkNDFjZGRhMzAzZjI5M2I1YzdmZDM=
|
data/lib/turbot_runner.rb
CHANGED
@@ -31,8 +31,21 @@ module TurbotRunner
|
|
31
31
|
|
32
32
|
command = "#{interpreter_for(scraper_file)} #{scraper_file}"
|
33
33
|
data_type = @config['data_type']
|
34
|
+
|
35
|
+
scraper_runner = CommandRunner.new(command)
|
36
|
+
|
37
|
+
transformers.each do |config|
|
38
|
+
file = File.join(@bot_directory, config['file'])
|
39
|
+
command = "#{interpreter_for(file)} #{file}"
|
40
|
+
transformer_runner = CommandRunner.new(command)
|
41
|
+
config['runner'] = transformer_runner
|
42
|
+
end
|
43
|
+
|
34
44
|
begin
|
35
|
-
|
45
|
+
until @interrupted do
|
46
|
+
line = scraper_runner.get_next_line
|
47
|
+
break if line.nil?
|
48
|
+
|
36
49
|
begin
|
37
50
|
record = JSON.parse(line)
|
38
51
|
rescue JSON::ParserError
|
@@ -41,29 +54,30 @@ module TurbotRunner
|
|
41
54
|
end
|
42
55
|
|
43
56
|
errors = validate(record, data_type)
|
57
|
+
|
44
58
|
if errors.empty?
|
45
59
|
handle_valid_record(record, data_type)
|
46
60
|
|
47
61
|
transformers.each do |transformer|
|
48
|
-
file = File.join(@bot_directory, transformer['file'])
|
49
|
-
command1 = "#{interpreter_for(file)} #{file}"
|
50
62
|
data_type1 = transformer['data_type']
|
51
63
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
64
|
+
runner = transformer['runner']
|
65
|
+
runner.send_line(line)
|
66
|
+
line1 = runner.get_next_line
|
67
|
+
|
68
|
+
begin
|
69
|
+
record1 = JSON.parse(line1)
|
70
|
+
rescue JSON::ParserError
|
71
|
+
handle_non_json_output(line1)
|
72
|
+
next
|
73
|
+
end
|
74
|
+
|
75
|
+
errors = validate(record1, data_type1)
|
76
|
+
|
77
|
+
if errors.empty?
|
78
|
+
handle_valid_record(record1, data_type1)
|
79
|
+
else
|
80
|
+
handle_invalid_record(record1, data_type1, errors)
|
67
81
|
end
|
68
82
|
end
|
69
83
|
else
|
@@ -87,6 +101,11 @@ module TurbotRunner
|
|
87
101
|
handle_failed_run
|
88
102
|
end
|
89
103
|
end
|
104
|
+
ensure
|
105
|
+
scraper_runner.close unless scraper_runner.nil?
|
106
|
+
transformers.each do |transformer|
|
107
|
+
transformer['runner'].close unless transformer['runner'].nil?
|
108
|
+
end
|
90
109
|
end
|
91
110
|
|
92
111
|
def successful?
|
@@ -146,32 +165,6 @@ module TurbotRunner
|
|
146
165
|
raise NotImplementedError
|
147
166
|
end
|
148
167
|
|
149
|
-
def run_script_each_line(command, options={})
|
150
|
-
# TODO: handle timeouts, errors
|
151
|
-
Open3::popen2(command) do |stdin, stdout, wait_thread|
|
152
|
-
@wait_thread = wait_thread
|
153
|
-
if options[:input]
|
154
|
-
stdin.puts(options[:input])
|
155
|
-
stdin.close
|
156
|
-
end
|
157
|
-
|
158
|
-
timeout = options[:timeout] || 3600
|
159
|
-
|
160
|
-
while !@interrupted do
|
161
|
-
begin
|
162
|
-
result = stdout.readline.strip
|
163
|
-
yield result unless result.empty?
|
164
|
-
rescue EOFError
|
165
|
-
break
|
166
|
-
end
|
167
|
-
end
|
168
|
-
|
169
|
-
if !wait_thread.value.success?
|
170
|
-
raise ScriptError
|
171
|
-
end
|
172
|
-
end
|
173
|
-
end
|
174
|
-
|
175
168
|
def scraper_file
|
176
169
|
candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))
|
177
170
|
case candidates.size
|
@@ -196,4 +189,33 @@ module TurbotRunner
|
|
196
189
|
end
|
197
190
|
end
|
198
191
|
end
|
192
|
+
|
193
|
+
# Runs an external command as a subprocess (via Open3.popen2) and exchanges
# lines of text with it over its stdin/stdout pipes.
class CommandRunner
  # Seconds to wait for each line of output when the caller gives no :timeout.
  DEFAULT_TIMEOUT = 3600

  # Starts +command+ immediately.
  #
  # command - String command line to execute.
  # opts    - Hash of options; only :timeout (seconds allowed per read) is
  #           consulted. The hash is read, never modified.
  def initialize(command, opts={})
    @command = command
    # Plain `||` rather than `||=`: the default must not be written back into
    # the caller's hash (which may be shared or frozen).
    @timeout = opts[:timeout] || DEFAULT_TIMEOUT
    @stdin, @stdout, @wait_thread = Open3.popen2(command)
  end

  # Reads the next line of the command's stdout.
  #
  # Returns the line (including its trailing newline), or nil once the
  # command has closed its output and exited successfully.
  # Raises ScriptError if no line arrives within the timeout, or if the
  # output ended and the command exited with a failure status.
  def get_next_line
    line = Timeout.timeout(@timeout) { @stdout.gets }
    return line unless line.nil?

    # IO#gets signals end-of-stream by returning nil (it never raises
    # EOFError), so the exit status has to be checked here.
    raise ScriptError unless @wait_thread.value.success?
    nil
  rescue Timeout::Error
    STDOUT.puts("#{@command} produced no output for #{@timeout} seconds")
    raise ScriptError
  end

  # Writes +line+ (newline-terminated by IO#puts) to the command's stdin.
  def send_line(line)
    @stdin.puts(line)
  end

  # Closes both pipes to the command. Safe to call more than once.
  def close
    @stdin.close unless @stdin.closed?
    @stdout.close unless @stdout.closed?
  end
end
|
199
221
|
end
|
data/spec/turbot_runner_spec.rb
CHANGED
@@ -3,9 +3,6 @@ require 'turbot_runner'
|
|
3
3
|
|
4
4
|
describe TurbotRunner::BaseRunner do
|
5
5
|
it 'can run a bot' do
|
6
|
-
# This test runs slowly - there seems to be some delay in subprocesses
|
7
|
-
# reading from their stdins, but this is not observed when the code is run
|
8
|
-
# outside of rspec.
|
9
6
|
|
10
7
|
class SpecRunner < TurbotRunner::BaseRunner
|
11
8
|
def validate(record, data_type)
|
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.9
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- OpenCorporates
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-09 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: json-schema
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - '='
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - '='
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -37,10 +34,6 @@ files:
|
|
37
34
|
- lib/prerun.rb
|
38
35
|
- lib/turbot_runner.rb
|
39
36
|
- lib/turbot_runner/version.rb
|
40
|
-
- spec/dummy-bot/manifest.json
|
41
|
-
- spec/dummy-bot/scraper.rb
|
42
|
-
- spec/dummy-bot/transformer.rb
|
43
|
-
- spec/turbot_runner_spec.rb
|
44
37
|
- schema/schemas/company-schema.json
|
45
38
|
- schema/schemas/includes/address.json
|
46
39
|
- schema/schemas/includes/base-statement.json
|
@@ -58,29 +51,33 @@ files:
|
|
58
51
|
- schema/schemas/share-parcel-schema.json
|
59
52
|
- schema/schemas/simple-licence-schema.json
|
60
53
|
- schema/schemas/subsidiary-relationship-schema.json
|
54
|
+
- spec/dummy-bot/manifest.json
|
55
|
+
- spec/dummy-bot/scraper.rb
|
56
|
+
- spec/dummy-bot/transformer.rb
|
57
|
+
- spec/turbot_runner_spec.rb
|
61
58
|
homepage: http://turbot.opencorporates.com/
|
62
59
|
licenses:
|
63
60
|
- MIT
|
61
|
+
metadata: {}
|
64
62
|
post_install_message:
|
65
63
|
rdoc_options: []
|
66
64
|
require_paths:
|
67
65
|
- lib
|
68
66
|
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
-
none: false
|
70
67
|
requirements:
|
71
68
|
- - ! '>='
|
72
69
|
- !ruby/object:Gem::Version
|
73
70
|
version: 1.9.2
|
74
71
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
-
none: false
|
76
72
|
requirements:
|
77
73
|
- - ! '>='
|
78
74
|
- !ruby/object:Gem::Version
|
79
75
|
version: '0'
|
80
76
|
requirements: []
|
81
77
|
rubyforge_project:
|
82
|
-
rubygems_version:
|
78
|
+
rubygems_version: 2.2.2
|
83
79
|
signing_key:
|
84
|
-
specification_version:
|
80
|
+
specification_version: 4
|
85
81
|
summary: Utilities for running bots with Turbot
|
86
82
|
test_files: []
|
83
|
+
has_rdoc:
|