turbot-runner 0.0.8 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/lib/turbot_runner.rb +66 -44
- data/spec/turbot_runner_spec.rb +0 -3
- metadata +10 -13
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
YzFlNTliNjE3MjU2NTExNzU2ZDA0OWJjZDcyZTE5ZmNlZWMyNWQ2ZQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
MDhlOWNhZjNkMWJlZjVkOTdkMWI4MzYxZDc0NDFiOTNlNDJlNmQwYg==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
YzIzNjRmZWVjZjY1ZThkMTY3NmRkNzE3NDg2MjExYjFhMjRhMDZkMjBhMGE1
|
10
|
+
ZDk5OGE3ZWFkNDJhNDhjOTY4ODBjYWRkMjEzNjA1OTkzMjliYTc3YWI1ZDZi
|
11
|
+
OTBmMzEzM2I5OWYxZWQyYzQ2YTQyYTAwNjk2NWQxOGJmOWU1ZTM=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
YjA3ZmYzOTA3ZTZhYzAwMGFhNjEzZmZjMzg2NGFhNjRhMTgwMTk3Yjc4ZmQw
|
14
|
+
MDkzNTZlNTEyZmZkZWYzMGUyMDJmODA2NDE0NjU3MWIyMWJjODI5Y2I1N2Yx
|
15
|
+
MjBkMDgzMDVjNzVhNzNiYjNkNDFjZGRhMzAzZjI5M2I1YzdmZDM=
|
data/lib/turbot_runner.rb
CHANGED
@@ -31,8 +31,21 @@ module TurbotRunner
|
|
31
31
|
|
32
32
|
command = "#{interpreter_for(scraper_file)} #{scraper_file}"
|
33
33
|
data_type = @config['data_type']
|
34
|
+
|
35
|
+
scraper_runner = CommandRunner.new(command)
|
36
|
+
|
37
|
+
transformers.each do |config|
|
38
|
+
file = File.join(@bot_directory, config['file'])
|
39
|
+
command = "#{interpreter_for(file)} #{file}"
|
40
|
+
transformer_runner = CommandRunner.new(command)
|
41
|
+
config['runner'] = transformer_runner
|
42
|
+
end
|
43
|
+
|
34
44
|
begin
|
35
|
-
|
45
|
+
until @interrupted do
|
46
|
+
line = scraper_runner.get_next_line
|
47
|
+
break if line.nil?
|
48
|
+
|
36
49
|
begin
|
37
50
|
record = JSON.parse(line)
|
38
51
|
rescue JSON::ParserError
|
@@ -41,29 +54,30 @@ module TurbotRunner
|
|
41
54
|
end
|
42
55
|
|
43
56
|
errors = validate(record, data_type)
|
57
|
+
|
44
58
|
if errors.empty?
|
45
59
|
handle_valid_record(record, data_type)
|
46
60
|
|
47
61
|
transformers.each do |transformer|
|
48
|
-
file = File.join(@bot_directory, transformer['file'])
|
49
|
-
command1 = "#{interpreter_for(file)} #{file}"
|
50
62
|
data_type1 = transformer['data_type']
|
51
63
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
64
|
+
runner = transformer['runner']
|
65
|
+
runner.send_line(line)
|
66
|
+
line1 = runner.get_next_line
|
67
|
+
|
68
|
+
begin
|
69
|
+
record1 = JSON.parse(line1)
|
70
|
+
rescue JSON::ParserError
|
71
|
+
handle_non_json_output(line1)
|
72
|
+
next
|
73
|
+
end
|
74
|
+
|
75
|
+
errors = validate(record1, data_type1)
|
76
|
+
|
77
|
+
if errors.empty?
|
78
|
+
handle_valid_record(record1, data_type1)
|
79
|
+
else
|
80
|
+
handle_invalid_record(record1, data_type1, errors)
|
67
81
|
end
|
68
82
|
end
|
69
83
|
else
|
@@ -87,6 +101,11 @@ module TurbotRunner
|
|
87
101
|
handle_failed_run
|
88
102
|
end
|
89
103
|
end
|
104
|
+
ensure
|
105
|
+
scraper_runner.close unless scraper_runner.nil?
|
106
|
+
transformers.each do |transformer|
|
107
|
+
transformer['runner'].close unless transformer['runner'].nil?
|
108
|
+
end
|
90
109
|
end
|
91
110
|
|
92
111
|
def successful?
|
@@ -146,32 +165,6 @@ module TurbotRunner
|
|
146
165
|
raise NotImplementedError
|
147
166
|
end
|
148
167
|
|
149
|
-
def run_script_each_line(command, options={})
|
150
|
-
# TODO: handle timeouts, errors
|
151
|
-
Open3::popen2(command) do |stdin, stdout, wait_thread|
|
152
|
-
@wait_thread = wait_thread
|
153
|
-
if options[:input]
|
154
|
-
stdin.puts(options[:input])
|
155
|
-
stdin.close
|
156
|
-
end
|
157
|
-
|
158
|
-
timeout = options[:timeout] || 3600
|
159
|
-
|
160
|
-
while !@interrupted do
|
161
|
-
begin
|
162
|
-
result = stdout.readline.strip
|
163
|
-
yield result unless result.empty?
|
164
|
-
rescue EOFError
|
165
|
-
break
|
166
|
-
end
|
167
|
-
end
|
168
|
-
|
169
|
-
if !wait_thread.value.success?
|
170
|
-
raise ScriptError
|
171
|
-
end
|
172
|
-
end
|
173
|
-
end
|
174
|
-
|
175
168
|
def scraper_file
|
176
169
|
candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))
|
177
170
|
case candidates.size
|
@@ -196,4 +189,33 @@ module TurbotRunner
|
|
196
189
|
end
|
197
190
|
end
|
198
191
|
end
|
192
|
+
|
193
|
+
class CommandRunner
|
194
|
+
def initialize(command, opts={})
|
195
|
+
@command = command
|
196
|
+
@timeout = opts[:timeout] ||= 3600
|
197
|
+
@stdin, @stdout, @wait_thread = Open3.popen2(command)
|
198
|
+
end
|
199
|
+
|
200
|
+
def get_next_line
|
201
|
+
begin
|
202
|
+
Timeout::timeout(@timeout) { @stdout.gets }
|
203
|
+
rescue Timeout::Error
|
204
|
+
STDOUT.puts("#{@command} produced no output for #{@timeout} seconds")
|
205
|
+
raise ScriptError
|
206
|
+
rescue EOFError
|
207
|
+
raise ScriptError unless @wait_thread.value.success?
|
208
|
+
return nil
|
209
|
+
end
|
210
|
+
end
|
211
|
+
|
212
|
+
def send_line(line)
|
213
|
+
@stdin.puts(line)
|
214
|
+
end
|
215
|
+
|
216
|
+
def close
|
217
|
+
@stdin.close
|
218
|
+
@stdout.close
|
219
|
+
end
|
220
|
+
end
|
199
221
|
end
|
data/spec/turbot_runner_spec.rb
CHANGED
@@ -3,9 +3,6 @@ require 'turbot_runner'
|
|
3
3
|
|
4
4
|
describe TurbotRunner::BaseRunner do
|
5
5
|
it 'can run a bot' do
|
6
|
-
# This test runs slowly - there seems to be some delay in subprocesses
|
7
|
-
# reading from their stdins, but this is not observed when the code is run
|
8
|
-
# outside of rspec.
|
9
6
|
|
10
7
|
class SpecRunner < TurbotRunner::BaseRunner
|
11
8
|
def validate(record, data_type)
|
metadata
CHANGED
@@ -1,20 +1,18 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.8
|
5
|
-
prerelease:
|
4
|
+
version: 0.0.9
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- OpenCorporates
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
11
|
+
date: 2014-07-09 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: json-schema
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
17
|
- - '='
|
20
18
|
- !ruby/object:Gem::Version
|
@@ -22,7 +20,6 @@ dependencies:
|
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
24
|
- - '='
|
28
25
|
- !ruby/object:Gem::Version
|
@@ -37,10 +34,6 @@ files:
|
|
37
34
|
- lib/prerun.rb
|
38
35
|
- lib/turbot_runner.rb
|
39
36
|
- lib/turbot_runner/version.rb
|
40
|
-
- spec/dummy-bot/manifest.json
|
41
|
-
- spec/dummy-bot/scraper.rb
|
42
|
-
- spec/dummy-bot/transformer.rb
|
43
|
-
- spec/turbot_runner_spec.rb
|
44
37
|
- schema/schemas/company-schema.json
|
45
38
|
- schema/schemas/includes/address.json
|
46
39
|
- schema/schemas/includes/base-statement.json
|
@@ -58,29 +51,33 @@ files:
|
|
58
51
|
- schema/schemas/share-parcel-schema.json
|
59
52
|
- schema/schemas/simple-licence-schema.json
|
60
53
|
- schema/schemas/subsidiary-relationship-schema.json
|
54
|
+
- spec/dummy-bot/manifest.json
|
55
|
+
- spec/dummy-bot/scraper.rb
|
56
|
+
- spec/dummy-bot/transformer.rb
|
57
|
+
- spec/turbot_runner_spec.rb
|
61
58
|
homepage: http://turbot.opencorporates.com/
|
62
59
|
licenses:
|
63
60
|
- MIT
|
61
|
+
metadata: {}
|
64
62
|
post_install_message:
|
65
63
|
rdoc_options: []
|
66
64
|
require_paths:
|
67
65
|
- lib
|
68
66
|
required_ruby_version: !ruby/object:Gem::Requirement
|
69
|
-
none: false
|
70
67
|
requirements:
|
71
68
|
- - ! '>='
|
72
69
|
- !ruby/object:Gem::Version
|
73
70
|
version: 1.9.2
|
74
71
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
75
|
-
none: false
|
76
72
|
requirements:
|
77
73
|
- - ! '>='
|
78
74
|
- !ruby/object:Gem::Version
|
79
75
|
version: '0'
|
80
76
|
requirements: []
|
81
77
|
rubyforge_project:
|
82
|
-
rubygems_version:
|
78
|
+
rubygems_version: 2.2.2
|
83
79
|
signing_key:
|
84
|
-
specification_version:
|
80
|
+
specification_version: 4
|
85
81
|
summary: Utilities for running bots with Turbot
|
86
82
|
test_files: []
|
83
|
+
has_rdoc:
|