turbot-runner 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +15 -0
- data/bin/rspec +16 -0
- data/lib/turbot_runner.rb +168 -0
- data/lib/turbot_runner/version.rb +3 -0
- data/spec/dummy-bot/manifest.json +15 -0
- data/spec/dummy-bot/scraper.rb +6 -0
- data/spec/dummy-bot/transformer.rb +12 -0
- data/spec/turbot_runner_spec.rb +32 -0
- metadata +65 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MGQwYjkyYzJlYWE2MmM2YzRiMjljM2NlM2MxNGQyYmZkMDMzOWZjZQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ODRjOWJkZmIzYjNiNTliZmRjYjA2ZDNhZGQ0MmFlZGY4ZmEwZTc3ZA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZTYwYTdmOWY0OTdlYzEyZTNjZDMxOTc1ZGI2YmM2Nzg0ZDIwZTYwNzc1ZThi
|
10
|
+
NjJlNTVlNWUyMjFlM2RhOWQyMTU4MWM1MDJlYTRhZTAyNjY0MzJhMTBjOGZj
|
11
|
+
N2FkNzMwOTc3YmIxM2NmMjJhNWY5MWE1YTE1ZjIwMjcxZWMwODA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MGNiZDlhYzNiYTM4ZjhmYjE3OWJmMzJhNzI5MThkMmM4ODBkMDQ4YmNmMmJi
|
14
|
+
MzE1ZDIzNmFkYmQ1OGRiNDA3MjY1OWZmZjdjN2FjNDMwNjYzNzg2NDhlYTc0
|
15
|
+
NmNkMmQwMTcwYWQ5YzE1YTY2NDJmZjFjYWY5Mjc4Zjk2NTZmZTQ=
|
data/bin/rspec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# This file was generated by Bundler.
|
4
|
+
#
|
5
|
+
# The application 'rspec' is installed as part of a gem, and
|
6
|
+
# this file is here to facilitate running it.
|
7
|
+
#
|
8
|
+
|
9
|
+
# Pathname is used below to resolve this script's real path.
require 'pathname'
|
10
|
+
# Default BUNDLE_GEMFILE (only when it is not already set) to "../../Gemfile"
# expanded against this script's real path, i.e. the Gemfile one directory
# above the directory that holds this script.
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
|
11
|
+
Pathname.new(__FILE__).realpath)
|
12
|
+
|
13
|
+
require 'rubygems'
|
14
|
+
# Loaded after BUNDLE_GEMFILE has been given its default above.
require 'bundler/setup'
|
15
|
+
|
16
|
+
# Look up the 'rspec' executable shipped in the 'rspec-core' gem and load it
# into this process.
load Gem.bin_path('rspec-core', 'rspec')
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'json'
require 'open3'
|
2
|
+
|
3
|
+
module TurbotRunner
  # Raised when a bot script exits with a non-zero status. The message is the
  # text the script wrote to its standard error stream.
  class ScriptError < StandardError; end

  # Runs a bot's scraper script, validates every record it prints (one JSON
  # document per line of standard output) and pipes each valid record through
  # the transformers listed in the bot's manifest.json.
  #
  # Subclasses must implement handle_valid_record, handle_invalid_record and
  # handle_failed_run; handle_successful_run and handle_interrupted_run are
  # optional hooks that do nothing by default.
  class BaseRunner
    # bot_directory - directory containing manifest.json and the scraper.
    #
    # Raises RuntimeError if manifest.json is missing or is not valid JSON.
    def initialize(bot_directory)
      @bot_directory = bot_directory

      manifest_path = File.join(bot_directory, 'manifest.json')
      raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)

      begin
        # File.read rather than Kernel#open: the latter treats a path that
        # starts with "|" as a command to execute.
        @config = JSON.parse(File.read(manifest_path))
      rescue JSON::ParserError
        # TODO provide better error message
        raise "Could not parse #{manifest_path} as JSON"
      end

      @interrupted = false
      @schemas = {}
    end

    # Runs the scraper and, for every valid record, each transformer.
    #
    # opts - :validate => false skips schema validation, so every record is
    #        treated as valid. Any other value (or no value) validates.
    #
    # Ends by calling exactly one of handle_successful_run,
    # handle_interrupted_run or handle_failed_run (the latter when a script
    # exits with a non-zero status). Raises RuntimeError if no scraper, or
    # more than one, is found.
    def run(opts={})
      # The original `opts[:validate] || true` was always true and was never
      # consulted; only an explicit false switches validation off.
      validation_required = opts.fetch(:validate, true) != false

      scraper = scraper_file
      command = [interpreter_for(scraper), scraper]
      data_type = @config['data_type']

      begin
        run_script_each_line(command) do |line|
          if process_line(line, data_type, validation_required)
            run_transformers(line, validation_required)
          end
        end

        if @interrupted
          handle_interrupted_run
        else
          handle_successful_run
        end
      rescue ScriptError => e
        handle_failed_run(e.message)
      end
    end

    # Asks a run in progress to stop. The flag is checked between output
    # lines; the script that is running at that point is terminated.
    def interrupt
      @interrupted = true
    end

    private

    # Transformer entries from the manifest ([] when there are none).
    def transformers
      @config['transformers'] || []
    end

    # Parses one line of script output and dispatches it to
    # handle_valid_record or handle_invalid_record.
    #
    # Returns true when the record was valid, false otherwise.
    def process_line(line, data_type, validation_required)
      record = JSON.parse(line)
      errors = validation_required ? validate(record, data_type) : []

      if errors.empty?
        handle_valid_record(record, data_type)
        true
      else
        handle_invalid_record(record, data_type, errors)
        false
      end
    end

    # Feeds one scraper output line to every transformer in turn and
    # processes each line the transformer prints.
    def run_transformers(line, validation_required)
      transformers.each do |transformer|
        break if @interrupted

        file = File.join(@bot_directory, transformer['file'])
        command = [interpreter_for(file), file]
        data_type = transformer['data_type']

        run_script_each_line(command, :input => line) do |transformed_line|
          process_line(transformed_line, data_type, validation_required)
        end
      end
    end

    # Validates record against the schema for data_type and returns an array
    # of error messages (empty when the record is valid).
    #
    # JSON::Validator is not defined by the standard json library; this file
    # does not load it, so whoever loads this file must have loaded it first
    # (presumably via the json-schema gem - confirm against the gemspec).
    def validate(record, data_type)
      schema = get_schema(data_type)
      errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)
      errors.map do |error|
        case error[:message]
        when /The property '#\/' did not contain a required property of '(\w+)'/
          "Missing required attribute: #{Regexp.last_match(1)}"
        else
          error[:message]
        end
      end
    end

    # Path of the schema file for data_type. Underscores and spaces in the
    # name become hyphens; the result is cached per data_type.
    def get_schema(data_type)
      @schemas[data_type] ||= begin
        hyphenated_name = data_type.to_s.tr('_ ', '-')
        File.expand_path("../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
      end
    end

    # Called for every record that passes validation. Must be overridden.
    def handle_valid_record(record, data_type)
      raise NotImplementedError
    end

    # Called for every record that fails validation, with the error
    # messages. Must be overridden.
    def handle_invalid_record(record, data_type, errors)
      raise NotImplementedError
    end

    # Called when the run finished without being interrupted.
    def handle_successful_run
    end

    # Called when the run ended because interrupt was called.
    def handle_interrupted_run
    end

    # Called with the failing script's stderr output. Must be overridden.
    def handle_failed_run(output)
      raise NotImplementedError
    end

    # Runs command and yields each non-empty, stripped line of its standard
    # output.
    #
    # command - an [executable, arg, ...] array, run without a shell so that
    #           paths containing spaces or shell metacharacters are safe. A
    #           plain String is still accepted and run as before.
    # options - :input => text written to the script's standard input.
    #
    # Raises ScriptError (message: the script's stderr) if the script ran to
    # the end of its output and exited with a non-zero status.
    def run_script_each_line(command, options={})
      # TODO: handle timeouts (options[:timeout] is not enforced yet)
      Open3.popen3(*Array(command)) do |stdin, stdout, stderr, wait_thread|
        stdin.puts(options[:input]) if options[:input]
        # Always close stdin so a script that reads it sees EOF, not a hang.
        stdin.close

        # Drain stderr concurrently: a script that fills the stderr pipe
        # while we are blocked reading stdout would otherwise deadlock.
        stderr_reader = Thread.new do
          begin
            stderr.read
          rescue IOError
            # The pipe was closed under us while unwinding from an error.
            ''
          end
        end

        reached_eof = false
        until @interrupted || reached_eof
          begin
            result = stdout.readline.strip
            yield result unless result.empty?
          rescue EOFError
            reached_eof = true
          end
        end

        # Interrupted mid-stream: stop the script instead of waiting for it
        # to finish by itself, which might never happen.
        terminate_child(wait_thread.pid) unless reached_eof

        status = wait_thread.value
        stderr_output = stderr_reader.value

        raise ScriptError, stderr_output if reached_eof && !status.success?
      end
    end

    # Sends TERM to the script with the given pid, ignoring the case where
    # it has already gone away.
    def terminate_child(pid)
      Process.kill('TERM', pid)
    rescue Errno::ESRCH
      # The process has already exited; nothing to stop.
    end

    # Path of the bot's single scraper.rb / scraper.py.
    #
    # Raises RuntimeError when there is no scraper or more than one.
    def scraper_file
      candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))
      case candidates.size
      when 0
        raise 'Could not find scraper to run'
      when 1
        candidates.first
      else
        raise "Found multiple scrapers: #{candidates.join(', ')}"
      end
    end

    # Name of the interpreter for file, chosen by extension.
    #
    # Raises RuntimeError for anything other than .rb and .py files.
    def interpreter_for(file)
      case file
      when /\.rb$/
        'ruby'
      when /\.py$/
        'python'
      else
        raise "Could not run #{file}"
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
{
|
2
|
+
"bot_id": "dummy-bot",
|
3
|
+
"data_type": "hello",
|
4
|
+
"description": "This is a dummy bot",
|
5
|
+
"identifying_fields": ["number"],
|
6
|
+
"files": ["scraper.rb"],
|
7
|
+
"transformers": [
|
8
|
+
{
|
9
|
+
"file": "transformer.rb",
|
10
|
+
"data_type": "goodbye",
|
11
|
+
"identifying_fields": [""]
|
12
|
+
}
|
13
|
+
],
|
14
|
+
"frequency": "monthly"
|
15
|
+
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'turbot_runner'
|
3
|
+
|
4
|
+
describe TurbotRunner::BaseRunner do
  it 'can run a bot' do
    # This test runs slowly - there seems to be some delay in subprocesses
    # reading from their stdins, but this is not observed when the code is run
    # outside of rspec.

    # Runner whose validation rejects exactly the record with n == 3.
    class SpecRunner < TurbotRunner::BaseRunner
      def validate(record, data_type)
        record['n'] == 3 ? [:error] : []
      end
    end

    runner = SpecRunner.new('spec/dummy-bot')

    # Records 1, 2 and 4 are valid: each is handled once as scraped ('hello')
    # and once as transformed ('goodbye').
    [1, 2, 4].each do |n|
      expect(runner).to receive(:handle_valid_record).with({'n' => n, 'hello' => "hello, #{n}"}, 'hello')
      expect(runner).to receive(:handle_valid_record).with({'n' => n, 'goodbye' => "goodbye, #{n}"}, 'goodbye')
    end

    # Record 3 is rejected by SpecRunner#validate.
    expect(runner).to receive(:handle_invalid_record).with({'n' => 3}, 'hello', [:error])
    expect(runner).to receive(:handle_successful_run)

    runner.run
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: turbot-runner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- OpenCorporates
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-06-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: json-schema
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.2.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.2.2
|
27
|
+
description:
|
28
|
+
email: bots@opencorporates.com
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- bin/rspec
|
34
|
+
- lib/turbot_runner.rb
|
35
|
+
- lib/turbot_runner/version.rb
|
36
|
+
- spec/dummy-bot/manifest.json
|
37
|
+
- spec/dummy-bot/scraper.rb
|
38
|
+
- spec/dummy-bot/transformer.rb
|
39
|
+
- spec/turbot_runner_spec.rb
|
40
|
+
homepage: http://turbot.opencorporates.com/
|
41
|
+
licenses:
|
42
|
+
- MIT
|
43
|
+
metadata: {}
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ! '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.9.2
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 2.2.2
|
61
|
+
signing_key:
|
62
|
+
specification_version: 4
|
63
|
+
summary: Utilities for running bots with Turbot
|
64
|
+
test_files: []
|
65
|
+
has_rdoc:
|