turbot-runner 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/bin/rspec +16 -0
- data/lib/turbot_runner.rb +168 -0
- data/lib/turbot_runner/version.rb +3 -0
- data/spec/dummy-bot/manifest.json +15 -0
- data/spec/dummy-bot/scraper.rb +6 -0
- data/spec/dummy-bot/transformer.rb +12 -0
- data/spec/turbot_runner_spec.rb +32 -0
- metadata +65 -0
checksums.yaml
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
---
|
2
|
+
!binary "U0hBMQ==":
|
3
|
+
metadata.gz: !binary |-
|
4
|
+
MGQwYjkyYzJlYWE2MmM2YzRiMjljM2NlM2MxNGQyYmZkMDMzOWZjZQ==
|
5
|
+
data.tar.gz: !binary |-
|
6
|
+
ODRjOWJkZmIzYjNiNTliZmRjYjA2ZDNhZGQ0MmFlZGY4ZmEwZTc3ZA==
|
7
|
+
SHA512:
|
8
|
+
metadata.gz: !binary |-
|
9
|
+
ZTYwYTdmOWY0OTdlYzEyZTNjZDMxOTc1ZGI2YmM2Nzg0ZDIwZTYwNzc1ZThi
|
10
|
+
NjJlNTVlNWUyMjFlM2RhOWQyMTU4MWM1MDJlYTRhZTAyNjY0MzJhMTBjOGZj
|
11
|
+
N2FkNzMwOTc3YmIxM2NmMjJhNWY5MWE1YTE1ZjIwMjcxZWMwODA=
|
12
|
+
data.tar.gz: !binary |-
|
13
|
+
MGNiZDlhYzNiYTM4ZjhmYjE3OWJmMzJhNzI5MThkMmM4ODBkMDQ4YmNmMmJi
|
14
|
+
MzE1ZDIzNmFkYmQ1OGRiNDA3MjY1OWZmZjdjN2FjNDMwNjYzNzg2NDhlYTc0
|
15
|
+
NmNkMmQwMTcwYWQ5YzE1YTY2NDJmZjFjYWY5Mjc4Zjk2NTZmZTQ=
|
data/bin/rspec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# Binstub generated by Bundler.
#
# 'rspec' is installed as part of a gem; this wrapper exists so that it can
# be launched from this project.
#

require 'pathname'

# Resolve symlinks first so the Gemfile is located relative to the real
# location of this script. An already-set BUNDLE_GEMFILE is left untouched.
resolved_script = Pathname.new(__FILE__).realpath
ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile", resolved_script)

require 'rubygems'
require 'bundler/setup'

# Hand control over to the rspec executable shipped with the rspec-core gem.
rspec_executable = Gem.bin_path('rspec-core', 'rspec')
load rspec_executable
|
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'json'
require 'open3'
|
2
|
+
|
3
|
+
module TurbotRunner
  # Raised when a bot script exits with a non-zero status. The message is
  # whatever the script wrote to its standard error.
  class ScriptError < StandardError; end

  # Runs the scraper found in a bot directory, feeds every valid record it
  # prints through the transformers listed in the bot's manifest.json, and
  # reports each record and the overall outcome through the handle_* hooks.
  #
  # Subclasses must implement handle_valid_record, handle_invalid_record and
  # handle_failed_run; handle_successful_run and handle_interrupted_run are
  # optional no-op hooks.
  class BaseRunner
    # Loads and parses manifest.json from the bot directory.
    #
    # @param bot_directory [String] directory holding manifest.json and the
    #   scraper/transformer scripts
    # @raise [RuntimeError] if manifest.json is missing or is not valid JSON
    def initialize(bot_directory)
      @bot_directory = bot_directory

      manifest_path = File.join(bot_directory, 'manifest.json')
      raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)

      begin
        @config = JSON.parse(File.read(manifest_path))
      rescue JSON::ParserError
        # TODO provide better error message
        raise "Could not parse #{manifest_path} as JSON"
      end

      @interrupted = false
      @schemas = {}
    end

    # Runs the scraper and, for every valid scraper record, each transformer.
    #
    # Every non-empty line a script prints is parsed as JSON and passed to
    # handle_valid_record or handle_invalid_record. When the run ends, exactly
    # one of handle_successful_run, handle_interrupted_run or handle_failed_run
    # is called, and its return value is returned.
    #
    # @param opts [Hash] :validate - pass false to skip validation and treat
    #   every record as valid; any other value (or no value) validates
    # @raise [RuntimeError] if no scraper, several scrapers, or a script with
    #   an unsupported extension is found
    def run(opts={})
      # Only an explicit false disables validation, so existing callers that
      # pass nothing (or nil) keep the validating behaviour.
      validation_required = opts[:validate] != false

      scraper = scraper_file
      # Argument-vector form: the script path is never parsed by a shell, so
      # bot directories containing spaces or metacharacters work.
      command = [interpreter_for(scraper), scraper]
      data_type = @config['data_type']

      begin
        run_script_each_line(command) do |line|
          # Invalid scraper records are reported but not transformed.
          next unless process_line(line, data_type, validation_required)

          transformers.each do |transformer|
            # Do not start further scripts once an interrupt was requested.
            break if @interrupted

            file = File.join(@bot_directory, transformer['file'])
            transformer_command = [interpreter_for(file), file]

            run_script_each_line(transformer_command, :input => line) do |transformed_line|
              process_line(transformed_line, transformer['data_type'], validation_required)
            end
          end
        end

        if @interrupted
          handle_interrupted_run
        else
          handle_successful_run
        end
      rescue ScriptError => e
        handle_failed_run(e.message)
      end
    end

    # Requests that the current run stop. The flag is checked before each
    # line is read from a script and is never reset by this class.
    def interrupt
      @interrupted = true
    end

    private

    # Transformer entries from the manifest, or an empty list if none.
    def transformers
      @config['transformers'] || []
    end

    # Parses one output line as JSON, validates it (unless validation is
    # disabled) and dispatches it to the matching record hook.
    #
    # @return [Boolean] true if the record was handled as valid
    # @raise [JSON::ParserError] if the line is not valid JSON
    def process_line(line, data_type, validation_required)
      record = JSON.parse(line)
      errors = validation_required ? validate(record, data_type) : []

      if errors.empty?
        handle_valid_record(record, data_type)
        true
      else
        handle_invalid_record(record, data_type, errors)
        false
      end
    end

    # Validates a record against the schema for its data type.
    #
    # @return [Array<String>] error messages; empty when the record is valid.
    #   "Missing required property" messages are rewritten to a shorter form.
    def validate(record, data_type)
      schema = get_schema(data_type)
      errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)
      errors.map do |error|
        case error[:message]
        when /The property '#\/' did not contain a required property of '(\w+)'/
          "Missing required attribute: #{Regexp.last_match(1)}"
        else
          error[:message]
        end
      end
    end

    # Returns the (memoised) path of the schema file for a data type.
    # Underscores and spaces in the data type become hyphens in the file name.
    def get_schema(data_type)
      @schemas[data_type] ||= begin
        hyphenated_name = data_type.to_s.gsub("_", "-").gsub(" ", "-")
        File.expand_path("../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
      end
    end

    # Hook: called with each record that passed validation.
    def handle_valid_record(record, data_type)
      raise NotImplementedError
    end

    # Hook: called with each record that failed validation and its errors.
    def handle_invalid_record(record, data_type, errors)
      raise NotImplementedError
    end

    # Hook: called when every script finished successfully. No-op by default.
    def handle_successful_run
    end

    # Hook: called when the run ended because of #interrupt. No-op by default.
    def handle_interrupted_run
    end

    # Hook: called with a failing script's standard error output.
    def handle_failed_run(output)
      raise NotImplementedError
    end

    # Runs a script and yields each non-empty, stripped line of its stdout.
    #
    # @param command [String, Array<String>] a shell command string, or an
    #   argument vector that is executed without a shell
    # @param options [Hash] :input - text written to the script's stdin
    # @raise [ScriptError] if the script exits unsuccessfully and the run was
    #   not interrupted
    def run_script_each_line(command, options={})
      # TODO: handle timeouts (options[:timeout] is accepted but not enforced)
      Open3.popen3(*Array(command)) do |stdin, stdout, stderr, wait_thread|
        # Drain stderr concurrently; otherwise a script that fills the stderr
        # pipe blocks forever while we are waiting on its stdout.
        stderr_reader = Thread.new { stderr.read }

        begin
          feed_input(stdin, options[:input])

          until @interrupted
            line = stdout.gets
            break if line.nil?

            line = line.strip
            yield line unless line.empty?
          end

          if @interrupted
            # The script may still be producing output; stop it instead of
            # waiting for it to finish. Its exit status is not inspected.
            stop_script(wait_thread)
          elsif !wait_thread.value.success?
            raise ScriptError, stderr_reader.value
          end
        ensure
          # Stop the reader before popen3 closes the stream underneath it.
          stderr_reader.kill
        end
      end
    end

    # Writes the optional input to the script and closes its stdin, so a
    # script that reads stdin always sees end-of-file.
    def feed_input(stdin, input)
      stdin.puts(input) if input
      stdin.close
    rescue Errno::EPIPE
      # The script went away without reading its input; its exit status
      # decides whether that counts as a failure. popen3 closes the stream.
    end

    # Asks a still-running script to terminate.
    def stop_script(wait_thread)
      Process.kill('TERM', wait_thread.pid) if wait_thread.alive?
    rescue Errno::ESRCH
      # The script exited between the liveness check and the signal.
    end

    # Locates the single scraper.rb or scraper.py in the bot directory.
    #
    # @raise [RuntimeError] if there is no scraper or more than one
    def scraper_file
      candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))
      case candidates.size
      when 0
        raise 'Could not find scraper to run'
      when 1
        candidates.first
      else
        raise "Found multiple scrapers: #{candidates.join(', ')}"
      end
    end

    # Maps a script's file extension to the interpreter used to run it.
    #
    # @raise [RuntimeError] for any extension other than .rb or .py
    def interpreter_for(file)
      case file
      when /\.rb$/
        'ruby'
      when /\.py$/
        'python'
      else
        raise "Could not run #{file}"
      end
    end
  end
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
{
|
2
|
+
"bot_id": "dummy-bot",
|
3
|
+
"data_type": "hello",
|
4
|
+
"description": "This is a dummy bot",
|
5
|
+
"identifying_fields": ["number"],
|
6
|
+
"files": ["scraper.rb"],
|
7
|
+
"transformers": [
|
8
|
+
{
|
9
|
+
"file": "transformer.rb",
|
10
|
+
"data_type": "goodbye",
|
11
|
+
"identifying_fields": [""]
|
12
|
+
}
|
13
|
+
],
|
14
|
+
"frequency": "monthly"
|
15
|
+
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'turbot_runner'
|
3
|
+
|
4
|
+
describe TurbotRunner::BaseRunner do
  it 'can run a bot' do
    # This test runs slowly - there seems to be some delay in subprocesses
    # reading from their stdins, but this is not observed when the code is run
    # outside of rspec.

    # Anonymous subclass: avoids leaking a SpecRunner constant into the global
    # namespace. Validation is stubbed so that only the record with n == 3 is
    # rejected.
    runner_class = Class.new(TurbotRunner::BaseRunner) do
      def validate(record, data_type)
        record['n'] == 3 ? [:error] : []
      end
    end

    # Resolve the fixture relative to this file so the spec does not depend on
    # the directory rspec is launched from.
    runner = runner_class.new(File.expand_path('../dummy-bot', __FILE__))

    # Records 1, 2 and 4 are valid and are also passed through the transformer.
    [1, 2, 4].each do |n|
      expect(runner).to receive(:handle_valid_record).with({'n' => n, 'hello' => "hello, #{n}"}, 'hello')
      expect(runner).to receive(:handle_valid_record).with({'n' => n, 'goodbye' => "goodbye, #{n}"}, 'goodbye')
    end

    # Record 3 is rejected by the stubbed validator and never transformed.
    expect(runner).to receive(:handle_invalid_record).with({'n' => 3}, 'hello', [:error])
    expect(runner).to receive(:handle_successful_run)

    runner.run
  end
end
|
metadata
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: turbot-runner
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- OpenCorporates
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-06-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: json-schema
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.2.2
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.2.2
|
27
|
+
description:
|
28
|
+
email: bots@opencorporates.com
|
29
|
+
executables: []
|
30
|
+
extensions: []
|
31
|
+
extra_rdoc_files: []
|
32
|
+
files:
|
33
|
+
- bin/rspec
|
34
|
+
- lib/turbot_runner.rb
|
35
|
+
- lib/turbot_runner/version.rb
|
36
|
+
- spec/dummy-bot/manifest.json
|
37
|
+
- spec/dummy-bot/scraper.rb
|
38
|
+
- spec/dummy-bot/transformer.rb
|
39
|
+
- spec/turbot_runner_spec.rb
|
40
|
+
homepage: http://turbot.opencorporates.com/
|
41
|
+
licenses:
|
42
|
+
- MIT
|
43
|
+
metadata: {}
|
44
|
+
post_install_message:
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ! '>='
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 1.9.2
|
53
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ! '>='
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: '0'
|
58
|
+
requirements: []
|
59
|
+
rubyforge_project:
|
60
|
+
rubygems_version: 2.2.2
|
61
|
+
signing_key:
|
62
|
+
specification_version: 4
|
63
|
+
summary: Utilities for running bots with Turbot
|
64
|
+
test_files: []
|
65
|
+
has_rdoc:
|