turbot-runner 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MGQwYjkyYzJlYWE2MmM2YzRiMjljM2NlM2MxNGQyYmZkMDMzOWZjZQ==
5
+ data.tar.gz: !binary |-
6
+ ODRjOWJkZmIzYjNiNTliZmRjYjA2ZDNhZGQ0MmFlZGY4ZmEwZTc3ZA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZTYwYTdmOWY0OTdlYzEyZTNjZDMxOTc1ZGI2YmM2Nzg0ZDIwZTYwNzc1ZThi
10
+ NjJlNTVlNWUyMjFlM2RhOWQyMTU4MWM1MDJlYTRhZTAyNjY0MzJhMTBjOGZj
11
+ N2FkNzMwOTc3YmIxM2NmMjJhNWY5MWE1YTE1ZjIwMjcxZWMwODA=
12
+ data.tar.gz: !binary |-
13
+ MGNiZDlhYzNiYTM4ZjhmYjE3OWJmMzJhNzI5MThkMmM4ODBkMDQ4YmNmMmJi
14
+ MzE1ZDIzNmFkYmQ1OGRiNDA3MjY1OWZmZjdjN2FjNDMwNjYzNzg2NDhlYTc0
15
+ NmNkMmQwMTcwYWQ5YzE1YTY2NDJmZjFjYWY5Mjc4Zjk2NTZmZTQ=
data/bin/rspec ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rspec' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('rspec-core', 'rspec')
@@ -0,0 +1,168 @@
require 'json'
require 'open3'
2
+
module TurbotRunner
  # Raised when a bot script exits with a non-zero status. The message is the
  # text the script wrote to stderr.
  class ScriptError < StandardError; end

  # Runs a bot's scraper script, plus any transformer scripts listed in the
  # bot's manifest.json, and reports every record the scripts print.
  #
  # Scripts are expected to print one JSON document per line on stdout.
  # Subclasses receive the results through the private hook methods
  # handle_valid_record, handle_invalid_record, handle_successful_run,
  # handle_interrupted_run and handle_failed_run.
  class BaseRunner
    # @param bot_directory [String] directory containing manifest.json and the
    #   scraper script
    # @raise [RuntimeError] if manifest.json is missing or is not valid JSON
    def initialize(bot_directory)
      @bot_directory = bot_directory

      manifest_path = File.join(bot_directory, 'manifest.json')
      raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)

      begin
        # File.read rather than Kernel#open: the latter would spawn a process
        # for a path that begins with "|".
        @config = JSON.parse(File.read(manifest_path))
      rescue JSON::ParserError => e
        raise "Could not parse #{manifest_path} as JSON: #{e.message}"
      end

      @interrupted = false
      @schemas = {}
    end

    # Runs the scraper and feeds each valid scraper line to every transformer.
    #
    # Exactly one of handle_successful_run, handle_interrupted_run or
    # handle_failed_run is called at the end, and its result is returned.
    # Output lines that are not valid JSON raise JSON::ParserError to the
    # caller.
    #
    # @param opts [Hash] :validate (default true) - pass false to skip schema
    #   validation and treat every record as valid
    # @raise [RuntimeError] if there is not exactly one scraper.rb / scraper.py,
    #   or a transformer file has an unsupported extension
    def run(opts={})
      validation_required = opts.fetch(:validate, true)

      scraper = scraper_file
      data_type = @config['data_type']

      begin
        run_script_each_line([interpreter_for(scraper), scraper]) do |line|
          # Invalid scraper records are reported but never transformed.
          next unless process_line(line, data_type, validation_required)

          transformers.each do |transformer|
            file = File.join(@bot_directory, transformer['file'])
            transformed_type = transformer['data_type']

            run_script_each_line([interpreter_for(file), file], :input => line) do |transformed_line|
              process_line(transformed_line, transformed_type, validation_required)
            end
          end
        end

        if @interrupted
          handle_interrupted_run
        else
          handle_successful_run
        end
      rescue ScriptError => e
        handle_failed_run(e.message)
      end
    end

    # Asks a run in progress to stop. The flag is checked before each line is
    # read from a script.
    def interrupt
      @interrupted = true
    end

    private

    # Transformer entries from the manifest ([] when none are configured).
    def transformers
      @config['transformers'] || []
    end

    # Parses one line of script output, validates it when required, and
    # dispatches it to the matching record hook.
    #
    # @return [Boolean] true when the record was handled as valid
    def process_line(line, data_type, validation_required)
      record = JSON.parse(line)
      errors = validation_required ? validate(record, data_type) : []

      if errors.empty?
        handle_valid_record(record, data_type)
        true
      else
        handle_invalid_record(record, data_type, errors)
        false
      end
    end

    # Validates a record against the schema for data_type.
    #
    # JSON::Validator is provided by the json-schema gem, which must already
    # be loaded when this method is called.
    #
    # @return [Array<String>] error messages; empty when the record is valid
    def validate(record, data_type)
      schema = get_schema(data_type)
      errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)
      errors.map do |error|
        case error[:message]
        when /The property '#\/' did not contain a required property of '(\w+)'/
          "Missing required attribute: #{Regexp.last_match(1)}"
        else
          error[:message]
        end
      end
    end

    # Returns the (memoised) path of the schema file for data_type. Underscores
    # and spaces in the name become hyphens. The path is schema/schemas/ under
    # the parent of this file's directory.
    #
    # NOTE(review): the gem's file list does not appear to include a schema/
    # directory - confirm where the schema files are expected to come from.
    def get_schema(data_type)
      @schemas[data_type] ||= begin
        hyphenated_name = data_type.to_s.tr('_ ', '-')
        File.expand_path("../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
      end
    end

    # Hook: called for each record that passes validation. Must be overridden.
    def handle_valid_record(record, data_type)
      raise NotImplementedError
    end

    # Hook: called for each record that fails validation, together with the
    # error messages. Must be overridden.
    def handle_invalid_record(record, data_type, errors)
      raise NotImplementedError
    end

    # Hook: called when the run finishes without interruption. No-op by default.
    def handle_successful_run
    end

    # Hook: called when the run stops because of #interrupt. No-op by default.
    def handle_interrupted_run
    end

    # Hook: called with the script's stderr output when a script exits with a
    # non-zero status. Must be overridden.
    def handle_failed_run(output)
      raise NotImplementedError
    end

    # Runs a command and yields each non-blank, stripped line of its stdout.
    #
    # @param command [String, Array<String>] a shell command line, or an argv
    #   array that is executed without going through a shell
    # @param options [Hash] :input - text written to the child's stdin
    # @raise [ScriptError] if the child exits unsuccessfully and the run was
    #   not interrupted
    def run_script_each_line(command, options={})
      # TODO: enforce a timeout on the child process
      Open3.popen3(*Array(command)) do |stdin, stdout, stderr, wait_thread|
        # Drain stderr in the background: a child that fills the stderr pipe
        # while we are blocked reading stdout would otherwise deadlock.
        stderr_reader = Thread.new do
          begin
            stderr.read
          rescue IOError
            # The stream was closed underneath us during teardown.
            ''
          end
        end

        begin
          stdin.puts(options[:input]) if options[:input]
          # Always close stdin so a child that reads it sees EOF.
          stdin.close
        rescue Errno::EPIPE
          # The child exited without reading its input; its exit status is
          # checked below.
        end

        until @interrupted
          raw_line = stdout.gets
          break if raw_line.nil?

          line = raw_line.strip
          yield line unless line.empty?
        end

        if @interrupted
          # Nobody reads the child's output any more, so stop it rather than
          # wait for it to finish by itself.
          terminate_child(wait_thread)
        elsif !wait_thread.value.success?
          raise ScriptError, stderr_reader.value
        end
      end
    end

    # Sends TERM to a child process that is still running.
    def terminate_child(wait_thread)
      Process.kill('TERM', wait_thread.pid) if wait_thread.alive?
    rescue Errno::ESRCH
      # The child exited between the liveness check and the signal.
    end

    # Path of the bot's single scraper script (scraper.rb or scraper.py).
    #
    # @raise [RuntimeError] if none, or more than one, is present
    def scraper_file
      candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))
      case candidates.size
      when 0
        raise 'Could not find scraper to run'
      when 1
        candidates.first
      else
        raise "Found multiple scrapers: #{candidates.join(', ')}"
      end
    end

    # Name of the interpreter used to run file, chosen by its extension.
    #
    # @raise [RuntimeError] for any extension other than .rb or .py
    def interpreter_for(file)
      case file
      when /\.rb\z/
        'ruby'
      when /\.py\z/
        'python'
      else
        raise "Could not run #{file}"
      end
    end
  end
end
@@ -0,0 +1,3 @@
module TurbotRunner
  # Release number of the turbot-runner gem. Frozen so the shared constant
  # string cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,15 @@
1
+ {
2
+ "bot_id": "dummy-bot",
3
+ "data_type": "hello",
4
+ "description": "This is a dummy bot",
5
+ "identifying_fields": ["number"],
6
+ "files": ["scraper.rb"],
7
+ "transformers": [
8
+ {
9
+ "file": "transformer.rb",
10
+ "data_type": "goodbye",
11
+ "identifying_fields": [""]
12
+ }
13
+ ],
14
+ "frequency": "monthly"
15
+ }
@@ -0,0 +1,6 @@
require 'json'

# Dummy scraper: prints four records, one JSON document per line.
# Record 3 has no :hello key.
records = [
  {:n => 1, :hello => 'hello, 1'},
  {:n => 2, :hello => 'hello, 2'},
  {:n => 3},
  {:n => 4, :hello => 'hello, 4'}
]

records.each { |record| puts record.to_json }
@@ -0,0 +1,12 @@
require 'json'

# Dummy transformer: reads one JSON record per line from STDIN and prints a
# record with the same "n" whose "goodbye" value is the input's "hello" value
# with the first "hello" replaced by "goodbye".
STDIN.each_line do |line|
  source = JSON.parse(line)
  farewell = source['hello'].sub('hello', 'goodbye')

  puts({:n => source['n'], :goodbye => farewell}.to_json)
end
@@ -0,0 +1,32 @@
require 'json'
require 'turbot_runner'

describe TurbotRunner::BaseRunner do
  it 'can run a bot' do
    # This test runs slowly - there seems to be some delay in subprocesses
    # reading from their stdins, but this is not observed when the code is run
    # outside of rspec.

    # Anonymous subclass: declaring a named class inside the example would
    # leak a global constant into every other spec.
    spec_runner_class = Class.new(TurbotRunner::BaseRunner) do
      # Stub validation: only the record with n == 3 is invalid.
      def validate(record, data_type)
        record['n'] == 3 ? [:error] : []
      end
    end

    runner = spec_runner_class.new('spec/dummy-bot')

    # Each valid scraper record is reported, then its transformed counterpart.
    [1, 2, 4].each do |n|
      expect(runner).to receive(:handle_valid_record).with({'n' => n, 'hello' => "hello, #{n}"}, 'hello')
      expect(runner).to receive(:handle_valid_record).with({'n' => n, 'goodbye' => "goodbye, #{n}"}, 'goodbye')
    end

    # The invalid record is reported and is not expected to be transformed.
    expect(runner).to receive(:handle_invalid_record).with({'n' => 3}, 'hello', [:error])
    expect(runner).to receive(:handle_successful_run)

    runner.run
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: turbot-runner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - OpenCorporates
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json-schema
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.2.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.2.2
27
+ description:
28
+ email: bots@opencorporates.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - bin/rspec
34
+ - lib/turbot_runner.rb
35
+ - lib/turbot_runner/version.rb
36
+ - spec/dummy-bot/manifest.json
37
+ - spec/dummy-bot/scraper.rb
38
+ - spec/dummy-bot/transformer.rb
39
+ - spec/turbot_runner_spec.rb
40
+ homepage: http://turbot.opencorporates.com/
41
+ licenses:
42
+ - MIT
43
+ metadata: {}
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: 1.9.2
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 2.2.2
61
+ signing_key:
62
+ specification_version: 4
63
+ summary: Utilities for running bots with Turbot
64
+ test_files: []
65
+ has_rdoc: