turbot-runner 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MGQwYjkyYzJlYWE2MmM2YzRiMjljM2NlM2MxNGQyYmZkMDMzOWZjZQ==
5
+ data.tar.gz: !binary |-
6
+ ODRjOWJkZmIzYjNiNTliZmRjYjA2ZDNhZGQ0MmFlZGY4ZmEwZTc3ZA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZTYwYTdmOWY0OTdlYzEyZTNjZDMxOTc1ZGI2YmM2Nzg0ZDIwZTYwNzc1ZThi
10
+ NjJlNTVlNWUyMjFlM2RhOWQyMTU4MWM1MDJlYTRhZTAyNjY0MzJhMTBjOGZj
11
+ N2FkNzMwOTc3YmIxM2NmMjJhNWY5MWE1YTE1ZjIwMjcxZWMwODA=
12
+ data.tar.gz: !binary |-
13
+ MGNiZDlhYzNiYTM4ZjhmYjE3OWJmMzJhNzI5MThkMmM4ODBkMDQ4YmNmMmJi
14
+ MzE1ZDIzNmFkYmQ1OGRiNDA3MjY1OWZmZjdjN2FjNDMwNjYzNzg2NDhlYTc0
15
+ NmNkMmQwMTcwYWQ5YzE1YTY2NDJmZjFjYWY5Mjc4Zjk2NTZmZTQ=
data/bin/rspec ADDED
@@ -0,0 +1,16 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # This file was generated by Bundler.
4
+ #
5
+ # The application 'rspec' is installed as part of a gem, and
6
+ # this file is here to facilitate running it.
7
+ #
8
+
9
+ require 'pathname'
10
+ ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
+ Pathname.new(__FILE__).realpath)
12
+
13
+ require 'rubygems'
14
+ require 'bundler/setup'
15
+
16
+ load Gem.bin_path('rspec-core', 'rspec')
@@ -0,0 +1,168 @@
1
require 'json'
require 'open3'
2
+
3
module TurbotRunner
  # Raised when a scraper or transformer script exits unsuccessfully. The
  # message carries whatever the script wrote to stderr.
  class ScriptError < StandardError; end

  # Runs a bot's scraper script, validates every record it prints (one JSON
  # document per line of stdout) and pipes each valid record through the
  # transformers listed in the bot's manifest.
  #
  # Subclasses must implement #handle_valid_record, #handle_invalid_record
  # and #handle_failed_run; #handle_successful_run and
  # #handle_interrupted_run are optional hooks that do nothing by default.
  class BaseRunner
    # Loads and parses manifest.json from the bot directory.
    #
    # bot_directory - path of the directory holding manifest.json, the
    #                 scraper and any transformer scripts.
    #
    # Raises RuntimeError when the manifest is missing or is not valid JSON.
    def initialize(bot_directory)
      @bot_directory = bot_directory

      manifest_path = File.join(bot_directory, 'manifest.json')
      raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)

      begin
        # File.read rather than Kernel#open: the latter treats a leading "|"
        # in its argument as a command to execute.
        @config = JSON.parse(File.read(manifest_path))
      rescue JSON::ParserError
        # TODO provide better error message
        raise "Could not parse #{manifest_path} as JSON"
      end

      @interrupted = false
      @schemas = {}
    end

    # Runs the scraper and, for every valid record, each transformer.
    #
    # opts - options hash:
    #        :validate - pass false to skip #validate and treat every record
    #                    as valid (default: true).
    #
    # Exactly one of handle_successful_run, handle_interrupted_run or
    # handle_failed_run is called at the end, and its value is returned.
    # A line of script output that is not valid JSON lets JSON::ParserError
    # propagate to the caller. A missing or ambiguous scraper raises
    # RuntimeError before anything is executed.
    def run(opts={})
      # Only an explicit false disables validation.
      validation_required = opts[:validate] != false

      command = script_command(scraper_file)
      data_type = @config['data_type']

      begin
        run_script_each_line(command) do |line|
          record = JSON.parse(line)
          next unless dispatch_record(record, data_type, validation_required)

          run_transformers(line, validation_required)
        end

        if @interrupted
          handle_interrupted_run
        else
          handle_successful_run
        end
      rescue ScriptError => e
        handle_failed_run(e.message)
      end
    end

    # Asks a run in progress to stop. The flag is checked between output
    # lines; see #run_script_each_line.
    def interrupt
      @interrupted = true
    end

    private

    # Transformer definitions from the manifest, or [] when none are listed.
    def transformers
      @config['transformers'] || []
    end

    # Argument vector used to execute +file+. Passing an array to popen3
    # bypasses the shell, so paths containing spaces or shell metacharacters
    # are handed to the interpreter unchanged.
    def script_command(file)
      [interpreter_for(file), file]
    end

    # Validates (unless disabled) a record and routes it to the matching
    # handler. Returns true when the record was treated as valid.
    def dispatch_record(record, data_type, validation_required)
      errors = validation_required ? validate(record, data_type) : []

      if errors.empty?
        handle_valid_record(record, data_type)
        true
      else
        handle_invalid_record(record, data_type, errors)
        false
      end
    end

    # Feeds one raw scraper output line to every transformer and dispatches
    # each record the transformers print.
    def run_transformers(line, validation_required)
      transformers.each do |transformer|
        file = File.join(@bot_directory, transformer['file'])
        transformed_type = transformer['data_type']

        run_script_each_line(script_command(file), :input => line) do |transformed_line|
          dispatch_record(JSON.parse(transformed_line), transformed_type, validation_required)
        end
      end
    end

    # Validates +record+ against the schema for +data_type+.
    #
    # Returns an array of error messages, empty when the record is valid.
    # The validator's "missing required property" message is rewritten into
    # a shorter form; all other messages are passed through unchanged.
    #
    # NOTE(review): JSON::Validator presumably comes from the json-schema
    # gem, which this file does not require itself - confirm it is loaded
    # by the caller.
    def validate(record, data_type)
      schema = get_schema(data_type)
      errors = JSON::Validator.fully_validate(schema, record, :errors_as_objects => true)
      errors.map do |error|
        case error[:message]
        when /The property '#\/' did not contain a required property of '(\w+)'/
          "Missing required attribute: #{Regexp.last_match(1)}"
        else
          error[:message]
        end
      end
    end

    # Path of the schema file for +data_type+, memoised per data type.
    # Underscores and spaces in the data type become hyphens.
    def get_schema(data_type)
      @schemas[data_type] ||= begin
        hyphenated_name = data_type.to_s.tr('_ ', '-')
        File.expand_path("../../schema/schemas/#{hyphenated_name}-schema.json", __FILE__)
      end
    end

    # Hook: called for every record treated as valid. Must be overridden.
    def handle_valid_record(record, data_type)
      raise NotImplementedError
    end

    # Hook: called for every record that failed validation, together with
    # the error messages. Must be overridden.
    def handle_invalid_record(record, data_type, errors)
      raise NotImplementedError
    end

    # Hook: called when the run finished without being interrupted.
    def handle_successful_run
    end

    # Hook: called when the run stopped because #interrupt was called.
    def handle_interrupted_run
    end

    # Hook: called with the failing script's stderr output. Must be
    # overridden.
    def handle_failed_run(output)
      raise NotImplementedError
    end

    # Executes +command+ and yields every non-blank, stripped line it prints
    # on stdout.
    #
    # command - an argument array (executed without a shell) or a single
    #           command string (handed to popen3 as before).
    # options - :input - text written to the script's stdin before reading.
    #
    # Raises ScriptError, carrying the script's stderr output, when the
    # script runs to completion and exits unsuccessfully. When the run is
    # interrupted the child is sent TERM and no error is raised.
    def run_script_each_line(command, options={})
      # TODO: handle timeouts; an options[:timeout] value is currently ignored.
      Open3.popen3(*Array(command)) do |stdin, stdout, stderr, wait_thread|
        # Drain stderr while stdout is being read. Otherwise a script that
        # writes more than a pipe buffer of diagnostics blocks forever and
        # never closes its stdout.
        stderr_reader = Thread.new { stderr.read }

        begin
          begin
            stdin.puts(options[:input]) if options[:input]
            # Always close stdin so a script that reads it sees EOF instead
            # of waiting on input that will never arrive.
            stdin.close
          rescue Errno::EPIPE
            # The script exited before consuming its input; the exit status
            # check below reports the failure.
          end

          reached_eof = false
          until @interrupted
            raw_line = stdout.gets
            if raw_line.nil?
              reached_eof = true
              break
            end

            line = raw_line.strip
            yield line unless line.empty?
          end

          if reached_eof
            status = wait_thread.value
            error_output = stderr_reader.value
            raise ScriptError, error_output unless status.success?
          else
            # Interrupted mid-stream: nobody will read the rest of the
            # output, so stop the child rather than wait on a process that
            # may be blocked writing to a full pipe.
            begin
              Process.kill('TERM', wait_thread.pid)
            rescue Errno::ESRCH
              # The child has already exited.
            end
          end
        ensure
          # Make sure the reader is gone before popen3 closes the pipes.
          stderr_reader.kill
          stderr_reader.join
        end
      end
    end

    # Path of the bot's scraper: the single scraper.rb or scraper.py in the
    # bot directory. Raises RuntimeError when there is none or more than one.
    def scraper_file
      candidates = Dir.glob(File.join(@bot_directory, 'scraper.{rb,py}'))

      raise 'Could not find scraper to run' if candidates.empty?
      raise "Found multiple scrapers: #{candidates.join(', ')}" if candidates.size > 1

      candidates.first
    end

    # Name of the interpreter for +file+, chosen by extension
    # (.rb -> ruby, .py -> python). Raises RuntimeError for anything else.
    def interpreter_for(file)
      case file
      when /\.rb\z/
        'ruby'
      when /\.py\z/
        'python'
      else
        raise "Could not run #{file}"
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module TurbotRunner
  # Release number of the turbot-runner gem. Frozen so the shared string
  # cannot be mutated by accident.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,15 @@
1
+ {
2
+ "bot_id": "dummy-bot",
3
+ "data_type": "hello",
4
+ "description": "This is a dummy bot",
5
+ "identifying_fields": ["number"],
6
+ "files": ["scraper.rb"],
7
+ "transformers": [
8
+ {
9
+ "file": "transformer.rb",
10
+ "data_type": "goodbye",
11
+ "identifying_fields": [""]
12
+ }
13
+ ],
14
+ "frequency": "monthly"
15
+ }
@@ -0,0 +1,6 @@
1
+ require 'json'
2
+
3
# Dummy scraper: prints four JSON records, one per line. Record 3 carries
# no :hello value; every other record has a greeting built from its number.
[1, 2, 3, 4].each do |number|
  record = {:n => number}
  record[:hello] = "hello, #{number}" unless number == 3
  puts record.to_json
end
@@ -0,0 +1,12 @@
1
+ require 'json'
2
+
3
# Dummy transformer: reads JSON records from STDIN, one per line, and prints
# each one back with its 'hello' text rewritten as a :goodbye value.
while (line = STDIN.gets)
  source = JSON.parse(line)
  farewell = source['hello'].sub('hello', 'goodbye')

  puts JSON.generate(:n => source['n'], :goodbye => farewell)
end
@@ -0,0 +1,32 @@
1
+ require 'json'
2
+ require 'turbot_runner'
3
+
4
describe TurbotRunner::BaseRunner do
  it 'can run a bot' do
    # This test runs slowly - there seems to be some delay in subprocesses
    # reading from their stdins, but this is not observed when the code is run
    # outside of rspec.

    # Anonymous subclass, so the example does not leak a global constant.
    # Record 3 is the only one reported as invalid.
    runner_class = Class.new(TurbotRunner::BaseRunner) do
      def validate(record, data_type)
        record['n'] == 3 ? [:error] : []
      end
    end

    # Resolve the fixture relative to this file so the spec does not depend
    # on the directory rspec is started from.
    runner = runner_class.new(File.expand_path('../dummy-bot', __FILE__))

    expect(runner).to receive(:handle_valid_record).with({'n' => 1, 'hello' => 'hello, 1'}, 'hello')
    expect(runner).to receive(:handle_valid_record).with({'n' => 1, 'goodbye' => 'goodbye, 1'}, 'goodbye')
    expect(runner).to receive(:handle_valid_record).with({'n' => 2, 'hello' => 'hello, 2'}, 'hello')
    expect(runner).to receive(:handle_valid_record).with({'n' => 2, 'goodbye' => 'goodbye, 2'}, 'goodbye')
    expect(runner).to receive(:handle_invalid_record).with({'n' => 3}, 'hello', [:error])
    expect(runner).to receive(:handle_valid_record).with({'n' => 4, 'hello' => 'hello, 4'}, 'hello')
    expect(runner).to receive(:handle_valid_record).with({'n' => 4, 'goodbye' => 'goodbye, 4'}, 'goodbye')
    expect(runner).to receive(:handle_successful_run)
    runner.run
  end
end
metadata ADDED
@@ -0,0 +1,65 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: turbot-runner
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - OpenCorporates
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: json-schema
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.2.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.2.2
27
+ description:
28
+ email: bots@opencorporates.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - bin/rspec
34
+ - lib/turbot_runner.rb
35
+ - lib/turbot_runner/version.rb
36
+ - spec/dummy-bot/manifest.json
37
+ - spec/dummy-bot/scraper.rb
38
+ - spec/dummy-bot/transformer.rb
39
+ - spec/turbot_runner_spec.rb
40
+ homepage: http://turbot.opencorporates.com/
41
+ licenses:
42
+ - MIT
43
+ metadata: {}
44
+ post_install_message:
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ! '>='
51
+ - !ruby/object:Gem::Version
52
+ version: 1.9.2
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ! '>='
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubyforge_project:
60
+ rubygems_version: 2.2.2
61
+ signing_key:
62
+ specification_version: 4
63
+ summary: Utilities for running bots with Turbot
64
+ test_files: []
65
+ has_rdoc: