turbot-runner 0.2.14 → 0.2.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +8 -5
- data/.gitmodules +1 -1
- data/.rspec +3 -0
- data/.travis.yml +11 -0
- data/README.md +6 -4
- data/lib/turbot_runner/processor.rb +1 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/schema/schemas/accounts-statement-schema.json +1 -1
- data/schema/schemas/company-schema.json +1 -1
- data/schema/schemas/filing-schema.json +2 -2
- data/schema/schemas/gazette-notice-schema.json +9 -2
- data/schema/schemas/includes/company-for-nesting.json +13 -4
- data/schema/schemas/licence-schema.json +2 -2
- data/spec/lib/runner_spec.rb +10 -5
- data/spec/spec_helper.rb +12 -1
- data/turbot-runner.gemspec +3 -3
- metadata +66 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f9da65f51ab5d12fe470d847e617ee7f65f39217
|
4
|
+
data.tar.gz: 692ce7bfe967e01b6c7485f301cc8697d6bb2cb1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4fab8ca2464231881299ff8ad68f0549ac3aa077ca637965413846880f09f3d547c66efb13ef73704eb0dd72faffcf9e295b83f092e3f333406351fd6930b126
|
7
|
+
data.tar.gz: 937e02cc9213c18c484cc9262c2691802c437f7f47e92d0f58f1c08a37d4925b570033394aec46751a9e6e58673e9426f83547b47ebb175a0dca9d3839f31740
|
data/.gitignore
CHANGED
data/.gitmodules
CHANGED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# turbot-runner
|
2
2
|
|
3
|
+
[Gem Version](https://badge.fury.io/rb/turbot-runner)
|
4
|
+
[Build Status](https://travis-ci.org/openc/turbot-runner)
|
5
|
+
[Dependency Status](https://gemnasium.com/openc/turbot-runner)
|
6
|
+
[Coverage Status](https://coveralls.io/r/openc/turbot-runner)
|
7
|
+
[Code Climate](https://codeclimate.com/github/openc/turbot-runner)
|
8
|
+
|
3
9
|
## Getting started
|
4
10
|
|
5
11
|
git submodule update --init
|
@@ -60,8 +66,4 @@ default, 24 hours) it interrupts itself, and marks the run as having failed.
|
|
60
66
|
|
61
67
|
## Running the tests
|
62
68
|
|
63
|
-
Tests are run with rspec:
|
64
|
-
|
65
|
-
`./bin/rspec`
|
66
|
-
|
67
69
|
The first two specs to run require some manual input.
|
@@ -18,6 +18,7 @@ module TurbotRunner
|
|
18
18
|
else
|
19
19
|
record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
|
20
20
|
|
21
|
+
# TODO Document why we aren't passing retrieved_at to the validator.
|
21
22
|
record_to_validate = record.select {|k, v| k != 'retrieved_at'}
|
22
23
|
|
23
24
|
error_message = Validator.validate(
|
@@ -64,9 +64,9 @@
|
|
64
64
|
"format": "date"
|
65
65
|
},
|
66
66
|
"retrieved_at": {
|
67
|
-
"description": "
|
67
|
+
"description": "The time or date at which the source URL was requested",
|
68
68
|
"type": "string",
|
69
|
-
"format": "date
|
69
|
+
"format": "date"
|
70
70
|
},
|
71
71
|
"other_attributes": {
|
72
72
|
"description": "Use for other attributes for which we don't yet have curated schema attributes",
|
@@ -267,6 +267,13 @@
|
|
267
267
|
"type": "string",
|
268
268
|
"enum": ["other"]
|
269
269
|
},
|
270
|
+
"classification": {
|
271
|
+
"description": "The type of judgment",
|
272
|
+
"type": "array",
|
273
|
+
"items": {
|
274
|
+
"$ref": "includes/classification.json"
|
275
|
+
}
|
276
|
+
},
|
270
277
|
"body": {
|
271
278
|
"description": "The unstructured prose content",
|
272
279
|
"$ref": "#/definitions/body"
|
@@ -405,9 +412,9 @@
|
|
405
412
|
"format": "date"
|
406
413
|
},
|
407
414
|
"retrieved_at": {
|
408
|
-
"description": "The time at which the source URL was requested",
|
415
|
+
"description": "The time or date at which the source URL was requested",
|
409
416
|
"type": "string",
|
410
|
-
"format": "date
|
417
|
+
"format": "date"
|
411
418
|
},
|
412
419
|
"confidence": {
|
413
420
|
"description": "The scraper's author's confidence in the accuracy of the data",
|
@@ -33,7 +33,7 @@
|
|
33
33
|
},
|
34
34
|
"retrieved_at": {
|
35
35
|
"type": "string",
|
36
|
-
"format": "date
|
36
|
+
"format": "date"
|
37
37
|
},
|
38
38
|
"current_status": {
|
39
39
|
"type": [
|
@@ -239,7 +239,16 @@
|
|
239
239
|
}
|
240
240
|
},
|
241
241
|
"additionalProperties": false,
|
242
|
-
"
|
243
|
-
|
242
|
+
"anyOf": [
|
243
|
+
{
|
244
|
+
"required": [
|
245
|
+
"name"
|
246
|
+
]
|
247
|
+
},
|
248
|
+
{
|
249
|
+
"required": [
|
250
|
+
"company_number"
|
251
|
+
]
|
252
|
+
}
|
244
253
|
]
|
245
|
-
}
|
254
|
+
}
|
@@ -59,9 +59,9 @@
|
|
59
59
|
"format": "date"
|
60
60
|
},
|
61
61
|
"retrieved_at": {
|
62
|
-
"description": "
|
62
|
+
"description": "The time or date at which the source URL was requested",
|
63
63
|
"type": "string",
|
64
|
-
"format": "date
|
64
|
+
"format": "date"
|
65
65
|
},
|
66
66
|
"licence_url": {
|
67
67
|
"type": "string",
|
data/spec/lib/runner_spec.rb
CHANGED
@@ -168,7 +168,7 @@ describe TurbotRunner::Runner do
|
|
168
168
|
context 'with a handler that interrupts the runner' do
|
169
169
|
before do
|
170
170
|
class Handler < TurbotRunner::BaseHandler
|
171
|
-
def initialize
|
171
|
+
def initialize
|
172
172
|
@count = 0
|
173
173
|
super
|
174
174
|
end
|
@@ -266,11 +266,12 @@ describe TurbotRunner::Runner do
|
|
266
266
|
|
267
267
|
context 'when the scraped data is provided' do
|
268
268
|
before do
|
269
|
+
@runner = test_runner('bot-with-transformer', :scraper_provided => true)
|
270
|
+
@runner.set_up_output_directory
|
269
271
|
FileUtils.cp(
|
270
272
|
File.join('spec', 'outputs', 'full-scraper.out'),
|
271
273
|
File.join(File.dirname(__FILE__), '../bots', 'bot-with-transformer', 'output', 'scraper.out')
|
272
274
|
)
|
273
|
-
@runner = test_runner('bot-with-transformer', :scraper_provided => true)
|
274
275
|
end
|
275
276
|
|
276
277
|
it 'does not run scraper' do
|
@@ -296,8 +297,8 @@ describe TurbotRunner::Runner do
|
|
296
297
|
class Handler < TurbotRunner::BaseHandler
|
297
298
|
attr_reader :records_seen
|
298
299
|
|
299
|
-
def initialize
|
300
|
-
@records_seen = Hash.new
|
300
|
+
def initialize
|
301
|
+
@records_seen = Hash.new(0)
|
301
302
|
super
|
302
303
|
end
|
303
304
|
|
@@ -324,7 +325,7 @@ describe TurbotRunner::Runner do
|
|
324
325
|
it 'can cope when scraper has failed immediately' do
|
325
326
|
test_runner('bot-that-crashes-immediately').run
|
326
327
|
|
327
|
-
runner = test_runner('bot-
|
328
|
+
runner = test_runner('bot-that-crashes-immediately',
|
328
329
|
:record_handler => @handler
|
329
330
|
)
|
330
331
|
|
@@ -338,15 +339,19 @@ describe TurbotRunner::Runner do
|
|
338
339
|
end
|
339
340
|
|
340
341
|
it 'clears existing output' do
|
342
|
+
@runner.set_up_output_directory
|
341
343
|
path = File.join(@runner.base_directory, 'output', 'scraper.out')
|
342
344
|
FileUtils.touch(path)
|
345
|
+
|
343
346
|
@runner.set_up_output_directory
|
344
347
|
expect(File.exist?(path)).to be(false)
|
345
348
|
end
|
346
349
|
|
347
350
|
it 'does not clear existing files that are not output files' do
|
351
|
+
@runner.set_up_output_directory
|
348
352
|
path = File.join(@runner.base_directory, 'output', 'stdout')
|
349
353
|
FileUtils.touch(path)
|
354
|
+
|
350
355
|
@runner.set_up_output_directory
|
351
356
|
expect(File.exist?(path)).to be(true)
|
352
357
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,4 +1,15 @@
|
|
1
|
-
require '
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
require 'simplecov'
|
4
|
+
require 'coveralls'
|
5
|
+
SimpleCov.formatter = Coveralls::SimpleCov::Formatter
|
6
|
+
SimpleCov.start do
|
7
|
+
add_filter 'spec'
|
8
|
+
end
|
9
|
+
|
10
|
+
require 'rspec'
|
11
|
+
|
12
|
+
require File.dirname(__FILE__) + '/../lib/turbot_runner'
|
2
13
|
|
3
14
|
RSpec::Matchers.define(:fail_validation_with) do |expected_error|
|
4
15
|
match do |record|
|
data/turbot-runner.gemspec
CHANGED
@@ -19,8 +19,8 @@ Gem::Specification.new do |gem|
|
|
19
19
|
|
20
20
|
gem.add_dependency "activesupport", '4.1.4'
|
21
21
|
gem.add_dependency "openc-json_schema"
|
22
|
-
|
22
|
+
|
23
|
+
gem.add_development_dependency "coveralls"
|
23
24
|
gem.add_development_dependency "rake"
|
24
|
-
gem.add_development_dependency "rspec", "
|
25
|
-
gem.add_development_dependency "rspec-mocks", ">= 3.0"
|
25
|
+
gem.add_development_dependency "rspec", "~> 3.4.0"
|
26
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.14
|
4
|
+
version: 0.2.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- OpenCorporates
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: coveralls
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -70,30 +70,16 @@ dependencies:
|
|
70
70
|
name: rspec
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: 3.4.0
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rspec-mocks
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '3.0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '3.0'
|
82
|
+
version: 3.4.0
|
97
83
|
description:
|
98
84
|
email: bots@opencorporates.com
|
99
85
|
executables: []
|
@@ -102,6 +88,8 @@ extra_rdoc_files: []
|
|
102
88
|
files:
|
103
89
|
- ".gitignore"
|
104
90
|
- ".gitmodules"
|
91
|
+
- ".rspec"
|
92
|
+
- ".travis.yml"
|
105
93
|
- Gemfile
|
106
94
|
- README.md
|
107
95
|
- Rakefile
|
@@ -232,4 +220,60 @@ rubygems_version: 2.4.5
|
|
232
220
|
signing_key:
|
233
221
|
specification_version: 4
|
234
222
|
summary: Utilities for running bots with Turbot
|
235
|
-
test_files:
|
223
|
+
test_files:
|
224
|
+
- spec/bots/bot-that-crashes-immediately/manifest.json
|
225
|
+
- spec/bots/bot-that-crashes-immediately/scraper.rb
|
226
|
+
- spec/bots/bot-that-crashes-immediately/transformer1.rb
|
227
|
+
- spec/bots/bot-that-crashes-in-scraper/manifest.json
|
228
|
+
- spec/bots/bot-that-crashes-in-scraper/scraper.rb
|
229
|
+
- spec/bots/bot-that-crashes-in-scraper/transformer1.rb
|
230
|
+
- spec/bots/bot-that-crashes-in-transformer/manifest.json
|
231
|
+
- spec/bots/bot-that-crashes-in-transformer/scraper.rb
|
232
|
+
- spec/bots/bot-that-crashes-in-transformer/transformer1.rb
|
233
|
+
- spec/bots/bot-that-crashes-in-transformer/transformer2.rb
|
234
|
+
- spec/bots/bot-that-emits-run-ended/manifest.json
|
235
|
+
- spec/bots/bot-that-emits-run-ended/scraper.rb
|
236
|
+
- spec/bots/bot-that-emits-snapshot-ended/manifest.json
|
237
|
+
- spec/bots/bot-that-emits-snapshot-ended/scraper.rb
|
238
|
+
- spec/bots/bot-that-expects-file/manifest.json
|
239
|
+
- spec/bots/bot-that-expects-file/scraper.rb
|
240
|
+
- spec/bots/bot-that-expects-file/something.txt
|
241
|
+
- spec/bots/bot-that-is-allowed-to-produce-duplicates/manifest.json
|
242
|
+
- spec/bots/bot-that-is-allowed-to-produce-duplicates/scraper.rb
|
243
|
+
- spec/bots/bot-that-produces-duplicates/manifest.json
|
244
|
+
- spec/bots/bot-that-produces-duplicates/scraper.rb
|
245
|
+
- spec/bots/bot-with-invalid-data-type/manifest.json
|
246
|
+
- spec/bots/bot-with-invalid-data-type/scraper.rb
|
247
|
+
- spec/bots/bot-with-invalid-sample-date/manifest.json
|
248
|
+
- spec/bots/bot-with-invalid-sample-date/scraper.rb
|
249
|
+
- spec/bots/bot-with-pause/manifest.json
|
250
|
+
- spec/bots/bot-with-pause/scraper.rb
|
251
|
+
- spec/bots/bot-with-transformer/manifest.json
|
252
|
+
- spec/bots/bot-with-transformer/scraper.rb
|
253
|
+
- spec/bots/bot-with-transformer/transformer.rb
|
254
|
+
- spec/bots/bot-with-transformers/manifest.json
|
255
|
+
- spec/bots/bot-with-transformers/scraper.rb
|
256
|
+
- spec/bots/bot-with-transformers/transformer1.rb
|
257
|
+
- spec/bots/bot-with-transformers/transformer2.rb
|
258
|
+
- spec/bots/invalid-json-bot/manifest.json
|
259
|
+
- spec/bots/invalid-json-bot/scraper.rb
|
260
|
+
- spec/bots/invalid-record-bot/manifest.json
|
261
|
+
- spec/bots/invalid-record-bot/scraper.rb
|
262
|
+
- spec/bots/logging-bot/manifest.json
|
263
|
+
- spec/bots/logging-bot/scraper.rb
|
264
|
+
- spec/bots/python-bot/manifest.json
|
265
|
+
- spec/bots/python-bot/scraper.py
|
266
|
+
- spec/bots/ruby-bot/manifest.json
|
267
|
+
- spec/bots/ruby-bot/scraper.rb
|
268
|
+
- spec/bots/slow-bot/manifest.json
|
269
|
+
- spec/bots/slow-bot/scraper.rb
|
270
|
+
- spec/lib/processor_spec.rb
|
271
|
+
- spec/lib/runner_spec.rb
|
272
|
+
- spec/lib/utils_spec.rb
|
273
|
+
- spec/lib/validator_spec.rb
|
274
|
+
- spec/manual_spec.rb
|
275
|
+
- spec/outputs/full-scraper.out
|
276
|
+
- spec/outputs/full-transformer.out
|
277
|
+
- spec/outputs/truncated-scraper.out
|
278
|
+
- spec/spec_helper.rb
|
279
|
+
has_rdoc:
|