turbot-runner 0.2.14 → 0.2.15
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +8 -5
- data/.gitmodules +1 -1
- data/.rspec +3 -0
- data/.travis.yml +11 -0
- data/README.md +6 -4
- data/lib/turbot_runner/processor.rb +1 -0
- data/lib/turbot_runner/version.rb +1 -1
- data/schema/schemas/accounts-statement-schema.json +1 -1
- data/schema/schemas/company-schema.json +1 -1
- data/schema/schemas/filing-schema.json +2 -2
- data/schema/schemas/gazette-notice-schema.json +9 -2
- data/schema/schemas/includes/company-for-nesting.json +13 -4
- data/schema/schemas/licence-schema.json +2 -2
- data/spec/lib/runner_spec.rb +10 -5
- data/spec/spec_helper.rb +12 -1
- data/turbot-runner.gemspec +3 -3
- metadata +66 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f9da65f51ab5d12fe470d847e617ee7f65f39217
|
4
|
+
data.tar.gz: 692ce7bfe967e01b6c7485f301cc8697d6bb2cb1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4fab8ca2464231881299ff8ad68f0549ac3aa077ca637965413846880f09f3d547c66efb13ef73704eb0dd72faffcf9e295b83f092e3f333406351fd6930b126
|
7
|
+
data.tar.gz: 937e02cc9213c18c484cc9262c2691802c437f7f47e92d0f58f1c08a37d4925b570033394aec46751a9e6e58673e9426f83547b47ebb175a0dca9d3839f31740
|
data/.gitignore
CHANGED
data/.gitmodules
CHANGED
data/.rspec
ADDED
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# turbot-runner
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/turbot-runner.svg)](https://badge.fury.io/rb/turbot-runner)
|
4
|
+
[![Build Status](https://secure.travis-ci.org/openc/turbot-runner.png)](https://travis-ci.org/openc/turbot-runner)
|
5
|
+
[![Dependency Status](https://gemnasium.com/openc/turbot-runner.png)](https://gemnasium.com/openc/turbot-runner)
|
6
|
+
[![Coverage Status](https://coveralls.io/repos/openc/turbot-runner/badge.png)](https://coveralls.io/r/openc/turbot-runner)
|
7
|
+
[![Code Climate](https://codeclimate.com/github/openc/turbot-runner.png)](https://codeclimate.com/github/openc/turbot-runner)
|
8
|
+
|
3
9
|
## Getting started
|
4
10
|
|
5
11
|
git submodule update --init
|
@@ -60,8 +66,4 @@ default, 24 hours) it interrupts itself, and marks the run as having failed.
|
|
60
66
|
|
61
67
|
## Running the tests
|
62
68
|
|
63
|
-
Tests are run with rspec:
|
64
|
-
|
65
|
-
`./bin/rspec`
|
66
|
-
|
67
69
|
The first two specs to run require some manual input.
|
@@ -18,6 +18,7 @@ module TurbotRunner
|
|
18
18
|
else
|
19
19
|
record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
|
20
20
|
|
21
|
+
# TODO Document why we aren't passing retrieved_at to the validator.
|
21
22
|
record_to_validate = record.select {|k, v| k != 'retrieved_at'}
|
22
23
|
|
23
24
|
error_message = Validator.validate(
|
@@ -64,9 +64,9 @@
|
|
64
64
|
"format": "date"
|
65
65
|
},
|
66
66
|
"retrieved_at": {
|
67
|
-
"description": "
|
67
|
+
"description": "The time or date at which the source URL was requested",
|
68
68
|
"type": "string",
|
69
|
-
"format": "date
|
69
|
+
"format": "date"
|
70
70
|
},
|
71
71
|
"other_attributes": {
|
72
72
|
"description": "Use for other attributes for which we don't yet have curated schema attributes",
|
@@ -267,6 +267,13 @@
|
|
267
267
|
"type": "string",
|
268
268
|
"enum": ["other"]
|
269
269
|
},
|
270
|
+
"classification": {
|
271
|
+
"description": "The type of judgment",
|
272
|
+
"type": "array",
|
273
|
+
"items": {
|
274
|
+
"$ref": "includes/classification.json"
|
275
|
+
}
|
276
|
+
},
|
270
277
|
"body": {
|
271
278
|
"description": "The unstructured prose content",
|
272
279
|
"$ref": "#/definitions/body"
|
@@ -405,9 +412,9 @@
|
|
405
412
|
"format": "date"
|
406
413
|
},
|
407
414
|
"retrieved_at": {
|
408
|
-
"description": "The time at which the source URL was requested",
|
415
|
+
"description": "The time or date at which the source URL was requested",
|
409
416
|
"type": "string",
|
410
|
-
"format": "date
|
417
|
+
"format": "date"
|
411
418
|
},
|
412
419
|
"confidence": {
|
413
420
|
"description": "The scraper's author's confidence in the accuracy of the data",
|
@@ -33,7 +33,7 @@
|
|
33
33
|
},
|
34
34
|
"retrieved_at": {
|
35
35
|
"type": "string",
|
36
|
-
"format": "date
|
36
|
+
"format": "date"
|
37
37
|
},
|
38
38
|
"current_status": {
|
39
39
|
"type": [
|
@@ -239,7 +239,16 @@
|
|
239
239
|
}
|
240
240
|
},
|
241
241
|
"additionalProperties": false,
|
242
|
-
"
|
243
|
-
|
242
|
+
"anyOf": [
|
243
|
+
{
|
244
|
+
"required": [
|
245
|
+
"name"
|
246
|
+
]
|
247
|
+
},
|
248
|
+
{
|
249
|
+
"required": [
|
250
|
+
"company_number"
|
251
|
+
]
|
252
|
+
}
|
244
253
|
]
|
245
|
-
}
|
254
|
+
}
|
@@ -59,9 +59,9 @@
|
|
59
59
|
"format": "date"
|
60
60
|
},
|
61
61
|
"retrieved_at": {
|
62
|
-
"description": "
|
62
|
+
"description": "The time or date at which the source URL was requested",
|
63
63
|
"type": "string",
|
64
|
-
"format": "date
|
64
|
+
"format": "date"
|
65
65
|
},
|
66
66
|
"licence_url": {
|
67
67
|
"type": "string",
|
data/spec/lib/runner_spec.rb
CHANGED
@@ -168,7 +168,7 @@ describe TurbotRunner::Runner do
|
|
168
168
|
context 'with a handler that interrupts the runner' do
|
169
169
|
before do
|
170
170
|
class Handler < TurbotRunner::BaseHandler
|
171
|
-
def initialize
|
171
|
+
def initialize
|
172
172
|
@count = 0
|
173
173
|
super
|
174
174
|
end
|
@@ -266,11 +266,12 @@ describe TurbotRunner::Runner do
|
|
266
266
|
|
267
267
|
context 'when the scraped data is provided' do
|
268
268
|
before do
|
269
|
+
@runner = test_runner('bot-with-transformer', :scraper_provided => true)
|
270
|
+
@runner.set_up_output_directory
|
269
271
|
FileUtils.cp(
|
270
272
|
File.join('spec', 'outputs', 'full-scraper.out'),
|
271
273
|
File.join(File.dirname(__FILE__), '../bots', 'bot-with-transformer', 'output', 'scraper.out')
|
272
274
|
)
|
273
|
-
@runner = test_runner('bot-with-transformer', :scraper_provided => true)
|
274
275
|
end
|
275
276
|
|
276
277
|
it 'does not run scraper' do
|
@@ -296,8 +297,8 @@ describe TurbotRunner::Runner do
|
|
296
297
|
class Handler < TurbotRunner::BaseHandler
|
297
298
|
attr_reader :records_seen
|
298
299
|
|
299
|
-
def initialize
|
300
|
-
@records_seen = Hash.new
|
300
|
+
def initialize
|
301
|
+
@records_seen = Hash.new(0)
|
301
302
|
super
|
302
303
|
end
|
303
304
|
|
@@ -324,7 +325,7 @@ describe TurbotRunner::Runner do
|
|
324
325
|
it 'can cope when scraper has failed immediately' do
|
325
326
|
test_runner('bot-that-crashes-immediately').run
|
326
327
|
|
327
|
-
runner = test_runner('bot-
|
328
|
+
runner = test_runner('bot-that-crashes-immediately',
|
328
329
|
:record_handler => @handler
|
329
330
|
)
|
330
331
|
|
@@ -338,15 +339,19 @@ describe TurbotRunner::Runner do
|
|
338
339
|
end
|
339
340
|
|
340
341
|
it 'clears existing output' do
|
342
|
+
@runner.set_up_output_directory
|
341
343
|
path = File.join(@runner.base_directory, 'output', 'scraper.out')
|
342
344
|
FileUtils.touch(path)
|
345
|
+
|
343
346
|
@runner.set_up_output_directory
|
344
347
|
expect(File.exist?(path)).to be(false)
|
345
348
|
end
|
346
349
|
|
347
350
|
it 'does not clear existing files that are not output files' do
|
351
|
+
@runner.set_up_output_directory
|
348
352
|
path = File.join(@runner.base_directory, 'output', 'stdout')
|
349
353
|
FileUtils.touch(path)
|
354
|
+
|
350
355
|
@runner.set_up_output_directory
|
351
356
|
expect(File.exist?(path)).to be(true)
|
352
357
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -1,4 +1,15 @@
|
|
1
|
-
require '
|
1
|
+
require 'rubygems'
|
2
|
+
|
3
|
+
require 'simplecov'
|
4
|
+
require 'coveralls'
|
5
|
+
SimpleCov.formatter = Coveralls::SimpleCov::Formatter
|
6
|
+
SimpleCov.start do
|
7
|
+
add_filter 'spec'
|
8
|
+
end
|
9
|
+
|
10
|
+
require 'rspec'
|
11
|
+
|
12
|
+
require File.dirname(__FILE__) + '/../lib/turbot_runner'
|
2
13
|
|
3
14
|
RSpec::Matchers.define(:fail_validation_with) do |expected_error|
|
4
15
|
match do |record|
|
data/turbot-runner.gemspec
CHANGED
@@ -19,8 +19,8 @@ Gem::Specification.new do |gem|
|
|
19
19
|
|
20
20
|
gem.add_dependency "activesupport", '4.1.4'
|
21
21
|
gem.add_dependency "openc-json_schema"
|
22
|
-
|
22
|
+
|
23
|
+
gem.add_development_dependency "coveralls"
|
23
24
|
gem.add_development_dependency "rake"
|
24
|
-
gem.add_development_dependency "rspec", "
|
25
|
-
gem.add_development_dependency "rspec-mocks", ">= 3.0"
|
25
|
+
gem.add_development_dependency "rspec", "~> 3.4.0"
|
26
26
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turbot-runner
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.15
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- OpenCorporates
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -39,7 +39,7 @@ dependencies:
|
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '0'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
42
|
+
name: coveralls
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
45
|
- - ">="
|
@@ -70,30 +70,16 @@ dependencies:
|
|
70
70
|
name: rspec
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
75
|
+
version: 3.4.0
|
76
76
|
type: :development
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
|
-
- - "
|
80
|
+
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rspec-mocks
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '3.0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '3.0'
|
82
|
+
version: 3.4.0
|
97
83
|
description:
|
98
84
|
email: bots@opencorporates.com
|
99
85
|
executables: []
|
@@ -102,6 +88,8 @@ extra_rdoc_files: []
|
|
102
88
|
files:
|
103
89
|
- ".gitignore"
|
104
90
|
- ".gitmodules"
|
91
|
+
- ".rspec"
|
92
|
+
- ".travis.yml"
|
105
93
|
- Gemfile
|
106
94
|
- README.md
|
107
95
|
- Rakefile
|
@@ -232,4 +220,60 @@ rubygems_version: 2.4.5
|
|
232
220
|
signing_key:
|
233
221
|
specification_version: 4
|
234
222
|
summary: Utilities for running bots with Turbot
|
235
|
-
test_files:
|
223
|
+
test_files:
|
224
|
+
- spec/bots/bot-that-crashes-immediately/manifest.json
|
225
|
+
- spec/bots/bot-that-crashes-immediately/scraper.rb
|
226
|
+
- spec/bots/bot-that-crashes-immediately/transformer1.rb
|
227
|
+
- spec/bots/bot-that-crashes-in-scraper/manifest.json
|
228
|
+
- spec/bots/bot-that-crashes-in-scraper/scraper.rb
|
229
|
+
- spec/bots/bot-that-crashes-in-scraper/transformer1.rb
|
230
|
+
- spec/bots/bot-that-crashes-in-transformer/manifest.json
|
231
|
+
- spec/bots/bot-that-crashes-in-transformer/scraper.rb
|
232
|
+
- spec/bots/bot-that-crashes-in-transformer/transformer1.rb
|
233
|
+
- spec/bots/bot-that-crashes-in-transformer/transformer2.rb
|
234
|
+
- spec/bots/bot-that-emits-run-ended/manifest.json
|
235
|
+
- spec/bots/bot-that-emits-run-ended/scraper.rb
|
236
|
+
- spec/bots/bot-that-emits-snapshot-ended/manifest.json
|
237
|
+
- spec/bots/bot-that-emits-snapshot-ended/scraper.rb
|
238
|
+
- spec/bots/bot-that-expects-file/manifest.json
|
239
|
+
- spec/bots/bot-that-expects-file/scraper.rb
|
240
|
+
- spec/bots/bot-that-expects-file/something.txt
|
241
|
+
- spec/bots/bot-that-is-allowed-to-produce-duplicates/manifest.json
|
242
|
+
- spec/bots/bot-that-is-allowed-to-produce-duplicates/scraper.rb
|
243
|
+
- spec/bots/bot-that-produces-duplicates/manifest.json
|
244
|
+
- spec/bots/bot-that-produces-duplicates/scraper.rb
|
245
|
+
- spec/bots/bot-with-invalid-data-type/manifest.json
|
246
|
+
- spec/bots/bot-with-invalid-data-type/scraper.rb
|
247
|
+
- spec/bots/bot-with-invalid-sample-date/manifest.json
|
248
|
+
- spec/bots/bot-with-invalid-sample-date/scraper.rb
|
249
|
+
- spec/bots/bot-with-pause/manifest.json
|
250
|
+
- spec/bots/bot-with-pause/scraper.rb
|
251
|
+
- spec/bots/bot-with-transformer/manifest.json
|
252
|
+
- spec/bots/bot-with-transformer/scraper.rb
|
253
|
+
- spec/bots/bot-with-transformer/transformer.rb
|
254
|
+
- spec/bots/bot-with-transformers/manifest.json
|
255
|
+
- spec/bots/bot-with-transformers/scraper.rb
|
256
|
+
- spec/bots/bot-with-transformers/transformer1.rb
|
257
|
+
- spec/bots/bot-with-transformers/transformer2.rb
|
258
|
+
- spec/bots/invalid-json-bot/manifest.json
|
259
|
+
- spec/bots/invalid-json-bot/scraper.rb
|
260
|
+
- spec/bots/invalid-record-bot/manifest.json
|
261
|
+
- spec/bots/invalid-record-bot/scraper.rb
|
262
|
+
- spec/bots/logging-bot/manifest.json
|
263
|
+
- spec/bots/logging-bot/scraper.rb
|
264
|
+
- spec/bots/python-bot/manifest.json
|
265
|
+
- spec/bots/python-bot/scraper.py
|
266
|
+
- spec/bots/ruby-bot/manifest.json
|
267
|
+
- spec/bots/ruby-bot/scraper.rb
|
268
|
+
- spec/bots/slow-bot/manifest.json
|
269
|
+
- spec/bots/slow-bot/scraper.rb
|
270
|
+
- spec/lib/processor_spec.rb
|
271
|
+
- spec/lib/runner_spec.rb
|
272
|
+
- spec/lib/utils_spec.rb
|
273
|
+
- spec/lib/validator_spec.rb
|
274
|
+
- spec/manual_spec.rb
|
275
|
+
- spec/outputs/full-scraper.out
|
276
|
+
- spec/outputs/full-transformer.out
|
277
|
+
- spec/outputs/truncated-scraper.out
|
278
|
+
- spec/spec_helper.rb
|
279
|
+
has_rdoc:
|