turbot-runner 0.2.14 → 0.2.15

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a93e7b12e6d13b95affabf91eb2bcf3914260948
4
- data.tar.gz: f129aea4ce1e9f857fa1e9a587dc5fd69a732c81
3
+ metadata.gz: f9da65f51ab5d12fe470d847e617ee7f65f39217
4
+ data.tar.gz: 692ce7bfe967e01b6c7485f301cc8697d6bb2cb1
5
5
  SHA512:
6
- metadata.gz: 21329ceb668293687d5c0822ae29192a4c30ba3cc263cff784e0373cf94a8e9033d05305eafe53c5508622002c93db493571667a7c20149ac375750f24301134
7
- data.tar.gz: f48480dacda4fdf1c3e3d4b21e01ac86851a6d91b463449c2670749fac9079fcdce794b833a9f7315b4dd1405afe525becb19b0e5ce4cb1cd1c3340a3d14b1d2
6
+ metadata.gz: 4fab8ca2464231881299ff8ad68f0549ac3aa077ca637965413846880f09f3d547c66efb13ef73704eb0dd72faffcf9e295b83f092e3f333406351fd6930b126
7
+ data.tar.gz: 937e02cc9213c18c484cc9262c2691802c437f7f47e92d0f58f1c08a37d4925b570033394aec46751a9e6e58673e9426f83547b47ebb175a0dca9d3839f31740
data/.gitignore CHANGED
@@ -1,12 +1,15 @@
1
- /.bundle
1
+ *.gem
2
+ .bundle
3
+ .yardoc
4
+ coverage
5
+ Gemfile.lock
6
+ doc/*
7
+ pkg/*
2
8
  /.rvmrc
3
- /coverage
4
- /pkg
5
- /rdoc
6
9
  /tags
7
10
  /vendor
8
11
  /.rbenv-version
12
+ /spec/bots/*/output
9
13
  *~
10
14
  .#*
11
15
  #*#
12
- /spec/bots/*/output
data/.gitmodules CHANGED
@@ -1,3 +1,3 @@
1
1
  [submodule "schema"]
2
2
  path = schema
3
- url = git@github.com:openc/openc-schema.git
3
+ url = https://github.com/openc/openc-schema.git
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --require spec_helper
3
+ --order random
data/.travis.yml ADDED
@@ -0,0 +1,11 @@
1
+ sudo: false
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 1.9.3
6
+ - 2.0.0
7
+ - 2.1.0
8
+ - 2.2.0
9
+ - 2.3.0
10
+ script:
11
+ bundle exec rspec --exclude-pattern "spec/manual_spec.rb"
data/README.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # turbot-runner
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/turbot-runner.svg)](https://badge.fury.io/rb/turbot-runner)
4
+ [![Build Status](https://secure.travis-ci.org/openc/turbot-runner.png)](https://travis-ci.org/openc/turbot-runner)
5
+ [![Dependency Status](https://gemnasium.com/openc/turbot-runner.png)](https://gemnasium.com/openc/turbot-runner)
6
+ [![Coverage Status](https://coveralls.io/repos/openc/turbot-runner/badge.png)](https://coveralls.io/r/openc/turbot-runner)
7
+ [![Code Climate](https://codeclimate.com/github/openc/turbot-runner.png)](https://codeclimate.com/github/openc/turbot-runner)
8
+
3
9
  ## Getting started
4
10
 
5
11
  git submodule update --init
@@ -60,8 +66,4 @@ default, 24 hours) it interrupts itself, and marks the run as having failed.
60
66
 
61
67
  ## Running the tests
62
68
 
63
- Tests are run with rspec:
64
-
65
- `./bin/rspec`
66
-
67
69
  The first two specs to run require some manual input.
@@ -18,6 +18,7 @@ module TurbotRunner
18
18
  else
19
19
  record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
20
20
 
21
+ # TODO Document why we aren't passing retrieved_at to the validator.
21
22
  record_to_validate = record.select {|k, v| k != 'retrieved_at'}
22
23
 
23
24
  error_message = Validator.validate(
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.2.14'
2
+ VERSION = '0.2.15'
3
3
  end
@@ -117,7 +117,7 @@
117
117
  },
118
118
  "retrieved_at": {
119
119
  "type": "string",
120
- "format": "date-time"
120
+ "format": "date"
121
121
  },
122
122
  "currency": {
123
123
  "type": "string",
@@ -29,7 +29,7 @@
29
29
  },
30
30
  "retrieved_at": {
31
31
  "type": "string",
32
- "format": "date-time"
32
+ "format": "date"
33
33
  },
34
34
  "current_status": {
35
35
  "type": [
@@ -64,9 +64,9 @@
64
64
  "format": "date"
65
65
  },
66
66
  "retrieved_at": {
67
- "description": "Date-time this was retrieved from the source",
67
+ "description": "The time or date at which the source URL was requested",
68
68
  "type": "string",
69
- "format": "date-time"
69
+ "format": "date"
70
70
  },
71
71
  "other_attributes": {
72
72
  "description": "Use for other attributes for which we don't yet have curated schema attributes",
@@ -267,6 +267,13 @@
267
267
  "type": "string",
268
268
  "enum": ["other"]
269
269
  },
270
+ "classification": {
271
+ "description": "The type of judgment",
272
+ "type": "array",
273
+ "items": {
274
+ "$ref": "includes/classification.json"
275
+ }
276
+ },
270
277
  "body": {
271
278
  "description": "The unstructured prose content",
272
279
  "$ref": "#/definitions/body"
@@ -405,9 +412,9 @@
405
412
  "format": "date"
406
413
  },
407
414
  "retrieved_at": {
408
- "description": "The time at which the source URL was requested",
415
+ "description": "The time or date at which the source URL was requested",
409
416
  "type": "string",
410
- "format": "date-time"
417
+ "format": "date"
411
418
  },
412
419
  "confidence": {
413
420
  "description": "The scraper's author's confidence in the accuracy of the data",
@@ -33,7 +33,7 @@
33
33
  },
34
34
  "retrieved_at": {
35
35
  "type": "string",
36
- "format": "date-time"
36
+ "format": "date"
37
37
  },
38
38
  "current_status": {
39
39
  "type": [
@@ -239,7 +239,16 @@
239
239
  }
240
240
  },
241
241
  "additionalProperties": false,
242
- "required": [
243
- "name"
242
+ "anyOf": [
243
+ {
244
+ "required": [
245
+ "name"
246
+ ]
247
+ },
248
+ {
249
+ "required": [
250
+ "company_number"
251
+ ]
252
+ }
244
253
  ]
245
- }
254
+ }
@@ -59,9 +59,9 @@
59
59
  "format": "date"
60
60
  },
61
61
  "retrieved_at": {
62
- "description": "Date-time this was retrieved from the source",
62
+ "description": "The time or date at which the source URL was requested",
63
63
  "type": "string",
64
- "format": "date-time"
64
+ "format": "date"
65
65
  },
66
66
  "licence_url": {
67
67
  "type": "string",
@@ -168,7 +168,7 @@ describe TurbotRunner::Runner do
168
168
  context 'with a handler that interrupts the runner' do
169
169
  before do
170
170
  class Handler < TurbotRunner::BaseHandler
171
- def initialize(*)
171
+ def initialize
172
172
  @count = 0
173
173
  super
174
174
  end
@@ -266,11 +266,12 @@ describe TurbotRunner::Runner do
266
266
 
267
267
  context 'when the scraped data is provided' do
268
268
  before do
269
+ @runner = test_runner('bot-with-transformer', :scraper_provided => true)
270
+ @runner.set_up_output_directory
269
271
  FileUtils.cp(
270
272
  File.join('spec', 'outputs', 'full-scraper.out'),
271
273
  File.join(File.dirname(__FILE__), '../bots', 'bot-with-transformer', 'output', 'scraper.out')
272
274
  )
273
- @runner = test_runner('bot-with-transformer', :scraper_provided => true)
274
275
  end
275
276
 
276
277
  it 'does not run scraper' do
@@ -296,8 +297,8 @@ describe TurbotRunner::Runner do
296
297
  class Handler < TurbotRunner::BaseHandler
297
298
  attr_reader :records_seen
298
299
 
299
- def initialize(*)
300
- @records_seen = Hash.new {|h, k| h[k] = 0}
300
+ def initialize
301
+ @records_seen = Hash.new(0)
301
302
  super
302
303
  end
303
304
 
@@ -324,7 +325,7 @@ describe TurbotRunner::Runner do
324
325
  it 'can cope when scraper has failed immediately' do
325
326
  test_runner('bot-that-crashes-immediately').run
326
327
 
327
- runner = test_runner('bot-with-transformer',
328
+ runner = test_runner('bot-that-crashes-immediately',
328
329
  :record_handler => @handler
329
330
  )
330
331
 
@@ -338,15 +339,19 @@ describe TurbotRunner::Runner do
338
339
  end
339
340
 
340
341
  it 'clears existing output' do
342
+ @runner.set_up_output_directory
341
343
  path = File.join(@runner.base_directory, 'output', 'scraper.out')
342
344
  FileUtils.touch(path)
345
+
343
346
  @runner.set_up_output_directory
344
347
  expect(File.exist?(path)).to be(false)
345
348
  end
346
349
 
347
350
  it 'does not clear existing files that are not output files' do
351
+ @runner.set_up_output_directory
348
352
  path = File.join(@runner.base_directory, 'output', 'stdout')
349
353
  FileUtils.touch(path)
354
+
350
355
  @runner.set_up_output_directory
351
356
  expect(File.exist?(path)).to be(true)
352
357
  end
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,15 @@
1
- require 'turbot_runner'
1
+ require 'rubygems'
2
+
3
+ require 'simplecov'
4
+ require 'coveralls'
5
+ SimpleCov.formatter = Coveralls::SimpleCov::Formatter
6
+ SimpleCov.start do
7
+ add_filter 'spec'
8
+ end
9
+
10
+ require 'rspec'
11
+
12
+ require File.dirname(__FILE__) + '/../lib/turbot_runner'
2
13
 
3
14
  RSpec::Matchers.define(:fail_validation_with) do |expected_error|
4
15
  match do |record|
@@ -19,8 +19,8 @@ Gem::Specification.new do |gem|
19
19
 
20
20
  gem.add_dependency "activesupport", '4.1.4'
21
21
  gem.add_dependency "openc-json_schema"
22
- gem.add_development_dependency "pry"
22
+
23
+ gem.add_development_dependency "coveralls"
23
24
  gem.add_development_dependency "rake"
24
- gem.add_development_dependency "rspec", ">= 3.0"
25
- gem.add_development_dependency "rspec-mocks", ">= 3.0"
25
+ gem.add_development_dependency "rspec", "~> 3.4.0"
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.14
4
+ version: 0.2.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-08 00:00:00.000000000 Z
11
+ date: 2016-02-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: pry
42
+ name: coveralls
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -70,30 +70,16 @@ dependencies:
70
70
  name: rspec
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '3.0'
75
+ version: 3.4.0
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '3.0'
83
- - !ruby/object:Gem::Dependency
84
- name: rspec-mocks
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '3.0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '3.0'
82
+ version: 3.4.0
97
83
  description:
98
84
  email: bots@opencorporates.com
99
85
  executables: []
@@ -102,6 +88,8 @@ extra_rdoc_files: []
102
88
  files:
103
89
  - ".gitignore"
104
90
  - ".gitmodules"
91
+ - ".rspec"
92
+ - ".travis.yml"
105
93
  - Gemfile
106
94
  - README.md
107
95
  - Rakefile
@@ -232,4 +220,60 @@ rubygems_version: 2.4.5
232
220
  signing_key:
233
221
  specification_version: 4
234
222
  summary: Utilities for running bots with Turbot
235
- test_files: []
223
+ test_files:
224
+ - spec/bots/bot-that-crashes-immediately/manifest.json
225
+ - spec/bots/bot-that-crashes-immediately/scraper.rb
226
+ - spec/bots/bot-that-crashes-immediately/transformer1.rb
227
+ - spec/bots/bot-that-crashes-in-scraper/manifest.json
228
+ - spec/bots/bot-that-crashes-in-scraper/scraper.rb
229
+ - spec/bots/bot-that-crashes-in-scraper/transformer1.rb
230
+ - spec/bots/bot-that-crashes-in-transformer/manifest.json
231
+ - spec/bots/bot-that-crashes-in-transformer/scraper.rb
232
+ - spec/bots/bot-that-crashes-in-transformer/transformer1.rb
233
+ - spec/bots/bot-that-crashes-in-transformer/transformer2.rb
234
+ - spec/bots/bot-that-emits-run-ended/manifest.json
235
+ - spec/bots/bot-that-emits-run-ended/scraper.rb
236
+ - spec/bots/bot-that-emits-snapshot-ended/manifest.json
237
+ - spec/bots/bot-that-emits-snapshot-ended/scraper.rb
238
+ - spec/bots/bot-that-expects-file/manifest.json
239
+ - spec/bots/bot-that-expects-file/scraper.rb
240
+ - spec/bots/bot-that-expects-file/something.txt
241
+ - spec/bots/bot-that-is-allowed-to-produce-duplicates/manifest.json
242
+ - spec/bots/bot-that-is-allowed-to-produce-duplicates/scraper.rb
243
+ - spec/bots/bot-that-produces-duplicates/manifest.json
244
+ - spec/bots/bot-that-produces-duplicates/scraper.rb
245
+ - spec/bots/bot-with-invalid-data-type/manifest.json
246
+ - spec/bots/bot-with-invalid-data-type/scraper.rb
247
+ - spec/bots/bot-with-invalid-sample-date/manifest.json
248
+ - spec/bots/bot-with-invalid-sample-date/scraper.rb
249
+ - spec/bots/bot-with-pause/manifest.json
250
+ - spec/bots/bot-with-pause/scraper.rb
251
+ - spec/bots/bot-with-transformer/manifest.json
252
+ - spec/bots/bot-with-transformer/scraper.rb
253
+ - spec/bots/bot-with-transformer/transformer.rb
254
+ - spec/bots/bot-with-transformers/manifest.json
255
+ - spec/bots/bot-with-transformers/scraper.rb
256
+ - spec/bots/bot-with-transformers/transformer1.rb
257
+ - spec/bots/bot-with-transformers/transformer2.rb
258
+ - spec/bots/invalid-json-bot/manifest.json
259
+ - spec/bots/invalid-json-bot/scraper.rb
260
+ - spec/bots/invalid-record-bot/manifest.json
261
+ - spec/bots/invalid-record-bot/scraper.rb
262
+ - spec/bots/logging-bot/manifest.json
263
+ - spec/bots/logging-bot/scraper.rb
264
+ - spec/bots/python-bot/manifest.json
265
+ - spec/bots/python-bot/scraper.py
266
+ - spec/bots/ruby-bot/manifest.json
267
+ - spec/bots/ruby-bot/scraper.rb
268
+ - spec/bots/slow-bot/manifest.json
269
+ - spec/bots/slow-bot/scraper.rb
270
+ - spec/lib/processor_spec.rb
271
+ - spec/lib/runner_spec.rb
272
+ - spec/lib/utils_spec.rb
273
+ - spec/lib/validator_spec.rb
274
+ - spec/manual_spec.rb
275
+ - spec/outputs/full-scraper.out
276
+ - spec/outputs/full-transformer.out
277
+ - spec/outputs/truncated-scraper.out
278
+ - spec/spec_helper.rb
279
+ has_rdoc: