turbot-runner 0.2.14 → 0.2.15

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a93e7b12e6d13b95affabf91eb2bcf3914260948
4
- data.tar.gz: f129aea4ce1e9f857fa1e9a587dc5fd69a732c81
3
+ metadata.gz: f9da65f51ab5d12fe470d847e617ee7f65f39217
4
+ data.tar.gz: 692ce7bfe967e01b6c7485f301cc8697d6bb2cb1
5
5
  SHA512:
6
- metadata.gz: 21329ceb668293687d5c0822ae29192a4c30ba3cc263cff784e0373cf94a8e9033d05305eafe53c5508622002c93db493571667a7c20149ac375750f24301134
7
- data.tar.gz: f48480dacda4fdf1c3e3d4b21e01ac86851a6d91b463449c2670749fac9079fcdce794b833a9f7315b4dd1405afe525becb19b0e5ce4cb1cd1c3340a3d14b1d2
6
+ metadata.gz: 4fab8ca2464231881299ff8ad68f0549ac3aa077ca637965413846880f09f3d547c66efb13ef73704eb0dd72faffcf9e295b83f092e3f333406351fd6930b126
7
+ data.tar.gz: 937e02cc9213c18c484cc9262c2691802c437f7f47e92d0f58f1c08a37d4925b570033394aec46751a9e6e58673e9426f83547b47ebb175a0dca9d3839f31740
data/.gitignore CHANGED
@@ -1,12 +1,15 @@
1
- /.bundle
1
+ *.gem
2
+ .bundle
3
+ .yardoc
4
+ coverage
5
+ Gemfile.lock
6
+ doc/*
7
+ pkg/*
2
8
  /.rvmrc
3
- /coverage
4
- /pkg
5
- /rdoc
6
9
  /tags
7
10
  /vendor
8
11
  /.rbenv-version
12
+ /spec/bots/*/output
9
13
  *~
10
14
  .#*
11
15
  #*#
12
- /spec/bots/*/output
data/.gitmodules CHANGED
@@ -1,3 +1,3 @@
1
1
  [submodule "schema"]
2
2
  path = schema
3
- url = git@github.com:openc/openc-schema.git
3
+ url = https://github.com/openc/openc-schema.git
data/.rspec ADDED
@@ -0,0 +1,3 @@
1
+ --color
2
+ --require spec_helper
3
+ --order random
data/.travis.yml ADDED
@@ -0,0 +1,11 @@
1
+ sudo: false
2
+ language: ruby
3
+ cache: bundler
4
+ rvm:
5
+ - 1.9.3
6
+ - 2.0.0
7
+ - 2.1.0
8
+ - 2.2.0
9
+ - 2.3.0
10
+ script:
11
+ bundle exec rspec --exclude-pattern "spec/manual_spec.rb"
data/README.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # turbot-runner
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/turbot-runner.svg)](https://badge.fury.io/rb/turbot-runner)
4
+ [![Build Status](https://secure.travis-ci.org/openc/turbot-runner.png)](https://travis-ci.org/openc/turbot-runner)
5
+ [![Dependency Status](https://gemnasium.com/openc/turbot-runner.png)](https://gemnasium.com/openc/turbot-runner)
6
+ [![Coverage Status](https://coveralls.io/repos/openc/turbot-runner/badge.png)](https://coveralls.io/r/openc/turbot-runner)
7
+ [![Code Climate](https://codeclimate.com/github/openc/turbot-runner.png)](https://codeclimate.com/github/openc/turbot-runner)
8
+
3
9
  ## Getting started
4
10
 
5
11
  git submodule update --init
@@ -60,8 +66,4 @@ default, 24 hours) it interrupts itself, and marks the run as having failed.
60
66
 
61
67
  ## Running the tests
62
68
 
63
- Tests are run with rspec:
64
-
65
- `./bin/rspec`
66
-
67
69
  The first two specs to run require some manual input.
@@ -18,6 +18,7 @@ module TurbotRunner
18
18
  else
19
19
  record = Openc::JsonSchema.convert_dates(schema_path, JSON.parse(line))
20
20
 
21
+ # TODO Document why we aren't passing retrieved_at to the validator.
21
22
  record_to_validate = record.select {|k, v| k != 'retrieved_at'}
22
23
 
23
24
  error_message = Validator.validate(
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.2.14'
2
+ VERSION = '0.2.15'
3
3
  end
@@ -117,7 +117,7 @@
117
117
  },
118
118
  "retrieved_at": {
119
119
  "type": "string",
120
- "format": "date-time"
120
+ "format": "date"
121
121
  },
122
122
  "currency": {
123
123
  "type": "string",
@@ -29,7 +29,7 @@
29
29
  },
30
30
  "retrieved_at": {
31
31
  "type": "string",
32
- "format": "date-time"
32
+ "format": "date"
33
33
  },
34
34
  "current_status": {
35
35
  "type": [
@@ -64,9 +64,9 @@
64
64
  "format": "date"
65
65
  },
66
66
  "retrieved_at": {
67
- "description": "Date-time this was retrieved from the source",
67
+ "description": "The time or date at which the source URL was requested",
68
68
  "type": "string",
69
- "format": "date-time"
69
+ "format": "date"
70
70
  },
71
71
  "other_attributes": {
72
72
  "description": "Use for other attributes for which we don't yet have curated schema attributes",
@@ -267,6 +267,13 @@
267
267
  "type": "string",
268
268
  "enum": ["other"]
269
269
  },
270
+ "classification": {
271
+ "description": "The type of judgment",
272
+ "type": "array",
273
+ "items": {
274
+ "$ref": "includes/classification.json"
275
+ }
276
+ },
270
277
  "body": {
271
278
  "description": "The unstructured prose content",
272
279
  "$ref": "#/definitions/body"
@@ -405,9 +412,9 @@
405
412
  "format": "date"
406
413
  },
407
414
  "retrieved_at": {
408
- "description": "The time at which the source URL was requested",
415
+ "description": "The time or date at which the source URL was requested",
409
416
  "type": "string",
410
- "format": "date-time"
417
+ "format": "date"
411
418
  },
412
419
  "confidence": {
413
420
  "description": "The scraper's author's confidence in the accuracy of the data",
@@ -33,7 +33,7 @@
33
33
  },
34
34
  "retrieved_at": {
35
35
  "type": "string",
36
- "format": "date-time"
36
+ "format": "date"
37
37
  },
38
38
  "current_status": {
39
39
  "type": [
@@ -239,7 +239,16 @@
239
239
  }
240
240
  },
241
241
  "additionalProperties": false,
242
- "required": [
243
- "name"
242
+ "anyOf": [
243
+ {
244
+ "required": [
245
+ "name"
246
+ ]
247
+ },
248
+ {
249
+ "required": [
250
+ "company_number"
251
+ ]
252
+ }
244
253
  ]
245
- }
254
+ }
@@ -59,9 +59,9 @@
59
59
  "format": "date"
60
60
  },
61
61
  "retrieved_at": {
62
- "description": "Date-time this was retrieved from the source",
62
+ "description": "The time or date at which the source URL was requested",
63
63
  "type": "string",
64
- "format": "date-time"
64
+ "format": "date"
65
65
  },
66
66
  "licence_url": {
67
67
  "type": "string",
@@ -168,7 +168,7 @@ describe TurbotRunner::Runner do
168
168
  context 'with a handler that interrupts the runner' do
169
169
  before do
170
170
  class Handler < TurbotRunner::BaseHandler
171
- def initialize(*)
171
+ def initialize
172
172
  @count = 0
173
173
  super
174
174
  end
@@ -266,11 +266,12 @@ describe TurbotRunner::Runner do
266
266
 
267
267
  context 'when the scraped data is provided' do
268
268
  before do
269
+ @runner = test_runner('bot-with-transformer', :scraper_provided => true)
270
+ @runner.set_up_output_directory
269
271
  FileUtils.cp(
270
272
  File.join('spec', 'outputs', 'full-scraper.out'),
271
273
  File.join(File.dirname(__FILE__), '../bots', 'bot-with-transformer', 'output', 'scraper.out')
272
274
  )
273
- @runner = test_runner('bot-with-transformer', :scraper_provided => true)
274
275
  end
275
276
 
276
277
  it 'does not run scraper' do
@@ -296,8 +297,8 @@ describe TurbotRunner::Runner do
296
297
  class Handler < TurbotRunner::BaseHandler
297
298
  attr_reader :records_seen
298
299
 
299
- def initialize(*)
300
- @records_seen = Hash.new {|h, k| h[k] = 0}
300
+ def initialize
301
+ @records_seen = Hash.new(0)
301
302
  super
302
303
  end
303
304
 
@@ -324,7 +325,7 @@ describe TurbotRunner::Runner do
324
325
  it 'can cope when scraper has failed immediately' do
325
326
  test_runner('bot-that-crashes-immediately').run
326
327
 
327
- runner = test_runner('bot-with-transformer',
328
+ runner = test_runner('bot-that-crashes-immediately',
328
329
  :record_handler => @handler
329
330
  )
330
331
 
@@ -338,15 +339,19 @@ describe TurbotRunner::Runner do
338
339
  end
339
340
 
340
341
  it 'clears existing output' do
342
+ @runner.set_up_output_directory
341
343
  path = File.join(@runner.base_directory, 'output', 'scraper.out')
342
344
  FileUtils.touch(path)
345
+
343
346
  @runner.set_up_output_directory
344
347
  expect(File.exist?(path)).to be(false)
345
348
  end
346
349
 
347
350
  it 'does not clear existing files that are not output files' do
351
+ @runner.set_up_output_directory
348
352
  path = File.join(@runner.base_directory, 'output', 'stdout')
349
353
  FileUtils.touch(path)
354
+
350
355
  @runner.set_up_output_directory
351
356
  expect(File.exist?(path)).to be(true)
352
357
  end
data/spec/spec_helper.rb CHANGED
@@ -1,4 +1,15 @@
1
- require 'turbot_runner'
1
+ require 'rubygems'
2
+
3
+ require 'simplecov'
4
+ require 'coveralls'
5
+ SimpleCov.formatter = Coveralls::SimpleCov::Formatter
6
+ SimpleCov.start do
7
+ add_filter 'spec'
8
+ end
9
+
10
+ require 'rspec'
11
+
12
+ require File.dirname(__FILE__) + '/../lib/turbot_runner'
2
13
 
3
14
  RSpec::Matchers.define(:fail_validation_with) do |expected_error|
4
15
  match do |record|
@@ -19,8 +19,8 @@ Gem::Specification.new do |gem|
19
19
 
20
20
  gem.add_dependency "activesupport", '4.1.4'
21
21
  gem.add_dependency "openc-json_schema"
22
- gem.add_development_dependency "pry"
22
+
23
+ gem.add_development_dependency "coveralls"
23
24
  gem.add_development_dependency "rake"
24
- gem.add_development_dependency "rspec", ">= 3.0"
25
- gem.add_development_dependency "rspec-mocks", ">= 3.0"
25
+ gem.add_development_dependency "rspec", "~> 3.4.0"
26
26
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.14
4
+ version: 0.2.15
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-08 00:00:00.000000000 Z
11
+ date: 2016-02-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -39,7 +39,7 @@ dependencies:
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
- name: pry
42
+ name: coveralls
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - ">="
@@ -70,30 +70,16 @@ dependencies:
70
70
  name: rspec
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '3.0'
75
+ version: 3.4.0
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '3.0'
83
- - !ruby/object:Gem::Dependency
84
- name: rspec-mocks
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - ">="
88
- - !ruby/object:Gem::Version
89
- version: '3.0'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - ">="
95
- - !ruby/object:Gem::Version
96
- version: '3.0'
82
+ version: 3.4.0
97
83
  description:
98
84
  email: bots@opencorporates.com
99
85
  executables: []
@@ -102,6 +88,8 @@ extra_rdoc_files: []
102
88
  files:
103
89
  - ".gitignore"
104
90
  - ".gitmodules"
91
+ - ".rspec"
92
+ - ".travis.yml"
105
93
  - Gemfile
106
94
  - README.md
107
95
  - Rakefile
@@ -232,4 +220,60 @@ rubygems_version: 2.4.5
232
220
  signing_key:
233
221
  specification_version: 4
234
222
  summary: Utilities for running bots with Turbot
235
- test_files: []
223
+ test_files:
224
+ - spec/bots/bot-that-crashes-immediately/manifest.json
225
+ - spec/bots/bot-that-crashes-immediately/scraper.rb
226
+ - spec/bots/bot-that-crashes-immediately/transformer1.rb
227
+ - spec/bots/bot-that-crashes-in-scraper/manifest.json
228
+ - spec/bots/bot-that-crashes-in-scraper/scraper.rb
229
+ - spec/bots/bot-that-crashes-in-scraper/transformer1.rb
230
+ - spec/bots/bot-that-crashes-in-transformer/manifest.json
231
+ - spec/bots/bot-that-crashes-in-transformer/scraper.rb
232
+ - spec/bots/bot-that-crashes-in-transformer/transformer1.rb
233
+ - spec/bots/bot-that-crashes-in-transformer/transformer2.rb
234
+ - spec/bots/bot-that-emits-run-ended/manifest.json
235
+ - spec/bots/bot-that-emits-run-ended/scraper.rb
236
+ - spec/bots/bot-that-emits-snapshot-ended/manifest.json
237
+ - spec/bots/bot-that-emits-snapshot-ended/scraper.rb
238
+ - spec/bots/bot-that-expects-file/manifest.json
239
+ - spec/bots/bot-that-expects-file/scraper.rb
240
+ - spec/bots/bot-that-expects-file/something.txt
241
+ - spec/bots/bot-that-is-allowed-to-produce-duplicates/manifest.json
242
+ - spec/bots/bot-that-is-allowed-to-produce-duplicates/scraper.rb
243
+ - spec/bots/bot-that-produces-duplicates/manifest.json
244
+ - spec/bots/bot-that-produces-duplicates/scraper.rb
245
+ - spec/bots/bot-with-invalid-data-type/manifest.json
246
+ - spec/bots/bot-with-invalid-data-type/scraper.rb
247
+ - spec/bots/bot-with-invalid-sample-date/manifest.json
248
+ - spec/bots/bot-with-invalid-sample-date/scraper.rb
249
+ - spec/bots/bot-with-pause/manifest.json
250
+ - spec/bots/bot-with-pause/scraper.rb
251
+ - spec/bots/bot-with-transformer/manifest.json
252
+ - spec/bots/bot-with-transformer/scraper.rb
253
+ - spec/bots/bot-with-transformer/transformer.rb
254
+ - spec/bots/bot-with-transformers/manifest.json
255
+ - spec/bots/bot-with-transformers/scraper.rb
256
+ - spec/bots/bot-with-transformers/transformer1.rb
257
+ - spec/bots/bot-with-transformers/transformer2.rb
258
+ - spec/bots/invalid-json-bot/manifest.json
259
+ - spec/bots/invalid-json-bot/scraper.rb
260
+ - spec/bots/invalid-record-bot/manifest.json
261
+ - spec/bots/invalid-record-bot/scraper.rb
262
+ - spec/bots/logging-bot/manifest.json
263
+ - spec/bots/logging-bot/scraper.rb
264
+ - spec/bots/python-bot/manifest.json
265
+ - spec/bots/python-bot/scraper.py
266
+ - spec/bots/ruby-bot/manifest.json
267
+ - spec/bots/ruby-bot/scraper.rb
268
+ - spec/bots/slow-bot/manifest.json
269
+ - spec/bots/slow-bot/scraper.rb
270
+ - spec/lib/processor_spec.rb
271
+ - spec/lib/runner_spec.rb
272
+ - spec/lib/utils_spec.rb
273
+ - spec/lib/validator_spec.rb
274
+ - spec/manual_spec.rb
275
+ - spec/outputs/full-scraper.out
276
+ - spec/outputs/full-transformer.out
277
+ - spec/outputs/truncated-scraper.out
278
+ - spec/spec_helper.rb
279
+ has_rdoc: