turbot-runner 0.2.13 → 0.2.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b717e95fb7ff52f87fb33c5cf95efe329fcc179b
4
- data.tar.gz: 2ea03178c9f54114f59980588b67bd84e6e5e3db
3
+ metadata.gz: a93e7b12e6d13b95affabf91eb2bcf3914260948
4
+ data.tar.gz: f129aea4ce1e9f857fa1e9a587dc5fd69a732c81
5
5
  SHA512:
6
- metadata.gz: 5d55bc8a5f4e8b35e0c5ac9b805dc6582774bd285b2da3c42870474af3e47815579fa0b9582a67738de964b83e4d755d501ed945e263015cd136ddeb5f78d96b
7
- data.tar.gz: 2d87e2bebf9d7884997941941ba2cafc66aac41a5d7e903754688643859a0f8e74acee903265ab2e0dead577a9fe49b3da0a63b1bd2e5e523d622d32af29d727
6
+ metadata.gz: 21329ceb668293687d5c0822ae29192a4c30ba3cc263cff784e0373cf94a8e9033d05305eafe53c5508622002c93db493571667a7c20149ac375750f24301134
7
+ data.tar.gz: f48480dacda4fdf1c3e3d4b21e01ac86851a6d91b463449c2670749fac9079fcdce794b833a9f7315b4dd1405afe525becb19b0e5ce4cb1cd1c3340a3d14b1d2
data/README.md CHANGED
@@ -18,3 +18,50 @@ Bump the version in `lib/turbot_runner/version.rb` according to the [Semantic Ve
18
18
  rake release # requires Rubygems credentials
19
19
 
20
20
  Finally, [rebuild the Docker image](https://github.com/openc/morph-docker-ruby#readme).
21
+
22
+ ## Rough outline of how it works
23
+
24
+ TurbotRunner is responsible for running a scraper, transforming its data, and
25
+ then validating and processing any output.
26
+
27
+ Work is coordinated by an instance of `Runner`. Most of the interesting work
28
+ is done in `Runner#run_script`, which constructs a command like:
29
+
30
+ python transformer.py >transformer.out 2>transformer.err <scraper.out
31
+
32
+ This command is then passed to an instance of `ScriptRunner` which runs the
33
+ command via `system` in a new thread. The main thread then monitors the output
34
+ file, and processes each complete line of output.
35
+
36
+ A line is processed by an instance of `Processor`, which checks that the line
37
+ is valid JSON, and then passes it on to the instance of a subclass of
38
+ `BaseHandler` that was passed to the `Runner` when it was created.
39
+
40
+ The subclass of `BaseHandler` can implement any of the following methods:
41
+
42
+ * `handle_valid_record`
43
+ * `handle_invalid_record`
44
+ * `handle_invalid_json`
45
+ * `handle_snapshot_ended`
46
+
47
+ If the `Processor` finds an invalid record, it interrupts the `ScriptRunner`,
48
+ and marks the run as having failed.
49
+
50
+ The `Processor` will catch an `InterruptRun` that's raised by
51
+ `handler.handle_valid_record`, which will interrupt the `ScriptRunner`, but
52
+ will not mark the run as having failed.
53
+
54
+ When the `ScriptRunner` is interrupted, it will kill the running process by
55
+ sending SIGINT to all the processes in the current process group. The current
56
+ process is set up (via `trap('INT') {}`) to ignore this.
57
+
58
+ If the `ScriptRunner` reads no output from the command within a timeout (by
59
+ default, 24 hours) it interrupts itself, and marks the run as having failed.
60
+
61
+ ## Running the tests
62
+
63
+ Tests are run with rspec:
64
+
65
+ `./bin/rspec`
66
+
67
+ The first two specs to run require some manual input.
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.2.13'
2
+ VERSION = '0.2.14'
3
3
  end
@@ -117,7 +117,7 @@
117
117
  },
118
118
  "retrieved_at": {
119
119
  "type": "string",
120
- "format": "date"
120
+ "format": "date-time"
121
121
  },
122
122
  "currency": {
123
123
  "type": "string",
@@ -29,7 +29,7 @@
29
29
  },
30
30
  "retrieved_at": {
31
31
  "type": "string",
32
- "format": "date"
32
+ "format": "date-time"
33
33
  },
34
34
  "current_status": {
35
35
  "type": [
@@ -64,9 +64,9 @@
64
64
  "format": "date"
65
65
  },
66
66
  "retrieved_at": {
67
- "description": "The time or date at which the source URL was requested",
67
+ "description": "Date-time this was retrieved from the source",
68
68
  "type": "string",
69
- "format": "date"
69
+ "format": "date-time"
70
70
  },
71
71
  "other_attributes": {
72
72
  "description": "Use for other attributes for which we don't yet have curated schema attributes",
@@ -267,13 +267,6 @@
267
267
  "type": "string",
268
268
  "enum": ["other"]
269
269
  },
270
- "classification": {
271
- "description": "The type of judgment",
272
- "type": "array",
273
- "items": {
274
- "$ref": "includes/classification.json"
275
- }
276
- },
277
270
  "body": {
278
271
  "description": "The unstructured prose content",
279
272
  "$ref": "#/definitions/body"
@@ -412,9 +405,9 @@
412
405
  "format": "date"
413
406
  },
414
407
  "retrieved_at": {
415
- "description": "The time or date at which the source URL was requested",
408
+ "description": "The time at which the source URL was requested",
416
409
  "type": "string",
417
- "format": "date"
410
+ "format": "date-time"
418
411
  },
419
412
  "confidence": {
420
413
  "description": "The scraper's author's confidence in the accuracy of the data",
@@ -33,7 +33,7 @@
33
33
  },
34
34
  "retrieved_at": {
35
35
  "type": "string",
36
- "format": "date"
36
+ "format": "date-time"
37
37
  },
38
38
  "current_status": {
39
39
  "type": [
@@ -239,16 +239,7 @@
239
239
  }
240
240
  },
241
241
  "additionalProperties": false,
242
- "anyOf": [
243
- {
244
- "required": [
245
- "name"
246
- ]
247
- },
248
- {
249
- "required": [
250
- "company_number"
251
- ]
252
- }
242
+ "required": [
243
+ "name"
253
244
  ]
254
- }
245
+ }
@@ -59,9 +59,9 @@
59
59
  "format": "date"
60
60
  },
61
61
  "retrieved_at": {
62
- "description": "The time or date at which the source URL was requested",
62
+ "description": "Date-time this was retrieved from the source",
63
63
  "type": "string",
64
- "format": "date"
64
+ "format": "date-time"
65
65
  },
66
66
  "licence_url": {
67
67
  "type": "string",
@@ -2,11 +2,6 @@ require 'json'
2
2
  require 'turbot_runner'
3
3
 
4
4
  describe TurbotRunner::Runner do
5
- after(:all) do
6
- puts
7
- puts 'If all specs passed, you should now run `ruby spec/manual_spec.rb`'
8
- end
9
-
10
5
  describe '#run' do
11
6
  context 'with a bot written in ruby' do
12
7
  before do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.13
4
+ version: 0.2.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-03 00:00:00.000000000 Z
11
+ date: 2016-02-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -96,8 +96,7 @@ dependencies:
96
96
  version: '3.0'
97
97
  description:
98
98
  email: bots@opencorporates.com
99
- executables:
100
- - rspec
99
+ executables: []
101
100
  extensions: []
102
101
  extra_rdoc_files: []
103
102
  files:
@@ -106,7 +105,6 @@ files:
106
105
  - Gemfile
107
106
  - README.md
108
107
  - Rakefile
109
- - bin/rspec
110
108
  - lib/turbot_runner.rb
111
109
  - lib/turbot_runner/base_handler.rb
112
110
  - lib/turbot_runner/exceptions.rb
@@ -234,60 +232,4 @@ rubygems_version: 2.4.5
234
232
  signing_key:
235
233
  specification_version: 4
236
234
  summary: Utilities for running bots with Turbot
237
- test_files:
238
- - spec/bots/bot-that-crashes-immediately/manifest.json
239
- - spec/bots/bot-that-crashes-immediately/scraper.rb
240
- - spec/bots/bot-that-crashes-immediately/transformer1.rb
241
- - spec/bots/bot-that-crashes-in-scraper/manifest.json
242
- - spec/bots/bot-that-crashes-in-scraper/scraper.rb
243
- - spec/bots/bot-that-crashes-in-scraper/transformer1.rb
244
- - spec/bots/bot-that-crashes-in-transformer/manifest.json
245
- - spec/bots/bot-that-crashes-in-transformer/scraper.rb
246
- - spec/bots/bot-that-crashes-in-transformer/transformer1.rb
247
- - spec/bots/bot-that-crashes-in-transformer/transformer2.rb
248
- - spec/bots/bot-that-emits-run-ended/manifest.json
249
- - spec/bots/bot-that-emits-run-ended/scraper.rb
250
- - spec/bots/bot-that-emits-snapshot-ended/manifest.json
251
- - spec/bots/bot-that-emits-snapshot-ended/scraper.rb
252
- - spec/bots/bot-that-expects-file/manifest.json
253
- - spec/bots/bot-that-expects-file/scraper.rb
254
- - spec/bots/bot-that-expects-file/something.txt
255
- - spec/bots/bot-that-is-allowed-to-produce-duplicates/manifest.json
256
- - spec/bots/bot-that-is-allowed-to-produce-duplicates/scraper.rb
257
- - spec/bots/bot-that-produces-duplicates/manifest.json
258
- - spec/bots/bot-that-produces-duplicates/scraper.rb
259
- - spec/bots/bot-with-invalid-data-type/manifest.json
260
- - spec/bots/bot-with-invalid-data-type/scraper.rb
261
- - spec/bots/bot-with-invalid-sample-date/manifest.json
262
- - spec/bots/bot-with-invalid-sample-date/scraper.rb
263
- - spec/bots/bot-with-pause/manifest.json
264
- - spec/bots/bot-with-pause/scraper.rb
265
- - spec/bots/bot-with-transformer/manifest.json
266
- - spec/bots/bot-with-transformer/scraper.rb
267
- - spec/bots/bot-with-transformer/transformer.rb
268
- - spec/bots/bot-with-transformers/manifest.json
269
- - spec/bots/bot-with-transformers/scraper.rb
270
- - spec/bots/bot-with-transformers/transformer1.rb
271
- - spec/bots/bot-with-transformers/transformer2.rb
272
- - spec/bots/invalid-json-bot/manifest.json
273
- - spec/bots/invalid-json-bot/scraper.rb
274
- - spec/bots/invalid-record-bot/manifest.json
275
- - spec/bots/invalid-record-bot/scraper.rb
276
- - spec/bots/logging-bot/manifest.json
277
- - spec/bots/logging-bot/scraper.rb
278
- - spec/bots/python-bot/manifest.json
279
- - spec/bots/python-bot/scraper.py
280
- - spec/bots/ruby-bot/manifest.json
281
- - spec/bots/ruby-bot/scraper.rb
282
- - spec/bots/slow-bot/manifest.json
283
- - spec/bots/slow-bot/scraper.rb
284
- - spec/lib/processor_spec.rb
285
- - spec/lib/runner_spec.rb
286
- - spec/lib/utils_spec.rb
287
- - spec/lib/validator_spec.rb
288
- - spec/manual_spec.rb
289
- - spec/outputs/full-scraper.out
290
- - spec/outputs/full-transformer.out
291
- - spec/outputs/truncated-scraper.out
292
- - spec/spec_helper.rb
293
- has_rdoc:
235
+ test_files: []
data/bin/rspec DELETED
@@ -1,16 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # This file was generated by Bundler.
4
- #
5
- # The application 'rspec' is installed as part of a gem, and
6
- # this file is here to facilitate running it.
7
- #
8
-
9
- require 'pathname'
10
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
- Pathname.new(__FILE__).realpath)
12
-
13
- require 'rubygems'
14
- require 'bundler/setup'
15
-
16
- load Gem.bin_path('rspec-core', 'rspec')