turbot-runner 0.2.13 → 0.2.14

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b717e95fb7ff52f87fb33c5cf95efe329fcc179b
4
- data.tar.gz: 2ea03178c9f54114f59980588b67bd84e6e5e3db
3
+ metadata.gz: a93e7b12e6d13b95affabf91eb2bcf3914260948
4
+ data.tar.gz: f129aea4ce1e9f857fa1e9a587dc5fd69a732c81
5
5
  SHA512:
6
- metadata.gz: 5d55bc8a5f4e8b35e0c5ac9b805dc6582774bd285b2da3c42870474af3e47815579fa0b9582a67738de964b83e4d755d501ed945e263015cd136ddeb5f78d96b
7
- data.tar.gz: 2d87e2bebf9d7884997941941ba2cafc66aac41a5d7e903754688643859a0f8e74acee903265ab2e0dead577a9fe49b3da0a63b1bd2e5e523d622d32af29d727
6
+ metadata.gz: 21329ceb668293687d5c0822ae29192a4c30ba3cc263cff784e0373cf94a8e9033d05305eafe53c5508622002c93db493571667a7c20149ac375750f24301134
7
+ data.tar.gz: f48480dacda4fdf1c3e3d4b21e01ac86851a6d91b463449c2670749fac9079fcdce794b833a9f7315b4dd1405afe525becb19b0e5ce4cb1cd1c3340a3d14b1d2
data/README.md CHANGED
@@ -18,3 +18,50 @@ Bump the version in `lib/turbot_runner/version.rb` according to the [Semantic Ve
18
18
  rake release # requires Rubygems credentials
19
19
 
20
20
  Finally, [rebuild the Docker image](https://github.com/openc/morph-docker-ruby#readme).
21
+
22
+ ## Rough outline of how it works
23
+
24
+ TurbotRunner is responsible for running a scraper, transforming its data, and
25
+ then validating and processing any output.
26
+
27
+ Work is coordinated by an instance of `Runner`. Most of the interesting work
28
+ is done in `Runner#run_script`, which constructs a command like:
29
+
30
+ python transformer.py >transformer.out 2>transformer.err <scraper.out
31
+
32
+ This command is then passed to an instance of `ScriptRunner` which runs the
33
+ command via `system` in a new thread. The main thread then monitors the output
34
+ file, and processes each complete line of output.
35
+
36
+ A line is processed by an instance of `Processor`, which checks that the line
37
+ is valid JSON, and then passes it on to the instance of a subclass of
38
+ `BaseHandler` that was passed to the `Runner` when it was created.
39
+
40
+ The subclass of `BaseHandler` can implement any of the following methods:
41
+
42
+ * `handle_valid_record`
43
+ * `handle_invalid_record`
44
+ * `handle_invalid_json`
45
+ * `handle_snapshot_ended`
46
+
47
+ If the `Processor` finds an invalid record, it interrupts the `ScriptRunner`,
48
+ and marks the run as having failed.
49
+
50
+ The `Processor` will catch an `InterruptRun` that's raised by
51
+ `handler.handle_valid_record`, which will interrupt the `ScriptRunner`, but
52
+ will not mark the run as having failed.
53
+
54
+ When the `ScriptRunner` is interrupted, it will kill the running process, by
55
+ sending SIGINT to all the processes in the current process group. The current
56
+ process is set up (via `trap('INT') {}`) to ignore this.
57
+
58
+ If the `ScriptRunner` reads no output from the command within a timeout (by
59
+ default, 24 hours) it interrupts itself, and marks the run as having failed.
60
+
61
+ ## Running the tests
62
+
63
+ Tests are run with rspec:
64
+
65
+ `./bin/rspec`
66
+
67
+ The first two specs to run require some manual input.
@@ -1,3 +1,3 @@
1
1
  module TurbotRunner
2
- VERSION = '0.2.13'
2
+ VERSION = '0.2.14'
3
3
  end
@@ -117,7 +117,7 @@
117
117
  },
118
118
  "retrieved_at": {
119
119
  "type": "string",
120
- "format": "date"
120
+ "format": "date-time"
121
121
  },
122
122
  "currency": {
123
123
  "type": "string",
@@ -29,7 +29,7 @@
29
29
  },
30
30
  "retrieved_at": {
31
31
  "type": "string",
32
- "format": "date"
32
+ "format": "date-time"
33
33
  },
34
34
  "current_status": {
35
35
  "type": [
@@ -64,9 +64,9 @@
64
64
  "format": "date"
65
65
  },
66
66
  "retrieved_at": {
67
- "description": "The time or date at which the source URL was requested",
67
+ "description": "Date-time this was retrieved from the source",
68
68
  "type": "string",
69
- "format": "date"
69
+ "format": "date-time"
70
70
  },
71
71
  "other_attributes": {
72
72
  "description": "Use for other attributes for which we don't yet have curated schema attributes",
@@ -267,13 +267,6 @@
267
267
  "type": "string",
268
268
  "enum": ["other"]
269
269
  },
270
- "classification": {
271
- "description": "The type of judgment",
272
- "type": "array",
273
- "items": {
274
- "$ref": "includes/classification.json"
275
- }
276
- },
277
270
  "body": {
278
271
  "description": "The unstructured prose content",
279
272
  "$ref": "#/definitions/body"
@@ -412,9 +405,9 @@
412
405
  "format": "date"
413
406
  },
414
407
  "retrieved_at": {
415
- "description": "The time or date at which the source URL was requested",
408
+ "description": "The time at which the source URL was requested",
416
409
  "type": "string",
417
- "format": "date"
410
+ "format": "date-time"
418
411
  },
419
412
  "confidence": {
420
413
  "description": "The scraper's author's confidence in the accuracy of the data",
@@ -33,7 +33,7 @@
33
33
  },
34
34
  "retrieved_at": {
35
35
  "type": "string",
36
- "format": "date"
36
+ "format": "date-time"
37
37
  },
38
38
  "current_status": {
39
39
  "type": [
@@ -239,16 +239,7 @@
239
239
  }
240
240
  },
241
241
  "additionalProperties": false,
242
- "anyOf": [
243
- {
244
- "required": [
245
- "name"
246
- ]
247
- },
248
- {
249
- "required": [
250
- "company_number"
251
- ]
252
- }
242
+ "required": [
243
+ "name"
253
244
  ]
254
- }
245
+ }
@@ -59,9 +59,9 @@
59
59
  "format": "date"
60
60
  },
61
61
  "retrieved_at": {
62
- "description": "The time or date at which the source URL was requested",
62
+ "description": "Date-time this was retrieved from the source",
63
63
  "type": "string",
64
- "format": "date"
64
+ "format": "date-time"
65
65
  },
66
66
  "licence_url": {
67
67
  "type": "string",
@@ -2,11 +2,6 @@ require 'json'
2
2
  require 'turbot_runner'
3
3
 
4
4
  describe TurbotRunner::Runner do
5
- after(:all) do
6
- puts
7
- puts 'If all specs passed, you should now run `ruby spec/manual_spec.rb`'
8
- end
9
-
10
5
  describe '#run' do
11
6
  context 'with a bot written in ruby' do
12
7
  before do
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.13
4
+ version: 0.2.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-03 00:00:00.000000000 Z
11
+ date: 2016-02-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -96,8 +96,7 @@ dependencies:
96
96
  version: '3.0'
97
97
  description:
98
98
  email: bots@opencorporates.com
99
- executables:
100
- - rspec
99
+ executables: []
101
100
  extensions: []
102
101
  extra_rdoc_files: []
103
102
  files:
@@ -106,7 +105,6 @@ files:
106
105
  - Gemfile
107
106
  - README.md
108
107
  - Rakefile
109
- - bin/rspec
110
108
  - lib/turbot_runner.rb
111
109
  - lib/turbot_runner/base_handler.rb
112
110
  - lib/turbot_runner/exceptions.rb
@@ -234,60 +232,4 @@ rubygems_version: 2.4.5
234
232
  signing_key:
235
233
  specification_version: 4
236
234
  summary: Utilities for running bots with Turbot
237
- test_files:
238
- - spec/bots/bot-that-crashes-immediately/manifest.json
239
- - spec/bots/bot-that-crashes-immediately/scraper.rb
240
- - spec/bots/bot-that-crashes-immediately/transformer1.rb
241
- - spec/bots/bot-that-crashes-in-scraper/manifest.json
242
- - spec/bots/bot-that-crashes-in-scraper/scraper.rb
243
- - spec/bots/bot-that-crashes-in-scraper/transformer1.rb
244
- - spec/bots/bot-that-crashes-in-transformer/manifest.json
245
- - spec/bots/bot-that-crashes-in-transformer/scraper.rb
246
- - spec/bots/bot-that-crashes-in-transformer/transformer1.rb
247
- - spec/bots/bot-that-crashes-in-transformer/transformer2.rb
248
- - spec/bots/bot-that-emits-run-ended/manifest.json
249
- - spec/bots/bot-that-emits-run-ended/scraper.rb
250
- - spec/bots/bot-that-emits-snapshot-ended/manifest.json
251
- - spec/bots/bot-that-emits-snapshot-ended/scraper.rb
252
- - spec/bots/bot-that-expects-file/manifest.json
253
- - spec/bots/bot-that-expects-file/scraper.rb
254
- - spec/bots/bot-that-expects-file/something.txt
255
- - spec/bots/bot-that-is-allowed-to-produce-duplicates/manifest.json
256
- - spec/bots/bot-that-is-allowed-to-produce-duplicates/scraper.rb
257
- - spec/bots/bot-that-produces-duplicates/manifest.json
258
- - spec/bots/bot-that-produces-duplicates/scraper.rb
259
- - spec/bots/bot-with-invalid-data-type/manifest.json
260
- - spec/bots/bot-with-invalid-data-type/scraper.rb
261
- - spec/bots/bot-with-invalid-sample-date/manifest.json
262
- - spec/bots/bot-with-invalid-sample-date/scraper.rb
263
- - spec/bots/bot-with-pause/manifest.json
264
- - spec/bots/bot-with-pause/scraper.rb
265
- - spec/bots/bot-with-transformer/manifest.json
266
- - spec/bots/bot-with-transformer/scraper.rb
267
- - spec/bots/bot-with-transformer/transformer.rb
268
- - spec/bots/bot-with-transformers/manifest.json
269
- - spec/bots/bot-with-transformers/scraper.rb
270
- - spec/bots/bot-with-transformers/transformer1.rb
271
- - spec/bots/bot-with-transformers/transformer2.rb
272
- - spec/bots/invalid-json-bot/manifest.json
273
- - spec/bots/invalid-json-bot/scraper.rb
274
- - spec/bots/invalid-record-bot/manifest.json
275
- - spec/bots/invalid-record-bot/scraper.rb
276
- - spec/bots/logging-bot/manifest.json
277
- - spec/bots/logging-bot/scraper.rb
278
- - spec/bots/python-bot/manifest.json
279
- - spec/bots/python-bot/scraper.py
280
- - spec/bots/ruby-bot/manifest.json
281
- - spec/bots/ruby-bot/scraper.rb
282
- - spec/bots/slow-bot/manifest.json
283
- - spec/bots/slow-bot/scraper.rb
284
- - spec/lib/processor_spec.rb
285
- - spec/lib/runner_spec.rb
286
- - spec/lib/utils_spec.rb
287
- - spec/lib/validator_spec.rb
288
- - spec/manual_spec.rb
289
- - spec/outputs/full-scraper.out
290
- - spec/outputs/full-transformer.out
291
- - spec/outputs/truncated-scraper.out
292
- - spec/spec_helper.rb
293
- has_rdoc:
235
+ test_files: []
data/bin/rspec DELETED
@@ -1,16 +0,0 @@
1
- #!/usr/bin/env ruby
2
- #
3
- # This file was generated by Bundler.
4
- #
5
- # The application 'rspec' is installed as part of a gem, and
6
- # this file is here to facilitate running it.
7
- #
8
-
9
- require 'pathname'
10
- ENV['BUNDLE_GEMFILE'] ||= File.expand_path("../../Gemfile",
11
- Pathname.new(__FILE__).realpath)
12
-
13
- require 'rubygems'
14
- require 'bundler/setup'
15
-
16
- load Gem.bin_path('rspec-core', 'rspec')