turbot-runner 0.0.24 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +8 -8
  2. data/lib/turbot_runner/base_handler.rb +13 -0
  3. data/lib/{prerun.rb → turbot_runner/prerun.rb} +0 -0
  4. data/lib/turbot_runner/processor.rb +55 -0
  5. data/lib/turbot_runner/runner.rb +150 -0
  6. data/lib/turbot_runner/script_runner.rb +90 -0
  7. data/lib/turbot_runner/version.rb +1 -1
  8. data/lib/turbot_runner.rb +5 -335
  9. data/spec/bots/bot-that-crashes-in-scraper/manifest.json +8 -0
  10. data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
  11. data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
  12. data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
  13. data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
  14. data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
  15. data/spec/bots/bot-with-pause/manifest.json +8 -0
  16. data/spec/bots/bot-with-pause/scraper.rb +16 -0
  17. data/spec/bots/bot-with-transformer/manifest.json +15 -0
  18. data/spec/bots/bot-with-transformer/scraper.rb +10 -0
  19. data/spec/bots/bot-with-transformer/transformer.rb +15 -0
  20. data/spec/bots/bot-with-transformers/manifest.json +20 -0
  21. data/spec/bots/bot-with-transformers/scraper.rb +10 -0
  22. data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
  23. data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
  24. data/spec/bots/invalid-json-bot/manifest.json +8 -0
  25. data/spec/bots/invalid-json-bot/scraper.rb +11 -0
  26. data/spec/bots/invalid-record-bot/manifest.json +8 -0
  27. data/spec/bots/invalid-record-bot/scraper.rb +11 -0
  28. data/spec/bots/logging-bot/manifest.json +8 -0
  29. data/spec/bots/logging-bot/scraper.rb +14 -0
  30. data/spec/bots/python-bot/manifest.json +8 -0
  31. data/spec/bots/python-bot/scraper.py +11 -0
  32. data/spec/bots/ruby-bot/manifest.json +8 -0
  33. data/spec/bots/ruby-bot/scraper.rb +10 -0
  34. data/spec/bots/slow-bot/manifest.json +8 -0
  35. data/spec/bots/slow-bot/scraper.rb +11 -0
  36. data/spec/lib/processor.rb +48 -0
  37. data/spec/lib/runner_spec.rb +244 -0
  38. data/spec/manual_spec.rb +55 -0
  39. data/spec/outputs/full-scraper.out +10 -0
  40. data/spec/outputs/full-transformer.out +10 -0
  41. data/spec/outputs/truncated-scraper.out +5 -0
  42. metadata +40 -19
  43. data/spec/dummy-bot-python/manifest.json +0 -15
  44. data/spec/dummy-bot-python/scraper.py +0 -11
  45. data/spec/dummy-bot-python/transformer.py +0 -15
  46. data/spec/dummy-bot-ruby/manifest.json +0 -15
  47. data/spec/dummy-bot-ruby/scraper.rb +0 -8
  48. data/spec/dummy-bot-ruby/transformer.rb +0 -12
  49. data/spec/dummy-broken-bot-ruby/manifest.json +0 -8
  50. data/spec/dummy-broken-bot-ruby/scraper.rb +0 -6
  51. data/spec/dummy-broken-bot-ruby/transformer.rb +0 -12
  52. data/spec/dummy-broken-bot-ruby-2/manifest.json +0 -15
  53. data/spec/dummy-broken-bot-ruby-2/scraper.rb +0 -4
  54. data/spec/dummy-broken-bot-ruby-2/transformer.rb +0 -11
  55. data/spec/dummy-broken-bot-ruby-3/manifest.json +0 -15
  56. data/spec/dummy-broken-bot-ruby-3/scraper.rb +0 -5
  57. data/spec/dummy-broken-bot-ruby-3/transformer.rb +0 -5
  58. data/spec/turbot_runner_spec.rb +0 -117
@@ -0,0 +1,55 @@
1
+ # These specs test behaviour that is most easily exercised or verified by hand.
2
+
3
+ $:.unshift('lib')
4
+ require 'turbot_runner'
5
+
6
+ puts
7
+ puts '-' * 80
8
+
9
+ puts <<eos
10
+ This tests whether stderr is directed to the console.
11
+ When the scraper is run, you should see the following two lines in the console:
12
+
13
+ doing...
14
+ done
15
+
16
+ Press <enter> to run the test.
17
+ eos
18
+
19
+ gets
20
+
21
+ runner = TurbotRunner::Runner.new('spec/bots/logging-bot').run
22
+
23
+ puts
24
+ puts 'Did you see the expected lines? [y]/n'
25
+
26
+ exit(1) unless ['Y', 'y', ''].include?(gets.chomp)
27
+
28
+ puts
29
+ puts '-' * 80
30
+
31
+ puts <<eos
32
+ This tests whether hitting Ctrl-C interrupts a running scraper correctly. When
33
+ the scraper is run, it will pause after producing five lines of output, and
34
+ instruct you to interrupt it. You will have ten seconds to do so.
35
+
36
+ Press <enter> to run the test.
37
+ eos
38
+
39
+ gets
40
+
41
+ runner = TurbotRunner::Runner.new('spec/bots/bot-with-pause').run
42
+
43
+ expected_output = File.readlines('spec/outputs/truncated-scraper.out').map {|line| JSON.parse(line)}
44
+ actual_output = File.readlines('spec/bots/bot-with-pause/output/scraper.out').map {|line| JSON.parse(line)}
45
+
46
+ if expected_output == actual_output
47
+ puts 'Bot produced expected output'
48
+ else
49
+ puts 'Bot did not produce expected output'
50
+ exit(1)
51
+ end
52
+
53
+ puts
54
+ puts '-' * 80
55
+ puts 'All tests passed!'
@@ -0,0 +1,10 @@
1
+ {"licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
6
+ {"licence_number":"XYZ5","source_url":"http://example.com","sample_date":"2014-06-01"}
7
+ {"licence_number":"XYZ6","source_url":"http://example.com","sample_date":"2014-06-01"}
8
+ {"licence_number":"XYZ7","source_url":"http://example.com","sample_date":"2014-06-01"}
9
+ {"licence_number":"XYZ8","source_url":"http://example.com","sample_date":"2014-06-01"}
10
+ {"licence_number":"XYZ9","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,10 @@
1
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
6
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ5","source_url":"http://example.com","sample_date":"2014-06-01"}
7
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ6","source_url":"http://example.com","sample_date":"2014-06-01"}
8
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ7","source_url":"http://example.com","sample_date":"2014-06-01"}
9
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ8","source_url":"http://example.com","sample_date":"2014-06-01"}
10
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ9","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,5 @@
1
+ {"licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.24
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-31 00:00:00.000000000 Z
11
+ date: 2014-08-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json-schema
@@ -31,8 +31,12 @@ extensions: []
31
31
  extra_rdoc_files: []
32
32
  files:
33
33
  - bin/rspec
34
- - lib/prerun.rb
35
34
  - lib/turbot_runner.rb
35
+ - lib/turbot_runner/base_handler.rb
36
+ - lib/turbot_runner/prerun.rb
37
+ - lib/turbot_runner/processor.rb
38
+ - lib/turbot_runner/runner.rb
39
+ - lib/turbot_runner/script_runner.rb
36
40
  - lib/turbot_runner/version.rb
37
41
  - schema/schemas/company-schema.json
38
42
  - schema/schemas/financial-payment-schema.json
@@ -56,22 +60,39 @@ files:
56
60
  - schema/schemas/simple-licence-schema.json
57
61
  - schema/schemas/simple-subsidiary-schema.json
58
62
  - schema/schemas/subsidiary-relationship-schema.json
59
- - spec/dummy-bot-python/manifest.json
60
- - spec/dummy-bot-python/scraper.py
61
- - spec/dummy-bot-python/transformer.py
62
- - spec/dummy-bot-ruby/manifest.json
63
- - spec/dummy-bot-ruby/scraper.rb
64
- - spec/dummy-bot-ruby/transformer.rb
65
- - spec/dummy-broken-bot-ruby-2/manifest.json
66
- - spec/dummy-broken-bot-ruby-2/scraper.rb
67
- - spec/dummy-broken-bot-ruby-2/transformer.rb
68
- - spec/dummy-broken-bot-ruby-3/manifest.json
69
- - spec/dummy-broken-bot-ruby-3/scraper.rb
70
- - spec/dummy-broken-bot-ruby-3/transformer.rb
71
- - spec/dummy-broken-bot-ruby/manifest.json
72
- - spec/dummy-broken-bot-ruby/scraper.rb
73
- - spec/dummy-broken-bot-ruby/transformer.rb
74
- - spec/turbot_runner_spec.rb
63
+ - spec/bots/bot-that-crashes-in-scraper/manifest.json
64
+ - spec/bots/bot-that-crashes-in-scraper/scraper.rb
65
+ - spec/bots/bot-that-crashes-in-transformer/manifest.json
66
+ - spec/bots/bot-that-crashes-in-transformer/scraper.rb
67
+ - spec/bots/bot-that-crashes-in-transformer/transformer1.rb
68
+ - spec/bots/bot-that-crashes-in-transformer/transformer2.rb
69
+ - spec/bots/bot-with-pause/manifest.json
70
+ - spec/bots/bot-with-pause/scraper.rb
71
+ - spec/bots/bot-with-transformer/manifest.json
72
+ - spec/bots/bot-with-transformer/scraper.rb
73
+ - spec/bots/bot-with-transformer/transformer.rb
74
+ - spec/bots/bot-with-transformers/manifest.json
75
+ - spec/bots/bot-with-transformers/scraper.rb
76
+ - spec/bots/bot-with-transformers/transformer1.rb
77
+ - spec/bots/bot-with-transformers/transformer2.rb
78
+ - spec/bots/invalid-json-bot/manifest.json
79
+ - spec/bots/invalid-json-bot/scraper.rb
80
+ - spec/bots/invalid-record-bot/manifest.json
81
+ - spec/bots/invalid-record-bot/scraper.rb
82
+ - spec/bots/logging-bot/manifest.json
83
+ - spec/bots/logging-bot/scraper.rb
84
+ - spec/bots/python-bot/manifest.json
85
+ - spec/bots/python-bot/scraper.py
86
+ - spec/bots/ruby-bot/manifest.json
87
+ - spec/bots/ruby-bot/scraper.rb
88
+ - spec/bots/slow-bot/manifest.json
89
+ - spec/bots/slow-bot/scraper.rb
90
+ - spec/lib/processor.rb
91
+ - spec/lib/runner_spec.rb
92
+ - spec/manual_spec.rb
93
+ - spec/outputs/full-scraper.out
94
+ - spec/outputs/full-transformer.out
95
+ - spec/outputs/truncated-scraper.out
75
96
  homepage: http://turbot.opencorporates.com/
76
97
  licenses:
77
98
  - MIT
@@ -1,15 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.py"],
7
- "transformers": [
8
- {
9
- "file": "transformer.py",
10
- "data_type": "goodbye",
11
- "identifying_fields": [""]
12
- }
13
- ],
14
- "frequency": "monthly"
15
- }
@@ -1,11 +0,0 @@
1
- from __future__ import print_function
2
-
3
- import json
4
- import sys
5
-
6
- print('hello from python', file=sys.stderr)
7
-
8
- print(json.dumps({'n': 5, 'hello': 'hello, 5'}))
9
- print(json.dumps({'n': 6, 'hello': 'hello, 6'}))
10
- print(json.dumps({'n': 7}))
11
- print(json.dumps({'n': 8, 'hello': 'hello, 8'}))
@@ -1,15 +0,0 @@
1
- import json
2
- import sys
3
-
4
- while True:
5
- line = sys.stdin.readline()
6
- if not line:
7
- break
8
-
9
- raw_record = json.loads(line)
10
- transformed_record = {
11
- 'n': raw_record['n'],
12
- 'goodbye': raw_record['hello'].replace('hello', 'goodbye')
13
- }
14
-
15
- print json.dumps(transformed_record)
@@ -1,15 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.rb"],
7
- "transformers": [
8
- {
9
- "file": "transformer.rb",
10
- "data_type": "goodbye",
11
- "identifying_fields": [""]
12
- }
13
- ],
14
- "frequency": "monthly"
15
- }
@@ -1,8 +0,0 @@
1
- require 'json'
2
-
3
- $stderr.puts('hello from ruby')
4
-
5
- puts({:n => 1, :hello => 'hello, 1'}.to_json)
6
- puts({:n => 2, :hello => 'hello, 2'}.to_json)
7
- puts({:n => 3}.to_json)
8
- puts({:n => 4, :hello => 'hello, 4'}.to_json)
@@ -1,12 +0,0 @@
1
- require 'json'
2
- STDIN.each_line do |line|
3
-
4
- raw_record = JSON.parse(line)
5
-
6
- transformed_record = {
7
- :n => raw_record['n'],
8
- :goodbye => raw_record['hello'].sub('hello', 'goodbye')
9
- }
10
-
11
- puts transformed_record.to_json
12
- end
@@ -1,8 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.rb"],
7
- "frequency": "monthly"
8
- }
@@ -1,6 +0,0 @@
1
- require 'json'
2
-
3
- $stderr.puts('hello')
4
-
5
- puts({h: 1}.to_json)
6
- raise "oops"
@@ -1,12 +0,0 @@
1
- require 'json'
2
-
3
- STDIN.each_line do |line|
4
- raw_record = JSON.parse(line)
5
-
6
- transformed_record = {
7
- :n => raw_record['n'],
8
- :goodbye => raw_record['hello'].sub('hello', 'goodbye')
9
- }
10
-
11
- puts transformed_record.to_json
12
- end
@@ -1,15 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.rb"],
7
- "transformers": [
8
- {
9
- "file": "transformer.rb",
10
- "data_type": "goodbye",
11
- "identifying_fields": [""]
12
- }
13
- ],
14
- "frequency": "monthly"
15
- }
@@ -1,4 +0,0 @@
1
- require 'json'
2
-
3
- puts({h: 1}.to_json)
4
- raise "oops"
@@ -1,11 +0,0 @@
1
- require 'json'
2
-
3
- STDIN.each_line do |line|
4
- raw_record = JSON.parse(line)
5
-
6
- transformed_record = {
7
- :p => raw_record['n'],
8
- }
9
-
10
- puts transformed_record.to_json
11
- end
@@ -1,15 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.rb"],
7
- "transformers": [
8
- {
9
- "file": "transformer.rb",
10
- "data_type": "goodbye",
11
- "identifying_fields": [""]
12
- }
13
- ],
14
- "frequency": "monthly"
15
- }
@@ -1,5 +0,0 @@
1
- require 'json'
2
-
3
- 3.times do |n|
4
- puts({h: n}.to_json)
5
- end
@@ -1,5 +0,0 @@
1
- require 'json'
2
-
3
- STDIN.each_line do |line|
4
- raise "oops"
5
- end
@@ -1,117 +0,0 @@
1
- require 'json'
2
- require 'turbot_runner'
3
-
4
- class SpecRunner < TurbotRunner::BaseRunner
5
- def validate(record, data_type)
6
- if record['n'] % 4 == 3
7
- [:error]
8
- else
9
- []
10
- end
11
- end
12
-
13
- def handle_failed_run
14
- end
15
- end
16
-
17
- class BrokenRunner < TurbotRunner::BaseRunner
18
- def validate(record, data_type)
19
- []
20
- end
21
-
22
- def handle_valid_record(*args)
23
- end
24
-
25
- def handle_failed_run
26
- end
27
- end
28
-
29
-
30
- describe TurbotRunner::BaseRunner do
31
- before do
32
- $stderr = StringIO.new
33
- end
34
-
35
- after do
36
- $stderr = STDERR
37
- end
38
-
39
- it 'can run a ruby bot' do
40
- runner = SpecRunner.new('spec/dummy-bot-ruby')
41
-
42
- expect(runner).to receive(:handle_valid_record).with({'n' => 1, 'hello' => 'hello, 1'}, 'hello')
43
- expect(runner).to receive(:handle_valid_record).with({'n' => 1, 'goodbye' => 'goodbye, 1'}, 'goodbye')
44
- expect(runner).to receive(:handle_valid_record).with({'n' => 2, 'hello' => 'hello, 2'}, 'hello')
45
- expect(runner).to receive(:handle_valid_record).with({'n' => 2, 'goodbye' => 'goodbye, 2'}, 'goodbye')
46
- expect(runner).to receive(:handle_invalid_record).with({'n' => 3}, 'hello', [:error])
47
- expect(runner).to receive(:handle_valid_record).with({'n' => 4, 'hello' => 'hello, 4'}, 'hello')
48
- expect(runner).to receive(:handle_valid_record).with({'n' => 4, 'goodbye' => 'goodbye, 4'}, 'goodbye')
49
- expect(runner).to receive(:handle_successful_run)
50
- runner.run
51
- expect($stderr.string).to eq("hello from ruby\n")
52
- end
53
-
54
- it 'can run a python bot' do
55
- runner = SpecRunner.new('spec/dummy-bot-python')
56
-
57
- expect(runner).to receive(:handle_valid_record).with({'n' => 5, 'hello' => 'hello, 5'}, 'hello')
58
- expect(runner).to receive(:handle_valid_record).with({'n' => 5, 'goodbye' => 'goodbye, 5'}, 'goodbye')
59
- expect(runner).to receive(:handle_valid_record).with({'n' => 6, 'hello' => 'hello, 6'}, 'hello')
60
- expect(runner).to receive(:handle_valid_record).with({'n' => 6, 'goodbye' => 'goodbye, 6'}, 'goodbye')
61
- expect(runner).to receive(:handle_invalid_record).with({'n' => 7}, 'hello', [:error])
62
- expect(runner).to receive(:handle_valid_record).with({'n' => 8, 'hello' => 'hello, 8'}, 'hello')
63
- expect(runner).to receive(:handle_valid_record).with({'n' => 8, 'goodbye' => 'goodbye, 8'}, 'goodbye')
64
- expect(runner).to receive(:handle_successful_run)
65
- runner.run
66
- expect($stderr.string).to eq("hello from python\n")
67
- end
68
-
69
- describe "broken bots" do
70
- describe "failing bot without transformer" do
71
- it 'should call handle_failed_run' do
72
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby')
73
- expect(runner).to receive(:handle_valid_record)
74
- expect(runner).to receive(:handle_failed_run)
75
- runner.run
76
- end
77
-
78
- it 'should write exception to stderr' do
79
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby')
80
- runner.run
81
- expect($stderr.string).to match(/^hello/)
82
- expect($stderr.string).to match(/oops/)
83
- end
84
- end
85
-
86
- describe "failing bot with successful transformer" do
87
- it 'should call handle_failed_run' do
88
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby-2')
89
- expect(runner).to receive(:handle_valid_record) # first record
90
- expect(runner).to receive(:handle_valid_record) # first transform
91
- expect(runner).to receive(:handle_failed_run)
92
- runner.run
93
- end
94
-
95
- it 'should write exception to stderr' do
96
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby')
97
- runner.run
98
- expect($stderr.string).to match(/oops/)
99
- end
100
- end
101
-
102
- describe "sucessful bot with failing transformer" do
103
- it 'should call handle_failed_run' do
104
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby-3')
105
- expect(runner).to receive(:handle_valid_record) # the untransformed one
106
- expect(runner).to receive(:handle_failed_run) # the transformer breaks immediately
107
- runner.run
108
- end
109
-
110
- it 'should write exception to stderr' do
111
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby')
112
- runner.run
113
- expect($stderr.string).to match(/oops/)
114
- end
115
- end
116
- end
117
- end