turbot-runner 0.0.24 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (58)
  1. checksums.yaml +8 -8
  2. data/lib/turbot_runner/base_handler.rb +13 -0
  3. data/lib/{prerun.rb → turbot_runner/prerun.rb} +0 -0
  4. data/lib/turbot_runner/processor.rb +55 -0
  5. data/lib/turbot_runner/runner.rb +150 -0
  6. data/lib/turbot_runner/script_runner.rb +90 -0
  7. data/lib/turbot_runner/version.rb +1 -1
  8. data/lib/turbot_runner.rb +5 -335
  9. data/spec/bots/bot-that-crashes-in-scraper/manifest.json +8 -0
  10. data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
  11. data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
  12. data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
  13. data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
  14. data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
  15. data/spec/bots/bot-with-pause/manifest.json +8 -0
  16. data/spec/bots/bot-with-pause/scraper.rb +16 -0
  17. data/spec/bots/bot-with-transformer/manifest.json +15 -0
  18. data/spec/bots/bot-with-transformer/scraper.rb +10 -0
  19. data/spec/bots/bot-with-transformer/transformer.rb +15 -0
  20. data/spec/bots/bot-with-transformers/manifest.json +20 -0
  21. data/spec/bots/bot-with-transformers/scraper.rb +10 -0
  22. data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
  23. data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
  24. data/spec/bots/invalid-json-bot/manifest.json +8 -0
  25. data/spec/bots/invalid-json-bot/scraper.rb +11 -0
  26. data/spec/bots/invalid-record-bot/manifest.json +8 -0
  27. data/spec/bots/invalid-record-bot/scraper.rb +11 -0
  28. data/spec/bots/logging-bot/manifest.json +8 -0
  29. data/spec/bots/logging-bot/scraper.rb +14 -0
  30. data/spec/bots/python-bot/manifest.json +8 -0
  31. data/spec/bots/python-bot/scraper.py +11 -0
  32. data/spec/bots/ruby-bot/manifest.json +8 -0
  33. data/spec/bots/ruby-bot/scraper.rb +10 -0
  34. data/spec/bots/slow-bot/manifest.json +8 -0
  35. data/spec/bots/slow-bot/scraper.rb +11 -0
  36. data/spec/lib/processor.rb +48 -0
  37. data/spec/lib/runner_spec.rb +244 -0
  38. data/spec/manual_spec.rb +55 -0
  39. data/spec/outputs/full-scraper.out +10 -0
  40. data/spec/outputs/full-transformer.out +10 -0
  41. data/spec/outputs/truncated-scraper.out +5 -0
  42. metadata +40 -19
  43. data/spec/dummy-bot-python/manifest.json +0 -15
  44. data/spec/dummy-bot-python/scraper.py +0 -11
  45. data/spec/dummy-bot-python/transformer.py +0 -15
  46. data/spec/dummy-bot-ruby/manifest.json +0 -15
  47. data/spec/dummy-bot-ruby/scraper.rb +0 -8
  48. data/spec/dummy-bot-ruby/transformer.rb +0 -12
  49. data/spec/dummy-broken-bot-ruby/manifest.json +0 -8
  50. data/spec/dummy-broken-bot-ruby/scraper.rb +0 -6
  51. data/spec/dummy-broken-bot-ruby/transformer.rb +0 -12
  52. data/spec/dummy-broken-bot-ruby-2/manifest.json +0 -15
  53. data/spec/dummy-broken-bot-ruby-2/scraper.rb +0 -4
  54. data/spec/dummy-broken-bot-ruby-2/transformer.rb +0 -11
  55. data/spec/dummy-broken-bot-ruby-3/manifest.json +0 -15
  56. data/spec/dummy-broken-bot-ruby-3/scraper.rb +0 -5
  57. data/spec/dummy-broken-bot-ruby-3/transformer.rb +0 -5
  58. data/spec/turbot_runner_spec.rb +0 -117
@@ -0,0 +1,55 @@
1
+ # These specs test behaviour that is most easily exercised or verified by hand.
2
+
3
+ $:.unshift('lib')
4
+ require 'turbot_runner'
5
+
6
+ puts
7
+ puts '-' * 80
8
+
9
+ puts <<eos
10
+ This tests whether stderr is directed to the console.
11
+ When the scraper is run, you should see the following two lines in the console:
12
+
13
+ doing...
14
+ done
15
+
16
+ Press <enter> to run the test.
17
+ eos
18
+
19
+ gets
20
+
21
+ runner = TurbotRunner::Runner.new('spec/bots/logging-bot').run
22
+
23
+ puts
24
+ puts 'Did you see the expected lines? [y]/n'
25
+
26
+ exit(1) unless ['Y', 'y', ''].include?(gets.chomp)
27
+
28
+ puts
29
+ puts '-' * 80
30
+
31
+ puts <<eos
32
+ This tests whether hitting Ctrl-C interrupts a running scraper correctly. When
33
+ the scraper is run, it will pause after producing five lines of output, and
34
+ instruct you to interrupt it. You will have ten seconds to do so.
35
+
36
+ Press <enter> to run the test.
37
+ eos
38
+
39
+ gets
40
+
41
+ runner = TurbotRunner::Runner.new('spec/bots/bot-with-pause').run
42
+
43
+ expected_output = File.readlines('spec/outputs/truncated-scraper.out').map {|line| JSON.parse(line)}
44
+ actual_output = File.readlines('spec/bots/bot-with-pause/output/scraper.out').map {|line| JSON.parse(line)}
45
+
46
+ if expected_output == actual_output
47
+ puts 'Bot produced expected output'
48
+ else
49
+ puts 'Bot did not produce expected output'
50
+ exit(1)
51
+ end
52
+
53
+ puts
54
+ puts '-' * 80
55
+ puts 'All tests passed!'
@@ -0,0 +1,10 @@
1
+ {"licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
6
+ {"licence_number":"XYZ5","source_url":"http://example.com","sample_date":"2014-06-01"}
7
+ {"licence_number":"XYZ6","source_url":"http://example.com","sample_date":"2014-06-01"}
8
+ {"licence_number":"XYZ7","source_url":"http://example.com","sample_date":"2014-06-01"}
9
+ {"licence_number":"XYZ8","source_url":"http://example.com","sample_date":"2014-06-01"}
10
+ {"licence_number":"XYZ9","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,10 @@
1
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
6
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ5","source_url":"http://example.com","sample_date":"2014-06-01"}
7
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ6","source_url":"http://example.com","sample_date":"2014-06-01"}
8
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ7","source_url":"http://example.com","sample_date":"2014-06-01"}
9
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ8","source_url":"http://example.com","sample_date":"2014-06-01"}
10
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ9","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,5 @@
1
+ {"licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: turbot-runner
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.24
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - OpenCorporates
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-31 00:00:00.000000000 Z
11
+ date: 2014-08-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: json-schema
@@ -31,8 +31,12 @@ extensions: []
31
31
  extra_rdoc_files: []
32
32
  files:
33
33
  - bin/rspec
34
- - lib/prerun.rb
35
34
  - lib/turbot_runner.rb
35
+ - lib/turbot_runner/base_handler.rb
36
+ - lib/turbot_runner/prerun.rb
37
+ - lib/turbot_runner/processor.rb
38
+ - lib/turbot_runner/runner.rb
39
+ - lib/turbot_runner/script_runner.rb
36
40
  - lib/turbot_runner/version.rb
37
41
  - schema/schemas/company-schema.json
38
42
  - schema/schemas/financial-payment-schema.json
@@ -56,22 +60,39 @@ files:
56
60
  - schema/schemas/simple-licence-schema.json
57
61
  - schema/schemas/simple-subsidiary-schema.json
58
62
  - schema/schemas/subsidiary-relationship-schema.json
59
- - spec/dummy-bot-python/manifest.json
60
- - spec/dummy-bot-python/scraper.py
61
- - spec/dummy-bot-python/transformer.py
62
- - spec/dummy-bot-ruby/manifest.json
63
- - spec/dummy-bot-ruby/scraper.rb
64
- - spec/dummy-bot-ruby/transformer.rb
65
- - spec/dummy-broken-bot-ruby-2/manifest.json
66
- - spec/dummy-broken-bot-ruby-2/scraper.rb
67
- - spec/dummy-broken-bot-ruby-2/transformer.rb
68
- - spec/dummy-broken-bot-ruby-3/manifest.json
69
- - spec/dummy-broken-bot-ruby-3/scraper.rb
70
- - spec/dummy-broken-bot-ruby-3/transformer.rb
71
- - spec/dummy-broken-bot-ruby/manifest.json
72
- - spec/dummy-broken-bot-ruby/scraper.rb
73
- - spec/dummy-broken-bot-ruby/transformer.rb
74
- - spec/turbot_runner_spec.rb
63
+ - spec/bots/bot-that-crashes-in-scraper/manifest.json
64
+ - spec/bots/bot-that-crashes-in-scraper/scraper.rb
65
+ - spec/bots/bot-that-crashes-in-transformer/manifest.json
66
+ - spec/bots/bot-that-crashes-in-transformer/scraper.rb
67
+ - spec/bots/bot-that-crashes-in-transformer/transformer1.rb
68
+ - spec/bots/bot-that-crashes-in-transformer/transformer2.rb
69
+ - spec/bots/bot-with-pause/manifest.json
70
+ - spec/bots/bot-with-pause/scraper.rb
71
+ - spec/bots/bot-with-transformer/manifest.json
72
+ - spec/bots/bot-with-transformer/scraper.rb
73
+ - spec/bots/bot-with-transformer/transformer.rb
74
+ - spec/bots/bot-with-transformers/manifest.json
75
+ - spec/bots/bot-with-transformers/scraper.rb
76
+ - spec/bots/bot-with-transformers/transformer1.rb
77
+ - spec/bots/bot-with-transformers/transformer2.rb
78
+ - spec/bots/invalid-json-bot/manifest.json
79
+ - spec/bots/invalid-json-bot/scraper.rb
80
+ - spec/bots/invalid-record-bot/manifest.json
81
+ - spec/bots/invalid-record-bot/scraper.rb
82
+ - spec/bots/logging-bot/manifest.json
83
+ - spec/bots/logging-bot/scraper.rb
84
+ - spec/bots/python-bot/manifest.json
85
+ - spec/bots/python-bot/scraper.py
86
+ - spec/bots/ruby-bot/manifest.json
87
+ - spec/bots/ruby-bot/scraper.rb
88
+ - spec/bots/slow-bot/manifest.json
89
+ - spec/bots/slow-bot/scraper.rb
90
+ - spec/lib/processor.rb
91
+ - spec/lib/runner_spec.rb
92
+ - spec/manual_spec.rb
93
+ - spec/outputs/full-scraper.out
94
+ - spec/outputs/full-transformer.out
95
+ - spec/outputs/truncated-scraper.out
75
96
  homepage: http://turbot.opencorporates.com/
76
97
  licenses:
77
98
  - MIT
@@ -1,15 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.py"],
7
- "transformers": [
8
- {
9
- "file": "transformer.py",
10
- "data_type": "goodbye",
11
- "identifying_fields": [""]
12
- }
13
- ],
14
- "frequency": "monthly"
15
- }
@@ -1,11 +0,0 @@
1
- from __future__ import print_function
2
-
3
- import json
4
- import sys
5
-
6
- print('hello from python', file=sys.stderr)
7
-
8
- print(json.dumps({'n': 5, 'hello': 'hello, 5'}))
9
- print(json.dumps({'n': 6, 'hello': 'hello, 6'}))
10
- print(json.dumps({'n': 7}))
11
- print(json.dumps({'n': 8, 'hello': 'hello, 8'}))
@@ -1,15 +0,0 @@
1
- import json
2
- import sys
3
-
4
- while True:
5
- line = sys.stdin.readline()
6
- if not line:
7
- break
8
-
9
- raw_record = json.loads(line)
10
- transformed_record = {
11
- 'n': raw_record['n'],
12
- 'goodbye': raw_record['hello'].replace('hello', 'goodbye')
13
- }
14
-
15
- print json.dumps(transformed_record)
@@ -1,15 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.rb"],
7
- "transformers": [
8
- {
9
- "file": "transformer.rb",
10
- "data_type": "goodbye",
11
- "identifying_fields": [""]
12
- }
13
- ],
14
- "frequency": "monthly"
15
- }
@@ -1,8 +0,0 @@
1
- require 'json'
2
-
3
- $stderr.puts('hello from ruby')
4
-
5
- puts({:n => 1, :hello => 'hello, 1'}.to_json)
6
- puts({:n => 2, :hello => 'hello, 2'}.to_json)
7
- puts({:n => 3}.to_json)
8
- puts({:n => 4, :hello => 'hello, 4'}.to_json)
@@ -1,12 +0,0 @@
1
- require 'json'
2
- STDIN.each_line do |line|
3
-
4
- raw_record = JSON.parse(line)
5
-
6
- transformed_record = {
7
- :n => raw_record['n'],
8
- :goodbye => raw_record['hello'].sub('hello', 'goodbye')
9
- }
10
-
11
- puts transformed_record.to_json
12
- end
@@ -1,8 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.rb"],
7
- "frequency": "monthly"
8
- }
@@ -1,6 +0,0 @@
1
- require 'json'
2
-
3
- $stderr.puts('hello')
4
-
5
- puts({h: 1}.to_json)
6
- raise "oops"
@@ -1,12 +0,0 @@
1
- require 'json'
2
-
3
- STDIN.each_line do |line|
4
- raw_record = JSON.parse(line)
5
-
6
- transformed_record = {
7
- :n => raw_record['n'],
8
- :goodbye => raw_record['hello'].sub('hello', 'goodbye')
9
- }
10
-
11
- puts transformed_record.to_json
12
- end
@@ -1,15 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.rb"],
7
- "transformers": [
8
- {
9
- "file": "transformer.rb",
10
- "data_type": "goodbye",
11
- "identifying_fields": [""]
12
- }
13
- ],
14
- "frequency": "monthly"
15
- }
@@ -1,4 +0,0 @@
1
- require 'json'
2
-
3
- puts({h: 1}.to_json)
4
- raise "oops"
@@ -1,11 +0,0 @@
1
- require 'json'
2
-
3
- STDIN.each_line do |line|
4
- raw_record = JSON.parse(line)
5
-
6
- transformed_record = {
7
- :p => raw_record['n'],
8
- }
9
-
10
- puts transformed_record.to_json
11
- end
@@ -1,15 +0,0 @@
1
- {
2
- "bot_id": "dummy-bot",
3
- "data_type": "hello",
4
- "description": "This is a dummy bot",
5
- "identifying_fields": ["number"],
6
- "files": ["scraper.rb"],
7
- "transformers": [
8
- {
9
- "file": "transformer.rb",
10
- "data_type": "goodbye",
11
- "identifying_fields": [""]
12
- }
13
- ],
14
- "frequency": "monthly"
15
- }
@@ -1,5 +0,0 @@
1
- require 'json'
2
-
3
- 3.times do |n|
4
- puts({h: n}.to_json)
5
- end
@@ -1,5 +0,0 @@
1
- require 'json'
2
-
3
- STDIN.each_line do |line|
4
- raise "oops"
5
- end
@@ -1,117 +0,0 @@
1
- require 'json'
2
- require 'turbot_runner'
3
-
4
- class SpecRunner < TurbotRunner::BaseRunner
5
- def validate(record, data_type)
6
- if record['n'] % 4 == 3
7
- [:error]
8
- else
9
- []
10
- end
11
- end
12
-
13
- def handle_failed_run
14
- end
15
- end
16
-
17
- class BrokenRunner < TurbotRunner::BaseRunner
18
- def validate(record, data_type)
19
- []
20
- end
21
-
22
- def handle_valid_record(*args)
23
- end
24
-
25
- def handle_failed_run
26
- end
27
- end
28
-
29
-
30
- describe TurbotRunner::BaseRunner do
31
- before do
32
- $stderr = StringIO.new
33
- end
34
-
35
- after do
36
- $stderr = STDERR
37
- end
38
-
39
- it 'can run a ruby bot' do
40
- runner = SpecRunner.new('spec/dummy-bot-ruby')
41
-
42
- expect(runner).to receive(:handle_valid_record).with({'n' => 1, 'hello' => 'hello, 1'}, 'hello')
43
- expect(runner).to receive(:handle_valid_record).with({'n' => 1, 'goodbye' => 'goodbye, 1'}, 'goodbye')
44
- expect(runner).to receive(:handle_valid_record).with({'n' => 2, 'hello' => 'hello, 2'}, 'hello')
45
- expect(runner).to receive(:handle_valid_record).with({'n' => 2, 'goodbye' => 'goodbye, 2'}, 'goodbye')
46
- expect(runner).to receive(:handle_invalid_record).with({'n' => 3}, 'hello', [:error])
47
- expect(runner).to receive(:handle_valid_record).with({'n' => 4, 'hello' => 'hello, 4'}, 'hello')
48
- expect(runner).to receive(:handle_valid_record).with({'n' => 4, 'goodbye' => 'goodbye, 4'}, 'goodbye')
49
- expect(runner).to receive(:handle_successful_run)
50
- runner.run
51
- expect($stderr.string).to eq("hello from ruby\n")
52
- end
53
-
54
- it 'can run a python bot' do
55
- runner = SpecRunner.new('spec/dummy-bot-python')
56
-
57
- expect(runner).to receive(:handle_valid_record).with({'n' => 5, 'hello' => 'hello, 5'}, 'hello')
58
- expect(runner).to receive(:handle_valid_record).with({'n' => 5, 'goodbye' => 'goodbye, 5'}, 'goodbye')
59
- expect(runner).to receive(:handle_valid_record).with({'n' => 6, 'hello' => 'hello, 6'}, 'hello')
60
- expect(runner).to receive(:handle_valid_record).with({'n' => 6, 'goodbye' => 'goodbye, 6'}, 'goodbye')
61
- expect(runner).to receive(:handle_invalid_record).with({'n' => 7}, 'hello', [:error])
62
- expect(runner).to receive(:handle_valid_record).with({'n' => 8, 'hello' => 'hello, 8'}, 'hello')
63
- expect(runner).to receive(:handle_valid_record).with({'n' => 8, 'goodbye' => 'goodbye, 8'}, 'goodbye')
64
- expect(runner).to receive(:handle_successful_run)
65
- runner.run
66
- expect($stderr.string).to eq("hello from python\n")
67
- end
68
-
69
- describe "broken bots" do
70
- describe "failing bot without transformer" do
71
- it 'should call handle_failed_run' do
72
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby')
73
- expect(runner).to receive(:handle_valid_record)
74
- expect(runner).to receive(:handle_failed_run)
75
- runner.run
76
- end
77
-
78
- it 'should write exception to stderr' do
79
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby')
80
- runner.run
81
- expect($stderr.string).to match(/^hello/)
82
- expect($stderr.string).to match(/oops/)
83
- end
84
- end
85
-
86
- describe "failing bot with successful transformer" do
87
- it 'should call handle_failed_run' do
88
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby-2')
89
- expect(runner).to receive(:handle_valid_record) # first record
90
- expect(runner).to receive(:handle_valid_record) # first transform
91
- expect(runner).to receive(:handle_failed_run)
92
- runner.run
93
- end
94
-
95
- it 'should write exception to stderr' do
96
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby')
97
- runner.run
98
- expect($stderr.string).to match(/oops/)
99
- end
100
- end
101
-
102
- describe "sucessful bot with failing transformer" do
103
- it 'should call handle_failed_run' do
104
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby-3')
105
- expect(runner).to receive(:handle_valid_record) # the untransformed one
106
- expect(runner).to receive(:handle_failed_run) # the transformer breaks immediately
107
- runner.run
108
- end
109
-
110
- it 'should write exception to stderr' do
111
- runner = BrokenRunner.new('spec/dummy-broken-bot-ruby')
112
- runner.run
113
- expect($stderr.string).to match(/oops/)
114
- end
115
- end
116
- end
117
- end