turbot-runner-morph 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92)
  1. checksums.yaml +15 -0
  2. data/bin/rspec +16 -0
  3. data/lib/turbot_runner.rb +28 -0
  4. data/lib/turbot_runner/base_handler.rb +15 -0
  5. data/lib/turbot_runner/exceptions.rb +4 -0
  6. data/lib/turbot_runner/prerun.rb +3 -0
  7. data/lib/turbot_runner/processor.rb +53 -0
  8. data/lib/turbot_runner/runner.rb +179 -0
  9. data/lib/turbot_runner/script_runner.rb +98 -0
  10. data/lib/turbot_runner/utils.rb +47 -0
  11. data/lib/turbot_runner/validator.rb +28 -0
  12. data/lib/turbot_runner/version.rb +3 -0
  13. data/schema/schemas/company-schema.json +243 -0
  14. data/schema/schemas/financial-payment-schema.json +32 -0
  15. data/schema/schemas/includes/address.json +53 -0
  16. data/schema/schemas/includes/alternative_name.json +36 -0
  17. data/schema/schemas/includes/company-for-nesting.json +245 -0
  18. data/schema/schemas/includes/company.json +25 -0
  19. data/schema/schemas/includes/entity.json +58 -0
  20. data/schema/schemas/includes/filing.json +52 -0
  21. data/schema/schemas/includes/financial-payment-data-object.json +112 -0
  22. data/schema/schemas/includes/identifier.json +20 -0
  23. data/schema/schemas/includes/industry_code.json +29 -0
  24. data/schema/schemas/includes/licence-data-object.json +63 -0
  25. data/schema/schemas/includes/officer.json +70 -0
  26. data/schema/schemas/includes/organisation.json +58 -0
  27. data/schema/schemas/includes/permission.json +46 -0
  28. data/schema/schemas/includes/person.json +62 -0
  29. data/schema/schemas/includes/person_name.json +71 -0
  30. data/schema/schemas/includes/previous_name.json +24 -0
  31. data/schema/schemas/includes/share-parcel-data.json +82 -0
  32. data/schema/schemas/includes/share-parcel.json +78 -0
  33. data/schema/schemas/includes/subsidiary-relationship-data.json +58 -0
  34. data/schema/schemas/includes/total-shares.json +17 -0
  35. data/schema/schemas/includes/unknown_entity_type.json +58 -0
  36. data/schema/schemas/licence-schema.json +105 -0
  37. data/schema/schemas/primary-data-schema.json +20 -0
  38. data/schema/schemas/share-parcel-schema.json +22 -0
  39. data/schema/schemas/simple-financial-payment-schema.json +122 -0
  40. data/schema/schemas/simple-licence-schema.json +82 -0
  41. data/schema/schemas/simple-subsidiary-schema.json +85 -0
  42. data/schema/schemas/subsidiary-relationship-schema.json +46 -0
  43. data/spec/bots/bot-that-crashes-immediately/manifest.json +15 -0
  44. data/spec/bots/bot-that-crashes-immediately/scraper.rb +1 -0
  45. data/spec/bots/bot-that-crashes-immediately/transformer1.rb +15 -0
  46. data/spec/bots/bot-that-crashes-in-scraper/manifest.json +15 -0
  47. data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
  48. data/spec/bots/bot-that-crashes-in-scraper/transformer1.rb +15 -0
  49. data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
  50. data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
  51. data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
  52. data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
  53. data/spec/bots/bot-that-emits-run-ended/manifest.json +8 -0
  54. data/spec/bots/bot-that-emits-run-ended/scraper.rb +11 -0
  55. data/spec/bots/bot-that-expects-file/manifest.json +8 -0
  56. data/spec/bots/bot-that-expects-file/scraper.rb +11 -0
  57. data/spec/bots/bot-that-expects-file/something.txt +1 -0
  58. data/spec/bots/bot-with-invalid-data-type/manifest.json +8 -0
  59. data/spec/bots/bot-with-invalid-data-type/scraper.rb +10 -0
  60. data/spec/bots/bot-with-invalid-sample-date/manifest.json +8 -0
  61. data/spec/bots/bot-with-invalid-sample-date/scraper.rb +10 -0
  62. data/spec/bots/bot-with-pause/manifest.json +8 -0
  63. data/spec/bots/bot-with-pause/scraper.rb +16 -0
  64. data/spec/bots/bot-with-transformer/manifest.json +15 -0
  65. data/spec/bots/bot-with-transformer/scraper.rb +10 -0
  66. data/spec/bots/bot-with-transformer/transformer.rb +15 -0
  67. data/spec/bots/bot-with-transformers/manifest.json +20 -0
  68. data/spec/bots/bot-with-transformers/scraper.rb +10 -0
  69. data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
  70. data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
  71. data/spec/bots/invalid-json-bot/manifest.json +8 -0
  72. data/spec/bots/invalid-json-bot/scraper.rb +11 -0
  73. data/spec/bots/invalid-record-bot/manifest.json +8 -0
  74. data/spec/bots/invalid-record-bot/scraper.rb +11 -0
  75. data/spec/bots/logging-bot/manifest.json +8 -0
  76. data/spec/bots/logging-bot/scraper.rb +14 -0
  77. data/spec/bots/python-bot/manifest.json +8 -0
  78. data/spec/bots/python-bot/scraper.py +11 -0
  79. data/spec/bots/ruby-bot/manifest.json +8 -0
  80. data/spec/bots/ruby-bot/scraper.rb +10 -0
  81. data/spec/bots/slow-bot/manifest.json +8 -0
  82. data/spec/bots/slow-bot/scraper.rb +11 -0
  83. data/spec/lib/processor_spec.rb +181 -0
  84. data/spec/lib/runner_spec.rb +330 -0
  85. data/spec/lib/utils_spec.rb +23 -0
  86. data/spec/lib/validator_spec.rb +89 -0
  87. data/spec/manual_spec.rb +57 -0
  88. data/spec/outputs/full-scraper.out +10 -0
  89. data/spec/outputs/full-transformer.out +10 -0
  90. data/spec/outputs/truncated-scraper.out +5 -0
  91. data/spec/spec_helper.rb +20 -0
  92. metadata +148 -0
@@ -0,0 +1,23 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the TurbotRunner::Utils helper module.
describe TurbotRunner::Utils do
  # .flatten is expected to turn a nested hash into a single-level hash whose
  # keys are the dot-joined paths to each leaf value.
  specify '.flatten' do
    nested = {
      'a' => {
        'b' => { 'c' => '123', 'd' => '124' },
        'e' => { 'f' => '156' }
      }
    }
    flattened = {
      'a.b.c' => '123',
      'a.b.d' => '124',
      'a.e.f' => '156'
    }

    expect(TurbotRunner::Utils.flatten(nested)).to eq(flattened)
  end
end
@@ -0,0 +1,89 @@
1
+ require 'spec_helper'
2
+
3
# Specs for TurbotRunner::Validator.validate against the 'primary-data' schema.
describe TurbotRunner::Validator do
  describe '.validate' do
    # The be_valid / fail_validation_with matchers used below are custom
    # matchers (loaded via spec_helper); they run the validator for us.

    # Message expected for any sample_date that is not a real yyyy-mm-dd date.
    let(:date_format_error) do
      'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
    end

    specify 'with valid record' do
      complete = {
        'sample_date' => '2014-06-01',
        'source_url' => 'http://example.com/123',
        'number' => 123
      }

      expect(complete).to be_valid
    end

    specify 'with record missing required field' do
      without_source_url = { 'sample_date' => '2014-06-01', 'number' => 123 }

      expect(without_source_url).to fail_validation_with('Missing required property: source_url')
    end

    specify 'with record missing all identifying fields' do
      without_number = {
        'sample_date' => '2014-06-01',
        'source_url' => 'http://example.com/123'
      }
      message = 'There were no values provided for any of the identifying fields: number'

      expect(without_number).to fail_validation_with(message)
    end

    specify 'with record with empty sample_date' do
      blank_date = {
        'sample_date' => '',
        'source_url' => 'http://example.com/123',
        'number' => 123
      }

      expect(blank_date).to fail_validation_with(date_format_error)
    end

    specify 'with record with invalid sample_date' do
      # Day "00" matches the yyyy-mm-dd shape but is not a real calendar date.
      impossible_date = {
        'sample_date' => '2014-06-00',
        'source_url' => 'http://example.com/123',
        'number' => 123
      }

      expect(impossible_date).to fail_validation_with(date_format_error)
    end

    context 'with nested identifying fields' do
      # Dot-separated paths into the nested record.
      let(:identifying_fields) { ['one.two.three', 'four.five.six'] }

      # Runs the validator directly, since the custom matchers only use 'number'.
      def validation_error_for(record)
        TurbotRunner::Validator.validate('primary-data', record, identifying_fields)
      end

      specify 'with record missing all identifying fields' do
        record = {
          'sample_date' => '2014-06-01',
          'source_url' => 'http://example.com/123',
          'one' => { 'two' => {} },
          'four' => {}
        }

        expect(validation_error_for(record)).to eq(
          'There were no values provided for any of the identifying fields: one.two.three, four.five.six'
        )
      end

      specify 'with record missing some identifying fields' do
        record = {
          'sample_date' => '2014-06-01',
          'source_url' => 'http://example.com/123',
          'one' => { 'two' => { 'three' => 123 } }
        }

        # One populated identifying field is enough for the record to pass.
        expect(validation_error_for(record)).to eq(nil)
      end

      specify 'with record missing no identifying fields' do
        record = {
          'sample_date' => '2014-06-01',
          'source_url' => 'http://example.com/123',
          'one' => { 'two' => { 'three' => 123 } },
          'four' => { 'five' => { 'six' => 456 } }
        }

        expect(validation_error_for(record)).to eq(nil)
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
# These specs test behaviour that is most easily exercised or verified by hand.
#
# Each section prints instructions, waits for <enter>, runs one of the bots
# under spec/bots, and exits with status 1 if the check fails.

# Resolve lib/ and all fixture paths relative to this file rather than the
# current working directory, so the script behaves the same wherever it is
# launched from.
spec_dir = File.dirname(__FILE__)
$:.unshift(File.expand_path('../lib', spec_dir))
require 'json' # JSON.parse is used below to compare bot output
require 'turbot_runner'

# Reads a file of newline-delimited JSON and returns the parsed records.
read_json_lines = lambda do |path|
  File.readlines(path).map { |line| JSON.parse(line) }
end

puts
puts '-' * 80

puts <<eos
This tests whether stderr is directed to the console.
When the scraper is run, you should see the following two lines in the console:

doing...
done

Press <enter> to run the test.
eos

gets

bot_location = File.join(spec_dir, 'bots/logging-bot')
TurbotRunner::Runner.new(bot_location).run

puts
puts 'Did you see the expected lines? [y]/n'

# An empty answer (just <enter>) counts as the default "yes".
exit(1) unless ['Y', 'y', ''].include?(gets.chomp)

puts
puts '-' * 80

puts <<eos
This tests whether hitting Ctrl-C interrupts a running scraper correctly. When
the scraper is run, it will pause after producing five lines of output, and
instruct you to interrupt it. You will have ten seconds to do so.

Press <enter> to run the test.
eos

gets

bot_location = File.join(spec_dir, 'bots/bot-with-pause')
TurbotRunner::Runner.new(bot_location).run

# The interrupted bot should have written exactly the first five records.
# NOTE(review): assumes the runner writes to <bot directory>/output/scraper.out,
# which matches the path the original script read when run from the project root.
expected_output = read_json_lines.call(File.join(spec_dir, 'outputs/truncated-scraper.out'))
actual_output = read_json_lines.call(File.join(bot_location, 'output/scraper.out'))

if expected_output == actual_output
  puts 'Bot produced expected output'
else
  puts 'Bot did not produce expected output'
  exit(1)
end

puts
puts '-' * 80
puts 'All tests passed!'
@@ -0,0 +1,10 @@
1
+ {"licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
6
+ {"licence_number":"XYZ5","source_url":"http://example.com","sample_date":"2014-06-01"}
7
+ {"licence_number":"XYZ6","source_url":"http://example.com","sample_date":"2014-06-01"}
8
+ {"licence_number":"XYZ7","source_url":"http://example.com","sample_date":"2014-06-01"}
9
+ {"licence_number":"XYZ8","source_url":"http://example.com","sample_date":"2014-06-01"}
10
+ {"licence_number":"XYZ9","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,10 @@
1
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
6
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ5","source_url":"http://example.com","sample_date":"2014-06-01"}
7
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ6","source_url":"http://example.com","sample_date":"2014-06-01"}
8
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ7","source_url":"http://example.com","sample_date":"2014-06-01"}
9
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ8","source_url":"http://example.com","sample_date":"2014-06-01"}
10
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ9","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,5 @@
1
+ {"licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,20 @@
1
+ require 'turbot_runner'
2
+
3
+ RSpec::Matchers.define(:fail_validation_with) do |expected_error|
4
+ match do |record|
5
+ identifying_fields = ['number']
6
+ @error = TurbotRunner::Validator.validate('primary-data', record, identifying_fields)
7
+ expect(@error).to eq(expected_error)
8
+ end
9
+
10
+ failure_message do |actual|
11
+ "Expected error to be #{expected_error}, but was #{@error}"
12
+ end
13
+ end
14
+
15
# Custom matcher: passes when validating the record against the 'primary-data'
# schema (with 'number' as the only identifying field) reports no error, i.e.
# the validator returns nil.
RSpec::Matchers.define(:be_valid) do
  match do |record|
    identifying_fields = ['number']
    # Keep the validator's result so a failing spec can say why it failed.
    @error = TurbotRunner::Validator.validate('primary-data', record, identifying_fields)
    @error.nil?
  end

  # Without this, a failure only prints RSpec's generic "expected ... to be
  # valid" text and the validator's actual complaint is lost.
  failure_message do |_record|
    "Expected record to be valid, but validation failed with: #{@error}"
  end
end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: turbot-runner-morph
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - OpenCorporates
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: openc-json_schema
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.0.13
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.13
27
+ description:
28
+ email: bots@opencorporates.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - bin/rspec
34
+ - lib/turbot_runner.rb
35
+ - lib/turbot_runner/base_handler.rb
36
+ - lib/turbot_runner/exceptions.rb
37
+ - lib/turbot_runner/prerun.rb
38
+ - lib/turbot_runner/processor.rb
39
+ - lib/turbot_runner/runner.rb
40
+ - lib/turbot_runner/script_runner.rb
41
+ - lib/turbot_runner/utils.rb
42
+ - lib/turbot_runner/validator.rb
43
+ - lib/turbot_runner/version.rb
44
+ - schema/schemas/company-schema.json
45
+ - schema/schemas/financial-payment-schema.json
46
+ - schema/schemas/includes/address.json
47
+ - schema/schemas/includes/alternative_name.json
48
+ - schema/schemas/includes/company-for-nesting.json
49
+ - schema/schemas/includes/company.json
50
+ - schema/schemas/includes/entity.json
51
+ - schema/schemas/includes/filing.json
52
+ - schema/schemas/includes/financial-payment-data-object.json
53
+ - schema/schemas/includes/identifier.json
54
+ - schema/schemas/includes/industry_code.json
55
+ - schema/schemas/includes/licence-data-object.json
56
+ - schema/schemas/includes/officer.json
57
+ - schema/schemas/includes/organisation.json
58
+ - schema/schemas/includes/permission.json
59
+ - schema/schemas/includes/person.json
60
+ - schema/schemas/includes/person_name.json
61
+ - schema/schemas/includes/previous_name.json
62
+ - schema/schemas/includes/share-parcel-data.json
63
+ - schema/schemas/includes/share-parcel.json
64
+ - schema/schemas/includes/subsidiary-relationship-data.json
65
+ - schema/schemas/includes/total-shares.json
66
+ - schema/schemas/includes/unknown_entity_type.json
67
+ - schema/schemas/licence-schema.json
68
+ - schema/schemas/primary-data-schema.json
69
+ - schema/schemas/share-parcel-schema.json
70
+ - schema/schemas/simple-financial-payment-schema.json
71
+ - schema/schemas/simple-licence-schema.json
72
+ - schema/schemas/simple-subsidiary-schema.json
73
+ - schema/schemas/subsidiary-relationship-schema.json
74
+ - spec/bots/bot-that-crashes-immediately/manifest.json
75
+ - spec/bots/bot-that-crashes-immediately/scraper.rb
76
+ - spec/bots/bot-that-crashes-immediately/transformer1.rb
77
+ - spec/bots/bot-that-crashes-in-scraper/manifest.json
78
+ - spec/bots/bot-that-crashes-in-scraper/scraper.rb
79
+ - spec/bots/bot-that-crashes-in-scraper/transformer1.rb
80
+ - spec/bots/bot-that-crashes-in-transformer/manifest.json
81
+ - spec/bots/bot-that-crashes-in-transformer/scraper.rb
82
+ - spec/bots/bot-that-crashes-in-transformer/transformer1.rb
83
+ - spec/bots/bot-that-crashes-in-transformer/transformer2.rb
84
+ - spec/bots/bot-that-emits-run-ended/manifest.json
85
+ - spec/bots/bot-that-emits-run-ended/scraper.rb
86
+ - spec/bots/bot-that-expects-file/manifest.json
87
+ - spec/bots/bot-that-expects-file/scraper.rb
88
+ - spec/bots/bot-that-expects-file/something.txt
89
+ - spec/bots/bot-with-invalid-data-type/manifest.json
90
+ - spec/bots/bot-with-invalid-data-type/scraper.rb
91
+ - spec/bots/bot-with-invalid-sample-date/manifest.json
92
+ - spec/bots/bot-with-invalid-sample-date/scraper.rb
93
+ - spec/bots/bot-with-pause/manifest.json
94
+ - spec/bots/bot-with-pause/scraper.rb
95
+ - spec/bots/bot-with-transformer/manifest.json
96
+ - spec/bots/bot-with-transformer/scraper.rb
97
+ - spec/bots/bot-with-transformer/transformer.rb
98
+ - spec/bots/bot-with-transformers/manifest.json
99
+ - spec/bots/bot-with-transformers/scraper.rb
100
+ - spec/bots/bot-with-transformers/transformer1.rb
101
+ - spec/bots/bot-with-transformers/transformer2.rb
102
+ - spec/bots/invalid-json-bot/manifest.json
103
+ - spec/bots/invalid-json-bot/scraper.rb
104
+ - spec/bots/invalid-record-bot/manifest.json
105
+ - spec/bots/invalid-record-bot/scraper.rb
106
+ - spec/bots/logging-bot/manifest.json
107
+ - spec/bots/logging-bot/scraper.rb
108
+ - spec/bots/python-bot/manifest.json
109
+ - spec/bots/python-bot/scraper.py
110
+ - spec/bots/ruby-bot/manifest.json
111
+ - spec/bots/ruby-bot/scraper.rb
112
+ - spec/bots/slow-bot/manifest.json
113
+ - spec/bots/slow-bot/scraper.rb
114
+ - spec/lib/processor_spec.rb
115
+ - spec/lib/runner_spec.rb
116
+ - spec/lib/utils_spec.rb
117
+ - spec/lib/validator_spec.rb
118
+ - spec/manual_spec.rb
119
+ - spec/outputs/full-scraper.out
120
+ - spec/outputs/full-transformer.out
121
+ - spec/outputs/truncated-scraper.out
122
+ - spec/spec_helper.rb
123
+ homepage: http://turbot.opencorporates.com/
124
+ licenses:
125
+ - MIT
126
+ metadata: {}
127
+ post_install_message:
128
+ rdoc_options: []
129
+ require_paths:
130
+ - lib
131
+ required_ruby_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ! '>='
134
+ - !ruby/object:Gem::Version
135
+ version: 1.9.2
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ! '>='
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubyforge_project:
143
+ rubygems_version: 2.2.2
144
+ signing_key:
145
+ specification_version: 4
146
+ summary: Utilities for running bots with Turbot
147
+ test_files: []
148
+ has_rdoc: