turbot-runner-morph 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +15 -0
  2. data/bin/rspec +16 -0
  3. data/lib/turbot_runner.rb +28 -0
  4. data/lib/turbot_runner/base_handler.rb +15 -0
  5. data/lib/turbot_runner/exceptions.rb +4 -0
  6. data/lib/turbot_runner/prerun.rb +3 -0
  7. data/lib/turbot_runner/processor.rb +53 -0
  8. data/lib/turbot_runner/runner.rb +179 -0
  9. data/lib/turbot_runner/script_runner.rb +98 -0
  10. data/lib/turbot_runner/utils.rb +47 -0
  11. data/lib/turbot_runner/validator.rb +28 -0
  12. data/lib/turbot_runner/version.rb +3 -0
  13. data/schema/schemas/company-schema.json +243 -0
  14. data/schema/schemas/financial-payment-schema.json +32 -0
  15. data/schema/schemas/includes/address.json +53 -0
  16. data/schema/schemas/includes/alternative_name.json +36 -0
  17. data/schema/schemas/includes/company-for-nesting.json +245 -0
  18. data/schema/schemas/includes/company.json +25 -0
  19. data/schema/schemas/includes/entity.json +58 -0
  20. data/schema/schemas/includes/filing.json +52 -0
  21. data/schema/schemas/includes/financial-payment-data-object.json +112 -0
  22. data/schema/schemas/includes/identifier.json +20 -0
  23. data/schema/schemas/includes/industry_code.json +29 -0
  24. data/schema/schemas/includes/licence-data-object.json +63 -0
  25. data/schema/schemas/includes/officer.json +70 -0
  26. data/schema/schemas/includes/organisation.json +58 -0
  27. data/schema/schemas/includes/permission.json +46 -0
  28. data/schema/schemas/includes/person.json +62 -0
  29. data/schema/schemas/includes/person_name.json +71 -0
  30. data/schema/schemas/includes/previous_name.json +24 -0
  31. data/schema/schemas/includes/share-parcel-data.json +82 -0
  32. data/schema/schemas/includes/share-parcel.json +78 -0
  33. data/schema/schemas/includes/subsidiary-relationship-data.json +58 -0
  34. data/schema/schemas/includes/total-shares.json +17 -0
  35. data/schema/schemas/includes/unknown_entity_type.json +58 -0
  36. data/schema/schemas/licence-schema.json +105 -0
  37. data/schema/schemas/primary-data-schema.json +20 -0
  38. data/schema/schemas/share-parcel-schema.json +22 -0
  39. data/schema/schemas/simple-financial-payment-schema.json +122 -0
  40. data/schema/schemas/simple-licence-schema.json +82 -0
  41. data/schema/schemas/simple-subsidiary-schema.json +85 -0
  42. data/schema/schemas/subsidiary-relationship-schema.json +46 -0
  43. data/spec/bots/bot-that-crashes-immediately/manifest.json +15 -0
  44. data/spec/bots/bot-that-crashes-immediately/scraper.rb +1 -0
  45. data/spec/bots/bot-that-crashes-immediately/transformer1.rb +15 -0
  46. data/spec/bots/bot-that-crashes-in-scraper/manifest.json +15 -0
  47. data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
  48. data/spec/bots/bot-that-crashes-in-scraper/transformer1.rb +15 -0
  49. data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
  50. data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
  51. data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
  52. data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
  53. data/spec/bots/bot-that-emits-run-ended/manifest.json +8 -0
  54. data/spec/bots/bot-that-emits-run-ended/scraper.rb +11 -0
  55. data/spec/bots/bot-that-expects-file/manifest.json +8 -0
  56. data/spec/bots/bot-that-expects-file/scraper.rb +11 -0
  57. data/spec/bots/bot-that-expects-file/something.txt +1 -0
  58. data/spec/bots/bot-with-invalid-data-type/manifest.json +8 -0
  59. data/spec/bots/bot-with-invalid-data-type/scraper.rb +10 -0
  60. data/spec/bots/bot-with-invalid-sample-date/manifest.json +8 -0
  61. data/spec/bots/bot-with-invalid-sample-date/scraper.rb +10 -0
  62. data/spec/bots/bot-with-pause/manifest.json +8 -0
  63. data/spec/bots/bot-with-pause/scraper.rb +16 -0
  64. data/spec/bots/bot-with-transformer/manifest.json +15 -0
  65. data/spec/bots/bot-with-transformer/scraper.rb +10 -0
  66. data/spec/bots/bot-with-transformer/transformer.rb +15 -0
  67. data/spec/bots/bot-with-transformers/manifest.json +20 -0
  68. data/spec/bots/bot-with-transformers/scraper.rb +10 -0
  69. data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
  70. data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
  71. data/spec/bots/invalid-json-bot/manifest.json +8 -0
  72. data/spec/bots/invalid-json-bot/scraper.rb +11 -0
  73. data/spec/bots/invalid-record-bot/manifest.json +8 -0
  74. data/spec/bots/invalid-record-bot/scraper.rb +11 -0
  75. data/spec/bots/logging-bot/manifest.json +8 -0
  76. data/spec/bots/logging-bot/scraper.rb +14 -0
  77. data/spec/bots/python-bot/manifest.json +8 -0
  78. data/spec/bots/python-bot/scraper.py +11 -0
  79. data/spec/bots/ruby-bot/manifest.json +8 -0
  80. data/spec/bots/ruby-bot/scraper.rb +10 -0
  81. data/spec/bots/slow-bot/manifest.json +8 -0
  82. data/spec/bots/slow-bot/scraper.rb +11 -0
  83. data/spec/lib/processor_spec.rb +181 -0
  84. data/spec/lib/runner_spec.rb +330 -0
  85. data/spec/lib/utils_spec.rb +23 -0
  86. data/spec/lib/validator_spec.rb +89 -0
  87. data/spec/manual_spec.rb +57 -0
  88. data/spec/outputs/full-scraper.out +10 -0
  89. data/spec/outputs/full-transformer.out +10 -0
  90. data/spec/outputs/truncated-scraper.out +5 -0
  91. data/spec/spec_helper.rb +20 -0
  92. metadata +148 -0
@@ -0,0 +1,23 @@
1
+ require 'spec_helper'
2
+
3
+ describe TurbotRunner::Utils do
4
+ specify '.flatten' do
5
+ hash = {
6
+ 'a' => {
7
+ 'b' => {
8
+ 'c' => '123',
9
+ 'd' => '124',
10
+ },
11
+ 'e' => {
12
+ 'f' => '156',
13
+ }
14
+ }
15
+ }
16
+
17
+ expect(TurbotRunner::Utils.flatten(hash)).to eq({
18
+ 'a.b.c' => '123',
19
+ 'a.b.d' => '124',
20
+ 'a.e.f' => '156',
21
+ })
22
+ end
23
+ end
@@ -0,0 +1,89 @@
1
+ require 'spec_helper'
2
+
3
+ describe TurbotRunner::Validator do
4
+ describe '.validate' do
5
+ specify 'with valid record' do
6
+ record = {
7
+ 'sample_date' => '2014-06-01',
8
+ 'source_url' => 'http://example.com/123',
9
+ 'number' => 123
10
+ }
11
+ expect(record).to be_valid
12
+ end
13
+
14
+ specify 'with record missing required field' do
15
+ record = {
16
+ 'sample_date' => '2014-06-01',
17
+ 'number' => 123
18
+ }
19
+ expected_error = 'Missing required property: source_url'
20
+ expect(record).to fail_validation_with(expected_error)
21
+ end
22
+
23
+ specify 'with record missing all identifying fields' do
24
+ record = {
25
+ 'sample_date' => '2014-06-01',
26
+ 'source_url' => 'http://example.com/123'
27
+ }
28
+ expected_error = 'There were no values provided for any of the identifying fields: number'
29
+ expect(record).to fail_validation_with(expected_error)
30
+ end
31
+
32
+ specify 'with record with empty sample_date' do
33
+ record = {
34
+ 'sample_date' => '',
35
+ 'source_url' => 'http://example.com/123',
36
+ 'number' => 123
37
+ }
38
+ expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
39
+ expect(record).to fail_validation_with(expected_error)
40
+ end
41
+
42
+ specify 'with record with invalid sample_date' do
43
+ record = {
44
+ 'sample_date' => '2014-06-00',
45
+ 'source_url' => 'http://example.com/123',
46
+ 'number' => 123
47
+ }
48
+ expected_error = 'Property not of expected format: sample_date (must be of format yyyy-mm-dd)'
49
+ expect(record).to fail_validation_with(expected_error)
50
+ end
51
+
52
+ context 'with nested identifying fields' do
53
+ specify 'with record missing all identifying fields' do
54
+ record = {
55
+ 'sample_date' => '2014-06-01',
56
+ 'source_url' => 'http://example.com/123',
57
+ 'one' => {'two' => {}},
58
+ 'four' => {}
59
+ }
60
+ identifying_fields = ['one.two.three', 'four.five.six']
61
+ error = TurbotRunner::Validator.validate('primary-data', record, identifying_fields)
62
+ expect(error).to eq('There were no values provided for any of the identifying fields: one.two.three, four.five.six')
63
+ end
64
+
65
+ specify 'with record missing some identifying fields' do
66
+ record = {
67
+ 'sample_date' => '2014-06-01',
68
+ 'source_url' => 'http://example.com/123',
69
+ 'one' => {'two' => {'three' => 123}}
70
+ }
71
+ identifying_fields = ['one.two.three', 'four.five.six']
72
+ error = TurbotRunner::Validator.validate('primary-data', record, identifying_fields)
73
+ expect(error).to eq(nil)
74
+ end
75
+
76
+ specify 'with record missing no identifying fields' do
77
+ record = {
78
+ 'sample_date' => '2014-06-01',
79
+ 'source_url' => 'http://example.com/123',
80
+ 'one' => {'two' => {'three' => 123}},
81
+ 'four' => {'five' => {'six' => 456}}
82
+ }
83
+ identifying_fields = ['one.two.three', 'four.five.six']
84
+ error = TurbotRunner::Validator.validate('primary-data', record, identifying_fields)
85
+ expect(error).to eq(nil)
86
+ end
87
+ end
88
+ end
89
+ end
@@ -0,0 +1,57 @@
1
+ # These specs test behaviour that is most easily exercised or verified by hand.
2
+
3
+ $:.unshift('lib')
4
+ require 'turbot_runner'
5
+
6
+ puts
7
+ puts '-' * 80
8
+
9
+ puts <<eos
10
+ This tests whether stderr is directed to the console.
11
+ When the scraper is run, you should see the following two lines in the console:
12
+
13
+ doing...
14
+ done
15
+
16
+ Press <enter> to run the test.
17
+ eos
18
+
19
+ gets
20
+
21
+ bot_location = File.join(File.dirname(__FILE__), 'bots/logging-bot')
22
+ runner = TurbotRunner::Runner.new(bot_location).run
23
+
24
+ puts
25
+ puts 'Did you see the expected lines? [y]/n'
26
+
27
+ exit(1) unless ['Y', 'y', ''].include?(gets.chomp)
28
+
29
+ puts
30
+ puts '-' * 80
31
+
32
+ puts <<eos
33
+ This tests whether hitting Ctrl-C interrupts a running scraper correctly. When
34
+ the scraper is run, it will pause after producing five lines of output, and
35
+ instruct you to interrupt it. You will have ten seconds to do so.
36
+
37
+ Press <enter> to run the test.
38
+ eos
39
+
40
+ gets
41
+
42
+ bot_location = File.join(File.dirname(__FILE__), 'bots/bot-with-pause')
43
+ runner = TurbotRunner::Runner.new(bot_location).run
44
+
45
+ expected_output = File.readlines('spec/outputs/truncated-scraper.out').map {|line| JSON.parse(line)}
46
+ actual_output = File.readlines('spec/bots/bot-with-pause/output/scraper.out').map {|line| JSON.parse(line)}
47
+
48
+ if expected_output == actual_output
49
+ puts 'Bot produced expected output'
50
+ else
51
+ puts 'Bot did not produce expected output'
52
+ exit(1)
53
+ end
54
+
55
+ puts
56
+ puts '-' * 80
57
+ puts 'All tests passed!'
@@ -0,0 +1,10 @@
1
+ {"licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
6
+ {"licence_number":"XYZ5","source_url":"http://example.com","sample_date":"2014-06-01"}
7
+ {"licence_number":"XYZ6","source_url":"http://example.com","sample_date":"2014-06-01"}
8
+ {"licence_number":"XYZ7","source_url":"http://example.com","sample_date":"2014-06-01"}
9
+ {"licence_number":"XYZ8","source_url":"http://example.com","sample_date":"2014-06-01"}
10
+ {"licence_number":"XYZ9","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,10 @@
1
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
6
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ5","source_url":"http://example.com","sample_date":"2014-06-01"}
7
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ6","source_url":"http://example.com","sample_date":"2014-06-01"}
8
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ7","source_url":"http://example.com","sample_date":"2014-06-01"}
9
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ8","source_url":"http://example.com","sample_date":"2014-06-01"}
10
+ {"company_name":"Foo Widgets","company_jurisdiction":"gb","licence_number":"XYZ9","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,5 @@
1
+ {"licence_number":"XYZ0","source_url":"http://example.com","sample_date":"2014-06-01"}
2
+ {"licence_number":"XYZ1","source_url":"http://example.com","sample_date":"2014-06-01"}
3
+ {"licence_number":"XYZ2","source_url":"http://example.com","sample_date":"2014-06-01"}
4
+ {"licence_number":"XYZ3","source_url":"http://example.com","sample_date":"2014-06-01"}
5
+ {"licence_number":"XYZ4","source_url":"http://example.com","sample_date":"2014-06-01"}
@@ -0,0 +1,20 @@
1
+ require 'turbot_runner'
2
+
3
+ RSpec::Matchers.define(:fail_validation_with) do |expected_error|
4
+ match do |record|
5
+ identifying_fields = ['number']
6
+ @error = TurbotRunner::Validator.validate('primary-data', record, identifying_fields)
7
+ expect(@error).to eq(expected_error)
8
+ end
9
+
10
+ failure_message do |actual|
11
+ "Expected error to be #{expected_error}, but was #{@error}"
12
+ end
13
+ end
14
+
15
+ RSpec::Matchers.define(:be_valid) do
16
+ match do |record|
17
+ identifying_fields = ['number']
18
+ expect(TurbotRunner::Validator.validate('primary-data', record, identifying_fields)).to eq(nil)
19
+ end
20
+ end
metadata ADDED
@@ -0,0 +1,148 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: turbot-runner-morph
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - OpenCorporates
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-03-24 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: openc-json_schema
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 0.0.13
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 0.0.13
27
+ description:
28
+ email: bots@opencorporates.com
29
+ executables: []
30
+ extensions: []
31
+ extra_rdoc_files: []
32
+ files:
33
+ - bin/rspec
34
+ - lib/turbot_runner.rb
35
+ - lib/turbot_runner/base_handler.rb
36
+ - lib/turbot_runner/exceptions.rb
37
+ - lib/turbot_runner/prerun.rb
38
+ - lib/turbot_runner/processor.rb
39
+ - lib/turbot_runner/runner.rb
40
+ - lib/turbot_runner/script_runner.rb
41
+ - lib/turbot_runner/utils.rb
42
+ - lib/turbot_runner/validator.rb
43
+ - lib/turbot_runner/version.rb
44
+ - schema/schemas/company-schema.json
45
+ - schema/schemas/financial-payment-schema.json
46
+ - schema/schemas/includes/address.json
47
+ - schema/schemas/includes/alternative_name.json
48
+ - schema/schemas/includes/company-for-nesting.json
49
+ - schema/schemas/includes/company.json
50
+ - schema/schemas/includes/entity.json
51
+ - schema/schemas/includes/filing.json
52
+ - schema/schemas/includes/financial-payment-data-object.json
53
+ - schema/schemas/includes/identifier.json
54
+ - schema/schemas/includes/industry_code.json
55
+ - schema/schemas/includes/licence-data-object.json
56
+ - schema/schemas/includes/officer.json
57
+ - schema/schemas/includes/organisation.json
58
+ - schema/schemas/includes/permission.json
59
+ - schema/schemas/includes/person.json
60
+ - schema/schemas/includes/person_name.json
61
+ - schema/schemas/includes/previous_name.json
62
+ - schema/schemas/includes/share-parcel-data.json
63
+ - schema/schemas/includes/share-parcel.json
64
+ - schema/schemas/includes/subsidiary-relationship-data.json
65
+ - schema/schemas/includes/total-shares.json
66
+ - schema/schemas/includes/unknown_entity_type.json
67
+ - schema/schemas/licence-schema.json
68
+ - schema/schemas/primary-data-schema.json
69
+ - schema/schemas/share-parcel-schema.json
70
+ - schema/schemas/simple-financial-payment-schema.json
71
+ - schema/schemas/simple-licence-schema.json
72
+ - schema/schemas/simple-subsidiary-schema.json
73
+ - schema/schemas/subsidiary-relationship-schema.json
74
+ - spec/bots/bot-that-crashes-immediately/manifest.json
75
+ - spec/bots/bot-that-crashes-immediately/scraper.rb
76
+ - spec/bots/bot-that-crashes-immediately/transformer1.rb
77
+ - spec/bots/bot-that-crashes-in-scraper/manifest.json
78
+ - spec/bots/bot-that-crashes-in-scraper/scraper.rb
79
+ - spec/bots/bot-that-crashes-in-scraper/transformer1.rb
80
+ - spec/bots/bot-that-crashes-in-transformer/manifest.json
81
+ - spec/bots/bot-that-crashes-in-transformer/scraper.rb
82
+ - spec/bots/bot-that-crashes-in-transformer/transformer1.rb
83
+ - spec/bots/bot-that-crashes-in-transformer/transformer2.rb
84
+ - spec/bots/bot-that-emits-run-ended/manifest.json
85
+ - spec/bots/bot-that-emits-run-ended/scraper.rb
86
+ - spec/bots/bot-that-expects-file/manifest.json
87
+ - spec/bots/bot-that-expects-file/scraper.rb
88
+ - spec/bots/bot-that-expects-file/something.txt
89
+ - spec/bots/bot-with-invalid-data-type/manifest.json
90
+ - spec/bots/bot-with-invalid-data-type/scraper.rb
91
+ - spec/bots/bot-with-invalid-sample-date/manifest.json
92
+ - spec/bots/bot-with-invalid-sample-date/scraper.rb
93
+ - spec/bots/bot-with-pause/manifest.json
94
+ - spec/bots/bot-with-pause/scraper.rb
95
+ - spec/bots/bot-with-transformer/manifest.json
96
+ - spec/bots/bot-with-transformer/scraper.rb
97
+ - spec/bots/bot-with-transformer/transformer.rb
98
+ - spec/bots/bot-with-transformers/manifest.json
99
+ - spec/bots/bot-with-transformers/scraper.rb
100
+ - spec/bots/bot-with-transformers/transformer1.rb
101
+ - spec/bots/bot-with-transformers/transformer2.rb
102
+ - spec/bots/invalid-json-bot/manifest.json
103
+ - spec/bots/invalid-json-bot/scraper.rb
104
+ - spec/bots/invalid-record-bot/manifest.json
105
+ - spec/bots/invalid-record-bot/scraper.rb
106
+ - spec/bots/logging-bot/manifest.json
107
+ - spec/bots/logging-bot/scraper.rb
108
+ - spec/bots/python-bot/manifest.json
109
+ - spec/bots/python-bot/scraper.py
110
+ - spec/bots/ruby-bot/manifest.json
111
+ - spec/bots/ruby-bot/scraper.rb
112
+ - spec/bots/slow-bot/manifest.json
113
+ - spec/bots/slow-bot/scraper.rb
114
+ - spec/lib/processor_spec.rb
115
+ - spec/lib/runner_spec.rb
116
+ - spec/lib/utils_spec.rb
117
+ - spec/lib/validator_spec.rb
118
+ - spec/manual_spec.rb
119
+ - spec/outputs/full-scraper.out
120
+ - spec/outputs/full-transformer.out
121
+ - spec/outputs/truncated-scraper.out
122
+ - spec/spec_helper.rb
123
+ homepage: http://turbot.opencorporates.com/
124
+ licenses:
125
+ - MIT
126
+ metadata: {}
127
+ post_install_message:
128
+ rdoc_options: []
129
+ require_paths:
130
+ - lib
131
+ required_ruby_version: !ruby/object:Gem::Requirement
132
+ requirements:
133
+ - - ! '>='
134
+ - !ruby/object:Gem::Version
135
+ version: 1.9.2
136
+ required_rubygems_version: !ruby/object:Gem::Requirement
137
+ requirements:
138
+ - - ! '>='
139
+ - !ruby/object:Gem::Version
140
+ version: '0'
141
+ requirements: []
142
+ rubyforge_project:
143
+ rubygems_version: 2.2.2
144
+ signing_key:
145
+ specification_version: 4
146
+ summary: Utilities for running bots with Turbot
147
+ test_files: []
148
+ has_rdoc: