turbot-runner-morph 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +15 -0
  2. data/bin/rspec +16 -0
  3. data/lib/turbot_runner.rb +28 -0
  4. data/lib/turbot_runner/base_handler.rb +15 -0
  5. data/lib/turbot_runner/exceptions.rb +4 -0
  6. data/lib/turbot_runner/prerun.rb +3 -0
  7. data/lib/turbot_runner/processor.rb +53 -0
  8. data/lib/turbot_runner/runner.rb +179 -0
  9. data/lib/turbot_runner/script_runner.rb +98 -0
  10. data/lib/turbot_runner/utils.rb +47 -0
  11. data/lib/turbot_runner/validator.rb +28 -0
  12. data/lib/turbot_runner/version.rb +3 -0
  13. data/schema/schemas/company-schema.json +243 -0
  14. data/schema/schemas/financial-payment-schema.json +32 -0
  15. data/schema/schemas/includes/address.json +53 -0
  16. data/schema/schemas/includes/alternative_name.json +36 -0
  17. data/schema/schemas/includes/company-for-nesting.json +245 -0
  18. data/schema/schemas/includes/company.json +25 -0
  19. data/schema/schemas/includes/entity.json +58 -0
  20. data/schema/schemas/includes/filing.json +52 -0
  21. data/schema/schemas/includes/financial-payment-data-object.json +112 -0
  22. data/schema/schemas/includes/identifier.json +20 -0
  23. data/schema/schemas/includes/industry_code.json +29 -0
  24. data/schema/schemas/includes/licence-data-object.json +63 -0
  25. data/schema/schemas/includes/officer.json +70 -0
  26. data/schema/schemas/includes/organisation.json +58 -0
  27. data/schema/schemas/includes/permission.json +46 -0
  28. data/schema/schemas/includes/person.json +62 -0
  29. data/schema/schemas/includes/person_name.json +71 -0
  30. data/schema/schemas/includes/previous_name.json +24 -0
  31. data/schema/schemas/includes/share-parcel-data.json +82 -0
  32. data/schema/schemas/includes/share-parcel.json +78 -0
  33. data/schema/schemas/includes/subsidiary-relationship-data.json +58 -0
  34. data/schema/schemas/includes/total-shares.json +17 -0
  35. data/schema/schemas/includes/unknown_entity_type.json +58 -0
  36. data/schema/schemas/licence-schema.json +105 -0
  37. data/schema/schemas/primary-data-schema.json +20 -0
  38. data/schema/schemas/share-parcel-schema.json +22 -0
  39. data/schema/schemas/simple-financial-payment-schema.json +122 -0
  40. data/schema/schemas/simple-licence-schema.json +82 -0
  41. data/schema/schemas/simple-subsidiary-schema.json +85 -0
  42. data/schema/schemas/subsidiary-relationship-schema.json +46 -0
  43. data/spec/bots/bot-that-crashes-immediately/manifest.json +15 -0
  44. data/spec/bots/bot-that-crashes-immediately/scraper.rb +1 -0
  45. data/spec/bots/bot-that-crashes-immediately/transformer1.rb +15 -0
  46. data/spec/bots/bot-that-crashes-in-scraper/manifest.json +15 -0
  47. data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
  48. data/spec/bots/bot-that-crashes-in-scraper/transformer1.rb +15 -0
  49. data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
  50. data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
  51. data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
  52. data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
  53. data/spec/bots/bot-that-emits-run-ended/manifest.json +8 -0
  54. data/spec/bots/bot-that-emits-run-ended/scraper.rb +11 -0
  55. data/spec/bots/bot-that-expects-file/manifest.json +8 -0
  56. data/spec/bots/bot-that-expects-file/scraper.rb +11 -0
  57. data/spec/bots/bot-that-expects-file/something.txt +1 -0
  58. data/spec/bots/bot-with-invalid-data-type/manifest.json +8 -0
  59. data/spec/bots/bot-with-invalid-data-type/scraper.rb +10 -0
  60. data/spec/bots/bot-with-invalid-sample-date/manifest.json +8 -0
  61. data/spec/bots/bot-with-invalid-sample-date/scraper.rb +10 -0
  62. data/spec/bots/bot-with-pause/manifest.json +8 -0
  63. data/spec/bots/bot-with-pause/scraper.rb +16 -0
  64. data/spec/bots/bot-with-transformer/manifest.json +15 -0
  65. data/spec/bots/bot-with-transformer/scraper.rb +10 -0
  66. data/spec/bots/bot-with-transformer/transformer.rb +15 -0
  67. data/spec/bots/bot-with-transformers/manifest.json +20 -0
  68. data/spec/bots/bot-with-transformers/scraper.rb +10 -0
  69. data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
  70. data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
  71. data/spec/bots/invalid-json-bot/manifest.json +8 -0
  72. data/spec/bots/invalid-json-bot/scraper.rb +11 -0
  73. data/spec/bots/invalid-record-bot/manifest.json +8 -0
  74. data/spec/bots/invalid-record-bot/scraper.rb +11 -0
  75. data/spec/bots/logging-bot/manifest.json +8 -0
  76. data/spec/bots/logging-bot/scraper.rb +14 -0
  77. data/spec/bots/python-bot/manifest.json +8 -0
  78. data/spec/bots/python-bot/scraper.py +11 -0
  79. data/spec/bots/ruby-bot/manifest.json +8 -0
  80. data/spec/bots/ruby-bot/scraper.rb +10 -0
  81. data/spec/bots/slow-bot/manifest.json +8 -0
  82. data/spec/bots/slow-bot/scraper.rb +11 -0
  83. data/spec/lib/processor_spec.rb +181 -0
  84. data/spec/lib/runner_spec.rb +330 -0
  85. data/spec/lib/utils_spec.rb +23 -0
  86. data/spec/lib/validator_spec.rb +89 -0
  87. data/spec/manual_spec.rb +57 -0
  88. data/spec/outputs/full-scraper.out +10 -0
  89. data/spec/outputs/full-transformer.out +10 -0
  90. data/spec/outputs/truncated-scraper.out +5 -0
  91. data/spec/spec_helper.rb +20 -0
  92. metadata +148 -0
@@ -0,0 +1,25 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "A company which is the subject of a statement",
4
+ "type": "object",
5
+ "properties": {
6
+ "name": {
7
+ "type": "string"
8
+ },
9
+ "jurisdiction": {
10
+ "type": "string"
11
+ },
12
+ "company_number": {
13
+ "type": "string"
14
+ },
15
+ "identifier": {
16
+ "type": "string",
17
+ "description": "An official identifier paired with a code for the issuer of the identifier, e.g. sec/12345",
18
+ "pattern": "^[^/]+/[^/]+$"
19
+ }
20
+ },
21
+ "required": [
22
+ "name",
23
+ "jurisdiction"
24
+ ]
25
+ }
@@ -0,0 +1,58 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "type": "object",
4
+ "required": [
5
+ "entity_type",
6
+ "entity_properties"
7
+ ],
8
+ "oneOf": [
9
+ {
10
+ "properties": {
11
+ "entity_type": {
12
+ "enum": [
13
+ "company"
14
+ ]
15
+ },
16
+ "entity_properties": {
17
+ "$ref": "company-for-nesting.json"
18
+ }
19
+ }
20
+ },
21
+ {
22
+ "properties": {
23
+ "entity_type": {
24
+ "enum": [
25
+ "person"
26
+ ]
27
+ },
28
+ "entity_properties": {
29
+ "$ref": "person.json"
30
+ }
31
+ }
32
+ },
33
+ {
34
+ "properties": {
35
+ "entity_type": {
36
+ "enum": [
37
+ "organisation"
38
+ ]
39
+ },
40
+ "entity_properties": {
41
+ "$ref": "organisation.json"
42
+ }
43
+ }
44
+ },
45
+ {
46
+ "properties": {
47
+ "entity_type": {
48
+ "enum": [
49
+ "unknown"
50
+ ]
51
+ },
52
+ "entity_properties": {
53
+ "$ref": "unknown_entity_type.json"
54
+ }
55
+ }
56
+ }
57
+ ]
58
+ }
@@ -0,0 +1,52 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "A statutory filing",
4
+ "type": "object",
5
+ "properties": {
6
+ "title": {
7
+ "type": "string"
8
+ },
9
+ "date": {
10
+ "type": "string",
11
+ "format": "date"
12
+ },
13
+ "description": {
14
+ "type": "string"
15
+ },
16
+ "uid": {
17
+ "type": "string"
18
+ },
19
+ "url": {
20
+ "type": "string"
21
+ },
22
+ "filing_type_code": {
23
+ "type": "string"
24
+ },
25
+ "filing_type_name": {
26
+ "type": "string"
27
+ },
28
+ "other_attributes": {
29
+ "type": "object"
30
+ }
31
+ },
32
+ "required": [
33
+ "date"
34
+ ],
35
+ "anyOf": [
36
+ {
37
+ "required": [
38
+ "title"
39
+ ]
40
+ },
41
+ {
42
+ "required": [
43
+ "description"
44
+ ]
45
+ },
46
+ {
47
+ "required": [
48
+ "filing_type_name"
49
+ ]
50
+ }
51
+ ]
52
+ }
@@ -0,0 +1,112 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "type": "object",
4
+ "properties": {
5
+ "data_type": {
6
+ "enum": [
7
+ "financial_payment"
8
+ ]
9
+ },
10
+ "source_url": {
11
+ "type": "string"
12
+ },
13
+ "confidence": {
14
+ "type": "string"
15
+ },
16
+ "properties": {
17
+ "type": "object",
18
+ "properties": {
19
+ "value": {
20
+ "description": "The amount transacted (e.g. 19.95)",
21
+ "type": "string"
22
+ },
23
+ "payee_name": {
24
+ "description": "The name of the payee (the entity that received the money)",
25
+ "type": "string"
26
+ },
27
+ "currency": {
28
+ "description": "The currency of the payment (three letter symbol, e.g. USD, GBP, EUR)",
29
+ "type": "string"
30
+ },
31
+ "date": {
32
+ "description": "The date on which the payment was made",
33
+ "type": "string", "format": "date"
34
+ },
35
+ "transaction_number": {
36
+ "description": "The unique identifier for the transaction, scoped to this bot",
37
+ "type": "string"
38
+ },
39
+ "jurisdiction": {
40
+ "description": "The jurisdiction of the body that made the payment, e.g. UK, France, Delaware, Manchester",
41
+ "type": "string"
42
+ },
43
+ "more_details_url": {
44
+ "description": "A url from which more details can be seen (may be the same as the source_url)",
45
+ "type": [
46
+ "string",
47
+ "null"
48
+ ]
49
+ },
50
+ "description": {
51
+ "description": "The description of the transaction as given in the raw data",
52
+ "type": [
53
+ "string",
54
+ null
55
+ ]
56
+ },
57
+ "expense_type": {
58
+ "description": "The type of expense -- can be either capital, revenue (i.e. current expenditure) or null",
59
+ "enum": [
60
+ "capital",
61
+ "revenue",
62
+ null
63
+ ]
64
+ },
65
+ "expense_area": {
66
+ "description": "category (in words) of the expenditure",
67
+ "type": [
68
+ "string",
69
+ null
70
+ ]
71
+ },
72
+ "entity_name": {
73
+ "description": "The name of the government entity that made the payment, e.g. Environment Agency",
74
+ "type": "string"
75
+ },
76
+ "entity_uri": {
77
+ "description": "A unique URL (ideally a dereferencable URI) for the government entity",
78
+ "type": [
79
+ "string",
80
+ null
81
+ ]
82
+ },
83
+ "department_name": {
84
+ "description": "The name of the government department which the entity belongs to (if relevant), e.g. Department of Health",
85
+ "type": [
86
+ "string",
87
+ null
88
+ ]
89
+ },
90
+ "csv_line_number": {
91
+ "description": "If the source for the data is a CSV file, you can optionally include the line number of the CSV from which this data was retrieved",
92
+ "type": [
93
+ "string",
94
+ null
95
+ ]
96
+ }
97
+ },
98
+ "required": [
99
+ "value",
100
+ "payee_name",
101
+ "date",
102
+ "currency"
103
+ ]
104
+ }
105
+ },
106
+ "required": [
107
+ "data_type",
108
+ "properties",
109
+ "source_url",
110
+ "confidence"
111
+ ]
112
+ }
@@ -0,0 +1,20 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "An identifier for an entity (possibly other things, e.g. securities in the future). Examples are tax IDs, non-profit IDs, SEC CIK numbers, Federal Reserve RSSD id. The main requirement for an identifier is that it should be well-defined, and issued by a government or have statutory standing",
4
+ "type": "object",
5
+ "properties": {
6
+ "uid": {
7
+ "type": "string",
8
+ "description": "The unique identifier given by the identifier system – it should be unique in the context of the identifier_system",
9
+ "minLength": 1
10
+ },
11
+ "identifier_system_code": {
12
+ "type": "string",
13
+ "description": "An identifier representing the identifier scheme. Some examples of this are us_fein (US Federal tax number), us_sec_cik (US Securities and Exchange Commission CIK), uk_ew_cc (Charity Commission of England & Wales), lei (Global Legal Entity Identifier System)"
14
+ }
15
+ },
16
+ "required": [
17
+ "uid",
18
+ "identifier_system_code"
19
+ ]
20
+ }
@@ -0,0 +1,29 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "An industry code from a standard code list (e.g. NAICS 2007 or NACE 2)",
4
+ "type": "object",
5
+ "properties": {
6
+ "name": {
7
+ "type": "string"
8
+ },
9
+ "code": {
10
+ "type": "string"
11
+ },
12
+ "code_scheme_id": {
13
+ "type": "string",
14
+ "description": "An identifier representing the industry code scheme. At the moment these are eu_nace_2, uk_sic_2003, uk_sic_2007, us_naics_2002, us_naics_2007, be_nace_2008, dk_db_2007, nz_bic_2006, no_sic_2007, anz_sic_2006, in_nic_2004_mca, ca_qc_cae, lu_nace_2. For other code schemes, or details of these, contact info@opencorporates.com"
15
+ },
16
+ "start_date": {
17
+ "type": "string",
18
+ "format": "date"
19
+ },
20
+ "end_date": {
21
+ "type": "string",
22
+ "format": "date"
23
+ }
24
+ },
25
+ "required": [
26
+ "code",
27
+ "code_scheme_id"
28
+ ]
29
+ }
@@ -0,0 +1,63 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "type": "object",
4
+ "properties": {
5
+ "data_type": {
6
+ "enum": [
7
+ "licence"
8
+ ]
9
+ },
10
+ "source_url": {
11
+ "type": "string"
12
+ },
13
+ "confidence": {
14
+ "type": "string"
15
+ },
16
+ "properties": {
17
+ "type": "object",
18
+ "properties": {
19
+ "regulator": {
20
+ "description": "The regulating body that issued the licence",
21
+ "type": "string"
22
+ },
23
+ "category": {
24
+ "description": "The category of licence. Current possible values are: 'Financial','Business'",
25
+ "enum": [
26
+ "Financial",
27
+ "Business"
28
+ ]
29
+ },
30
+ "jurisdiction_code": {
31
+ "description": "The jurisdiction for which the licence was issued",
32
+ "type": "string"
33
+ },
34
+ "licence_number": {
35
+ "description": "Licence number or code (can be null)",
36
+ "type": [
37
+ "string",
38
+ "null"
39
+ ]
40
+ },
41
+ "jurisdiction_classification": {
42
+ "type": "array",
43
+ "description": "The local classification given by the regulator",
44
+ "minItems": 1
45
+ },
46
+ "oc_classification": {
47
+ "type": "array"
48
+ }
49
+ },
50
+ "required": [
51
+ "jurisdiction_code",
52
+ "jurisdiction_classification",
53
+ "category"
54
+ ]
55
+ }
56
+ },
57
+ "required": [
58
+ "data_type",
59
+ "properties",
60
+ "source_url",
61
+ "confidence"
62
+ ]
63
+ }
@@ -0,0 +1,70 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "An officer (director, senior executive) of a company",
4
+ "type": "object",
5
+ "properties": {
6
+ "name": {
7
+ "type": "string",
8
+ "minLength": 1
9
+ },
10
+ "start_date": {
11
+ "anyOf": [
12
+ {
13
+ "type": "string",
14
+ "format": "date"
15
+ },
16
+ {
17
+ "type": "null"
18
+ }
19
+ ]
20
+ },
21
+ "end_date": {
22
+ "anyOf": [
23
+ {
24
+ "type": "string",
25
+ "format": "date"
26
+ },
27
+ {
28
+ "type": "null"
29
+ }
30
+ ]
31
+ },
32
+ "position": {
33
+ "type": "string"
34
+ },
35
+ "uid": {
36
+ "type": "string",
37
+ "description": "a unique identifier given to the officership"
38
+ },
39
+ "other_attributes": {
40
+ "type": "object",
41
+ "properties": {
42
+ "date_of_birth": {
43
+ "type": "string",
44
+ "format": "date"
45
+ },
46
+ "nationality": {
47
+ "type": "string"
48
+ },
49
+ "person_uid": {
50
+ "type": "string",
51
+ "description": "a unique identifier given to the individual (as opposed to the officership)"
52
+ },
53
+ "address": {
54
+ "type": "string"
55
+ },
56
+ "type": {
57
+ "type": "string",
58
+ "enum": [
59
+ "Person",
60
+ "Company"
61
+ ],
62
+ "description": "The type of entity that is the officer (either 'Person' or 'Company')"
63
+ }
64
+ }
65
+ }
66
+ },
67
+ "required": [
68
+ "name"
69
+ ]
70
+ }
@@ -0,0 +1,58 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "An entity which is a distinct organisation, but is not a company nor an individual. Examples include governments and governmental entities (e.g. Multilateral Development Banks, Government Departments, municipalities, etc), and also membership organisations",
4
+ "type": "object",
5
+ "properties": {
6
+ "name": {
7
+ "type": "string",
8
+ "description": "Name of the entity",
9
+ "minLength": 1
10
+ },
11
+ "jurisdiction": {
12
+ "type": "string",
13
+ "description": "Name of the jurisdiction in which the entity is incorporated/domiciled (use global for global entities, e.g. UN)"
14
+ },
15
+ "website": {
16
+ "type": "string",
17
+ "minLength": 5
18
+ },
19
+ "telephone_number": {
20
+ "type": "string",
21
+ "minLength": 4
22
+ },
23
+ "fax_number": {
24
+ "type": "string",
25
+ "minLength": 4
26
+ },
27
+ "registered_address": {
28
+ "$ref": "address.json"
29
+ },
30
+ "headquarters_address": {
31
+ "$ref": "address.json"
32
+ },
33
+ "mailing_address": {
34
+ "$ref": "address.json"
35
+ },
36
+ "industry_codes": {
37
+ "type": "array",
38
+ "items": {
39
+ "$ref": "industry_code.json"
40
+ }
41
+ },
42
+ "previous_names": {
43
+ "type": "array",
44
+ "items": {
45
+ "$ref": "previous_name.json"
46
+ }
47
+ },
48
+ "alternative_names": {
49
+ "type": "array",
50
+ "items": {
51
+ "$ref": "alternative_name.json"
52
+ }
53
+ }
54
+ },
55
+ "required": [
56
+ "name"
57
+ ]
58
+ }