turbot-runner-morph 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +15 -0
  2. data/bin/rspec +16 -0
  3. data/lib/turbot_runner.rb +28 -0
  4. data/lib/turbot_runner/base_handler.rb +15 -0
  5. data/lib/turbot_runner/exceptions.rb +4 -0
  6. data/lib/turbot_runner/prerun.rb +3 -0
  7. data/lib/turbot_runner/processor.rb +53 -0
  8. data/lib/turbot_runner/runner.rb +179 -0
  9. data/lib/turbot_runner/script_runner.rb +98 -0
  10. data/lib/turbot_runner/utils.rb +47 -0
  11. data/lib/turbot_runner/validator.rb +28 -0
  12. data/lib/turbot_runner/version.rb +3 -0
  13. data/schema/schemas/company-schema.json +243 -0
  14. data/schema/schemas/financial-payment-schema.json +32 -0
  15. data/schema/schemas/includes/address.json +53 -0
  16. data/schema/schemas/includes/alternative_name.json +36 -0
  17. data/schema/schemas/includes/company-for-nesting.json +245 -0
  18. data/schema/schemas/includes/company.json +25 -0
  19. data/schema/schemas/includes/entity.json +58 -0
  20. data/schema/schemas/includes/filing.json +52 -0
  21. data/schema/schemas/includes/financial-payment-data-object.json +112 -0
  22. data/schema/schemas/includes/identifier.json +20 -0
  23. data/schema/schemas/includes/industry_code.json +29 -0
  24. data/schema/schemas/includes/licence-data-object.json +63 -0
  25. data/schema/schemas/includes/officer.json +70 -0
  26. data/schema/schemas/includes/organisation.json +58 -0
  27. data/schema/schemas/includes/permission.json +46 -0
  28. data/schema/schemas/includes/person.json +62 -0
  29. data/schema/schemas/includes/person_name.json +71 -0
  30. data/schema/schemas/includes/previous_name.json +24 -0
  31. data/schema/schemas/includes/share-parcel-data.json +82 -0
  32. data/schema/schemas/includes/share-parcel.json +78 -0
  33. data/schema/schemas/includes/subsidiary-relationship-data.json +58 -0
  34. data/schema/schemas/includes/total-shares.json +17 -0
  35. data/schema/schemas/includes/unknown_entity_type.json +58 -0
  36. data/schema/schemas/licence-schema.json +105 -0
  37. data/schema/schemas/primary-data-schema.json +20 -0
  38. data/schema/schemas/share-parcel-schema.json +22 -0
  39. data/schema/schemas/simple-financial-payment-schema.json +122 -0
  40. data/schema/schemas/simple-licence-schema.json +82 -0
  41. data/schema/schemas/simple-subsidiary-schema.json +85 -0
  42. data/schema/schemas/subsidiary-relationship-schema.json +46 -0
  43. data/spec/bots/bot-that-crashes-immediately/manifest.json +15 -0
  44. data/spec/bots/bot-that-crashes-immediately/scraper.rb +1 -0
  45. data/spec/bots/bot-that-crashes-immediately/transformer1.rb +15 -0
  46. data/spec/bots/bot-that-crashes-in-scraper/manifest.json +15 -0
  47. data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
  48. data/spec/bots/bot-that-crashes-in-scraper/transformer1.rb +15 -0
  49. data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
  50. data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
  51. data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
  52. data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
  53. data/spec/bots/bot-that-emits-run-ended/manifest.json +8 -0
  54. data/spec/bots/bot-that-emits-run-ended/scraper.rb +11 -0
  55. data/spec/bots/bot-that-expects-file/manifest.json +8 -0
  56. data/spec/bots/bot-that-expects-file/scraper.rb +11 -0
  57. data/spec/bots/bot-that-expects-file/something.txt +1 -0
  58. data/spec/bots/bot-with-invalid-data-type/manifest.json +8 -0
  59. data/spec/bots/bot-with-invalid-data-type/scraper.rb +10 -0
  60. data/spec/bots/bot-with-invalid-sample-date/manifest.json +8 -0
  61. data/spec/bots/bot-with-invalid-sample-date/scraper.rb +10 -0
  62. data/spec/bots/bot-with-pause/manifest.json +8 -0
  63. data/spec/bots/bot-with-pause/scraper.rb +16 -0
  64. data/spec/bots/bot-with-transformer/manifest.json +15 -0
  65. data/spec/bots/bot-with-transformer/scraper.rb +10 -0
  66. data/spec/bots/bot-with-transformer/transformer.rb +15 -0
  67. data/spec/bots/bot-with-transformers/manifest.json +20 -0
  68. data/spec/bots/bot-with-transformers/scraper.rb +10 -0
  69. data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
  70. data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
  71. data/spec/bots/invalid-json-bot/manifest.json +8 -0
  72. data/spec/bots/invalid-json-bot/scraper.rb +11 -0
  73. data/spec/bots/invalid-record-bot/manifest.json +8 -0
  74. data/spec/bots/invalid-record-bot/scraper.rb +11 -0
  75. data/spec/bots/logging-bot/manifest.json +8 -0
  76. data/spec/bots/logging-bot/scraper.rb +14 -0
  77. data/spec/bots/python-bot/manifest.json +8 -0
  78. data/spec/bots/python-bot/scraper.py +11 -0
  79. data/spec/bots/ruby-bot/manifest.json +8 -0
  80. data/spec/bots/ruby-bot/scraper.rb +10 -0
  81. data/spec/bots/slow-bot/manifest.json +8 -0
  82. data/spec/bots/slow-bot/scraper.rb +11 -0
  83. data/spec/lib/processor_spec.rb +181 -0
  84. data/spec/lib/runner_spec.rb +330 -0
  85. data/spec/lib/utils_spec.rb +23 -0
  86. data/spec/lib/validator_spec.rb +89 -0
  87. data/spec/manual_spec.rb +57 -0
  88. data/spec/outputs/full-scraper.out +10 -0
  89. data/spec/outputs/full-transformer.out +10 -0
  90. data/spec/outputs/truncated-scraper.out +5 -0
  91. data/spec/spec_helper.rb +20 -0
  92. metadata +148 -0
@@ -0,0 +1,46 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "oneOf": [
4
+ {
5
+ "type": [
6
+ "string",
7
+ "null"
8
+ ],
9
+ "minLength": 2
10
+ },
11
+ {
12
+ "name": "Permission",
13
+ "description": "A permission issued by a government or regulatory body to an entity to do something. This may be fine-grained, e.g. to sell liquor or to accept customer deposits, or broader, e.g. to operate as a bank or a restaurant. It may also be permission to do something physical, such as to explore for oil in a given area or to mine for iron ore",
14
+ "type": "object",
15
+ "properties": {
16
+ "activity_name": {
17
+ "type": "string"
18
+ },
19
+ "activity_id": {
20
+ "type": "string"
21
+ },
22
+ "permission_type": {
23
+ "enum": [
24
+ "operating",
25
+ "exploration",
26
+ "exploitation"
27
+ ],
28
+ "description": "Type of permission: one of 'operating', 'exploration' or 'exploitation'"
29
+ },
30
+ "restrictions": {
31
+ "type": "array",
32
+ "description": "conditions or restrictions on the permissions",
33
+ "items": {
34
+ "type": "string"
35
+ }
36
+ },
37
+ "other_attributes": {
38
+ "type": "object"
39
+ }
40
+ },
41
+ "required": [
42
+ "activity_name"
43
+ ]
44
+ }
45
+ ]
46
+ }
@@ -0,0 +1,62 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "A person, for example, referenced in some other context, e.g. director of a company, shareholder, licence-holder, lobbyist. This should be used only if the person is the subject of the datum",
4
+ "type": "object",
5
+ "properties": {
6
+ "name": {
7
+ "$ref": "person_name.json"
8
+ },
9
+ "jurisdiction": {
10
+ "type": "string",
11
+ "description": "Name of the jurisdiction in which the entity is based"
12
+ },
13
+ "company": {
14
+ "description": "Company the person is representing. NB If the licence holder is a company, and the individual is just a contact, then use the company-schema for the entity. This is where the individual themselves is the licence holder",
15
+ "$ref": "company-for-nesting.json"
16
+ },
17
+ "relationship_with_company": {
18
+ "enum": [
19
+ "employee",
20
+ "director",
21
+ "shareholder",
22
+ null
23
+ ]
24
+ },
25
+ "website": {
26
+ "type": "string",
27
+ "minLength": 5
28
+ },
29
+ "telephone_number": {
30
+ "type": "string",
31
+ "minLength": 4
32
+ },
33
+ "fax_number": {
34
+ "type": "string",
35
+ "minLength": 4
36
+ },
37
+ "registered_address": {
38
+ "$ref": "address.json"
39
+ },
40
+ "headquarters_address": {
41
+ "$ref": "address.json"
42
+ },
43
+ "mailing_address": {
44
+ "$ref": "address.json"
45
+ },
46
+ "industry_codes": {
47
+ "type": "array",
48
+ "items": {
49
+ "$ref": "industry_code.json"
50
+ }
51
+ },
52
+ "alternative_names": {
53
+ "type": "array",
54
+ "items": {
55
+ "$ref": "alternative_name.json"
56
+ }
57
+ }
58
+ },
59
+ "required": [
60
+ "name"
61
+ ]
62
+ }
@@ -0,0 +1,71 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "oneOf": [
4
+ {
5
+ "type": [
6
+ "string",
7
+ "null"
8
+ ],
9
+ "format": "non-blank"
10
+ },
11
+ {
12
+ "description": "The name of a person as an object",
13
+ "type": "object",
14
+ "properties": {
15
+ "given_name": {
16
+ "type": [
17
+ "string",
18
+ "null"
19
+ ],
20
+ "format": "non-blank",
21
+ "description": "The given name (often first name) of a person, as opposed to their family name. Following FOAF practice, this is preferred to first_name"
22
+ },
23
+ "family_name": {
24
+ "type": [
25
+ "string",
26
+ "null"
27
+ ],
28
+ "format": "non-blank",
29
+ "description": "The family name (often last name) of a person, as opposed to their given name. Following FOAF practice, this is preferred to last_name"
30
+ },
31
+ "first_name": {
32
+ "type": [
33
+ "string",
34
+ "null"
35
+ ],
36
+ "format": "non-blank"
37
+ },
38
+ "middle_name": {
39
+ "type": [
40
+ "string",
41
+ "null"
42
+ ],
43
+ "format": "non-blank"
44
+ },
45
+ "last_name": {
46
+ "type": [
47
+ "string",
48
+ "null"
49
+ ],
50
+ "format": "non-blank"
51
+ },
52
+ "title": {
53
+ "type": [
54
+ "string",
55
+ "null"
56
+ ],
57
+ "format": "non-blank",
58
+ "description": "Title such as Mr, Ms, Dr etc"
59
+ },
60
+ "suffixes": {
61
+ "type": [
62
+ "string",
63
+ "null"
64
+ ],
65
+ "format": "non-blank",
66
+ "description": "Any suffixes, including degrees, honours (OBE), ordinals (John Smith Jr, Bill Jones II) etc"
67
+ }
68
+ }
69
+ }
70
+ ]
71
+ }
@@ -0,0 +1,24 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "name": "PreviousName",
4
+ "description": "A previous name of a company",
5
+ "type": "object",
6
+ "properties": {
7
+ "company_name": {
8
+ "type": "string",
9
+ "minLength": 1
10
+ },
11
+ "con_date": {
12
+ "type": "string",
13
+ "format": "date",
14
+ "description": "The end (conversion) date of the name"
15
+ },
16
+ "start_date": {
17
+ "type": "string",
18
+ "format": "date"
19
+ }
20
+ },
21
+ "required": [
22
+ "company_name"
23
+ ]
24
+ }
@@ -0,0 +1,82 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "type": "object",
4
+ "properties": {
5
+ "data_type": {
6
+ "enum": [
7
+ "share_parcel"
8
+ ]
9
+ },
10
+ "properties": {
11
+ "type": "object",
12
+ "required": [
13
+ "shareholders"
14
+ ],
15
+ "properties": {
16
+ "number_of_shares": {
17
+ "description": "Number of shares, if known",
18
+ "type": "integer"
19
+ },
20
+ "percentage_of_shares": {
21
+ "description": "Percentage of shares, if known",
22
+ "type": "number",
23
+ "maximum": 100,
24
+ "minimum": 0
25
+ },
26
+ "shareholders": {
27
+ "description": "Legal persons who own this share parcel",
28
+ "type": "array",
29
+ "minItems": 1,
30
+ "items": {
31
+ "anyOf": [
32
+ {
33
+ "type": "object",
34
+ "name": "person",
35
+ "required": [
36
+ "name"
37
+ ],
38
+ "properties": {
39
+ "name": {
40
+ "description": "Name of natural person or company",
41
+ "type": "string"
42
+ },
43
+ "jurisdiction": {
44
+ "description": "Jurisdiction of registration, if company",
45
+ "type": "string"
46
+ },
47
+ "company_number": {
48
+ "description": "Company number, if company and known",
49
+ "type": "string"
50
+ },
51
+ "identifier": {
52
+ "description": "Unique identifier of person",
53
+ "type": "string"
54
+ },
55
+ "type": {
56
+ "description": "Type of person if known (company or natural person)",
57
+ "enum": [
58
+ "Company",
59
+ "Person"
60
+ ]
61
+ },
62
+ "address": {
63
+ "description": "Address given for owner of parcel",
64
+ "type": "string"
65
+ },
66
+ "address_country": {
67
+ "description": "Country part of owner's address",
68
+ "type": "string"
69
+ }
70
+ }
71
+ }
72
+ ]
73
+ }
74
+ }
75
+ }
76
+ }
77
+ },
78
+ "required": [
79
+ "data_type",
80
+ "properties"
81
+ ]
82
+ }
@@ -0,0 +1,78 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "type": "object",
4
+ "description": "A parcel of shares in a company",
5
+ "required": [
6
+
7
+ ],
8
+ "properties": {
9
+ "number_of_shares": {
10
+ "description": "Number of shares, if known",
11
+ "type": "integer"
12
+ },
13
+ "percentage_of_shares": {
14
+ "description": "Percentage of shares, if known",
15
+ "type": "number",
16
+ "maximum": 100,
17
+ "minimum": 0
18
+ },
19
+ "start_date": {
20
+ "type": "string",
21
+ "format": "date"
22
+ },
23
+ "end_date": {
24
+ "type": "string",
25
+ "format": "date"
26
+ },
27
+ "sample_date": {
28
+ "type": "string",
29
+ "format": "date"
30
+ },
31
+ "shareholders": {
32
+ "description": "Legal persons who own this share parcel",
33
+ "type": "array",
34
+ "minItems": 1,
35
+ "items": {
36
+ "type": "object",
37
+ "name": "shareholder",
38
+ "required": [
39
+ "name"
40
+ ],
41
+ "properties": {
42
+ "name": {
43
+ "description": "Name of natural person or company",
44
+ "type": "string",
45
+ "minLength": 1
46
+ },
47
+ "jurisdiction": {
48
+ "description": "Jurisdiction of registration, if company",
49
+ "type": "string"
50
+ },
51
+ "company_number": {
52
+ "description": "Company number, if company and known",
53
+ "type": "string"
54
+ },
55
+ "identifier": {
56
+ "description": "Unique identifier of person",
57
+ "type": "string"
58
+ },
59
+ "type": {
60
+ "description": "Type of person if known (company or natural person)",
61
+ "enum": [
62
+ "Company",
63
+ "Person"
64
+ ]
65
+ },
66
+ "address": {
67
+ "description": "Address given for owner of parcel",
68
+ "type": "string"
69
+ },
70
+ "address_country": {
71
+ "description": "Country part of owner's address",
72
+ "type": "string"
73
+ }
74
+ }
75
+ }
76
+ }
77
+ }
78
+ }
@@ -0,0 +1,58 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "type": "object",
4
+ "properties": {
5
+ "data_type": {
6
+ "enum": [
7
+ "subsidiary_relationship"
8
+ ]
9
+ },
10
+ "properties": {
11
+ "type": "object",
12
+ "required": [
13
+ "subsidiary"
14
+ ],
15
+ "additionalProperties": false,
16
+ "properties": {
17
+ "direct": {
18
+ "description": "Whether the control is direct (if via an intermediary, this value should be false; if unknown, omit this property)",
19
+ "type": "boolean"
20
+ },
21
+ "percentage_controlled": {
22
+ "description": "Percentage controlled, either directly or indirectly",
23
+ "type": "string"
24
+ },
25
+ "significant": {
26
+ "description": "Does the source define the control as somehow significant?",
27
+ "type": "boolean"
28
+ },
29
+ "subsidiary": {
30
+ "description": "Company that is controlled",
31
+ "type": "object",
32
+ "required": [
33
+ "name"
34
+ ],
35
+ "additionalProperties": false,
36
+ "properties": {
37
+ "name": {
38
+ "description": "Name of company",
39
+ "type": "string"
40
+ },
41
+ "jurisdiction": {
42
+ "description": "Jurisdiction of registration",
43
+ "type": "string"
44
+ },
45
+ "company_number": {
46
+ "description": "Company number, if company and known",
47
+ "type": "string"
48
+ },
49
+ "identifier": {
50
+ "description": "Unique identifier of company",
51
+ "type": "string"
52
+ }
53
+ }
54
+ }
55
+ }
56
+ }
57
+ }
58
+ }
@@ -0,0 +1,17 @@
1
+ {
2
+ "$schema": "http://json-schema.org/draft-04/schema#",
3
+ "description": "The total number of shares a company has issued",
4
+ "type": "object",
5
+ "properties": {
6
+ "number": {
7
+ "type": "integer"
8
+ },
9
+ "share_class": {
10
+ "type": "string",
11
+ "minLength": 1
12
+ }
13
+ },
14
+ "required": [
15
+ "number"
16
+ ]
17
+ }