turbot-runner-morph 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +15 -0
- data/bin/rspec +16 -0
- data/lib/turbot_runner.rb +28 -0
- data/lib/turbot_runner/base_handler.rb +15 -0
- data/lib/turbot_runner/exceptions.rb +4 -0
- data/lib/turbot_runner/prerun.rb +3 -0
- data/lib/turbot_runner/processor.rb +53 -0
- data/lib/turbot_runner/runner.rb +179 -0
- data/lib/turbot_runner/script_runner.rb +98 -0
- data/lib/turbot_runner/utils.rb +47 -0
- data/lib/turbot_runner/validator.rb +28 -0
- data/lib/turbot_runner/version.rb +3 -0
- data/schema/schemas/company-schema.json +243 -0
- data/schema/schemas/financial-payment-schema.json +32 -0
- data/schema/schemas/includes/address.json +53 -0
- data/schema/schemas/includes/alternative_name.json +36 -0
- data/schema/schemas/includes/company-for-nesting.json +245 -0
- data/schema/schemas/includes/company.json +25 -0
- data/schema/schemas/includes/entity.json +58 -0
- data/schema/schemas/includes/filing.json +52 -0
- data/schema/schemas/includes/financial-payment-data-object.json +112 -0
- data/schema/schemas/includes/identifier.json +20 -0
- data/schema/schemas/includes/industry_code.json +29 -0
- data/schema/schemas/includes/licence-data-object.json +63 -0
- data/schema/schemas/includes/officer.json +70 -0
- data/schema/schemas/includes/organisation.json +58 -0
- data/schema/schemas/includes/permission.json +46 -0
- data/schema/schemas/includes/person.json +62 -0
- data/schema/schemas/includes/person_name.json +71 -0
- data/schema/schemas/includes/previous_name.json +24 -0
- data/schema/schemas/includes/share-parcel-data.json +82 -0
- data/schema/schemas/includes/share-parcel.json +78 -0
- data/schema/schemas/includes/subsidiary-relationship-data.json +58 -0
- data/schema/schemas/includes/total-shares.json +17 -0
- data/schema/schemas/includes/unknown_entity_type.json +58 -0
- data/schema/schemas/licence-schema.json +105 -0
- data/schema/schemas/primary-data-schema.json +20 -0
- data/schema/schemas/share-parcel-schema.json +22 -0
- data/schema/schemas/simple-financial-payment-schema.json +122 -0
- data/schema/schemas/simple-licence-schema.json +82 -0
- data/schema/schemas/simple-subsidiary-schema.json +85 -0
- data/schema/schemas/subsidiary-relationship-schema.json +46 -0
- data/spec/bots/bot-that-crashes-immediately/manifest.json +15 -0
- data/spec/bots/bot-that-crashes-immediately/scraper.rb +1 -0
- data/spec/bots/bot-that-crashes-immediately/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-scraper/manifest.json +15 -0
- data/spec/bots/bot-that-crashes-in-scraper/scraper.rb +11 -0
- data/spec/bots/bot-that-crashes-in-scraper/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/manifest.json +20 -0
- data/spec/bots/bot-that-crashes-in-transformer/scraper.rb +10 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer1.rb +15 -0
- data/spec/bots/bot-that-crashes-in-transformer/transformer2.rb +17 -0
- data/spec/bots/bot-that-emits-run-ended/manifest.json +8 -0
- data/spec/bots/bot-that-emits-run-ended/scraper.rb +11 -0
- data/spec/bots/bot-that-expects-file/manifest.json +8 -0
- data/spec/bots/bot-that-expects-file/scraper.rb +11 -0
- data/spec/bots/bot-that-expects-file/something.txt +1 -0
- data/spec/bots/bot-with-invalid-data-type/manifest.json +8 -0
- data/spec/bots/bot-with-invalid-data-type/scraper.rb +10 -0
- data/spec/bots/bot-with-invalid-sample-date/manifest.json +8 -0
- data/spec/bots/bot-with-invalid-sample-date/scraper.rb +10 -0
- data/spec/bots/bot-with-pause/manifest.json +8 -0
- data/spec/bots/bot-with-pause/scraper.rb +16 -0
- data/spec/bots/bot-with-transformer/manifest.json +15 -0
- data/spec/bots/bot-with-transformer/scraper.rb +10 -0
- data/spec/bots/bot-with-transformer/transformer.rb +15 -0
- data/spec/bots/bot-with-transformers/manifest.json +20 -0
- data/spec/bots/bot-with-transformers/scraper.rb +10 -0
- data/spec/bots/bot-with-transformers/transformer1.rb +15 -0
- data/spec/bots/bot-with-transformers/transformer2.rb +15 -0
- data/spec/bots/invalid-json-bot/manifest.json +8 -0
- data/spec/bots/invalid-json-bot/scraper.rb +11 -0
- data/spec/bots/invalid-record-bot/manifest.json +8 -0
- data/spec/bots/invalid-record-bot/scraper.rb +11 -0
- data/spec/bots/logging-bot/manifest.json +8 -0
- data/spec/bots/logging-bot/scraper.rb +14 -0
- data/spec/bots/python-bot/manifest.json +8 -0
- data/spec/bots/python-bot/scraper.py +11 -0
- data/spec/bots/ruby-bot/manifest.json +8 -0
- data/spec/bots/ruby-bot/scraper.rb +10 -0
- data/spec/bots/slow-bot/manifest.json +8 -0
- data/spec/bots/slow-bot/scraper.rb +11 -0
- data/spec/lib/processor_spec.rb +181 -0
- data/spec/lib/runner_spec.rb +330 -0
- data/spec/lib/utils_spec.rb +23 -0
- data/spec/lib/validator_spec.rb +89 -0
- data/spec/manual_spec.rb +57 -0
- data/spec/outputs/full-scraper.out +10 -0
- data/spec/outputs/full-transformer.out +10 -0
- data/spec/outputs/truncated-scraper.out +5 -0
- data/spec/spec_helper.rb +20 -0
- metadata +148 -0
@@ -0,0 +1,46 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"oneOf": [
|
4
|
+
{
|
5
|
+
"type": [
|
6
|
+
"string",
|
7
|
+
"null"
|
8
|
+
],
|
9
|
+
"minLength": 2
|
10
|
+
},
|
11
|
+
{
|
12
|
+
"name": "Permission",
|
13
|
+
"description": "A permission issued by a government or regulatory body to an entity to do something. This may be fine grained,e.g. to sell liquor, to accept customer deposits, or broader, e.g to operate as a bank, a restaurant. It may also be permission to do something physical, such as explore for oil in a given area, or mine for iron ore",
|
14
|
+
"type": "object",
|
15
|
+
"properties": {
|
16
|
+
"activity_name": {
|
17
|
+
"type": "string"
|
18
|
+
},
|
19
|
+
"activity_id": {
|
20
|
+
"type": "string"
|
21
|
+
},
|
22
|
+
"permission_type": {
|
23
|
+
"enum": [
|
24
|
+
"operating",
|
25
|
+
"exploration",
|
26
|
+
"exploitation"
|
27
|
+
],
|
28
|
+
"description": "types of permission, such as 'operating', 'exploration', 'exploitation'"
|
29
|
+
},
|
30
|
+
"restrictions": {
|
31
|
+
"type": "array",
|
32
|
+
"description": "conditions or restrictions on the permissions",
|
33
|
+
"items": {
|
34
|
+
"type": "string"
|
35
|
+
}
|
36
|
+
},
|
37
|
+
"other_attributes": {
|
38
|
+
"type": "object"
|
39
|
+
}
|
40
|
+
},
|
41
|
+
"required": [
|
42
|
+
"activity_name"
|
43
|
+
]
|
44
|
+
}
|
45
|
+
]
|
46
|
+
}
|
@@ -0,0 +1,62 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"description": "A person, for example, referenced in some other context, e.g. director of a company, shareholder, licence-holder, lobbyist. This should be used only if the person is the subject of the datum",
|
4
|
+
"type": "object",
|
5
|
+
"properties": {
|
6
|
+
"name": {
|
7
|
+
"$ref": "person_name.json"
|
8
|
+
},
|
9
|
+
"jurisdiction": {
|
10
|
+
"type": "string",
|
11
|
+
"description": "Name of the jurisdiction in which the entity is based"
|
12
|
+
},
|
13
|
+
"company": {
|
14
|
+
"description": "Company the person is representing. NB If the licence holder is a company, and the individual is a just a contact, then use the company-schema for the entity. This is where the individual themselves are the licence holder",
|
15
|
+
"$ref": "company-for-nesting.json"
|
16
|
+
},
|
17
|
+
"relationship_with_company": {
|
18
|
+
"enum": [
|
19
|
+
"employee",
|
20
|
+
"director",
|
21
|
+
"shareholder",
|
22
|
+
null
|
23
|
+
]
|
24
|
+
},
|
25
|
+
"website": {
|
26
|
+
"type": "string",
|
27
|
+
"minLength": 5
|
28
|
+
},
|
29
|
+
"telephone_number": {
|
30
|
+
"type": "string",
|
31
|
+
"minLength": 4
|
32
|
+
},
|
33
|
+
"fax_number": {
|
34
|
+
"type": "string",
|
35
|
+
"minLength": 4
|
36
|
+
},
|
37
|
+
"registered_address": {
|
38
|
+
"$ref": "address.json"
|
39
|
+
},
|
40
|
+
"headquarters_address": {
|
41
|
+
"$ref": "address.json"
|
42
|
+
},
|
43
|
+
"mailing_address": {
|
44
|
+
"$ref": "address.json"
|
45
|
+
},
|
46
|
+
"industry_codes": {
|
47
|
+
"type": "array",
|
48
|
+
"items": {
|
49
|
+
"$ref": "industry_code.json"
|
50
|
+
}
|
51
|
+
},
|
52
|
+
"alternative_names": {
|
53
|
+
"type": "array",
|
54
|
+
"items": {
|
55
|
+
"$ref": "alternative_name.json"
|
56
|
+
}
|
57
|
+
}
|
58
|
+
},
|
59
|
+
"required": [
|
60
|
+
"name"
|
61
|
+
]
|
62
|
+
}
|
@@ -0,0 +1,71 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"oneOf": [
|
4
|
+
{
|
5
|
+
"type": [
|
6
|
+
"string",
|
7
|
+
"null"
|
8
|
+
],
|
9
|
+
"format": "non-blank"
|
10
|
+
},
|
11
|
+
{
|
12
|
+
"description": "The name of a person as an object",
|
13
|
+
"type": "object",
|
14
|
+
"properties": {
|
15
|
+
"given_name": {
|
16
|
+
"type": [
|
17
|
+
"string",
|
18
|
+
"null"
|
19
|
+
],
|
20
|
+
"format": "non-blank",
|
21
|
+
"description": "The given name (often first name) of a person, as opposed to their family name. Following FOAF practice, this is preferred to first_name"
|
22
|
+
},
|
23
|
+
"family_name": {
|
24
|
+
"type": [
|
25
|
+
"string",
|
26
|
+
"null"
|
27
|
+
],
|
28
|
+
"format": "non-blank",
|
29
|
+
"description": "The family name (often last name) of a person, as opposed to their family name. Following FOAF practice, this is preferred to last_name"
|
30
|
+
},
|
31
|
+
"first_name": {
|
32
|
+
"type": [
|
33
|
+
"string",
|
34
|
+
"null"
|
35
|
+
],
|
36
|
+
"format": "non-blank"
|
37
|
+
},
|
38
|
+
"middle_name": {
|
39
|
+
"type": [
|
40
|
+
"string",
|
41
|
+
"null"
|
42
|
+
],
|
43
|
+
"format": "non-blank"
|
44
|
+
},
|
45
|
+
"last_name": {
|
46
|
+
"type": [
|
47
|
+
"string",
|
48
|
+
"null"
|
49
|
+
],
|
50
|
+
"format": "non-blank"
|
51
|
+
},
|
52
|
+
"title": {
|
53
|
+
"type": [
|
54
|
+
"string",
|
55
|
+
"null"
|
56
|
+
],
|
57
|
+
"format": "non-blank",
|
58
|
+
"description": "Title such as Mr, Ms, Dr etc"
|
59
|
+
},
|
60
|
+
"suffixes": {
|
61
|
+
"type": [
|
62
|
+
"string",
|
63
|
+
"null"
|
64
|
+
],
|
65
|
+
"format": "non-blank",
|
66
|
+
"description": "Any suffixes, including degrees, honours (OBE), ordinals (John Smith Jr, Bill Jones II) etc"
|
67
|
+
}
|
68
|
+
}
|
69
|
+
}
|
70
|
+
]
|
71
|
+
}
|
@@ -0,0 +1,24 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"name": "PreviousName",
|
4
|
+
"description": "A previous name of a company",
|
5
|
+
"type": "object",
|
6
|
+
"properties": {
|
7
|
+
"company_name": {
|
8
|
+
"type": "string",
|
9
|
+
"minLength": 1
|
10
|
+
},
|
11
|
+
"con_date": {
|
12
|
+
"type": "string",
|
13
|
+
"format": "date",
|
14
|
+
"description": "The end (conversion) date of the name"
|
15
|
+
},
|
16
|
+
"start_date": {
|
17
|
+
"type": "string",
|
18
|
+
"format": "date"
|
19
|
+
}
|
20
|
+
},
|
21
|
+
"required": [
|
22
|
+
"company_name"
|
23
|
+
]
|
24
|
+
}
|
@@ -0,0 +1,82 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"type": "object",
|
4
|
+
"properties": {
|
5
|
+
"data_type": {
|
6
|
+
"enum": [
|
7
|
+
"share_parcel"
|
8
|
+
]
|
9
|
+
},
|
10
|
+
"properties": {
|
11
|
+
"type": "object",
|
12
|
+
"required": [
|
13
|
+
"shareholders"
|
14
|
+
],
|
15
|
+
"properties": {
|
16
|
+
"number_of_shares": {
|
17
|
+
"description": "Number of shares, if known",
|
18
|
+
"type": "integer"
|
19
|
+
},
|
20
|
+
"percentage_of_shares": {
|
21
|
+
"description": "Percentage of shares, if known",
|
22
|
+
"type": "number",
|
23
|
+
"maximum": 100,
|
24
|
+
"minimum": 0
|
25
|
+
},
|
26
|
+
"shareholders": {
|
27
|
+
"description": "Legal persons who own this share parcel",
|
28
|
+
"type": "array",
|
29
|
+
"minItems": 1,
|
30
|
+
"items": {
|
31
|
+
"anyOf": [
|
32
|
+
{
|
33
|
+
"type": "object",
|
34
|
+
"name": "person",
|
35
|
+
"required": [
|
36
|
+
"name"
|
37
|
+
],
|
38
|
+
"properties": {
|
39
|
+
"name": {
|
40
|
+
"description": "Name of natural person or company",
|
41
|
+
"type": "string"
|
42
|
+
},
|
43
|
+
"jurisdiction": {
|
44
|
+
"description": "Jurisdiction of registration, if company",
|
45
|
+
"type": "string"
|
46
|
+
},
|
47
|
+
"company_number": {
|
48
|
+
"description": "Company number, if company and known",
|
49
|
+
"type": "string"
|
50
|
+
},
|
51
|
+
"identifier": {
|
52
|
+
"description": "Unique identifier of person",
|
53
|
+
"type": "string"
|
54
|
+
},
|
55
|
+
"type": {
|
56
|
+
"description": "Type of person if known (company or natural person)",
|
57
|
+
"enum": [
|
58
|
+
"Company",
|
59
|
+
"Person"
|
60
|
+
]
|
61
|
+
},
|
62
|
+
"address": {
|
63
|
+
"description": "Address given for owner of parcel",
|
64
|
+
"type": "string"
|
65
|
+
},
|
66
|
+
"address_country": {
|
67
|
+
"description": "Country part of owner's address",
|
68
|
+
"type": "string"
|
69
|
+
}
|
70
|
+
}
|
71
|
+
}
|
72
|
+
]
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
}
|
77
|
+
},
|
78
|
+
"required": [
|
79
|
+
"data_type",
|
80
|
+
"properties"
|
81
|
+
]
|
82
|
+
}
|
@@ -0,0 +1,78 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"type": "object",
|
4
|
+
"description": "A parcel of shares in a company",
|
5
|
+
"required": [
|
6
|
+
|
7
|
+
],
|
8
|
+
"properties": {
|
9
|
+
"number_of_shares": {
|
10
|
+
"description": "Number of shares, if known",
|
11
|
+
"type": "integer"
|
12
|
+
},
|
13
|
+
"percentage_of_shares": {
|
14
|
+
"description": "Percentage of shares, if known",
|
15
|
+
"type": "number",
|
16
|
+
"maximum": 100,
|
17
|
+
"minimum": 0
|
18
|
+
},
|
19
|
+
"start_date": {
|
20
|
+
"type": "string",
|
21
|
+
"format": "date"
|
22
|
+
},
|
23
|
+
"end_date": {
|
24
|
+
"type": "string",
|
25
|
+
"format": "date"
|
26
|
+
},
|
27
|
+
"sample_date": {
|
28
|
+
"type": "string",
|
29
|
+
"format": "date"
|
30
|
+
},
|
31
|
+
"shareholders": {
|
32
|
+
"description": "Legal persons who own this share parcel",
|
33
|
+
"type": "array",
|
34
|
+
"minItems": 1,
|
35
|
+
"items": {
|
36
|
+
"type": "object",
|
37
|
+
"name": "shareholder",
|
38
|
+
"required": [
|
39
|
+
"name"
|
40
|
+
],
|
41
|
+
"properties": {
|
42
|
+
"name": {
|
43
|
+
"description": "Name of natural person or company",
|
44
|
+
"type": "string",
|
45
|
+
"minLength": 1
|
46
|
+
},
|
47
|
+
"jurisdiction": {
|
48
|
+
"description": "Jurisdiction of registration, if company",
|
49
|
+
"type": "string"
|
50
|
+
},
|
51
|
+
"company_number": {
|
52
|
+
"description": "Company number, if company and known",
|
53
|
+
"type": "string"
|
54
|
+
},
|
55
|
+
"identifier": {
|
56
|
+
"description": "Unique identifier of person",
|
57
|
+
"type": "string"
|
58
|
+
},
|
59
|
+
"type": {
|
60
|
+
"description": "Type of person if known (company or natural person)",
|
61
|
+
"enum": [
|
62
|
+
"Company",
|
63
|
+
"Person"
|
64
|
+
]
|
65
|
+
},
|
66
|
+
"address": {
|
67
|
+
"description": "Address given for owner of parcel",
|
68
|
+
"type": "string"
|
69
|
+
},
|
70
|
+
"address_country": {
|
71
|
+
"description": "Country part of owner's address",
|
72
|
+
"type": "string"
|
73
|
+
}
|
74
|
+
}
|
75
|
+
}
|
76
|
+
}
|
77
|
+
}
|
78
|
+
}
|
@@ -0,0 +1,58 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"type": "object",
|
4
|
+
"properties": {
|
5
|
+
"data_type": {
|
6
|
+
"enum": [
|
7
|
+
"subsidiary_relationship"
|
8
|
+
]
|
9
|
+
},
|
10
|
+
"properties": {
|
11
|
+
"type": "object",
|
12
|
+
"required": [
|
13
|
+
"subsidiary"
|
14
|
+
],
|
15
|
+
"additionalProperties": false,
|
16
|
+
"properties": {
|
17
|
+
"direct": {
|
18
|
+
"description": "If the control is direct (if via an intermediary, this value should be false; if unknown, left blank)",
|
19
|
+
"type": "boolean"
|
20
|
+
},
|
21
|
+
"percentage_controlled": {
|
22
|
+
"description": "Percentage controlled, either directly or indirectly",
|
23
|
+
"type": "string"
|
24
|
+
},
|
25
|
+
"significant": {
|
26
|
+
"description": "Does the source define the control as somehow significant?",
|
27
|
+
"type": "boolean"
|
28
|
+
},
|
29
|
+
"subsidiary": {
|
30
|
+
"description": "Company that is controlled",
|
31
|
+
"type": "object",
|
32
|
+
"required": [
|
33
|
+
"name"
|
34
|
+
],
|
35
|
+
"additionalProperties": false,
|
36
|
+
"properties": {
|
37
|
+
"name": {
|
38
|
+
"description": "Name of company",
|
39
|
+
"type": "string"
|
40
|
+
},
|
41
|
+
"jurisdiction": {
|
42
|
+
"description": "Jurisdiction of registration",
|
43
|
+
"type": "string"
|
44
|
+
},
|
45
|
+
"company_number": {
|
46
|
+
"description": "Company number, if company and known",
|
47
|
+
"type": "string"
|
48
|
+
},
|
49
|
+
"identifier": {
|
50
|
+
"description": "Unique identifier of company",
|
51
|
+
"type": "string"
|
52
|
+
}
|
53
|
+
}
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
57
|
+
}
|
58
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
{
|
2
|
+
"$schema": "http://json-schema.org/draft-04/schema#",
|
3
|
+
"description": "The total number of shares a company has issued",
|
4
|
+
"type": "object",
|
5
|
+
"properties": {
|
6
|
+
"number": {
|
7
|
+
"type": "integer"
|
8
|
+
},
|
9
|
+
"share_class": {
|
10
|
+
"type": "string",
|
11
|
+
"minLength": 1
|
12
|
+
}
|
13
|
+
},
|
14
|
+
"required": [
|
15
|
+
"number"
|
16
|
+
]
|
17
|
+
}
|