followthemoney 1.3.7__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +5 -3
- followthemoney/cli/__init__.py +17 -0
- followthemoney/cli/aggregate.py +56 -0
- followthemoney/cli/cli.py +88 -0
- followthemoney/cli/exports.py +121 -0
- followthemoney/cli/mapping.py +85 -0
- followthemoney/cli/sieve.py +67 -0
- followthemoney/cli/util.py +142 -0
- followthemoney/compare.py +130 -60
- followthemoney/exc.py +19 -6
- followthemoney/export/common.py +29 -0
- followthemoney/export/csv.py +82 -0
- followthemoney/export/excel.py +75 -0
- followthemoney/export/graph.py +79 -0
- followthemoney/export/neo4j.py +182 -0
- followthemoney/export/rdf.py +26 -0
- followthemoney/graph.py +308 -0
- followthemoney/helpers.py +212 -0
- followthemoney/mapping/__init__.py +1 -1
- followthemoney/mapping/csv.py +67 -35
- followthemoney/mapping/entity.py +116 -44
- followthemoney/mapping/property.py +90 -44
- followthemoney/mapping/query.py +27 -19
- followthemoney/mapping/source.py +15 -5
- followthemoney/mapping/sql.py +75 -61
- followthemoney/messages.py +13 -7
- followthemoney/model.py +108 -56
- followthemoney/namespace.py +119 -0
- followthemoney/offshore.py +48 -0
- followthemoney/ontology.py +77 -0
- followthemoney/property.py +204 -71
- followthemoney/proxy.py +455 -118
- followthemoney/rdf.py +9 -0
- followthemoney/schema/Address.yaml +78 -0
- followthemoney/schema/Airplane.yaml +17 -10
- followthemoney/schema/Analyzable.yaml +54 -0
- followthemoney/schema/Article.yaml +16 -0
- followthemoney/schema/Assessment.yaml +32 -0
- followthemoney/schema/Asset.yaml +10 -4
- followthemoney/schema/Associate.yaml +41 -0
- followthemoney/schema/Audio.yaml +24 -0
- followthemoney/schema/BankAccount.yaml +53 -9
- followthemoney/schema/Call.yaml +48 -0
- followthemoney/schema/CallForTenders.yaml +117 -0
- followthemoney/schema/Company.yaml +37 -12
- followthemoney/schema/Contract.yaml +41 -7
- followthemoney/schema/ContractAward.yaml +30 -11
- followthemoney/schema/CourtCase.yaml +16 -10
- followthemoney/schema/CourtCaseParty.yaml +17 -6
- followthemoney/schema/CryptoWallet.yaml +48 -0
- followthemoney/schema/Debt.yaml +37 -0
- followthemoney/schema/Directorship.yaml +17 -4
- followthemoney/schema/Document.yaml +72 -139
- followthemoney/schema/Documentation.yml +38 -0
- followthemoney/schema/EconomicActivity.yaml +32 -17
- followthemoney/schema/Email.yaml +76 -0
- followthemoney/schema/Employment.yaml +39 -0
- followthemoney/schema/Event.yaml +35 -3
- followthemoney/schema/Family.yaml +41 -0
- followthemoney/schema/Folder.yaml +13 -0
- followthemoney/schema/HyperText.yaml +21 -0
- followthemoney/schema/Identification.yaml +40 -0
- followthemoney/schema/Image.yaml +25 -0
- followthemoney/schema/Interest.yaml +3 -6
- followthemoney/schema/Interval.yaml +56 -5
- followthemoney/schema/LegalEntity.yaml +81 -20
- followthemoney/schema/License.yaml +7 -3
- followthemoney/schema/Membership.yaml +19 -4
- followthemoney/schema/Mention.yaml +54 -0
- followthemoney/schema/Message.yaml +73 -0
- followthemoney/schema/Note.yaml +23 -0
- followthemoney/schema/Occupancy.yaml +40 -0
- followthemoney/schema/Organization.yaml +38 -3
- followthemoney/schema/Ownership.yaml +16 -4
- followthemoney/schema/Package.yaml +17 -0
- followthemoney/schema/Page.yaml +43 -0
- followthemoney/schema/Pages.yaml +23 -0
- followthemoney/schema/Passport.yaml +15 -17
- followthemoney/schema/Payment.yaml +38 -7
- followthemoney/schema/Person.yaml +61 -5
- followthemoney/schema/PlainText.yaml +17 -0
- followthemoney/schema/Position.yaml +50 -0
- followthemoney/schema/Post.yaml +42 -0
- followthemoney/schema/Project.yaml +27 -0
- followthemoney/schema/ProjectParticipant.yaml +36 -0
- followthemoney/schema/PublicBody.yaml +14 -3
- followthemoney/schema/RealEstate.yaml +19 -3
- followthemoney/schema/Representation.yaml +17 -6
- followthemoney/schema/Sanction.yaml +44 -20
- followthemoney/schema/Security.yaml +59 -0
- followthemoney/schema/Similar.yaml +37 -0
- followthemoney/schema/Succession.yaml +36 -0
- followthemoney/schema/Table.yaml +32 -0
- followthemoney/schema/TaxRoll.yaml +27 -9
- followthemoney/schema/Thing.yaml +69 -13
- followthemoney/schema/Trip.yaml +42 -0
- followthemoney/schema/UnknownLink.yaml +17 -6
- followthemoney/schema/UserAccount.yaml +44 -0
- followthemoney/schema/Value.yaml +5 -1
- followthemoney/schema/Vehicle.yaml +25 -8
- followthemoney/schema/Vessel.yaml +18 -10
- followthemoney/schema/Video.yaml +20 -0
- followthemoney/schema/Workbook.yaml +18 -0
- followthemoney/schema.py +406 -135
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
- followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
- followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
- followthemoney/translations/fr/followthemoney.po +3861 -0
- followthemoney/translations/messages.pot +3021 -725
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
- followthemoney/translations/ru/followthemoney.po +4221 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
- followthemoney/types/__init__.py +35 -17
- followthemoney/types/address.py +41 -21
- followthemoney/types/checksum.py +25 -0
- followthemoney/types/common.py +233 -88
- followthemoney/types/country.py +89 -56
- followthemoney/types/date.py +59 -76
- followthemoney/types/email.py +66 -35
- followthemoney/types/entity.py +66 -13
- followthemoney/types/gender.py +66 -0
- followthemoney/types/iban.py +47 -28
- followthemoney/types/identifier.py +49 -22
- followthemoney/types/ip.py +35 -21
- followthemoney/types/json.py +58 -0
- followthemoney/types/language.py +124 -37
- followthemoney/types/mimetype.py +44 -0
- followthemoney/types/name.py +56 -12
- followthemoney/types/number.py +30 -0
- followthemoney/types/phone.py +92 -34
- followthemoney/types/registry.py +52 -0
- followthemoney/types/string.py +43 -0
- followthemoney/types/topic.py +94 -0
- followthemoney/types/url.py +39 -17
- followthemoney/util.py +139 -45
- followthemoney-3.8.0.dist-info/METADATA +153 -0
- followthemoney-3.8.0.dist-info/RECORD +157 -0
- {followthemoney-1.3.7.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
- followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
- followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
- followthemoney/link.py +0 -75
- followthemoney/schema/Associate.yml +0 -19
- followthemoney/schema/Family.yml +0 -19
- followthemoney/schema/Land.yml +0 -9
- followthemoney/schema/Relationship.yaml +0 -26
- followthemoney/types/domain.py +0 -50
- followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
- followthemoney-1.3.7.dist-info/METADATA +0 -39
- followthemoney-1.3.7.dist-info/RECORD +0 -108
- followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
- followthemoney-1.3.7.dist-info/metadata.json +0 -1
- followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
- followthemoney-1.3.7.dist-info/top_level.txt +0 -3
- ns/ontology.py +0 -128
- tests/types/test_addresses.py +0 -24
- tests/types/test_common.py +0 -32
- tests/types/test_countries.py +0 -27
- tests/types/test_dates.py +0 -73
- tests/types/test_domains.py +0 -23
- tests/types/test_emails.py +0 -32
- tests/types/test_entity.py +0 -19
- tests/types/test_iban.py +0 -109
- tests/types/test_identifiers.py +0 -27
- tests/types/test_ip.py +0 -29
- tests/types/test_languages.py +0 -23
- tests/types/test_names.py +0 -33
- tests/types/test_phones.py +0 -24
- tests/types/test_registry.py +0 -14
- tests/types/test_urls.py +0 -23
- {ns → followthemoney/export}/__init__.py +0 -0
- /tests/types/__init__.py → /followthemoney/py.typed +0 -0
followthemoney/model.py
CHANGED
|
@@ -1,66 +1,109 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import yaml
|
|
3
|
+
from typing import Any, Dict, Generator, Iterator, Optional, Set, TypedDict, Union
|
|
3
4
|
|
|
4
|
-
from followthemoney.
|
|
5
|
+
from followthemoney.types import registry
|
|
6
|
+
from followthemoney.types.common import PropertyType, PropertyTypeToDict
|
|
7
|
+
from followthemoney.schema import Schema, SchemaToDict
|
|
8
|
+
from followthemoney.property import Property
|
|
5
9
|
from followthemoney.mapping import QueryMapping
|
|
6
10
|
from followthemoney.proxy import EntityProxy
|
|
7
11
|
from followthemoney.exc import InvalidModel, InvalidData
|
|
8
12
|
|
|
9
13
|
|
|
14
|
+
class ModelToDict(TypedDict):
    # Serialised schema metadata, keyed by schema name.
    schemata: Dict[str, SchemaToDict]
    # Serialised property type metadata, keyed by type name.
    types: Dict[str, PropertyTypeToDict]
|
|
17
|
+
|
|
18
|
+
|
|
10
19
|
class Model(object):
    """Container for every schema loaded from a directory of model files.

    Besides holding the schemata, the model offers lookup helpers for
    schemata and properties and factory methods that build mappings and
    entity proxies bound to this model."""

    __slots__ = ("path", "schemata", "properties", "qnames")

    def __init__(self, path: str) -> None:
        """Load all model files found below ``path`` and finalise the model.

        The directory is walked recursively and every file encountered is
        handed to ``_load``; ``generate`` runs once after all files are read.
        """
        self.path = path

        #: Schema objects, keyed by schema name.
        self.schemata: Dict[str, Schema] = {}

        #: All properties defined in the model.
        self.properties: Set[Property] = set()
        #: Properties keyed by their qualified name; filled by ``generate``.
        self.qnames: Dict[str, Property] = {}

        for (dirpath, _, filenames) in os.walk(self.path):
            for filename in filenames:
                self._load(os.path.join(dirpath, filename))
        self.generate()

    def generate(self) -> None:
        """Finalise cross-references between schemata.

        Schemata refer to one another, so this step can only run after
        every schema has been instantiated. It asks each schema to generate
        itself, indexes all properties by qualified name and copies each
        property onto the descendants of its schema that do not already
        define a property of the same name."""
        for schema in self:
            schema.generate(self)
        for prop in self.properties:
            self.qnames[prop.qname] = prop
            for descendant in prop.schema.descendants:
                # Keep an existing property of the same name untouched.
                descendant.properties.setdefault(prop.name, prop)

    def _load(self, filepath: str) -> None:
        """Parse one YAML file and register each top-level key as a schema.

        Raises ``InvalidModel`` if the document is not a mapping."""
        with open(filepath, "r", encoding="utf-8") as fh:
            data = yaml.safe_load(fh)
            if not isinstance(data, dict):
                raise InvalidModel("Model file is not a mapping: %s" % filepath)
            for name, config in data.items():
                self.schemata[name] = Schema(self, name, config)

    def get(self, name: Union[str, Schema]) -> Optional[Schema]:
        """Look up a schema by name, returning ``None`` if it is unknown.

        Anything that is not a string is assumed to be a schema already and
        is handed back unchanged."""
        if not isinstance(name, str):
            return name
        return self.schemata.get(name)

    def get_qname(self, qname: str) -> Optional[Property]:
        """Look up a property by qualified name (i.e. schema:property)."""
        return self.qnames.get(qname)

    def __getitem__(self, name: str) -> Schema:
        """Like ``get()``, but raise ``KeyError`` for an unknown schema."""
        schema = self.get(name)
        if schema is not None:
            return schema
        raise KeyError("No such schema: %s" % name)

    def get_type_schemata(self, type_: PropertyType) -> Set[Schema]:
        """Collect every schema that has at least one property of ``type_``."""
        return {
            schema
            for schema in self.schemata.values()
            if any(prop.type == type_ for prop in schema.properties.values())
        }

    def make_mapping(
        self, mapping: Dict[str, Any], key_prefix: Optional[str] = None
    ) -> QueryMapping:
        """Build a query mapping that applies (tabular) source data to the model."""
        return QueryMapping(self, mapping, key_prefix=key_prefix)

    def map_entities(
        self, mapping: Dict[str, Any], key_prefix: Optional[str] = None
    ) -> Generator[EntityProxy, None, None]:
        """Yield the entities produced by running ``mapping`` over its source.

        Every record of the mapping's source is mapped in turn and all
        entities generated for that record are yielded."""
        query = self.make_mapping(mapping, key_prefix=key_prefix)
        for record in query.source.records:
            yield from query.map(record).values()

    def common_schema(
        self, left: Union[str, Schema], right: Union[str, Schema]
    ) -> Schema:
        """Pick the more specific of two schemata.

        If one of the two inputs cannot be resolved, the other one stands in
        for it. The schema that ``is_a`` the other is returned.

        Raises ``InvalidData`` when neither input resolves to a schema, or
        when neither schema is a specialisation of the other."""
        found_left = self.get(left)
        found_right = self.get(right)
        # Fall back to the opposite side when one lookup yields nothing.
        left_schema = found_left or found_right
        right_schema = found_right or found_left
        if left_schema is None or right_schema is None:
            raise InvalidData("Invalid schema")
        if left_schema.is_a(right_schema):
            return left_schema
        if right_schema.is_a(left_schema):
            return right_schema
        msg = "No common schema: %s and %s"
        raise InvalidData(msg % (left, right))

    def make_entity(
        self, schema: Union[str, Schema], key_prefix: Optional[str] = None
    ) -> EntityProxy:
        """Create an empty entity proxy of the given schema."""
        return EntityProxy(self, {"schema": schema}, key_prefix=key_prefix)

    def get_proxy(self, data: Dict[str, Any], cleaned: bool = True) -> EntityProxy:
        """Wrap entity data from a dictionary in an entity proxy.

        An object that already is an ``EntityProxy`` is returned as-is.
        Pass ``cleaned=False`` for data from an untrusted source so that
        property values are re-validated and normalised."""
        if isinstance(data, EntityProxy):
            return data
        return EntityProxy.from_dict(self, data, cleaned=cleaned)

    def to_dict(self) -> ModelToDict:
        """Describe all schemata and property types in a serializable form."""
        schemata = {s.name: s.to_dict() for s in self.schemata.values()}
        types = {t.name: t.to_dict() for t in registry.types}
        return {
            "schemata": schemata,
            "types": types,
        }

    def __iter__(self) -> Iterator[Schema]:
        """Iterate over all schema objects in the model."""
        return iter(self.schemata.values())
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""
|
|
2
|
+
*We like our abstractions like our offshore banks: leaky.*
|
|
3
|
+
|
|
4
|
+
Entity ID namespaces are a security mechanism related to the Aleph search index.
|
|
5
|
+
|
|
6
|
+
Aleph allows the user (via mappings or the API) to create arbitrary entity IDs.
|
|
7
|
+
Entity IDs that are controlled by the user and not the system are unusual.
|
|
8
|
+
However, this makes it possible to generate bulk data outside Aleph,
|
|
9
|
+
and then load entities into the system as continuous :ref:`streams`.
|
|
10
|
+
|
|
11
|
+
The problem is that having user controlled entity IDs increases the chance
|
|
12
|
+
of conflict in the search index.
|
|
13
|
+
|
|
14
|
+
Namespacing works around this by making each entity ID consist of two parts:
|
|
15
|
+
one controlled by the client, the other controlled by the system. The second
|
|
16
|
+
part of the ID is called its `signature`::
|
|
17
|
+
|
|
18
|
+
entity_id.a40a29300ac6bb79dd2f911e77bbda7a3b502126
|
|
19
|
+
|
|
20
|
+
The signature is generated as ``hmac(entity_id, dataset_id)``. This guarantees
|
|
21
|
+
that the combined ID is specific to a dataset, without needing an (expensive)
|
|
22
|
+
index look up of each ID first. It can also be generated on the client or
|
|
23
|
+
the server without compromising isolation.
|
|
24
|
+
"""
|
|
25
|
+
import hmac
|
|
26
|
+
from typing import Any, Optional, Tuple, Union
|
|
27
|
+
|
|
28
|
+
from followthemoney.types import registry
|
|
29
|
+
from followthemoney.proxy import E
|
|
30
|
+
from followthemoney.util import key_bytes, get_entity_id
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class Namespace(object):
    """Namespaces are used to partition entity IDs into different units,
    which traditionally represent a dataset, collection or source.

    A namespaced ID has the form ``<plain_id>.<signature>``, where the
    signature is a SHA-1 HMAC of the plain ID keyed with the namespace name.
    A namespace created without a name leaves IDs unsigned.

    See module docstring for details."""

    SEP = "."

    def __init__(self, name: Optional[str] = None) -> None:
        """Create a namespace; a missing or empty ``name`` disables signing."""
        self.bname = key_bytes(name) if name else b""
        # Keyed HMAC template; ``signature`` copies it for every ID so the
        # shared state is never mutated.
        self.hmac = hmac.new(self.bname, digestmod="sha1")

    @classmethod
    def parse(cls, entity_id: str) -> Tuple[Optional[str], Optional[str]]:
        """Split up an entity ID into the plain ID and the namespace
        signature. If either part is missing, return None instead."""
        clean_id = registry.entity.clean(entity_id)
        if clean_id is None:
            return (None, None)
        try:
            # Split on the last separator only: the plain ID may itself
            # contain the separator character.
            plain_id, checksum = clean_id.rsplit(cls.SEP, 1)
            return (plain_id, checksum)
        except ValueError:
            # No separator present: the ID carries no signature.
            return (clean_id, None)

    @classmethod
    def strip(cls, entity_id: str) -> Optional[str]:
        """Return the entity ID without its namespace signature."""
        plain_id, _ = cls.parse(entity_id)
        return plain_id

    def signature(self, entity_id: str) -> Optional[str]:
        """Generate a namespace-specific signature.

        Returns ``None`` for an unnamed namespace or a missing ID."""
        if not self.bname or entity_id is None:
            return None
        digest = self.hmac.copy()
        digest.update(key_bytes(entity_id))
        return digest.hexdigest()

    def sign(self, entity_id: str) -> Optional[str]:
        """Apply a namespace signature to an entity ID, removing any
        previous namespace marker.

        An unnamed namespace returns the plain ID; an ID that cannot be
        parsed yields ``None``."""
        parsed_id, _ = self.parse(entity_id)
        if not self.bname:
            return parsed_id
        if parsed_id is None:
            return None
        digest = self.signature(parsed_id)
        if digest is None:
            return None
        return self.SEP.join((parsed_id, digest))

    def verify(self, entity_id: str) -> bool:
        """Check if the signature matches the current namespace."""
        parsed_id, digest = self.parse(entity_id)
        if digest is None or parsed_id is None:
            return False
        signature = self.signature(parsed_id)
        if signature is None:
            return False
        # Constant-time comparison of the supplied and expected signature.
        return hmac.compare_digest(digest, signature)

    def apply(self, proxy: E, shallow: bool = False) -> E:
        """Rewrite an entity proxy so all IDs mentioned are limited to
        the namespace.

        Works on a clone of ``proxy``. With ``shallow`` set, only the
        entity's own ID is signed and entity-typed property values are left
        as they are."""
        signed = proxy.clone()
        signed.id = self.sign(proxy.id)
        if not shallow:
            for prop in proxy.iterprops():
                if prop.type != registry.entity:
                    continue
                # Remove the current values from the clone and add them
                # back in signed form.
                for value in signed.pop(prop):
                    entity_id = get_entity_id(value)
                    if entity_id is not None:
                        signed.add(prop, self.sign(entity_id))
        return signed

    @classmethod
    def make(cls, name: Union[str, "Namespace"]) -> "Namespace":
        """Coerce a name into a namespace; non-string input is returned as-is."""
        if isinstance(name, str):
            return cls(name)
        return name

    def __eq__(self, other: Any) -> bool:
        """Two namespaces are equal when their names are equal."""
        if not isinstance(other, Namespace):
            # Let Python fall back to its default handling instead of
            # failing with AttributeError on objects without ``bname``.
            return NotImplemented
        return bool(self.bname == other.bname)

    def __hash__(self) -> int:
        # Defining __eq__ alone would make instances unhashable; hash on the
        # same value that equality compares.
        return hash(self.bname)

    def __repr__(self) -> str:
        return "<Namespace(%r)>" % self.bname
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
from followthemoney.proxy import E
|
|
2
|
+
|
|
3
|
+
# Derived from: https://fsi.taxjustice.net/en/introduction/fsi-results
|
|
4
|
+
# Lower-case two-letter country codes treated as offshore jurisdictions.
OFFSHORE_COUNTRIES = {
    "ky",
    "ch",
    "sg",
    "lu",
    "vg",
    "gg",
    "pa",
    "je",
    "mt",
    "bs",
    "cy",
    "gi",
    "mo",
    "bm",
    "im",
    "mh",
    "mu",
    "li",
    "ai",
    "kn",
    "tc",
    "vu",
    "mc",
    "sc",
    "ag",
    "dm",
    "ms",
    "lc",
    "ck",
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def offshore_from_jurisdiction(proxy: E) -> E:
    """Add the ``corp.offshore`` topic to organizations whose country or
    jurisdiction is one of ``OFFSHORE_COUNTRIES``.

    The proxy is modified in place and returned; proxies whose schema is
    not an ``Organization`` are returned untouched. This is a sweeping
    generalization and meant for experiments only."""
    if proxy.schema.is_a("Organization"):
        linked = set(proxy.get("country", quiet=True))
        linked.update(proxy.get("jurisdiction", quiet=True))
        # Any overlap with the offshore list is enough to tag the entity.
        if not linked.isdisjoint(OFFSHORE_COUNTRIES):
            proxy.add("topics", "corp.offshore")
    return proxy
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
from datetime import datetime
|
|
3
|
+
from rdflib import Graph, URIRef, Literal
|
|
4
|
+
from rdflib.namespace import OWL, DCTERMS, RDF, RDFS, XSD
|
|
5
|
+
|
|
6
|
+
from followthemoney import model
|
|
7
|
+
from followthemoney.property import Property
|
|
8
|
+
from followthemoney.schema import Schema
|
|
9
|
+
from followthemoney.types import registry
|
|
10
|
+
from followthemoney.rdf import NS
|
|
11
|
+
from followthemoney.util import PathLike
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Ontology(object):
    """RDF graph describing the followthemoney model: one ``rdfs:Class``
    per schema and one ``rdf:Property`` per schema property."""

    def __init__(self) -> None:
        """Create the graph, add the ontology header and all schemata."""
        self.uri = URIRef(NS)
        self.graph = Graph(identifier=self.uri)
        self.graph.namespace_manager.bind("ftm", NS)
        self.graph.namespace_manager.bind("owl", OWL)
        self.graph.namespace_manager.bind("dct", DCTERMS)

        self.graph.add((self.uri, RDF.type, OWL.Ontology))
        self.graph.add((self.uri, RDFS.label, Literal("Follow The Money")))
        # xsd:dateTime expects hours:minutes:seconds. The previous format
        # "%H:%I:%M" emitted the hour twice (24h, then 12h) followed by the
        # minutes, and no seconds.
        modified = datetime.now().strftime("%Y-%m-%dT%H:%M:%S")
        modified = Literal(modified, datatype=XSD.dateTime)
        self.graph.add((self.uri, DCTERMS.modified, modified))

        self.add_schemata()

    def add_schemata(self) -> None:
        """Add every schema of the model to the graph, in sorted order."""
        for schema in sorted(model):
            self.add_class(schema)

    def add_class(self, schema: Schema) -> None:
        """Describe one schema as an ``rdfs:Class`` with its parent classes,
        label, optional description and all of its properties."""
        self.graph.add((schema.uri, RDF.type, RDFS.Class))
        self.graph.add((schema.uri, RDFS.isDefinedBy, self.uri))
        for parent in schema.extends:
            self.graph.add((schema.uri, RDFS.subClassOf, parent.uri))

        self.graph.add((schema.uri, RDFS.label, Literal(schema.label)))
        if schema.description is not None:
            description = Literal(schema.description)
            self.graph.add((schema.uri, RDFS.comment, description))

        for _, prop in sorted(schema.properties.items()):
            self.add_property(prop)

    def add_property(self, prop: Property) -> None:
        """Describe one property as an ``rdf:Property`` with label, optional
        description, domain and, where known, range and inverse."""
        self.graph.add((prop.uri, RDF.type, RDF.Property))
        self.graph.add((prop.uri, RDFS.isDefinedBy, self.uri))

        self.graph.add((prop.uri, RDFS.label, Literal(prop.label)))
        if prop.description is not None:
            self.graph.add((prop.uri, RDFS.comment, Literal(prop.description)))

        self.graph.add((prop.uri, RDFS.domain, prop.schema.uri))
        if prop.range is not None:
            # Only emit a range when the referenced schema exists in the model.
            range_schema = model.get(prop.range)
            if range_schema is not None:
                self.graph.add((prop.uri, RDFS.range, range_schema.uri))
        if prop.reverse is not None:
            self.graph.add((prop.uri, OWL.inverseOf, prop.reverse.uri))
        if prop.type == registry.date:
            self.graph.add((prop.uri, RDFS.range, XSD.dateTime))

    def write_namespace_docs(self, path: PathLike) -> None:
        """Serialize the graph as RDF/XML into ``<path>/ftm.xml``."""
        xml_fn = "%s/ftm.xml" % path
        # Write as UTF-8 explicitly instead of relying on the platform's
        # default encoding.
        # NOTE(review): assumes ``serialize`` returns ``str``, as the
        # original text-mode write already did.
        with open(xml_fn, "w", encoding="utf-8") as xml_file:
            xml_file.write(self.graph.serialize(format="xml"))
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
if __name__ == "__main__":
    # The first command line argument names the directory that receives
    # the generated ``ftm.xml`` file.
    path = sys.argv[1]
    ontology = Ontology()
    ontology.write_namespace_docs(path)
    print("Namespace docs written to %s" % path)
|