followthemoney 3.8.5__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +30 -10
- followthemoney/cli/cli.py +1 -1
- followthemoney/cli/exports.py +6 -2
- followthemoney/cli/statement.py +62 -0
- followthemoney/cli/util.py +2 -3
- followthemoney/compare.py +26 -16
- followthemoney/dataset/__init__.py +17 -0
- followthemoney/dataset/catalog.py +77 -0
- followthemoney/dataset/coverage.py +29 -0
- followthemoney/dataset/dataset.py +137 -0
- followthemoney/dataset/publisher.py +25 -0
- followthemoney/dataset/resource.py +30 -0
- followthemoney/dataset/util.py +58 -0
- followthemoney/entity.py +73 -0
- followthemoney/exc.py +6 -0
- followthemoney/export/rdf.py +57 -5
- followthemoney/graph.py +1 -2
- followthemoney/model.py +36 -9
- followthemoney/ontology.py +18 -16
- followthemoney/property.py +12 -15
- followthemoney/proxy.py +43 -64
- followthemoney/schema/Analyzable.yaml +2 -3
- followthemoney/schema/BankAccount.yaml +2 -3
- followthemoney/schema/Company.yaml +0 -6
- followthemoney/schema/Contract.yaml +0 -1
- followthemoney/schema/CryptoWallet.yaml +1 -1
- followthemoney/schema/Document.yaml +0 -6
- followthemoney/schema/Interval.yaml +7 -0
- followthemoney/schema/LegalEntity.yaml +6 -0
- followthemoney/schema/License.yaml +2 -0
- followthemoney/schema/Page.yaml +0 -1
- followthemoney/schema/Person.yaml +0 -5
- followthemoney/schema/Sanction.yaml +1 -0
- followthemoney/schema/Thing.yaml +0 -2
- followthemoney/schema/UserAccount.yaml +6 -3
- followthemoney/schema.py +27 -39
- followthemoney/statement/__init__.py +19 -0
- followthemoney/statement/entity.py +437 -0
- followthemoney/statement/serialize.py +245 -0
- followthemoney/statement/statement.py +256 -0
- followthemoney/statement/util.py +31 -0
- followthemoney/types/__init__.py +66 -23
- followthemoney/types/address.py +3 -3
- followthemoney/types/checksum.py +3 -7
- followthemoney/types/common.py +9 -14
- followthemoney/types/country.py +3 -7
- followthemoney/types/date.py +21 -11
- followthemoney/types/email.py +0 -4
- followthemoney/types/entity.py +5 -11
- followthemoney/types/gender.py +6 -10
- followthemoney/types/identifier.py +9 -3
- followthemoney/types/ip.py +5 -9
- followthemoney/types/json.py +2 -2
- followthemoney/types/language.py +3 -7
- followthemoney/types/mimetype.py +4 -8
- followthemoney/types/name.py +7 -8
- followthemoney/types/number.py +88 -6
- followthemoney/types/phone.py +4 -11
- followthemoney/types/string.py +4 -4
- followthemoney/types/topic.py +3 -7
- followthemoney/types/url.py +5 -10
- followthemoney/util.py +12 -13
- followthemoney/value.py +67 -0
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/METADATA +23 -8
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/RECORD +68 -59
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/entry_points.txt +1 -0
- followthemoney/offshore.py +0 -48
- followthemoney/rdf.py +0 -9
- followthemoney/schema/Assessment.yaml +0 -32
- followthemoney/schema/Post.yaml +0 -42
- followthemoney/types/iban.py +0 -58
- followthemoney/types/registry.py +0 -52
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/WHEEL +0 -0
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/licenses/LICENSE +0 -0
followthemoney/entity.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
from typing import Any, Dict, List, Optional, Set, TypeVar
|
|
2
|
+
|
|
3
|
+
from rigour.names import pick_name
|
|
4
|
+
|
|
5
|
+
from followthemoney.proxy import EntityProxy
|
|
6
|
+
from followthemoney.schema import Schema
|
|
7
|
+
from followthemoney.statement.util import BASE_ID
|
|
8
|
+
|
|
9
|
+
VE = TypeVar("VE", bound="ValueEntity")
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def _defined(*args: Optional[str]) -> List[str]:
    """Return the arguments that are not ``None``, in their original order."""
    present: List[str] = []
    for candidate in args:
        if candidate is None:
            continue
        present.append(candidate)
    return present
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ValueEntity(EntityProxy):
    """
    This class has the extended attributes from `StatementEntity` but without
    statements. Useful for streaming around. Starting from followthemoney 4.0,
    applications should use this entity class as the base class.

    On top of the base proxy it tracks an explicit caption, the datasets and
    referent IDs associated with the entity, and the ``first_seen``,
    ``last_seen`` and ``last_change`` timestamps (kept as strings).
    """

    def __init__(
        self,
        schema: Schema,
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
        cleaned: bool = True,
    ):
        """Build the entity from a serialised dictionary.

        Besides the keys consumed by the base class, ``data`` may carry
        ``caption``, ``datasets``, ``referents``, ``first_seen``,
        ``last_seen``, ``last_change`` and a list of ``statements`` dicts.
        Note that the ``statements`` key is removed from ``data``.
        """
        super().__init__(schema, data, key_prefix=key_prefix, cleaned=cleaned)
        self._caption: Optional[str] = data.get("caption")
        self.datasets: Set[str] = set(data.get("datasets", []))
        self.referents: Set[str] = set(data.get("referents", []))
        self.first_seen: Optional[str] = data.get("first_seen")
        self.last_seen: Optional[str] = data.get("last_seen")
        self.last_change: Optional[str] = data.get("last_change")

        # add data from statement dict if present.
        # this updates the dataset and referents set
        for stmt_data in data.pop("statements", []):
            self.datasets.add(stmt_data["dataset"])
            # Statements recorded under a different entity ID are referents.
            if stmt_data["entity_id"] != self.id:
                self.referents.add(stmt_data["entity_id"])
            # BASE_ID statements carry no property value to add.
            if stmt_data["prop"] != BASE_ID:
                self.add(stmt_data["prop"], stmt_data["value"])

    def merge(self: "ValueEntity", other: "ValueEntity") -> "ValueEntity":
        """Merge ``other`` into this entity and return the merged entity.

        Properties are merged by the base class; the caption is re-picked
        from both captions, referents and datasets are unioned, and the
        timestamps become the earliest ``first_seen`` and the latest
        ``last_seen`` / ``last_change`` of the two (``None`` if neither
        entity defines one).
        """
        merged = super().merge(other)
        merged._caption = pick_name(_defined(self._caption, other._caption))
        merged.referents.update(other.referents)
        merged.datasets.update(other.datasets)
        # Write every merged attribute to `merged` (not `self`) so the result
        # stays correct regardless of which object the base merge returns.
        first_seen = _defined(self.first_seen, other.first_seen)
        merged.first_seen = min(first_seen, default=None)
        last_seen = _defined(self.last_seen, other.last_seen)
        merged.last_seen = max(last_seen, default=None)
        changed = _defined(self.last_change, other.last_change)
        merged.last_change = max(changed, default=None)
        return merged

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the entity to a dictionary.

        The explicit caption wins over the computed one; timestamp keys are
        only emitted when the corresponding value is set.
        """
        data: Dict[str, Any] = {
            "id": self.id,
            "caption": self._caption or self.caption,
            "schema": self.schema.name,
            "properties": self.properties,
            "referents": list(self.referents),
            "datasets": list(self.datasets),
        }
        if self.first_seen is not None:
            data["first_seen"] = self.first_seen
        if self.last_seen is not None:
            data["last_seen"] = self.last_seen
        if self.last_change is not None:
            data["last_change"] = self.last_change
        return data
|
followthemoney/exc.py
CHANGED
|
@@ -11,6 +11,12 @@ class FollowTheMoneyException(Exception):
|
|
|
11
11
|
pass
|
|
12
12
|
|
|
13
13
|
|
|
14
|
+
class MetadataException(FollowTheMoneyException):
    """Raised when dataset metadata fails validation."""
|
|
18
|
+
|
|
19
|
+
|
|
14
20
|
class InvalidData(FollowTheMoneyException):
|
|
15
21
|
"""Schema validation errors will be caught by the API."""
|
|
16
22
|
|
followthemoney/export/rdf.py
CHANGED
|
@@ -1,23 +1,75 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from
|
|
3
|
-
from
|
|
2
|
+
from prefixdate import Precision
|
|
3
|
+
from rdflib import Graph, Namespace
|
|
4
|
+
from rdflib.term import Identifier, URIRef, Literal
|
|
5
|
+
from rdflib import RDF, SKOS, XSD
|
|
6
|
+
from typing import Generator, List, Optional, TextIO, Tuple
|
|
4
7
|
|
|
5
8
|
from followthemoney.export.common import Exporter
|
|
6
|
-
from followthemoney.
|
|
9
|
+
from followthemoney.types import registry
|
|
10
|
+
from followthemoney.proxy import EntityProxy
|
|
7
11
|
|
|
8
12
|
log = logging.getLogger(__name__)
|
|
13
|
+
Triple = Tuple[Identifier, Identifier, Identifier]
|
|
14
|
+
NS = Namespace("https://schema.followthemoney.tech/#")
|
|
9
15
|
|
|
10
16
|
|
|
11
17
|
class RDFExporter(Exporter):
|
|
18
|
+
"""Export the entity as RDF N-Triples."""
|
|
19
|
+
|
|
20
|
+
TYPE_PREFIXES = {
|
|
21
|
+
registry.checksum: "hash:",
|
|
22
|
+
registry.country: "http://id.loc.gov/vocabulary/countries/",
|
|
23
|
+
registry.email: "mailto:",
|
|
24
|
+
registry.entity: "e:",
|
|
25
|
+
registry.gender: "gender:",
|
|
26
|
+
registry.ip: "ip:",
|
|
27
|
+
registry.identifier: "id:",
|
|
28
|
+
registry.language: "http://lexvo.org/id/iso639-3/",
|
|
29
|
+
registry.mimetype: "urn:mimetype:",
|
|
30
|
+
registry.phone: "tel:",
|
|
31
|
+
registry.topic: "ftm:topic:",
|
|
32
|
+
}
|
|
33
|
+
|
|
12
34
|
def __init__(self, fh: TextIO, qualified: bool = True) -> None:
|
|
13
35
|
super(RDFExporter, self).__init__()
|
|
14
36
|
self.fh = fh
|
|
15
37
|
self.qualified = qualified
|
|
16
38
|
|
|
17
|
-
def
|
|
39
|
+
def entity_triples(self, proxy: EntityProxy) -> Generator[Triple, None, None]:
    """Yield the RDF triples that describe ``proxy``.

    Nothing is yielded when the proxy has no ID or no schema. Otherwise the
    first triple is the ``rdf:type`` of the entity, followed (in qualified
    mode only) by a ``skos:prefLabel`` when the caption differs from the
    schema label, and then one triple per property value.
    """
    if proxy.id is None or proxy.schema is None:
        return
    subject = URIRef(f"{self.TYPE_PREFIXES[registry.entity]}{proxy.id}")
    yield (subject, RDF.type, NS[proxy.schema.name])
    if self.qualified:
        label = proxy.caption
        if label != proxy.schema.label:
            yield (subject, SKOS.prefLabel, Literal(label))
    for prop, value in proxy.itervalues():
        obj: Identifier
        type_prefix = self.TYPE_PREFIXES.get(prop.type)
        if type_prefix is not None:
            # Prefixed types become URIs; identifiers also embed their format.
            if prop.type == registry.identifier and prop.format is not None:
                type_prefix = f"{type_prefix}{prop.format}:"
            obj = URIRef(f"{type_prefix}{value}")
        elif prop.type == registry.date:
            # Values shorter than hour precision are typed as plain dates.
            if len(value) < Precision.HOUR.value:
                datatype = XSD.date
            else:
                datatype = XSD.dateTime
            obj = Literal(value, datatype=datatype)
        elif prop.type == registry.url:
            obj = URIRef(value)
        else:
            obj = Literal(value)
        # Qualified mode uses the namespaced qname, otherwise the bare name.
        predicate = NS[prop.qname] if self.qualified else URIRef(prop.name)
        yield (subject, predicate, obj)
|
|
68
|
+
|
|
69
|
+
def write(self, proxy: EntityProxy, extra: Optional[List[str]] = None) -> None:
|
|
18
70
|
graph = Graph()
|
|
19
71
|
|
|
20
|
-
for triple in
|
|
72
|
+
for triple in self.entity_triples(proxy):
|
|
21
73
|
graph.add(triple)
|
|
22
74
|
try:
|
|
23
75
|
nt = graph.serialize(format="nt11").strip()
|
followthemoney/graph.py
CHANGED
|
@@ -196,8 +196,7 @@ class Graph(object):
|
|
|
196
196
|
"""
|
|
197
197
|
|
|
198
198
|
def __init__(self, edge_types: Iterable[PropertyType] = registry.pivots) -> None:
|
|
199
|
-
|
|
200
|
-
self.edge_types = [t for t in types if t.matchable]
|
|
199
|
+
self.edge_types = [t for t in edge_types if t.matchable]
|
|
201
200
|
self.flush()
|
|
202
201
|
|
|
203
202
|
def flush(self) -> None:
|
followthemoney/model.py
CHANGED
|
@@ -1,16 +1,19 @@
|
|
|
1
1
|
import os
|
|
2
2
|
import yaml
|
|
3
3
|
from functools import lru_cache
|
|
4
|
-
from typing import
|
|
4
|
+
from typing import TYPE_CHECKING, Any
|
|
5
|
+
from typing import Dict, Generator, Iterator, Optional, Set, TypedDict, Union
|
|
5
6
|
|
|
6
7
|
from followthemoney.types import registry
|
|
7
8
|
from followthemoney.types.common import PropertyType, PropertyTypeToDict
|
|
8
9
|
from followthemoney.schema import Schema, SchemaToDict
|
|
9
10
|
from followthemoney.property import Property
|
|
10
|
-
from followthemoney.mapping import QueryMapping
|
|
11
|
-
from followthemoney.proxy import EntityProxy
|
|
12
11
|
from followthemoney.exc import InvalidModel, InvalidData
|
|
13
12
|
|
|
13
|
+
if TYPE_CHECKING:
|
|
14
|
+
from followthemoney.proxy import EntityProxy
|
|
15
|
+
from followthemoney.mapping import QueryMapping
|
|
16
|
+
|
|
14
17
|
|
|
15
18
|
class ModelToDict(TypedDict):
|
|
16
19
|
schemata: Dict[str, SchemaToDict]
|
|
@@ -22,6 +25,8 @@ class Model(object):
|
|
|
22
25
|
provides some helper functions to find schemata, properties or to instantiate
|
|
23
26
|
entity proxies based on the schema metadata."""
|
|
24
27
|
|
|
28
|
+
_instance: Optional["Model"] = None
|
|
29
|
+
|
|
25
30
|
__slots__ = ("path", "schemata", "properties", "qnames")
|
|
26
31
|
|
|
27
32
|
def __init__(self, path: str) -> None:
|
|
@@ -38,6 +43,15 @@ class Model(object):
|
|
|
38
43
|
self._load(os.path.join(path, filename))
|
|
39
44
|
self.generate()
|
|
40
45
|
|
|
46
|
+
@classmethod
def instance(cls) -> "Model":
    """Return the shared model, creating it on first use.

    The model is loaded from the ``schema`` directory next to this module,
    unless the ``FTM_MODEL_PATH`` environment variable names another path.
    The created object is stored on the class and returned by later calls.
    """
    if cls._instance is not None:
        return cls._instance
    bundled = os.path.join(os.path.dirname(__file__), "schema")
    cls._instance = cls(os.environ.get("FTM_MODEL_PATH", bundled))
    return cls._instance
|
|
54
|
+
|
|
41
55
|
def generate(self) -> None:
|
|
42
56
|
"""Loading the model is a weird process because the schemata reference
|
|
43
57
|
each other in complex ways, so the generation process cannot be fully
|
|
@@ -89,13 +103,15 @@ class Model(object):
|
|
|
89
103
|
|
|
90
104
|
def make_mapping(
|
|
91
105
|
self, mapping: Dict[str, Any], key_prefix: Optional[str] = None
|
|
92
|
-
) -> QueryMapping:
|
|
106
|
+
) -> "QueryMapping":
|
|
93
107
|
"""Parse a mapping that applies (tabular) source data to the model."""
|
|
108
|
+
from followthemoney.mapping import QueryMapping
|
|
109
|
+
|
|
94
110
|
return QueryMapping(self, mapping, key_prefix=key_prefix)
|
|
95
111
|
|
|
96
112
|
def map_entities(
|
|
97
113
|
self, mapping: Dict[str, Any], key_prefix: Optional[str] = None
|
|
98
|
-
) -> Generator[EntityProxy, None, None]:
|
|
114
|
+
) -> Generator["EntityProxy", None, None]:
|
|
99
115
|
"""Given a mapping, yield a series of entities from the data source."""
|
|
100
116
|
gen = self.make_mapping(mapping, key_prefix=key_prefix)
|
|
101
117
|
for record in gen.source.records:
|
|
@@ -127,20 +143,31 @@ class Model(object):
|
|
|
127
143
|
msg = "No common schema: %s and %s"
|
|
128
144
|
raise InvalidData(msg % (left, right))
|
|
129
145
|
|
|
146
|
+
def matchable_schemata(self) -> Set[Schema]:
    """Return the set of all schemata that are matchable."""
    return {schema for schema in self.schemata.values() if schema.matchable}
|
|
149
|
+
|
|
130
150
|
def make_entity(
|
|
131
151
|
self, schema: Union[str, Schema], key_prefix: Optional[str] = None
|
|
132
|
-
) -> EntityProxy:
|
|
152
|
+
) -> "EntityProxy":
|
|
133
153
|
"""Instantiate an empty entity proxy of the given schema type."""
|
|
134
|
-
|
|
154
|
+
from followthemoney.proxy import EntityProxy
|
|
155
|
+
|
|
156
|
+
schema_ = self.get(schema)
|
|
157
|
+
if schema_ is None:
|
|
158
|
+
raise InvalidData("Schema does not exist: %s" % schema)
|
|
159
|
+
return EntityProxy(schema_, {}, key_prefix=key_prefix)
|
|
135
160
|
|
|
136
|
-
def get_proxy(self, data: Dict[str, Any], cleaned: bool = True) -> EntityProxy:
|
|
161
|
+
def get_proxy(self, data: Dict[str, Any], cleaned: bool = True) -> "EntityProxy":
|
|
137
162
|
"""Create an entity proxy to reflect the entity data in the given
|
|
138
163
|
dictionary. If ``cleaned`` is disabled, all property values are
|
|
139
164
|
fully re-validated and normalised. Use this if handling input data
|
|
140
165
|
from an untrusted source."""
|
|
166
|
+
from followthemoney.proxy import EntityProxy
|
|
167
|
+
|
|
141
168
|
if isinstance(data, EntityProxy):
|
|
142
169
|
return data
|
|
143
|
-
return EntityProxy.from_dict(
|
|
170
|
+
return EntityProxy.from_dict(data, cleaned=cleaned)
|
|
144
171
|
|
|
145
172
|
def to_dict(self) -> ModelToDict:
|
|
146
173
|
"""Return metadata for all schemata and properties, in a serializable form."""
|
followthemoney/ontology.py
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
import sys
|
|
2
2
|
from datetime import datetime
|
|
3
|
-
from rdflib import Graph, URIRef, Literal
|
|
3
|
+
from rdflib import Graph, URIRef, Literal, Namespace
|
|
4
4
|
from rdflib.namespace import OWL, DCTERMS, RDF, RDFS, XSD
|
|
5
5
|
|
|
6
6
|
from followthemoney import model
|
|
7
7
|
from followthemoney.property import Property
|
|
8
8
|
from followthemoney.schema import Schema
|
|
9
9
|
from followthemoney.types import registry
|
|
10
|
-
from followthemoney.rdf import NS
|
|
11
10
|
from followthemoney.util import PathLike
|
|
12
11
|
|
|
12
|
+
NS = Namespace("https://schema.followthemoney.tech/#")
|
|
13
|
+
|
|
13
14
|
|
|
14
15
|
class Ontology(object):
|
|
15
16
|
def __init__(self) -> None:
|
|
@@ -32,37 +33,38 @@ class Ontology(object):
|
|
|
32
33
|
self.add_class(schema)
|
|
33
34
|
|
|
34
35
|
def add_class(self, schema: Schema) -> None:
|
|
35
|
-
|
|
36
|
-
self.graph.add((
|
|
36
|
+
suri = NS[schema.name]
|
|
37
|
+
self.graph.add((suri, RDF.type, RDFS.Class))
|
|
38
|
+
self.graph.add((suri, RDFS.isDefinedBy, self.uri))
|
|
37
39
|
for parent in schema.extends:
|
|
38
|
-
self.graph.add((
|
|
40
|
+
self.graph.add((suri, RDFS.subClassOf, NS[parent.name]))
|
|
39
41
|
|
|
40
|
-
self.graph.add((
|
|
42
|
+
self.graph.add((suri, RDFS.label, Literal(schema.label)))
|
|
41
43
|
if schema.description is not None:
|
|
42
44
|
description = Literal(schema.description)
|
|
43
|
-
self.graph.add((
|
|
45
|
+
self.graph.add((suri, RDFS.comment, description))
|
|
44
46
|
|
|
45
47
|
for _, prop in sorted(schema.properties.items()):
|
|
46
48
|
self.add_property(prop)
|
|
47
49
|
|
|
48
50
|
def add_property(self, prop: Property) -> None:
|
|
49
|
-
|
|
50
|
-
self.graph.add((
|
|
51
|
+
puri = NS[prop.qname]
|
|
52
|
+
self.graph.add((puri, RDF.type, RDF.Property))
|
|
53
|
+
self.graph.add((puri, RDFS.isDefinedBy, self.uri))
|
|
51
54
|
|
|
52
|
-
self.graph.add((
|
|
55
|
+
self.graph.add((puri, RDFS.label, Literal(prop.label)))
|
|
53
56
|
if prop.description is not None:
|
|
54
|
-
self.graph.add((
|
|
57
|
+
self.graph.add((puri, RDFS.comment, Literal(prop.description)))
|
|
55
58
|
|
|
56
|
-
self.graph.add((
|
|
59
|
+
self.graph.add((puri, RDFS.domain, NS[prop.schema.name]))
|
|
57
60
|
if prop.range is not None:
|
|
58
61
|
range = model.get(prop.range)
|
|
59
62
|
if range is not None:
|
|
60
|
-
|
|
61
|
-
self.graph.add((prop.uri, RDFS.range, range_uri))
|
|
63
|
+
self.graph.add((puri, RDFS.range, NS[range.name]))
|
|
62
64
|
if prop.reverse is not None:
|
|
63
|
-
self.graph.add((
|
|
65
|
+
self.graph.add((puri, OWL.inverseOf, NS[prop.reverse.qname]))
|
|
64
66
|
if prop.type == registry.date:
|
|
65
|
-
self.graph.add((
|
|
67
|
+
self.graph.add((puri, RDFS.range, XSD.dateTime))
|
|
66
68
|
|
|
67
69
|
def write_namespace_docs(self, path: PathLike) -> None:
|
|
68
70
|
xml_fn = "%s/ftm.xml" % path
|
followthemoney/property.py
CHANGED
|
@@ -1,10 +1,9 @@
|
|
|
1
1
|
from banal import is_mapping, as_bool
|
|
2
|
-
from typing import TYPE_CHECKING,
|
|
2
|
+
from typing import TYPE_CHECKING, Any, List, Optional, TypedDict
|
|
3
3
|
|
|
4
4
|
from followthemoney.exc import InvalidModel
|
|
5
5
|
from followthemoney.types import registry
|
|
6
|
-
from followthemoney.
|
|
7
|
-
from followthemoney.util import gettext, get_entity_id
|
|
6
|
+
from followthemoney.util import gettext, get_entity_id, const
|
|
8
7
|
|
|
9
8
|
if TYPE_CHECKING:
|
|
10
9
|
from followthemoney.schema import Schema
|
|
@@ -26,7 +25,6 @@ class PropertyDict(TypedDict, total=False):
|
|
|
26
25
|
deprecated: Optional[bool]
|
|
27
26
|
maxLength: Optional[int]
|
|
28
27
|
# stub: Optional[bool]
|
|
29
|
-
rdf: Optional[str]
|
|
30
28
|
range: Optional[str]
|
|
31
29
|
format: Optional[str]
|
|
32
30
|
|
|
@@ -66,7 +64,6 @@ class Property:
|
|
|
66
64
|
"stub",
|
|
67
65
|
"_reverse",
|
|
68
66
|
"reverse",
|
|
69
|
-
"uri",
|
|
70
67
|
)
|
|
71
68
|
|
|
72
69
|
#: Invalid property names.
|
|
@@ -79,10 +76,10 @@ class Property:
|
|
|
79
76
|
self.schema = schema
|
|
80
77
|
|
|
81
78
|
#: Machine-readable name for this property.
|
|
82
|
-
self.name = name
|
|
79
|
+
self.name = const(name)
|
|
83
80
|
|
|
84
81
|
#: Qualified property name, which also includes the schema name.
|
|
85
|
-
self.qname = "%s:%s" % (schema.name, self.name)
|
|
82
|
+
self.qname = const("%s:%s" % (schema.name, self.name))
|
|
86
83
|
if self.name in self.RESERVED:
|
|
87
84
|
raise InvalidModel("Reserved name: %s" % self.name)
|
|
88
85
|
|
|
@@ -97,12 +94,11 @@ class Property:
|
|
|
97
94
|
#: This property should not be shown or mentioned in the user interface.
|
|
98
95
|
self.hidden = as_bool(data.get("hidden"))
|
|
99
96
|
|
|
100
|
-
type_ = data.get("type"
|
|
101
|
-
if type_ is None or type_ not in registry.named:
|
|
102
|
-
raise InvalidModel("Invalid type: %s" % type_)
|
|
103
|
-
|
|
97
|
+
type_ = data.get("type") or "string"
|
|
104
98
|
#: The data type for this property.
|
|
105
|
-
self.type = registry
|
|
99
|
+
self.type = registry.get(type_)
|
|
100
|
+
if self.type is None:
|
|
101
|
+
raise InvalidModel("Invalid type: %s" % type_)
|
|
106
102
|
|
|
107
103
|
#: Whether this property should be used for matching and cross-referencing.
|
|
108
104
|
_matchable = data.get("matchable")
|
|
@@ -137,9 +133,6 @@ class Property:
|
|
|
137
133
|
self._reverse = data.get("reverse")
|
|
138
134
|
self.reverse: Optional["Property"] = None
|
|
139
135
|
|
|
140
|
-
#: RDF term for this property (i.e. the predicate URI).
|
|
141
|
-
self.uri = URIRef(cast(str, data.get("rdf", NS[self.qname])))
|
|
142
|
-
|
|
143
136
|
def generate(self, model: "Model") -> None:
|
|
144
137
|
"""Setup method used when loading the model in order to build out the reverse
|
|
145
138
|
links of the property."""
|
|
@@ -170,6 +163,10 @@ class Property:
|
|
|
170
163
|
return 0.0
|
|
171
164
|
return self.type.specificity(value)
|
|
172
165
|
|
|
166
|
+
def caption(self, value: str) -> str:
    """Render ``value`` as a user-friendly caption.

    Delegates to the property type's ``caption`` method, passing along this
    property's ``format``.
    """
    render = self.type.caption
    return render(value, format=self.format)
|
|
169
|
+
|
|
173
170
|
def validate(self, data: List[Any]) -> Optional[str]:
|
|
174
171
|
"""Validate that the data should be stored.
|
|
175
172
|
|
followthemoney/proxy.py
CHANGED
|
@@ -1,36 +1,25 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import
|
|
3
|
-
|
|
4
|
-
Any,
|
|
5
|
-
Dict,
|
|
6
|
-
Generator,
|
|
7
|
-
List,
|
|
8
|
-
Optional,
|
|
9
|
-
Set,
|
|
10
|
-
Tuple,
|
|
11
|
-
Union,
|
|
12
|
-
Type,
|
|
13
|
-
TypeVar,
|
|
14
|
-
cast,
|
|
15
|
-
)
|
|
16
|
-
import warnings
|
|
2
|
+
from typing import TYPE_CHECKING, cast, Any
|
|
3
|
+
from typing import Dict, Generator, List, Optional, Set, Tuple, Union, Type, TypeVar
|
|
17
4
|
from itertools import product
|
|
18
5
|
from banal import ensure_dict
|
|
6
|
+
from rigour.names import pick_name
|
|
19
7
|
|
|
20
8
|
from followthemoney.exc import InvalidData
|
|
21
9
|
from followthemoney.types import registry
|
|
22
10
|
from followthemoney.types.common import PropertyType
|
|
23
11
|
from followthemoney.property import Property
|
|
24
|
-
from followthemoney.
|
|
12
|
+
from followthemoney.value import string_list, Values
|
|
25
13
|
from followthemoney.util import sanitize_text, gettext
|
|
26
|
-
from followthemoney.util import merge_context,
|
|
14
|
+
from followthemoney.util import merge_context, make_entity_id
|
|
15
|
+
from followthemoney.model import Model
|
|
16
|
+
from followthemoney.schema import Schema
|
|
27
17
|
|
|
28
18
|
if TYPE_CHECKING:
|
|
29
19
|
from followthemoney.model import Model
|
|
30
20
|
|
|
31
21
|
log = logging.getLogger(__name__)
|
|
32
22
|
P = Union[Property, str]
|
|
33
|
-
Triple = Tuple[Identifier, Identifier, Identifier]
|
|
34
23
|
E = TypeVar("E", bound="EntityProxy")
|
|
35
24
|
|
|
36
25
|
|
|
@@ -45,7 +34,7 @@ class EntityProxy(object):
|
|
|
45
34
|
|
|
46
35
|
def __init__(
|
|
47
36
|
self,
|
|
48
|
-
|
|
37
|
+
schema: Schema,
|
|
49
38
|
data: Dict[str, Any],
|
|
50
39
|
key_prefix: Optional[str] = None,
|
|
51
40
|
cleaned: bool = True,
|
|
@@ -57,9 +46,6 @@ class EntityProxy(object):
|
|
|
57
46
|
|
|
58
47
|
#: The schema definition for this entity, which implies the properties
|
|
59
48
|
#: That can be set on it.
|
|
60
|
-
schema = model.get(data.pop("schema", None))
|
|
61
|
-
if schema is None:
|
|
62
|
-
raise InvalidData(gettext("No schema for entity."))
|
|
63
49
|
self.schema = schema
|
|
64
50
|
|
|
65
51
|
#: When using :meth:`~make_id` to generate a natural key for this entity,
|
|
@@ -162,7 +148,7 @@ class EntityProxy(object):
|
|
|
162
148
|
def add(
|
|
163
149
|
self,
|
|
164
150
|
prop: P,
|
|
165
|
-
values:
|
|
151
|
+
values: Values,
|
|
166
152
|
cleaned: bool = False,
|
|
167
153
|
quiet: bool = False,
|
|
168
154
|
fuzzy: bool = False,
|
|
@@ -192,11 +178,9 @@ class EntityProxy(object):
|
|
|
192
178
|
msg = gettext("Stub property (%s): %s")
|
|
193
179
|
raise InvalidData(msg % (self.schema, prop))
|
|
194
180
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
|
|
199
|
-
self.unsafe_add(prop, value, cleaned=True)
|
|
181
|
+
value: Optional[str] = None
|
|
182
|
+
for value in string_list(values, sanitize=not cleaned):
|
|
183
|
+
self.unsafe_add(prop, value, cleaned=cleaned, fuzzy=fuzzy, format=format)
|
|
200
184
|
return None
|
|
201
185
|
|
|
202
186
|
def unsafe_add(
|
|
@@ -236,7 +220,7 @@ class EntityProxy(object):
|
|
|
236
220
|
def set(
|
|
237
221
|
self,
|
|
238
222
|
prop: P,
|
|
239
|
-
values:
|
|
223
|
+
values: Values,
|
|
240
224
|
cleaned: bool = False,
|
|
241
225
|
quiet: bool = False,
|
|
242
226
|
fuzzy: bool = False,
|
|
@@ -377,34 +361,21 @@ class EntityProxy(object):
|
|
|
377
361
|
data[group] = values
|
|
378
362
|
return data
|
|
379
363
|
|
|
380
|
-
def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
|
|
381
|
-
"""Serialise the entity into a set of RDF triple statements. The
|
|
382
|
-
statements include the property values, an ``RDF#type`` definition
|
|
383
|
-
that refers to the entity schema, and a ``SKOS#prefLabel`` with the
|
|
384
|
-
entity caption."""
|
|
385
|
-
if self.id is None or self.schema is None:
|
|
386
|
-
return
|
|
387
|
-
uri = registry.entity.rdf(self.id)
|
|
388
|
-
yield (uri, RDF.type, self.schema.uri)
|
|
389
|
-
if qualified:
|
|
390
|
-
caption = self.caption
|
|
391
|
-
if caption != self.schema.label:
|
|
392
|
-
yield (uri, SKOS.prefLabel, Literal(caption))
|
|
393
|
-
for prop, value in self.itervalues():
|
|
394
|
-
value = prop.type.rdf(value)
|
|
395
|
-
if qualified:
|
|
396
|
-
yield (uri, prop.uri, value)
|
|
397
|
-
else:
|
|
398
|
-
yield (uri, URIRef(prop.name), value)
|
|
399
|
-
|
|
400
364
|
@property
|
|
401
365
|
def caption(self) -> str:
|
|
402
366
|
"""The user-facing label to be used for this entity. This checks a list
|
|
403
367
|
of properties defined by the schema (caption) and returns the first
|
|
404
368
|
available value. If no caption is available, return the schema label."""
|
|
405
|
-
for
|
|
406
|
-
|
|
407
|
-
|
|
369
|
+
for prop_ in self.schema.caption:
|
|
370
|
+
prop = self.schema.properties[prop_]
|
|
371
|
+
values = self.get(prop)
|
|
372
|
+
if prop.type == registry.name and len(values) > 1:
|
|
373
|
+
name = pick_name(sorted(values))
|
|
374
|
+
if name is not None:
|
|
375
|
+
return name
|
|
376
|
+
else:
|
|
377
|
+
for value in values:
|
|
378
|
+
return value
|
|
408
379
|
return self.schema.label
|
|
409
380
|
|
|
410
381
|
@property
|
|
@@ -448,7 +419,7 @@ class EntityProxy(object):
|
|
|
448
419
|
|
|
449
420
|
def clone(self: E) -> E:
|
|
450
421
|
"""Make a deep copy of the current entity proxy."""
|
|
451
|
-
return self.__class__.from_dict(self.
|
|
422
|
+
return self.__class__.from_dict(self.to_dict())
|
|
452
423
|
|
|
453
424
|
def merge(self: E, other: E) -> E:
|
|
454
425
|
"""Merge another entity proxy into this one. This will try and find
|
|
@@ -467,30 +438,36 @@ class EntityProxy(object):
|
|
|
467
438
|
self.add(prop, values, cleaned=True, quiet=True)
|
|
468
439
|
return self
|
|
469
440
|
|
|
441
|
+
def __getstate__(self) -> Dict[str, Any]:
    """Return the picklable state: every slot value keyed by slot name.

    The schema object is replaced by its name under the ``schema`` key.
    """
    state: Dict[str, Any] = {}
    for slot in self.__slots__:
        state[slot] = getattr(self, slot)
    state["schema"] = self.schema.name
    return state
|
|
445
|
+
|
|
446
|
+
def __setstate__(self, data: Dict[str, Any]) -> None:
    """Restore every slot from ``data``.

    Slots missing from ``data`` are set to ``None``. The ``schema`` slot is
    resolved from the stored schema name via ``Model.instance()``.
    """
    for slot in self.__slots__:
        restored = data.get(slot)
        if slot == "schema":
            restored = Model.instance()[data["schema"]]
        setattr(self, slot, restored)
|
|
452
|
+
|
|
470
453
|
def __str__(self) -> str:
|
|
471
454
|
return self.caption
|
|
472
455
|
|
|
473
456
|
def __repr__(self) -> str:
|
|
474
|
-
return "<E(%r,%r)>" % (self.id, str(self))
|
|
457
|
+
return "<E(%r,%s,%r)>" % (self.id, self.schema.name, str(self))
|
|
475
458
|
|
|
476
459
|
def __len__(self) -> int:
|
|
477
460
|
return self._size
|
|
478
461
|
|
|
479
462
|
def __hash__(self) -> int:
|
|
480
463
|
if not self.id:
|
|
481
|
-
|
|
482
|
-
"Hashing an EntityProxy without an ID results in undefined behaviour",
|
|
483
|
-
RuntimeWarning,
|
|
484
|
-
)
|
|
464
|
+
raise RuntimeError("Cannot hash entity without an ID")
|
|
485
465
|
return hash(self.id)
|
|
486
466
|
|
|
487
467
|
def __eq__(self, other: Any) -> bool:
|
|
488
468
|
try:
|
|
489
469
|
if self.id is None or other.id is None:
|
|
490
|
-
|
|
491
|
-
"Comparing EntityProxys without IDs results in undefined behaviour",
|
|
492
|
-
RuntimeWarning,
|
|
493
|
-
)
|
|
470
|
+
raise RuntimeError("Cannot compare entities without IDs.")
|
|
494
471
|
return bool(self.id == other.id)
|
|
495
472
|
except AttributeError:
|
|
496
473
|
return False
|
|
@@ -498,11 +475,13 @@ class EntityProxy(object):
|
|
|
498
475
|
@classmethod
|
|
499
476
|
def from_dict(
|
|
500
477
|
cls: Type[E],
|
|
501
|
-
model: "Model",
|
|
502
478
|
data: Dict[str, Any],
|
|
503
479
|
cleaned: bool = True,
|
|
504
480
|
) -> E:
|
|
505
481
|
"""Instantiate a proxy based on the given model and serialised dictionary.
|
|
506
482
|
|
|
507
483
|
Use :meth:`followthemoney.model.Model.get_proxy` instead."""
|
|
508
|
-
|
|
484
|
+
schema = Model.instance().get(data.get("schema", ""))
|
|
485
|
+
if schema is None:
|
|
486
|
+
raise InvalidData(gettext("No schema for entity."))
|
|
487
|
+
return cls(schema, data, cleaned=cleaned)
|