followthemoney 3.8.5__py3-none-any.whl → 4.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +30 -10
- followthemoney/cli/cli.py +1 -1
- followthemoney/cli/exports.py +6 -2
- followthemoney/cli/statement.py +62 -0
- followthemoney/cli/util.py +2 -3
- followthemoney/compare.py +26 -16
- followthemoney/dataset/__init__.py +17 -0
- followthemoney/dataset/catalog.py +77 -0
- followthemoney/dataset/coverage.py +29 -0
- followthemoney/dataset/dataset.py +146 -0
- followthemoney/dataset/publisher.py +25 -0
- followthemoney/dataset/resource.py +30 -0
- followthemoney/dataset/util.py +55 -0
- followthemoney/entity.py +73 -0
- followthemoney/exc.py +6 -0
- followthemoney/export/rdf.py +57 -5
- followthemoney/graph.py +1 -2
- followthemoney/model.py +38 -11
- followthemoney/names.py +33 -0
- followthemoney/ontology.py +18 -16
- followthemoney/property.py +12 -15
- followthemoney/proxy.py +43 -64
- followthemoney/schema/Analyzable.yaml +2 -3
- followthemoney/schema/BankAccount.yaml +2 -3
- followthemoney/schema/Company.yaml +0 -6
- followthemoney/schema/Contract.yaml +0 -1
- followthemoney/schema/CryptoWallet.yaml +1 -1
- followthemoney/schema/Document.yaml +0 -6
- followthemoney/schema/Interval.yaml +7 -0
- followthemoney/schema/LegalEntity.yaml +6 -0
- followthemoney/schema/License.yaml +2 -0
- followthemoney/schema/Page.yaml +0 -1
- followthemoney/schema/Person.yaml +0 -5
- followthemoney/schema/Sanction.yaml +1 -0
- followthemoney/schema/Thing.yaml +0 -2
- followthemoney/schema/UserAccount.yaml +6 -3
- followthemoney/schema.py +30 -42
- followthemoney/statement/__init__.py +19 -0
- followthemoney/statement/entity.py +438 -0
- followthemoney/statement/serialize.py +251 -0
- followthemoney/statement/statement.py +256 -0
- followthemoney/statement/util.py +31 -0
- followthemoney/types/__init__.py +66 -23
- followthemoney/types/address.py +3 -3
- followthemoney/types/checksum.py +3 -7
- followthemoney/types/common.py +9 -14
- followthemoney/types/country.py +3 -7
- followthemoney/types/date.py +21 -11
- followthemoney/types/email.py +0 -4
- followthemoney/types/entity.py +5 -11
- followthemoney/types/gender.py +6 -10
- followthemoney/types/identifier.py +9 -3
- followthemoney/types/ip.py +5 -9
- followthemoney/types/json.py +2 -2
- followthemoney/types/language.py +3 -7
- followthemoney/types/mimetype.py +4 -8
- followthemoney/types/name.py +7 -8
- followthemoney/types/number.py +88 -6
- followthemoney/types/phone.py +4 -11
- followthemoney/types/string.py +4 -4
- followthemoney/types/topic.py +3 -7
- followthemoney/types/url.py +5 -10
- followthemoney/util.py +12 -13
- followthemoney/value.py +67 -0
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/METADATA +23 -8
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/RECORD +69 -59
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/entry_points.txt +1 -0
- followthemoney/offshore.py +0 -48
- followthemoney/rdf.py +0 -9
- followthemoney/schema/Assessment.yaml +0 -32
- followthemoney/schema/Post.yaml +0 -42
- followthemoney/types/iban.py +0 -58
- followthemoney/types/registry.py +0 -52
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/WHEEL +0 -0
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/licenses/LICENSE +0 -0
followthemoney/proxy.py
CHANGED
|
@@ -1,36 +1,25 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import
|
|
3
|
-
|
|
4
|
-
Any,
|
|
5
|
-
Dict,
|
|
6
|
-
Generator,
|
|
7
|
-
List,
|
|
8
|
-
Optional,
|
|
9
|
-
Set,
|
|
10
|
-
Tuple,
|
|
11
|
-
Union,
|
|
12
|
-
Type,
|
|
13
|
-
TypeVar,
|
|
14
|
-
cast,
|
|
15
|
-
)
|
|
16
|
-
import warnings
|
|
2
|
+
from typing import TYPE_CHECKING, cast, Any
|
|
3
|
+
from typing import Dict, Generator, List, Optional, Set, Tuple, Union, Type, TypeVar
|
|
17
4
|
from itertools import product
|
|
18
5
|
from banal import ensure_dict
|
|
6
|
+
from rigour.names import pick_name
|
|
19
7
|
|
|
20
8
|
from followthemoney.exc import InvalidData
|
|
21
9
|
from followthemoney.types import registry
|
|
22
10
|
from followthemoney.types.common import PropertyType
|
|
23
11
|
from followthemoney.property import Property
|
|
24
|
-
from followthemoney.
|
|
12
|
+
from followthemoney.value import string_list, Values
|
|
25
13
|
from followthemoney.util import sanitize_text, gettext
|
|
26
|
-
from followthemoney.util import merge_context,
|
|
14
|
+
from followthemoney.util import merge_context, make_entity_id
|
|
15
|
+
from followthemoney.model import Model
|
|
16
|
+
from followthemoney.schema import Schema
|
|
27
17
|
|
|
28
18
|
if TYPE_CHECKING:
|
|
29
19
|
from followthemoney.model import Model
|
|
30
20
|
|
|
31
21
|
log = logging.getLogger(__name__)
|
|
32
22
|
P = Union[Property, str]
|
|
33
|
-
Triple = Tuple[Identifier, Identifier, Identifier]
|
|
34
23
|
E = TypeVar("E", bound="EntityProxy")
|
|
35
24
|
|
|
36
25
|
|
|
@@ -45,7 +34,7 @@ class EntityProxy(object):
|
|
|
45
34
|
|
|
46
35
|
def __init__(
|
|
47
36
|
self,
|
|
48
|
-
|
|
37
|
+
schema: Schema,
|
|
49
38
|
data: Dict[str, Any],
|
|
50
39
|
key_prefix: Optional[str] = None,
|
|
51
40
|
cleaned: bool = True,
|
|
@@ -57,9 +46,6 @@ class EntityProxy(object):
|
|
|
57
46
|
|
|
58
47
|
#: The schema definition for this entity, which implies the properties
|
|
59
48
|
#: that can be set on it.
|
|
60
|
-
schema = model.get(data.pop("schema", None))
|
|
61
|
-
if schema is None:
|
|
62
|
-
raise InvalidData(gettext("No schema for entity."))
|
|
63
49
|
self.schema = schema
|
|
64
50
|
|
|
65
51
|
#: When using :meth:`~make_id` to generate a natural key for this entity,
|
|
@@ -162,7 +148,7 @@ class EntityProxy(object):
|
|
|
162
148
|
def add(
|
|
163
149
|
self,
|
|
164
150
|
prop: P,
|
|
165
|
-
values:
|
|
151
|
+
values: Values,
|
|
166
152
|
cleaned: bool = False,
|
|
167
153
|
quiet: bool = False,
|
|
168
154
|
fuzzy: bool = False,
|
|
@@ -192,11 +178,9 @@ class EntityProxy(object):
|
|
|
192
178
|
msg = gettext("Stub property (%s): %s")
|
|
193
179
|
raise InvalidData(msg % (self.schema, prop))
|
|
194
180
|
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
|
|
199
|
-
self.unsafe_add(prop, value, cleaned=True)
|
|
181
|
+
value: Optional[str] = None
|
|
182
|
+
for value in string_list(values, sanitize=not cleaned):
|
|
183
|
+
self.unsafe_add(prop, value, cleaned=cleaned, fuzzy=fuzzy, format=format)
|
|
200
184
|
return None
|
|
201
185
|
|
|
202
186
|
def unsafe_add(
|
|
@@ -236,7 +220,7 @@ class EntityProxy(object):
|
|
|
236
220
|
def set(
|
|
237
221
|
self,
|
|
238
222
|
prop: P,
|
|
239
|
-
values:
|
|
223
|
+
values: Values,
|
|
240
224
|
cleaned: bool = False,
|
|
241
225
|
quiet: bool = False,
|
|
242
226
|
fuzzy: bool = False,
|
|
@@ -377,34 +361,21 @@ class EntityProxy(object):
|
|
|
377
361
|
data[group] = values
|
|
378
362
|
return data
|
|
379
363
|
|
|
380
|
-
def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
|
|
381
|
-
"""Serialise the entity into a set of RDF triple statements. The
|
|
382
|
-
statements include the property values, an ``RDF#type`` definition
|
|
383
|
-
that refers to the entity schema, and a ``SKOS#prefLabel`` with the
|
|
384
|
-
entity caption."""
|
|
385
|
-
if self.id is None or self.schema is None:
|
|
386
|
-
return
|
|
387
|
-
uri = registry.entity.rdf(self.id)
|
|
388
|
-
yield (uri, RDF.type, self.schema.uri)
|
|
389
|
-
if qualified:
|
|
390
|
-
caption = self.caption
|
|
391
|
-
if caption != self.schema.label:
|
|
392
|
-
yield (uri, SKOS.prefLabel, Literal(caption))
|
|
393
|
-
for prop, value in self.itervalues():
|
|
394
|
-
value = prop.type.rdf(value)
|
|
395
|
-
if qualified:
|
|
396
|
-
yield (uri, prop.uri, value)
|
|
397
|
-
else:
|
|
398
|
-
yield (uri, URIRef(prop.name), value)
|
|
399
|
-
|
|
400
364
|
@property
|
|
401
365
|
def caption(self) -> str:
|
|
402
366
|
"""The user-facing label to be used for this entity. This checks a list
|
|
403
367
|
of properties defined by the schema (caption) and returns the first
|
|
404
368
|
available value. If no caption is available, return the schema label."""
|
|
405
|
-
for
|
|
406
|
-
|
|
407
|
-
|
|
369
|
+
for prop_ in self.schema.caption:
|
|
370
|
+
prop = self.schema.properties[prop_]
|
|
371
|
+
values = self.get(prop)
|
|
372
|
+
if prop.type == registry.name and len(values) > 1:
|
|
373
|
+
name = pick_name(sorted(values))
|
|
374
|
+
if name is not None:
|
|
375
|
+
return name
|
|
376
|
+
else:
|
|
377
|
+
for value in values:
|
|
378
|
+
return value
|
|
408
379
|
return self.schema.label
|
|
409
380
|
|
|
410
381
|
@property
|
|
@@ -448,7 +419,7 @@ class EntityProxy(object):
|
|
|
448
419
|
|
|
449
420
|
def clone(self: E) -> E:
|
|
450
421
|
"""Make a deep copy of the current entity proxy."""
|
|
451
|
-
return self.__class__.from_dict(self.
|
|
422
|
+
return self.__class__.from_dict(self.to_dict())
|
|
452
423
|
|
|
453
424
|
def merge(self: E, other: E) -> E:
|
|
454
425
|
"""Merge another entity proxy into this one. This will try and find
|
|
@@ -467,30 +438,36 @@ class EntityProxy(object):
|
|
|
467
438
|
self.add(prop, values, cleaned=True, quiet=True)
|
|
468
439
|
return self
|
|
469
440
|
|
|
441
|
+
def __getstate__(self) -> Dict[str, Any]:
|
|
442
|
+
data = {slot: getattr(self, slot) for slot in self.__slots__}
|
|
443
|
+
data["schema"] = self.schema.name
|
|
444
|
+
return data
|
|
445
|
+
|
|
446
|
+
def __setstate__(self, data: Dict[str, Any]) -> None:
|
|
447
|
+
for slot in self.__slots__:
|
|
448
|
+
value = data.get(slot)
|
|
449
|
+
if slot == "schema":
|
|
450
|
+
value = Model.instance()[data["schema"]]
|
|
451
|
+
setattr(self, slot, value)
|
|
452
|
+
|
|
470
453
|
def __str__(self) -> str:
|
|
471
454
|
return self.caption
|
|
472
455
|
|
|
473
456
|
def __repr__(self) -> str:
|
|
474
|
-
return "<E(%r,%r)>" % (self.id, str(self))
|
|
457
|
+
return "<E(%r,%s,%r)>" % (self.id, self.schema.name, str(self))
|
|
475
458
|
|
|
476
459
|
def __len__(self) -> int:
|
|
477
460
|
return self._size
|
|
478
461
|
|
|
479
462
|
def __hash__(self) -> int:
|
|
480
463
|
if not self.id:
|
|
481
|
-
|
|
482
|
-
"Hashing an EntityProxy without an ID results in undefined behaviour",
|
|
483
|
-
RuntimeWarning,
|
|
484
|
-
)
|
|
464
|
+
raise RuntimeError("Cannot hash entity without an ID")
|
|
485
465
|
return hash(self.id)
|
|
486
466
|
|
|
487
467
|
def __eq__(self, other: Any) -> bool:
|
|
488
468
|
try:
|
|
489
469
|
if self.id is None or other.id is None:
|
|
490
|
-
|
|
491
|
-
"Comparing EntityProxys without IDs results in undefined behaviour",
|
|
492
|
-
RuntimeWarning,
|
|
493
|
-
)
|
|
470
|
+
raise RuntimeError("Cannot compare entities without IDs.")
|
|
494
471
|
return bool(self.id == other.id)
|
|
495
472
|
except AttributeError:
|
|
496
473
|
return False
|
|
@@ -498,11 +475,13 @@ class EntityProxy(object):
|
|
|
498
475
|
@classmethod
|
|
499
476
|
def from_dict(
|
|
500
477
|
cls: Type[E],
|
|
501
|
-
model: "Model",
|
|
502
478
|
data: Dict[str, Any],
|
|
503
479
|
cleaned: bool = True,
|
|
504
480
|
) -> E:
|
|
505
481
|
"""Instantiate a proxy based on the given model and serialised dictionary.
|
|
506
482
|
|
|
507
483
|
Use :meth:`followthemoney.model.Model.get_proxy` instead."""
|
|
508
|
-
|
|
484
|
+
schema = Model.instance().get(data.get("schema", ""))
|
|
485
|
+
if schema is None:
|
|
486
|
+
raise InvalidData(gettext("No schema for entity."))
|
|
487
|
+
return cls(schema, data, cleaned=cleaned)
|
|
followthemoney/schema/Company.yaml
CHANGED
|
@@ -56,12 +56,6 @@ Company:
|
|
|
56
56
|
mbsCode:
|
|
57
57
|
label: "MBS"
|
|
58
58
|
type: identifier
|
|
59
|
-
ibcRuc:
|
|
60
|
-
# TODO: Remove this. It's a column name in the ICIJ-released OffshoreLeaks datasets
|
|
61
|
-
# but seems to just mean "company number".
|
|
62
|
-
deprecated: true
|
|
63
|
-
label: "ibcRUC"
|
|
64
|
-
type: identifier
|
|
65
59
|
caemCode:
|
|
66
60
|
label: "COD CAEM"
|
|
67
61
|
description: "(RO) What kind of activity a legal entity is allowed to develop"
|
|
followthemoney/schema/Document.yaml
CHANGED
|
@@ -30,12 +30,10 @@ Document:
|
|
|
30
30
|
title:
|
|
31
31
|
label: "Title"
|
|
32
32
|
type: string
|
|
33
|
-
rdf: http://purl.org/dc/elements/1.1/title
|
|
34
33
|
caption: true
|
|
35
34
|
author:
|
|
36
35
|
label: "Author"
|
|
37
36
|
description: "The original author, not the uploader"
|
|
38
|
-
rdf: http://purl.org/dc/elements/1.1/creator
|
|
39
37
|
generator:
|
|
40
38
|
label: "Generator"
|
|
41
39
|
description: "The program used to generate this file"
|
|
@@ -61,11 +59,9 @@ Document:
|
|
|
61
59
|
mimeType:
|
|
62
60
|
label: "MIME type"
|
|
63
61
|
type: mimetype
|
|
64
|
-
rdf: http://purl.org/dc/terms/format
|
|
65
62
|
language:
|
|
66
63
|
label: "Language"
|
|
67
64
|
type: language
|
|
68
|
-
rdf: http://purl.org/dc/terms/language
|
|
69
65
|
translatedLanguage:
|
|
70
66
|
label: "The language of the translated text"
|
|
71
67
|
hidden: true
|
|
@@ -78,7 +74,6 @@ Document:
|
|
|
78
74
|
label: "Date"
|
|
79
75
|
description: "If not otherwise specified"
|
|
80
76
|
type: date
|
|
81
|
-
rdf: http://purl.org/dc/elements/1.1/date
|
|
82
77
|
authoredAt:
|
|
83
78
|
label: "Authored on"
|
|
84
79
|
type: date
|
|
@@ -95,7 +90,6 @@ Document:
|
|
|
95
90
|
name: children
|
|
96
91
|
label: "Child documents"
|
|
97
92
|
hidden: true
|
|
98
|
-
rdf: http://purl.org/dc/terms/isPartOf
|
|
99
93
|
ancestors:
|
|
100
94
|
label: "Ancestors"
|
|
101
95
|
type: entity
|
|
followthemoney/schema/Interval.yaml
CHANGED
|
@@ -42,6 +42,13 @@ Interval:
|
|
|
42
42
|
label: "Source link"
|
|
43
43
|
type: url
|
|
44
44
|
matchable: false
|
|
45
|
+
proof:
|
|
46
|
+
label: Source document
|
|
47
|
+
reverse:
|
|
48
|
+
name: provenIntervals
|
|
49
|
+
label: "Derived relationships"
|
|
50
|
+
type: entity
|
|
51
|
+
range: Document
|
|
45
52
|
publisher:
|
|
46
53
|
label: "Publishing source"
|
|
47
54
|
publisherUrl:
|
|
followthemoney/schema/LegalEntity.yaml
CHANGED
|
@@ -102,6 +102,12 @@ LegalEntity:
|
|
|
102
102
|
bvdId:
|
|
103
103
|
label: Bureau van Dijk ID
|
|
104
104
|
type: identifier
|
|
105
|
+
uscCode:
|
|
106
|
+
# cf. https://en.wikipedia.org/wiki/Unified_Social_Credit_Identifier
|
|
107
|
+
label: "USCC"
|
|
108
|
+
description: "Unified Social Credit Identifier"
|
|
109
|
+
type: identifier
|
|
110
|
+
format: uscc
|
|
105
111
|
icijId:
|
|
106
112
|
label: ICIJ ID
|
|
107
113
|
description: "ID according to International Consortium for Investigative Journalists"
|
followthemoney/schema/Page.yaml
CHANGED
|
followthemoney/schema/Person.yaml
CHANGED
|
@@ -6,7 +6,6 @@ Person:
|
|
|
6
6
|
description: >
|
|
7
7
|
A natural person, as opposed to a corporation of some type.
|
|
8
8
|
matchable: true
|
|
9
|
-
rdf: http://xmlns.com/foaf/0.1/Person
|
|
10
9
|
featured:
|
|
11
10
|
- name
|
|
12
11
|
- nationality
|
|
@@ -26,14 +25,12 @@ Person:
|
|
|
26
25
|
properties:
|
|
27
26
|
title:
|
|
28
27
|
label: Title
|
|
29
|
-
rdf: http://xmlns.com/foaf/0.1/title
|
|
30
28
|
# The `firstName`, `lastName`, `secondName` etc. properties intentionally do not use
|
|
31
29
|
# the `name` property type. Many FtM tools (including Aleph) use name properties to
|
|
32
30
|
# compare/match entities, but matching entities just on e.g. a first name would lead to
|
|
33
31
|
# too many false positives.
|
|
34
32
|
firstName:
|
|
35
33
|
label: First name
|
|
36
|
-
rdf: http://xmlns.com/foaf/0.1/givenName
|
|
37
34
|
secondName:
|
|
38
35
|
label: Second name
|
|
39
36
|
middleName:
|
|
@@ -44,13 +41,11 @@ Person:
|
|
|
44
41
|
label: Matronymic
|
|
45
42
|
lastName:
|
|
46
43
|
label: Last name
|
|
47
|
-
rdf: http://xmlns.com/foaf/0.1/lastName
|
|
48
44
|
nameSuffix:
|
|
49
45
|
label: Name suffix
|
|
50
46
|
birthDate:
|
|
51
47
|
label: Birth date
|
|
52
48
|
type: date
|
|
53
|
-
rdf: http://xmlns.com/foaf/0.1/birthday
|
|
54
49
|
birthPlace:
|
|
55
50
|
label: Place of birth
|
|
56
51
|
birthCountry:
|
followthemoney/schema/Thing.yaml
CHANGED
|
@@ -14,7 +14,6 @@ Thing:
|
|
|
14
14
|
name:
|
|
15
15
|
label: Name
|
|
16
16
|
type: name
|
|
17
|
-
rdf: http://www.w3.org/2004/02/skos/core#prefLabel
|
|
18
17
|
summary: # a short one-liner kind of description
|
|
19
18
|
label: Summary
|
|
20
19
|
type: text
|
|
@@ -27,7 +26,6 @@ Thing:
|
|
|
27
26
|
alias:
|
|
28
27
|
label: Other name
|
|
29
28
|
type: name
|
|
30
|
-
rdf: http://www.w3.org/2004/02/skos/core#altLabel
|
|
31
29
|
previousName:
|
|
32
30
|
label: Previous name
|
|
33
31
|
type: name
|
|
followthemoney/schema/UserAccount.yaml
CHANGED
|
@@ -28,11 +28,14 @@ UserAccount:
|
|
|
28
28
|
label: "Service"
|
|
29
29
|
type: string
|
|
30
30
|
email:
|
|
31
|
-
label:
|
|
31
|
+
label: E-Mail
|
|
32
32
|
type: email
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
description: "Email address"
|
|
34
|
+
phone:
|
|
35
|
+
label: Phone
|
|
35
36
|
type: phone
|
|
37
|
+
description: "Phone number"
|
|
38
|
+
maxLength: 32
|
|
36
39
|
username:
|
|
37
40
|
label: "Username"
|
|
38
41
|
type: string
|
followthemoney/schema.py
CHANGED
|
@@ -1,22 +1,12 @@
|
|
|
1
|
-
from typing import
|
|
2
|
-
|
|
3
|
-
Any,
|
|
4
|
-
Dict,
|
|
5
|
-
List,
|
|
6
|
-
Optional,
|
|
7
|
-
Set,
|
|
8
|
-
TypedDict,
|
|
9
|
-
Union,
|
|
10
|
-
cast,
|
|
11
|
-
)
|
|
1
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
2
|
+
from typing import Dict, List, Optional, Set, TypedDict, Union
|
|
12
3
|
from banal import ensure_list, ensure_dict, as_bool
|
|
13
|
-
from functools import
|
|
4
|
+
from functools import cache
|
|
14
5
|
|
|
15
6
|
from followthemoney.property import Property, PropertySpec, PropertyToDict, ReverseSpec
|
|
16
7
|
from followthemoney.types import registry
|
|
17
8
|
from followthemoney.exc import InvalidData, InvalidModel
|
|
18
|
-
from followthemoney.
|
|
19
|
-
from followthemoney.util import gettext
|
|
9
|
+
from followthemoney.util import gettext, const
|
|
20
10
|
|
|
21
11
|
if TYPE_CHECKING:
|
|
22
12
|
from followthemoney.model import Model
|
|
@@ -47,7 +37,6 @@ class SchemaSpec(TypedDict, total=False):
|
|
|
47
37
|
edge: EdgeSpec
|
|
48
38
|
temporalExtent: TemporalExtentSpec
|
|
49
39
|
description: Optional[str]
|
|
50
|
-
rdf: Optional[str]
|
|
51
40
|
abstract: bool
|
|
52
41
|
hidden: bool
|
|
53
42
|
generated: bool
|
|
@@ -90,7 +79,6 @@ class Schema:
|
|
|
90
79
|
"_plural",
|
|
91
80
|
"_description",
|
|
92
81
|
"_hash",
|
|
93
|
-
"uri",
|
|
94
82
|
"abstract",
|
|
95
83
|
"hidden",
|
|
96
84
|
"generated",
|
|
@@ -118,15 +106,12 @@ class Schema:
|
|
|
118
106
|
|
|
119
107
|
def __init__(self, model: "Model", name: str, data: SchemaSpec) -> None:
|
|
120
108
|
#: Machine-readable name of the schema, used for identification.
|
|
121
|
-
self.name = name
|
|
109
|
+
self.name = const(name)
|
|
122
110
|
self.model = model
|
|
123
111
|
self._label = data.get("label", name)
|
|
124
112
|
self._plural = data.get("plural", self.label)
|
|
125
113
|
self._description = data.get("description")
|
|
126
|
-
self._hash = hash("<Schema(%r)>" % name)
|
|
127
|
-
|
|
128
|
-
#: RDF identifier for this schema when it is transformed to a triple term.
|
|
129
|
-
self.uri = URIRef(cast(str, data.get("rdf", NS[name])))
|
|
114
|
+
self._hash = hash("<Schema(%r)>" % self.name)
|
|
130
115
|
|
|
131
116
|
#: Do not store or emit entities of this type, it is used only for
|
|
132
117
|
#: inheritance.
|
|
@@ -152,17 +137,17 @@ class Schema:
|
|
|
152
137
|
#: Mark a set of properties as important, i.e. they should be shown
|
|
153
138
|
#: first, or in an abridged view of the entity. In Aleph, these properties
|
|
154
139
|
#: are included in tabular entity listings.
|
|
155
|
-
self.featured =
|
|
140
|
+
self.featured = [const(f) for f in data.get("featured", [])]
|
|
156
141
|
|
|
157
142
|
#: Mark a set of properties as required. This is applied only when
|
|
158
143
|
#: an entity is created by the user - bulk created entities will
|
|
159
144
|
#: slip through even if it is technically invalid.
|
|
160
|
-
self.required =
|
|
145
|
+
self.required = [const(r) for r in data.get("required", [])]
|
|
161
146
|
|
|
162
147
|
#: Mark a set of properties to be used for the entity's caption.
|
|
163
148
|
#: They will be checked in order and the first existent value will
|
|
164
149
|
#: be used.
|
|
165
|
-
self.caption =
|
|
150
|
+
self.caption = [const(s) for s in data.get("caption", [])]
|
|
166
151
|
|
|
167
152
|
# A transform of the entity into an edge for its representation in
|
|
168
153
|
# the context of a property graph representation like Neo4J/Gephi.
|
|
@@ -173,7 +158,7 @@ class Schema:
|
|
|
173
158
|
#: Flag to indicate if this schema should be represented by an edge (rather than
|
|
174
159
|
#: a node) when the data is converted into a property graph.
|
|
175
160
|
self.edge: bool = self.edge_source is not None and self.edge_target is not None
|
|
176
|
-
self.edge_caption =
|
|
161
|
+
self.edge_caption = [const(p) for p in edge.get("caption", [])]
|
|
177
162
|
self._edge_label = edge.get("label", self._label)
|
|
178
163
|
|
|
179
164
|
#: Flag to indicate if the edge should be presented as directed to the user,
|
|
@@ -183,16 +168,16 @@ class Schema:
|
|
|
183
168
|
#: Specify which properties should be used to represent this schema in a
|
|
184
169
|
#: timeline.
|
|
185
170
|
temporal_extent = data.get("temporalExtent", {})
|
|
186
|
-
self._temporal_start =
|
|
187
|
-
self._temporal_end =
|
|
171
|
+
self._temporal_start = [const(s) for s in temporal_extent.get("start", [])]
|
|
172
|
+
self._temporal_end = [const(e) for e in temporal_extent.get("end", [])]
|
|
188
173
|
|
|
189
174
|
#: Direct parent schemata of this schema.
|
|
190
|
-
self._extends =
|
|
175
|
+
self._extends = [const(s) for s in data.get("extends", [])]
|
|
191
176
|
self.extends: Set["Schema"] = set()
|
|
192
177
|
|
|
193
178
|
#: All parents of this schema (including indirect parents and the schema
|
|
194
179
|
#: itself).
|
|
195
|
-
self.schemata = set([self])
|
|
180
|
+
self.schemata: Set[Schema] = set([self])
|
|
196
181
|
|
|
197
182
|
#: All names of :attr:`~schemata`.
|
|
198
183
|
self.names = set([self.name])
|
|
@@ -205,8 +190,8 @@ class Schema:
|
|
|
205
190
|
#: The full list of properties defined for the entity, including those
|
|
206
191
|
#: inherited from parent schemata.
|
|
207
192
|
self.properties: Dict[str, Property] = {}
|
|
208
|
-
for
|
|
209
|
-
self.properties[
|
|
193
|
+
for pname, prop in data.get("properties", {}).items():
|
|
194
|
+
self.properties[pname] = Property(self, pname, prop)
|
|
210
195
|
|
|
211
196
|
def generate(self, model: "Model") -> None:
|
|
212
197
|
"""While loading the schema, this function will validate and
|
|
@@ -317,12 +302,18 @@ class Schema:
|
|
|
317
302
|
|
|
318
303
|
@property
|
|
319
304
|
def source_prop(self) -> Optional[Property]:
|
|
320
|
-
"""The entity property to be used as an edge source
|
|
305
|
+
"""The entity property to be used as an edge source when the schema is
|
|
306
|
+
considered as a relationship."""
|
|
307
|
+
if self.edge_source is None:
|
|
308
|
+
return None
|
|
321
309
|
return self.get(self.edge_source)
|
|
322
310
|
|
|
323
311
|
@property
|
|
324
312
|
def target_prop(self) -> Optional[Property]:
|
|
325
|
-
"""The entity property to be used as an edge target
|
|
313
|
+
"""The entity property to be used as an edge target when the schema is transformed
|
|
314
|
+
into a relationship."""
|
|
315
|
+
if self.edge_target is None:
|
|
316
|
+
return None
|
|
326
317
|
return self.get(self.edge_target)
|
|
327
318
|
|
|
328
319
|
@property
|
|
@@ -391,12 +382,12 @@ class Schema:
|
|
|
391
382
|
self._matchable_schemata.add(schema)
|
|
392
383
|
return self._matchable_schemata
|
|
393
384
|
|
|
394
|
-
@
|
|
385
|
+
@cache
|
|
395
386
|
def can_match(self, other: "Schema") -> bool:
|
|
396
387
|
"""Check if a schema can match with another schema."""
|
|
397
388
|
return other in self.matchable_schemata
|
|
398
389
|
|
|
399
|
-
@
|
|
390
|
+
@cache
|
|
400
391
|
def is_a(self, other: Union[str, "Schema"]) -> bool:
|
|
401
392
|
"""Check if the schema or one of its parents is the same as the given
|
|
402
393
|
candidate ``other``."""
|
|
@@ -404,13 +395,13 @@ class Schema:
|
|
|
404
395
|
other = other.name
|
|
405
396
|
return other in self.names
|
|
406
397
|
|
|
407
|
-
def get(self, name:
|
|
398
|
+
def get(self, name: str) -> Optional[Property]:
|
|
408
399
|
"""Retrieve a property defined for this schema by its name."""
|
|
409
400
|
if name is None:
|
|
410
401
|
return None
|
|
411
402
|
return self.properties.get(name)
|
|
412
403
|
|
|
413
|
-
def validate(self, data: Any) -> Optional[str]:
|
|
404
|
+
def validate(self, data: Dict[str, Any]) -> Optional[str]:
|
|
414
405
|
"""Validate a dictionary against the given schema.
|
|
415
406
|
This will also drop keys which are not valid as properties.
|
|
416
407
|
"""
|
|
@@ -478,7 +469,7 @@ class Schema:
|
|
|
478
469
|
def __eq__(self, other: Any) -> bool:
|
|
479
470
|
"""Compare two schemata (via hash)."""
|
|
480
471
|
try:
|
|
481
|
-
return self._hash ==
|
|
472
|
+
return self._hash == other._hash # type: ignore
|
|
482
473
|
except AttributeError:
|
|
483
474
|
return False
|
|
484
475
|
|
|
@@ -486,10 +477,7 @@ class Schema:
|
|
|
486
477
|
return self.name.__lt__(other.name)
|
|
487
478
|
|
|
488
479
|
def __hash__(self) -> int:
|
|
489
|
-
|
|
490
|
-
return self._hash
|
|
491
|
-
except AttributeError:
|
|
492
|
-
return super().__hash__()
|
|
480
|
+
return self._hash
|
|
493
481
|
|
|
494
482
|
def __repr__(self) -> str:
|
|
495
483
|
return "<Schema(%r)>" % self.name
|
|
followthemoney/statement/__init__.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from followthemoney.statement.statement import Statement, StatementDict
|
|
2
|
+
from followthemoney.statement.serialize import CSV, JSON, PACK, FORMATS
|
|
3
|
+
from followthemoney.statement.serialize import write_statements
|
|
4
|
+
from followthemoney.statement.serialize import read_statements, read_path_statements
|
|
5
|
+
from followthemoney.statement.entity import SE, StatementEntity
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Statement",
|
|
9
|
+
"StatementDict",
|
|
10
|
+
"StatementEntity",
|
|
11
|
+
"SE",
|
|
12
|
+
"CSV",
|
|
13
|
+
"JSON",
|
|
14
|
+
"PACK",
|
|
15
|
+
"FORMATS",
|
|
16
|
+
"write_statements",
|
|
17
|
+
"read_statements",
|
|
18
|
+
"read_path_statements",
|
|
19
|
+
]
|