followthemoney 3.8.5__py3-none-any.whl → 4.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +30 -10
- followthemoney/cli/cli.py +1 -1
- followthemoney/cli/exports.py +6 -2
- followthemoney/cli/statement.py +62 -0
- followthemoney/cli/util.py +2 -3
- followthemoney/compare.py +26 -16
- followthemoney/dataset/__init__.py +17 -0
- followthemoney/dataset/catalog.py +77 -0
- followthemoney/dataset/coverage.py +29 -0
- followthemoney/dataset/dataset.py +137 -0
- followthemoney/dataset/publisher.py +25 -0
- followthemoney/dataset/resource.py +30 -0
- followthemoney/dataset/util.py +58 -0
- followthemoney/entity.py +73 -0
- followthemoney/exc.py +6 -0
- followthemoney/export/rdf.py +57 -5
- followthemoney/graph.py +1 -2
- followthemoney/model.py +36 -9
- followthemoney/ontology.py +18 -16
- followthemoney/property.py +12 -15
- followthemoney/proxy.py +43 -64
- followthemoney/schema/Analyzable.yaml +2 -3
- followthemoney/schema/BankAccount.yaml +2 -3
- followthemoney/schema/Company.yaml +0 -6
- followthemoney/schema/Contract.yaml +0 -1
- followthemoney/schema/CryptoWallet.yaml +1 -1
- followthemoney/schema/Document.yaml +0 -6
- followthemoney/schema/Interval.yaml +7 -0
- followthemoney/schema/LegalEntity.yaml +6 -0
- followthemoney/schema/License.yaml +2 -0
- followthemoney/schema/Page.yaml +0 -1
- followthemoney/schema/Person.yaml +0 -5
- followthemoney/schema/Sanction.yaml +1 -0
- followthemoney/schema/Thing.yaml +0 -2
- followthemoney/schema/UserAccount.yaml +6 -3
- followthemoney/schema.py +27 -39
- followthemoney/statement/__init__.py +19 -0
- followthemoney/statement/entity.py +437 -0
- followthemoney/statement/serialize.py +245 -0
- followthemoney/statement/statement.py +256 -0
- followthemoney/statement/util.py +31 -0
- followthemoney/types/__init__.py +66 -23
- followthemoney/types/address.py +3 -3
- followthemoney/types/checksum.py +3 -7
- followthemoney/types/common.py +9 -14
- followthemoney/types/country.py +3 -7
- followthemoney/types/date.py +21 -11
- followthemoney/types/email.py +0 -4
- followthemoney/types/entity.py +5 -11
- followthemoney/types/gender.py +6 -10
- followthemoney/types/identifier.py +9 -3
- followthemoney/types/ip.py +5 -9
- followthemoney/types/json.py +2 -2
- followthemoney/types/language.py +3 -7
- followthemoney/types/mimetype.py +4 -8
- followthemoney/types/name.py +7 -8
- followthemoney/types/number.py +88 -6
- followthemoney/types/phone.py +4 -11
- followthemoney/types/string.py +4 -4
- followthemoney/types/topic.py +3 -7
- followthemoney/types/url.py +5 -10
- followthemoney/util.py +12 -13
- followthemoney/value.py +67 -0
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/METADATA +23 -8
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/RECORD +68 -59
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/entry_points.txt +1 -0
- followthemoney/offshore.py +0 -48
- followthemoney/rdf.py +0 -9
- followthemoney/schema/Assessment.yaml +0 -32
- followthemoney/schema/Post.yaml +0 -42
- followthemoney/types/iban.py +0 -58
- followthemoney/types/registry.py +0 -52
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/WHEEL +0 -0
- {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -56,12 +56,6 @@ Company:
|
|
|
56
56
|
mbsCode:
|
|
57
57
|
label: "MBS"
|
|
58
58
|
type: identifier
|
|
59
|
-
ibcRuc:
|
|
60
|
-
# TODO: Remove this. It's a column name in the ICIJ-released OffshoreLeaks datasets
|
|
61
|
-
# but seems to just mean "company number".
|
|
62
|
-
deprecated: true
|
|
63
|
-
label: "ibcRUC"
|
|
64
|
-
type: identifier
|
|
65
59
|
caemCode:
|
|
66
60
|
label: "COD CAEM"
|
|
67
61
|
description: "(RO) What kind of activity a legal entity is allowed to develop"
|
|
@@ -30,12 +30,10 @@ Document:
|
|
|
30
30
|
title:
|
|
31
31
|
label: "Title"
|
|
32
32
|
type: string
|
|
33
|
-
rdf: http://purl.org/dc/elements/1.1/title
|
|
34
33
|
caption: true
|
|
35
34
|
author:
|
|
36
35
|
label: "Author"
|
|
37
36
|
description: "The original author, not the uploader"
|
|
38
|
-
rdf: http://purl.org/dc/elements/1.1/creator
|
|
39
37
|
generator:
|
|
40
38
|
label: "Generator"
|
|
41
39
|
description: "The program used to generate this file"
|
|
@@ -61,11 +59,9 @@ Document:
|
|
|
61
59
|
mimeType:
|
|
62
60
|
label: "MIME type"
|
|
63
61
|
type: mimetype
|
|
64
|
-
rdf: http://purl.org/dc/terms/format
|
|
65
62
|
language:
|
|
66
63
|
label: "Language"
|
|
67
64
|
type: language
|
|
68
|
-
rdf: http://purl.org/dc/terms/language
|
|
69
65
|
translatedLanguage:
|
|
70
66
|
label: "The language of the translated text"
|
|
71
67
|
hidden: true
|
|
@@ -78,7 +74,6 @@ Document:
|
|
|
78
74
|
label: "Date"
|
|
79
75
|
description: "If not otherwise specified"
|
|
80
76
|
type: date
|
|
81
|
-
rdf: http://purl.org/dc/elements/1.1/date
|
|
82
77
|
authoredAt:
|
|
83
78
|
label: "Authored on"
|
|
84
79
|
type: date
|
|
@@ -95,7 +90,6 @@ Document:
|
|
|
95
90
|
name: children
|
|
96
91
|
label: "Child documents"
|
|
97
92
|
hidden: true
|
|
98
|
-
rdf: http://purl.org/dc/terms/isPartOf
|
|
99
93
|
ancestors:
|
|
100
94
|
label: "Ancestors"
|
|
101
95
|
type: entity
|
|
@@ -42,6 +42,13 @@ Interval:
|
|
|
42
42
|
label: "Source link"
|
|
43
43
|
type: url
|
|
44
44
|
matchable: false
|
|
45
|
+
proof:
|
|
46
|
+
label: Source document
|
|
47
|
+
reverse:
|
|
48
|
+
name: provenIntervals
|
|
49
|
+
label: "Derived relationships"
|
|
50
|
+
type: entity
|
|
51
|
+
range: Document
|
|
45
52
|
publisher:
|
|
46
53
|
label: "Publishing source"
|
|
47
54
|
publisherUrl:
|
|
@@ -102,6 +102,12 @@ LegalEntity:
|
|
|
102
102
|
bvdId:
|
|
103
103
|
label: Bureau van Dijk ID
|
|
104
104
|
type: identifier
|
|
105
|
+
uscCode:
|
|
106
|
+
# cf. https://en.wikipedia.org/wiki/Unified_Social_Credit_Identifier
|
|
107
|
+
label: "USCC"
|
|
108
|
+
description: "Unified Social Credit Identifier"
|
|
109
|
+
type: identifier
|
|
110
|
+
format: uscc
|
|
105
111
|
icijId:
|
|
106
112
|
label: ICIJ ID
|
|
107
113
|
description: "ID according to International Consortium for Investigative Journalists"
|
followthemoney/schema/Page.yaml
CHANGED
|
@@ -6,7 +6,6 @@ Person:
|
|
|
6
6
|
description: >
|
|
7
7
|
A natural person, as opposed to a corporation of some type.
|
|
8
8
|
matchable: true
|
|
9
|
-
rdf: http://xmlns.com/foaf/0.1/Person
|
|
10
9
|
featured:
|
|
11
10
|
- name
|
|
12
11
|
- nationality
|
|
@@ -26,14 +25,12 @@ Person:
|
|
|
26
25
|
properties:
|
|
27
26
|
title:
|
|
28
27
|
label: Title
|
|
29
|
-
rdf: http://xmlns.com/foaf/0.1/title
|
|
30
28
|
# The `firstName`, `lastName`, `secondName` etc. properties intentionally do not use
|
|
31
29
|
# the `name` property type. Many FtM tools (including Aleph) use name properties to
|
|
32
30
|
# compare/match entities, but matching entities just on e.g. a first name would lead to
|
|
33
31
|
# too many false positives.
|
|
34
32
|
firstName:
|
|
35
33
|
label: First name
|
|
36
|
-
rdf: http://xmlns.com/foaf/0.1/givenName
|
|
37
34
|
secondName:
|
|
38
35
|
label: Second name
|
|
39
36
|
middleName:
|
|
@@ -44,13 +41,11 @@ Person:
|
|
|
44
41
|
label: Matronymic
|
|
45
42
|
lastName:
|
|
46
43
|
label: Last name
|
|
47
|
-
rdf: http://xmlns.com/foaf/0.1/lastName
|
|
48
44
|
nameSuffix:
|
|
49
45
|
label: Name suffix
|
|
50
46
|
birthDate:
|
|
51
47
|
label: Birth date
|
|
52
48
|
type: date
|
|
53
|
-
rdf: http://xmlns.com/foaf/0.1/birthday
|
|
54
49
|
birthPlace:
|
|
55
50
|
label: Place of birth
|
|
56
51
|
birthCountry:
|
followthemoney/schema/Thing.yaml
CHANGED
|
@@ -14,7 +14,6 @@ Thing:
|
|
|
14
14
|
name:
|
|
15
15
|
label: Name
|
|
16
16
|
type: name
|
|
17
|
-
rdf: http://www.w3.org/2004/02/skos/core#prefLabel
|
|
18
17
|
summary: # a short one-liner kind of description
|
|
19
18
|
label: Summary
|
|
20
19
|
type: text
|
|
@@ -27,7 +26,6 @@ Thing:
|
|
|
27
26
|
alias:
|
|
28
27
|
label: Other name
|
|
29
28
|
type: name
|
|
30
|
-
rdf: http://www.w3.org/2004/02/skos/core#altLabel
|
|
31
29
|
previousName:
|
|
32
30
|
label: Previous name
|
|
33
31
|
type: name
|
|
@@ -28,11 +28,14 @@ UserAccount:
|
|
|
28
28
|
label: "Service"
|
|
29
29
|
type: string
|
|
30
30
|
email:
|
|
31
|
-
label:
|
|
31
|
+
label: E-Mail
|
|
32
32
|
type: email
|
|
33
|
-
|
|
34
|
-
|
|
33
|
+
description: "Email address"
|
|
34
|
+
phone:
|
|
35
|
+
label: Phone
|
|
35
36
|
type: phone
|
|
37
|
+
description: "Phone number"
|
|
38
|
+
maxLength: 32
|
|
36
39
|
username:
|
|
37
40
|
label: "Username"
|
|
38
41
|
type: string
|
followthemoney/schema.py
CHANGED
|
@@ -1,22 +1,12 @@
|
|
|
1
|
-
from typing import
|
|
2
|
-
|
|
3
|
-
Any,
|
|
4
|
-
Dict,
|
|
5
|
-
List,
|
|
6
|
-
Optional,
|
|
7
|
-
Set,
|
|
8
|
-
TypedDict,
|
|
9
|
-
Union,
|
|
10
|
-
cast,
|
|
11
|
-
)
|
|
1
|
+
from typing import TYPE_CHECKING, Any, cast
|
|
2
|
+
from typing import Dict, List, Optional, Set, TypedDict, Union
|
|
12
3
|
from banal import ensure_list, ensure_dict, as_bool
|
|
13
4
|
from functools import lru_cache
|
|
14
5
|
|
|
15
6
|
from followthemoney.property import Property, PropertySpec, PropertyToDict, ReverseSpec
|
|
16
7
|
from followthemoney.types import registry
|
|
17
8
|
from followthemoney.exc import InvalidData, InvalidModel
|
|
18
|
-
from followthemoney.
|
|
19
|
-
from followthemoney.util import gettext
|
|
9
|
+
from followthemoney.util import gettext, const
|
|
20
10
|
|
|
21
11
|
if TYPE_CHECKING:
|
|
22
12
|
from followthemoney.model import Model
|
|
@@ -47,7 +37,6 @@ class SchemaSpec(TypedDict, total=False):
|
|
|
47
37
|
edge: EdgeSpec
|
|
48
38
|
temporalExtent: TemporalExtentSpec
|
|
49
39
|
description: Optional[str]
|
|
50
|
-
rdf: Optional[str]
|
|
51
40
|
abstract: bool
|
|
52
41
|
hidden: bool
|
|
53
42
|
generated: bool
|
|
@@ -90,7 +79,6 @@ class Schema:
|
|
|
90
79
|
"_plural",
|
|
91
80
|
"_description",
|
|
92
81
|
"_hash",
|
|
93
|
-
"uri",
|
|
94
82
|
"abstract",
|
|
95
83
|
"hidden",
|
|
96
84
|
"generated",
|
|
@@ -118,15 +106,12 @@ class Schema:
|
|
|
118
106
|
|
|
119
107
|
def __init__(self, model: "Model", name: str, data: SchemaSpec) -> None:
|
|
120
108
|
#: Machine-readable name of the schema, used for identification.
|
|
121
|
-
self.name = name
|
|
109
|
+
self.name = const(name)
|
|
122
110
|
self.model = model
|
|
123
111
|
self._label = data.get("label", name)
|
|
124
112
|
self._plural = data.get("plural", self.label)
|
|
125
113
|
self._description = data.get("description")
|
|
126
|
-
self._hash = hash("<Schema(%r)>" % name)
|
|
127
|
-
|
|
128
|
-
#: RDF identifier for this schema when it is transformed to a triple term.
|
|
129
|
-
self.uri = URIRef(cast(str, data.get("rdf", NS[name])))
|
|
114
|
+
self._hash = hash("<Schema(%r)>" % self.name)
|
|
130
115
|
|
|
131
116
|
#: Do not store or emit entities of this type, it is used only for
|
|
132
117
|
#: inheritance.
|
|
@@ -152,17 +137,17 @@ class Schema:
|
|
|
152
137
|
#: Mark a set of properties as important, i.e. they should be shown
|
|
153
138
|
#: first, or in an abridged view of the entity. In Aleph, these properties
|
|
154
139
|
#: are included in tabular entity listings.
|
|
155
|
-
self.featured =
|
|
140
|
+
self.featured = [const(f) for f in data.get("featured", [])]
|
|
156
141
|
|
|
157
142
|
#: Mark a set of properties as required. This is applied only when
|
|
158
143
|
#: an entity is created by the user - bulk created entities will
|
|
159
144
|
#: slip through even if it is technically invalid.
|
|
160
|
-
self.required =
|
|
145
|
+
self.required = [const(r) for r in data.get("required", [])]
|
|
161
146
|
|
|
162
147
|
#: Mark a set of properties to be used for the entity's caption.
|
|
163
148
|
#: They will be checked in order and the first existent value will
|
|
164
149
|
#: be used.
|
|
165
|
-
self.caption =
|
|
150
|
+
self.caption = [const(s) for s in data.get("caption", [])]
|
|
166
151
|
|
|
167
152
|
# A transform of the entity into an edge for its representation in
|
|
168
153
|
# the context of a property graph representation like Neo4J/Gephi.
|
|
@@ -173,7 +158,7 @@ class Schema:
|
|
|
173
158
|
#: Flag to indicate if this schema should be represented by an edge (rather than
|
|
174
159
|
#: a node) when the data is converted into a property graph.
|
|
175
160
|
self.edge: bool = self.edge_source is not None and self.edge_target is not None
|
|
176
|
-
self.edge_caption =
|
|
161
|
+
self.edge_caption = [const(p) for p in edge.get("caption", [])]
|
|
177
162
|
self._edge_label = edge.get("label", self._label)
|
|
178
163
|
|
|
179
164
|
#: Flag to indicate if the edge should be presented as directed to the user,
|
|
@@ -183,16 +168,16 @@ class Schema:
|
|
|
183
168
|
#: Specify which properties should be used to represent this schema in a
|
|
184
169
|
#: timeline.
|
|
185
170
|
temporal_extent = data.get("temporalExtent", {})
|
|
186
|
-
self._temporal_start =
|
|
187
|
-
self._temporal_end =
|
|
171
|
+
self._temporal_start = [const(s) for s in temporal_extent.get("start", [])]
|
|
172
|
+
self._temporal_end = [const(e) for e in temporal_extent.get("end", [])]
|
|
188
173
|
|
|
189
174
|
#: Direct parent schemata of this schema.
|
|
190
|
-
self._extends =
|
|
175
|
+
self._extends = [const(s) for s in data.get("extends", [])]
|
|
191
176
|
self.extends: Set["Schema"] = set()
|
|
192
177
|
|
|
193
178
|
#: All parents of this schema (including indirect parents and the schema
|
|
194
179
|
#: itself).
|
|
195
|
-
self.schemata = set([self])
|
|
180
|
+
self.schemata: Set[Schema] = set([self])
|
|
196
181
|
|
|
197
182
|
#: All names of :attr:`~schemata`.
|
|
198
183
|
self.names = set([self.name])
|
|
@@ -205,8 +190,8 @@ class Schema:
|
|
|
205
190
|
#: The full list of properties defined for the entity, including those
|
|
206
191
|
#: inherited from parent schemata.
|
|
207
192
|
self.properties: Dict[str, Property] = {}
|
|
208
|
-
for
|
|
209
|
-
self.properties[
|
|
193
|
+
for pname, prop in data.get("properties", {}).items():
|
|
194
|
+
self.properties[pname] = Property(self, pname, prop)
|
|
210
195
|
|
|
211
196
|
def generate(self, model: "Model") -> None:
|
|
212
197
|
"""While loading the schema, this function will validate and
|
|
@@ -317,12 +302,18 @@ class Schema:
|
|
|
317
302
|
|
|
318
303
|
@property
|
|
319
304
|
def source_prop(self) -> Optional[Property]:
|
|
320
|
-
"""The entity property to be used as an edge source
|
|
305
|
+
"""The entity property to be used as an edge source when the schema is
|
|
306
|
+
considered as a relationship."""
|
|
307
|
+
if self.edge_source is None:
|
|
308
|
+
return None
|
|
321
309
|
return self.get(self.edge_source)
|
|
322
310
|
|
|
323
311
|
@property
|
|
324
312
|
def target_prop(self) -> Optional[Property]:
|
|
325
|
-
"""The entity property to be used as an edge target
|
|
313
|
+
"""The entity property to be used as an edge target when the schema is transformed
|
|
314
|
+
into a relationship."""
|
|
315
|
+
if self.edge_target is None:
|
|
316
|
+
return None
|
|
326
317
|
return self.get(self.edge_target)
|
|
327
318
|
|
|
328
319
|
@property
|
|
@@ -404,13 +395,13 @@ class Schema:
|
|
|
404
395
|
other = other.name
|
|
405
396
|
return other in self.names
|
|
406
397
|
|
|
407
|
-
def get(self, name:
|
|
398
|
+
def get(self, name: str) -> Optional[Property]:
|
|
408
399
|
"""Retrieve a property defined for this schema by its name."""
|
|
409
400
|
if name is None:
|
|
410
401
|
return None
|
|
411
402
|
return self.properties.get(name)
|
|
412
403
|
|
|
413
|
-
def validate(self, data: Any) -> Optional[str]:
|
|
404
|
+
def validate(self, data: Dict[str, Any]) -> Optional[str]:
|
|
414
405
|
"""Validate a dictionary against the given schema.
|
|
415
406
|
This will also drop keys which are not valid as properties.
|
|
416
407
|
"""
|
|
@@ -478,7 +469,7 @@ class Schema:
|
|
|
478
469
|
def __eq__(self, other: Any) -> bool:
|
|
479
470
|
"""Compare two schemata (via hash)."""
|
|
480
471
|
try:
|
|
481
|
-
return self._hash ==
|
|
472
|
+
return self._hash == other._hash # type: ignore
|
|
482
473
|
except AttributeError:
|
|
483
474
|
return False
|
|
484
475
|
|
|
@@ -486,10 +477,7 @@ class Schema:
|
|
|
486
477
|
return self.name.__lt__(other.name)
|
|
487
478
|
|
|
488
479
|
def __hash__(self) -> int:
|
|
489
|
-
|
|
490
|
-
return self._hash
|
|
491
|
-
except AttributeError:
|
|
492
|
-
return super().__hash__()
|
|
480
|
+
return self._hash
|
|
493
481
|
|
|
494
482
|
def __repr__(self) -> str:
|
|
495
483
|
return "<Schema(%r)>" % self.name
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
from followthemoney.statement.statement import Statement, StatementDict
|
|
2
|
+
from followthemoney.statement.serialize import CSV, JSON, PACK, FORMATS
|
|
3
|
+
from followthemoney.statement.serialize import write_statements
|
|
4
|
+
from followthemoney.statement.serialize import read_statements, read_path_statements
|
|
5
|
+
from followthemoney.statement.entity import SE, StatementEntity
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"Statement",
|
|
9
|
+
"StatementDict",
|
|
10
|
+
"StatementEntity",
|
|
11
|
+
"SE",
|
|
12
|
+
"CSV",
|
|
13
|
+
"JSON",
|
|
14
|
+
"PACK",
|
|
15
|
+
"FORMATS",
|
|
16
|
+
"write_statements",
|
|
17
|
+
"read_statements",
|
|
18
|
+
"read_path_statements",
|
|
19
|
+
]
|