followthemoney 1.3.7__py3-none-any.whl → 3.8.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +5 -3
- followthemoney/cli/__init__.py +17 -0
- followthemoney/cli/aggregate.py +56 -0
- followthemoney/cli/cli.py +88 -0
- followthemoney/cli/exports.py +121 -0
- followthemoney/cli/mapping.py +85 -0
- followthemoney/cli/sieve.py +67 -0
- followthemoney/cli/util.py +142 -0
- followthemoney/compare.py +130 -60
- followthemoney/exc.py +19 -6
- followthemoney/export/common.py +29 -0
- followthemoney/export/csv.py +82 -0
- followthemoney/export/excel.py +75 -0
- followthemoney/export/graph.py +79 -0
- followthemoney/export/neo4j.py +182 -0
- followthemoney/export/rdf.py +26 -0
- followthemoney/graph.py +308 -0
- followthemoney/helpers.py +212 -0
- followthemoney/mapping/__init__.py +1 -1
- followthemoney/mapping/csv.py +67 -35
- followthemoney/mapping/entity.py +116 -44
- followthemoney/mapping/property.py +90 -44
- followthemoney/mapping/query.py +27 -19
- followthemoney/mapping/source.py +15 -5
- followthemoney/mapping/sql.py +75 -61
- followthemoney/messages.py +13 -7
- followthemoney/model.py +108 -56
- followthemoney/namespace.py +119 -0
- followthemoney/offshore.py +48 -0
- followthemoney/ontology.py +77 -0
- followthemoney/property.py +204 -71
- followthemoney/proxy.py +455 -118
- followthemoney/rdf.py +9 -0
- followthemoney/schema/Address.yaml +78 -0
- followthemoney/schema/Airplane.yaml +17 -10
- followthemoney/schema/Analyzable.yaml +54 -0
- followthemoney/schema/Article.yaml +16 -0
- followthemoney/schema/Assessment.yaml +32 -0
- followthemoney/schema/Asset.yaml +10 -4
- followthemoney/schema/Associate.yaml +41 -0
- followthemoney/schema/Audio.yaml +24 -0
- followthemoney/schema/BankAccount.yaml +53 -9
- followthemoney/schema/Call.yaml +48 -0
- followthemoney/schema/CallForTenders.yaml +117 -0
- followthemoney/schema/Company.yaml +37 -12
- followthemoney/schema/Contract.yaml +41 -7
- followthemoney/schema/ContractAward.yaml +30 -11
- followthemoney/schema/CourtCase.yaml +16 -10
- followthemoney/schema/CourtCaseParty.yaml +17 -6
- followthemoney/schema/CryptoWallet.yaml +48 -0
- followthemoney/schema/Debt.yaml +37 -0
- followthemoney/schema/Directorship.yaml +17 -4
- followthemoney/schema/Document.yaml +72 -139
- followthemoney/schema/Documentation.yml +38 -0
- followthemoney/schema/EconomicActivity.yaml +32 -17
- followthemoney/schema/Email.yaml +76 -0
- followthemoney/schema/Employment.yaml +39 -0
- followthemoney/schema/Event.yaml +35 -3
- followthemoney/schema/Family.yaml +41 -0
- followthemoney/schema/Folder.yaml +13 -0
- followthemoney/schema/HyperText.yaml +21 -0
- followthemoney/schema/Identification.yaml +40 -0
- followthemoney/schema/Image.yaml +25 -0
- followthemoney/schema/Interest.yaml +3 -6
- followthemoney/schema/Interval.yaml +56 -5
- followthemoney/schema/LegalEntity.yaml +81 -20
- followthemoney/schema/License.yaml +7 -3
- followthemoney/schema/Membership.yaml +19 -4
- followthemoney/schema/Mention.yaml +54 -0
- followthemoney/schema/Message.yaml +78 -0
- followthemoney/schema/Note.yaml +23 -0
- followthemoney/schema/Occupancy.yaml +44 -0
- followthemoney/schema/Organization.yaml +38 -3
- followthemoney/schema/Ownership.yaml +16 -4
- followthemoney/schema/Package.yaml +17 -0
- followthemoney/schema/Page.yaml +43 -0
- followthemoney/schema/Pages.yaml +23 -0
- followthemoney/schema/Passport.yaml +16 -17
- followthemoney/schema/Payment.yaml +38 -7
- followthemoney/schema/Person.yaml +61 -5
- followthemoney/schema/PlainText.yaml +17 -0
- followthemoney/schema/Position.yaml +50 -0
- followthemoney/schema/Post.yaml +42 -0
- followthemoney/schema/Project.yaml +27 -0
- followthemoney/schema/ProjectParticipant.yaml +36 -0
- followthemoney/schema/PublicBody.yaml +14 -3
- followthemoney/schema/RealEstate.yaml +19 -3
- followthemoney/schema/Representation.yaml +17 -6
- followthemoney/schema/Sanction.yaml +45 -21
- followthemoney/schema/Security.yaml +59 -0
- followthemoney/schema/Similar.yaml +37 -0
- followthemoney/schema/Succession.yaml +36 -0
- followthemoney/schema/Table.yaml +32 -0
- followthemoney/schema/TaxRoll.yaml +27 -9
- followthemoney/schema/Thing.yaml +69 -13
- followthemoney/schema/Trip.yaml +42 -0
- followthemoney/schema/UnknownLink.yaml +17 -6
- followthemoney/schema/UserAccount.yaml +44 -0
- followthemoney/schema/Value.yaml +5 -1
- followthemoney/schema/Vehicle.yaml +25 -8
- followthemoney/schema/Vessel.yaml +18 -10
- followthemoney/schema/Video.yaml +20 -0
- followthemoney/schema/Workbook.yaml +18 -0
- followthemoney/schema.py +436 -135
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
- followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
- followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
- followthemoney/translations/fr/followthemoney.po +3861 -0
- followthemoney/translations/messages.pot +3021 -725
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
- followthemoney/translations/ru/followthemoney.po +4221 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
- followthemoney/types/__init__.py +35 -17
- followthemoney/types/address.py +50 -21
- followthemoney/types/checksum.py +25 -0
- followthemoney/types/common.py +233 -88
- followthemoney/types/country.py +50 -56
- followthemoney/types/date.py +59 -76
- followthemoney/types/email.py +66 -35
- followthemoney/types/entity.py +66 -13
- followthemoney/types/gender.py +66 -0
- followthemoney/types/iban.py +47 -28
- followthemoney/types/identifier.py +49 -22
- followthemoney/types/ip.py +35 -21
- followthemoney/types/json.py +58 -0
- followthemoney/types/language.py +124 -37
- followthemoney/types/mimetype.py +44 -0
- followthemoney/types/name.py +56 -12
- followthemoney/types/number.py +30 -0
- followthemoney/types/phone.py +92 -34
- followthemoney/types/registry.py +52 -0
- followthemoney/types/string.py +43 -0
- followthemoney/types/topic.py +94 -0
- followthemoney/types/url.py +39 -17
- followthemoney/util.py +139 -45
- followthemoney-3.8.1.dist-info/METADATA +153 -0
- followthemoney-3.8.1.dist-info/RECORD +157 -0
- {followthemoney-1.3.7.dist-info → followthemoney-3.8.1.dist-info}/WHEEL +1 -2
- followthemoney-3.8.1.dist-info/entry_points.txt +17 -0
- followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.1.dist-info/licenses/LICENSE +1 -1
- followthemoney/link.py +0 -75
- followthemoney/schema/Associate.yml +0 -19
- followthemoney/schema/Family.yml +0 -19
- followthemoney/schema/Land.yml +0 -9
- followthemoney/schema/Relationship.yaml +0 -26
- followthemoney/types/domain.py +0 -50
- followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
- followthemoney-1.3.7.dist-info/METADATA +0 -39
- followthemoney-1.3.7.dist-info/RECORD +0 -108
- followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
- followthemoney-1.3.7.dist-info/metadata.json +0 -1
- followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
- followthemoney-1.3.7.dist-info/top_level.txt +0 -3
- ns/ontology.py +0 -128
- tests/types/test_addresses.py +0 -24
- tests/types/test_common.py +0 -32
- tests/types/test_countries.py +0 -27
- tests/types/test_dates.py +0 -73
- tests/types/test_domains.py +0 -23
- tests/types/test_emails.py +0 -32
- tests/types/test_entity.py +0 -19
- tests/types/test_iban.py +0 -109
- tests/types/test_identifiers.py +0 -27
- tests/types/test_ip.py +0 -29
- tests/types/test_languages.py +0 -23
- tests/types/test_names.py +0 -33
- tests/types/test_phones.py +0 -24
- tests/types/test_registry.py +0 -14
- tests/types/test_urls.py +0 -23
- {ns → followthemoney/export}/__init__.py +0 -0
- /tests/types/__init__.py → /followthemoney/py.typed +0 -0
followthemoney/proxy.py
CHANGED
|
@@ -1,171 +1,508 @@
|
|
|
1
|
-
|
|
2
|
-
from
|
|
3
|
-
|
|
1
|
+
import logging
|
|
2
|
+
from typing import (
|
|
3
|
+
TYPE_CHECKING,
|
|
4
|
+
Any,
|
|
5
|
+
Dict,
|
|
6
|
+
Generator,
|
|
7
|
+
List,
|
|
8
|
+
Optional,
|
|
9
|
+
Set,
|
|
10
|
+
Tuple,
|
|
11
|
+
Union,
|
|
12
|
+
Type,
|
|
13
|
+
TypeVar,
|
|
14
|
+
cast,
|
|
15
|
+
)
|
|
16
|
+
import warnings
|
|
17
|
+
from itertools import product
|
|
18
|
+
from banal import ensure_dict
|
|
4
19
|
|
|
5
20
|
from followthemoney.exc import InvalidData
|
|
6
21
|
from followthemoney.types import registry
|
|
22
|
+
from followthemoney.types.common import PropertyType
|
|
7
23
|
from followthemoney.property import Property
|
|
8
|
-
from followthemoney.
|
|
9
|
-
from followthemoney.util import
|
|
24
|
+
from followthemoney.rdf import SKOS, RDF, Literal, URIRef, Identifier
|
|
25
|
+
from followthemoney.util import sanitize_text, gettext
|
|
26
|
+
from followthemoney.util import merge_context, value_list, make_entity_id
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
from followthemoney.model import Model
|
|
30
|
+
|
|
31
|
+
log = logging.getLogger(__name__)
|
|
32
|
+
P = Union[Property, str]
|
|
33
|
+
Triple = Tuple[Identifier, Identifier, Identifier]
|
|
34
|
+
E = TypeVar("E", bound="EntityProxy")
|
|
10
35
|
|
|
11
36
|
|
|
12
37
|
class EntityProxy(object):
    """A wrapper object for an entity, with utility functions for the
    introspection and manipulation of its properties.

    This is the main working object in the library, used to generate, validate
    and emit data.

    Values are kept per property name in ``_properties``; ``_size`` tracks the
    combined length of all stored values and is what ``len(proxy)`` returns.
    """

    __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]

    def __init__(
        self,
        model: "Model",
        data: Dict[str, Any],
        key_prefix: Optional[str] = None,
        cleaned: bool = True,
    ):
        """Build a proxy from a serialised entity dictionary.

        :param model: the model used to resolve the ``schema`` name in ``data``.
        :param data: a dictionary with ``id``, ``schema`` and ``properties``
            keys; any other keys are retained as :attr:`context`. The input
            dictionary itself is not modified.
        :param key_prefix: optional prefix handed to :meth:`make_id`.
        :param cleaned: if ``True`` (the default), the values in ``data`` are
            trusted and stored as-is (de-duplicated); if ``False``, the ID is
            sanitised and every value goes through :meth:`add`.
        :raises InvalidData: if the model has no schema for ``data["schema"]``.
        """
        data = dict(data or {})
        properties = data.pop("properties", {})
        if not cleaned:
            properties = ensure_dict(properties)

        #: The schema definition for this entity, which implies the properties
        #: That can be set on it.
        schema = model.get(data.pop("schema", None))
        if schema is None:
            raise InvalidData(gettext("No schema for entity."))
        self.schema = schema

        #: When using :meth:`~make_id` to generate a natural key for this entity,
        #: the prefix will be added to the ID as a salt to make it easier to keep
        #: IDs unique across datasets. This is somewhat redundant following the
        #: introduction of :class:`~followthemoney.namespace.Namespace`.
        self.key_prefix = key_prefix

        #: A unique identifier for this entity, usually a hashed natural key,
        #: a UUID, or a very simple slug. Can be signed using a
        #: :class:`~followthemoney.namespace.Namespace`.
        self.id = data.pop("id", None)
        if not cleaned:
            self.id = sanitize_text(self.id)

        #: If the input dictionary for the entity proxy contains fields other
        #: than ``id``, ``schema`` or ``properties``, they will be kept in here
        #: and re-added upon serialization.
        self.context = data
        self._properties: Dict[str, List[str]] = {}
        self._size = 0

        for key, values in properties.items():
            if key not in self.schema.properties:
                continue
            if cleaned:
                # This does not call `self.add` as it might be called millions of
                # times in some context and we want to avoid the performance
                # overhead of doing so.
                seen: Set[str] = set()
                seen_add = seen.add
                # Order-preserving de-duplication: `seen_add` returns None, so
                # the `or` both records the value and keeps the test falsy.
                unique_values = [v for v in values if not (v in seen or seen_add(v))]
                self._properties[key] = unique_values
                self._size += sum(len(v) for v in unique_values)
            else:
                self.add(key, values, quiet=True)

    def make_id(self, *parts: Any) -> Optional[str]:
        """Generate a (hopefully unique) ID for the given entity, composed
        of the given components, and the :attr:`~key_prefix` defined in
        the proxy. The result is stored in :attr:`id` and returned.
        """
        self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
        return self.id

    def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Resolve a property name or object to a name valid for the schema.

        :return: the property name, or ``None`` if it is unknown and ``quiet``.
        :raises InvalidData: if the property is unknown and ``quiet`` is false.
        """
        # This is pretty unwound because it gets called a *lot*.
        if prop in self.schema.properties:
            return cast(str, prop)
        try:
            obj = cast(Property, prop)
            if obj.name in self.schema.properties:
                return obj.name
        except AttributeError:
            # `prop` was a plain string that is not a known property name.
            pass
        if quiet:
            return None
        msg = gettext("Unknown property (%s): %s")
        raise InvalidData(msg % (self.schema, prop))

    def get(self, prop: P, quiet: bool = False) -> List[str]:
        """Get all values of a property.

        The returned list is the proxy's own storage when the property has
        values; treat it as read-only and use :meth:`add` / :meth:`remove`
        to change it.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: A list of values.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return []
        return self._properties.get(prop_name, [])

    def first(self, prop: P, quiet: bool = False) -> Optional[str]:
        """Get only the first value set for the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``None`` instead of raising an error.
        :return: A value, or ``None``.
        """
        for value in self.get(prop, quiet=quiet):
            return value
        return None

    def has(self, prop: P, quiet: bool = False) -> bool:
        """Check to see if the given property has at least one value set.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            ``False`` instead of raising an error.
        :return: a boolean.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return False
        # A key may map to an empty list (e.g. handed in at construction time),
        # which does not count as having a value.
        return bool(self._properties.get(prop_name))

    def add(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Add the given value(s) to the property if they are valid for
        the type of the property.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: if true, the values are taken to be normalised already
            and the property type's cleaning step is skipped.
        :param quiet: an unknown or stub property is silently ignored instead
            of raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        :raises InvalidData: for an unknown or stub property, unless ``quiet``.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        prop = self.schema.properties[prop_name]

        # Don't allow setting the reverse properties:
        if prop.stub:
            if quiet:
                return None
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, prop))

        if not cleaned:
            # Fall back to the format declared on the property, once.
            format = format or prop.format
        for value in value_list(values):
            if not cleaned:
                value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
            self.unsafe_add(prop, value, cleaned=True)
        return None

    def unsafe_add(
        self,
        prop: Property,
        value: Optional[str],
        cleaned: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> Optional[str]:
        """A version of `add()` to be used only in type-checking code. This accepts
        only a single value, and performs input cleaning on the premise that the
        value is already valid unicode.

        :param prop: the property object (not a name) to add the value to.
        :param value: the value; ``None`` is ignored.
        :param cleaned: if true, skip the property type's text cleaning.
        :param fuzzy: passed to the type's cleaning function.
        :param format: passed to the type's cleaning function; defaults to the
            format declared on the property.
        :return: the stored value (also when it was already present), or
            ``None`` if it was empty, invalid, or rejected by the size limit.
        """
        if not cleaned and value is not None:
            format = format or prop.format
            value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)

        if value is None:
            return None

        existing = self._properties.get(prop.name)
        if existing is not None and value in existing:
            # Already stored: nothing is added, so the size must not grow.
            return value

        # Somewhat hacky: limit the maximum size of any particular
        # field to avoid overloading upstream aleph/elasticsearch.
        value_size = len(value)
        total_size = prop.type.total_size
        if total_size is not None and self._size + value_size > total_size:
            return None

        self._size += value_size
        if existing is None:
            self._properties[prop.name] = [value]
        else:
            existing.append(value)
        return value

    def set(
        self,
        prop: P,
        values: Any,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
    ) -> None:
        """Replace the values of the property with the given value(s).

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param values: either a single value, or a list of values to be added.
        :param cleaned: if true, the values are taken to be normalised already
            and the property type's cleaning step is skipped.
        :param quiet: an unknown property is silently ignored instead of
            raising an error.
        :param fuzzy: when normalising the data, should fuzzy matching be allowed.
        :param format: when normalising the data, formatting for a date.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        previous = self._properties.pop(prop_name, None)
        if previous:
            # Give back the space used by the values being replaced.
            self._size -= sum(len(v) for v in previous)
        return self.add(
            prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
        )

    def pop(self, prop: P, quiet: bool = True) -> List[str]:
        """Remove all the values from the given property and return them.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param quiet: a reference to an non-existent property will return
            an empty list instead of raising an error.
        :return: a list of values, possibly empty.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None or prop_name not in self._properties:
            return []
        values = self._properties.pop(prop_name)
        self._size -= sum(len(v) for v in values)
        return list(values)

    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
        """Remove a single value from the given property. If it is not there,
        no action takes place.

        :param prop: can be given as a name or an instance of
            :class:`~followthemoney.property.Property`.
        :param value: will not be cleaned before checking.
        :param quiet: a reference to an non-existent property is ignored
            instead of raising an error.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        values = self._properties.get(prop_name)
        if values is None:
            return None
        try:
            values.remove(value)
        except ValueError:
            # The value was not set on this property.
            pass
        else:
            self._size -= len(value)
        if not values:
            # Do not keep an empty list around: the property is now unset.
            del self._properties[prop_name]
        return None

    def iterprops(self) -> List[Property]:
        """Iterate across all the properties for which a value is set in
        the proxy (but do not return their values)."""
        return [self.schema.properties[name] for name in self._properties]

    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
        """Iterate across all values in the proxy one by one, each given as a
        tuple of the property and the value."""
        for name, values in self._properties.items():
            prop = self.schema.properties[name]
            for value in values:
                yield (prop, value)

    def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
        """Return all the possible pairs of values for the edge source and target if
        the schema allows for an edge representation of the entity."""
        if self.schema.source_prop is not None and self.schema.target_prop is not None:
            sources = self.get(self.schema.source_prop)
            targets = self.get(self.schema.target_prop)
            for source, target in product(sources, targets):
                yield (source, target)

    def get_type_values(
        self, type_: PropertyType, matchable: bool = False
    ) -> List[str]:
        """All values of a particular type associated with the entity. For
        example, this lets you return all countries linked to an entity, rather
        than manually checking each property to see if it contains countries.

        :param type_: The type object to be searched.
        :param matchable: Whether to return only property values marked as matchable.
        :return: the distinct values, in no particular order.
        """
        combined = set()
        for prop_name, values in self._properties.items():
            prop = self.schema.properties[prop_name]
            if matchable and not prop.matchable:
                continue
            if prop.type == type_:
                combined.update(values)
        return list(combined)

    @property
    def names(self) -> List[str]:
        """Get the distinct name-type values set on the entity."""
        return self.get_type_values(registry.name)

    @property
    def countries(self) -> List[str]:
        """Get the distinct country-type values set on the entity."""
        return self.get_type_values(registry.country)

    @property
    def temporal_start(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the start of the entity in a
        timeline. If there are multiple possible dates, the earliest date is
        returned.

        :return: a ``(property, value)`` tuple, or ``None`` if no date is set.
        """
        candidates = [
            (prop, value)
            for prop in self.schema.temporal_start_props
            for value in self.get(prop.name)
        ]
        # Values are compared as strings; on ties the first candidate wins.
        return min(candidates, key=lambda item: item[1], default=None)

    @property
    def temporal_end(self) -> Optional[Tuple[Property, str]]:
        """Get a date that can be used to represent the end of the entity in a
        timeline. If there are multiple possible dates, the latest date is
        returned.

        :return: a ``(property, value)`` tuple, or ``None`` if no date is set.
        """
        candidates = [
            (prop, value)
            for prop in self.schema.temporal_end_props
            for value in self.get(prop.name)
        ]
        # Values are compared as strings; on ties the first candidate wins.
        return max(candidates, key=lambda item: item[1], default=None)

    def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
        """Return all the values of the entity arranged into a mapping with the
        group name of their property type. These groups include ``countries``,
        ``addresses``, ``emails``, etc. Groups without values are left out.

        :param matchable: Whether to include only values of matchable properties.
        """
        data: Dict[str, List[str]] = {}
        for group, type_ in registry.groups.items():
            values = self.get_type_values(type_, matchable=matchable)
            if values:
                data[group] = values
        return data

    def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
        """Serialise the entity into a set of RDF triple statements. The
        statements include the property values, an ``RDF#type`` definition
        that refers to the entity schema, and a ``SKOS#prefLabel`` with the
        entity caption.

        Nothing is emitted for an entity without an ID.

        :param qualified: if true, emit the caption label and use the full
            property URIs as predicates; otherwise use the bare property names.
        """
        if self.id is None or self.schema is None:
            return
        uri = registry.entity.rdf(self.id)
        yield (uri, RDF.type, self.schema.uri)
        if qualified:
            caption = self.caption
            # The schema label is only the fallback caption, not a real label.
            if caption != self.schema.label:
                yield (uri, SKOS.prefLabel, Literal(caption))
        for prop, value in self.itervalues():
            value = prop.type.rdf(value)
            if qualified:
                yield (uri, prop.uri, value)
            else:
                yield (uri, URIRef(prop.name), value)

    @property
    def caption(self) -> str:
        """The user-facing label to be used for this entity. This checks a list
        of properties defined by the schema (caption) and returns the first
        available value. If no caption is available, return the schema label."""
        for prop in self.schema.caption:
            for value in self.get(prop):
                return value
        return self.schema.label

    @property
    def country_hints(self) -> Set[str]:
        """Some property types, such as phone numbers and IBAN codes imply a
        country that may be associated with the entity. This set can be used
        for a more generous matching approach than the actual country values.

        Hints are only collected when the entity has no explicit countries.
        """
        countries = set(self.countries)
        if not countries:
            for prop, value in self.itervalues():
                hint = prop.type.country_hint(value)
                if hint is not None:
                    countries.add(hint)
        return countries

    @property
    def properties(self) -> Dict[str, List[str]]:
        """Return a mapping of the properties and set values of the entity.

        The mapping and its lists are copies; changing them does not affect
        the proxy."""
        return {name: list(values) for name, values in self._properties.items()}

    def to_dict(self) -> Dict[str, Any]:
        """Serialise the proxy into a dictionary with the defined properties, ID,
        schema and any contextual values that were handed in initially. The resulting
        dictionary can be used to make a new proxy, and it is commonly written to disk
        or a database."""
        data = dict(self.context)
        # Set last so the core fields always win over same-named context keys.
        data.update(
            {
                "id": self.id,
                "schema": self.schema.name,
                "properties": self.properties,
            }
        )
        return data

    def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
        """Return a serialised version of the entity with inverted type groups mixed
        in. See :meth:`~get_type_inverted`."""
        data = self.to_dict()
        data.update(self.get_type_inverted(matchable=matchable))
        return data

    def clone(self: E) -> E:
        """Make a copy of the current entity proxy, built from its serialised
        form. The :attr:`key_prefix` is carried over to the copy."""
        cloned = self.__class__.from_dict(self.schema.model, self.to_dict())
        # `from_dict` has no way to pass the prefix, so restore it explicitly.
        cloned.key_prefix = self.key_prefix
        return cloned

    def merge(self: E, other: E) -> E:
        """Merge another entity proxy into this one. This will try and find
        the common schema between both entities and then add all property
        values from the other entity into this one.

        :param other: the proxy to merge in; it is not modified.
        :return: this proxy, updated in place.
        :raises InvalidData: if the two schemata have no common schema.
        """
        model = self.schema.model
        self.id = self.id or other.id
        try:
            self.schema = model.common_schema(self.schema, other.schema)
        except InvalidData as e:
            msg = "Cannot merge entities with id %s: %s"
            raise InvalidData(msg % (self.id, e)) from e

        self.context = merge_context(self.context, other.context)
        for prop, values in other._properties.items():
            self.add(prop, values, cleaned=True, quiet=True)
        return self

    def __str__(self) -> str:
        return self.caption

    def __repr__(self) -> str:
        return "<E(%r,%r)>" % (self.id, str(self))

    def __len__(self) -> int:
        # Combined length of all stored values, not the number of values.
        return self._size

    def __hash__(self) -> int:
        # Identity of a proxy is its ID alone; without one the hash is useless.
        if not self.id:
            warnings.warn(
                "Hashing an EntityProxy without an ID results in undefined behaviour",
                RuntimeWarning,
            )
        return hash(self.id)

    def __eq__(self, other: Any) -> bool:
        # Two proxies are equal when their IDs match; property values are not
        # compared. Objects without an `id` attribute are never equal.
        try:
            if self.id is None or other.id is None:
                warnings.warn(
                    "Comparing EntityProxys without IDs results in undefined behaviour",
                    RuntimeWarning,
                )
            return bool(self.id == other.id)
        except AttributeError:
            return False

    @classmethod
    def from_dict(
        cls: Type[E],
        model: "Model",
        data: Dict[str, Any],
        cleaned: bool = True,
    ) -> E:
        """Instantiate a proxy based on the given model and serialised dictionary.

        Use :meth:`followthemoney.model.Model.get_proxy` instead."""
        return cls(model, data, cleaned=cleaned)
|
followthemoney/rdf.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# This module serves exclusively to mitigate the type checking clusterfuck
|
|
2
|
+
# that is rdflib 6.0.
|
|
3
|
+
from rdflib import Namespace
|
|
4
|
+
from rdflib.term import Identifier, URIRef, Literal
|
|
5
|
+
from rdflib import RDF, SKOS, XSD
|
|
6
|
+
|
|
7
|
+
NS = Namespace("https://schema.followthemoney.tech/#")
|
|
8
|
+
|
|
9
|
+
__all__ = ["NS", "XSD", "RDF", "SKOS", "Identifier", "URIRef", "Literal"]
|