followthemoney 4.5.1__py3-none-any.whl → 4.5.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +1 -1
- followthemoney/proxy.py +21 -3
- followthemoney/statement/entity.py +13 -4
- followthemoney/statement/serialize.py +3 -3
- followthemoney/types/common.py +1 -3
- followthemoney/types/json.py +1 -2
- followthemoney/types/string.py +3 -4
- followthemoney/util.py +2 -0
- {followthemoney-4.5.1.dist-info → followthemoney-4.5.2.dist-info}/METADATA +1 -1
- {followthemoney-4.5.1.dist-info → followthemoney-4.5.2.dist-info}/RECORD +13 -13
- {followthemoney-4.5.1.dist-info → followthemoney-4.5.2.dist-info}/WHEEL +0 -0
- {followthemoney-4.5.1.dist-info → followthemoney-4.5.2.dist-info}/entry_points.txt +0 -0
- {followthemoney-4.5.1.dist-info → followthemoney-4.5.2.dist-info}/licenses/LICENSE +0 -0
followthemoney/__init__.py
CHANGED
|
@@ -9,7 +9,7 @@ from followthemoney.statement import Statement, StatementEntity, SE
|
|
|
9
9
|
from followthemoney.dataset import Dataset, UndefinedDataset, DS
|
|
10
10
|
from followthemoney.util import set_model_locale
|
|
11
11
|
|
|
12
|
-
__version__ = "4.5.1"
|
|
12
|
+
__version__ = "4.5.2"
|
|
13
13
|
|
|
14
14
|
# Data model singleton
|
|
15
15
|
model = Model.instance()
|
followthemoney/proxy.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import logging
|
|
3
|
-
from typing import TYPE_CHECKING,
|
|
3
|
+
from typing import TYPE_CHECKING, Iterable, Any, cast
|
|
4
4
|
from typing import Dict, Generator, List, Optional, Set, Tuple, Union, Type, TypeVar
|
|
5
5
|
from itertools import product
|
|
6
6
|
from banal import ensure_dict
|
|
@@ -117,11 +117,28 @@ class EntityProxy(object):
|
|
|
117
117
|
an empty list instead of raising an error.
|
|
118
118
|
:return: A list of values.
|
|
119
119
|
"""
|
|
120
|
+
if prop in self.schema.properties:
|
|
121
|
+
return self._properties.get(prop, []) # type: ignore
|
|
122
|
+
|
|
120
123
|
prop_name = self._prop_name(prop, quiet=quiet)
|
|
121
124
|
if prop_name is None:
|
|
122
125
|
return []
|
|
123
126
|
return self._properties.get(prop_name, [])
|
|
124
127
|
|
|
128
|
+
def get_prop(self, prop: Property) -> Iterable[str]:
|
|
129
|
+
"""Get all values of a property, returning an empty list if the property
|
|
130
|
+
does not exist. This has better performance characteristics than `get()`
|
|
131
|
+
as it does not need to resolve the property name.
|
|
132
|
+
|
|
133
|
+
:param prop: can be given as a name or an instance of
|
|
134
|
+
:class:`~followthemoney.property.Property`.
|
|
135
|
+
:return: An iterable of values.
|
|
136
|
+
"""
|
|
137
|
+
try:
|
|
138
|
+
return self._properties[prop.name]
|
|
139
|
+
except KeyError:
|
|
140
|
+
return []
|
|
141
|
+
|
|
125
142
|
def first(self, prop: P, quiet: bool = False) -> Optional[str]:
|
|
126
143
|
"""Get only the first value set for the property.
|
|
127
144
|
|
|
@@ -311,10 +328,11 @@ class EntityProxy(object):
|
|
|
311
328
|
combined = set()
|
|
312
329
|
for prop_name, values in self._properties.items():
|
|
313
330
|
prop = self.schema.properties[prop_name]
|
|
331
|
+
if prop.type is not type_:
|
|
332
|
+
continue
|
|
314
333
|
if matchable and not prop.matchable:
|
|
315
334
|
continue
|
|
316
|
-
|
|
317
|
-
combined.update(values)
|
|
335
|
+
combined.update(values)
|
|
318
336
|
return list(combined)
|
|
319
337
|
|
|
320
338
|
@property
|
followthemoney/statement/entity.py
CHANGED
|
@@ -184,6 +184,13 @@ class StatementEntity(EntityProxy):
|
|
|
184
184
|
return []
|
|
185
185
|
return list({s.value for s in self._statements[prop_name]})
|
|
186
186
|
|
|
187
|
+
def get_prop(self, prop: Property) -> Iterable[str]:
|
|
188
|
+
try:
|
|
189
|
+
statements = self._statements[prop.name]
|
|
190
|
+
return {s.value for s in statements}
|
|
191
|
+
except KeyError:
|
|
192
|
+
return []
|
|
193
|
+
|
|
187
194
|
def get_statements(self, prop: P, quiet: bool = False) -> List[Statement]:
|
|
188
195
|
prop_name = self._prop_name(prop, quiet=quiet)
|
|
189
196
|
if prop_name is None or prop_name not in self._statements:
|
|
@@ -341,14 +348,16 @@ class StatementEntity(EntityProxy):
|
|
|
341
348
|
def get_type_statements(
|
|
342
349
|
self, type_: PropertyType, matchable: bool = False
|
|
343
350
|
) -> List[Statement]:
|
|
344
|
-
combined = []
|
|
351
|
+
combined: List[Statement] = []
|
|
345
352
|
for prop_name, statements in self._statements.items():
|
|
346
353
|
prop = self.schema.properties[prop_name]
|
|
354
|
+
# Used in performance-critical code paths:
|
|
355
|
+
if prop.type is not type_:
|
|
356
|
+
continue
|
|
347
357
|
if matchable and not prop.matchable:
|
|
348
358
|
continue
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
combined.append(statement)
|
|
359
|
+
for statement in statements:
|
|
360
|
+
combined.append(statement)
|
|
352
361
|
return combined
|
|
353
362
|
|
|
354
363
|
@property
|
followthemoney/statement/serialize.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import csv
|
|
2
|
-
import sys
|
|
3
2
|
import click
|
|
4
3
|
import orjson
|
|
5
4
|
import logging
|
|
@@ -13,6 +12,7 @@ from rigour.env import ENCODING
|
|
|
13
12
|
|
|
14
13
|
from followthemoney.statement.statement import Statement, StatementDict
|
|
15
14
|
from followthemoney.statement.util import unpack_prop
|
|
15
|
+
from followthemoney.util import ENTITY_VALUE_MAX, PROP_VALUE_MAX
|
|
16
16
|
|
|
17
17
|
log = logging.getLogger(__name__)
|
|
18
18
|
|
|
@@ -50,12 +50,12 @@ LEGACY_PACK_COLUMNS = [
|
|
|
50
50
|
"first_seen",
|
|
51
51
|
"last_seen",
|
|
52
52
|
]
|
|
53
|
-
csv.field_size_limit(
|
|
53
|
+
csv.field_size_limit(PROP_VALUE_MAX)
|
|
54
54
|
|
|
55
55
|
|
|
56
56
|
def read_json_statements(
|
|
57
57
|
fh: BinaryIO,
|
|
58
|
-
max_line: int =
|
|
58
|
+
max_line: int = ENTITY_VALUE_MAX,
|
|
59
59
|
) -> Generator[Statement, None, None]:
|
|
60
60
|
while line := fh.readline(max_line):
|
|
61
61
|
data = orjson.loads(line)
|
followthemoney/types/common.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
from inspect import cleandoc
|
|
2
2
|
from itertools import product
|
|
3
3
|
from babel.core import Locale
|
|
4
|
-
from banal import ensure_list
|
|
5
4
|
from normality import stringify
|
|
6
5
|
from typing import Any, Dict, Optional, Sequence, Callable, TYPE_CHECKING, TypedDict
|
|
7
6
|
|
|
@@ -115,7 +114,6 @@ class PropertyType(object):
|
|
|
115
114
|
"""Helper function for converting multi-valued FtM data into formats that
|
|
116
115
|
allow only a single value per field (e.g. CSV). This is not fully reversible
|
|
117
116
|
and should be used as a last option."""
|
|
118
|
-
values = ensure_list(values)
|
|
119
117
|
return "; ".join(values)
|
|
120
118
|
|
|
121
119
|
def _specificity(self, value: str) -> float:
|
|
@@ -154,7 +152,7 @@ class PropertyType(object):
|
|
|
154
152
|
) -> float:
|
|
155
153
|
"""Compare two sets of values and select the highest-scored result."""
|
|
156
154
|
results = []
|
|
157
|
-
for le, ri in product(ensure_list(left), ensure_list(right)):
|
|
155
|
+
for le, ri in product(left, right):
|
|
158
156
|
results.append(self.compare(le, ri))
|
|
159
157
|
if not len(results):
|
|
160
158
|
return 0.0
|
followthemoney/types/json.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
2
|
from typing import Any, Optional, Sequence, TYPE_CHECKING
|
|
3
|
-
from banal import ensure_list
|
|
4
3
|
|
|
5
4
|
from followthemoney.types.common import PropertyType
|
|
6
5
|
from followthemoney.util import sanitize_text, defer as _
|
|
@@ -48,7 +47,7 @@ class JsonType(PropertyType):
|
|
|
48
47
|
|
|
49
48
|
def join(self, values: Sequence[str]) -> str:
|
|
50
49
|
"""Turn multiple values into a JSON array."""
|
|
51
|
-
values = [self.unpack(v) for v in ensure_list(values)]
|
|
50
|
+
values = [self.unpack(v) for v in values]
|
|
52
51
|
data = self.pack(values)
|
|
53
52
|
if data is None:
|
|
54
53
|
return "[]"
|
followthemoney/types/string.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from followthemoney.types.common import PropertyType
|
|
2
|
-
from followthemoney.util import const, defer as _
|
|
3
|
-
from followthemoney.util import MEGABYTE
|
|
2
|
+
from followthemoney.util import PROP_VALUE_MAX, const, defer as _
|
|
4
3
|
|
|
5
4
|
|
|
6
5
|
class StringType(PropertyType):
|
|
@@ -24,7 +23,7 @@ class TextType(StringType):
|
|
|
24
23
|
name = "text"
|
|
25
24
|
label = _("Text")
|
|
26
25
|
plural = _("Texts")
|
|
27
|
-
total_size = 30 * MEGABYTE
|
|
26
|
+
total_size = PROP_VALUE_MAX
|
|
28
27
|
max_length = 65000
|
|
29
28
|
|
|
30
29
|
|
|
@@ -39,5 +38,5 @@ class HTMLType(StringType):
|
|
|
39
38
|
name = const("html")
|
|
40
39
|
label = _("HTML")
|
|
41
40
|
plural = _("HTMLs")
|
|
42
|
-
total_size = 30 * MEGABYTE
|
|
41
|
+
total_size = PROP_VALUE_MAX
|
|
43
42
|
max_length = 65000
|
followthemoney/util.py
CHANGED
|
@@ -14,6 +14,8 @@ from rigour.env import ENCODING
|
|
|
14
14
|
from banal import is_mapping, unique_list, ensure_list
|
|
15
15
|
|
|
16
16
|
MEGABYTE = 1024 * 1024
|
|
17
|
+
PROP_VALUE_MAX = 30 * MEGABYTE
|
|
18
|
+
ENTITY_VALUE_MAX = 50 * MEGABYTE
|
|
17
19
|
HASH_ENCODING = "utf-8"
|
|
18
20
|
DEFAULT_LOCALE = "en"
|
|
19
21
|
ENTITY_ID_LEN = 200
|
{followthemoney-4.5.1.dist-info → followthemoney-4.5.2.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: followthemoney
|
|
3
|
-
Version: 4.5.1
|
|
3
|
+
Version: 4.5.2
|
|
4
4
|
Summary: A data model for anti corruption data modeling and analysis.
|
|
5
5
|
Project-URL: Documentation, https://followthemoney.tech/
|
|
6
6
|
Project-URL: Repository, https://github.com/opensanctions/followthemoney.git
|
{followthemoney-4.5.1.dist-info → followthemoney-4.5.2.dist-info}/RECORD
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
followthemoney/__init__.py,sha256=
|
|
1
|
+
followthemoney/__init__.py,sha256=nl5VzBB-U4sYL6j-D7z6ndi6LcvdcZ-ErMyjWbZnFhk,894
|
|
2
2
|
followthemoney/compare.py,sha256=6y6fqtbbfW7ee4_EVXPcKCIr75GjPqXvtHfvExJ-KK0,6119
|
|
3
3
|
followthemoney/entity.py,sha256=biAjuuHlwsVT02imAsaWP0YtgdfU8skCntzBU3mgJpg,4052
|
|
4
4
|
followthemoney/exc.py,sha256=GyMgwY4QVm87hLevDfV7gM1MJsDqfNCi_UQw7F_A8X8,858
|
|
@@ -10,11 +10,11 @@ followthemoney/names.py,sha256=LODQqExKEHdH4z6Mmbhlm0KeKRzGcptaSWzYXZ7lONI,1120
|
|
|
10
10
|
followthemoney/namespace.py,sha256=utggu9IGA8bhgEYom3OUB1KxkAJR_TrMNbY5MUF_db8,4536
|
|
11
11
|
followthemoney/ontology.py,sha256=WWY_PYQGl5Ket4zZBuZglzQxD2Bh9UqHok6GJNNX7GA,3001
|
|
12
12
|
followthemoney/property.py,sha256=1w7p9aKLxRqFRnl3PlssqmvulSErl_0D5T2SefT3UFU,8675
|
|
13
|
-
followthemoney/proxy.py,sha256=
|
|
13
|
+
followthemoney/proxy.py,sha256=nA8trRbMcPfbmoEwHQK8kdiEbZqOpTuwT_GfQkfZj8U,21415
|
|
14
14
|
followthemoney/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
followthemoney/schema.py,sha256=dwZg0eZF7yaxP9fJ5NQUKWadWZYTo9U-sVzzXZn_6do,18500
|
|
16
16
|
followthemoney/settings.py,sha256=wkwrkDPypsAICLo-d7l-zpKrKe_fTVYPllZO-RX3oGM,507
|
|
17
|
-
followthemoney/util.py,sha256=
|
|
17
|
+
followthemoney/util.py,sha256=hkUe2IOCxgZEppANr1vGcMChC4t9RcudSDHuRH2D_Hg,4214
|
|
18
18
|
followthemoney/value.py,sha256=BJ4Sj5Tg2kMrslR6FjQUr96d8Kt75U7ny9NgzVGT0ZE,2335
|
|
19
19
|
followthemoney/cli/__init__.py,sha256=0mmz84uhXRp2qUn3syKnDXofU3MMAAe291s7htqX0Bg,187
|
|
20
20
|
followthemoney/cli/aggregate.py,sha256=xQTFpU3cVVj7fplpX4OJVrRlTVpn6b9kBr_Vb87pKfg,2164
|
|
@@ -115,8 +115,8 @@ followthemoney/schema/Vessel.yaml,sha256=zWHUfSK8g6Pz58ZyCaK0AFJ4u_UHjEIUGC4c_7o
|
|
|
115
115
|
followthemoney/schema/Video.yaml,sha256=LY3DYMWTHXiAhL0hxBCNCz50cp2sPbUlEhhig5Fbjos,327
|
|
116
116
|
followthemoney/schema/Workbook.yaml,sha256=iikWPElz4klA7SkWH7eae6xqhbkMCIP_3zdeXzFEMU0,354
|
|
117
117
|
followthemoney/statement/__init__.py,sha256=7m2VUCAuqNZXIY0WFJRFkw5UG14QuxATL4f_xbqKwhw,633
|
|
118
|
-
followthemoney/statement/entity.py,sha256=
|
|
119
|
-
followthemoney/statement/serialize.py,sha256=
|
|
118
|
+
followthemoney/statement/entity.py,sha256=vpiHsg2uOXnyIDYHRfMr0TWoOvGSzbiEfO2qHx2txQ8,20072
|
|
119
|
+
followthemoney/statement/serialize.py,sha256=fbvTVSvBeeusXIXWw9-_3NZaoncDvEnet6my6diUj7k,7566
|
|
120
120
|
followthemoney/statement/statement.py,sha256=bAwrrKyYRdJVxRGuXDQIOA7cdEngk1NKUaij_gAwSd4,11876
|
|
121
121
|
followthemoney/statement/util.py,sha256=jHBwK3FIBynUJZRlBBOHayalAFrqpXf2f2JwkHi0zAU,1450
|
|
122
122
|
followthemoney/translations/messages.pot,sha256=JhtY9NJ9wP_EAX4APxOqMyvKcX53oIC9kAxBsliJkf4,107703
|
|
@@ -145,7 +145,7 @@ followthemoney/translations/tr/LC_MESSAGES/followthemoney.po,sha256=AZC3marhtVVq
|
|
|
145
145
|
followthemoney/types/__init__.py,sha256=rWwQeiuMh2BNIuvhpMfJ4bPADDvt9Axu1eedvNFi0qY,3350
|
|
146
146
|
followthemoney/types/address.py,sha256=Gc-hqz00dRRkeANqkyPD2wtt7ksR9wMf4CX-U-5XvMo,2214
|
|
147
147
|
followthemoney/types/checksum.py,sha256=_0ev2Wwtd4iX_bLz0Lu-xcJIxNfH_V9kBKKtuZhoAwg,802
|
|
148
|
-
followthemoney/types/common.py,sha256=
|
|
148
|
+
followthemoney/types/common.py,sha256=d2z3E0z93SZRU6mxlQ-ShQdaNU9ilD0FvaE5MpJwxhE,9988
|
|
149
149
|
followthemoney/types/country.py,sha256=pwDiI_ipts9Oi2U7fHALYMJPCJHOqyI_2Liq7XI2XrA,2086
|
|
150
150
|
followthemoney/types/date.py,sha256=SGk8q8qICIrA5Lf6wPbDU6v6oJYEiu1dZFZ18BO7i80,3551
|
|
151
151
|
followthemoney/types/email.py,sha256=L3RTYrMABlNQF7hCynXGfzoj6YNEHW5JAY_BwuhoZdA,3375
|
|
@@ -153,17 +153,17 @@ followthemoney/types/entity.py,sha256=56h6x8Ct7hWZIC3BjZHmRKGy9Ff2vuULNWH3xDRsKi
|
|
|
153
153
|
followthemoney/types/gender.py,sha256=XY9us98Sk25O1xnHN-88tbv9pHy6Mn7SR8GRYi6v5gI,1683
|
|
154
154
|
followthemoney/types/identifier.py,sha256=TYJwE7urjHFxEcDuiZMxGoCN6n34rAIdCt5_96Y7vI0,2198
|
|
155
155
|
followthemoney/types/ip.py,sha256=rCXkRrh_jDeWAhswCgSe6Z4uhIW7yvLAxIEw4x1SM3A,1279
|
|
156
|
-
followthemoney/types/json.py,sha256=
|
|
156
|
+
followthemoney/types/json.py,sha256=8aKPnDXiNHAh-h96D1rxFaM_FyB6O_LVBptFn90RnvU,1633
|
|
157
157
|
followthemoney/types/language.py,sha256=ymEXaHAPIZGlGySUqzadB7tMB6mW1ASsl1G6EtqKdls,2730
|
|
158
158
|
followthemoney/types/mimetype.py,sha256=oqVP8EfGckPAI3WAziHomp6oUN7KXdIPWzGZPsRtIA8,1242
|
|
159
159
|
followthemoney/types/name.py,sha256=zd0aC4VGp1SYUI8Rj0-ZXlrpUI7ZcnJIljZqsEsV-CY,2363
|
|
160
160
|
followthemoney/types/number.py,sha256=vpAyhmc7UQlIm8h7Z5k8k4cTk37ykRF-AgYA1r_g1QQ,3934
|
|
161
161
|
followthemoney/types/phone.py,sha256=_HanfxxTV7jp75gZO2evBc9HWwQTxEMQRaoVDcoXDIQ,3790
|
|
162
|
-
followthemoney/types/string.py,sha256=
|
|
162
|
+
followthemoney/types/string.py,sha256=1h92OUSgVVxu40N4BCiBZvX1VXFmvEgwtfICO92x9kc,1193
|
|
163
163
|
followthemoney/types/topic.py,sha256=9FIH_WmwVOFg1CJRBF4KeE6vNTn-QQkzsKU5XaMqNJ0,4604
|
|
164
164
|
followthemoney/types/url.py,sha256=sSHKtzvm4kc-VTvNCPIDykOG1hUoawhORj6Bklo0a2A,1434
|
|
165
|
-
followthemoney-4.5.
|
|
166
|
-
followthemoney-4.5.
|
|
167
|
-
followthemoney-4.5.
|
|
168
|
-
followthemoney-4.5.
|
|
169
|
-
followthemoney-4.5.
|
|
165
|
+
followthemoney-4.5.2.dist-info/METADATA,sha256=Z568uQ_TOejHVvbnqmRZKHIIySLcx8bd58l6K3cTCMo,6747
|
|
166
|
+
followthemoney-4.5.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
167
|
+
followthemoney-4.5.2.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
|
|
168
|
+
followthemoney-4.5.2.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
|
|
169
|
+
followthemoney-4.5.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|