followthemoney 4.5.1__py3-none-any.whl → 4.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ from followthemoney.statement import Statement, StatementEntity, SE
9
9
  from followthemoney.dataset import Dataset, UndefinedDataset, DS
10
10
  from followthemoney.util import set_model_locale
11
11
 
12
- __version__ = "4.5.1"
12
+ __version__ = "4.5.2"
13
13
 
14
14
  # Data model singleton
15
15
  model = Model.instance()
followthemoney/proxy.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import hashlib
2
2
  import logging
3
- from typing import TYPE_CHECKING, cast, Any
3
+ from typing import TYPE_CHECKING, Iterable, Any, cast
4
4
  from typing import Dict, Generator, List, Optional, Set, Tuple, Union, Type, TypeVar
5
5
  from itertools import product
6
6
  from banal import ensure_dict
@@ -117,11 +117,28 @@ class EntityProxy(object):
117
117
  an empty list instead of raising an error.
118
118
  :return: A list of values.
119
119
  """
120
+ if prop in self.schema.properties:
121
+ return self._properties.get(prop, []) # type: ignore
122
+
120
123
  prop_name = self._prop_name(prop, quiet=quiet)
121
124
  if prop_name is None:
122
125
  return []
123
126
  return self._properties.get(prop_name, [])
124
127
 
128
+ def get_prop(self, prop: Property) -> Iterable[str]:
129
+ """Get all values of a property, returning an empty list if the property
130
+ has no values set on this entity. This has better performance characteristics than `get()`
131
+ as it does not need to resolve the property name.
132
+
133
+ :param prop: must be given as an instance of
134
+ :class:`~followthemoney.property.Property`.
135
+ :return: An iterable of values.
136
+ """
137
+ try:
138
+ return self._properties[prop.name]
139
+ except KeyError:
140
+ return []
141
+
125
142
  def first(self, prop: P, quiet: bool = False) -> Optional[str]:
126
143
  """Get only the first value set for the property.
127
144
 
@@ -311,10 +328,11 @@ class EntityProxy(object):
311
328
  combined = set()
312
329
  for prop_name, values in self._properties.items():
313
330
  prop = self.schema.properties[prop_name]
331
+ if prop.type is not type_:
332
+ continue
314
333
  if matchable and not prop.matchable:
315
334
  continue
316
- if prop.type == type_:
317
- combined.update(values)
335
+ combined.update(values)
318
336
  return list(combined)
319
337
 
320
338
  @property
@@ -184,6 +184,13 @@ class StatementEntity(EntityProxy):
184
184
  return []
185
185
  return list({s.value for s in self._statements[prop_name]})
186
186
 
187
+ def get_prop(self, prop: Property) -> Iterable[str]:
188
+ try:
189
+ statements = self._statements[prop.name]
190
+ return {s.value for s in statements}
191
+ except KeyError:
192
+ return []
193
+
187
194
  def get_statements(self, prop: P, quiet: bool = False) -> List[Statement]:
188
195
  prop_name = self._prop_name(prop, quiet=quiet)
189
196
  if prop_name is None or prop_name not in self._statements:
@@ -341,14 +348,16 @@ class StatementEntity(EntityProxy):
341
348
  def get_type_statements(
342
349
  self, type_: PropertyType, matchable: bool = False
343
350
  ) -> List[Statement]:
344
- combined = []
351
+ combined: List[Statement] = []
345
352
  for prop_name, statements in self._statements.items():
346
353
  prop = self.schema.properties[prop_name]
354
+ # Used in performance-critical code paths:
355
+ if prop.type is not type_:
356
+ continue
347
357
  if matchable and not prop.matchable:
348
358
  continue
349
- if prop.type == type_:
350
- for statement in statements:
351
- combined.append(statement)
359
+ for statement in statements:
360
+ combined.append(statement)
352
361
  return combined
353
362
 
354
363
  @property
@@ -1,5 +1,4 @@
1
1
  import csv
2
- import sys
3
2
  import click
4
3
  import orjson
5
4
  import logging
@@ -13,6 +12,7 @@ from rigour.env import ENCODING
13
12
 
14
13
  from followthemoney.statement.statement import Statement, StatementDict
15
14
  from followthemoney.statement.util import unpack_prop
15
+ from followthemoney.util import ENTITY_VALUE_MAX, PROP_VALUE_MAX
16
16
 
17
17
  log = logging.getLogger(__name__)
18
18
 
@@ -50,12 +50,12 @@ LEGACY_PACK_COLUMNS = [
50
50
  "first_seen",
51
51
  "last_seen",
52
52
  ]
53
- csv.field_size_limit(sys.maxsize)
53
+ csv.field_size_limit(PROP_VALUE_MAX)
54
54
 
55
55
 
56
56
  def read_json_statements(
57
57
  fh: BinaryIO,
58
- max_line: int = 40 * 1024 * 1024,
58
+ max_line: int = ENTITY_VALUE_MAX,
59
59
  ) -> Generator[Statement, None, None]:
60
60
  while line := fh.readline(max_line):
61
61
  data = orjson.loads(line)
@@ -1,7 +1,6 @@
1
1
  from inspect import cleandoc
2
2
  from itertools import product
3
3
  from babel.core import Locale
4
- from banal import ensure_list
5
4
  from normality import stringify
6
5
  from typing import Any, Dict, Optional, Sequence, Callable, TYPE_CHECKING, TypedDict
7
6
 
@@ -115,7 +114,6 @@ class PropertyType(object):
115
114
  """Helper function for converting multi-valued FtM data into formats that
116
115
  allow only a single value per field (e.g. CSV). This is not fully reversible
117
116
  and should be used as a last option."""
118
- values = ensure_list(values)
119
117
  return "; ".join(values)
120
118
 
121
119
  def _specificity(self, value: str) -> float:
@@ -154,7 +152,7 @@ class PropertyType(object):
154
152
  ) -> float:
155
153
  """Compare two sets of values and select the highest-scored result."""
156
154
  results = []
157
- for le, ri in product(ensure_list(left), ensure_list(right)):
155
+ for le, ri in product(left, right):
158
156
  results.append(self.compare(le, ri))
159
157
  if not len(results):
160
158
  return 0.0
@@ -1,6 +1,5 @@
1
1
  import json
2
2
  from typing import Any, Optional, Sequence, TYPE_CHECKING
3
- from banal import ensure_list
4
3
 
5
4
  from followthemoney.types.common import PropertyType
6
5
  from followthemoney.util import sanitize_text, defer as _
@@ -48,7 +47,7 @@ class JsonType(PropertyType):
48
47
 
49
48
  def join(self, values: Sequence[str]) -> str:
50
49
  """Turn multiple values into a JSON array."""
51
- values = [self.unpack(v) for v in ensure_list(values)]
50
+ values = [self.unpack(v) for v in values]
52
51
  data = self.pack(values)
53
52
  if data is None:
54
53
  return "[]"
@@ -1,6 +1,5 @@
1
1
  from followthemoney.types.common import PropertyType
2
- from followthemoney.util import const, defer as _
3
- from followthemoney.util import MEGABYTE
2
+ from followthemoney.util import PROP_VALUE_MAX, const, defer as _
4
3
 
5
4
 
6
5
  class StringType(PropertyType):
@@ -24,7 +23,7 @@ class TextType(StringType):
24
23
  name = "text"
25
24
  label = _("Text")
26
25
  plural = _("Texts")
27
- total_size = 30 * MEGABYTE
26
+ total_size = PROP_VALUE_MAX
28
27
  max_length = 65000
29
28
 
30
29
 
@@ -39,5 +38,5 @@ class HTMLType(StringType):
39
38
  name = const("html")
40
39
  label = _("HTML")
41
40
  plural = _("HTMLs")
42
- total_size = 30 * MEGABYTE
41
+ total_size = PROP_VALUE_MAX
43
42
  max_length = 65000
followthemoney/util.py CHANGED
@@ -14,6 +14,8 @@ from rigour.env import ENCODING
14
14
  from banal import is_mapping, unique_list, ensure_list
15
15
 
16
16
  MEGABYTE = 1024 * 1024
17
+ PROP_VALUE_MAX = 30 * MEGABYTE
18
+ ENTITY_VALUE_MAX = 50 * MEGABYTE
17
19
  HASH_ENCODING = "utf-8"
18
20
  DEFAULT_LOCALE = "en"
19
21
  ENTITY_ID_LEN = 200
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: followthemoney
3
- Version: 4.5.1
3
+ Version: 4.5.2
4
4
  Summary: A data model for anti corruption data modeling and analysis.
5
5
  Project-URL: Documentation, https://followthemoney.tech/
6
6
  Project-URL: Repository, https://github.com/opensanctions/followthemoney.git
@@ -1,4 +1,4 @@
1
- followthemoney/__init__.py,sha256=xxSC9m3THm5tFr_l8TY-RAEKb-ymcueZsmV4JVEjPyM,894
1
+ followthemoney/__init__.py,sha256=nl5VzBB-U4sYL6j-D7z6ndi6LcvdcZ-ErMyjWbZnFhk,894
2
2
  followthemoney/compare.py,sha256=6y6fqtbbfW7ee4_EVXPcKCIr75GjPqXvtHfvExJ-KK0,6119
3
3
  followthemoney/entity.py,sha256=biAjuuHlwsVT02imAsaWP0YtgdfU8skCntzBU3mgJpg,4052
4
4
  followthemoney/exc.py,sha256=GyMgwY4QVm87hLevDfV7gM1MJsDqfNCi_UQw7F_A8X8,858
@@ -10,11 +10,11 @@ followthemoney/names.py,sha256=LODQqExKEHdH4z6Mmbhlm0KeKRzGcptaSWzYXZ7lONI,1120
10
10
  followthemoney/namespace.py,sha256=utggu9IGA8bhgEYom3OUB1KxkAJR_TrMNbY5MUF_db8,4536
11
11
  followthemoney/ontology.py,sha256=WWY_PYQGl5Ket4zZBuZglzQxD2Bh9UqHok6GJNNX7GA,3001
12
12
  followthemoney/property.py,sha256=1w7p9aKLxRqFRnl3PlssqmvulSErl_0D5T2SefT3UFU,8675
13
- followthemoney/proxy.py,sha256=ZD8jK88oj1aUTpF7s6r91g8tmTiEfubUKX7CGV5I9rE,20714
13
+ followthemoney/proxy.py,sha256=nA8trRbMcPfbmoEwHQK8kdiEbZqOpTuwT_GfQkfZj8U,21415
14
14
  followthemoney/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  followthemoney/schema.py,sha256=dwZg0eZF7yaxP9fJ5NQUKWadWZYTo9U-sVzzXZn_6do,18500
16
16
  followthemoney/settings.py,sha256=wkwrkDPypsAICLo-d7l-zpKrKe_fTVYPllZO-RX3oGM,507
17
- followthemoney/util.py,sha256=JCZ35h_qrqxWO1FTxpa0eoOiko6DWTD9u1QKmH5ZaLU,4150
17
+ followthemoney/util.py,sha256=hkUe2IOCxgZEppANr1vGcMChC4t9RcudSDHuRH2D_Hg,4214
18
18
  followthemoney/value.py,sha256=BJ4Sj5Tg2kMrslR6FjQUr96d8Kt75U7ny9NgzVGT0ZE,2335
19
19
  followthemoney/cli/__init__.py,sha256=0mmz84uhXRp2qUn3syKnDXofU3MMAAe291s7htqX0Bg,187
20
20
  followthemoney/cli/aggregate.py,sha256=xQTFpU3cVVj7fplpX4OJVrRlTVpn6b9kBr_Vb87pKfg,2164
@@ -115,8 +115,8 @@ followthemoney/schema/Vessel.yaml,sha256=zWHUfSK8g6Pz58ZyCaK0AFJ4u_UHjEIUGC4c_7o
115
115
  followthemoney/schema/Video.yaml,sha256=LY3DYMWTHXiAhL0hxBCNCz50cp2sPbUlEhhig5Fbjos,327
116
116
  followthemoney/schema/Workbook.yaml,sha256=iikWPElz4klA7SkWH7eae6xqhbkMCIP_3zdeXzFEMU0,354
117
117
  followthemoney/statement/__init__.py,sha256=7m2VUCAuqNZXIY0WFJRFkw5UG14QuxATL4f_xbqKwhw,633
118
- followthemoney/statement/entity.py,sha256=jwT1OiM6NiGbCWTZIG4wi0a-ROGy-GN6Vpq9pyp5mbs,19759
119
- followthemoney/statement/serialize.py,sha256=TLpeCrpgFrQ6_AA3tcNxZhcOjlo_-c1MNfh3zE3idj4,7509
118
+ followthemoney/statement/entity.py,sha256=vpiHsg2uOXnyIDYHRfMr0TWoOvGSzbiEfO2qHx2txQ8,20072
119
+ followthemoney/statement/serialize.py,sha256=fbvTVSvBeeusXIXWw9-_3NZaoncDvEnet6my6diUj7k,7566
120
120
  followthemoney/statement/statement.py,sha256=bAwrrKyYRdJVxRGuXDQIOA7cdEngk1NKUaij_gAwSd4,11876
121
121
  followthemoney/statement/util.py,sha256=jHBwK3FIBynUJZRlBBOHayalAFrqpXf2f2JwkHi0zAU,1450
122
122
  followthemoney/translations/messages.pot,sha256=JhtY9NJ9wP_EAX4APxOqMyvKcX53oIC9kAxBsliJkf4,107703
@@ -145,7 +145,7 @@ followthemoney/translations/tr/LC_MESSAGES/followthemoney.po,sha256=AZC3marhtVVq
145
145
  followthemoney/types/__init__.py,sha256=rWwQeiuMh2BNIuvhpMfJ4bPADDvt9Axu1eedvNFi0qY,3350
146
146
  followthemoney/types/address.py,sha256=Gc-hqz00dRRkeANqkyPD2wtt7ksR9wMf4CX-U-5XvMo,2214
147
147
  followthemoney/types/checksum.py,sha256=_0ev2Wwtd4iX_bLz0Lu-xcJIxNfH_V9kBKKtuZhoAwg,802
148
- followthemoney/types/common.py,sha256=4ks7zPT8rknrGSd4JFc1zRkS-TL4SX-25_ZbjcVDos0,10081
148
+ followthemoney/types/common.py,sha256=d2z3E0z93SZRU6mxlQ-ShQdaNU9ilD0FvaE5MpJwxhE,9988
149
149
  followthemoney/types/country.py,sha256=pwDiI_ipts9Oi2U7fHALYMJPCJHOqyI_2Liq7XI2XrA,2086
150
150
  followthemoney/types/date.py,sha256=SGk8q8qICIrA5Lf6wPbDU6v6oJYEiu1dZFZ18BO7i80,3551
151
151
  followthemoney/types/email.py,sha256=L3RTYrMABlNQF7hCynXGfzoj6YNEHW5JAY_BwuhoZdA,3375
@@ -153,17 +153,17 @@ followthemoney/types/entity.py,sha256=56h6x8Ct7hWZIC3BjZHmRKGy9Ff2vuULNWH3xDRsKi
153
153
  followthemoney/types/gender.py,sha256=XY9us98Sk25O1xnHN-88tbv9pHy6Mn7SR8GRYi6v5gI,1683
154
154
  followthemoney/types/identifier.py,sha256=TYJwE7urjHFxEcDuiZMxGoCN6n34rAIdCt5_96Y7vI0,2198
155
155
  followthemoney/types/ip.py,sha256=rCXkRrh_jDeWAhswCgSe6Z4uhIW7yvLAxIEw4x1SM3A,1279
156
- followthemoney/types/json.py,sha256=Hefwns1-ziJf310MWvdfX5ICkOgj9cnnMJuqq1e6qKY,1676
156
+ followthemoney/types/json.py,sha256=8aKPnDXiNHAh-h96D1rxFaM_FyB6O_LVBptFn90RnvU,1633
157
157
  followthemoney/types/language.py,sha256=ymEXaHAPIZGlGySUqzadB7tMB6mW1ASsl1G6EtqKdls,2730
158
158
  followthemoney/types/mimetype.py,sha256=oqVP8EfGckPAI3WAziHomp6oUN7KXdIPWzGZPsRtIA8,1242
159
159
  followthemoney/types/name.py,sha256=zd0aC4VGp1SYUI8Rj0-ZXlrpUI7ZcnJIljZqsEsV-CY,2363
160
160
  followthemoney/types/number.py,sha256=vpAyhmc7UQlIm8h7Z5k8k4cTk37ykRF-AgYA1r_g1QQ,3934
161
161
  followthemoney/types/phone.py,sha256=_HanfxxTV7jp75gZO2evBc9HWwQTxEMQRaoVDcoXDIQ,3790
162
- followthemoney/types/string.py,sha256=SEh3xqQCnm377PGvwfR6ao85pHJCNeCUWBKnvccrJ7I,1216
162
+ followthemoney/types/string.py,sha256=1h92OUSgVVxu40N4BCiBZvX1VXFmvEgwtfICO92x9kc,1193
163
163
  followthemoney/types/topic.py,sha256=9FIH_WmwVOFg1CJRBF4KeE6vNTn-QQkzsKU5XaMqNJ0,4604
164
164
  followthemoney/types/url.py,sha256=sSHKtzvm4kc-VTvNCPIDykOG1hUoawhORj6Bklo0a2A,1434
165
- followthemoney-4.5.1.dist-info/METADATA,sha256=kRhvZB4Bvc1aVmGx2n2dyvYoOGhU16uUBqKf69C7bck,6747
166
- followthemoney-4.5.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
167
- followthemoney-4.5.1.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
168
- followthemoney-4.5.1.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
169
- followthemoney-4.5.1.dist-info/RECORD,,
165
+ followthemoney-4.5.2.dist-info/METADATA,sha256=Z568uQ_TOejHVvbnqmRZKHIIySLcx8bd58l6K3cTCMo,6747
166
+ followthemoney-4.5.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
167
+ followthemoney-4.5.2.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
168
+ followthemoney-4.5.2.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
169
+ followthemoney-4.5.2.dist-info/RECORD,,