followthemoney 4.1.0__py3-none-any.whl → 4.1.1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of followthemoney might be problematic. Click here for more details.

@@ -9,7 +9,7 @@ from followthemoney.statement import Statement, StatementEntity, SE
9
9
  from followthemoney.dataset import Dataset, DefaultDataset, DS
10
10
  from followthemoney.util import set_model_locale
11
11
 
12
- __version__ = "4.1.0"
12
+ __version__ = "4.1.1"
13
13
 
14
14
  # Data model singleton
15
15
  model = Model.instance()
followthemoney/entity.py CHANGED
@@ -4,7 +4,7 @@ from rigour.names import pick_name
4
4
 
5
5
  from followthemoney.proxy import EntityProxy
6
6
  from followthemoney.schema import Schema
7
- from followthemoney.statement.util import BASE_ID
7
+ from followthemoney.statement import BASE_ID, Statement
8
8
 
9
9
  VE = TypeVar("VE", bound="ValueEntity")
10
10
 
@@ -38,11 +38,14 @@ class ValueEntity(EntityProxy):
38
38
  # add data from statement dict if present.
39
39
  # this updates the dataset and referents set
40
40
  for stmt_data in data.pop("statements", []):
41
- self.datasets.add(stmt_data["dataset"])
42
- if stmt_data["entity_id"] != self.id:
43
- self.referents.add(stmt_data["entity_id"])
44
- if stmt_data["prop"] != BASE_ID:
45
- self.add(stmt_data["prop"], stmt_data["value"])
41
+ stmt = Statement.from_dict(stmt_data)
42
+ self.datasets.add(stmt.dataset)
43
+ if stmt.schema != self.schema.name:
44
+ self.schema = schema.model.common_schema(self.schema, stmt.schema)
45
+ if stmt.entity_id != self.id:
46
+ self.referents.add(stmt.entity_id)
47
+ if stmt.prop != BASE_ID:
48
+ self.add(stmt.prop, stmt.value)
46
49
 
47
50
  def merge(self: VE, other: EntityProxy) -> VE:
48
51
  merged = super().merge(other)
@@ -3,6 +3,7 @@ from followthemoney.statement.serialize import CSV, JSON, PACK, FORMATS
3
3
  from followthemoney.statement.serialize import write_statements
4
4
  from followthemoney.statement.serialize import read_statements, read_path_statements
5
5
  from followthemoney.statement.entity import SE, StatementEntity
6
+ from followthemoney.statement.util import BASE_ID
6
7
 
7
8
  __all__ = [
8
9
  "Statement",
@@ -13,6 +14,7 @@ __all__ = [
13
14
  "JSON",
14
15
  "PACK",
15
16
  "FORMATS",
17
+ "BASE_ID",
16
18
  "write_statements",
17
19
  "read_statements",
18
20
  "read_path_statements",
@@ -363,39 +363,49 @@ class StatementEntity(EntityProxy):
363
363
  self.extra_referents.update(other.extra_referents)
364
364
  return self
365
365
 
366
- def to_dict(self) -> Dict[str, Any]:
366
+ def to_context_dict(self) -> Dict[str, Any]:
367
+ """Return a dictionary representation of the entity for context."""
367
368
  data: Dict[str, Any] = {
368
369
  "id": self.id,
369
370
  "caption": self.caption,
370
371
  "schema": self.schema.name,
371
- "properties": self.properties,
372
- "referents": list(self.referents),
373
- "datasets": list(self.datasets),
374
372
  }
375
- if self.first_seen is not None:
376
- data["first_seen"] = self.first_seen
377
- if self.last_seen is not None:
378
- data["last_seen"] = self.last_seen
373
+ referents: Set[Optional[str]] = set(self.extra_referents)
374
+ datasets = set(self.datasets)
375
+ first_seen = None
376
+ last_seen = None
377
+ for stmts in self._statements.values():
378
+ for stmt in stmts:
379
+ if stmt.first_seen is not None:
380
+ if first_seen is None or stmt.first_seen < first_seen:
381
+ first_seen = stmt.first_seen
382
+ if stmt.last_seen is not None:
383
+ if last_seen is None or stmt.last_seen > last_seen:
384
+ last_seen = stmt.last_seen
385
+ if stmt.entity_id is not None and stmt.entity_id != self.id:
386
+ referents.add(stmt.entity_id)
387
+ datasets.add(stmt.dataset)
388
+
389
+ data["referents"] = list(referents)
390
+ data["datasets"] = list(datasets)
391
+
392
+ if first_seen is not None:
393
+ data["first_seen"] = first_seen
394
+ if last_seen is not None:
395
+ data["last_seen"] = last_seen
379
396
  if self.last_change is not None:
380
397
  data["last_change"] = self.last_change
381
398
  return data
382
399
 
400
+ def to_dict(self) -> Dict[str, Any]:
401
+ data = self.to_context_dict()
402
+ data["properties"] = self.properties
403
+ return data
404
+
383
405
  def to_statement_dict(self) -> Dict[str, Any]:
384
406
  """Return a dictionary representation of the entity's statements."""
385
- data: Dict[str, Any] = {
386
- "id": self.id,
387
- "caption": self.caption,
388
- "schema": self.schema.name,
389
- "statements": [stmt.to_dict() for stmt in self.statements],
390
- "referents": list(self.referents),
391
- "datasets": list(self.datasets),
392
- }
393
- if self.first_seen is not None:
394
- data["first_seen"] = self.first_seen
395
- if self.last_seen is not None:
396
- data["last_seen"] = self.last_seen
397
- if self.last_change is not None:
398
- data["last_change"] = self.last_change
407
+ data = self.to_context_dict()
408
+ data["statements"] = [stmt.to_dict() for stmt in self.statements]
399
409
  return data
400
410
 
401
411
  def __len__(self) -> int:
@@ -1,6 +1,6 @@
1
1
  import re
2
2
  from typing import Optional, TYPE_CHECKING
3
- from normality import slugify_text, collapse_spaces
3
+ from normality import slugify_text, squash_spaces
4
4
  from rigour.addresses import normalize_address
5
5
  from rigour.text.distance import levenshtein_similarity
6
6
 
@@ -37,8 +37,8 @@ class AddressType(PropertyType):
37
37
  """Basic clean-up."""
38
38
  address = self.LINE_BREAKS.sub(", ", text)
39
39
  address = self.COMMATA.sub(", ", address)
40
- collapsed = collapse_spaces(address)
41
- if collapsed is None:
40
+ collapsed = squash_spaces(address)
41
+ if len(collapsed) < 1:
42
42
  return None
43
43
  return collapsed
44
44
 
@@ -1,6 +1,6 @@
1
1
  from typing import TYPE_CHECKING, Optional, Sequence
2
2
  from normality import slugify_text
3
- from normality.cleaning import collapse_spaces, strip_quotes
3
+ from normality.cleaning import squash_spaces, strip_quotes
4
4
  from rigour.env import MAX_NAME_LENGTH
5
5
  from rigour.names import pick_name, tokenize_name
6
6
  from rigour.text.distance import levenshtein_similarity
@@ -40,7 +40,10 @@ class NameType(PropertyType):
40
40
  name = strip_quotes(text)
41
41
  if name is None:
42
42
  return None
43
- return collapse_spaces(name)
43
+ name = squash_spaces(name)
44
+ if len(name) == 0:
45
+ return None
46
+ return name
44
47
 
45
48
  def pick(self, values: Sequence[str]) -> Optional[str]:
46
49
  """From a set of names, pick the most plausible user-facing one."""
followthemoney/util.py CHANGED
@@ -74,10 +74,11 @@ def sanitize_text(value: Any, encoding: str = DEFAULT_ENCODING) -> Optional[str]
74
74
  log.warning("Cannot NFC text: %s", ex)
75
75
  return None
76
76
  text = remove_unsafe_chars(text)
77
- if text is None:
78
- return None
79
77
  byte_text = text.encode(DEFAULT_ENCODING, "replace")
80
- return byte_text.decode(DEFAULT_ENCODING, "replace")
78
+ text = byte_text.decode(DEFAULT_ENCODING, "replace")
79
+ if len(text) == 0:
80
+ return None
81
+ return text
81
82
 
82
83
 
83
84
  def key_bytes(key: Any) -> bytes:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: followthemoney
3
- Version: 4.1.0
3
+ Version: 4.1.1
4
4
  Summary: A data model for anti corruption data modeling and analysis.
5
5
  Project-URL: Documentation, https://followthemoney.tech/
6
6
  Project-URL: Repository, https://github.com/opensanctions/followthemoney.git
@@ -41,7 +41,7 @@ Requires-Dist: banal<1.1.0,>=1.0.6
41
41
  Requires-Dist: click<9.0.0,>=8.0
42
42
  Requires-Dist: countrynames<2.0.0,>=1.13.0
43
43
  Requires-Dist: networkx<3.5,>=2.5
44
- Requires-Dist: normality<4.0.0,>=3.0.0
44
+ Requires-Dist: normality<4.0.0,>=3.0.1
45
45
  Requires-Dist: openpyxl<4.0.0,>=3.0.5
46
46
  Requires-Dist: orjson<4.0,>=3.10.18
47
47
  Requires-Dist: phonenumbers<10.0.0,>=8.12.22
@@ -51,7 +51,7 @@ Requires-Dist: pytz>=2021.1
51
51
  Requires-Dist: pyyaml<7.0.0,>=5.0.0
52
52
  Requires-Dist: rdflib<7.2.0,>=6.2.0
53
53
  Requires-Dist: requests<3.0.0,>=2.21.0
54
- Requires-Dist: rigour<2.0.0,>=1.1.0
54
+ Requires-Dist: rigour<2.0.0,>=1.1.1
55
55
  Requires-Dist: sqlalchemy[mypy]<3.0.0,>=2.0.0
56
56
  Provides-Extra: dev
57
57
  Requires-Dist: build; extra == 'dev'
@@ -1,6 +1,6 @@
1
- followthemoney/__init__.py,sha256=XG3sMmmgBvV0kMOTtFP0JSqRQnlTSqtDEwtQ4MFxkSs,856
1
+ followthemoney/__init__.py,sha256=_drb_fsELoJ6qpcyjNSJbn8OVaBH-ZdeDQKaFJ1S_Qk,856
2
2
  followthemoney/compare.py,sha256=bZlnj2VMoe67q4Lyq_VwS1a-EJnEK1kC8prbs8jyL9E,5774
3
- followthemoney/entity.py,sha256=9wLKE3iFapxRQWOs_OAMzK3wtklf2HXaHaMYydIInWE,3045
3
+ followthemoney/entity.py,sha256=hHY9yysn_iFTtXqcHg4hHhYfmgLw4prWul8rD1X82Y0,3184
4
4
  followthemoney/exc.py,sha256=GyMgwY4QVm87hLevDfV7gM1MJsDqfNCi_UQw7F_A8X8,858
5
5
  followthemoney/graph.py,sha256=7X1CGHGvmktS2LSZqld2iXWzG7B831eCNYyBqamqEJ8,10921
6
6
  followthemoney/helpers.py,sha256=Btb6BlHg_c-qCXZo-NP_LURKG-qu-QD3Fj1ev_c7Xic,7956
@@ -13,7 +13,7 @@ followthemoney/property.py,sha256=RDTzTXJeeLFLptQL1_gr1S1T-vdDe-8MGMwsRaGQh0I,76
13
13
  followthemoney/proxy.py,sha256=LD4K1oPABXMX212UZxwLu7XOHRDyVBwTlqudTUsUZRQ,19619
14
14
  followthemoney/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  followthemoney/schema.py,sha256=WYnPE4Lego0pJHlojECEv0aO9Miw_YIvEb35HoDo4Zk,18087
16
- followthemoney/util.py,sha256=QeNZI0rJPI2KcK2JQ-ka3rk_IobMkFSVN0jk-JJhAWI,4391
16
+ followthemoney/util.py,sha256=LoCSp1iE6VwXjotCkBXFRppeQs55726GzOuNIu3CvRE,4409
17
17
  followthemoney/value.py,sha256=BJ4Sj5Tg2kMrslR6FjQUr96d8Kt75U7ny9NgzVGT0ZE,2335
18
18
  followthemoney/cli/__init__.py,sha256=0mmz84uhXRp2qUn3syKnDXofU3MMAAe291s7htqX0Bg,187
19
19
  followthemoney/cli/aggregate.py,sha256=xQTFpU3cVVj7fplpX4OJVrRlTVpn6b9kBr_Vb87pKfg,2164
@@ -112,8 +112,8 @@ followthemoney/schema/Vehicle.yaml,sha256=Ypl4A5HJFOZfZh3DK0ewN-hyJuCMcovR0mPNdd
112
112
  followthemoney/schema/Vessel.yaml,sha256=nFaUJ_0BzFJstvog1iDvwV9DHKHr9ky4DLb1NZGGh1E,1096
113
113
  followthemoney/schema/Video.yaml,sha256=LY3DYMWTHXiAhL0hxBCNCz50cp2sPbUlEhhig5Fbjos,327
114
114
  followthemoney/schema/Workbook.yaml,sha256=iikWPElz4klA7SkWH7eae6xqhbkMCIP_3zdeXzFEMU0,354
115
- followthemoney/statement/__init__.py,sha256=PvhLPhmQrezBKCe8rEwJlyTWlrnCzSfyfchVc8gXXEA,568
116
- followthemoney/statement/entity.py,sha256=92tOai7Yt5GZkOylZcy7866P0iLJsYEzmt-2T7WbXMg,15540
115
+ followthemoney/statement/__init__.py,sha256=7m2VUCAuqNZXIY0WFJRFkw5UG14QuxATL4f_xbqKwhw,633
116
+ followthemoney/statement/entity.py,sha256=gXvBTBwHL-GfmBXdw9HoP6WSbiwBOrTTOllQwkIObyQ,15986
117
117
  followthemoney/statement/serialize.py,sha256=9eXzQ1biR2mSxWRID5C7xDdku4b4ZImHeRJ53yLZ0yo,7225
118
118
  followthemoney/statement/statement.py,sha256=Ae-EYuzS8S12BkaRqrvMuI1C7YwlRKa5C_pTBELyNMM,8029
119
119
  followthemoney/statement/util.py,sha256=B-ozuRc1TWvpop52873Pqt5OPj8H6uk4KyRJLfAhr10,780
@@ -141,7 +141,7 @@ followthemoney/translations/ru/LC_MESSAGES/followthemoney.po,sha256=7SQWytOTvoAQ
141
141
  followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo,sha256=SC84e_ZF_oFJG1NKdyZY_W6Kb6POORZB6wdeAcEWmnE,487
142
142
  followthemoney/translations/tr/LC_MESSAGES/followthemoney.po,sha256=AZC3marhtVVq8Ck1FOgnt4sbDMz548nX48O9GDwImbQ,89826
143
143
  followthemoney/types/__init__.py,sha256=rWwQeiuMh2BNIuvhpMfJ4bPADDvt9Axu1eedvNFi0qY,3350
144
- followthemoney/types/address.py,sha256=8qapsxfAPFml9MyAcRxN5m_YkmkZYUUJyBXAiAAYUoE,2133
144
+ followthemoney/types/address.py,sha256=nMFCj5QJyqA1ddpUmDLpRTum0nGXE-J70_WGnaLXnYo,2130
145
145
  followthemoney/types/checksum.py,sha256=zZrU8WX4CY3Vta_vOyfgDNzIwbmtje7AaDv3O1fBMnk,823
146
146
  followthemoney/types/common.py,sha256=4ks7zPT8rknrGSd4JFc1zRkS-TL4SX-25_ZbjcVDos0,10081
147
147
  followthemoney/types/country.py,sha256=mUCjwhUbA5Ef5HYuKb1KbH4aZ3MxaNwE1p77uOZMuG0,1745
@@ -154,14 +154,14 @@ followthemoney/types/ip.py,sha256=mMFTODFiXAJROCUYJvoLAShyIiTIWVmMBh5zT_GquYM,13
154
154
  followthemoney/types/json.py,sha256=V3qJD5RxJykNX51u3w1Nx9xqoNBnkulhzkJI9XMYKFo,1690
155
155
  followthemoney/types/language.py,sha256=SXgRRH-DyPmyyrqYurSyMiG6WHB8a0Gw81XxroEGD-c,2747
156
156
  followthemoney/types/mimetype.py,sha256=NdpqVLx3Bre_myYvnbjmdd5wZBf01tllrbhegjO8_m0,1263
157
- followthemoney/types/name.py,sha256=1CzwNRgbvNpE5C9hm3PbMmJILCbpTDey5l3eL--bkTQ,2317
157
+ followthemoney/types/name.py,sha256=ZWGDebv01qByh_yBYOVoS3Edlm3_JVPShQMklKc6ZOA,2384
158
158
  followthemoney/types/number.py,sha256=OdVuHDd4IYIIHhx_317JKeMjBAGtsJ2TAcxoZKZ4MkY,3948
159
159
  followthemoney/types/phone.py,sha256=r8uRqWinS0CYnYBTs405k5gO4jeatUDgjdzzijoMKJE,3811
160
160
  followthemoney/types/string.py,sha256=fqyTauAm4mNnNaoH-yH087RBbNh-G5ZZUO3awTGQUUg,1230
161
161
  followthemoney/types/topic.py,sha256=CS5IoI8gm4MSVxfV6K4mGd20_tT1SaKMkcOt_ObSsAg,3678
162
162
  followthemoney/types/url.py,sha256=QFpS_JIV8unFHuh_uGv22SWUUkocBoOpzLsAJWom_gI,1455
163
- followthemoney-4.1.0.dist-info/METADATA,sha256=aHjJGapyWo-mWOU29zQqjpGS2szD62G2D36wAraAYTE,6791
164
- followthemoney-4.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
165
- followthemoney-4.1.0.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
166
- followthemoney-4.1.0.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
167
- followthemoney-4.1.0.dist-info/RECORD,,
163
+ followthemoney-4.1.1.dist-info/METADATA,sha256=OatNsAWxjixfh_s-iGY046rcZRoCs9p-6rPLwB8VxTM,6791
164
+ followthemoney-4.1.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
165
+ followthemoney-4.1.1.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
166
+ followthemoney-4.1.1.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
167
+ followthemoney-4.1.1.dist-info/RECORD,,