followthemoney 4.1.0__py3-none-any.whl → 4.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +1 -1
- followthemoney/entity.py +9 -6
- followthemoney/statement/__init__.py +2 -0
- followthemoney/statement/entity.py +37 -22
- followthemoney/types/address.py +3 -3
- followthemoney/types/country.py +2 -8
- followthemoney/types/name.py +5 -2
- followthemoney/util.py +4 -3
- {followthemoney-4.1.0.dist-info → followthemoney-4.1.2.dist-info}/METADATA +3 -4
- {followthemoney-4.1.0.dist-info → followthemoney-4.1.2.dist-info}/RECORD +13 -13
- {followthemoney-4.1.0.dist-info → followthemoney-4.1.2.dist-info}/WHEEL +0 -0
- {followthemoney-4.1.0.dist-info → followthemoney-4.1.2.dist-info}/entry_points.txt +0 -0
- {followthemoney-4.1.0.dist-info → followthemoney-4.1.2.dist-info}/licenses/LICENSE +0 -0
followthemoney/__init__.py
CHANGED
|
@@ -9,7 +9,7 @@ from followthemoney.statement import Statement, StatementEntity, SE
|
|
|
9
9
|
from followthemoney.dataset import Dataset, DefaultDataset, DS
|
|
10
10
|
from followthemoney.util import set_model_locale
|
|
11
11
|
|
|
12
|
-
__version__ = "4.1.0"
|
|
12
|
+
__version__ = "4.1.2"
|
|
13
13
|
|
|
14
14
|
# Data model singleton
|
|
15
15
|
model = Model.instance()
|
followthemoney/entity.py
CHANGED
|
@@ -4,7 +4,7 @@ from rigour.names import pick_name
|
|
|
4
4
|
|
|
5
5
|
from followthemoney.proxy import EntityProxy
|
|
6
6
|
from followthemoney.schema import Schema
|
|
7
|
-
from followthemoney.statement
|
|
7
|
+
from followthemoney.statement import BASE_ID, Statement
|
|
8
8
|
|
|
9
9
|
VE = TypeVar("VE", bound="ValueEntity")
|
|
10
10
|
|
|
@@ -38,11 +38,14 @@ class ValueEntity(EntityProxy):
|
|
|
38
38
|
# add data from statement dict if present.
|
|
39
39
|
# this updates the dataset and referents set
|
|
40
40
|
for stmt_data in data.pop("statements", []):
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
41
|
+
stmt = Statement.from_dict(stmt_data)
|
|
42
|
+
self.datasets.add(stmt.dataset)
|
|
43
|
+
if stmt.schema != self.schema.name:
|
|
44
|
+
self.schema = schema.model.common_schema(self.schema, stmt.schema)
|
|
45
|
+
if stmt.entity_id != self.id:
|
|
46
|
+
self.referents.add(stmt.entity_id)
|
|
47
|
+
if stmt.prop != BASE_ID:
|
|
48
|
+
self.add(stmt.prop, stmt.value)
|
|
46
49
|
|
|
47
50
|
def merge(self: VE, other: EntityProxy) -> VE:
|
|
48
51
|
merged = super().merge(other)
|
|
@@ -3,6 +3,7 @@ from followthemoney.statement.serialize import CSV, JSON, PACK, FORMATS
|
|
|
3
3
|
from followthemoney.statement.serialize import write_statements
|
|
4
4
|
from followthemoney.statement.serialize import read_statements, read_path_statements
|
|
5
5
|
from followthemoney.statement.entity import SE, StatementEntity
|
|
6
|
+
from followthemoney.statement.util import BASE_ID
|
|
6
7
|
|
|
7
8
|
__all__ = [
|
|
8
9
|
"Statement",
|
|
@@ -13,6 +14,7 @@ __all__ = [
|
|
|
13
14
|
"JSON",
|
|
14
15
|
"PACK",
|
|
15
16
|
"FORMATS",
|
|
17
|
+
"BASE_ID",
|
|
16
18
|
"write_statements",
|
|
17
19
|
"read_statements",
|
|
18
20
|
"read_path_statements",
|
|
@@ -363,39 +363,54 @@ class StatementEntity(EntityProxy):
|
|
|
363
363
|
self.extra_referents.update(other.extra_referents)
|
|
364
364
|
return self
|
|
365
365
|
|
|
366
|
-
def
|
|
366
|
+
def to_context_dict(self) -> Dict[str, Any]:
|
|
367
|
+
"""Return a dictionary representation of the entity for context."""
|
|
367
368
|
data: Dict[str, Any] = {
|
|
368
369
|
"id": self.id,
|
|
369
370
|
"caption": self.caption,
|
|
370
371
|
"schema": self.schema.name,
|
|
371
|
-
"properties": self.properties,
|
|
372
|
-
"referents": list(self.referents),
|
|
373
|
-
"datasets": list(self.datasets),
|
|
374
372
|
}
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
373
|
+
referents: Set[Optional[str]] = set(self.extra_referents)
|
|
374
|
+
datasets = set(self.datasets)
|
|
375
|
+
origins: Set[str] = set()
|
|
376
|
+
first_seen = None
|
|
377
|
+
last_seen = None
|
|
378
|
+
for stmts in self._statements.values():
|
|
379
|
+
for stmt in stmts:
|
|
380
|
+
if stmt.first_seen is not None:
|
|
381
|
+
if first_seen is None or stmt.first_seen < first_seen:
|
|
382
|
+
first_seen = stmt.first_seen
|
|
383
|
+
if stmt.last_seen is not None:
|
|
384
|
+
if last_seen is None or stmt.last_seen > last_seen:
|
|
385
|
+
last_seen = stmt.last_seen
|
|
386
|
+
if stmt.entity_id is not None and stmt.entity_id != self.id:
|
|
387
|
+
referents.add(stmt.entity_id)
|
|
388
|
+
datasets.add(stmt.dataset)
|
|
389
|
+
if stmt.origin is not None:
|
|
390
|
+
origins.add(stmt.origin)
|
|
391
|
+
|
|
392
|
+
data["referents"] = list(referents)
|
|
393
|
+
data["datasets"] = list(datasets)
|
|
394
|
+
if origins:
|
|
395
|
+
data["origin"] = list(origins)
|
|
396
|
+
|
|
397
|
+
if first_seen is not None:
|
|
398
|
+
data["first_seen"] = first_seen
|
|
399
|
+
if last_seen is not None:
|
|
400
|
+
data["last_seen"] = last_seen
|
|
379
401
|
if self.last_change is not None:
|
|
380
402
|
data["last_change"] = self.last_change
|
|
381
403
|
return data
|
|
382
404
|
|
|
405
|
+
def to_dict(self) -> Dict[str, Any]:
|
|
406
|
+
data = self.to_context_dict()
|
|
407
|
+
data["properties"] = self.properties
|
|
408
|
+
return data
|
|
409
|
+
|
|
383
410
|
def to_statement_dict(self) -> Dict[str, Any]:
|
|
384
411
|
"""Return a dictionary representation of the entity's statements."""
|
|
385
|
-
data
|
|
386
|
-
|
|
387
|
-
"caption": self.caption,
|
|
388
|
-
"schema": self.schema.name,
|
|
389
|
-
"statements": [stmt.to_dict() for stmt in self.statements],
|
|
390
|
-
"referents": list(self.referents),
|
|
391
|
-
"datasets": list(self.datasets),
|
|
392
|
-
}
|
|
393
|
-
if self.first_seen is not None:
|
|
394
|
-
data["first_seen"] = self.first_seen
|
|
395
|
-
if self.last_seen is not None:
|
|
396
|
-
data["last_seen"] = self.last_seen
|
|
397
|
-
if self.last_change is not None:
|
|
398
|
-
data["last_change"] = self.last_change
|
|
412
|
+
data = self.to_context_dict()
|
|
413
|
+
data["statements"] = [stmt.to_dict() for stmt in self.statements]
|
|
399
414
|
return data
|
|
400
415
|
|
|
401
416
|
def __len__(self) -> int:
|
followthemoney/types/address.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from typing import Optional, TYPE_CHECKING
|
|
3
|
-
from normality import slugify_text,
|
|
3
|
+
from normality import slugify_text, squash_spaces
|
|
4
4
|
from rigour.addresses import normalize_address
|
|
5
5
|
from rigour.text.distance import levenshtein_similarity
|
|
6
6
|
|
|
@@ -37,8 +37,8 @@ class AddressType(PropertyType):
|
|
|
37
37
|
"""Basic clean-up."""
|
|
38
38
|
address = self.LINE_BREAKS.sub(", ", text)
|
|
39
39
|
address = self.COMMATA.sub(", ", address)
|
|
40
|
-
collapsed =
|
|
41
|
-
if collapsed
|
|
40
|
+
collapsed = squash_spaces(address)
|
|
41
|
+
if len(collapsed) < 1:
|
|
42
42
|
return None
|
|
43
43
|
return collapsed
|
|
44
44
|
|
followthemoney/types/country.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
|
-
import countrynames
|
|
2
1
|
from typing import Optional, TYPE_CHECKING
|
|
3
2
|
from babel.core import Locale
|
|
4
|
-
from rigour.territories import
|
|
3
|
+
from rigour.territories import get_ftm_countries, lookup_territory
|
|
5
4
|
|
|
6
5
|
from followthemoney.types.common import EnumType, EnumValues
|
|
7
6
|
from followthemoney.util import const, defer as _
|
|
@@ -37,16 +36,11 @@ class CountryType(EnumType):
|
|
|
37
36
|
|
|
38
37
|
The input may be a country code, a country name, etc.
|
|
39
38
|
"""
|
|
40
|
-
territory =
|
|
39
|
+
territory = lookup_territory(text, fuzzy=fuzzy)
|
|
41
40
|
if territory is not None:
|
|
42
41
|
ftm_country = territory.ftm_country
|
|
43
42
|
if ftm_country is not None:
|
|
44
43
|
return ftm_country
|
|
45
|
-
code = countrynames.to_code(text, fuzzy=fuzzy)
|
|
46
|
-
if code is not None:
|
|
47
|
-
territory = get_territory(code)
|
|
48
|
-
if territory is not None:
|
|
49
|
-
return territory.ftm_country
|
|
50
44
|
return None
|
|
51
45
|
|
|
52
46
|
def country_hint(self, value: str) -> str:
|
followthemoney/types/name.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from typing import TYPE_CHECKING, Optional, Sequence
|
|
2
2
|
from normality import slugify_text
|
|
3
|
-
from normality.cleaning import
|
|
3
|
+
from normality.cleaning import squash_spaces, strip_quotes
|
|
4
4
|
from rigour.env import MAX_NAME_LENGTH
|
|
5
5
|
from rigour.names import pick_name, tokenize_name
|
|
6
6
|
from rigour.text.distance import levenshtein_similarity
|
|
@@ -40,7 +40,10 @@ class NameType(PropertyType):
|
|
|
40
40
|
name = strip_quotes(text)
|
|
41
41
|
if name is None:
|
|
42
42
|
return None
|
|
43
|
-
|
|
43
|
+
name = squash_spaces(name)
|
|
44
|
+
if len(name) == 0:
|
|
45
|
+
return None
|
|
46
|
+
return name
|
|
44
47
|
|
|
45
48
|
def pick(self, values: Sequence[str]) -> Optional[str]:
|
|
46
49
|
"""From a set of names, pick the most plausible user-facing one."""
|
followthemoney/util.py
CHANGED
|
@@ -74,10 +74,11 @@ def sanitize_text(value: Any, encoding: str = DEFAULT_ENCODING) -> Optional[str]
|
|
|
74
74
|
log.warning("Cannot NFC text: %s", ex)
|
|
75
75
|
return None
|
|
76
76
|
text = remove_unsafe_chars(text)
|
|
77
|
-
if text is None:
|
|
78
|
-
return None
|
|
79
77
|
byte_text = text.encode(DEFAULT_ENCODING, "replace")
|
|
80
|
-
|
|
78
|
+
text = byte_text.decode(DEFAULT_ENCODING, "replace")
|
|
79
|
+
if len(text) == 0:
|
|
80
|
+
return None
|
|
81
|
+
return text
|
|
81
82
|
|
|
82
83
|
|
|
83
84
|
def key_bytes(key: Any) -> bytes:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: followthemoney
|
|
3
|
-
Version: 4.1.0
|
|
3
|
+
Version: 4.1.2
|
|
4
4
|
Summary: A data model for anti corruption data modeling and analysis.
|
|
5
5
|
Project-URL: Documentation, https://followthemoney.tech/
|
|
6
6
|
Project-URL: Repository, https://github.com/opensanctions/followthemoney.git
|
|
@@ -39,9 +39,8 @@ Requires-Python: >=3.10
|
|
|
39
39
|
Requires-Dist: babel<3.0.0,>=2.14.0
|
|
40
40
|
Requires-Dist: banal<1.1.0,>=1.0.6
|
|
41
41
|
Requires-Dist: click<9.0.0,>=8.0
|
|
42
|
-
Requires-Dist: countrynames<2.0.0,>=1.13.0
|
|
43
42
|
Requires-Dist: networkx<3.5,>=2.5
|
|
44
|
-
Requires-Dist: normality<4.0.0,>=3.0.
|
|
43
|
+
Requires-Dist: normality<4.0.0,>=3.0.1
|
|
45
44
|
Requires-Dist: openpyxl<4.0.0,>=3.0.5
|
|
46
45
|
Requires-Dist: orjson<4.0,>=3.10.18
|
|
47
46
|
Requires-Dist: phonenumbers<10.0.0,>=8.12.22
|
|
@@ -51,7 +50,7 @@ Requires-Dist: pytz>=2021.1
|
|
|
51
50
|
Requires-Dist: pyyaml<7.0.0,>=5.0.0
|
|
52
51
|
Requires-Dist: rdflib<7.2.0,>=6.2.0
|
|
53
52
|
Requires-Dist: requests<3.0.0,>=2.21.0
|
|
54
|
-
Requires-Dist: rigour<2.0.0,>=1.
|
|
53
|
+
Requires-Dist: rigour<2.0.0,>=1.2.0
|
|
55
54
|
Requires-Dist: sqlalchemy[mypy]<3.0.0,>=2.0.0
|
|
56
55
|
Provides-Extra: dev
|
|
57
56
|
Requires-Dist: build; extra == 'dev'
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
followthemoney/__init__.py,sha256=
|
|
1
|
+
followthemoney/__init__.py,sha256=1G8YlnRJL9_8pMYDUPl29Njf33pRdmat6kmg2elXBbs,856
|
|
2
2
|
followthemoney/compare.py,sha256=bZlnj2VMoe67q4Lyq_VwS1a-EJnEK1kC8prbs8jyL9E,5774
|
|
3
|
-
followthemoney/entity.py,sha256=
|
|
3
|
+
followthemoney/entity.py,sha256=hHY9yysn_iFTtXqcHg4hHhYfmgLw4prWul8rD1X82Y0,3184
|
|
4
4
|
followthemoney/exc.py,sha256=GyMgwY4QVm87hLevDfV7gM1MJsDqfNCi_UQw7F_A8X8,858
|
|
5
5
|
followthemoney/graph.py,sha256=7X1CGHGvmktS2LSZqld2iXWzG7B831eCNYyBqamqEJ8,10921
|
|
6
6
|
followthemoney/helpers.py,sha256=Btb6BlHg_c-qCXZo-NP_LURKG-qu-QD3Fj1ev_c7Xic,7956
|
|
@@ -13,7 +13,7 @@ followthemoney/property.py,sha256=RDTzTXJeeLFLptQL1_gr1S1T-vdDe-8MGMwsRaGQh0I,76
|
|
|
13
13
|
followthemoney/proxy.py,sha256=LD4K1oPABXMX212UZxwLu7XOHRDyVBwTlqudTUsUZRQ,19619
|
|
14
14
|
followthemoney/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
15
|
followthemoney/schema.py,sha256=WYnPE4Lego0pJHlojECEv0aO9Miw_YIvEb35HoDo4Zk,18087
|
|
16
|
-
followthemoney/util.py,sha256=
|
|
16
|
+
followthemoney/util.py,sha256=LoCSp1iE6VwXjotCkBXFRppeQs55726GzOuNIu3CvRE,4409
|
|
17
17
|
followthemoney/value.py,sha256=BJ4Sj5Tg2kMrslR6FjQUr96d8Kt75U7ny9NgzVGT0ZE,2335
|
|
18
18
|
followthemoney/cli/__init__.py,sha256=0mmz84uhXRp2qUn3syKnDXofU3MMAAe291s7htqX0Bg,187
|
|
19
19
|
followthemoney/cli/aggregate.py,sha256=xQTFpU3cVVj7fplpX4OJVrRlTVpn6b9kBr_Vb87pKfg,2164
|
|
@@ -112,8 +112,8 @@ followthemoney/schema/Vehicle.yaml,sha256=Ypl4A5HJFOZfZh3DK0ewN-hyJuCMcovR0mPNdd
|
|
|
112
112
|
followthemoney/schema/Vessel.yaml,sha256=nFaUJ_0BzFJstvog1iDvwV9DHKHr9ky4DLb1NZGGh1E,1096
|
|
113
113
|
followthemoney/schema/Video.yaml,sha256=LY3DYMWTHXiAhL0hxBCNCz50cp2sPbUlEhhig5Fbjos,327
|
|
114
114
|
followthemoney/schema/Workbook.yaml,sha256=iikWPElz4klA7SkWH7eae6xqhbkMCIP_3zdeXzFEMU0,354
|
|
115
|
-
followthemoney/statement/__init__.py,sha256=
|
|
116
|
-
followthemoney/statement/entity.py,sha256=
|
|
115
|
+
followthemoney/statement/__init__.py,sha256=7m2VUCAuqNZXIY0WFJRFkw5UG14QuxATL4f_xbqKwhw,633
|
|
116
|
+
followthemoney/statement/entity.py,sha256=MKHGmFeDwcW2lTbAeKSdU53YwDuoLm5iKy4roeb8_lo,16172
|
|
117
117
|
followthemoney/statement/serialize.py,sha256=9eXzQ1biR2mSxWRID5C7xDdku4b4ZImHeRJ53yLZ0yo,7225
|
|
118
118
|
followthemoney/statement/statement.py,sha256=Ae-EYuzS8S12BkaRqrvMuI1C7YwlRKa5C_pTBELyNMM,8029
|
|
119
119
|
followthemoney/statement/util.py,sha256=B-ozuRc1TWvpop52873Pqt5OPj8H6uk4KyRJLfAhr10,780
|
|
@@ -141,10 +141,10 @@ followthemoney/translations/ru/LC_MESSAGES/followthemoney.po,sha256=7SQWytOTvoAQ
|
|
|
141
141
|
followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo,sha256=SC84e_ZF_oFJG1NKdyZY_W6Kb6POORZB6wdeAcEWmnE,487
|
|
142
142
|
followthemoney/translations/tr/LC_MESSAGES/followthemoney.po,sha256=AZC3marhtVVq8Ck1FOgnt4sbDMz548nX48O9GDwImbQ,89826
|
|
143
143
|
followthemoney/types/__init__.py,sha256=rWwQeiuMh2BNIuvhpMfJ4bPADDvt9Axu1eedvNFi0qY,3350
|
|
144
|
-
followthemoney/types/address.py,sha256=
|
|
144
|
+
followthemoney/types/address.py,sha256=nMFCj5QJyqA1ddpUmDLpRTum0nGXE-J70_WGnaLXnYo,2130
|
|
145
145
|
followthemoney/types/checksum.py,sha256=zZrU8WX4CY3Vta_vOyfgDNzIwbmtje7AaDv3O1fBMnk,823
|
|
146
146
|
followthemoney/types/common.py,sha256=4ks7zPT8rknrGSd4JFc1zRkS-TL4SX-25_ZbjcVDos0,10081
|
|
147
|
-
followthemoney/types/country.py,sha256=
|
|
147
|
+
followthemoney/types/country.py,sha256=n8vihijDVud_3Ra-as4Ize0jf_HbcdKVR5YX3TlKZy0,1533
|
|
148
148
|
followthemoney/types/date.py,sha256=PjcaEyW6CBzf0-gHWKUsKjWIaD3AVBEl0zLSRQOVXxc,3105
|
|
149
149
|
followthemoney/types/email.py,sha256=L3RTYrMABlNQF7hCynXGfzoj6YNEHW5JAY_BwuhoZdA,3375
|
|
150
150
|
followthemoney/types/entity.py,sha256=oDxVEhuxyU1ScpOpebPpUm3o0I9j_p7Qrq-t5yNpluQ,2338
|
|
@@ -154,14 +154,14 @@ followthemoney/types/ip.py,sha256=mMFTODFiXAJROCUYJvoLAShyIiTIWVmMBh5zT_GquYM,13
|
|
|
154
154
|
followthemoney/types/json.py,sha256=V3qJD5RxJykNX51u3w1Nx9xqoNBnkulhzkJI9XMYKFo,1690
|
|
155
155
|
followthemoney/types/language.py,sha256=SXgRRH-DyPmyyrqYurSyMiG6WHB8a0Gw81XxroEGD-c,2747
|
|
156
156
|
followthemoney/types/mimetype.py,sha256=NdpqVLx3Bre_myYvnbjmdd5wZBf01tllrbhegjO8_m0,1263
|
|
157
|
-
followthemoney/types/name.py,sha256=
|
|
157
|
+
followthemoney/types/name.py,sha256=ZWGDebv01qByh_yBYOVoS3Edlm3_JVPShQMklKc6ZOA,2384
|
|
158
158
|
followthemoney/types/number.py,sha256=OdVuHDd4IYIIHhx_317JKeMjBAGtsJ2TAcxoZKZ4MkY,3948
|
|
159
159
|
followthemoney/types/phone.py,sha256=r8uRqWinS0CYnYBTs405k5gO4jeatUDgjdzzijoMKJE,3811
|
|
160
160
|
followthemoney/types/string.py,sha256=fqyTauAm4mNnNaoH-yH087RBbNh-G5ZZUO3awTGQUUg,1230
|
|
161
161
|
followthemoney/types/topic.py,sha256=CS5IoI8gm4MSVxfV6K4mGd20_tT1SaKMkcOt_ObSsAg,3678
|
|
162
162
|
followthemoney/types/url.py,sha256=QFpS_JIV8unFHuh_uGv22SWUUkocBoOpzLsAJWom_gI,1455
|
|
163
|
-
followthemoney-4.1.
|
|
164
|
-
followthemoney-4.1.
|
|
165
|
-
followthemoney-4.1.
|
|
166
|
-
followthemoney-4.1.
|
|
167
|
-
followthemoney-4.1.
|
|
163
|
+
followthemoney-4.1.2.dist-info/METADATA,sha256=Hy34w4IjkrPyJ2O0nPRhUcamZ3oiN7h_kbGYdyIUnnI,6748
|
|
164
|
+
followthemoney-4.1.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
165
|
+
followthemoney-4.1.2.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
|
|
166
|
+
followthemoney-4.1.2.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
|
|
167
|
+
followthemoney-4.1.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|