followthemoney 4.3.4__py3-none-any.whl → 4.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,7 +9,7 @@ from followthemoney.statement import Statement, StatementEntity, SE
9
9
  from followthemoney.dataset import Dataset, DefaultDataset, DS
10
10
  from followthemoney.util import set_model_locale
11
11
 
12
- __version__ = "4.3.4"
12
+ __version__ = "4.5.0"
13
13
 
14
14
  # Data model singleton
15
15
  model = Model.instance()
followthemoney/compare.py CHANGED
@@ -71,31 +71,18 @@ def _compare(scores: Scores, weights: Weights, n_std: int = 1) -> float:
71
71
  return 1.0 / (1.0 + math.exp(-prob))
72
72
 
73
73
 
74
- def entity_is_same(left: EntityProxy, right: EntityProxy) -> bool:
75
- """Check if two entities are the same apart from their ID."""
76
- if left.schema != right.schema:
77
- return False
78
-
79
- props = set(left.properties.keys()).union(right.properties.keys())
80
- if 0 == len(props):
81
- return False
82
-
83
- for prop in props:
84
- left_vals = sorted(left.get(prop))
85
- right_vals = sorted(right.get(prop))
86
- if left_vals != right_vals:
87
- return False
88
- return True
89
-
90
-
91
74
  def compare(
92
75
  left: EntityProxy,
93
76
  right: EntityProxy,
94
77
  weights: Weights = COMPARE_WEIGHTS,
95
78
  ) -> float:
96
79
  """Compare two entities and return a match score."""
97
- if entity_is_same(left, right):
98
- return 1.0
80
+ if left.checksum == right.checksum:
81
+ # Check if there is any data at all (ie any basis for making a decision),
82
+ # if so, return a perfect match. This avoids marking two empty entities
83
+ # as matching. Bit ambiguous, but practical.
84
+ if len(left.properties) > 0 and len(right.properties) > 0:
85
+ return 1.0
99
86
  scores = compare_scores(left, right)
100
87
  return _compare(scores, weights)
101
88
 
@@ -1,3 +1,4 @@
1
+ from pathlib import Path
1
2
  import yaml
2
3
  import logging
3
4
  from functools import cached_property
@@ -38,6 +39,8 @@ class DatasetModel(BaseModel):
38
39
  coverage: DataCoverage | None = None
39
40
  resources: List[DataResource] = []
40
41
  children: Set[str] = set()
42
+ deprecation: Optional[str] = None
43
+ deprecated: bool = False
41
44
 
42
45
  @field_validator("name", mode="after")
43
46
  @classmethod
@@ -57,6 +60,18 @@ class DatasetModel(BaseModel):
57
60
  data["children"] = children
58
61
  return data
59
62
 
63
+ @model_validator(mode="after")
64
+ def evaluate_data(self) -> "DatasetModel":
65
+ # derive deprecated from deprecation notice:
66
+ if self.deprecation is not None:
67
+ self.deprecation = self.deprecation.strip()
68
+ if not len(self.deprecation):
69
+ self.deprecation = None
70
+ self.deprecated = self.deprecation is not None or self.deprecated
71
+ if self.deprecated and (self.coverage is None or self.coverage.end is None):
72
+ raise ValueError("Deprecated dataset coverage must have an end date.")
73
+ return self
74
+
60
75
  def get_resource(self, name: str) -> DataResource:
61
76
  for res in self.resources:
62
77
  if res.name == name:
@@ -121,10 +136,13 @@ class Dataset:
121
136
  ) -> DS:
122
137
  from followthemoney.dataset.catalog import DataCatalog
123
138
 
139
+ path = Path(path)
124
140
  with open(path, "r") as fh:
125
141
  data = yaml.safe_load(fh)
126
142
  if catalog is None:
127
143
  catalog = DataCatalog(cls, {})
144
+ if "name" not in data:
145
+ data["name"] = path.stem
128
146
  return catalog.make_dataset(data)
129
147
 
130
148
  @classmethod
followthemoney/entity.py CHANGED
@@ -5,6 +5,7 @@ from rigour.names import pick_name
5
5
  from followthemoney.proxy import EntityProxy
6
6
  from followthemoney.schema import Schema
7
7
  from followthemoney.statement import BASE_ID, Statement
8
+ from followthemoney.util import HASH_ENCODING
8
9
 
9
10
  VE = TypeVar("VE", bound="ValueEntity")
10
11
 
@@ -81,6 +82,19 @@ class ValueEntity(EntityProxy):
81
82
  merged.last_change = max(changed, default=None)
82
83
  return merged
83
84
 
85
+ @property
86
+ def checksum(self) -> str:
87
+ digest = self._checksum_digest()
88
+ for dataset in sorted(self.datasets):
89
+ digest.update(dataset.encode(HASH_ENCODING))
90
+ digest.update(b"\x1e")
91
+ for referent in sorted(self.referents):
92
+ digest.update(referent.encode(HASH_ENCODING))
93
+ digest.update(b"\x1e")
94
+ if self.last_change is not None:
95
+ digest.update(self.last_change.encode(HASH_ENCODING))
96
+ return digest.hexdigest()
97
+
84
98
  def to_dict(self) -> Dict[str, Any]:
85
99
  data = super().to_dict()
86
100
  data["referents"] = list(self.referents)
@@ -9,6 +9,7 @@ from typing import TYPE_CHECKING, cast
9
9
  from typing import Any, Dict, Generator, ItemsView, Iterable, List, Optional, Set, Tuple
10
10
 
11
11
  from followthemoney.mapping.source import Record, Source
12
+ from followthemoney.settings import USER_AGENT
12
13
  from followthemoney.util import sanitize_text
13
14
  from followthemoney.exc import InvalidMapping
14
15
 
@@ -64,7 +65,8 @@ class CSVSource(Source):
64
65
  parsed_url = urlparse(url)
65
66
  log.info("Loading: %s", url)
66
67
  if parsed_url.scheme in ["http", "https"]:
67
- res = requests.get(url, stream=True)
68
+ headers = {"User-Agent": USER_AGENT}
69
+ res = requests.get(url, stream=True, headers=headers)
68
70
  if not res.ok:
69
71
  raise InvalidMapping("Failed to open CSV: %s" % url)
70
72
  # if res.encoding is None:
followthemoney/model.py CHANGED
@@ -3,12 +3,14 @@ import yaml
3
3
  from functools import cache
4
4
  from typing import TYPE_CHECKING, Any
5
5
  from typing import Dict, Generator, Iterator, Optional, Set, TypedDict, Union
6
+ from rigour.env import ENCODING
6
7
 
7
8
  from followthemoney.types import registry
8
9
  from followthemoney.types.common import PropertyType, PropertyTypeToDict
9
10
  from followthemoney.schema import Schema, SchemaToDict
10
11
  from followthemoney.property import Property
11
12
  from followthemoney.exc import InvalidModel, InvalidData
13
+ from followthemoney.settings import MODEL_PATH
12
14
  from followthemoney.util import const
13
15
 
14
16
  if TYPE_CHECKING:
@@ -47,10 +49,7 @@ class Model(object):
47
49
  @classmethod
48
50
  def instance(cls) -> "Model":
49
51
  if cls._instance is None:
50
- model_path = os.path.dirname(__file__)
51
- model_path = os.path.join(model_path, "schema")
52
- model_path = os.environ.get("FTM_MODEL_PATH", model_path)
53
- cls._instance = cls(model_path)
52
+ cls._instance = cls(MODEL_PATH)
54
53
  return cls._instance
55
54
 
56
55
  def generate(self) -> None:
@@ -68,7 +67,7 @@ class Model(object):
68
67
  schema.properties[prop.name] = prop
69
68
 
70
69
  def _load(self, filepath: str) -> None:
71
- with open(filepath, "r", encoding="utf-8") as fh:
70
+ with open(filepath, "r", encoding=ENCODING) as fh:
72
71
  data = yaml.safe_load(fh)
73
72
  if not isinstance(data, dict):
74
73
  raise InvalidModel("Model file is not a mapping: %s" % filepath)
followthemoney/proxy.py CHANGED
@@ -1,3 +1,4 @@
1
+ import hashlib
1
2
  import logging
2
3
  from typing import TYPE_CHECKING, cast, Any
3
4
  from typing import Dict, Generator, List, Optional, Set, Tuple, Union, Type, TypeVar
@@ -10,13 +11,14 @@ from followthemoney.types import registry
10
11
  from followthemoney.types.common import PropertyType
11
12
  from followthemoney.property import Property
12
13
  from followthemoney.value import string_list, Values
13
- from followthemoney.util import sanitize_text, gettext
14
+ from followthemoney.util import HASH_ENCODING, sanitize_text, gettext
14
15
  from followthemoney.util import merge_context, make_entity_id
15
16
  from followthemoney.model import Model
16
17
  from followthemoney.schema import Schema
17
18
 
18
19
  if TYPE_CHECKING:
19
20
  from followthemoney.model import Model
21
+ from hashlib import _Hash
20
22
 
21
23
  log = logging.getLogger(__name__)
22
24
  P = Union[Property, str]
@@ -437,6 +439,28 @@ class EntityProxy(object):
437
439
  self.add(prop, values, cleaned=True, quiet=True)
438
440
  return self
439
441
 
442
+ def _checksum_digest(self) -> "_Hash":
443
+ """Create a SHA1 digest of the entity's ID, schema and properties for
444
+ change detection. This is returned as a hashlib digest object so that
445
+ it can be subclassed."""
446
+ digest = hashlib.sha1()
447
+ if self.id is not None:
448
+ digest.update(self.id.encode(HASH_ENCODING))
449
+ digest.update(self.schema.name.encode(HASH_ENCODING))
450
+ for prop in sorted(self._properties.keys()):
451
+ digest.update(prop.encode(HASH_ENCODING))
452
+ for value in sorted(self._properties[prop]):
453
+ digest.update(value.encode(HASH_ENCODING))
454
+ digest.update(b"\x1e")
455
+ digest.update(b"\x1f")
456
+ return digest
457
+
458
+ @property
459
+ def checksum(self) -> str:
460
+ """A SHA1 checksum hexdigest representing the current state of the
461
+ entity proxy. This can be used for change detection."""
462
+ return self._checksum_digest().hexdigest()
463
+
440
464
  def __getstate__(self) -> Dict[str, Any]:
441
465
  data = {slot: getattr(self, slot) for slot in self.__slots__}
442
466
  data["schema"] = self.schema.name
@@ -460,13 +484,13 @@ class EntityProxy(object):
460
484
 
461
485
  def __hash__(self) -> int:
462
486
  if self.id is None:
463
- raise RuntimeError("Cannot hash entity without an ID")
487
+ raise RuntimeError("Unhashable entity proxy without ID.")
464
488
  return hash(self.id)
465
489
 
466
490
  def __eq__(self, other: Any) -> bool:
467
491
  try:
468
492
  if self.id is None or other.id is None:
469
- raise RuntimeError("Cannot compare entities without IDs.")
493
+ raise RuntimeError("Cannot compare entity proxies without IDs.")
470
494
  return bool(self.id == other.id)
471
495
  except AttributeError:
472
496
  return False
@@ -19,6 +19,7 @@ Company:
19
19
  caption:
20
20
  - name
21
21
  - alias
22
+ - abbreviation
22
23
  - weakAlias
23
24
  - previousName
24
25
  - registrationNumber
@@ -26,6 +26,10 @@ CryptoWallet:
26
26
  maxLength: 128
27
27
  privateKey:
28
28
  label: Private key
29
+ accountId:
30
+ label: Account ID
31
+ description: Platform-specific user/account identifier
32
+ type: identifier
29
33
  creationDate:
30
34
  label: Creation date
31
35
  type: date
@@ -1,4 +1,7 @@
1
1
  Image:
2
+ # This schema defines an image file entity within the FollowTheMoney data model.
3
+ # If a `checksum` property is present, consider loading it from an Aleph archive
4
+ # or FtM data lake. Otherwise, use `sourceUrl` to fetch the image directly.
2
5
  extends:
3
6
  - Document
4
7
  label: Image
@@ -23,3 +26,7 @@ Image:
23
26
  label: "Images"
24
27
  type: entity
25
28
  range: Person
29
+ credit:
30
+ label: "Credit"
31
+ description: "The credit or attribution for the image."
32
+ type: string
@@ -18,6 +18,7 @@ LegalEntity:
18
18
  caption:
19
19
  - name
20
20
  - alias
21
+ - abbreviation
21
22
  - weakAlias
22
23
  - previousName
23
24
  - email
@@ -29,6 +30,12 @@ LegalEntity:
29
30
  end:
30
31
  - dissolutionDate
31
32
  properties:
33
+ abbreviation:
34
+ label: Abbreviation
35
+ type: name
36
+ description: "Abbreviated name or acronym"
37
+ # TODO: is un-matchable wise? The idea is to handle it like `weakAlias` rather than `alias`.
38
+ matchable: false
32
39
  email:
33
40
  label: E-Mail
34
41
  type: email
@@ -18,6 +18,7 @@ Organization:
18
18
  caption:
19
19
  - name
20
20
  - alias
21
+ - abbreviation
21
22
  - weakAlias
22
23
  - previousName
23
24
  - registrationNumber
@@ -15,8 +15,9 @@ Person:
15
15
  caption:
16
16
  - name
17
17
  - alias
18
- - weakAlias
19
18
  - previousName
19
+ - weakAlias
20
+ - abbreviation
20
21
  - lastName
21
22
  - email
22
23
  - phone
@@ -14,6 +14,7 @@ PublicBody:
14
14
  caption:
15
15
  - name
16
16
  - alias
17
+ - abbreviation
17
18
  - weakAlias
18
19
  - previousName
19
20
  required:
@@ -0,0 +1,19 @@
1
+ import os
2
+ import requests
3
+ from typing import List
4
+ from rigour.env import env_opt, env_str
5
+
6
+
7
+ def get_env_list(name: str, default: List[str] = []) -> List[str]:
8
+ value = env_opt(name)
9
+ if value is not None:
10
+ values = value.split(":")
11
+ if len(values):
12
+ return values
13
+ return default
14
+
15
+
16
+ MODEL_PATH = os.path.join(os.path.dirname(__file__), "schema")
17
+ MODEL_PATH = env_str("FTM_MODEL_PATH", MODEL_PATH)
18
+
19
+ USER_AGENT = env_str("FTM_USER_AGENT", requests.utils.default_user_agent())
@@ -1,6 +1,6 @@
1
1
  from hashlib import sha1
2
2
  from collections.abc import Mapping
3
- from typing import Any, Dict, List, Optional, Set, Type
3
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set, Type
4
4
  from typing import Generator, Iterable, Tuple, TypeVar
5
5
  from rigour.langs import LangStr
6
6
  from rigour.names.pick import pick_lang_name
@@ -10,7 +10,7 @@ from followthemoney.exc import InvalidData
10
10
  from followthemoney.schema import Schema
11
11
  from followthemoney.types.common import PropertyType
12
12
  from followthemoney.property import Property
13
- from followthemoney.util import gettext
13
+ from followthemoney.util import HASH_ENCODING, gettext
14
14
  from followthemoney.proxy import P
15
15
  from followthemoney.types import registry
16
16
  from followthemoney.value import string_list, Values
@@ -21,6 +21,9 @@ from followthemoney.statement.util import BASE_ID
21
21
 
22
22
  SE = TypeVar("SE", bound="StatementEntity")
23
23
 
24
+ if TYPE_CHECKING:
25
+ from hashlib import _Hash
26
+
24
27
 
25
28
  class StatementEntity(EntityProxy):
26
29
  """An entity object that can link to a set of datasets that it is sourced from."""
@@ -35,7 +38,12 @@ class StatementEntity(EntityProxy):
35
38
  "_statements",
36
39
  )
37
40
 
38
- def __init__(self, dataset: Dataset, data: Dict[str, Any], cleaned: bool = True):
41
+ def __init__(
42
+ self,
43
+ dataset: Dataset,
44
+ data: Dict[str, Any],
45
+ cleaned: bool = True,
46
+ ) -> None:
39
47
  data = dict(data or {})
40
48
  schema = Model.instance().get(data.pop("schema", None))
41
49
  if schema is None:
@@ -76,8 +84,7 @@ class StatementEntity(EntityProxy):
76
84
  for stmts in self._statements.values():
77
85
  for stmt in stmts:
78
86
  if stmt.entity_id is None and self.id is not None:
79
- stmt.entity_id = self.id
80
- stmt.id = stmt.generate_key()
87
+ stmt = stmt.clone(entity_id=self.id)
81
88
  if stmt.id is None:
82
89
  stmt.id = stmt.generate_key()
83
90
  yield stmt
@@ -97,9 +104,9 @@ class StatementEntity(EntityProxy):
97
104
  if stmt.first_seen is not None:
98
105
  first_seen.add(stmt.first_seen)
99
106
  if self.id is not None:
100
- digest = sha1(self.schema.name.encode("utf-8"))
107
+ digest = sha1(self.schema.name.encode(HASH_ENCODING))
101
108
  for id in sorted(ids):
102
- digest.update(id.encode("utf-8"))
109
+ digest.update(id.encode(HASH_ENCODING))
103
110
  checksum = digest.hexdigest()
104
111
  # This is to make the last_change value stable across
105
112
  # serialisation:
@@ -449,6 +456,23 @@ class StatementEntity(EntityProxy):
449
456
  data["statements"] = [stmt.to_dict() for stmt in self.statements]
450
457
  return data
451
458
 
459
+ def _checksum_digest(self) -> "_Hash":
460
+ """Create a SHA1 digest of the entity's ID, schema and properties for
461
+ change detection. This is returned as a hashlib digest object so that
462
+ it can be subclassed."""
463
+ digest = sha1()
464
+ if self.id is not None:
465
+ digest.update(self.id.encode(HASH_ENCODING))
466
+ statement_ids: List[str] = []
467
+ for stmts in self._statements.values():
468
+ for stmt in stmts:
469
+ if stmt.id is not None:
470
+ statement_ids.append(stmt.id)
471
+ for stmt_id in sorted(statement_ids):
472
+ digest.update(stmt_id.encode(HASH_ENCODING))
473
+ digest.update(b"\x1e")
474
+ return digest
475
+
452
476
  def __len__(self) -> int:
453
477
  return len(list(self._iter_stmt())) + 1
454
478
 
@@ -5,9 +5,10 @@ import logging
5
5
  from io import TextIOWrapper
6
6
  from pathlib import Path
7
7
  from types import TracebackType
8
- from typing import cast
8
+ from typing import Dict, Tuple, cast
9
9
  from typing import BinaryIO, Generator, Iterable, List, Optional, TextIO, Type
10
10
  from rigour.boolean import text_bool
11
+ from rigour.env import ENCODING
11
12
 
12
13
  from followthemoney.statement.statement import Statement, StatementDict
13
14
  from followthemoney.statement.util import unpack_prop
@@ -60,7 +61,7 @@ def read_json_statements(
60
61
 
61
62
 
62
63
  def read_csv_statements(fh: BinaryIO) -> Generator[Statement, None, None]:
63
- wrapped = TextIOWrapper(fh, encoding="utf-8")
64
+ wrapped = TextIOWrapper(fh, encoding=ENCODING)
64
65
  for row in csv.DictReader(wrapped, dialect=csv.unix_dialect):
65
66
  data = cast(StatementDict, row)
66
67
  data["external"] = text_bool(row.get("external")) or False
@@ -72,7 +73,7 @@ def read_csv_statements(fh: BinaryIO) -> Generator[Statement, None, None]:
72
73
 
73
74
 
74
75
  def read_pack_statements(fh: BinaryIO) -> Generator[Statement, None, None]:
75
- wrapped = TextIOWrapper(fh, encoding="utf-8")
76
+ wrapped = TextIOWrapper(fh, encoding=ENCODING)
76
77
  yield from read_pack_statements_decoded(wrapped)
77
78
 
78
79
 
@@ -129,10 +130,10 @@ def read_path_statements(path: Path, format: str) -> Generator[Statement, None,
129
130
 
130
131
  def get_statement_writer(fh: BinaryIO, format: str) -> "StatementWriter":
131
132
  if format == CSV:
132
- wrapped = TextIOWrapper(fh, encoding="utf-8")
133
+ wrapped = TextIOWrapper(fh, encoding=ENCODING)
133
134
  return CSVStatementWriter(wrapped)
134
135
  elif format == PACK:
135
- wrapped = TextIOWrapper(fh, encoding="utf-8")
136
+ wrapped = TextIOWrapper(fh, encoding=ENCODING)
136
137
  return PackStatementWriter(wrapped)
137
138
  elif format == JSON:
138
139
  return JSONStatementWriter(fh)
@@ -222,12 +223,14 @@ class PackStatementWriter(StatementWriter):
222
223
  "id",
223
224
  ]
224
225
  self.writer.writerow(columns)
225
- self._batch: List[List[Optional[str]]] = []
226
+ self._batch: Dict[str, Tuple[Optional[str], ...]] = {}
226
227
 
227
228
  def write(self, stmt: Statement) -> None:
228
229
  # HACK: This is very similar to the CSV writer, but at the very inner
229
230
  # loop of the application, so we're duplicating code here.
230
- row = [
231
+ if stmt.id is None:
232
+ raise RuntimeError("Cannot write pack statement without ID")
233
+ row = (
231
234
  stmt.entity_id,
232
235
  f"{stmt.schema}:{stmt.prop}",
233
236
  stmt.value,
@@ -239,13 +242,15 @@ class PackStatementWriter(StatementWriter):
239
242
  stmt.first_seen,
240
243
  stmt.last_seen,
241
244
  stmt.id,
242
- ]
243
- self._batch.append(row)
245
+ )
246
+ self._batch[stmt.id] = row
244
247
  if len(self._batch) >= CSV_BATCH:
245
- self.writer.writerows(self._batch)
246
- self._batch.clear()
248
+ self.flush()
249
+
250
+ def flush(self) -> None:
251
+ self.writer.writerows(self._batch.values())
252
+ self._batch.clear()
247
253
 
248
254
  def close(self) -> None:
249
- if len(self._batch) > 0:
250
- self.writer.writerows(self._batch)
255
+ self.flush()
251
256
  self.fh.close()
@@ -1,14 +1,22 @@
1
1
  import hashlib
2
2
  import warnings
3
3
  from sqlalchemy.engine import Row
4
- from typing import cast
5
- from typing import Any, Dict, Generator, Optional
4
+ from typing import Union, cast
5
+ from typing import Any, Dict, Generator, Optional, TypeGuard
6
6
  from typing_extensions import TypedDict, Self
7
7
  from rigour.time import datetime_iso, iso_datetime
8
8
  from rigour.boolean import bool_text
9
9
 
10
10
  from followthemoney.proxy import EntityProxy
11
- from followthemoney.statement.util import get_prop_type, BASE_ID
11
+ from followthemoney.statement.util import get_prop_type, BASE_ID, NON_LANG_TYPE_NAMES
12
+ from followthemoney.util import HASH_ENCODING
13
+
14
+
15
+ UNSET = object()
16
+
17
+
18
+ def is_not_unset(value: str | None | object) -> TypeGuard[str | None]:
19
+ return value is not UNSET
12
20
 
13
21
 
14
22
  class StatementDict(TypedDict):
@@ -42,15 +50,16 @@ class Statement(object):
42
50
 
43
51
  __slots__ = [
44
52
  "id",
45
- "entity_id",
53
+ "_entity_id",
46
54
  "canonical_id",
47
- "prop",
48
- "schema",
49
- "value",
50
- "dataset",
51
- "lang",
55
+ "_prop",
56
+ "_schema",
57
+ "_value",
58
+ "_dataset",
59
+ "_lang",
60
+ "prop_type",
52
61
  "original_value",
53
- "external",
62
+ "_external",
54
63
  "first_seen",
55
64
  "last_seen",
56
65
  "origin",
@@ -72,55 +81,95 @@ class Statement(object):
72
81
  last_seen: Optional[str] = None,
73
82
  origin: Optional[str] = None,
74
83
  ):
75
- self.entity_id = entity_id
84
+ self._entity_id = entity_id
76
85
  self.canonical_id = canonical_id or entity_id
77
- self.prop = prop
78
- self.schema = schema
79
- self.value = value
80
- self.dataset = dataset
81
- self.lang = lang
86
+ self._prop = prop
87
+ self._schema = schema
88
+ self.prop_type = get_prop_type(schema, prop)
89
+ self._value = value
90
+ self._dataset = dataset
91
+
92
+ # Remove lang for non-linguistic property types. The goal here is to avoid
93
+ # duplicate statements because of language tags, but the language metadata
94
+ # may be relevant as context for how the original_value was parsed so it's
95
+ # a bit of information loss.
96
+ if lang is not None:
97
+ if self.prop_type in NON_LANG_TYPE_NAMES:
98
+ lang = None
99
+ self._lang = lang
100
+
82
101
  self.original_value = original_value
83
102
  self.first_seen = first_seen
84
103
  self.last_seen = last_seen or first_seen
85
- self.external = external
104
+ self._external = external
86
105
  self.origin = origin
87
106
  if id is None:
88
107
  id = self.generate_key()
89
108
  self.id = id
90
109
 
91
110
  @property
92
- def prop_type(self) -> str:
93
- """The type of the property, e.g. 'string', 'number', 'url'."""
94
- return get_prop_type(self.schema, self.prop)
111
+ def entity_id(self) -> str:
112
+ """The (original) ID of the entity this statement is about."""
113
+ return self._entity_id
114
+
115
+ @property
116
+ def dataset(self) -> str:
117
+ """The dataset this statement was observed in."""
118
+ return self._dataset
119
+
120
+ @property
121
+ def prop(self) -> str:
122
+ """The property name this statement is about."""
123
+ return self._prop
124
+
125
+ @property
126
+ def schema(self) -> str:
127
+ """The schema of the entity this statement is about."""
128
+ return self._schema
129
+
130
+ @property
131
+ def value(self) -> str:
132
+ """The value of the property captured by this statement."""
133
+ return self._value
134
+
135
+ @property
136
+ def lang(self) -> Optional[str]:
137
+ """The language of the property value, if applicable."""
138
+ return self._lang
139
+
140
+ @property
141
+ def external(self) -> bool:
142
+ """Whether this statement was observed in an external dataset."""
143
+ return self._external
95
144
 
96
145
  def to_dict(self) -> StatementDict:
97
146
  return {
98
147
  "canonical_id": self.canonical_id,
99
- "entity_id": self.entity_id,
100
- "prop": self.prop,
101
- "schema": self.schema,
102
- "value": self.value,
103
- "dataset": self.dataset,
104
- "lang": self.lang,
148
+ "entity_id": self._entity_id,
149
+ "prop": self._prop,
150
+ "schema": self._schema,
151
+ "value": self._value,
152
+ "dataset": self._dataset,
153
+ "lang": self._lang,
105
154
  "original_value": self.original_value,
106
155
  "first_seen": self.first_seen,
107
156
  "last_seen": self.last_seen,
108
- "external": self.external,
157
+ "external": self._external,
109
158
  "origin": self.origin,
110
159
  "id": self.id,
111
160
  }
112
161
 
113
162
  def to_csv_row(self) -> Dict[str, Optional[str]]:
114
163
  data = cast(Dict[str, Optional[str]], self.to_dict())
115
- data["external"] = bool_text(self.external)
116
- data["prop_type"] = get_prop_type(self.schema, self.prop)
164
+ data["external"] = bool_text(self._external)
165
+ data["prop_type"] = self.prop_type
117
166
  return data
118
167
 
119
168
  def to_db_row(self) -> Dict[str, Any]:
120
169
  data = cast(Dict[str, Any], self.to_dict())
121
170
  data["first_seen"] = iso_datetime(self.first_seen)
122
171
  data["last_seen"] = iso_datetime(self.last_seen)
123
- data["prop_type"] = get_prop_type(self.schema, self.prop)
172
+ data["prop_type"] = self.prop_type
124
173
  return data
125
174
 
126
175
  def __hash__(self) -> int:
@@ -132,27 +181,83 @@ class Statement(object):
132
181
  return hash(self.id)
133
182
 
134
183
  def __repr__(self) -> str:
135
- return "<Statement(%r, %r, %r)>" % (self.entity_id, self.prop, self.value)
184
+ return "<Statement(%r, %r, %r)>" % (self._entity_id, self._prop, self._value)
136
185
 
137
186
  def __eq__(self, other: Any) -> bool:
138
187
  return not self.id != other.id
139
188
 
140
189
  def __lt__(self, other: Any) -> bool:
141
- self_key = (self.prop != BASE_ID, self.id or "")
142
- other_key = (other.prop != BASE_ID, other.id or "")
190
+ self_key = (self._prop != BASE_ID, self.id or "")
191
+ other_key = (other._prop != BASE_ID, other.id or "")
143
192
  return self_key < other_key
144
193
 
145
- def clone(self: Self) -> "Statement":
194
+ def clone(
195
+ self: Self,
196
+ *,
197
+ entity_id: Optional[str] = None,
198
+ prop: Optional[str] = None,
199
+ schema: Optional[str] = None,
200
+ value: Optional[str] = None,
201
+ dataset: Optional[str] = None,
202
+ lang: Union[str, None, object] = UNSET,
203
+ original_value: Union[str, None, object] = UNSET,
204
+ first_seen: Union[str, None, object] = UNSET,
205
+ external: Optional[bool] = None,
206
+ canonical_id: Optional[str] = None,
207
+ last_seen: Union[str, None, object] = UNSET,
208
+ origin: Union[str, None, object] = UNSET,
209
+ ) -> "Statement":
146
210
  """Make a deep copy of the given statement."""
147
- return Statement.from_dict(self.to_dict())
211
+ lang = lang if is_not_unset(lang) else self._lang
212
+ ov = original_value if is_not_unset(original_value) else self.original_value
213
+ first_seen = first_seen if is_not_unset(first_seen) else self.first_seen
214
+ last_seen = last_seen if is_not_unset(last_seen) else self.last_seen
215
+ origin = origin if is_not_unset(origin) else self.origin
216
+ if external is None:
217
+ external = self._external
218
+ if canonical_id is None and self._entity_id != self.canonical_id:
219
+ canonical_id = self.canonical_id
220
+
221
+ # Decide if the statement ID can be kept the same:
222
+ stmt_id = self.id
223
+ if entity_id is not None and entity_id != self.entity_id:
224
+ stmt_id = None
225
+ if prop is not None and prop != self._prop:
226
+ stmt_id = None
227
+ if schema is not None and schema != self._schema:
228
+ stmt_id = None
229
+ if value is not None and value != self._value:
230
+ stmt_id = None
231
+ if dataset is not None and dataset != self._dataset:
232
+ stmt_id = None
233
+ if external != self._external:
234
+ stmt_id = None
235
+ if lang != self._lang:
236
+ stmt_id = None
237
+ return Statement(
238
+ id=stmt_id,
239
+ entity_id=entity_id or self._entity_id,
240
+ prop=prop or self._prop,
241
+ schema=schema or self._schema,
242
+ value=value or self._value,
243
+ dataset=dataset or self._dataset,
244
+ lang=lang,
245
+ original_value=ov,
246
+ first_seen=first_seen,
247
+ external=external,
248
+ canonical_id=canonical_id,
249
+ last_seen=last_seen,
250
+ origin=origin,
251
+ )
148
252
 
149
253
  def generate_key(self) -> Optional[str]:
150
254
  return self.make_key(
151
- self.dataset,
152
- self.entity_id,
153
- self.prop,
154
- self.value,
155
- self.external,
255
+ self._dataset,
256
+ self._entity_id,
257
+ self._prop,
258
+ self._value,
259
+ self._external,
260
+ lang=self._lang,
156
261
  )
157
262
 
158
263
  @classmethod
@@ -163,17 +268,21 @@ class Statement(object):
163
268
  prop: str,
164
269
  value: str,
165
270
  external: Optional[bool],
271
+ lang: Optional[str] = None,
166
272
  ) -> Optional[str]:
167
273
  """Hash the key properties of a statement record to make a unique ID."""
168
274
  if prop is None or value is None:
169
275
  return None
170
- key = f"{dataset}.{entity_id}.{prop}.{value}"
276
+ if lang is None:
277
+ key = f"{dataset}.{entity_id}.{prop}.{value}"
278
+ else:
279
+ key = f"{dataset}.{entity_id}.{prop}.{value}@{lang}"
171
280
  if external:
172
281
  # We consider the external flag in key composition to avoid race conditions
173
282
  # where a certain entity might be emitted as external while it is already
174
283
  # linked in to the graph via another route.
175
284
  key = f"{key}.ext"
176
- return hashlib.sha1(key.encode("utf-8")).hexdigest()
285
+ return hashlib.sha1(key.encode(HASH_ENCODING)).hexdigest()
177
286
 
178
287
  @classmethod
179
288
  def from_dict(cls, data: StatementDict) -> "Statement":
@@ -2,10 +2,31 @@ from functools import cache
2
2
  from typing import Tuple
3
3
 
4
4
  from followthemoney.model import Model
5
+ from followthemoney.types import registry
5
6
  from followthemoney.util import const
6
7
 
7
8
  BASE_ID = "id"
8
9
 
10
+ # Some property types should not set the `lang` attribute on statements.
11
+ # These are typically non-linguistic types, although there's an argument
12
+ # that language metadata could be useful for dates and countries, where
13
+ # text parsing is likely to have taken place.
14
+ NON_LANG_TYPE_NAMES = {
15
+ registry.entity.name,
16
+ registry.date.name,
17
+ registry.checksum.name,
18
+ registry.email.name,
19
+ registry.phone.name,
20
+ registry.gender.name,
21
+ registry.mimetype.name,
22
+ registry.topic.name,
23
+ registry.url.name,
24
+ registry.country.name,
25
+ registry.language.name,
26
+ registry.ip.name,
27
+ BASE_ID,
28
+ }
29
+
9
30
 
10
31
  def pack_prop(schema: str, prop: str) -> str:
11
32
  return f"{schema}:{prop}"
@@ -1,6 +1,7 @@
1
- from typing import Optional, TYPE_CHECKING
1
+ from typing import Callable, Optional, TYPE_CHECKING, Sequence
2
2
  from babel.core import Locale
3
3
  from rigour.territories import get_ftm_countries, lookup_territory
4
+ from rigour.territories import territories_intersect
4
5
 
5
6
  from followthemoney.types.common import EnumType, EnumValues
6
7
  from followthemoney.util import defer as _
@@ -25,6 +26,20 @@ class CountryType(EnumType):
25
26
  def _locale_names(self, locale: Locale) -> EnumValues:
26
27
  return {t.code: t.name for t in get_ftm_countries()}
27
28
 
29
+ def compare(self, left: str, right: str) -> float:
30
+ overlap = territories_intersect([left], [right])
31
+ return 1.0 if len(overlap) else 0.0
32
+
33
+ def compare_sets(
34
+ self,
35
+ left: Sequence[str],
36
+ right: Sequence[str],
37
+ func: Callable[[Sequence[float]], float] = max,
38
+ ) -> float:
39
+ """Compare two sets of values and select the highest-scored result."""
40
+ overlap = territories_intersect(left, right)
41
+ return 1.0 if len(overlap) else 0.0
42
+
28
43
  def clean_text(
29
44
  self,
30
45
  text: str,
@@ -27,6 +27,16 @@ class DateType(PropertyType):
27
27
  matchable = True
28
28
  max_length = 32
29
29
 
30
+ HISTORIC = "1001-01-01"
31
+ """A sentinel date value representing a very old date, used to indicate historic (and often imprecise) dates
32
+ that can be assumed to be long in the past."""
33
+
34
+ RELEVANCE_MIN = "1900-01-01"
35
+ """A cutoff date value representing the minimum relevant date for modern fincrime applications."""
36
+
37
+ RELEVANCE_MAX = "2100-12-31"
38
+ """A cutoff date value representing the maximum relevant date for modern fincrime applications."""
39
+
30
40
  def validate(
31
41
  self, value: str, fuzzy: bool = False, format: Optional[str] = None
32
42
  ) -> bool:
@@ -4,7 +4,7 @@ from rigour.langs import iso_639_alpha3
4
4
 
5
5
  from followthemoney.types.common import EnumType, EnumValues
6
6
  from followthemoney.util import defer as _, gettext
7
- from followthemoney.util import get_env_list
7
+ from followthemoney.settings import get_env_list
8
8
 
9
9
  if TYPE_CHECKING:
10
10
  from followthemoney.proxy import EntityProxy
followthemoney/util.py CHANGED
@@ -10,10 +10,11 @@ from threading import local
10
10
  from typing import cast, Dict, Any, List, Optional, TypeVar, Union
11
11
  from normality import stringify
12
12
  from normality.cleaning import remove_unsafe_chars
13
- from normality.encoding import DEFAULT_ENCODING
13
+ from rigour.env import ENCODING
14
14
  from banal import is_mapping, unique_list, ensure_list
15
15
 
16
16
  MEGABYTE = 1024 * 1024
17
+ HASH_ENCODING = "utf-8"
17
18
  DEFAULT_LOCALE = "en"
18
19
  ENTITY_ID_LEN = 200
19
20
 
@@ -55,16 +56,7 @@ def get_locale() -> Locale:
55
56
  return Locale.parse(state.locale)
56
57
 
57
58
 
58
- def get_env_list(name: str, default: List[str] = []) -> List[str]:
59
- value = stringify(os.environ.get(name))
60
- if value is not None:
61
- values = value.split(":")
62
- if len(values):
63
- return values
64
- return default
65
-
66
-
67
- def sanitize_text(value: Any, encoding: str = DEFAULT_ENCODING) -> Optional[str]:
59
+ def sanitize_text(value: Any, encoding: str = ENCODING) -> Optional[str]:
68
60
  text = stringify(value, encoding_default=encoding)
69
61
  if text is None:
70
62
  return None
@@ -74,8 +66,8 @@ def sanitize_text(value: Any, encoding: str = DEFAULT_ENCODING) -> Optional[str]
74
66
  log.warning("Cannot NFC text: %s", ex)
75
67
  return None
76
68
  text = remove_unsafe_chars(text)
77
- byte_text = text.encode(DEFAULT_ENCODING, "replace")
78
- text = byte_text.decode(DEFAULT_ENCODING, "replace")
69
+ byte_text = text.encode("utf-8", "replace")
70
+ text = byte_text.decode("utf-8", "replace")
79
71
  if len(text) == 0:
80
72
  return None
81
73
  return text
@@ -88,7 +80,7 @@ def key_bytes(key: Any) -> bytes:
88
80
  text = stringify(key)
89
81
  if text is None:
90
82
  return b""
91
- return text.encode("utf-8")
83
+ return text.encode(ENCODING)
92
84
 
93
85
 
94
86
  def join_text(*parts: Any, sep: str = " ") -> Optional[str]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: followthemoney
3
- Version: 4.3.4
3
+ Version: 4.5.0
4
4
  Summary: A data model for anti corruption data modeling and analysis.
5
5
  Project-URL: Documentation, https://followthemoney.tech/
6
6
  Project-URL: Repository, https://github.com/opensanctions/followthemoney.git
@@ -48,9 +48,9 @@ Requires-Dist: prefixdate<1.0.0,>=0.5.0
48
48
  Requires-Dist: pydantic<3.0.0,>=2.11.0
49
49
  Requires-Dist: pytz>=2021.1
50
50
  Requires-Dist: pyyaml<7.0.0,>=5.0.0
51
- Requires-Dist: rdflib<7.5.0,>=6.2.0
51
+ Requires-Dist: rdflib<7.6.0,>=6.2.0
52
52
  Requires-Dist: requests<3.0.0,>=2.21.0
53
- Requires-Dist: rigour<2.0.0,>=1.4.0
53
+ Requires-Dist: rigour<2.0.0,>=1.6.0
54
54
  Requires-Dist: sqlalchemy[mypy]<3.0.0,>=2.0.0
55
55
  Provides-Extra: dev
56
56
  Requires-Dist: build; extra == 'dev'
@@ -1,19 +1,20 @@
1
- followthemoney/__init__.py,sha256=UHPYwFuppho0TsOPG7vZw6KSsIDHX_Ar7gO1Vw7dLig,856
2
- followthemoney/compare.py,sha256=frgumsDv4Ru9UkNof62jDjKCxxpCgV1Rusfu8s20uGA,6327
3
- followthemoney/entity.py,sha256=YB6u7BMeQX5toAe7DndZBiPtzy0BQ5CKp3ix6kHxk3Y,3499
1
+ followthemoney/__init__.py,sha256=3Mvq7FWl0cNbTovkJhys0iHU24UdFT44P2AqszoGs-8,856
2
+ followthemoney/compare.py,sha256=6y6fqtbbfW7ee4_EVXPcKCIr75GjPqXvtHfvExJ-KK0,6119
3
+ followthemoney/entity.py,sha256=biAjuuHlwsVT02imAsaWP0YtgdfU8skCntzBU3mgJpg,4052
4
4
  followthemoney/exc.py,sha256=GyMgwY4QVm87hLevDfV7gM1MJsDqfNCi_UQw7F_A8X8,858
5
5
  followthemoney/graph.py,sha256=7X1CGHGvmktS2LSZqld2iXWzG7B831eCNYyBqamqEJ8,10921
6
6
  followthemoney/helpers.py,sha256=KCdv1XAE7KQEXBiXp52Kvuck7wMaeNVBM3uaFemcvb4,7873
7
7
  followthemoney/messages.py,sha256=zUEa9CFecU8nRafIzhN6TKCh1kEihiIyIS1qr8PxY4g,806
8
- followthemoney/model.py,sha256=chAUGob5tXWS0o8f0X6mSFCCnI2HoHE5pXU9O5ukrpc,7447
8
+ followthemoney/model.py,sha256=kwv3GT9vGqEMF-m4z94OZJ0phFzcv-K0Eme1ElOMEmU,7346
9
9
  followthemoney/names.py,sha256=LODQqExKEHdH4z6Mmbhlm0KeKRzGcptaSWzYXZ7lONI,1120
10
10
  followthemoney/namespace.py,sha256=utggu9IGA8bhgEYom3OUB1KxkAJR_TrMNbY5MUF_db8,4536
11
11
  followthemoney/ontology.py,sha256=WWY_PYQGl5Ket4zZBuZglzQxD2Bh9UqHok6GJNNX7GA,3001
12
12
  followthemoney/property.py,sha256=1w7p9aKLxRqFRnl3PlssqmvulSErl_0D5T2SefT3UFU,8675
13
- followthemoney/proxy.py,sha256=xZUsT4W9sLojaSD8j6P2JQyQuOQKBWDVovm5epxvtI0,19674
13
+ followthemoney/proxy.py,sha256=ZD8jK88oj1aUTpF7s6r91g8tmTiEfubUKX7CGV5I9rE,20714
14
14
  followthemoney/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  followthemoney/schema.py,sha256=dwZg0eZF7yaxP9fJ5NQUKWadWZYTo9U-sVzzXZn_6do,18500
16
- followthemoney/util.py,sha256=LoCSp1iE6VwXjotCkBXFRppeQs55726GzOuNIu3CvRE,4409
16
+ followthemoney/settings.py,sha256=wkwrkDPypsAICLo-d7l-zpKrKe_fTVYPllZO-RX3oGM,507
17
+ followthemoney/util.py,sha256=JCZ35h_qrqxWO1FTxpa0eoOiko6DWTD9u1QKmH5ZaLU,4150
17
18
  followthemoney/value.py,sha256=BJ4Sj5Tg2kMrslR6FjQUr96d8Kt75U7ny9NgzVGT0ZE,2335
18
19
  followthemoney/cli/__init__.py,sha256=0mmz84uhXRp2qUn3syKnDXofU3MMAAe291s7htqX0Bg,187
19
20
  followthemoney/cli/aggregate.py,sha256=xQTFpU3cVVj7fplpX4OJVrRlTVpn6b9kBr_Vb87pKfg,2164
@@ -26,7 +27,7 @@ followthemoney/cli/util.py,sha256=C3nGMVY3-9JHSFLn3AGvTNcAdvGcgfFS-7jXIzKg6Ik,47
26
27
  followthemoney/dataset/__init__.py,sha256=rOKsI39dccDaYcSa7ASoNKkhmbFYUArxMCRqtrxy2iE,477
27
28
  followthemoney/dataset/catalog.py,sha256=bIpxr0jvJeutNSmCaXREQac7TyvZak2Y_QoCFdCM0d4,3001
28
29
  followthemoney/dataset/coverage.py,sha256=rBnKs7VngCtIuaDqrF5D0ygCHg8NAMkYbmtl7336PSI,724
29
- followthemoney/dataset/dataset.py,sha256=wWUzWsdzDW9qXLy8lS6Bpy08WMcaNU30oiMXU8jfo14,4724
30
+ followthemoney/dataset/dataset.py,sha256=7lP3gz94AAaxX6J7OWlNmvPoWkoKvH7ISwoNqYsC4Go,5495
30
31
  followthemoney/dataset/publisher.py,sha256=nexZe9XexV8WI5Id999vf5OH_DPUmiKQ_GT3c59eF44,893
31
32
  followthemoney/dataset/resource.py,sha256=S_-tNjMwHQ8LcSOsZO_xhXD-vLK90wyxtIRBbyCJ0Xo,1164
32
33
  followthemoney/dataset/util.py,sha256=mfVTXdbNnWly6cXo4SjNzHuJK1c1uNBwULYOVg1gK5I,1617
@@ -38,7 +39,7 @@ followthemoney/export/graph.py,sha256=v0z1FgadyFk5aQ0A5q8E9R4fSO-Tpi5JU9YTDwnRKD
38
39
  followthemoney/export/neo4j.py,sha256=4Lih9lt3-5ATERhyMcfJfkiETG3tqj9vY4N9s7jiYmw,7049
39
40
  followthemoney/export/rdf.py,sha256=BOd4AIAVobwpmJ5GjyIqn9ZQHUwKQ-3fMdnD-Lcid0s,2978
40
41
  followthemoney/mapping/__init__.py,sha256=iwNqzzvrzJNbNDlOCaDLlBTUrNTlnYHIB5cvo_-9oN4,82
41
- followthemoney/mapping/csv.py,sha256=1eqQk1tn5JSEcr4rrv44XdT5biUk7J0E275uvUNoOrA,3125
42
+ followthemoney/mapping/csv.py,sha256=bRaqFsr02DNBUZBj-GIBzJOaumnIRu8QbNlYWOMqjHU,3238
42
43
  followthemoney/mapping/entity.py,sha256=-x_VBHiVthIrZZ-PVKD3oBAq6LYcsyeYW-9TFv80k7M,5905
43
44
  followthemoney/mapping/property.py,sha256=41V16HJh6da7oKdSJWyRcyMkx2XFd6iDm9-4PH7Wihw,5036
44
45
  followthemoney/mapping/query.py,sha256=8M6bOlEX2p_bbVwEwTu_1slEtU0cfRJB7ajZp-F07CE,2622
@@ -54,12 +55,12 @@ followthemoney/schema/Audio.yaml,sha256=Eb1rZGUEOX7XDAj_1YIN28NCBzMvkopQBNwgHt_k
54
55
  followthemoney/schema/BankAccount.yaml,sha256=60v-VD296lW1Qq7fx--CzxfPNwfCcyMV6xIl8OrSy5g,1431
55
56
  followthemoney/schema/Call.yaml,sha256=kbVCnVxucBrEplxehXHThLSJAJjy_GhWan-IeZZjr0M,980
56
57
  followthemoney/schema/CallForTenders.yaml,sha256=2IWonTzfSbrkynMoEWqv5fekUeFM_xDKpKIbRe1XDbo,3227
57
- followthemoney/schema/Company.yaml,sha256=1P_JA2LIIQtzP_Y8FYM-a47VNSWneX8QjdmGD1DFpWQ,3527
58
+ followthemoney/schema/Company.yaml,sha256=R2reqGWtCtURvIAvVB8lRQqGiGPXqjSNdkSWrXLY26w,3546
58
59
  followthemoney/schema/Contract.yaml,sha256=aSPB64T1h-0nuLDv6krasUvvoPZgo6sWUbv60c3vmzI,1541
59
60
  followthemoney/schema/ContractAward.yaml,sha256=b2spaZHYCaP1yR1RCsrI7mUjk-fAF7BUE3dc8Vl3cUQ,1689
60
61
  followthemoney/schema/CourtCase.yaml,sha256=lcovnY0Ne_xcggvkqfCW_RHvsRKo8kFTCPCyovAXRtI,599
61
62
  followthemoney/schema/CourtCaseParty.yaml,sha256=MpodN2251V_MYD2dBOHZ_qD7Uv6cLg8Gd_b-I8HZjPI,588
62
- followthemoney/schema/CryptoWallet.yaml,sha256=dhUpPf1eONsyzyORXoo_zFXgn0BuSLB4OKj2sHZ5wzA,1069
63
+ followthemoney/schema/CryptoWallet.yaml,sha256=haae5h-b8qNBh9FQB2clBUqNYpdLoyyI_NEWMxe3tZ8,1192
63
64
  followthemoney/schema/Debt.yaml,sha256=gSGl1xKPaPWAYYEcX7MxezVn3Gu-CYBIzxGzMd9UTm4,657
64
65
  followthemoney/schema/Directorship.yaml,sha256=BMx2AQTLy5ta_lWPnYKj7LFjZWTwtu1hgWncISdKf28,773
65
66
  followthemoney/schema/Document.yaml,sha256=JxoYl_2o-ebVXh5MzRIKEHfV3q_E--wXGO2HG7zBaZg,2646
@@ -72,29 +73,29 @@ followthemoney/schema/Family.yaml,sha256=cgZ7ZM_2p_dzUGrLVj8QL86gpeDM-AoK8AbyHjh
72
73
  followthemoney/schema/Folder.yaml,sha256=W12iJHTYua7bF7oTpgiDbarEkfa0Q2gSmfLdl537HQY,180
73
74
  followthemoney/schema/HyperText.yaml,sha256=Wg5dWeLrVjbXiI-ao69tosJ7rI0DvNU8cCo8AhUXwh4,319
74
75
  followthemoney/schema/Identification.yaml,sha256=6txjZs6-3Kn94c3G4tDeDt9Jb4FW55-xjSnYVrvmiEA,853
75
- followthemoney/schema/Image.yaml,sha256=wuznboWECGiV96_GQiXq1-oKNoxO8zKisR4xyusnEn8,394
76
+ followthemoney/schema/Image.yaml,sha256=5fjTy46LqzoybJlPs79RAaBuI-9_i6y77419KGiGOTM,753
76
77
  followthemoney/schema/Interest.yaml,sha256=VUrehmsN1WgtS1oAa5jn_JGtSkZGGYLGNahp-R5JhOQ,282
77
78
  followthemoney/schema/Interval.yaml,sha256=8YJQ51GI-GxvbjYs3uC593kQtCepWW_7ZiNnlbPm2aM,2084
78
- followthemoney/schema/LegalEntity.yaml,sha256=Yy28a9NYl6mgnNvNvApO2MAnfkkaZRfzl-GtRn56CFI,5101
79
+ followthemoney/schema/LegalEntity.yaml,sha256=cG-o0slUnQNoJsrgQuk_CaFA_bschjalHHPl-zoKRPA,5352
79
80
  followthemoney/schema/License.yaml,sha256=bXESXY-JpSmc5sthZe4sssXhx50UoLPAMED9FvEUyRU,534
80
81
  followthemoney/schema/Membership.yaml,sha256=IPmaOX4Ai2r4sGcA5ig2WmLvWHb38akdxp4smEdDWOE,710
81
82
  followthemoney/schema/Mention.yaml,sha256=nBeulR_Jm4x75aJ7yNF0TAVhHJqXQaEzOutLIn_YU-4,1086
82
83
  followthemoney/schema/Message.yaml,sha256=PAxZ2NRFVvnOlp9Ohh5fJDEThjJ0jm3M2YCbJ9KtMuE,1565
83
84
  followthemoney/schema/Note.yaml,sha256=NohwtFupxIssZuEgQowiQWqKit4uQ-OatAu3yp9eJj4,411
84
85
  followthemoney/schema/Occupancy.yaml,sha256=WojlqzuWao84MJxRE9K6a-1D-Jtu78-0h6laODhdKw8,975
85
- followthemoney/schema/Organization.yaml,sha256=F_01GsdSsS2cUjJ732R6UfsiodylMITq23EiXdEuoDo,1136
86
+ followthemoney/schema/Organization.yaml,sha256=Dl6cFDBVFaQfiZrqnaeZAVNvGUiAtj7F4v_ZLxVtPCM,1155
86
87
  followthemoney/schema/Ownership.yaml,sha256=tLWESE9VX0aUuhe6C1pToq2-auPVZBdE3xvBmTRfmPc,1057
87
88
  followthemoney/schema/Package.yaml,sha256=gPr-P3lcg7OOAav_KVa8baK4yK57JwfcXwxXheD96UQ,310
88
89
  followthemoney/schema/Page.yaml,sha256=YjYqaH2sOry0z4xh44CsX_eyuRClD6ZS0d2o2uQXFbo,1062
89
90
  followthemoney/schema/Pages.yaml,sha256=KKPGZ06Ehp5mWIGnYfHUBN9jT03bk8nakw0pB5bA_7E,450
90
91
  followthemoney/schema/Passport.yaml,sha256=rpuLC86sdXnHF-prFQM4mAqYzlSGWKvPE4Cphtn2KRw,805
91
92
  followthemoney/schema/Payment.yaml,sha256=WRBJuj9ljsxLBs-0g9Z9UD87uR1RTtuUiYnWOnKr1qA,1757
92
- followthemoney/schema/Person.yaml,sha256=485xdaX3YYfJNtMkKuwRS78dD2l2bMYjGCmo-NqTvOY,2845
93
+ followthemoney/schema/Person.yaml,sha256=eFvXYhXbJ9RlhCeX0a0Fv6p_67JD9lMdBAJImZiSzMU,2864
93
94
  followthemoney/schema/PlainText.yaml,sha256=hfnVi-HmQeDbqDquSpkPJax9hNm86ioXGr4hzNzyPFE,278
94
95
  followthemoney/schema/Position.yaml,sha256=ZpxjWOLxwva_on32r9WD5ys0Ty3YxCju41mg9HG-pe0,1308
95
96
  followthemoney/schema/Project.yaml,sha256=2svtyGJopS0UrqPiuYGpBzj30V7k3LRDX4N1U56y4yY,462
96
97
  followthemoney/schema/ProjectParticipant.yaml,sha256=xNehEu90uqUfboNouezhZQ8ZQLxzWq1yyNO4kua-Lyc,727
97
- followthemoney/schema/PublicBody.yaml,sha256=uw4Ok36E8J5d7tyijHG9Sf_iNFOpUhGpo6UyKgTQp3Q,348
98
+ followthemoney/schema/PublicBody.yaml,sha256=N_3-dFNsilHxgIjkq60049PKX46wP0PvP7y9uDpnlwo,367
98
99
  followthemoney/schema/RealEstate.yaml,sha256=NWFHXqEHskYQN-kvQESZpu74nztShqoYSZEjZAr-DHM,1363
99
100
  followthemoney/schema/Representation.yaml,sha256=sCvFnUDQaElq2cqSB0rILcMYb2gaMZqlzxlHxyX9IGg,792
100
101
  followthemoney/schema/Risk.yaml,sha256=2BRVBqb6wiLHxb_V50P-YMAOhjC64UVHDyh5PASpCIA,728
@@ -114,10 +115,10 @@ followthemoney/schema/Vessel.yaml,sha256=zWHUfSK8g6Pz58ZyCaK0AFJ4u_UHjEIUGC4c_7o
114
115
  followthemoney/schema/Video.yaml,sha256=LY3DYMWTHXiAhL0hxBCNCz50cp2sPbUlEhhig5Fbjos,327
115
116
  followthemoney/schema/Workbook.yaml,sha256=iikWPElz4klA7SkWH7eae6xqhbkMCIP_3zdeXzFEMU0,354
116
117
  followthemoney/statement/__init__.py,sha256=7m2VUCAuqNZXIY0WFJRFkw5UG14QuxATL4f_xbqKwhw,633
117
- followthemoney/statement/entity.py,sha256=oeudwhqfYLJKqbzxEydasMHqevkDASNyYN6s0yddW6I,18755
118
- followthemoney/statement/serialize.py,sha256=9eXzQ1biR2mSxWRID5C7xDdku4b4ZImHeRJ53yLZ0yo,7225
119
- followthemoney/statement/statement.py,sha256=Ae-EYuzS8S12BkaRqrvMuI1C7YwlRKa5C_pTBELyNMM,8029
120
- followthemoney/statement/util.py,sha256=QMYSwAcnh2fCM1LtH_-v8Z5GdwOZfUTT1UkQ_ZMQ470,797
118
+ followthemoney/statement/entity.py,sha256=vznbPTMSWcezYt--bu2RretMresgwp-bUqCsv4w-U90,19568
119
+ followthemoney/statement/serialize.py,sha256=PcG2Qf1jYcF_rF1YybYWBhX7NiX6WZ94u1WUv9Mh-Lw,7386
120
+ followthemoney/statement/statement.py,sha256=bAwrrKyYRdJVxRGuXDQIOA7cdEngk1NKUaij_gAwSd4,11876
121
+ followthemoney/statement/util.py,sha256=jHBwK3FIBynUJZRlBBOHayalAFrqpXf2f2JwkHi0zAU,1450
121
122
  followthemoney/translations/messages.pot,sha256=JhtY9NJ9wP_EAX4APxOqMyvKcX53oIC9kAxBsliJkf4,107703
122
123
  followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo,sha256=uhb2crSNh8K2ts_QUeD2wvgWgzzpLJWRzXok-Uyx3Zk,38795
123
124
  followthemoney/translations/ar/LC_MESSAGES/followthemoney.po,sha256=DuIfvR5v0sPGwFbeg3y6_jCbeglvHWXQ2LDH6prfwLc,121326
@@ -145,15 +146,15 @@ followthemoney/types/__init__.py,sha256=rWwQeiuMh2BNIuvhpMfJ4bPADDvt9Axu1eedvNFi
145
146
  followthemoney/types/address.py,sha256=Gc-hqz00dRRkeANqkyPD2wtt7ksR9wMf4CX-U-5XvMo,2214
146
147
  followthemoney/types/checksum.py,sha256=_0ev2Wwtd4iX_bLz0Lu-xcJIxNfH_V9kBKKtuZhoAwg,802
147
148
  followthemoney/types/common.py,sha256=4ks7zPT8rknrGSd4JFc1zRkS-TL4SX-25_ZbjcVDos0,10081
148
- followthemoney/types/country.py,sha256=X3Z1j6rIiCITpLtpFXwjTIh9uJwI99_gmPMJx8Jsq2w,1512
149
- followthemoney/types/date.py,sha256=O3Xav9QNBqjy7LuUWiZrUdGrOvwwOdk6ea5qQEStIwQ,3084
149
+ followthemoney/types/country.py,sha256=pwDiI_ipts9Oi2U7fHALYMJPCJHOqyI_2Liq7XI2XrA,2086
150
+ followthemoney/types/date.py,sha256=SGk8q8qICIrA5Lf6wPbDU6v6oJYEiu1dZFZ18BO7i80,3551
150
151
  followthemoney/types/email.py,sha256=L3RTYrMABlNQF7hCynXGfzoj6YNEHW5JAY_BwuhoZdA,3375
151
152
  followthemoney/types/entity.py,sha256=56h6x8Ct7hWZIC3BjZHmRKGy9Ff2vuULNWH3xDRsKiU,2317
152
153
  followthemoney/types/gender.py,sha256=XY9us98Sk25O1xnHN-88tbv9pHy6Mn7SR8GRYi6v5gI,1683
153
154
  followthemoney/types/identifier.py,sha256=TYJwE7urjHFxEcDuiZMxGoCN6n34rAIdCt5_96Y7vI0,2198
154
155
  followthemoney/types/ip.py,sha256=rCXkRrh_jDeWAhswCgSe6Z4uhIW7yvLAxIEw4x1SM3A,1279
155
156
  followthemoney/types/json.py,sha256=Hefwns1-ziJf310MWvdfX5ICkOgj9cnnMJuqq1e6qKY,1676
156
- followthemoney/types/language.py,sha256=JDFCO9g9lvgKihhYTz6e7TbJd3V9RTGJlS8kDn6aSCY,2726
157
+ followthemoney/types/language.py,sha256=ymEXaHAPIZGlGySUqzadB7tMB6mW1ASsl1G6EtqKdls,2730
157
158
  followthemoney/types/mimetype.py,sha256=oqVP8EfGckPAI3WAziHomp6oUN7KXdIPWzGZPsRtIA8,1242
158
159
  followthemoney/types/name.py,sha256=zd0aC4VGp1SYUI8Rj0-ZXlrpUI7ZcnJIljZqsEsV-CY,2363
159
160
  followthemoney/types/number.py,sha256=vpAyhmc7UQlIm8h7Z5k8k4cTk37ykRF-AgYA1r_g1QQ,3934
@@ -161,8 +162,8 @@ followthemoney/types/phone.py,sha256=_HanfxxTV7jp75gZO2evBc9HWwQTxEMQRaoVDcoXDIQ
161
162
  followthemoney/types/string.py,sha256=SEh3xqQCnm377PGvwfR6ao85pHJCNeCUWBKnvccrJ7I,1216
162
163
  followthemoney/types/topic.py,sha256=9FIH_WmwVOFg1CJRBF4KeE6vNTn-QQkzsKU5XaMqNJ0,4604
163
164
  followthemoney/types/url.py,sha256=sSHKtzvm4kc-VTvNCPIDykOG1hUoawhORj6Bklo0a2A,1434
164
- followthemoney-4.3.4.dist-info/METADATA,sha256=H3K0seI3SN6axQwxgjGK0zErE7ySmoYtOxDom-waNDU,6747
165
- followthemoney-4.3.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
166
- followthemoney-4.3.4.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
167
- followthemoney-4.3.4.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
168
- followthemoney-4.3.4.dist-info/RECORD,,
165
+ followthemoney-4.5.0.dist-info/METADATA,sha256=dteJlaqiJHM7Du9BTLMiOgl-DEjNp2ewkkFU8nMcCYA,6747
166
+ followthemoney-4.5.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
167
+ followthemoney-4.5.0.dist-info/entry_points.txt,sha256=caoFTlf213jhg5sz3TNSofutjUTzaKtWATuSIdd9Cps,653
168
+ followthemoney-4.5.0.dist-info/licenses/LICENSE,sha256=H6_EVXisnJC0-18CjXIaqrBSFq_VH3OnS7u3dccOv6g,1148
169
+ followthemoney-4.5.0.dist-info/RECORD,,