followthemoney 4.3.0__py3-none-any.whl → 4.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. followthemoney/__init__.py +1 -1
  2. followthemoney/compare.py +6 -0
  3. followthemoney/dataset/dataset.py +18 -0
  4. followthemoney/entity.py +29 -15
  5. followthemoney/mapping/csv.py +3 -1
  6. followthemoney/model.py +6 -5
  7. followthemoney/property.py +23 -4
  8. followthemoney/proxy.py +32 -11
  9. followthemoney/schema/Company.yaml +5 -0
  10. followthemoney/schema/CryptoWallet.yaml +4 -0
  11. followthemoney/schema/Image.yaml +7 -0
  12. followthemoney/schema/LegalEntity.yaml +10 -0
  13. followthemoney/schema/Organization.yaml +5 -0
  14. followthemoney/schema/Person.yaml +4 -0
  15. followthemoney/schema/PublicBody.yaml +4 -0
  16. followthemoney/schema/Thing.yaml +3 -2
  17. followthemoney/schema.py +16 -2
  18. followthemoney/settings.py +19 -0
  19. followthemoney/statement/entity.py +31 -7
  20. followthemoney/statement/serialize.py +18 -13
  21. followthemoney/statement/statement.py +151 -42
  22. followthemoney/statement/util.py +23 -2
  23. followthemoney/types/address.py +3 -3
  24. followthemoney/types/checksum.py +3 -3
  25. followthemoney/types/country.py +19 -4
  26. followthemoney/types/date.py +13 -3
  27. followthemoney/types/entity.py +3 -3
  28. followthemoney/types/gender.py +6 -6
  29. followthemoney/types/identifier.py +8 -8
  30. followthemoney/types/ip.py +3 -3
  31. followthemoney/types/json.py +2 -2
  32. followthemoney/types/language.py +3 -3
  33. followthemoney/types/mimetype.py +3 -3
  34. followthemoney/types/name.py +3 -3
  35. followthemoney/types/number.py +2 -2
  36. followthemoney/types/phone.py +3 -3
  37. followthemoney/types/string.py +2 -2
  38. followthemoney/types/topic.py +6 -3
  39. followthemoney/types/url.py +3 -3
  40. followthemoney/util.py +6 -14
  41. {followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/METADATA +3 -3
  42. {followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/RECORD +45 -44
  43. {followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/WHEEL +1 -1
  44. {followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/entry_points.txt +0 -0
  45. {followthemoney-4.3.0.dist-info → followthemoney-4.5.0.dist-info}/licenses/LICENSE +0 -0
@@ -5,9 +5,10 @@ import logging
5
5
  from io import TextIOWrapper
6
6
  from pathlib import Path
7
7
  from types import TracebackType
8
- from typing import cast
8
+ from typing import Dict, Tuple, cast
9
9
  from typing import BinaryIO, Generator, Iterable, List, Optional, TextIO, Type
10
10
  from rigour.boolean import text_bool
11
+ from rigour.env import ENCODING
11
12
 
12
13
  from followthemoney.statement.statement import Statement, StatementDict
13
14
  from followthemoney.statement.util import unpack_prop
@@ -60,7 +61,7 @@ def read_json_statements(
60
61
 
61
62
 
62
63
  def read_csv_statements(fh: BinaryIO) -> Generator[Statement, None, None]:
63
- wrapped = TextIOWrapper(fh, encoding="utf-8")
64
+ wrapped = TextIOWrapper(fh, encoding=ENCODING)
64
65
  for row in csv.DictReader(wrapped, dialect=csv.unix_dialect):
65
66
  data = cast(StatementDict, row)
66
67
  data["external"] = text_bool(row.get("external")) or False
@@ -72,7 +73,7 @@ def read_csv_statements(fh: BinaryIO) -> Generator[Statement, None, None]:
72
73
 
73
74
 
74
75
  def read_pack_statements(fh: BinaryIO) -> Generator[Statement, None, None]:
75
- wrapped = TextIOWrapper(fh, encoding="utf-8")
76
+ wrapped = TextIOWrapper(fh, encoding=ENCODING)
76
77
  yield from read_pack_statements_decoded(wrapped)
77
78
 
78
79
 
@@ -129,10 +130,10 @@ def read_path_statements(path: Path, format: str) -> Generator[Statement, None,
129
130
 
130
131
  def get_statement_writer(fh: BinaryIO, format: str) -> "StatementWriter":
131
132
  if format == CSV:
132
- wrapped = TextIOWrapper(fh, encoding="utf-8")
133
+ wrapped = TextIOWrapper(fh, encoding=ENCODING)
133
134
  return CSVStatementWriter(wrapped)
134
135
  elif format == PACK:
135
- wrapped = TextIOWrapper(fh, encoding="utf-8")
136
+ wrapped = TextIOWrapper(fh, encoding=ENCODING)
136
137
  return PackStatementWriter(wrapped)
137
138
  elif format == JSON:
138
139
  return JSONStatementWriter(fh)
@@ -222,12 +223,14 @@ class PackStatementWriter(StatementWriter):
222
223
  "id",
223
224
  ]
224
225
  self.writer.writerow(columns)
225
- self._batch: List[List[Optional[str]]] = []
226
+ self._batch: Dict[str, Tuple[Optional[str], ...]] = {}
226
227
 
227
228
  def write(self, stmt: Statement) -> None:
228
229
  # HACK: This is very similar to the CSV writer, but at the very inner
229
230
  # loop of the application, so we're duplicating code here.
230
- row = [
231
+ if stmt.id is None:
232
+ raise RuntimeError("Cannot write pack statement without ID")
233
+ row = (
231
234
  stmt.entity_id,
232
235
  f"{stmt.schema}:{stmt.prop}",
233
236
  stmt.value,
@@ -239,13 +242,15 @@ class PackStatementWriter(StatementWriter):
239
242
  stmt.first_seen,
240
243
  stmt.last_seen,
241
244
  stmt.id,
242
- ]
243
- self._batch.append(row)
245
+ )
246
+ self._batch[stmt.id] = row
244
247
  if len(self._batch) >= CSV_BATCH:
245
- self.writer.writerows(self._batch)
246
- self._batch.clear()
248
+ self.flush()
249
+
250
+ def flush(self) -> None:
251
+ self.writer.writerows(self._batch.values())
252
+ self._batch.clear()
247
253
 
248
254
  def close(self) -> None:
249
- if len(self._batch) > 0:
250
- self.writer.writerows(self._batch)
255
+ self.flush()
251
256
  self.fh.close()
@@ -1,14 +1,22 @@
1
1
  import hashlib
2
2
  import warnings
3
3
  from sqlalchemy.engine import Row
4
- from typing import cast
5
- from typing import Any, Dict, Generator, Optional
4
+ from typing import Union, cast
5
+ from typing import Any, Dict, Generator, Optional, TypeGuard
6
6
  from typing_extensions import TypedDict, Self
7
7
  from rigour.time import datetime_iso, iso_datetime
8
8
  from rigour.boolean import bool_text
9
9
 
10
10
  from followthemoney.proxy import EntityProxy
11
- from followthemoney.statement.util import get_prop_type, BASE_ID
11
+ from followthemoney.statement.util import get_prop_type, BASE_ID, NON_LANG_TYPE_NAMES
12
+ from followthemoney.util import HASH_ENCODING
13
+
14
+
15
+ UNSET = object()
16
+
17
+
18
+ def is_not_unset(value: str | None | object) -> TypeGuard[str | None]:
19
+ return value is not UNSET
12
20
 
13
21
 
14
22
  class StatementDict(TypedDict):
@@ -42,15 +50,16 @@ class Statement(object):
42
50
 
43
51
  __slots__ = [
44
52
  "id",
45
- "entity_id",
53
+ "_entity_id",
46
54
  "canonical_id",
47
- "prop",
48
- "schema",
49
- "value",
50
- "dataset",
51
- "lang",
55
+ "_prop",
56
+ "_schema",
57
+ "_value",
58
+ "_dataset",
59
+ "_lang",
60
+ "prop_type",
52
61
  "original_value",
53
- "external",
62
+ "_external",
54
63
  "first_seen",
55
64
  "last_seen",
56
65
  "origin",
@@ -72,55 +81,95 @@ class Statement(object):
72
81
  last_seen: Optional[str] = None,
73
82
  origin: Optional[str] = None,
74
83
  ):
75
- self.entity_id = entity_id
84
+ self._entity_id = entity_id
76
85
  self.canonical_id = canonical_id or entity_id
77
- self.prop = prop
78
- self.schema = schema
79
- self.value = value
80
- self.dataset = dataset
81
- self.lang = lang
86
+ self._prop = prop
87
+ self._schema = schema
88
+ self.prop_type = get_prop_type(schema, prop)
89
+ self._value = value
90
+ self._dataset = dataset
91
+
92
+ # Remove lang for non-linguistic property types. The goal here is to avoid
93
+ # duplicate statements because of language tags, but the language metadata
94
+ # may be relevant as context for how the original_value was parsed so it's
95
+ # a bit of information loss.
96
+ if lang is not None:
97
+ if self.prop_type in NON_LANG_TYPE_NAMES:
98
+ lang = None
99
+ self._lang = lang
100
+
82
101
  self.original_value = original_value
83
102
  self.first_seen = first_seen
84
103
  self.last_seen = last_seen or first_seen
85
- self.external = external
104
+ self._external = external
86
105
  self.origin = origin
87
106
  if id is None:
88
107
  id = self.generate_key()
89
108
  self.id = id
90
109
 
91
110
  @property
92
- def prop_type(self) -> str:
93
- """The type of the property, e.g. 'string', 'number', 'url'."""
94
- return get_prop_type(self.schema, self.prop)
111
+ def entity_id(self) -> str:
112
+ """The (original) ID of the entity this statement is about."""
113
+ return self._entity_id
114
+
115
+ @property
116
+ def dataset(self) -> str:
117
+ """The dataset this statement was observed in."""
118
+ return self._dataset
119
+
120
+ @property
121
+ def prop(self) -> str:
122
+ """The property name this statement is about."""
123
+ return self._prop
124
+
125
+ @property
126
+ def schema(self) -> str:
127
+ """The schema of the entity this statement is about."""
128
+ return self._schema
129
+
130
+ @property
131
+ def value(self) -> str:
132
+ """The value of the property captured by this statement."""
133
+ return self._value
134
+
135
+ @property
136
+ def lang(self) -> Optional[str]:
137
+ """The language of the property value, if applicable."""
138
+ return self._lang
139
+
140
+ @property
141
+ def external(self) -> bool:
142
+ """Whether this statement was observed in an external dataset."""
143
+ return self._external
95
144
 
96
145
  def to_dict(self) -> StatementDict:
97
146
  return {
98
147
  "canonical_id": self.canonical_id,
99
- "entity_id": self.entity_id,
100
- "prop": self.prop,
101
- "schema": self.schema,
102
- "value": self.value,
103
- "dataset": self.dataset,
104
- "lang": self.lang,
148
+ "entity_id": self._entity_id,
149
+ "prop": self._prop,
150
+ "schema": self._schema,
151
+ "value": self._value,
152
+ "dataset": self._dataset,
153
+ "lang": self._lang,
105
154
  "original_value": self.original_value,
106
155
  "first_seen": self.first_seen,
107
156
  "last_seen": self.last_seen,
108
- "external": self.external,
157
+ "external": self._external,
109
158
  "origin": self.origin,
110
159
  "id": self.id,
111
160
  }
112
161
 
113
162
  def to_csv_row(self) -> Dict[str, Optional[str]]:
114
163
  data = cast(Dict[str, Optional[str]], self.to_dict())
115
- data["external"] = bool_text(self.external)
116
- data["prop_type"] = get_prop_type(self.schema, self.prop)
164
+ data["external"] = bool_text(self._external)
165
+ data["prop_type"] = self.prop_type
117
166
  return data
118
167
 
119
168
  def to_db_row(self) -> Dict[str, Any]:
120
169
  data = cast(Dict[str, Any], self.to_dict())
121
170
  data["first_seen"] = iso_datetime(self.first_seen)
122
171
  data["last_seen"] = iso_datetime(self.last_seen)
123
- data["prop_type"] = get_prop_type(self.schema, self.prop)
172
+ data["prop_type"] = self.prop_type
124
173
  return data
125
174
 
126
175
  def __hash__(self) -> int:
@@ -132,27 +181,83 @@ class Statement(object):
132
181
  return hash(self.id)
133
182
 
134
183
  def __repr__(self) -> str:
135
- return "<Statement(%r, %r, %r)>" % (self.entity_id, self.prop, self.value)
184
+ return "<Statement(%r, %r, %r)>" % (self._entity_id, self._prop, self._value)
136
185
 
137
186
  def __eq__(self, other: Any) -> bool:
138
187
  return not self.id != other.id
139
188
 
140
189
  def __lt__(self, other: Any) -> bool:
141
- self_key = (self.prop != BASE_ID, self.id or "")
142
- other_key = (other.prop != BASE_ID, other.id or "")
190
+ self_key = (self._prop != BASE_ID, self.id or "")
191
+ other_key = (other._prop != BASE_ID, other.id or "")
143
192
  return self_key < other_key
144
193
 
145
- def clone(self: Self) -> "Statement":
194
+ def clone(
195
+ self: Self,
196
+ *,
197
+ entity_id: Optional[str] = None,
198
+ prop: Optional[str] = None,
199
+ schema: Optional[str] = None,
200
+ value: Optional[str] = None,
201
+ dataset: Optional[str] = None,
202
+ lang: Union[str, None, object] = UNSET,
203
+ original_value: Union[str, None, object] = UNSET,
204
+ first_seen: Union[str, None, object] = UNSET,
205
+ external: Optional[bool] = None,
206
+ canonical_id: Optional[str] = None,
207
+ last_seen: Union[str, None, object] = UNSET,
208
+ origin: Union[str, None, object] = UNSET,
209
+ ) -> "Statement":
146
210
  """Make a deep copy of the given statement."""
147
- return Statement.from_dict(self.to_dict())
211
+ lang = lang if is_not_unset(lang) else self._lang
212
+ ov = original_value if is_not_unset(original_value) else self.original_value
213
+ first_seen = first_seen if is_not_unset(first_seen) else self.first_seen
214
+ last_seen = last_seen if is_not_unset(last_seen) else self.last_seen
215
+ origin = origin if is_not_unset(origin) else self.origin
216
+ if external is None:
217
+ external = self._external
218
+ if canonical_id is None and self._entity_id != self.canonical_id:
219
+ canonical_id = self.canonical_id
220
+
221
+ # Decide if the statement ID can be kept the same:
222
+ stmt_id = self.id
223
+ if entity_id is not None and entity_id != self.entity_id:
224
+ stmt_id = None
225
+ if prop is not None and prop != self._prop:
226
+ stmt_id = None
227
+ if schema is not None and schema != self._schema:
228
+ stmt_id = None
229
+ if value is not None and value != self._value:
230
+ stmt_id = None
231
+ if dataset is not None and dataset != self._dataset:
232
+ stmt_id = None
233
+ if external != self._external:
234
+ stmt_id = None
235
+ if lang != self._lang:
236
+ stmt_id = None
237
+ return Statement(
238
+ id=stmt_id,
239
+ entity_id=entity_id or self._entity_id,
240
+ prop=prop or self._prop,
241
+ schema=schema or self._schema,
242
+ value=value or self._value,
243
+ dataset=dataset or self._dataset,
244
+ lang=lang,
245
+ original_value=ov,
246
+ first_seen=first_seen,
247
+ external=external,
248
+ canonical_id=canonical_id,
249
+ last_seen=last_seen,
250
+ origin=origin,
251
+ )
148
252
 
149
253
  def generate_key(self) -> Optional[str]:
150
254
  return self.make_key(
151
- self.dataset,
152
- self.entity_id,
153
- self.prop,
154
- self.value,
155
- self.external,
255
+ self._dataset,
256
+ self._entity_id,
257
+ self._prop,
258
+ self._value,
259
+ self._external,
260
+ lang=self._lang,
156
261
  )
157
262
 
158
263
  @classmethod
@@ -163,17 +268,21 @@ class Statement(object):
163
268
  prop: str,
164
269
  value: str,
165
270
  external: Optional[bool],
271
+ lang: Optional[str] = None,
166
272
  ) -> Optional[str]:
167
273
  """Hash the key properties of a statement record to make a unique ID."""
168
274
  if prop is None or value is None:
169
275
  return None
170
- key = f"{dataset}.{entity_id}.{prop}.{value}"
276
+ if lang is None:
277
+ key = f"{dataset}.{entity_id}.{prop}.{value}"
278
+ else:
279
+ key = f"{dataset}.{entity_id}.{prop}.{value}@{lang}"
171
280
  if external:
172
281
  # We consider the external flag in key composition to avoid race conditions
173
282
  # where a certain entity might be emitted as external while it is already
174
283
  # linked in to the graph via another route.
175
284
  key = f"{key}.ext"
176
- return hashlib.sha1(key.encode("utf-8")).hexdigest()
285
+ return hashlib.sha1(key.encode(HASH_ENCODING)).hexdigest()
177
286
 
178
287
  @classmethod
179
288
  def from_dict(cls, data: StatementDict) -> "Statement":
@@ -1,11 +1,32 @@
1
- import sys
2
1
  from functools import cache
3
2
  from typing import Tuple
4
3
 
5
4
  from followthemoney.model import Model
5
+ from followthemoney.types import registry
6
+ from followthemoney.util import const
6
7
 
7
8
  BASE_ID = "id"
8
9
 
10
+ # Some property types should not set the `lang` attribute on statements.
11
+ # These are typically non-linguistic types, although there's an argument
12
+ # that language metadata could be useful for dates and countries, where
13
+ # text parsing is likely to have taken place.
14
+ NON_LANG_TYPE_NAMES = {
15
+ registry.entity.name,
16
+ registry.date.name,
17
+ registry.checksum.name,
18
+ registry.email.name,
19
+ registry.phone.name,
20
+ registry.gender.name,
21
+ registry.mimetype.name,
22
+ registry.topic.name,
23
+ registry.url.name,
24
+ registry.country.name,
25
+ registry.language.name,
26
+ registry.ip.name,
27
+ BASE_ID,
28
+ }
29
+
9
30
 
10
31
  def pack_prop(schema: str, prop: str) -> str:
11
32
  return f"{schema}:{prop}"
@@ -28,4 +49,4 @@ def get_prop_type(schema: str, prop: str) -> str:
28
49
  def unpack_prop(id: str) -> Tuple[str, str, str]:
29
50
  schema, prop = id.split(":", 1)
30
51
  prop_type = get_prop_type(schema, prop)
31
- return sys.intern(schema), prop_type, sys.intern(prop)
52
+ return const(schema), prop_type, const(prop)
@@ -6,7 +6,7 @@ from rigour.text.distance import levenshtein_similarity
6
6
 
7
7
  from followthemoney.types.common import PropertyType
8
8
  from followthemoney.util import defer as _
9
- from followthemoney.util import dampen, const
9
+ from followthemoney.util import dampen
10
10
 
11
11
  if TYPE_CHECKING:
12
12
  from followthemoney.proxy import EntityProxy
@@ -20,8 +20,8 @@ class AddressType(PropertyType):
20
20
 
21
21
  LINE_BREAKS = re.compile(r"(\r\n|\n|<BR/>|<BR>|\t|ESQ\.,|ESQ,|;)")
22
22
  COMMATA = re.compile(r"(,\s?[,\.])")
23
- name = const("address")
24
- group = const("addresses")
23
+ name = "address"
24
+ group = "addresses"
25
25
  label = _("Address")
26
26
  plural = _("Addresses")
27
27
  matchable = True
@@ -1,5 +1,5 @@
1
1
  from followthemoney.types.common import PropertyType
2
- from followthemoney.util import const, defer as _
2
+ from followthemoney.util import defer as _
3
3
 
4
4
 
5
5
  class ChecksumType(PropertyType):
@@ -12,8 +12,8 @@ class ChecksumType(PropertyType):
12
12
  of this type are scrubbed when submitted via the normal API. Checksums can only
13
13
  be defined by uploading a document to be ingested."""
14
14
 
15
- name = const("checksum")
16
- group = const("checksums")
15
+ name = "checksum"
16
+ group = "checksums"
17
17
  label = _("Checksum")
18
18
  plural = _("Checksums")
19
19
  matchable = True
@@ -1,9 +1,10 @@
1
- from typing import Optional, TYPE_CHECKING
1
+ from typing import Callable, Optional, TYPE_CHECKING, Sequence
2
2
  from babel.core import Locale
3
3
  from rigour.territories import get_ftm_countries, lookup_territory
4
+ from rigour.territories import territories_intersect
4
5
 
5
6
  from followthemoney.types.common import EnumType, EnumValues
6
- from followthemoney.util import const, defer as _
7
+ from followthemoney.util import defer as _
7
8
 
8
9
  if TYPE_CHECKING:
9
10
  from followthemoney.proxy import EntityProxy
@@ -15,8 +16,8 @@ class CountryType(EnumType):
15
16
  a number of unusual and controversial designations (e.g. the Soviet Union,
16
17
  Transnistria, Somaliland, Kosovo)."""
17
18
 
18
- name = const("country")
19
- group = const("countries")
19
+ name = "country"
20
+ group = "countries"
20
21
  label = _("Country")
21
22
  plural = _("Countries")
22
23
  matchable = True
@@ -25,6 +26,20 @@ class CountryType(EnumType):
25
26
  def _locale_names(self, locale: Locale) -> EnumValues:
26
27
  return {t.code: t.name for t in get_ftm_countries()}
27
28
 
29
+ def compare(self, left: str, right: str) -> float:
30
+ overlap = territories_intersect([left], [right])
31
+ return 1.0 if len(overlap) else 0.0
32
+
33
+ def compare_sets(
34
+ self,
35
+ left: Sequence[str],
36
+ right: Sequence[str],
37
+ func: Callable[[Sequence[float]], float] = max,
38
+ ) -> float:
39
+ """Compare two sets of values and select the highest-scored result."""
40
+ overlap = territories_intersect(left, right)
41
+ return 1.0 if len(overlap) else 0.0
42
+
28
43
  def clean_text(
29
44
  self,
30
45
  text: str,
@@ -5,7 +5,7 @@ from prefixdate import parse, parse_format, Precision
5
5
 
6
6
  from followthemoney.types.common import PropertyType
7
7
  from followthemoney.util import defer as _
8
- from followthemoney.util import dampen, const
8
+ from followthemoney.util import dampen
9
9
 
10
10
  if TYPE_CHECKING:
11
11
  from followthemoney.proxy import EntityProxy
@@ -20,13 +20,23 @@ class DateType(PropertyType):
20
20
  The timezone is always expected to be UTC and cannot be specified otherwise. There is
21
21
  no support for calendar weeks (`2021-W7`) and date ranges (`2021-2024`)."""
22
22
 
23
- name = const("date")
24
- group = const("dates")
23
+ name = "date"
24
+ group = "dates"
25
25
  label = _("Date")
26
26
  plural = _("Dates")
27
27
  matchable = True
28
28
  max_length = 32
29
29
 
30
+ HISTORIC = "1001-01-01"
31
+ """A sentinel date value representing a very old date, used to indicate historic (and often imprecise) dates
32
+ that can be assumed to be long in the past."""
33
+
34
+ RELEVANCE_MIN = "1900-01-01"
35
+ """A cutoff date value representing the minimum relevant date for modern fincrime applications."""
36
+
37
+ RELEVANCE_MAX = "2100-12-31"
38
+ """A cutoff date value representing the maximum relevant date for modern fincrime applications."""
39
+
30
40
  def validate(
31
41
  self, value: str, fuzzy: bool = False, format: Optional[str] = None
32
42
  ) -> bool:
@@ -4,7 +4,7 @@ from typing import Any, Optional, TYPE_CHECKING
4
4
  from followthemoney.types.common import PropertyType
5
5
  from followthemoney.value import Value
6
6
  from followthemoney.util import ENTITY_ID_LEN, get_entity_id, sanitize_text
7
- from followthemoney.util import const, gettext, defer as _
7
+ from followthemoney.util import gettext, defer as _
8
8
  from followthemoney.exc import InvalidData
9
9
 
10
10
  if TYPE_CHECKING:
@@ -22,8 +22,8 @@ class EntityType(PropertyType):
22
22
 
23
23
  REGEX_RAW = r"^[0-9a-zA-Z]([0-9a-zA-Z\.\-]*[0-9a-zA-Z])?$"
24
24
  REGEX = re.compile(REGEX_RAW)
25
- name = const("entity")
26
- group = const("entities")
25
+ name = "entity"
26
+ group = "entities"
27
27
  label = _("Entity")
28
28
  plural = _("Entities")
29
29
  matchable = True
@@ -2,7 +2,7 @@ from typing import Optional, TYPE_CHECKING
2
2
  from babel.core import Locale
3
3
 
4
4
  from followthemoney.types.common import EnumType, EnumValues
5
- from followthemoney.util import const, gettext, defer as _
5
+ from followthemoney.util import gettext, defer as _
6
6
 
7
7
  if TYPE_CHECKING:
8
8
  from followthemoney.proxy import EntityProxy
@@ -14,9 +14,9 @@ class GenderType(EnumType):
14
14
  government databases and represent it in a way that can be used by
15
15
  structured tools. I'm not sure this justifies the simplification."""
16
16
 
17
- MALE = const("male")
18
- FEMALE = const("female")
19
- OTHER = const("other")
17
+ MALE = "male"
18
+ FEMALE = "female"
19
+ OTHER = "other"
20
20
 
21
21
  LOOKUP = {
22
22
  "m": MALE,
@@ -34,8 +34,8 @@ class GenderType(EnumType):
34
34
  "divers": OTHER,
35
35
  }
36
36
 
37
- name = const("gender")
38
- group = const("genders")
37
+ name = "gender"
38
+ group = "genders"
39
39
  label = _("Gender")
40
40
  plural = _("Genders")
41
41
  matchable = False
@@ -1,10 +1,10 @@
1
1
  import re
2
2
  from typing import Optional, TYPE_CHECKING
3
- from rigour.ids import get_identifier_format_names, get_identifier_format
3
+ from rigour.ids import get_identifier_format
4
4
 
5
5
  from followthemoney.types.common import PropertyType
6
6
  from followthemoney.util import dampen, shortest, longest
7
- from followthemoney.util import const, defer as _
7
+ from followthemoney.util import defer as _
8
8
 
9
9
  if TYPE_CHECKING:
10
10
  from followthemoney.proxy import EntityProxy
@@ -20,8 +20,8 @@ class IdentifierType(PropertyType):
20
20
  Four- or five-digit industry classifiers create more noise than value."""
21
21
 
22
22
  COMPARE_CLEAN = re.compile(r"[\W_]+")
23
- name = const("identifier")
24
- group = const("identifiers")
23
+ name = "identifier"
24
+ group = "identifiers"
25
25
  label = _("Identifier")
26
26
  plural = _("Identifiers")
27
27
  matchable = True
@@ -35,8 +35,8 @@ class IdentifierType(PropertyType):
35
35
  format: Optional[str] = None,
36
36
  proxy: Optional["EntityProxy"] = None,
37
37
  ) -> Optional[str]:
38
- if format in get_identifier_format_names():
39
- format_ = get_identifier_format(format)
38
+ format_ = get_identifier_format(format)
39
+ if format_ is not None:
40
40
  return format_.normalize(text)
41
41
  return text
42
42
 
@@ -61,7 +61,7 @@ class IdentifierType(PropertyType):
61
61
  return f"id:{value}"
62
62
 
63
63
  def caption(self, value: str, format: Optional[str] = None) -> str:
64
- if format in get_identifier_format_names():
65
- format_ = get_identifier_format(format)
64
+ format_ = get_identifier_format(format)
65
+ if format_ is not None:
66
66
  return format_.format(value)
67
67
  return value
@@ -2,7 +2,7 @@ from typing import Optional, TYPE_CHECKING
2
2
  from ipaddress import ip_address
3
3
 
4
4
  from followthemoney.types.common import PropertyType
5
- from followthemoney.util import const, defer as _
5
+ from followthemoney.util import defer as _
6
6
 
7
7
  if TYPE_CHECKING:
8
8
  from followthemoney.proxy import EntityProxy
@@ -13,8 +13,8 @@ class IpType(PropertyType):
13
13
  by the protocol versions 4 (e.g. `192.168.1.143`) and 6
14
14
  (e.g. `0:0:0:0:0:ffff:c0a8:18f`)."""
15
15
 
16
- name = const("ip")
17
- group = const("ips")
16
+ name = "ip"
17
+ group = "ips"
18
18
  label = _("IP Address")
19
19
  plural = _("IP Addresses")
20
20
  matchable = True
@@ -3,7 +3,7 @@ from typing import Any, Optional, Sequence, TYPE_CHECKING
3
3
  from banal import ensure_list
4
4
 
5
5
  from followthemoney.types.common import PropertyType
6
- from followthemoney.util import const, sanitize_text, defer as _
6
+ from followthemoney.util import sanitize_text, defer as _
7
7
 
8
8
  if TYPE_CHECKING:
9
9
  from followthemoney.proxy import EntityProxy
@@ -14,7 +14,7 @@ class JsonType(PropertyType):
14
14
  and some other edge cases. It's a really bad idea and we should try to get rid
15
15
  of JSON properties."""
16
16
 
17
- name = const("json")
17
+ name = "json"
18
18
  group = None
19
19
  label = _("Nested data")
20
20
  plural = _("Nested data")