followthemoney 3.8.5__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. followthemoney/__init__.py +30 -10
  2. followthemoney/cli/cli.py +1 -1
  3. followthemoney/cli/exports.py +6 -2
  4. followthemoney/cli/statement.py +62 -0
  5. followthemoney/cli/util.py +2 -3
  6. followthemoney/compare.py +26 -16
  7. followthemoney/dataset/__init__.py +17 -0
  8. followthemoney/dataset/catalog.py +77 -0
  9. followthemoney/dataset/coverage.py +29 -0
  10. followthemoney/dataset/dataset.py +146 -0
  11. followthemoney/dataset/publisher.py +25 -0
  12. followthemoney/dataset/resource.py +30 -0
  13. followthemoney/dataset/util.py +55 -0
  14. followthemoney/entity.py +73 -0
  15. followthemoney/exc.py +6 -0
  16. followthemoney/export/rdf.py +57 -5
  17. followthemoney/graph.py +1 -2
  18. followthemoney/model.py +38 -11
  19. followthemoney/names.py +33 -0
  20. followthemoney/ontology.py +18 -16
  21. followthemoney/property.py +12 -15
  22. followthemoney/proxy.py +43 -64
  23. followthemoney/schema/Analyzable.yaml +2 -3
  24. followthemoney/schema/BankAccount.yaml +2 -3
  25. followthemoney/schema/Company.yaml +0 -6
  26. followthemoney/schema/Contract.yaml +0 -1
  27. followthemoney/schema/CryptoWallet.yaml +1 -1
  28. followthemoney/schema/Document.yaml +0 -6
  29. followthemoney/schema/Interval.yaml +7 -0
  30. followthemoney/schema/LegalEntity.yaml +6 -0
  31. followthemoney/schema/License.yaml +2 -0
  32. followthemoney/schema/Page.yaml +0 -1
  33. followthemoney/schema/Person.yaml +0 -5
  34. followthemoney/schema/Sanction.yaml +1 -0
  35. followthemoney/schema/Thing.yaml +0 -2
  36. followthemoney/schema/UserAccount.yaml +6 -3
  37. followthemoney/schema.py +30 -42
  38. followthemoney/statement/__init__.py +19 -0
  39. followthemoney/statement/entity.py +438 -0
  40. followthemoney/statement/serialize.py +251 -0
  41. followthemoney/statement/statement.py +256 -0
  42. followthemoney/statement/util.py +31 -0
  43. followthemoney/types/__init__.py +66 -23
  44. followthemoney/types/address.py +3 -3
  45. followthemoney/types/checksum.py +3 -7
  46. followthemoney/types/common.py +9 -14
  47. followthemoney/types/country.py +3 -7
  48. followthemoney/types/date.py +21 -11
  49. followthemoney/types/email.py +0 -4
  50. followthemoney/types/entity.py +5 -11
  51. followthemoney/types/gender.py +6 -10
  52. followthemoney/types/identifier.py +9 -3
  53. followthemoney/types/ip.py +5 -9
  54. followthemoney/types/json.py +2 -2
  55. followthemoney/types/language.py +3 -7
  56. followthemoney/types/mimetype.py +4 -8
  57. followthemoney/types/name.py +7 -8
  58. followthemoney/types/number.py +88 -6
  59. followthemoney/types/phone.py +4 -11
  60. followthemoney/types/string.py +4 -4
  61. followthemoney/types/topic.py +3 -7
  62. followthemoney/types/url.py +5 -10
  63. followthemoney/util.py +12 -13
  64. followthemoney/value.py +67 -0
  65. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/METADATA +23 -8
  66. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/RECORD +69 -59
  67. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/entry_points.txt +1 -0
  68. followthemoney/offshore.py +0 -48
  69. followthemoney/rdf.py +0 -9
  70. followthemoney/schema/Assessment.yaml +0 -32
  71. followthemoney/schema/Post.yaml +0 -42
  72. followthemoney/types/iban.py +0 -58
  73. followthemoney/types/registry.py +0 -52
  74. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/WHEEL +0 -0
  75. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,256 @@
1
+ import hashlib
2
+ import warnings
3
+ from sqlalchemy.engine import Row
4
+ from typing import cast
5
+ from typing import Any, Dict, Generator, Optional
6
+ from typing_extensions import TypedDict, Self
7
+ from rigour.time import datetime_iso, iso_datetime
8
+ from rigour.boolean import bool_text
9
+
10
+ from followthemoney.proxy import EntityProxy
11
+ from followthemoney.statement.util import get_prop_type, BASE_ID
12
+
13
+
14
class StatementDict(TypedDict):
    """Plain-dictionary form of a statement, as returned by `Statement.to_dict`."""

    # Identifiers: the statement's own ID, the ID of the entity it was
    # emitted for, and the canonical ID the entity is addressed by.
    id: Optional[str]
    entity_id: str
    canonical_id: str

    # The assertion itself: property name, schema name and the value.
    prop: str
    schema: str
    value: str

    # Source dataset plus optional qualifiers of the value.
    dataset: str
    lang: Optional[str]
    original_value: Optional[str]
    external: bool

    # Observation timestamps (kept as strings) and the origin marker.
    first_seen: Optional[str]
    last_seen: Optional[str]
    origin: Optional[str]
29
+
30
class Statement(object):
    """A single statement about a property relevant to an entity.

    For example, this could be used to say: "In dataset A, entity X has the
    property `name` set to 'John Smith'. I first observed this at K, and last
    saw it at L."

    Null property values are not supported. This might need to change if we
    want to support making property-less entities.

    Statements compare and hash by their `id`; ordering places the base
    statement (the one whose `prop` is `BASE_ID`) before all others.
    """

    BASE = BASE_ID

    __slots__ = [
        "id",
        "entity_id",
        "canonical_id",
        "prop",
        "schema",
        "value",
        "dataset",
        "lang",
        "original_value",
        "external",
        "first_seen",
        "last_seen",
        "origin",
    ]

    def __init__(
        self,
        entity_id: str,
        prop: str,
        schema: str,
        value: str,
        dataset: str,
        lang: Optional[str] = None,
        original_value: Optional[str] = None,
        first_seen: Optional[str] = None,
        external: bool = False,
        id: Optional[str] = None,
        canonical_id: Optional[str] = None,
        last_seen: Optional[str] = None,
        origin: Optional[str] = None,
    ):
        """Create a statement.

        Args:
            entity_id: ID of the entity the statement is about.
            prop: Name of the property (or `BASE_ID` for the base statement).
            schema: Name of the entity's schema.
            value: The property value.
            dataset: Name of the dataset making the statement.
            lang: Optional language of the value.
            original_value: Optional value as it appeared before cleaning.
            first_seen: Optional timestamp string of the first observation.
            external: Whether the statement is flagged as external.
            id: Statement ID; generated from the key fields when omitted.
            canonical_id: Canonical entity ID; falls back to `entity_id`.
            last_seen: Timestamp string of the last observation; falls back
                to `first_seen`.
            origin: Optional origin marker.
        """
        self.entity_id = entity_id
        self.canonical_id = canonical_id or entity_id
        self.prop = prop
        self.schema = schema
        self.value = value
        self.dataset = dataset
        self.lang = lang
        self.original_value = original_value
        self.first_seen = first_seen
        self.last_seen = last_seen or first_seen
        self.external = external
        self.origin = origin
        # The key is derived from fields assigned above, so this must run last.
        if id is None:
            id = self.generate_key()
        self.id = id

    @property
    def prop_type(self) -> str:
        """The type of the property, e.g. 'string', 'number', 'url'."""
        return get_prop_type(self.schema, self.prop)

    def to_dict(self) -> StatementDict:
        """Return all statement fields as a plain dictionary."""
        return {
            "canonical_id": self.canonical_id,
            "entity_id": self.entity_id,
            "prop": self.prop,
            "schema": self.schema,
            "value": self.value,
            "dataset": self.dataset,
            "lang": self.lang,
            "original_value": self.original_value,
            "first_seen": self.first_seen,
            "last_seen": self.last_seen,
            "external": self.external,
            "origin": self.origin,
            "id": self.id,
        }

    def to_csv_row(self) -> Dict[str, Optional[str]]:
        """Return the statement as a CSV row.

        Same as `to_dict`, with `external` passed through `bool_text` and an
        added `prop_type` column.
        """
        data = cast(Dict[str, Optional[str]], self.to_dict())
        data["external"] = bool_text(self.external)
        data["prop_type"] = self.prop_type
        return data

    def to_db_row(self) -> Dict[str, Any]:
        """Return the statement as a database row.

        Same as `to_dict`, with `first_seen` and `last_seen` passed through
        `iso_datetime` and an added `prop_type` column.
        """
        data = cast(Dict[str, Any], self.to_dict())
        data["first_seen"] = iso_datetime(self.first_seen)
        data["last_seen"] = iso_datetime(self.last_seen)
        data["prop_type"] = self.prop_type
        return data

    def __hash__(self) -> int:
        """Hash by statement ID, warning when no ID is set."""
        if self.id is None:
            warnings.warn(
                "Hashing a statement without an ID results in undefined behaviour",
                RuntimeWarning,
            )
        return hash(self.id)

    def __repr__(self) -> str:
        return "<Statement(%r, %r, %r)>" % (self.entity_id, self.prop, self.value)

    def __eq__(self, other: Any) -> bool:
        """Two statements are equal when their IDs are equal.

        Objects without an `id` attribute (e.g. `None`) are not comparable;
        `NotImplemented` is returned so Python falls back to its default
        handling instead of raising `AttributeError`.
        """
        try:
            other_id = other.id
        except AttributeError:
            return NotImplemented
        return bool(self.id == other_id)

    def __lt__(self, other: Any) -> bool:
        """Order the base statement first, then by statement ID.

        Returns `NotImplemented` for objects lacking `prop` or `id`.
        """
        try:
            other_key = (other.prop != BASE_ID, other.id or "")
        except AttributeError:
            return NotImplemented
        # False sorts before True, so prop == BASE_ID comes first.
        self_key = (self.prop != BASE_ID, self.id or "")
        return self_key < other_key

    def clone(self: Self) -> "Statement":
        """Make a deep copy of the given statement."""
        return Statement.from_dict(self.to_dict())

    def generate_key(self) -> Optional[str]:
        """Compute the statement ID from this statement's key fields."""
        return self.make_key(
            self.dataset,
            self.entity_id,
            self.prop,
            self.value,
            self.external,
        )

    @classmethod
    def make_key(
        cls,
        dataset: str,
        entity_id: str,
        prop: str,
        value: str,
        external: Optional[bool],
    ) -> Optional[str]:
        """Hash the key properties of a statement record to make a unique ID.

        Returns:
            The SHA-1 hex digest of the composed key, or None when `prop` or
            `value` is None.
        """
        if prop is None or value is None:
            return None
        key = f"{dataset}.{entity_id}.{prop}.{value}"
        if external:
            # We consider the external flag in key composition to avoid race conditions
            # where a certain entity might be emitted as external while it is already
            # linked in to the graph via another route.
            key = f"{key}.ext"
        # NOTE: the digest is an identifier, not a security measure; changing the
        # key format or hash function would change every generated statement ID.
        return hashlib.sha1(key.encode("utf-8")).hexdigest()

    @classmethod
    def from_dict(cls, data: StatementDict) -> "Statement":
        """Build a statement from its dictionary form.

        `entity_id`, `prop`, `schema`, `value` and `dataset` are required
        keys; all other keys are optional.
        """
        return cls(
            entity_id=data["entity_id"],
            prop=data["prop"],
            schema=data["schema"],
            value=data["value"],
            dataset=data["dataset"],
            lang=data.get("lang", None),
            original_value=data.get("original_value", None),
            first_seen=data.get("first_seen", None),
            external=data.get("external", False),
            id=data.get("id", None),
            canonical_id=data.get("canonical_id", None),
            last_seen=data.get("last_seen", None),
            origin=data.get("origin", None),
        )

    @classmethod
    def from_db_row(cls, row: Row[Any]) -> "Statement":
        """Build a statement from a database row.

        The row's `first_seen` and `last_seen` are passed through
        `datetime_iso` before being stored on the statement.
        """
        return cls(
            id=row.id,
            canonical_id=row.canonical_id,
            entity_id=row.entity_id,
            prop=row.prop,
            schema=row.schema,
            value=row.value,
            dataset=row.dataset,
            lang=row.lang,
            original_value=row.original_value,
            first_seen=datetime_iso(row.first_seen),
            external=row.external,
            last_seen=datetime_iso(row.last_seen),
            origin=row.origin,
        )

    @classmethod
    def from_entity(
        cls,
        entity: "EntityProxy",
        dataset: str,
        first_seen: Optional[str] = None,
        last_seen: Optional[str] = None,
        external: bool = False,
        origin: Optional[str] = None,
    ) -> Generator["Statement", None, None]:
        """Yield the statements that describe the given entity.

        A `StatementEntity` yields its existing statements unchanged. Any
        other entity yields one base statement (`prop` set to `BASE_ID`,
        `value` set to the entity ID) followed by one statement per property
        value.

        Raises:
            ValueError: If the entity has no ID.
        """
        # Imported here rather than at module level (presumably to avoid a
        # circular import with followthemoney.statement.entity).
        from followthemoney.statement.entity import StatementEntity

        if entity.id is None:
            raise ValueError("Cannot create statements for entity without ID!")

        # If the entity is already a StatementEntity, we return its statements directly.
        if isinstance(entity, StatementEntity):
            yield from entity.statements
            return

        yield cls(
            entity_id=entity.id,
            prop=BASE_ID,
            schema=entity.schema.name,
            value=entity.id,
            dataset=dataset,
            external=external,
            first_seen=first_seen,
            last_seen=last_seen,
            origin=origin,
        )
        for prop, value in entity.itervalues():
            yield cls(
                entity_id=entity.id,
                prop=prop.name,
                schema=entity.schema.name,
                value=value,
                dataset=dataset,
                external=external,
                first_seen=first_seen,
                last_seen=last_seen,
                origin=origin,
            )
@@ -0,0 +1,31 @@
1
+ import sys
2
+ from functools import cache
3
+ from typing import Tuple
4
+
5
+ from followthemoney.model import Model
6
+
7
# Name of the pseudo-property used by the base statement of an entity: the
# statement whose value is the entity's own ID. `get_prop_type` also returns
# this string as the property type for that pseudo-property.
BASE_ID = "id"
8
+
9
+
10
def pack_prop(schema: str, prop: str) -> str:
    """Join a schema name and a property name into one `schema:prop` string."""
    return "{}:{}".format(schema, prop)
12
+
13
+
14
@cache
def get_prop_type(schema: str, prop: str) -> str:
    """Look up the type name of a property on a schema (results are cached).

    The `BASE_ID` pseudo-property is its own type and needs no model lookup.

    Raises:
        TypeError: If the schema is unknown to the model, or the schema has
            no property of the given name.
    """
    if prop == BASE_ID:
        return BASE_ID

    model_schema = Model.instance().get(schema)
    if model_schema is None:
        raise TypeError("Schema not found: %s" % schema)

    model_prop = model_schema.get(prop)
    if model_prop is None:
        raise TypeError("Property not found: %s" % prop)

    type_name = model_prop.type.name
    return type_name
25
+
26
+
27
@cache
def unpack_prop(id: str) -> Tuple[str, str, str]:
    """Split a packed `schema:prop` string and resolve the property type.

    Returns:
        A `(schema, prop_type, prop)` tuple; the schema and property names
        are interned strings.

    Raises:
        ValueError: If `id` contains no `:` separator.
        TypeError: Propagated from `get_prop_type` for unknown names.
    """
    # Only the first colon separates schema from property.
    pieces = id.split(":", 1)
    schema, prop = pieces
    resolved_type = get_prop_type(schema, prop)
    return sys.intern(schema), resolved_type, sys.intern(prop)
@@ -1,4 +1,6 @@
1
- from followthemoney.types.registry import Registry
1
+ from banal import ensure_list
2
+ from typing import Dict, Iterable, List, Set, cast
3
+
2
4
  from followthemoney.types.url import UrlType
3
5
  from followthemoney.types.name import NameType
4
6
  from followthemoney.types.email import EmailType
@@ -11,7 +13,6 @@ from followthemoney.types.language import LanguageType
11
13
  from followthemoney.types.mimetype import MimeType
12
14
  from followthemoney.types.checksum import ChecksumType
13
15
  from followthemoney.types.identifier import IdentifierType
14
- from followthemoney.types.iban import IbanType
15
16
  from followthemoney.types.entity import EntityType
16
17
  from followthemoney.types.topic import TopicType
17
18
  from followthemoney.types.gender import GenderType
@@ -22,27 +23,69 @@ from followthemoney.types.string import StringType
22
23
  from followthemoney.types.number import NumberType
23
24
  from followthemoney.types.common import PropertyType
24
25
 
26
+
27
class Registry(object):
    """Holds one helper object per property type known to the system.

    Each type is available as an attribute named after the type (for
    example ``registry.phone``), and the returned object can clean,
    validate or format values of that type. On construction the registry
    also indexes the types into `types`, `matchable`, `pivots` and
    `groups`.
    """

    url = UrlType()
    name = NameType()
    email = EmailType()
    ip = IpType()
    address = AddressType()
    date = DateType()
    phone = PhoneType()
    country = CountryType()
    language = LanguageType()
    mimetype = MimeType()
    checksum = ChecksumType()
    identifier = IdentifierType()
    entity = EntityType()
    topic = TopicType()
    gender = GenderType()
    json = JsonType()
    text = TextType()
    html = HTMLType()
    string = StringType()
    number = NumberType()

    def __init__(self) -> None:
        """Scan the declared type attributes and build the lookup indexes."""
        self.matchable: Set[PropertyType] = set()
        self.types: Set[PropertyType] = set()
        self.groups: Dict[str, PropertyType] = {}
        self.pivots: Set[PropertyType] = set()
        for attr in dir(self):
            candidate = getattr(self, attr)
            if isinstance(candidate, PropertyType):
                # Each type must be stored under an attribute equal to its name.
                assert candidate.name == attr
                self.types.add(candidate)
                if candidate.matchable:
                    self.matchable.add(candidate)
                if candidate.pivot:
                    self.pivots.add(candidate)
                if candidate.group is not None:
                    self.groups[candidate.group] = candidate

    def get(self, name: str) -> PropertyType:
        """Return the type object for a property type name.

        The same lookup is available via getattr, e.g. ``registry.phone``.
        """
        # Allow transparent re-checking: a type object is handed back as-is.
        if not isinstance(name, PropertyType):
            return cast(PropertyType, getattr(self, name))
        return name

    def get_types(self, names: Iterable[str]) -> List[PropertyType]:
        """Return the property type objects for the given names, as a list."""
        resolved = [self.get(entry) for entry in ensure_list(names)]
        return [found for found in resolved if found is not None]

    def __getitem__(self, name: str) -> PropertyType:
        """Support ``registry[name]`` as an alternative to attribute access."""
        found = getattr(self, name)
        return cast(PropertyType, found)
87
+
88
+
25
89
  registry = Registry()
26
- registry.add(UrlType)
27
- registry.add(NameType)
28
- registry.add(EmailType)
29
- registry.add(IpType)
30
- registry.add(AddressType)
31
- registry.add(DateType)
32
- registry.add(PhoneType)
33
- registry.add(CountryType)
34
- registry.add(LanguageType)
35
- registry.add(MimeType)
36
- registry.add(ChecksumType)
37
- registry.add(IdentifierType)
38
- registry.add(IbanType) # TODO: remove
39
- registry.add(EntityType)
40
- registry.add(TopicType)
41
- registry.add(GenderType)
42
- registry.add(JsonType)
43
- registry.add(TextType)
44
- registry.add(HTMLType)
45
- registry.add(StringType)
46
- registry.add(NumberType)
47
90
 
48
91
  __all__ = ["PropertyType", "registry"]
@@ -7,7 +7,7 @@ from rigour.text.distance import levenshtein_similarity
7
7
 
8
8
  from followthemoney.types.common import PropertyType
9
9
  from followthemoney.util import defer as _
10
- from followthemoney.util import dampen
10
+ from followthemoney.util import dampen, const
11
11
 
12
12
  if TYPE_CHECKING:
13
13
  from followthemoney.proxy import EntityProxy
@@ -21,8 +21,8 @@ class AddressType(PropertyType):
21
21
 
22
22
  LINE_BREAKS = re.compile(r"(\r\n|\n|<BR/>|<BR>|\t|ESQ\.,|ESQ,|;)")
23
23
  COMMATA = re.compile(r"(,\s?[,\.])")
24
- name = "address"
25
- group = "addresses"
24
+ name = const("address")
25
+ group = const("addresses")
26
26
  label = _("Address")
27
27
  plural = _("Addresses")
28
28
  matchable = True
@@ -1,6 +1,5 @@
1
- from followthemoney.rdf import URIRef, Identifier
2
1
  from followthemoney.types.common import PropertyType
3
- from followthemoney.util import defer as _
2
+ from followthemoney.util import const, defer as _
4
3
 
5
4
 
6
5
  class ChecksumType(PropertyType):
@@ -13,13 +12,10 @@ class ChecksumType(PropertyType):
13
12
  of this type are scrubbed when submitted via the normal API. Checksums can only
14
13
  be defined by uploading a document to be ingested."""
15
14
 
16
- name = "checksum"
17
- group = "checksums"
15
+ name = const("checksum")
16
+ group = const("checksums")
18
17
  label = _("Checksum")
19
18
  plural = _("Checksums")
20
19
  matchable = True
21
20
  pivot = True
22
21
  max_length = 40
23
-
24
- def rdf(self, value: str) -> Identifier:
25
- return URIRef(f"hash:{value}")
@@ -5,8 +5,8 @@ from banal import ensure_list
5
5
  from normality import stringify
6
6
  from typing import Any, Dict, Optional, Sequence, Callable, TYPE_CHECKING, TypedDict
7
7
 
8
- from followthemoney.rdf import Literal, Identifier
9
- from followthemoney.util import get_locale
8
+ from followthemoney.value import Value
9
+ from followthemoney.util import get_locale, const
10
10
  from followthemoney.util import gettext, sanitize_text
11
11
 
12
12
  if TYPE_CHECKING:
@@ -29,7 +29,7 @@ class PropertyTypeToDict(TypedDict, total=False):
29
29
  class PropertyType(object):
30
30
  """Base class for all property types."""
31
31
 
32
- name: str = "any"
32
+ name: str = const("any")
33
33
  """A machine-facing, variable safe name for the given type."""
34
34
 
35
35
  group: Optional[str] = None
@@ -87,7 +87,7 @@ class PropertyType(object):
87
87
 
88
88
  def clean(
89
89
  self,
90
- raw: Any,
90
+ raw: Value,
91
91
  fuzzy: bool = False,
92
92
  format: Optional[str] = None,
93
93
  proxy: Optional["EntityProxy"] = None,
@@ -165,11 +165,6 @@ class PropertyType(object):
165
165
  be related to (e.g. using a country prefix on a phone number or IBAN)."""
166
166
  return None
167
167
 
168
- def rdf(self, value: str) -> Identifier:
169
- """Return an RDF term to represent the given value - either a string
170
- literal, or a URI reference."""
171
- return Literal(value)
172
-
173
168
  def pick(self, values: Sequence[str]) -> Optional[str]:
174
169
  """Pick the best value to show to the user."""
175
170
  raise NotImplementedError
@@ -178,7 +173,7 @@ class PropertyType(object):
178
173
  """Return an ID suitable to identify this entity as a typed node in a
179
174
  graph representation of some FtM data. It's usually the same as the the
180
175
  RDF form."""
181
- return str(self.rdf(value))
176
+ return f"{self.name}:{value}"
182
177
 
183
178
  def node_id_safe(self, value: Optional[str]) -> Optional[str]:
184
179
  """Wrapper for node_id to handle None values."""
@@ -186,7 +181,7 @@ class PropertyType(object):
186
181
  return None
187
182
  return self.node_id(value)
188
183
 
189
- def caption(self, value: str) -> Optional[str]:
184
+ def caption(self, value: str, format: Optional[str] = None) -> str:
190
185
  """Return a label for the given property value. This is often the same as the
191
186
  value, but for types like countries or languages, it would return the label,
192
187
  while other values like phone numbers can be formatted to be nicer to read."""
@@ -253,19 +248,19 @@ class EnumType(PropertyType):
253
248
 
254
249
  def clean_text(
255
250
  self,
256
- code: str,
251
+ text: str,
257
252
  fuzzy: bool = False,
258
253
  format: Optional[str] = None,
259
254
  proxy: Optional["EntityProxy"] = None,
260
255
  ) -> Optional[str]:
261
256
  """All code values are cleaned to be lowercase and trailing whitespace is
262
257
  removed."""
263
- code = code.lower().strip()
258
+ code = text.lower().strip()
264
259
  if code not in self.codes:
265
260
  return None
266
261
  return code
267
262
 
268
- def caption(self, value: str) -> str:
263
+ def caption(self, value: str, format: Optional[str] = None) -> str:
269
264
  """Given a code value, return the label that should be shown to a user."""
270
265
  return self.names.get(value, value)
271
266
 
@@ -3,9 +3,8 @@ from typing import Optional, TYPE_CHECKING
3
3
  from babel.core import Locale
4
4
  from rigour.territories import get_territory, get_ftm_countries
5
5
 
6
- from followthemoney.rdf import URIRef, Identifier
7
6
  from followthemoney.types.common import EnumType, EnumValues
8
- from followthemoney.util import defer as _
7
+ from followthemoney.util import const, defer as _
9
8
 
10
9
  if TYPE_CHECKING:
11
10
  from followthemoney.proxy import EntityProxy
@@ -17,8 +16,8 @@ class CountryType(EnumType):
17
16
  a number of unusual and controversial designations (e.g. the Soviet Union,
18
17
  Transnistria, Somaliland, Kosovo)."""
19
18
 
20
- name = "country"
21
- group = "countries"
19
+ name = const("country")
20
+ group = const("countries")
22
21
  label = _("Country")
23
22
  plural = _("Countries")
24
23
  matchable = True
@@ -52,6 +51,3 @@ class CountryType(EnumType):
52
51
 
53
52
  def country_hint(self, value: str) -> str:
54
53
  return value
55
-
56
- def rdf(self, value: str) -> Identifier:
57
- return URIRef(f"iso-3166:{value}")
@@ -4,9 +4,8 @@ from typing import Optional, TYPE_CHECKING
4
4
  from prefixdate import parse, parse_format, Precision
5
5
 
6
6
  from followthemoney.types.common import PropertyType
7
- from followthemoney.rdf import XSD, Literal, Identifier
8
7
  from followthemoney.util import defer as _
9
- from followthemoney.util import dampen
8
+ from followthemoney.util import dampen, const
10
9
 
11
10
  if TYPE_CHECKING:
12
11
  from followthemoney.proxy import EntityProxy
@@ -21,8 +20,8 @@ class DateType(PropertyType):
21
20
  The timezone is always expected to be UTC and cannot be specified otherwise. There is
22
21
  no support for calendar weeks (`2021-W7`) and date ranges (`2021-2024`)."""
23
22
 
24
- name = "date"
25
- group = "dates"
23
+ name = const("date")
24
+ group = const("dates")
26
25
  label = _("Date")
27
26
  plural = _("Dates")
28
27
  matchable = True
@@ -57,18 +56,29 @@ class DateType(PropertyType):
57
56
  prefix = os.path.commonprefix([left, right])
58
57
  return dampen(4, 10, prefix)
59
58
 
60
- def rdf(self, value: str) -> Identifier:
61
- if len(value) < Precision.HOUR.value:
62
- return Literal(value, datatype=XSD.date)
63
- return Literal(value, datatype=XSD.dateTime)
59
+ def to_datetime(self, value: str) -> Optional[datetime]:
60
+ """Convert a date string to a datetime object in UTC for handling in Python. This
61
+ will convert the unset fields beyond the prefix to the first possible value, e.g.
62
+ `2021-02` will become `2021-02-01T00:00:00Z`.
64
63
 
65
- def node_id(self, value: str) -> str:
66
- return f"date:{value}"
64
+ Args:
65
+ value (str): The date string to convert.
67
66
 
68
- def to_datetime(self, value: str) -> Optional[datetime]:
67
+ Returns:
68
+ Optional[datetime]: The parsed datetime object in UTC, or None if parsing fails.
69
+ """
69
70
  return parse(value).dt
70
71
 
71
72
  def to_number(self, value: str) -> Optional[float]:
73
+ """Convert a date string to a number, which is the number of seconds since the epoch
74
+ (1970-01-01T00:00:00Z).
75
+
76
+ Args:
77
+ value (str): The date string to convert.
78
+
79
+ Returns:
80
+ Optional[float]: The timestamp as a float, or None if parsing fails.
81
+ """
72
82
  date = self.to_datetime(value)
73
83
  if date is None:
74
84
  return None
@@ -4,7 +4,6 @@ from typing import Optional, TYPE_CHECKING
4
4
  from urllib.parse import urlparse
5
5
  from normality.cleaning import strip_quotes
6
6
 
7
- from followthemoney.rdf import URIRef, Identifier
8
7
  from followthemoney.types.common import PropertyType
9
8
  from followthemoney.util import sanitize_text, defer as _
10
9
 
@@ -80,6 +79,3 @@ class EmailType(PropertyType):
80
79
 
81
80
  # def country_hint(self, value)
82
81
  # TODO: do we want to use TLDs as country evidence?
83
-
84
- def rdf(self, value: str) -> Identifier:
85
- return URIRef("mailto:%s" % value.lower())
@@ -2,9 +2,9 @@ import re
2
2
  from typing import Any, Optional, TYPE_CHECKING
3
3
 
4
4
  from followthemoney.types.common import PropertyType
5
- from followthemoney.rdf import URIRef, Identifier
5
+ from followthemoney.value import Value
6
6
  from followthemoney.util import ENTITY_ID_LEN, get_entity_id, sanitize_text
7
- from followthemoney.util import gettext, defer as _
7
+ from followthemoney.util import const, gettext, defer as _
8
8
  from followthemoney.exc import InvalidData
9
9
 
10
10
  if TYPE_CHECKING:
@@ -22,8 +22,8 @@ class EntityType(PropertyType):
22
22
 
23
23
  REGEX_RAW = r"^[0-9a-zA-Z]([0-9a-zA-Z\.\-]*[0-9a-zA-Z])?$"
24
24
  REGEX = re.compile(REGEX_RAW)
25
- name = "entity"
26
- group = "entities"
25
+ name = const("entity")
26
+ group = const("entities")
27
27
  label = _("Entity")
28
28
  plural = _("Entities")
29
29
  matchable = True
@@ -31,7 +31,7 @@ class EntityType(PropertyType):
31
31
  max_length = ENTITY_ID_LEN
32
32
 
33
33
  def validate(
34
- self, value: str, fuzzy: bool = False, format: Optional[str] = None
34
+ self, value: Value, fuzzy: bool = False, format: Optional[str] = None
35
35
  ) -> bool:
36
36
  text = sanitize_text(value)
37
37
  if text is None:
@@ -66,9 +66,3 @@ class EntityType(PropertyType):
66
66
  if self.REGEX.match(text) is not None:
67
67
  return text
68
68
  return None
69
-
70
- def rdf(self, value: str) -> Identifier:
71
- return URIRef(f"entity:{value}")
72
-
73
- def caption(self, value: str) -> None:
74
- return None