followthemoney 3.8.4__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. followthemoney/__init__.py +30 -10
  2. followthemoney/cli/__init__.py +3 -12
  3. followthemoney/cli/aggregate.py +1 -1
  4. followthemoney/cli/cli.py +1 -1
  5. followthemoney/cli/exports.py +6 -2
  6. followthemoney/cli/mapping.py +6 -4
  7. followthemoney/cli/sieve.py +1 -1
  8. followthemoney/cli/statement.py +62 -0
  9. followthemoney/cli/util.py +2 -3
  10. followthemoney/compare.py +26 -16
  11. followthemoney/dataset/__init__.py +17 -0
  12. followthemoney/dataset/catalog.py +77 -0
  13. followthemoney/dataset/coverage.py +29 -0
  14. followthemoney/dataset/dataset.py +137 -0
  15. followthemoney/dataset/publisher.py +25 -0
  16. followthemoney/dataset/resource.py +30 -0
  17. followthemoney/dataset/util.py +58 -0
  18. followthemoney/entity.py +73 -0
  19. followthemoney/exc.py +6 -0
  20. followthemoney/export/common.py +3 -3
  21. followthemoney/export/csv.py +10 -12
  22. followthemoney/export/neo4j.py +1 -1
  23. followthemoney/export/rdf.py +57 -5
  24. followthemoney/graph.py +6 -4
  25. followthemoney/mapping/csv.py +6 -18
  26. followthemoney/mapping/sql.py +3 -4
  27. followthemoney/model.py +36 -9
  28. followthemoney/namespace.py +3 -1
  29. followthemoney/ontology.py +18 -16
  30. followthemoney/property.py +12 -15
  31. followthemoney/proxy.py +44 -65
  32. followthemoney/schema/Analyzable.yaml +2 -3
  33. followthemoney/schema/BankAccount.yaml +2 -3
  34. followthemoney/schema/Company.yaml +0 -6
  35. followthemoney/schema/Contract.yaml +0 -1
  36. followthemoney/schema/CryptoWallet.yaml +1 -1
  37. followthemoney/schema/Document.yaml +0 -6
  38. followthemoney/schema/Interval.yaml +7 -0
  39. followthemoney/schema/LegalEntity.yaml +6 -0
  40. followthemoney/schema/License.yaml +2 -0
  41. followthemoney/schema/Page.yaml +0 -1
  42. followthemoney/schema/Person.yaml +0 -5
  43. followthemoney/schema/Sanction.yaml +1 -0
  44. followthemoney/schema/Thing.yaml +0 -2
  45. followthemoney/schema/UserAccount.yaml +6 -3
  46. followthemoney/schema.py +27 -39
  47. followthemoney/statement/__init__.py +19 -0
  48. followthemoney/statement/entity.py +437 -0
  49. followthemoney/statement/serialize.py +245 -0
  50. followthemoney/statement/statement.py +256 -0
  51. followthemoney/statement/util.py +31 -0
  52. followthemoney/types/__init__.py +66 -23
  53. followthemoney/types/address.py +3 -3
  54. followthemoney/types/checksum.py +3 -7
  55. followthemoney/types/common.py +9 -14
  56. followthemoney/types/country.py +3 -7
  57. followthemoney/types/date.py +21 -11
  58. followthemoney/types/email.py +0 -4
  59. followthemoney/types/entity.py +5 -11
  60. followthemoney/types/gender.py +6 -10
  61. followthemoney/types/identifier.py +9 -3
  62. followthemoney/types/ip.py +5 -9
  63. followthemoney/types/json.py +2 -2
  64. followthemoney/types/language.py +3 -7
  65. followthemoney/types/mimetype.py +4 -8
  66. followthemoney/types/name.py +7 -8
  67. followthemoney/types/number.py +88 -6
  68. followthemoney/types/phone.py +4 -11
  69. followthemoney/types/string.py +4 -4
  70. followthemoney/types/topic.py +3 -7
  71. followthemoney/types/url.py +5 -10
  72. followthemoney/util.py +12 -13
  73. followthemoney/value.py +67 -0
  74. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/METADATA +38 -34
  75. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/RECORD +78 -69
  76. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/entry_points.txt +1 -0
  77. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/licenses/LICENSE +1 -0
  78. followthemoney/offshore.py +0 -48
  79. followthemoney/rdf.py +0 -9
  80. followthemoney/schema/Assessment.yaml +0 -32
  81. followthemoney/schema/Post.yaml +0 -42
  82. followthemoney/types/iban.py +0 -58
  83. followthemoney/types/registry.py +0 -52
  84. {followthemoney-3.8.4.dist-info → followthemoney-4.0.0.dist-info}/WHEEL +0 -0
followthemoney/schema.py CHANGED
@@ -1,22 +1,12 @@
1
- from typing import (
2
- TYPE_CHECKING,
3
- Any,
4
- Dict,
5
- List,
6
- Optional,
7
- Set,
8
- TypedDict,
9
- Union,
10
- cast,
11
- )
1
+ from typing import TYPE_CHECKING, Any, cast
2
+ from typing import Dict, List, Optional, Set, TypedDict, Union
12
3
  from banal import ensure_list, ensure_dict, as_bool
13
4
  from functools import lru_cache
14
5
 
15
6
  from followthemoney.property import Property, PropertySpec, PropertyToDict, ReverseSpec
16
7
  from followthemoney.types import registry
17
8
  from followthemoney.exc import InvalidData, InvalidModel
18
- from followthemoney.rdf import URIRef, NS
19
- from followthemoney.util import gettext
9
+ from followthemoney.util import gettext, const
20
10
 
21
11
  if TYPE_CHECKING:
22
12
  from followthemoney.model import Model
@@ -47,7 +37,6 @@ class SchemaSpec(TypedDict, total=False):
47
37
  edge: EdgeSpec
48
38
  temporalExtent: TemporalExtentSpec
49
39
  description: Optional[str]
50
- rdf: Optional[str]
51
40
  abstract: bool
52
41
  hidden: bool
53
42
  generated: bool
@@ -90,7 +79,6 @@ class Schema:
90
79
  "_plural",
91
80
  "_description",
92
81
  "_hash",
93
- "uri",
94
82
  "abstract",
95
83
  "hidden",
96
84
  "generated",
@@ -118,15 +106,12 @@ class Schema:
118
106
 
119
107
  def __init__(self, model: "Model", name: str, data: SchemaSpec) -> None:
120
108
  #: Machine-readable name of the schema, used for identification.
121
- self.name = name
109
+ self.name = const(name)
122
110
  self.model = model
123
111
  self._label = data.get("label", name)
124
112
  self._plural = data.get("plural", self.label)
125
113
  self._description = data.get("description")
126
- self._hash = hash("<Schema(%r)>" % name)
127
-
128
- #: RDF identifier for this schema when it is transformed to a triple term.
129
- self.uri = URIRef(cast(str, data.get("rdf", NS[name])))
114
+ self._hash = hash("<Schema(%r)>" % self.name)
130
115
 
131
116
  #: Do not store or emit entities of this type, it is used only for
132
117
  #: inheritance.
@@ -152,17 +137,17 @@ class Schema:
152
137
  #: Mark a set of properties as important, i.e. they should be shown
153
138
  #: first, or in an abridged view of the entity. In Aleph, these properties
154
139
  #: are included in tabular entity listings.
155
- self.featured = ensure_list(data.get("featured", []))
140
+ self.featured = [const(f) for f in data.get("featured", [])]
156
141
 
157
142
  #: Mark a set of properties as required. This is applied only when
158
143
  #: an entity is created by the user - bulk created entities will
159
144
  #: slip through even if it is technically invalid.
160
- self.required = ensure_list(data.get("required", []))
145
+ self.required = [const(r) for r in data.get("required", [])]
161
146
 
162
147
  #: Mark a set of properties to be used for the entity's caption.
163
148
  #: They will be checked in order and the first existent value will
164
149
  #: be used.
165
- self.caption = ensure_list(data.get("caption", []))
150
+ self.caption = [const(s) for s in data.get("caption", [])]
166
151
 
167
152
  # A transform of the entity into an edge for its representation in
168
153
  # the context of a property graph representation like Neo4J/Gephi.
@@ -173,7 +158,7 @@ class Schema:
173
158
  #: Flag to indicate if this schema should be represented by an edge (rather than
174
159
  #: a node) when the data is converted into a property graph.
175
160
  self.edge: bool = self.edge_source is not None and self.edge_target is not None
176
- self.edge_caption = ensure_list(edge.get("caption", []))
161
+ self.edge_caption = [const(p) for p in edge.get("caption", [])]
177
162
  self._edge_label = edge.get("label", self._label)
178
163
 
179
164
  #: Flag to indicate if the edge should be presented as directed to the user,
@@ -183,16 +168,16 @@ class Schema:
183
168
  #: Specify which properties should be used to represent this schema in a
184
169
  #: timeline.
185
170
  temporal_extent = data.get("temporalExtent", {})
186
- self._temporal_start = ensure_list(temporal_extent.get("start", []))
187
- self._temporal_end = ensure_list(temporal_extent.get("end", []))
171
+ self._temporal_start = [const(s) for s in temporal_extent.get("start", [])]
172
+ self._temporal_end = [const(e) for e in temporal_extent.get("end", [])]
188
173
 
189
174
  #: Direct parent schemata of this schema.
190
- self._extends = ensure_list(data.get("extends", []))
175
+ self._extends = [const(s) for s in data.get("extends", [])]
191
176
  self.extends: Set["Schema"] = set()
192
177
 
193
178
  #: All parents of this schema (including indirect parents and the schema
194
179
  #: itself).
195
- self.schemata = set([self])
180
+ self.schemata: Set[Schema] = set([self])
196
181
 
197
182
  #: All names of :attr:`~schemata`.
198
183
  self.names = set([self.name])
@@ -205,8 +190,8 @@ class Schema:
205
190
  #: The full list of properties defined for the entity, including those
206
191
  #: inherited from parent schemata.
207
192
  self.properties: Dict[str, Property] = {}
208
- for name, prop in data.get("properties", {}).items():
209
- self.properties[name] = Property(self, name, prop)
193
+ for pname, prop in data.get("properties", {}).items():
194
+ self.properties[pname] = Property(self, pname, prop)
210
195
 
211
196
  def generate(self, model: "Model") -> None:
212
197
  """While loading the schema, this function will validate and
@@ -317,12 +302,18 @@ class Schema:
317
302
 
318
303
  @property
319
304
  def source_prop(self) -> Optional[Property]:
320
- """The entity property to be used as an edge source."""
305
+ """The entity property to be used as an edge source when the schema is
306
+ considered as a relationship."""
307
+ if self.edge_source is None:
308
+ return None
321
309
  return self.get(self.edge_source)
322
310
 
323
311
  @property
324
312
  def target_prop(self) -> Optional[Property]:
325
- """The entity property to be used as an edge target."""
313
+ """The entity property to be used as an edge target when the schema is transformed
314
+ into a relationship."""
315
+ if self.edge_target is None:
316
+ return None
326
317
  return self.get(self.edge_target)
327
318
 
328
319
  @property
@@ -404,13 +395,13 @@ class Schema:
404
395
  other = other.name
405
396
  return other in self.names
406
397
 
407
- def get(self, name: Optional[str]) -> Optional[Property]:
398
+ def get(self, name: str) -> Optional[Property]:
408
399
  """Retrieve a property defined for this schema by its name."""
409
400
  if name is None:
410
401
  return None
411
402
  return self.properties.get(name)
412
403
 
413
- def validate(self, data: Any) -> Optional[str]:
404
+ def validate(self, data: Dict[str, Any]) -> Optional[str]:
414
405
  """Validate a dictionary against the given schema.
415
406
  This will also drop keys which are not valid as properties.
416
407
  """
@@ -478,7 +469,7 @@ class Schema:
478
469
  def __eq__(self, other: Any) -> bool:
479
470
  """Compare two schemata (via hash)."""
480
471
  try:
481
- return self._hash == hash(other)
472
+ return self._hash == other._hash # type: ignore
482
473
  except AttributeError:
483
474
  return False
484
475
 
@@ -486,10 +477,7 @@ class Schema:
486
477
  return self.name.__lt__(other.name)
487
478
 
488
479
  def __hash__(self) -> int:
489
- try:
490
- return self._hash
491
- except AttributeError:
492
- return super().__hash__()
480
+ return self._hash
493
481
 
494
482
  def __repr__(self) -> str:
495
483
  return "<Schema(%r)>" % self.name
followthemoney/statement/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ from followthemoney.statement.statement import Statement, StatementDict
2
+ from followthemoney.statement.serialize import CSV, JSON, PACK, FORMATS
3
+ from followthemoney.statement.serialize import write_statements
4
+ from followthemoney.statement.serialize import read_statements, read_path_statements
5
+ from followthemoney.statement.entity import SE, StatementEntity
6
+
7
+ __all__ = [
8
+ "Statement",
9
+ "StatementDict",
10
+ "StatementEntity",
11
+ "SE",
12
+ "CSV",
13
+ "JSON",
14
+ "PACK",
15
+ "FORMATS",
16
+ "write_statements",
17
+ "read_statements",
18
+ "read_path_statements",
19
+ ]
followthemoney/statement/entity.py ADDED
@@ -0,0 +1,437 @@
1
+ from hashlib import sha1
2
+ from collections.abc import Mapping
3
+ from typing import Any, Dict, List, Optional, Set, Type
4
+ from typing import Generator, Iterable, Tuple, TypeVar
5
+
6
+ from followthemoney.model import Model
7
+ from followthemoney.exc import InvalidData
8
+ from followthemoney.types.common import PropertyType
9
+ from followthemoney.property import Property
10
+ from followthemoney.util import gettext
11
+ from followthemoney.proxy import P
12
+ from followthemoney.types import registry
13
+ from followthemoney.value import string_list, Values
14
+ from followthemoney.proxy import EntityProxy
15
+ from followthemoney.dataset import Dataset, DefaultDataset
16
+ from followthemoney.statement.statement import Statement
17
+ from followthemoney.statement.util import BASE_ID
18
+
19
+ SE = TypeVar("SE", bound="StatementEntity")
20
+
21
+
class StatementEntity(EntityProxy):
    """An entity object that can link to a set of datasets that it is sourced from.

    Property values are not kept as plain string lists: every value is held as
    a :class:`Statement` in ``_statements`` (property name -> set of
    statements), and the string views (``properties``, ``get`` ...) are derived
    from those statements on demand.
    """

    __slots__ = (
        "schema",
        "id",
        "_caption",
        "extra_referents",
        "dataset",
        "last_change",
        "_statements",
    )

    def __init__(self, dataset: Dataset, data: Dict[str, Any], cleaned: bool = True):
        """Build an entity from its dictionary form.

        ``data`` is shallow-copied before it is consumed. The keys read here
        are ``schema``, ``id``, ``referents``, ``last_change``, ``properties``
        (a mapping of property name to values) and ``statements`` (a list of
        statement dictionaries).

        Raises:
            InvalidData: if no schema can be resolved from ``data``, or if
                property values are given for an entity without an ID.
        """
        data = dict(data or {})
        schema = Model.instance().get(data.pop("schema", None))
        if schema is None:
            raise InvalidData(gettext("No schema for entity."))
        self.schema = schema

        #: A pre-computed label for this entity.
        self._caption: Optional[str] = None

        #: The IDs of all entities which are included in this canonical entity.
        self.extra_referents: Set[str] = set(data.pop("referents", []))

        #: The last time this entity was changed.
        self.last_change: Optional[str] = data.get("last_change", None)

        #: The default dataset for new statements.
        self.dataset = dataset

        self.id: Optional[str] = data.pop("id", None)
        self._statements: Dict[str, Set[Statement]] = {}

        properties = data.pop("properties", None)
        if isinstance(properties, Mapping):
            for key, value in properties.items():
                self.add(key, value, cleaned=cleaned, quiet=True)

        for stmt_data in data.pop("statements", []):
            stmt = Statement.from_dict(stmt_data)
            # Statements loaded into an identified entity are re-pointed at it.
            if self.id is not None:
                stmt.canonical_id = self.id
            self.add_statement(stmt)

    @property
    def _properties(self) -> Dict[str, List[str]]:  # type: ignore
        """Property name -> statement values, one entry per statement."""
        return {p: [s.value for s in v] for p, v in self._statements.items()}

    def _iter_stmt(self) -> Generator[Statement, None, None]:
        """Yield every stored statement, filling in missing IDs on the way.

        A statement without ``entity_id`` adopts this entity's ID (when there
        is one) and has its key regenerated; a statement without ``id`` gets a
        generated key.
        """
        for stmts in self._statements.values():
            for stmt in stmts:
                if stmt.entity_id is None and self.id is not None:
                    stmt.entity_id = self.id
                    stmt.id = stmt.generate_key()
                if stmt.id is None:
                    stmt.id = stmt.generate_key()
                yield stmt

    @property
    def statements(self) -> Generator[Statement, None, None]:
        """Return all statements for this entity, with extra ID statement.

        After the stored statements, and only if the entity has an ID, one
        additional ``BASE_ID`` statement is yielded. Its value is a SHA1 hex
        digest of the schema name followed by the sorted statement IDs.
        """
        stmt_ids: List[str] = []
        last_seen: Set[str] = set()
        first_seen: Set[str] = set()
        for stmt in self._iter_stmt():
            yield stmt
            if stmt.id is not None:
                stmt_ids.append(stmt.id)
            if stmt.last_seen is not None:
                last_seen.add(stmt.last_seen)
            if stmt.first_seen is not None:
                first_seen.add(stmt.first_seen)
        if self.id is None:
            return

        digest = sha1(self.schema.name.encode("utf-8"))
        for stmt_id in sorted(stmt_ids):
            digest.update(stmt_id.encode("utf-8"))
        checksum = digest.hexdigest()
        # This is to make the last_change value stable across
        # serialisation:
        first = self.last_change or min(first_seen, default=None)
        yield Statement(
            canonical_id=self.id,
            entity_id=self.id,
            prop=BASE_ID,
            schema=self.schema.name,
            value=checksum,
            dataset=self.dataset.name,
            first_seen=first,
            last_seen=max(last_seen, default=None),
        )

    @property
    def first_seen(self) -> Optional[str]:
        """Smallest ``first_seen`` among the stored statements, if any."""
        seen = (s.first_seen for s in self._iter_stmt() if s.first_seen is not None)
        return min(seen, default=None)

    @property
    def last_seen(self) -> Optional[str]:
        """Largest ``last_seen`` among the stored statements, if any."""
        seen = (s.last_seen for s in self._iter_stmt() if s.last_seen is not None)
        return max(seen, default=None)

    @property
    def datasets(self) -> Set[str]:
        """Names of all datasets referenced by the stored statements."""
        return {stmt.dataset for stmt in self._iter_stmt()}

    @property
    def referents(self) -> Set[str]:
        """``extra_referents`` plus every statement ``entity_id`` other than our own."""
        referents: Set[str] = set(self.extra_referents)
        for stmt in self._iter_stmt():
            if stmt.entity_id is not None and stmt.entity_id != self.id:
                referents.add(stmt.entity_id)
        return referents

    @property
    def key_prefix(self) -> Optional[str]:
        """The name of the entity's default dataset (read-only)."""
        return self.dataset.name

    @key_prefix.setter
    def key_prefix(self, dataset: Optional[str]) -> None:
        raise NotImplementedError()

    def add_statement(self, stmt: Statement) -> None:
        """Attach ``stmt`` to the entity.

        If the entity schema is not already a ``stmt.schema``, the entity is
        moved to the common schema of the two. ``BASE_ID`` statements are not
        stored; they only advance ``last_change``.

        Raises:
            InvalidData: if no common schema exists.
        """
        schema = self.schema
        if not schema.is_a(stmt.schema):
            try:
                self.schema = schema.model.common_schema(schema, stmt.schema)
            except InvalidData as exc:
                raise InvalidData(f"{self.id}: {exc}") from exc

        if stmt.prop != BASE_ID:
            self._statements.setdefault(stmt.prop, set()).add(stmt)
            return

        if stmt.first_seen is not None:
            # The last_change attribute describes the latest checksum change
            # of any emitted component of the entity, which is stored in the BASE
            # field.
            if self.last_change is None:
                self.last_change = stmt.first_seen
            else:
                self.last_change = max(self.last_change, stmt.first_seen)

    def get(self, prop: P, quiet: bool = False) -> List[str]:
        """Return the distinct values of ``prop`` (order is not defined)."""
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None or prop_name not in self._statements:
            return []
        return list({s.value for s in self._statements[prop_name]})

    def get_statements(self, prop: P, quiet: bool = False) -> List[Statement]:
        """Return the statements stored for ``prop``."""
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None or prop_name not in self._statements:
            return []
        return list(self._statements[prop_name])

    def set(
        self,
        prop: P,
        values: Values,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
        lang: Optional[str] = None,
        original_value: Optional[str] = None,
        origin: Optional[str] = None,
    ) -> None:
        """Replace all statements of ``prop`` with ones built from ``values``."""
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return
        self._statements.pop(prop_name, None)
        return self.add(
            prop,
            values,
            cleaned=cleaned,
            quiet=quiet,
            fuzzy=fuzzy,
            format=format,
            lang=lang,
            original_value=original_value,
            origin=origin,
        )

    def add(
        self,
        prop: P,
        values: Values,
        cleaned: bool = False,
        quiet: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
        lang: Optional[str] = None,
        original_value: Optional[str] = None,
        origin: Optional[str] = None,
    ) -> None:
        """Add one statement per value in ``values`` to ``prop``.

        Values are sanitized by ``string_list`` unless ``cleaned`` is set.
        Nothing happens when the property name cannot be resolved.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return None
        # Keep the resolved Property in its own name instead of rebinding the
        # ``prop`` parameter to a different type.
        prop_obj = self.schema.properties[prop_name]
        for value in string_list(values, sanitize=not cleaned):
            self.unsafe_add(
                prop_obj,
                value,
                cleaned=cleaned,
                fuzzy=fuzzy,
                format=format,
                quiet=quiet,
                lang=lang,
                original_value=original_value,
                origin=origin,
            )
        return None

    def unsafe_add(
        self,
        prop: Property,
        value: Optional[str],
        cleaned: bool = False,
        fuzzy: bool = False,
        format: Optional[str] = None,
        quiet: bool = False,
        schema: Optional[str] = None,
        dataset: Optional[str] = None,
        seen: Optional[str] = None,
        lang: Optional[str] = None,
        original_value: Optional[str] = None,
        origin: Optional[str] = None,
    ) -> Optional[str]:
        """Add a single value to the entity as a statement.

        Returns:
            The stored (cleaned) value, or ``None`` when nothing was added:
            the value was empty, cleaning rejected it, or ``prop`` is a stub
            and ``quiet`` is set.

        Raises:
            InvalidData: if ``prop`` is a stub property and ``quiet`` is not
                set, or if the entity has no ID.
        """
        if not value:
            return None

        # Don't allow setting the reverse properties:
        if prop.stub:
            if quiet:
                return None
            msg = gettext("Stub property (%s): %s")
            raise InvalidData(msg % (self.schema, prop))

        if lang is not None:
            lang = registry.language.clean_text(lang)

        clean: Optional[str] = value
        if not cleaned:
            clean = prop.type.clean_text(value, proxy=self, fuzzy=fuzzy, format=format)

        if clean is None:
            return None

        # Remember the raw input whenever cleaning altered it.
        if original_value is None and clean != value:
            original_value = value

        if self.id is None:
            raise InvalidData("Cannot add statement to entity without ID!")
        stmt = Statement(
            entity_id=self.id,
            prop=prop.name,
            schema=schema or self.schema.name,
            value=clean,
            dataset=dataset or self.dataset.name,
            lang=lang,
            original_value=original_value,
            first_seen=seen,
            origin=origin,
        )
        self.add_statement(stmt)
        return clean

    def pop(self, prop: P, quiet: bool = True) -> List[str]:
        """Remove ``prop`` entirely and return its distinct values."""
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None or prop_name not in self._statements:
            return []
        return list({s.value for s in self._statements.pop(prop_name)})

    def remove(self, prop: P, value: str, quiet: bool = True) -> None:
        """Drop every statement of ``prop`` whose value equals ``value``.

        When that leaves the property without statements, the property is
        removed altogether so that ``properties``, ``iterprops`` and
        ``to_dict`` do not report a property with no values.
        """
        prop_name = self._prop_name(prop, quiet=quiet)
        if prop_name is None:
            return
        # Look at the statement store directly; ``_properties`` would rebuild
        # a dict of every value just to answer a membership test.
        current = self._statements.get(prop_name)
        if current is None:
            return
        remaining = {s for s in current if s.value != value}
        if remaining:
            self._statements[prop_name] = remaining
        else:
            del self._statements[prop_name]

    def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
        """Yield ``(property, value)`` for each distinct value of each property."""
        for name, statements in self._statements.items():
            prop = self.schema.properties[name]
            for value in {s.value for s in statements}:
                yield (prop, value)

    def get_type_values(
        self, type_: PropertyType, matchable: bool = False
    ) -> List[str]:
        """Return the distinct values of all properties of type ``type_``."""
        statements = self.get_type_statements(type_, matchable=matchable)
        return list({stmt.value for stmt in statements})

    def get_type_statements(
        self, type_: PropertyType, matchable: bool = False
    ) -> List[Statement]:
        """Return the statements of all properties of type ``type_``.

        With ``matchable`` set, properties that are not matchable are skipped.
        """
        combined: List[Statement] = []
        for prop_name, statements in self._statements.items():
            prop = self.schema.properties[prop_name]
            if matchable and not prop.matchable:
                continue
            if prop.type == type_:
                combined.extend(statements)
        return combined

    @property
    def properties(self) -> Dict[str, List[str]]:
        """Property name -> distinct values, derived from the statements."""
        return {p: list({s.value for s in vs}) for p, vs in self._statements.items()}

    def iterprops(self) -> List[Property]:
        """Return the Property objects for which statements are stored."""
        return [self.schema.properties[p] for p in self._statements]

    def clone(self: SE) -> SE:
        """Create a new entity with the same schema, ID and statements.

        The statement objects themselves are shared with the original.
        """
        data = {"schema": self.schema.name, "id": self.id}
        cloned = type(self)(self.dataset, data)
        for stmt in self._iter_stmt():
            cloned.add_statement(stmt)
        return cloned

    def merge(self: SE, other: EntityProxy) -> SE:
        """Merge ``other`` into this entity and return ``self``.

        Statements of another ``StatementEntity`` are re-pointed at this
        entity's ID and adopted, and its ``extra_referents`` are taken over.
        For any other proxy, the plain property values are added.

        Raises:
            InvalidData: if the two schemata have no common schema.
        """
        try:
            self.schema = self.schema.model.common_schema(self.schema, other.schema)
        except InvalidData as e:
            msg = "Cannot merge entities with id %s: %s"
            # Chain the cause, as add_statement() does, to keep the traceback.
            raise InvalidData(msg % (self.id, e)) from e

        if not isinstance(other, StatementEntity):
            for prop, values in other._properties.items():
                self.add(prop, values, cleaned=True, quiet=True)
            return self
        for stmt in other._iter_stmt():
            if self.id is not None:
                stmt.canonical_id = self.id
            self.add_statement(stmt)
        self.extra_referents.update(other.extra_referents)
        return self

    def _add_timestamps(self, data: Dict[str, Any]) -> Dict[str, Any]:
        """Copy the non-empty first_seen/last_seen/last_change into ``data``."""
        # Each of the two seen properties scans all statements, so read once.
        first_seen = self.first_seen
        if first_seen is not None:
            data["first_seen"] = first_seen
        last_seen = self.last_seen
        if last_seen is not None:
            data["last_seen"] = last_seen
        if self.last_change is not None:
            data["last_change"] = self.last_change
        return data

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary representation with plain property values."""
        data: Dict[str, Any] = {
            "id": self.id,
            "caption": self.caption,
            "schema": self.schema.name,
            "properties": self.properties,
            "referents": list(self.referents),
            "datasets": list(self.datasets),
        }
        return self._add_timestamps(data)

    def to_statement_dict(self) -> Dict[str, Any]:
        """Return a dictionary representation of the entity's statements."""
        data: Dict[str, Any] = {
            "id": self.id,
            "caption": self.caption,
            "schema": self.schema.name,
            "statements": [stmt.to_dict() for stmt in self.statements],
            "referents": list(self.referents),
            "datasets": list(self.datasets),
        }
        return self._add_timestamps(data)

    def __len__(self) -> int:
        """Number of stored statements, plus one."""
        return sum(1 for _ in self._iter_stmt()) + 1

    @classmethod
    def from_dict(
        cls: Type[SE],
        data: Dict[str, Any],
        cleaned: bool = True,
        default_dataset: Optional[Dataset] = None,
    ) -> SE:
        """Build an entity from ``data``, falling back to ``DefaultDataset``."""
        # Exists only for backwards compatibility.
        dataset = default_dataset or DefaultDataset
        return cls(dataset, data, cleaned=cleaned)

    @classmethod
    def from_data(
        cls: Type[SE],
        dataset: Dataset,
        data: Dict[str, Any],
        cleaned: bool = True,
    ) -> SE:
        """Build an entity from ``data`` within ``dataset``."""
        return cls(dataset, data, cleaned=cleaned)

    @classmethod
    def from_statements(
        cls: Type[SE],
        dataset: Dataset,
        statements: Iterable[Statement],
    ) -> SE:
        """Build an entity from statements.

        The first statement supplies the initial schema and the entity ID
        (its ``canonical_id``).

        Raises:
            ValueError: if ``statements`` is empty.
        """
        obj: Optional[SE] = None
        for stmt in statements:
            if obj is None:
                data = {"schema": stmt.schema, "id": stmt.canonical_id}
                obj = cls(dataset, data)
            obj.add_statement(stmt)
        if obj is None:
            raise ValueError("No statements given!")
        return obj