followthemoney 3.8.5__py3-none-any.whl → 4.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. followthemoney/__init__.py +30 -10
  2. followthemoney/cli/cli.py +1 -1
  3. followthemoney/cli/exports.py +6 -2
  4. followthemoney/cli/statement.py +62 -0
  5. followthemoney/cli/util.py +2 -3
  6. followthemoney/compare.py +26 -16
  7. followthemoney/dataset/__init__.py +17 -0
  8. followthemoney/dataset/catalog.py +77 -0
  9. followthemoney/dataset/coverage.py +29 -0
  10. followthemoney/dataset/dataset.py +137 -0
  11. followthemoney/dataset/publisher.py +25 -0
  12. followthemoney/dataset/resource.py +30 -0
  13. followthemoney/dataset/util.py +58 -0
  14. followthemoney/entity.py +73 -0
  15. followthemoney/exc.py +6 -0
  16. followthemoney/export/rdf.py +57 -5
  17. followthemoney/graph.py +1 -2
  18. followthemoney/model.py +36 -9
  19. followthemoney/ontology.py +18 -16
  20. followthemoney/property.py +12 -15
  21. followthemoney/proxy.py +43 -64
  22. followthemoney/schema/Analyzable.yaml +2 -3
  23. followthemoney/schema/BankAccount.yaml +2 -3
  24. followthemoney/schema/Company.yaml +0 -6
  25. followthemoney/schema/Contract.yaml +0 -1
  26. followthemoney/schema/CryptoWallet.yaml +1 -1
  27. followthemoney/schema/Document.yaml +0 -6
  28. followthemoney/schema/Interval.yaml +7 -0
  29. followthemoney/schema/LegalEntity.yaml +6 -0
  30. followthemoney/schema/License.yaml +2 -0
  31. followthemoney/schema/Page.yaml +0 -1
  32. followthemoney/schema/Person.yaml +0 -5
  33. followthemoney/schema/Sanction.yaml +1 -0
  34. followthemoney/schema/Thing.yaml +0 -2
  35. followthemoney/schema/UserAccount.yaml +6 -3
  36. followthemoney/schema.py +27 -39
  37. followthemoney/statement/__init__.py +19 -0
  38. followthemoney/statement/entity.py +437 -0
  39. followthemoney/statement/serialize.py +245 -0
  40. followthemoney/statement/statement.py +256 -0
  41. followthemoney/statement/util.py +31 -0
  42. followthemoney/types/__init__.py +66 -23
  43. followthemoney/types/address.py +3 -3
  44. followthemoney/types/checksum.py +3 -7
  45. followthemoney/types/common.py +9 -14
  46. followthemoney/types/country.py +3 -7
  47. followthemoney/types/date.py +21 -11
  48. followthemoney/types/email.py +0 -4
  49. followthemoney/types/entity.py +5 -11
  50. followthemoney/types/gender.py +6 -10
  51. followthemoney/types/identifier.py +9 -3
  52. followthemoney/types/ip.py +5 -9
  53. followthemoney/types/json.py +2 -2
  54. followthemoney/types/language.py +3 -7
  55. followthemoney/types/mimetype.py +4 -8
  56. followthemoney/types/name.py +7 -8
  57. followthemoney/types/number.py +88 -6
  58. followthemoney/types/phone.py +4 -11
  59. followthemoney/types/string.py +4 -4
  60. followthemoney/types/topic.py +3 -7
  61. followthemoney/types/url.py +5 -10
  62. followthemoney/util.py +12 -13
  63. followthemoney/value.py +67 -0
  64. {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/METADATA +23 -8
  65. {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/RECORD +68 -59
  66. {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/entry_points.txt +1 -0
  67. followthemoney/offshore.py +0 -48
  68. followthemoney/rdf.py +0 -9
  69. followthemoney/schema/Assessment.yaml +0 -32
  70. followthemoney/schema/Post.yaml +0 -42
  71. followthemoney/types/iban.py +0 -58
  72. followthemoney/types/registry.py +0 -52
  73. {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/WHEEL +0 -0
  74. {followthemoney-3.8.5.dist-info → followthemoney-4.0.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,73 @@
1
+ from typing import Any, Dict, List, Optional, Set, TypeVar
2
+
3
+ from rigour.names import pick_name
4
+
5
+ from followthemoney.proxy import EntityProxy
6
+ from followthemoney.schema import Schema
7
+ from followthemoney.statement.util import BASE_ID
8
+
9
+ VE = TypeVar("VE", bound="ValueEntity")
10
+
11
+
12
+ def _defined(*args: Optional[str]) -> List[str]:
13
+ return [arg for arg in args if arg is not None]
14
+
15
+
16
+ class ValueEntity(EntityProxy):
17
+ """
18
+ This class has the extended attributes from `StatementEntity` but without
19
+ statements. Useful for streaming around. Starting from followthemoney 4.0,
20
+ applications should use this entity class as the base class.
21
+ """
22
+
23
+ def __init__(
24
+ self,
25
+ schema: Schema,
26
+ data: Dict[str, Any],
27
+ key_prefix: Optional[str] = None,
28
+ cleaned: bool = True,
29
+ ):
30
+ super().__init__(schema, data, key_prefix=key_prefix, cleaned=cleaned)
31
+ self._caption: Optional[str] = data.get("caption")
32
+ self.datasets: Set[str] = set(data.get("datasets", []))
33
+ self.referents: Set[str] = set(data.get("referents", []))
34
+ self.first_seen: Optional[str] = data.get("first_seen")
35
+ self.last_seen: Optional[str] = data.get("last_seen")
36
+ self.last_change: Optional[str] = data.get("last_change")
37
+
38
+ # add data from statement dict if present.
39
+ # this updates the datasets and referents sets
40
+ for stmt_data in data.pop("statements", []):
41
+ self.datasets.add(stmt_data["dataset"])
42
+ if stmt_data["entity_id"] != self.id:
43
+ self.referents.add(stmt_data["entity_id"])
44
+ if stmt_data["prop"] != BASE_ID:
45
+ self.add(stmt_data["prop"], stmt_data["value"])
46
+
47
+ def merge(self: "ValueEntity", other: "ValueEntity") -> "ValueEntity":
48
+ merged = super().merge(other)
49
+ merged._caption = pick_name(_defined(self._caption, other._caption))
50
+ merged.referents.update(other.referents)
51
+ merged.datasets.update(other.datasets)
52
+ self.first_seen = min(_defined(self.first_seen, other.first_seen), default=None)
53
+ self.last_seen = max(_defined(self.last_seen, other.last_seen), default=None)
54
+ changed = _defined(self.last_change, other.last_change)
55
+ self.last_change = max(changed, default=None)
56
+ return merged
57
+
58
+ def to_dict(self) -> Dict[str, Any]:
59
+ data: Dict[str, Any] = {
60
+ "id": self.id,
61
+ "caption": self._caption or self.caption,
62
+ "schema": self.schema.name,
63
+ "properties": self.properties,
64
+ "referents": list(self.referents),
65
+ "datasets": list(self.datasets),
66
+ }
67
+ if self.first_seen is not None:
68
+ data["first_seen"] = self.first_seen
69
+ if self.last_seen is not None:
70
+ data["last_seen"] = self.last_seen
71
+ if self.last_change is not None:
72
+ data["last_change"] = self.last_change
73
+ return data
followthemoney/exc.py CHANGED
@@ -11,6 +11,12 @@ class FollowTheMoneyException(Exception):
11
11
  pass
12
12
 
13
13
 
14
+ class MetadataException(FollowTheMoneyException):
15
+ """An exception raised by dataset metadata validation."""
16
+
17
+ pass
18
+
19
+
14
20
  class InvalidData(FollowTheMoneyException):
15
21
  """Schema validation errors will be caught by the API."""
16
22
 
@@ -1,23 +1,75 @@
1
1
  import logging
2
- from rdflib import Graph
3
- from typing import List, Optional, TextIO
2
+ from prefixdate import Precision
3
+ from rdflib import Graph, Namespace
4
+ from rdflib.term import Identifier, URIRef, Literal
5
+ from rdflib import RDF, SKOS, XSD
6
+ from typing import Generator, List, Optional, TextIO, Tuple
4
7
 
5
8
  from followthemoney.export.common import Exporter
6
- from followthemoney.proxy import E
9
+ from followthemoney.types import registry
10
+ from followthemoney.proxy import EntityProxy
7
11
 
8
12
  log = logging.getLogger(__name__)
13
+ Triple = Tuple[Identifier, Identifier, Identifier]
14
+ NS = Namespace("https://schema.followthemoney.tech/#")
9
15
 
10
16
 
11
17
  class RDFExporter(Exporter):
18
+ """Export the entity as RDF N-Triples."""
19
+
20
+ TYPE_PREFIXES = {
21
+ registry.checksum: "hash:",
22
+ registry.country: "http://id.loc.gov/vocabulary/countries/",
23
+ registry.email: "mailto:",
24
+ registry.entity: "e:",
25
+ registry.gender: "gender:",
26
+ registry.ip: "ip:",
27
+ registry.identifier: "id:",
28
+ registry.language: "http://lexvo.org/id/iso639-3/",
29
+ registry.mimetype: "urn:mimetype:",
30
+ registry.phone: "tel:",
31
+ registry.topic: "ftm:topic:",
32
+ }
33
+
12
34
  def __init__(self, fh: TextIO, qualified: bool = True) -> None:
13
35
  super(RDFExporter, self).__init__()
14
36
  self.fh = fh
15
37
  self.qualified = qualified
16
38
 
17
- def write(self, proxy: E, extra: Optional[List[str]] = None) -> None:
39
+ def entity_triples(self, proxy: EntityProxy) -> Generator[Triple, None, None]:
40
+ if proxy.id is None or proxy.schema is None:
41
+ return
42
+ entity_prefix = self.TYPE_PREFIXES[registry.entity]
43
+ uri = URIRef(f"{entity_prefix}{proxy.id}")
44
+ yield (uri, RDF.type, NS[proxy.schema.name])
45
+ if self.qualified:
46
+ caption = proxy.caption
47
+ if caption != proxy.schema.label:
48
+ yield (uri, SKOS.prefLabel, Literal(caption))
49
+ for prop, value in proxy.itervalues():
50
+ if prop.type in self.TYPE_PREFIXES:
51
+ prefix = self.TYPE_PREFIXES[prop.type]
52
+ if prop.type == registry.identifier and prop.format is not None:
53
+ prefix = f"{prefix}{prop.format}:"
54
+ obj: Identifier = URIRef(f"{prefix}{value}")
55
+ elif prop.type == registry.date:
56
+ if len(value) < Precision.HOUR.value:
57
+ obj = Literal(value, datatype=XSD.date)
58
+ else:
59
+ obj = Literal(value, datatype=XSD.dateTime)
60
+ elif prop.type == registry.url:
61
+ obj = URIRef(value)
62
+ else:
63
+ obj = Literal(value)
64
+ if self.qualified:
65
+ yield (uri, NS[prop.qname], obj)
66
+ else:
67
+ yield (uri, URIRef(prop.name), obj)
68
+
69
+ def write(self, proxy: EntityProxy, extra: Optional[List[str]] = None) -> None:
18
70
  graph = Graph()
19
71
 
20
- for triple in proxy.triples(qualified=self.qualified):
72
+ for triple in self.entity_triples(proxy):
21
73
  graph.add(triple)
22
74
  try:
23
75
  nt = graph.serialize(format="nt11").strip()
followthemoney/graph.py CHANGED
@@ -196,8 +196,7 @@ class Graph(object):
196
196
  """
197
197
 
198
198
  def __init__(self, edge_types: Iterable[PropertyType] = registry.pivots) -> None:
199
- types = registry.get_types(edge_types)
200
- self.edge_types = [t for t in types if t.matchable]
199
+ self.edge_types = [t for t in edge_types if t.matchable]
201
200
  self.flush()
202
201
 
203
202
  def flush(self) -> None:
followthemoney/model.py CHANGED
@@ -1,16 +1,19 @@
1
1
  import os
2
2
  import yaml
3
3
  from functools import lru_cache
4
- from typing import Any, Dict, Generator, Iterator, Optional, Set, TypedDict, Union
4
+ from typing import TYPE_CHECKING, Any
5
+ from typing import Dict, Generator, Iterator, Optional, Set, TypedDict, Union
5
6
 
6
7
  from followthemoney.types import registry
7
8
  from followthemoney.types.common import PropertyType, PropertyTypeToDict
8
9
  from followthemoney.schema import Schema, SchemaToDict
9
10
  from followthemoney.property import Property
10
- from followthemoney.mapping import QueryMapping
11
- from followthemoney.proxy import EntityProxy
12
11
  from followthemoney.exc import InvalidModel, InvalidData
13
12
 
13
+ if TYPE_CHECKING:
14
+ from followthemoney.proxy import EntityProxy
15
+ from followthemoney.mapping import QueryMapping
16
+
14
17
 
15
18
  class ModelToDict(TypedDict):
16
19
  schemata: Dict[str, SchemaToDict]
@@ -22,6 +25,8 @@ class Model(object):
22
25
  provides some helper functions to find schemata, properties or to instantiate
23
26
  entity proxies based on the schema metadata."""
24
27
 
28
+ _instance: Optional["Model"] = None
29
+
25
30
  __slots__ = ("path", "schemata", "properties", "qnames")
26
31
 
27
32
  def __init__(self, path: str) -> None:
@@ -38,6 +43,15 @@ class Model(object):
38
43
  self._load(os.path.join(path, filename))
39
44
  self.generate()
40
45
 
46
+ @classmethod
47
+ def instance(cls) -> "Model":
48
+ if cls._instance is None:
49
+ model_path = os.path.dirname(__file__)
50
+ model_path = os.path.join(model_path, "schema")
51
+ model_path = os.environ.get("FTM_MODEL_PATH", model_path)
52
+ cls._instance = cls(model_path)
53
+ return cls._instance
54
+
41
55
  def generate(self) -> None:
42
56
  """Loading the model is a weird process because the schemata reference
43
57
  each other in complex ways, so the generation process cannot be fully
@@ -89,13 +103,15 @@ class Model(object):
89
103
 
90
104
  def make_mapping(
91
105
  self, mapping: Dict[str, Any], key_prefix: Optional[str] = None
92
- ) -> QueryMapping:
106
+ ) -> "QueryMapping":
93
107
  """Parse a mapping that applies (tabular) source data to the model."""
108
+ from followthemoney.mapping import QueryMapping
109
+
94
110
  return QueryMapping(self, mapping, key_prefix=key_prefix)
95
111
 
96
112
  def map_entities(
97
113
  self, mapping: Dict[str, Any], key_prefix: Optional[str] = None
98
- ) -> Generator[EntityProxy, None, None]:
114
+ ) -> Generator["EntityProxy", None, None]:
99
115
  """Given a mapping, yield a series of entities from the data source."""
100
116
  gen = self.make_mapping(mapping, key_prefix=key_prefix)
101
117
  for record in gen.source.records:
@@ -127,20 +143,31 @@ class Model(object):
127
143
  msg = "No common schema: %s and %s"
128
144
  raise InvalidData(msg % (left, right))
129
145
 
146
+ def matchable_schemata(self) -> Set[Schema]:
147
+ """Return the set of all schemata that are matchable."""
148
+ return set([s for s in self.schemata.values() if s.matchable])
149
+
130
150
  def make_entity(
131
151
  self, schema: Union[str, Schema], key_prefix: Optional[str] = None
132
- ) -> EntityProxy:
152
+ ) -> "EntityProxy":
133
153
  """Instantiate an empty entity proxy of the given schema type."""
134
- return EntityProxy(self, {"schema": schema}, key_prefix=key_prefix)
154
+ from followthemoney.proxy import EntityProxy
155
+
156
+ schema_ = self.get(schema)
157
+ if schema_ is None:
158
+ raise InvalidData("Schema does not exist: %s" % schema)
159
+ return EntityProxy(schema_, {}, key_prefix=key_prefix)
135
160
 
136
- def get_proxy(self, data: Dict[str, Any], cleaned: bool = True) -> EntityProxy:
161
+ def get_proxy(self, data: Dict[str, Any], cleaned: bool = True) -> "EntityProxy":
137
162
  """Create an entity proxy to reflect the entity data in the given
138
163
  dictionary. If ``cleaned`` is disabled, all property values are
139
164
  fully re-validated and normalised. Use this if handling input data
140
165
  from an untrusted source."""
166
+ from followthemoney.proxy import EntityProxy
167
+
141
168
  if isinstance(data, EntityProxy):
142
169
  return data
143
- return EntityProxy.from_dict(self, data, cleaned=cleaned)
170
+ return EntityProxy.from_dict(data, cleaned=cleaned)
144
171
 
145
172
  def to_dict(self) -> ModelToDict:
146
173
  """Return metadata for all schemata and properties, in a serializable form."""
@@ -1,15 +1,16 @@
1
1
  import sys
2
2
  from datetime import datetime
3
- from rdflib import Graph, URIRef, Literal
3
+ from rdflib import Graph, URIRef, Literal, Namespace
4
4
  from rdflib.namespace import OWL, DCTERMS, RDF, RDFS, XSD
5
5
 
6
6
  from followthemoney import model
7
7
  from followthemoney.property import Property
8
8
  from followthemoney.schema import Schema
9
9
  from followthemoney.types import registry
10
- from followthemoney.rdf import NS
11
10
  from followthemoney.util import PathLike
12
11
 
12
+ NS = Namespace("https://schema.followthemoney.tech/#")
13
+
13
14
 
14
15
  class Ontology(object):
15
16
  def __init__(self) -> None:
@@ -32,37 +33,38 @@ class Ontology(object):
32
33
  self.add_class(schema)
33
34
 
34
35
  def add_class(self, schema: Schema) -> None:
35
- self.graph.add((schema.uri, RDF.type, RDFS.Class))
36
- self.graph.add((schema.uri, RDFS.isDefinedBy, self.uri))
36
+ suri = NS[schema.name]
37
+ self.graph.add((suri, RDF.type, RDFS.Class))
38
+ self.graph.add((suri, RDFS.isDefinedBy, self.uri))
37
39
  for parent in schema.extends:
38
- self.graph.add((schema.uri, RDFS.subClassOf, parent.uri))
40
+ self.graph.add((suri, RDFS.subClassOf, NS[parent.name]))
39
41
 
40
- self.graph.add((schema.uri, RDFS.label, Literal(schema.label)))
42
+ self.graph.add((suri, RDFS.label, Literal(schema.label)))
41
43
  if schema.description is not None:
42
44
  description = Literal(schema.description)
43
- self.graph.add((schema.uri, RDFS.comment, description))
45
+ self.graph.add((suri, RDFS.comment, description))
44
46
 
45
47
  for _, prop in sorted(schema.properties.items()):
46
48
  self.add_property(prop)
47
49
 
48
50
  def add_property(self, prop: Property) -> None:
49
- self.graph.add((prop.uri, RDF.type, RDF.Property))
50
- self.graph.add((prop.uri, RDFS.isDefinedBy, self.uri))
51
+ puri = NS[prop.qname]
52
+ self.graph.add((puri, RDF.type, RDF.Property))
53
+ self.graph.add((puri, RDFS.isDefinedBy, self.uri))
51
54
 
52
- self.graph.add((prop.uri, RDFS.label, Literal(prop.label)))
55
+ self.graph.add((puri, RDFS.label, Literal(prop.label)))
53
56
  if prop.description is not None:
54
- self.graph.add((prop.uri, RDFS.comment, Literal(prop.description)))
57
+ self.graph.add((puri, RDFS.comment, Literal(prop.description)))
55
58
 
56
- self.graph.add((prop.uri, RDFS.domain, prop.schema.uri))
59
+ self.graph.add((puri, RDFS.domain, NS[prop.schema.name]))
57
60
  if prop.range is not None:
58
61
  range = model.get(prop.range)
59
62
  if range is not None:
60
- range_uri = range.uri
61
- self.graph.add((prop.uri, RDFS.range, range_uri))
63
+ self.graph.add((puri, RDFS.range, NS[range.name]))
62
64
  if prop.reverse is not None:
63
- self.graph.add((prop.uri, OWL.inverseOf, prop.reverse.uri))
65
+ self.graph.add((puri, OWL.inverseOf, NS[prop.reverse.qname]))
64
66
  if prop.type == registry.date:
65
- self.graph.add((prop.uri, RDFS.range, XSD.dateTime))
67
+ self.graph.add((puri, RDFS.range, XSD.dateTime))
66
68
 
67
69
  def write_namespace_docs(self, path: PathLike) -> None:
68
70
  xml_fn = "%s/ftm.xml" % path
@@ -1,10 +1,9 @@
1
1
  from banal import is_mapping, as_bool
2
- from typing import TYPE_CHECKING, cast, Any, List, Optional, TypedDict
2
+ from typing import TYPE_CHECKING, Any, List, Optional, TypedDict
3
3
 
4
4
  from followthemoney.exc import InvalidModel
5
5
  from followthemoney.types import registry
6
- from followthemoney.rdf import NS, URIRef
7
- from followthemoney.util import gettext, get_entity_id
6
+ from followthemoney.util import gettext, get_entity_id, const
8
7
 
9
8
  if TYPE_CHECKING:
10
9
  from followthemoney.schema import Schema
@@ -26,7 +25,6 @@ class PropertyDict(TypedDict, total=False):
26
25
  deprecated: Optional[bool]
27
26
  maxLength: Optional[int]
28
27
  # stub: Optional[bool]
29
- rdf: Optional[str]
30
28
  range: Optional[str]
31
29
  format: Optional[str]
32
30
 
@@ -66,7 +64,6 @@ class Property:
66
64
  "stub",
67
65
  "_reverse",
68
66
  "reverse",
69
- "uri",
70
67
  )
71
68
 
72
69
  #: Invalid property names.
@@ -79,10 +76,10 @@ class Property:
79
76
  self.schema = schema
80
77
 
81
78
  #: Machine-readable name for this property.
82
- self.name = name
79
+ self.name = const(name)
83
80
 
84
81
  #: Qualified property name, which also includes the schema name.
85
- self.qname = "%s:%s" % (schema.name, self.name)
82
+ self.qname = const("%s:%s" % (schema.name, self.name))
86
83
  if self.name in self.RESERVED:
87
84
  raise InvalidModel("Reserved name: %s" % self.name)
88
85
 
@@ -97,12 +94,11 @@ class Property:
97
94
  #: This property should not be shown or mentioned in the user interface.
98
95
  self.hidden = as_bool(data.get("hidden"))
99
96
 
100
- type_ = data.get("type", "string")
101
- if type_ is None or type_ not in registry.named:
102
- raise InvalidModel("Invalid type: %s" % type_)
103
-
97
+ type_ = data.get("type") or "string"
104
98
  #: The data type for this property.
105
- self.type = registry[type_]
99
+ self.type = registry.get(type_)
100
+ if self.type is None:
101
+ raise InvalidModel("Invalid type: %s" % type_)
106
102
 
107
103
  #: Whether this property should be used for matching and cross-referencing.
108
104
  _matchable = data.get("matchable")
@@ -137,9 +133,6 @@ class Property:
137
133
  self._reverse = data.get("reverse")
138
134
  self.reverse: Optional["Property"] = None
139
135
 
140
- #: RDF term for this property (i.e. the predicate URI).
141
- self.uri = URIRef(cast(str, data.get("rdf", NS[self.qname])))
142
-
143
136
  def generate(self, model: "Model") -> None:
144
137
  """Setup method used when loading the model in order to build out the reverse
145
138
  links of the property."""
@@ -170,6 +163,10 @@ class Property:
170
163
  return 0.0
171
164
  return self.type.specificity(value)
172
165
 
166
+ def caption(self, value: str) -> str:
167
+ """Return a user-friendly caption for the given value."""
168
+ return self.type.caption(value, format=self.format)
169
+
173
170
  def validate(self, data: List[Any]) -> Optional[str]:
174
171
  """Validate that the data should be stored.
175
172
 
followthemoney/proxy.py CHANGED
@@ -1,36 +1,25 @@
1
1
  import logging
2
- from typing import (
3
- TYPE_CHECKING,
4
- Any,
5
- Dict,
6
- Generator,
7
- List,
8
- Optional,
9
- Set,
10
- Tuple,
11
- Union,
12
- Type,
13
- TypeVar,
14
- cast,
15
- )
16
- import warnings
2
+ from typing import TYPE_CHECKING, cast, Any
3
+ from typing import Dict, Generator, List, Optional, Set, Tuple, Union, Type, TypeVar
17
4
  from itertools import product
18
5
  from banal import ensure_dict
6
+ from rigour.names import pick_name
19
7
 
20
8
  from followthemoney.exc import InvalidData
21
9
  from followthemoney.types import registry
22
10
  from followthemoney.types.common import PropertyType
23
11
  from followthemoney.property import Property
24
- from followthemoney.rdf import SKOS, RDF, Literal, URIRef, Identifier
12
+ from followthemoney.value import string_list, Values
25
13
  from followthemoney.util import sanitize_text, gettext
26
- from followthemoney.util import merge_context, value_list, make_entity_id
14
+ from followthemoney.util import merge_context, make_entity_id
15
+ from followthemoney.model import Model
16
+ from followthemoney.schema import Schema
27
17
 
28
18
  if TYPE_CHECKING:
29
19
  from followthemoney.model import Model
30
20
 
31
21
  log = logging.getLogger(__name__)
32
22
  P = Union[Property, str]
33
- Triple = Tuple[Identifier, Identifier, Identifier]
34
23
  E = TypeVar("E", bound="EntityProxy")
35
24
 
36
25
 
@@ -45,7 +34,7 @@ class EntityProxy(object):
45
34
 
46
35
  def __init__(
47
36
  self,
48
- model: "Model",
37
+ schema: Schema,
49
38
  data: Dict[str, Any],
50
39
  key_prefix: Optional[str] = None,
51
40
  cleaned: bool = True,
@@ -57,9 +46,6 @@ class EntityProxy(object):
57
46
 
58
47
  #: The schema definition for this entity, which implies the properties
59
48
  #: That can be set on it.
60
- schema = model.get(data.pop("schema", None))
61
- if schema is None:
62
- raise InvalidData(gettext("No schema for entity."))
63
49
  self.schema = schema
64
50
 
65
51
  #: When using :meth:`~make_id` to generate a natural key for this entity,
@@ -162,7 +148,7 @@ class EntityProxy(object):
162
148
  def add(
163
149
  self,
164
150
  prop: P,
165
- values: Any,
151
+ values: Values,
166
152
  cleaned: bool = False,
167
153
  quiet: bool = False,
168
154
  fuzzy: bool = False,
@@ -192,11 +178,9 @@ class EntityProxy(object):
192
178
  msg = gettext("Stub property (%s): %s")
193
179
  raise InvalidData(msg % (self.schema, prop))
194
180
 
195
- for value in value_list(values):
196
- if not cleaned:
197
- format = format or prop.format
198
- value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
199
- self.unsafe_add(prop, value, cleaned=True)
181
+ value: Optional[str] = None
182
+ for value in string_list(values, sanitize=not cleaned):
183
+ self.unsafe_add(prop, value, cleaned=cleaned, fuzzy=fuzzy, format=format)
200
184
  return None
201
185
 
202
186
  def unsafe_add(
@@ -236,7 +220,7 @@ class EntityProxy(object):
236
220
  def set(
237
221
  self,
238
222
  prop: P,
239
- values: Any,
223
+ values: Values,
240
224
  cleaned: bool = False,
241
225
  quiet: bool = False,
242
226
  fuzzy: bool = False,
@@ -377,34 +361,21 @@ class EntityProxy(object):
377
361
  data[group] = values
378
362
  return data
379
363
 
380
- def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
381
- """Serialise the entity into a set of RDF triple statements. The
382
- statements include the property values, an ``RDF#type`` definition
383
- that refers to the entity schema, and a ``SKOS#prefLabel`` with the
384
- entity caption."""
385
- if self.id is None or self.schema is None:
386
- return
387
- uri = registry.entity.rdf(self.id)
388
- yield (uri, RDF.type, self.schema.uri)
389
- if qualified:
390
- caption = self.caption
391
- if caption != self.schema.label:
392
- yield (uri, SKOS.prefLabel, Literal(caption))
393
- for prop, value in self.itervalues():
394
- value = prop.type.rdf(value)
395
- if qualified:
396
- yield (uri, prop.uri, value)
397
- else:
398
- yield (uri, URIRef(prop.name), value)
399
-
400
364
  @property
401
365
  def caption(self) -> str:
402
366
  """The user-facing label to be used for this entity. This checks a list
403
367
  of properties defined by the schema (caption) and returns the first
404
368
  available value. If no caption is available, return the schema label."""
405
- for prop in self.schema.caption:
406
- for value in self.get(prop):
407
- return value
369
+ for prop_ in self.schema.caption:
370
+ prop = self.schema.properties[prop_]
371
+ values = self.get(prop)
372
+ if prop.type == registry.name and len(values) > 1:
373
+ name = pick_name(sorted(values))
374
+ if name is not None:
375
+ return name
376
+ else:
377
+ for value in values:
378
+ return value
408
379
  return self.schema.label
409
380
 
410
381
  @property
@@ -448,7 +419,7 @@ class EntityProxy(object):
448
419
 
449
420
  def clone(self: E) -> E:
450
421
  """Make a deep copy of the current entity proxy."""
451
- return self.__class__.from_dict(self.schema.model, self.to_dict())
422
+ return self.__class__.from_dict(self.to_dict())
452
423
 
453
424
  def merge(self: E, other: E) -> E:
454
425
  """Merge another entity proxy into this one. This will try and find
@@ -467,30 +438,36 @@ class EntityProxy(object):
467
438
  self.add(prop, values, cleaned=True, quiet=True)
468
439
  return self
469
440
 
441
+ def __getstate__(self) -> Dict[str, Any]:
442
+ data = {slot: getattr(self, slot) for slot in self.__slots__}
443
+ data["schema"] = self.schema.name
444
+ return data
445
+
446
+ def __setstate__(self, data: Dict[str, Any]) -> None:
447
+ for slot in self.__slots__:
448
+ value = data.get(slot)
449
+ if slot == "schema":
450
+ value = Model.instance()[data["schema"]]
451
+ setattr(self, slot, value)
452
+
470
453
  def __str__(self) -> str:
471
454
  return self.caption
472
455
 
473
456
  def __repr__(self) -> str:
474
- return "<E(%r,%r)>" % (self.id, str(self))
457
+ return "<E(%r,%s,%r)>" % (self.id, self.schema.name, str(self))
475
458
 
476
459
  def __len__(self) -> int:
477
460
  return self._size
478
461
 
479
462
  def __hash__(self) -> int:
480
463
  if not self.id:
481
- warnings.warn(
482
- "Hashing an EntityProxy without an ID results in undefined behaviour",
483
- RuntimeWarning,
484
- )
464
+ raise RuntimeError("Cannot hash entity without an ID")
485
465
  return hash(self.id)
486
466
 
487
467
  def __eq__(self, other: Any) -> bool:
488
468
  try:
489
469
  if self.id is None or other.id is None:
490
- warnings.warn(
491
- "Comparing EntityProxys without IDs results in undefined behaviour",
492
- RuntimeWarning,
493
- )
470
+ raise RuntimeError("Cannot compare entities without IDs.")
494
471
  return bool(self.id == other.id)
495
472
  except AttributeError:
496
473
  return False
@@ -498,11 +475,13 @@ class EntityProxy(object):
498
475
  @classmethod
499
476
  def from_dict(
500
477
  cls: Type[E],
501
- model: "Model",
502
478
  data: Dict[str, Any],
503
479
  cleaned: bool = True,
504
480
  ) -> E:
505
481
  """Instantiate a proxy from a serialised dictionary, resolving its schema via the default model.
506
482
 
507
483
  Use :meth:`followthemoney.model.Model.get_proxy` instead."""
508
- return cls(model, data, cleaned=cleaned)
484
+ schema = Model.instance().get(data.get("schema", ""))
485
+ if schema is None:
486
+ raise InvalidData(gettext("No schema for entity."))
487
+ return cls(schema, data, cleaned=cleaned)
@@ -32,9 +32,8 @@ Analyzable:
32
32
  ibanMentioned:
33
33
  label: "Detected IBANs"
34
34
  hidden: true
35
- # type: identifier
36
- # format: iban
37
- type: iban
35
+ type: identifier
36
+ format: iban
38
37
  ipMentioned:
39
38
  label: "Detected IP addresses"
40
39
  hidden: true
@@ -30,9 +30,8 @@ BankAccount:
30
30
  maxLength: 64
31
31
  iban:
32
32
  label: IBAN
33
- # type: identifier
34
- # format: iban
35
- type: iban
33
+ type: identifier
34
+ format: iban
36
35
  maxLength: 64
37
36
  bic:
38
37
  label: Bank Identifier Code