followthemoney 3.8.5__py3-none-any.whl → 4.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. followthemoney/__init__.py +30 -10
  2. followthemoney/cli/cli.py +1 -1
  3. followthemoney/cli/exports.py +6 -2
  4. followthemoney/cli/statement.py +62 -0
  5. followthemoney/cli/util.py +2 -3
  6. followthemoney/compare.py +26 -16
  7. followthemoney/dataset/__init__.py +17 -0
  8. followthemoney/dataset/catalog.py +77 -0
  9. followthemoney/dataset/coverage.py +29 -0
  10. followthemoney/dataset/dataset.py +146 -0
  11. followthemoney/dataset/publisher.py +25 -0
  12. followthemoney/dataset/resource.py +30 -0
  13. followthemoney/dataset/util.py +55 -0
  14. followthemoney/entity.py +73 -0
  15. followthemoney/exc.py +6 -0
  16. followthemoney/export/rdf.py +57 -5
  17. followthemoney/graph.py +1 -2
  18. followthemoney/model.py +38 -11
  19. followthemoney/names.py +33 -0
  20. followthemoney/ontology.py +18 -16
  21. followthemoney/property.py +12 -15
  22. followthemoney/proxy.py +43 -64
  23. followthemoney/schema/Analyzable.yaml +2 -3
  24. followthemoney/schema/BankAccount.yaml +2 -3
  25. followthemoney/schema/Company.yaml +0 -6
  26. followthemoney/schema/Contract.yaml +0 -1
  27. followthemoney/schema/CryptoWallet.yaml +1 -1
  28. followthemoney/schema/Document.yaml +0 -6
  29. followthemoney/schema/Interval.yaml +7 -0
  30. followthemoney/schema/LegalEntity.yaml +6 -0
  31. followthemoney/schema/License.yaml +2 -0
  32. followthemoney/schema/Page.yaml +0 -1
  33. followthemoney/schema/Person.yaml +0 -5
  34. followthemoney/schema/Sanction.yaml +1 -0
  35. followthemoney/schema/Thing.yaml +0 -2
  36. followthemoney/schema/UserAccount.yaml +6 -3
  37. followthemoney/schema.py +30 -42
  38. followthemoney/statement/__init__.py +19 -0
  39. followthemoney/statement/entity.py +438 -0
  40. followthemoney/statement/serialize.py +251 -0
  41. followthemoney/statement/statement.py +256 -0
  42. followthemoney/statement/util.py +31 -0
  43. followthemoney/types/__init__.py +66 -23
  44. followthemoney/types/address.py +3 -3
  45. followthemoney/types/checksum.py +3 -7
  46. followthemoney/types/common.py +9 -14
  47. followthemoney/types/country.py +3 -7
  48. followthemoney/types/date.py +21 -11
  49. followthemoney/types/email.py +0 -4
  50. followthemoney/types/entity.py +5 -11
  51. followthemoney/types/gender.py +6 -10
  52. followthemoney/types/identifier.py +9 -3
  53. followthemoney/types/ip.py +5 -9
  54. followthemoney/types/json.py +2 -2
  55. followthemoney/types/language.py +3 -7
  56. followthemoney/types/mimetype.py +4 -8
  57. followthemoney/types/name.py +7 -8
  58. followthemoney/types/number.py +88 -6
  59. followthemoney/types/phone.py +4 -11
  60. followthemoney/types/string.py +4 -4
  61. followthemoney/types/topic.py +3 -7
  62. followthemoney/types/url.py +5 -10
  63. followthemoney/util.py +12 -13
  64. followthemoney/value.py +67 -0
  65. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/METADATA +23 -8
  66. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/RECORD +69 -59
  67. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/entry_points.txt +1 -0
  68. followthemoney/offshore.py +0 -48
  69. followthemoney/rdf.py +0 -9
  70. followthemoney/schema/Assessment.yaml +0 -32
  71. followthemoney/schema/Post.yaml +0 -42
  72. followthemoney/types/iban.py +0 -58
  73. followthemoney/types/registry.py +0 -52
  74. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/WHEEL +0 -0
  75. {followthemoney-3.8.5.dist-info → followthemoney-4.0.1.dist-info}/licenses/LICENSE +0 -0
followthemoney/proxy.py CHANGED
@@ -1,36 +1,25 @@
1
1
  import logging
2
- from typing import (
3
- TYPE_CHECKING,
4
- Any,
5
- Dict,
6
- Generator,
7
- List,
8
- Optional,
9
- Set,
10
- Tuple,
11
- Union,
12
- Type,
13
- TypeVar,
14
- cast,
15
- )
16
- import warnings
2
+ from typing import TYPE_CHECKING, cast, Any
3
+ from typing import Dict, Generator, List, Optional, Set, Tuple, Union, Type, TypeVar
17
4
  from itertools import product
18
5
  from banal import ensure_dict
6
+ from rigour.names import pick_name
19
7
 
20
8
  from followthemoney.exc import InvalidData
21
9
  from followthemoney.types import registry
22
10
  from followthemoney.types.common import PropertyType
23
11
  from followthemoney.property import Property
24
- from followthemoney.rdf import SKOS, RDF, Literal, URIRef, Identifier
12
+ from followthemoney.value import string_list, Values
25
13
  from followthemoney.util import sanitize_text, gettext
26
- from followthemoney.util import merge_context, value_list, make_entity_id
14
+ from followthemoney.util import merge_context, make_entity_id
15
+ from followthemoney.model import Model
16
+ from followthemoney.schema import Schema
27
17
 
28
18
  if TYPE_CHECKING:
29
19
  from followthemoney.model import Model
30
20
 
31
21
  log = logging.getLogger(__name__)
32
22
  P = Union[Property, str]
33
- Triple = Tuple[Identifier, Identifier, Identifier]
34
23
  E = TypeVar("E", bound="EntityProxy")
35
24
 
36
25
 
@@ -45,7 +34,7 @@ class EntityProxy(object):
45
34
 
46
35
  def __init__(
47
36
  self,
48
- model: "Model",
37
+ schema: Schema,
49
38
  data: Dict[str, Any],
50
39
  key_prefix: Optional[str] = None,
51
40
  cleaned: bool = True,
@@ -57,9 +46,6 @@ class EntityProxy(object):
57
46
 
58
47
  #: The schema definition for this entity, which implies the properties
59
48
  #: That can be set on it.
60
- schema = model.get(data.pop("schema", None))
61
- if schema is None:
62
- raise InvalidData(gettext("No schema for entity."))
63
49
  self.schema = schema
64
50
 
65
51
  #: When using :meth:`~make_id` to generate a natural key for this entity,
@@ -162,7 +148,7 @@ class EntityProxy(object):
162
148
  def add(
163
149
  self,
164
150
  prop: P,
165
- values: Any,
151
+ values: Values,
166
152
  cleaned: bool = False,
167
153
  quiet: bool = False,
168
154
  fuzzy: bool = False,
@@ -192,11 +178,9 @@ class EntityProxy(object):
192
178
  msg = gettext("Stub property (%s): %s")
193
179
  raise InvalidData(msg % (self.schema, prop))
194
180
 
195
- for value in value_list(values):
196
- if not cleaned:
197
- format = format or prop.format
198
- value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
199
- self.unsafe_add(prop, value, cleaned=True)
181
+ value: Optional[str] = None
182
+ for value in string_list(values, sanitize=not cleaned):
183
+ self.unsafe_add(prop, value, cleaned=cleaned, fuzzy=fuzzy, format=format)
200
184
  return None
201
185
 
202
186
  def unsafe_add(
@@ -236,7 +220,7 @@ class EntityProxy(object):
236
220
  def set(
237
221
  self,
238
222
  prop: P,
239
- values: Any,
223
+ values: Values,
240
224
  cleaned: bool = False,
241
225
  quiet: bool = False,
242
226
  fuzzy: bool = False,
@@ -377,34 +361,21 @@ class EntityProxy(object):
377
361
  data[group] = values
378
362
  return data
379
363
 
380
- def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
381
- """Serialise the entity into a set of RDF triple statements. The
382
- statements include the property values, an ``RDF#type`` definition
383
- that refers to the entity schema, and a ``SKOS#prefLabel`` with the
384
- entity caption."""
385
- if self.id is None or self.schema is None:
386
- return
387
- uri = registry.entity.rdf(self.id)
388
- yield (uri, RDF.type, self.schema.uri)
389
- if qualified:
390
- caption = self.caption
391
- if caption != self.schema.label:
392
- yield (uri, SKOS.prefLabel, Literal(caption))
393
- for prop, value in self.itervalues():
394
- value = prop.type.rdf(value)
395
- if qualified:
396
- yield (uri, prop.uri, value)
397
- else:
398
- yield (uri, URIRef(prop.name), value)
399
-
400
364
  @property
401
365
  def caption(self) -> str:
402
366
  """The user-facing label to be used for this entity. This checks a list
403
367
  of properties defined by the schema (caption) and returns the first
404
368
  available value. If no caption is available, return the schema label."""
405
- for prop in self.schema.caption:
406
- for value in self.get(prop):
407
- return value
369
+ for prop_ in self.schema.caption:
370
+ prop = self.schema.properties[prop_]
371
+ values = self.get(prop)
372
+ if prop.type == registry.name and len(values) > 1:
373
+ name = pick_name(sorted(values))
374
+ if name is not None:
375
+ return name
376
+ else:
377
+ for value in values:
378
+ return value
408
379
  return self.schema.label
409
380
 
410
381
  @property
@@ -448,7 +419,7 @@ class EntityProxy(object):
448
419
 
449
420
  def clone(self: E) -> E:
450
421
  """Make a deep copy of the current entity proxy."""
451
- return self.__class__.from_dict(self.schema.model, self.to_dict())
422
+ return self.__class__.from_dict(self.to_dict())
452
423
 
453
424
  def merge(self: E, other: E) -> E:
454
425
  """Merge another entity proxy into this one. This will try and find
@@ -467,30 +438,36 @@ class EntityProxy(object):
467
438
  self.add(prop, values, cleaned=True, quiet=True)
468
439
  return self
469
440
 
441
+ def __getstate__(self) -> Dict[str, Any]:
442
+ data = {slot: getattr(self, slot) for slot in self.__slots__}
443
+ data["schema"] = self.schema.name
444
+ return data
445
+
446
+ def __setstate__(self, data: Dict[str, Any]) -> None:
447
+ for slot in self.__slots__:
448
+ value = data.get(slot)
449
+ if slot == "schema":
450
+ value = Model.instance()[data["schema"]]
451
+ setattr(self, slot, value)
452
+
470
453
  def __str__(self) -> str:
471
454
  return self.caption
472
455
 
473
456
  def __repr__(self) -> str:
474
- return "<E(%r,%r)>" % (self.id, str(self))
457
+ return "<E(%r,%s,%r)>" % (self.id, self.schema.name, str(self))
475
458
 
476
459
  def __len__(self) -> int:
477
460
  return self._size
478
461
 
479
462
  def __hash__(self) -> int:
480
463
  if not self.id:
481
- warnings.warn(
482
- "Hashing an EntityProxy without an ID results in undefined behaviour",
483
- RuntimeWarning,
484
- )
464
+ raise RuntimeError("Cannot hash entity without an ID")
485
465
  return hash(self.id)
486
466
 
487
467
  def __eq__(self, other: Any) -> bool:
488
468
  try:
489
469
  if self.id is None or other.id is None:
490
- warnings.warn(
491
- "Comparing EntityProxys without IDs results in undefined behaviour",
492
- RuntimeWarning,
493
- )
470
+ raise RuntimeError("Cannot compare entities without IDs.")
494
471
  return bool(self.id == other.id)
495
472
  except AttributeError:
496
473
  return False
@@ -498,11 +475,13 @@ class EntityProxy(object):
498
475
  @classmethod
499
476
  def from_dict(
500
477
  cls: Type[E],
501
- model: "Model",
502
478
  data: Dict[str, Any],
503
479
  cleaned: bool = True,
504
480
  ) -> E:
505
481
  """Instantiate a proxy based on the given model and serialised dictionary.
506
482
 
507
483
  Use :meth:`followthemoney.model.Model.get_proxy` instead."""
508
- return cls(model, data, cleaned=cleaned)
484
+ schema = Model.instance().get(data.get("schema", ""))
485
+ if schema is None:
486
+ raise InvalidData(gettext("No schema for entity."))
487
+ return cls(schema, data, cleaned=cleaned)
@@ -32,9 +32,8 @@ Analyzable:
32
32
  ibanMentioned:
33
33
  label: "Detected IBANs"
34
34
  hidden: true
35
- # type: identifier
36
- # format: iban
37
- type: iban
35
+ type: identifier
36
+ format: iban
38
37
  ipMentioned:
39
38
  label: "Detected IP addresses"
40
39
  hidden: true
@@ -30,9 +30,8 @@ BankAccount:
30
30
  maxLength: 64
31
31
  iban:
32
32
  label: IBAN
33
- # type: identifier
34
- # format: iban
35
- type: iban
33
+ type: identifier
34
+ format: iban
36
35
  maxLength: 64
37
36
  bic:
38
37
  label: Bank Identifier Code
@@ -56,12 +56,6 @@ Company:
56
56
  mbsCode:
57
57
  label: "MBS"
58
58
  type: identifier
59
- ibcRuc:
60
- # TODO: Remove this. It's a column name in the ICIJ-released OffshoreLeaks datasets
61
- # but seems to just mean "company number".
62
- deprecated: true
63
- label: "ibcRUC"
64
- type: identifier
65
59
  caemCode:
66
60
  label: "COD CAEM"
67
61
  description: "(RO) What kind of activity a legal entity is allowed to develop"
@@ -68,4 +68,3 @@ Contract:
68
68
  language:
69
69
  label: "Language"
70
70
  type: language
71
- rdf: http://purl.org/dc/terms/language
@@ -31,7 +31,7 @@ CryptoWallet:
31
31
  type: date
32
32
  currencySymbol:
33
33
  label: Currency short code
34
- mangingExchange:
34
+ managingExchange:
35
35
  label: Managing exchange
36
36
  holder:
37
37
  label: Wallet holder
@@ -30,12 +30,10 @@ Document:
30
30
  title:
31
31
  label: "Title"
32
32
  type: string
33
- rdf: http://purl.org/dc/elements/1.1/title
34
33
  caption: true
35
34
  author:
36
35
  label: "Author"
37
36
  description: "The original author, not the uploader"
38
- rdf: http://purl.org/dc/elements/1.1/creator
39
37
  generator:
40
38
  label: "Generator"
41
39
  description: "The program used to generate this file"
@@ -61,11 +59,9 @@ Document:
61
59
  mimeType:
62
60
  label: "MIME type"
63
61
  type: mimetype
64
- rdf: http://purl.org/dc/terms/format
65
62
  language:
66
63
  label: "Language"
67
64
  type: language
68
- rdf: http://purl.org/dc/terms/language
69
65
  translatedLanguage:
70
66
  label: "The language of the translated text"
71
67
  hidden: true
@@ -78,7 +74,6 @@ Document:
78
74
  label: "Date"
79
75
  description: "If not otherwise specified"
80
76
  type: date
81
- rdf: http://purl.org/dc/elements/1.1/date
82
77
  authoredAt:
83
78
  label: "Authored on"
84
79
  type: date
@@ -95,7 +90,6 @@ Document:
95
90
  name: children
96
91
  label: "Child documents"
97
92
  hidden: true
98
- rdf: http://purl.org/dc/terms/isPartOf
99
93
  ancestors:
100
94
  label: "Ancestors"
101
95
  type: entity
@@ -42,6 +42,13 @@ Interval:
42
42
  label: "Source link"
43
43
  type: url
44
44
  matchable: false
45
+ proof:
46
+ label: Source document
47
+ reverse:
48
+ name: provenIntervals
49
+ label: "Derived relationships"
50
+ type: entity
51
+ range: Document
45
52
  publisher:
46
53
  label: "Publishing source"
47
54
  publisherUrl:
@@ -102,6 +102,12 @@ LegalEntity:
102
102
  bvdId:
103
103
  label: Bureau van Dijk ID
104
104
  type: identifier
105
+ uscCode:
106
+ # cf. https://en.wikipedia.org/wiki/Unified_Social_Credit_Identifier
107
+ label: "USCC"
108
+ description: "Unified Social Credit Identifier"
109
+ type: identifier
110
+ format: uscc
105
111
  icijId:
106
112
  label: ICIJ ID
107
113
  description: "ID according to International Consortium for Investigative Journalists"
@@ -19,6 +19,8 @@ License:
19
19
  properties:
20
20
  area:
21
21
  label: "Area"
22
+ type: number
23
+ description: "Spatial area of the license or concession"
22
24
  commodities:
23
25
  label: "Commodities"
24
26
  reviewDate:
@@ -21,7 +21,6 @@ Page:
21
21
  name: pages
22
22
  label: "Pages"
23
23
  hidden: true
24
- rdf: http://purl.org/dc/terms/isPartOf
25
24
  detectedLanguage:
26
25
  label: "Detected language"
27
26
  type: language
@@ -6,7 +6,6 @@ Person:
6
6
  description: >
7
7
  A natural person, as opposed to a corporation of some type.
8
8
  matchable: true
9
- rdf: http://xmlns.com/foaf/0.1/Person
10
9
  featured:
11
10
  - name
12
11
  - nationality
@@ -26,14 +25,12 @@ Person:
26
25
  properties:
27
26
  title:
28
27
  label: Title
29
- rdf: http://xmlns.com/foaf/0.1/title
30
28
  # The `firstName`, `lastName`, `secondName` etc. properties intentionally do not use
31
29
  # the `name` property type. Many FtM tools (including Aleph) use name properties to
32
30
  # compare/match entities, but matching entites just on e.g. a first name would lead to
33
31
  # too many false positives.
34
32
  firstName:
35
33
  label: First name
36
- rdf: http://xmlns.com/foaf/0.1/givenName
37
34
  secondName:
38
35
  label: Second name
39
36
  middleName:
@@ -44,13 +41,11 @@ Person:
44
41
  label: Matronymic
45
42
  lastName:
46
43
  label: Last name
47
- rdf: http://xmlns.com/foaf/0.1/lastName
48
44
  nameSuffix:
49
45
  label: Name suffix
50
46
  birthDate:
51
47
  label: Birth date
52
48
  type: date
53
- rdf: http://xmlns.com/foaf/0.1/birthday
54
49
  birthPlace:
55
50
  label: Place of birth
56
51
  birthCountry:
@@ -49,6 +49,7 @@ Sanction:
49
49
  label: "Status"
50
50
  duration:
51
51
  label: "Duration"
52
+ type: number
52
53
  reason:
53
54
  label: "Reason"
54
55
  type: text
@@ -14,7 +14,6 @@ Thing:
14
14
  name:
15
15
  label: Name
16
16
  type: name
17
- rdf: http://www.w3.org/2004/02/skos/core#prefLabel
18
17
  summary: # a short one-liner kind of description
19
18
  label: Summary
20
19
  type: text
@@ -27,7 +26,6 @@ Thing:
27
26
  alias:
28
27
  label: Other name
29
28
  type: name
30
- rdf: http://www.w3.org/2004/02/skos/core#altLabel
31
29
  previousName:
32
30
  label: Previous name
33
31
  type: name
@@ -28,11 +28,14 @@ UserAccount:
28
28
  label: "Service"
29
29
  type: string
30
30
  email:
31
- label: "E-Mail"
31
+ label: E-Mail
32
32
  type: email
33
- number:
34
- label: "Phone Number"
33
+ description: "Email address"
34
+ phone:
35
+ label: Phone
35
36
  type: phone
37
+ description: "Phone number"
38
+ maxLength: 32
36
39
  username:
37
40
  label: "Username"
38
41
  type: string
followthemoney/schema.py CHANGED
@@ -1,22 +1,12 @@
1
- from typing import (
2
- TYPE_CHECKING,
3
- Any,
4
- Dict,
5
- List,
6
- Optional,
7
- Set,
8
- TypedDict,
9
- Union,
10
- cast,
11
- )
1
+ from typing import TYPE_CHECKING, Any, cast
2
+ from typing import Dict, List, Optional, Set, TypedDict, Union
12
3
  from banal import ensure_list, ensure_dict, as_bool
13
- from functools import lru_cache
4
+ from functools import cache
14
5
 
15
6
  from followthemoney.property import Property, PropertySpec, PropertyToDict, ReverseSpec
16
7
  from followthemoney.types import registry
17
8
  from followthemoney.exc import InvalidData, InvalidModel
18
- from followthemoney.rdf import URIRef, NS
19
- from followthemoney.util import gettext
9
+ from followthemoney.util import gettext, const
20
10
 
21
11
  if TYPE_CHECKING:
22
12
  from followthemoney.model import Model
@@ -47,7 +37,6 @@ class SchemaSpec(TypedDict, total=False):
47
37
  edge: EdgeSpec
48
38
  temporalExtent: TemporalExtentSpec
49
39
  description: Optional[str]
50
- rdf: Optional[str]
51
40
  abstract: bool
52
41
  hidden: bool
53
42
  generated: bool
@@ -90,7 +79,6 @@ class Schema:
90
79
  "_plural",
91
80
  "_description",
92
81
  "_hash",
93
- "uri",
94
82
  "abstract",
95
83
  "hidden",
96
84
  "generated",
@@ -118,15 +106,12 @@ class Schema:
118
106
 
119
107
  def __init__(self, model: "Model", name: str, data: SchemaSpec) -> None:
120
108
  #: Machine-readable name of the schema, used for identification.
121
- self.name = name
109
+ self.name = const(name)
122
110
  self.model = model
123
111
  self._label = data.get("label", name)
124
112
  self._plural = data.get("plural", self.label)
125
113
  self._description = data.get("description")
126
- self._hash = hash("<Schema(%r)>" % name)
127
-
128
- #: RDF identifier for this schema when it is transformed to a triple term.
129
- self.uri = URIRef(cast(str, data.get("rdf", NS[name])))
114
+ self._hash = hash("<Schema(%r)>" % self.name)
130
115
 
131
116
  #: Do not store or emit entities of this type, it is used only for
132
117
  #: inheritance.
@@ -152,17 +137,17 @@ class Schema:
152
137
  #: Mark a set of properties as important, i.e. they should be shown
153
138
  #: first, or in an abridged view of the entity. In Aleph, these properties
154
139
  #: are included in tabular entity listings.
155
- self.featured = ensure_list(data.get("featured", []))
140
+ self.featured = [const(f) for f in data.get("featured", [])]
156
141
 
157
142
  #: Mark a set of properties as required. This is applied only when
158
143
  #: an entity is created by the user - bulk created entities will
159
144
  #: slip through even if it is technically invalid.
160
- self.required = ensure_list(data.get("required", []))
145
+ self.required = [const(r) for r in data.get("required", [])]
161
146
 
162
147
  #: Mark a set of properties to be used for the entity's caption.
163
148
  #: They will be checked in order and the first existent value will
164
149
  #: be used.
165
- self.caption = ensure_list(data.get("caption", []))
150
+ self.caption = [const(s) for s in data.get("caption", [])]
166
151
 
167
152
  # A transform of the entity into an edge for its representation in
168
153
  # the context of a property graph representation like Neo4J/Gephi.
@@ -173,7 +158,7 @@ class Schema:
173
158
  #: Flag to indicate if this schema should be represented by an edge (rather than
174
159
  #: a node) when the data is converted into a property graph.
175
160
  self.edge: bool = self.edge_source is not None and self.edge_target is not None
176
- self.edge_caption = ensure_list(edge.get("caption", []))
161
+ self.edge_caption = [const(p) for p in edge.get("caption", [])]
177
162
  self._edge_label = edge.get("label", self._label)
178
163
 
179
164
  #: Flag to indicate if the edge should be presented as directed to the user,
@@ -183,16 +168,16 @@ class Schema:
183
168
  #: Specify which properties should be used to represent this schema in a
184
169
  #: timeline.
185
170
  temporal_extent = data.get("temporalExtent", {})
186
- self._temporal_start = ensure_list(temporal_extent.get("start", []))
187
- self._temporal_end = ensure_list(temporal_extent.get("end", []))
171
+ self._temporal_start = [const(s) for s in temporal_extent.get("start", [])]
172
+ self._temporal_end = [const(e) for e in temporal_extent.get("end", [])]
188
173
 
189
174
  #: Direct parent schemata of this schema.
190
- self._extends = ensure_list(data.get("extends", []))
175
+ self._extends = [const(s) for s in data.get("extends", [])]
191
176
  self.extends: Set["Schema"] = set()
192
177
 
193
178
  #: All parents of this schema (including indirect parents and the schema
194
179
  #: itself).
195
- self.schemata = set([self])
180
+ self.schemata: Set[Schema] = set([self])
196
181
 
197
182
  #: All names of :attr:`~schemata`.
198
183
  self.names = set([self.name])
@@ -205,8 +190,8 @@ class Schema:
205
190
  #: The full list of properties defined for the entity, including those
206
191
  #: inherited from parent schemata.
207
192
  self.properties: Dict[str, Property] = {}
208
- for name, prop in data.get("properties", {}).items():
209
- self.properties[name] = Property(self, name, prop)
193
+ for pname, prop in data.get("properties", {}).items():
194
+ self.properties[pname] = Property(self, pname, prop)
210
195
 
211
196
  def generate(self, model: "Model") -> None:
212
197
  """While loading the schema, this function will validate and
@@ -317,12 +302,18 @@ class Schema:
317
302
 
318
303
  @property
319
304
  def source_prop(self) -> Optional[Property]:
320
- """The entity property to be used as an edge source."""
305
+ """The entity property to be used as an edge source when the schema is
306
+ considered as a relationship."""
307
+ if self.edge_source is None:
308
+ return None
321
309
  return self.get(self.edge_source)
322
310
 
323
311
  @property
324
312
  def target_prop(self) -> Optional[Property]:
325
- """The entity property to be used as an edge target."""
313
+ """The entity property to be used as an edge target when the schema is transformed
314
+ into a relationship."""
315
+ if self.edge_target is None:
316
+ return None
326
317
  return self.get(self.edge_target)
327
318
 
328
319
  @property
@@ -391,12 +382,12 @@ class Schema:
391
382
  self._matchable_schemata.add(schema)
392
383
  return self._matchable_schemata
393
384
 
394
- @lru_cache(maxsize=None)
385
+ @cache
395
386
  def can_match(self, other: "Schema") -> bool:
396
387
  """Check if an schema can match with another schema."""
397
388
  return other in self.matchable_schemata
398
389
 
399
- @lru_cache(maxsize=None)
390
+ @cache
400
391
  def is_a(self, other: Union[str, "Schema"]) -> bool:
401
392
  """Check if the schema or one of its parents is the same as the given
402
393
  candidate ``other``."""
@@ -404,13 +395,13 @@ class Schema:
404
395
  other = other.name
405
396
  return other in self.names
406
397
 
407
- def get(self, name: Optional[str]) -> Optional[Property]:
398
+ def get(self, name: str) -> Optional[Property]:
408
399
  """Retrieve a property defined for this schema by its name."""
409
400
  if name is None:
410
401
  return None
411
402
  return self.properties.get(name)
412
403
 
413
- def validate(self, data: Any) -> Optional[str]:
404
+ def validate(self, data: Dict[str, Any]) -> Optional[str]:
414
405
  """Validate a dictionary against the given schema.
415
406
  This will also drop keys which are not valid as properties.
416
407
  """
@@ -478,7 +469,7 @@ class Schema:
478
469
  def __eq__(self, other: Any) -> bool:
479
470
  """Compare two schemata (via hash)."""
480
471
  try:
481
- return self._hash == hash(other)
472
+ return self._hash == other._hash # type: ignore
482
473
  except AttributeError:
483
474
  return False
484
475
 
@@ -486,10 +477,7 @@ class Schema:
486
477
  return self.name.__lt__(other.name)
487
478
 
488
479
  def __hash__(self) -> int:
489
- try:
490
- return self._hash
491
- except AttributeError:
492
- return super().__hash__()
480
+ return self._hash
493
481
 
494
482
  def __repr__(self) -> str:
495
483
  return "<Schema(%r)>" % self.name
@@ -0,0 +1,19 @@
1
+ from followthemoney.statement.statement import Statement, StatementDict
2
+ from followthemoney.statement.serialize import CSV, JSON, PACK, FORMATS
3
+ from followthemoney.statement.serialize import write_statements
4
+ from followthemoney.statement.serialize import read_statements, read_path_statements
5
+ from followthemoney.statement.entity import SE, StatementEntity
6
+
7
+ __all__ = [
8
+ "Statement",
9
+ "StatementDict",
10
+ "StatementEntity",
11
+ "SE",
12
+ "CSV",
13
+ "JSON",
14
+ "PACK",
15
+ "FORMATS",
16
+ "write_statements",
17
+ "read_statements",
18
+ "read_path_statements",
19
+ ]