followthemoney 1.3.7__py3-none-any.whl → 3.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. followthemoney/__init__.py +5 -3
  2. followthemoney/cli/__init__.py +17 -0
  3. followthemoney/cli/aggregate.py +56 -0
  4. followthemoney/cli/cli.py +88 -0
  5. followthemoney/cli/exports.py +121 -0
  6. followthemoney/cli/mapping.py +85 -0
  7. followthemoney/cli/sieve.py +67 -0
  8. followthemoney/cli/util.py +142 -0
  9. followthemoney/compare.py +130 -60
  10. followthemoney/exc.py +19 -6
  11. followthemoney/export/common.py +29 -0
  12. followthemoney/export/csv.py +82 -0
  13. followthemoney/export/excel.py +75 -0
  14. followthemoney/export/graph.py +79 -0
  15. followthemoney/export/neo4j.py +182 -0
  16. followthemoney/export/rdf.py +26 -0
  17. followthemoney/graph.py +308 -0
  18. followthemoney/helpers.py +212 -0
  19. followthemoney/mapping/__init__.py +1 -1
  20. followthemoney/mapping/csv.py +67 -35
  21. followthemoney/mapping/entity.py +116 -44
  22. followthemoney/mapping/property.py +90 -44
  23. followthemoney/mapping/query.py +27 -19
  24. followthemoney/mapping/source.py +15 -5
  25. followthemoney/mapping/sql.py +75 -61
  26. followthemoney/messages.py +13 -7
  27. followthemoney/model.py +108 -56
  28. followthemoney/namespace.py +119 -0
  29. followthemoney/offshore.py +48 -0
  30. followthemoney/ontology.py +77 -0
  31. followthemoney/property.py +204 -71
  32. followthemoney/proxy.py +455 -118
  33. followthemoney/rdf.py +9 -0
  34. followthemoney/schema/Address.yaml +78 -0
  35. followthemoney/schema/Airplane.yaml +17 -10
  36. followthemoney/schema/Analyzable.yaml +54 -0
  37. followthemoney/schema/Article.yaml +16 -0
  38. followthemoney/schema/Assessment.yaml +32 -0
  39. followthemoney/schema/Asset.yaml +10 -4
  40. followthemoney/schema/Associate.yaml +41 -0
  41. followthemoney/schema/Audio.yaml +24 -0
  42. followthemoney/schema/BankAccount.yaml +53 -9
  43. followthemoney/schema/Call.yaml +48 -0
  44. followthemoney/schema/CallForTenders.yaml +117 -0
  45. followthemoney/schema/Company.yaml +37 -12
  46. followthemoney/schema/Contract.yaml +41 -7
  47. followthemoney/schema/ContractAward.yaml +30 -11
  48. followthemoney/schema/CourtCase.yaml +16 -10
  49. followthemoney/schema/CourtCaseParty.yaml +17 -6
  50. followthemoney/schema/CryptoWallet.yaml +48 -0
  51. followthemoney/schema/Debt.yaml +37 -0
  52. followthemoney/schema/Directorship.yaml +17 -4
  53. followthemoney/schema/Document.yaml +72 -139
  54. followthemoney/schema/Documentation.yml +38 -0
  55. followthemoney/schema/EconomicActivity.yaml +32 -17
  56. followthemoney/schema/Email.yaml +76 -0
  57. followthemoney/schema/Employment.yaml +39 -0
  58. followthemoney/schema/Event.yaml +35 -3
  59. followthemoney/schema/Family.yaml +41 -0
  60. followthemoney/schema/Folder.yaml +13 -0
  61. followthemoney/schema/HyperText.yaml +21 -0
  62. followthemoney/schema/Identification.yaml +40 -0
  63. followthemoney/schema/Image.yaml +25 -0
  64. followthemoney/schema/Interest.yaml +3 -6
  65. followthemoney/schema/Interval.yaml +56 -5
  66. followthemoney/schema/LegalEntity.yaml +81 -20
  67. followthemoney/schema/License.yaml +7 -3
  68. followthemoney/schema/Membership.yaml +19 -4
  69. followthemoney/schema/Mention.yaml +54 -0
  70. followthemoney/schema/Message.yaml +78 -0
  71. followthemoney/schema/Note.yaml +23 -0
  72. followthemoney/schema/Occupancy.yaml +44 -0
  73. followthemoney/schema/Organization.yaml +38 -3
  74. followthemoney/schema/Ownership.yaml +16 -4
  75. followthemoney/schema/Package.yaml +17 -0
  76. followthemoney/schema/Page.yaml +43 -0
  77. followthemoney/schema/Pages.yaml +23 -0
  78. followthemoney/schema/Passport.yaml +16 -17
  79. followthemoney/schema/Payment.yaml +38 -7
  80. followthemoney/schema/Person.yaml +61 -5
  81. followthemoney/schema/PlainText.yaml +17 -0
  82. followthemoney/schema/Position.yaml +50 -0
  83. followthemoney/schema/Post.yaml +42 -0
  84. followthemoney/schema/Project.yaml +27 -0
  85. followthemoney/schema/ProjectParticipant.yaml +36 -0
  86. followthemoney/schema/PublicBody.yaml +14 -3
  87. followthemoney/schema/RealEstate.yaml +19 -3
  88. followthemoney/schema/Representation.yaml +17 -6
  89. followthemoney/schema/Sanction.yaml +45 -21
  90. followthemoney/schema/Security.yaml +59 -0
  91. followthemoney/schema/Similar.yaml +37 -0
  92. followthemoney/schema/Succession.yaml +36 -0
  93. followthemoney/schema/Table.yaml +32 -0
  94. followthemoney/schema/TaxRoll.yaml +27 -9
  95. followthemoney/schema/Thing.yaml +69 -13
  96. followthemoney/schema/Trip.yaml +42 -0
  97. followthemoney/schema/UnknownLink.yaml +17 -6
  98. followthemoney/schema/UserAccount.yaml +44 -0
  99. followthemoney/schema/Value.yaml +5 -1
  100. followthemoney/schema/Vehicle.yaml +25 -8
  101. followthemoney/schema/Vessel.yaml +18 -10
  102. followthemoney/schema/Video.yaml +20 -0
  103. followthemoney/schema/Workbook.yaml +18 -0
  104. followthemoney/schema.py +436 -135
  105. followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
  106. followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
  107. followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
  108. followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
  109. followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
  110. followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
  111. followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
  112. followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
  113. followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
  114. followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
  115. followthemoney/translations/fr/followthemoney.po +3861 -0
  116. followthemoney/translations/messages.pot +3021 -725
  117. followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
  118. followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
  119. followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
  120. followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
  121. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
  122. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
  123. followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
  124. followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
  125. followthemoney/translations/ru/followthemoney.po +4221 -0
  126. followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
  127. followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
  128. followthemoney/types/__init__.py +35 -17
  129. followthemoney/types/address.py +50 -21
  130. followthemoney/types/checksum.py +25 -0
  131. followthemoney/types/common.py +233 -88
  132. followthemoney/types/country.py +50 -56
  133. followthemoney/types/date.py +59 -76
  134. followthemoney/types/email.py +66 -35
  135. followthemoney/types/entity.py +66 -13
  136. followthemoney/types/gender.py +66 -0
  137. followthemoney/types/iban.py +47 -28
  138. followthemoney/types/identifier.py +49 -22
  139. followthemoney/types/ip.py +35 -21
  140. followthemoney/types/json.py +58 -0
  141. followthemoney/types/language.py +124 -37
  142. followthemoney/types/mimetype.py +44 -0
  143. followthemoney/types/name.py +56 -12
  144. followthemoney/types/number.py +30 -0
  145. followthemoney/types/phone.py +92 -34
  146. followthemoney/types/registry.py +52 -0
  147. followthemoney/types/string.py +43 -0
  148. followthemoney/types/topic.py +94 -0
  149. followthemoney/types/url.py +39 -17
  150. followthemoney/util.py +139 -45
  151. followthemoney-3.8.1.dist-info/METADATA +153 -0
  152. followthemoney-3.8.1.dist-info/RECORD +157 -0
  153. {followthemoney-1.3.7.dist-info → followthemoney-3.8.1.dist-info}/WHEEL +1 -2
  154. followthemoney-3.8.1.dist-info/entry_points.txt +17 -0
  155. followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.1.dist-info/licenses/LICENSE +1 -1
  156. followthemoney/link.py +0 -75
  157. followthemoney/schema/Associate.yml +0 -19
  158. followthemoney/schema/Family.yml +0 -19
  159. followthemoney/schema/Land.yml +0 -9
  160. followthemoney/schema/Relationship.yaml +0 -26
  161. followthemoney/types/domain.py +0 -50
  162. followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
  163. followthemoney-1.3.7.dist-info/METADATA +0 -39
  164. followthemoney-1.3.7.dist-info/RECORD +0 -108
  165. followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
  166. followthemoney-1.3.7.dist-info/metadata.json +0 -1
  167. followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
  168. followthemoney-1.3.7.dist-info/top_level.txt +0 -3
  169. ns/ontology.py +0 -128
  170. tests/types/test_addresses.py +0 -24
  171. tests/types/test_common.py +0 -32
  172. tests/types/test_countries.py +0 -27
  173. tests/types/test_dates.py +0 -73
  174. tests/types/test_domains.py +0 -23
  175. tests/types/test_emails.py +0 -32
  176. tests/types/test_entity.py +0 -19
  177. tests/types/test_iban.py +0 -109
  178. tests/types/test_identifiers.py +0 -27
  179. tests/types/test_ip.py +0 -29
  180. tests/types/test_languages.py +0 -23
  181. tests/types/test_names.py +0 -33
  182. tests/types/test_phones.py +0 -24
  183. tests/types/test_registry.py +0 -14
  184. tests/types/test_urls.py +0 -23
  185. {ns → followthemoney/export}/__init__.py +0 -0
  186. /tests/types/__init__.py → /followthemoney/py.typed +0 -0
followthemoney/schema.py CHANGED
@@ -1,193 +1,494 @@
1
- from rdflib import URIRef
1
+ from typing import (
2
+ TYPE_CHECKING,
3
+ Any,
4
+ Dict,
5
+ List,
6
+ Optional,
7
+ Set,
8
+ TypedDict,
9
+ Union,
10
+ cast,
11
+ )
2
12
  from banal import ensure_list, ensure_dict, as_bool
13
+ from functools import lru_cache
3
14
 
4
- from followthemoney.property import Property
15
+ from followthemoney.property import Property, PropertySpec, PropertyToDict, ReverseSpec
5
16
  from followthemoney.types import registry
6
17
  from followthemoney.exc import InvalidData, InvalidModel
7
- from followthemoney.util import gettext, NAMESPACE
18
+ from followthemoney.rdf import URIRef, NS
19
+ from followthemoney.util import gettext
8
20
 
21
+ if TYPE_CHECKING:
22
+ from followthemoney.model import Model
9
23
 
10
- class Schema(object):
11
- """Defines the abstract data model.
12
24
 
13
- Schema items define the entities and links available in the model.
25
+ class EdgeSpec(TypedDict, total=False):
26
+ source: str
27
+ target: str
28
+ caption: List[str]
29
+ label: str
30
+ directed: bool
31
+
32
+
33
+ class TemporalExtentSpec(TypedDict, total=False):
34
+ start: List[str]
35
+ end: List[str]
36
+
37
+
38
+ class SchemaSpec(TypedDict, total=False):
39
+ label: str
40
+ plural: str
41
+ schemata: List[str]
42
+ extends: List[str]
43
+ properties: Dict[str, PropertySpec]
44
+ featured: List[str]
45
+ required: List[str]
46
+ caption: List[str]
47
+ edge: EdgeSpec
48
+ temporalExtent: TemporalExtentSpec
49
+ description: Optional[str]
50
+ rdf: Optional[str]
51
+ abstract: bool
52
+ hidden: bool
53
+ generated: bool
54
+ matchable: bool
55
+ deprecated: Optional[bool]
56
+
57
+
58
+ class SchemaToDict(TypedDict, total=False):
59
+ label: str
60
+ plural: str
61
+ schemata: List[str]
62
+ extends: List[str]
63
+ properties: Dict[str, PropertyToDict]
64
+ featured: List[str]
65
+ required: List[str]
66
+ caption: List[str]
67
+ edge: EdgeSpec
68
+ temporalExtent: TemporalExtentSpec
69
+ description: Optional[str]
70
+ abstract: bool
71
+ hidden: bool
72
+ generated: bool
73
+ matchable: bool
74
+ deprecated: bool
75
+
76
+
77
+ class Schema:
78
+ """A type definition for a class of entities that have certain properties.
79
+
80
+ Schemata are arranged in a multi-rooted hierarchy: each schema can have multiple
81
+ parent schemata from which it inherits all of their properties. A schema can also
82
+ have descendant child schemata, which, in turn, add further properties. Schemata
83
+ are usually accessed via the model, which holds all available definitions.
14
84
  """
15
85
 
16
- def __init__(self, model, name, data):
17
- self.model = model
86
+ __slots__ = (
87
+ "model",
88
+ "name",
89
+ "_label",
90
+ "_plural",
91
+ "_description",
92
+ "_hash",
93
+ "uri",
94
+ "abstract",
95
+ "hidden",
96
+ "generated",
97
+ "matchable",
98
+ "featured",
99
+ "required",
100
+ "deprecated",
101
+ "caption",
102
+ "edge",
103
+ "_edge_label",
104
+ "edge_directed",
105
+ "edge_source",
106
+ "edge_target",
107
+ "edge_caption",
108
+ "_temporal_start",
109
+ "_temporal_end",
110
+ "_extends",
111
+ "extends",
112
+ "schemata",
113
+ "names",
114
+ "descendants",
115
+ "properties",
116
+ "_matchable_schemata",
117
+ )
118
+
119
+ def __init__(self, model: "Model", name: str, data: SchemaSpec) -> None:
120
+ #: Machine-readable name of the schema, used for identification.
18
121
  self.name = name
19
- self.data = data
20
- self.icon = data.get('icon')
21
- self._label = data.get('label', name)
22
- self._plural = data.get('plural', self.label)
23
- self._description = data.get('description')
24
- self._extends = ensure_list(data.get('extends'))
25
- self.featured = ensure_list(data.get('featured'))
26
-
27
- self.uri = NAMESPACE[name]
28
- if 'rdf' in data:
29
- self.uri = URIRef(data.get('rdf'))
30
-
31
- # Do not show in listings:
32
- self.abstract = as_bool(data.get('abstract'), False)
33
-
34
- # Try to perform fuzzy matching. Fuzzy similarity search does not
35
- # make sense for entities which have a lot of similar names, such
36
- # as land plots, assets etc.
37
- self.matchable = as_bool(data.get('matchable'), True)
38
-
39
- self._own_properties = []
40
- for name, prop in data.get('properties', {}).items():
41
- self._own_properties.append(Property(self, name, prop))
42
-
43
- def generate(self):
44
- for prop in self._own_properties:
45
- prop.generate()
122
+ self.model = model
123
+ self._label = data.get("label", name)
124
+ self._plural = data.get("plural", self.label)
125
+ self._description = data.get("description")
126
+ self._hash = hash("<Schema(%r)>" % name)
127
+
128
+ #: RDF identifier for this schema when it is transformed to a triple term.
129
+ self.uri = URIRef(cast(str, data.get("rdf", NS[name])))
130
+
131
+ #: Do not store or emit entities of this type, it is used only for
132
+ #: inheritance.
133
+ self.abstract = as_bool(data.get("abstract"), False)
134
+
135
+ #: This schema is deprecated and should not be used.
136
+ self.deprecated = as_bool(data.get("deprecated", False))
137
+
138
+ #: Hide this schema in listings.
139
+ self.hidden = as_bool(data.get("hidden"), False)
140
+ self.hidden = self.hidden and not self.abstract
141
+
142
+ #: Entities with this type are generated by the system - for example, via
143
+ #: `ingest-file`. The user should not be offered an option to create them
144
+ #: in the interface.
145
+ self.generated = as_bool(data.get("generated"), False)
146
+
147
+ #: Try to perform fuzzy matching. Fuzzy similarity search does not
148
+ #: make sense for entities which have a lot of similar names, such
149
+ #: as land plots, assets etc.
150
+ self.matchable = as_bool(data.get("matchable"), True)
151
+
152
+ #: Mark a set of properties as important, i.e. they should be shown
153
+ #: first, or in an abridged view of the entity. In Aleph, these properties
154
+ #: are included in tabular entity listings.
155
+ self.featured = ensure_list(data.get("featured", []))
156
+
157
+ #: Mark a set of properties as required. This is applied only when
158
+ #: an entity is created by the user - bulk created entities will
159
+ #: slip through even if they are technically invalid.
160
+ self.required = ensure_list(data.get("required", []))
161
+
162
+ #: Mark a set of properties to be used for the entity's caption.
163
+ #: They will be checked in order and the first existent value will
164
+ #: be used.
165
+ self.caption = ensure_list(data.get("caption", []))
166
+
167
+ # A transform of the entity into an edge for its representation in
168
+ # the context of a property graph representation like Neo4J/Gephi.
169
+ edge = data.get("edge", {})
170
+ self.edge_source = edge.get("source")
171
+ self.edge_target = edge.get("target")
172
+
173
+ #: Flag to indicate if this schema should be represented by an edge (rather than
174
+ #: a node) when the data is converted into a property graph.
175
+ self.edge: bool = self.edge_source is not None and self.edge_target is not None
176
+ self.edge_caption = ensure_list(edge.get("caption", []))
177
+ self._edge_label = edge.get("label", self._label)
178
+
179
+ #: Flag to indicate if the edge should be presented as directed to the user,
180
+ #: e.g. by showing an arrow at the target end of the edge.
181
+ self.edge_directed = as_bool(edge.get("directed", True))
182
+
183
+ #: Specify which properties should be used to represent this schema in a
184
+ #: timeline.
185
+ temporal_extent = data.get("temporalExtent", {})
186
+ self._temporal_start = ensure_list(temporal_extent.get("start", []))
187
+ self._temporal_end = ensure_list(temporal_extent.get("end", []))
188
+
189
+ #: Direct parent schemata of this schema.
190
+ self._extends = ensure_list(data.get("extends", []))
191
+ self.extends: Set["Schema"] = set()
192
+
193
+ #: All parents of this schema (including indirect parents and the schema
194
+ #: itself).
195
+ self.schemata = set([self])
196
+
197
+ #: All names of :attr:`~schemata`.
198
+ self.names = set([self.name])
199
+
200
+ #: Inverse of :attr:`~schemata`, all derived child types of this schema
201
+ #: and their children.
202
+ self.descendants: Set["Schema"] = set()
203
+ self._matchable_schemata: Optional[Set["Schema"]] = None
204
+
205
+ #: The full list of properties defined for the entity, including those
206
+ #: inherited from parent schemata.
207
+ self.properties: Dict[str, Property] = {}
208
+ for name, prop in data.get("properties", {}).items():
209
+ self.properties[name] = Property(self, name, prop)
210
+
211
+ def generate(self, model: "Model") -> None:
212
+ """While loading the schema, this function will validate and
213
+ load the hierarchy, properties, and flags of the definition."""
214
+ temporal_start: Optional[List[str]] = None
215
+ temporal_end: Optional[List[str]] = None
216
+ for extends in self._extends:
217
+ parent = model.get(extends)
218
+ if parent is None:
219
+ raise InvalidData("Invalid extends: %r" % extends)
220
+ parent.generate(model)
221
+
222
+ for name, prop in parent.properties.items():
223
+ if name not in self.properties:
224
+ self.properties[name] = prop
225
+
226
+ self.extends.add(parent)
227
+ for ancestor in parent.schemata:
228
+ self.schemata.add(ancestor)
229
+ self.names.add(ancestor.name)
230
+ ancestor.descendants.add(self)
231
+
232
+ if len(self._temporal_start) == 0 and parent.temporal_start:
233
+ if (
234
+ temporal_start is not None
235
+ and temporal_start != parent.temporal_start
236
+ ):
237
+ raise InvalidModel(
238
+ "Conflicting temporal start properties: %s" % self.name
239
+ )
240
+ temporal_start = parent.temporal_start
241
+
242
+ if len(self._temporal_end) == 0 and parent.temporal_end:
243
+ if temporal_end is not None and temporal_end != parent.temporal_end:
244
+ raise InvalidModel(
245
+ "Conflicting temporal start properties: %s" % self.name
246
+ )
247
+ temporal_end = parent.temporal_end
248
+
249
+ for prop in list(self.properties.values()):
250
+ prop.generate(model)
46
251
 
47
252
  for featured in self.featured:
48
253
  if self.get(featured) is None:
49
254
  raise InvalidModel("Missing featured property: %s" % featured)
50
255
 
51
- def _add_reverse(self, data, other):
52
- name = data.pop('name', None)
256
+ for caption in self.caption:
257
+ prop_ = self.get(caption)
258
+ if prop_ is None:
259
+ raise InvalidModel("Missing caption property: %s" % caption)
260
+ if prop_.type == registry.entity:
261
+ raise InvalidModel("Caption cannot be entity: %s" % caption)
262
+
263
+ for required in self.required:
264
+ if self.get(required) is None:
265
+ raise InvalidModel("Missing required property: %s" % required)
266
+
267
+ if self.edge:
268
+ if self.source_prop is None:
269
+ msg = "Missing edge source: %s" % self.edge_source
270
+ raise InvalidModel(msg)
271
+
272
+ if self.target_prop is None:
273
+ msg = "Missing edge target: %s" % self.edge_target
274
+ raise InvalidModel(msg)
275
+
276
+ def _add_reverse(
277
+ self, model: "Model", data: ReverseSpec, other: Property
278
+ ) -> Property:
279
+ name = data.get("name")
53
280
  if name is None:
54
281
  raise InvalidModel("Unnamed reverse: %s" % other)
55
282
 
56
283
  prop = self.get(name)
57
284
  if prop is None:
58
- data.update({
59
- 'type': 'entity',
60
- 'reverse': {'name': other.name},
61
- 'schema': other.schema.name
62
- })
63
- prop = Property(self, name, data, stub=True)
64
- prop.generate()
65
- self._own_properties.append(prop)
66
- self._flush_properties()
67
- assert prop.type == registry.entity, prop.type
285
+ spec: PropertySpec = {
286
+ "label": data.get("label"),
287
+ "type": registry.entity.name,
288
+ "reverse": {"name": other.name},
289
+ "range": other.schema.name,
290
+ "hidden": data.get("hidden", other.hidden),
291
+ }
292
+ prop = Property(self, name, spec)
293
+ prop.stub = True
294
+ prop.generate(model)
295
+ self.properties[name] = prop
68
296
  return prop
69
297
 
70
298
  @property
71
- def label(self):
299
+ def label(self) -> str:
300
+ """User-facing name of the schema."""
72
301
  return gettext(self._label)
73
302
 
74
303
  @property
75
- def plural(self):
304
+ def plural(self) -> str:
305
+ """Name of the schema to be used in plural constructions."""
76
306
  return gettext(self._plural)
77
307
 
78
308
  @property
79
- def description(self):
309
+ def description(self) -> Optional[str]:
310
+ """A longer description of the semantics of the schema."""
80
311
  return gettext(self._description)
81
312
 
82
313
  @property
83
- def extends(self):
84
- """Return the inherited schemata."""
85
- for base in self._extends:
86
- basecls = self.model.get(base)
87
- if basecls is None:
88
- raise InvalidModel("No such schema: %s" % base)
89
- yield basecls
314
+ def edge_label(self) -> Optional[str]:
315
+ """Description label for edges derived from entities of this schema."""
316
+ return gettext(self._edge_label)
317
+
318
+ @property
319
+ def source_prop(self) -> Optional[Property]:
320
+ """The entity property to be used as an edge source."""
321
+ return self.get(self.edge_source)
90
322
 
91
323
  @property
92
- def schemata(self):
93
- """Return the full inheritance chain."""
94
- seen = set([self])
95
- yield self
96
- for base in self.extends:
97
- for schema in base.schemata:
98
- if schema not in seen:
99
- seen.add(schema)
100
- yield schema
324
+ def target_prop(self) -> Optional[Property]:
325
+ """The entity property to be used as an edge target."""
326
+ return self.get(self.edge_target)
101
327
 
102
328
  @property
103
- def descendants(self):
104
- for schema in self.model:
105
- if schema == self:
106
- continue
107
- if self in schema.schemata:
108
- yield schema
329
+ def temporal_start(self) -> List[str]:
330
+ """The entity properties to be used as the start when representing the entity
331
+ in a timeline."""
332
+ if not len(self._temporal_start):
333
+ for parent in self.extends:
334
+ if len(parent.temporal_start):
335
+ return parent.temporal_start
336
+ return self._temporal_start
109
337
 
110
338
  @property
111
- def matchable_schemata(self):
112
- """The set of comparable types."""
113
- if not self.matchable:
114
- return
115
- # This is used by the cross-referencer to determine what
116
- # other schemata should be considered for matches. For
117
- # example, a Company may be compared to a Legal Entity,
118
- # but it makes no sense to compare it to an Aircraft.
119
- matchable = set(self.schemata)
120
- for schema in self.descendants:
121
- matchable.add(schema)
122
- for schema in matchable:
123
- if schema.matchable:
124
- yield schema
339
+ def temporal_end(self) -> List[str]:
340
+ """The entity properties to be used as the end when representing the entity
341
+ in a timeline."""
342
+ if not len(self._temporal_end):
343
+ for parent in self.extends:
344
+ if len(parent.temporal_end):
345
+ return parent.temporal_end
346
+ return self._temporal_end
125
347
 
126
348
  @property
127
- def names(self):
128
- return [s.name for s in self.schemata]
349
+ def temporal_start_props(self) -> List[Property]:
350
+ """The entity properties to be used as the start when representing the entity
351
+ in a timeline."""
352
+ props = [self.get(prop_name) for prop_name in self.temporal_start]
353
+ return [prop for prop in props if prop is not None]
129
354
 
130
- def is_a(self, parent):
131
- for schema in self.schemata:
132
- if schema == parent:
133
- return True
134
- return False
355
+ @property
356
+ def temporal_end_props(self) -> List[Property]:
357
+ """The entity properties to be used as the end when representing the entity
358
+ in a timeline."""
359
+ props = [self.get(prop_name) for prop_name in self.temporal_end]
360
+ return [prop for prop in props if prop is not None]
361
+
362
+ @property
363
+ def sorted_properties(self) -> List[Property]:
364
+ """All properties of the schema in the order in which they should be shown
365
+ to the user (alphabetically, with captions and featured properties first)."""
366
+ return sorted(
367
+ self.properties.values(),
368
+ key=lambda p: (
369
+ p.name not in self.caption,
370
+ p.name not in self.featured,
371
+ p.label,
372
+ ),
373
+ )
135
374
 
136
375
  @property
137
- def properties(self):
138
- """Return properties, those defined locally and in ancestors."""
139
- if not hasattr(self, '_properties') or self._properties is None:
140
- self._properties = {}
141
- for schema in self.extends:
142
- for name, prop in schema.properties.items():
143
- self._properties[name] = prop
144
- for prop in self._own_properties:
145
- self._properties[prop.name] = prop
146
- return self._properties
147
-
148
- def _flush_properties(self):
149
- for schema in self.descendants:
150
- schema._flush_properties()
151
- self._properties = None
152
-
153
- def get(self, name):
376
+ def matchable_schemata(self) -> Set["Schema"]:
377
+ """Return the set of schemata with which it makes sense to compare this
378
+ schema. For example, it makes sense to compare a legal entity with a company,
379
+ but it does not make sense to compare a car and a person."""
380
+ if self._matchable_schemata is None:
381
+ self._matchable_schemata = set()
382
+ if self.matchable:
383
+ # This is used by the cross-referencer to determine what
384
+ # other schemata should be considered for matches. For
385
+ # example, a Company may be compared to a Legal Entity,
386
+ # but it makes no sense to compare it to an Aircraft.
387
+ candidates = set(self.schemata)
388
+ candidates.update(self.descendants)
389
+ for schema in candidates:
390
+ if schema.matchable:
391
+ self._matchable_schemata.add(schema)
392
+ return self._matchable_schemata
393
+
394
+ def can_match(self, other: "Schema") -> bool:
395
+ """Check if a schema can match with another schema."""
396
+ return other in self.matchable_schemata
397
+
398
+ @lru_cache(maxsize=None)
399
+ def is_a(self, other: Union[str, "Schema"]) -> bool:
400
+ """Check if the schema or one of its parents is the same as the given
401
+ candidate ``other``."""
402
+ if not isinstance(other, str):
403
+ other = other.name
404
+ return other in self.names
405
+
406
+ def get(self, name: Optional[str]) -> Optional[Property]:
407
+ """Retrieve a property defined for this schema by its name."""
408
+ if name is None:
409
+ return None
154
410
  return self.properties.get(name)
155
411
 
156
- def validate(self, data):
157
- """Validate a dataset against the given schema.
158
- This will also drop keys which are not present as properties.
412
+ def validate(self, data: Any) -> Optional[str]:
413
+ """Validate a dictionary against the given schema.
414
+ This will also drop keys which are not valid as properties.
159
415
  """
160
416
  errors = {}
161
- properties = ensure_dict(data.get('properties'))
417
+ properties = cast(Dict[str, Any], ensure_dict(data.get("properties")))
162
418
  for name, prop in self.properties.items():
163
- values = properties.get(name)
419
+ values = ensure_list(properties.get(name, []))
164
420
  error = prop.validate(values)
421
+ if error is None and not len(values):
422
+ if prop.name in self.required:
423
+ error = gettext("Required")
165
424
  if error is not None:
166
425
  errors[name] = error
167
426
  if len(errors):
168
- raise InvalidData({'properties': errors})
169
-
170
- def to_dict(self):
171
- data = {
172
- 'label': self.label,
173
- 'plural': self.plural,
174
- 'icon': self.icon,
175
- 'uri': str(self.uri),
176
- 'abstract': self.abstract,
177
- 'matchable': self.matchable,
178
- 'description': self.description,
179
- 'featured': self.featured,
180
- 'properties': {}
427
+ msg = gettext("Entity validation failed")
428
+ raise InvalidData(msg, errors={"properties": errors})
429
+ return None
430
+
431
+ def to_dict(self) -> SchemaToDict:
432
+ """Return schema metadata, including all properties, in a serializable form."""
433
+ data: SchemaToDict = {
434
+ "label": self.label,
435
+ "plural": self.plural,
436
+ "schemata": list(sorted(self.names)),
437
+ "extends": list(sorted([e.name for e in self.extends])),
181
438
  }
439
+ if self.edge_source and self.edge_target and self.edge_label:
440
+ data["edge"] = {
441
+ "source": self.edge_source,
442
+ "target": self.edge_target,
443
+ "caption": self.edge_caption,
444
+ "label": self.edge_label,
445
+ "directed": self.edge_directed,
446
+ }
447
+ if len(self.temporal_start) or len(self.temporal_end):
448
+ data["temporalExtent"] = {
449
+ "start": self.temporal_start,
450
+ "end": self.temporal_end,
451
+ }
452
+ if len(self.featured):
453
+ data["featured"] = self.featured
454
+ if len(self.required):
455
+ data["required"] = self.required
456
+ if len(self.caption):
457
+ data["caption"] = self.caption
458
+ if self.description:
459
+ data["description"] = self.description
460
+ if self.abstract:
461
+ data["abstract"] = True
462
+ if self.hidden:
463
+ data["hidden"] = True
464
+ if self.generated:
465
+ data["generated"] = True
466
+ if self.matchable:
467
+ data["matchable"] = True
468
+ if self.deprecated:
469
+ data["deprecated"] = True
470
+ properties: Dict[str, PropertyToDict] = {}
182
471
  for name, prop in self.properties.items():
183
- data['properties'][name] = prop.to_dict()
472
+ if prop.schema == self:
473
+ properties[name] = prop.to_dict()
474
+ data["properties"] = properties
184
475
  return data
185
476
 
186
- def __eq__(self, other):
187
- return hash(other) == hash(self)
477
+ def __eq__(self, other: Any) -> bool:
478
+ """Compare two schemata (via hash)."""
479
+ try:
480
+ return self._hash == hash(other)
481
+ except AttributeError:
482
+ return False
483
+
484
+ def __lt__(self, other: Any) -> bool:
485
+ return self.name.__lt__(other.name)
188
486
 
189
- def __hash__(self):
190
- return hash(self.name)
487
+ def __hash__(self) -> int:
488
+ try:
489
+ return self._hash
490
+ except AttributeError:
491
+ return super().__hash__()
191
492
 
192
- def __repr__(self):
193
- return '<Schema(%r)>' % self.name
493
+ def __repr__(self) -> str:
494
+ return "<Schema(%r)>" % self.name