followthemoney 1.3.7__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. followthemoney/__init__.py +5 -3
  2. followthemoney/cli/__init__.py +17 -0
  3. followthemoney/cli/aggregate.py +56 -0
  4. followthemoney/cli/cli.py +88 -0
  5. followthemoney/cli/exports.py +121 -0
  6. followthemoney/cli/mapping.py +85 -0
  7. followthemoney/cli/sieve.py +67 -0
  8. followthemoney/cli/util.py +142 -0
  9. followthemoney/compare.py +130 -60
  10. followthemoney/exc.py +19 -6
  11. followthemoney/export/common.py +29 -0
  12. followthemoney/export/csv.py +82 -0
  13. followthemoney/export/excel.py +75 -0
  14. followthemoney/export/graph.py +79 -0
  15. followthemoney/export/neo4j.py +182 -0
  16. followthemoney/export/rdf.py +26 -0
  17. followthemoney/graph.py +308 -0
  18. followthemoney/helpers.py +212 -0
  19. followthemoney/mapping/__init__.py +1 -1
  20. followthemoney/mapping/csv.py +67 -35
  21. followthemoney/mapping/entity.py +116 -44
  22. followthemoney/mapping/property.py +90 -44
  23. followthemoney/mapping/query.py +27 -19
  24. followthemoney/mapping/source.py +15 -5
  25. followthemoney/mapping/sql.py +75 -61
  26. followthemoney/messages.py +13 -7
  27. followthemoney/model.py +108 -56
  28. followthemoney/namespace.py +119 -0
  29. followthemoney/offshore.py +48 -0
  30. followthemoney/ontology.py +77 -0
  31. followthemoney/property.py +204 -71
  32. followthemoney/proxy.py +455 -118
  33. followthemoney/rdf.py +9 -0
  34. followthemoney/schema/Address.yaml +78 -0
  35. followthemoney/schema/Airplane.yaml +17 -10
  36. followthemoney/schema/Analyzable.yaml +54 -0
  37. followthemoney/schema/Article.yaml +16 -0
  38. followthemoney/schema/Assessment.yaml +32 -0
  39. followthemoney/schema/Asset.yaml +10 -4
  40. followthemoney/schema/Associate.yaml +41 -0
  41. followthemoney/schema/Audio.yaml +24 -0
  42. followthemoney/schema/BankAccount.yaml +53 -9
  43. followthemoney/schema/Call.yaml +48 -0
  44. followthemoney/schema/CallForTenders.yaml +117 -0
  45. followthemoney/schema/Company.yaml +37 -12
  46. followthemoney/schema/Contract.yaml +41 -7
  47. followthemoney/schema/ContractAward.yaml +30 -11
  48. followthemoney/schema/CourtCase.yaml +16 -10
  49. followthemoney/schema/CourtCaseParty.yaml +17 -6
  50. followthemoney/schema/CryptoWallet.yaml +48 -0
  51. followthemoney/schema/Debt.yaml +37 -0
  52. followthemoney/schema/Directorship.yaml +17 -4
  53. followthemoney/schema/Document.yaml +72 -139
  54. followthemoney/schema/Documentation.yml +38 -0
  55. followthemoney/schema/EconomicActivity.yaml +32 -17
  56. followthemoney/schema/Email.yaml +76 -0
  57. followthemoney/schema/Employment.yaml +39 -0
  58. followthemoney/schema/Event.yaml +35 -3
  59. followthemoney/schema/Family.yaml +41 -0
  60. followthemoney/schema/Folder.yaml +13 -0
  61. followthemoney/schema/HyperText.yaml +21 -0
  62. followthemoney/schema/Identification.yaml +40 -0
  63. followthemoney/schema/Image.yaml +25 -0
  64. followthemoney/schema/Interest.yaml +3 -6
  65. followthemoney/schema/Interval.yaml +56 -5
  66. followthemoney/schema/LegalEntity.yaml +81 -20
  67. followthemoney/schema/License.yaml +7 -3
  68. followthemoney/schema/Membership.yaml +19 -4
  69. followthemoney/schema/Mention.yaml +54 -0
  70. followthemoney/schema/Message.yaml +73 -0
  71. followthemoney/schema/Note.yaml +23 -0
  72. followthemoney/schema/Occupancy.yaml +40 -0
  73. followthemoney/schema/Organization.yaml +38 -3
  74. followthemoney/schema/Ownership.yaml +16 -4
  75. followthemoney/schema/Package.yaml +17 -0
  76. followthemoney/schema/Page.yaml +43 -0
  77. followthemoney/schema/Pages.yaml +23 -0
  78. followthemoney/schema/Passport.yaml +15 -17
  79. followthemoney/schema/Payment.yaml +38 -7
  80. followthemoney/schema/Person.yaml +61 -5
  81. followthemoney/schema/PlainText.yaml +17 -0
  82. followthemoney/schema/Position.yaml +50 -0
  83. followthemoney/schema/Post.yaml +42 -0
  84. followthemoney/schema/Project.yaml +27 -0
  85. followthemoney/schema/ProjectParticipant.yaml +36 -0
  86. followthemoney/schema/PublicBody.yaml +14 -3
  87. followthemoney/schema/RealEstate.yaml +19 -3
  88. followthemoney/schema/Representation.yaml +17 -6
  89. followthemoney/schema/Sanction.yaml +44 -20
  90. followthemoney/schema/Security.yaml +59 -0
  91. followthemoney/schema/Similar.yaml +37 -0
  92. followthemoney/schema/Succession.yaml +36 -0
  93. followthemoney/schema/Table.yaml +32 -0
  94. followthemoney/schema/TaxRoll.yaml +27 -9
  95. followthemoney/schema/Thing.yaml +69 -13
  96. followthemoney/schema/Trip.yaml +42 -0
  97. followthemoney/schema/UnknownLink.yaml +17 -6
  98. followthemoney/schema/UserAccount.yaml +44 -0
  99. followthemoney/schema/Value.yaml +5 -1
  100. followthemoney/schema/Vehicle.yaml +25 -8
  101. followthemoney/schema/Vessel.yaml +18 -10
  102. followthemoney/schema/Video.yaml +20 -0
  103. followthemoney/schema/Workbook.yaml +18 -0
  104. followthemoney/schema.py +406 -135
  105. followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
  106. followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
  107. followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
  108. followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
  109. followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
  110. followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
  111. followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
  112. followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
  113. followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
  114. followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
  115. followthemoney/translations/fr/followthemoney.po +3861 -0
  116. followthemoney/translations/messages.pot +3021 -725
  117. followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
  118. followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
  119. followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
  120. followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
  121. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
  122. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
  123. followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
  124. followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
  125. followthemoney/translations/ru/followthemoney.po +4221 -0
  126. followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
  127. followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
  128. followthemoney/types/__init__.py +35 -17
  129. followthemoney/types/address.py +41 -21
  130. followthemoney/types/checksum.py +25 -0
  131. followthemoney/types/common.py +233 -88
  132. followthemoney/types/country.py +89 -56
  133. followthemoney/types/date.py +59 -76
  134. followthemoney/types/email.py +66 -35
  135. followthemoney/types/entity.py +66 -13
  136. followthemoney/types/gender.py +66 -0
  137. followthemoney/types/iban.py +47 -28
  138. followthemoney/types/identifier.py +49 -22
  139. followthemoney/types/ip.py +35 -21
  140. followthemoney/types/json.py +58 -0
  141. followthemoney/types/language.py +124 -37
  142. followthemoney/types/mimetype.py +44 -0
  143. followthemoney/types/name.py +56 -12
  144. followthemoney/types/number.py +30 -0
  145. followthemoney/types/phone.py +92 -34
  146. followthemoney/types/registry.py +52 -0
  147. followthemoney/types/string.py +43 -0
  148. followthemoney/types/topic.py +94 -0
  149. followthemoney/types/url.py +39 -17
  150. followthemoney/util.py +139 -45
  151. followthemoney-3.8.0.dist-info/METADATA +153 -0
  152. followthemoney-3.8.0.dist-info/RECORD +157 -0
  153. {followthemoney-1.3.7.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
  154. followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
  155. followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
  156. followthemoney/link.py +0 -75
  157. followthemoney/schema/Associate.yml +0 -19
  158. followthemoney/schema/Family.yml +0 -19
  159. followthemoney/schema/Land.yml +0 -9
  160. followthemoney/schema/Relationship.yaml +0 -26
  161. followthemoney/types/domain.py +0 -50
  162. followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
  163. followthemoney-1.3.7.dist-info/METADATA +0 -39
  164. followthemoney-1.3.7.dist-info/RECORD +0 -108
  165. followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
  166. followthemoney-1.3.7.dist-info/metadata.json +0 -1
  167. followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
  168. followthemoney-1.3.7.dist-info/top_level.txt +0 -3
  169. ns/ontology.py +0 -128
  170. tests/types/test_addresses.py +0 -24
  171. tests/types/test_common.py +0 -32
  172. tests/types/test_countries.py +0 -27
  173. tests/types/test_dates.py +0 -73
  174. tests/types/test_domains.py +0 -23
  175. tests/types/test_emails.py +0 -32
  176. tests/types/test_entity.py +0 -19
  177. tests/types/test_iban.py +0 -109
  178. tests/types/test_identifiers.py +0 -27
  179. tests/types/test_ip.py +0 -29
  180. tests/types/test_languages.py +0 -23
  181. tests/types/test_names.py +0 -33
  182. tests/types/test_phones.py +0 -24
  183. tests/types/test_registry.py +0 -14
  184. tests/types/test_urls.py +0 -23
  185. {ns → followthemoney/export}/__init__.py +0 -0
  186. /tests/types/__init__.py → /followthemoney/py.typed +0 -0
followthemoney/schema.py CHANGED
@@ -1,193 +1,464 @@
1
- from rdflib import URIRef
1
+ from typing import (
2
+ TYPE_CHECKING,
3
+ Any,
4
+ Dict,
5
+ List,
6
+ Optional,
7
+ Set,
8
+ TypedDict,
9
+ Union,
10
+ cast,
11
+ )
2
12
  from banal import ensure_list, ensure_dict, as_bool
13
+ from functools import lru_cache
3
14
 
4
- from followthemoney.property import Property
15
+ from followthemoney.property import Property, PropertySpec, PropertyToDict, ReverseSpec
5
16
  from followthemoney.types import registry
6
17
  from followthemoney.exc import InvalidData, InvalidModel
7
- from followthemoney.util import gettext, NAMESPACE
18
+ from followthemoney.rdf import URIRef, NS
19
+ from followthemoney.util import gettext
8
20
 
21
+ if TYPE_CHECKING:
22
+ from followthemoney.model import Model
9
23
 
10
- class Schema(object):
11
- """Defines the abstract data model.
12
24
 
13
- Schema items define the entities and links available in the model.
25
+ class EdgeSpec(TypedDict, total=False):
26
+ source: str
27
+ target: str
28
+ caption: List[str]
29
+ label: str
30
+ directed: bool
31
+
32
+
33
+ class TemporalExtentSpec(TypedDict, total=False):
34
+ start: List[str]
35
+ end: List[str]
36
+
37
+
38
+ class SchemaSpec(TypedDict, total=False):
39
+ label: str
40
+ plural: str
41
+ schemata: List[str]
42
+ extends: List[str]
43
+ properties: Dict[str, PropertySpec]
44
+ featured: List[str]
45
+ required: List[str]
46
+ caption: List[str]
47
+ edge: EdgeSpec
48
+ temporalExtent: TemporalExtentSpec
49
+ description: Optional[str]
50
+ rdf: Optional[str]
51
+ abstract: bool
52
+ hidden: bool
53
+ generated: bool
54
+ matchable: bool
55
+ deprecated: Optional[bool]
56
+
57
+
58
+ class SchemaToDict(TypedDict, total=False):
59
+ label: str
60
+ plural: str
61
+ schemata: List[str]
62
+ extends: List[str]
63
+ properties: Dict[str, PropertyToDict]
64
+ featured: List[str]
65
+ required: List[str]
66
+ caption: List[str]
67
+ edge: EdgeSpec
68
+ temporalExtent: TemporalExtentSpec
69
+ description: Optional[str]
70
+ abstract: bool
71
+ hidden: bool
72
+ generated: bool
73
+ matchable: bool
74
+ deprecated: bool
75
+
76
+
77
+ class Schema:
78
+ """A type definition for a class of entities that have certain properties.
79
+
80
+ Schemata are arranged in a multi-rooted hierarchy: each schema can have multiple
81
+ parent schemata from which it inherits all of their properties. A schema can also
82
+ have descendant child schemata, which, in turn, add further properties. Schemata
83
+ are usually accessed via the model, which holds all available definitions.
14
84
  """
15
85
 
16
- def __init__(self, model, name, data):
17
- self.model = model
86
+ __slots__ = (
87
+ "model",
88
+ "name",
89
+ "_label",
90
+ "_plural",
91
+ "_description",
92
+ "_hash",
93
+ "uri",
94
+ "abstract",
95
+ "hidden",
96
+ "generated",
97
+ "matchable",
98
+ "featured",
99
+ "required",
100
+ "deprecated",
101
+ "caption",
102
+ "edge",
103
+ "_edge_label",
104
+ "edge_directed",
105
+ "edge_source",
106
+ "edge_target",
107
+ "edge_caption",
108
+ "temporal_start",
109
+ "temporal_end",
110
+ "_extends",
111
+ "extends",
112
+ "schemata",
113
+ "names",
114
+ "descendants",
115
+ "properties",
116
+ "_matchable_schemata",
117
+ )
118
+
119
+ def __init__(self, model: "Model", name: str, data: SchemaSpec) -> None:
120
+ #: Machine-readable name of the schema, used for identification.
18
121
  self.name = name
19
- self.data = data
20
- self.icon = data.get('icon')
21
- self._label = data.get('label', name)
22
- self._plural = data.get('plural', self.label)
23
- self._description = data.get('description')
24
- self._extends = ensure_list(data.get('extends'))
25
- self.featured = ensure_list(data.get('featured'))
26
-
27
- self.uri = NAMESPACE[name]
28
- if 'rdf' in data:
29
- self.uri = URIRef(data.get('rdf'))
30
-
31
- # Do not show in listings:
32
- self.abstract = as_bool(data.get('abstract'), False)
33
-
34
- # Try to perform fuzzy matching. Fuzzy similarity search does not
35
- # make sense for entities which have a lot of similar names, such
36
- # as land plots, assets etc.
37
- self.matchable = as_bool(data.get('matchable'), True)
38
-
39
- self._own_properties = []
40
- for name, prop in data.get('properties', {}).items():
41
- self._own_properties.append(Property(self, name, prop))
42
-
43
- def generate(self):
44
- for prop in self._own_properties:
45
- prop.generate()
122
+ self.model = model
123
+ self._label = data.get("label", name)
124
+ self._plural = data.get("plural", self.label)
125
+ self._description = data.get("description")
126
+ self._hash = hash("<Schema(%r)>" % name)
127
+
128
+ #: RDF identifier for this schema when it is transformed to a triple term.
129
+ self.uri = URIRef(cast(str, data.get("rdf", NS[name])))
130
+
131
+ #: Do not store or emit entities of this type, it is used only for
132
+ #: inheritance.
133
+ self.abstract = as_bool(data.get("abstract"), False)
134
+
135
+ #: This schema is deprecated and should not be used.
136
+ self.deprecated = as_bool(data.get("deprecated", False))
137
+
138
+ #: Hide this schema in listings.
139
+ self.hidden = as_bool(data.get("hidden"), False)
140
+ self.hidden = self.hidden and not self.abstract
141
+
142
+ #: Entities with this type are generated by the system - for example, via
143
+ #: `ingest-file`. The user should not be offered an option to create them
144
+ #: in the interface.
145
+ self.generated = as_bool(data.get("generated"), False)
146
+
147
+ #: Try to perform fuzzy matching. Fuzzy similarity search does not
148
+ #: make sense for entities which have a lot of similar names, such
149
+ #: as land plots, assets etc.
150
+ self.matchable = as_bool(data.get("matchable"), True)
151
+
152
+ #: Mark a set of properties as important, i.e. they should be shown
153
+ #: first, or in an abridged view of the entity. In Aleph, these properties
154
+ #: are included in tabular entity listings.
155
+ self.featured = ensure_list(data.get("featured", []))
156
+
157
+ #: Mark a set of properties as required. This is applied only when
158
+ #: an entity is created by the user - bulk created entities will
159
+ #: slip through even if they are technically invalid.
160
+ self.required = ensure_list(data.get("required", []))
161
+
162
+ #: Mark a set of properties to be used for the entity's caption.
163
+ #: They will be checked in order and the first existent value will
164
+ #: be used.
165
+ self.caption = ensure_list(data.get("caption", []))
166
+
167
+ # A transform of the entity into an edge for its representation in
168
+ # the context of a property graph representation like Neo4J/Gephi.
169
+ edge = data.get("edge", {})
170
+ self.edge_source = edge.get("source")
171
+ self.edge_target = edge.get("target")
172
+
173
+ #: Flag to indicate if this schema should be represented by an edge (rather than
174
+ #: a node) when the data is converted into a property graph.
175
+ self.edge: bool = self.edge_source is not None and self.edge_target is not None
176
+ self.edge_caption = ensure_list(edge.get("caption", []))
177
+ self._edge_label = edge.get("label", self._label)
178
+
179
+ #: Flag to indicate if the edge should be presented as directed to the user,
180
+ #: e.g. by showing an arrow at the target end of the edge.
181
+ self.edge_directed = as_bool(edge.get("directed", True))
182
+
183
+ #: Specify which properties should be used to represent this schema in a
184
+ #: timeline.
185
+ temporal_extent = data.get("temporalExtent", {})
186
+ self.temporal_start = set(temporal_extent.get("start", []))
187
+ self.temporal_end = set(temporal_extent.get("end", []))
188
+
189
+ #: Direct parent schemata of this schema.
190
+ self._extends = ensure_list(data.get("extends", []))
191
+ self.extends: Set["Schema"] = set()
192
+
193
+ #: All parents of this schema (including indirect parents and the schema
194
+ #: itself).
195
+ self.schemata = set([self])
196
+
197
+ #: All names of :attr:`~schemata`.
198
+ self.names = set([self.name])
199
+
200
+ #: Inverse of :attr:`~schemata`, all derived child types of this schema
201
+ #: and their children.
202
+ self.descendants: Set["Schema"] = set()
203
+ self._matchable_schemata: Optional[Set["Schema"]] = None
204
+
205
+ #: The full list of properties defined for the entity, including those
206
+ #: inherited from parent schemata.
207
+ self.properties: Dict[str, Property] = {}
208
+ for name, prop in data.get("properties", {}).items():
209
+ self.properties[name] = Property(self, name, prop)
210
+
211
+ def generate(self, model: "Model") -> None:
212
+ """While loading the schema, this function will validate and
213
+ load the hierarchy, properties, and flags of the definition."""
214
+ for extends in self._extends:
215
+ parent = model.get(extends)
216
+ if parent is None:
217
+ raise InvalidData("Invalid extends: %r" % extends)
218
+ parent.generate(model)
219
+
220
+ for name, prop in parent.properties.items():
221
+ if name not in self.properties:
222
+ self.properties[name] = prop
223
+
224
+ self.extends.add(parent)
225
+ for ancestor in parent.schemata:
226
+ self.schemata.add(ancestor)
227
+ self.names.add(ancestor.name)
228
+ ancestor.descendants.add(self)
229
+
230
+ self.temporal_start |= parent.temporal_start
231
+ self.temporal_end |= parent.temporal_end
232
+
233
+ for prop in list(self.properties.values()):
234
+ prop.generate(model)
46
235
 
47
236
  for featured in self.featured:
48
237
  if self.get(featured) is None:
49
238
  raise InvalidModel("Missing featured property: %s" % featured)
50
239
 
51
- def _add_reverse(self, data, other):
52
- name = data.pop('name', None)
240
+ for caption in self.caption:
241
+ prop_ = self.get(caption)
242
+ if prop_ is None:
243
+ raise InvalidModel("Missing caption property: %s" % caption)
244
+ if prop_.type == registry.entity:
245
+ raise InvalidModel("Caption cannot be entity: %s" % caption)
246
+
247
+ for required in self.required:
248
+ if self.get(required) is None:
249
+ raise InvalidModel("Missing required property: %s" % required)
250
+
251
+ if self.edge:
252
+ if self.source_prop is None:
253
+ msg = "Missing edge source: %s" % self.edge_source
254
+ raise InvalidModel(msg)
255
+
256
+ if self.target_prop is None:
257
+ msg = "Missing edge target: %s" % self.edge_target
258
+ raise InvalidModel(msg)
259
+
260
+ def _add_reverse(
261
+ self, model: "Model", data: ReverseSpec, other: Property
262
+ ) -> Property:
263
+ name = data.get("name")
53
264
  if name is None:
54
265
  raise InvalidModel("Unnamed reverse: %s" % other)
55
266
 
56
267
  prop = self.get(name)
57
268
  if prop is None:
58
- data.update({
59
- 'type': 'entity',
60
- 'reverse': {'name': other.name},
61
- 'schema': other.schema.name
62
- })
63
- prop = Property(self, name, data, stub=True)
64
- prop.generate()
65
- self._own_properties.append(prop)
66
- self._flush_properties()
67
- assert prop.type == registry.entity, prop.type
269
+ spec: PropertySpec = {
270
+ "label": data.get("label"),
271
+ "type": registry.entity.name,
272
+ "reverse": {"name": other.name},
273
+ "range": other.schema.name,
274
+ "hidden": data.get("hidden", other.hidden),
275
+ }
276
+ prop = Property(self, name, spec)
277
+ prop.stub = True
278
+ prop.generate(model)
279
+ self.properties[name] = prop
68
280
  return prop
69
281
 
70
282
  @property
71
- def label(self):
283
+ def label(self) -> str:
284
+ """User-facing name of the schema."""
72
285
  return gettext(self._label)
73
286
 
74
287
  @property
75
- def plural(self):
288
+ def plural(self) -> str:
289
+ """Name of the schema to be used in plural constructions."""
76
290
  return gettext(self._plural)
77
291
 
78
292
  @property
79
- def description(self):
293
+ def description(self) -> Optional[str]:
294
+ """A longer description of the semantics of the schema."""
80
295
  return gettext(self._description)
81
296
 
82
297
  @property
83
- def extends(self):
84
- """Return the inherited schemata."""
85
- for base in self._extends:
86
- basecls = self.model.get(base)
87
- if basecls is None:
88
- raise InvalidModel("No such schema: %s" % base)
89
- yield basecls
298
+ def edge_label(self) -> Optional[str]:
299
+ """Description label for edges derived from entities of this schema."""
300
+ return gettext(self._edge_label)
90
301
 
91
302
  @property
92
- def schemata(self):
93
- """Return the full inheritance chain."""
94
- seen = set([self])
95
- yield self
96
- for base in self.extends:
97
- for schema in base.schemata:
98
- if schema not in seen:
99
- seen.add(schema)
100
- yield schema
303
+ def source_prop(self) -> Optional[Property]:
304
+ """The entity property to be used as an edge source."""
305
+ return self.get(self.edge_source)
101
306
 
102
307
  @property
103
- def descendants(self):
104
- for schema in self.model:
105
- if schema == self:
106
- continue
107
- if self in schema.schemata:
108
- yield schema
308
+ def target_prop(self) -> Optional[Property]:
309
+ """The entity property to be used as an edge target."""
310
+ return self.get(self.edge_target)
109
311
 
110
312
  @property
111
- def matchable_schemata(self):
112
- """The set of comparable types."""
113
- if not self.matchable:
114
- return
115
- # This is used by the cross-referencer to determine what
116
- # other schemata should be considered for matches. For
117
- # example, a Company may be compared to a Legal Entity,
118
- # but it makes no sense to compare it to an Aircraft.
119
- matchable = set(self.schemata)
120
- for schema in self.descendants:
121
- matchable.add(schema)
122
- for schema in matchable:
123
- if schema.matchable:
124
- yield schema
313
+ def temporal_start_props(self) -> Set[Property]:
314
+ """The entity properties to be used as the start when representing the entity
315
+ in a timeline."""
316
+ props = [self.get(prop_name) for prop_name in self.temporal_start]
317
+ return set([prop for prop in props if prop is not None])
125
318
 
126
319
  @property
127
- def names(self):
128
- return [s.name for s in self.schemata]
320
+ def temporal_end_props(self) -> Set[Property]:
321
+ """The entity properties to be used as the end when representing the entity
322
+ in a timeline."""
323
+ props = [self.get(prop_name) for prop_name in self.temporal_end]
324
+ return set([prop for prop in props if prop is not None])
129
325
 
130
- def is_a(self, parent):
131
- for schema in self.schemata:
132
- if schema == parent:
133
- return True
134
- return False
326
@property
def sorted_properties(self) -> List[Property]:
    """Return every property of the schema in display order.

    Properties named in ``self.caption`` come first, then those named in
    ``self.featured``; ties are broken by the property label.
    """

    def display_rank(prop):
        # ``False`` sorts before ``True``, so membership in caption/featured
        # moves a property towards the front of the list.
        return (
            prop.name not in self.caption,
            prop.name not in self.featured,
            prop.label,
        )

    ordered = list(self.properties.values())
    ordered.sort(key=display_rank)
    return ordered
135
338
 
136
339
  @property
137
- def properties(self):
138
- """Return properties, those defined locally and in ancestors."""
139
- if not hasattr(self, '_properties') or self._properties is None:
140
- self._properties = {}
141
- for schema in self.extends:
142
- for name, prop in schema.properties.items():
143
- self._properties[name] = prop
144
- for prop in self._own_properties:
145
- self._properties[prop.name] = prop
146
- return self._properties
147
-
148
- def _flush_properties(self):
149
- for schema in self.descendants:
150
- schema._flush_properties()
151
- self._properties = None
152
-
153
- def get(self, name):
340
+ def matchable_schemata(self) -> Set["Schema"]:
341
+ """Return the set of schemata to which it makes sense to compare with this
342
+ schema. For example, it makes sense to compare a legal entity with a company,
343
+ but it does not make sense to compare a car and a person."""
344
+ if self._matchable_schemata is None:
345
+ self._matchable_schemata = set()
346
+ if self.matchable:
347
+ # This is used by the cross-referencer to determine what
348
+ # other schemata should be considered for matches. For
349
+ # example, a Company may be compared to a Legal Entity,
350
+ # but it makes no sense to compare it to an Aircraft.
351
+ candidates = set(self.schemata)
352
+ candidates.update(self.descendants)
353
+ for schema in candidates:
354
+ if schema.matchable:
355
+ self._matchable_schemata.add(schema)
356
+ return self._matchable_schemata
357
+
358
def can_match(self, other: "Schema") -> bool:
    """Check whether ``other`` is one of this schema's matchable schemata."""
    candidates = self.matchable_schemata
    return other in candidates
361
+
362
def is_a(self, other: Union[str, "Schema"]) -> bool:
    """Check if the schema or one of its parents is the same as ``other``.

    ``other`` may be a schema name or a schema object; objects are compared
    by their ``name``.

    The result is deliberately not memoized. ``self.names`` is extended while
    ``generate`` resolves the hierarchy, so an answer cached before that
    point could be stale. A cache on the method would also hold a reference
    to every schema it was called on, to save what is a single set lookup.
    """
    name = other if isinstance(other, str) else other.name
    return name in self.names
369
+
370
def get(self, name: Optional[str]) -> Optional[Property]:
    """Look up a property of this schema by name.

    Returns ``None`` when ``name`` is ``None`` or when ``self.properties``
    has no entry for it.
    """
    return None if name is None else self.properties.get(name)
155
375
 
156
- def validate(self, data):
157
- """Validate a dataset against the given schema.
158
- This will also drop keys which are not present as properties.
376
+ def validate(self, data: Any) -> Optional[str]:
377
+ """Validate a dictionary against the given schema.
378
+ This will also drop keys which are not valid as properties.
159
379
  """
160
380
  errors = {}
161
- properties = ensure_dict(data.get('properties'))
381
+ properties = cast(Dict[str, Any], ensure_dict(data.get("properties")))
162
382
  for name, prop in self.properties.items():
163
- values = properties.get(name)
383
+ values = ensure_list(properties.get(name, []))
164
384
  error = prop.validate(values)
385
+ if error is None and not len(values):
386
+ if prop.name in self.required:
387
+ error = gettext("Required")
165
388
  if error is not None:
166
389
  errors[name] = error
167
390
  if len(errors):
168
- raise InvalidData({'properties': errors})
169
-
170
- def to_dict(self):
171
- data = {
172
- 'label': self.label,
173
- 'plural': self.plural,
174
- 'icon': self.icon,
175
- 'uri': str(self.uri),
176
- 'abstract': self.abstract,
177
- 'matchable': self.matchable,
178
- 'description': self.description,
179
- 'featured': self.featured,
180
- 'properties': {}
391
+ msg = gettext("Entity validation failed")
392
+ raise InvalidData(msg, errors={"properties": errors})
393
+ return None
394
+
395
+ def to_dict(self) -> SchemaToDict:
396
+ """Return schema metadata, including all properties, in a serializable form."""
397
+ data: SchemaToDict = {
398
+ "label": self.label,
399
+ "plural": self.plural,
400
+ "schemata": list(sorted(self.names)),
401
+ "extends": list(sorted([e.name for e in self.extends])),
181
402
  }
403
+ if self.edge_source and self.edge_target and self.edge_label:
404
+ data["edge"] = {
405
+ "source": self.edge_source,
406
+ "target": self.edge_target,
407
+ "caption": self.edge_caption,
408
+ "label": self.edge_label,
409
+ "directed": self.edge_directed,
410
+ }
411
+ start_props = [
412
+ prop.name for prop in self.temporal_start_props if prop.schema == self
413
+ ]
414
+ end_props = [
415
+ prop.name for prop in self.temporal_end_props if prop.schema == self
416
+ ]
417
+ if start_props or end_props:
418
+ data["temporalExtent"] = {
419
+ "start": sorted(start_props),
420
+ "end": sorted(end_props),
421
+ }
422
+ if len(self.featured):
423
+ data["featured"] = self.featured
424
+ if len(self.required):
425
+ data["required"] = self.required
426
+ if len(self.caption):
427
+ data["caption"] = self.caption
428
+ if self.description:
429
+ data["description"] = self.description
430
+ if self.abstract:
431
+ data["abstract"] = True
432
+ if self.hidden:
433
+ data["hidden"] = True
434
+ if self.generated:
435
+ data["generated"] = True
436
+ if self.matchable:
437
+ data["matchable"] = True
438
+ if self.deprecated:
439
+ data["deprecated"] = True
440
+ properties: Dict[str, PropertyToDict] = {}
182
441
  for name, prop in self.properties.items():
183
- data['properties'][name] = prop.to_dict()
442
+ if prop.schema == self:
443
+ properties[name] = prop.to_dict()
444
+ data["properties"] = properties
184
445
  return data
185
446
 
186
- def __eq__(self, other):
187
- return hash(other) == hash(self)
447
+ def __eq__(self, other: Any) -> bool:
448
+ """Compare two schemata (via hash)."""
449
+ try:
450
+ return self._hash == hash(other)
451
+ except AttributeError:
452
+ return False
453
+
454
+ def __lt__(self, other: Any) -> bool:
455
+ return self.name.__lt__(other.name)
188
456
 
189
- def __hash__(self):
190
- return hash(self.name)
457
+ def __hash__(self) -> int:
458
+ try:
459
+ return self._hash
460
+ except AttributeError:
461
+ return super().__hash__()
191
462
 
192
- def __repr__(self):
193
- return '<Schema(%r)>' % self.name
463
+ def __repr__(self) -> str:
464
+ return "<Schema(%r)>" % self.name