followthemoney 1.3.7__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. followthemoney/__init__.py +5 -3
  2. followthemoney/cli/__init__.py +17 -0
  3. followthemoney/cli/aggregate.py +56 -0
  4. followthemoney/cli/cli.py +88 -0
  5. followthemoney/cli/exports.py +121 -0
  6. followthemoney/cli/mapping.py +85 -0
  7. followthemoney/cli/sieve.py +67 -0
  8. followthemoney/cli/util.py +142 -0
  9. followthemoney/compare.py +130 -60
  10. followthemoney/exc.py +19 -6
  11. followthemoney/export/common.py +29 -0
  12. followthemoney/export/csv.py +82 -0
  13. followthemoney/export/excel.py +75 -0
  14. followthemoney/export/graph.py +79 -0
  15. followthemoney/export/neo4j.py +182 -0
  16. followthemoney/export/rdf.py +26 -0
  17. followthemoney/graph.py +308 -0
  18. followthemoney/helpers.py +212 -0
  19. followthemoney/mapping/__init__.py +1 -1
  20. followthemoney/mapping/csv.py +67 -35
  21. followthemoney/mapping/entity.py +116 -44
  22. followthemoney/mapping/property.py +90 -44
  23. followthemoney/mapping/query.py +27 -19
  24. followthemoney/mapping/source.py +15 -5
  25. followthemoney/mapping/sql.py +75 -61
  26. followthemoney/messages.py +13 -7
  27. followthemoney/model.py +108 -56
  28. followthemoney/namespace.py +119 -0
  29. followthemoney/offshore.py +48 -0
  30. followthemoney/ontology.py +77 -0
  31. followthemoney/property.py +204 -71
  32. followthemoney/proxy.py +455 -118
  33. followthemoney/rdf.py +9 -0
  34. followthemoney/schema/Address.yaml +78 -0
  35. followthemoney/schema/Airplane.yaml +17 -10
  36. followthemoney/schema/Analyzable.yaml +54 -0
  37. followthemoney/schema/Article.yaml +16 -0
  38. followthemoney/schema/Assessment.yaml +32 -0
  39. followthemoney/schema/Asset.yaml +10 -4
  40. followthemoney/schema/Associate.yaml +41 -0
  41. followthemoney/schema/Audio.yaml +24 -0
  42. followthemoney/schema/BankAccount.yaml +53 -9
  43. followthemoney/schema/Call.yaml +48 -0
  44. followthemoney/schema/CallForTenders.yaml +117 -0
  45. followthemoney/schema/Company.yaml +37 -12
  46. followthemoney/schema/Contract.yaml +41 -7
  47. followthemoney/schema/ContractAward.yaml +30 -11
  48. followthemoney/schema/CourtCase.yaml +16 -10
  49. followthemoney/schema/CourtCaseParty.yaml +17 -6
  50. followthemoney/schema/CryptoWallet.yaml +48 -0
  51. followthemoney/schema/Debt.yaml +37 -0
  52. followthemoney/schema/Directorship.yaml +17 -4
  53. followthemoney/schema/Document.yaml +72 -139
  54. followthemoney/schema/Documentation.yml +38 -0
  55. followthemoney/schema/EconomicActivity.yaml +32 -17
  56. followthemoney/schema/Email.yaml +76 -0
  57. followthemoney/schema/Employment.yaml +39 -0
  58. followthemoney/schema/Event.yaml +35 -3
  59. followthemoney/schema/Family.yaml +41 -0
  60. followthemoney/schema/Folder.yaml +13 -0
  61. followthemoney/schema/HyperText.yaml +21 -0
  62. followthemoney/schema/Identification.yaml +40 -0
  63. followthemoney/schema/Image.yaml +25 -0
  64. followthemoney/schema/Interest.yaml +3 -6
  65. followthemoney/schema/Interval.yaml +56 -5
  66. followthemoney/schema/LegalEntity.yaml +81 -20
  67. followthemoney/schema/License.yaml +7 -3
  68. followthemoney/schema/Membership.yaml +19 -4
  69. followthemoney/schema/Mention.yaml +54 -0
  70. followthemoney/schema/Message.yaml +73 -0
  71. followthemoney/schema/Note.yaml +23 -0
  72. followthemoney/schema/Occupancy.yaml +40 -0
  73. followthemoney/schema/Organization.yaml +38 -3
  74. followthemoney/schema/Ownership.yaml +16 -4
  75. followthemoney/schema/Package.yaml +17 -0
  76. followthemoney/schema/Page.yaml +43 -0
  77. followthemoney/schema/Pages.yaml +23 -0
  78. followthemoney/schema/Passport.yaml +15 -17
  79. followthemoney/schema/Payment.yaml +38 -7
  80. followthemoney/schema/Person.yaml +61 -5
  81. followthemoney/schema/PlainText.yaml +17 -0
  82. followthemoney/schema/Position.yaml +50 -0
  83. followthemoney/schema/Post.yaml +42 -0
  84. followthemoney/schema/Project.yaml +27 -0
  85. followthemoney/schema/ProjectParticipant.yaml +36 -0
  86. followthemoney/schema/PublicBody.yaml +14 -3
  87. followthemoney/schema/RealEstate.yaml +19 -3
  88. followthemoney/schema/Representation.yaml +17 -6
  89. followthemoney/schema/Sanction.yaml +44 -20
  90. followthemoney/schema/Security.yaml +59 -0
  91. followthemoney/schema/Similar.yaml +37 -0
  92. followthemoney/schema/Succession.yaml +36 -0
  93. followthemoney/schema/Table.yaml +32 -0
  94. followthemoney/schema/TaxRoll.yaml +27 -9
  95. followthemoney/schema/Thing.yaml +69 -13
  96. followthemoney/schema/Trip.yaml +42 -0
  97. followthemoney/schema/UnknownLink.yaml +17 -6
  98. followthemoney/schema/UserAccount.yaml +44 -0
  99. followthemoney/schema/Value.yaml +5 -1
  100. followthemoney/schema/Vehicle.yaml +25 -8
  101. followthemoney/schema/Vessel.yaml +18 -10
  102. followthemoney/schema/Video.yaml +20 -0
  103. followthemoney/schema/Workbook.yaml +18 -0
  104. followthemoney/schema.py +406 -135
  105. followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
  106. followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
  107. followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
  108. followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
  109. followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
  110. followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
  111. followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
  112. followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
  113. followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
  114. followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
  115. followthemoney/translations/fr/followthemoney.po +3861 -0
  116. followthemoney/translations/messages.pot +3021 -725
  117. followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
  118. followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
  119. followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
  120. followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
  121. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
  122. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
  123. followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
  124. followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
  125. followthemoney/translations/ru/followthemoney.po +4221 -0
  126. followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
  127. followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
  128. followthemoney/types/__init__.py +35 -17
  129. followthemoney/types/address.py +41 -21
  130. followthemoney/types/checksum.py +25 -0
  131. followthemoney/types/common.py +233 -88
  132. followthemoney/types/country.py +89 -56
  133. followthemoney/types/date.py +59 -76
  134. followthemoney/types/email.py +66 -35
  135. followthemoney/types/entity.py +66 -13
  136. followthemoney/types/gender.py +66 -0
  137. followthemoney/types/iban.py +47 -28
  138. followthemoney/types/identifier.py +49 -22
  139. followthemoney/types/ip.py +35 -21
  140. followthemoney/types/json.py +58 -0
  141. followthemoney/types/language.py +124 -37
  142. followthemoney/types/mimetype.py +44 -0
  143. followthemoney/types/name.py +56 -12
  144. followthemoney/types/number.py +30 -0
  145. followthemoney/types/phone.py +92 -34
  146. followthemoney/types/registry.py +52 -0
  147. followthemoney/types/string.py +43 -0
  148. followthemoney/types/topic.py +94 -0
  149. followthemoney/types/url.py +39 -17
  150. followthemoney/util.py +139 -45
  151. followthemoney-3.8.0.dist-info/METADATA +153 -0
  152. followthemoney-3.8.0.dist-info/RECORD +157 -0
  153. {followthemoney-1.3.7.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
  154. followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
  155. followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
  156. followthemoney/link.py +0 -75
  157. followthemoney/schema/Associate.yml +0 -19
  158. followthemoney/schema/Family.yml +0 -19
  159. followthemoney/schema/Land.yml +0 -9
  160. followthemoney/schema/Relationship.yaml +0 -26
  161. followthemoney/types/domain.py +0 -50
  162. followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
  163. followthemoney-1.3.7.dist-info/METADATA +0 -39
  164. followthemoney-1.3.7.dist-info/RECORD +0 -108
  165. followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
  166. followthemoney-1.3.7.dist-info/metadata.json +0 -1
  167. followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
  168. followthemoney-1.3.7.dist-info/top_level.txt +0 -3
  169. ns/ontology.py +0 -128
  170. tests/types/test_addresses.py +0 -24
  171. tests/types/test_common.py +0 -32
  172. tests/types/test_countries.py +0 -27
  173. tests/types/test_dates.py +0 -73
  174. tests/types/test_domains.py +0 -23
  175. tests/types/test_emails.py +0 -32
  176. tests/types/test_entity.py +0 -19
  177. tests/types/test_iban.py +0 -109
  178. tests/types/test_identifiers.py +0 -27
  179. tests/types/test_ip.py +0 -29
  180. tests/types/test_languages.py +0 -23
  181. tests/types/test_names.py +0 -33
  182. tests/types/test_phones.py +0 -24
  183. tests/types/test_registry.py +0 -14
  184. tests/types/test_urls.py +0 -23
  185. {ns → followthemoney/export}/__init__.py +0 -0
  186. /tests/types/__init__.py → /followthemoney/py.typed +0 -0
followthemoney/proxy.py CHANGED
@@ -1,171 +1,508 @@
1
- from hashlib import sha1
2
- from banal import ensure_list, is_mapping
3
- from normality import stringify
1
+ import logging
2
+ from typing import (
3
+ TYPE_CHECKING,
4
+ Any,
5
+ Dict,
6
+ Generator,
7
+ List,
8
+ Optional,
9
+ Set,
10
+ Tuple,
11
+ Union,
12
+ Type,
13
+ TypeVar,
14
+ cast,
15
+ )
16
+ import warnings
17
+ from itertools import product
18
+ from banal import ensure_dict
4
19
 
5
20
  from followthemoney.exc import InvalidData
6
21
  from followthemoney.types import registry
22
+ from followthemoney.types.common import PropertyType
7
23
  from followthemoney.property import Property
8
- from followthemoney.link import Link
9
- from followthemoney.util import key_bytes, gettext
24
+ from followthemoney.rdf import SKOS, RDF, Literal, URIRef, Identifier
25
+ from followthemoney.util import sanitize_text, gettext
26
+ from followthemoney.util import merge_context, value_list, make_entity_id
27
+
28
+ if TYPE_CHECKING:
29
+ from followthemoney.model import Model
30
+
31
+ log = logging.getLogger(__name__)
32
+ P = Union[Property, str]
33
+ Triple = Tuple[Identifier, Identifier, Identifier]
34
+ E = TypeVar("E", bound="EntityProxy")
10
35
 
11
36
 
12
37
  class EntityProxy(object):
13
38
  """A wrapper object for an entity, with utility functions for the
14
- introspection and manipulation of its properties."""
15
- __slots__ = ['schema', 'id', 'key_prefix', '_properties',
16
- '_data', 'countries', 'names']
39
+ introspection and manipulation of its properties.
40
+
41
+ This is the main working object in the library, used to generate, validate
42
+ and emit data."""
43
+
44
+ __slots__ = ["schema", "id", "key_prefix", "context", "_properties", "_size"]
17
45
 
18
- def __init__(self, schema, id, properties, key_prefix=None):
46
+ def __init__(
47
+ self,
48
+ model: "Model",
49
+ data: Dict[str, Any],
50
+ key_prefix: Optional[str] = None,
51
+ cleaned: bool = True,
52
+ ):
53
+ data = dict(data or {})
54
+ properties = data.pop("properties", {})
55
+ if not cleaned:
56
+ properties = ensure_dict(properties)
57
+
58
+ #: The schema definition for this entity, which implies the properties
59
+ #: That can be set on it.
60
+ schema = model.get(data.pop("schema", None))
61
+ if schema is None:
62
+ raise InvalidData(gettext("No schema for entity."))
19
63
  self.schema = schema
20
- self.id = stringify(id)
21
- self.key_prefix = stringify(key_prefix)
22
- self.countries = set()
23
- self.names = set()
24
- self._properties = {}
25
-
26
- if is_mapping(properties):
27
- for key, value in properties.items():
28
- self.add(key, value, cleaned=True, quiet=True)
29
-
30
- def make_id(self, *parts):
31
- digest = sha1()
32
- if self.key_prefix:
33
- digest.update(key_bytes(self.key_prefix))
34
- base = digest.digest()
35
- for part in parts:
36
- digest.update(key_bytes(part))
37
- if digest.digest() == base:
38
- self.id = None
39
- return
40
- self.id = digest.hexdigest()
64
+
65
+ #: When using :meth:`~make_id` to generate a natural key for this entity,
66
+ #: the prefix will be added to the ID as a salt to make it easier to keep
67
+ #: IDs unique across datasets. This is somewhat redundant following the
68
+ #: introduction of :class:`~followthemoney.namespace.Namespace`.
69
+ self.key_prefix = key_prefix
70
+
71
+ #: A unique identifier for this entity, usually a hashed natural key,
72
+ #: a UUID, or a very simple slug. Can be signed using a
73
+ #: :class:`~followthemoney.namespace.Namespace`.
74
+ self.id = data.pop("id", None)
75
+ if not cleaned:
76
+ self.id = sanitize_text(self.id)
77
+
78
+ #: If the input dictionary for the entity proxy contains fields other
79
+ #: than ``id``, ``schema`` or ``properties``, they will be kept in here
80
+ #: and re-added upon serialization.
81
+ self.context = data
82
+ self._properties: Dict[str, List[str]] = {}
83
+ self._size = 0
84
+
85
+ for key, values in properties.items():
86
+ if key not in self.schema.properties:
87
+ continue
88
+ if cleaned:
89
+ # This does not call `self.add` as it might be called millions of times
90
+ # in some context and we want to avoid the performance overhead of
91
+ # doing so.
92
+ seen: Set[str] = set()
93
+ seen_add = seen.add
94
+ unique_values = [v for v in values if not (v in seen or seen_add(v))]
95
+ self._properties[key] = unique_values
96
+ self._size += sum([len(v) for v in unique_values])
97
+ else:
98
+ self.add(key, values, quiet=True)
99
+
100
+ def make_id(self, *parts: Any) -> Optional[str]:
101
+ """Generate a (hopefully unique) ID for the given entity, composed
102
+ of the given components, and the :attr:`~key_prefix` defined in
103
+ the proxy.
104
+ """
105
+ self.id = make_entity_id(*parts, key_prefix=self.key_prefix)
41
106
  return self.id
42
107
 
43
- def _get_prop(self, prop, quiet=False):
44
- if isinstance(prop, Property):
45
- return prop
46
- if prop not in self.schema.properties:
108
+ def _prop_name(self, prop: P, quiet: bool = False) -> Optional[str]:
109
+ # This is pretty unwound because it gets called a *lot*.
110
+ if prop in self.schema.properties:
111
+ return cast(str, prop)
112
+ try:
113
+ obj = cast(Property, prop)
114
+ if obj.name in self.schema.properties:
115
+ return obj.name
116
+ except AttributeError:
117
+ pass
118
+ if quiet:
119
+ return None
120
+ msg = gettext("Unknown property (%s): %s")
121
+ raise InvalidData(msg % (self.schema, prop))
122
+
123
+ def get(self, prop: P, quiet: bool = False) -> List[str]:
124
+ """Get all values of a property.
125
+
126
+ :param prop: can be given as a name or an instance of
127
+ :class:`~followthemoney.property.Property`.
128
+ :param quiet: a reference to a non-existent property will return
129
+ an empty list instead of raising an error.
130
+ :return: A list of values.
131
+ """
132
+ prop_name = self._prop_name(prop, quiet=quiet)
133
+ if prop_name is None:
134
+ return []
135
+ return self._properties.get(prop_name, [])
136
+
137
+ def first(self, prop: P, quiet: bool = False) -> Optional[str]:
138
+ """Get only the first value set for the property.
139
+
140
+ :param prop: can be given as a name or an instance of
141
+ :class:`~followthemoney.property.Property`.
142
+ :param quiet: a reference to a non-existent property will return
143
+ ``None`` instead of raising an error.
144
+ :return: A value, or ``None``.
145
+ """
146
+ for value in self.get(prop, quiet=quiet):
147
+ return value
148
+ return None
149
+
150
+ def has(self, prop: P, quiet: bool = False) -> bool:
151
+ """Check to see if the given property has at least one value set.
152
+
153
+ :param prop: can be given as a name or an instance of
154
+ :class:`~followthemoney.property.Property`.
155
+ :param quiet: a reference to a non-existent property will return
156
+ ``False`` instead of raising an error.
157
+ :return: a boolean.
158
+ """
159
+ prop_name = self._prop_name(prop, quiet=quiet)
160
+ return prop_name in self._properties
161
+
162
+ def add(
163
+ self,
164
+ prop: P,
165
+ values: Any,
166
+ cleaned: bool = False,
167
+ quiet: bool = False,
168
+ fuzzy: bool = False,
169
+ format: Optional[str] = None,
170
+ ) -> None:
171
+ """Add the given value(s) to the property if they are valid for
172
+ the type of the property.
173
+
174
+ :param prop: can be given as a name or an instance of
175
+ :class:`~followthemoney.property.Property`.
176
+ :param values: either a single value, or a list of values to be added.
177
+ :param cleaned: whether the values are already normalised (skips cleaning).
178
+ :param quiet: a reference to a non-existent property will be
179
+ ignored instead of raising an error.
180
+ :param fuzzy: when normalising the data, should fuzzy matching be allowed.
181
+ :param format: when normalising the data, formatting for a date.
182
+ """
183
+ prop_name = self._prop_name(prop, quiet=quiet)
184
+ if prop_name is None:
185
+ return None
186
+ prop = self.schema.properties[prop_name]
187
+
188
+ # Don't allow setting the reverse properties:
189
+ if prop.stub:
47
190
  if quiet:
48
- return
49
- msg = gettext("Unknown property (%s): %s")
191
+ return None
192
+ msg = gettext("Stub property (%s): %s")
50
193
  raise InvalidData(msg % (self.schema, prop))
51
- return self.schema.get(prop)
52
194
 
53
- def get(self, prop, quiet=False):
54
- prop = self._get_prop(prop, quiet=quiet)
55
- if prop is None or prop not in self._properties:
56
- return []
57
- return list(self._properties.get(prop))
195
+ for value in value_list(values):
196
+ if not cleaned:
197
+ format = format or prop.format
198
+ value = prop.type.clean(value, proxy=self, fuzzy=fuzzy, format=format)
199
+ self.unsafe_add(prop, value, cleaned=True)
200
+ return None
201
+
202
+ def unsafe_add(
203
+ self,
204
+ prop: Property,
205
+ value: Optional[str],
206
+ cleaned: bool = False,
207
+ fuzzy: bool = False,
208
+ format: Optional[str] = None,
209
+ ) -> Optional[str]:
210
+ """A version of `add()` to be used only in type-checking code. This accepts
211
+ only a single value, and performs input cleaning on the premise that the
212
+ value is already valid unicode. Returns the value that has been added."""
213
+ if not cleaned and value is not None:
214
+ format = format or prop.format
215
+ value = prop.type.clean_text(value, fuzzy=fuzzy, format=format, proxy=self)
216
+
217
+ if value is None:
218
+ return None
219
+
220
+ # Somewhat hacky: limit the maximum size of any particular
221
+ # field to avoid overloading upstream aleph/elasticsearch.
222
+ value_size = len(value)
223
+ if prop.type.total_size is not None:
224
+ if self._size + value_size > prop.type.total_size:
225
+ # msg = "[%s] too large. Rejecting additional values."
226
+ # log.warning(msg, prop.name)
227
+ return None
228
+ self._size += value_size
229
+ self._properties.setdefault(prop.name, list())
230
+
231
+ if value not in self._properties[prop.name]:
232
+ self._properties[prop.name].append(value)
233
+
234
+ return value
235
+
236
+ def set(
237
+ self,
238
+ prop: P,
239
+ values: Any,
240
+ cleaned: bool = False,
241
+ quiet: bool = False,
242
+ fuzzy: bool = False,
243
+ format: Optional[str] = None,
244
+ ) -> None:
245
+ """Replace the values of the property with the given value(s).
58
246
 
59
- def add(self, prop, values, cleaned=False, quiet=False):
60
- prop = self._get_prop(prop, quiet=quiet)
61
- if prop is None:
247
+ :param prop: can be given as a name or an instance of
248
+ :class:`~followthemoney.property.Property`.
249
+ :param values: either a single value, or a list of values to be added.
250
+ :param cleaned: whether the values are already normalised (skips cleaning).
251
+ :param quiet: a reference to a non-existent property will be
252
+ ignored instead of raising an error.
253
+ """
254
+ prop_name = self._prop_name(prop, quiet=quiet)
255
+ if prop_name is None:
62
256
  return
63
- for value in ensure_list(values):
64
- if not cleaned:
65
- value = prop.type.clean(value, countries=self.countries)
66
- if value is None:
67
- continue
68
- if prop not in self._properties:
69
- self._properties[prop] = set()
70
- self._properties[prop].add(value)
71
- if prop.type == registry.name:
72
- norm = prop.type.normalize(value, cleaned=True)
73
- self.names.update(norm)
74
- if prop.type == registry.country:
75
- norm = prop.type.normalize(value, cleaned=True)
76
- self.countries.update(norm)
77
-
78
- def pop(self, prop, quiet=False):
79
- prop = self._get_prop(prop, quiet=quiet)
80
- if prop is None:
257
+ self._properties.pop(prop_name, None)
258
+ return self.add(
259
+ prop, values, cleaned=cleaned, quiet=quiet, fuzzy=fuzzy, format=format
260
+ )
261
+
262
+ def pop(self, prop: P, quiet: bool = True) -> List[str]:
263
+ """Remove all the values from the given property and return them.
264
+
265
+ :param prop: can be given as a name or an instance of
266
+ :class:`~followthemoney.property.Property`.
267
+ :param quiet: a reference to a non-existent property will return
268
+ an empty list instead of raising an error.
269
+ :return: a list of values, possibly empty.
270
+ """
271
+ prop_name = self._prop_name(prop, quiet=quiet)
272
+ if prop_name is None or prop_name not in self._properties:
81
273
  return []
82
- return ensure_list(self._properties.pop(prop, []))
274
+ return list(self._properties.pop(prop_name))
83
275
 
84
- def iterprops(self):
85
- for prop in self.schema.properties.values():
86
- yield prop
276
+ def remove(self, prop: P, value: str, quiet: bool = True) -> None:
277
+ """Remove a single value from the given property. If it is not there,
278
+ no action takes place.
87
279
 
88
- def itervalues(self):
89
- for prop, values in self._properties.items():
280
+ :param prop: can be given as a name or an instance of
281
+ :class:`~followthemoney.property.Property`.
282
+ :param value: will not be cleaned before checking.
283
+ :param quiet: a reference to a non-existent property will be
284
+ ignored instead of raising an error.
285
+ """
286
+ prop_name = self._prop_name(prop, quiet=quiet)
287
+ if prop_name is not None and prop_name in self._properties:
288
+ try:
289
+ self._properties[prop_name].remove(value)
290
+ except (KeyError, ValueError):
291
+ pass
292
+
293
+ def iterprops(self) -> List[Property]:
294
+ """Iterate across all the properties for which a value is set in
295
+ the proxy (but do not return their values)."""
296
+ return [self.schema.properties[p] for p in self._properties.keys()]
297
+
298
+ def itervalues(self) -> Generator[Tuple[Property, str], None, None]:
299
+ """Iterate across all values in the proxy one by one, each given as a
300
+ tuple of the property and the value."""
301
+ for name, values in self._properties.items():
302
+ prop = self.schema.properties[name]
90
303
  for value in values:
91
304
  yield (prop, value)
92
305
 
93
- def get_type_values(self, type_, cleaned=True):
306
+ def edgepairs(self) -> Generator[Tuple[str, str], None, None]:
307
+ """Return all the possible pairs of values for the edge source and target if
308
+ the schema allows for an edge representation of the entity."""
309
+ if self.schema.source_prop is not None and self.schema.target_prop is not None:
310
+ sources = self.get(self.schema.source_prop)
311
+ targets = self.get(self.schema.target_prop)
312
+ for source, target in product(sources, targets):
313
+ yield (source, target)
314
+
315
+ def get_type_values(
316
+ self, type_: PropertyType, matchable: bool = False
317
+ ) -> List[str]:
318
+ """All values of a particular type associated with the entity. For
319
+ example, this lets you return all countries linked to an entity, rather
320
+ than manually checking each property to see if it contains countries.
321
+
322
+ :param type_: The type object to be searched.
323
+ :param matchable: Whether to return only property values marked as matchable.
324
+ """
94
325
  combined = set()
95
- for prop, values in self._properties.items():
326
+ for prop_name, values in self._properties.items():
327
+ prop = self.schema.properties[prop_name]
328
+ if matchable and not prop.matchable:
329
+ continue
96
330
  if prop.type == type_:
97
331
  combined.update(values)
98
- return type_.normalize_set(combined,
99
- cleaned=cleaned,
100
- countries=self.countries)
332
+ return list(combined)
333
+
334
+ @property
335
+ def names(self) -> List[str]:
336
+ """Get all unique name-type values set on the entity."""
337
+ return self.get_type_values(registry.name)
338
+
339
+ @property
340
+ def countries(self) -> List[str]:
341
+ """Get all unique country-type values set on the entity."""
342
+ return self.get_type_values(registry.country)
343
+
344
+ @property
345
+ def temporal_start(self) -> Optional[Tuple[Property, str]]:
346
+ """Get a date that can be used to represent the start of the entity in a
347
+ timeline. If there are multiple possible dates, the earliest date is
348
+ returned."""
349
+ values = []
350
+
351
+ for prop in self.schema.temporal_start_props:
352
+ values += [(prop, value) for value in self.get(prop.name)]
101
353
 
102
- def get_type_inverted(self, cleaned=True):
103
- """Invert the properties of an entity into their normalised form."""
104
- data = {}
354
+ values.sort(key=lambda tuple: tuple[1])
355
+ return next(iter(values), None)
356
+
357
+ @property
358
+ def temporal_end(self) -> Optional[Tuple[Property, str]]:
359
+ """Get a date that can be used to represent the end of the entity in a timeline.
360
+ If there are multiple possible dates, the latest date is returned."""
361
+ values = []
362
+
363
+ for prop in self.schema.temporal_end_props:
364
+ values += [(prop, value) for value in self.get(prop.name)]
365
+
366
+ values.sort(reverse=True, key=lambda tuple: tuple[1])
367
+ return next(iter(values), None)
368
+
369
+ def get_type_inverted(self, matchable: bool = False) -> Dict[str, List[str]]:
370
+ """Return all the values of the entity arranged into a mapping with the
371
+ group name of their property type. These groups include ``countries``,
372
+ ``addresses``, ``emails``, etc."""
373
+ data: Dict[str, List[str]] = {}
105
374
  for group, type_ in registry.groups.items():
106
- values = self.get_type_values(type_, cleaned=cleaned)
375
+ values = self.get_type_values(type_, matchable=matchable)
107
376
  if len(values):
108
377
  data[group] = values
109
378
  return data
110
379
 
111
- @property
112
- def links(self):
113
- ref = registry.entity.ref(self.id)
380
+ def triples(self, qualified: bool = True) -> Generator[Triple, None, None]:
381
+ """Serialise the entity into a set of RDF triple statements. The
382
+ statements include the property values, an ``RDF#type`` definition
383
+ that refers to the entity schema, and a ``SKOS#prefLabel`` with the
384
+ entity caption."""
385
+ if self.id is None or self.schema is None:
386
+ return
387
+ uri = registry.entity.rdf(self.id)
388
+ yield (uri, RDF.type, self.schema.uri)
389
+ if qualified:
390
+ caption = self.caption
391
+ if caption != self.schema.label:
392
+ yield (uri, SKOS.prefLabel, Literal(caption))
114
393
  for prop, value in self.itervalues():
115
- yield Link(ref, prop, value)
394
+ value = prop.type.rdf(value)
395
+ if qualified:
396
+ yield (uri, prop.uri, value)
397
+ else:
398
+ yield (uri, URIRef(prop.name), value)
116
399
 
117
400
  @property
118
- def caption(self):
119
- for prop in self.iterprops():
120
- if prop.caption:
121
- for value in self.get(prop):
122
- return value
401
+ def caption(self) -> str:
402
+ """The user-facing label to be used for this entity. This checks a list
403
+ of properties defined by the schema (caption) and returns the first
404
+ available value. If no caption is available, return the schema label."""
405
+ for prop in self.schema.caption:
406
+ for value in self.get(prop):
407
+ return value
408
+ return self.schema.label
123
409
 
124
410
  @property
125
- def properties(self):
126
- return {p.name: self.get(p) for p in self._properties.keys()}
127
-
128
- def to_dict(self, inverted_index=False):
129
- return {
130
- 'id': self.id,
131
- 'schema': self.schema.name,
132
- 'properties': self.properties
411
+ def country_hints(self) -> Set[str]:
412
+ """Some property types, such as phone numbers and IBAN codes imply a
413
+ country that may be associated with the entity. This list can be used
414
+ for a more generous matching approach than the actual country values."""
415
+ countries = set(self.countries)
416
+ if not len(countries):
417
+ for prop, value in self.itervalues():
418
+ hint = prop.type.country_hint(value)
419
+ if hint is not None:
420
+ countries.add(hint)
421
+ return countries
422
+
423
+ @property
424
+ def properties(self) -> Dict[str, List[str]]:
425
+ """Return a mapping of the properties and set values of the entity."""
426
+ return {p: list(vs) for p, vs in self._properties.items()}
427
+
428
+ def to_dict(self) -> Dict[str, Any]:
429
+ """Serialise the proxy into a dictionary with the defined properties, ID,
430
+ schema and any contextual values that were handed in initially. The resulting
431
+ dictionary can be used to make a new proxy, and it is commonly written to disk
432
+ or a database."""
433
+ data = dict(self.context)
434
+ extra = {
435
+ "id": self.id,
436
+ "schema": self.schema.name,
437
+ "properties": self.properties,
133
438
  }
439
+ data.update(extra)
440
+ return data
134
441
 
135
- def to_full_dict(self):
442
+ def to_full_dict(self, matchable: bool = False) -> Dict[str, Any]:
443
+ """Return a serialised version of the entity with inverted type groups mixed
444
+ in. See :meth:`~get_type_inverted`."""
136
445
  data = self.to_dict()
137
- data['schemata'] = self.schema.names
138
- data.update(self.get_type_inverted())
446
+ data.update(self.get_type_inverted(matchable=matchable))
139
447
  return data
140
448
 
141
- def clone(self):
142
- return EntityProxy(self.schema, self.id, self._properties)
449
+ def clone(self: E) -> E:
450
+ """Make a deep copy of the current entity proxy."""
451
+ return self.__class__.from_dict(self.schema.model, self.to_dict())
143
452
 
144
- def merge(self, other):
453
+ def merge(self: E, other: E) -> E:
454
+ """Merge another entity proxy into this one. This will try and find
455
+ the common schema between both entities and then add all property
456
+ values from the other entity into this one."""
145
457
  model = self.schema.model
146
- other = self.from_dict(model, other)
147
458
  self.id = self.id or other.id
148
- self.schema = model.common_schema(self.schema, other.schema)
149
- for prop, value in other.itervalues():
150
- self.add(prop, value)
459
+ try:
460
+ self.schema = model.common_schema(self.schema, other.schema)
461
+ except InvalidData as e:
462
+ msg = "Cannot merge entities with id %s: %s"
463
+ raise InvalidData(msg % (self.id, e))
151
464
 
152
- def __repr__(self):
153
- return '<EntityProxy(%r,%r)>' % (self.id, self.schema)
465
+ self.context = merge_context(self.context, other.context)
466
+ for prop, values in other._properties.items():
467
+ self.add(prop, values, cleaned=True, quiet=True)
468
+ return self
154
469
 
155
- def __str__(self):
470
+ def __str__(self) -> str:
156
471
  return self.caption
157
472
 
158
- def __hash__(self):
473
+ def __repr__(self) -> str:
474
+ return "<E(%r,%r)>" % (self.id, str(self))
475
+
476
+ def __len__(self) -> int:
477
+ return self._size
478
+
479
+ def __hash__(self) -> int:
480
+ if not self.id:
481
+ warnings.warn(
482
+ "Hashing an EntityProxy without an ID results in undefined behaviour",
483
+ RuntimeWarning,
484
+ )
159
485
  return hash(self.id)
160
486
 
161
- def __eq__(self, other):
162
- return self.id == other.id
487
+ def __eq__(self, other: Any) -> bool:
488
+ try:
489
+ if self.id is None or other.id is None:
490
+ warnings.warn(
491
+ "Comparing EntityProxys without IDs results in undefined behaviour",
492
+ RuntimeWarning,
493
+ )
494
+ return bool(self.id == other.id)
495
+ except AttributeError:
496
+ return False
163
497
 
164
498
  @classmethod
165
- def from_dict(cls, model, data):
166
- if isinstance(data, cls):
167
- return data
168
- schema = model.get(data.get('schema'))
169
- if schema is None:
170
- raise InvalidData(gettext('No schema for entity.'))
171
- return cls(schema, data.get('id'), data.get('properties'))
499
+ def from_dict(
500
+ cls: Type[E],
501
+ model: "Model",
502
+ data: Dict[str, Any],
503
+ cleaned: bool = True,
504
+ ) -> E:
505
+ """Instantiate a proxy based on the given model and serialised dictionary.
506
+
507
+ Use :meth:`followthemoney.model.Model.get_proxy` instead."""
508
+ return cls(model, data, cleaned=cleaned)
followthemoney/rdf.py ADDED
@@ -0,0 +1,9 @@
1
+ # This module serves exclusively to mitigate the type checking clusterfuck
2
+ # that is rdflib 6.0.
3
+ from rdflib import Namespace
4
+ from rdflib.term import Identifier, URIRef, Literal
5
+ from rdflib import RDF, SKOS, XSD
6
+
7
+ NS = Namespace("https://w3id.org/ftm#")
8
+
9
+ __all__ = ["NS", "XSD", "RDF", "SKOS", "Identifier", "URIRef", "Literal"]