followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186)
  1. followthemoney/__init__.py +5 -3
  2. followthemoney/cli/__init__.py +17 -0
  3. followthemoney/cli/aggregate.py +56 -0
  4. followthemoney/cli/cli.py +88 -0
  5. followthemoney/cli/exports.py +121 -0
  6. followthemoney/cli/mapping.py +85 -0
  7. followthemoney/cli/sieve.py +67 -0
  8. followthemoney/cli/util.py +142 -0
  9. followthemoney/compare.py +132 -55
  10. followthemoney/exc.py +19 -6
  11. followthemoney/export/common.py +29 -0
  12. followthemoney/export/csv.py +82 -0
  13. followthemoney/export/excel.py +75 -0
  14. followthemoney/export/graph.py +79 -0
  15. followthemoney/export/neo4j.py +182 -0
  16. followthemoney/export/rdf.py +26 -0
  17. followthemoney/graph.py +308 -0
  18. followthemoney/helpers.py +212 -0
  19. followthemoney/mapping/__init__.py +1 -1
  20. followthemoney/mapping/csv.py +67 -35
  21. followthemoney/mapping/entity.py +116 -44
  22. followthemoney/mapping/property.py +90 -44
  23. followthemoney/mapping/query.py +27 -19
  24. followthemoney/mapping/source.py +15 -5
  25. followthemoney/mapping/sql.py +75 -61
  26. followthemoney/messages.py +13 -7
  27. followthemoney/model.py +108 -56
  28. followthemoney/namespace.py +119 -0
  29. followthemoney/offshore.py +48 -0
  30. followthemoney/ontology.py +77 -0
  31. followthemoney/property.py +204 -71
  32. followthemoney/proxy.py +455 -118
  33. followthemoney/rdf.py +9 -0
  34. followthemoney/schema/Address.yaml +78 -0
  35. followthemoney/schema/Airplane.yaml +17 -10
  36. followthemoney/schema/Analyzable.yaml +54 -0
  37. followthemoney/schema/Article.yaml +16 -0
  38. followthemoney/schema/Assessment.yaml +32 -0
  39. followthemoney/schema/Asset.yaml +10 -4
  40. followthemoney/schema/Associate.yaml +41 -0
  41. followthemoney/schema/Audio.yaml +24 -0
  42. followthemoney/schema/BankAccount.yaml +53 -9
  43. followthemoney/schema/Call.yaml +48 -0
  44. followthemoney/schema/CallForTenders.yaml +117 -0
  45. followthemoney/schema/Company.yaml +37 -12
  46. followthemoney/schema/Contract.yaml +41 -7
  47. followthemoney/schema/ContractAward.yaml +30 -11
  48. followthemoney/schema/CourtCase.yaml +16 -10
  49. followthemoney/schema/CourtCaseParty.yaml +17 -6
  50. followthemoney/schema/CryptoWallet.yaml +48 -0
  51. followthemoney/schema/Debt.yaml +37 -0
  52. followthemoney/schema/Directorship.yaml +17 -4
  53. followthemoney/schema/Document.yaml +72 -139
  54. followthemoney/schema/Documentation.yml +38 -0
  55. followthemoney/schema/EconomicActivity.yaml +32 -17
  56. followthemoney/schema/Email.yaml +76 -0
  57. followthemoney/schema/Employment.yaml +39 -0
  58. followthemoney/schema/Event.yaml +35 -3
  59. followthemoney/schema/Family.yaml +41 -0
  60. followthemoney/schema/Folder.yaml +13 -0
  61. followthemoney/schema/HyperText.yaml +21 -0
  62. followthemoney/schema/Identification.yaml +40 -0
  63. followthemoney/schema/Image.yaml +25 -0
  64. followthemoney/schema/Interest.yaml +3 -6
  65. followthemoney/schema/Interval.yaml +56 -5
  66. followthemoney/schema/LegalEntity.yaml +81 -20
  67. followthemoney/schema/License.yaml +7 -3
  68. followthemoney/schema/Membership.yaml +19 -4
  69. followthemoney/schema/Mention.yaml +54 -0
  70. followthemoney/schema/Message.yaml +73 -0
  71. followthemoney/schema/Note.yaml +23 -0
  72. followthemoney/schema/Occupancy.yaml +40 -0
  73. followthemoney/schema/Organization.yaml +38 -3
  74. followthemoney/schema/Ownership.yaml +16 -4
  75. followthemoney/schema/Package.yaml +17 -0
  76. followthemoney/schema/Page.yaml +43 -0
  77. followthemoney/schema/Pages.yaml +23 -0
  78. followthemoney/schema/Passport.yaml +15 -17
  79. followthemoney/schema/Payment.yaml +38 -7
  80. followthemoney/schema/Person.yaml +61 -5
  81. followthemoney/schema/PlainText.yaml +17 -0
  82. followthemoney/schema/Position.yaml +50 -0
  83. followthemoney/schema/Post.yaml +42 -0
  84. followthemoney/schema/Project.yaml +27 -0
  85. followthemoney/schema/ProjectParticipant.yaml +36 -0
  86. followthemoney/schema/PublicBody.yaml +14 -3
  87. followthemoney/schema/RealEstate.yaml +19 -3
  88. followthemoney/schema/Representation.yaml +17 -6
  89. followthemoney/schema/Sanction.yaml +44 -20
  90. followthemoney/schema/Security.yaml +59 -0
  91. followthemoney/schema/Similar.yaml +37 -0
  92. followthemoney/schema/Succession.yaml +36 -0
  93. followthemoney/schema/Table.yaml +32 -0
  94. followthemoney/schema/TaxRoll.yaml +27 -9
  95. followthemoney/schema/Thing.yaml +69 -13
  96. followthemoney/schema/Trip.yaml +42 -0
  97. followthemoney/schema/UnknownLink.yaml +17 -6
  98. followthemoney/schema/UserAccount.yaml +44 -0
  99. followthemoney/schema/Value.yaml +5 -1
  100. followthemoney/schema/Vehicle.yaml +25 -8
  101. followthemoney/schema/Vessel.yaml +18 -10
  102. followthemoney/schema/Video.yaml +20 -0
  103. followthemoney/schema/Workbook.yaml +18 -0
  104. followthemoney/schema.py +406 -135
  105. followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
  106. followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
  107. followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
  108. followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
  109. followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
  110. followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
  111. followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
  112. followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
  113. followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
  114. followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
  115. followthemoney/translations/fr/followthemoney.po +3861 -0
  116. followthemoney/translations/messages.pot +3021 -725
  117. followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
  118. followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
  119. followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
  120. followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
  121. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
  122. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
  123. followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
  124. followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
  125. followthemoney/translations/ru/followthemoney.po +4221 -0
  126. followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
  127. followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
  128. followthemoney/types/__init__.py +35 -17
  129. followthemoney/types/address.py +41 -21
  130. followthemoney/types/checksum.py +25 -0
  131. followthemoney/types/common.py +233 -88
  132. followthemoney/types/country.py +89 -56
  133. followthemoney/types/date.py +59 -76
  134. followthemoney/types/email.py +66 -35
  135. followthemoney/types/entity.py +66 -13
  136. followthemoney/types/gender.py +66 -0
  137. followthemoney/types/iban.py +47 -28
  138. followthemoney/types/identifier.py +49 -22
  139. followthemoney/types/ip.py +35 -21
  140. followthemoney/types/json.py +58 -0
  141. followthemoney/types/language.py +124 -37
  142. followthemoney/types/mimetype.py +44 -0
  143. followthemoney/types/name.py +56 -12
  144. followthemoney/types/number.py +30 -0
  145. followthemoney/types/phone.py +92 -34
  146. followthemoney/types/registry.py +52 -0
  147. followthemoney/types/string.py +43 -0
  148. followthemoney/types/topic.py +94 -0
  149. followthemoney/types/url.py +39 -17
  150. followthemoney/util.py +139 -45
  151. followthemoney-3.8.0.dist-info/METADATA +153 -0
  152. followthemoney-3.8.0.dist-info/RECORD +157 -0
  153. {followthemoney-1.3.6.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
  154. followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
  155. followthemoney-1.3.6.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
  156. followthemoney/link.py +0 -75
  157. followthemoney/schema/Associate.yml +0 -19
  158. followthemoney/schema/Family.yml +0 -19
  159. followthemoney/schema/Land.yml +0 -9
  160. followthemoney/schema/Relationship.yaml +0 -26
  161. followthemoney/types/domain.py +0 -50
  162. followthemoney-1.3.6.dist-info/DESCRIPTION.rst +0 -3
  163. followthemoney-1.3.6.dist-info/METADATA +0 -39
  164. followthemoney-1.3.6.dist-info/RECORD +0 -108
  165. followthemoney-1.3.6.dist-info/entry_points.txt +0 -3
  166. followthemoney-1.3.6.dist-info/metadata.json +0 -1
  167. followthemoney-1.3.6.dist-info/namespace_packages.txt +0 -1
  168. followthemoney-1.3.6.dist-info/top_level.txt +0 -3
  169. ns/ontology.py +0 -128
  170. tests/types/test_addresses.py +0 -24
  171. tests/types/test_common.py +0 -27
  172. tests/types/test_countries.py +0 -21
  173. tests/types/test_dates.py +0 -72
  174. tests/types/test_domains.py +0 -23
  175. tests/types/test_emails.py +0 -30
  176. tests/types/test_entity.py +0 -16
  177. tests/types/test_iban.py +0 -109
  178. tests/types/test_identifiers.py +0 -25
  179. tests/types/test_ip.py +0 -26
  180. tests/types/test_languages.py +0 -20
  181. tests/types/test_names.py +0 -33
  182. tests/types/test_phones.py +0 -24
  183. tests/types/test_registry.py +0 -14
  184. tests/types/test_urls.py +0 -23
  185. {ns → followthemoney/export}/__init__.py +0 -0
  186. /tests/types/__init__.py → /followthemoney/py.typed +0 -0
@@ -1,62 +1,111 @@
1
+ import logging
1
2
  from hashlib import sha1
2
- from banal import ensure_list
3
+ from warnings import warn
4
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set
5
+ from banal import keys_values
6
+ from normality import stringify
3
7
 
4
- from followthemoney.mapping.property import PropertyMapping
5
8
  from followthemoney.types import registry
6
9
  from followthemoney.util import key_bytes
10
+ from followthemoney.proxy import EntityProxy
11
+ from followthemoney.mapping.property import PropertyMapping
12
+ from followthemoney.mapping.source import Record
7
13
  from followthemoney.exc import InvalidMapping
8
14
 
15
+ if TYPE_CHECKING:
16
+ from followthemoney.model import Model
17
+ from followthemoney.mapping.query import QueryMapping
18
+
19
+ log = logging.getLogger(__name__)
20
+
9
21
 
10
22
  class EntityMapping(object):
23
+ __slots__ = (
24
+ "model",
25
+ "name",
26
+ "seed",
27
+ "keys",
28
+ "id_column",
29
+ "schema",
30
+ "refs",
31
+ "dependencies",
32
+ "properties",
33
+ )
11
34
 
12
- def __init__(self, model, query, name, data, key_prefix=None):
35
+ def __init__(
36
+ self,
37
+ model: "Model",
38
+ query: "QueryMapping",
39
+ name: str,
40
+ data: Dict[str, Any],
41
+ key_prefix: Optional[str] = None,
42
+ ) -> None:
13
43
  self.model = model
14
44
  self.name = name
15
- self.data = data
16
45
 
17
46
  self.seed = sha1(key_bytes(key_prefix))
18
- self.seed.update(key_bytes(data.get('key_literal')))
47
+ self.seed.update(key_bytes(data.get("key_literal")))
19
48
 
20
- self.keys = ensure_list(data.get('key'))
21
- self.keys.extend(ensure_list(data.get('keys')))
22
- if not len(self.keys):
23
- raise InvalidMapping("No keys: %r" % name)
49
+ self.keys = keys_values(data, "key", "keys")
50
+ self.id_column = stringify(data.get("id_column"))
51
+ if not len(self.keys) and self.id_column is None:
52
+ raise InvalidMapping("No keys or ID: %r" % name)
53
+ if len(self.keys) and self.id_column is not None:
54
+ msg = "Please use only keys or id_column, not both: %r" % name
55
+ raise InvalidMapping(msg)
24
56
 
25
- self.schema = model.get(data.get('schema'))
26
- if self.schema is None:
27
- raise InvalidMapping("Invalid schema: %s" % data.get('schema'))
57
+ schema_name = stringify(data.get("schema"))
58
+ if schema_name is None:
59
+ raise InvalidMapping("No schema: %s" % name)
60
+ schema = model.get(schema_name)
61
+ if schema is None:
62
+ raise InvalidMapping("Invalid schema: %s" % schema_name)
63
+ if schema.deprecated:
64
+ warn(
65
+ "Mapping uses a deprecated schema: %r" % schema,
66
+ DeprecationWarning,
67
+ stacklevel=2,
68
+ )
69
+ self.schema = schema
28
70
 
29
71
  self.refs = set(self.keys)
30
- self.dependencies = set()
31
- self.properties = []
32
- for name, prop in data.get('properties', {}).items():
33
- prop_schema = self.schema.get(name)
34
- if prop_schema is None:
72
+ if self.id_column:
73
+ self.refs.add(self.id_column)
74
+ self.dependencies: Set[str] = set()
75
+ self.properties: List[PropertyMapping] = []
76
+ for name, prop_mapping in data.get("properties", {}).items():
77
+ prop = self.schema.get(name)
78
+ if prop is None:
35
79
  raise InvalidMapping("Invalid property: %s" % name)
36
- prop = PropertyMapping(query, prop, prop_schema)
37
- self.properties.append(prop)
38
- self.refs.update(prop.refs)
39
- if prop.entity:
40
- self.dependencies.add(prop.entity)
80
+ mapping = PropertyMapping(query, prop_mapping, prop)
81
+ self.properties.append(mapping)
82
+ self.refs.update(mapping.refs)
83
+ if mapping.entity:
84
+ self.dependencies.add(mapping.entity)
41
85
 
42
- def bind(self):
86
+ def bind(self) -> None:
43
87
  for prop in self.properties:
44
88
  prop.bind()
45
89
 
46
- def compute_key(self, record):
90
+ def compute_key(self, record: Record) -> Optional[str]:
47
91
  """Generate a key for this entity, based on the given fields."""
92
+ if self.id_column is not None:
93
+ return record.get(self.id_column)
48
94
  values = [key_bytes(record.get(k)) for k in self.keys]
49
95
  digest = self.seed.copy()
96
+ has_value = False
50
97
  for value in sorted(values):
51
- digest.update(value)
52
- if digest.digest() != self.seed.digest():
98
+ if len(value):
99
+ has_value = True
100
+ digest.update(value)
101
+ if has_value:
53
102
  return digest.hexdigest()
103
+ return None
54
104
 
55
- def map(self, record, entities):
105
+ def map(
106
+ self, record: Record, entities: Dict[str, EntityProxy]
107
+ ) -> Optional[EntityProxy]:
56
108
  proxy = self.model.make_entity(self.schema)
57
- proxy.id = self.compute_key(record)
58
- if proxy.id is None:
59
- return
60
109
 
61
110
  # THIS IS HACKY
62
111
  # Some of the converters, e.g. for phone numbers, work better if they
@@ -64,23 +113,46 @@ class EntityMapping(object):
64
113
  # detail, we are first running country fields, then making the data
65
114
  # from that accessible to phone and address parsers.
66
115
  for prop in self.properties:
67
- if prop.schema.type == registry.country:
68
- proxy.add(prop.schema, prop.map(record, entities))
116
+ if prop.prop.type == registry.country:
117
+ discarded_values = prop.map(proxy, record, entities)
118
+ for value in discarded_values:
119
+ log.warning(
120
+ f'[{self.name}] Discarded unclean value "{value}" for property "{prop.prop.qname}".'
121
+ )
69
122
 
70
123
  for prop in self.properties:
71
- if prop.schema.type != registry.country:
72
- proxy.add(prop.schema, prop.map(record, entities,
73
- countries=proxy.countries))
124
+ if prop.prop.type != registry.country:
125
+ discarded_values = prop.map(proxy, record, entities)
126
+ for value in discarded_values:
127
+ log.warning(
128
+ f'[{self.name}] Discarding unclean value "{value}" for property "{prop.prop.qname}".'
129
+ )
130
+
131
+ # Generate the ID at the end to avoid self-reference checks on empty
132
+ # keys.
133
+ proxy.id = self.compute_key(record)
134
+ if proxy.id is None:
135
+ if self.id_column:
136
+ log.warning(
137
+ f'[{self.name}] Skipping entity because no ID could be computed. Make sure that there are no empty values in the "{self.id_column}" column.'
138
+ )
139
+ if self.keys:
140
+ log.warning(
141
+ f"[{self.name}] Skipping entity because no ID could be computed. Make sure that there are no empty values in key columns."
142
+ )
143
+ return None
74
144
 
75
145
  for prop in self.properties:
76
- if prop.required:
77
- if not len(proxy.get(prop.schema)):
78
- # This is a bit weird, it flags fields to be required in
79
- # the mapping, not in the model. Basically it means: if
80
- # this row of source data doesn't have that field, then do
81
- # not map it again.
82
- return
146
+ if prop.required and not proxy.has(prop.prop):
147
+ # This is a bit weird, it flags fields to be required in
148
+ # the mapping, not in the model. Basically it means: if
149
+ # this row of source data doesn't have that field, then do
150
+ # not map it again.
151
+ log.warning(
152
+ f'[{self.name}] Skipping entity because required property "{prop.prop.name}" is empty.'
153
+ )
154
+ return None
83
155
  return proxy
84
156
 
85
- def __repr__(self):
86
- return '<EntityMapping(%r)>' % self.name
157
+ def __repr__(self) -> str:
158
+ return "<EntityMapping(%r)>" % self.name
@@ -1,46 +1,76 @@
1
1
  import re
2
2
  from copy import deepcopy
3
+ from warnings import warn
3
4
  from normality import stringify
4
- from banal import unique_list, ensure_list
5
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
6
+ from banal import keys_values, as_bool
5
7
 
8
+ from followthemoney.helpers import inline_names
6
9
  from followthemoney.exc import InvalidMapping
10
+ from followthemoney.proxy import EntityProxy
11
+ from followthemoney.util import sanitize_text
12
+ from followthemoney.property import Property
13
+ from followthemoney.mapping.source import Record
14
+
15
+ if TYPE_CHECKING:
16
+ from followthemoney.mapping.query import QueryMapping
7
17
 
8
18
 
9
19
  class PropertyMapping(object):
10
20
  """Map values from a given record (e.g. a CSV row or SQL result) to the
11
21
  schema form."""
12
- FORMAT_PATTERN = re.compile('{{([^(}})]*)}}')
13
22
 
14
- def __init__(self, query, data, schema):
23
+ __slots__ = (
24
+ "query",
25
+ "prop",
26
+ "refs",
27
+ "join",
28
+ "split",
29
+ "entity",
30
+ "format",
31
+ "fuzzy",
32
+ "required",
33
+ "literals",
34
+ "template",
35
+ "replacements",
36
+ )
37
+
38
+ FORMAT_PATTERN = re.compile("{{([^(}})]*)}}")
39
+
40
+ def __init__(
41
+ self, query: "QueryMapping", data: Dict[str, Any], prop: Property
42
+ ) -> None:
15
43
  self.query = query
16
44
  data = deepcopy(data)
17
- self.data = data
18
- self.schema = schema
19
- self.name = schema.name
20
- self.type = schema.type
21
-
22
- self.refs = ensure_list(data.pop('column', []))
23
- self.refs.extend(ensure_list(data.pop('columns', [])))
24
-
25
- self.literals = ensure_list(data.pop('literal', []))
26
- self.literals.extend(ensure_list(data.pop('literals', [])))
27
-
28
- self.join = data.pop('join', None)
29
- self.split = data.pop('split', None)
30
- self.entity = data.pop('entity', None)
31
- self.required = data.pop('required', False)
32
-
33
- self.template = stringify(data.pop('template', None))
34
- self.replacements = {}
45
+ self.prop = prop
46
+
47
+ self.refs = cast(List[str], keys_values(data, "column", "columns"))
48
+ self.join = cast(Optional[str], data.pop("join", None))
49
+ self.split = cast(Optional[str], data.pop("split", None))
50
+ self.entity = stringify(data.pop("entity", None))
51
+ self.format = stringify(data.pop("format", None))
52
+ self.fuzzy = as_bool(data.pop("fuzzy", False))
53
+ self.required = as_bool(data.pop("required", False))
54
+ self.literals = cast(List[str], keys_values(data, "literal", "literals"))
55
+
56
+ self.template = sanitize_text(data.pop("template", None))
57
+ self.replacements: Dict[str, str] = {}
35
58
  if self.template is not None:
36
59
  # this is hacky, trying to generate refs from template
37
60
  for ref in self.FORMAT_PATTERN.findall(self.template):
38
61
  self.refs.append(ref)
39
- self.replacements['{{%s}}' % ref] = ref
62
+ self.replacements["{{%s}}" % ref] = ref
40
63
 
41
- def bind(self):
42
- if self.schema.stub:
43
- raise InvalidMapping("Property for [%s] is a stub" % self.name)
64
+ def bind(self) -> None:
65
+ if self.prop.stub:
66
+ raise InvalidMapping("Property for [%r] is a stub" % self.prop)
67
+
68
+ if self.prop.deprecated:
69
+ warn(
70
+ "Mapping uses a deprecated property: %r" % self.prop,
71
+ DeprecationWarning,
72
+ stacklevel=2,
73
+ )
44
74
 
45
75
  if self.entity is None:
46
76
  return
@@ -52,44 +82,47 @@ class PropertyMapping(object):
52
82
  for entity in self.query.entities:
53
83
  if entity.name != self.entity:
54
84
  continue
55
- if not entity.schema.is_a(self.schema.range):
56
- raise InvalidMapping("The entity [%s] must be a %s (not %s)" %
57
- (self.name, self.schema.range, entity.schema.name)) # noqa
85
+ if not self.prop.range or not entity.schema.is_a(self.prop.range):
86
+ raise InvalidMapping(
87
+ "The entity [%r] must be a %s (not %s)"
88
+ % (self.prop, self.prop.range, entity.schema.name)
89
+ ) # noqa
58
90
  return
59
91
 
60
- raise InvalidMapping("No entity [%s] for property [%s]"
61
- % (self.entity, self.name))
92
+ raise InvalidMapping(
93
+ "No entity [%s] for property [%r]" % (self.entity, self.prop)
94
+ )
62
95
 
63
- def record_values(self, record):
96
+ def record_values(self, record: Record) -> List[str]:
64
97
  if self.template is not None:
65
98
  # replace mentions of any refs with the values present in the
66
99
  # current record
67
100
  value = self.template
68
101
  for repl, ref in self.replacements.items():
69
- ref_value = record.get(ref) or ''
102
+ ref_value = record.get(ref) or ""
70
103
  value = value.replace(repl, ref_value)
71
104
  return [value.strip()]
72
105
 
73
106
  values = list(self.literals)
74
- values.extend([record.get(r) for r in self.refs])
107
+ for ref in self.refs:
108
+ rec_value = record.get(ref)
109
+ if rec_value is not None:
110
+ values.append(rec_value)
75
111
  return values
76
112
 
77
- def map(self, record, entities, **kwargs):
78
- kwargs.update(self.data)
79
-
113
+ def map(
114
+ self, proxy: EntityProxy, record: Record, entities: Dict[str, EntityProxy]
115
+ ) -> List[str]:
80
116
  if self.entity is not None:
81
117
  entity = entities.get(self.entity)
82
118
  if entity is not None:
83
- return [entity.id]
119
+ proxy.unsafe_add(self.prop, entity.id, cleaned=True)
120
+ inline_names(proxy, entity)
84
121
  return []
85
122
 
86
123
  # clean the values returned by the query, or by using literals, or
87
124
  # formats.
88
- values = []
89
- for value in self.record_values(record):
90
- value = self.type.clean(value, **kwargs)
91
- if value is not None:
92
- values.append(value)
125
+ values: List[str] = self.record_values(record)
93
126
 
94
127
  if self.join is not None:
95
128
  values = [self.join.join(values)]
@@ -97,7 +130,20 @@ class PropertyMapping(object):
97
130
  if self.split is not None:
98
131
  splote = []
99
132
  for value in values:
100
- splote = splote + value.split(self.split)
133
+ splote.extend(value.split(self.split))
101
134
  values = splote
102
135
 
103
- return unique_list(values)
136
+ discarded_values: List[str] = []
137
+
138
+ for value in values:
139
+ added_value = proxy.unsafe_add(
140
+ prop=self.prop,
141
+ value=value,
142
+ fuzzy=self.fuzzy,
143
+ format=self.format,
144
+ )
145
+
146
+ if value is not None and added_value is None:
147
+ discarded_values.append(value)
148
+
149
+ return discarded_values
@@ -1,20 +1,27 @@
1
+ from followthemoney.mapping.source import Record, Source
2
+ from typing import TYPE_CHECKING, Any, List, Optional, Set, Dict
3
+
4
+ from followthemoney.proxy import EntityProxy
1
5
  from followthemoney.mapping.entity import EntityMapping
2
6
  from followthemoney.mapping.sql import SQLSource
3
7
  from followthemoney.mapping.csv import CSVSource
4
8
  from followthemoney.exc import InvalidMapping
5
9
 
10
+ if TYPE_CHECKING:
11
+ from followthemoney.model import Model
6
12
 
7
- class QueryMapping(object):
8
13
 
9
- def __init__(self, model, data, key_prefix=None):
10
- self.model = model
11
- self.data = data
14
+ class QueryMapping:
15
+ __slots__ = ("model", "data", "refs", "entities", "source")
12
16
 
13
- self.refs = set()
14
- self.entities = []
15
- for name, data in data.get('entities', {}).items():
16
- entity = EntityMapping(model, self, name, data,
17
- key_prefix=key_prefix)
17
+ def __init__(
18
+ self, model: "Model", data: Dict[str, Any], key_prefix: Optional[str] = None
19
+ ) -> None:
20
+ self.model = model
21
+ self.refs: Set[str] = set()
22
+ self.entities: List[EntityMapping] = []
23
+ for name, edata in data.get("entities", {}).items():
24
+ entity = EntityMapping(model, self, name, edata, key_prefix=key_prefix)
18
25
 
19
26
  self.entities.append(entity)
20
27
  self.refs.update(entity.refs)
@@ -32,7 +39,7 @@ class QueryMapping(object):
32
39
  # in dependent entities.
33
40
  entities = self.entities
34
41
  self.entities = []
35
- resolved = set()
42
+ resolved: Set[str] = set()
36
43
  while len(entities) > 0:
37
44
  before = len(entities)
38
45
  for entity in entities:
@@ -44,16 +51,17 @@ class QueryMapping(object):
44
51
  if before == len(entities):
45
52
  raise InvalidMapping("Circular entity dependency detected.")
46
53
 
47
- @property
48
- def source(self):
49
- if 'database' in self.data:
50
- return SQLSource(self, self.data)
51
- elif 'csv_url' in self.data or 'csv_urls' in self.data:
52
- return CSVSource(self, self.data)
53
- raise InvalidMapping("Cannot determine mapping type")
54
+ self.source = self._get_source(data)
55
+
56
+ def _get_source(self, data: Dict[str, Any]) -> Source:
57
+ if "database" in data:
58
+ return SQLSource(self, data)
59
+ if "csv_url" in data or "csv_urls" in data:
60
+ return CSVSource(self, data)
61
+ raise InvalidMapping("Cannot determine mapping type: %r" % data)
54
62
 
55
- def map(self, record):
56
- data = {}
63
+ def map(self, record: Record) -> Dict[str, EntityProxy]:
64
+ data: Dict[str, EntityProxy] = {}
57
65
  for entity in self.entities:
58
66
  mapped = entity.map(record, data)
59
67
  if mapped is not None:
@@ -1,11 +1,21 @@
1
+ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional, Set, cast
1
2
 
3
+ if TYPE_CHECKING:
4
+ from followthemoney.mapping.query import QueryMapping
5
+
6
+ Filter = Set[Optional[str]]
7
+ Record = Dict[str, str]
2
8
 
3
- class Source(object):
4
9
 
5
- def __init__(self, query, data):
10
+ class Source(object):
11
+ def __init__(self, query: "QueryMapping", data: Dict[str, Any]) -> None:
6
12
  self.query = query
7
- self.filters = query.data.get('filters', {}).items()
8
- self.filters_not = query.data.get('filters_not', {}).items()
13
+ self.filters = cast(Dict[str, Any], data.get("filters", {})).items()
14
+ self.filters_not = cast(Dict[str, Any], data.get("filters_not", {})).items()
15
+
16
+ @property
17
+ def records(self) -> Generator[Record, None, None]:
18
+ raise NotImplementedError
9
19
 
10
- def __len__(self):
20
+ def __len__(self) -> int:
11
21
  return 0