followthemoney 1.3.6__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +5 -3
- followthemoney/cli/__init__.py +17 -0
- followthemoney/cli/aggregate.py +56 -0
- followthemoney/cli/cli.py +88 -0
- followthemoney/cli/exports.py +121 -0
- followthemoney/cli/mapping.py +85 -0
- followthemoney/cli/sieve.py +67 -0
- followthemoney/cli/util.py +142 -0
- followthemoney/compare.py +132 -55
- followthemoney/exc.py +19 -6
- followthemoney/export/common.py +29 -0
- followthemoney/export/csv.py +82 -0
- followthemoney/export/excel.py +75 -0
- followthemoney/export/graph.py +79 -0
- followthemoney/export/neo4j.py +182 -0
- followthemoney/export/rdf.py +26 -0
- followthemoney/graph.py +308 -0
- followthemoney/helpers.py +212 -0
- followthemoney/mapping/__init__.py +1 -1
- followthemoney/mapping/csv.py +67 -35
- followthemoney/mapping/entity.py +116 -44
- followthemoney/mapping/property.py +90 -44
- followthemoney/mapping/query.py +27 -19
- followthemoney/mapping/source.py +15 -5
- followthemoney/mapping/sql.py +75 -61
- followthemoney/messages.py +13 -7
- followthemoney/model.py +108 -56
- followthemoney/namespace.py +119 -0
- followthemoney/offshore.py +48 -0
- followthemoney/ontology.py +77 -0
- followthemoney/property.py +204 -71
- followthemoney/proxy.py +455 -118
- followthemoney/rdf.py +9 -0
- followthemoney/schema/Address.yaml +78 -0
- followthemoney/schema/Airplane.yaml +17 -10
- followthemoney/schema/Analyzable.yaml +54 -0
- followthemoney/schema/Article.yaml +16 -0
- followthemoney/schema/Assessment.yaml +32 -0
- followthemoney/schema/Asset.yaml +10 -4
- followthemoney/schema/Associate.yaml +41 -0
- followthemoney/schema/Audio.yaml +24 -0
- followthemoney/schema/BankAccount.yaml +53 -9
- followthemoney/schema/Call.yaml +48 -0
- followthemoney/schema/CallForTenders.yaml +117 -0
- followthemoney/schema/Company.yaml +37 -12
- followthemoney/schema/Contract.yaml +41 -7
- followthemoney/schema/ContractAward.yaml +30 -11
- followthemoney/schema/CourtCase.yaml +16 -10
- followthemoney/schema/CourtCaseParty.yaml +17 -6
- followthemoney/schema/CryptoWallet.yaml +48 -0
- followthemoney/schema/Debt.yaml +37 -0
- followthemoney/schema/Directorship.yaml +17 -4
- followthemoney/schema/Document.yaml +72 -139
- followthemoney/schema/Documentation.yml +38 -0
- followthemoney/schema/EconomicActivity.yaml +32 -17
- followthemoney/schema/Email.yaml +76 -0
- followthemoney/schema/Employment.yaml +39 -0
- followthemoney/schema/Event.yaml +35 -3
- followthemoney/schema/Family.yaml +41 -0
- followthemoney/schema/Folder.yaml +13 -0
- followthemoney/schema/HyperText.yaml +21 -0
- followthemoney/schema/Identification.yaml +40 -0
- followthemoney/schema/Image.yaml +25 -0
- followthemoney/schema/Interest.yaml +3 -6
- followthemoney/schema/Interval.yaml +56 -5
- followthemoney/schema/LegalEntity.yaml +81 -20
- followthemoney/schema/License.yaml +7 -3
- followthemoney/schema/Membership.yaml +19 -4
- followthemoney/schema/Mention.yaml +54 -0
- followthemoney/schema/Message.yaml +73 -0
- followthemoney/schema/Note.yaml +23 -0
- followthemoney/schema/Occupancy.yaml +40 -0
- followthemoney/schema/Organization.yaml +38 -3
- followthemoney/schema/Ownership.yaml +16 -4
- followthemoney/schema/Package.yaml +17 -0
- followthemoney/schema/Page.yaml +43 -0
- followthemoney/schema/Pages.yaml +23 -0
- followthemoney/schema/Passport.yaml +15 -17
- followthemoney/schema/Payment.yaml +38 -7
- followthemoney/schema/Person.yaml +61 -5
- followthemoney/schema/PlainText.yaml +17 -0
- followthemoney/schema/Position.yaml +50 -0
- followthemoney/schema/Post.yaml +42 -0
- followthemoney/schema/Project.yaml +27 -0
- followthemoney/schema/ProjectParticipant.yaml +36 -0
- followthemoney/schema/PublicBody.yaml +14 -3
- followthemoney/schema/RealEstate.yaml +19 -3
- followthemoney/schema/Representation.yaml +17 -6
- followthemoney/schema/Sanction.yaml +44 -20
- followthemoney/schema/Security.yaml +59 -0
- followthemoney/schema/Similar.yaml +37 -0
- followthemoney/schema/Succession.yaml +36 -0
- followthemoney/schema/Table.yaml +32 -0
- followthemoney/schema/TaxRoll.yaml +27 -9
- followthemoney/schema/Thing.yaml +69 -13
- followthemoney/schema/Trip.yaml +42 -0
- followthemoney/schema/UnknownLink.yaml +17 -6
- followthemoney/schema/UserAccount.yaml +44 -0
- followthemoney/schema/Value.yaml +5 -1
- followthemoney/schema/Vehicle.yaml +25 -8
- followthemoney/schema/Vessel.yaml +18 -10
- followthemoney/schema/Video.yaml +20 -0
- followthemoney/schema/Workbook.yaml +18 -0
- followthemoney/schema.py +406 -135
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
- followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
- followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
- followthemoney/translations/fr/followthemoney.po +3861 -0
- followthemoney/translations/messages.pot +3021 -725
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
- followthemoney/translations/ru/followthemoney.po +4221 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
- followthemoney/types/__init__.py +35 -17
- followthemoney/types/address.py +41 -21
- followthemoney/types/checksum.py +25 -0
- followthemoney/types/common.py +233 -88
- followthemoney/types/country.py +89 -56
- followthemoney/types/date.py +59 -76
- followthemoney/types/email.py +66 -35
- followthemoney/types/entity.py +66 -13
- followthemoney/types/gender.py +66 -0
- followthemoney/types/iban.py +47 -28
- followthemoney/types/identifier.py +49 -22
- followthemoney/types/ip.py +35 -21
- followthemoney/types/json.py +58 -0
- followthemoney/types/language.py +124 -37
- followthemoney/types/mimetype.py +44 -0
- followthemoney/types/name.py +56 -12
- followthemoney/types/number.py +30 -0
- followthemoney/types/phone.py +92 -34
- followthemoney/types/registry.py +52 -0
- followthemoney/types/string.py +43 -0
- followthemoney/types/topic.py +94 -0
- followthemoney/types/url.py +39 -17
- followthemoney/util.py +139 -45
- followthemoney-3.8.0.dist-info/METADATA +153 -0
- followthemoney-3.8.0.dist-info/RECORD +157 -0
- {followthemoney-1.3.6.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
- followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
- followthemoney-1.3.6.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
- followthemoney/link.py +0 -75
- followthemoney/schema/Associate.yml +0 -19
- followthemoney/schema/Family.yml +0 -19
- followthemoney/schema/Land.yml +0 -9
- followthemoney/schema/Relationship.yaml +0 -26
- followthemoney/types/domain.py +0 -50
- followthemoney-1.3.6.dist-info/DESCRIPTION.rst +0 -3
- followthemoney-1.3.6.dist-info/METADATA +0 -39
- followthemoney-1.3.6.dist-info/RECORD +0 -108
- followthemoney-1.3.6.dist-info/entry_points.txt +0 -3
- followthemoney-1.3.6.dist-info/metadata.json +0 -1
- followthemoney-1.3.6.dist-info/namespace_packages.txt +0 -1
- followthemoney-1.3.6.dist-info/top_level.txt +0 -3
- ns/ontology.py +0 -128
- tests/types/test_addresses.py +0 -24
- tests/types/test_common.py +0 -27
- tests/types/test_countries.py +0 -21
- tests/types/test_dates.py +0 -72
- tests/types/test_domains.py +0 -23
- tests/types/test_emails.py +0 -30
- tests/types/test_entity.py +0 -16
- tests/types/test_iban.py +0 -109
- tests/types/test_identifiers.py +0 -25
- tests/types/test_ip.py +0 -26
- tests/types/test_languages.py +0 -20
- tests/types/test_names.py +0 -33
- tests/types/test_phones.py +0 -24
- tests/types/test_registry.py +0 -14
- tests/types/test_urls.py +0 -23
- {ns → followthemoney/export}/__init__.py +0 -0
- /tests/types/__init__.py → /followthemoney/py.typed +0 -0
followthemoney/mapping/entity.py
CHANGED
|
@@ -1,62 +1,111 @@
|
|
|
1
|
+
import logging
|
|
1
2
|
from hashlib import sha1
|
|
2
|
-
from
|
|
3
|
+
from warnings import warn
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Set
|
|
5
|
+
from banal import keys_values
|
|
6
|
+
from normality import stringify
|
|
3
7
|
|
|
4
|
-
from followthemoney.mapping.property import PropertyMapping
|
|
5
8
|
from followthemoney.types import registry
|
|
6
9
|
from followthemoney.util import key_bytes
|
|
10
|
+
from followthemoney.proxy import EntityProxy
|
|
11
|
+
from followthemoney.mapping.property import PropertyMapping
|
|
12
|
+
from followthemoney.mapping.source import Record
|
|
7
13
|
from followthemoney.exc import InvalidMapping
|
|
8
14
|
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from followthemoney.model import Model
|
|
17
|
+
from followthemoney.mapping.query import QueryMapping
|
|
18
|
+
|
|
19
|
+
log = logging.getLogger(__name__)
|
|
20
|
+
|
|
9
21
|
|
|
10
22
|
class EntityMapping(object):
|
|
23
|
+
__slots__ = (
|
|
24
|
+
"model",
|
|
25
|
+
"name",
|
|
26
|
+
"seed",
|
|
27
|
+
"keys",
|
|
28
|
+
"id_column",
|
|
29
|
+
"schema",
|
|
30
|
+
"refs",
|
|
31
|
+
"dependencies",
|
|
32
|
+
"properties",
|
|
33
|
+
)
|
|
11
34
|
|
|
12
|
-
def __init__(
|
|
35
|
+
def __init__(
|
|
36
|
+
self,
|
|
37
|
+
model: "Model",
|
|
38
|
+
query: "QueryMapping",
|
|
39
|
+
name: str,
|
|
40
|
+
data: Dict[str, Any],
|
|
41
|
+
key_prefix: Optional[str] = None,
|
|
42
|
+
) -> None:
|
|
13
43
|
self.model = model
|
|
14
44
|
self.name = name
|
|
15
|
-
self.data = data
|
|
16
45
|
|
|
17
46
|
self.seed = sha1(key_bytes(key_prefix))
|
|
18
|
-
self.seed.update(key_bytes(data.get(
|
|
47
|
+
self.seed.update(key_bytes(data.get("key_literal")))
|
|
19
48
|
|
|
20
|
-
self.keys =
|
|
21
|
-
self.
|
|
22
|
-
if not len(self.keys):
|
|
23
|
-
raise InvalidMapping("No keys: %r" % name)
|
|
49
|
+
self.keys = keys_values(data, "key", "keys")
|
|
50
|
+
self.id_column = stringify(data.get("id_column"))
|
|
51
|
+
if not len(self.keys) and self.id_column is None:
|
|
52
|
+
raise InvalidMapping("No keys or ID: %r" % name)
|
|
53
|
+
if len(self.keys) and self.id_column is not None:
|
|
54
|
+
msg = "Please use only keys or id_column, not both: %r" % name
|
|
55
|
+
raise InvalidMapping(msg)
|
|
24
56
|
|
|
25
|
-
|
|
26
|
-
if
|
|
27
|
-
raise InvalidMapping("
|
|
57
|
+
schema_name = stringify(data.get("schema"))
|
|
58
|
+
if schema_name is None:
|
|
59
|
+
raise InvalidMapping("No schema: %s" % name)
|
|
60
|
+
schema = model.get(schema_name)
|
|
61
|
+
if schema is None:
|
|
62
|
+
raise InvalidMapping("Invalid schema: %s" % schema_name)
|
|
63
|
+
if schema.deprecated:
|
|
64
|
+
warn(
|
|
65
|
+
"Mapping uses a deprecated schema: %r" % schema,
|
|
66
|
+
DeprecationWarning,
|
|
67
|
+
stacklevel=2,
|
|
68
|
+
)
|
|
69
|
+
self.schema = schema
|
|
28
70
|
|
|
29
71
|
self.refs = set(self.keys)
|
|
30
|
-
self.
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
72
|
+
if self.id_column:
|
|
73
|
+
self.refs.add(self.id_column)
|
|
74
|
+
self.dependencies: Set[str] = set()
|
|
75
|
+
self.properties: List[PropertyMapping] = []
|
|
76
|
+
for name, prop_mapping in data.get("properties", {}).items():
|
|
77
|
+
prop = self.schema.get(name)
|
|
78
|
+
if prop is None:
|
|
35
79
|
raise InvalidMapping("Invalid property: %s" % name)
|
|
36
|
-
|
|
37
|
-
self.properties.append(
|
|
38
|
-
self.refs.update(
|
|
39
|
-
if
|
|
40
|
-
self.dependencies.add(
|
|
80
|
+
mapping = PropertyMapping(query, prop_mapping, prop)
|
|
81
|
+
self.properties.append(mapping)
|
|
82
|
+
self.refs.update(mapping.refs)
|
|
83
|
+
if mapping.entity:
|
|
84
|
+
self.dependencies.add(mapping.entity)
|
|
41
85
|
|
|
42
|
-
def bind(self):
|
|
86
|
+
def bind(self) -> None:
|
|
43
87
|
for prop in self.properties:
|
|
44
88
|
prop.bind()
|
|
45
89
|
|
|
46
|
-
def compute_key(self, record):
|
|
90
|
+
def compute_key(self, record: Record) -> Optional[str]:
|
|
47
91
|
"""Generate a key for this entity, based on the given fields."""
|
|
92
|
+
if self.id_column is not None:
|
|
93
|
+
return record.get(self.id_column)
|
|
48
94
|
values = [key_bytes(record.get(k)) for k in self.keys]
|
|
49
95
|
digest = self.seed.copy()
|
|
96
|
+
has_value = False
|
|
50
97
|
for value in sorted(values):
|
|
51
|
-
|
|
52
|
-
|
|
98
|
+
if len(value):
|
|
99
|
+
has_value = True
|
|
100
|
+
digest.update(value)
|
|
101
|
+
if has_value:
|
|
53
102
|
return digest.hexdigest()
|
|
103
|
+
return None
|
|
54
104
|
|
|
55
|
-
def map(
|
|
105
|
+
def map(
|
|
106
|
+
self, record: Record, entities: Dict[str, EntityProxy]
|
|
107
|
+
) -> Optional[EntityProxy]:
|
|
56
108
|
proxy = self.model.make_entity(self.schema)
|
|
57
|
-
proxy.id = self.compute_key(record)
|
|
58
|
-
if proxy.id is None:
|
|
59
|
-
return
|
|
60
109
|
|
|
61
110
|
# THIS IS HACKY
|
|
62
111
|
# Some of the converters, e.g. for phone numbers, work better if they
|
|
@@ -64,23 +113,46 @@ class EntityMapping(object):
|
|
|
64
113
|
# detail, we are first running country fields, then making the data
|
|
65
114
|
# from that accessible to phone and address parsers.
|
|
66
115
|
for prop in self.properties:
|
|
67
|
-
if prop.
|
|
68
|
-
|
|
116
|
+
if prop.prop.type == registry.country:
|
|
117
|
+
discarded_values = prop.map(proxy, record, entities)
|
|
118
|
+
for value in discarded_values:
|
|
119
|
+
log.warning(
|
|
120
|
+
f'[{self.name}] Discarded unclean value "{value}" for property "{prop.prop.qname}".'
|
|
121
|
+
)
|
|
69
122
|
|
|
70
123
|
for prop in self.properties:
|
|
71
|
-
if prop.
|
|
72
|
-
|
|
73
|
-
|
|
124
|
+
if prop.prop.type != registry.country:
|
|
125
|
+
discarded_values = prop.map(proxy, record, entities)
|
|
126
|
+
for value in discarded_values:
|
|
127
|
+
log.warning(
|
|
128
|
+
f'[{self.name}] Discarding unclean value "{value}" for property "{prop.prop.qname}".'
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
# Generate the ID at the end to avoid self-reference checks on empty
|
|
132
|
+
# keys.
|
|
133
|
+
proxy.id = self.compute_key(record)
|
|
134
|
+
if proxy.id is None:
|
|
135
|
+
if self.id_column:
|
|
136
|
+
log.warning(
|
|
137
|
+
f'[{self.name}] Skipping entity because no ID could be computed. Make sure that there are no empty values in the "{self.id_column}" column.'
|
|
138
|
+
)
|
|
139
|
+
if self.keys:
|
|
140
|
+
log.warning(
|
|
141
|
+
f"[{self.name}] Skipping entity because no ID could be computed. Make sure that there are no empty values in key columns."
|
|
142
|
+
)
|
|
143
|
+
return None
|
|
74
144
|
|
|
75
145
|
for prop in self.properties:
|
|
76
|
-
if prop.required:
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
146
|
+
if prop.required and not proxy.has(prop.prop):
|
|
147
|
+
# This is a bit weird, it flags fields to be required in
|
|
148
|
+
# the mapping, not in the model. Basically it means: if
|
|
149
|
+
# this row of source data doesn't have that field, then do
|
|
150
|
+
# not map it again.
|
|
151
|
+
log.warning(
|
|
152
|
+
f'[{self.name}] Skipping entity because required property "{prop.prop.name}" is empty.'
|
|
153
|
+
)
|
|
154
|
+
return None
|
|
83
155
|
return proxy
|
|
84
156
|
|
|
85
|
-
def __repr__(self):
|
|
86
|
-
return
|
|
157
|
+
def __repr__(self) -> str:
|
|
158
|
+
return "<EntityMapping(%r)>" % self.name
|
|
@@ -1,46 +1,76 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from copy import deepcopy
|
|
3
|
+
from warnings import warn
|
|
3
4
|
from normality import stringify
|
|
4
|
-
from
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
|
|
6
|
+
from banal import keys_values, as_bool
|
|
5
7
|
|
|
8
|
+
from followthemoney.helpers import inline_names
|
|
6
9
|
from followthemoney.exc import InvalidMapping
|
|
10
|
+
from followthemoney.proxy import EntityProxy
|
|
11
|
+
from followthemoney.util import sanitize_text
|
|
12
|
+
from followthemoney.property import Property
|
|
13
|
+
from followthemoney.mapping.source import Record
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from followthemoney.mapping.query import QueryMapping
|
|
7
17
|
|
|
8
18
|
|
|
9
19
|
class PropertyMapping(object):
|
|
10
20
|
"""Map values from a given record (e.g. a CSV row or SQL result) to the
|
|
11
21
|
schema form."""
|
|
12
|
-
FORMAT_PATTERN = re.compile('{{([^(}})]*)}}')
|
|
13
22
|
|
|
14
|
-
|
|
23
|
+
__slots__ = (
|
|
24
|
+
"query",
|
|
25
|
+
"prop",
|
|
26
|
+
"refs",
|
|
27
|
+
"join",
|
|
28
|
+
"split",
|
|
29
|
+
"entity",
|
|
30
|
+
"format",
|
|
31
|
+
"fuzzy",
|
|
32
|
+
"required",
|
|
33
|
+
"literals",
|
|
34
|
+
"template",
|
|
35
|
+
"replacements",
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
FORMAT_PATTERN = re.compile("{{([^(}})]*)}}")
|
|
39
|
+
|
|
40
|
+
def __init__(
|
|
41
|
+
self, query: "QueryMapping", data: Dict[str, Any], prop: Property
|
|
42
|
+
) -> None:
|
|
15
43
|
self.query = query
|
|
16
44
|
data = deepcopy(data)
|
|
17
|
-
self.
|
|
18
|
-
|
|
19
|
-
self.
|
|
20
|
-
self.
|
|
21
|
-
|
|
22
|
-
self.
|
|
23
|
-
self.
|
|
24
|
-
|
|
25
|
-
self.
|
|
26
|
-
self.literals
|
|
27
|
-
|
|
28
|
-
self.
|
|
29
|
-
self.
|
|
30
|
-
self.entity = data.pop('entity', None)
|
|
31
|
-
self.required = data.pop('required', False)
|
|
32
|
-
|
|
33
|
-
self.template = stringify(data.pop('template', None))
|
|
34
|
-
self.replacements = {}
|
|
45
|
+
self.prop = prop
|
|
46
|
+
|
|
47
|
+
self.refs = cast(List[str], keys_values(data, "column", "columns"))
|
|
48
|
+
self.join = cast(Optional[str], data.pop("join", None))
|
|
49
|
+
self.split = cast(Optional[str], data.pop("split", None))
|
|
50
|
+
self.entity = stringify(data.pop("entity", None))
|
|
51
|
+
self.format = stringify(data.pop("format", None))
|
|
52
|
+
self.fuzzy = as_bool(data.pop("fuzzy", False))
|
|
53
|
+
self.required = as_bool(data.pop("required", False))
|
|
54
|
+
self.literals = cast(List[str], keys_values(data, "literal", "literals"))
|
|
55
|
+
|
|
56
|
+
self.template = sanitize_text(data.pop("template", None))
|
|
57
|
+
self.replacements: Dict[str, str] = {}
|
|
35
58
|
if self.template is not None:
|
|
36
59
|
# this is hacky, trying to generate refs from template
|
|
37
60
|
for ref in self.FORMAT_PATTERN.findall(self.template):
|
|
38
61
|
self.refs.append(ref)
|
|
39
|
-
self.replacements[
|
|
62
|
+
self.replacements["{{%s}}" % ref] = ref
|
|
40
63
|
|
|
41
|
-
def bind(self):
|
|
42
|
-
if self.
|
|
43
|
-
raise InvalidMapping("Property for [%
|
|
64
|
+
def bind(self) -> None:
|
|
65
|
+
if self.prop.stub:
|
|
66
|
+
raise InvalidMapping("Property for [%r] is a stub" % self.prop)
|
|
67
|
+
|
|
68
|
+
if self.prop.deprecated:
|
|
69
|
+
warn(
|
|
70
|
+
"Mapping uses a deprecated property: %r" % self.prop,
|
|
71
|
+
DeprecationWarning,
|
|
72
|
+
stacklevel=2,
|
|
73
|
+
)
|
|
44
74
|
|
|
45
75
|
if self.entity is None:
|
|
46
76
|
return
|
|
@@ -52,44 +82,47 @@ class PropertyMapping(object):
|
|
|
52
82
|
for entity in self.query.entities:
|
|
53
83
|
if entity.name != self.entity:
|
|
54
84
|
continue
|
|
55
|
-
if not entity.schema.is_a(self.
|
|
56
|
-
raise InvalidMapping(
|
|
57
|
-
|
|
85
|
+
if not self.prop.range or not entity.schema.is_a(self.prop.range):
|
|
86
|
+
raise InvalidMapping(
|
|
87
|
+
"The entity [%r] must be a %s (not %s)"
|
|
88
|
+
% (self.prop, self.prop.range, entity.schema.name)
|
|
89
|
+
) # noqa
|
|
58
90
|
return
|
|
59
91
|
|
|
60
|
-
raise InvalidMapping(
|
|
61
|
-
|
|
92
|
+
raise InvalidMapping(
|
|
93
|
+
"No entity [%s] for property [%r]" % (self.entity, self.prop)
|
|
94
|
+
)
|
|
62
95
|
|
|
63
|
-
def record_values(self, record):
|
|
96
|
+
def record_values(self, record: Record) -> List[str]:
|
|
64
97
|
if self.template is not None:
|
|
65
98
|
# replace mentions of any refs with the values present in the
|
|
66
99
|
# current record
|
|
67
100
|
value = self.template
|
|
68
101
|
for repl, ref in self.replacements.items():
|
|
69
|
-
ref_value = record.get(ref) or
|
|
102
|
+
ref_value = record.get(ref) or ""
|
|
70
103
|
value = value.replace(repl, ref_value)
|
|
71
104
|
return [value.strip()]
|
|
72
105
|
|
|
73
106
|
values = list(self.literals)
|
|
74
|
-
|
|
107
|
+
for ref in self.refs:
|
|
108
|
+
rec_value = record.get(ref)
|
|
109
|
+
if rec_value is not None:
|
|
110
|
+
values.append(rec_value)
|
|
75
111
|
return values
|
|
76
112
|
|
|
77
|
-
def map(
|
|
78
|
-
|
|
79
|
-
|
|
113
|
+
def map(
|
|
114
|
+
self, proxy: EntityProxy, record: Record, entities: Dict[str, EntityProxy]
|
|
115
|
+
) -> List[str]:
|
|
80
116
|
if self.entity is not None:
|
|
81
117
|
entity = entities.get(self.entity)
|
|
82
118
|
if entity is not None:
|
|
83
|
-
|
|
119
|
+
proxy.unsafe_add(self.prop, entity.id, cleaned=True)
|
|
120
|
+
inline_names(proxy, entity)
|
|
84
121
|
return []
|
|
85
122
|
|
|
86
123
|
# clean the values returned by the query, or by using literals, or
|
|
87
124
|
# formats.
|
|
88
|
-
values =
|
|
89
|
-
for value in self.record_values(record):
|
|
90
|
-
value = self.type.clean(value, **kwargs)
|
|
91
|
-
if value is not None:
|
|
92
|
-
values.append(value)
|
|
125
|
+
values: List[str] = self.record_values(record)
|
|
93
126
|
|
|
94
127
|
if self.join is not None:
|
|
95
128
|
values = [self.join.join(values)]
|
|
@@ -97,7 +130,20 @@ class PropertyMapping(object):
|
|
|
97
130
|
if self.split is not None:
|
|
98
131
|
splote = []
|
|
99
132
|
for value in values:
|
|
100
|
-
splote
|
|
133
|
+
splote.extend(value.split(self.split))
|
|
101
134
|
values = splote
|
|
102
135
|
|
|
103
|
-
|
|
136
|
+
discarded_values: List[str] = []
|
|
137
|
+
|
|
138
|
+
for value in values:
|
|
139
|
+
added_value = proxy.unsafe_add(
|
|
140
|
+
prop=self.prop,
|
|
141
|
+
value=value,
|
|
142
|
+
fuzzy=self.fuzzy,
|
|
143
|
+
format=self.format,
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
if value is not None and added_value is None:
|
|
147
|
+
discarded_values.append(value)
|
|
148
|
+
|
|
149
|
+
return discarded_values
|
followthemoney/mapping/query.py
CHANGED
|
@@ -1,20 +1,27 @@
|
|
|
1
|
+
from followthemoney.mapping.source import Record, Source
|
|
2
|
+
from typing import TYPE_CHECKING, Any, List, Optional, Set, Dict
|
|
3
|
+
|
|
4
|
+
from followthemoney.proxy import EntityProxy
|
|
1
5
|
from followthemoney.mapping.entity import EntityMapping
|
|
2
6
|
from followthemoney.mapping.sql import SQLSource
|
|
3
7
|
from followthemoney.mapping.csv import CSVSource
|
|
4
8
|
from followthemoney.exc import InvalidMapping
|
|
5
9
|
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from followthemoney.model import Model
|
|
6
12
|
|
|
7
|
-
class QueryMapping(object):
|
|
8
13
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
self.data = data
|
|
14
|
+
class QueryMapping:
|
|
15
|
+
__slots__ = ("model", "data", "refs", "entities", "source")
|
|
12
16
|
|
|
13
|
-
|
|
14
|
-
self
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
def __init__(
|
|
18
|
+
self, model: "Model", data: Dict[str, Any], key_prefix: Optional[str] = None
|
|
19
|
+
) -> None:
|
|
20
|
+
self.model = model
|
|
21
|
+
self.refs: Set[str] = set()
|
|
22
|
+
self.entities: List[EntityMapping] = []
|
|
23
|
+
for name, edata in data.get("entities", {}).items():
|
|
24
|
+
entity = EntityMapping(model, self, name, edata, key_prefix=key_prefix)
|
|
18
25
|
|
|
19
26
|
self.entities.append(entity)
|
|
20
27
|
self.refs.update(entity.refs)
|
|
@@ -32,7 +39,7 @@ class QueryMapping(object):
|
|
|
32
39
|
# in dependent entities.
|
|
33
40
|
entities = self.entities
|
|
34
41
|
self.entities = []
|
|
35
|
-
resolved = set()
|
|
42
|
+
resolved: Set[str] = set()
|
|
36
43
|
while len(entities) > 0:
|
|
37
44
|
before = len(entities)
|
|
38
45
|
for entity in entities:
|
|
@@ -44,16 +51,17 @@ class QueryMapping(object):
|
|
|
44
51
|
if before == len(entities):
|
|
45
52
|
raise InvalidMapping("Circular entity dependency detected.")
|
|
46
53
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
+
self.source = self._get_source(data)
|
|
55
|
+
|
|
56
|
+
def _get_source(self, data: Dict[str, Any]) -> Source:
|
|
57
|
+
if "database" in data:
|
|
58
|
+
return SQLSource(self, data)
|
|
59
|
+
if "csv_url" in data or "csv_urls" in data:
|
|
60
|
+
return CSVSource(self, data)
|
|
61
|
+
raise InvalidMapping("Cannot determine mapping type: %r" % data)
|
|
54
62
|
|
|
55
|
-
def map(self, record):
|
|
56
|
-
data = {}
|
|
63
|
+
def map(self, record: Record) -> Dict[str, EntityProxy]:
|
|
64
|
+
data: Dict[str, EntityProxy] = {}
|
|
57
65
|
for entity in self.entities:
|
|
58
66
|
mapped = entity.map(record, data)
|
|
59
67
|
if mapped is not None:
|
followthemoney/mapping/source.py
CHANGED
|
@@ -1,11 +1,21 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING, Any, Dict, Generator, Optional, Set, cast
|
|
1
2
|
|
|
3
|
+
if TYPE_CHECKING:
|
|
4
|
+
from followthemoney.mapping.query import QueryMapping
|
|
5
|
+
|
|
6
|
+
Filter = Set[Optional[str]]
|
|
7
|
+
Record = Dict[str, str]
|
|
2
8
|
|
|
3
|
-
class Source(object):
|
|
4
9
|
|
|
5
|
-
|
|
10
|
+
class Source(object):
|
|
11
|
+
def __init__(self, query: "QueryMapping", data: Dict[str, Any]) -> None:
|
|
6
12
|
self.query = query
|
|
7
|
-
self.filters =
|
|
8
|
-
self.filters_not =
|
|
13
|
+
self.filters = cast(Dict[str, Any], data.get("filters", {})).items()
|
|
14
|
+
self.filters_not = cast(Dict[str, Any], data.get("filters_not", {})).items()
|
|
15
|
+
|
|
16
|
+
@property
|
|
17
|
+
def records(self) -> Generator[Record, None, None]:
|
|
18
|
+
raise NotImplementedError
|
|
9
19
|
|
|
10
|
-
def __len__(self):
|
|
20
|
+
def __len__(self) -> int:
|
|
11
21
|
return 0
|