followthemoney 1.3.7__py3-none-any.whl → 3.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- followthemoney/__init__.py +5 -3
- followthemoney/cli/__init__.py +17 -0
- followthemoney/cli/aggregate.py +56 -0
- followthemoney/cli/cli.py +88 -0
- followthemoney/cli/exports.py +121 -0
- followthemoney/cli/mapping.py +85 -0
- followthemoney/cli/sieve.py +67 -0
- followthemoney/cli/util.py +142 -0
- followthemoney/compare.py +130 -60
- followthemoney/exc.py +19 -6
- followthemoney/export/common.py +29 -0
- followthemoney/export/csv.py +82 -0
- followthemoney/export/excel.py +75 -0
- followthemoney/export/graph.py +79 -0
- followthemoney/export/neo4j.py +182 -0
- followthemoney/export/rdf.py +26 -0
- followthemoney/graph.py +308 -0
- followthemoney/helpers.py +212 -0
- followthemoney/mapping/__init__.py +1 -1
- followthemoney/mapping/csv.py +67 -35
- followthemoney/mapping/entity.py +116 -44
- followthemoney/mapping/property.py +90 -44
- followthemoney/mapping/query.py +27 -19
- followthemoney/mapping/source.py +15 -5
- followthemoney/mapping/sql.py +75 -61
- followthemoney/messages.py +13 -7
- followthemoney/model.py +108 -56
- followthemoney/namespace.py +119 -0
- followthemoney/offshore.py +48 -0
- followthemoney/ontology.py +77 -0
- followthemoney/property.py +204 -71
- followthemoney/proxy.py +455 -118
- followthemoney/rdf.py +9 -0
- followthemoney/schema/Address.yaml +78 -0
- followthemoney/schema/Airplane.yaml +17 -10
- followthemoney/schema/Analyzable.yaml +54 -0
- followthemoney/schema/Article.yaml +16 -0
- followthemoney/schema/Assessment.yaml +32 -0
- followthemoney/schema/Asset.yaml +10 -4
- followthemoney/schema/Associate.yaml +41 -0
- followthemoney/schema/Audio.yaml +24 -0
- followthemoney/schema/BankAccount.yaml +53 -9
- followthemoney/schema/Call.yaml +48 -0
- followthemoney/schema/CallForTenders.yaml +117 -0
- followthemoney/schema/Company.yaml +37 -12
- followthemoney/schema/Contract.yaml +41 -7
- followthemoney/schema/ContractAward.yaml +30 -11
- followthemoney/schema/CourtCase.yaml +16 -10
- followthemoney/schema/CourtCaseParty.yaml +17 -6
- followthemoney/schema/CryptoWallet.yaml +48 -0
- followthemoney/schema/Debt.yaml +37 -0
- followthemoney/schema/Directorship.yaml +17 -4
- followthemoney/schema/Document.yaml +72 -139
- followthemoney/schema/Documentation.yml +38 -0
- followthemoney/schema/EconomicActivity.yaml +32 -17
- followthemoney/schema/Email.yaml +76 -0
- followthemoney/schema/Employment.yaml +39 -0
- followthemoney/schema/Event.yaml +35 -3
- followthemoney/schema/Family.yaml +41 -0
- followthemoney/schema/Folder.yaml +13 -0
- followthemoney/schema/HyperText.yaml +21 -0
- followthemoney/schema/Identification.yaml +40 -0
- followthemoney/schema/Image.yaml +25 -0
- followthemoney/schema/Interest.yaml +3 -6
- followthemoney/schema/Interval.yaml +56 -5
- followthemoney/schema/LegalEntity.yaml +81 -20
- followthemoney/schema/License.yaml +7 -3
- followthemoney/schema/Membership.yaml +19 -4
- followthemoney/schema/Mention.yaml +54 -0
- followthemoney/schema/Message.yaml +73 -0
- followthemoney/schema/Note.yaml +23 -0
- followthemoney/schema/Occupancy.yaml +40 -0
- followthemoney/schema/Organization.yaml +38 -3
- followthemoney/schema/Ownership.yaml +16 -4
- followthemoney/schema/Package.yaml +17 -0
- followthemoney/schema/Page.yaml +43 -0
- followthemoney/schema/Pages.yaml +23 -0
- followthemoney/schema/Passport.yaml +15 -17
- followthemoney/schema/Payment.yaml +38 -7
- followthemoney/schema/Person.yaml +61 -5
- followthemoney/schema/PlainText.yaml +17 -0
- followthemoney/schema/Position.yaml +50 -0
- followthemoney/schema/Post.yaml +42 -0
- followthemoney/schema/Project.yaml +27 -0
- followthemoney/schema/ProjectParticipant.yaml +36 -0
- followthemoney/schema/PublicBody.yaml +14 -3
- followthemoney/schema/RealEstate.yaml +19 -3
- followthemoney/schema/Representation.yaml +17 -6
- followthemoney/schema/Sanction.yaml +44 -20
- followthemoney/schema/Security.yaml +59 -0
- followthemoney/schema/Similar.yaml +37 -0
- followthemoney/schema/Succession.yaml +36 -0
- followthemoney/schema/Table.yaml +32 -0
- followthemoney/schema/TaxRoll.yaml +27 -9
- followthemoney/schema/Thing.yaml +69 -13
- followthemoney/schema/Trip.yaml +42 -0
- followthemoney/schema/UnknownLink.yaml +17 -6
- followthemoney/schema/UserAccount.yaml +44 -0
- followthemoney/schema/Value.yaml +5 -1
- followthemoney/schema/Vehicle.yaml +25 -8
- followthemoney/schema/Vessel.yaml +18 -10
- followthemoney/schema/Video.yaml +20 -0
- followthemoney/schema/Workbook.yaml +18 -0
- followthemoney/schema.py +406 -135
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
- followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
- followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
- followthemoney/translations/fr/followthemoney.po +3861 -0
- followthemoney/translations/messages.pot +3021 -725
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
- followthemoney/translations/ru/followthemoney.po +4221 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
- followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
- followthemoney/types/__init__.py +35 -17
- followthemoney/types/address.py +41 -21
- followthemoney/types/checksum.py +25 -0
- followthemoney/types/common.py +233 -88
- followthemoney/types/country.py +89 -56
- followthemoney/types/date.py +59 -76
- followthemoney/types/email.py +66 -35
- followthemoney/types/entity.py +66 -13
- followthemoney/types/gender.py +66 -0
- followthemoney/types/iban.py +47 -28
- followthemoney/types/identifier.py +49 -22
- followthemoney/types/ip.py +35 -21
- followthemoney/types/json.py +58 -0
- followthemoney/types/language.py +124 -37
- followthemoney/types/mimetype.py +44 -0
- followthemoney/types/name.py +56 -12
- followthemoney/types/number.py +30 -0
- followthemoney/types/phone.py +92 -34
- followthemoney/types/registry.py +52 -0
- followthemoney/types/string.py +43 -0
- followthemoney/types/topic.py +94 -0
- followthemoney/types/url.py +39 -17
- followthemoney/util.py +139 -45
- followthemoney-3.8.0.dist-info/METADATA +153 -0
- followthemoney-3.8.0.dist-info/RECORD +157 -0
- {followthemoney-1.3.7.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
- followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
- followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
- followthemoney/link.py +0 -75
- followthemoney/schema/Associate.yml +0 -19
- followthemoney/schema/Family.yml +0 -19
- followthemoney/schema/Land.yml +0 -9
- followthemoney/schema/Relationship.yaml +0 -26
- followthemoney/types/domain.py +0 -50
- followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
- followthemoney-1.3.7.dist-info/METADATA +0 -39
- followthemoney-1.3.7.dist-info/RECORD +0 -108
- followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
- followthemoney-1.3.7.dist-info/metadata.json +0 -1
- followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
- followthemoney-1.3.7.dist-info/top_level.txt +0 -3
- ns/ontology.py +0 -128
- tests/types/test_addresses.py +0 -24
- tests/types/test_common.py +0 -32
- tests/types/test_countries.py +0 -27
- tests/types/test_dates.py +0 -73
- tests/types/test_domains.py +0 -23
- tests/types/test_emails.py +0 -32
- tests/types/test_entity.py +0 -19
- tests/types/test_iban.py +0 -109
- tests/types/test_identifiers.py +0 -27
- tests/types/test_ip.py +0 -29
- tests/types/test_languages.py +0 -23
- tests/types/test_names.py +0 -33
- tests/types/test_phones.py +0 -24
- tests/types/test_registry.py +0 -14
- tests/types/test_urls.py +0 -23
- {ns → followthemoney/export}/__init__.py +0 -0
- /tests/types/__init__.py → /followthemoney/py.typed +0 -0
|
@@ -1,46 +1,76 @@
|
|
|
1
1
|
import re
|
|
2
2
|
from copy import deepcopy
|
|
3
|
+
from warnings import warn
|
|
3
4
|
from normality import stringify
|
|
4
|
-
from
|
|
5
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
|
|
6
|
+
from banal import keys_values, as_bool
|
|
5
7
|
|
|
8
|
+
from followthemoney.helpers import inline_names
|
|
6
9
|
from followthemoney.exc import InvalidMapping
|
|
10
|
+
from followthemoney.proxy import EntityProxy
|
|
11
|
+
from followthemoney.util import sanitize_text
|
|
12
|
+
from followthemoney.property import Property
|
|
13
|
+
from followthemoney.mapping.source import Record
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
from followthemoney.mapping.query import QueryMapping
|
|
7
17
|
|
|
8
18
|
|
|
9
19
|
class PropertyMapping(object):
|
|
10
20
|
"""Map values from a given record (e.g. a CSV row or SQL result) to the
|
|
11
21
|
schema form."""
|
|
12
|
-
FORMAT_PATTERN = re.compile('{{([^(}})]*)}}')
|
|
13
22
|
|
|
14
|
-
|
|
23
|
+
__slots__ = (
|
|
24
|
+
"query",
|
|
25
|
+
"prop",
|
|
26
|
+
"refs",
|
|
27
|
+
"join",
|
|
28
|
+
"split",
|
|
29
|
+
"entity",
|
|
30
|
+
"format",
|
|
31
|
+
"fuzzy",
|
|
32
|
+
"required",
|
|
33
|
+
"literals",
|
|
34
|
+
"template",
|
|
35
|
+
"replacements",
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
FORMAT_PATTERN = re.compile("{{([^(}})]*)}}")
|
|
39
|
+
|
|
40
|
+
def __init__(
|
|
41
|
+
self, query: "QueryMapping", data: Dict[str, Any], prop: Property
|
|
42
|
+
) -> None:
|
|
15
43
|
self.query = query
|
|
16
44
|
data = deepcopy(data)
|
|
17
|
-
self.
|
|
18
|
-
|
|
19
|
-
self.
|
|
20
|
-
self.
|
|
21
|
-
|
|
22
|
-
self.
|
|
23
|
-
self.
|
|
24
|
-
|
|
25
|
-
self.
|
|
26
|
-
self.literals
|
|
27
|
-
|
|
28
|
-
self.
|
|
29
|
-
self.
|
|
30
|
-
self.entity = data.pop('entity', None)
|
|
31
|
-
self.required = data.pop('required', False)
|
|
32
|
-
|
|
33
|
-
self.template = stringify(data.pop('template', None))
|
|
34
|
-
self.replacements = {}
|
|
45
|
+
self.prop = prop
|
|
46
|
+
|
|
47
|
+
self.refs = cast(List[str], keys_values(data, "column", "columns"))
|
|
48
|
+
self.join = cast(Optional[str], data.pop("join", None))
|
|
49
|
+
self.split = cast(Optional[str], data.pop("split", None))
|
|
50
|
+
self.entity = stringify(data.pop("entity", None))
|
|
51
|
+
self.format = stringify(data.pop("format", None))
|
|
52
|
+
self.fuzzy = as_bool(data.pop("fuzzy", False))
|
|
53
|
+
self.required = as_bool(data.pop("required", False))
|
|
54
|
+
self.literals = cast(List[str], keys_values(data, "literal", "literals"))
|
|
55
|
+
|
|
56
|
+
self.template = sanitize_text(data.pop("template", None))
|
|
57
|
+
self.replacements: Dict[str, str] = {}
|
|
35
58
|
if self.template is not None:
|
|
36
59
|
# this is hacky, trying to generate refs from template
|
|
37
60
|
for ref in self.FORMAT_PATTERN.findall(self.template):
|
|
38
61
|
self.refs.append(ref)
|
|
39
|
-
self.replacements[
|
|
62
|
+
self.replacements["{{%s}}" % ref] = ref
|
|
40
63
|
|
|
41
|
-
def bind(self):
|
|
42
|
-
if self.
|
|
43
|
-
raise InvalidMapping("Property for [%
|
|
64
|
+
def bind(self) -> None:
|
|
65
|
+
if self.prop.stub:
|
|
66
|
+
raise InvalidMapping("Property for [%r] is a stub" % self.prop)
|
|
67
|
+
|
|
68
|
+
if self.prop.deprecated:
|
|
69
|
+
warn(
|
|
70
|
+
"Mapping uses a deprecated property: %r" % self.prop,
|
|
71
|
+
DeprecationWarning,
|
|
72
|
+
stacklevel=2,
|
|
73
|
+
)
|
|
44
74
|
|
|
45
75
|
if self.entity is None:
|
|
46
76
|
return
|
|
@@ -52,44 +82,47 @@ class PropertyMapping(object):
|
|
|
52
82
|
for entity in self.query.entities:
|
|
53
83
|
if entity.name != self.entity:
|
|
54
84
|
continue
|
|
55
|
-
if not entity.schema.is_a(self.
|
|
56
|
-
raise InvalidMapping(
|
|
57
|
-
|
|
85
|
+
if not self.prop.range or not entity.schema.is_a(self.prop.range):
|
|
86
|
+
raise InvalidMapping(
|
|
87
|
+
"The entity [%r] must be a %s (not %s)"
|
|
88
|
+
% (self.prop, self.prop.range, entity.schema.name)
|
|
89
|
+
) # noqa
|
|
58
90
|
return
|
|
59
91
|
|
|
60
|
-
raise InvalidMapping(
|
|
61
|
-
|
|
92
|
+
raise InvalidMapping(
|
|
93
|
+
"No entity [%s] for property [%r]" % (self.entity, self.prop)
|
|
94
|
+
)
|
|
62
95
|
|
|
63
|
-
def record_values(self, record):
|
|
96
|
+
def record_values(self, record: Record) -> List[str]:
|
|
64
97
|
if self.template is not None:
|
|
65
98
|
# replace mentions of any refs with the values present in the
|
|
66
99
|
# current record
|
|
67
100
|
value = self.template
|
|
68
101
|
for repl, ref in self.replacements.items():
|
|
69
|
-
ref_value = record.get(ref) or
|
|
102
|
+
ref_value = record.get(ref) or ""
|
|
70
103
|
value = value.replace(repl, ref_value)
|
|
71
104
|
return [value.strip()]
|
|
72
105
|
|
|
73
106
|
values = list(self.literals)
|
|
74
|
-
|
|
107
|
+
for ref in self.refs:
|
|
108
|
+
rec_value = record.get(ref)
|
|
109
|
+
if rec_value is not None:
|
|
110
|
+
values.append(rec_value)
|
|
75
111
|
return values
|
|
76
112
|
|
|
77
|
-
def map(
|
|
78
|
-
|
|
79
|
-
|
|
113
|
+
def map(
|
|
114
|
+
self, proxy: EntityProxy, record: Record, entities: Dict[str, EntityProxy]
|
|
115
|
+
) -> List[str]:
|
|
80
116
|
if self.entity is not None:
|
|
81
117
|
entity = entities.get(self.entity)
|
|
82
118
|
if entity is not None:
|
|
83
|
-
|
|
119
|
+
proxy.unsafe_add(self.prop, entity.id, cleaned=True)
|
|
120
|
+
inline_names(proxy, entity)
|
|
84
121
|
return []
|
|
85
122
|
|
|
86
123
|
# clean the values returned by the query, or by using literals, or
|
|
87
124
|
# formats.
|
|
88
|
-
values =
|
|
89
|
-
for value in self.record_values(record):
|
|
90
|
-
value = self.type.clean(value, **kwargs)
|
|
91
|
-
if value is not None:
|
|
92
|
-
values.append(value)
|
|
125
|
+
values: List[str] = self.record_values(record)
|
|
93
126
|
|
|
94
127
|
if self.join is not None:
|
|
95
128
|
values = [self.join.join(values)]
|
|
@@ -97,7 +130,20 @@ class PropertyMapping(object):
|
|
|
97
130
|
if self.split is not None:
|
|
98
131
|
splote = []
|
|
99
132
|
for value in values:
|
|
100
|
-
splote
|
|
133
|
+
splote.extend(value.split(self.split))
|
|
101
134
|
values = splote
|
|
102
135
|
|
|
103
|
-
|
|
136
|
+
discarded_values: List[str] = []
|
|
137
|
+
|
|
138
|
+
for value in values:
|
|
139
|
+
added_value = proxy.unsafe_add(
|
|
140
|
+
prop=self.prop,
|
|
141
|
+
value=value,
|
|
142
|
+
fuzzy=self.fuzzy,
|
|
143
|
+
format=self.format,
|
|
144
|
+
)
|
|
145
|
+
|
|
146
|
+
if value is not None and added_value is None:
|
|
147
|
+
discarded_values.append(value)
|
|
148
|
+
|
|
149
|
+
return discarded_values
|
followthemoney/mapping/query.py
CHANGED
|
@@ -1,20 +1,27 @@
|
|
|
1
|
+
from followthemoney.mapping.source import Record, Source
|
|
2
|
+
from typing import TYPE_CHECKING, Any, List, Optional, Set, Dict
|
|
3
|
+
|
|
4
|
+
from followthemoney.proxy import EntityProxy
|
|
1
5
|
from followthemoney.mapping.entity import EntityMapping
|
|
2
6
|
from followthemoney.mapping.sql import SQLSource
|
|
3
7
|
from followthemoney.mapping.csv import CSVSource
|
|
4
8
|
from followthemoney.exc import InvalidMapping
|
|
5
9
|
|
|
10
|
+
if TYPE_CHECKING:
|
|
11
|
+
from followthemoney.model import Model
|
|
6
12
|
|
|
7
|
-
class QueryMapping(object):
|
|
8
13
|
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
self.data = data
|
|
14
|
+
class QueryMapping:
|
|
15
|
+
__slots__ = ("model", "data", "refs", "entities", "source")
|
|
12
16
|
|
|
13
|
-
|
|
14
|
-
self
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
17
|
+
def __init__(
|
|
18
|
+
self, model: "Model", data: Dict[str, Any], key_prefix: Optional[str] = None
|
|
19
|
+
) -> None:
|
|
20
|
+
self.model = model
|
|
21
|
+
self.refs: Set[str] = set()
|
|
22
|
+
self.entities: List[EntityMapping] = []
|
|
23
|
+
for name, edata in data.get("entities", {}).items():
|
|
24
|
+
entity = EntityMapping(model, self, name, edata, key_prefix=key_prefix)
|
|
18
25
|
|
|
19
26
|
self.entities.append(entity)
|
|
20
27
|
self.refs.update(entity.refs)
|
|
@@ -32,7 +39,7 @@ class QueryMapping(object):
|
|
|
32
39
|
# in dependent entities.
|
|
33
40
|
entities = self.entities
|
|
34
41
|
self.entities = []
|
|
35
|
-
resolved = set()
|
|
42
|
+
resolved: Set[str] = set()
|
|
36
43
|
while len(entities) > 0:
|
|
37
44
|
before = len(entities)
|
|
38
45
|
for entity in entities:
|
|
@@ -44,16 +51,17 @@ class QueryMapping(object):
|
|
|
44
51
|
if before == len(entities):
|
|
45
52
|
raise InvalidMapping("Circular entity dependency detected.")
|
|
46
53
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
+
self.source = self._get_source(data)
|
|
55
|
+
|
|
56
|
+
def _get_source(self, data: Dict[str, Any]) -> Source:
|
|
57
|
+
if "database" in data:
|
|
58
|
+
return SQLSource(self, data)
|
|
59
|
+
if "csv_url" in data or "csv_urls" in data:
|
|
60
|
+
return CSVSource(self, data)
|
|
61
|
+
raise InvalidMapping("Cannot determine mapping type: %r" % data)
|
|
54
62
|
|
|
55
|
-
def map(self, record):
|
|
56
|
-
data = {}
|
|
63
|
+
def map(self, record: Record) -> Dict[str, EntityProxy]:
|
|
64
|
+
data: Dict[str, EntityProxy] = {}
|
|
57
65
|
for entity in self.entities:
|
|
58
66
|
mapped = entity.map(record, data)
|
|
59
67
|
if mapped is not None:
|
followthemoney/mapping/source.py
CHANGED
|
@@ -1,11 +1,21 @@
|
|
|
1
|
+
from typing import TYPE_CHECKING, Any, Dict, Generator, Optional, Set, cast
|
|
1
2
|
|
|
3
|
+
if TYPE_CHECKING:
|
|
4
|
+
from followthemoney.mapping.query import QueryMapping
|
|
5
|
+
|
|
6
|
+
Filter = Set[Optional[str]]
|
|
7
|
+
Record = Dict[str, str]
|
|
2
8
|
|
|
3
|
-
class Source(object):
|
|
4
9
|
|
|
5
|
-
|
|
10
|
+
class Source(object):
|
|
11
|
+
def __init__(self, query: "QueryMapping", data: Dict[str, Any]) -> None:
|
|
6
12
|
self.query = query
|
|
7
|
-
self.filters =
|
|
8
|
-
self.filters_not =
|
|
13
|
+
self.filters = cast(Dict[str, Any], data.get("filters", {})).items()
|
|
14
|
+
self.filters_not = cast(Dict[str, Any], data.get("filters_not", {})).items()
|
|
15
|
+
|
|
16
|
+
@property
|
|
17
|
+
def records(self) -> Generator[Record, None, None]:
|
|
18
|
+
raise NotImplementedError
|
|
9
19
|
|
|
10
|
-
def __len__(self):
|
|
20
|
+
def __len__(self) -> int:
|
|
11
21
|
return 0
|
followthemoney/mapping/sql.py
CHANGED
|
@@ -1,18 +1,24 @@
|
|
|
1
1
|
import os
|
|
2
|
-
import six
|
|
3
2
|
import logging
|
|
4
3
|
from uuid import uuid4
|
|
5
|
-
from
|
|
6
|
-
from
|
|
7
|
-
from sqlalchemy import
|
|
8
|
-
from sqlalchemy import select
|
|
9
|
-
|
|
4
|
+
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union, cast
|
|
5
|
+
from banal import ensure_list, is_listish, keys_values
|
|
6
|
+
from sqlalchemy import MetaData, func
|
|
7
|
+
from sqlalchemy.future import select
|
|
8
|
+
from sqlalchemy.engine import Engine, create_engine
|
|
9
|
+
from sqlalchemy.sql.elements import Label
|
|
10
10
|
from sqlalchemy.pool import NullPool
|
|
11
11
|
from sqlalchemy.schema import Table
|
|
12
|
+
from sqlalchemy.sql.expression import Select
|
|
12
13
|
|
|
13
|
-
from followthemoney.mapping.source import Source
|
|
14
|
+
from followthemoney.mapping.source import Record, Source
|
|
15
|
+
from followthemoney.util import sanitize_text
|
|
14
16
|
from followthemoney.exc import InvalidMapping
|
|
15
17
|
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
from followthemoney.mapping.query import QueryMapping
|
|
20
|
+
|
|
21
|
+
|
|
16
22
|
log = logging.getLogger(__name__)
|
|
17
23
|
DATA_PAGE = 1000
|
|
18
24
|
|
|
@@ -20,20 +26,22 @@ DATA_PAGE = 1000
|
|
|
20
26
|
class QueryTable(object):
|
|
21
27
|
"""A table to be joined in."""
|
|
22
28
|
|
|
23
|
-
def __init__(
|
|
24
|
-
self
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
self.
|
|
29
|
+
def __init__(
|
|
30
|
+
self, meta: MetaData, engine: Engine, data: Union[str, Dict[str, str]]
|
|
31
|
+
) -> None:
|
|
32
|
+
if isinstance(data, str):
|
|
33
|
+
data = {"table": data}
|
|
34
|
+
table_ref = data.get("table")
|
|
35
|
+
if table_ref is None:
|
|
36
|
+
raise InvalidMapping("Query has no table!")
|
|
37
|
+
alias_ref = data.get("alias", table_ref)
|
|
38
|
+
self.table = Table(table_ref, meta, autoload_with=engine)
|
|
39
|
+
self.alias = self.table.alias(alias_ref)
|
|
40
|
+
|
|
41
|
+
self.refs: Dict[str, Label[Any]] = {}
|
|
34
42
|
for column in self.alias.columns:
|
|
35
|
-
name =
|
|
36
|
-
labeled_column = column.label(
|
|
43
|
+
name = "%s.%s" % (alias_ref, column.name)
|
|
44
|
+
labeled_column = column.label("col_%s" % uuid4().hex[:10])
|
|
37
45
|
self.refs[name] = labeled_column
|
|
38
46
|
self.refs[column.name] = labeled_column
|
|
39
47
|
|
|
@@ -41,70 +49,76 @@ class QueryTable(object):
|
|
|
41
49
|
class SQLSource(Source):
|
|
42
50
|
"""Query mapper for loading data from a SQL query."""
|
|
43
51
|
|
|
44
|
-
def __init__(self, query, data):
|
|
52
|
+
def __init__(self, query: "QueryMapping", data: Dict[str, Any]) -> None:
|
|
45
53
|
super(SQLSource, self).__init__(query, data)
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
self.engine = create_engine(self.database_uri,
|
|
51
|
-
poolclass=NullPool,
|
|
52
|
-
**kwargs)
|
|
54
|
+
database = data.get("database")
|
|
55
|
+
if database is None:
|
|
56
|
+
raise InvalidMapping("No database in SQL mapping!")
|
|
57
|
+
self.database_uri = cast(str, os.path.expandvars(database))
|
|
58
|
+
self.engine = create_engine(self.database_uri, poolclass=NullPool)
|
|
53
59
|
self.meta = MetaData()
|
|
54
|
-
self.meta.bind = self.engine
|
|
55
60
|
|
|
56
|
-
tables =
|
|
57
|
-
tables
|
|
58
|
-
self.
|
|
59
|
-
self.joins = ensure_list(data.get('joins'))
|
|
61
|
+
tables = keys_values(data, "table", "tables")
|
|
62
|
+
self.tables = [QueryTable(self.meta, self.engine, f) for f in tables]
|
|
63
|
+
self.joins = cast(List[Dict[str, str]], ensure_list(data.get("joins")))
|
|
60
64
|
|
|
61
|
-
def get_column(self, ref):
|
|
65
|
+
def get_column(self, ref: Optional[str]) -> Label[Any]:
|
|
62
66
|
for table in self.tables:
|
|
63
67
|
if ref in table.refs:
|
|
64
|
-
return table.refs
|
|
68
|
+
return table.refs[ref]
|
|
65
69
|
raise InvalidMapping("Missing reference: %s" % ref)
|
|
66
70
|
|
|
67
|
-
def apply_filters(self, q):
|
|
71
|
+
def apply_filters(self, q: Select) -> Select:
|
|
68
72
|
for col, val in self.filters:
|
|
69
|
-
|
|
73
|
+
if is_listish(val):
|
|
74
|
+
q = q.where(self.get_column(col).in_(val))
|
|
75
|
+
else:
|
|
76
|
+
q = q.where(self.get_column(col) == val)
|
|
70
77
|
for col, val in self.filters_not:
|
|
71
|
-
|
|
78
|
+
if is_listish(val):
|
|
79
|
+
q = q.where(self.get_column(col).notin_(val))
|
|
80
|
+
else:
|
|
81
|
+
q = q.where(self.get_column(col) != val)
|
|
72
82
|
# not sure this is a great idea:
|
|
73
83
|
# if self.data.get('where'):
|
|
74
84
|
# q = q.where(sql_text(self.data.get('where')))
|
|
75
85
|
for join in self.joins:
|
|
76
|
-
left = self.get_column(join.get(
|
|
77
|
-
right = self.get_column(join.get(
|
|
86
|
+
left = self.get_column(join.get("left"))
|
|
87
|
+
right = self.get_column(join.get("right"))
|
|
78
88
|
q = q.where(left == right)
|
|
79
89
|
return q
|
|
80
90
|
|
|
81
|
-
def compose_query(self):
|
|
82
|
-
from_clause = [t.alias for t in self.tables]
|
|
91
|
+
def compose_query(self) -> Select:
|
|
83
92
|
columns = [self.get_column(r) for r in self.query.refs]
|
|
84
|
-
q = select(columns
|
|
93
|
+
q = select(*columns)
|
|
94
|
+
q = q.select_from(*[t.alias for t in self.tables])
|
|
85
95
|
return self.apply_filters(q)
|
|
86
96
|
|
|
87
97
|
@property
|
|
88
|
-
def records(self):
|
|
98
|
+
def records(self) -> Generator[Record, None, None]:
|
|
89
99
|
"""Compose the actual query and return an iterator of ``Record``."""
|
|
90
100
|
mapping = [(r, self.get_column(r).name) for r in self.query.refs]
|
|
91
101
|
q = self.compose_query()
|
|
92
102
|
log.info("Query: %s", q)
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
data
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
103
|
+
with self.engine.connect() as conn:
|
|
104
|
+
rp = conn.execution_options(stream_results=True).execute(q)
|
|
105
|
+
while True:
|
|
106
|
+
rows = rp.fetchmany(size=DATA_PAGE)
|
|
107
|
+
if not len(rows):
|
|
108
|
+
break
|
|
109
|
+
for row in rows:
|
|
110
|
+
row_map = row._mapping
|
|
111
|
+
data: Record = {}
|
|
112
|
+
for ref, name in mapping:
|
|
113
|
+
value = sanitize_text(row_map[name])
|
|
114
|
+
if value is not None:
|
|
115
|
+
data[ref] = value
|
|
116
|
+
yield data
|
|
117
|
+
|
|
118
|
+
def __len__(self) -> int:
|
|
119
|
+
q = select(func.count("*"))
|
|
120
|
+
q = q.select_from(*[t.alias for t in self.tables])
|
|
108
121
|
q = self.apply_filters(q)
|
|
109
|
-
|
|
110
|
-
|
|
122
|
+
with self.engine.connect() as conn:
|
|
123
|
+
rp = conn.execute(q)
|
|
124
|
+
return int(rp.scalar() or 0)
|
followthemoney/messages.py
CHANGED
|
@@ -1,19 +1,25 @@
|
|
|
1
|
-
import six
|
|
2
1
|
import yaml
|
|
2
|
+
from typing import Any, Dict, Generator, List, TextIO, Tuple
|
|
3
3
|
|
|
4
|
+
Message = Tuple[Any, Any, List[str], List[str]]
|
|
4
5
|
|
|
5
|
-
|
|
6
|
+
|
|
7
|
+
def extract_object(
|
|
8
|
+
data: Dict[str, Any], path: List[str]
|
|
9
|
+
) -> Generator[Message, None, None]:
|
|
6
10
|
for key, value in data.items():
|
|
7
11
|
subpath = path + [key]
|
|
8
|
-
if isinstance(value,
|
|
9
|
-
if key in [
|
|
10
|
-
comment =
|
|
12
|
+
if isinstance(value, str):
|
|
13
|
+
if key in ["label", "reverse", "description", "plural"]:
|
|
14
|
+
comment = ".".join(subpath)
|
|
11
15
|
yield (None, None, [value], [comment])
|
|
12
16
|
if isinstance(value, dict):
|
|
13
17
|
for res in extract_object(value, subpath):
|
|
14
18
|
yield res
|
|
15
19
|
|
|
16
20
|
|
|
17
|
-
def extract_yaml(
|
|
18
|
-
|
|
21
|
+
def extract_yaml(
|
|
22
|
+
fileobj: TextIO, keywords: Any, comment_tags: Any, options: Any
|
|
23
|
+
) -> Generator[Message, None, None]:
|
|
24
|
+
data = yaml.safe_load(fileobj)
|
|
19
25
|
return extract_object(data, [])
|