followthemoney 1.3.7__py3-none-any.whl → 3.8.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. followthemoney/__init__.py +5 -3
  2. followthemoney/cli/__init__.py +17 -0
  3. followthemoney/cli/aggregate.py +56 -0
  4. followthemoney/cli/cli.py +88 -0
  5. followthemoney/cli/exports.py +121 -0
  6. followthemoney/cli/mapping.py +85 -0
  7. followthemoney/cli/sieve.py +67 -0
  8. followthemoney/cli/util.py +142 -0
  9. followthemoney/compare.py +130 -60
  10. followthemoney/exc.py +19 -6
  11. followthemoney/export/common.py +29 -0
  12. followthemoney/export/csv.py +82 -0
  13. followthemoney/export/excel.py +75 -0
  14. followthemoney/export/graph.py +79 -0
  15. followthemoney/export/neo4j.py +182 -0
  16. followthemoney/export/rdf.py +26 -0
  17. followthemoney/graph.py +308 -0
  18. followthemoney/helpers.py +212 -0
  19. followthemoney/mapping/__init__.py +1 -1
  20. followthemoney/mapping/csv.py +67 -35
  21. followthemoney/mapping/entity.py +116 -44
  22. followthemoney/mapping/property.py +90 -44
  23. followthemoney/mapping/query.py +27 -19
  24. followthemoney/mapping/source.py +15 -5
  25. followthemoney/mapping/sql.py +75 -61
  26. followthemoney/messages.py +13 -7
  27. followthemoney/model.py +108 -56
  28. followthemoney/namespace.py +119 -0
  29. followthemoney/offshore.py +48 -0
  30. followthemoney/ontology.py +77 -0
  31. followthemoney/property.py +204 -71
  32. followthemoney/proxy.py +455 -118
  33. followthemoney/rdf.py +9 -0
  34. followthemoney/schema/Address.yaml +78 -0
  35. followthemoney/schema/Airplane.yaml +17 -10
  36. followthemoney/schema/Analyzable.yaml +54 -0
  37. followthemoney/schema/Article.yaml +16 -0
  38. followthemoney/schema/Assessment.yaml +32 -0
  39. followthemoney/schema/Asset.yaml +10 -4
  40. followthemoney/schema/Associate.yaml +41 -0
  41. followthemoney/schema/Audio.yaml +24 -0
  42. followthemoney/schema/BankAccount.yaml +53 -9
  43. followthemoney/schema/Call.yaml +48 -0
  44. followthemoney/schema/CallForTenders.yaml +117 -0
  45. followthemoney/schema/Company.yaml +37 -12
  46. followthemoney/schema/Contract.yaml +41 -7
  47. followthemoney/schema/ContractAward.yaml +30 -11
  48. followthemoney/schema/CourtCase.yaml +16 -10
  49. followthemoney/schema/CourtCaseParty.yaml +17 -6
  50. followthemoney/schema/CryptoWallet.yaml +48 -0
  51. followthemoney/schema/Debt.yaml +37 -0
  52. followthemoney/schema/Directorship.yaml +17 -4
  53. followthemoney/schema/Document.yaml +72 -139
  54. followthemoney/schema/Documentation.yml +38 -0
  55. followthemoney/schema/EconomicActivity.yaml +32 -17
  56. followthemoney/schema/Email.yaml +76 -0
  57. followthemoney/schema/Employment.yaml +39 -0
  58. followthemoney/schema/Event.yaml +35 -3
  59. followthemoney/schema/Family.yaml +41 -0
  60. followthemoney/schema/Folder.yaml +13 -0
  61. followthemoney/schema/HyperText.yaml +21 -0
  62. followthemoney/schema/Identification.yaml +40 -0
  63. followthemoney/schema/Image.yaml +25 -0
  64. followthemoney/schema/Interest.yaml +3 -6
  65. followthemoney/schema/Interval.yaml +56 -5
  66. followthemoney/schema/LegalEntity.yaml +81 -20
  67. followthemoney/schema/License.yaml +7 -3
  68. followthemoney/schema/Membership.yaml +19 -4
  69. followthemoney/schema/Mention.yaml +54 -0
  70. followthemoney/schema/Message.yaml +73 -0
  71. followthemoney/schema/Note.yaml +23 -0
  72. followthemoney/schema/Occupancy.yaml +40 -0
  73. followthemoney/schema/Organization.yaml +38 -3
  74. followthemoney/schema/Ownership.yaml +16 -4
  75. followthemoney/schema/Package.yaml +17 -0
  76. followthemoney/schema/Page.yaml +43 -0
  77. followthemoney/schema/Pages.yaml +23 -0
  78. followthemoney/schema/Passport.yaml +15 -17
  79. followthemoney/schema/Payment.yaml +38 -7
  80. followthemoney/schema/Person.yaml +61 -5
  81. followthemoney/schema/PlainText.yaml +17 -0
  82. followthemoney/schema/Position.yaml +50 -0
  83. followthemoney/schema/Post.yaml +42 -0
  84. followthemoney/schema/Project.yaml +27 -0
  85. followthemoney/schema/ProjectParticipant.yaml +36 -0
  86. followthemoney/schema/PublicBody.yaml +14 -3
  87. followthemoney/schema/RealEstate.yaml +19 -3
  88. followthemoney/schema/Representation.yaml +17 -6
  89. followthemoney/schema/Sanction.yaml +44 -20
  90. followthemoney/schema/Security.yaml +59 -0
  91. followthemoney/schema/Similar.yaml +37 -0
  92. followthemoney/schema/Succession.yaml +36 -0
  93. followthemoney/schema/Table.yaml +32 -0
  94. followthemoney/schema/TaxRoll.yaml +27 -9
  95. followthemoney/schema/Thing.yaml +69 -13
  96. followthemoney/schema/Trip.yaml +42 -0
  97. followthemoney/schema/UnknownLink.yaml +17 -6
  98. followthemoney/schema/UserAccount.yaml +44 -0
  99. followthemoney/schema/Value.yaml +5 -1
  100. followthemoney/schema/Vehicle.yaml +25 -8
  101. followthemoney/schema/Vessel.yaml +18 -10
  102. followthemoney/schema/Video.yaml +20 -0
  103. followthemoney/schema/Workbook.yaml +18 -0
  104. followthemoney/schema.py +406 -135
  105. followthemoney/translations/ar/LC_MESSAGES/followthemoney.mo +0 -0
  106. followthemoney/translations/ar/LC_MESSAGES/followthemoney.po +2900 -787
  107. followthemoney/translations/bs/LC_MESSAGES/followthemoney.mo +0 -0
  108. followthemoney/translations/bs/LC_MESSAGES/followthemoney.po +2108 -520
  109. followthemoney/translations/de/LC_MESSAGES/followthemoney.mo +0 -0
  110. followthemoney/translations/de/LC_MESSAGES/followthemoney.po +2902 -782
  111. followthemoney/translations/es/LC_MESSAGES/followthemoney.mo +0 -0
  112. followthemoney/translations/es/LC_MESSAGES/followthemoney.po +2893 -779
  113. followthemoney/translations/fr/LC_MESSAGES/followthemoney.mo +0 -0
  114. followthemoney/translations/fr/LC_MESSAGES/followthemoney.po +4362 -0
  115. followthemoney/translations/fr/followthemoney.po +3861 -0
  116. followthemoney/translations/messages.pot +3021 -725
  117. followthemoney/translations/nb/LC_MESSAGES/followthemoney.mo +0 -0
  118. followthemoney/translations/nb/LC_MESSAGES/followthemoney.po +3778 -0
  119. followthemoney/translations/nl/LC_MESSAGES/followthemoney.mo +0 -0
  120. followthemoney/translations/nl/LC_MESSAGES/followthemoney.po +3837 -0
  121. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.mo +0 -0
  122. followthemoney/translations/pt_BR/LC_MESSAGES/followthemoney.po +3784 -0
  123. followthemoney/translations/ru/LC_MESSAGES/followthemoney.mo +0 -0
  124. followthemoney/translations/ru/LC_MESSAGES/followthemoney.po +2837 -539
  125. followthemoney/translations/ru/followthemoney.po +4221 -0
  126. followthemoney/translations/tr/LC_MESSAGES/followthemoney.mo +0 -0
  127. followthemoney/translations/tr/LC_MESSAGES/followthemoney.po +2073 -491
  128. followthemoney/types/__init__.py +35 -17
  129. followthemoney/types/address.py +41 -21
  130. followthemoney/types/checksum.py +25 -0
  131. followthemoney/types/common.py +233 -88
  132. followthemoney/types/country.py +89 -56
  133. followthemoney/types/date.py +59 -76
  134. followthemoney/types/email.py +66 -35
  135. followthemoney/types/entity.py +66 -13
  136. followthemoney/types/gender.py +66 -0
  137. followthemoney/types/iban.py +47 -28
  138. followthemoney/types/identifier.py +49 -22
  139. followthemoney/types/ip.py +35 -21
  140. followthemoney/types/json.py +58 -0
  141. followthemoney/types/language.py +124 -37
  142. followthemoney/types/mimetype.py +44 -0
  143. followthemoney/types/name.py +56 -12
  144. followthemoney/types/number.py +30 -0
  145. followthemoney/types/phone.py +92 -34
  146. followthemoney/types/registry.py +52 -0
  147. followthemoney/types/string.py +43 -0
  148. followthemoney/types/topic.py +94 -0
  149. followthemoney/types/url.py +39 -17
  150. followthemoney/util.py +139 -45
  151. followthemoney-3.8.0.dist-info/METADATA +153 -0
  152. followthemoney-3.8.0.dist-info/RECORD +157 -0
  153. {followthemoney-1.3.7.dist-info → followthemoney-3.8.0.dist-info}/WHEEL +1 -2
  154. followthemoney-3.8.0.dist-info/entry_points.txt +17 -0
  155. followthemoney-1.3.7.dist-info/LICENSE.txt → followthemoney-3.8.0.dist-info/licenses/LICENSE +1 -1
  156. followthemoney/link.py +0 -75
  157. followthemoney/schema/Associate.yml +0 -19
  158. followthemoney/schema/Family.yml +0 -19
  159. followthemoney/schema/Land.yml +0 -9
  160. followthemoney/schema/Relationship.yaml +0 -26
  161. followthemoney/types/domain.py +0 -50
  162. followthemoney-1.3.7.dist-info/DESCRIPTION.rst +0 -3
  163. followthemoney-1.3.7.dist-info/METADATA +0 -39
  164. followthemoney-1.3.7.dist-info/RECORD +0 -108
  165. followthemoney-1.3.7.dist-info/entry_points.txt +0 -3
  166. followthemoney-1.3.7.dist-info/metadata.json +0 -1
  167. followthemoney-1.3.7.dist-info/namespace_packages.txt +0 -1
  168. followthemoney-1.3.7.dist-info/top_level.txt +0 -3
  169. ns/ontology.py +0 -128
  170. tests/types/test_addresses.py +0 -24
  171. tests/types/test_common.py +0 -32
  172. tests/types/test_countries.py +0 -27
  173. tests/types/test_dates.py +0 -73
  174. tests/types/test_domains.py +0 -23
  175. tests/types/test_emails.py +0 -32
  176. tests/types/test_entity.py +0 -19
  177. tests/types/test_iban.py +0 -109
  178. tests/types/test_identifiers.py +0 -27
  179. tests/types/test_ip.py +0 -29
  180. tests/types/test_languages.py +0 -23
  181. tests/types/test_names.py +0 -33
  182. tests/types/test_phones.py +0 -24
  183. tests/types/test_registry.py +0 -14
  184. tests/types/test_urls.py +0 -23
  185. {ns → followthemoney/export}/__init__.py +0 -0
  186. /tests/types/__init__.py → /followthemoney/py.typed +0 -0
@@ -1,46 +1,76 @@
1
1
  import re
2
2
  from copy import deepcopy
3
+ from warnings import warn
3
4
  from normality import stringify
4
- from banal import unique_list, ensure_list
5
+ from typing import TYPE_CHECKING, Any, Dict, List, Optional, cast
6
+ from banal import keys_values, as_bool
5
7
 
8
+ from followthemoney.helpers import inline_names
6
9
  from followthemoney.exc import InvalidMapping
10
+ from followthemoney.proxy import EntityProxy
11
+ from followthemoney.util import sanitize_text
12
+ from followthemoney.property import Property
13
+ from followthemoney.mapping.source import Record
14
+
15
+ if TYPE_CHECKING:
16
+ from followthemoney.mapping.query import QueryMapping
7
17
 
8
18
 
9
19
  class PropertyMapping(object):
10
20
  """Map values from a given record (e.g. a CSV row or SQL result) to the
11
21
  schema form."""
12
- FORMAT_PATTERN = re.compile('{{([^(}})]*)}}')
13
22
 
14
- def __init__(self, query, data, schema):
23
+ __slots__ = (
24
+ "query",
25
+ "prop",
26
+ "refs",
27
+ "join",
28
+ "split",
29
+ "entity",
30
+ "format",
31
+ "fuzzy",
32
+ "required",
33
+ "literals",
34
+ "template",
35
+ "replacements",
36
+ )
37
+
38
+ FORMAT_PATTERN = re.compile("{{([^(}})]*)}}")
39
+
40
+ def __init__(
41
+ self, query: "QueryMapping", data: Dict[str, Any], prop: Property
42
+ ) -> None:
15
43
  self.query = query
16
44
  data = deepcopy(data)
17
- self.data = data
18
- self.schema = schema
19
- self.name = schema.name
20
- self.type = schema.type
21
-
22
- self.refs = ensure_list(data.pop('column', []))
23
- self.refs.extend(ensure_list(data.pop('columns', [])))
24
-
25
- self.literals = ensure_list(data.pop('literal', []))
26
- self.literals.extend(ensure_list(data.pop('literals', [])))
27
-
28
- self.join = data.pop('join', None)
29
- self.split = data.pop('split', None)
30
- self.entity = data.pop('entity', None)
31
- self.required = data.pop('required', False)
32
-
33
- self.template = stringify(data.pop('template', None))
34
- self.replacements = {}
45
+ self.prop = prop
46
+
47
+ self.refs = cast(List[str], keys_values(data, "column", "columns"))
48
+ self.join = cast(Optional[str], data.pop("join", None))
49
+ self.split = cast(Optional[str], data.pop("split", None))
50
+ self.entity = stringify(data.pop("entity", None))
51
+ self.format = stringify(data.pop("format", None))
52
+ self.fuzzy = as_bool(data.pop("fuzzy", False))
53
+ self.required = as_bool(data.pop("required", False))
54
+ self.literals = cast(List[str], keys_values(data, "literal", "literals"))
55
+
56
+ self.template = sanitize_text(data.pop("template", None))
57
+ self.replacements: Dict[str, str] = {}
35
58
  if self.template is not None:
36
59
  # this is hacky, trying to generate refs from template
37
60
  for ref in self.FORMAT_PATTERN.findall(self.template):
38
61
  self.refs.append(ref)
39
- self.replacements['{{%s}}' % ref] = ref
62
+ self.replacements["{{%s}}" % ref] = ref
40
63
 
41
- def bind(self):
42
- if self.schema.stub:
43
- raise InvalidMapping("Property for [%s] is a stub" % self.name)
64
+ def bind(self) -> None:
65
+ if self.prop.stub:
66
+ raise InvalidMapping("Property for [%r] is a stub" % self.prop)
67
+
68
+ if self.prop.deprecated:
69
+ warn(
70
+ "Mapping uses a deprecated property: %r" % self.prop,
71
+ DeprecationWarning,
72
+ stacklevel=2,
73
+ )
44
74
 
45
75
  if self.entity is None:
46
76
  return
@@ -52,44 +82,47 @@ class PropertyMapping(object):
52
82
  for entity in self.query.entities:
53
83
  if entity.name != self.entity:
54
84
  continue
55
- if not entity.schema.is_a(self.schema.range):
56
- raise InvalidMapping("The entity [%s] must be a %s (not %s)" %
57
- (self.name, self.schema.range, entity.schema.name)) # noqa
85
+ if not self.prop.range or not entity.schema.is_a(self.prop.range):
86
+ raise InvalidMapping(
87
+ "The entity [%r] must be a %s (not %s)"
88
+ % (self.prop, self.prop.range, entity.schema.name)
89
+ ) # noqa
58
90
  return
59
91
 
60
- raise InvalidMapping("No entity [%s] for property [%s]"
61
- % (self.entity, self.name))
92
+ raise InvalidMapping(
93
+ "No entity [%s] for property [%r]" % (self.entity, self.prop)
94
+ )
62
95
 
63
- def record_values(self, record):
96
+ def record_values(self, record: Record) -> List[str]:
64
97
  if self.template is not None:
65
98
  # replace mentions of any refs with the values present in the
66
99
  # current record
67
100
  value = self.template
68
101
  for repl, ref in self.replacements.items():
69
- ref_value = record.get(ref) or ''
102
+ ref_value = record.get(ref) or ""
70
103
  value = value.replace(repl, ref_value)
71
104
  return [value.strip()]
72
105
 
73
106
  values = list(self.literals)
74
- values.extend([record.get(r) for r in self.refs])
107
+ for ref in self.refs:
108
+ rec_value = record.get(ref)
109
+ if rec_value is not None:
110
+ values.append(rec_value)
75
111
  return values
76
112
 
77
- def map(self, record, entities, **kwargs):
78
- kwargs.update(self.data)
79
-
113
+ def map(
114
+ self, proxy: EntityProxy, record: Record, entities: Dict[str, EntityProxy]
115
+ ) -> List[str]:
80
116
  if self.entity is not None:
81
117
  entity = entities.get(self.entity)
82
118
  if entity is not None:
83
- return [entity.id]
119
+ proxy.unsafe_add(self.prop, entity.id, cleaned=True)
120
+ inline_names(proxy, entity)
84
121
  return []
85
122
 
86
123
  # clean the values returned by the query, or by using literals, or
87
124
  # formats.
88
- values = []
89
- for value in self.record_values(record):
90
- value = self.type.clean(value, **kwargs)
91
- if value is not None:
92
- values.append(value)
125
+ values: List[str] = self.record_values(record)
93
126
 
94
127
  if self.join is not None:
95
128
  values = [self.join.join(values)]
@@ -97,7 +130,20 @@ class PropertyMapping(object):
97
130
  if self.split is not None:
98
131
  splote = []
99
132
  for value in values:
100
- splote = splote + value.split(self.split)
133
+ splote.extend(value.split(self.split))
101
134
  values = splote
102
135
 
103
- return unique_list(values)
136
+ discarded_values: List[str] = []
137
+
138
+ for value in values:
139
+ added_value = proxy.unsafe_add(
140
+ prop=self.prop,
141
+ value=value,
142
+ fuzzy=self.fuzzy,
143
+ format=self.format,
144
+ )
145
+
146
+ if value is not None and added_value is None:
147
+ discarded_values.append(value)
148
+
149
+ return discarded_values
@@ -1,20 +1,27 @@
1
+ from followthemoney.mapping.source import Record, Source
2
+ from typing import TYPE_CHECKING, Any, List, Optional, Set, Dict
3
+
4
+ from followthemoney.proxy import EntityProxy
1
5
  from followthemoney.mapping.entity import EntityMapping
2
6
  from followthemoney.mapping.sql import SQLSource
3
7
  from followthemoney.mapping.csv import CSVSource
4
8
  from followthemoney.exc import InvalidMapping
5
9
 
10
+ if TYPE_CHECKING:
11
+ from followthemoney.model import Model
6
12
 
7
- class QueryMapping(object):
8
13
 
9
- def __init__(self, model, data, key_prefix=None):
10
- self.model = model
11
- self.data = data
14
+ class QueryMapping:
15
+ __slots__ = ("model", "data", "refs", "entities", "source")
12
16
 
13
- self.refs = set()
14
- self.entities = []
15
- for name, data in data.get('entities', {}).items():
16
- entity = EntityMapping(model, self, name, data,
17
- key_prefix=key_prefix)
17
+ def __init__(
18
+ self, model: "Model", data: Dict[str, Any], key_prefix: Optional[str] = None
19
+ ) -> None:
20
+ self.model = model
21
+ self.refs: Set[str] = set()
22
+ self.entities: List[EntityMapping] = []
23
+ for name, edata in data.get("entities", {}).items():
24
+ entity = EntityMapping(model, self, name, edata, key_prefix=key_prefix)
18
25
 
19
26
  self.entities.append(entity)
20
27
  self.refs.update(entity.refs)
@@ -32,7 +39,7 @@ class QueryMapping(object):
32
39
  # in dependent entities.
33
40
  entities = self.entities
34
41
  self.entities = []
35
- resolved = set()
42
+ resolved: Set[str] = set()
36
43
  while len(entities) > 0:
37
44
  before = len(entities)
38
45
  for entity in entities:
@@ -44,16 +51,17 @@ class QueryMapping(object):
44
51
  if before == len(entities):
45
52
  raise InvalidMapping("Circular entity dependency detected.")
46
53
 
47
- @property
48
- def source(self):
49
- if 'database' in self.data:
50
- return SQLSource(self, self.data)
51
- elif 'csv_url' in self.data or 'csv_urls' in self.data:
52
- return CSVSource(self, self.data)
53
- raise InvalidMapping("Cannot determine mapping type")
54
+ self.source = self._get_source(data)
55
+
56
+ def _get_source(self, data: Dict[str, Any]) -> Source:
57
+ if "database" in data:
58
+ return SQLSource(self, data)
59
+ if "csv_url" in data or "csv_urls" in data:
60
+ return CSVSource(self, data)
61
+ raise InvalidMapping("Cannot determine mapping type: %r" % data)
54
62
 
55
- def map(self, record):
56
- data = {}
63
+ def map(self, record: Record) -> Dict[str, EntityProxy]:
64
+ data: Dict[str, EntityProxy] = {}
57
65
  for entity in self.entities:
58
66
  mapped = entity.map(record, data)
59
67
  if mapped is not None:
@@ -1,11 +1,21 @@
1
+ from typing import TYPE_CHECKING, Any, Dict, Generator, Optional, Set, cast
1
2
 
3
+ if TYPE_CHECKING:
4
+ from followthemoney.mapping.query import QueryMapping
5
+
6
+ Filter = Set[Optional[str]]
7
+ Record = Dict[str, str]
2
8
 
3
- class Source(object):
4
9
 
5
- def __init__(self, query, data):
10
+ class Source(object):
11
+ def __init__(self, query: "QueryMapping", data: Dict[str, Any]) -> None:
6
12
  self.query = query
7
- self.filters = query.data.get('filters', {}).items()
8
- self.filters_not = query.data.get('filters_not', {}).items()
13
+ self.filters = cast(Dict[str, Any], data.get("filters", {})).items()
14
+ self.filters_not = cast(Dict[str, Any], data.get("filters_not", {})).items()
15
+
16
+ @property
17
+ def records(self) -> Generator[Record, None, None]:
18
+ raise NotImplementedError
9
19
 
10
- def __len__(self):
20
+ def __len__(self) -> int:
11
21
  return 0
@@ -1,18 +1,24 @@
1
1
  import os
2
- import six
3
2
  import logging
4
3
  from uuid import uuid4
5
- from banal import ensure_list
6
- from normality import stringify
7
- from sqlalchemy import create_engine, MetaData
8
- from sqlalchemy import select, func
9
- # from sqlalchemy import text as sql_text
4
+ from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union, cast
5
+ from banal import ensure_list, is_listish, keys_values
6
+ from sqlalchemy import MetaData, func
7
+ from sqlalchemy.future import select
8
+ from sqlalchemy.engine import Engine, create_engine
9
+ from sqlalchemy.sql.elements import Label
10
10
  from sqlalchemy.pool import NullPool
11
11
  from sqlalchemy.schema import Table
12
+ from sqlalchemy.sql.expression import Select
12
13
 
13
- from followthemoney.mapping.source import Source
14
+ from followthemoney.mapping.source import Record, Source
15
+ from followthemoney.util import sanitize_text
14
16
  from followthemoney.exc import InvalidMapping
15
17
 
18
+ if TYPE_CHECKING:
19
+ from followthemoney.mapping.query import QueryMapping
20
+
21
+
16
22
  log = logging.getLogger(__name__)
17
23
  DATA_PAGE = 1000
18
24
 
@@ -20,20 +26,22 @@ DATA_PAGE = 1000
20
26
  class QueryTable(object):
21
27
  """A table to be joined in."""
22
28
 
23
- def __init__(self, query, data):
24
- self.query = query
25
- if isinstance(data, six.string_types):
26
- data = {'table': data}
27
- self.data = data
28
- self.table_ref = data.get('table')
29
- self.alias_ref = data.get('alias', self.table_ref)
30
- self.table = Table(self.table_ref, self.query.meta, autoload=True)
31
- self.alias = self.table.alias(self.alias_ref)
32
-
33
- self.refs = {}
29
+ def __init__(
30
+ self, meta: MetaData, engine: Engine, data: Union[str, Dict[str, str]]
31
+ ) -> None:
32
+ if isinstance(data, str):
33
+ data = {"table": data}
34
+ table_ref = data.get("table")
35
+ if table_ref is None:
36
+ raise InvalidMapping("Query has no table!")
37
+ alias_ref = data.get("alias", table_ref)
38
+ self.table = Table(table_ref, meta, autoload_with=engine)
39
+ self.alias = self.table.alias(alias_ref)
40
+
41
+ self.refs: Dict[str, Label[Any]] = {}
34
42
  for column in self.alias.columns:
35
- name = '%s.%s' % (self.alias_ref, column.name)
36
- labeled_column = column.label('col_%s' % uuid4().hex[:10])
43
+ name = "%s.%s" % (alias_ref, column.name)
44
+ labeled_column = column.label("col_%s" % uuid4().hex[:10])
37
45
  self.refs[name] = labeled_column
38
46
  self.refs[column.name] = labeled_column
39
47
 
@@ -41,70 +49,76 @@ class QueryTable(object):
41
49
  class SQLSource(Source):
42
50
  """Query mapper for loading data from a SQL query."""
43
51
 
44
- def __init__(self, query, data):
52
+ def __init__(self, query: "QueryMapping", data: Dict[str, Any]) -> None:
45
53
  super(SQLSource, self).__init__(query, data)
46
- self.database_uri = os.path.expandvars(data.get('database'))
47
- kwargs = {}
48
- if self.database_uri.lower().startswith('postgres'):
49
- kwargs['server_side_cursors'] = True
50
- self.engine = create_engine(self.database_uri,
51
- poolclass=NullPool,
52
- **kwargs)
54
+ database = data.get("database")
55
+ if database is None:
56
+ raise InvalidMapping("No database in SQL mapping!")
57
+ self.database_uri = cast(str, os.path.expandvars(database))
58
+ self.engine = create_engine(self.database_uri, poolclass=NullPool)
53
59
  self.meta = MetaData()
54
- self.meta.bind = self.engine
55
60
 
56
- tables = ensure_list(data.get('table'))
57
- tables.extend(ensure_list(data.get('tables')))
58
- self.tables = [QueryTable(self, f) for f in tables]
59
- self.joins = ensure_list(data.get('joins'))
61
+ tables = keys_values(data, "table", "tables")
62
+ self.tables = [QueryTable(self.meta, self.engine, f) for f in tables]
63
+ self.joins = cast(List[Dict[str, str]], ensure_list(data.get("joins")))
60
64
 
61
- def get_column(self, ref):
65
+ def get_column(self, ref: Optional[str]) -> Label[Any]:
62
66
  for table in self.tables:
63
67
  if ref in table.refs:
64
- return table.refs.get(ref)
68
+ return table.refs[ref]
65
69
  raise InvalidMapping("Missing reference: %s" % ref)
66
70
 
67
- def apply_filters(self, q):
71
+ def apply_filters(self, q: Select) -> Select:
68
72
  for col, val in self.filters:
69
- q = q.where(self.get_column(col) == val)
73
+ if is_listish(val):
74
+ q = q.where(self.get_column(col).in_(val))
75
+ else:
76
+ q = q.where(self.get_column(col) == val)
70
77
  for col, val in self.filters_not:
71
- q = q.where(self.get_column(col) != val)
78
+ if is_listish(val):
79
+ q = q.where(self.get_column(col).notin_(val))
80
+ else:
81
+ q = q.where(self.get_column(col) != val)
72
82
  # not sure this is a great idea:
73
83
  # if self.data.get('where'):
74
84
  # q = q.where(sql_text(self.data.get('where')))
75
85
  for join in self.joins:
76
- left = self.get_column(join.get('left'))
77
- right = self.get_column(join.get('right'))
86
+ left = self.get_column(join.get("left"))
87
+ right = self.get_column(join.get("right"))
78
88
  q = q.where(left == right)
79
89
  return q
80
90
 
81
- def compose_query(self):
82
- from_clause = [t.alias for t in self.tables]
91
+ def compose_query(self) -> Select:
83
92
  columns = [self.get_column(r) for r in self.query.refs]
84
- q = select(columns=columns, from_obj=from_clause, use_labels=True)
93
+ q = select(*columns)
94
+ q = q.select_from(*[t.alias for t in self.tables])
85
95
  return self.apply_filters(q)
86
96
 
87
97
  @property
88
- def records(self):
98
+ def records(self) -> Generator[Record, None, None]:
89
99
  """Compose the actual query and return an iterator of ``Record``."""
90
100
  mapping = [(r, self.get_column(r).name) for r in self.query.refs]
91
101
  q = self.compose_query()
92
102
  log.info("Query: %s", q)
93
- rp = self.engine.execute(q)
94
- while True:
95
- rows = rp.fetchmany(size=DATA_PAGE)
96
- if not len(rows):
97
- break
98
- for row in rows:
99
- data = {}
100
- for ref, name in mapping:
101
- data[ref] = stringify(row[name])
102
- yield data
103
-
104
- def __len__(self):
105
- from_clause = [t.alias for t in self.tables]
106
- columns = [func.count('*')]
107
- q = select(columns=columns, from_obj=from_clause, use_labels=True)
103
+ with self.engine.connect() as conn:
104
+ rp = conn.execution_options(stream_results=True).execute(q)
105
+ while True:
106
+ rows = rp.fetchmany(size=DATA_PAGE)
107
+ if not len(rows):
108
+ break
109
+ for row in rows:
110
+ row_map = row._mapping
111
+ data: Record = {}
112
+ for ref, name in mapping:
113
+ value = sanitize_text(row_map[name])
114
+ if value is not None:
115
+ data[ref] = value
116
+ yield data
117
+
118
+ def __len__(self) -> int:
119
+ q = select(func.count("*"))
120
+ q = q.select_from(*[t.alias for t in self.tables])
108
121
  q = self.apply_filters(q)
109
- rp = self.engine.execute(q)
110
- return rp.scalar()
122
+ with self.engine.connect() as conn:
123
+ rp = conn.execute(q)
124
+ return int(rp.scalar() or 0)
@@ -1,19 +1,25 @@
1
- import six
2
1
  import yaml
2
+ from typing import Any, Dict, Generator, List, TextIO, Tuple
3
3
 
4
+ Message = Tuple[Any, Any, List[str], List[str]]
4
5
 
5
- def extract_object(data, path):
6
+
7
+ def extract_object(
8
+ data: Dict[str, Any], path: List[str]
9
+ ) -> Generator[Message, None, None]:
6
10
  for key, value in data.items():
7
11
  subpath = path + [key]
8
- if isinstance(value, six.string_types):
9
- if key in ['label', 'reverse', 'description', 'plural']:
10
- comment = '.'.join(subpath)
12
+ if isinstance(value, str):
13
+ if key in ["label", "reverse", "description", "plural"]:
14
+ comment = ".".join(subpath)
11
15
  yield (None, None, [value], [comment])
12
16
  if isinstance(value, dict):
13
17
  for res in extract_object(value, subpath):
14
18
  yield res
15
19
 
16
20
 
17
- def extract_yaml(fileobj, keywords, comment_tags, options):
18
- data = yaml.load(fileobj)
21
+ def extract_yaml(
22
+ fileobj: TextIO, keywords: Any, comment_tags: Any, options: Any
23
+ ) -> Generator[Message, None, None]:
24
+ data = yaml.safe_load(fileobj)
19
25
  return extract_object(data, [])