hotglue-singer-sdk 1.0.2 (py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hotglue_singer_sdk/__init__.py +34 -0
- hotglue_singer_sdk/authenticators.py +554 -0
- hotglue_singer_sdk/cli/__init__.py +1 -0
- hotglue_singer_sdk/cli/common_options.py +37 -0
- hotglue_singer_sdk/configuration/__init__.py +1 -0
- hotglue_singer_sdk/configuration/_dict_config.py +101 -0
- hotglue_singer_sdk/exceptions.py +52 -0
- hotglue_singer_sdk/helpers/__init__.py +1 -0
- hotglue_singer_sdk/helpers/_catalog.py +122 -0
- hotglue_singer_sdk/helpers/_classproperty.py +18 -0
- hotglue_singer_sdk/helpers/_compat.py +15 -0
- hotglue_singer_sdk/helpers/_flattening.py +374 -0
- hotglue_singer_sdk/helpers/_schema.py +100 -0
- hotglue_singer_sdk/helpers/_secrets.py +41 -0
- hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
- hotglue_singer_sdk/helpers/_singer.py +280 -0
- hotglue_singer_sdk/helpers/_state.py +282 -0
- hotglue_singer_sdk/helpers/_typing.py +231 -0
- hotglue_singer_sdk/helpers/_util.py +27 -0
- hotglue_singer_sdk/helpers/capabilities.py +240 -0
- hotglue_singer_sdk/helpers/jsonpath.py +39 -0
- hotglue_singer_sdk/io_base.py +134 -0
- hotglue_singer_sdk/mapper.py +691 -0
- hotglue_singer_sdk/mapper_base.py +156 -0
- hotglue_singer_sdk/plugin_base.py +415 -0
- hotglue_singer_sdk/py.typed +0 -0
- hotglue_singer_sdk/sinks/__init__.py +14 -0
- hotglue_singer_sdk/sinks/batch.py +90 -0
- hotglue_singer_sdk/sinks/core.py +412 -0
- hotglue_singer_sdk/sinks/record.py +66 -0
- hotglue_singer_sdk/sinks/sql.py +299 -0
- hotglue_singer_sdk/streams/__init__.py +14 -0
- hotglue_singer_sdk/streams/core.py +1294 -0
- hotglue_singer_sdk/streams/graphql.py +74 -0
- hotglue_singer_sdk/streams/rest.py +611 -0
- hotglue_singer_sdk/streams/sql.py +1023 -0
- hotglue_singer_sdk/tap_base.py +580 -0
- hotglue_singer_sdk/target_base.py +554 -0
- hotglue_singer_sdk/target_sdk/__init__.py +0 -0
- hotglue_singer_sdk/target_sdk/auth.py +124 -0
- hotglue_singer_sdk/target_sdk/client.py +286 -0
- hotglue_singer_sdk/target_sdk/common.py +13 -0
- hotglue_singer_sdk/target_sdk/lambda.py +121 -0
- hotglue_singer_sdk/target_sdk/rest.py +108 -0
- hotglue_singer_sdk/target_sdk/sinks.py +16 -0
- hotglue_singer_sdk/target_sdk/target.py +570 -0
- hotglue_singer_sdk/target_sdk/target_base.py +627 -0
- hotglue_singer_sdk/testing.py +198 -0
- hotglue_singer_sdk/typing.py +603 -0
- hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
- hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
hotglue_singer_sdk/streams/sql.py
@@ -0,0 +1,1023 @@
"""Base class for SQL-type streams."""

import abc
import logging
from datetime import datetime
from functools import lru_cache
from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union, cast

import sqlalchemy
from sqlalchemy.engine import Engine
from sqlalchemy.engine.reflection import Inspector

from hotglue_singer_sdk import typing as th
from hotglue_singer_sdk.exceptions import ConfigValidationError
from hotglue_singer_sdk.helpers._schema import SchemaPlus
from hotglue_singer_sdk.helpers._singer import CatalogEntry, MetadataMapping
from hotglue_singer_sdk.plugin_base import PluginBase as TapBaseClass
from hotglue_singer_sdk.streams.core import Stream

class SQLConnector:
    """Base class for SQLAlchemy-based connectors.

    The connector class serves as a wrapper around the SQL connection.

    The functions of the connector are:

    - connecting to the source
    - generating SQLAlchemy connection and engine objects
    - discovering schema catalog entries
    - performing type conversions to/from JSONSchema types
    - dialect-specific functions, such as escaping and fully qualified names
    """

    allow_column_add: bool = True  # Whether ADD COLUMN is supported.
    allow_column_rename: bool = True  # Whether RENAME COLUMN is supported.
    allow_column_alter: bool = False  # Whether altering column types is supported.
    allow_merge_upsert: bool = False  # Whether MERGE UPSERT is supported.
    allow_temp_tables: bool = True  # Whether temp tables are supported.

    def __init__(
        self, config: Optional[dict] = None, sqlalchemy_url: Optional[str] = None
    ) -> None:
        """Initialize the SQL connector.

        Args:
            config: The parent tap or target object's config.
            sqlalchemy_url: Optional URL for the connection.
        """
        self._config: Dict[str, Any] = config or {}
        self._sqlalchemy_url: Optional[str] = sqlalchemy_url or None
        self._connection: Optional[sqlalchemy.engine.Connection] = None

    @property
    def config(self) -> dict:
        """If set, provides access to the tap or target config.

        Returns:
            The settings as a dict.
        """
        return self._config

    @property
    def logger(self) -> logging.Logger:
        """Get logger.

        Returns:
            Plugin logger.
        """
        return logging.getLogger("sqlconnector")

    def create_sqlalchemy_connection(self) -> sqlalchemy.engine.Connection:
        """Return a new SQLAlchemy connection using the provided config.

        By default this will create using the sqlalchemy `stream_results=True` option
        described here:
        https://docs.sqlalchemy.org/en/14/core/connections.html#using-server-side-cursors-a-k-a-stream-results

        Developers may override this method if their provider does not support
        server side cursors (`stream_results`) or in order to use different
        configuration options when creating the connection object.

        Returns:
            A newly created SQLAlchemy connection object.
        """
        return (
            self.create_sqlalchemy_engine()
            .connect()
            .execution_options(stream_results=True)
        )

    def create_sqlalchemy_engine(self) -> sqlalchemy.engine.Engine:
        """Return a new SQLAlchemy engine using the provided config.

        Developers can generally override just one of the following:
        `sqlalchemy_engine`, `sqlalchemy_url`.

        Returns:
            A newly created SQLAlchemy engine object.
        """
        return sqlalchemy.create_engine(self.sqlalchemy_url, echo=False)

    @property
    def connection(self) -> sqlalchemy.engine.Connection:
        """Return or set the SQLAlchemy connection object.

        Returns:
            The active SQLAlchemy connection object.
        """
        if not self._connection:
            self._connection = self.create_sqlalchemy_connection()

        return self._connection

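For illustration, a provider that rejects server-side cursors could override the connection factory as sketched below; the `BufferedConnector` name is hypothetical and not part of the package:

```python
import sqlalchemy

from hotglue_singer_sdk.streams.sql import SQLConnector


class BufferedConnector(SQLConnector):
    """Sketch: connector for providers without `stream_results` support."""

    def create_sqlalchemy_connection(self) -> sqlalchemy.engine.Connection:
        # Plain connect() without execution_options(stream_results=True):
        # results are buffered client-side instead of streamed.
        return self.create_sqlalchemy_engine().connect()
```
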
    @property
    def sqlalchemy_url(self) -> str:
        """Return the SQLAlchemy URL string.

        Returns:
            The URL as a string.
        """
        if not self._sqlalchemy_url:
            self._sqlalchemy_url = self.get_sqlalchemy_url(self.config)

        return self._sqlalchemy_url

    def get_sqlalchemy_url(self, config: Dict[str, Any]) -> str:
        """Return the SQLAlchemy URL string.

        Developers can generally override just one of the following:
        `sqlalchemy_engine`, `get_sqlalchemy_url`.

        Args:
            config: A dictionary of settings from the tap or target config.

        Returns:
            The URL as a string.

        Raises:
            ConfigValidationError: If no valid sqlalchemy_url can be found.
        """
        if "sqlalchemy_url" not in config:
            raise ConfigValidationError(
                "Could not find or create 'sqlalchemy_url' for connection."
            )

        return cast(str, config["sqlalchemy_url"])

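Connectors for specific backends usually derive the URL instead of requiring a literal `sqlalchemy_url` key. A minimal sketch, where the `user`, `password`, `host`, and `database` config keys are hypothetical:

```python
from typing import Any, Dict

from hotglue_singer_sdk.streams.sql import SQLConnector


class PostgresConnector(SQLConnector):
    """Sketch: build a postgresql:// URL from discrete config keys."""

    def get_sqlalchemy_url(self, config: Dict[str, Any]) -> str:
        return (
            f"postgresql://{config['user']}:{config['password']}"
            f"@{config['host']}/{config['database']}"
        )
```
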
    @staticmethod
    def to_jsonschema_type(
        sql_type: Union[
            str, sqlalchemy.types.TypeEngine, Type[sqlalchemy.types.TypeEngine], Any
        ]
    ) -> dict:
        """Return a JSON Schema representation of the provided type.

        By default will call `typing.to_jsonschema_type()` for strings and SQLAlchemy
        types.

        Developers may override this method to accept additional input argument types,
        to support non-standard types, or to provide custom typing logic.

        Args:
            sql_type: The string representation of the SQL type, a SQLAlchemy
                TypeEngine class or object, or a custom-specified object.

        Raises:
            ValueError: If the type received could not be translated to jsonschema.

        Returns:
            The JSON Schema representation of the provided type.
        """
        if isinstance(sql_type, (str, sqlalchemy.types.TypeEngine)):
            return th.to_jsonschema_type(sql_type)

        if isinstance(sql_type, type):
            if issubclass(sql_type, sqlalchemy.types.TypeEngine):
                return th.to_jsonschema_type(sql_type)

            raise ValueError(f"Unexpected type received: '{sql_type.__name__}'")

        raise ValueError(f"Unexpected type received: '{type(sql_type).__name__}'")

    @staticmethod
    def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
        """Return a SQLAlchemy type representation of the provided JSON Schema type.

        By default will call `typing.to_sql_type()`.

        Developers may override this method to accept additional input argument types,
        to support non-standard types, or to provide custom typing logic.

        If overriding this method, developers should call the default implementation
        from the base class for all unhandled cases.

        Args:
            jsonschema_type: The JSON Schema representation of the source type.

        Returns:
            The SQLAlchemy type representation of the data type.
        """
        return th.to_sql_type(jsonschema_type)

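As a sketch of such an override, the connector below maps one vendor-specific type and defers all other cases to the base class; `MoneyType` is a hypothetical custom type, not something the package ships:

```python
import sqlalchemy

from hotglue_singer_sdk.streams.sql import SQLConnector


class MoneyType(sqlalchemy.types.Numeric):
    """Hypothetical vendor-specific column type."""


class MyConnector(SQLConnector):
    @staticmethod
    def to_jsonschema_type(sql_type) -> dict:
        if isinstance(sql_type, MoneyType):
            return {"type": ["number"]}
        # Defer every unhandled case to the default implementation.
        return SQLConnector.to_jsonschema_type(sql_type)
```
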
    @staticmethod
    def get_fully_qualified_name(
        table_name: str,
        schema_name: Optional[str] = None,
        db_name: Optional[str] = None,
        delimiter: str = ".",
    ) -> str:
        """Concatenates a fully qualified name from the parts.

        Args:
            table_name: The name of the table.
            schema_name: The name of the schema. Defaults to None.
            db_name: The name of the database. Defaults to None.
            delimiter: Generally: '.' for SQL names and '-' for Singer names.

        Raises:
            ValueError: If none of table_name, schema_name, or db_name are
                provided.

        Returns:
            The fully qualified name as a string.
        """
        if db_name and schema_name:
            result = delimiter.join([db_name, schema_name, table_name])
        elif db_name:
            result = delimiter.join([db_name, table_name])
        elif schema_name:
            result = delimiter.join([schema_name, table_name])
        elif table_name:
            result = table_name
        else:
            raise ValueError(
                "Could not generate fully qualified name for stream: "
                + ":".join(
                    [
                        db_name or "(unknown-db)",
                        schema_name or "(unknown-schema)",
                        table_name or "(unknown-table-name)",
                    ]
                )
            )

        return result

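For example, the same parts yield a SQL identifier or a Singer stream ID depending on the delimiter:

```python
from hotglue_singer_sdk.streams.sql import SQLConnector

# Three-part SQL name: db, then schema, then table.
SQLConnector.get_fully_qualified_name(
    "orders", schema_name="public", db_name="analytics"
)  # -> 'analytics.public.orders'

# '-' delimited form, as used for stream IDs in discover_catalog_entry().
SQLConnector.get_fully_qualified_name(
    "orders", schema_name="public", delimiter="-"
)  # -> 'public-orders'
```
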
    @property
    def _dialect(self) -> sqlalchemy.engine.Dialect:
        """Return the dialect object.

        Returns:
            The dialect object.
        """
        return cast(sqlalchemy.engine.Dialect, self.connection.engine.dialect)

    @property
    def _engine(self) -> sqlalchemy.engine.Engine:
        """Return the engine object.

        Returns:
            The engine object.
        """
        return cast(sqlalchemy.engine.Engine, self.connection.engine)

    def quote(self, name: str) -> str:
        """Quote a name if it needs quoting, using '.' as a name-part delimiter.

        Examples:
            "my_table" => "`my_table`"
            "my_schema.my_table" => "`my_schema`.`my_table`"

        Args:
            name: The unquoted name.

        Returns:
            str: The quoted name.
        """
        return ".".join(
            [
                self._dialect.identifier_preparer.quote(name_part)
                for name_part in name.split(".")
            ]
        )

    @lru_cache()
    def _warn_no_view_detection(self) -> None:
        """Print a warning, but only the first time."""
        self.logger.warning(
            "Provider does not support get_view_names(). "
            "Streams list may be incomplete or `is_view` may be unpopulated."
        )

    def get_schema_names(self, engine: Engine, inspected: Inspector) -> List[str]:
        """Return a list of schema names in DB.

        Args:
            engine: SQLAlchemy engine
            inspected: SQLAlchemy inspector instance for engine

        Returns:
            List of schema names
        """
        return inspected.get_schema_names()

    def get_object_names(
        self, engine: Engine, inspected: Inspector, schema_name: str
    ) -> List[Tuple[str, bool]]:
        """Return a list of syncable objects.

        Args:
            engine: SQLAlchemy engine
            inspected: SQLAlchemy inspector instance for engine
            schema_name: Schema name to inspect

        Returns:
            List of tuples (<table_or_view_name>, <is_view>)
        """
        # Get list of tables and views
        table_names = inspected.get_table_names(schema=schema_name)
        try:
            view_names = inspected.get_view_names(schema=schema_name)
        except NotImplementedError:
            # Some DB providers do not understand 'views'
            self._warn_no_view_detection()
            view_names = []
        object_names = [(t, False) for t in table_names] + [
            (v, True) for v in view_names
        ]

        return object_names

    # TODO: maybe this should be split into smaller parts?
    def discover_catalog_entry(
        self,
        engine: Engine,
        inspected: Inspector,
        schema_name: str,
        table_name: str,
        is_view: bool,
    ) -> CatalogEntry:
        """Create `CatalogEntry` object for the given table or a view.

        Args:
            engine: SQLAlchemy engine
            inspected: SQLAlchemy inspector instance for engine
            schema_name: Schema name to inspect
            table_name: Name of the table or a view
            is_view: Flag whether this object is a view, returned by `get_object_names`

        Returns:
            `CatalogEntry` object for the given table or a view
        """
        # Initialize unique stream name
        unique_stream_id = self.get_fully_qualified_name(
            db_name=None,
            schema_name=schema_name,
            table_name=table_name,
            delimiter="-",
        )

        # Detect key properties
        possible_primary_keys: List[List[str]] = []
        pk_def = inspected.get_pk_constraint(table_name, schema=schema_name)
        if pk_def and "constrained_columns" in pk_def:
            possible_primary_keys.append(pk_def["constrained_columns"])
        for index_def in inspected.get_indexes(table_name, schema=schema_name):
            if index_def.get("unique", False):
                possible_primary_keys.append(index_def["column_names"])
        key_properties = next(iter(possible_primary_keys), None)

        # Initialize columns list
        table_schema = th.PropertiesList()
        for column_def in inspected.get_columns(table_name, schema=schema_name):
            column_name = column_def["name"]
            is_nullable = column_def.get("nullable", False)
            jsonschema_type: dict = self.to_jsonschema_type(
                cast(sqlalchemy.types.TypeEngine, column_def["type"])
            )
            table_schema.append(
                th.Property(
                    name=column_name,
                    wrapped=th.CustomType(jsonschema_type),
                    required=not is_nullable,
                )
            )
        schema = table_schema.to_dict()

        # Initialize available replication methods
        addl_replication_methods: List[str] = []  # By default an empty list.
        # Notes regarding replication methods:
        # - 'INCREMENTAL' replication must be enabled by the user by specifying
        #   a replication_key value.
        # - 'LOG_BASED' replication must be enabled by the developer, according
        #   to source-specific implementation capabilities.
        replication_method = next(reversed(["FULL_TABLE"] + addl_replication_methods))

        # Create the catalog entry object
        catalog_entry = CatalogEntry(
            tap_stream_id=unique_stream_id,
            stream=unique_stream_id,
            table=table_name,
            key_properties=key_properties,
            schema=SchemaPlus.from_dict(schema),
            is_view=is_view,
            replication_method=replication_method,
            metadata=MetadataMapping.get_standard_metadata(
                schema_name=schema_name,
                schema=schema,
                replication_method=replication_method,
                key_properties=key_properties,
                valid_replication_keys=None,  # Must be defined by user
            ),
            database=None,  # Expects single-database context
            row_count=None,
            stream_alias=None,
            replication_key=None,  # Must be defined by user
        )

        return catalog_entry

    def discover_catalog_entries(self) -> List[dict]:
        """Return a list of catalog entries from discovery.

        Returns:
            The discovered catalog entries as a list.
        """
        result: List[dict] = []
        engine = self.create_sqlalchemy_engine()
        inspected = sqlalchemy.inspect(engine)
        for schema_name in self.get_schema_names(engine, inspected):
            # Iterate through each table and view
            for table_name, is_view in self.get_object_names(
                engine, inspected, schema_name
            ):
                catalog_entry = self.discover_catalog_entry(
                    engine, inspected, schema_name, table_name, is_view
                )
                result.append(catalog_entry.to_dict())

        return result

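Discovery can be exercised directly against a configured connector. A minimal sketch (the SQLite URL is illustrative):

```python
from hotglue_singer_sdk.streams.sql import SQLConnector

connector = SQLConnector(config={"sqlalchemy_url": "sqlite:///demo.db"})

# One dict per table or view, keyed per the Singer catalog spec.
for entry in connector.discover_catalog_entries():
    print(entry["tap_stream_id"])
```
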
    def parse_full_table_name(
        self, full_table_name: str
    ) -> Tuple[Optional[str], Optional[str], str]:
        """Parse a fully qualified table name into its parts.

        Developers may override this method if their platform does not support the
        traditional 3-part convention: `db_name.schema_name.table_name`

        Args:
            full_table_name: A table name or a fully qualified table name. Depending
                on the SQL platform, this could take the following forms:
                - `<db>.<schema>.<table>` (three part names)
                - `<db>.<table>` (platforms which do not use schema groupings)
                - `<schema>.<table>` (if DB name is already in context)
                - `<table>` (if DB name and schema name are already in context)

        Returns:
            A three part tuple (db_name, schema_name, table_name) with any unspecified
            or unused parts returned as None.
        """
        db_name: Optional[str] = None
        schema_name: Optional[str] = None

        parts = full_table_name.split(".")
        if len(parts) == 1:
            table_name = full_table_name
        if len(parts) == 2:
            schema_name, table_name = parts
        if len(parts) == 3:
            db_name, schema_name, table_name = parts

        return db_name, schema_name, table_name

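Note that a two-part name is always read as `<schema>.<table>`; platforms using `<db>.<table>` must override this method. For example:

```python
from hotglue_singer_sdk.streams.sql import SQLConnector

connector = SQLConnector(config={"sqlalchemy_url": "sqlite:///demo.db"})

assert connector.parse_full_table_name("db.sch.orders") == ("db", "sch", "orders")
assert connector.parse_full_table_name("sch.orders") == (None, "sch", "orders")
assert connector.parse_full_table_name("orders") == (None, None, "orders")
```
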
    def table_exists(self, full_table_name: str) -> bool:
        """Determine if the target table already exists.

        Args:
            full_table_name: the target table name.

        Returns:
            True if the table exists, False if not.
        """
        return cast(
            bool,
            sqlalchemy.inspect(self._engine).has_table(full_table_name),
        )

    def get_table_columns(self, full_table_name: str) -> Dict[str, sqlalchemy.Column]:
        """Return a mapping of the table's columns.

        Args:
            full_table_name: Fully qualified table name.

        Returns:
            An ordered mapping of column names to column objects.
        """
        _, schema_name, table_name = self.parse_full_table_name(full_table_name)
        inspector = sqlalchemy.inspect(self._engine)
        columns = inspector.get_columns(table_name, schema_name)

        result: Dict[str, sqlalchemy.Column] = {}
        for col_meta in columns:
            result[col_meta["name"]] = sqlalchemy.Column(
                col_meta["name"],
                col_meta["type"],
                nullable=col_meta.get("nullable", False),
            )

        return result

    def get_table(self, full_table_name: str) -> sqlalchemy.Table:
        """Return a table object.

        Args:
            full_table_name: Fully qualified table name.

        Returns:
            A table object with column list.
        """
        columns = self.get_table_columns(full_table_name).values()
        _, schema_name, table_name = self.parse_full_table_name(full_table_name)
        meta = sqlalchemy.MetaData()
        return sqlalchemy.schema.Table(
            table_name, meta, *list(columns), schema=schema_name
        )

    def column_exists(self, full_table_name: str, column_name: str) -> bool:
        """Determine if the target column already exists.

        Args:
            full_table_name: the target table name.
            column_name: the target column name.

        Returns:
            True if the column exists, False if not.
        """
        return column_name in self.get_table_columns(full_table_name)

    def create_empty_table(
        self,
        full_table_name: str,
        schema: dict,
        primary_keys: Optional[List[str]] = None,
        partition_keys: Optional[List[str]] = None,
        as_temp_table: bool = False,
    ) -> None:
        """Create an empty target table.

        Args:
            full_table_name: the target table name.
            schema: the JSON schema for the new table.
            primary_keys: list of key properties.
            partition_keys: list of partition keys.
            as_temp_table: True to create a temp table.

        Raises:
            NotImplementedError: if temp tables are unsupported and as_temp_table=True.
            RuntimeError: if a variant schema is passed with no properties defined.
        """
        if as_temp_table:
            raise NotImplementedError("Temporary tables are not supported.")

        _ = partition_keys  # Not supported in generic implementation.

        meta = sqlalchemy.MetaData()
        columns: List[sqlalchemy.Column] = []
        primary_keys = primary_keys or []
        try:
            properties: dict = schema["properties"]
        except KeyError:
            raise RuntimeError(
                f"Schema for '{full_table_name}' does not define properties: {schema}"
            )
        for property_name, property_jsonschema in properties.items():
            is_primary_key = property_name in primary_keys
            columns.append(
                sqlalchemy.Column(
                    property_name,
                    self.to_sql_type(property_jsonschema),
                    primary_key=is_primary_key,
                )
            )

        _ = sqlalchemy.Table(full_table_name, meta, *columns)
        meta.create_all(self._engine)

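A sketch of driving table creation from a Singer-style JSON schema; the table name, columns, and SQLite URL are illustrative:

```python
from hotglue_singer_sdk.streams.sql import SQLConnector

connector = SQLConnector(config={"sqlalchemy_url": "sqlite:///demo.db"})

schema = {
    "properties": {
        "id": {"type": ["integer"]},
        "name": {"type": ["string", "null"]},
    }
}

if not connector.table_exists("orders"):
    # Columns are typed via to_sql_type(); "id" becomes the primary key.
    connector.create_empty_table("orders", schema=schema, primary_keys=["id"])
```
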
    def _create_empty_column(
        self,
        full_table_name: str,
        column_name: str,
        sql_type: sqlalchemy.types.TypeEngine,
    ) -> None:
        """Create a new column.

        Args:
            full_table_name: The target table name.
            column_name: The name of the new column.
            sql_type: SQLAlchemy type engine to be used in creating the new column.

        Raises:
            NotImplementedError: if adding columns is not supported.
        """
        if not self.allow_column_add:
            raise NotImplementedError("Adding columns is not supported.")

        create_column_clause = sqlalchemy.schema.CreateColumn(
            sqlalchemy.Column(
                column_name,
                sql_type,
            )
        )
        self.connection.execute(
            sqlalchemy.DDL(
                "ALTER TABLE %(table)s ADD COLUMN %(create_column)s",
                {
                    "table": full_table_name,
                    "create_column": create_column_clause,
                },
            )
        )

    def prepare_table(
        self,
        full_table_name: str,
        schema: dict,
        primary_keys: List[str],
        partition_keys: Optional[List[str]] = None,
        as_temp_table: bool = False,
    ) -> None:
        """Adapt target table to provided schema if possible.

        Args:
            full_table_name: the target table name.
            schema: the JSON Schema for the table.
            primary_keys: list of key properties.
            partition_keys: list of partition keys.
            as_temp_table: True to create a temp table.
        """
        if not self.table_exists(full_table_name=full_table_name):
            self.create_empty_table(
                full_table_name=full_table_name,
                schema=schema,
                primary_keys=primary_keys,
                partition_keys=partition_keys,
                as_temp_table=as_temp_table,
            )
            return

        for property_name, property_def in schema["properties"].items():
            self.prepare_column(
                full_table_name, property_name, self.to_sql_type(property_def)
            )

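`prepare_table` is effectively idempotent: the first call creates the table, and later calls add any columns the schema has gained. A self-contained sketch (names illustrative):

```python
from hotglue_singer_sdk.streams.sql import SQLConnector

connector = SQLConnector(config={"sqlalchemy_url": "sqlite:///demo.db"})

schema = {
    "properties": {
        "id": {"type": ["integer"]},
        "name": {"type": ["string", "null"]},
        "email": {"type": ["string", "null"]},  # new property since last run
    }
}

# Creates the table on first run; on later runs the missing "email"
# column is added via prepare_column() and _create_empty_column().
connector.prepare_table("orders", schema=schema, primary_keys=["id"])
```
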
    def prepare_column(
        self,
        full_table_name: str,
        column_name: str,
        sql_type: sqlalchemy.types.TypeEngine,
    ) -> None:
        """Adapt target column to provided schema if possible.

        Args:
            full_table_name: the target table name.
            column_name: the target column name.
            sql_type: the SQLAlchemy type.
        """
        if not self.column_exists(full_table_name, column_name):
            self._create_empty_column(
                full_table_name=full_table_name,
                column_name=column_name,
                sql_type=sql_type,
            )
            return

        self._adapt_column_type(
            full_table_name,
            column_name=column_name,
            sql_type=sql_type,
        )

    def rename_column(self, full_table_name: str, old_name: str, new_name: str) -> None:
        """Rename the provided columns.

        Args:
            full_table_name: The fully qualified table name.
            old_name: The old column to be renamed.
            new_name: The new name for the column.

        Raises:
            NotImplementedError: If `self.allow_column_rename` is false.
        """
        if not self.allow_column_rename:
            raise NotImplementedError("Renaming columns is not supported.")

        self.connection.execute(
            f"ALTER TABLE {full_table_name} "
            f'RENAME COLUMN "{old_name}" to "{new_name}"'
        )

    def merge_sql_types(
        self, sql_types: List[sqlalchemy.types.TypeEngine]
    ) -> sqlalchemy.types.TypeEngine:
        """Return a compatible SQL type for the selected type list.

        Args:
            sql_types: List of SQL types.

        Returns:
            A SQL type that is compatible with the input types.

        Raises:
            ValueError: If sql_types argument has zero members.
        """
        if not sql_types:
            raise ValueError("Expected at least one member in `sql_types` argument.")

        if len(sql_types) == 1:
            return sql_types[0]

        sql_types = self._sort_types(sql_types)

        if len(sql_types) > 2:
            return self.merge_sql_types(
                [self.merge_sql_types([sql_types[0], sql_types[1]])] + sql_types[2:]
            )

        assert len(sql_types) == 2
        generic_type = type(sql_types[0].as_generic())
        if isinstance(generic_type, type):
            if issubclass(
                generic_type,
                (sqlalchemy.types.String, sqlalchemy.types.Unicode),
            ):
                return sql_types[0]

            elif isinstance(
                generic_type,
                (sqlalchemy.types.String, sqlalchemy.types.Unicode),
            ):
                return sql_types[0]

        raise ValueError(
            f"Unable to merge sql types: {', '.join([str(t) for t in sql_types])}"
        )

    def _sort_types(
        self,
        sql_types: Iterable[sqlalchemy.types.TypeEngine],
    ) -> List[sqlalchemy.types.TypeEngine]:
        """Return the input types sorted from most to least compatible.

        For example, [Smallint, Integer, Datetime, String, Double] would become
        [String, Datetime, Double, Integer, Smallint]: string types sort first,
        then datetimes, then floats, then integers. Higher precision, scale, and
        length sort earlier within the same rank.

        Args:
            sql_types: The list of SQL types to sort.

        Returns:
            The sorted list.
        """

        def _get_type_sort_key(
            sql_type: sqlalchemy.types.TypeEngine,
        ) -> Tuple[int, int]:
            # return rank, with higher numbers ranking first

            _len = int(getattr(sql_type, "length", 0) or 0)

            _pytype = cast(type, sql_type.python_type)
            if issubclass(_pytype, (str, bytes)):
                return 900, _len
            elif issubclass(_pytype, datetime):
                return 600, _len
            elif issubclass(_pytype, float):
                return 400, _len
            elif issubclass(_pytype, int):
                return 300, _len

            return 0, _len

        return sorted(sql_types, key=_get_type_sort_key, reverse=True)

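In practice the merge lets a target widen a column rather than fail; for two string types the longer one wins. A sketch:

```python
import sqlalchemy

from hotglue_singer_sdk.streams.sql import SQLConnector

connector = SQLConnector(config={"sqlalchemy_url": "sqlite:///demo.db"})

merged = connector.merge_sql_types(
    [sqlalchemy.types.VARCHAR(50), sqlalchemy.types.VARCHAR(200)]
)
# _sort_types() ranks VARCHAR(200) first, so it is returned: it can
# hold every value the 50-character column already stores.
assert merged.length == 200
```
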
    def _get_column_type(
        self, full_table_name: str, column_name: str
    ) -> sqlalchemy.types.TypeEngine:
        """Gets the SQL type of the declared column.

        Args:
            full_table_name: The name of the table.
            column_name: The name of the column.

        Returns:
            The type of the column.

        Raises:
            KeyError: If the provided column name does not exist.
        """
        try:
            column = self.get_table_columns(full_table_name)[column_name]
        except KeyError as ex:
            raise KeyError(
                f"Column `{column_name}` does not exist in table `{full_table_name}`."
            ) from ex

        return cast(sqlalchemy.types.TypeEngine, column.type)

    def _adapt_column_type(
        self,
        full_table_name: str,
        column_name: str,
        sql_type: sqlalchemy.types.TypeEngine,
    ) -> None:
        """Adapt table column type to support the new JSON schema type.

        Args:
            full_table_name: The target table name.
            column_name: The target column name.
            sql_type: The new SQLAlchemy type.

        Raises:
            NotImplementedError: if altering columns is not supported.
        """
        current_type = self._get_column_type(full_table_name, column_name)
        compatible_sql_type = self.merge_sql_types([current_type, sql_type])
        if current_type == compatible_sql_type:
            # Nothing to do
            return

        if not self.allow_column_alter:
            raise NotImplementedError(
                "Altering columns is not supported. "
                f"Could not convert column '{full_table_name}.{column_name}' "
                f"from '{current_type}' to '{compatible_sql_type}'."
            )

        self.connection.execute(
            sqlalchemy.DDL(
                "ALTER TABLE %(table)s ALTER COLUMN %(col_name)s (%(col_type)s)",
                {
                    "table": full_table_name,
                    "col_name": column_name,
                    "col_type": compatible_sql_type,
                },
            )
        )


class SQLStream(Stream, metaclass=abc.ABCMeta):
    """Base class for SQLAlchemy-based streams."""

    connector_class = SQLConnector

    def __init__(
        self,
        tap: TapBaseClass,
        catalog_entry: dict,
        connector: Optional[SQLConnector] = None,
    ) -> None:
        """Initialize the database stream.

        If `connector` is omitted, a new connector will be created.

        Args:
            tap: The parent tap object.
            catalog_entry: Catalog entry dict.
            connector: Optional connector to reuse.
        """
        self._connector: SQLConnector
        if connector:
            self._connector = connector
        else:
            self._connector = self.connector_class(dict(tap.config))

        self.catalog_entry = catalog_entry
        super().__init__(
            tap=tap,
            schema=self.schema,
            name=self.tap_stream_id,
        )

    @property
    def _singer_catalog_entry(self) -> CatalogEntry:
        """Return catalog entry as specified by the Singer catalog spec.

        Returns:
            A CatalogEntry object.
        """
        return cast(CatalogEntry, CatalogEntry.from_dict(self.catalog_entry))

    @property
    def connector(self) -> SQLConnector:
        """The connector object.

        Returns:
            The connector object.
        """
        return self._connector

    @property
    def metadata(self) -> MetadataMapping:
        """The Singer metadata.

        Metadata from an input catalog will override standard metadata.

        Returns:
            Metadata object as specified in the Singer spec.
        """
        return self._singer_catalog_entry.metadata

    @property
    def schema(self) -> dict:
        """Return the schema object (dict) as specified in the Singer spec.

        A schema from an input catalog will override the standard schema.

        Returns:
            The schema object.
        """
        return cast(dict, self._singer_catalog_entry.schema.to_dict())

    @property
    def tap_stream_id(self) -> str:
        """Return the unique ID used by the tap to identify this stream.

        Generally, this is the same value as in `Stream.name`.

        In rare cases, such as for database types with multi-part names,
        this may be slightly different from `Stream.name`.

        Returns:
            The unique tap stream ID as a string.
        """
        return self._singer_catalog_entry.tap_stream_id

    @property
    def primary_keys(self) -> Optional[List[str]]:
        """Get primary keys from the catalog entry definition.

        Returns:
            A list of primary key(s) for the stream.
        """
        return self._singer_catalog_entry.metadata.root.table_key_properties or []

    @primary_keys.setter
    def primary_keys(self, new_value: List[str]) -> None:
        """Set or reset the primary key(s) in the stream's catalog entry.

        Args:
            new_value: a list of one or more column names
        """
        self._singer_catalog_entry.metadata.root.table_key_properties = new_value

    @property
    def fully_qualified_name(self) -> str:
        """Generate the fully qualified version of the table name.

        Raises:
            ValueError: If table_name is not able to be detected.

        Returns:
            The fully qualified name.
        """
        catalog_entry = self._singer_catalog_entry
        if not catalog_entry.table:
            raise ValueError(
                f"Missing table name in catalog entry: {catalog_entry.to_dict()}"
            )

        return self.connector.get_fully_qualified_name(
            table_name=catalog_entry.table,
            schema_name=catalog_entry.metadata.root.schema_name,
            db_name=catalog_entry.database,
        )

    # Get records from stream

    def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]:
        """Return a generator of row-type dictionary objects.

        If the stream has a replication_key value defined, records will be sorted by
        the incremental key. If the stream also has an available starting bookmark,
        the records will be filtered for values greater than or equal to the bookmark
        value.

        Args:
            context: If partition context is provided, will read specifically from
                this data slice.

        Yields:
            One dict per record.

        Raises:
            NotImplementedError: If partition is passed in context and the stream does
                not support partitioning.
        """
        if context:
            raise NotImplementedError(
                f"Stream '{self.name}' does not support partitioning."
            )

        table = self.connector.get_table(self.fully_qualified_name)
        query = table.select()
        if self.replication_key:
            replication_key_col = table.columns[self.replication_key]
            query = query.order_by(replication_key_col)

            start_val = self.get_starting_replication_key_value(context)
            if start_val:
                query = query.where(
                    sqlalchemy.text(":replication_key >= :start_val").bindparams(
                        replication_key=replication_key_col, start_val=start_val
                    )
                )

        for row in self.connector.connection.execute(query):
            yield dict(row)


__all__ = ["SQLStream", "SQLConnector"]
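To tie the pieces together, a concrete source typically pairs one connector subclass with one stream subclass and builds a stream per discovered catalog entry, sharing a single connector so the engine is created once. A minimal sketch using only the classes defined above; the class names and the `build_streams` helper are illustrative:

```python
from hotglue_singer_sdk.streams.sql import SQLConnector, SQLStream


class DemoConnector(SQLConnector):
    """Uses the literal `sqlalchemy_url` from config."""


class DemoStream(SQLStream):
    connector_class = DemoConnector


def build_streams(tap) -> list:
    # One stream per discovered table/view, all sharing one connector.
    connector = DemoConnector(dict(tap.config))
    return [
        DemoStream(tap=tap, catalog_entry=entry, connector=connector)
        for entry in connector.discover_catalog_entries()
    ]
```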