hotglue-singer-sdk 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. hotglue_singer_sdk/__init__.py +34 -0
  2. hotglue_singer_sdk/authenticators.py +554 -0
  3. hotglue_singer_sdk/cli/__init__.py +1 -0
  4. hotglue_singer_sdk/cli/common_options.py +37 -0
  5. hotglue_singer_sdk/configuration/__init__.py +1 -0
  6. hotglue_singer_sdk/configuration/_dict_config.py +101 -0
  7. hotglue_singer_sdk/exceptions.py +52 -0
  8. hotglue_singer_sdk/helpers/__init__.py +1 -0
  9. hotglue_singer_sdk/helpers/_catalog.py +122 -0
  10. hotglue_singer_sdk/helpers/_classproperty.py +18 -0
  11. hotglue_singer_sdk/helpers/_compat.py +15 -0
  12. hotglue_singer_sdk/helpers/_flattening.py +374 -0
  13. hotglue_singer_sdk/helpers/_schema.py +100 -0
  14. hotglue_singer_sdk/helpers/_secrets.py +41 -0
  15. hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
  16. hotglue_singer_sdk/helpers/_singer.py +280 -0
  17. hotglue_singer_sdk/helpers/_state.py +282 -0
  18. hotglue_singer_sdk/helpers/_typing.py +231 -0
  19. hotglue_singer_sdk/helpers/_util.py +27 -0
  20. hotglue_singer_sdk/helpers/capabilities.py +240 -0
  21. hotglue_singer_sdk/helpers/jsonpath.py +39 -0
  22. hotglue_singer_sdk/io_base.py +134 -0
  23. hotglue_singer_sdk/mapper.py +691 -0
  24. hotglue_singer_sdk/mapper_base.py +156 -0
  25. hotglue_singer_sdk/plugin_base.py +415 -0
  26. hotglue_singer_sdk/py.typed +0 -0
  27. hotglue_singer_sdk/sinks/__init__.py +14 -0
  28. hotglue_singer_sdk/sinks/batch.py +90 -0
  29. hotglue_singer_sdk/sinks/core.py +412 -0
  30. hotglue_singer_sdk/sinks/record.py +66 -0
  31. hotglue_singer_sdk/sinks/sql.py +299 -0
  32. hotglue_singer_sdk/streams/__init__.py +14 -0
  33. hotglue_singer_sdk/streams/core.py +1294 -0
  34. hotglue_singer_sdk/streams/graphql.py +74 -0
  35. hotglue_singer_sdk/streams/rest.py +611 -0
  36. hotglue_singer_sdk/streams/sql.py +1023 -0
  37. hotglue_singer_sdk/tap_base.py +580 -0
  38. hotglue_singer_sdk/target_base.py +554 -0
  39. hotglue_singer_sdk/target_sdk/__init__.py +0 -0
  40. hotglue_singer_sdk/target_sdk/auth.py +124 -0
  41. hotglue_singer_sdk/target_sdk/client.py +286 -0
  42. hotglue_singer_sdk/target_sdk/common.py +13 -0
  43. hotglue_singer_sdk/target_sdk/lambda.py +121 -0
  44. hotglue_singer_sdk/target_sdk/rest.py +108 -0
  45. hotglue_singer_sdk/target_sdk/sinks.py +16 -0
  46. hotglue_singer_sdk/target_sdk/target.py +570 -0
  47. hotglue_singer_sdk/target_sdk/target_base.py +627 -0
  48. hotglue_singer_sdk/testing.py +198 -0
  49. hotglue_singer_sdk/typing.py +603 -0
  50. hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
  51. hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
  52. hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
  53. hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,1023 @@
1
+ """Base class for SQL-type streams."""
2
+
3
+ import abc
4
+ import logging
5
+ from datetime import datetime
6
+ from functools import lru_cache
7
+ from typing import Any, Dict, Iterable, List, Optional, Tuple, Type, Union, cast
8
+
9
+ import sqlalchemy
10
+ from sqlalchemy.engine import Engine
11
+ from sqlalchemy.engine.reflection import Inspector
12
+
13
+ from hotglue_singer_sdk import typing as th
14
+ from hotglue_singer_sdk.exceptions import ConfigValidationError
15
+ from hotglue_singer_sdk.helpers._schema import SchemaPlus
16
+ from hotglue_singer_sdk.helpers._singer import CatalogEntry, MetadataMapping
17
+ from hotglue_singer_sdk.plugin_base import PluginBase as TapBaseClass
18
+ from hotglue_singer_sdk.streams.core import Stream
19
+
20
+
21
class SQLConnector:
    """Wrap a SQL connection for SQLAlchemy-based taps and targets.

    The connector class serves as a wrapper around the SQL connection.

    Responsibilities of the connector:

    - connecting to the source
    - generating SQLAlchemy connection and engine objects
    - discovering schema catalog entries
    - performing type conversions to/from JSONSchema types
    - dialect-specific functions, such as escaping and fully qualified names
    """

    # Capability flags; dialect-specific subclasses override as needed.
    allow_column_add: bool = True  # Whether ADD COLUMN is supported.
    allow_column_rename: bool = True  # Whether RENAME COLUMN is supported.
    allow_column_alter: bool = False  # Whether altering column types is supported.
    allow_merge_upsert: bool = False  # Whether MERGE UPSERT is supported.
    allow_temp_tables: bool = True  # Whether temp tables are supported.

    def __init__(
        self, config: Optional[dict] = None, sqlalchemy_url: Optional[str] = None
    ) -> None:
        """Initialize the SQL connector.

        Args:
            config: The parent tap or target object's config.
            sqlalchemy_url: Optional URL for the connection.
        """
        self._config: Dict[str, Any] = config or {}
        self._sqlalchemy_url: Optional[str] = sqlalchemy_url or None
        # Lazily created on first access of the `connection` property.
        self._connection: Optional[sqlalchemy.engine.Connection] = None
53
+
54
@property
def config(self) -> dict:
    """Read-only access to the tap or target configuration.

    Returns:
        The settings as a dict.
    """
    return self._config
62
+
63
@property
def logger(self) -> logging.Logger:
    """Logger used by this connector.

    Returns:
        Plugin logger.
    """
    return logging.getLogger("sqlconnector")
71
+
72
def create_sqlalchemy_connection(self) -> sqlalchemy.engine.Connection:
    """Return a new SQLAlchemy connection using the provided config.

    By default the connection is created with the ``stream_results=True``
    option (server-side cursors), described here:
    https://docs.sqlalchemy.org/en/14/core/connections.html#using-server-side-cursors-a-k-a-stream-results

    Developers may override this method if their provider does not support
    server side cursors (`stream_results`) or in order to use different
    configuration options when creating the connection object.

    Returns:
        A newly created SQLAlchemy connection object.
    """
    engine = self.create_sqlalchemy_engine()
    connection = engine.connect()
    return connection.execution_options(stream_results=True)
91
+
92
def create_sqlalchemy_engine(self) -> sqlalchemy.engine.Engine:
    """Return a new SQLAlchemy engine using the provided config.

    Developers can generally override just one of the following:
    `sqlalchemy_engine`, `sqlalchemy_url`.

    Returns:
        A newly created SQLAlchemy engine object.
    """
    # `echo=False` suppresses SQLAlchemy's statement logging.
    return sqlalchemy.create_engine(self.sqlalchemy_url, echo=False)
102
+
103
@property
def connection(self) -> sqlalchemy.engine.Connection:
    """The active SQLAlchemy connection, created lazily on first access.

    Returns:
        The active SQLAlchemy connection object.
    """
    if not self._connection:
        self._connection = self.create_sqlalchemy_connection()
    return self._connection
114
+
115
@property
def sqlalchemy_url(self) -> str:
    """Return the SQLAlchemy URL string, deriving it from config on first use.

    Returns:
        The URL as a string.
    """
    # Cache the derived URL so `get_sqlalchemy_url` runs at most once.
    if not self._sqlalchemy_url:
        self._sqlalchemy_url = self.get_sqlalchemy_url(self.config)

    return self._sqlalchemy_url
126
+
127
def get_sqlalchemy_url(self, config: Dict[str, Any]) -> str:
    """Return the SQLAlchemy URL string.

    Developers can generally override just one of the following:
    `sqlalchemy_engine`, `get_sqlalchemy_url`.

    Args:
        config: A dictionary of settings from the tap or target config.

    Returns:
        The URL as a string.

    Raises:
        ConfigValidationError: If no valid sqlalchemy_url can be found.
    """
    if "sqlalchemy_url" in config:
        return cast(str, config["sqlalchemy_url"])

    raise ConfigValidationError(
        "Could not find or create 'sqlalchemy_url' for connection."
    )
148
+
149
@staticmethod
def to_jsonschema_type(
    sql_type: Union[
        str, sqlalchemy.types.TypeEngine, Type[sqlalchemy.types.TypeEngine], Any
    ]
) -> dict:
    """Return a JSON Schema representation of the provided type.

    By default will call `typing.to_jsonschema_type()` for strings and
    SQLAlchemy types.

    Developers may override this method to accept additional input argument
    types, to support non-standard types, or to provide custom typing logic.

    Args:
        sql_type: The string representation of the SQL type, a SQLAlchemy
            TypeEngine class or object, or a custom-specified object.

    Raises:
        ValueError: If the type received could not be translated to jsonschema.

    Returns:
        The JSON Schema representation of the provided type.
    """
    # Strings and TypeEngine instances are handled directly.
    if isinstance(sql_type, (str, sqlalchemy.types.TypeEngine)):
        return th.to_jsonschema_type(sql_type)

    # TypeEngine subclasses (the class itself, not an instance) also work.
    if isinstance(sql_type, type) and issubclass(
        sql_type, sqlalchemy.types.TypeEngine
    ):
        return th.to_jsonschema_type(sql_type)

    if isinstance(sql_type, type):
        raise ValueError(f"Unexpected type received: '{sql_type.__name__}'")

    raise ValueError(f"Unexpected type received: '{type(sql_type).__name__}'")
183
+
184
@staticmethod
def to_sql_type(jsonschema_type: dict) -> sqlalchemy.types.TypeEngine:
    """Convert a JSON Schema type into a SQLAlchemy type.

    By default will call `typing.to_sql_type()`.

    Developers may override this method to accept additional input argument
    types, to support non-standard types, or to provide custom typing logic.

    If overriding this method, developers should call the default
    implementation from the base class for all unhandled cases.

    Args:
        jsonschema_type: The JSON Schema representation of the source type.

    Returns:
        The SQLAlchemy type representation of the data type.
    """
    return th.to_sql_type(jsonschema_type)
203
+
204
@staticmethod
def get_fully_qualified_name(
    table_name: str,
    schema_name: Optional[str] = None,
    db_name: Optional[str] = None,
    delimiter: str = ".",
) -> str:
    """Concatenate a fully qualified name from the parts.

    Args:
        table_name: The name of the table.
        schema_name: The name of the schema. Defaults to None.
        db_name: The name of the database. Defaults to None.
        delimiter: Generally: '.' for SQL names and '-' for Singer names.

    Raises:
        ValueError: If neither table_name, schema_name nor db_name are
            provided.

    Returns:
        The fully qualified name as a string.
    """
    if not (db_name or schema_name or table_name):
        raise ValueError(
            "Could not generate fully qualified name for stream: "
            + ":".join(
                [
                    db_name or "(unknown-db)",
                    schema_name or "(unknown-schema)",
                    table_name or "(unknown-table-name)",
                ]
            )
        )

    # Keep only the qualifier parts that were supplied; the table name is
    # always the final component.
    name_parts = [part for part in (db_name, schema_name) if part]
    name_parts.append(table_name)
    return delimiter.join(name_parts)
247
+
248
@property
def _dialect(self) -> sqlalchemy.engine.Dialect:
    """Return the dialect object of the active connection's engine.

    Returns:
        The dialect object.
    """
    return cast(sqlalchemy.engine.Dialect, self.connection.engine.dialect)
256
+
257
@property
def _engine(self) -> sqlalchemy.engine.Engine:
    """Return the engine object backing the active connection.

    Returns:
        The engine object.
    """
    return cast(sqlalchemy.engine.Engine, self.connection.engine)
265
+
266
def quote(self, name: str) -> str:
    """Quote a name if it needs quoting, using '.' as a name-part delimiter.

    Examples:
        "my_table" => "`my_table`"
        "my_schema.my_table" => "`my_schema`.`my_table`"

    Args:
        name: The unquoted name.

    Returns:
        str: The quoted name.
    """
    # Each dotted part is quoted independently per the dialect's rules.
    preparer = self._dialect.identifier_preparer
    quoted_parts = (preparer.quote(part) for part in name.split("."))
    return ".".join(quoted_parts)
285
+
286
def _warn_no_view_detection(self) -> None:
    """Log a warning about missing view support, but only the first time.

    NOTE(fix): the original used ``@lru_cache()`` on this instance method to
    achieve "warn once" semantics. Caching on a method keys the cache on
    ``self`` and keeps every connector instance alive for the life of the
    process (ruff B019). A per-instance flag gives the same once-per-instance
    behavior without the reference leak.
    """
    if getattr(self, "_view_detection_warned", False):
        return
    self._view_detection_warned = True
    self.logger.warning(
        "Provider does not support get_view_names(). "
        "Streams list may be incomplete or `is_view` may be unpopulated."
    )
293
+
294
def get_schema_names(self, engine: Engine, inspected: Inspector) -> List[str]:
    """Return a list of schema names in DB.

    Args:
        engine: SQLAlchemy engine
        inspected: SQLAlchemy inspector instance for engine

    Returns:
        List of schema names
    """
    # Delegates entirely to the inspector; `engine` is kept in the signature
    # so dialect-specific subclasses can use it.
    return inspected.get_schema_names()
305
+
306
def get_object_names(
    self, engine: Engine, inspected: Inspector, schema_name: str
) -> List[Tuple[str, bool]]:
    """Return a list of syncable objects.

    Args:
        engine: SQLAlchemy engine
        inspected: SQLAlchemy inspector instance for engine
        schema_name: Schema name to inspect

    Returns:
        List of tuples (<table_or_view_name>, <is_view>)
    """
    table_names = inspected.get_table_names(schema=schema_name)
    try:
        view_names = inspected.get_view_names(schema=schema_name)
    except NotImplementedError:
        # Some DB providers do not understand 'views'
        self._warn_no_view_detection()
        view_names = []

    # Tables first, then views, each tagged with an `is_view` flag.
    objects: List[Tuple[str, bool]] = []
    objects.extend((table, False) for table in table_names)
    objects.extend((view, True) for view in view_names)
    return objects
332
+
333
# TODO: Consider splitting this method into smaller parts.
def discover_catalog_entry(
    self,
    engine: Engine,
    inspected: Inspector,
    schema_name: str,
    table_name: str,
    is_view: bool,
) -> CatalogEntry:
    """Create `CatalogEntry` object for the given table or a view.

    Args:
        engine: SQLAlchemy engine
        inspected: SQLAlchemy inspector instance for engine
        schema_name: Schema name to inspect
        table_name: Name of the table or a view
        is_view: Flag whether this object is a view, returned by `get_object_names`

    Returns:
        `CatalogEntry` object for the given table or a view
    """
    # Initialize unique stream name
    unique_stream_id = self.get_fully_qualified_name(
        db_name=None,
        schema_name=schema_name,
        table_name=table_name,
        delimiter="-",
    )

    # Detect key properties
    possible_primary_keys: List[List[str]] = []
    pk_def = inspected.get_pk_constraint(table_name, schema=schema_name)
    if pk_def and "constrained_columns" in pk_def:
        possible_primary_keys.append(pk_def["constrained_columns"])
    for index_def in inspected.get_indexes(table_name, schema=schema_name):
        if index_def.get("unique", False):
            possible_primary_keys.append(index_def["column_names"])
    # Prefer the declared PK constraint; fall back to the first unique index.
    key_properties = next(iter(possible_primary_keys), None)

    # Initialize columns list
    table_schema = th.PropertiesList()
    for column_def in inspected.get_columns(table_name, schema=schema_name):
        column_name = column_def["name"]
        is_nullable = column_def.get("nullable", False)
        jsonschema_type: dict = self.to_jsonschema_type(
            cast(sqlalchemy.types.TypeEngine, column_def["type"])
        )
        table_schema.append(
            th.Property(
                name=column_name,
                wrapped=th.CustomType(jsonschema_type),
                required=not is_nullable,
            )
        )
    schema = table_schema.to_dict()

    # Initialize available replication methods
    # NOTE(fix): this was previously `[""]`, which made the expression below
    # resolve to an empty-string replication method (last element wins after
    # `reversed`). The comment and the Singer spec both call for an empty
    # list here, so 'FULL_TABLE' becomes the default.
    addl_replication_methods: List[str] = []  # By default an empty list.
    # Notes regarding replication methods:
    # - 'INCREMENTAL' replication must be enabled by the user by specifying
    #   a replication_key value.
    # - 'LOG_BASED' replication must be enabled by the developer, according
    #   to source-specific implementation capabilities.
    replication_method = next(reversed(["FULL_TABLE"] + addl_replication_methods))

    # Create the catalog entry object
    catalog_entry = CatalogEntry(
        tap_stream_id=unique_stream_id,
        stream=unique_stream_id,
        table=table_name,
        key_properties=key_properties,
        schema=SchemaPlus.from_dict(schema),
        is_view=is_view,
        replication_method=replication_method,
        metadata=MetadataMapping.get_standard_metadata(
            schema_name=schema_name,
            schema=schema,
            replication_method=replication_method,
            key_properties=key_properties,
            valid_replication_keys=None,  # Must be defined by user
        ),
        database=None,  # Expects single-database context
        row_count=None,
        stream_alias=None,
        replication_key=None,  # Must be defined by user
    )

    return catalog_entry
421
+
422
def discover_catalog_entries(self) -> List[dict]:
    """Return a list of catalog entries from discovery.

    Returns:
        The discovered catalog entries as a list.
    """
    engine = self.create_sqlalchemy_engine()
    inspected = sqlalchemy.inspect(engine)
    entries: List[dict] = []
    for schema_name in self.get_schema_names(engine, inspected):
        # Iterate through each table and view in the schema.
        for table_name, is_view in self.get_object_names(
            engine, inspected, schema_name
        ):
            entry = self.discover_catalog_entry(
                engine, inspected, schema_name, table_name, is_view
            )
            entries.append(entry.to_dict())

    return entries
442
+
443
def parse_full_table_name(
    self, full_table_name: str
) -> Tuple[Optional[str], Optional[str], str]:
    """Parse a fully qualified table name into its parts.

    Developers may override this method if their platform does not support the
    traditional 3-part convention: `db_name.schema_name.table_name`

    Args:
        full_table_name: A table name or a fully qualified table name.
            Depending on the SQL platform, this could take the following forms:
            - `<db>.<schema>.<table>` (three part names)
            - `<db>.<table>` (platforms which do not use schema groupings)
            - `<schema>.<name>` (if DB name is already in context)
            - `<table>` (if DB name and schema name are already in context)

    Returns:
        A three part tuple (db_name, schema_name, table_name) with any
        unspecified or unused parts returned as None.
    """
    db_name: Optional[str] = None
    schema_name: Optional[str] = None

    parts = full_table_name.split(".")
    if len(parts) == 1:
        table_name = full_table_name
    elif len(parts) == 2:
        schema_name, table_name = parts
    elif len(parts) == 3:
        db_name, schema_name, table_name = parts

    return db_name, schema_name, table_name
475
+
476
def table_exists(self, full_table_name: str) -> bool:
    """Determine if the target table already exists.

    NOTE(fix): the name is now split into its parts first so that
    schema-qualified names (e.g. ``my_schema.my_table``) are checked against
    the correct schema rather than being passed verbatim to ``has_table``.
    This matches how `get_table_columns` resolves names.

    Args:
        full_table_name: the target table name.

    Returns:
        True if table exists, False if not.
    """
    _, schema_name, table_name = self.parse_full_table_name(full_table_name)
    return cast(
        bool,
        sqlalchemy.inspect(self._engine).has_table(table_name, schema_name),
    )
489
+
490
def get_table_columns(self, full_table_name: str) -> Dict[str, sqlalchemy.Column]:
    """Return the table's columns, keyed by column name.

    Args:
        full_table_name: Fully qualified table name.

    Returns:
        An ordered dict mapping column names to column objects.
    """
    _, schema_name, table_name = self.parse_full_table_name(full_table_name)
    inspector = sqlalchemy.inspect(self._engine)
    return {
        col_meta["name"]: sqlalchemy.Column(
            col_meta["name"],
            col_meta["type"],
            nullable=col_meta.get("nullable", False),
        )
        for col_meta in inspector.get_columns(table_name, schema_name)
    }
512
+
513
def get_table(self, full_table_name: str) -> sqlalchemy.Table:
    """Return a table object.

    Args:
        full_table_name: Fully qualified table name.

    Returns:
        A table object with column list.
    """
    _, schema_name, table_name = self.parse_full_table_name(full_table_name)
    column_list = list(self.get_table_columns(full_table_name).values())
    metadata = sqlalchemy.MetaData()
    return sqlalchemy.schema.Table(
        table_name, metadata, *column_list, schema=schema_name
    )
528
+
529
def column_exists(self, full_table_name: str, column_name: str) -> bool:
    """Determine if the target column already exists in the given table.

    Args:
        full_table_name: the target table name.
        column_name: the target column name.

    Returns:
        True if the column exists, False if not.
    """
    existing_columns = self.get_table_columns(full_table_name)
    return column_name in existing_columns
540
+
541
def create_empty_table(
    self,
    full_table_name: str,
    schema: dict,
    primary_keys: Optional[List[str]] = None,
    partition_keys: Optional[List[str]] = None,
    as_temp_table: bool = False,
) -> None:
    """Create an empty target table.

    Args:
        full_table_name: the target table name.
        schema: the JSON schema for the new table.
        primary_keys: list of key properties.
        partition_keys: list of partition keys.
        as_temp_table: True to create a temp table.

    Raises:
        NotImplementedError: if temp tables are unsupported and as_temp_table=True.
        RuntimeError: if a variant schema is passed with no properties defined.
    """
    if as_temp_table:
        raise NotImplementedError("Temporary tables are not supported.")

    _ = partition_keys  # Not supported in generic implementation.

    key_names = primary_keys or []
    try:
        properties: dict = schema["properties"]
    except KeyError:
        raise RuntimeError(
            f"Schema for '{full_table_name}' does not define properties: {schema}"
        )

    # One column per JSON Schema property; key columns are flagged as PK.
    columns = [
        sqlalchemy.Column(
            property_name,
            self.to_sql_type(property_jsonschema),
            primary_key=property_name in key_names,
        )
        for property_name, property_jsonschema in properties.items()
    ]

    meta = sqlalchemy.MetaData()
    _ = sqlalchemy.Table(full_table_name, meta, *columns)
    meta.create_all(self._engine)
588
+
589
def _create_empty_column(
    self,
    full_table_name: str,
    column_name: str,
    sql_type: sqlalchemy.types.TypeEngine,
) -> None:
    """Create a new column.

    Args:
        full_table_name: The target table name.
        column_name: The name of the new column.
        sql_type: SQLAlchemy type engine to be used in creating the new column.

    Raises:
        NotImplementedError: if adding columns is not supported.
    """
    if not self.allow_column_add:
        raise NotImplementedError("Adding columns is not supported.")

    new_column = sqlalchemy.Column(column_name, sql_type)
    create_column_clause = sqlalchemy.schema.CreateColumn(new_column)
    add_column_ddl = sqlalchemy.DDL(
        "ALTER TABLE %(table)s ADD COLUMN %(create_column)s",
        {
            "table": full_table_name,
            "create_column": create_column_clause,
        },
    )
    self.connection.execute(add_column_ddl)
623
+
624
def prepare_table(
    self,
    full_table_name: str,
    schema: dict,
    primary_keys: List[str],
    partition_keys: Optional[List[str]] = None,
    as_temp_table: bool = False,
) -> None:
    """Adapt target table to provided schema if possible.

    Creates the table when it does not exist yet; otherwise aligns each
    column with the incoming JSON Schema.

    Args:
        full_table_name: the target table name.
        schema: the JSON Schema for the table.
        primary_keys: list of key properties.
        partition_keys: list of partition keys.
        as_temp_table: True to create a temp table.
    """
    if self.table_exists(full_table_name=full_table_name):
        for property_name, property_def in schema["properties"].items():
            self.prepare_column(
                full_table_name, property_name, self.to_sql_type(property_def)
            )
        return

    self.create_empty_table(
        full_table_name=full_table_name,
        schema=schema,
        primary_keys=primary_keys,
        partition_keys=partition_keys,
        as_temp_table=as_temp_table,
    )
655
+
656
def prepare_column(
    self,
    full_table_name: str,
    column_name: str,
    sql_type: sqlalchemy.types.TypeEngine,
) -> None:
    """Adapt target column to the provided schema type if possible.

    Args:
        full_table_name: the target table name.
        column_name: the target column name.
        sql_type: the SQLAlchemy type.
    """
    if self.column_exists(full_table_name, column_name):
        # Existing column: widen its type if the new data requires it.
        self._adapt_column_type(
            full_table_name,
            column_name=column_name,
            sql_type=sql_type,
        )
        return

    # Missing column: add it.
    self._create_empty_column(
        full_table_name=full_table_name,
        column_name=column_name,
        sql_type=sql_type,
    )
682
+
683
def rename_column(self, full_table_name: str, old_name: str, new_name: str) -> None:
    """Rename the provided columns.

    Args:
        full_table_name: The fully qualified table name.
        old_name: The old column to be renamed.
        new_name: The new name for the column.

    Raises:
        NotImplementedError: If `self.allow_column_rename` is false.
    """
    if not self.allow_column_rename:
        raise NotImplementedError("Renaming columns is not supported.")

    rename_ddl = (
        f"ALTER TABLE {full_table_name} "
        f'RENAME COLUMN "{old_name}" to "{new_name}"'
    )
    self.connection.execute(rename_ddl)
701
+
702
def merge_sql_types(
    self, sql_types: List[sqlalchemy.types.TypeEngine]
) -> sqlalchemy.types.TypeEngine:
    """Return a compatible SQL type for the selected type list.

    Args:
        sql_types: List of SQL types.

    Returns:
        A SQL type that is compatible with the input types.

    Raises:
        ValueError: If sql_types argument has zero members.
    """
    if not sql_types:
        raise ValueError("Expected at least one member in `sql_types` argument.")

    if len(sql_types) == 1:
        return sql_types[0]

    # Sort most-compatible first, then fold pairwise: merge the first two,
    # then recurse with the merged result plus the remainder.
    sql_types = self._sort_types(sql_types)

    if len(sql_types) > 2:
        return self.merge_sql_types(
            [self.merge_sql_types([sql_types[0], sql_types[1]])] + sql_types[2:]
        )

    assert len(sql_types) == 2
    # NOTE(fix): the original also carried an
    # `elif isinstance(generic_type, (String, Unicode))` arm, which could
    # never fire because `generic_type` is a class, not an instance. That
    # unreachable branch has been removed; behavior is unchanged.
    generic_type = type(sql_types[0].as_generic())
    if isinstance(generic_type, type) and issubclass(
        generic_type,
        (sqlalchemy.types.String, sqlalchemy.types.Unicode),
    ):
        # String-like winner can absorb the other type.
        return sql_types[0]

    raise ValueError(
        f"Unable to merge sql types: {', '.join([str(t) for t in sql_types])}"
    )
747
+
748
def _sort_types(
    self,
    sql_types: Iterable[sqlalchemy.types.TypeEngine],
) -> List[sqlalchemy.types.TypeEngine]:
    """Return the input types sorted from most to least compatible.

    String/bytes types sort first, then datetimes, then floats, then
    integers; anything else comes last. Within a category, greater length
    sorts earlier.

    Args:
        sql_types: The SQL types to sort.

    Returns:
        The sorted list.
    """

    def _rank(sql_type: sqlalchemy.types.TypeEngine) -> Tuple[int, int]:
        # Higher rank sorts earlier (the sort below is reversed).
        type_len = int(getattr(sql_type, "length", 0) or 0)
        py_type = cast(type, sql_type.python_type)
        if issubclass(py_type, (str, bytes)):
            rank = 900
        elif issubclass(py_type, datetime):
            rank = 600
        elif issubclass(py_type, float):
            rank = 400
        elif issubclass(py_type, int):
            rank = 300
        else:
            rank = 0
        return rank, type_len

    return sorted(sql_types, key=_rank, reverse=True)
787
+
788
def _get_column_type(
    self, full_table_name: str, column_name: str
) -> sqlalchemy.types.TypeEngine:
    """Get the SQL type of the declared column.

    Args:
        full_table_name: The name of the table.
        column_name: The name of the column.

    Returns:
        The type of the column.

    Raises:
        KeyError: If the provided column name does not exist.
    """
    try:
        column = self.get_table_columns(full_table_name)[column_name]
    except KeyError as ex:
        # Re-raise with a message naming both the column and the table.
        raise KeyError(
            f"Column `{column_name}` does not exist in table `{full_table_name}`."
        ) from ex

    return cast(sqlalchemy.types.TypeEngine, column.type)
811
+
812
def _adapt_column_type(
    self,
    full_table_name: str,
    column_name: str,
    sql_type: sqlalchemy.types.TypeEngine,
) -> None:
    """Adapt table column type to support the new JSON schema type.

    Args:
        full_table_name: The target table name.
        column_name: The target column name.
        sql_type: The new SQLAlchemy type.

    Raises:
        NotImplementedError: if altering columns is not supported.
    """
    current_type = self._get_column_type(full_table_name, column_name)
    compatible_sql_type = self.merge_sql_types([current_type, sql_type])
    if current_type == compatible_sql_type:
        # Nothing to do
        return

    if not self.allow_column_alter:
        # NOTE(fix): the message previously rendered the literal text
        # "column_name" because the placeholder lacked braces.
        raise NotImplementedError(
            "Altering columns is not supported. "
            f"Could not convert column '{full_table_name}.{column_name}' "
            f"from '{current_type}' to '{compatible_sql_type}'."
        )

    self.connection.execute(
        sqlalchemy.DDL(
            "ALTER TABLE %(table)s ALTER COLUMN %(col_name)s (%(col_type)s)",
            {
                "table": full_table_name,
                "col_name": column_name,
                "col_type": compatible_sql_type,
            },
        )
    )
851
+
852
+
853
class SQLStream(Stream, metaclass=abc.ABCMeta):
    """Base class for SQLAlchemy-based streams."""

    # Dialect-specific subclasses may point this at their own connector.
    connector_class = SQLConnector

    def __init__(
        self,
        tap: TapBaseClass,
        catalog_entry: dict,
        connector: Optional[SQLConnector] = None,
    ) -> None:
        """Initialize the database stream.

        If `connector` is omitted, a new connector will be created.

        Args:
            tap: The parent tap object.
            catalog_entry: Catalog entry dict.
            connector: Optional connector to reuse.
        """
        self._connector: SQLConnector
        self._connector = connector or self.connector_class(dict(tap.config))
        # `catalog_entry` must be set before calling super().__init__, which
        # reads the `schema` and `tap_stream_id` properties derived from it.
        self.catalog_entry = catalog_entry
        super().__init__(
            tap=tap,
            schema=self.schema,
            name=self.tap_stream_id,
        )
885
+
886
@property
def _singer_catalog_entry(self) -> CatalogEntry:
    """Return catalog entry as specified by the Singer catalog spec.

    A fresh object is built from `self.catalog_entry` on every access.

    Returns:
        A CatalogEntry object.
    """
    return cast(CatalogEntry, CatalogEntry.from_dict(self.catalog_entry))
894
+
895
@property
def connector(self) -> SQLConnector:
    """The connector object wrapping this stream's SQL connection.

    Returns:
        The connector object.
    """
    return self._connector
903
+
904
+ @property
905
+ def metadata(self) -> MetadataMapping:
906
+ """The Singer metadata.
907
+
908
+ Metadata from an input catalog will override standard metadata.
909
+
910
+ Returns:
911
+ Metadata object as specified in the Singer spec.
912
+ """
913
+ return self._singer_catalog_entry.metadata
914
+
915
+ @property
916
+ def schema(self) -> dict:
917
+ """Return metadata object (dict) as specified in the Singer spec.
918
+
919
+ Metadata from an input catalog will override standard metadata.
920
+
921
+ Returns:
922
+ The schema object.
923
+ """
924
+ return cast(dict, self._singer_catalog_entry.schema.to_dict())
925
+
926
+ @property
927
+ def tap_stream_id(self) -> str:
928
+ """Return the unique ID used by the tap to identify this stream.
929
+
930
+ Generally, this is the same value as in `Stream.name`.
931
+
932
+ In rare cases, such as for database types with multi-part names,
933
+ this may be slightly different from `Stream.name`.
934
+
935
+ Returns:
936
+ The unique tap stream ID as a string.
937
+ """
938
+ return self._singer_catalog_entry.tap_stream_id
939
+
940
+ @property
941
+ def primary_keys(self) -> Optional[List[str]]:
942
+ """Get primary keys from the catalog entry definition.
943
+
944
+ Returns:
945
+ A list of primary key(s) for the stream.
946
+ """
947
+ return self._singer_catalog_entry.metadata.root.table_key_properties or []
948
+
949
+ @primary_keys.setter
950
+ def primary_keys(self, new_value: List[str]) -> None:
951
+ """Set or reset the primary key(s) in the stream's catalog entry.
952
+
953
+ Args:
954
+ new_value: a list of one or more column names
955
+ """
956
+ self._singer_catalog_entry.metadata.root.table_key_properties = new_value
957
+
958
+ @property
959
+ def fully_qualified_name(self) -> str:
960
+ """Generate the fully qualified version of the table name.
961
+
962
+ Raises:
963
+ ValueError: If table_name is not able to be detected.
964
+
965
+ Returns:
966
+ The fully qualified name.
967
+ """
968
+ catalog_entry = self._singer_catalog_entry
969
+ if not catalog_entry.table:
970
+ raise ValueError(
971
+ f"Missing table name in catalog entry: {catalog_entry.to_dict()}"
972
+ )
973
+
974
+ return self.connector.get_fully_qualified_name(
975
+ table_name=catalog_entry.table,
976
+ schema_name=catalog_entry.metadata.root.schema_name,
977
+ db_name=catalog_entry.database,
978
+ )
979
+
980
+ # Get records from stream
981
+
982
+ def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]:
983
+ """Return a generator of row-type dictionary objects.
984
+
985
+ If the stream has a replication_key value defined, records will be sorted by the
986
+ incremental key. If the stream also has an available starting bookmark, the
987
+ records will be filtered for values greater than or equal to the bookmark value.
988
+
989
+ Args:
990
+ context: If partition context is provided, will read specifically from this
991
+ data slice.
992
+
993
+ Yields:
994
+ One dict per record.
995
+
996
+ Raises:
997
+ NotImplementedError: If partition is passed in context and the stream does
998
+ not support partitioning.
999
+ """
1000
+ if context:
1001
+ raise NotImplementedError(
1002
+ f"Stream '{self.name}' does not support partitioning."
1003
+ )
1004
+
1005
+ table = self.connector.get_table(self.fully_qualified_name)
1006
+ query = table.select()
1007
+ if self.replication_key:
1008
+ replication_key_col = table.columns[self.replication_key]
1009
+ query = query.order_by(replication_key_col)
1010
+
1011
+ start_val = self.get_starting_replication_key_value(context)
1012
+ if start_val:
1013
+ query = query.where(
1014
+ sqlalchemy.text(":replication_key >= :start_val").bindparams(
1015
+ replication_key=replication_key_col, start_val=start_val
1016
+ )
1017
+ )
1018
+
1019
+ for row in self.connector.connection.execute(query):
1020
+ yield dict(row)
1021
+
1022
+
1023
+ __all__ = ["SQLStream", "SQLConnector"]