hotglue-singer-sdk 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. hotglue_singer_sdk/__init__.py +34 -0
  2. hotglue_singer_sdk/authenticators.py +554 -0
  3. hotglue_singer_sdk/cli/__init__.py +1 -0
  4. hotglue_singer_sdk/cli/common_options.py +37 -0
  5. hotglue_singer_sdk/configuration/__init__.py +1 -0
  6. hotglue_singer_sdk/configuration/_dict_config.py +101 -0
  7. hotglue_singer_sdk/exceptions.py +52 -0
  8. hotglue_singer_sdk/helpers/__init__.py +1 -0
  9. hotglue_singer_sdk/helpers/_catalog.py +122 -0
  10. hotglue_singer_sdk/helpers/_classproperty.py +18 -0
  11. hotglue_singer_sdk/helpers/_compat.py +15 -0
  12. hotglue_singer_sdk/helpers/_flattening.py +374 -0
  13. hotglue_singer_sdk/helpers/_schema.py +100 -0
  14. hotglue_singer_sdk/helpers/_secrets.py +41 -0
  15. hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
  16. hotglue_singer_sdk/helpers/_singer.py +280 -0
  17. hotglue_singer_sdk/helpers/_state.py +282 -0
  18. hotglue_singer_sdk/helpers/_typing.py +231 -0
  19. hotglue_singer_sdk/helpers/_util.py +27 -0
  20. hotglue_singer_sdk/helpers/capabilities.py +240 -0
  21. hotglue_singer_sdk/helpers/jsonpath.py +39 -0
  22. hotglue_singer_sdk/io_base.py +134 -0
  23. hotglue_singer_sdk/mapper.py +691 -0
  24. hotglue_singer_sdk/mapper_base.py +156 -0
  25. hotglue_singer_sdk/plugin_base.py +415 -0
  26. hotglue_singer_sdk/py.typed +0 -0
  27. hotglue_singer_sdk/sinks/__init__.py +14 -0
  28. hotglue_singer_sdk/sinks/batch.py +90 -0
  29. hotglue_singer_sdk/sinks/core.py +412 -0
  30. hotglue_singer_sdk/sinks/record.py +66 -0
  31. hotglue_singer_sdk/sinks/sql.py +299 -0
  32. hotglue_singer_sdk/streams/__init__.py +14 -0
  33. hotglue_singer_sdk/streams/core.py +1294 -0
  34. hotglue_singer_sdk/streams/graphql.py +74 -0
  35. hotglue_singer_sdk/streams/rest.py +611 -0
  36. hotglue_singer_sdk/streams/sql.py +1023 -0
  37. hotglue_singer_sdk/tap_base.py +580 -0
  38. hotglue_singer_sdk/target_base.py +554 -0
  39. hotglue_singer_sdk/target_sdk/__init__.py +0 -0
  40. hotglue_singer_sdk/target_sdk/auth.py +124 -0
  41. hotglue_singer_sdk/target_sdk/client.py +286 -0
  42. hotglue_singer_sdk/target_sdk/common.py +13 -0
  43. hotglue_singer_sdk/target_sdk/lambda.py +121 -0
  44. hotglue_singer_sdk/target_sdk/rest.py +108 -0
  45. hotglue_singer_sdk/target_sdk/sinks.py +16 -0
  46. hotglue_singer_sdk/target_sdk/target.py +570 -0
  47. hotglue_singer_sdk/target_sdk/target_base.py +627 -0
  48. hotglue_singer_sdk/testing.py +198 -0
  49. hotglue_singer_sdk/typing.py +603 -0
  50. hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
  51. hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
  52. hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
  53. hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,299 @@
1
+ """Sink classes load data to SQL targets."""
2
+
3
+ from textwrap import dedent
4
+ from typing import Any, Dict, Iterable, List, Optional, Type
5
+
6
+ import sqlalchemy
7
+ from pendulum import now
8
+ from sqlalchemy.sql.expression import bindparam
9
+
10
+ from hotglue_singer_sdk.plugin_base import PluginBase
11
+ from hotglue_singer_sdk.sinks.batch import BatchSink
12
+ from hotglue_singer_sdk.streams.sql import SQLConnector
13
+
14
+
15
class SQLSink(BatchSink):
    """Batch sink that writes records to a SQL table through a ``SQLConnector``.

    Class attributes:
        connector_class: Connector type instantiated from the target config when
            no connector instance is passed to ``__init__``.
        soft_delete_column_name: Column stamped with a timestamp by
            ``activate_version`` when hard deletes are disabled.
        version_column_name: Column compared against the activated version in
            ``activate_version``.
    """

    connector_class: Type[SQLConnector]
    soft_delete_column_name = "_sdc_deleted_at"
    version_column_name = "_sdc_table_version"

    def __init__(
        self,
        target: PluginBase,
        stream_name: str,
        schema: Dict,
        key_properties: Optional[List[str]],
        connector: Optional[SQLConnector] = None,
    ) -> None:
        """Initialize SQL Sink.

        Args:
            target: The target object.
            stream_name: The source tap's stream name.
            schema: The JSON Schema definition.
            key_properties: The primary key columns.
            connector: Optional connector to reuse. When omitted, a new
                ``connector_class`` instance is built from ``target.config``.
        """
        # Compare against None explicitly so a connector object that happens to
        # be falsy (e.g. defines __len__/__bool__) is still reused.
        # The connector is assigned before the base initializer runs, matching
        # the original statement order.
        self._connector: SQLConnector = (
            connector
            if connector is not None
            else self.connector_class(dict(target.config))
        )

        super().__init__(target, stream_name, schema, key_properties)

    @property
    def connector(self) -> SQLConnector:
        """The connector object.

        Returns:
            The connector object used by this sink.
        """
        return self._connector

    @property
    def connection(self) -> sqlalchemy.engine.Connection:
        """Get the SQLAlchemy connection for this sink.

        Returns:
            The connection object exposed by the sink's connector.
        """
        return self.connector.connection

    @property
    def table_name(self) -> str:
        """Returns the table name, with no schema or database part.

        The stream name is split on ``-`` and the last segment is used; a
        stream name without ``-`` is returned unchanged.

        Returns:
            The target table name.
        """
        return self.stream_name.split("-")[-1]

    @property
    def schema_name(self) -> Optional[str]:
        """Returns the schema name or `None` if using names with no schema part.

        Returns:
            The target schema name.
        """
        return None  # Assumes single-schema target context.

    @property
    def database_name(self) -> Optional[str]:
        """Returns the DB name or `None` if using names with no database part.

        Returns:
            The target database name.
        """
        return None  # Assumes single-DB target context.

    def process_batch(self, context: dict) -> None:
        """Process a batch with the given batch context.

        Prepares the destination table from the sink's schema and key
        properties, then inserts ``context["records"]`` into it. Developers may
        override this method in order to provide a more efficient upload/upsert
        process.

        Args:
            context: Stream partition or context dictionary. Must contain a
                ``"records"`` entry holding the batch to write.
        """
        # If duplicates are merged, these can be tracked via
        # :meth:`~hotglue_singer_sdk.Sink.tally_duplicate_merged()`.
        self.connector.prepare_table(
            full_table_name=self.full_table_name,
            schema=self.schema,
            primary_keys=self.key_properties,
            as_temp_table=False,
        )
        self.bulk_insert_records(
            full_table_name=self.full_table_name,
            schema=self.schema,
            records=context["records"],
        )

    @property
    def full_table_name(self) -> str:
        """Gives the fully qualified table name.

        Returns:
            The name produced by the connector from the table, schema and
            database name parts of this sink.
        """
        return self.connector.get_fully_qualified_name(
            self.table_name,
            self.schema_name,
            self.database_name,
        )

    def create_table_with_records(
        self,
        full_table_name: Optional[str],
        schema: dict,
        records: Iterable[Dict[str, Any]],
        primary_keys: Optional[List[str]] = None,
        partition_keys: Optional[List[str]] = None,
        as_temp_table: bool = False,
    ) -> None:
        """Prepare a table and load the given records into it.

        Args:
            full_table_name: the target table name. Falls back to
                ``self.full_table_name`` when empty or `None`.
            schema: the JSON schema for the new table.
            records: records to load.
            primary_keys: list of key properties. Falls back to
                ``self.key_properties`` when `None`.
            partition_keys: list of partition keys. Accepted for interface
                compatibility; this default implementation does not use it.
            as_temp_table: True to create a temp table.
        """
        full_table_name = full_table_name or self.full_table_name
        if primary_keys is None:
            primary_keys = self.key_properties

        self.connector.prepare_table(
            full_table_name=full_table_name,
            primary_keys=primary_keys,
            schema=schema,
            as_temp_table=as_temp_table,
        )
        self.bulk_insert_records(
            full_table_name=full_table_name, schema=schema, records=records
        )

    def generate_insert_statement(
        self,
        full_table_name: str,
        schema: dict,
    ) -> str:
        """Generate a parameterized insert statement for the given schema.

        One column and one ``:name`` bind placeholder is emitted per entry of
        ``schema["properties"]``, in the schema's own order.

        Args:
            full_table_name: the target table name.
            schema: the JSON schema for the new table.

        Returns:
            An insert statement.
        """
        property_names = list(schema["properties"])
        column_list = ", ".join(property_names)
        placeholder_list = ", ".join(f":{name}" for name in property_names)
        statement = dedent(
            f"""\
            INSERT INTO {full_table_name}
            ({column_list})
            VALUES ({placeholder_list})
            """
        )

        return statement.rstrip()

    def bulk_insert_records(
        self,
        full_table_name: str,
        schema: dict,
        records: Iterable[Dict[str, Any]],
    ) -> Optional[int]:
        """Bulk insert records to an existing destination table.

        The default implementation uses a generic SQLAlchemy bulk insert operation.
        This method may optionally be overridden by developers in order to provide
        faster, native bulk uploads.

        Args:
            full_table_name: the target table name.
            schema: the JSON schema for the new table, to be used when inferring column
                names.
            records: the input records.

        Returns:
            The number of records submitted for insertion (``0`` when the batch
            is empty). Overrides may return `None` when the count is unknown.
        """
        insert_sql = self.generate_insert_statement(
            full_table_name,
            schema,
        )

        # Every bind placeholder needs a value in every parameter set, so
        # properties a record omits are sent as NULL. Keys that are not in the
        # schema are left in place, exactly as the caller supplied them.
        null_row = dict.fromkeys(schema["properties"])
        # Materialize the iterable: executemany needs a concrete sequence of
        # parameter sets, and a generator could not be counted afterwards.
        rows = [{**null_row, **record} for record in records]
        if not rows:
            # Executing with an empty parameter list would run the statement
            # once with no bound values at all.
            return 0

        self.logger.info("Inserting with SQL: %s", insert_sql)
        self.connector.connection.execute(
            sqlalchemy.text(insert_sql),
            rows,
        )
        return len(rows)

    def merge_upsert_from_table(
        self, target_table_name: str, from_table_name: str, join_keys: List[str]
    ) -> Optional[int]:
        """Merge upsert data from one table to another.

        The base class provides no implementation; subclasses that support
        merge upserts must override this method.

        Args:
            target_table_name: The destination table name.
            from_table_name: The source table name.
            join_keys: The merge upsert keys, or `None` to append.

        Returns:
            The number of records copied, if detectable, or `None` if the API does not
            report number of records affected/inserted.

        Raises:
            NotImplementedError: if the merge upsert capability does not exist or is
                undefined.
        """
        raise NotImplementedError()

    def activate_version(self, new_version: int) -> None:
        """Bump the active version of the target table.

        With the ``hard_delete`` config flag enabled (the default), rows whose
        version column is at or below ``new_version`` are deleted. Otherwise
        rows with an older version that are not yet soft-deleted get the
        soft-delete column stamped with the current time.

        Args:
            new_version: The version number to activate.
        """
        # There's nothing to do if the table doesn't exist yet
        # (which it won't the first time the stream is processed)
        if not self.connector.table_exists(self.full_table_name):
            return

        if not self.connector.column_exists(
            full_table_name=self.full_table_name,
            column_name=self.version_column_name,
        ):
            self.connector.prepare_column(
                self.full_table_name,
                self.version_column_name,
                sql_type=sqlalchemy.types.Integer(),
            )

        if self.config.get("hard_delete", True):
            # NOTE(review): the hard delete uses `<=` while the soft delete
            # below uses `<`; the operator is kept as-is here - confirm which
            # comparison is intended.
            # The version is sent as a bound parameter inside a text()
            # construct instead of being formatted into a raw SQL string.
            delete_query = sqlalchemy.text(
                f"DELETE FROM {self.full_table_name} "
                f"WHERE {self.version_column_name} <= :version"
            ).bindparams(
                bindparam(
                    "version", value=new_version, type_=sqlalchemy.types.Integer
                ),
            )
            self.connection.execute(delete_query)
            return

        if not self.connector.column_exists(
            full_table_name=self.full_table_name,
            column_name=self.soft_delete_column_name,
        ):
            self.connector.prepare_column(
                self.full_table_name,
                self.soft_delete_column_name,
                sql_type=sqlalchemy.types.DateTime(),
            )

        # Only the soft-delete path needs a timestamp.
        deleted_at = now()
        query = sqlalchemy.text(
            f"UPDATE {self.full_table_name}\n"
            f"SET {self.soft_delete_column_name} = :deletedate \n"
            f"WHERE {self.version_column_name} < :version \n"
            f"  AND {self.soft_delete_column_name} IS NULL\n"
        )
        query = query.bindparams(
            bindparam("deletedate", value=deleted_at, type_=sqlalchemy.types.DateTime),
            bindparam("version", value=new_version, type_=sqlalchemy.types.Integer),
        )
        self.connection.execute(query)
297
+
298
+
299
# Public names of this module; SQLConnector is imported above and re-exported.
__all__ = [
    "SQLSink",
    "SQLConnector",
]
@@ -0,0 +1,14 @@
1
+ """SDK for building singer-compliant taps."""
2
+
3
+ from hotglue_singer_sdk.streams.core import Stream
4
+ from hotglue_singer_sdk.streams.graphql import GraphQLStream
5
+ from hotglue_singer_sdk.streams.rest import RESTStream
6
+ from hotglue_singer_sdk.streams.sql import SQLConnector, SQLStream
7
+
8
# Names exported by this package: the stream classes and the SQL connector.
__all__ = ["Stream", "GraphQLStream", "RESTStream", "SQLStream", "SQLConnector"]