hotglue-singer-sdk 1.0.2 (hotglue_singer_sdk-1.0.2-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hotglue_singer_sdk/__init__.py +34 -0
- hotglue_singer_sdk/authenticators.py +554 -0
- hotglue_singer_sdk/cli/__init__.py +1 -0
- hotglue_singer_sdk/cli/common_options.py +37 -0
- hotglue_singer_sdk/configuration/__init__.py +1 -0
- hotglue_singer_sdk/configuration/_dict_config.py +101 -0
- hotglue_singer_sdk/exceptions.py +52 -0
- hotglue_singer_sdk/helpers/__init__.py +1 -0
- hotglue_singer_sdk/helpers/_catalog.py +122 -0
- hotglue_singer_sdk/helpers/_classproperty.py +18 -0
- hotglue_singer_sdk/helpers/_compat.py +15 -0
- hotglue_singer_sdk/helpers/_flattening.py +374 -0
- hotglue_singer_sdk/helpers/_schema.py +100 -0
- hotglue_singer_sdk/helpers/_secrets.py +41 -0
- hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
- hotglue_singer_sdk/helpers/_singer.py +280 -0
- hotglue_singer_sdk/helpers/_state.py +282 -0
- hotglue_singer_sdk/helpers/_typing.py +231 -0
- hotglue_singer_sdk/helpers/_util.py +27 -0
- hotglue_singer_sdk/helpers/capabilities.py +240 -0
- hotglue_singer_sdk/helpers/jsonpath.py +39 -0
- hotglue_singer_sdk/io_base.py +134 -0
- hotglue_singer_sdk/mapper.py +691 -0
- hotglue_singer_sdk/mapper_base.py +156 -0
- hotglue_singer_sdk/plugin_base.py +415 -0
- hotglue_singer_sdk/py.typed +0 -0
- hotglue_singer_sdk/sinks/__init__.py +14 -0
- hotglue_singer_sdk/sinks/batch.py +90 -0
- hotglue_singer_sdk/sinks/core.py +412 -0
- hotglue_singer_sdk/sinks/record.py +66 -0
- hotglue_singer_sdk/sinks/sql.py +299 -0
- hotglue_singer_sdk/streams/__init__.py +14 -0
- hotglue_singer_sdk/streams/core.py +1294 -0
- hotglue_singer_sdk/streams/graphql.py +74 -0
- hotglue_singer_sdk/streams/rest.py +611 -0
- hotglue_singer_sdk/streams/sql.py +1023 -0
- hotglue_singer_sdk/tap_base.py +580 -0
- hotglue_singer_sdk/target_base.py +554 -0
- hotglue_singer_sdk/target_sdk/__init__.py +0 -0
- hotglue_singer_sdk/target_sdk/auth.py +124 -0
- hotglue_singer_sdk/target_sdk/client.py +286 -0
- hotglue_singer_sdk/target_sdk/common.py +13 -0
- hotglue_singer_sdk/target_sdk/lambda.py +121 -0
- hotglue_singer_sdk/target_sdk/rest.py +108 -0
- hotglue_singer_sdk/target_sdk/sinks.py +16 -0
- hotglue_singer_sdk/target_sdk/target.py +570 -0
- hotglue_singer_sdk/target_sdk/target_base.py +627 -0
- hotglue_singer_sdk/testing.py +198 -0
- hotglue_singer_sdk/typing.py +603 -0
- hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
- hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0

hotglue_singer_sdk/target_base.py
@@ -0,0 +1,554 @@
"""Target abstract class."""

import abc
import copy
import json
import sys
import time
from io import FileIO
from pathlib import Path, PurePath
from typing import IO, Callable, Counter, Dict, List, Optional, Tuple, Type, Union

import click
from joblib import Parallel, delayed, parallel_backend

from hotglue_singer_sdk.cli import common_options
from hotglue_singer_sdk.exceptions import RecordsWitoutSchemaException
from hotglue_singer_sdk.helpers._classproperty import classproperty
from hotglue_singer_sdk.helpers._compat import final
from hotglue_singer_sdk.helpers.capabilities import CapabilitiesEnum, PluginCapabilities
from hotglue_singer_sdk.io_base import SingerMessageType, SingerReader
from hotglue_singer_sdk.mapper import PluginMapper
from hotglue_singer_sdk.plugin_base import PluginBase
from hotglue_singer_sdk.sinks import Sink

_MAX_PARALLELISM = 8


class Target(PluginBase, SingerReader, metaclass=abc.ABCMeta):
    """Abstract base class for targets.

    The `Target` class manages config information and is responsible for processing the
    incoming Singer data stream and orchestrating any needed target `Sink` objects. As
    messages are received from the tap, the `Target` class will automatically create
    any needed target `Sink` objects and send records along to the appropriate `Sink`
    object for that record.
    """

    _MAX_RECORD_AGE_IN_MINUTES: float = 30.0

    # Default class to use for creating new sink objects.
    # Required if `Target.get_sink_class()` is not defined.
    default_sink_class: Optional[Type[Sink]] = None

    def __init__(
        self,
        config: Optional[Union[dict, PurePath, str, List[Union[PurePath, str]]]] = None,
        parse_env_config: bool = False,
        validate_config: bool = True,
    ) -> None:
        """Initialize the target.

        Args:
            config: Target configuration. Can be a dictionary, a single path to a
                configuration file, or a list of paths to multiple configuration
                files.
            parse_env_config: Whether to look for configuration values in environment
                variables.
            validate_config: True to require validation of config settings.
        """
        super().__init__(
            config=config,
            parse_env_config=parse_env_config,
            validate_config=validate_config,
        )

        self._latest_state: Dict[str, dict] = {}
        self._drained_state: Dict[str, dict] = {}
        self._sinks_active: Dict[str, Sink] = {}
        self._sinks_to_clear: List[Sink] = []
        self._max_parallelism: Optional[int] = _MAX_PARALLELISM

        # Approximated for max record age enforcement
        self._last_full_drain_at: float = time.time()

        # Initialize mapper
        self.mapper: PluginMapper
        self.mapper = PluginMapper(
            plugin_config=dict(self.config),
            logger=self.logger,
        )

    @classproperty
    def capabilities(self) -> List[CapabilitiesEnum]:
        """Get target capabilities.

        Returns:
            A list of capabilities supported by this target.
        """
        return [
            PluginCapabilities.ABOUT,
            PluginCapabilities.STREAM_MAPS,
            PluginCapabilities.FLATTENING,
        ]

    @property
    def max_parallelism(self) -> int:
        """Get max parallel sinks.

        The default is 8 if not overridden.

        Returns:
            Max number of sinks that can be drained in parallel.
        """
        if self._max_parallelism is not None:
            return self._max_parallelism

        return _MAX_PARALLELISM

    @max_parallelism.setter
    def max_parallelism(self, new_value: int) -> None:
        """Override the default (max) parallelism.

        The default is 8 if not overridden.

        Args:
            new_value: The new max degree of parallelism for this target.
        """
        self._max_parallelism = new_value

    def get_sink(
        self,
        stream_name: str,
        *,
        record: Optional[dict] = None,
        schema: Optional[dict] = None,
        key_properties: Optional[List[str]] = None,
    ) -> Sink:
        """Return a sink for the given stream name.

        A new sink will be created if `schema` is provided and if either `schema` or
        `key_properties` has changed. If so, the old sink becomes archived and held
        until the next drain_all() operation.

        Developers only need to override this method if they want to provide a different
        sink depending on the values within the `record` object. Otherwise, please see
        the `default_sink_class` property and/or the `get_sink_class()` method.

        Raises :class:`hotglue_singer_sdk.exceptions.RecordsWitoutSchemaException` if the
        sink does not exist and a schema is not sent.

        Args:
            stream_name: Name of the stream.
            record: Record being processed.
            schema: Stream schema.
            key_properties: Primary key of the stream.

        Returns:
            The sink used for this target.
        """
        _ = record  # Custom implementations may use record in sink selection.
        if schema is None:
            self._assert_sink_exists(stream_name)
            return self._sinks_active[stream_name]

        existing_sink = self._sinks_active.get(stream_name, None)
        if not existing_sink:
            return self.add_sink(stream_name, schema, key_properties)

        if (
            existing_sink.schema != schema
            or existing_sink.key_properties != key_properties
        ):
            self.logger.info(
                f"Schema or key properties for '{stream_name}' stream have changed. "
                f"Initializing a new '{stream_name}' sink..."
            )
            self._sinks_to_clear.append(self._sinks_active.pop(stream_name))
            return self.add_sink(stream_name, schema, key_properties)

        return existing_sink

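The `default_sink_class` hook above is the usual extension point for concrete
targets. A minimal sketch, assuming the fork mirrors the upstream singer-sdk
surface; `MemorySink` and `TargetExample` are illustrative names, not part of
this package:

    # Illustrative sketch only -- not part of hotglue-singer-sdk.
    from hotglue_singer_sdk.sinks import Sink
    from hotglue_singer_sdk.target_base import Target

    class MemorySink(Sink):
        """Toy sink that buffers records in memory."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.records: list = []

        def process_record(self, record: dict, context: dict) -> None:
            self.records.append(record)

        def process_batch(self, context: dict) -> None:
            # Invoked via drain_one(); write buffered records out here.
            self.records.clear()

    class TargetExample(Target):
        name = "target-example"
        default_sink_class = MemorySink  # get_sink_class() needs no override

Continuing target_base.py:
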
    def get_sink_class(self, stream_name: str) -> Type[Sink]:
        """Get the sink class for a given stream.

        Developers can override this method to return a custom Sink type depending
        on the value of `stream_name`. Optional when `default_sink_class` is set.

        Args:
            stream_name: Name of the stream.

        Raises:
            ValueError: If no :class:`hotglue_singer_sdk.sinks.Sink` class is defined.

        Returns:
            The sink class to be used with the stream.
        """
        if self.default_sink_class:
            return self.default_sink_class

        raise ValueError(
            f"No sink class defined for '{stream_name}' "
            "and no default sink class available."
        )

    def sink_exists(self, stream_name: str) -> bool:
        """Check whether a sink exists for a stream.

        This method is internal to the SDK and should not need to be overridden.

        Args:
            stream_name: Name of the stream.

        Returns:
            True if a sink has been initialized.
        """
        return stream_name in self._sinks_active

    @final
    def add_sink(
        self, stream_name: str, schema: dict, key_properties: Optional[List[str]] = None
    ) -> Sink:
        """Create a sink and register it.

        This method is internal to the SDK and should not need to be overridden.

        Args:
            stream_name: Name of the stream.
            schema: Schema of the stream.
            key_properties: Primary key of the stream.

        Returns:
            A new sink for the stream.
        """
        self.logger.info(f"Initializing '{self.name}' target sink...")
        sink_class = self.get_sink_class(stream_name=stream_name)
        result = sink_class(
            target=self,
            stream_name=stream_name,
            schema=schema,
            key_properties=key_properties,
        )
        self._sinks_active[stream_name] = result
        return result

    def _assert_sink_exists(self, stream_name: str) -> None:
        """Raise a RecordsWitoutSchemaException if no sink exists for the stream.

        Args:
            stream_name: Name of the stream to check.

        Raises:
            RecordsWitoutSchemaException: If sink does not exist and schema is not sent.
        """
        if not self.sink_exists(stream_name):
            raise RecordsWitoutSchemaException(
                f"A record for stream '{stream_name}' was encountered before a "
                "corresponding schema."
            )

    # Message handling

    def _process_lines(self, file_input: IO[str]) -> Counter[str]:
        """Internal method to process jsonl lines from a Singer tap.

        Args:
            file_input: Readable stream of messages, each on a separate line.

        Returns:
            A counter object for the processed lines.
        """
        self.logger.info(f"Target '{self.name}' is listening for input from tap.")
        counter = super()._process_lines(file_input)

        line_count = sum(counter.values())

        self.logger.info(
            f"Target '{self.name}' completed reading {line_count} lines of input "
            f"({counter[SingerMessageType.RECORD]} records, "
            f"{counter[SingerMessageType.STATE]} state messages)."
        )

        return counter

    def _process_endofpipe(self) -> None:
        """Called after all input lines have been read."""
        self.drain_all(is_endofpipe=True)

    def _process_record_message(self, message_dict: dict) -> None:
        """Process a RECORD message.

        Args:
            message_dict: The newly received record message.
        """
        self._assert_line_requires(message_dict, requires={"stream", "record"})

        stream_name = message_dict["stream"]
        for stream_map in self.mapper.stream_maps[stream_name]:
            # new_schema = helpers._float_to_decimal(new_schema)
            raw_record = copy.copy(message_dict["record"])
            transformed_record = stream_map.transform(raw_record)
            if transformed_record is None:
                # Record was filtered out by the map transform
                continue

            sink = self.get_sink(stream_map.stream_alias, record=transformed_record)
            context = sink._get_context(transformed_record)
            if sink.include_sdc_metadata_properties:
                sink._add_sdc_metadata_to_record(
                    transformed_record, message_dict, context
                )
            else:
                sink._remove_sdc_metadata_from_record(transformed_record)

            sink._validate_and_parse(transformed_record)

            sink.tally_record_read()
            transformed_record = sink.preprocess_record(transformed_record, context)
            sink.process_record(transformed_record, context)
            sink._after_process_record(context)

            if sink.is_full:
                self.logger.info(
                    f"Target sink for '{sink.stream_name}' is full. Draining..."
                )
                self.drain_one(sink)

    def _process_schema_message(self, message_dict: dict) -> None:
        """Process a SCHEMA message.

        Args:
            message_dict: The newly received schema message.
        """
        self._assert_line_requires(message_dict, requires={"stream", "schema"})

        stream_name = message_dict["stream"]
        schema = message_dict["schema"]
        key_properties = message_dict.get("key_properties", None)
        do_registration = False
        if stream_name not in self.mapper.stream_maps:
            do_registration = True
        elif self.mapper.stream_maps[stream_name][0].raw_schema != schema:
            self.logger.info(
                f"Schema has changed for stream '{stream_name}'. "
                "Mapping definitions will be reset."
            )
            do_registration = True
        elif (
            self.mapper.stream_maps[stream_name][0].raw_key_properties != key_properties
        ):
            self.logger.info(
                f"Key properties have changed for stream '{stream_name}'. "
                "Mapping definitions will be reset."
            )
            do_registration = True

        if not do_registration:
            self.logger.debug(
                f"No changes detected in SCHEMA message for stream '{stream_name}'. "
                "Ignoring."
            )
            return

        self.mapper.register_raw_stream_schema(
            stream_name,
            schema,
            key_properties,
        )
        for stream_map in self.mapper.stream_maps[stream_name]:
            # new_schema = helpers._float_to_decimal(new_schema)
            _ = self.get_sink(
                stream_map.stream_alias,
                schema=stream_map.transformed_schema,
                key_properties=stream_map.transformed_key_properties,
            )

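Taken together, `_process_lines` routes SCHEMA, RECORD, and STATE messages
through the mapper and into sinks. A sketch of driving that loop with an
in-memory stream, reusing the illustrative `TargetExample` from above:

    # Illustrative sketch only.
    import io
    import json

    messages = [
        {"type": "SCHEMA", "stream": "users",
         "schema": {"properties": {"id": {"type": "integer"}}},
         "key_properties": ["id"]},
        {"type": "RECORD", "stream": "users", "record": {"id": 1}},
        {"type": "STATE", "value": {"bookmarks": {"users": {"id": 1}}}},
    ]
    pipe = io.StringIO("\n".join(json.dumps(m) for m in messages) + "\n")

    target = TargetExample(config={}, validate_config=False)
    target._process_lines(pipe)          # SCHEMA creates the sink; RECORD feeds it
    target.drain_all(is_endofpipe=True)  # flush all sinks and emit final STATE

Continuing target_base.py:
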
    @property
    def _max_record_age_in_minutes(self) -> float:
        return (time.time() - self._last_full_drain_at) / 60

    def _reset_max_record_age(self) -> None:
        self._last_full_drain_at = time.time()

    def _process_state_message(self, message_dict: dict) -> None:
        """Process a STATE message. Drain sinks if needed.

        If state is unchanged, no actions will be taken.

        Args:
            message_dict: The newly received state message.
        """
        self._assert_line_requires(message_dict, requires={"value"})
        state = message_dict["value"]
        if self._latest_state == state:
            return
        self._latest_state = state
        if self._max_record_age_in_minutes > self._MAX_RECORD_AGE_IN_MINUTES:
            self.logger.info(
                "One or more records have exceeded the max age of "
                f"{self._MAX_RECORD_AGE_IN_MINUTES} minutes. Draining all sinks."
            )
            self.drain_all()

    def _process_activate_version_message(self, message_dict: dict) -> None:
        """Handle the optional ACTIVATE_VERSION message extension.

        Args:
            message_dict: The newly received ACTIVATE_VERSION message.
        """
        stream_name = message_dict["stream"]
        sink = self.get_sink(stream_name)
        sink.activate_version(message_dict["version"])

    # Sink drain methods

    @final
    def drain_all(self, is_endofpipe: bool = False) -> None:
        """Drains all sinks, starting with those cleared due to changed schema.

        This method is internal to the SDK and should not need to be overridden.

        Args:
            is_endofpipe: Set by :meth:`~hotglue_singer_sdk.Target._process_endofpipe`,
                which is called after the target instance has finished listening to
                stdin.
        """
        state = copy.deepcopy(self._latest_state)
        self._drain_all(self._sinks_to_clear, 1)
        if is_endofpipe:
            for sink in self._sinks_to_clear:
                sink.clean_up()
        self._sinks_to_clear = []
        self._drain_all(list(self._sinks_active.values()), self.max_parallelism)
        if is_endofpipe:
            for sink in self._sinks_active.values():
                sink.clean_up()
        self._write_state_message(state)
        self._reset_max_record_age()

    @final
    def drain_one(self, sink: Sink) -> None:
        """Drain a specific sink.

        This method is internal to the SDK and should not need to be overridden.

        Args:
            sink: Sink to be drained.
        """
        if sink.current_size == 0:
            return

        draining_status = sink.start_drain()
        sink.process_batch(draining_status)
        sink.mark_drained()

    def _drain_all(self, sink_list: List[Sink], parallelism: int) -> None:
        if parallelism == 1:
            for sink in sink_list:
                self.drain_one(sink)
            return

        def _drain_sink(sink: Sink) -> None:
            self.drain_one(sink)

        with parallel_backend("threading", n_jobs=parallelism):
            Parallel()(delayed(_drain_sink)(sink=sink) for sink in sink_list)

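The `_drain_all` helper above leans on joblib's threading backend, a good fit
for the I/O-bound work of flushing sinks. The same pattern in isolation (the
`flush` worker is illustrative):

    # Illustrative sketch only.
    from joblib import Parallel, delayed, parallel_backend

    def flush(stream_name: str) -> str:
        # Stand-in for drain_one(); network/disk I/O releases the GIL,
        # so threads overlap usefully here.
        return f"drained {stream_name}"

    with parallel_backend("threading", n_jobs=4):
        results = Parallel()(delayed(flush)(n) for n in ["users", "orders"])
    # results == ["drained users", "drained orders"]

Continuing target_base.py:
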
    def _write_state_message(self, state: dict) -> None:
        """Emit the stream's latest state.

        Args:
            state: The state object to emit.
        """
        state_json = json.dumps(state)
        self.logger.info(f"Emitting completed target state {state_json}")
        sys.stdout.write(f"{state_json}\n")
        sys.stdout.flush()

    # CLI handler

    @classproperty
    def cli(cls) -> Callable:
        """Execute standard CLI handler for targets.

        Returns:
            A callable CLI object.
        """

        @common_options.PLUGIN_VERSION
        @common_options.PLUGIN_ABOUT
        @common_options.PLUGIN_ABOUT_FORMAT
        @common_options.PLUGIN_CONFIG
        @common_options.PLUGIN_FILE_INPUT
        @click.command(
            help="Execute the Singer target.",
            context_settings={"help_option_names": ["--help"]},
        )
        def cli(
            version: bool = False,
            about: bool = False,
            config: Tuple[str, ...] = (),
            format: Optional[str] = None,
            file_input: Optional[FileIO] = None,
        ) -> None:
            """Handle command line execution.

            Args:
                version: Display the package version.
                about: Display package metadata and settings.
                format: Specify output style for `--about`.
                config: Configuration file location or 'ENV' to use environment
                    variables. Accepts multiple inputs as a tuple.
                file_input: Specify a path to an input file to read messages from.
                    Defaults to standard in if unspecified.

            Raises:
                FileNotFoundError: If the config file does not exist.
            """
            if version:
                cls.print_version()
                return

            if not about:
                cls.print_version(print_fn=cls.logger.info)
            else:
                cls.print_about(format=format)
                return

            validate_config: bool = True

            parse_env_config = False
            config_files: List[PurePath] = []
            for config_path in config:
                if config_path == "ENV":
                    # Allow parse from env vars:
                    parse_env_config = True
                    continue

                # Validate config file paths before adding to list
                if not Path(config_path).is_file():
                    raise FileNotFoundError(
                        f"Could not locate config file at '{config_path}'. "
                        "Please check that the file exists."
                    )

                config_files.append(Path(config_path))

            target = cls(  # type: ignore  # Ignore 'type not callable'
                config=config_files or None,
                parse_env_config=parse_env_config,
                validate_config=validate_config,
            )

            target.listen(file_input)

        return cli


class SQLTarget(Target):
    """Target implementation for SQL destinations."""

    pass

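The `cli` classproperty builds a Click command, so a packaged target only needs
to expose it as a console script. A sketch of the typical wiring and invocation
(project and module names illustrative):

    # Illustrative sketch only.
    # pyproject.toml:
    #   [project.scripts]
    #   target-example = "target_example.target:TargetExample.cli"
    #
    # Or invoke directly:
    if __name__ == "__main__":
        TargetExample.cli()

    # Shell usage, piping a tap into the target:
    #   some-tap --config tap.json | target-example --config target.json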

hotglue_singer_sdk/target_sdk/__init__.py
File without changes

hotglue_singer_sdk/target_sdk/auth.py
@@ -0,0 +1,124 @@
import json
import logging
from abc import abstractmethod
from datetime import datetime
from typing import Any, Dict, Optional

import requests


class Authenticator:
    def __init__(self, target, state: Optional[Dict[str, Any]] = None):
        self.target_name: str = target.name
        self._config: Dict[str, Any] = target._config
        self._auth_headers: Dict[str, Any] = {}
        self._auth_params: Dict[str, Any] = {}
        self.logger: logging.Logger = target.logger
        self._config_file_path = target._config_file_path
        self._target = target
        # Avoid a shared mutable default: each instance gets its own state dict.
        self.state = state if state is not None else {}

    @property
    @abstractmethod
    def auth_headers(self) -> dict:
        raise NotImplementedError()


class ApiAuthenticator(Authenticator):
    def __init__(
        self,
        target,
        state: Optional[Dict[str, Any]] = None,
        header_name: str = "x-api-key",
        header_value_prefix: str = "",
        config_key: str = "api_key",
    ) -> None:
        """Init authenticator."""
        super().__init__(target, state)
        self._header_name = header_name
        self._header_value_prefix = header_value_prefix
        self._config_key = config_key

    @property
    def auth_headers(self) -> dict:
        result = {}

        result[self._header_name] = (
            f"{self._header_value_prefix}{self._config[self._config_key]}"
        )

        return result

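A sketch of attaching `ApiAuthenticator` headers to an outgoing request,
assuming `target` is an initialized target whose config contains an `api_key`
entry (the default `config_key`); the endpoint URL is illustrative:

    # Illustrative sketch only.
    import requests

    # Sends "Authorization: Bearer <api_key>" instead of the default "x-api-key".
    authenticator = ApiAuthenticator(
        target,
        header_name="Authorization",
        header_value_prefix="Bearer ",
        config_key="api_key",
    )
    response = requests.get(
        "https://api.example.com/v1/items",
        headers=authenticator.auth_headers,
    )

The auth.py listing continues with the OAuth flow:
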
class OAuthAuthenticator(Authenticator):
    """API Authenticator for OAuth 2.0 flows."""

    def __init__(
        self,
        target,
        state: Optional[Dict[str, Any]] = None,
        auth_endpoint: Optional[str] = None,
    ) -> None:
        """Init authenticator."""
        super().__init__(target, state)
        self._auth_endpoint = auth_endpoint

    @property
    def auth_headers(self) -> dict:
        if not self.is_token_valid():
            self.update_access_token()
        result = {}
        result["Authorization"] = f"Bearer {self._config.get('access_token')}"
        return result

    @property
    def oauth_request_body(self) -> dict:
        """Define the OAuth request body."""
        return {
            "refresh_token": self._config["refresh_token"],
            "grant_type": "refresh_token",
            "client_id": self._config["client_id"],
            "client_secret": self._config["client_secret"],
        }

    def is_token_valid(self) -> bool:
        access_token = self._config.get("access_token")
        now = round(datetime.utcnow().timestamp())
        expires_in = self._config.get("expires_in")
        if expires_in is not None:
            expires_in = int(expires_in)
        if not access_token:
            return False

        if not expires_in:
            return False

        # "expires_in" stores an absolute expiry timestamp; treat tokens within
        # 120 seconds of expiry as invalid so they are refreshed proactively.
        return not ((expires_in - now) < 120)

    def update_access_token(self) -> None:
        headers = {"Content-Type": "application/x-www-form-urlencoded"}
        self.logger.info(
            f"OAuth request - endpoint: {self._auth_endpoint}, "
            f"body: {self.oauth_request_body}"
        )
        token_response = requests.post(
            self._auth_endpoint, data=self.oauth_request_body, headers=headers
        )

        try:
            token_response.raise_for_status()
            self.logger.info("OAuth authorization attempt was successful.")
        except Exception as ex:
            self.state.update({"auth_error_response": token_response.json()})
            raise RuntimeError(
                f"Failed OAuth login, response was '{token_response.json()}'. {ex}"
            )

        token_json = token_response.json()
        self.access_token = token_json["access_token"]

        self._config["access_token"] = token_json["access_token"]
        self._config["refresh_token"] = token_json["refresh_token"]
        now = round(datetime.utcnow().timestamp())
        self._config["expires_in"] = int(token_json["expires_in"]) + now

        with open(self._config_file_path, "w") as outfile:
            json.dump(self._config, outfile, indent=4)

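Because token endpoints vary by provider, `oauth_request_body` is the natural
override point; `auth_headers` then refreshes transparently whenever
`is_token_valid()` reports the token missing or within 120 seconds of expiry.
A sketch (endpoint and `scope` field illustrative):

    # Illustrative sketch only.
    class ExampleOAuthAuthenticator(OAuthAuthenticator):
        @property
        def oauth_request_body(self) -> dict:
            body = super().oauth_request_body
            body["scope"] = "read write"  # provider-specific extra field
            return body

    authenticator = ExampleOAuthAuthenticator(
        target,
        auth_endpoint="https://login.example.com/oauth/token",
    )
    headers = authenticator.auth_headers  # refreshes the token first if needed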