hotglue_singer_sdk-1.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hotglue_singer_sdk/__init__.py +34 -0
- hotglue_singer_sdk/authenticators.py +554 -0
- hotglue_singer_sdk/cli/__init__.py +1 -0
- hotglue_singer_sdk/cli/common_options.py +37 -0
- hotglue_singer_sdk/configuration/__init__.py +1 -0
- hotglue_singer_sdk/configuration/_dict_config.py +101 -0
- hotglue_singer_sdk/exceptions.py +52 -0
- hotglue_singer_sdk/helpers/__init__.py +1 -0
- hotglue_singer_sdk/helpers/_catalog.py +122 -0
- hotglue_singer_sdk/helpers/_classproperty.py +18 -0
- hotglue_singer_sdk/helpers/_compat.py +15 -0
- hotglue_singer_sdk/helpers/_flattening.py +374 -0
- hotglue_singer_sdk/helpers/_schema.py +100 -0
- hotglue_singer_sdk/helpers/_secrets.py +41 -0
- hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
- hotglue_singer_sdk/helpers/_singer.py +280 -0
- hotglue_singer_sdk/helpers/_state.py +282 -0
- hotglue_singer_sdk/helpers/_typing.py +231 -0
- hotglue_singer_sdk/helpers/_util.py +27 -0
- hotglue_singer_sdk/helpers/capabilities.py +240 -0
- hotglue_singer_sdk/helpers/jsonpath.py +39 -0
- hotglue_singer_sdk/io_base.py +134 -0
- hotglue_singer_sdk/mapper.py +691 -0
- hotglue_singer_sdk/mapper_base.py +156 -0
- hotglue_singer_sdk/plugin_base.py +415 -0
- hotglue_singer_sdk/py.typed +0 -0
- hotglue_singer_sdk/sinks/__init__.py +14 -0
- hotglue_singer_sdk/sinks/batch.py +90 -0
- hotglue_singer_sdk/sinks/core.py +412 -0
- hotglue_singer_sdk/sinks/record.py +66 -0
- hotglue_singer_sdk/sinks/sql.py +299 -0
- hotglue_singer_sdk/streams/__init__.py +14 -0
- hotglue_singer_sdk/streams/core.py +1294 -0
- hotglue_singer_sdk/streams/graphql.py +74 -0
- hotglue_singer_sdk/streams/rest.py +611 -0
- hotglue_singer_sdk/streams/sql.py +1023 -0
- hotglue_singer_sdk/tap_base.py +580 -0
- hotglue_singer_sdk/target_base.py +554 -0
- hotglue_singer_sdk/target_sdk/__init__.py +0 -0
- hotglue_singer_sdk/target_sdk/auth.py +124 -0
- hotglue_singer_sdk/target_sdk/client.py +286 -0
- hotglue_singer_sdk/target_sdk/common.py +13 -0
- hotglue_singer_sdk/target_sdk/lambda.py +121 -0
- hotglue_singer_sdk/target_sdk/rest.py +108 -0
- hotglue_singer_sdk/target_sdk/sinks.py +16 -0
- hotglue_singer_sdk/target_sdk/target.py +570 -0
- hotglue_singer_sdk/target_sdk/target_base.py +627 -0
- hotglue_singer_sdk/testing.py +198 -0
- hotglue_singer_sdk/typing.py +603 -0
- hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
- hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0

hotglue_singer_sdk/mapper.py
@@ -0,0 +1,691 @@
+"""Stream Mapper classes.
+
+Mappers allow inline stream transformation, filtering, aliasing, and duplication.
+"""
+
+from __future__ import annotations
+
+import abc
+import copy
+import hashlib
+import logging
+from typing import Any, Callable
+
+from hotglue_singer_sdk.exceptions import MapExpressionError, StreamMapConfigError
+from hotglue_singer_sdk.helpers import _simpleeval as simpleeval
+from hotglue_singer_sdk.helpers._catalog import get_selected_schema
+from hotglue_singer_sdk.helpers._flattening import (
+    FlatteningOptions,
+    flatten_record,
+    flatten_schema,
+    get_flattening_options,
+)
+from hotglue_singer_sdk.helpers._singer import Catalog
+from hotglue_singer_sdk.typing import (
+    CustomType,
+    IntegerType,
+    JSONTypeHelper,
+    NumberType,
+    PropertiesList,
+    Property,
+    StringType,
+)
+
+MAPPER_ELSE_OPTION = "__else__"
+MAPPER_FILTER_OPTION = "__filter__"
+MAPPER_SOURCE_OPTION = "__source__"
+MAPPER_ALIAS_OPTION = "__alias__"
+MAPPER_KEY_PROPERTIES_OPTION = "__key_properties__"
+
+
+def md5(input: str) -> str:
+    """Digest a string using MD5. This is a function for inline calculations.
+
+    Args:
+        input: String to digest.
+
+    Returns:
+        A string digested into MD5.
+    """
+    return hashlib.md5(input.encode("utf-8")).hexdigest()
+
+
+class StreamMap(metaclass=abc.ABCMeta):
+    """Abstract base class for all map classes."""
+
+    def __init__(
+        self,
+        stream_alias: str,
+        raw_schema: dict,
+        key_properties: list[str] | None,
+        flattening_options: FlatteningOptions | None,
+    ) -> None:
+        """Initialize mapper.
+
+        Args:
+            stream_alias: Stream name.
+            raw_schema: Original stream JSON schema.
+            key_properties: Primary key of the source stream.
+            flattening_options: Flattening options, or None to skip flattening.
+        """
+        self.stream_alias = stream_alias
+        self.raw_schema = raw_schema
+        self.raw_key_properties = key_properties
+        self.transformed_schema = raw_schema
+        self.transformed_key_properties = key_properties
+        self.flattening_options = flattening_options
+        if self.flattening_enabled:
+            self.transformed_schema = self.flatten_schema(self.transformed_schema)
+
+    @property
+    def flattening_enabled(self) -> bool:
+        """True if flattening is enabled for this stream map.
+
+        Returns:
+            True if flattening is enabled, otherwise False.
+        """
+        return (
+            self.flattening_options is not None
+            and self.flattening_options.flattening_enabled
+            and self.flattening_options.max_level > 0
+        )
+
+    def flatten_record(self, record: dict) -> dict:
+        """If flattening is enabled, flatten a record and return the result.
+
+        If flattening is disabled, the original record will be returned.
+
+        Args:
+            record: An individual record dictionary in a stream.
+
+        Returns:
+            A new dictionary representing the flattened record.
+        """
+        if not self.flattening_options or not self.flattening_enabled:
+            return record
+
+        return flatten_record(
+            record,
+            flattened_schema=self.transformed_schema,
+            max_level=self.flattening_options.max_level,
+            separator=self.flattening_options.separator,
+        )
+
+    def flatten_schema(self, raw_schema: dict) -> dict:
+        """Flatten the provided schema.
+
+        Args:
+            raw_schema: The raw schema to flatten.
+
+        Returns:
+            The flattened version of the schema.
+        """
+        if not self.flattening_options or not self.flattening_enabled:
+            return raw_schema
+
+        return flatten_schema(
+            raw_schema,
+            separator=self.flattening_options.separator,
+            max_level=self.flattening_options.max_level,
+        )
+
+    @abc.abstractmethod
+    def transform(self, record: dict) -> dict | None:
+        """Transform a record and return the result.
+
+        Record flattening will also be performed, if enabled.
+
+        Subclasses should call the super().transform(record) after any other custom
+        transforms are performed.
+
+        Args:
+            record: An individual record dictionary in a stream.
+
+        Returns:
+            A new dictionary representing a transformed record.
+        """
+        return self.flatten_record(record)
+
+    @abc.abstractmethod
+    def get_filter_result(self, record: dict) -> bool:
+        """Exclude records from a stream.
+
+        Args:
+            record: An individual record dictionary in a stream.
+
+        Return:
+            True to include the record or False to exclude.
+
+        Raises:
+            NotImplementedError: If the derived class doesn't override this method.
+        """
+        raise NotImplementedError
+
+
+class DefaultStreamMap(StreamMap):
+    """Abstract base class for default maps which do not require custom config."""
+
+
+class RemoveRecordTransform(DefaultStreamMap):
+    """Default mapper which simply excludes any records."""
+
+    def transform(self, record: dict) -> None:
+        """Return None (always exclude).
+
+        Args:
+            record: An individual record dictionary in a stream.
+
+        Returns:
+            None
+        """
+        _ = record  # Drop the record
+        return None
+
+    def get_filter_result(self, record: dict) -> bool:
+        """Exclude all records.
+
+        Args:
+            record: An individual record dictionary in a stream.
+
+        Returns:
+            Always `False`.
+        """
+        return False
+
+
+class SameRecordTransform(DefaultStreamMap):
+    """Default mapper which simply returns the original records."""
+
+    def transform(self, record: dict) -> dict | None:
+        """Return original record unchanged.
+
+        Args:
+            record: An individual record dictionary in a stream.
+
+        Returns:
+            The original record unchanged.
+        """
+        return super().transform(record)
+
+    def get_filter_result(self, record: dict) -> bool:
+        """Return True (always include).
+
+        Args:
+            record: An individual record dictionary in a stream.
+
+        Returns:
+            Always `True`.
+        """
+        return True
+
+
+class CustomStreamMap(StreamMap):
+    """Defines transformation logic for a singer stream map."""
+
+    def __init__(
+        self,
+        stream_alias: str,
+        map_config: dict,
+        raw_schema: dict,
+        key_properties: list[str] | None,
+        map_transform: dict,
+        flattening_options: FlatteningOptions | None,
+    ) -> None:
+        """Initialize mapper.
+
+        Args:
+            stream_alias: Stream name.
+            map_config: Stream map configuration.
+            raw_schema: Original stream's JSON schema.
+            key_properties: Primary key of the source stream.
+            map_transform: Dictionary of transformations to apply to the stream.
+            flattening_options: Flattening options, or None to skip flattening.
+        """
+        super().__init__(
+            stream_alias=stream_alias,
+            raw_schema=raw_schema,
+            key_properties=key_properties,
+            flattening_options=flattening_options,
+        )
+
+        self.map_config = map_config
+        self._transform_fn: Callable[[dict], dict | None]
+        self._filter_fn: Callable[[dict], bool]
+        (
+            self._filter_fn,
+            self._transform_fn,
+            self.transformed_schema,
+        ) = self._init_functions_and_schema(stream_map=map_transform)
+
+    def transform(self, record: dict) -> dict | None:
+        """Return a transformed record.
+
+        Args:
+            record: An individual record dictionary in a stream.
+
+        Returns:
+            The transformed record.
+        """
+        transformed_record = self._transform_fn(record)
+        if not transformed_record:
+            return None
+
+        return super().transform(transformed_record)
+
+    def get_filter_result(self, record: dict) -> bool:
+        """Return True to include or False to exclude.
+
+        Args:
+            record: An individual record dictionary in a stream.
+
+        Returns:
+            Boolean flag for record selection.
+        """
+        return self._filter_fn(record)
+
+    @property
+    def functions(self) -> dict[str, Callable]:
+        """Get available transformation functions.
+
+        Returns:
+            Functions which should be available for expression evaluation.
+        """
+        funcs: dict[str, Any] = simpleeval.DEFAULT_FUNCTIONS.copy()
+        funcs["md5"] = md5
+        return funcs
+
+    def _eval(
+        self, expr: str, record: dict, property_name: str | None
+    ) -> str | int | float:
+        """Solve an expression.
+
+        Args:
+            expr: String expression to evaluate.
+            record: Individual stream record.
+            property_name: Name of property to transform in the record.
+
+        Returns:
+            Evaluated expression.
+
+        Raises:
+            MapExpressionError: If the mapping expression failed to evaluate.
+        """
+        names = record.copy()  # Start with names from record properties
+        names["_"] = record  # Add a shorthand alias in case of reserved words in names
+        names["record"] = record  # ...and a longhand alias
+        names["config"] = self.map_config  # Allow map config access within transform
+        if property_name and property_name in record:
+            # Allow access to original property value if applicable
+            names["self"] = record[property_name]
+        try:
+            result: str | int | float = simpleeval.simple_eval(
+                expr, functions=self.functions, names=names
+            )
+            logging.debug(f"Eval result: {expr} = {result}")
+        except Exception as ex:
+            raise MapExpressionError(
+                f"Failed to evaluate simpleeval expression {expr}."
+            ) from ex
+        return result
+
+    def _eval_type(
+        self, expr: str, default: JSONTypeHelper | None = None
+    ) -> JSONTypeHelper:
+        """Evaluate an expression's type.
+
+        Args:
+            expr: String expression to evaluate.
+            default: TODO.
+
+        Returns:
+            TODO
+        """
+        assert expr is not None, "Expression should be str, not None"
+
+        default = default or StringType()
+
+        if expr.startswith("float("):
+            return NumberType()
+
+        if expr.startswith("int("):
+            return IntegerType()
+
+        if expr.startswith("str("):
+            return StringType()
+
+        if expr[0] == "'" and expr[-1] == "'":
+            return StringType()
+
+        return default
+
+    def _init_functions_and_schema(
+        self, stream_map: dict
+    ) -> tuple[Callable[[dict], bool], Callable[[dict], dict | None], dict]:
+        """Return a tuple: filter_fn, transform_fn, transformed_schema.
+
+        Args:
+            stream_map: TODO
+
+        Returns:
+            TODO.
+
+        Raises:
+            NotImplementedError: TODO
+            StreamMapConfigError: TODO
+        """
+        stream_map = copy.copy(stream_map)
+
+        filter_rule: str | None = None
+        include_by_default = True
+        if stream_map and MAPPER_FILTER_OPTION in stream_map:
+            filter_rule = stream_map.pop(MAPPER_FILTER_OPTION)
+            logging.info(f"Found '{self.stream_alias}' filter rule: {filter_rule}")
+
+        if stream_map and MAPPER_KEY_PROPERTIES_OPTION in stream_map:
+            self.transformed_key_properties: list[str] = stream_map.pop(
+                MAPPER_KEY_PROPERTIES_OPTION
+            )
+            logging.info(
+                f"Found stream map override for '{self.stream_alias}' key properties: "
+                f"{str(self.transformed_key_properties)}"
+            )
+
+        if stream_map and MAPPER_ELSE_OPTION in stream_map:
+            if stream_map[MAPPER_ELSE_OPTION] is None:
+                logging.info(
+                    f"Detected `{MAPPER_ELSE_OPTION}=None` rule. "
+                    "Unmapped, non-key properties will be excluded from output."
+                )
+                include_by_default = False
+            else:
+                raise NotImplementedError(
+                    f"Option '{MAPPER_ELSE_OPTION}={stream_map[MAPPER_ELSE_OPTION]}' "
+                    "is not supported."
+                )
+            stream_map.pop(MAPPER_ELSE_OPTION)
+
+        # Transform the schema as needed
+
+        transformed_schema = copy.copy(self.raw_schema)
+        if not include_by_default:
+            # Start with only the defined (or transformed) key properties
+            transformed_schema = PropertiesList().to_dict()
+
+        if "properties" not in transformed_schema:
+            transformed_schema["properties"] = {}
+
+        for prop_key, prop_def in list(stream_map.items()):
+            if prop_def is None:
+                if prop_key in (self.transformed_key_properties or []):
+                    raise StreamMapConfigError(
+                        f"Removing key property '{prop_key}' is not permitted in "
+                        f"'{self.stream_alias}' stream map config. To remove a key "
+                        "property, use the `__key_properties__` operator "
+                        "to specify either a new list of key property names or `null` "
+                        "to replicate with no key properties in the stream."
+                    )
+                transformed_schema["properties"].pop(prop_key, None)
+            elif isinstance(prop_def, str):
+                default_type: JSONTypeHelper = StringType()  # Fallback to string
+                existing_schema: dict = transformed_schema["properties"].get(
+                    prop_key, {}
+                )
+                if existing_schema:
+                    # Set default type if property exists already in JSON Schema
+                    default_type = CustomType(existing_schema)
+
+                transformed_schema["properties"].update(
+                    Property(
+                        prop_key, self._eval_type(prop_def, default=default_type)
+                    ).to_dict()
+                )
+            else:
+                raise StreamMapConfigError(
+                    f"Unexpected type '{type(prop_def).__name__}' in stream map "
+                    f"for '{self.stream_alias}:{prop_key}'."
+                )
+
+        for key_property in self.transformed_key_properties or []:
+            if key_property not in transformed_schema["properties"]:
+                raise StreamMapConfigError(
+                    f"Invalid key properties for '{self.stream_alias}': "
+                    f"[{','.join(self.transformed_key_properties)}]. "
+                    f"Property '{key_property}' was not detected in schema."
+                )
+
+        if self.flattening_enabled:
+            transformed_schema = self.flatten_schema(transformed_schema)
+
+        # Declare function variables
+
+        def eval_filter(filter_rule: str) -> Callable[[dict], bool]:
+            def _inner(record: dict) -> bool:
+                filter_result = self._eval(
+                    expr=filter_rule, record=record, property_name=None
+                )
+                logging.debug(
+                    f"Filter result for '{filter_rule}' "
+                    f"in '{self.stream_alias}' stream: {filter_result}"
+                )
+                if not filter_result:
+                    logging.debug("Excluding record due to filter.")
+                    return False
+
+                return True
+
+            return _inner
+
+        def always_true(record: dict) -> bool:
+            _ = record
+            return True
+
+        if isinstance(filter_rule, str):
+            filter_fn = eval_filter(filter_rule)
+        elif filter_rule is None:
+            filter_fn = always_true
+        else:
+            raise StreamMapConfigError(
+                f"Unexpected filter rule type '{type(filter_rule).__name__}' in "
+                f"expression {str(filter_rule)}. Expected 'str' or 'None'."
+            )
+
+        def transform_fn(record: dict) -> dict | None:
+            nonlocal include_by_default, stream_map
+
+            if not self.get_filter_result(record):
+                return None
+
+            if include_by_default:
+                result = record.copy()
+            else:
+                # Start with only the defined (or transformed) key properties
+                result = {}
+                for key_property in self.transformed_key_properties or []:
+                    if key_property in record:
+                        result[key_property] = record[key_property]
+
+            for prop_key, prop_def in list(stream_map.items()):
+                if prop_def is None:
+                    # Remove property from result
+                    result.pop(prop_key, None)
+                    continue
+
+                if isinstance(prop_def, str):
+                    # Apply property transform
+                    result[prop_key] = self._eval(
+                        expr=prop_def, record=record, property_name=prop_key
+                    )
+                    continue
+
+                raise StreamMapConfigError(
+                    f"Unexpected mapping type '{type(prop_def).__name__}' in "
+                    f"map expression '{prop_def}'. Expected 'str' or 'None'."
+                )
+
+            return result
+
+        return filter_fn, transform_fn, transformed_schema
+
+
+class PluginMapper:
+    """Inline map transformer."""
+
+    def __init__(
+        self,
+        plugin_config: dict[str, dict[str, str | dict]],
+        logger: logging.Logger,
+    ) -> None:
+        """Initialize mapper.
+
+        Args:
+            plugin_config: TODO
+            logger: TODO
+
+        Raises:
+            StreamMapConfigError: TODO
+        """
+        self.stream_maps: dict[str, list[StreamMap]] = {}
+        self.map_config = plugin_config.get("stream_map_config", {})
+        self.flattening_options = get_flattening_options(plugin_config)
+        self.default_mapper_type: type[DefaultStreamMap] = SameRecordTransform
+        self.logger = logger
+
+        self.stream_maps_dict: dict[str, str | dict] = plugin_config.get(
+            "stream_maps", {}
+        )
+        if MAPPER_ELSE_OPTION in self.stream_maps_dict:
+            if self.stream_maps_dict[MAPPER_ELSE_OPTION] is None:
+                logging.info(
+                    f"Found '{MAPPER_ELSE_OPTION}=None' default mapper. "
+                    "Unmapped streams will be excluded from output."
+                )
+                self.default_mapper_type = RemoveRecordTransform
+                self.stream_maps_dict.pop(MAPPER_ELSE_OPTION)
+            else:
+                raise StreamMapConfigError(
+                    f"Undefined transform for '{MAPPER_ELSE_OPTION}' case: "
+                    f"{self.stream_maps_dict[MAPPER_ELSE_OPTION]}"
+                )
+        else:
+            logging.debug(
+                f"Operator '{MAPPER_ELSE_OPTION}=None' was not found. "
+                "Unmapped streams will be included in output."
+            )
+        for stream_map_key, stream_def in self.stream_maps_dict.items():
+            if stream_map_key.startswith("__"):
+                raise StreamMapConfigError(
+                    f"Option '{stream_map_key}:{stream_def}' is not expected."
+                )
+
+    def register_raw_streams_from_catalog(self, catalog: Catalog) -> None:
+        """Register all streams as described in the catalog dict.
+
+        Args:
+            catalog: TODO
+        """
+        for catalog_entry in catalog.streams:
+            self.register_raw_stream_schema(
+                catalog_entry.stream or catalog_entry.tap_stream_id,
+                get_selected_schema(
+                    catalog_entry.stream or catalog_entry.tap_stream_id,
+                    catalog_entry.schema.to_dict(),
+                    catalog_entry.metadata.resolve_selection(),
+                    self.logger,
+                ),
+                catalog_entry.key_properties,
+            )
+
+    def register_raw_stream_schema(
+        self, stream_name: str, schema: dict, key_properties: list[str] | None
+    ) -> None:
+        """Register a new stream as described by its name and schema.
+
+        If stream has already been registered and schema or key_properties has changed,
+        the older registration will be removed and replaced with new, updated mappings.
+
+        Args:
+            stream_name: The stream name.
+            schema: The schema definition for the stream.
+            key_properties: The key properties of the stream.
+
+        Raises:
+            StreamMapConfigError: If the configuration is invalid.
+        """
+        if stream_name in self.stream_maps:
+            primary_mapper = self.stream_maps[stream_name][0]
+            if (
+                primary_mapper.raw_schema != schema
+                or primary_mapper.raw_key_properties != key_properties
+            ):
+                # Unload/reset stream maps if schema or key properties have changed.
+                self.stream_maps.pop(stream_name)
+
+        if stream_name not in self.stream_maps:
+            # The 0th mapper should be the same-named treatment.
+            # Additional items may be added for aliasing or multi projections.
+            self.stream_maps[stream_name] = [
+                self.default_mapper_type(
+                    stream_name,
+                    schema,
+                    key_properties,
+                    flattening_options=self.flattening_options,
+                )
+            ]
+
+        for stream_map_key, stream_def in self.stream_maps_dict.items():
+            stream_alias: str = stream_map_key
+            source_stream: str = stream_map_key
+            if isinstance(stream_def, str):
+                if stream_name == stream_map_key:
+                    # TODO: Add any expected cases for str expressions (currently none)
+                    pass

+                raise StreamMapConfigError(
+                    f"Option '{stream_map_key}:{stream_def}' is not expected."
+                )
+
+            if stream_def is None:
+                if stream_name != stream_map_key:
+                    continue
+
+                self.stream_maps[stream_map_key][0] = RemoveRecordTransform(
+                    stream_alias=stream_map_key,
+                    raw_schema=schema,
+                    key_properties=None,
+                    flattening_options=self.flattening_options,
+                )
+                logging.info(f"Set null transform as default for '{stream_name}'")
+                continue
+
+            if not isinstance(stream_def, dict):
+                raise StreamMapConfigError(
+                    "Unexpected stream definition type. Expected str, dict, or None. "
+                    f"Got '{type(stream_def).__name__}'."
+                )
+
+            if MAPPER_SOURCE_OPTION in stream_def:
+                source_stream = stream_def.pop(MAPPER_SOURCE_OPTION)
+
+            if source_stream != stream_name:
+                # Not a match
+                continue
+
+            if MAPPER_ALIAS_OPTION in stream_def:
+                stream_alias = stream_def.pop(MAPPER_ALIAS_OPTION)
+
+            mapper = CustomStreamMap(
+                stream_alias=stream_alias,
+                map_transform=stream_def,
+                map_config=self.map_config,
+                raw_schema=schema,
+                key_properties=key_properties,
+                flattening_options=self.flattening_options,
+            )
+
+            if source_stream == stream_map_key:
+                # Zero-th mapper should be the same-keyed mapper.
+                # Override the default mapper with this custom map.
+                self.stream_maps[stream_name][0] = mapper
+            else:
+                # Additional mappers for aliasing and multi-projection:
+                self.stream_maps[stream_name].append(mapper)