hotglue-singer-sdk 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. hotglue_singer_sdk/__init__.py +34 -0
  2. hotglue_singer_sdk/authenticators.py +554 -0
  3. hotglue_singer_sdk/cli/__init__.py +1 -0
  4. hotglue_singer_sdk/cli/common_options.py +37 -0
  5. hotglue_singer_sdk/configuration/__init__.py +1 -0
  6. hotglue_singer_sdk/configuration/_dict_config.py +101 -0
  7. hotglue_singer_sdk/exceptions.py +52 -0
  8. hotglue_singer_sdk/helpers/__init__.py +1 -0
  9. hotglue_singer_sdk/helpers/_catalog.py +122 -0
  10. hotglue_singer_sdk/helpers/_classproperty.py +18 -0
  11. hotglue_singer_sdk/helpers/_compat.py +15 -0
  12. hotglue_singer_sdk/helpers/_flattening.py +374 -0
  13. hotglue_singer_sdk/helpers/_schema.py +100 -0
  14. hotglue_singer_sdk/helpers/_secrets.py +41 -0
  15. hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
  16. hotglue_singer_sdk/helpers/_singer.py +280 -0
  17. hotglue_singer_sdk/helpers/_state.py +282 -0
  18. hotglue_singer_sdk/helpers/_typing.py +231 -0
  19. hotglue_singer_sdk/helpers/_util.py +27 -0
  20. hotglue_singer_sdk/helpers/capabilities.py +240 -0
  21. hotglue_singer_sdk/helpers/jsonpath.py +39 -0
  22. hotglue_singer_sdk/io_base.py +134 -0
  23. hotglue_singer_sdk/mapper.py +691 -0
  24. hotglue_singer_sdk/mapper_base.py +156 -0
  25. hotglue_singer_sdk/plugin_base.py +415 -0
  26. hotglue_singer_sdk/py.typed +0 -0
  27. hotglue_singer_sdk/sinks/__init__.py +14 -0
  28. hotglue_singer_sdk/sinks/batch.py +90 -0
  29. hotglue_singer_sdk/sinks/core.py +412 -0
  30. hotglue_singer_sdk/sinks/record.py +66 -0
  31. hotglue_singer_sdk/sinks/sql.py +299 -0
  32. hotglue_singer_sdk/streams/__init__.py +14 -0
  33. hotglue_singer_sdk/streams/core.py +1294 -0
  34. hotglue_singer_sdk/streams/graphql.py +74 -0
  35. hotglue_singer_sdk/streams/rest.py +611 -0
  36. hotglue_singer_sdk/streams/sql.py +1023 -0
  37. hotglue_singer_sdk/tap_base.py +580 -0
  38. hotglue_singer_sdk/target_base.py +554 -0
  39. hotglue_singer_sdk/target_sdk/__init__.py +0 -0
  40. hotglue_singer_sdk/target_sdk/auth.py +124 -0
  41. hotglue_singer_sdk/target_sdk/client.py +286 -0
  42. hotglue_singer_sdk/target_sdk/common.py +13 -0
  43. hotglue_singer_sdk/target_sdk/lambda.py +121 -0
  44. hotglue_singer_sdk/target_sdk/rest.py +108 -0
  45. hotglue_singer_sdk/target_sdk/sinks.py +16 -0
  46. hotglue_singer_sdk/target_sdk/target.py +570 -0
  47. hotglue_singer_sdk/target_sdk/target_base.py +627 -0
  48. hotglue_singer_sdk/testing.py +198 -0
  49. hotglue_singer_sdk/typing.py +603 -0
  50. hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
  51. hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
  52. hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
  53. hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,627 @@
1
+ """Target abstract class."""
2
+ import abc
3
+ import copy
4
+ import json
5
+ import sys
6
+ import time
7
+ import os
8
+ from io import FileIO
9
+ from pathlib import Path, PurePath
10
+ from typing import IO, Callable, Counter, Dict, List, Optional, Tuple, Type, Union
11
+
12
+ import click
13
+ from joblib import Parallel, delayed, parallel_backend
14
+
15
+ from hotglue_singer_sdk.cli import common_options
16
+ from hotglue_singer_sdk.exceptions import RecordsWitoutSchemaException
17
+ from hotglue_singer_sdk.helpers._classproperty import classproperty
18
+ from hotglue_singer_sdk.helpers._compat import final
19
+ from hotglue_singer_sdk.helpers.capabilities import CapabilitiesEnum, PluginCapabilities
20
+ from hotglue_singer_sdk.io_base import SingerMessageType, SingerReader
21
+ from hotglue_singer_sdk.mapper import PluginMapper
22
+ from hotglue_singer_sdk.plugin_base import PluginBase
23
+ from hotglue_singer_sdk.sinks import Sink, BatchSink
24
+
25
+ _MAX_PARALLELISM = 8
26
+
27
+
28
+ class Target(PluginBase, SingerReader, metaclass=abc.ABCMeta):
29
+ """Abstract base class for targets.
30
+
31
+ The `Target` class manages config information and is responsible for processing the
32
+ incoming Singer data stream and orchestrating any needed target `Sink` objects. As
33
+ messages are received from the tap, the `Target` class will automatically create
34
+ any needed target `Sink` objects and send records along to the appropriate `Sink`
35
+ object for that record.
36
+ """
37
+
38
+ _MAX_RECORD_AGE_IN_MINUTES: float = 30.0
39
+
40
+ # Default class to use for creating new sink objects.
41
+ # Required if `Target.get_sink_class()` is not defined.
42
+ default_sink_class: Optional[Type[Sink]] = None
43
+
44
+ def __init__(
45
+ self,
46
+ config: Optional[Union[dict, PurePath, str, List[Union[PurePath, str]]]] = None,
47
+ parse_env_config: bool = False,
48
+ validate_config: bool = True,
49
+ ) -> None:
50
+ """Initialize the target.
51
+
52
+ Args:
53
+ config: Target configuration. Can be a dictionary, a single path to a
54
+ configuration file, or a list of paths to multiple configuration
55
+ files.
56
+ parse_env_config: Whether to look for configuration values in environment
57
+ variables.
58
+ validate_config: True to require validation of config settings.
59
+ """
60
+ super().__init__(
61
+ config=config,
62
+ parse_env_config=parse_env_config,
63
+ validate_config=validate_config,
64
+ )
65
+
66
+ self.streaming_job = os.environ.get('STREAMING_JOB') == 'True'
67
+ if self.streaming_job:
68
+ self._latest_state: Dict[str, dict] = { "tap": {}, "target": {} }
69
+ else:
70
+ self._latest_state: Dict[str, dict] = {}
71
+ self._drained_state: Dict[str, dict] = {}
72
+ self._sinks_active: Dict[str, Sink] = {}
73
+ self._sinks_to_clear: List[Sink] = []
74
+ self._max_parallelism: Optional[int] = _MAX_PARALLELISM
75
+
76
+ # Approximated for max record age enforcement
77
+ self._last_full_drain_at: float = time.time()
78
+
79
+ # Initialize mapper
80
+ self.mapper: PluginMapper
81
+ self.mapper = PluginMapper(
82
+ plugin_config=dict(self.config),
83
+ logger=self.logger,
84
+ )
85
+
86
+ @classproperty
87
+ def capabilities(self) -> List[CapabilitiesEnum]:
88
+ """Get target capabilities.
89
+
90
+ Returns:
91
+ A list of capabilities supported by this target.
92
+ """
93
+ return [
94
+ PluginCapabilities.ABOUT,
95
+ PluginCapabilities.STREAM_MAPS,
96
+ PluginCapabilities.FLATTENING,
97
+ PluginCapabilities.HOTGLUE_EXCEPTIONS_CLASSES
98
+ ]
99
+
100
+ @property
101
+ def max_parallelism(self) -> int:
102
+ """Get max parallel sinks.
103
+
104
+ The default is 8 if not overridden.
105
+
106
+ Returns:
107
+ Max number of sinks that can be drained in parallel.
108
+ """
109
+ if self._max_parallelism is not None:
110
+ return self._max_parallelism
111
+
112
+ return _MAX_PARALLELISM
113
+
114
+ @max_parallelism.setter
115
+ def max_parallelism(self, new_value: int) -> None:
116
+ """Override the default (max) parallelism.
117
+
118
+ The default is 8 if not overridden.
119
+
120
+ Args:
121
+ new_value: The new max degree of parallelism for this target.
122
+ """
123
+ self._max_parallelism = new_value
124
+
125
+ def get_sink(
126
+ self,
127
+ stream_name: str,
128
+ *,
129
+ record: Optional[dict] = None,
130
+ schema: Optional[dict] = None,
131
+ key_properties: Optional[List[str]] = None,
132
+ ) -> Sink:
133
+ """Return a sink for the given stream name.
134
+
135
+ A new sink will be created if `schema` is provided and if either `schema` or
136
+ `key_properties` has changed. If so, the old sink becomes archived and held
137
+ until the next drain_all() operation.
138
+
139
+ Developers only need to override this method if they want to provide a different
140
+ sink depending on the values within the `record` object. Otherwise, please see
141
+ `default_sink_class` property and/or the `get_sink_class()` method.
142
+
143
+ Raises :class:`hotglue_singer_sdk.exceptions.RecordsWitoutSchemaException` if sink does
144
+ not exist and schema is not sent.
145
+
146
+ Args:
147
+ stream_name: Name of the stream.
148
+ record: Record being processed.
149
+ schema: Stream schema.
150
+ key_properties: Primary key of the stream.
151
+
152
+ Returns:
153
+ The sink used for this target.
154
+ """
155
+ _ = record # Custom implementations may use record in sink selection.
156
+ if schema is None:
157
+ self._assert_sink_exists(stream_name)
158
+ return self._sinks_active[stream_name]
159
+
160
+ existing_sink = self._sinks_active.get(stream_name, None)
161
+ if not existing_sink:
162
+ return self.add_sink(stream_name, schema, key_properties)
163
+
164
+ if (
165
+ existing_sink.schema != schema
166
+ or existing_sink.key_properties != key_properties
167
+ ):
168
+ self.logger.info(
169
+ f"Schema or key properties for '{stream_name}' stream have changed. "
170
+ f"Initializing a new '{stream_name}' sink..."
171
+ )
172
+ self._sinks_to_clear.append(self._sinks_active.pop(stream_name))
173
+ return self.add_sink(stream_name, schema, key_properties)
174
+
175
+ return existing_sink
176
+
177
+ def get_sink_class(self, stream_name: str) -> Type[Sink]:
178
+ """Get sink for a stream.
179
+
180
+ Developers can override this method to return a custom Sink type depending
181
+ on the value of `stream_name`. Optional when `default_sink_class` is set.
182
+
183
+ Args:
184
+ stream_name: Name of the stream.
185
+
186
+ Raises:
187
+ ValueError: If no :class:`hotglue_singer_sdk.sinks.Sink` class is defined.
188
+
189
+ Returns:
190
+ The sink class to be used with the stream.
191
+ """
192
+ if self.default_sink_class:
193
+ return self.default_sink_class
194
+
195
+ raise ValueError(
196
+ f"No sink class defined for '{stream_name}' "
197
+ "and no default sink class available."
198
+ )
199
+
200
+ def sink_exists(self, stream_name: str) -> bool:
201
+ """Check sink for a stream.
202
+
203
+ This method is internal to the SDK and should not need to be overridden.
204
+
205
+ Args:
206
+ stream_name: Name of the stream
207
+
208
+ Returns:
209
+ True if a sink has been initialized.
210
+ """
211
+ return stream_name in self._sinks_active
212
+
213
+ @final
214
+ def add_sink(
215
+ self, stream_name: str, schema: dict, key_properties: Optional[List[str]] = None
216
+ ) -> Sink:
217
+ """Create a sink and register it.
218
+
219
+ This method is internal to the SDK and should not need to be overridden.
220
+
221
+ Args:
222
+ stream_name: Name of the stream.
223
+ schema: Schema of the stream.
224
+ key_properties: Primary key of the stream.
225
+
226
+ Returns:
227
+ A new sink for the stream.
228
+ """
229
+ self.logger.info(f"Initializing '{self.name}' target sink...")
230
+ sink_class = self.get_sink_class(stream_name=stream_name)
231
+
232
+ if not sink_class:
233
+ self.logger.warning(f"Sink class for \"{stream_name}\" stream not found")
234
+ self._sinks_active[stream_name] = None
235
+ return None
236
+
237
+ result = sink_class(
238
+ target=self,
239
+ stream_name=stream_name,
240
+ schema=schema,
241
+ key_properties=key_properties,
242
+ )
243
+ self._sinks_active[stream_name] = result
244
+ return result
245
+
246
+ def _assert_sink_exists(self, stream_name: str) -> None:
247
+ """Raise a RecordsWitoutSchemaException exception if stream doesn't exist.
248
+
249
+ Args:
250
+ stream_name: TODO
251
+
252
+ Raises:
253
+ RecordsWitoutSchemaException: If sink does not exist and schema is not sent.
254
+ """
255
+ if not self.sink_exists(stream_name):
256
+ raise RecordsWitoutSchemaException(
257
+ f"A record for stream '{stream_name}' was encountered before a "
258
+ "corresponding schema."
259
+ )
260
+
261
+ # Message handling
262
+
263
+ def _process_lines(self, file_input: IO[str]) -> Counter[str]:
264
+ """Internal method to process jsonl lines from a Singer tap.
265
+
266
+ Args:
267
+ file_input: Readable stream of messages, each on a separate line.
268
+
269
+ Returns:
270
+ A counter object for the processed lines.
271
+ """
272
+ self.logger.info(f"Target '{self.name}' is listening for input from tap.")
273
+ counter = super()._process_lines(file_input)
274
+
275
+ line_count = sum(counter.values())
276
+
277
+ self.logger.info(
278
+ f"Target '{self.name}' completed reading {line_count} lines of input "
279
+ f"({counter[SingerMessageType.RECORD]} records, "
280
+ f"{counter[SingerMessageType.STATE]} state messages)."
281
+ )
282
+
283
+ return counter
284
+
285
+ def _process_endofpipe(self) -> None:
286
+ """Called after all input lines have been read."""
287
+ self.drain_all(is_endofpipe=True)
288
+
289
+ def _process_record_message(self, message_dict: dict) -> None:
290
+ """Process a RECORD message.
291
+
292
+ Args:
293
+ message_dict: TODO
294
+ """
295
+ self._assert_line_requires(message_dict, requires={"stream", "record"})
296
+
297
+ stream_name = message_dict["stream"]
298
+ for stream_map in self.mapper.stream_maps[stream_name]:
299
+ # new_schema = helpers._float_to_decimal(new_schema)
300
+ raw_record = copy.copy(message_dict["record"])
301
+ transformed_record = stream_map.transform(raw_record)
302
+ if transformed_record is None:
303
+ # Record was filtered out by the map transform
304
+ continue
305
+
306
+ sink = self.get_sink(stream_map.stream_alias, record=transformed_record)
307
+ context = sink._get_context(transformed_record)
308
+ if sink.include_sdc_metadata_properties:
309
+ sink._add_sdc_metadata_to_record(
310
+ transformed_record, message_dict, context
311
+ )
312
+ else:
313
+ sink._remove_sdc_metadata_from_record(transformed_record)
314
+
315
+ sink._validate_and_parse(transformed_record)
316
+
317
+ sink.tally_record_read()
318
+ transformed_record = sink.preprocess_record(transformed_record, context)
319
+ sink.process_record(transformed_record, context)
320
+ sink._after_process_record(context)
321
+
322
+ if sink.is_full:
323
+ self.logger.info(
324
+ f"Target sink for '{sink.stream_name}' is full. Draining..."
325
+ )
326
+ self.drain_one(sink)
327
+
328
+ def _process_schema_message(self, message_dict: dict) -> None:
329
+ """Process a SCHEMA messages.
330
+
331
+ Args:
332
+ message_dict: The newly received schema message.
333
+ """
334
+ self._assert_line_requires(message_dict, requires={"stream", "schema"})
335
+
336
+ stream_name = message_dict["stream"]
337
+ schema = message_dict["schema"]
338
+ key_properties = message_dict.get("key_properties", None)
339
+ do_registration = False
340
+ if stream_name not in self.mapper.stream_maps:
341
+ do_registration = True
342
+ elif self.mapper.stream_maps[stream_name][0].raw_schema != schema:
343
+ self.logger.info(
344
+ f"Schema has changed for stream '{stream_name}'. "
345
+ "Mapping definitions will be reset."
346
+ )
347
+ do_registration = True
348
+ elif (
349
+ self.mapper.stream_maps[stream_name][0].raw_key_properties != key_properties
350
+ ):
351
+ self.logger.info(
352
+ f"Key properties have changed for stream '{stream_name}'. "
353
+ "Mapping definitions will be reset."
354
+ )
355
+ do_registration = True
356
+
357
+ if not do_registration:
358
+ self.logger.debug(
359
+ f"No changes detected in SCHEMA message for stream '{stream_name}'. "
360
+ "Ignoring."
361
+ )
362
+ return
363
+
364
+ self.mapper.register_raw_stream_schema(
365
+ stream_name,
366
+ schema,
367
+ key_properties,
368
+ )
369
+ for stream_map in self.mapper.stream_maps[stream_name]:
370
+ # new_schema = helpers._float_to_decimal(new_schema)
371
+ _ = self.get_sink(
372
+ stream_map.stream_alias,
373
+ schema=stream_map.transformed_schema,
374
+ key_properties=stream_map.transformed_key_properties,
375
+ )
376
+
377
+ @property
378
+ def _max_record_age_in_minutes(self) -> float:
379
+ return (time.time() - self._last_full_drain_at) / 60
380
+
381
+ def _reset_max_record_age(self) -> None:
382
+ self._last_full_drain_at = time.time()
383
+
384
+ def _process_state_message(self, message_dict: dict) -> None:
385
+ """Process a state message. drain sinks if needed.
386
+
387
+ If state is unchanged, no actions will be taken.
388
+
389
+ Args:
390
+ message_dict: TODO
391
+ """
392
+ self._assert_line_requires(message_dict, requires={"value"})
393
+ state = message_dict["value"]
394
+ # Determine where to store state based on streaming_job
395
+ if self.streaming_job:
396
+ current_state = self._latest_state["tap"]
397
+ if current_state == state:
398
+ return
399
+ self._latest_state["tap"] = state
400
+ else:
401
+ current_state = self._latest_state
402
+ if current_state == state:
403
+ return
404
+ self._latest_state = state
405
+
406
+ if self._max_record_age_in_minutes > self._MAX_RECORD_AGE_IN_MINUTES:
407
+ self.logger.info(
408
+ "One or more records have exceeded the max age of "
409
+ f"{self._MAX_RECORD_AGE_IN_MINUTES} minutes. Draining all sinks."
410
+ )
411
+ self.drain_all()
412
+
413
+ def _process_activate_version_message(self, message_dict: dict) -> None:
414
+ """Handle the optional ACTIVATE_VERSION message extension.
415
+
416
+ Args:
417
+ message_dict: TODO
418
+ """
419
+ stream_name = message_dict["stream"]
420
+ sink = self.get_sink(stream_name)
421
+ if sink:
422
+ sink.activate_version(message_dict["version"])
423
+
424
+ # Sink drain methods
425
+
426
+ @final
427
+ def drain_all(self, is_endofpipe: bool = False) -> None:
428
+ """Drains all sinks, starting with those cleared due to changed schema.
429
+
430
+ This method is internal to the SDK and should not need to be overridden.
431
+
432
+ Args:
433
+ is_endofpipe: This is passed by the
434
+ :meth:`~hotglue_singer_sdk.Sink._process_endofpipe()` which
435
+ is called after the target instance has finished
436
+ listening to the stdin
437
+ """
438
+ if self.streaming_job:
439
+ state = copy.deepcopy(self._latest_state) or { "tap": {}, "target": {} }
440
+ else:
441
+ state = copy.deepcopy(self._latest_state) or {}
442
+ self._drain_all(self._sinks_to_clear, 1)
443
+ if is_endofpipe:
444
+ for sink in self._sinks_to_clear:
445
+ if sink:
446
+ sink.clean_up()
447
+ self._sinks_to_clear = []
448
+ self._drain_all(list(self._sinks_active.values()), self.max_parallelism)
449
+ if is_endofpipe:
450
+ for sink in self._sinks_active.values():
451
+ if sink:
452
+ sink.clean_up()
453
+
454
+ # Build state from BatchSinks
455
+ batch_sinks = [s for s in self._sinks_active.values() if isinstance(s, BatchSink)]
456
+
457
+ for s in batch_sinks:
458
+ if self.streaming_job:
459
+ if s.name not in state["target"].get("bookmarks", []):
460
+ state["target"] = update_state(state["target"], s.latest_state, self.logger)
461
+ else:
462
+ state["target"]["bookmarks"][s.name] = s.latest_state["bookmarks"][s.name]
463
+ state["target"]["summary"][s.name] = s.latest_state["summary"][s.name]
464
+ else:
465
+ if s.name not in state.get("bookmarks", []):
466
+ state = update_state(state, s.latest_state, self.logger)
467
+ else:
468
+ state["bookmarks"][s.name] = s.latest_state["bookmarks"][s.name]
469
+ state["summary"][s.name] = s.latest_state["summary"][s.name]
470
+
471
+ self._write_state_message(state)
472
+ self._reset_max_record_age()
473
+
474
+ @final
475
+ def drain_one(self, sink: Optional[Sink]) -> None:
476
+ """Drain a specific sink.
477
+
478
+ This method is internal to the SDK and should not need to be overridden.
479
+
480
+ Args:
481
+ sink: Sink to be drained.
482
+ """
483
+ if not sink or sink.current_size == 0:
484
+ return
485
+
486
+ draining_status = sink.start_drain()
487
+ sink.process_batch(draining_status)
488
+ sink.mark_drained()
489
+
490
+ def _drain_all(self, sink_list: List[Optional[Sink]], parallelism: int) -> None:
491
+ if parallelism == 1:
492
+ for sink in sink_list:
493
+ self.drain_one(sink)
494
+ return
495
+
496
+ def _drain_sink(sink: Optional[Sink]) -> None:
497
+ self.drain_one(sink)
498
+
499
+ with parallel_backend("threading", n_jobs=parallelism):
500
+ Parallel()(delayed(_drain_sink)(sink=sink) for sink in sink_list)
501
+
502
+ def _write_state_message(self, state: dict) -> None:
503
+ """Emit the stream's latest state.
504
+
505
+ Args:
506
+ state: TODO
507
+ """
508
+ state_json = json.dumps(state)
509
+ self.logger.info(f"Emitting completed target state {state_json}")
510
+ sys.stdout.write(f"{state_json}\n")
511
+ sys.stdout.flush()
512
+
513
+ # CLI handler
514
+
515
+ @classproperty
516
+ def cli(cls) -> Callable:
517
+ """Execute standard CLI handler for taps.
518
+
519
+ Returns:
520
+ A callable CLI object.
521
+ """
522
+
523
+ @common_options.PLUGIN_VERSION
524
+ @common_options.PLUGIN_ABOUT
525
+ @common_options.PLUGIN_ABOUT_FORMAT
526
+ @common_options.PLUGIN_CONFIG
527
+ @common_options.PLUGIN_FILE_INPUT
528
+ @click.command(
529
+ help="Execute the Singer target.",
530
+ context_settings={"help_option_names": ["--help"]},
531
+ )
532
+ def cli(
533
+ version: bool = False,
534
+ about: bool = False,
535
+ config: Tuple[str, ...] = (),
536
+ format: str = None,
537
+ file_input: FileIO = None,
538
+ ) -> None:
539
+ """Handle command line execution.
540
+
541
+ Args:
542
+ version: Display the package version.
543
+ about: Display package metadata and settings.
544
+ format: Specify output style for `--about`.
545
+ config: Configuration file location or 'ENV' to use environment
546
+ variables. Accepts multiple inputs as a tuple.
547
+ file_input: Specify a path to an input file to read messages from.
548
+ Defaults to standard in if unspecified.
549
+
550
+ Raises:
551
+ FileNotFoundError: If the config file does not exist.
552
+ """
553
+ if version:
554
+ cls.print_version()
555
+ return
556
+
557
+ if not about:
558
+ cls.print_version(print_fn=cls.logger.info)
559
+ else:
560
+ cls.print_about(format=format)
561
+ return
562
+
563
+ validate_config: bool = True
564
+
565
+ cls.print_version(print_fn=cls.logger.info)
566
+
567
+ parse_env_config = False
568
+ config_files: List[PurePath] = []
569
+ for config_path in config:
570
+ if config_path == "ENV":
571
+ # Allow parse from env vars:
572
+ parse_env_config = True
573
+ continue
574
+
575
+ # Validate config file paths before adding to list
576
+ if not Path(config_path).is_file():
577
+ raise FileNotFoundError(
578
+ f"Could not locate config file at '{config_path}'."
579
+ "Please check that the file exists."
580
+ )
581
+
582
+ config_files.append(Path(config_path))
583
+
584
+ target = cls( # type: ignore # Ignore 'type not callable'
585
+ config=config_files or None,
586
+ parse_env_config=parse_env_config,
587
+ validate_config=validate_config,
588
+ )
589
+
590
+ target.listen(file_input)
591
+
592
+ return cli
593
+
594
+
595
+ def update_state(old_state: dict, new_state: dict, logger) -> dict:
596
+ state = copy.deepcopy(old_state) or dict()
597
+
598
+ if not new_state:
599
+ return state
600
+
601
+ if not state.get("bookmarks"):
602
+ state["bookmarks"] = dict()
603
+
604
+ if not state.get("summary"):
605
+ state["summary"] = dict()
606
+
607
+ if new_state.get("bookmarks"):
608
+ for (k, bookmarks) in new_state["bookmarks"].items():
609
+ if not state["bookmarks"].get(k):
610
+ state["bookmarks"][k] = list()
611
+ state["bookmarks"][k] = state["bookmarks"][k] + bookmarks
612
+
613
+ if new_state.get("summary"):
614
+ for (k, summary) in new_state["summary"].items():
615
+ if not state["summary"].get(k):
616
+ state["summary"][k] = summary
617
+ else:
618
+ for (s_key, s_value) in state["summary"][k].items():
619
+ state["summary"][k][s_key] = s_value + summary[s_key]
620
+
621
+ return state
622
+
623
+
624
+ class SQLTarget(Target):
625
+ """Target implementation for SQL destinations."""
626
+
627
+ pass