hotglue-singer-sdk 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53):
  1. hotglue_singer_sdk/__init__.py +34 -0
  2. hotglue_singer_sdk/authenticators.py +554 -0
  3. hotglue_singer_sdk/cli/__init__.py +1 -0
  4. hotglue_singer_sdk/cli/common_options.py +37 -0
  5. hotglue_singer_sdk/configuration/__init__.py +1 -0
  6. hotglue_singer_sdk/configuration/_dict_config.py +101 -0
  7. hotglue_singer_sdk/exceptions.py +52 -0
  8. hotglue_singer_sdk/helpers/__init__.py +1 -0
  9. hotglue_singer_sdk/helpers/_catalog.py +122 -0
  10. hotglue_singer_sdk/helpers/_classproperty.py +18 -0
  11. hotglue_singer_sdk/helpers/_compat.py +15 -0
  12. hotglue_singer_sdk/helpers/_flattening.py +374 -0
  13. hotglue_singer_sdk/helpers/_schema.py +100 -0
  14. hotglue_singer_sdk/helpers/_secrets.py +41 -0
  15. hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
  16. hotglue_singer_sdk/helpers/_singer.py +280 -0
  17. hotglue_singer_sdk/helpers/_state.py +282 -0
  18. hotglue_singer_sdk/helpers/_typing.py +231 -0
  19. hotglue_singer_sdk/helpers/_util.py +27 -0
  20. hotglue_singer_sdk/helpers/capabilities.py +240 -0
  21. hotglue_singer_sdk/helpers/jsonpath.py +39 -0
  22. hotglue_singer_sdk/io_base.py +134 -0
  23. hotglue_singer_sdk/mapper.py +691 -0
  24. hotglue_singer_sdk/mapper_base.py +156 -0
  25. hotglue_singer_sdk/plugin_base.py +415 -0
  26. hotglue_singer_sdk/py.typed +0 -0
  27. hotglue_singer_sdk/sinks/__init__.py +14 -0
  28. hotglue_singer_sdk/sinks/batch.py +90 -0
  29. hotglue_singer_sdk/sinks/core.py +412 -0
  30. hotglue_singer_sdk/sinks/record.py +66 -0
  31. hotglue_singer_sdk/sinks/sql.py +299 -0
  32. hotglue_singer_sdk/streams/__init__.py +14 -0
  33. hotglue_singer_sdk/streams/core.py +1294 -0
  34. hotglue_singer_sdk/streams/graphql.py +74 -0
  35. hotglue_singer_sdk/streams/rest.py +611 -0
  36. hotglue_singer_sdk/streams/sql.py +1023 -0
  37. hotglue_singer_sdk/tap_base.py +580 -0
  38. hotglue_singer_sdk/target_base.py +554 -0
  39. hotglue_singer_sdk/target_sdk/__init__.py +0 -0
  40. hotglue_singer_sdk/target_sdk/auth.py +124 -0
  41. hotglue_singer_sdk/target_sdk/client.py +286 -0
  42. hotglue_singer_sdk/target_sdk/common.py +13 -0
  43. hotglue_singer_sdk/target_sdk/lambda.py +121 -0
  44. hotglue_singer_sdk/target_sdk/rest.py +108 -0
  45. hotglue_singer_sdk/target_sdk/sinks.py +16 -0
  46. hotglue_singer_sdk/target_sdk/target.py +570 -0
  47. hotglue_singer_sdk/target_sdk/target_base.py +627 -0
  48. hotglue_singer_sdk/testing.py +198 -0
  49. hotglue_singer_sdk/typing.py +603 -0
  50. hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
  51. hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
  52. hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
  53. hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,580 @@
1
+ """Tap abstract class."""
2
+
3
+ import abc
4
+ import json
5
+ from enum import Enum
6
+ from pathlib import Path, PurePath
7
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union, cast
8
+
9
+ import click
10
+
11
+ from hotglue_singer_sdk.cli import common_options
12
+ from hotglue_singer_sdk.exceptions import MaxRecordsLimitException
13
+ from hotglue_singer_sdk.helpers import _state
14
+ from hotglue_singer_sdk.helpers._classproperty import classproperty
15
+ from hotglue_singer_sdk.helpers._compat import final
16
+ from hotglue_singer_sdk.helpers._singer import Catalog
17
+ from hotglue_singer_sdk.helpers._state import write_stream_state
18
+ from hotglue_singer_sdk.helpers._util import read_json_file
19
+ from hotglue_singer_sdk.helpers.capabilities import (
20
+ CapabilitiesEnum,
21
+ PluginCapabilities,
22
+ TapCapabilities,
23
+ )
24
+ from hotglue_singer_sdk.mapper import PluginMapper
25
+ from hotglue_singer_sdk.plugin_base import PluginBase
26
+ from hotglue_singer_sdk.streams import SQLStream, Stream
27
+
28
# Literal name "stream_maps". It is not referenced elsewhere in the code visible
# here; presumably the config key under which stream maps are declared.
# TODO confirm against the mapper module.
+ STREAM_MAPS_CONFIG = "stream_maps"
29
+
30
+
31
class CliTestOptionValue(Enum):
    """Enumerate the values the ``--test`` CLI option can take.

    The CLI handler compares the received option against the ``.value`` of
    ``All`` and ``Schema``; ``Disabled`` is used as the option's default.
    """

    # ``--test`` given without an argument (the option's flag value).
    All = "all"

    # ``--test=schema``: emit schema messages without syncing records.
    Schema = "schema"

    # Option not supplied. Note the value is the boolean ``False``, not a string.
    Disabled = False
37
+
38
+
39
class Tap(PluginBase, metaclass=abc.ABCMeta):
    """Abstract base class for taps.

    The Tap class governs configuration, validation, and stream discovery for tap
    plugins.
    """

    # Constructor

    def __init__(
        self,
        config: Optional[Union[dict, PurePath, str, List[Union[PurePath, str]]]] = None,
        catalog: Union[PurePath, str, dict, Catalog, None] = None,
        state: Union[PurePath, str, dict, None] = None,
        parse_env_config: bool = False,
        validate_config: bool = True,
    ) -> None:
        """Initialize the tap.

        Args:
            config: Tap configuration. Can be a dictionary, a single path to a
                configuration file, or a list of paths to multiple configuration
                files.
            catalog: Tap catalog. Can be a dictionary, a Catalog object, or a path
                to the catalog file.
            state: Tap state. Can be dictionary or a path to the state file.
            parse_env_config: Whether to look for configuration values in environment
                variables.
            validate_config: True to require validation of config settings.
        """
        super().__init__(
            config=config,
            parse_env_config=parse_env_config,
            validate_config=validate_config,
        )

        # Declare private members
        self._streams: Optional[Dict[str, Stream]] = None
        self._input_catalog: Optional[Catalog] = None
        self._state: Dict[str, Any] = {}
        self._catalog: Optional[Catalog] = None  # Tap's working catalog

        # Remember the (first) config file path, if the config came from files.
        # Only sequences are indexed: indexing a dict config raised KeyError and
        # indexing a string path returned just its first character.
        self.config_file: Optional[Union[PurePath, str]] = None
        if isinstance(config, (list, tuple)):
            self.config_file = config[0] if config else None
        elif isinstance(config, (str, PurePath)):
            self.config_file = config or None

        # Process input catalog
        if isinstance(catalog, Catalog):
            self._input_catalog = catalog
        elif isinstance(catalog, dict):
            self._input_catalog = Catalog.from_dict(catalog)
        elif catalog is not None:
            self._input_catalog = Catalog.from_dict(read_json_file(catalog))

        # Initialize mapper
        self.mapper: PluginMapper
        self.mapper = PluginMapper(
            plugin_config=dict(self.config),
            logger=self.logger,
        )

        # Accessing `self.catalog` triggers stream discovery when no input
        # catalog was provided.
        self.mapper.register_raw_streams_from_catalog(self.catalog)

        # Process state
        state_dict: dict = {}
        if isinstance(state, dict):
            state_dict = state
        elif state:
            state_dict = read_json_file(state)
        self.load_state(state_dict)

    # Class properties

    @property
    def streams(self) -> Dict[str, Stream]:
        """Get streams discovered or catalogued for this tap.

        Results will be cached after first execution.

        Returns:
            A mapping of names to streams, using discovery or a provided catalog.
        """
        input_catalog = self.input_catalog

        if self._streams is None:
            self._streams = {}
            for stream in self.load_streams():
                if input_catalog is not None:
                    stream.apply_catalog(input_catalog)
                self._streams[stream.name] = stream
        return self._streams

    @property
    def state(self) -> dict:
        """Get tap state.

        Returns:
            The tap's state dictionary

        Raises:
            RuntimeError: If state has not been initialized.
        """
        if self._state is None:
            raise RuntimeError("Could not read from uninitialized state.")
        return self._state

    @property
    def input_catalog(self) -> Optional[Catalog]:
        """Get the catalog passed to the tap.

        Returns:
            Catalog dictionary input, or None if not provided.
        """
        return self._input_catalog

    @property
    def catalog(self) -> Catalog:
        """Get the tap's working catalog.

        The input catalog is used when one was provided; otherwise the catalog
        is built from the tap's streams. The result is cached.

        Returns:
            A Singer catalog object.
        """
        if self._catalog is None:
            self._catalog = self.input_catalog or self._singer_catalog

        return self._catalog

    @classproperty
    def capabilities(self) -> List[CapabilitiesEnum]:
        """Get tap capabilities.

        Returns:
            A list of capabilities supported by this tap.
        """
        return [
            TapCapabilities.CATALOG,
            TapCapabilities.STATE,
            TapCapabilities.DISCOVER,
            PluginCapabilities.ABOUT,
            PluginCapabilities.STREAM_MAPS,
            PluginCapabilities.FLATTENING,
        ]

    # Connection test:

    @final
    def run_connection_test(self) -> bool:
        """Run connection test.

        Every stream's record limit is set to 1, then each stream that has no
        parent stream type is synced.

        Returns:
            True if the test succeeded.
        """
        for stream in self.streams.values():
            # Initialize streams' record limits before beginning the sync test.
            stream._MAX_RECORDS_LIMIT = 1

        for stream in self.streams.values():
            if stream.parent_stream_type:
                self.logger.debug(
                    f"Child stream '{type(stream).__name__}' should be called by "
                    f"parent stream '{stream.parent_stream_type.__name__}'. "
                    "Skipping direct invocation."
                )
                continue
            try:
                stream.sync()
            except MaxRecordsLimitException:
                # Deliberately ignored: presumably raised once the one-record
                # limit set above is reached, which is the expected outcome.
                pass
        return True

    @final
    def write_schemas(self) -> None:
        """Write a SCHEMA message for all known streams to STDOUT."""
        for stream in self.streams.values():
            stream._write_schema_message()

    # Stream detection:

    def run_discovery(self) -> str:
        """Write the catalog json to STDOUT and return as a string.

        Returns:
            The catalog as a string of JSON.
        """
        catalog_text = self.catalog_json_text
        print(catalog_text)
        return catalog_text

    @property
    def catalog_dict(self) -> dict:
        """Get catalog dictionary.

        Returns:
            The tap's catalog as a dict
        """
        return cast(dict, self._singer_catalog.to_dict())

    @property
    def catalog_json_text(self) -> str:
        """Get catalog JSON.

        Returns:
            The tap's catalog as formatted JSON text.
        """
        return json.dumps(self.catalog_dict, indent=2)

    @property
    def _singer_catalog(self) -> Catalog:
        """Return a Catalog object.

        Returns:
            :class:`hotglue_singer_sdk.helpers._singer.Catalog`.
        """
        return Catalog(
            (stream.tap_stream_id, stream._singer_catalog_entry)
            for stream in self.streams.values()
        )

    def discover_streams(self) -> List[Stream]:
        """Initialize all available streams and return them as a list.

        Returns:
            List of discovered Stream objects.

        Raises:
            NotImplementedError: If the tap implementation does not override this
                method.
        """
        raise NotImplementedError(
            f"Tap '{self.name}' does not support discovery. "
            "Please set the '--catalog' command line argument and try again."
        )

    @final
    def load_streams(self) -> List[Stream]:
        """Load streams from discovery and initialize DAG.

        Return the output of `self.discover_streams()` to enumerate
        discovered streams.

        Returns:
            A list of discovered streams, ordered by name.

        Raises:
            KeyError: If a stream declares a parent stream type for which no
                stream was discovered.
        """
        # Build the parent-child dependency DAG

        # Index streams by type
        streams_by_type: Dict[Type[Stream], List[Stream]] = {}
        for stream in self.discover_streams():
            streams_by_type.setdefault(type(stream), []).append(stream)

        # Initialize child streams list for parents
        for stream_type, children in streams_by_type.items():
            if stream_type.parent_stream_type:
                parents = streams_by_type[stream_type.parent_stream_type]
                for parent in parents:
                    for child in children:
                        parent.child_streams.append(child)
                        self.logger.info(
                            f"Added '{child.name}' as child stream to '{parent.name}'"
                        )

        all_streams = [
            stream for group in streams_by_type.values() for stream in group
        ]
        return sorted(
            all_streams,
            key=lambda x: x.name,
            reverse=False,
        )

    # Bookmarks and state management

    def load_state(self, state: Dict[str, Any]) -> None:
        """Merge or initialize stream state with the provided state dictionary input.

        Override this method to perform validation and backwards-compatibility patches
        on self.state. If overriding, we recommend first running
        `super().load_state(state)` to ensure compatibility with the SDK.

        Args:
            state: Initialize the tap's state with this value.

        Raises:
            ValueError: If the tap's own state is None, meaning it has not been
                initialized.
        """
        if self.state is None:
            raise ValueError("Cannot write to uninitialized state dictionary.")

        # Copy every bookmark key/value of every stream into the tap's state.
        for stream_name, stream_state in state.get("bookmarks", {}).items():
            for key, val in stream_state.items():
                write_stream_state(
                    self.state,
                    stream_name,
                    key,
                    val,
                )

    # State handling

    def _reset_state_progress_markers(self) -> None:
        """Clear prior jobs' progress markers at beginning of sync."""
        for stream_state in self.state.get("bookmarks", {}).values():
            _state.reset_state_progress_markers(stream_state)
            for partition_state in stream_state.get("partitions", []):
                _state.reset_state_progress_markers(partition_state)

    # Fix sync replication method incompatibilities

    def _set_compatible_replication_methods(self) -> None:
        """Force full-table replication on parents where a descendent requires it.

        For every stream with a selected descendent whose
        `ignore_parent_replication_key` is set, the stream's replication key is
        cleared and its forced replication method is set to "FULL_TABLE".
        """
        stream: Stream
        for stream in self.streams.values():
            for descendent in stream.descendent_streams:
                if descendent.selected and descendent.ignore_parent_replication_key:
                    self.logger.warning(
                        f"Stream descendent '{descendent.name}' is selected and "
                        f"its parent '{stream.name}' does not use inclusive "
                        f"replication keys. "
                        f"Forcing full table replication for '{stream.name}'."
                    )
                    stream.replication_key = None
                    stream.forced_replication_method = "FULL_TABLE"

    # Sync methods

    @final
    def sync_all(self) -> None:
        """Sync all streams."""
        self._reset_state_progress_markers()
        self._set_compatible_replication_methods()
        stream: "Stream"
        for stream in self.streams.values():
            if not stream.selected and not stream.has_selected_descendents:
                self.logger.info(f"Skipping deselected stream '{stream.name}'.")
                continue

            if stream.parent_stream_type:
                self.logger.debug(
                    f"Child stream '{type(stream).__name__}' is expected to be called "
                    f"by parent stream '{stream.parent_stream_type.__name__}'. "
                    "Skipping direct invocation."
                )
                continue

            stream.sync()
            stream.finalize_state_progress_markers()

        # this second loop is needed for all streams to print out their costs
        # including child streams which are otherwise skipped in the loop above
        for stream in self.streams.values():
            stream.log_sync_costs()

    # Command Line Execution

    @classproperty
    def cli(cls) -> Callable:
        """Execute standard CLI handler for taps.

        Returns:
            A callable CLI object.
        """

        @common_options.PLUGIN_VERSION
        @common_options.PLUGIN_ABOUT
        @common_options.PLUGIN_ABOUT_FORMAT
        @common_options.PLUGIN_CONFIG
        @click.option(
            "--discover",
            is_flag=True,
            help="Run the tap in discovery mode.",
        )
        @click.option(
            "--test",
            is_flag=False,
            flag_value=CliTestOptionValue.All.value,
            default=CliTestOptionValue.Disabled,
            help=(
                "Use --test to sync a single record for each stream. "
                + "Use --test=schema to test schema output without syncing "
                + "records."
            ),
        )
        @click.option(
            "--catalog",
            help="Use a Singer catalog file with the tap.",
            type=click.Path(),
        )
        @click.option(
            "--state",
            help="Use a bookmarks file for incremental replication.",
            type=click.Path(),
        )
        @click.command(
            help="Execute the Singer tap.",
            context_settings={"help_option_names": ["--help"]},
        )
        def cli(
            version: bool = False,
            about: bool = False,
            discover: bool = False,
            test: CliTestOptionValue = CliTestOptionValue.Disabled,
            config: Tuple[str, ...] = (),
            state: Optional[str] = None,
            catalog: Optional[str] = None,
            format: Optional[str] = None,
        ) -> None:
            """Handle command line execution.

            Args:
                version: Display the package version.
                about: Display package metadata and settings.
                discover: Run the tap in discovery mode.
                test: Test connectivity by syncing a single record and exiting.
                format: Specify output style for `--about`.
                config: Configuration file location or 'ENV' to use environment
                    variables. Accepts multiple inputs as a tuple.
                catalog: Use a Singer catalog file with the tap.
                state: Use a bookmarks file for incremental replication.

            Raises:
                FileNotFoundError: If the config file does not exist.
            """
            if version:
                cls.print_version()
                return

            if not about:
                cls.print_version(print_fn=cls.logger.info)
            else:
                cls.print_about(format=format)
                return

            validate_config: bool = True
            if discover:
                # Don't abort on validation failures
                validate_config = False

            parse_env_config = False
            config_files: List[PurePath] = []
            for config_path in config:
                if config_path == "ENV":
                    # Allow parse from env vars:
                    parse_env_config = True
                    continue

                # Validate config file paths before adding to list
                if not Path(config_path).is_file():
                    raise FileNotFoundError(
                        f"Could not locate config file at '{config_path}'. "
                        "Please check that the file exists."
                    )

                config_files.append(Path(config_path))

            tap = cls(  # type: ignore  # Ignore 'type not callable'
                config=config_files or None,
                state=state,
                catalog=catalog,
                parse_env_config=parse_env_config,
                validate_config=validate_config,
            )

            # `test` is compared against the enum *values* because the option
            # arrives as a plain string when given on the command line.
            if discover:
                tap.run_discovery()
                if test == CliTestOptionValue.All.value:
                    tap.run_connection_test()
            elif test == CliTestOptionValue.All.value:
                tap.run_connection_test()
            elif test == CliTestOptionValue.Schema.value:
                tap.write_schemas()
            else:
                tap.sync_all()

        return cli
510
+
511
+
512
class SQLTap(Tap):
    """Tap variant whose streams are built from SQL catalog entries."""

    # Stream class used to initialize new SQL streams from their catalog declarations.
    default_stream_class: Type[SQLStream]

    def __init__(
        self,
        config: Optional[Union[dict, PurePath, str, List[Union[PurePath, str]]]] = None,
        catalog: Union[PurePath, str, dict, None] = None,
        state: Union[PurePath, str, dict, None] = None,
        parse_env_config: bool = False,
        validate_config: bool = True,
    ) -> None:
        """Set up the SQL tap and its catalog-dictionary cache.

        Args:
            config: Tap configuration. Can be a dictionary, a single path to a
                configuration file, or a list of paths to multiple configuration
                files.
            catalog: Tap catalog. Can be a dictionary or a path to the catalog file.
            state: Tap state. Can be dictionary or a path to the state file.
            parse_env_config: Whether to look for configuration values in environment
                variables.
            validate_config: True to require validation of config settings.
        """
        # The cache must exist before the base initializer runs: it reads
        # `self.catalog`, which can reach `catalog_dict` via stream discovery.
        self._catalog_dict: Optional[dict] = None
        super().__init__(
            config=config,
            catalog=catalog,
            state=state,
            parse_env_config=parse_env_config,
            validate_config=validate_config,
        )

    @property
    def catalog_dict(self) -> dict:
        """Get catalog dictionary.

        Order of precedence: the cached dictionary, then the input catalog, then
        entries discovered through the default stream class's connector (the
        discovered result is cached).

        Returns:
            The tap's catalog as a dict
        """
        cached = self._catalog_dict
        if cached:
            return cached

        provided = self.input_catalog
        if provided:
            return provided.to_dict()

        connector = self.default_stream_class.connector_class(dict(self.config))
        discovered: Dict[str, List[dict]] = {
            "streams": list(connector.discover_catalog_entries())
        }
        self._catalog_dict = discovered
        return discovered

    def discover_streams(self) -> List[Stream]:
        """Build one stream per entry in the catalog dictionary.

        Returns:
            List of discovered Stream objects.
        """
        return [
            self.default_stream_class(self, entry)
            for entry in self.catalog_dict["streams"]
        ]