hotglue-singer-sdk 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. hotglue_singer_sdk/__init__.py +34 -0
  2. hotglue_singer_sdk/authenticators.py +554 -0
  3. hotglue_singer_sdk/cli/__init__.py +1 -0
  4. hotglue_singer_sdk/cli/common_options.py +37 -0
  5. hotglue_singer_sdk/configuration/__init__.py +1 -0
  6. hotglue_singer_sdk/configuration/_dict_config.py +101 -0
  7. hotglue_singer_sdk/exceptions.py +52 -0
  8. hotglue_singer_sdk/helpers/__init__.py +1 -0
  9. hotglue_singer_sdk/helpers/_catalog.py +122 -0
  10. hotglue_singer_sdk/helpers/_classproperty.py +18 -0
  11. hotglue_singer_sdk/helpers/_compat.py +15 -0
  12. hotglue_singer_sdk/helpers/_flattening.py +374 -0
  13. hotglue_singer_sdk/helpers/_schema.py +100 -0
  14. hotglue_singer_sdk/helpers/_secrets.py +41 -0
  15. hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
  16. hotglue_singer_sdk/helpers/_singer.py +280 -0
  17. hotglue_singer_sdk/helpers/_state.py +282 -0
  18. hotglue_singer_sdk/helpers/_typing.py +231 -0
  19. hotglue_singer_sdk/helpers/_util.py +27 -0
  20. hotglue_singer_sdk/helpers/capabilities.py +240 -0
  21. hotglue_singer_sdk/helpers/jsonpath.py +39 -0
  22. hotglue_singer_sdk/io_base.py +134 -0
  23. hotglue_singer_sdk/mapper.py +691 -0
  24. hotglue_singer_sdk/mapper_base.py +156 -0
  25. hotglue_singer_sdk/plugin_base.py +415 -0
  26. hotglue_singer_sdk/py.typed +0 -0
  27. hotglue_singer_sdk/sinks/__init__.py +14 -0
  28. hotglue_singer_sdk/sinks/batch.py +90 -0
  29. hotglue_singer_sdk/sinks/core.py +412 -0
  30. hotglue_singer_sdk/sinks/record.py +66 -0
  31. hotglue_singer_sdk/sinks/sql.py +299 -0
  32. hotglue_singer_sdk/streams/__init__.py +14 -0
  33. hotglue_singer_sdk/streams/core.py +1294 -0
  34. hotglue_singer_sdk/streams/graphql.py +74 -0
  35. hotglue_singer_sdk/streams/rest.py +611 -0
  36. hotglue_singer_sdk/streams/sql.py +1023 -0
  37. hotglue_singer_sdk/tap_base.py +580 -0
  38. hotglue_singer_sdk/target_base.py +554 -0
  39. hotglue_singer_sdk/target_sdk/__init__.py +0 -0
  40. hotglue_singer_sdk/target_sdk/auth.py +124 -0
  41. hotglue_singer_sdk/target_sdk/client.py +286 -0
  42. hotglue_singer_sdk/target_sdk/common.py +13 -0
  43. hotglue_singer_sdk/target_sdk/lambda.py +121 -0
  44. hotglue_singer_sdk/target_sdk/rest.py +108 -0
  45. hotglue_singer_sdk/target_sdk/sinks.py +16 -0
  46. hotglue_singer_sdk/target_sdk/target.py +570 -0
  47. hotglue_singer_sdk/target_sdk/target_base.py +627 -0
  48. hotglue_singer_sdk/testing.py +198 -0
  49. hotglue_singer_sdk/typing.py +603 -0
  50. hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
  51. hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
  52. hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
  53. hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,231 @@
1
+ """General helper functions for json typing."""
2
+
3
+ import copy
4
+ import datetime
5
+ import logging
6
+ from enum import Enum
7
+ from functools import lru_cache
8
+ from typing import Any, Dict, List, Optional, Tuple, cast
9
+
10
+ import pendulum
11
+
12
+ _MAX_TIMESTAMP = "9999-12-31 23:59:59.999999"
13
+ _MAX_TIME = "23:59:59.999999"
14
+
15
+
16
class DatetimeErrorTreatmentEnum(Enum):
    """Enum for treatment options for date parsing errors.

    Consumed by :func:`handle_invalid_timestamp_in_record` to decide what to
    do with a value that cannot be parsed as a date/time.
    """

    # Raise a ValueError for the unparseable value.
    ERROR = "error"
    # Substitute the maximum timestamp/time sentinel value.
    MAX = "max"
    # Substitute None.
    NULL = "null"
22
+
23
+
24
def to_json_compatible(val: Any) -> Any:
    """Serialize datetimes to ISO-8601 strings; return other values unchanged.

    JSON does not support proper datetime types, so datetime-like values are
    rendered as strings. If given a naive datetime object, pendulum
    automatically makes it UTC.
    """
    if not isinstance(val, (datetime.datetime, pendulum.DateTime)):
        return val
    return pendulum.instance(val).isoformat()
32
+
33
+
34
def append_type(type_dict: dict, new_type: str) -> dict:
    """Return a combined type definition using the 'anyOf'/'type' JSON Schema keys.

    The 'anyOf' key takes precedence over 'type' when both are present. Adding
    a type that is already present is a no-op; a scalar entry is promoted to a
    two-element list when a second type is added. The input is not mutated.

    Args:
        type_dict: JSON Schema type definition containing 'anyOf' or 'type'.
        new_type: JSON Schema type name to merge in (e.g. "null").

    Returns:
        A deep copy of ``type_dict`` with ``new_type`` merged in.

    Raises:
        ValueError: If the dictionary has neither 'anyOf' nor 'type'.
    """
    result = copy.deepcopy(type_dict)
    for key in ("anyOf", "type"):
        if key not in result:
            continue
        current = result[key]
        if isinstance(current, list):
            # Bug fix: previously, a new_type already present in the list fell
            # through to the scalar branch and nested the list, producing
            # e.g. {"type": ["string", "null"]} -> {"type": [["string", "null"], "null"]}.
            if new_type not in current:
                current.append(new_type)
        elif new_type != current:
            result[key] = [current, new_type]
        return result

    raise ValueError(
        "Could not append type because the JSON schema for the dictionary "
        f"`{type_dict}` appears to be invalid."
    )
55
+
56
+
57
def is_object_type(property_schema: dict) -> Optional[bool]:
    """Return True if the JSON Schema type is an object, or None if detection fails."""
    if "anyOf" not in property_schema and "type" not in property_schema:
        # Nothing to inspect: cannot determine the data type.
        return None
    candidates = property_schema.get("anyOf", [property_schema.get("type")])
    return any("object" in entry or entry == "object" for entry in candidates)
65
+
66
+
67
def is_datetime_type(type_dict: dict) -> bool:
    """Return True if the JSON Schema type definition is a 'date-time' type.

    Also returns True if 'date-time' is nested within an 'anyOf' type array.

    Raises:
        ValueError: If ``type_dict`` is empty or lacks both 'anyOf' and 'type'.
    """
    if not type_dict:
        raise ValueError(
            "Could not detect type from empty type_dict. "
            "Did you forget to define a property in the stream schema?"
        )
    if "anyOf" in type_dict:
        # Recurse into each alternative (distinct loop name avoids shadowing).
        return any(is_datetime_type(alternative) for alternative in type_dict["anyOf"])
    if "type" in type_dict:
        return type_dict.get("format") == "date-time"
    raise ValueError(
        f"Could not detect type of replication key using schema '{type_dict}'"
    )
87
+
88
+
89
def get_datelike_property_type(property_schema: Dict) -> Optional[str]:
    """Return one of 'date-time', 'time', or 'date' if property is date-like.

    Checks the schema itself first, then each alternative under 'anyOf'.
    Returns None if no date-like string type is found.
    """
    if _is_string_with_format(property_schema):
        return cast(str, property_schema["format"])
    for alternative in property_schema.get("anyOf", ()):
        if _is_string_with_format(alternative):
            return cast(str, alternative["format"])
    return None
101
+
102
+
103
def _is_string_with_format(type_dict: dict) -> bool:
    """Return True if the schema is a string type with a date-like 'format'.

    Recognized formats are 'date-time', 'time', and 'date'.

    Args:
        type_dict: A JSON Schema type definition.

    Returns:
        True if date-like, False otherwise. (Previously the negative path
        implicitly returned None; an explicit bool makes the predicate safe
        for identity comparisons and consistent with its siblings.)
    """
    return "string" in type_dict.get("type", []) and type_dict.get("format") in {
        "date-time",
        "time",
        "date",
    }
110
+
111
+
112
def handle_invalid_timestamp_in_record(
    record,
    key_breadcrumb: List[str],
    invalid_value: str,
    datelike_typename: str,
    ex: Exception,
    treatment: Optional[DatetimeErrorTreatmentEnum],
    logger: logging.Logger,
) -> Any:
    """Apply the configured treatment, or raise, for an unparseable time value.

    Returns a replacement value for MAX/NULL treatments; raises ValueError
    when the treatment is ERROR (the default when none is given).
    """
    if treatment is None:
        treatment = DatetimeErrorTreatmentEnum.ERROR
    field_name = ":".join(key_breadcrumb)
    msg = f"Could not parse value '{invalid_value}' for field '{field_name}'."

    if treatment is DatetimeErrorTreatmentEnum.MAX:
        logger.warning(f"{msg}. Replacing with MAX value.\n{ex}\n")
        # 'time' fields get a max time-of-day; everything else a max timestamp.
        return _MAX_TIME if datelike_typename == "time" else _MAX_TIMESTAMP

    if treatment is DatetimeErrorTreatmentEnum.NULL:
        logger.warning(f"{msg}. Replacing with NULL.\n{ex}\n")
        return None

    raise ValueError(msg)
136
+
137
+
138
def is_string_array_type(type_dict: dict) -> bool:
    """Return True if the JSON Schema type definition is a string array.

    Raises:
        ValueError: If ``type_dict`` is empty, or lacks both 'anyOf' and 'type'.
    """
    if not type_dict:
        raise ValueError(
            "Could not detect type from empty type_dict. "
            "Did you forget to define a property in the stream schema?"
        )

    if "anyOf" in type_dict:
        return any(is_string_array_type(alt) for alt in type_dict["anyOf"])

    if "type" not in type_dict:
        raise ValueError(f"Could not detect type from schema '{type_dict}'")

    if "array" not in type_dict["type"]:
        return False
    return bool(is_string_type(type_dict["items"]))
153
+
154
+
155
def is_boolean_type(property_schema: dict) -> Optional[bool]:
    """Return True if the JSON Schema type is a boolean, or None if detection fails."""
    if "anyOf" not in property_schema and "type" not in property_schema:
        # Nothing to inspect: cannot determine the data type.
        return None
    candidates = property_schema.get("anyOf", [property_schema.get("type")])
    return any("boolean" in entry or entry == "boolean" for entry in candidates)
163
+
164
+
165
def is_string_type(property_schema: dict) -> Optional[bool]:
    """Return True if the JSON Schema type is a string, or None if detection fails."""
    if "anyOf" not in property_schema and "type" not in property_schema:
        # Nothing to inspect: cannot determine the data type.
        return None
    candidates = property_schema.get("anyOf", [property_schema.get("type")])
    return any("string" in entry or entry == "string" for entry in candidates)
173
+
174
+
175
@lru_cache()
def _warn_unmapped_properties(
    stream_name: str, property_names: Tuple[str, ...], logger: logging.Logger
):
    # lru_cache deduplicates: the message is emitted at most once per unique
    # (stream_name, property_names, logger) combination, so repeated records
    # with the same unmapped columns do not flood the log.
    logger.info(
        f"Properties {property_names} were present in the '{stream_name}' stream but "
        "not found in catalog schema. Ignoring."
    )
183
+
184
+
185
def conform_record_data_types(  # noqa: C901
    stream_name: str, row: Dict[str, Any], schema: dict, logger: logging.Logger
) -> Dict[str, Any]:
    """Translate values in record dictionary to singer-compatible data types.

    Any property names not found in the schema catalog will be removed, and a
    warning will be logged exactly once per unmapped property name.

    Args:
        stream_name: Name of the stream the record belongs to.
        row: The raw record, keyed by property name.
        schema: JSON Schema for the stream; must contain a 'properties' dict.
        logger: Logger used to report unmapped properties.

    Returns:
        A new dictionary containing only schema-mapped, JSON-safe values.
    """
    rec: Dict[str, Any] = {}
    unmapped_properties: List[str] = []
    for property_name, elem in row.items():
        if property_name not in schema["properties"]:
            unmapped_properties.append(property_name)
            continue

        property_schema = schema["properties"][property_name]
        if isinstance(elem, (datetime.datetime, pendulum.DateTime)):
            rec[property_name] = to_json_compatible(elem)
        elif isinstance(elem, datetime.date):
            rec[property_name] = elem.isoformat() + "T00:00:00+00:00"
        elif isinstance(elem, datetime.timedelta):
            # Represent the timedelta as an ISO timestamp offset from the Unix
            # epoch. datetime(1970, 1, 1) yields the identical naive epoch as
            # datetime.utcfromtimestamp(0), which is deprecated since 3.12.
            epoch = datetime.datetime(1970, 1, 1)
            timedelta_from_epoch = epoch + elem
            rec[property_name] = timedelta_from_epoch.isoformat() + "+00:00"
        elif isinstance(elem, datetime.time):
            rec[property_name] = str(elem)
        elif isinstance(elem, bytes):
            # for BIT value, treat 0 as False and anything else as True
            bit_representation: bool
            if is_boolean_type(property_schema):
                bit_representation = elem != b"\x00"
                rec[property_name] = bit_representation
            else:
                rec[property_name] = elem.hex()
        elif is_boolean_type(property_schema):
            boolean_representation: Optional[bool]
            if elem is None:
                boolean_representation = None
            elif elem == 0:
                boolean_representation = False
            else:
                boolean_representation = True
            rec[property_name] = boolean_representation
        else:
            rec[property_name] = elem

    if unmapped_properties:
        # Bug fix: only warn when something was actually dropped. Previously
        # the call was unconditional, logging a misleading
        # "Properties () were present ..." line for fully-mapped streams.
        _warn_unmapped_properties(stream_name, tuple(unmapped_properties), logger)
    return rec
@@ -0,0 +1,27 @@
1
+ """General helper functions, helper classes, and decorators."""
2
+
3
+ import json
4
+ from pathlib import Path, PurePath
5
+ from typing import Any, Dict, Union, cast
6
+
7
+ import pendulum
8
+
9
+
10
def read_json_file(path: Union[PurePath, str]) -> Dict[str, Any]:
    """Read a JSON file and return its parsed contents, raising if missing."""
    if not path:
        raise RuntimeError("Could not open file. Filepath not provided.")

    resolved = Path(path)
    if not resolved.exists():
        msg = f"File at '{path}' was not found."
        # Point the user at a sample template sitting next to the expected
        # file, if one exists.
        template = f"{path}.template"
        if Path(template).exists():
            msg += f"\nFor more info, please see the sample template at: {template}"
        # NOTE(review): FileExistsError is an odd choice for a *missing* file
        # (FileNotFoundError reads better) but is kept for compatibility with
        # existing callers that catch it.
        raise FileExistsError(msg)

    return cast(dict, json.loads(resolved.read_text()))
23
+
24
+
25
def utc_now() -> pendulum.DateTime:
    """Get the current moment as a timezone-aware UTC datetime."""
    return pendulum.now(tz="UTC")
@@ -0,0 +1,240 @@
1
+ """Module with helpers to declare capabilities and plugin behavior."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from enum import Enum, EnumMeta
6
+ from typing import Any, TypeVar
7
+ from warnings import warn
8
+
9
+ from hotglue_singer_sdk.typing import (
10
+ BooleanType,
11
+ IntegerType,
12
+ ObjectType,
13
+ PropertiesList,
14
+ Property,
15
+ )
16
+
17
# Type variable standing in for the member-value type in DeprecatedEnum.__new__.
_EnumMemberT = TypeVar("_EnumMemberT")

# Default JSON Schema to support config for built-in capabilities:

# Config schema fragment for the stream-maps capability (merged into a
# plugin's config JSON Schema via PropertiesList.to_dict()).
STREAM_MAPS_CONFIG = PropertiesList(
    Property(
        "stream_maps",
        ObjectType(),
        description="Config object for stream maps capability. "
        + "For more information check out "
        + "[Stream Maps](https://sdk.meltano.com/en/latest/stream_maps.html).",
    ),
    Property(
        "stream_map_config",
        ObjectType(),
        description="User-defined config values to be used within map expressions.",
    ),
).to_dict()
# Config schema fragment for the schema-flattening capability.
FLATTENING_CONFIG = PropertiesList(
    Property(
        "flattening_enabled",
        BooleanType(),
        description=(
            "'True' to enable schema flattening and automatically expand nested "
            "properties."
        ),
    ),
    Property(
        "flattening_max_depth",
        IntegerType(),
        description="The max depth to flatten schemas.",
    ),
).to_dict()
50
+
51
+
52
class DeprecatedEnum(Enum):
    """Enum base class whose members may carry a deprecation message.

    A member defined as a 2-tuple, e.g. ``NAME = "value", "message"``, stores
    the second element as its deprecation message.
    """

    def __new__(
        cls,
        value: _EnumMemberT,
        deprecation: str | None = None,
    ) -> DeprecatedEnum:
        """Create a new enum member.

        Args:
            value: Enum member value.
            deprecation: Deprecation message, or None if not deprecated.

        Returns:
            An enum member value.
        """
        # Construct the member directly via object.__new__ so the optional
        # deprecation element of a tuple definition is consumed here rather
        # than becoming part of the member's value.
        member: DeprecatedEnum = object.__new__(cls)
        member._value_ = value
        member._deprecation = deprecation
        return member

    @property
    def deprecation_message(self) -> str | None:
        """Get deprecation message.

        Returns:
            Deprecation message, or None if the member is not deprecated.
        """
        # Annotation-only statement: documents, for type checkers, the
        # attribute assigned dynamically in __new__.
        self._deprecation: str | None
        return self._deprecation

    def emit_warning(self) -> None:
        """Emit a DeprecationWarning naming this member and its message."""
        warn(
            f"{self.name} is deprecated. {self.deprecation_message}",
            DeprecationWarning,
            # stacklevel=3 attributes the warning to the code that accessed
            # the member through the metaclass hooks, not to this helper.
            stacklevel=3,
        )
91
+
92
+
93
class DeprecatedEnumMeta(EnumMeta):
    """Metaclass that warns whenever a deprecated enum member is accessed."""

    def __getitem__(self, name: str) -> Any:  # noqa: ANN401
        """Retrieve mapping item.

        Covers subscript lookup (``MyEnum["MEMBER"]``); warns if the member
        carries a deprecation message.

        Args:
            name: Item name.

        Returns:
            Enum member.
        """
        obj: Enum = super().__getitem__(name)
        if isinstance(obj, DeprecatedEnum) and obj.deprecation_message:
            obj.emit_warning()
        return obj

    def __getattribute__(cls, name: str) -> Any:  # noqa: ANN401
        """Retrieve enum attribute.

        Covers attribute-style access (``MyEnum.MEMBER``); warns if the
        resolved attribute is a deprecated member.

        Args:
            name: Attribute name.

        Returns:
            Attribute.
        """
        obj = super().__getattribute__(name)
        if isinstance(obj, DeprecatedEnum) and obj.deprecation_message:
            obj.emit_warning()
        return obj

    def __call__(self, *args: Any, **kwargs: Any) -> Any:  # noqa: ANN401
        """Call enum member.

        Covers value lookup (``MyEnum("value")``); warns if the resolved
        member is deprecated.

        Args:
            args: Positional arguments.
            kwargs: Keyword arguments.

        Returns:
            Enum member.
        """
        obj = super().__call__(*args, **kwargs)
        if isinstance(obj, DeprecatedEnum) and obj.deprecation_message:
            obj.emit_warning()
        return obj
138
+
139
+
140
class CapabilitiesEnum(DeprecatedEnum, metaclass=DeprecatedEnumMeta):
    """Base capabilities enumeration.

    Members render as their plain string value in both ``str()`` and
    ``repr()``.
    """

    def __str__(self) -> str:
        """String representation.

        Returns:
            Stringified enum value.
        """
        return str(self.value)

    def __repr__(self) -> str:
        """String representation (intentionally identical to ``__str__``).

        Returns:
            Stringified enum value.
        """
        return str(self.value)
158
+
159
+
160
class PluginCapabilities(CapabilitiesEnum):
    """Core capabilities which can be supported by taps and targets."""

    #: Support plugin capability and setting discovery.
    ABOUT = "about"

    #: Support :doc:`inline stream map transforms</stream_maps>`.
    STREAM_MAPS = "stream-maps"

    #: Support schema flattening, aka denesting of complex properties.
    FLATTENING = "schema-flattening"

    #: Support the
    #: `ACTIVATE_VERSION <https://hub.meltano.com/singer/docs#activate-version>`_
    #: extension.
    ACTIVATE_VERSION = "activate-version"

    #: Input and output from
    #: `batched files <https://hub.meltano.com/singer/docs#batch>`_.
    #: A.K.A ``FAST_SYNC``.
    BATCH = "batch"

    #: Support raising hotglue exception classes.
    HOTGLUE_EXCEPTIONS_CLASSES = "hotglue-exceptions-classes"
184
+
185
+
186
class TapCapabilities(CapabilitiesEnum):
    """Tap-specific capabilities."""

    #: Generate a catalog with `--discover`.
    DISCOVER = "discover"

    #: Accept input catalog, apply metadata and selection rules.
    CATALOG = "catalog"

    #: Incremental refresh by means of state tracking.
    STATE = "state"

    #: Automatic connectivity and stream init test via :ref:`--test<Test connectivity>`.
    TEST = "test"

    #: Support for ``replication_method: LOG_BASED``. You can read more about this
    #: feature in `MeltanoHub <https://hub.meltano.com/singer/docs#log-based>`_.
    LOG_BASED = "log-based"

    #: Deprecated. Please use :attr:`~TapCapabilities.CATALOG` instead.
    #: The second tuple element is the deprecation message consumed by
    #: :meth:`DeprecatedEnum.__new__`.
    PROPERTIES = "properties", "Please use CATALOG instead."
207
+
208
+
209
class TargetCapabilities(CapabilitiesEnum):
    """Target-specific capabilities.

    Advertised by targets to describe which optional behaviors they support.
    """

    #: Allows a ``soft_delete=True`` config option.
    #: Requires a tap stream supporting :attr:`PluginCapabilities.ACTIVATE_VERSION`
    #: and/or :attr:`TapCapabilities.LOG_BASED`.
    SOFT_DELETE = "soft-delete"

    #: Allows a ``hard_delete=True`` config option.
    #: Requires a tap stream supporting :attr:`PluginCapabilities.ACTIVATE_VERSION`
    #: and/or :attr:`TapCapabilities.LOG_BASED`.
    HARD_DELETE = "hard-delete"

    #: Fail safe for unknown JSON Schema types.
    DATATYPE_FAILSAFE = "datatype-failsafe"

    #: Allow denesting complex properties.
    RECORD_FLATTENING = "record-flattening"

    #: Allow setting the target schema.
    TARGET_SCHEMA = "target-schema"
230
+
231
class AlertingLevel(Enum):
    """Severity used when this connector fails on an unexpected error.

    This can be used to prevent too much noise from this connector's failures.
    """

    WARNING = "warning"
    ERROR = "error"
    NONE = "none"
@@ -0,0 +1,39 @@
1
+ """JSONPath helpers."""
2
+
3
+ from typing import Any, Generator, Union
4
+
5
+ import jsonpath_ng
6
+ import memoization
7
+ from jsonpath_ng.ext import parse
8
+
9
+
10
def extract_jsonpath(
    expression: str, input: Union[dict, list]
) -> Generator[Any, None, None]:
    """Extract records from an input based on a JSONPath expression.

    Args:
        expression: JSONPath expression to match against the input.
        input: JSON object or array to extract records from.

    Yields:
        Records matched with JSONPath expression.
    """
    matches = _compile_jsonpath(expression).find(input)
    yield from (match.value for match in matches)
27
+
28
+
29
@memoization.cached
def _compile_jsonpath(expression: str) -> jsonpath_ng.JSONPath:
    """Parse a JSONPath expression and cache the result.

    Caching avoids re-parsing the same expression for every record processed.

    Args:
        expression: A string representing a JSONPath expression.

    Returns:
        A compiled JSONPath object.
    """
    return parse(expression)
@@ -0,0 +1,134 @@
1
+ """Abstract base classes for all Singer messages IO operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import abc
6
+ import enum
7
+ import json
8
+ import logging
9
+ import sys
10
+ from collections import Counter, defaultdict
11
+ from typing import IO
12
+ from typing import Counter as CounterType
13
+
14
+ from hotglue_singer_sdk.helpers._compat import final
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class SingerMessageType(str, enum.Enum):
    """Singer specification message types.

    Subclassing ``str`` lets members compare equal to the raw ``type``
    strings parsed from incoming messages (see SingerReader._process_lines).
    """

    RECORD = "RECORD"
    SCHEMA = "SCHEMA"
    STATE = "STATE"
    ACTIVATE_VERSION = "ACTIVATE_VERSION"
26
+
27
+
28
class SingerReader(metaclass=abc.ABCMeta):
    """Interface for all plugins reading Singer messages from stdin.

    Subclasses implement the abstract ``_process_*_message`` hooks; the
    dispatch loop in :meth:`_process_lines` routes each parsed message to
    the matching hook based on its ``type`` field.
    """

    @final
    def listen(self, file_input: IO[str] | None = None) -> None:
        """Read from input until all messages are processed.

        Args:
            file_input: Readable stream of messages. Defaults to standard in.

        This method is internal to the SDK and should not need to be overridden.
        """
        if not file_input:
            file_input = sys.stdin

        self._process_lines(file_input)
        self._process_endofpipe()

    @staticmethod
    def _assert_line_requires(line_dict: dict, requires: set[str]) -> None:
        """Check that a parsed message contains all required keys.

        Args:
            line_dict: Parsed JSON message to validate.
            requires: Key names that must be present in the message.

        Raises:
            Exception: If one or more required keys are missing.
        """
        if not requires.issubset(line_dict):
            missing = requires - set(line_dict)
            raise Exception(
                f"Line is missing required {', '.join(missing)} key(s): {line_dict}"
            )

    def _process_lines(self, file_input: IO[str]) -> CounterType[str]:
        """Internal method to process jsonl lines from a Singer tap.

        Each line is parsed as JSON, validated for a ``type`` key, and
        dispatched to the handler matching its message type; unrecognized
        types go to :meth:`_process_unknown_message`.

        Args:
            file_input: Readable stream of messages, each on a separate line.

        Returns:
            A counter object for the processed lines, keyed by message type.

        Raises:
            json.decoder.JSONDecodeError: raised if any lines are not valid json
        """
        stats: dict[str, int] = defaultdict(int)
        for line in file_input:
            try:
                line_dict = json.loads(line)
            except json.decoder.JSONDecodeError as exc:
                # Log the offending line before propagating so the bad input
                # is visible in the logs.
                logger.error("Unable to parse:\n%s", line, exc_info=exc)
                raise

            self._assert_line_requires(line_dict, requires={"type"})

            # The raw "type" string from the message; equality with
            # SingerMessageType members works because that enum mixes in str.
            record_type: str = line_dict["type"]
            if record_type == SingerMessageType.SCHEMA:
                self._process_schema_message(line_dict)

            elif record_type == SingerMessageType.RECORD:
                self._process_record_message(line_dict)

            elif record_type == SingerMessageType.ACTIVATE_VERSION:
                self._process_activate_version_message(line_dict)

            elif record_type == SingerMessageType.STATE:
                self._process_state_message(line_dict)

            else:
                self._process_unknown_message(line_dict)

            stats[record_type] += 1

        return Counter(**stats)

    @abc.abstractmethod
    def _process_schema_message(self, message_dict: dict) -> None:
        """Handle a SCHEMA message; implemented by subclasses."""
        ...

    @abc.abstractmethod
    def _process_record_message(self, message_dict: dict) -> None:
        """Handle a RECORD message; implemented by subclasses."""
        ...

    @abc.abstractmethod
    def _process_state_message(self, message_dict: dict) -> None:
        """Handle a STATE message; implemented by subclasses."""
        ...

    @abc.abstractmethod
    def _process_activate_version_message(self, message_dict: dict) -> None:
        """Handle an ACTIVATE_VERSION message; implemented by subclasses."""
        ...

    def _process_unknown_message(self, message_dict: dict) -> None:
        """Internal method to process unknown message types from a Singer tap.

        Args:
            message_dict: Dictionary representation of the Singer message.

        Raises:
            ValueError: raised if a message type is not recognized
        """
        record_type = message_dict["type"]
        raise ValueError(f"Unknown message type '{record_type}' in message.")

    def _process_endofpipe(self) -> None:
        """Hook called once after all input is consumed; no-op by default."""
        pass