hotglue-singer-sdk 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hotglue_singer_sdk/__init__.py +34 -0
- hotglue_singer_sdk/authenticators.py +554 -0
- hotglue_singer_sdk/cli/__init__.py +1 -0
- hotglue_singer_sdk/cli/common_options.py +37 -0
- hotglue_singer_sdk/configuration/__init__.py +1 -0
- hotglue_singer_sdk/configuration/_dict_config.py +101 -0
- hotglue_singer_sdk/exceptions.py +52 -0
- hotglue_singer_sdk/helpers/__init__.py +1 -0
- hotglue_singer_sdk/helpers/_catalog.py +122 -0
- hotglue_singer_sdk/helpers/_classproperty.py +18 -0
- hotglue_singer_sdk/helpers/_compat.py +15 -0
- hotglue_singer_sdk/helpers/_flattening.py +374 -0
- hotglue_singer_sdk/helpers/_schema.py +100 -0
- hotglue_singer_sdk/helpers/_secrets.py +41 -0
- hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
- hotglue_singer_sdk/helpers/_singer.py +280 -0
- hotglue_singer_sdk/helpers/_state.py +282 -0
- hotglue_singer_sdk/helpers/_typing.py +231 -0
- hotglue_singer_sdk/helpers/_util.py +27 -0
- hotglue_singer_sdk/helpers/capabilities.py +240 -0
- hotglue_singer_sdk/helpers/jsonpath.py +39 -0
- hotglue_singer_sdk/io_base.py +134 -0
- hotglue_singer_sdk/mapper.py +691 -0
- hotglue_singer_sdk/mapper_base.py +156 -0
- hotglue_singer_sdk/plugin_base.py +415 -0
- hotglue_singer_sdk/py.typed +0 -0
- hotglue_singer_sdk/sinks/__init__.py +14 -0
- hotglue_singer_sdk/sinks/batch.py +90 -0
- hotglue_singer_sdk/sinks/core.py +412 -0
- hotglue_singer_sdk/sinks/record.py +66 -0
- hotglue_singer_sdk/sinks/sql.py +299 -0
- hotglue_singer_sdk/streams/__init__.py +14 -0
- hotglue_singer_sdk/streams/core.py +1294 -0
- hotglue_singer_sdk/streams/graphql.py +74 -0
- hotglue_singer_sdk/streams/rest.py +611 -0
- hotglue_singer_sdk/streams/sql.py +1023 -0
- hotglue_singer_sdk/tap_base.py +580 -0
- hotglue_singer_sdk/target_base.py +554 -0
- hotglue_singer_sdk/target_sdk/__init__.py +0 -0
- hotglue_singer_sdk/target_sdk/auth.py +124 -0
- hotglue_singer_sdk/target_sdk/client.py +286 -0
- hotglue_singer_sdk/target_sdk/common.py +13 -0
- hotglue_singer_sdk/target_sdk/lambda.py +121 -0
- hotglue_singer_sdk/target_sdk/rest.py +108 -0
- hotglue_singer_sdk/target_sdk/sinks.py +16 -0
- hotglue_singer_sdk/target_sdk/target.py +570 -0
- hotglue_singer_sdk/target_sdk/target_base.py +627 -0
- hotglue_singer_sdk/testing.py +198 -0
- hotglue_singer_sdk/typing.py +603 -0
- hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
- hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,280 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
from dataclasses import dataclass, fields
|
|
3
|
+
from enum import Enum
|
|
4
|
+
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union, cast
|
|
5
|
+
|
|
6
|
+
from singer.catalog import Catalog as BaseCatalog
|
|
7
|
+
from singer.catalog import CatalogEntry as BaseCatalogEntry
|
|
8
|
+
|
|
9
|
+
from hotglue_singer_sdk.helpers._schema import SchemaPlus
|
|
10
|
+
|
|
11
|
+
Breadcrumb = Tuple[str, ...]
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class SelectionMask(Dict[Breadcrumb, bool]):
    """Boolean mask for property selection in schemas and records."""

    def __missing__(self, breadcrumb: Breadcrumb) -> bool:
        """Resolve a breadcrumb that has no explicit entry.

        A property breadcrumb inherits its parent's value (the parent is two
        segments up, e.g. ``("properties", "a", "properties", "b")`` ->
        ``("properties", "a")``); the root (stream) breadcrumb defaults to True.
        """
        if len(breadcrumb) < 2:
            # Root of the stream: selected by default.
            return True
        return self[breadcrumb[:-2]]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class Metadata:
    """Base stream or property metadata."""

    class InclusionType(str, Enum):
        """Catalog inclusion types."""

        AVAILABLE = "available"
        AUTOMATIC = "automatic"
        UNSUPPORTED = "unsupported"

    inclusion: Optional[InclusionType] = None
    selected: Optional[bool] = None
    selected_by_default: Optional[bool] = None

    @classmethod
    def from_dict(cls, value: Dict[str, Any]):
        """Build a metadata object from a raw metadata dictionary.

        Singer metadata keys use dashes (``selected-by-default``) while the
        dataclass fields use underscores; missing keys become None.
        """
        kwargs = {}
        for field in fields(cls):
            kwargs[field.name] = value.get(field.name.replace("_", "-"))
        return cls(**kwargs)

    def to_dict(self) -> Dict[str, Any]:
        """Convert metadata to a JSON-encodeable dictionary.

        Only fields with a non-None value are emitted, with underscores in
        field names converted back to dashes.
        """
        return {
            field.name.replace("_", "-"): getattr(self, field.name)
            for field in fields(self)
            if getattr(self, field.name) is not None
        }
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class StreamMetadata(Metadata):
    """Stream metadata.

    Extends the base property metadata with stream-level (root breadcrumb)
    attributes from the Singer spec.
    """

    # Primary-key property names for the stream's table.
    table_key_properties: Optional[List[str]] = None
    # Replication method forced by the tap (e.g. INCREMENTAL, FULL_TABLE).
    forced_replication_method: Optional[str] = None
    # Property names that are valid replication (bookmark) keys.
    valid_replication_keys: Optional[List[str]] = None
    # Optional schema name associated with the stream.
    schema_name: Optional[str] = None
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class MetadataMapping(Dict[Breadcrumb, Union[Metadata, StreamMetadata]]):
    """Stream metadata mapping.

    Maps breadcrumbs (tuples of path segments) to metadata objects. The empty
    breadcrumb ``()`` is the stream itself and holds a StreamMetadata; any
    non-empty breadcrumb holds a property-level Metadata.
    """

    @classmethod
    def from_iterable(cls, iterable: Iterable[Dict[str, Any]]):
        """Create a metadata mapping from an iterable of metadata dictionaries.

        Each input dict has the Singer shape
        ``{"breadcrumb": [...], "metadata": {...}}``.
        """
        mapping = cls()
        for d in iterable:
            breadcrumb = tuple(d["breadcrumb"])
            metadata = d["metadata"]
            if breadcrumb:
                # Non-empty breadcrumb: property-level metadata.
                mapping[breadcrumb] = Metadata.from_dict(metadata)
            else:
                # Empty breadcrumb: stream-level (root) metadata.
                mapping[breadcrumb] = StreamMetadata.from_dict(metadata)

        return mapping

    def to_list(self) -> List[Dict[str, Any]]:
        """Convert mapping to a JSON-encodable list of Singer metadata dicts."""
        return [
            {"breadcrumb": list(k), "metadata": v.to_dict()} for k, v in self.items()
        ]

    def __missing__(self, breadcrumb: Breadcrumb):
        """Create, store, and return an empty entry for a missing breadcrumb."""
        self[breadcrumb] = Metadata() if breadcrumb else StreamMetadata()
        return self[breadcrumb]

    @property
    def root(self):
        """Get stream (root) metadata from this mapping."""
        meta: StreamMetadata = self[()]
        return meta

    @classmethod
    def get_standard_metadata(
        cls,
        schema: Optional[Dict[str, Any]] = None,
        schema_name: Optional[str] = None,
        key_properties: Optional[List[str]] = None,
        valid_replication_keys: Optional[List[str]] = None,
        replication_method: Optional[str] = None,
    ):
        """Get default metadata for a stream.

        Builds a root StreamMetadata entry plus, when a schema is provided,
        one property entry per schema property (key properties are marked
        AUTOMATIC, all others AVAILABLE).
        """
        mapping = cls()
        root = StreamMetadata(
            table_key_properties=key_properties,
            forced_replication_method=replication_method,
            valid_replication_keys=valid_replication_keys,
        )

        if schema:
            root.inclusion = Metadata.InclusionType.AVAILABLE

            if schema_name:
                root.schema_name = schema_name

            for field_name in schema.get("properties", {}).keys():
                if key_properties and field_name in key_properties:
                    # Key properties must always be included in output.
                    entry = Metadata(inclusion=Metadata.InclusionType.AUTOMATIC)
                else:
                    entry = Metadata(inclusion=Metadata.InclusionType.AVAILABLE)

                mapping[("properties", field_name)] = entry

        mapping[()] = root

        return mapping

    def resolve_selection(self) -> SelectionMask:
        """Resolve selection for metadata breadcrumbs and store them in a mapping."""
        return SelectionMask(
            (breadcrumb, self._breadcrumb_is_selected(breadcrumb))
            for breadcrumb in self
        )

    def _breadcrumb_is_selected(self, breadcrumb: Breadcrumb) -> bool:
        """Determine if a property breadcrumb is selected based on existing metadata.

        An empty breadcrumb (empty tuple) indicates the stream itself. Otherwise, the
        breadcrumb is the path to a property within the stream.
        """
        if not self:
            # Default to true if no metadata to say otherwise
            return True

        md_entry = self.get(breadcrumb, Metadata())
        parent_value = None

        if len(breadcrumb) > 0:
            # Recurse upward: a property's parent is two path segments up.
            parent_breadcrumb = breadcrumb[:-2]
            parent_value = self._breadcrumb_is_selected(parent_breadcrumb)

        if parent_value is False:
            # A deselected parent deselects all of its children.
            return parent_value

        if md_entry.inclusion == Metadata.InclusionType.UNSUPPORTED:
            # UNSUPPORTED always wins, even over an explicit selected=True.
            if md_entry.selected is True:
                logger.debug(
                    "Property '%s' was selected but is not supported. "
                    "Ignoring selected==True input.",
                    ":".join(breadcrumb),
                )
            return False

        if md_entry.inclusion == Metadata.InclusionType.AUTOMATIC:
            # AUTOMATIC always wins, even over an explicit selected=False.
            if md_entry.selected is False:
                logger.debug(
                    "Property '%s' was deselected while also set "
                    "for automatic inclusion. Ignoring selected==False input.",
                    ":".join(breadcrumb),
                )
            return True

        if md_entry.selected is not None:
            return md_entry.selected

        if md_entry.selected_by_default is not None:
            return md_entry.selected_by_default

        # No explicit selection metadata: fall back to the parent's value.
        logger.debug(
            "Selection metadata omitted for '%s'. "
            "Using parent value of selected=%s.",
            breadcrumb,
            parent_value,
        )
        return parent_value or False
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
@dataclass
class CatalogEntry(BaseCatalogEntry):
    """Singer catalog entry.

    Extends the base singer-python CatalogEntry with typed metadata
    (MetadataMapping) and schema (SchemaPlus) attributes.
    """

    # Unique identifier for the stream in the catalog.
    tap_stream_id: str
    # Breadcrumb-keyed stream/property metadata.
    metadata: MetadataMapping
    # JSON-schema wrapper for the stream's record schema.
    schema: SchemaPlus
    stream: Optional[str] = None
    key_properties: Optional[List[str]] = None
    replication_key: Optional[str] = None
    is_view: Optional[bool] = None
    database: Optional[str] = None
    table: Optional[str] = None
    row_count: Optional[int] = None
    stream_alias: Optional[str] = None
    replication_method: Optional[str] = None

    @classmethod
    def from_dict(cls, stream: Dict[str, Any]):
        """Create a catalog entry from a raw catalog-stream dictionary.

        Note the Singer key names differ from the attribute names for
        'database_name' -> database and 'table_name' -> table.
        """
        return cls(
            tap_stream_id=stream["tap_stream_id"],
            stream=stream.get("stream"),
            replication_key=stream.get("replication_key"),
            key_properties=stream.get("key_properties"),
            database=stream.get("database_name"),
            table=stream.get("table_name"),
            schema=SchemaPlus.from_dict(stream.get("schema", {})),
            is_view=stream.get("is_view"),
            stream_alias=stream.get("stream_alias"),
            metadata=MetadataMapping.from_iterable(stream.get("metadata", [])),
            replication_method=stream.get("replication_method"),
        )

    def to_dict(self):
        """Convert entry to a dictionary.

        Overrides the base serialization so metadata is emitted via
        MetadataMapping.to_list() rather than the raw dict contents.
        """
        d = super().to_dict()
        d["metadata"] = self.metadata.to_list()
        return d
|
|
247
|
+
|
|
248
|
+
|
|
249
|
+
class Catalog(Dict[str, CatalogEntry], BaseCatalog):
    """Singer catalog mapping of stream entries, keyed by tap_stream_id."""

    @classmethod
    def from_dict(cls, data: Dict[str, List[Dict[str, Any]]]) -> "Catalog":
        """Build a catalog from a raw ``{"streams": [...]}`` dictionary."""
        catalog = cls()
        for stream_dict in data.get("streams", []):
            catalog.add_stream(CatalogEntry.from_dict(stream_dict))
        return catalog

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary representation of the catalog.

        Returns:
            A dictionary with the defined catalog streams.
        """
        serialized = super().to_dict()
        return cast(Dict[str, Any], serialized)

    @property
    def streams(self) -> List[CatalogEntry]:
        """Get all catalog entries as a list."""
        return [entry for entry in self.values()]

    def add_stream(self, entry: CatalogEntry) -> None:
        """Register a stream entry under its tap_stream_id."""
        self[entry.tap_stream_id] = entry

    def get_stream(self, stream_id: str) -> Optional[CatalogEntry]:
        """Look up a stream entry by id, returning None when absent."""
        return self.get(stream_id)
|
|
@@ -0,0 +1,282 @@
|
|
|
1
|
+
"""Helper functions for state and bookmark management."""
|
|
2
|
+
|
|
3
|
+
import datetime
|
|
4
|
+
from typing import Any, Callable, List, Optional, Union, cast
|
|
5
|
+
|
|
6
|
+
from hotglue_singer_sdk.exceptions import InvalidStreamSortException
|
|
7
|
+
from hotglue_singer_sdk.helpers._typing import to_json_compatible
|
|
8
|
+
|
|
9
|
+
PROGRESS_MARKERS = "progress_markers"
|
|
10
|
+
PROGRESS_MARKER_NOTE = "Note"
|
|
11
|
+
SIGNPOST_MARKER = "replication_key_signpost"
|
|
12
|
+
STARTING_MARKER = "starting_replication_value"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_state_if_exists(
    tap_state: dict,
    tap_stream_id: str,
    state_partition_context: Optional[dict] = None,
    key: Optional[str] = None,
) -> Optional[Any]:
    """Look up the stream or partition state without creating it.

    Parameters
    ----------
    tap_state : dict
        the existing state dict which contains all streams.
    tap_stream_id : str
        the id of the stream
    state_partition_context : Optional[dict], optional
        keys which identify the partition context, by default None (not partitioned)
    key : Optional[str], optional
        name of the key searched for, by default None (return entire state if found)

    Returns
    -------
    Optional[Any]
        Returns the state if exists, otherwise None

    Raises
    ------
    ValueError
        Raised if state is invalid or cannot be parsed.

    """
    # Guard clauses: no bookmarks at all, or no entry for this stream.
    if "bookmarks" not in tap_state:
        return None
    if tap_stream_id not in tap_state["bookmarks"]:
        return None

    stream_state = tap_state["bookmarks"][tap_stream_id]
    if not state_partition_context:
        # Stream-level lookup: entire state, or one key of it.
        return stream_state.get(key, None) if key else stream_state

    if "partitions" not in stream_state:
        # Partition context requested but no partitions recorded.
        return None

    matched = _find_in_partitions_list(
        stream_state["partitions"], state_partition_context
    )
    if matched is None:
        # Partition definition not present.
        return None
    return matched.get(key, None) if key else matched
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def get_state_partitions_list(
    tap_state: dict, tap_stream_id: str
) -> Optional[List[dict]]:
    """Return a list of partitions defined in the state, or None if not defined."""
    stream_state = get_state_if_exists(tap_state, tap_stream_id) or {}
    return stream_state.get("partitions", None)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _find_in_partitions_list(
|
|
76
|
+
partitions: List[dict], state_partition_context: dict
|
|
77
|
+
) -> Optional[dict]:
|
|
78
|
+
found = [
|
|
79
|
+
partition_state
|
|
80
|
+
for partition_state in partitions
|
|
81
|
+
if partition_state["context"] == state_partition_context
|
|
82
|
+
]
|
|
83
|
+
if len(found) > 1:
|
|
84
|
+
raise ValueError(
|
|
85
|
+
f"State file contains duplicate entries for partition: "
|
|
86
|
+
"{state_partition_context}.\n"
|
|
87
|
+
f"Matching state values were: {str(found)}"
|
|
88
|
+
)
|
|
89
|
+
if found:
|
|
90
|
+
return cast(dict, found[0])
|
|
91
|
+
|
|
92
|
+
return None
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def _create_in_partitions_list(
|
|
96
|
+
partitions: List[dict], state_partition_context: dict
|
|
97
|
+
) -> dict:
|
|
98
|
+
# Existing partition not found. Creating new state entry in partitions list...
|
|
99
|
+
new_partition_state = {"context": state_partition_context}
|
|
100
|
+
partitions.append(new_partition_state)
|
|
101
|
+
return new_partition_state
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def get_writeable_state_dict(
    tap_state: dict, tap_stream_id: str, state_partition_context: Optional[dict] = None
) -> dict:
    """Return the stream or partition state, creating a new one if it does not exist.

    Parameters
    ----------
    tap_state : dict
        the existing state dict which contains all streams.
    tap_stream_id : str
        the id of the stream
    state_partition_context : Optional[dict], optional
        keys which identify the partition context, by default None (not partitioned)

    Returns
    -------
    dict
        Returns a writeable dict at the stream or partition level.

    Raises
    ------
    ValueError
        Raise an error if duplicate entries are found.

    """
    if tap_state is None:
        raise ValueError("Cannot write state to missing state dictionary.")

    # Create the bookmarks container and the stream entry on demand.
    bookmarks = tap_state.setdefault("bookmarks", {})
    stream_state = cast(dict, bookmarks.setdefault(tap_stream_id, {}))
    if not state_partition_context:
        return stream_state

    # Partitioned stream: locate (or create) the matching partition entry.
    partitions: List[dict] = stream_state.setdefault("partitions", [])
    existing = _find_in_partitions_list(partitions, state_partition_context)
    if existing:
        return existing

    return _create_in_partitions_list(partitions, state_partition_context)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def write_stream_state(
    tap_state,
    tap_stream_id: str,
    key,
    val,
    *,
    state_partition_context: Optional[dict] = None,
) -> None:
    """Write a single key/value pair into the stream (or partition) state."""
    target = get_writeable_state_dict(
        tap_state, tap_stream_id, state_partition_context=state_partition_context
    )
    target[key] = val
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def reset_state_progress_markers(stream_or_partition_state: dict) -> Optional[dict]:
    """Wipe the state once sync is complete.

    For logging purposes, return the wiped 'progress_markers' object if it
    existed (minus the auto-generated note), otherwise None.
    """
    markers = stream_or_partition_state.pop(PROGRESS_MARKERS, {})
    # Drop the auto-generated human-readable note before reporting.
    markers.pop(PROGRESS_MARKER_NOTE, None)
    # Report remaining markers, if any survived.
    return markers if markers else None
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def write_replication_key_signpost(
    stream_or_partition_state: dict,
    new_signpost_value: Any,
) -> None:
    """Persist the replication-key signpost value into state."""
    # Normalize to a JSON-compatible representation before storing.
    value = to_json_compatible(new_signpost_value)
    stream_or_partition_state[SIGNPOST_MARKER] = value
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def write_starting_replication_value(
    stream_or_partition_state: dict,
    initial_value: Any,
) -> None:
    """Persist the initial replication value into state."""
    # Normalize to a JSON-compatible representation before storing.
    value = to_json_compatible(initial_value)
    stream_or_partition_state[STARTING_MARKER] = value
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def get_starting_replication_value(stream_or_partition_state: dict):
    """Retrieve the initial replication marker value from state, if any."""
    if stream_or_partition_state:
        return stream_or_partition_state.get(STARTING_MARKER)
    # Empty or missing state: no starting value recorded.
    return None
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def increment_state(
    stream_or_partition_state: dict,
    latest_record: dict,
    replication_key: str,
    is_sorted: bool,
    check_sorted: bool,
) -> None:
    """Update the state bookmark using data from the latest record.

    Raises InvalidStreamSortException if is_sorted=True, check_sorted=True and
    unsorted data is detected in the stream.
    """
    # Sorted streams bookmark directly on the state; unsorted streams are not
    # resumable mid-sync, so their bookmark lives under 'progress_markers'
    # (created with a human-readable note) until the sync is finalized.
    if is_sorted:
        target = stream_or_partition_state
    else:
        target = stream_or_partition_state.setdefault(
            PROGRESS_MARKERS,
            {PROGRESS_MARKER_NOTE: "Progress is not resumable if interrupted."},
        )
    previous_value = to_json_compatible(target.get("replication_key_value"))
    latest_value = to_json_compatible(latest_record[replication_key])
    # Advance the bookmark when there is no prior value, sort checking is
    # disabled, or the new value does not move backwards.
    if previous_value is None or not check_sorted or latest_value >= previous_value:
        target["replication_key"] = replication_key
        target["replication_key_value"] = latest_value
        return

    if is_sorted:
        raise InvalidStreamSortException(
            f"Unsorted data detected in stream. Latest value '{latest_value}' is "
            f"smaller than previous max '{previous_value}'."
        )
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def _greater_than_signpost(
|
|
234
|
+
signpost: Union[datetime.datetime, str, int, float],
|
|
235
|
+
new_value: Union[datetime.datetime, str, int, float],
|
|
236
|
+
) -> bool:
|
|
237
|
+
"""Compare and return True if new_value is greater than signpost."""
|
|
238
|
+
return ( # fails if signpost and bookmark are incompatible types
|
|
239
|
+
new_value > signpost # type: ignore
|
|
240
|
+
)
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def finalize_state_progress_markers(stream_or_partition_state: dict) -> Optional[dict]:
    """Promote or wipe progress markers once sync is complete."""
    signpost = stream_or_partition_state.pop(SIGNPOST_MARKER, None)
    stream_or_partition_state.pop(STARTING_MARKER, None)
    if PROGRESS_MARKERS in stream_or_partition_state:
        markers = stream_or_partition_state[PROGRESS_MARKERS]
        if "replication_key" in markers:
            # Replication keys are valid (only) after the sync completes, so
            # promote them out of the progress markers into the state proper.
            stream_or_partition_state["replication_key"] = markers.pop(
                "replication_key"
            )
            final_value = markers.pop("replication_key_value")
            # Cap the bookmark at the signpost if the stream ran past it.
            if signpost and _greater_than_signpost(signpost, final_value):
                final_value = signpost
            stream_or_partition_state["replication_key_value"] = final_value
    # Wipe and return any markers that have not been promoted.
    return reset_state_progress_markers(stream_or_partition_state)
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def log_sort_error(
    ex: Exception,
    log_fn: Callable,
    stream_name: str,
    current_context: Optional[dict],
    state_partition_context: Optional[dict],
    record_count: int,
    partition_record_count: int,
) -> None:
    """Log a sort error with stream, record, and context details.

    Args:
        ex: The exception that was raised.
        log_fn: Logging callable (e.g. ``logger.error``) receiving the message.
        stream_name: Name of the stream where the error occurred.
        current_context: The active sync context, if any.
        state_partition_context: Keys identifying the state partition, if any.
        record_count: Overall record counter at the time of the error.
        partition_record_count: Record counter within the current partition.
    """
    # Bug fix: the original concatenated "...'{stream_name}'." + "on record",
    # producing e.g. "'users'.on record #10."; join the sentence properly.
    msg = f"Sorting error detected in '{stream_name}' on record #{record_count}. "
    if partition_record_count != record_count:
        msg += (
            f"Record was partition record "
            f"#{partition_record_count} with"
            f" state partition context {state_partition_context}. "
        )
    if current_context:
        msg += f"Context was {str(current_context)}. "
    msg += str(ex)
    log_fn(msg)
|