hotglue-singer-sdk 1.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hotglue_singer_sdk/__init__.py +34 -0
- hotglue_singer_sdk/authenticators.py +554 -0
- hotglue_singer_sdk/cli/__init__.py +1 -0
- hotglue_singer_sdk/cli/common_options.py +37 -0
- hotglue_singer_sdk/configuration/__init__.py +1 -0
- hotglue_singer_sdk/configuration/_dict_config.py +101 -0
- hotglue_singer_sdk/exceptions.py +52 -0
- hotglue_singer_sdk/helpers/__init__.py +1 -0
- hotglue_singer_sdk/helpers/_catalog.py +122 -0
- hotglue_singer_sdk/helpers/_classproperty.py +18 -0
- hotglue_singer_sdk/helpers/_compat.py +15 -0
- hotglue_singer_sdk/helpers/_flattening.py +374 -0
- hotglue_singer_sdk/helpers/_schema.py +100 -0
- hotglue_singer_sdk/helpers/_secrets.py +41 -0
- hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
- hotglue_singer_sdk/helpers/_singer.py +280 -0
- hotglue_singer_sdk/helpers/_state.py +282 -0
- hotglue_singer_sdk/helpers/_typing.py +231 -0
- hotglue_singer_sdk/helpers/_util.py +27 -0
- hotglue_singer_sdk/helpers/capabilities.py +240 -0
- hotglue_singer_sdk/helpers/jsonpath.py +39 -0
- hotglue_singer_sdk/io_base.py +134 -0
- hotglue_singer_sdk/mapper.py +691 -0
- hotglue_singer_sdk/mapper_base.py +156 -0
- hotglue_singer_sdk/plugin_base.py +415 -0
- hotglue_singer_sdk/py.typed +0 -0
- hotglue_singer_sdk/sinks/__init__.py +14 -0
- hotglue_singer_sdk/sinks/batch.py +90 -0
- hotglue_singer_sdk/sinks/core.py +412 -0
- hotglue_singer_sdk/sinks/record.py +66 -0
- hotglue_singer_sdk/sinks/sql.py +299 -0
- hotglue_singer_sdk/streams/__init__.py +14 -0
- hotglue_singer_sdk/streams/core.py +1294 -0
- hotglue_singer_sdk/streams/graphql.py +74 -0
- hotglue_singer_sdk/streams/rest.py +611 -0
- hotglue_singer_sdk/streams/sql.py +1023 -0
- hotglue_singer_sdk/tap_base.py +580 -0
- hotglue_singer_sdk/target_base.py +554 -0
- hotglue_singer_sdk/target_sdk/__init__.py +0 -0
- hotglue_singer_sdk/target_sdk/auth.py +124 -0
- hotglue_singer_sdk/target_sdk/client.py +286 -0
- hotglue_singer_sdk/target_sdk/common.py +13 -0
- hotglue_singer_sdk/target_sdk/lambda.py +121 -0
- hotglue_singer_sdk/target_sdk/rest.py +108 -0
- hotglue_singer_sdk/target_sdk/sinks.py +16 -0
- hotglue_singer_sdk/target_sdk/target.py +570 -0
- hotglue_singer_sdk/target_sdk/target_base.py +627 -0
- hotglue_singer_sdk/testing.py +198 -0
- hotglue_singer_sdk/typing.py +603 -0
- hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
- hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
- hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
"""WoocommerceSink target sink class, which handles writing streams."""
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
from abc import abstractmethod
|
|
7
|
+
from pydantic import BaseModel
|
|
8
|
+
from typing import Any, Dict, List, Optional
|
|
9
|
+
from hotglue_singer_sdk.target_sdk.rest import Rest
|
|
10
|
+
from hotglue_singer_sdk.target_sdk.auth import Authenticator
|
|
11
|
+
from hotglue_singer_sdk.target_sdk.common import HGJSONEncoder
|
|
12
|
+
from hotglue_singer_sdk.plugin_base import PluginBase
|
|
13
|
+
from hotglue_singer_sdk.sinks import RecordSink, BatchSink
|
|
14
|
+
from hotglue_etl_exceptions import InvalidCredentialsError, InvalidPayloadError
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
class HotglueBaseSink(Rest):
    """Base sink shared by the record- and batch-oriented Hotglue sinks.

    Tracks per-stream bookmarks plus a success/fail/existing/updated summary in
    ``self.latest_state``, seeds that state from the previous job's incremental
    state file, and dedupes already-processed records via ``processed_hashes``.
    """

    # set to True once the summary dict for this stream has been created
    summary_init = False
    # include any stream names if externalId needs to be passed in the payload
    allows_externalid = []
    # cached previous-job state loaded by get_previous_state()
    previous_state = None
    # class-level fallback kept for backward compatibility; re-bound per
    # instance in __init__ (see fix note there)
    processed_hashes = []

    @property
    @abstractmethod
    def name(self) -> str:
        """Stream name this sink writes. Must be provided by subclasses."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def endpoint(self) -> str:
        """API endpoint path appended to ``base_url``."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def base_url(self) -> str:
        """Base URL of the destination API."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def unified_schema(self) -> BaseModel:
        """Pydantic model describing the unified record shape."""
        raise NotImplementedError()

    def __init__(
        self,
        target: PluginBase,
        stream_name: str,
        schema: Dict,
        key_properties: Optional[List[str]],
    ) -> None:
        self._state = dict(target._state)
        self._target = target
        # fix: processed_hashes was only a mutable class attribute, shared by
        # ALL sink instances/subclasses — a record in one stream could be
        # silently skipped because an identical hash was seen by a different
        # stream's sink. Bind a fresh list per instance.
        self.processed_hashes: List[str] = []
        super().__init__(target, stream_name, schema, key_properties)

    def url(self, endpoint=None):
        """Build the full request URL; defaults to this sink's ``endpoint``."""
        if not endpoint:
            endpoint = self.endpoint
        return f"{self.base_url}{endpoint}"

    def validate_input(self, record: dict):
        """Hook for subclasses to validate an incoming record."""
        raise NotImplementedError()

    def validate_output(self, mapping):
        """Hook for subclasses to post-validate a mapped payload (no-op here)."""
        return mapping

    def get_previous_state(self):
        """Load (and cache) the previous job's incremental state.

        Failed records are dropped from the bookmarks and the failure counters
        are reset so that a retrigger retries exactly those records.
        """
        if not self.previous_state:
            previous_state_path = self._target.incremental_target_state_path
            if os.path.exists(previous_state_path):
                with open(previous_state_path, "r") as f:
                    self.previous_state = json.load(f)
            else:
                self.previous_state = {}

        # remove failed records from the previous state so retrigger retries those records
        if self.previous_state:
            # fix: tolerate a partial previous state missing "bookmarks" or
            # "summary" instead of raising KeyError.
            for stream in self.previous_state.get("bookmarks", {}):
                self.previous_state["bookmarks"][stream] = [
                    record
                    for record in self.previous_state["bookmarks"][stream]
                    # keep original `!= False` semantics (None/missing is kept)
                    if record.get("success") != False  # noqa: E712
                ]
            for stream in self.previous_state.get("summary", {}):
                self.previous_state["summary"][stream]["fail"] = 0
        return self.previous_state

    def init_state(self):
        """Initialize ``self.latest_state`` bookmarks/summary for this stream."""
        # on first run, initialize state with the previous job state if it exists
        if self.previous_state is None:
            previous_state = self.get_previous_state()
            if previous_state:
                self._target._latest_state = previous_state

        # if previous state exists, add the hashes to the processed_hashes
        if self.previous_state:
            # fix: tolerate missing "bookmarks"/stream keys in a partial state
            self.processed_hashes.extend(
                record["hash"]
                for record in self.previous_state.get("bookmarks", {}).get(self.name, [])
                if record.get("hash")
            )

        # get the full target state
        target_state = self._target._latest_state

        # If there is data for the stream name in target_state use that to initialize the state
        if target_state:
            if not self._state and target_state.get("bookmarks", {}).get(self.name) and target_state.get("summary", {}).get(self.name):
                self.latest_state = target_state
        # If not init sink state latest_state
        if not self.latest_state:
            self.latest_state = self._state or {"bookmarks": {}, "summary": {}}

        # fix: the original only reset the bookmark list when the stream key
        # was absent, so a key explicitly set to None escaped initialization
        # and later crashed on append. Reset on any falsy value instead.
        if not self.latest_state["bookmarks"].get(self.name):
            self.latest_state["bookmarks"][self.name] = []

        if not self.summary_init:
            if not self.latest_state.get("summary"):
                self.latest_state["summary"] = {}
            if not self.latest_state["summary"].get(self.name):
                self.latest_state["summary"][self.name] = {"success": 0, "fail": 0, "existing": 0, "updated": 0}

            self.summary_init = True

    def error_to_string(self, error: Any):
        """Convert a raw error object into a state-safe string (override to customize)."""
        return str(error)

    def process_error_state(self, state: dict):
        """Log the full error, then replace it with its string form for the state file."""
        # log full error
        self.logger.error(f"Error processing record of type {self.name}: {state.get('error')}")
        # clean error for state
        state["error"] = self.error_to_string(state.get("error"))
        return state

    def update_state(self, state: dict, is_duplicate=False, record=None):
        """Record one processed record's outcome into bookmarks and the summary.

        ``is_duplicate`` marks a record that already existed at the destination;
        otherwise the summary bucket is chosen from ``state["success"]`` and
        ``state["is_updated"]``.
        """
        if is_duplicate:
            self.logger.info(f"Record of type {self.name} already exists with id: {state.get('id')}")
            self.latest_state["summary"][self.name]["existing"] += 1

        elif not state.get("success", False):
            self.latest_state["summary"][self.name]["fail"] += 1
            self.process_error_state(state)
        elif state.get("is_updated", False):
            self.latest_state["summary"][self.name]["updated"] += 1
            state.pop("is_updated", None)
        else:
            self.latest_state["summary"][self.name]["success"] += 1

        # add the mapped record to the state if it exists and env var OUTPUT_MAPPED_RECORD is set to true
        if record and os.getenv("OUTPUT_MAPPED_RECORD", "false").lower() == "true":
            state["mapped_record"] = record

        self.latest_state["bookmarks"][self.name].append(state)

        # If "authenticator" exists and if it's an instance of "Authenticator" class,
        # update "self.latest_state" with the "authenticator" state
        if self.authenticator and isinstance(self.authenticator, Authenticator):
            self.latest_state.update(self.authenticator.state)
class HotglueSink(HotglueBaseSink, RecordSink):
    """Record-at-a-time Hotglue sink: hashes, dedupes and upserts each record."""

    def upsert_record(self, record: dict, context: dict):
        """POST the record to the endpoint; return (id, success, state_updates)."""
        response = self.request_api("POST", request_data=record)
        record_id = response.json().get("id")
        return record_id, response.ok, dict()

    def build_record_hash(self, record: dict):
        """Stable SHA-256 fingerprint of the record, used for deduplication."""
        serialized = json.dumps(record, cls=HGJSONEncoder)
        return hashlib.sha256(serialized.encode()).hexdigest()

    def get_existing_state(self, hash: str):
        """Return the bookmarked state of a previously *successful* record
        with this hash, or None if no such bookmark exists."""
        for bookmarked in self.latest_state["bookmarks"][self.name]:
            if bookmarked.get("hash") == hash and bookmarked.get("success"):
                return bookmarked
        return None

    @abstractmethod
    def preprocess_record(self, record: dict, context: dict) -> dict:
        raise NotImplementedError()

    def process_record(self, record: dict, context: dict) -> None:
        """Preprocess, dedupe and upsert one record, then record its outcome."""
        if not self.latest_state:
            self.init_state()

        record_id = None
        external_id = None
        success = None
        state = {}
        state_updates = dict()

        # Phase 1: preprocess. A failure here marks the record failed but
        # still falls through so the failure lands in the state bookmarks.
        try:
            if self.name not in self.allows_externalid and record.get(self._target.EXTERNAL_ID_KEY):
                external_id = record.pop(self._target.EXTERNAL_ID_KEY, None)

            record = self.preprocess_record(record, context)

            # restore the external id that preprocessing may not carry through
            if record and external_id:
                record[self._target.EXTERNAL_ID_KEY] = external_id
        except Exception as e:
            success = False
            self.logger.exception(f"Preprocess record error {str(e)}")
            state_updates['error'] = str(e)
            if isinstance(e, (InvalidCredentialsError, InvalidPayloadError)):
                state_updates['hg_error_class'] = e.__class__.__name__

        # Phase 2: dedupe and upsert — only when preprocessing succeeded.
        if success is not False:
            record_hash = self.build_record_hash(record)

            # skip records already handled in this (or the previous) run
            if record_hash in self.processed_hashes:
                self.logger.info(f"Record of type {self.name} already exists with hash: {record_hash}")
                return

            existing_state = self.get_existing_state(record_hash)

            if self.name in self.allows_externalid:
                external_id = record.get("externalId")
            else:
                external_id = record.pop("externalId", None)

            if existing_state:
                return self.update_state(existing_state, is_duplicate=True, record=record)

            state["hash"] = record_hash

            try:
                record_id, success, state_updates = self.upsert_record(record, context)
            except Exception as e:
                self.logger.exception(f"Upsert record error {str(e)}")
                state_updates['error'] = str(e)
                if isinstance(e, (InvalidCredentialsError, InvalidPayloadError)):
                    state_updates['hg_error_class'] = e.__class__.__name__

        if success:
            self.logger.info(f"{self.name} processed id: {record_id}")

        state["success"] = success

        if record_id:
            state["id"] = record_id
        if external_id:
            state["externalId"] = external_id

        # an "existing" flag from upsert means the destination reported a duplicate
        is_duplicate = bool(state_updates.pop("existing", False))

        if state_updates and isinstance(state_updates, dict):
            state = {**state, **state_updates}

        self.update_state(state, is_duplicate=is_duplicate, record=record)
class HotglueBatchSink(HotglueBaseSink, BatchSink):
    """Batch-oriented Hotglue sink: maps a batch of records, sends one request,
    and folds the response back into the sink state."""

    def process_batch_record(self, record: dict, index: int) -> dict:
        """Per-record mapping hook; identity by default."""
        return record

    @abstractmethod
    def make_batch_request(self, records: List[dict]):
        raise NotImplementedError()

    def handle_batch_response(self, response) -> dict:
        """Translate the batch response into a dict.

        Subclasses should include a "state_updates" key holding the list of
        per-record state dicts to record. Defaults to no updates.
        """
        return dict()

    def process_batch(self, context: dict) -> None:
        """Map, send and record one batch of records from ``context``."""
        if not self.latest_state:
            self.init_state()

        raw_records = context["records"]

        # apply the per-record mapping hook, preserving each record's position
        records = [
            self.process_batch_record(raw, position)
            for position, raw in enumerate(raw_records)
        ]

        response = self.make_batch_request(records)

        outcome = self.handle_batch_response(response)

        for state in outcome.get("state_updates", list()):
            self.update_state(state)
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from datetime import datetime
|
|
2
|
+
from json import JSONEncoder
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
class HGJSONEncoder(JSONEncoder):
    """JSON encoder that renders datetimes as ISO-8601 and stringifies any
    other non-serializable object instead of failing the whole dump."""

    def default(self, o: Any) -> Any:
        if isinstance(o, datetime):
            return o.isoformat()

        try:
            return super().default(o)
        # fix: was a broad `except Exception as e` with `e` unused;
        # JSONEncoder.default only raises TypeError for unserializable values.
        except TypeError:
            # deliberate best-effort fallback: stringify rather than raise
            return str(o)
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import json, pathlib, uuid, subprocess, os
|
|
2
|
+
from logging import Logger
|
|
3
|
+
from typing import Union, Optional
|
|
4
|
+
|
|
5
|
+
class RealTime:
    """Drive a target CLI over a minimal two-line singer stream.

    Writes the config and (unless ``input_path`` is given) a singer file
    containing one SCHEMA and one RECORD line under /tmp, pipes that file into
    ``command`` via the shell, captures the emitted state file, and cleans up.
    """

    def __init__(
        self,
        command: str,
        config: dict,
        stream_name: str,
        schema_line: str,
        record_line: str,
        logger: Logger,
        input_path: Optional[str] = None,
    ):
        self.command = command
        self.config = config
        self.stream_name = stream_name
        self.schema_line = schema_line
        self.record_line = record_line
        self.logger = logger
        # unique id namespaces the temp files so concurrent runs don't collide
        self.id = str(uuid.uuid4())
        self.config_file_path = f"/tmp/{self.id}.config.json"
        self.singer_file_path = f"/tmp/{self.id}.data.singer" if not input_path else input_path
        self.state_file_path = f"/tmp/{self.id}.state.json"
        os.makedirs("/tmp", exist_ok=True)

    def _create_singer_file(self):
        # an explicit input_path (or a rerun) may already exist — keep it as-is
        if os.path.exists(self.singer_file_path):
            return

        with open(self.singer_file_path, "w") as f:
            f.writelines([
                self.schema_line + "\n",
                self.record_line
            ])

    def _create_config_file(self):
        with open(self.config_file_path, "w") as f:
            f.write(json.dumps(self.config))

    def _delete_singer_file(self):
        pathlib.Path(self.singer_file_path).unlink(missing_ok=True)

    def _delete_state_file(self):
        pathlib.Path(self.state_file_path).unlink(missing_ok=True)

    def prepare(self):
        """Write the config and singer input files."""
        self._create_config_file()
        self._create_singer_file()

    def run(self):
        """Run the target via the shell; return its logs and a traceback flag."""
        # NOTE(review): shell=True with interpolated paths — the temp paths are
        # uuid-generated, but `self.command` must come from a trusted source
        # (it is shell-injectable by construction).
        command = f"cat {self.singer_file_path} | {self.command} --config {self.config_file_path} > {self.state_file_path}"
        self.logger.info(f"Running command: {command}")
        proc = subprocess.run(
            command,
            shell=True,
            text=True,
            capture_output=True
        )

        # state goes to the state file via the shell redirect, so stdout here
        # only carries incidental logs; fall back to stderr when stdout is empty
        logs = proc.stdout.strip() or proc.stderr.strip()

        self.logger.info(logs)

        return {
            "tracebackInLogs": "Traceback" in logs,
            "logs": logs,
        }

    def get_state(self) -> Union[dict, str]:
        """Parse the last emitted state line as JSON; fall back to raw text."""
        with open(self.state_file_path, "r") as f:
            lines = f.readlines()
        try:
            return json.loads(lines[-1].strip())
        # fix: was a bare `except:`, which also swallowed KeyboardInterrupt and
        # SystemExit. Only an empty file (IndexError) or a non-JSON tail
        # (ValueError, the base of json.JSONDecodeError) should trigger the
        # raw-text fallback.
        except (IndexError, ValueError):
            return "".join(lines)

    def clean_up(self):
        """Remove the generated singer and state files (missing files are fine)."""
        self._delete_singer_file()
        self._delete_state_file()
|
84
|
+
def real_time_handler(
    config: dict,
    stream_name: str,
    schema_line: str,
    record_line: str,
    logger: Logger,
    input_path: Optional[str] = None,
    cli_cmd: Optional[str] = None,
):
    """Run one record through the target CLI in real time.

    Returns a dict with the target's final ``state`` and run ``metrics``.
    Raises Exception when no CLI command is configured (``cli_cmd`` parameter
    or ``CLI_CMD`` env var).
    """
    cli_cmd = cli_cmd or os.environ.get("CLI_CMD")
    if not cli_cmd:
        logger.info("Parameter cli_cmd or CLI_CMD env var are not set. This target does not support real time")
        raise Exception("This target does not support real time")
    # fix: the original logged the full config, which typically carries API
    # credentials/tokens; log only the key names to keep secrets out of logs.
    logger.info(f"Entering \"real_time_handler\": cli_cmd={cli_cmd}, config_keys={sorted(config)}, stream_name={stream_name}")
    logger.info(f"Schema line: {schema_line}")
    logger.info(f"Record line: {record_line}")
    real_time = RealTime(
        cli_cmd,
        config,
        stream_name,
        schema_line,
        record_line,
        logger,
        input_path,
    )
    logger.info("Preparing files...")
    real_time.prepare()
    logger.info("Running target...")
    target_metrics = real_time.run()
    logger.info("Getting state...")
    state = real_time.get_state()
    logger.info("Cleaning up...")
    real_time.clean_up()
    logger.info("Done")
    return {
        "state": state,
        "metrics": target_metrics,
    }
|
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
"""Hotglue target sink class, which handles writing streams."""
|
|
2
|
+
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
import backoff
|
|
6
|
+
import requests
|
|
7
|
+
import json
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
from hotglue_singer_sdk.exceptions import FatalAPIError, RetriableAPIError
|
|
10
|
+
from hotglue_singer_sdk.target_sdk.auth import Authenticator
|
|
11
|
+
from hotglue_singer_sdk.target_sdk.common import HGJSONEncoder
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Rest:
    """Minimal REST client mixin: authenticated requests with retry/backoff,
    response validation and payload cleaning helpers."""

    timeout: int = 300
    http_headers: Dict[str, Any] = {}
    params: Dict[str, Any] = {}
    authenticator: Optional[Authenticator] = None

    @property
    def default_headers(self):
        """Static headers merged with the authenticator's auth headers."""
        # fix: copy instead of mutating self.http_headers in place — the
        # original updated the (class-level) dict itself, leaking auth headers
        # across instances and re-accumulating on every access.
        headers = dict(self.http_headers)

        if self.authenticator and isinstance(self.authenticator, Authenticator):
            headers.update(self.authenticator.auth_headers)

        return headers

    @backoff.on_exception(
        backoff.expo,
        (RetriableAPIError, requests.exceptions.ReadTimeout),
        max_tries=5,
        factor=2,
    )
    # fix: return annotation was requests.PreparedRequest, but a
    # requests.Response is what is actually returned.
    def _request(
        self, http_method, endpoint, params=None, request_data=None, headers=None, verify=True
    ) -> requests.Response:
        """Send one HTTP request, validate the response, and return it.

        Retries with exponential backoff (5 tries) on RetriableAPIError and
        ReadTimeout; raises FatalAPIError on non-retriable 4xx responses.
        """
        # fix: `params={}` / `headers={}` were mutable default arguments that
        # this method mutated, so values accumulated across calls and callers'
        # dicts were modified. Copy defensively instead.
        headers = dict(headers or {})
        params = dict(params or {})

        url = self.url(endpoint)
        headers.update(self.default_headers)
        headers.update({"Content-Type": "application/json"})
        params.update(self.params)
        # NOTE(review): falsy payloads ({} or 0) are sent with no body — confirm intended
        data = (
            json.dumps(request_data, cls=HGJSONEncoder)
            if request_data
            else None
        )

        response = requests.request(
            method=http_method,
            url=url,
            params=params,
            headers=headers,
            data=data,
            verify=verify
        )
        self.validate_response(response)
        return response

    def request_api(self, http_method, endpoint=None, params=None, request_data=None, headers=None, verify=True):
        """Request records from REST endpoint(s), returning response records."""
        resp = self._request(http_method, endpoint, params, request_data, headers, verify=verify)
        return resp

    def validate_response(self, response: requests.Response) -> None:
        """Raise RetriableAPIError for 429/5xx, FatalAPIError for other 4xx."""
        if response.status_code in [429] or 500 <= response.status_code < 600:
            msg = self.response_error_message(response)
            raise RetriableAPIError(msg, response)
        elif 400 <= response.status_code < 500:
            try:
                msg = response.text
            # fix: was a bare `except:` (would also swallow KeyboardInterrupt)
            except Exception:
                msg = self.response_error_message(response)
            raise FatalAPIError(msg)

    def response_error_message(self, response: requests.Response) -> str:
        """Build error message for invalid http statuses."""
        if 400 <= response.status_code < 500:
            error_type = "Client"
        else:
            error_type = "Server"

        return (
            f"{response.status_code} {error_type} Error: "
            f"{response.reason} for path: {self.endpoint}"
        )

    @staticmethod
    def clean_dict_items(data):
        """Drop keys whose value is None or the empty string."""
        # fix: parameter was named `dict`, shadowing the builtin
        return {k: v for k, v in data.items() if v not in [None, ""]}

    def clean_payload(self, item):
        """Recursively drop empty values and render datetimes as ISO-8601
        with a colon inserted into the UTC offset ('+HHMM' -> '+HH:MM')."""
        item = self.clean_dict_items(item)
        output = {}
        for k, v in item.items():
            if isinstance(v, datetime):
                dt_str = v.strftime("%Y-%m-%dT%H:%M:%S%z")
                # length > 20 means an offset suffix is present
                if len(dt_str) > 20:
                    output[k] = f"{dt_str[:-2]}:{dt_str[-2:]}"
                else:
                    output[k] = dt_str
            elif isinstance(v, dict):
                output[k] = self.clean_payload(v)
            else:
                output[k] = v
        return output
@@ -0,0 +1,16 @@
|
|
|
1
|
+
"""Hotglue target sink class, which handles writing streams."""
|
|
2
|
+
from abc import ABC, abstractmethod
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from hotglue_singer_sdk.target_sdk.client import HotglueSink
|
|
6
|
+
|
|
7
|
+
class ModelSink(HotglueSink):
    """Model target sink class.

    Abstract record sink whose concrete subclasses must supply both the
    record-mapping step and the record-writing step.
    """

    @abstractmethod
    def preprocess_record(self, record: dict, context: dict) -> dict:
        """Map a raw record into the destination payload shape."""

    @abstractmethod
    def process_record(self, record: dict, context: dict) -> None:
        """Write a single mapped record to the destination."""