hotglue-singer-sdk 1.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. hotglue_singer_sdk/__init__.py +34 -0
  2. hotglue_singer_sdk/authenticators.py +554 -0
  3. hotglue_singer_sdk/cli/__init__.py +1 -0
  4. hotglue_singer_sdk/cli/common_options.py +37 -0
  5. hotglue_singer_sdk/configuration/__init__.py +1 -0
  6. hotglue_singer_sdk/configuration/_dict_config.py +101 -0
  7. hotglue_singer_sdk/exceptions.py +52 -0
  8. hotglue_singer_sdk/helpers/__init__.py +1 -0
  9. hotglue_singer_sdk/helpers/_catalog.py +122 -0
  10. hotglue_singer_sdk/helpers/_classproperty.py +18 -0
  11. hotglue_singer_sdk/helpers/_compat.py +15 -0
  12. hotglue_singer_sdk/helpers/_flattening.py +374 -0
  13. hotglue_singer_sdk/helpers/_schema.py +100 -0
  14. hotglue_singer_sdk/helpers/_secrets.py +41 -0
  15. hotglue_singer_sdk/helpers/_simpleeval.py +678 -0
  16. hotglue_singer_sdk/helpers/_singer.py +280 -0
  17. hotglue_singer_sdk/helpers/_state.py +282 -0
  18. hotglue_singer_sdk/helpers/_typing.py +231 -0
  19. hotglue_singer_sdk/helpers/_util.py +27 -0
  20. hotglue_singer_sdk/helpers/capabilities.py +240 -0
  21. hotglue_singer_sdk/helpers/jsonpath.py +39 -0
  22. hotglue_singer_sdk/io_base.py +134 -0
  23. hotglue_singer_sdk/mapper.py +691 -0
  24. hotglue_singer_sdk/mapper_base.py +156 -0
  25. hotglue_singer_sdk/plugin_base.py +415 -0
  26. hotglue_singer_sdk/py.typed +0 -0
  27. hotglue_singer_sdk/sinks/__init__.py +14 -0
  28. hotglue_singer_sdk/sinks/batch.py +90 -0
  29. hotglue_singer_sdk/sinks/core.py +412 -0
  30. hotglue_singer_sdk/sinks/record.py +66 -0
  31. hotglue_singer_sdk/sinks/sql.py +299 -0
  32. hotglue_singer_sdk/streams/__init__.py +14 -0
  33. hotglue_singer_sdk/streams/core.py +1294 -0
  34. hotglue_singer_sdk/streams/graphql.py +74 -0
  35. hotglue_singer_sdk/streams/rest.py +611 -0
  36. hotglue_singer_sdk/streams/sql.py +1023 -0
  37. hotglue_singer_sdk/tap_base.py +580 -0
  38. hotglue_singer_sdk/target_base.py +554 -0
  39. hotglue_singer_sdk/target_sdk/__init__.py +0 -0
  40. hotglue_singer_sdk/target_sdk/auth.py +124 -0
  41. hotglue_singer_sdk/target_sdk/client.py +286 -0
  42. hotglue_singer_sdk/target_sdk/common.py +13 -0
  43. hotglue_singer_sdk/target_sdk/lambda.py +121 -0
  44. hotglue_singer_sdk/target_sdk/rest.py +108 -0
  45. hotglue_singer_sdk/target_sdk/sinks.py +16 -0
  46. hotglue_singer_sdk/target_sdk/target.py +570 -0
  47. hotglue_singer_sdk/target_sdk/target_base.py +627 -0
  48. hotglue_singer_sdk/testing.py +198 -0
  49. hotglue_singer_sdk/typing.py +603 -0
  50. hotglue_singer_sdk-1.0.2.dist-info/METADATA +53 -0
  51. hotglue_singer_sdk-1.0.2.dist-info/RECORD +53 -0
  52. hotglue_singer_sdk-1.0.2.dist-info/WHEEL +4 -0
  53. hotglue_singer_sdk-1.0.2.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,286 @@
1
+ """WoocommerceSink target sink class, which handles writing streams."""
2
+
3
+ import hashlib
4
+ import json
5
+ import os
6
+ from abc import abstractmethod
7
+ from pydantic import BaseModel
8
+ from typing import Any, Dict, List, Optional
9
+ from hotglue_singer_sdk.target_sdk.rest import Rest
10
+ from hotglue_singer_sdk.target_sdk.auth import Authenticator
11
+ from hotglue_singer_sdk.target_sdk.common import HGJSONEncoder
12
+ from hotglue_singer_sdk.plugin_base import PluginBase
13
+ from hotglue_singer_sdk.sinks import RecordSink, BatchSink
14
+ from hotglue_etl_exceptions import InvalidCredentialsError, InvalidPayloadError
15
+ import os
16
+
17
class HotglueBaseSink(Rest):
    """Base class for Hotglue target sinks.

    Tracks per-stream job state (``bookmarks`` plus ``summary`` counters) and
    seeds it from a previous incremental run's state file when one exists.
    HTTP plumbing is inherited from ``Rest``.
    """

    # Class-level defaults. The mutable ones are re-bound per instance in
    # ``__init__`` (see bug-fix note there); ``allows_externalid`` is meant to
    # be overridden by subclasses and is only ever read.
    summary_init = False
    # include any stream names if externalId needs to be passed in the payload
    allows_externalid = []
    previous_state = None
    processed_hashes = []

    @property
    @abstractmethod
    def name(self) -> str:
        """Stream name handled by this sink."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def endpoint(self) -> str:
        """API path appended to ``base_url``."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def base_url(self) -> str:
        """Root URL of the destination API."""
        raise NotImplementedError()

    @property
    @abstractmethod
    def unified_schema(self) -> BaseModel:
        """Pydantic model describing the unified record shape."""
        raise NotImplementedError()

    def __init__(
        self,
        target: PluginBase,
        stream_name: str,
        schema: Dict,
        key_properties: Optional[List[str]],
    ) -> None:
        self._state = dict(target._state)
        self._target = target
        # Bug fix: these were only *class* attributes, so every sink instance
        # (across all streams and subclasses in the process) shared the same
        # list/flag objects. Give each instance its own copies.
        self.summary_init = False
        self.previous_state = None
        self.processed_hashes = []
        super().__init__(target, stream_name, schema, key_properties)

    def url(self, endpoint=None):
        """Build the full request URL, defaulting to this sink's endpoint."""
        if not endpoint:
            endpoint = self.endpoint
        return f"{self.base_url}{endpoint}"

    def validate_input(self, record: dict):
        """Hook for subclasses to validate the incoming unified record."""
        raise NotImplementedError()

    def validate_output(self, mapping):
        """Hook for subclasses to validate/clean the mapped payload."""
        return mapping

    def get_previous_state(self):
        """Load (and cache) the state produced by the previous job run.

        Failed records are dropped from the bookmarks and failure counters are
        reset so that a re-triggered job retries those records.
        """
        if not self.previous_state:
            previous_state_path = self._target.incremental_target_state_path
            if os.path.exists(previous_state_path):
                with open(previous_state_path, "r") as f:
                    self.previous_state = json.load(f)
            else:
                self.previous_state = {}

        # remove failed records from the previous state so retrigger retries those records
        if self.previous_state:
            # Bug fix: use .get() so a state file without "bookmarks" /
            # "summary" keys does not raise KeyError.
            for stream in self.previous_state.get("bookmarks", {}):
                self.previous_state["bookmarks"][stream] = [
                    record
                    for record in self.previous_state["bookmarks"][stream]
                    if record.get("success") != False  # noqa: E712 — values come from JSON (True/False/None)
                ]
            for stream in self.previous_state.get("summary", {}):
                self.previous_state["summary"][stream]["fail"] = 0
        return self.previous_state

    def init_state(self):
        """Initialize ``latest_state`` for this stream.

        Seeds from the previous job's state when available and guarantees the
        ``bookmarks`` / ``summary`` entries for this stream exist.
        """
        # on first run, initialize state with the previous job state if it exists
        if self.previous_state is None:
            previous_state = self.get_previous_state()
            if previous_state:
                self._target._latest_state = previous_state

        # if previous state exists, add the hashes to the processed_hashes
        if self.previous_state:
            # Bug fix: .get() chain avoids KeyError when this stream has no
            # bookmarks in the previous state.
            self.processed_hashes.extend(
                record["hash"]
                for record in self.previous_state.get("bookmarks", {}).get(self.name, [])
                if record.get("hash")
            )

        # get the full target state
        target_state = self._target._latest_state

        # If there is data for the stream name in target_state use that to initialize the state
        if target_state:
            if not self._state and target_state.get("bookmarks", {}).get(self.name) and target_state.get("summary", {}).get(self.name):
                self.latest_state = target_state
        # If not init sink state latest_state
        if not self.latest_state:
            self.latest_state = self._state or {"bookmarks": {}, "summary": {}}

        # (The original nested a redundant .get() check here; when the key is
        # absent the inner check is always true, so one check suffices.)
        if self.name not in self.latest_state["bookmarks"]:
            self.latest_state["bookmarks"][self.name] = []

        if not self.summary_init:
            if not self.latest_state.get("summary"):
                self.latest_state["summary"] = {}
            if not self.latest_state["summary"].get(self.name):
                self.latest_state["summary"][self.name] = {"success": 0, "fail": 0, "existing": 0, "updated": 0}

            self.summary_init = True

    def error_to_string(self, error: Any):
        """Convert an error object into a state-serializable string."""
        return str(error)

    def process_error_state(self, state: dict):
        """Log the raw error, then replace it with its string form in state."""
        # log full error
        self.logger.error(f"Error processing record of type {self.name}: {state.get('error')}")
        # clean error for state
        state["error"] = self.error_to_string(state.get("error"))
        return state

    def update_state(self, state: dict, is_duplicate=False, record=None):
        """Record one processed record's outcome in bookmarks and summary."""
        if is_duplicate:
            self.logger.info(f"Record of type {self.name} already exists with id: {state.get('id')}")
            self.latest_state["summary"][self.name]["existing"] += 1
        elif not state.get("success", False):
            self.latest_state["summary"][self.name]["fail"] += 1
            self.process_error_state(state)
        elif state.get("is_updated", False):
            self.latest_state["summary"][self.name]["updated"] += 1
            state.pop("is_updated", None)
        else:
            self.latest_state["summary"][self.name]["success"] += 1

        # add the mapped record to the state if it exists and env var OUTPUT_MAPPED_RECORD is set to true
        if record and os.getenv("OUTPUT_MAPPED_RECORD", "false").lower() == "true":
            state["mapped_record"] = record

        self.latest_state["bookmarks"][self.name].append(state)

        # If "authenticator" exists and if it's an instance of "Authenticator" class,
        # update "self.latest_state" with the "authenticator" state
        if self.authenticator and isinstance(self.authenticator, Authenticator):
            self.latest_state.update(self.authenticator.state)
151
+
152
+
153
class HotglueSink(HotglueBaseSink, RecordSink):
    """Hotglue target sink class for record-at-a-time streams."""

    def upsert_record(self, record: dict, context: dict):
        """POST the mapped record to the endpoint.

        Returns:
            Tuple of ``(id, success, state_updates)``.
        """
        response = self.request_api("POST", request_data=record)
        record_id = response.json().get("id")
        return record_id, response.ok, dict()

    def build_record_hash(self, record: dict):
        """Stable SHA-256 of the serialized record, used for deduplication."""
        return hashlib.sha256(json.dumps(record, cls=HGJSONEncoder).encode()).hexdigest()

    def get_existing_state(self, hash: str):
        """
        Returns the existing state if it exists

        Only previously *successful* bookmarks count as existing.
        """
        states = self.latest_state["bookmarks"][self.name]
        existing_state = next(
            (s for s in states if hash == s.get("hash") and s.get("success")),
            None,
        )
        return existing_state

    @abstractmethod
    def preprocess_record(self, record: dict, context: dict) -> dict:
        """Map a unified record to the destination payload."""
        raise NotImplementedError()

    def process_record(self, record: dict, context: dict) -> None:
        """Process the record: preprocess, dedupe by hash, upsert, and record
        the outcome in the sink state."""
        if not self.latest_state:
            self.init_state()

        # Locals renamed from ``id``/``hash`` so they no longer shadow builtins.
        record_id = None
        external_id = None
        success = None
        state = {}
        state_updates = dict()

        try:
            # Strip externalId before mapping unless this stream allows it in
            # the payload; it is restored after preprocessing.
            if self.name not in self.allows_externalid and record.get(self._target.EXTERNAL_ID_KEY):
                external_id = record.pop(self._target.EXTERNAL_ID_KEY, None)

            record = self.preprocess_record(record, context)

            if record and external_id:
                record[self._target.EXTERNAL_ID_KEY] = external_id
        except Exception as e:
            success = False
            self.logger.exception(f"Preprocess record error {str(e)}")
            state_updates['error'] = str(e)
            if isinstance(e, (InvalidCredentialsError, InvalidPayloadError)):
                state_updates['hg_error_class'] = e.__class__.__name__

        if success is not False:
            record_hash = self.build_record_hash(record)

            # Skip records already handled by a previous run of this job.
            if record_hash in self.processed_hashes:
                self.logger.info(f"Record of type {self.name} already exists with hash: {record_hash}")
                return

            existing_state = self.get_existing_state(record_hash)

            if self.name in self.allows_externalid:
                external_id = record.get("externalId")
            else:
                external_id = record.pop("externalId", None)

            if existing_state:
                return self.update_state(existing_state, is_duplicate=True, record=record)

            state["hash"] = record_hash

            try:
                record_id, success, state_updates = self.upsert_record(record, context)
            except Exception as e:
                self.logger.exception(f"Upsert record error {str(e)}")
                state_updates['error'] = str(e)
                if isinstance(e, (InvalidCredentialsError, InvalidPayloadError)):
                    state_updates['hg_error_class'] = e.__class__.__name__

            if success:
                self.logger.info(f"{self.name} processed id: {record_id}")

        state["success"] = success

        if record_id:
            state["id"] = record_id

        if external_id:
            state["externalId"] = external_id

        # if is_duplicate is in state_updates, set is_duplicate to True
        is_duplicate = False
        if state_updates.pop("existing", False):
            is_duplicate = True

        if state_updates and isinstance(state_updates, dict):
            state = dict(state, **state_updates)

        self.update_state(state, is_duplicate=is_duplicate, record=record)
253
+
254
+
255
class HotglueBatchSink(HotglueBaseSink, BatchSink):
    """Hotglue target sink class."""

    def process_batch_record(self, record: dict, index: int) -> dict:
        """Hook: transform a single record before it joins the batch."""
        return record

    @abstractmethod
    def make_batch_request(self, records: List[dict]):
        """Send the prepared records to the destination in one request."""
        raise NotImplementedError()

    def handle_batch_response(self, response) -> dict:
        """
        This method should return a dict.
        It's recommended that you return a key named "state_updates".
        This key should be an array of all state updates
        """
        return dict()

    def process_batch(self, context: dict) -> None:
        """Prepare every buffered record, send the batch, and fold the
        resulting state updates into the sink state."""
        if not self.latest_state:
            self.init_state()

        prepared = [
            self.process_batch_record(raw_record, position)
            for position, raw_record in enumerate(context["records"])
        ]

        batch_response = self.make_batch_request(prepared)
        batch_result = self.handle_batch_response(batch_response)

        for state in batch_result.get("state_updates", list()):
            self.update_state(state)
@@ -0,0 +1,13 @@
1
+ from datetime import datetime
2
+ from json import JSONEncoder
3
+ from typing import Any
4
+
5
class HGJSONEncoder(JSONEncoder):
    """JSON encoder that renders datetimes as ISO-8601 strings and falls back
    to ``str()`` for anything the stock encoder cannot serialize."""

    def default(self, o: Any) -> Any:
        if isinstance(o, datetime):
            return o.isoformat()
        try:
            return super().default(o)
        except Exception:
            # Best-effort fallback: never let serialization fail outright.
            return str(o)
@@ -0,0 +1,121 @@
1
+ import json, pathlib, uuid, subprocess, os
2
+ from logging import Logger
3
+ from typing import Union, Optional
4
+
5
class RealTime:
    """Run a singer target CLI against a single schema+record pair.

    Uses UUID-namespaced temp files under ``/tmp`` (so parallel invocations
    cannot collide) and captures the state the target emits on stdout.
    """

    def __init__(
        self,
        command: str,
        config: dict,
        stream_name: str,
        schema_line: str,
        record_line: str,
        logger: Logger,
        input_path: Optional[str] = None,
    ):
        self.command = command
        self.config = config
        self.stream_name = stream_name
        self.schema_line = schema_line
        self.record_line = record_line
        self.logger = logger
        self.id = str(uuid.uuid4())
        self.config_file_path = f"/tmp/{self.id}.config.json"
        # A caller-supplied input file takes precedence over the generated one.
        self.singer_file_path = f"/tmp/{self.id}.data.singer" if not input_path else input_path
        self.state_file_path = f"/tmp/{self.id}.state.json"
        os.makedirs("/tmp", exist_ok=True)

    def _create_singer_file(self):
        """Write the schema+record singer input (skipped when the file —
        e.g. a caller-supplied input_path — already exists)."""
        if os.path.exists(self.singer_file_path):
            return

        with open(self.singer_file_path, "w") as f:
            f.writelines([
                self.schema_line + "\n",
                self.record_line
            ])

    def _create_config_file(self):
        """Serialize the target config to its temp file."""
        with open(self.config_file_path, "w") as f:
            f.write(json.dumps(self.config))

    def _delete_singer_file(self):
        pathlib.Path(self.singer_file_path).unlink(missing_ok=True)

    def _delete_state_file(self):
        pathlib.Path(self.state_file_path).unlink(missing_ok=True)

    def prepare(self):
        """Create the config and singer input files."""
        self._create_config_file()
        self._create_singer_file()

    def run(self):
        """Pipe the singer file into the target CLI.

        State is redirected into ``state_file_path``; stdout/stderr are
        captured, logged, and scanned for Python tracebacks.
        """
        # NOTE(review): shell=True with an interpolated command string —
        # ``self.command`` comes from trusted config/env here, but never feed
        # it untrusted user input.
        command = f"cat {self.singer_file_path} | {self.command} --config {self.config_file_path} > {self.state_file_path}"
        self.logger.info(f"Running command: {command}")
        proc = subprocess.run(
            command,
            shell=True,
            text=True,
            capture_output=True
        )

        logs = proc.stdout.strip() or proc.stderr.strip()

        self.logger.info(logs)

        return {
            "tracebackInLogs": "Traceback" in logs,
            "logs": logs,
        }

    def get_state(self) -> Union[dict, str]:
        """Parse the last line of the state file as JSON.

        Falls back to returning the raw file contents when the file is empty
        or the last line is not valid JSON.
        """
        with open(self.state_file_path, "r") as f:
            lines = f.readlines()
        try:
            return json.loads(lines[-1].strip())
        # Bug fix: was a bare ``except:`` which also swallowed
        # KeyboardInterrupt/SystemExit. Only empty-file (IndexError) and
        # invalid-JSON (ValueError/JSONDecodeError) are expected here.
        except (IndexError, ValueError):
            return "".join(lines)

    def clean_up(self):
        """Remove the temp files created for this run."""
        self._delete_singer_file()
        self._delete_state_file()
82
+
83
+
84
def real_time_handler(
    config: dict,
    stream_name: str,
    schema_line: str,
    record_line: str,
    logger: Logger,
    input_path: Optional[str] = None,
    cli_cmd: Optional[str] = None,
):
    """Run the target CLI over one schema/record pair and return its state.

    Args:
        config: Target configuration written to a temp config file.
        stream_name: Name of the stream being processed.
        schema_line: Singer SCHEMA message line.
        record_line: Singer RECORD message line.
        logger: Logger for progress output.
        input_path: Optional pre-built singer input file to use instead of
            generating one from schema_line/record_line.
        cli_cmd: Target CLI command; falls back to the ``CLI_CMD`` env var.

    Returns:
        Dict with the emitted ``state`` and run ``metrics``.

    Raises:
        Exception: if neither ``cli_cmd`` nor ``CLI_CMD`` is set.
    """
    cli_cmd = cli_cmd or os.environ.get("CLI_CMD")
    if not cli_cmd:
        logger.info("Parameter cli_cmd or CLI_CMD env var are not set. This target does not support real time")
        raise Exception("This target does not support real time")
    # NOTE(review): this logs the full config, which may contain credentials —
    # consider redacting before these logs leave the host.
    logger.info(f"Entering \"real_time_handler\": cli_cmd={cli_cmd}, config={config}, stream_name={stream_name}")
    logger.info(f"Schema line: {schema_line}")
    logger.info(f"Record line: {record_line}")
    real_time = RealTime(
        cli_cmd,
        config,
        stream_name,
        schema_line,
        record_line,
        logger,
        input_path,
    )
    # (f-prefixes removed from the constant messages below — idiom fix only,
    # the logged text is unchanged.)
    logger.info("Preparing files...")
    real_time.prepare()
    logger.info("Running target...")
    target_metrics = real_time.run()
    logger.info("Getting state...")
    state = real_time.get_state()
    logger.info("Cleaning up...")
    real_time.clean_up()
    logger.info("Done")
    return {
        "state": state,
        "metrics": target_metrics,
    }
@@ -0,0 +1,108 @@
1
+ """Hotglue target sink class, which handles writing streams."""
2
+
3
+ from datetime import datetime
4
+
5
+ import backoff
6
+ import requests
7
+ import json
8
+ from typing import Any, Dict, Optional
9
+ from hotglue_singer_sdk.exceptions import FatalAPIError, RetriableAPIError
10
+ from hotglue_singer_sdk.target_sdk.auth import Authenticator
11
+ from hotglue_singer_sdk.target_sdk.common import HGJSONEncoder
12
+
13
+
14
class Rest:
    """Minimal REST client mixin: default headers with auth, a retrying
    request helper, and payload-cleaning utilities."""

    timeout: int = 300
    http_headers: Dict[str, Any] = {}
    params: Dict[str, Any] = {}
    authenticator: Optional[Authenticator] = None

    @property
    def default_headers(self):
        """Base headers merged with auth headers, as a *fresh* dict.

        Bug fix: this previously updated and returned the shared class-level
        ``http_headers`` dict in place, leaking auth headers across every
        instance and subclass in the process.
        """
        headers = dict(self.http_headers)

        if self.authenticator and isinstance(self.authenticator, Authenticator):
            headers.update(self.authenticator.auth_headers)

        return headers

    @backoff.on_exception(
        backoff.expo,
        (RetriableAPIError, requests.exceptions.ReadTimeout),
        max_tries=5,
        factor=2,
    )
    def _request(
        self, http_method, endpoint, params=None, request_data=None, headers=None, verify=True
    ) -> requests.Response:
        """Send one HTTP request and validate the response.

        Retries with exponential backoff (5 tries) on retriable API errors
        and read timeouts.

        Bug fixes: the mutable default arguments ``params={}``/``headers={}``
        were mutated below, poisoning the shared defaults across calls (and
        callers' dicts); the return annotation said ``PreparedRequest`` but a
        ``requests.Response`` is what is returned.
        """
        url = self.url(endpoint)
        # Work on copies so neither the defaults nor the caller's dicts mutate.
        headers = dict(headers or {})
        headers.update(self.default_headers)
        headers.update({"Content-Type": "application/json"})
        params = dict(params or {})
        params.update(self.params)
        data = (
            json.dumps(request_data, cls=HGJSONEncoder)
            if request_data
            else None
        )

        response = requests.request(
            method=http_method,
            url=url,
            params=params,
            headers=headers,
            data=data,
            verify=verify
        )
        self.validate_response(response)
        return response

    def request_api(self, http_method, endpoint=None, params=None, request_data=None, headers=None, verify=True):
        """Request records from REST endpoint(s), returning response records."""
        resp = self._request(http_method, endpoint, params or {}, request_data, headers or {}, verify=verify)
        return resp

    def validate_response(self, response: requests.Response) -> None:
        """Validate HTTP response.

        Raises RetriableAPIError for 429/5xx and FatalAPIError for other 4xx.
        """
        if response.status_code in [429] or 500 <= response.status_code < 600:
            msg = self.response_error_message(response)
            raise RetriableAPIError(msg, response)
        elif 400 <= response.status_code < 500:
            try:
                msg = response.text
            except Exception:  # bug fix: was a bare except (caught SystemExit etc.)
                msg = self.response_error_message(response)
            raise FatalAPIError(msg)

    def response_error_message(self, response: requests.Response) -> str:
        """Build error message for invalid http statuses."""
        error_type = "Client" if 400 <= response.status_code < 500 else "Server"
        return (
            f"{response.status_code} {error_type} Error: "
            f"{response.reason} for path: {self.endpoint}"
        )

    @staticmethod
    def clean_dict_items(dict):
        """Drop keys whose values are ``None`` or empty string."""
        # Parameter name kept for backward compatibility even though it
        # shadows the ``dict`` builtin.
        return {k: v for k, v in dict.items() if v not in [None, ""]}

    def clean_payload(self, item):
        """Drop empty values and render datetimes as ISO-8601 with a
        colon-separated UTC offset, recursing into nested dicts."""
        item = self.clean_dict_items(item)
        output = {}
        for k, v in item.items():
            if isinstance(v, datetime):
                dt_str = v.strftime("%Y-%m-%dT%H:%M:%S%z")
                # %z yields e.g. "+0000"; insert the colon -> "+00:00".
                # Naive datetimes produce no offset (len 19) and pass through.
                if len(dt_str) > 20:
                    output[k] = f"{dt_str[:-2]}:{dt_str[-2:]}"
                else:
                    output[k] = dt_str
            elif isinstance(v, dict):
                output[k] = self.clean_payload(v)
            else:
                output[k] = v
        return output
108
+
@@ -0,0 +1,16 @@
1
+ """Hotglue target sink class, which handles writing streams."""
2
+ from abc import ABC, abstractmethod
3
+
4
+ from pydantic import BaseModel
5
+ from hotglue_singer_sdk.target_sdk.client import HotglueSink
6
+
7
class ModelSink(HotglueSink):
    """Model target sink class."""

    @abstractmethod
    def preprocess_record(self, record: dict, context: dict) -> dict:
        """Map the incoming unified record to the destination payload."""

    @abstractmethod
    def process_record(self, record: dict, context: dict) -> None:
        """Write a single record to the destination."""