dc-python-sdk 1.5.43__tar.gz → 1.5.45__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dc_python_sdk-1.5.43/src/dc_python_sdk.egg-info → dc_python_sdk-1.5.45}/PKG-INFO +1 -1
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/pyproject.toml +6 -2
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/setup.cfg +1 -1
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45/src/dc_python_sdk.egg-info}/PKG-INFO +1 -1
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/models/pipeline_details.py +3 -1
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/pipeline.py +186 -8
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/services/api.py +6 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/services/aws.py +0 -3
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/LICENSE +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/README.md +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_python_sdk.egg-info/SOURCES.txt +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_python_sdk.egg-info/dependency_links.txt +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_python_sdk.egg-info/entry_points.txt +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_python_sdk.egg-info/requires.txt +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_python_sdk.egg-info/top_level.txt +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/__init__.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/app.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/cli.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/errors.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/handler.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/__init__.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/ai.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/ai_http.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/mapping.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/models/__init__.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/models/enums.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/models/errors.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/models/log_templates.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/server.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/services/__init__.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/services/environment.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/services/loader.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/services/logger.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/src/services/session.py +0 -0
- {dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_sdk/types.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "dc-python-sdk"
|
|
7
|
-
version = "1.5.43"
|
|
7
|
+
version = "1.5.45"
|
|
8
8
|
description = "Data Connector Python SDK"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.6"
|
|
@@ -45,4 +45,8 @@ ai = [
|
|
|
45
45
|
dc-sdk = "dc_sdk.cli:main"
|
|
46
46
|
|
|
47
47
|
[tool.setuptools.packages.find]
|
|
48
|
-
where = ["src"]
|
|
48
|
+
where = ["src"]
|
|
49
|
+
|
|
50
|
+
[tool.pytest.ini_options]
|
|
51
|
+
pythonpath = ["src", "tests"]
|
|
52
|
+
testpaths = ["tests"]
|
|
@@ -32,9 +32,11 @@ class PipelineDetails:
|
|
|
32
32
|
self.destination_ecs_task_version_nbr = row_data['destination_ecs_task_version_nbr']
|
|
33
33
|
self.options = json.loads(row_data['pipeline_object_options_json']) if 'pipeline_object_options_json' in row_data and row_data['pipeline_object_options_json'] else dict()
|
|
34
34
|
self.max_allowed_retrieval = row_data.get('max_allowed_retrieval')
|
|
35
|
-
self.primary_key_column_nm = row_data.get('primary_key_column_nm')
|
|
36
35
|
self.destination_credential_information = row_data.get('destination_credential_information')
|
|
37
36
|
self.source_credential_information = row_data.get('source_credential_information')
|
|
37
|
+
self.primary_key_column_nm = row_data.get('primary_key_column_nm')
|
|
38
|
+
self.updated_date_column_nm = row_data.get('updated_date_column_nm')
|
|
39
|
+
self.sync_cursor_dsc = row_data.get('sync_cursor_dsc')
|
|
38
40
|
|
|
39
41
|
def increment_stage(self):
|
|
40
42
|
self.stage += 1
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import json, io, math, re
|
|
1
|
+
import json, io, math, re, inspect
|
|
2
2
|
import time
|
|
3
3
|
from .services.environment import PipelineEnvironment
|
|
4
4
|
from .services.api import DataConnectorAPI
|
|
@@ -35,6 +35,7 @@ class PipelineConductor:
|
|
|
35
35
|
self.mapping = kwargs.get("mapping")
|
|
36
36
|
self.pipeline_object_id = kwargs.get("pipeline_mapping_id")
|
|
37
37
|
self.successful_keys = []
|
|
38
|
+
self.extracted_sync_cursor = None
|
|
38
39
|
self.config = PipelineEnvironment
|
|
39
40
|
self.api = api or DataConnectorAPI()
|
|
40
41
|
self.aws = aws or AwsService(PipelineEnvironment.aws_s3_bucket)
|
|
@@ -46,7 +47,7 @@ class PipelineConductor:
|
|
|
46
47
|
|
|
47
48
|
# get connector credentials
|
|
48
49
|
Connector = load_connector()
|
|
49
|
-
self.connector = Connector
|
|
50
|
+
self.connector = self._create_connector(Connector, self.credentials)
|
|
50
51
|
|
|
51
52
|
self.row_count = 0
|
|
52
53
|
self.log_templates = self._get_log_messages()
|
|
@@ -131,6 +132,7 @@ class PipelineConductor:
|
|
|
131
132
|
|
|
132
133
|
while "next_page" in results and results["next_page"] != None:
|
|
133
134
|
if "data" in results and results["data"] != None and results["data"] != []:
|
|
135
|
+
self._update_extracted_sync_cursor(results.get("metadata"))
|
|
134
136
|
limit_reached = self._process_rows(results["data"], max_allowed)
|
|
135
137
|
if limit_reached:
|
|
136
138
|
break
|
|
@@ -145,9 +147,15 @@ class PipelineConductor:
|
|
|
145
147
|
results = self.connector.get_data(self.pipeline_details.source_object_id, self._get_field_ids(), n_rows=nrows, filters=self._get_filters(), options=self.pipeline_details.options, next_page=results["next_page"])
|
|
146
148
|
|
|
147
149
|
if "data" in results and results["data"] != None and results["data"] != []:
|
|
150
|
+
self._update_extracted_sync_cursor(results.get("metadata"))
|
|
148
151
|
self._process_rows(results["data"], max_allowed)
|
|
149
152
|
elif results["data"] != []:
|
|
150
153
|
self.internal_log(self.log_templates.INTERNAL_GET_DATA_FETCHED.format(0))
|
|
154
|
+
else:
|
|
155
|
+
self._update_extracted_sync_cursor(results.get("metadata"))
|
|
156
|
+
|
|
157
|
+
if self._should_track_sync_cursor():
|
|
158
|
+
self._upload_extracted_sync_cursor()
|
|
151
159
|
|
|
152
160
|
self.log(self.log_templates.GET_DATA_FINISH.format(self.row_count, self.pipeline_details.source_object_id))
|
|
153
161
|
|
|
@@ -163,7 +171,7 @@ class PipelineConductor:
|
|
|
163
171
|
|
|
164
172
|
if not keys:
|
|
165
173
|
# Call load_data with empty data when there are no keys
|
|
166
|
-
loaded = self.
|
|
174
|
+
loaded = self._call_connector_load_data([], self.pipeline_details.destination_object_id, self._get_mapping(), self.pipeline_details.update_method_cd, 0, 1)
|
|
167
175
|
if not loaded:
|
|
168
176
|
raise errors.LoadDataError("Loading data failed.")
|
|
169
177
|
else:
|
|
@@ -177,7 +185,7 @@ class PipelineConductor:
|
|
|
177
185
|
|
|
178
186
|
data = json.load(file_object)
|
|
179
187
|
self.log(self.log_templates.LOAD_DATA_LOADED.format(len(data), self.pipeline_details.destination_object_id))
|
|
180
|
-
loaded = self.
|
|
188
|
+
loaded = self._call_connector_load_data(data, self.pipeline_details.destination_object_id, self._get_mapping(), self.pipeline_details.update_method_cd, index, len(keys))
|
|
181
189
|
if loaded:
|
|
182
190
|
self.row_count += len(data)
|
|
183
191
|
if self.mode == "prod":
|
|
@@ -189,6 +197,159 @@ class PipelineConductor:
|
|
|
189
197
|
else:
|
|
190
198
|
raise errors.LoadDataError("Loading data failed.")
|
|
191
199
|
self.log(self.log_templates.LOAD_DATA_FINISHED.format(self.row_count, self.pipeline_details.destination_object_id))
|
|
200
|
+
self._persist_sync_cursor_after_load()
|
|
201
|
+
|
|
202
|
+
def _create_connector(self, connector_cls, credentials):
|
|
203
|
+
init_params = inspect.signature(connector_cls.__init__).parameters
|
|
204
|
+
if "pipeline_context" in init_params:
|
|
205
|
+
return connector_cls(credentials, pipeline_context=self._get_pipeline_context())
|
|
206
|
+
return connector_cls(credentials)
|
|
207
|
+
|
|
208
|
+
def _normalize_update_method(self, update_method):
|
|
209
|
+
if update_method is None:
|
|
210
|
+
return None
|
|
211
|
+
if isinstance(update_method, int):
|
|
212
|
+
return update_method
|
|
213
|
+
if isinstance(update_method, str) and update_method.isdigit():
|
|
214
|
+
return int(update_method)
|
|
215
|
+
return update_method
|
|
216
|
+
|
|
217
|
+
def _get_sync_cursor(self):
|
|
218
|
+
return getattr(self.pipeline_details, "sync_cursor_dsc", None)
|
|
219
|
+
|
|
220
|
+
def _get_pipeline_context(self):
|
|
221
|
+
if self.task != "SOURCE":
|
|
222
|
+
return None
|
|
223
|
+
|
|
224
|
+
update_method = self._normalize_update_method(
|
|
225
|
+
getattr(self.pipeline_details, "update_method_cd", None)
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
return {
|
|
229
|
+
"update_method": update_method,
|
|
230
|
+
"update_date_column_nm": getattr(
|
|
231
|
+
self.pipeline_details, "updated_date_column_nm", None
|
|
232
|
+
),
|
|
233
|
+
"sync_cursor": self._get_sync_cursor(),
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
def _should_track_sync_cursor(self):
|
|
237
|
+
if self.task != "SOURCE":
|
|
238
|
+
return False
|
|
239
|
+
|
|
240
|
+
update_method = self._normalize_update_method(
|
|
241
|
+
getattr(self.pipeline_details, "update_method_cd", None)
|
|
242
|
+
)
|
|
243
|
+
updated_date_column_nm = getattr(
|
|
244
|
+
self.pipeline_details, "updated_date_column_nm", None
|
|
245
|
+
)
|
|
246
|
+
return update_method == 3 and updated_date_column_nm
|
|
247
|
+
|
|
248
|
+
def _update_extracted_sync_cursor(self, metadata):
|
|
249
|
+
if not self._should_track_sync_cursor() or not metadata:
|
|
250
|
+
return
|
|
251
|
+
|
|
252
|
+
sync_cursor = metadata.get("sync_cursor")
|
|
253
|
+
if sync_cursor is None:
|
|
254
|
+
return
|
|
255
|
+
|
|
256
|
+
if self.extracted_sync_cursor is None or sync_cursor > self.extracted_sync_cursor:
|
|
257
|
+
self.extracted_sync_cursor = sync_cursor
|
|
258
|
+
|
|
259
|
+
def _get_sync_cursor_key_name(self):
|
|
260
|
+
return f"transfers/e{self.pipeline_run_history_id}-sync_cursor.json"
|
|
261
|
+
|
|
262
|
+
def _upload_extracted_sync_cursor(self):
|
|
263
|
+
if self.extracted_sync_cursor is None:
|
|
264
|
+
return
|
|
265
|
+
|
|
266
|
+
payload = json.dumps({"sync_cursor": self.extracted_sync_cursor})
|
|
267
|
+
json_buffer = io.StringIO(payload)
|
|
268
|
+
key_name = self._get_sync_cursor_key_name()
|
|
269
|
+
|
|
270
|
+
if PipelineEnvironment.platform == "aws":
|
|
271
|
+
self.aws.upload_object(key_name, json_buffer=json_buffer)
|
|
272
|
+
elif PipelineEnvironment.platform == "azure":
|
|
273
|
+
key_name = f"{PipelineEnvironment.app_env}/{key_name}"
|
|
274
|
+
self.azure.upload_object(key_name, json_buffer=json_buffer)
|
|
275
|
+
|
|
276
|
+
self.internal_log(f"Uploaded sync cursor sidecar to {key_name}")
|
|
277
|
+
|
|
278
|
+
def _load_extracted_sync_cursor(self):
|
|
279
|
+
key_name = self._get_sync_cursor_key_name()
|
|
280
|
+
|
|
281
|
+
try:
|
|
282
|
+
if PipelineEnvironment.platform == "aws":
|
|
283
|
+
file_object = self.aws.download_object(key_name)
|
|
284
|
+
elif PipelineEnvironment.platform == "azure":
|
|
285
|
+
file_object = self.azure.download_object(
|
|
286
|
+
f"{PipelineEnvironment.app_env}/{key_name}"
|
|
287
|
+
)
|
|
288
|
+
else:
|
|
289
|
+
return None
|
|
290
|
+
|
|
291
|
+
payload = json.load(file_object)
|
|
292
|
+
return payload.get("sync_cursor")
|
|
293
|
+
except Exception as e:
|
|
294
|
+
self.internal_log(f"No sync cursor sidecar found: {e}")
|
|
295
|
+
return None
|
|
296
|
+
|
|
297
|
+
def _persist_sync_cursor_after_load(self):
|
|
298
|
+
if self.task != "DESTINATION":
|
|
299
|
+
return
|
|
300
|
+
|
|
301
|
+
sync_cursor = self._load_extracted_sync_cursor()
|
|
302
|
+
if sync_cursor is None:
|
|
303
|
+
return
|
|
304
|
+
|
|
305
|
+
if self.mode == "prod":
|
|
306
|
+
self.api.save_sync_cursor(self.pipeline_id, sync_cursor)
|
|
307
|
+
|
|
308
|
+
if PipelineEnvironment.platform == "aws":
|
|
309
|
+
self.aws.delete_object(self._get_sync_cursor_key_name())
|
|
310
|
+
elif PipelineEnvironment.platform == "azure":
|
|
311
|
+
self.azure.delete_object(
|
|
312
|
+
f"{PipelineEnvironment.app_env}/{self._get_sync_cursor_key_name()}"
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
def _call_connector_load_data(
|
|
316
|
+
self,
|
|
317
|
+
data,
|
|
318
|
+
object_id,
|
|
319
|
+
mapping,
|
|
320
|
+
update_method,
|
|
321
|
+
batch_number,
|
|
322
|
+
total_batches
|
|
323
|
+
):
|
|
324
|
+
options = getattr(self.pipeline_details, "options", None) or {}
|
|
325
|
+
|
|
326
|
+
# Put primary key into options for upsert/incremental loads
|
|
327
|
+
primary_key_column_nm = getattr(self.pipeline_details, "primary_key_column_nm", None)
|
|
328
|
+
if primary_key_column_nm:
|
|
329
|
+
options["primary_key_column_nm"] = primary_key_column_nm
|
|
330
|
+
|
|
331
|
+
sig = inspect.signature(self.connector.load_data)
|
|
332
|
+
|
|
333
|
+
if "options" in sig.parameters:
|
|
334
|
+
return self.connector.load_data(
|
|
335
|
+
data,
|
|
336
|
+
object_id,
|
|
337
|
+
mapping,
|
|
338
|
+
update_method,
|
|
339
|
+
batch_number,
|
|
340
|
+
total_batches,
|
|
341
|
+
options=options
|
|
342
|
+
)
|
|
343
|
+
|
|
344
|
+
return self.connector.load_data(
|
|
345
|
+
data,
|
|
346
|
+
object_id,
|
|
347
|
+
mapping,
|
|
348
|
+
update_method,
|
|
349
|
+
batch_number,
|
|
350
|
+
total_batches
|
|
351
|
+
)
|
|
352
|
+
|
|
192
353
|
|
|
193
354
|
def start_next_connector(self):
|
|
194
355
|
self.internal_log(self.log_templates.INTERNAL_START_NEXT_CONNECTOR.format(self.pipeline_details.destination_connector_nm, self.pipeline_details.destination_object_id))
|
|
@@ -302,13 +463,30 @@ class PipelineConductor:
|
|
|
302
463
|
return limit_reached
|
|
303
464
|
|
|
304
465
|
def _get_credentials(self):
|
|
305
|
-
|
|
466
|
+
is_source = self.task == "SOURCE"
|
|
306
467
|
|
|
307
|
-
|
|
468
|
+
encryption_txt = (
|
|
469
|
+
self.pipeline_details.source_encryption_credential_txt
|
|
470
|
+
if is_source
|
|
471
|
+
else self.pipeline_details.destination_encryption_credential_txt
|
|
472
|
+
)
|
|
473
|
+
|
|
474
|
+
if not encryption_txt:
|
|
308
475
|
return None
|
|
309
476
|
|
|
310
|
-
|
|
311
|
-
|
|
477
|
+
credential_information = (
|
|
478
|
+
self.pipeline_details.source_credential_information
|
|
479
|
+
if is_source
|
|
480
|
+
else self.pipeline_details.destination_credential_information
|
|
481
|
+
) or {}
|
|
482
|
+
|
|
483
|
+
return self.aws.decrypt_customer_data_object(
|
|
484
|
+
encrypted_data=encryption_txt,
|
|
485
|
+
customer_id=self.pipeline_details.customer_metadata_uuid,
|
|
486
|
+
encrypted_data_key_txt=credential_information.get("encrypted_data_key_txt"),
|
|
487
|
+
encryption_iv_txt=credential_information.get("encryption_iv_txt"),
|
|
488
|
+
encryption_auth_tag_txt=credential_information.get("encryption_auth_tag_txt"),
|
|
489
|
+
)
|
|
312
490
|
|
|
313
491
|
def _get_pipeline_details(self):
|
|
314
492
|
return self.api.get_pipeline_details(str(self.pipeline_id), self.task, str(self.pipeline_run_history_id), pipeline_mapping_id=self.pipeline_object_id)
|
|
@@ -113,6 +113,12 @@ class DataConnectorAPI:
|
|
|
113
113
|
|
|
114
114
|
self.post(f"{pipeline_id}/mapping", body)
|
|
115
115
|
|
|
116
|
+
def save_sync_cursor(self, pipeline_id, sync_cursor):
|
|
117
|
+
self.put(
|
|
118
|
+
f"{pipeline_id}/sync-cursor",
|
|
119
|
+
{"sync_cursor_dsc": sync_cursor},
|
|
120
|
+
)
|
|
121
|
+
|
|
116
122
|
def _request(self, method, endpoint, body=None, max_attempts=4):
|
|
117
123
|
endpoint = endpoint.lstrip("/")
|
|
118
124
|
url = f"{self.pipelines_base_url}/{endpoint}"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{dc_python_sdk-1.5.43 → dc_python_sdk-1.5.45}/src/dc_python_sdk.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|