dc-python-sdk 1.5.43__py3-none-any.whl → 1.5.45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dc-python-sdk
3
- Version: 1.5.43
3
+ Version: 1.5.45
4
4
  Summary: Data Connector Python SDK
5
5
  Home-page: https://github.com/data-connector/dc-python-sdk
6
6
  Author: DataConnector
@@ -1,4 +1,4 @@
1
- dc_python_sdk-1.5.43.dist-info/licenses/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
1
+ dc_python_sdk-1.5.45.dist-info/licenses/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
2
2
  dc_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  dc_sdk/app.py,sha256=VeGe1q2F71XtZp9VWBUlNowGcF6J-gYsqOu7k2CaeuU,8320
4
4
  dc_sdk/cli.py,sha256=fu4ePzpurYO_URvxyaaUuiptOLqiWiwLoieSbKeni3c,891
@@ -9,22 +9,22 @@ dc_sdk/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  dc_sdk/src/ai.py,sha256=aIKN6TiBKF8ZJYNpcphwWNqtI34Ni36UipxT-AEyQ1I,23321
10
10
  dc_sdk/src/ai_http.py,sha256=9c1X4Sr-2uutTrovHSZVmAacLLwcmFaQ8vA-tOKC0L0,24696
11
11
  dc_sdk/src/mapping.py,sha256=NuXrdE1MuRVRT7ILpc86RrVJq60t6Dl_lnlG1A7Ulks,3653
12
- dc_sdk/src/pipeline.py,sha256=L-C0eKDmSdyZfk3Vqyd6wbC1gCHceH-6xWbfQiLoEiY,21012
12
+ dc_sdk/src/pipeline.py,sha256=nc7sXc-qNHisUQYdONNB7uosrN69ezNzv8fSiQRRNck,27262
13
13
  dc_sdk/src/server.py,sha256=2ZhITJhVcj-jd4a-768lAmmmDjTWATGmgwKD-1OVOVI,3028
14
14
  dc_sdk/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  dc_sdk/src/models/enums.py,sha256=UwUH7Bnu9SMyl_sgkfhrgFsuA7rM-eQV-b0PEjhTs_U,981
16
16
  dc_sdk/src/models/errors.py,sha256=uD7EDiQUFUZWjYfowJC4cx6Z0X53UoD1ckh7LD2hcWk,11413
17
17
  dc_sdk/src/models/log_templates.py,sha256=gNmU8c9V-fjiraCMSKi3GfojZ6KjJ4EHQst3-G_9OCE,3111
18
- dc_sdk/src/models/pipeline_details.py,sha256=5xgiFCQyWTwHBfzcg_ALif4gSS-Ou8WNAHhn78s1IpQ,2719
18
+ dc_sdk/src/models/pipeline_details.py,sha256=mZPtPw0X-_dctARCV_i8PJRpHgjl4OMk_bplSNGOltg,2859
19
19
  dc_sdk/src/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- dc_sdk/src/services/api.py,sha256=8SUl3nETTQ_8fZdUcWp2sALQ9FyX95-OCypfswljnds,7127
21
- dc_sdk/src/services/aws.py,sha256=uiKtHoXmcFxeKgsFr_ao-gdlh4fAo1TxtCkhfbpe9JI,9414
20
+ dc_sdk/src/services/api.py,sha256=mXX_ELmHWEUgWpYoEzR010LnXXCyB-RrbTmkMnw9o58,7302
21
+ dc_sdk/src/services/aws.py,sha256=zYAbTAjQ4bAO_YLfPYCVhdNz9SC_nCy4vAm8HdteHwY,9411
22
22
  dc_sdk/src/services/environment.py,sha256=lKOuDyzFbjdriPVeVy1GGeuZjutzhwUsZCQcF84XyHM,6447
23
23
  dc_sdk/src/services/loader.py,sha256=0zlQm4HOPKffN9Q6YlMv73fCMBsrwkEBnka5rEJTvqU,1175
24
24
  dc_sdk/src/services/logger.py,sha256=GrQGfgYfrzmPYFRHkctDSg54jvN7ObveETTPezYldXs,2055
25
25
  dc_sdk/src/services/session.py,sha256=SpaZ_qnxMbEZO5fxsbjw6WZXVP5JpncnWalgLqB3MYo,423
26
- dc_python_sdk-1.5.43.dist-info/METADATA,sha256=ON-ZyZ9xpuKYoBHpGHb_57U67IdKrcmW3tDnhyRUkU0,15980
27
- dc_python_sdk-1.5.43.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
28
- dc_python_sdk-1.5.43.dist-info/entry_points.txt,sha256=HSIKPlg7qer7Atu8oOKVpptahi_ZxqfJE-Bh2bmVSss,43
29
- dc_python_sdk-1.5.43.dist-info/top_level.txt,sha256=gD_FZnF6mZfFU5EmdB2knYHDHKfH5kXJMtSgEZga3BI,7
30
- dc_python_sdk-1.5.43.dist-info/RECORD,,
26
+ dc_python_sdk-1.5.45.dist-info/METADATA,sha256=syZWEl2LAbT7SEs1eMhok9MhIGvle5FL8pWslPDAVA0,15980
27
+ dc_python_sdk-1.5.45.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
28
+ dc_python_sdk-1.5.45.dist-info/entry_points.txt,sha256=HSIKPlg7qer7Atu8oOKVpptahi_ZxqfJE-Bh2bmVSss,43
29
+ dc_python_sdk-1.5.45.dist-info/top_level.txt,sha256=gD_FZnF6mZfFU5EmdB2knYHDHKfH5kXJMtSgEZga3BI,7
30
+ dc_python_sdk-1.5.45.dist-info/RECORD,,
@@ -32,9 +32,11 @@ class PipelineDetails:
32
32
  self.destination_ecs_task_version_nbr = row_data['destination_ecs_task_version_nbr']
33
33
  self.options = json.loads(row_data['pipeline_object_options_json']) if 'pipeline_object_options_json' in row_data and row_data['pipeline_object_options_json'] else dict()
34
34
  self.max_allowed_retrieval = row_data.get('max_allowed_retrieval')
35
- self.primary_key_column_nm = row_data.get('primary_key_column_nm')
36
35
  self.destination_credential_information = row_data.get('destination_credential_information')
37
36
  self.source_credential_information = row_data.get('source_credential_information')
37
+ self.primary_key_column_nm = row_data.get('primary_key_column_nm')
38
+ self.updated_date_column_nm = row_data.get('updated_date_column_nm')
39
+ self.sync_cursor_dsc = row_data.get('sync_cursor_dsc')
38
40
 
39
41
  def increment_stage(self):
40
42
  self.stage += 1
dc_sdk/src/pipeline.py CHANGED
@@ -1,4 +1,4 @@
1
- import json, io, math, re
1
+ import json, io, math, re, inspect
2
2
  import time
3
3
  from .services.environment import PipelineEnvironment
4
4
  from .services.api import DataConnectorAPI
@@ -35,6 +35,7 @@ class PipelineConductor:
35
35
  self.mapping = kwargs.get("mapping")
36
36
  self.pipeline_object_id = kwargs.get("pipeline_mapping_id")
37
37
  self.successful_keys = []
38
+ self.extracted_sync_cursor = None
38
39
  self.config = PipelineEnvironment
39
40
  self.api = api or DataConnectorAPI()
40
41
  self.aws = aws or AwsService(PipelineEnvironment.aws_s3_bucket)
@@ -46,7 +47,7 @@ class PipelineConductor:
46
47
 
47
48
  # get connector credentials
48
49
  Connector = load_connector()
49
- self.connector = Connector(self.credentials)
50
+ self.connector = self._create_connector(Connector, self.credentials)
50
51
 
51
52
  self.row_count = 0
52
53
  self.log_templates = self._get_log_messages()
@@ -131,6 +132,7 @@ class PipelineConductor:
131
132
 
132
133
  while "next_page" in results and results["next_page"] != None:
133
134
  if "data" in results and results["data"] != None and results["data"] != []:
135
+ self._update_extracted_sync_cursor(results.get("metadata"))
134
136
  limit_reached = self._process_rows(results["data"], max_allowed)
135
137
  if limit_reached:
136
138
  break
@@ -145,9 +147,15 @@ class PipelineConductor:
145
147
  results = self.connector.get_data(self.pipeline_details.source_object_id, self._get_field_ids(), n_rows=nrows, filters=self._get_filters(), options=self.pipeline_details.options, next_page=results["next_page"])
146
148
 
147
149
  if "data" in results and results["data"] != None and results["data"] != []:
150
+ self._update_extracted_sync_cursor(results.get("metadata"))
148
151
  self._process_rows(results["data"], max_allowed)
149
152
  elif results["data"] != []:
150
153
  self.internal_log(self.log_templates.INTERNAL_GET_DATA_FETCHED.format(0))
154
+ else:
155
+ self._update_extracted_sync_cursor(results.get("metadata"))
156
+
157
+ if self._should_track_sync_cursor():
158
+ self._upload_extracted_sync_cursor()
151
159
 
152
160
  self.log(self.log_templates.GET_DATA_FINISH.format(self.row_count, self.pipeline_details.source_object_id))
153
161
 
@@ -163,7 +171,7 @@ class PipelineConductor:
163
171
 
164
172
  if not keys:
165
173
  # Call load_data with empty data when there are no keys
166
- loaded = self.connector.load_data([], self.pipeline_details.destination_object_id, self._get_mapping(), self.pipeline_details.update_method_cd, 0, 1)
174
+ loaded = self._call_connector_load_data([], self.pipeline_details.destination_object_id, self._get_mapping(), self.pipeline_details.update_method_cd, 0, 1)
167
175
  if not loaded:
168
176
  raise errors.LoadDataError("Loading data failed.")
169
177
  else:
@@ -177,7 +185,7 @@ class PipelineConductor:
177
185
 
178
186
  data = json.load(file_object)
179
187
  self.log(self.log_templates.LOAD_DATA_LOADED.format(len(data), self.pipeline_details.destination_object_id))
180
- loaded = self.connector.load_data(data, self.pipeline_details.destination_object_id, self._get_mapping(), self.pipeline_details.update_method_cd, index, len(keys))
188
+ loaded = self._call_connector_load_data(data, self.pipeline_details.destination_object_id, self._get_mapping(), self.pipeline_details.update_method_cd, index, len(keys))
181
189
  if loaded:
182
190
  self.row_count += len(data)
183
191
  if self.mode == "prod":
@@ -189,6 +197,159 @@ class PipelineConductor:
189
197
  else:
190
198
  raise errors.LoadDataError("Loading data failed.")
191
199
  self.log(self.log_templates.LOAD_DATA_FINISHED.format(self.row_count, self.pipeline_details.destination_object_id))
200
+ self._persist_sync_cursor_after_load()
201
+
202
+ def _create_connector(self, connector_cls, credentials):
203
+ init_params = inspect.signature(connector_cls.__init__).parameters
204
+ if "pipeline_context" in init_params:
205
+ return connector_cls(credentials, pipeline_context=self._get_pipeline_context())
206
+ return connector_cls(credentials)
207
+
208
+ def _normalize_update_method(self, update_method):
209
+ if update_method is None:
210
+ return None
211
+ if isinstance(update_method, int):
212
+ return update_method
213
+ if isinstance(update_method, str) and update_method.isdigit():
214
+ return int(update_method)
215
+ return update_method
216
+
217
+ def _get_sync_cursor(self):
218
+ return getattr(self.pipeline_details, "sync_cursor_dsc", None)
219
+
220
+ def _get_pipeline_context(self):
221
+ if self.task != "SOURCE":
222
+ return None
223
+
224
+ update_method = self._normalize_update_method(
225
+ getattr(self.pipeline_details, "update_method_cd", None)
226
+ )
227
+
228
+ return {
229
+ "update_method": update_method,
230
+ "update_date_column_nm": getattr(
231
+ self.pipeline_details, "updated_date_column_nm", None
232
+ ),
233
+ "sync_cursor": self._get_sync_cursor(),
234
+ }
235
+
236
+ def _should_track_sync_cursor(self):
237
+ if self.task != "SOURCE":
238
+ return False
239
+
240
+ update_method = self._normalize_update_method(
241
+ getattr(self.pipeline_details, "update_method_cd", None)
242
+ )
243
+ updated_date_column_nm = getattr(
244
+ self.pipeline_details, "updated_date_column_nm", None
245
+ )
246
+ return update_method == 3 and updated_date_column_nm
247
+
248
+ def _update_extracted_sync_cursor(self, metadata):
249
+ if not self._should_track_sync_cursor() or not metadata:
250
+ return
251
+
252
+ sync_cursor = metadata.get("sync_cursor")
253
+ if sync_cursor is None:
254
+ return
255
+
256
+ if self.extracted_sync_cursor is None or sync_cursor > self.extracted_sync_cursor:
257
+ self.extracted_sync_cursor = sync_cursor
258
+
259
+ def _get_sync_cursor_key_name(self):
260
+ return f"transfers/e{self.pipeline_run_history_id}-sync_cursor.json"
261
+
262
+ def _upload_extracted_sync_cursor(self):
263
+ if self.extracted_sync_cursor is None:
264
+ return
265
+
266
+ payload = json.dumps({"sync_cursor": self.extracted_sync_cursor})
267
+ json_buffer = io.StringIO(payload)
268
+ key_name = self._get_sync_cursor_key_name()
269
+
270
+ if PipelineEnvironment.platform == "aws":
271
+ self.aws.upload_object(key_name, json_buffer=json_buffer)
272
+ elif PipelineEnvironment.platform == "azure":
273
+ key_name = f"{PipelineEnvironment.app_env}/{key_name}"
274
+ self.azure.upload_object(key_name, json_buffer=json_buffer)
275
+
276
+ self.internal_log(f"Uploaded sync cursor sidecar to {key_name}")
277
+
278
+ def _load_extracted_sync_cursor(self):
279
+ key_name = self._get_sync_cursor_key_name()
280
+
281
+ try:
282
+ if PipelineEnvironment.platform == "aws":
283
+ file_object = self.aws.download_object(key_name)
284
+ elif PipelineEnvironment.platform == "azure":
285
+ file_object = self.azure.download_object(
286
+ f"{PipelineEnvironment.app_env}/{key_name}"
287
+ )
288
+ else:
289
+ return None
290
+
291
+ payload = json.load(file_object)
292
+ return payload.get("sync_cursor")
293
+ except Exception as e:
294
+ self.internal_log(f"No sync cursor sidecar found: {e}")
295
+ return None
296
+
297
+ def _persist_sync_cursor_after_load(self):
298
+ if self.task != "DESTINATION":
299
+ return
300
+
301
+ sync_cursor = self._load_extracted_sync_cursor()
302
+ if sync_cursor is None:
303
+ return
304
+
305
+ if self.mode == "prod":
306
+ self.api.save_sync_cursor(self.pipeline_id, sync_cursor)
307
+
308
+ if PipelineEnvironment.platform == "aws":
309
+ self.aws.delete_object(self._get_sync_cursor_key_name())
310
+ elif PipelineEnvironment.platform == "azure":
311
+ self.azure.delete_object(
312
+ f"{PipelineEnvironment.app_env}/{self._get_sync_cursor_key_name()}"
313
+ )
314
+
315
+ def _call_connector_load_data(
316
+ self,
317
+ data,
318
+ object_id,
319
+ mapping,
320
+ update_method,
321
+ batch_number,
322
+ total_batches
323
+ ):
324
+ options = getattr(self.pipeline_details, "options", None) or {}
325
+
326
+ # Put primary key into options for upsert/incremental loads
327
+ primary_key_column_nm = getattr(self.pipeline_details, "primary_key_column_nm", None)
328
+ if primary_key_column_nm:
329
+ options["primary_key_column_nm"] = primary_key_column_nm
330
+
331
+ sig = inspect.signature(self.connector.load_data)
332
+
333
+ if "options" in sig.parameters:
334
+ return self.connector.load_data(
335
+ data,
336
+ object_id,
337
+ mapping,
338
+ update_method,
339
+ batch_number,
340
+ total_batches,
341
+ options=options
342
+ )
343
+
344
+ return self.connector.load_data(
345
+ data,
346
+ object_id,
347
+ mapping,
348
+ update_method,
349
+ batch_number,
350
+ total_batches
351
+ )
352
+
192
353
 
193
354
  def start_next_connector(self):
194
355
  self.internal_log(self.log_templates.INTERNAL_START_NEXT_CONNECTOR.format(self.pipeline_details.destination_connector_nm, self.pipeline_details.destination_object_id))
@@ -302,13 +463,30 @@ class PipelineConductor:
302
463
  return limit_reached
303
464
 
304
465
  def _get_credentials(self):
305
- encyption_txt = self.pipeline_details.source_encryption_credential_txt if self.task == "SOURCE" else self.pipeline_details.destination_encryption_credential_txt
466
+ is_source = self.task == "SOURCE"
306
467
 
307
- if not encyption_txt:
468
+ encryption_txt = (
469
+ self.pipeline_details.source_encryption_credential_txt
470
+ if is_source
471
+ else self.pipeline_details.destination_encryption_credential_txt
472
+ )
473
+
474
+ if not encryption_txt:
308
475
  return None
309
476
 
310
- # Set Connector Credentials
311
- return self.aws.decrypt_customer_data_object(encyption_txt, self.pipeline_details.customer_metadata_uuid)
477
+ credential_information = (
478
+ self.pipeline_details.source_credential_information
479
+ if is_source
480
+ else self.pipeline_details.destination_credential_information
481
+ ) or {}
482
+
483
+ return self.aws.decrypt_customer_data_object(
484
+ encrypted_data=encryption_txt,
485
+ customer_id=self.pipeline_details.customer_metadata_uuid,
486
+ encrypted_data_key_txt=credential_information.get("encrypted_data_key_txt"),
487
+ encryption_iv_txt=credential_information.get("encryption_iv_txt"),
488
+ encryption_auth_tag_txt=credential_information.get("encryption_auth_tag_txt"),
489
+ )
312
490
 
313
491
  def _get_pipeline_details(self):
314
492
  return self.api.get_pipeline_details(str(self.pipeline_id), self.task, str(self.pipeline_run_history_id), pipeline_mapping_id=self.pipeline_object_id)
@@ -113,6 +113,12 @@ class DataConnectorAPI:
113
113
 
114
114
  self.post(f"{pipeline_id}/mapping", body)
115
115
 
116
+ def save_sync_cursor(self, pipeline_id, sync_cursor):
117
+ self.put(
118
+ f"{pipeline_id}/sync-cursor",
119
+ {"sync_cursor_dsc": sync_cursor},
120
+ )
121
+
116
122
  def _request(self, method, endpoint, body=None, max_attempts=4):
117
123
  endpoint = endpoint.lstrip("/")
118
124
  url = f"{self.pipelines_base_url}/{endpoint}"
@@ -164,9 +164,6 @@ class AwsService:
164
164
  )
165
165
 
166
166
 
167
-
168
-
169
-
170
167
  class EncryptionService:
171
168
  def decrypt_customer_data(
172
169
  self,