dc-python-sdk 1.5.44__py3-none-any.whl → 1.5.45__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dc-python-sdk
3
- Version: 1.5.44
3
+ Version: 1.5.45
4
4
  Summary: Data Connector Python SDK
5
5
  Home-page: https://github.com/data-connector/dc-python-sdk
6
6
  Author: DataConnector
@@ -1,4 +1,4 @@
1
- dc_python_sdk-1.5.44.dist-info/licenses/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
1
+ dc_python_sdk-1.5.45.dist-info/licenses/LICENSE,sha256=ACwmltkrXIz5VsEQcrqljq-fat6ZXAMepjXGoe40KtE,1069
2
2
  dc_sdk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  dc_sdk/app.py,sha256=VeGe1q2F71XtZp9VWBUlNowGcF6J-gYsqOu7k2CaeuU,8320
4
4
  dc_sdk/cli.py,sha256=fu4ePzpurYO_URvxyaaUuiptOLqiWiwLoieSbKeni3c,891
@@ -9,22 +9,22 @@ dc_sdk/src/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
9
  dc_sdk/src/ai.py,sha256=aIKN6TiBKF8ZJYNpcphwWNqtI34Ni36UipxT-AEyQ1I,23321
10
10
  dc_sdk/src/ai_http.py,sha256=9c1X4Sr-2uutTrovHSZVmAacLLwcmFaQ8vA-tOKC0L0,24696
11
11
  dc_sdk/src/mapping.py,sha256=NuXrdE1MuRVRT7ILpc86RrVJq60t6Dl_lnlG1A7Ulks,3653
12
- dc_sdk/src/pipeline.py,sha256=WDlAuQsSob6Bcv7BLBEEIWd5XEmouBkI_2I-6rng4Bk,22678
12
+ dc_sdk/src/pipeline.py,sha256=nc7sXc-qNHisUQYdONNB7uosrN69ezNzv8fSiQRRNck,27262
13
13
  dc_sdk/src/server.py,sha256=2ZhITJhVcj-jd4a-768lAmmmDjTWATGmgwKD-1OVOVI,3028
14
14
  dc_sdk/src/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
15
  dc_sdk/src/models/enums.py,sha256=UwUH7Bnu9SMyl_sgkfhrgFsuA7rM-eQV-b0PEjhTs_U,981
16
16
  dc_sdk/src/models/errors.py,sha256=uD7EDiQUFUZWjYfowJC4cx6Z0X53UoD1ckh7LD2hcWk,11413
17
17
  dc_sdk/src/models/log_templates.py,sha256=gNmU8c9V-fjiraCMSKi3GfojZ6KjJ4EHQst3-G_9OCE,3111
18
- dc_sdk/src/models/pipeline_details.py,sha256=5xgiFCQyWTwHBfzcg_ALif4gSS-Ou8WNAHhn78s1IpQ,2719
18
+ dc_sdk/src/models/pipeline_details.py,sha256=mZPtPw0X-_dctARCV_i8PJRpHgjl4OMk_bplSNGOltg,2859
19
19
  dc_sdk/src/services/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- dc_sdk/src/services/api.py,sha256=8SUl3nETTQ_8fZdUcWp2sALQ9FyX95-OCypfswljnds,7127
21
- dc_sdk/src/services/aws.py,sha256=uiKtHoXmcFxeKgsFr_ao-gdlh4fAo1TxtCkhfbpe9JI,9414
20
+ dc_sdk/src/services/api.py,sha256=mXX_ELmHWEUgWpYoEzR010LnXXCyB-RrbTmkMnw9o58,7302
21
+ dc_sdk/src/services/aws.py,sha256=zYAbTAjQ4bAO_YLfPYCVhdNz9SC_nCy4vAm8HdteHwY,9411
22
22
  dc_sdk/src/services/environment.py,sha256=lKOuDyzFbjdriPVeVy1GGeuZjutzhwUsZCQcF84XyHM,6447
23
23
  dc_sdk/src/services/loader.py,sha256=0zlQm4HOPKffN9Q6YlMv73fCMBsrwkEBnka5rEJTvqU,1175
24
24
  dc_sdk/src/services/logger.py,sha256=GrQGfgYfrzmPYFRHkctDSg54jvN7ObveETTPezYldXs,2055
25
25
  dc_sdk/src/services/session.py,sha256=SpaZ_qnxMbEZO5fxsbjw6WZXVP5JpncnWalgLqB3MYo,423
26
- dc_python_sdk-1.5.44.dist-info/METADATA,sha256=Dv6GWgPyYpeC4mwXaI-GWKBdBHhLJ68R6Q7Q1mSl_BY,15980
27
- dc_python_sdk-1.5.44.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
28
- dc_python_sdk-1.5.44.dist-info/entry_points.txt,sha256=HSIKPlg7qer7Atu8oOKVpptahi_ZxqfJE-Bh2bmVSss,43
29
- dc_python_sdk-1.5.44.dist-info/top_level.txt,sha256=gD_FZnF6mZfFU5EmdB2knYHDHKfH5kXJMtSgEZga3BI,7
30
- dc_python_sdk-1.5.44.dist-info/RECORD,,
26
+ dc_python_sdk-1.5.45.dist-info/METADATA,sha256=syZWEl2LAbT7SEs1eMhok9MhIGvle5FL8pWslPDAVA0,15980
27
+ dc_python_sdk-1.5.45.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
28
+ dc_python_sdk-1.5.45.dist-info/entry_points.txt,sha256=HSIKPlg7qer7Atu8oOKVpptahi_ZxqfJE-Bh2bmVSss,43
29
+ dc_python_sdk-1.5.45.dist-info/top_level.txt,sha256=gD_FZnF6mZfFU5EmdB2knYHDHKfH5kXJMtSgEZga3BI,7
30
+ dc_python_sdk-1.5.45.dist-info/RECORD,,
@@ -32,9 +32,11 @@ class PipelineDetails:
32
32
  self.destination_ecs_task_version_nbr = row_data['destination_ecs_task_version_nbr']
33
33
  self.options = json.loads(row_data['pipeline_object_options_json']) if 'pipeline_object_options_json' in row_data and row_data['pipeline_object_options_json'] else dict()
34
34
  self.max_allowed_retrieval = row_data.get('max_allowed_retrieval')
35
- self.primary_key_column_nm = row_data.get('primary_key_column_nm')
36
35
  self.destination_credential_information = row_data.get('destination_credential_information')
37
36
  self.source_credential_information = row_data.get('source_credential_information')
37
+ self.primary_key_column_nm = row_data.get('primary_key_column_nm')
38
+ self.updated_date_column_nm = row_data.get('updated_date_column_nm')
39
+ self.sync_cursor_dsc = row_data.get('sync_cursor_dsc')
38
40
 
39
41
  def increment_stage(self):
40
42
  self.stage += 1
dc_sdk/src/pipeline.py CHANGED
@@ -35,6 +35,7 @@ class PipelineConductor:
35
35
  self.mapping = kwargs.get("mapping")
36
36
  self.pipeline_object_id = kwargs.get("pipeline_mapping_id")
37
37
  self.successful_keys = []
38
+ self.extracted_sync_cursor = None
38
39
  self.config = PipelineEnvironment
39
40
  self.api = api or DataConnectorAPI()
40
41
  self.aws = aws or AwsService(PipelineEnvironment.aws_s3_bucket)
@@ -46,7 +47,7 @@ class PipelineConductor:
46
47
 
47
48
  # get connector credentials
48
49
  Connector = load_connector()
49
- self.connector = Connector(self.credentials)
50
+ self.connector = self._create_connector(Connector, self.credentials)
50
51
 
51
52
  self.row_count = 0
52
53
  self.log_templates = self._get_log_messages()
@@ -131,6 +132,7 @@ class PipelineConductor:
131
132
 
132
133
  while "next_page" in results and results["next_page"] != None:
133
134
  if "data" in results and results["data"] != None and results["data"] != []:
135
+ self._update_extracted_sync_cursor(results.get("metadata"))
134
136
  limit_reached = self._process_rows(results["data"], max_allowed)
135
137
  if limit_reached:
136
138
  break
@@ -145,9 +147,15 @@ class PipelineConductor:
145
147
  results = self.connector.get_data(self.pipeline_details.source_object_id, self._get_field_ids(), n_rows=nrows, filters=self._get_filters(), options=self.pipeline_details.options, next_page=results["next_page"])
146
148
 
147
149
  if "data" in results and results["data"] != None and results["data"] != []:
150
+ self._update_extracted_sync_cursor(results.get("metadata"))
148
151
  self._process_rows(results["data"], max_allowed)
149
152
  elif results["data"] != []:
150
153
  self.internal_log(self.log_templates.INTERNAL_GET_DATA_FETCHED.format(0))
154
+ else:
155
+ self._update_extracted_sync_cursor(results.get("metadata"))
156
+
157
+ if self._should_track_sync_cursor():
158
+ self._upload_extracted_sync_cursor()
151
159
 
152
160
  self.log(self.log_templates.GET_DATA_FINISH.format(self.row_count, self.pipeline_details.source_object_id))
153
161
 
@@ -189,6 +197,120 @@ class PipelineConductor:
189
197
  else:
190
198
  raise errors.LoadDataError("Loading data failed.")
191
199
  self.log(self.log_templates.LOAD_DATA_FINISHED.format(self.row_count, self.pipeline_details.destination_object_id))
200
+ self._persist_sync_cursor_after_load()
201
+
202
+ def _create_connector(self, connector_cls, credentials):
203
+ init_params = inspect.signature(connector_cls.__init__).parameters
204
+ if "pipeline_context" in init_params:
205
+ return connector_cls(credentials, pipeline_context=self._get_pipeline_context())
206
+ return connector_cls(credentials)
207
+
208
+ def _normalize_update_method(self, update_method):
209
+ if update_method is None:
210
+ return None
211
+ if isinstance(update_method, int):
212
+ return update_method
213
+ if isinstance(update_method, str) and update_method.isdigit():
214
+ return int(update_method)
215
+ return update_method
216
+
217
+ def _get_sync_cursor(self):
218
+ return getattr(self.pipeline_details, "sync_cursor_dsc", None)
219
+
220
+ def _get_pipeline_context(self):
221
+ if self.task != "SOURCE":
222
+ return None
223
+
224
+ update_method = self._normalize_update_method(
225
+ getattr(self.pipeline_details, "update_method_cd", None)
226
+ )
227
+
228
+ return {
229
+ "update_method": update_method,
230
+ "update_date_column_nm": getattr(
231
+ self.pipeline_details, "updated_date_column_nm", None
232
+ ),
233
+ "sync_cursor": self._get_sync_cursor(),
234
+ }
235
+
236
+ def _should_track_sync_cursor(self):
237
+ if self.task != "SOURCE":
238
+ return False
239
+
240
+ update_method = self._normalize_update_method(
241
+ getattr(self.pipeline_details, "update_method_cd", None)
242
+ )
243
+ updated_date_column_nm = getattr(
244
+ self.pipeline_details, "updated_date_column_nm", None
245
+ )
246
+ return update_method == 3 and updated_date_column_nm
247
+
248
+ def _update_extracted_sync_cursor(self, metadata):
249
+ if not self._should_track_sync_cursor() or not metadata:
250
+ return
251
+
252
+ sync_cursor = metadata.get("sync_cursor")
253
+ if sync_cursor is None:
254
+ return
255
+
256
+ if self.extracted_sync_cursor is None or sync_cursor > self.extracted_sync_cursor:
257
+ self.extracted_sync_cursor = sync_cursor
258
+
259
+ def _get_sync_cursor_key_name(self):
260
+ return f"transfers/e{self.pipeline_run_history_id}-sync_cursor.json"
261
+
262
+ def _upload_extracted_sync_cursor(self):
263
+ if self.extracted_sync_cursor is None:
264
+ return
265
+
266
+ payload = json.dumps({"sync_cursor": self.extracted_sync_cursor})
267
+ json_buffer = io.StringIO(payload)
268
+ key_name = self._get_sync_cursor_key_name()
269
+
270
+ if PipelineEnvironment.platform == "aws":
271
+ self.aws.upload_object(key_name, json_buffer=json_buffer)
272
+ elif PipelineEnvironment.platform == "azure":
273
+ key_name = f"{PipelineEnvironment.app_env}/{key_name}"
274
+ self.azure.upload_object(key_name, json_buffer=json_buffer)
275
+
276
+ self.internal_log(f"Uploaded sync cursor sidecar to {key_name}")
277
+
278
+ def _load_extracted_sync_cursor(self):
279
+ key_name = self._get_sync_cursor_key_name()
280
+
281
+ try:
282
+ if PipelineEnvironment.platform == "aws":
283
+ file_object = self.aws.download_object(key_name)
284
+ elif PipelineEnvironment.platform == "azure":
285
+ file_object = self.azure.download_object(
286
+ f"{PipelineEnvironment.app_env}/{key_name}"
287
+ )
288
+ else:
289
+ return None
290
+
291
+ payload = json.load(file_object)
292
+ return payload.get("sync_cursor")
293
+ except Exception as e:
294
+ self.internal_log(f"No sync cursor sidecar found: {e}")
295
+ return None
296
+
297
+ def _persist_sync_cursor_after_load(self):
298
+ if self.task != "DESTINATION":
299
+ return
300
+
301
+ sync_cursor = self._load_extracted_sync_cursor()
302
+ if sync_cursor is None:
303
+ return
304
+
305
+ if self.mode == "prod":
306
+ self.api.save_sync_cursor(self.pipeline_id, sync_cursor)
307
+
308
+ if PipelineEnvironment.platform == "aws":
309
+ self.aws.delete_object(self._get_sync_cursor_key_name())
310
+ elif PipelineEnvironment.platform == "azure":
311
+ self.azure.delete_object(
312
+ f"{PipelineEnvironment.app_env}/{self._get_sync_cursor_key_name()}"
313
+ )
192
314
 
193
315
  def _call_connector_load_data(
194
316
  self,
@@ -113,6 +113,12 @@ class DataConnectorAPI:
113
113
 
114
114
  self.post(f"{pipeline_id}/mapping", body)
115
115
 
116
+ def save_sync_cursor(self, pipeline_id, sync_cursor):
117
+ self.put(
118
+ f"{pipeline_id}/sync-cursor",
119
+ {"sync_cursor_dsc": sync_cursor},
120
+ )
121
+
116
122
  def _request(self, method, endpoint, body=None, max_attempts=4):
117
123
  endpoint = endpoint.lstrip("/")
118
124
  url = f"{self.pipelines_base_url}/{endpoint}"
@@ -164,9 +164,6 @@ class AwsService:
164
164
  )
165
165
 
166
166
 
167
-
168
-
169
-
170
167
  class EncryptionService:
171
168
  def decrypt_customer_data(
172
169
  self,