dc-python-sdk 1.5.27__tar.gz → 1.5.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dc_python_sdk-1.5.27/src/dc_python_sdk.egg-info → dc_python_sdk-1.5.29}/PKG-INFO +1 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/pyproject.toml +1 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/setup.cfg +1 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29/src/dc_python_sdk.egg-info}/PKG-INFO +1 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/app.py +6 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/ai_http.py +38 -6
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/pipeline_details.py +1 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/pipeline.py +16 -23
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/api.py +6 -3
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/environment.py +1 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/LICENSE +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/README.md +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/SOURCES.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/dependency_links.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/entry_points.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/requires.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/top_level.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/__init__.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/cli.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/errors.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/handler.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/__init__.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/ai.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/mapping.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/__init__.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/enums.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/errors.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/log_templates.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/server.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/__init__.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/aws.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/loader.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/logger.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/session.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/types.py +0 -0
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/app.py

@@ -26,7 +26,7 @@ def run_pipeline():
         pipeline_run_history_id = api.create_new_history(PipelineEnvironment.pipeline_id)
         PipelineEnvironment.set_pipeline_run_history_id(pipeline_run_history_id)
 
-        pipeline_conductor = PipelineConductor(PipelineEnvironment.task, pipeline_id=PipelineEnvironment.pipeline_id, pipeline_run_history_id=PipelineEnvironment.pipeline_run_history_id)
+        pipeline_conductor = PipelineConductor(PipelineEnvironment.task, pipeline_id=PipelineEnvironment.pipeline_id, pipeline_run_history_id=PipelineEnvironment.pipeline_run_history_id, pipeline_mapping_id=PipelineEnvironment.pipeline_mapping_id if PipelineEnvironment.pipeline_mapping_id else None)
         pipeline_conductor.internal_log(pipeline_conductor.log_templates.INTERNAL_CONNECTOR_START.format(TASK.title(), pipeline_conductor.pipeline_details.connector_nm))
     except Exception as e:
         error_trace = traceback.format_exc()
@@ -65,6 +65,11 @@ def run_pipeline():
     if TASK == TasksEnum.SOURCE.value:
         pipeline_conductor.authenticate_source()
         pipeline_conductor.pipeline_details.increment_stage()
+
+        if not pipeline_conductor.pipeline_details.pipeline_mapping_json:
+            pipeline_conductor.configure_fields()
+
+        pipeline_conductor.pipeline_details.increment_stage()
         pipeline_conductor.get_data()
 
         pipeline_conductor.pipeline_details.increment_stage()
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/ai_http.py

@@ -17,6 +17,44 @@ app = FastAPI()
 
 _DEFAULT_PROCESS_TTL_SECONDS = 600.0
 
+METHOD_CONTEXT = {
+    "authenticate": "Authenticate using self.credentials. Return True when authentication is successful, otherwise raise a meaningful error.",
+    "get_objects": "Return a list of object dictionaries with keys: object_id, object_name, object_label, and optional object_group/visible.",
+    "get_fields": "Return a list of field dictionaries for an object_id with keys: field_id, field_name, field_label, and optional metadata like data_type/size.",
+    "get_data": "Return paginated data in shape {'next_page': <token_or_none>, 'data': [row_dict, ...]}. Respect object_id, field_ids, n_rows, filters, next_page.",
+    "load_data": "Load mapped destination rows into object_id using mapping m and update_method. Return True on success or raise a specific load/write error.",
+    "close": "Close open sessions/connections safely. Should not fail when resources were never initialized.",
+    "FOLLOWUP": "Refine output quality based on user follow-up feedback after a method succeeds.",
+}
+
+REQUIRED_METHODS = [
+    "authenticate",
+    "get_objects",
+    "get_fields",
+    "get_data",
+    "load_data",
+    "close",
+]
+
+
+METHOD_SHAPE_REFERENCE = """
+Reference behavior patterns (from production connectors like PostgreSQL):
+- authenticate(self) -> bool
+  - validates credentials and initializes client/connection state.
+- get_objects(self) -> list[dict]
+  - each object has: object_id, object_name, object_label, optional object_group.
+  - if we need to hardcode the objects, we should not do it here. Instead return an empty list with a comment that says objects pulled from API.
+- get_fields(self, object_id, options=dict()) -> list[dict]
+  - each field has: field_id, field_name, field_label, optional data_type/size.
+- get_data(self, object_id, field_ids, n_rows=None, filters=None, next_page=None, options=dict()) -> dict
+  - returns {"next_page": token_or_none, "data": [row_dict, ...]}.
+- load_data(self, data, object_id, m, update_method, batch_number: int, total_batches: int) -> bool
+  - maps/transforms rows and writes data to destination object.
+- close(self) -> None
+  - gracefully closes resources (session/connection/tunnel/etc).
+"""
+
+
 # Default ports: connector HTTP (dc-sdk http) 5000, AI FastAPI 5001, code-server 5002.
 CONNECTOR_PORT = os.getenv("CONNECTOR_PORT", 5000)
 AI_PORT = os.getenv("AI_PORT", 5001)
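For orientation, below is a minimal sketch of a connector class that satisfies REQUIRED_METHODS and follows the shapes in METHOD_SHAPE_REFERENCE. The class name, credential keys, object/field IDs, and return values are illustrative only and are not part of the SDK.

# Sketch of a connector that satisfies REQUIRED_METHODS, following the shapes
# in METHOD_SHAPE_REFERENCE. All names and values below are illustrative.
class ExampleConnector:
    def __init__(self, credentials):
        self.credentials = credentials
        self.session = None

    def authenticate(self) -> bool:
        # Validate credentials and initialize client/connection state.
        if not self.credentials.get("api_key"):
            raise ValueError("Missing api_key in credentials")
        self.session = object()  # placeholder for a real client/session
        return True

    def get_objects(self) -> list:
        # In a real connector, objects are pulled from the remote API.
        return [{"object_id": "contacts", "object_name": "contacts", "object_label": "Contacts"}]

    def get_fields(self, object_id, options=dict()) -> list:
        return [{"field_id": "id", "field_name": "id", "field_label": "ID", "data_type": "string"}]

    def get_data(self, object_id, field_ids, n_rows=None, filters=None, next_page=None, options=dict()) -> dict:
        # Return one page of rows plus a token for the next page (or None).
        return {"next_page": None, "data": [{"id": "1"}]}

    def load_data(self, data, object_id, m, update_method, batch_number: int, total_batches: int) -> bool:
        # Map/transform rows and write them to the destination object.
        return True

    def close(self) -> None:
        # Safe to call even if authenticate() never ran.
        self.session = None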
@@ -325,12 +363,6 @@ def _build_full_fix_prompt(
     stage: Optional[str],
     connector_context: ConnectorContext,
 ) -> str:
-    from dc_sdk.src.ai import (
-        METHOD_CONTEXT,
-        METHOD_SHAPE_REFERENCE,
-        REQUIRED_METHODS,
-    )
-
     extra_block = ""
     if connector_context.notes:
         extra_block = f"\nAdditional connector details:\n{connector_context.notes}\n"
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/pipeline_details.py

@@ -11,6 +11,7 @@ class PipelineDetails:
         self.destination_object_id = row_data['destination_object_id']
         self.pipeline_mapping_json = row_data['pipeline_mapping_json']
         self.update_method_cd = row_data['update_method_cd']
+        self.primary_key_column_nm = row_data['primary_key_column_nm']
         self.source_connector_id = row_data['source_connector_id']
         self.source_connector_nm = row_data['source_connector_nm']
         self.destination_connector_id = row_data['destination_connector_id']
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/pipeline.py

@@ -33,13 +33,14 @@ class PipelineConductor:
         self.prefix = kwargs.get("prefix")
         self.update_method = kwargs.get("update_method")
         self.mapping = kwargs.get("mapping")
+        self.pipeline_object_id = kwargs.get("pipeline_mapping_id")
         self.successful_keys = []
         self.config = PipelineEnvironment
         self.api = api or DataConnectorAPI()
         self.aws = aws or AwsService(PipelineEnvironment.aws_s3_bucket)
 
         # Set Pipeline Details
-        self.pipeline_details = self._get_pipeline_details()
+        self.pipeline_details = self._get_pipeline_details()
 
         self.credentials = self._get_credentials() if mode == "prod" else kwargs.get('credentials')
 
@@ -202,10 +203,22 @@ class PipelineConductor:
         if unhandled:
             print("An unrecognized issue has occurred on our side. Our team will be in contact within 24-48 hours, or try emailing support@dataconnector.com.")
 
-
     def update_history(self, payload):
         self.api.put("{0}/history".format(self.pipeline_run_history_id), payload)
 
+    def configure_fields(self):
+        # get fields from connector
+        fields = self.connector.get_fields(self.pipeline_details.source_object_id)
+
+        if not fields:
+            raise errors.NoFieldsFoundError(self.pipeline_details.source_object_id)
+
+        mapping = [ { "column": field["field_id"], "mapped": field["field_id"] } for field in fields ]
+
+        self.pipeline_details.pipeline_mapping_json = json.dumps(mapping)
+
+        self.api.create_pipeline_mapping(self.pipeline_id, self.pipeline_details.pipeline_mapping_json)
+
     def _process_rows(self, rows, max_allowed=None):
         # Check if we have a limit and need to truncate rows
         limit_reached = False
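The new configure_fields() builds an identity mapping from the source connector's fields whenever no pipeline_mapping_json exists, then persists it via the API. A small worked example of the JSON it produces, with made-up field IDs:

# Illustration of the mapping JSON that configure_fields() generates when no
# pipeline_mapping_json is present. The field IDs here are invented for the example.
import json

fields = [
    {"field_id": "id", "field_name": "id", "field_label": "ID"},
    {"field_id": "email", "field_name": "email", "field_label": "Email"},
]

mapping = [{"column": f["field_id"], "mapped": f["field_id"]} for f in fields]
print(json.dumps(mapping))
# [{"column": "id", "mapped": "id"}, {"column": "email", "mapped": "email"}]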
@@ -269,27 +282,7 @@ class PipelineConductor:
         return self.aws.decrypt_customer_data_object(encyption_txt, self.pipeline_details.customer_metadata_uuid)
 
     def _get_pipeline_details(self):
-        return self.api.get_pipeline_details(str(self.pipeline_id), self.task, str(self.
-
-    def _get_pipeline_details_dev(self):
-        class PipelineDetail:
-            def __init__(self, object_id, filters, mapping, update_method) -> None:
-                self.source_object_id = object_id
-                self.destination_object_id = object_id
-                self.pipeline_mapping_json = mapping
-                self.update_method_cd = update_method
-                self.source_credential_nm = "Test"
-                self.destination_credential_nm = "Test"
-                # TODO: UPDATE OPTIONS
-                self.options = None
-                # self.filtered_column_nm = filters['filtered_column_nm'] if 'filtered_'
-                # self.start_selection_nm = filters['start_selection_nm']
-                # self.start_value_txt =filters['start_value_txt']
-                # self.end_selection_nm = filters['end_selection_nm']
-                # self.end_value_txt = filters['end_value_txt']
-                # self.timezone_offset_nbr = filters['timezone_offset_nbr']
-
-        return PipelineDetail(self.object_id, self.filters, self.mapping, self.update_method)
+        return self.api.get_pipeline_details(str(self.pipeline_id), self.task, str(self.pipeline_run_histoy_id), pipeline_mapping_id=self.pipeline_mapping_id)
 
     def _get_batch_row_count(self):
         results = self.connector.get_data(
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/api.py

@@ -45,13 +45,16 @@ class DataConnectorAPI:
 
         self.post('logv2', payload)
 
-    def get_pipeline_details(self, pipeline_id: str, task, pipeline_run_history_id: str):
-
+    def get_pipeline_details(self, pipeline_id: str, task, pipeline_run_history_id: str, pipeline_mapping_id: str = None):
+        url = f"{pipeline_id}"
+        if pipeline_object_id:
+            url = f"{url}?PipelineMappingID={pipeline_mapping_id}"
+        json = self.get(url)
 
         return PipelineDetails(json, task, pipeline_id, pipeline_run_history_id)
 
     def create_new_history(self, pipeline_id):
-        response = self.post(f"{pipeline_id}/history",
+        response = self.post(f"{pipeline_id}/history", {"PipelineMappingID": self.pipeline_mapping_id})
 
         return response['PipelineRunHistoryID']
 
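A sketch of the URL that the updated get_pipeline_details constructs when a mapping ID is supplied, assuming the truthiness guard is meant to refer to the pipeline_mapping_id argument; the IDs are illustrative.

# Sketch of the URL construction in the updated get_pipeline_details, assuming
# the guard checks the pipeline_mapping_id argument. IDs are made up.
def build_pipeline_details_url(pipeline_id: str, pipeline_mapping_id: str = None) -> str:
    url = f"{pipeline_id}"
    if pipeline_mapping_id:
        url = f"{url}?PipelineMappingID={pipeline_mapping_id}"
    return url

print(build_pipeline_details_url("12345"))           # 12345
print(build_pipeline_details_url("12345", "67890"))  # 12345?PipelineMappingID=67890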
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/environment.py

@@ -27,6 +27,7 @@ class PipelineEnvironment:
     task_id: ClassVar[str] = None
     source_endpoint: ClassVar[str] = os.getenv("SOURCE_ENDPOINT") or "http://localhost:5000"
     destination_endpoint: ClassVar[str] = os.getenv("DESTINATION_ENDPOINT") or "http://localhost:5001"
+    pipeline_mapping_id: ClassVar[str] = os.getenv("PIPELINE_MAPPING_ID") or ""
 
     @staticmethod
     def validate_environment():
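Taken together with the app.py hunk above, the new PIPELINE_MAPPING_ID environment variable flows into the run roughly as sketched below; the value "42" is illustrative.

# Sketch of how PIPELINE_MAPPING_ID reaches PipelineConductor, based on the
# hunks above. The example value is invented.
import os

os.environ["PIPELINE_MAPPING_ID"] = "42"

pipeline_mapping_id = os.getenv("PIPELINE_MAPPING_ID") or ""
# app.py forwards it only when non-empty, mirroring:
#   pipeline_mapping_id=PipelineEnvironment.pipeline_mapping_id if PipelineEnvironment.pipeline_mapping_id else None
forwarded = pipeline_mapping_id if pipeline_mapping_id else None
print(forwarded)  # 42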