dc-python-sdk 1.5.27__tar.gz → 1.5.29__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dc_python_sdk-1.5.27/src/dc_python_sdk.egg-info → dc_python_sdk-1.5.29}/PKG-INFO +1 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/pyproject.toml +1 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/setup.cfg +1 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29/src/dc_python_sdk.egg-info}/PKG-INFO +1 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/app.py +6 -1
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/ai_http.py +38 -6
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/pipeline_details.py +1 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/pipeline.py +16 -23
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/api.py +6 -3
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/environment.py +1 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/LICENSE +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/README.md +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/SOURCES.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/dependency_links.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/entry_points.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/requires.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_python_sdk.egg-info/top_level.txt +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/__init__.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/cli.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/errors.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/handler.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/__init__.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/ai.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/mapping.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/__init__.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/enums.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/errors.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/log_templates.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/server.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/__init__.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/aws.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/loader.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/logger.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/session.py +0 -0
- {dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/types.py +0 -0
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/app.py

@@ -26,7 +26,7 @@ def run_pipeline():
         pipeline_run_history_id = api.create_new_history(PipelineEnvironment.pipeline_id)
         PipelineEnvironment.set_pipeline_run_history_id(pipeline_run_history_id)
 
-        pipeline_conductor = PipelineConductor(PipelineEnvironment.task, pipeline_id=PipelineEnvironment.pipeline_id, pipeline_run_history_id=PipelineEnvironment.pipeline_run_history_id)
+        pipeline_conductor = PipelineConductor(PipelineEnvironment.task, pipeline_id=PipelineEnvironment.pipeline_id, pipeline_run_history_id=PipelineEnvironment.pipeline_run_history_id, pipeline_mapping_id=PipelineEnvironment.pipeline_mapping_id if PipelineEnvironment.pipeline_mapping_id else None)
         pipeline_conductor.internal_log(pipeline_conductor.log_templates.INTERNAL_CONNECTOR_START.format(TASK.title(), pipeline_conductor.pipeline_details.connector_nm))
     except Exception as e:
         error_trace = traceback.format_exc()
@@ -65,6 +65,11 @@ def run_pipeline():
     if TASK == TasksEnum.SOURCE.value:
         pipeline_conductor.authenticate_source()
         pipeline_conductor.pipeline_details.increment_stage()
+
+        if not pipeline_conductor.pipeline_details.pipeline_mapping_json:
+            pipeline_conductor.configure_fields()
+
+        pipeline_conductor.pipeline_details.increment_stage()
         pipeline_conductor.get_data()
 
         pipeline_conductor.pipeline_details.increment_stage()
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/ai_http.py

@@ -17,6 +17,44 @@ app = FastAPI()
 
 _DEFAULT_PROCESS_TTL_SECONDS = 600.0
 
+METHOD_CONTEXT = {
+    "authenticate": "Authenticate using self.credentials. Return True when authentication is successful, otherwise raise a meaningful error.",
+    "get_objects": "Return a list of object dictionaries with keys: object_id, object_name, object_label, and optional object_group/visible.",
+    "get_fields": "Return a list of field dictionaries for an object_id with keys: field_id, field_name, field_label, and optional metadata like data_type/size.",
+    "get_data": "Return paginated data in shape {'next_page': <token_or_none>, 'data': [row_dict, ...]}. Respect object_id, field_ids, n_rows, filters, next_page.",
+    "load_data": "Load mapped destination rows into object_id using mapping m and update_method. Return True on success or raise a specific load/write error.",
+    "close": "Close open sessions/connections safely. Should not fail when resources were never initialized.",
+    "FOLLOWUP": "Refine output quality based on user follow-up feedback after a method succeeds.",
+}
+
+REQUIRED_METHODS = [
+    "authenticate",
+    "get_objects",
+    "get_fields",
+    "get_data",
+    "load_data",
+    "close",
+]
+
+
+METHOD_SHAPE_REFERENCE = """
+Reference behavior patterns (from production connectors like PostgreSQL):
+- authenticate(self) -> bool
+  - validates credentials and initializes client/connection state.
+- get_objects(self) -> list[dict]
+  - each object has: object_id, object_name, object_label, optional object_group.
+  - if we need to hardcode the objects, we should not do it here. Instead return an empty list with a comment that says objects pulled from API.
+- get_fields(self, object_id, options=dict()) -> list[dict]
+  - each field has: field_id, field_name, field_label, optional data_type/size.
+- get_data(self, object_id, field_ids, n_rows=None, filters=None, next_page=None, options=dict()) -> dict
+  - returns {"next_page": token_or_none, "data": [row_dict, ...]}.
+- load_data(self, data, object_id, m, update_method, batch_number: int, total_batches: int) -> bool
+  - maps/transforms rows and writes data to destination object.
+- close(self) -> None
+  - gracefully closes resources (session/connection/tunnel/etc).
+"""
+
+
 # Default ports: connector HTTP (dc-sdk http) 5000, AI FastAPI 5001, code-server 5002.
 CONNECTOR_PORT = os.getenv("CONNECTOR_PORT", 5000)
 AI_PORT = os.getenv("AI_PORT", 5001)
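For orientation, below is a minimal sketch of a connector class that satisfies REQUIRED_METHODS and follows the shapes in METHOD_SHAPE_REFERENCE. The class name, credential keys, object/field IDs, and return values are illustrative only and are not part of the SDK.

# Sketch of a connector that satisfies REQUIRED_METHODS, following the shapes
# in METHOD_SHAPE_REFERENCE. All names and values below are illustrative.
class ExampleConnector:
    def __init__(self, credentials):
        self.credentials = credentials
        self.session = None

    def authenticate(self) -> bool:
        # Validate credentials and initialize client/connection state.
        if not self.credentials.get("api_key"):
            raise ValueError("Missing api_key in credentials")
        self.session = object()  # placeholder for a real client/session
        return True

    def get_objects(self) -> list:
        # In a real connector, objects are pulled from the remote API.
        return [{"object_id": "contacts", "object_name": "contacts", "object_label": "Contacts"}]

    def get_fields(self, object_id, options=dict()) -> list:
        return [{"field_id": "id", "field_name": "id", "field_label": "ID", "data_type": "string"}]

    def get_data(self, object_id, field_ids, n_rows=None, filters=None, next_page=None, options=dict()) -> dict:
        # Return one page of rows plus a token for the next page (or None).
        return {"next_page": None, "data": [{"id": "1"}]}

    def load_data(self, data, object_id, m, update_method, batch_number: int, total_batches: int) -> bool:
        # Map/transform rows and write them to the destination object.
        return True

    def close(self) -> None:
        # Safe to call even if authenticate() never ran.
        self.session = None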
@@ -325,12 +363,6 @@ def _build_full_fix_prompt(
     stage: Optional[str],
     connector_context: ConnectorContext,
 ) -> str:
-    from dc_sdk.src.ai import (
-        METHOD_CONTEXT,
-        METHOD_SHAPE_REFERENCE,
-        REQUIRED_METHODS,
-    )
-
     extra_block = ""
     if connector_context.notes:
         extra_block = f"\nAdditional connector details:\n{connector_context.notes}\n"
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/models/pipeline_details.py

@@ -11,6 +11,7 @@ class PipelineDetails:
         self.destination_object_id = row_data['destination_object_id']
         self.pipeline_mapping_json = row_data['pipeline_mapping_json']
         self.update_method_cd = row_data['update_method_cd']
+        self.primary_key_column_nm = row_data['primary_key_column_nm']
         self.source_connector_id = row_data['source_connector_id']
         self.source_connector_nm = row_data['source_connector_nm']
         self.destination_connector_id = row_data['destination_connector_id']
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/pipeline.py

@@ -33,13 +33,14 @@ class PipelineConductor:
         self.prefix = kwargs.get("prefix")
         self.update_method = kwargs.get("update_method")
         self.mapping = kwargs.get("mapping")
+        self.pipeline_object_id = kwargs.get("pipeline_mapping_id")
         self.successful_keys = []
         self.config = PipelineEnvironment
         self.api = api or DataConnectorAPI()
         self.aws = aws or AwsService(PipelineEnvironment.aws_s3_bucket)
 
         # Set Pipeline Details
-        self.pipeline_details = self._get_pipeline_details()
+        self.pipeline_details = self._get_pipeline_details()
 
         self.credentials = self._get_credentials() if mode == "prod" else kwargs.get('credentials')
 
@@ -202,10 +203,22 @@ class PipelineConductor:
         if unhandled:
             print("An unrecognized issue has occurred on our side. Our team will be in contact within 24-48 hours, or try emailing support@dataconnector.com.")
 
-
     def update_history(self, payload):
         self.api.put("{0}/history".format(self.pipeline_run_history_id), payload)
 
+    def configure_fields(self):
+        # get fields from connector
+        fields = self.connector.get_fields(self.pipeline_details.source_object_id)
+
+        if not fields:
+            raise errors.NoFieldsFoundError(self.pipeline_details.source_object_id)
+
+        mapping = [ { "column": field["field_id"], "mapped": field["field_id"] } for field in fields ]
+
+        self.pipeline_details.pipeline_mapping_json = json.dumps(mapping)
+
+        self.api.create_pipeline_mapping(self.pipeline_id, self.pipeline_details.pipeline_mapping_json)
+
     def _process_rows(self, rows, max_allowed=None):
         # Check if we have a limit and need to truncate rows
         limit_reached = False
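The new configure_fields() builds an identity mapping from the source connector's fields whenever no pipeline_mapping_json exists, then persists it via the API. A small worked example of the JSON it produces, with made-up field IDs:

# Illustration of the mapping JSON that configure_fields() generates when no
# pipeline_mapping_json is present. The field IDs here are invented for the example.
import json

fields = [
    {"field_id": "id", "field_name": "id", "field_label": "ID"},
    {"field_id": "email", "field_name": "email", "field_label": "Email"},
]

mapping = [{"column": f["field_id"], "mapped": f["field_id"]} for f in fields]
print(json.dumps(mapping))
# [{"column": "id", "mapped": "id"}, {"column": "email", "mapped": "email"}]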
@@ -269,27 +282,7 @@ class PipelineConductor:
         return self.aws.decrypt_customer_data_object(encyption_txt, self.pipeline_details.customer_metadata_uuid)
 
     def _get_pipeline_details(self):
-        return self.api.get_pipeline_details(str(self.pipeline_id), self.task, str(self.
-
-    def _get_pipeline_details_dev(self):
-        class PipelineDetail:
-            def __init__(self, object_id, filters, mapping, update_method) -> None:
-                self.source_object_id = object_id
-                self.destination_object_id = object_id
-                self.pipeline_mapping_json = mapping
-                self.update_method_cd = update_method
-                self.source_credential_nm = "Test"
-                self.destination_credential_nm = "Test"
-                # TODO: UPDATE OPTIONS
-                self.options = None
-                # self.filtered_column_nm = filters['filtered_column_nm'] if 'filtered_'
-                # self.start_selection_nm = filters['start_selection_nm']
-                # self.start_value_txt =filters['start_value_txt']
-                # self.end_selection_nm = filters['end_selection_nm']
-                # self.end_value_txt = filters['end_value_txt']
-                # self.timezone_offset_nbr = filters['timezone_offset_nbr']
-
-        return PipelineDetail(self.object_id, self.filters, self.mapping, self.update_method)
+        return self.api.get_pipeline_details(str(self.pipeline_id), self.task, str(self.pipeline_run_histoy_id), pipeline_mapping_id=self.pipeline_mapping_id)
 
     def _get_batch_row_count(self):
         results = self.connector.get_data(
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/api.py

@@ -45,13 +45,16 @@ class DataConnectorAPI:
 
         self.post('logv2', payload)
 
-    def get_pipeline_details(self, pipeline_id: str, task, pipeline_run_history_id: str):
-
+    def get_pipeline_details(self, pipeline_id: str, task, pipeline_run_history_id: str, pipeline_mapping_id: str = None):
+        url = f"{pipeline_id}"
+        if pipeline_object_id:
+            url = f"{url}?PipelineMappingID={pipeline_mapping_id}"
+        json = self.get(url)
 
         return PipelineDetails(json, task, pipeline_id, pipeline_run_history_id)
 
     def create_new_history(self, pipeline_id):
-        response = self.post(f"{pipeline_id}/history",
+        response = self.post(f"{pipeline_id}/history", {"PipelineMappingID": self.pipeline_mapping_id})
 
         return response['PipelineRunHistoryID']
 
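A sketch of the URL that the updated get_pipeline_details constructs when a mapping ID is supplied, assuming the truthiness guard is meant to refer to the pipeline_mapping_id argument; the IDs are illustrative.

# Sketch of the URL construction in the updated get_pipeline_details, assuming
# the guard checks the pipeline_mapping_id argument. IDs are made up.
def build_pipeline_details_url(pipeline_id: str, pipeline_mapping_id: str = None) -> str:
    url = f"{pipeline_id}"
    if pipeline_mapping_id:
        url = f"{url}?PipelineMappingID={pipeline_mapping_id}"
    return url

print(build_pipeline_details_url("12345"))           # 12345
print(build_pipeline_details_url("12345", "67890"))  # 12345?PipelineMappingID=67890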
{dc_python_sdk-1.5.27 → dc_python_sdk-1.5.29}/src/dc_sdk/src/services/environment.py

@@ -27,6 +27,7 @@ class PipelineEnvironment:
     task_id: ClassVar[str] = None
     source_endpoint: ClassVar[str] = os.getenv("SOURCE_ENDPOINT") or "http://localhost:5000"
     destination_endpoint: ClassVar[str] = os.getenv("DESTINATION_ENDPOINT") or "http://localhost:5001"
+    pipeline_mapping_id: ClassVar[str] = os.getenv("PIPELINE_MAPPING_ID") or ""
 
     @staticmethod
     def validate_environment():
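Taken together with the app.py hunk above, the new PIPELINE_MAPPING_ID environment variable flows into the run roughly as sketched below; the value "42" is illustrative.

# Sketch of how PIPELINE_MAPPING_ID reaches PipelineConductor, based on the
# hunks above. The example value is invented.
import os

os.environ["PIPELINE_MAPPING_ID"] = "42"

pipeline_mapping_id = os.getenv("PIPELINE_MAPPING_ID") or ""
# app.py forwards it only when non-empty, mirroring:
#   pipeline_mapping_id=PipelineEnvironment.pipeline_mapping_id if PipelineEnvironment.pipeline_mapping_id else None
forwarded = pipeline_mapping_id if pipeline_mapping_id else None
print(forwarded)  # 42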