PyPI - acryl-datahub - Versions diffs - 1.3.0.1rc6__py3-none-any.whl → 1.3.0.1rc8__py3-none-any.whl - Mend

acryl-datahub 1.3.0.1rc6__py3-none-any.whl → 1.3.0.1rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of acryl-datahub might be problematic. Click here for more details.

Files changed (35) hide show

datahub/ingestion/source/sql/mssql/source.py CHANGED Viewed

@@ -135,6 +135,10 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
         default=False,
         description="Represent a schema identifiers combined with quoting preferences. See [sqlalchemy quoted_name docs](https://docs.sqlalchemy.org/en/20/core/sqlelement.html#sqlalchemy.sql.expression.quoted_name).",
     )
+    is_aws_rds: Optional[bool] = Field(
+        default=None,
+        description="Indicates if the SQL Server instance is running on AWS RDS. When None (default), automatic detection will be attempted using server name analysis.",
+    )
     @pydantic.validator("uri_args")
     def passwords_match(cls, v, values, **kwargs):
@@ -367,18 +371,42 @@ class SQLServerSource(SQLAlchemySource):
     def _detect_rds_environment(self, conn: Connection) -> bool:
         """
         Detect if we're running in an RDS/managed environment vs on-premises.
+        Uses explicit configuration if provided, otherwise attempts automatic detection.
         Returns True if RDS/managed, False if on-premises.
         """
+        if self.config.is_aws_rds is not None:
+            logger.info(
+                f"Using explicit is_aws_rds configuration: {self.config.is_aws_rds}"
+            )
+            return self.config.is_aws_rds
         try:
-            # Try to access system tables directly - this typically fails in RDS
-            conn.execute("SELECT TOP 1 * FROM msdb.dbo.sysjobs")
-            logger.debug(
-                "Direct table access successful - likely on-premises environment"
+            result = conn.execute("SELECT @@servername AS server_name")
+            server_name_row = result.fetchone()
+            if server_name_row:
+                server_name = server_name_row["server_name"].lower()
+                aws_indicators = ["amazon", "amzn", "amaz", "ec2", "rds.amazonaws.com"]
+                is_rds = any(indicator in server_name for indicator in aws_indicators)
+                if is_rds:
+                    logger.info(f"AWS RDS detected based on server name: {server_name}")
+                else:
+                    logger.info(
+                        f"Non-RDS environment detected based on server name: {server_name}"
+                    )
+                return is_rds
+            else:
+                logger.warning(
+                    "Could not retrieve server name, assuming non-RDS environment"
+                )
+                return False
+        except Exception as e:
+            logger.warning(
+                f"Failed to detect RDS/managed vs on-prem env, assuming non-RDS environment ({e})"
             )
             return False
-        except Exception:
-            logger.debug("Direct table access failed - likely RDS/managed environment")
-            return True
     def _get_jobs(self, conn: Connection, db_name: str) -> Dict[str, Dict[str, Any]]:
         """
@@ -453,7 +481,10 @@ class SQLServerSource(SQLAlchemySource):
         jobs_result = conn.execute("EXEC msdb.dbo.sp_help_job")
         jobs_data = {}
-        for row in jobs_result:
+        # SQLAlchemy 1.3 support was dropped in Sept 2023 (PR #8810)
+        # SQLAlchemy 1.4+ returns LegacyRow objects that don't support dictionary-style .get() method
+        # Use .mappings() to get MappingResult with dictionary-like rows that support .get()
+        for row in jobs_result.mappings():
             job_id = str(row["job_id"])
             jobs_data[job_id] = {
                 "job_id": job_id,
@@ -473,7 +504,8 @@ class SQLServerSource(SQLAlchemySource):
                 )
                 job_steps = {}
-                for step_row in steps_result:
+                # Use .mappings() for dictionary-like access (SQLAlchemy 1.4+ compatibility)
+                for step_row in steps_result.mappings():
                     # Only include steps that run against our target database
                     step_database = step_row.get("database_name", "")
                     if step_database.lower() == db_name.lower() or not step_database:

datahub/ingestion/source/sql_queries.py CHANGED Viewed

@@ -93,7 +93,7 @@ class SqlQueriesSourceReport(SourceReport):
     sql_aggregator: Optional[SqlAggregatorReport] = None
-@platform_name("SQL Queries")
+@platform_name("SQL Queries", id="sql-queries")
 @config_class(SqlQueriesSourceConfig)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.LINEAGE_COARSE, "Parsed from SQL queries")

datahub/ingestion/source/unity/source.py CHANGED Viewed

@@ -176,7 +176,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     supported=True,
 )
 @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
-@support_status(SupportStatus.INCUBATING)
+@support_status(SupportStatus.CERTIFIED)
 class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following metadata from Databricks Unity Catalog:

datahub/ingestion/source/vertexai/vertexai.py CHANGED Viewed

@@ -145,7 +145,7 @@ class PipelineMetadata:
 @platform_name("Vertex AI", id="vertexai")
 @config_class(VertexAIConfig)
-@support_status(SupportStatus.TESTING)
+@support_status(SupportStatus.INCUBATING)
 @capability(
     SourceCapability.DESCRIPTIONS,
     "Extract descriptions for Vertex AI Registered Models and Model Versions",

datahub/metadata/schema.avsc CHANGED Viewed

@@ -1996,7 +1996,8 @@
             "mlPrimaryKey",
             "mlModelGroup",
             "domain",
-            "dataProduct"
+            "dataProduct",
+            "businessAttribute"
           ],
           "name": "ReferencedBy"
         },
@@ -2023,7 +2024,8 @@
           "mlPrimaryKey",
           "mlModelGroup",
           "domain",
-          "dataProduct"
+          "dataProduct",
+          "businessAttribute"
         ],
         "type": [
           "null",

datahub/metadata/schemas/DataHubFileInfo.avsc CHANGED Viewed

@@ -91,7 +91,8 @@
           "mlPrimaryKey",
           "mlModelGroup",
           "domain",
-          "dataProduct"
+          "dataProduct",
+          "businessAttribute"
         ],
         "name": "ReferencedBy"
       },
@@ -125,7 +126,8 @@
         "mlPrimaryKey",
         "mlModelGroup",
         "domain",
-        "dataProduct"
+        "dataProduct",
+        "businessAttribute"
       ]
     },
     {

datahub/sdk/mlmodel.py CHANGED Viewed

@@ -293,6 +293,25 @@ class MLModel(
                 job for job in props.downstreamJobs if job != job_str
             ]
+    @property
+    def deployments(self) -> Optional[List[str]]:
+        return self._ensure_model_props().deployments
+    def set_deployments(self, deployments: Sequence[str]) -> None:
+        self._ensure_model_props().deployments = list(deployments)
+    def add_deployment(self, deployment: str) -> None:
+        props = self._ensure_model_props()
+        if props.deployments is None:
+            props.deployments = []
+        if deployment not in props.deployments:
+            props.deployments.append(deployment)
+    def remove_deployment(self, deployment: str) -> None:
+        props = self._ensure_model_props()
+        if props.deployments is not None:
+            props.deployments = [d for d in props.deployments if d != deployment]
     def _init_basic_properties(
         self,
         version: Optional[str] = None,

{acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/WHEEL RENAMED Viewed

File without changes

{acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/top_level.txt RENAMED Viewed

File without changes

acryl-datahub 1.3.0.1rc6__py3-none-any.whl → 1.3.0.1rc8__py3-none-any.whl

Potentially problematic release.

acryl-datahub 1.3.0.1rc6__py3-none-any.whl → 1.3.0.1rc8__py3-none-any.whl