acryl-datahub 1.3.0.1rc6__py3-none-any.whl → 1.3.0.1rc8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/METADATA +2679 -2680
- {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/RECORD +35 -33
- datahub/_version.py +1 -1
- datahub/cli/docker_check.py +1 -1
- datahub/emitter/mce_builder.py +6 -0
- datahub/ingestion/autogenerated/capability_summary.json +12 -12
- datahub/ingestion/source/bigquery_v2/bigquery_queries.py +2 -0
- datahub/ingestion/source/common/subtypes.py +2 -0
- datahub/ingestion/source/dremio/dremio_source.py +15 -15
- datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
- datahub/ingestion/source/fivetran/config.py +33 -0
- datahub/ingestion/source/fivetran/fivetran.py +184 -13
- datahub/ingestion/source/fivetran/fivetran_log_api.py +20 -5
- datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
- datahub/ingestion/source/fivetran/response_models.py +97 -0
- datahub/ingestion/source/hex/hex.py +1 -1
- datahub/ingestion/source/iceberg/iceberg.py +1 -1
- datahub/ingestion/source/metabase.py +23 -4
- datahub/ingestion/source/mlflow.py +1 -1
- datahub/ingestion/source/s3/source.py +1 -1
- datahub/ingestion/source/salesforce.py +1 -1
- datahub/ingestion/source/slack/slack.py +1 -1
- datahub/ingestion/source/snowflake/snowflake_queries.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_summary.py +1 -1
- datahub/ingestion/source/sql/mssql/source.py +41 -9
- datahub/ingestion/source/sql_queries.py +1 -1
- datahub/ingestion/source/unity/source.py +1 -1
- datahub/ingestion/source/vertexai/vertexai.py +1 -1
- datahub/metadata/schema.avsc +4 -2
- datahub/metadata/schemas/DataHubFileInfo.avsc +4 -2
- datahub/sdk/mlmodel.py +19 -0
- {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/top_level.txt +0 -0
|
@@ -135,6 +135,10 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
|
|
|
135
135
|
default=False,
|
|
136
136
|
description="Represent a schema identifiers combined with quoting preferences. See [sqlalchemy quoted_name docs](https://docs.sqlalchemy.org/en/20/core/sqlelement.html#sqlalchemy.sql.expression.quoted_name).",
|
|
137
137
|
)
|
|
138
|
+
is_aws_rds: Optional[bool] = Field(
|
|
139
|
+
default=None,
|
|
140
|
+
description="Indicates if the SQL Server instance is running on AWS RDS. When None (default), automatic detection will be attempted using server name analysis.",
|
|
141
|
+
)
|
|
138
142
|
|
|
139
143
|
@pydantic.validator("uri_args")
|
|
140
144
|
def passwords_match(cls, v, values, **kwargs):
|
|
@@ -367,18 +371,42 @@ class SQLServerSource(SQLAlchemySource):
|
|
|
367
371
|
def _detect_rds_environment(self, conn: Connection) -> bool:
|
|
368
372
|
"""
|
|
369
373
|
Detect if we're running in an RDS/managed environment vs on-premises.
|
|
374
|
+
Uses explicit configuration if provided, otherwise attempts automatic detection.
|
|
370
375
|
Returns True if RDS/managed, False if on-premises.
|
|
371
376
|
"""
|
|
377
|
+
if self.config.is_aws_rds is not None:
|
|
378
|
+
logger.info(
|
|
379
|
+
f"Using explicit is_aws_rds configuration: {self.config.is_aws_rds}"
|
|
380
|
+
)
|
|
381
|
+
return self.config.is_aws_rds
|
|
382
|
+
|
|
372
383
|
try:
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
384
|
+
result = conn.execute("SELECT @@servername AS server_name")
|
|
385
|
+
server_name_row = result.fetchone()
|
|
386
|
+
if server_name_row:
|
|
387
|
+
server_name = server_name_row["server_name"].lower()
|
|
388
|
+
|
|
389
|
+
aws_indicators = ["amazon", "amzn", "amaz", "ec2", "rds.amazonaws.com"]
|
|
390
|
+
is_rds = any(indicator in server_name for indicator in aws_indicators)
|
|
391
|
+
if is_rds:
|
|
392
|
+
logger.info(f"AWS RDS detected based on server name: {server_name}")
|
|
393
|
+
else:
|
|
394
|
+
logger.info(
|
|
395
|
+
f"Non-RDS environment detected based on server name: {server_name}"
|
|
396
|
+
)
|
|
397
|
+
|
|
398
|
+
return is_rds
|
|
399
|
+
else:
|
|
400
|
+
logger.warning(
|
|
401
|
+
"Could not retrieve server name, assuming non-RDS environment"
|
|
402
|
+
)
|
|
403
|
+
return False
|
|
404
|
+
|
|
405
|
+
except Exception as e:
|
|
406
|
+
logger.warning(
|
|
407
|
+
f"Failed to detect RDS/managed vs on-prem env, assuming non-RDS environment ({e})"
|
|
377
408
|
)
|
|
378
409
|
return False
|
|
379
|
-
except Exception:
|
|
380
|
-
logger.debug("Direct table access failed - likely RDS/managed environment")
|
|
381
|
-
return True
|
|
382
410
|
|
|
383
411
|
def _get_jobs(self, conn: Connection, db_name: str) -> Dict[str, Dict[str, Any]]:
|
|
384
412
|
"""
|
|
@@ -453,7 +481,10 @@ class SQLServerSource(SQLAlchemySource):
|
|
|
453
481
|
jobs_result = conn.execute("EXEC msdb.dbo.sp_help_job")
|
|
454
482
|
jobs_data = {}
|
|
455
483
|
|
|
456
|
-
|
|
484
|
+
# SQLAlchemy 1.3 support was dropped in Sept 2023 (PR #8810)
|
|
485
|
+
# SQLAlchemy 1.4+ returns LegacyRow objects that don't support dictionary-style .get() method
|
|
486
|
+
# Use .mappings() to get MappingResult with dictionary-like rows that support .get()
|
|
487
|
+
for row in jobs_result.mappings():
|
|
457
488
|
job_id = str(row["job_id"])
|
|
458
489
|
jobs_data[job_id] = {
|
|
459
490
|
"job_id": job_id,
|
|
@@ -473,7 +504,8 @@ class SQLServerSource(SQLAlchemySource):
|
|
|
473
504
|
)
|
|
474
505
|
|
|
475
506
|
job_steps = {}
|
|
476
|
-
for
|
|
507
|
+
# Use .mappings() for dictionary-like access (SQLAlchemy 1.4+ compatibility)
|
|
508
|
+
for step_row in steps_result.mappings():
|
|
477
509
|
# Only include steps that run against our target database
|
|
478
510
|
step_database = step_row.get("database_name", "")
|
|
479
511
|
if step_database.lower() == db_name.lower() or not step_database:
|
|
@@ -93,7 +93,7 @@ class SqlQueriesSourceReport(SourceReport):
|
|
|
93
93
|
sql_aggregator: Optional[SqlAggregatorReport] = None
|
|
94
94
|
|
|
95
95
|
|
|
96
|
-
@platform_name("SQL Queries")
|
|
96
|
+
@platform_name("SQL Queries", id="sql-queries")
|
|
97
97
|
@config_class(SqlQueriesSourceConfig)
|
|
98
98
|
@support_status(SupportStatus.INCUBATING)
|
|
99
99
|
@capability(SourceCapability.LINEAGE_COARSE, "Parsed from SQL queries")
|
|
@@ -176,7 +176,7 @@ logger: logging.Logger = logging.getLogger(__name__)
|
|
|
176
176
|
supported=True,
|
|
177
177
|
)
|
|
178
178
|
@capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
|
|
179
|
-
@support_status(SupportStatus.
|
|
179
|
+
@support_status(SupportStatus.CERTIFIED)
|
|
180
180
|
class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
|
|
181
181
|
"""
|
|
182
182
|
This plugin extracts the following metadata from Databricks Unity Catalog:
|
|
@@ -145,7 +145,7 @@ class PipelineMetadata:
|
|
|
145
145
|
|
|
146
146
|
@platform_name("Vertex AI", id="vertexai")
|
|
147
147
|
@config_class(VertexAIConfig)
|
|
148
|
-
@support_status(SupportStatus.
|
|
148
|
+
@support_status(SupportStatus.INCUBATING)
|
|
149
149
|
@capability(
|
|
150
150
|
SourceCapability.DESCRIPTIONS,
|
|
151
151
|
"Extract descriptions for Vertex AI Registered Models and Model Versions",
|
datahub/metadata/schema.avsc
CHANGED
|
@@ -1996,7 +1996,8 @@
|
|
|
1996
1996
|
"mlPrimaryKey",
|
|
1997
1997
|
"mlModelGroup",
|
|
1998
1998
|
"domain",
|
|
1999
|
-
"dataProduct"
|
|
1999
|
+
"dataProduct",
|
|
2000
|
+
"businessAttribute"
|
|
2000
2001
|
],
|
|
2001
2002
|
"name": "ReferencedBy"
|
|
2002
2003
|
},
|
|
@@ -2023,7 +2024,8 @@
|
|
|
2023
2024
|
"mlPrimaryKey",
|
|
2024
2025
|
"mlModelGroup",
|
|
2025
2026
|
"domain",
|
|
2026
|
-
"dataProduct"
|
|
2027
|
+
"dataProduct",
|
|
2028
|
+
"businessAttribute"
|
|
2027
2029
|
],
|
|
2028
2030
|
"type": [
|
|
2029
2031
|
"null",
|
|
@@ -91,7 +91,8 @@
|
|
|
91
91
|
"mlPrimaryKey",
|
|
92
92
|
"mlModelGroup",
|
|
93
93
|
"domain",
|
|
94
|
-
"dataProduct"
|
|
94
|
+
"dataProduct",
|
|
95
|
+
"businessAttribute"
|
|
95
96
|
],
|
|
96
97
|
"name": "ReferencedBy"
|
|
97
98
|
},
|
|
@@ -125,7 +126,8 @@
|
|
|
125
126
|
"mlPrimaryKey",
|
|
126
127
|
"mlModelGroup",
|
|
127
128
|
"domain",
|
|
128
|
-
"dataProduct"
|
|
129
|
+
"dataProduct",
|
|
130
|
+
"businessAttribute"
|
|
129
131
|
]
|
|
130
132
|
},
|
|
131
133
|
{
|
datahub/sdk/mlmodel.py
CHANGED
|
@@ -293,6 +293,25 @@ class MLModel(
|
|
|
293
293
|
job for job in props.downstreamJobs if job != job_str
|
|
294
294
|
]
|
|
295
295
|
|
|
296
|
+
@property
|
|
297
|
+
def deployments(self) -> Optional[List[str]]:
|
|
298
|
+
return self._ensure_model_props().deployments
|
|
299
|
+
|
|
300
|
+
def set_deployments(self, deployments: Sequence[str]) -> None:
|
|
301
|
+
self._ensure_model_props().deployments = list(deployments)
|
|
302
|
+
|
|
303
|
+
def add_deployment(self, deployment: str) -> None:
|
|
304
|
+
props = self._ensure_model_props()
|
|
305
|
+
if props.deployments is None:
|
|
306
|
+
props.deployments = []
|
|
307
|
+
if deployment not in props.deployments:
|
|
308
|
+
props.deployments.append(deployment)
|
|
309
|
+
|
|
310
|
+
def remove_deployment(self, deployment: str) -> None:
|
|
311
|
+
props = self._ensure_model_props()
|
|
312
|
+
if props.deployments is not None:
|
|
313
|
+
props.deployments = [d for d in props.deployments if d != deployment]
|
|
314
|
+
|
|
296
315
|
def _init_basic_properties(
|
|
297
316
|
self,
|
|
298
317
|
version: Optional[str] = None,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|