acryl-datahub 1.3.0.1rc6__py3-none-any.whl → 1.3.0.1rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (35)
  1. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/METADATA +2679 -2680
  2. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/RECORD +35 -33
  3. datahub/_version.py +1 -1
  4. datahub/cli/docker_check.py +1 -1
  5. datahub/emitter/mce_builder.py +6 -0
  6. datahub/ingestion/autogenerated/capability_summary.json +12 -12
  7. datahub/ingestion/source/bigquery_v2/bigquery_queries.py +2 -0
  8. datahub/ingestion/source/common/subtypes.py +2 -0
  9. datahub/ingestion/source/dremio/dremio_source.py +15 -15
  10. datahub/ingestion/source/dynamodb/dynamodb.py +1 -1
  11. datahub/ingestion/source/fivetran/config.py +33 -0
  12. datahub/ingestion/source/fivetran/fivetran.py +184 -13
  13. datahub/ingestion/source/fivetran/fivetran_log_api.py +20 -5
  14. datahub/ingestion/source/fivetran/fivetran_rest_api.py +65 -0
  15. datahub/ingestion/source/fivetran/response_models.py +97 -0
  16. datahub/ingestion/source/hex/hex.py +1 -1
  17. datahub/ingestion/source/iceberg/iceberg.py +1 -1
  18. datahub/ingestion/source/metabase.py +23 -4
  19. datahub/ingestion/source/mlflow.py +1 -1
  20. datahub/ingestion/source/s3/source.py +1 -1
  21. datahub/ingestion/source/salesforce.py +1 -1
  22. datahub/ingestion/source/slack/slack.py +1 -1
  23. datahub/ingestion/source/snowflake/snowflake_queries.py +3 -0
  24. datahub/ingestion/source/snowflake/snowflake_summary.py +1 -1
  25. datahub/ingestion/source/sql/mssql/source.py +41 -9
  26. datahub/ingestion/source/sql_queries.py +1 -1
  27. datahub/ingestion/source/unity/source.py +1 -1
  28. datahub/ingestion/source/vertexai/vertexai.py +1 -1
  29. datahub/metadata/schema.avsc +4 -2
  30. datahub/metadata/schemas/DataHubFileInfo.avsc +4 -2
  31. datahub/sdk/mlmodel.py +19 -0
  32. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/WHEEL +0 -0
  33. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/entry_points.txt +0 -0
  34. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/licenses/LICENSE +0 -0
  35. {acryl_datahub-1.3.0.1rc6.dist-info → acryl_datahub-1.3.0.1rc8.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/sql/mssql/source.py CHANGED
@@ -135,6 +135,10 @@ class SQLServerConfig(BasicSQLAlchemyConfig):
         default=False,
         description="Represent a schema identifiers combined with quoting preferences. See [sqlalchemy quoted_name docs](https://docs.sqlalchemy.org/en/20/core/sqlelement.html#sqlalchemy.sql.expression.quoted_name).",
     )
+    is_aws_rds: Optional[bool] = Field(
+        default=None,
+        description="Indicates if the SQL Server instance is running on AWS RDS. When None (default), automatic detection will be attempted using server name analysis.",
+    )
 
     @pydantic.validator("uri_args")
     def passwords_match(cls, v, values, **kwargs):
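
The new `is_aws_rds` flag lets a user pin the environment explicitly instead of relying on the server-name detection added below. A minimal usage sketch, assuming the standard `SQLServerConfig` connection fields; the connection values here are placeholders:

```python
# Hypothetical values; only is_aws_rds is the new option from this diff.
from datahub.ingestion.source.sql.mssql.source import SQLServerConfig

config = SQLServerConfig.parse_obj(
    {
        "host_port": "my-db.abc123xy.us-east-1.rds.amazonaws.com:1433",
        "username": "datahub_reader",
        "password": "example-password",
        "database": "AdventureWorks",
        "is_aws_rds": True,  # skip auto-detection and force the RDS code paths
    }
)
```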
@@ -367,18 +371,42 @@ class SQLServerSource(SQLAlchemySource):
     def _detect_rds_environment(self, conn: Connection) -> bool:
         """
         Detect if we're running in an RDS/managed environment vs on-premises.
+        Uses explicit configuration if provided, otherwise attempts automatic detection.
         Returns True if RDS/managed, False if on-premises.
         """
+        if self.config.is_aws_rds is not None:
+            logger.info(
+                f"Using explicit is_aws_rds configuration: {self.config.is_aws_rds}"
+            )
+            return self.config.is_aws_rds
+
         try:
-            # Try to access system tables directly - this typically fails in RDS
-            conn.execute("SELECT TOP 1 * FROM msdb.dbo.sysjobs")
-            logger.debug(
-                "Direct table access successful - likely on-premises environment"
+            result = conn.execute("SELECT @@servername AS server_name")
+            server_name_row = result.fetchone()
+            if server_name_row:
+                server_name = server_name_row["server_name"].lower()
+
+                aws_indicators = ["amazon", "amzn", "amaz", "ec2", "rds.amazonaws.com"]
+                is_rds = any(indicator in server_name for indicator in aws_indicators)
+                if is_rds:
+                    logger.info(f"AWS RDS detected based on server name: {server_name}")
+                else:
+                    logger.info(
+                        f"Non-RDS environment detected based on server name: {server_name}"
+                    )
+
+                return is_rds
+            else:
+                logger.warning(
+                    "Could not retrieve server name, assuming non-RDS environment"
+                )
+                return False
+
+        except Exception as e:
+            logger.warning(
+                f"Failed to detect RDS/managed vs on-prem env, assuming non-RDS environment ({e})"
             )
             return False
-        except Exception:
-            logger.debug("Direct table access failed - likely RDS/managed environment")
-            return True
 
     def _get_jobs(self, conn: Connection, db_name: str) -> Dict[str, Dict[str, Any]]:
         """
@@ -453,7 +481,10 @@ class SQLServerSource(SQLAlchemySource):
             jobs_result = conn.execute("EXEC msdb.dbo.sp_help_job")
             jobs_data = {}
 
-            for row in jobs_result:
+            # SQLAlchemy 1.3 support was dropped in Sept 2023 (PR #8810)
+            # SQLAlchemy 1.4+ returns LegacyRow objects that don't support dictionary-style .get() method
+            # Use .mappings() to get MappingResult with dictionary-like rows that support .get()
+            for row in jobs_result.mappings():
                 job_id = str(row["job_id"])
                 jobs_data[job_id] = {
                     "job_id": job_id,
@@ -473,7 +504,8 @@ class SQLServerSource(SQLAlchemySource):
             )
 
             job_steps = {}
-            for step_row in steps_result:
+            # Use .mappings() for dictionary-like access (SQLAlchemy 1.4+ compatibility)
+            for step_row in steps_result.mappings():
                 # Only include steps that run against our target database
                 step_database = step_row.get("database_name", "")
                 if step_database.lower() == db_name.lower() or not step_database:
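
The `.mappings()` calls follow the standard SQLAlchemy 1.4+ pattern: plain iteration over a result yields tuple-like rows, while `.mappings()` yields dict-like `RowMapping` objects that support `.get()` with a default. A self-contained sketch against an in-memory SQLite engine:

```python
from sqlalchemy import create_engine, text

engine = create_engine("sqlite://")  # in-memory stand-in for SQL Server
with engine.connect() as conn:
    result = conn.execute(text("SELECT 1 AS job_id, 'etl_db' AS database_name"))
    for row in result.mappings():           # dict-like RowMapping objects
        print(row["job_id"])                # key access works
        print(row.get("missing_col", "-"))  # .get() with a default also works
```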
datahub/ingestion/source/sql_queries.py CHANGED
@@ -93,7 +93,7 @@ class SqlQueriesSourceReport(SourceReport):
     sql_aggregator: Optional[SqlAggregatorReport] = None
 
 
-@platform_name("SQL Queries")
+@platform_name("SQL Queries", id="sql-queries")
 @config_class(SqlQueriesSourceConfig)
 @support_status(SupportStatus.INCUBATING)
 @capability(SourceCapability.LINEAGE_COARSE, "Parsed from SQL queries")
datahub/ingestion/source/unity/source.py CHANGED
@@ -176,7 +176,7 @@ logger: logging.Logger = logging.getLogger(__name__)
     supported=True,
 )
 @capability(SourceCapability.TEST_CONNECTION, "Enabled by default")
-@support_status(SupportStatus.INCUBATING)
+@support_status(SupportStatus.CERTIFIED)
 class UnityCatalogSource(StatefulIngestionSourceBase, TestableSource):
     """
     This plugin extracts the following metadata from Databricks Unity Catalog:
datahub/ingestion/source/vertexai/vertexai.py CHANGED
@@ -145,7 +145,7 @@ class PipelineMetadata:
 
 @platform_name("Vertex AI", id="vertexai")
 @config_class(VertexAIConfig)
-@support_status(SupportStatus.TESTING)
+@support_status(SupportStatus.INCUBATING)
 @capability(
     SourceCapability.DESCRIPTIONS,
     "Extract descriptions for Vertex AI Registered Models and Model Versions",
datahub/metadata/schema.avsc CHANGED
@@ -1996,7 +1996,8 @@
               "mlPrimaryKey",
               "mlModelGroup",
               "domain",
-              "dataProduct"
+              "dataProduct",
+              "businessAttribute"
             ],
             "name": "ReferencedBy"
           },
@@ -2023,7 +2024,8 @@
               "mlPrimaryKey",
               "mlModelGroup",
               "domain",
-              "dataProduct"
+              "dataProduct",
+              "businessAttribute"
             ],
             "type": [
               "null",
datahub/metadata/schemas/DataHubFileInfo.avsc CHANGED
@@ -91,7 +91,8 @@
           "mlPrimaryKey",
           "mlModelGroup",
           "domain",
-          "dataProduct"
+          "dataProduct",
+          "businessAttribute"
         ],
         "name": "ReferencedBy"
       },
@@ -125,7 +126,8 @@
           "mlPrimaryKey",
           "mlModelGroup",
           "domain",
-          "dataProduct"
+          "dataProduct",
+          "businessAttribute"
         ]
       },
       {
datahub/sdk/mlmodel.py CHANGED
@@ -293,6 +293,25 @@ class MLModel(
                 job for job in props.downstreamJobs if job != job_str
             ]
 
+    @property
+    def deployments(self) -> Optional[List[str]]:
+        return self._ensure_model_props().deployments
+
+    def set_deployments(self, deployments: Sequence[str]) -> None:
+        self._ensure_model_props().deployments = list(deployments)
+
+    def add_deployment(self, deployment: str) -> None:
+        props = self._ensure_model_props()
+        if props.deployments is None:
+            props.deployments = []
+        if deployment not in props.deployments:
+            props.deployments.append(deployment)
+
+    def remove_deployment(self, deployment: str) -> None:
+        props = self._ensure_model_props()
+        if props.deployments is not None:
+            props.deployments = [d for d in props.deployments if d != deployment]
+
     def _init_basic_properties(
         self,
         version: Optional[str] = None,
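
A hypothetical usage sketch for the new deployment helpers on the SDK `MLModel`; the constructor arguments and the deployment URN format below are assumptions, and only the four `deployments` members come from this diff:

```python
from datahub.sdk.mlmodel import MLModel

model = MLModel(platform="mlflow", name="churn-model")  # constructor args assumed

endpoint = "urn:li:mlModelDeployment:(urn:li:dataPlatform:sagemaker,churn-endpoint,PROD)"
model.set_deployments([endpoint])
model.add_deployment(endpoint)     # no-op: add_deployment deduplicates
model.remove_deployment(endpoint)  # leaves an empty list
print(model.deployments)           # -> []
```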