acryl-datahub 1.2.0.10rc3__py3-none-any.whl → 1.2.0.10rc4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/METADATA +2668 -2752
- {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/RECORD +82 -82
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +6 -3
- datahub/api/entities/dataset/dataset.py +9 -18
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/docker_check.py +2 -2
- datahub/configuration/common.py +29 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/pydantic_migration_helpers.py +0 -9
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +5 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/source/azure/azure_common.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
- datahub/ingestion/source/datahub/config.py +8 -9
- datahub/ingestion/source/delta_lake/config.py +1 -1
- datahub/ingestion/source/dremio/dremio_config.py +3 -4
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/ge_profiling_config.py +26 -22
- datahub/ingestion/source/grafana/grafana_config.py +2 -2
- datahub/ingestion/source/grafana/models.py +12 -14
- datahub/ingestion/source/hex/hex.py +6 -1
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/looker/looker_common.py +1 -1
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/lookml_config.py +1 -1
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +1 -1
- datahub/ingestion/source/mode.py +13 -5
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +14 -21
- datahub/ingestion/source/preset.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/redshift/config.py +6 -3
- datahub/ingestion/source/salesforce.py +13 -9
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
- datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/clickhouse.py +12 -7
- datahub/ingestion/source/sql/cockroachdb.py +5 -3
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +7 -9
- datahub/ingestion/source/sql/mssql/source.py +2 -2
- datahub/ingestion/source/sql/mysql.py +2 -2
- datahub/ingestion/source/sql/oracle.py +3 -3
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/teradata.py +4 -4
- datahub/ingestion/source/sql/trino.py +2 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +1 -1
- datahub/ingestion/source/sql_queries.py +6 -6
- datahub/ingestion/source/state/checkpoint.py +5 -1
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
- datahub/ingestion/source/superset.py +1 -2
- datahub/ingestion/source/tableau/tableau.py +17 -3
- datahub/ingestion/source/unity/config.py +7 -3
- datahub/ingestion/source/usage/usage_common.py +3 -3
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/sdk/search_filters.py +1 -7
- {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
from pydantic.fields import Field
|
|
2
2
|
|
|
3
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
3
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
4
4
|
from datahub.ingestion.api.common import PipelineContext
|
|
5
5
|
from datahub.ingestion.api.decorators import (
|
|
6
6
|
SourceCapability,
|
|
@@ -14,8 +14,10 @@ from datahub.ingestion.source.sql.postgres import PostgresConfig, PostgresSource
|
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
class CockroachDBConfig(PostgresConfig):
|
|
17
|
-
scheme = Field(
|
|
18
|
-
|
|
17
|
+
scheme: HiddenFromDocs[str] = Field(
|
|
18
|
+
default="cockroachdb+psycopg2", description="database scheme"
|
|
19
|
+
)
|
|
20
|
+
schema_pattern: AllowDenyPattern = Field(
|
|
19
21
|
default=AllowDenyPattern(deny=["information_schema", "crdb_internal"])
|
|
20
22
|
)
|
|
21
23
|
|
|
@@ -6,7 +6,7 @@ from pydantic.fields import Field
|
|
|
6
6
|
from pydruid.db.sqlalchemy import DruidDialect
|
|
7
7
|
from sqlalchemy.exc import ResourceClosedError
|
|
8
8
|
|
|
9
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
9
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
10
10
|
from datahub.ingestion.api.decorators import (
|
|
11
11
|
SourceCapability,
|
|
12
12
|
SupportStatus,
|
|
@@ -34,7 +34,7 @@ DruidDialect.get_table_names = get_table_names
|
|
|
34
34
|
|
|
35
35
|
class DruidConfig(BasicSQLAlchemyConfig):
|
|
36
36
|
# defaults
|
|
37
|
-
scheme: str = "druid"
|
|
37
|
+
scheme: HiddenFromDocs[str] = "druid"
|
|
38
38
|
schema_pattern: AllowDenyPattern = Field(
|
|
39
39
|
default=AllowDenyPattern(deny=["^(lookup|sysgit|view).*"]),
|
|
40
40
|
description="regex patterns for schemas to filter in ingestion.",
|
|
@@ -6,7 +6,7 @@ from enum import Enum
|
|
|
6
6
|
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
|
7
7
|
from urllib.parse import urlparse
|
|
8
8
|
|
|
9
|
-
from pydantic
|
|
9
|
+
from pydantic import validator
|
|
10
10
|
from pydantic.fields import Field
|
|
11
11
|
|
|
12
12
|
# This import verifies that the dependencies are available.
|
|
@@ -14,6 +14,7 @@ from pyhive import hive # noqa: F401
|
|
|
14
14
|
from pyhive.sqlalchemy_hive import HiveDate, HiveDecimal, HiveDialect, HiveTimestamp
|
|
15
15
|
from sqlalchemy.engine.reflection import Inspector
|
|
16
16
|
|
|
17
|
+
from datahub.configuration.common import HiddenFromDocs
|
|
17
18
|
from datahub.emitter.mce_builder import (
|
|
18
19
|
make_data_platform_urn,
|
|
19
20
|
make_dataplatform_instance_urn,
|
|
@@ -651,10 +652,10 @@ HiveDialect.get_view_definition = get_view_definition_patched
|
|
|
651
652
|
|
|
652
653
|
class HiveConfig(TwoTierSQLAlchemyConfig):
|
|
653
654
|
# defaults
|
|
654
|
-
scheme: str = Field(default="hive"
|
|
655
|
+
scheme: HiddenFromDocs[str] = Field(default="hive")
|
|
655
656
|
|
|
656
657
|
# Overriding as table location lineage is richer implementation here than with include_table_location_lineage
|
|
657
|
-
include_table_location_lineage: bool = Field(default=False
|
|
658
|
+
include_table_location_lineage: HiddenFromDocs[bool] = Field(default=False)
|
|
658
659
|
|
|
659
660
|
emit_storage_lineage: bool = Field(
|
|
660
661
|
default=False,
|
|
@@ -1,17 +1,15 @@
|
|
|
1
1
|
import base64
|
|
2
|
+
import dataclasses
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
4
5
|
from collections import namedtuple
|
|
5
6
|
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
|
6
7
|
|
|
7
|
-
from pydantic
|
|
8
|
-
from pydantic.fields import Field
|
|
9
|
-
|
|
10
|
-
# This import verifies that the dependencies are available.
|
|
8
|
+
from pydantic import Field
|
|
11
9
|
from sqlalchemy import create_engine, text
|
|
12
10
|
from sqlalchemy.engine.reflection import Inspector
|
|
13
11
|
|
|
14
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
12
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
15
13
|
from datahub.emitter.mce_builder import make_dataset_urn_with_platform_instance
|
|
16
14
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
17
15
|
from datahub.ingestion.api.common import PipelineContext
|
|
@@ -73,7 +71,7 @@ class HiveMetastoreConfigMode(StrEnum):
|
|
|
73
71
|
trino = "trino"
|
|
74
72
|
|
|
75
73
|
|
|
76
|
-
@dataclass
|
|
74
|
+
@dataclasses.dataclass
|
|
77
75
|
class ViewDataset:
|
|
78
76
|
dataset_name: str
|
|
79
77
|
schema_name: str
|
|
@@ -99,7 +97,7 @@ class HiveMetastore(BasicSQLAlchemyConfig):
|
|
|
99
97
|
default="localhost:3306",
|
|
100
98
|
description="Host URL and port to connect to. Example: localhost:3306",
|
|
101
99
|
)
|
|
102
|
-
scheme: str = Field(default="mysql+pymysql"
|
|
100
|
+
scheme: HiddenFromDocs[str] = Field(default="mysql+pymysql")
|
|
103
101
|
|
|
104
102
|
database_pattern: AllowDenyPattern = Field(
|
|
105
103
|
default=AllowDenyPattern.allow_all(),
|
|
@@ -123,8 +121,8 @@ class HiveMetastore(BasicSQLAlchemyConfig):
|
|
|
123
121
|
description="Dataset Subtype name to be 'Table' or 'View' Valid options: ['True', 'False']",
|
|
124
122
|
)
|
|
125
123
|
|
|
126
|
-
include_view_lineage: bool = Field(
|
|
127
|
-
default=False,
|
|
124
|
+
include_view_lineage: HiddenFromDocs[bool] = Field(
|
|
125
|
+
default=False,
|
|
128
126
|
)
|
|
129
127
|
|
|
130
128
|
include_catalog_name_in_ids: bool = Field(
|
|
@@ -13,7 +13,7 @@ from sqlalchemy.exc import ProgrammingError, ResourceClosedError
|
|
|
13
13
|
from sqlalchemy.sql import quoted_name
|
|
14
14
|
|
|
15
15
|
import datahub.metadata.schema_classes as models
|
|
16
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
16
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
17
17
|
from datahub.configuration.pattern_utils import UUID_REGEX
|
|
18
18
|
from datahub.emitter.mcp import MetadataChangeProposalWrapper
|
|
19
19
|
from datahub.ingestion.api.common import PipelineContext
|
|
@@ -75,7 +75,7 @@ DEFAULT_TEMP_TABLES_PATTERNS = [
|
|
|
75
75
|
class SQLServerConfig(BasicSQLAlchemyConfig):
|
|
76
76
|
# defaults
|
|
77
77
|
host_port: str = Field(default="localhost:1433", description="MSSQL host URL.")
|
|
78
|
-
scheme: str = Field(default="mssql+pytds"
|
|
78
|
+
scheme: HiddenFromDocs[str] = Field(default="mssql+pytds")
|
|
79
79
|
|
|
80
80
|
# TODO: rename to include_procedures ?
|
|
81
81
|
include_stored_procedures: bool = Field(
|
|
@@ -9,7 +9,7 @@ from sqlalchemy.dialects.mysql import BIT, base
|
|
|
9
9
|
from sqlalchemy.dialects.mysql.enumerated import SET
|
|
10
10
|
from sqlalchemy.engine.reflection import Inspector
|
|
11
11
|
|
|
12
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
12
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
13
13
|
from datahub.ingestion.api.decorators import (
|
|
14
14
|
SourceCapability,
|
|
15
15
|
SupportStatus,
|
|
@@ -57,7 +57,7 @@ base.ischema_names["decimal128"] = DECIMAL128
|
|
|
57
57
|
class MySQLConnectionConfig(SQLAlchemyConnectionConfig):
|
|
58
58
|
# defaults
|
|
59
59
|
host_port: str = Field(default="localhost:3306", description="MySQL host URL.")
|
|
60
|
-
scheme: str = "mysql+pymysql"
|
|
60
|
+
scheme: HiddenFromDocs[str] = "mysql+pymysql"
|
|
61
61
|
|
|
62
62
|
|
|
63
63
|
class MySQLConfig(MySQLConnectionConfig, TwoTierSQLAlchemyConfig):
|
|
@@ -110,10 +110,10 @@ class OracleConfig(BasicSQLAlchemyConfig):
|
|
|
110
110
|
return v
|
|
111
111
|
|
|
112
112
|
@pydantic.validator("data_dictionary_mode")
|
|
113
|
-
def check_data_dictionary_mode(cls,
|
|
114
|
-
if
|
|
113
|
+
def check_data_dictionary_mode(cls, value):
|
|
114
|
+
if value not in ("ALL", "DBA"):
|
|
115
115
|
raise ValueError("Specify one of data dictionary views mode: 'ALL', 'DBA'.")
|
|
116
|
-
return
|
|
116
|
+
return value
|
|
117
117
|
|
|
118
118
|
@pydantic.validator("thick_mode_lib_dir", always=True)
|
|
119
119
|
def check_thick_mode_lib_dir(cls, v, values):
|
|
@@ -8,6 +8,7 @@ from sqlalchemy import exc, sql
|
|
|
8
8
|
from sqlalchemy.engine import reflection
|
|
9
9
|
from sqlalchemy.engine.base import Engine
|
|
10
10
|
|
|
11
|
+
from datahub.configuration.common import HiddenFromDocs
|
|
11
12
|
from datahub.ingestion.api.common import PipelineContext
|
|
12
13
|
from datahub.ingestion.api.decorators import (
|
|
13
14
|
SourceCapability,
|
|
@@ -87,7 +88,7 @@ PrestoDialect._get_full_table = _get_full_table
|
|
|
87
88
|
|
|
88
89
|
class PrestoConfig(TrinoConfig):
|
|
89
90
|
# defaults
|
|
90
|
-
scheme: str = Field(default="presto"
|
|
91
|
+
scheme: HiddenFromDocs[str] = Field(default="presto")
|
|
91
92
|
|
|
92
93
|
|
|
93
94
|
@platform_name("Presto", doc_order=1)
|
|
@@ -468,23 +468,23 @@ class TeradataConfig(BaseTeradataConfig, BaseTimeWindowConfig):
|
|
|
468
468
|
),
|
|
469
469
|
)
|
|
470
470
|
|
|
471
|
-
database_pattern = Field(
|
|
471
|
+
database_pattern: AllowDenyPattern = Field(
|
|
472
472
|
default=AllowDenyPattern(deny=EXCLUDED_DATABASES),
|
|
473
473
|
description="Regex patterns for databases to filter in ingestion.",
|
|
474
474
|
)
|
|
475
|
-
include_table_lineage = Field(
|
|
475
|
+
include_table_lineage: bool = Field(
|
|
476
476
|
default=False,
|
|
477
477
|
description="Whether to include table lineage in the ingestion. "
|
|
478
478
|
"This requires to have the table lineage feature enabled.",
|
|
479
479
|
)
|
|
480
480
|
|
|
481
|
-
include_view_lineage = Field(
|
|
481
|
+
include_view_lineage: bool = Field(
|
|
482
482
|
default=True,
|
|
483
483
|
description="Whether to include view lineage in the ingestion. "
|
|
484
484
|
"This requires to have the view lineage feature enabled.",
|
|
485
485
|
)
|
|
486
486
|
|
|
487
|
-
include_queries = Field(
|
|
487
|
+
include_queries: bool = Field(
|
|
488
488
|
default=True,
|
|
489
489
|
description="Whether to generate query entities for SQL queries. "
|
|
490
490
|
"Query entities provide metadata about individual SQL queries including "
|
|
@@ -18,6 +18,7 @@ from sqlalchemy.types import TypeEngine
|
|
|
18
18
|
from trino.sqlalchemy import datatype
|
|
19
19
|
from trino.sqlalchemy.dialect import TrinoDialect
|
|
20
20
|
|
|
21
|
+
from datahub.configuration.common import HiddenFromDocs
|
|
21
22
|
from datahub.configuration.source_common import (
|
|
22
23
|
EnvConfigMixin,
|
|
23
24
|
PlatformInstanceConfigMixin,
|
|
@@ -222,7 +223,7 @@ class ConnectorDetail(PlatformInstanceConfigMixin, EnvConfigMixin):
|
|
|
222
223
|
|
|
223
224
|
class TrinoConfig(BasicSQLAlchemyConfig):
|
|
224
225
|
# defaults
|
|
225
|
-
scheme: str = Field(default="trino"
|
|
226
|
+
scheme: HiddenFromDocs[str] = Field(default="trino")
|
|
226
227
|
database: str = Field(description="database (catalog)")
|
|
227
228
|
|
|
228
229
|
catalog_to_connector_details: Dict[str, ConnectorDetail] = Field(
|
|
@@ -7,7 +7,7 @@ from sqlalchemy import create_engine, inspect
|
|
|
7
7
|
from sqlalchemy.engine import URL
|
|
8
8
|
from sqlalchemy.engine.reflection import Inspector
|
|
9
9
|
|
|
10
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
10
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
11
11
|
from datahub.configuration.validate_field_rename import pydantic_renamed_field
|
|
12
12
|
from datahub.emitter.mcp_builder import ContainerKey
|
|
13
13
|
from datahub.ingestion.api.workunit import MetadataWorkUnit
|
|
@@ -27,11 +27,10 @@ class TwoTierSQLAlchemyConfig(BasicSQLAlchemyConfig):
|
|
|
27
27
|
default=AllowDenyPattern.allow_all(),
|
|
28
28
|
description="Regex patterns for databases to filter in ingestion.",
|
|
29
29
|
)
|
|
30
|
-
schema_pattern: AllowDenyPattern = Field(
|
|
30
|
+
schema_pattern: HiddenFromDocs[AllowDenyPattern] = Field(
|
|
31
31
|
# The superclass contains a `schema_pattern` field, so we need this here
|
|
32
32
|
# to override the documentation.
|
|
33
33
|
default=AllowDenyPattern.allow_all(),
|
|
34
|
-
hidden_from_docs=True,
|
|
35
34
|
description="Deprecated in favour of database_pattern.",
|
|
36
35
|
)
|
|
37
36
|
|
|
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Set, Tupl
|
|
|
5
5
|
|
|
6
6
|
import pydantic
|
|
7
7
|
import pytest
|
|
8
|
-
from pydantic
|
|
8
|
+
from pydantic import validator
|
|
9
9
|
from vertica_sqlalchemy_dialect.base import VerticaInspector
|
|
10
10
|
|
|
11
11
|
from datahub.configuration.common import AllowDenyPattern
|
|
@@ -8,6 +8,7 @@ from typing import ClassVar, Iterable, List, Optional, Union
|
|
|
8
8
|
|
|
9
9
|
from pydantic import BaseModel, Field, validator
|
|
10
10
|
|
|
11
|
+
from datahub.configuration.common import HiddenFromDocs
|
|
11
12
|
from datahub.configuration.datetimes import parse_user_datetime
|
|
12
13
|
from datahub.configuration.source_common import (
|
|
13
14
|
EnvConfigMixin,
|
|
@@ -66,22 +67,21 @@ class SqlQueriesSourceConfig(
|
|
|
66
67
|
default=BaseUsageConfig(),
|
|
67
68
|
)
|
|
68
69
|
|
|
69
|
-
use_schema_resolver: bool = Field(
|
|
70
|
+
use_schema_resolver: HiddenFromDocs[bool] = Field(
|
|
71
|
+
True,
|
|
70
72
|
description="Read SchemaMetadata aspects from DataHub to aid in SQL parsing. Turn off only for testing.",
|
|
71
|
-
default=True,
|
|
72
|
-
hidden_from_docs=True,
|
|
73
73
|
)
|
|
74
74
|
default_db: Optional[str] = Field(
|
|
75
|
+
None,
|
|
75
76
|
description="The default database to use for unqualified table names",
|
|
76
|
-
default=None,
|
|
77
77
|
)
|
|
78
78
|
default_schema: Optional[str] = Field(
|
|
79
|
+
None,
|
|
79
80
|
description="The default schema to use for unqualified table names",
|
|
80
|
-
default=None,
|
|
81
81
|
)
|
|
82
82
|
override_dialect: Optional[str] = Field(
|
|
83
|
+
None,
|
|
83
84
|
description="The SQL dialect to use when parsing queries. Overrides automatic dialect detection.",
|
|
84
|
-
default=None,
|
|
85
85
|
)
|
|
86
86
|
|
|
87
87
|
|
|
@@ -68,7 +68,11 @@ class CheckpointStateBase(ConfigModel):
|
|
|
68
68
|
|
|
69
69
|
@staticmethod
|
|
70
70
|
def _to_bytes_utf8(model: ConfigModel) -> bytes:
|
|
71
|
-
|
|
71
|
+
pydantic_json = model.model_dump_json(exclude={"version", "serde"})
|
|
72
|
+
# We decode and re-encode so that Python's default whitespace is included.
|
|
73
|
+
# This is purely to keep tests consistent as we migrate to pydantic v2,
|
|
74
|
+
# and can be removed once we're fully migrated.
|
|
75
|
+
return json.dumps(json.loads(pydantic_json)).encode("utf-8")
|
|
72
76
|
|
|
73
77
|
@staticmethod
|
|
74
78
|
def _to_bytes_base85_json(
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
from typing import Any, Dict, Iterable, List, Tuple, Type
|
|
1
|
+
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Tuple, Type
|
|
2
2
|
|
|
3
3
|
import pydantic
|
|
4
4
|
|
|
@@ -8,13 +8,16 @@ from datahub.utilities.checkpoint_state_util import CheckpointStateUtil
|
|
|
8
8
|
from datahub.utilities.dedup_list import deduplicate_list
|
|
9
9
|
from datahub.utilities.urns.urn import guess_entity_type
|
|
10
10
|
|
|
11
|
+
if TYPE_CHECKING:
|
|
12
|
+
from pydantic.deprecated.class_validators import V1RootValidator
|
|
13
|
+
|
|
11
14
|
STATEFUL_INGESTION_IGNORED_ENTITY_TYPES = {
|
|
12
15
|
"dataProcessInstance",
|
|
13
16
|
"query",
|
|
14
17
|
}
|
|
15
18
|
|
|
16
19
|
|
|
17
|
-
def pydantic_state_migrator(mapping: Dict[str, str]) ->
|
|
20
|
+
def pydantic_state_migrator(mapping: Dict[str, str]) -> "V1RootValidator":
|
|
18
21
|
# mapping would be something like:
|
|
19
22
|
# {
|
|
20
23
|
# 'encoded_view_urns': 'dataset',
|
|
@@ -10,6 +10,7 @@ from datahub.configuration.common import (
|
|
|
10
10
|
ConfigModel,
|
|
11
11
|
ConfigurationError,
|
|
12
12
|
DynamicTypedConfig,
|
|
13
|
+
HiddenFromDocs,
|
|
13
14
|
)
|
|
14
15
|
from datahub.configuration.pydantic_migration_helpers import GenericModel
|
|
15
16
|
from datahub.configuration.time_window_config import BaseTimeWindowConfig
|
|
@@ -55,25 +56,21 @@ class StatefulIngestionConfig(ConfigModel):
|
|
|
55
56
|
description="Whether or not to enable stateful ingest. "
|
|
56
57
|
"Default: True if a pipeline_name is set and either a datahub-rest sink or `datahub_api` is specified, otherwise False",
|
|
57
58
|
)
|
|
58
|
-
max_checkpoint_state_size: pydantic.PositiveInt = Field(
|
|
59
|
+
max_checkpoint_state_size: HiddenFromDocs[pydantic.PositiveInt] = Field(
|
|
59
60
|
default=2**24, # 16 MB
|
|
60
61
|
description="The maximum size of the checkpoint state in bytes. Default is 16MB",
|
|
61
|
-
hidden_from_docs=True,
|
|
62
62
|
)
|
|
63
|
-
state_provider: Optional[DynamicTypedStateProviderConfig] = Field(
|
|
63
|
+
state_provider: HiddenFromDocs[Optional[DynamicTypedStateProviderConfig]] = Field(
|
|
64
64
|
default=None,
|
|
65
65
|
description="The ingestion state provider configuration.",
|
|
66
|
-
hidden_from_docs=True,
|
|
67
66
|
)
|
|
68
|
-
ignore_old_state: bool = Field(
|
|
67
|
+
ignore_old_state: HiddenFromDocs[bool] = Field(
|
|
69
68
|
default=False,
|
|
70
69
|
description="If set to True, ignores the previous checkpoint state.",
|
|
71
|
-
hidden_from_docs=True,
|
|
72
70
|
)
|
|
73
|
-
ignore_new_state: bool = Field(
|
|
71
|
+
ignore_new_state: HiddenFromDocs[bool] = Field(
|
|
74
72
|
default=False,
|
|
75
73
|
description="If set to True, ignores the current checkpoint state.",
|
|
76
|
-
hidden_from_docs=True,
|
|
77
74
|
)
|
|
78
75
|
|
|
79
76
|
@pydantic.root_validator(skip_on_failure=True)
|
|
@@ -9,8 +9,7 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
|
|
|
9
9
|
import dateutil.parser as dp
|
|
10
10
|
import requests
|
|
11
11
|
import sqlglot
|
|
12
|
-
from pydantic import BaseModel
|
|
13
|
-
from pydantic.class_validators import root_validator, validator
|
|
12
|
+
from pydantic import BaseModel, root_validator, validator
|
|
14
13
|
from pydantic.fields import Field
|
|
15
14
|
from requests.adapters import HTTPAdapter
|
|
16
15
|
from urllib3.util.retry import Retry
|
|
@@ -3,6 +3,7 @@ import logging
|
|
|
3
3
|
import re
|
|
4
4
|
import time
|
|
5
5
|
from collections import OrderedDict, defaultdict
|
|
6
|
+
from copy import deepcopy
|
|
6
7
|
from dataclasses import dataclass, field as dataclass_field
|
|
7
8
|
from datetime import datetime, timedelta, timezone
|
|
8
9
|
from functools import lru_cache
|
|
@@ -474,6 +475,13 @@ class TableauPageSizeConfig(ConfigModel):
|
|
|
474
475
|
return self.database_table_page_size or self.page_size
|
|
475
476
|
|
|
476
477
|
|
|
478
|
+
_IngestHiddenAssetsOptionsType = Literal["worksheet", "dashboard"]
|
|
479
|
+
_IngestHiddenAssetsOptions: List[_IngestHiddenAssetsOptionsType] = [
|
|
480
|
+
"worksheet",
|
|
481
|
+
"dashboard",
|
|
482
|
+
]
|
|
483
|
+
|
|
484
|
+
|
|
477
485
|
class TableauConfig(
|
|
478
486
|
DatasetLineageProviderConfigBase,
|
|
479
487
|
StatefulIngestionConfigBase,
|
|
@@ -625,8 +633,8 @@ class TableauConfig(
|
|
|
625
633
|
description="Configuration settings for ingesting Tableau groups and their capabilities as custom properties.",
|
|
626
634
|
)
|
|
627
635
|
|
|
628
|
-
ingest_hidden_assets: Union[List[
|
|
629
|
-
|
|
636
|
+
ingest_hidden_assets: Union[List[_IngestHiddenAssetsOptionsType], bool] = Field(
|
|
637
|
+
_IngestHiddenAssetsOptions,
|
|
630
638
|
description=(
|
|
631
639
|
"When enabled, hidden worksheets and dashboards are ingested into Datahub."
|
|
632
640
|
" If a dashboard or worksheet is hidden in Tableau the luid is blank."
|
|
@@ -648,6 +656,11 @@ class TableauConfig(
|
|
|
648
656
|
# pre = True because we want to take some decision before pydantic initialize the configuration to default values
|
|
649
657
|
@root_validator(pre=True)
|
|
650
658
|
def projects_backward_compatibility(cls, values: Dict) -> Dict:
|
|
659
|
+
# In-place update of the input dict would cause state contamination. This was discovered through test failures
|
|
660
|
+
# in test_hex.py where the same dict is reused.
|
|
661
|
+
# So a copy is performed first.
|
|
662
|
+
values = deepcopy(values)
|
|
663
|
+
|
|
651
664
|
projects = values.get("projects")
|
|
652
665
|
project_pattern = values.get("project_pattern")
|
|
653
666
|
project_path_pattern = values.get("project_path_pattern")
|
|
@@ -659,6 +672,7 @@ class TableauConfig(
|
|
|
659
672
|
values["project_pattern"] = AllowDenyPattern(
|
|
660
673
|
allow=[f"^{prj}$" for prj in projects]
|
|
661
674
|
)
|
|
675
|
+
values.pop("projects")
|
|
662
676
|
elif (project_pattern or project_path_pattern) and projects:
|
|
663
677
|
raise ValueError(
|
|
664
678
|
"projects is deprecated. Please use project_path_pattern only."
|
|
@@ -670,7 +684,7 @@ class TableauConfig(
|
|
|
670
684
|
|
|
671
685
|
return values
|
|
672
686
|
|
|
673
|
-
@root_validator()
|
|
687
|
+
@root_validator(skip_on_failure=True)
|
|
674
688
|
def validate_config_values(cls, values: Dict) -> Dict:
|
|
675
689
|
tags_for_hidden_assets = values.get("tags_for_hidden_assets")
|
|
676
690
|
ingest_tags = values.get("ingest_tags")
|
|
@@ -8,7 +8,12 @@ import pydantic
|
|
|
8
8
|
from pydantic import Field
|
|
9
9
|
from typing_extensions import Literal
|
|
10
10
|
|
|
11
|
-
from datahub.configuration.common import
|
|
11
|
+
from datahub.configuration.common import (
|
|
12
|
+
AllowDenyPattern,
|
|
13
|
+
ConfigEnum,
|
|
14
|
+
ConfigModel,
|
|
15
|
+
HiddenFromDocs,
|
|
16
|
+
)
|
|
12
17
|
from datahub.configuration.source_common import (
|
|
13
18
|
DatasetSourceConfigMixin,
|
|
14
19
|
LowerCaseDatasetUrnConfigMixin,
|
|
@@ -285,10 +290,9 @@ class UnityCatalogSourceConfig(
|
|
|
285
290
|
description="Limit the number of columns to get column level lineage. ",
|
|
286
291
|
)
|
|
287
292
|
|
|
288
|
-
lineage_max_workers: int = pydantic.Field(
|
|
293
|
+
lineage_max_workers: HiddenFromDocs[int] = pydantic.Field(
|
|
289
294
|
default=5 * (os.cpu_count() or 4),
|
|
290
295
|
description="Number of worker threads to use for column lineage thread pool executor. Set to 1 to disable.",
|
|
291
|
-
hidden_from_docs=True,
|
|
292
296
|
)
|
|
293
297
|
|
|
294
298
|
databricks_api_page_size: int = pydantic.Field(
|
|
@@ -18,7 +18,7 @@ import pydantic
|
|
|
18
18
|
from pydantic.fields import Field
|
|
19
19
|
|
|
20
20
|
import datahub.emitter.mce_builder as builder
|
|
21
|
-
from datahub.configuration.common import AllowDenyPattern
|
|
21
|
+
from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
|
|
22
22
|
from datahub.configuration.time_window_config import (
|
|
23
23
|
BaseTimeWindowConfig,
|
|
24
24
|
BucketDuration,
|
|
@@ -194,13 +194,13 @@ class GenericAggregatedDataset(Generic[ResourceType]):
|
|
|
194
194
|
|
|
195
195
|
|
|
196
196
|
class BaseUsageConfig(BaseTimeWindowConfig):
|
|
197
|
-
queries_character_limit: int = Field(
|
|
197
|
+
queries_character_limit: HiddenFromDocs[int] = Field(
|
|
198
|
+
# Hidden since we don't want to encourage people to break elasticsearch.
|
|
198
199
|
default=DEFAULT_QUERIES_CHARACTER_LIMIT,
|
|
199
200
|
description=(
|
|
200
201
|
"Total character limit for all queries in a single usage aspect."
|
|
201
202
|
" Queries will be truncated to length `queries_character_limit / top_n_queries`."
|
|
202
203
|
),
|
|
203
|
-
hidden_from_docs=True, # Don't want to encourage people to break elasticsearch
|
|
204
204
|
)
|
|
205
205
|
|
|
206
206
|
top_n_queries: pydantic.PositiveInt = Field(
|
|
@@ -2,6 +2,7 @@ import re
|
|
|
2
2
|
from typing import Dict, List, Optional, Union
|
|
3
3
|
from urllib.parse import urlparse
|
|
4
4
|
|
|
5
|
+
import pydantic
|
|
5
6
|
from pydantic import Field, validator
|
|
6
7
|
|
|
7
8
|
from datahub.configuration.common import AllowDenyPattern
|
|
@@ -121,7 +122,8 @@ class PulsarSourceConfig(
|
|
|
121
122
|
)
|
|
122
123
|
return client_secret
|
|
123
124
|
|
|
124
|
-
@
|
|
125
|
+
@pydantic.field_validator("web_service_url", mode="after")
|
|
126
|
+
@classmethod
|
|
125
127
|
def web_service_url_scheme_host_port(cls, val: str) -> str:
|
|
126
128
|
# Tokenize the web url
|
|
127
129
|
url = urlparse(val)
|
datahub/sdk/search_filters.py
CHANGED
|
@@ -39,13 +39,7 @@ _OrFilters = List[_AndSearchFilterRule]
|
|
|
39
39
|
|
|
40
40
|
|
|
41
41
|
class _BaseFilter(ConfigModel):
|
|
42
|
-
|
|
43
|
-
# We can't wrap this in a TYPE_CHECKING block because the pydantic plugin
|
|
44
|
-
# doesn't recognize it properly. So unfortunately we'll need to live
|
|
45
|
-
# with the deprecation warning w/ pydantic v2.
|
|
46
|
-
allow_population_by_field_name = True
|
|
47
|
-
if PYDANTIC_VERSION_2:
|
|
48
|
-
populate_by_name = True
|
|
42
|
+
model_config = pydantic.ConfigDict(populate_by_name=True)
|
|
49
43
|
|
|
50
44
|
@abc.abstractmethod
|
|
51
45
|
def compile(self) -> _OrFilters: ...
|
{acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/WHEEL
RENAMED
|
File without changes
|
{acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc4.dist-info}/top_level.txt
RENAMED
|
File without changes
|