acryl-datahub 1.2.0.9rc2__py3-none-any.whl → 1.2.0.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/METADATA +2553 -2611
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/RECORD +118 -111
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/entry_points.txt +2 -0
- datahub/_version.py +1 -1
- datahub/api/entities/assertion/assertion.py +1 -1
- datahub/api/entities/corpgroup/corpgroup.py +1 -1
- datahub/api/entities/dataproduct/dataproduct.py +6 -3
- datahub/api/entities/dataset/dataset.py +9 -18
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/api/graphql/operation.py +10 -6
- datahub/cli/docker_check.py +2 -2
- datahub/configuration/common.py +29 -1
- datahub/configuration/connection_resolver.py +5 -2
- datahub/configuration/import_resolver.py +7 -4
- datahub/configuration/pydantic_migration_helpers.py +0 -9
- datahub/configuration/source_common.py +3 -2
- datahub/configuration/validate_field_deprecation.py +5 -2
- datahub/configuration/validate_field_removal.py +5 -2
- datahub/configuration/validate_field_rename.py +6 -5
- datahub/configuration/validate_multiline_string.py +5 -2
- datahub/ingestion/autogenerated/capability_summary.json +45 -1
- datahub/ingestion/run/pipeline_config.py +2 -2
- datahub/ingestion/source/azure/azure_common.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
- datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
- datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
- datahub/ingestion/source/datahub/config.py +8 -9
- datahub/ingestion/source/dbt/dbt_common.py +65 -5
- datahub/ingestion/source/delta_lake/config.py +1 -1
- datahub/ingestion/source/dremio/dremio_config.py +3 -4
- datahub/ingestion/source/feast.py +8 -10
- datahub/ingestion/source/fivetran/config.py +1 -1
- datahub/ingestion/source/gcs/gcs_source.py +19 -2
- datahub/ingestion/source/ge_data_profiler.py +15 -2
- datahub/ingestion/source/ge_profiling_config.py +26 -22
- datahub/ingestion/source/grafana/grafana_config.py +2 -2
- datahub/ingestion/source/grafana/models.py +12 -14
- datahub/ingestion/source/hex/hex.py +6 -1
- datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
- datahub/ingestion/source/kafka_connect/common.py +2 -2
- datahub/ingestion/source/looker/looker_common.py +76 -75
- datahub/ingestion/source/looker/looker_config.py +15 -4
- datahub/ingestion/source/looker/looker_source.py +493 -547
- datahub/ingestion/source/looker/lookml_config.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +46 -88
- datahub/ingestion/source/metabase.py +9 -2
- datahub/ingestion/source/metadata/business_glossary.py +7 -7
- datahub/ingestion/source/metadata/lineage.py +1 -1
- datahub/ingestion/source/mode.py +13 -5
- datahub/ingestion/source/nifi.py +1 -1
- datahub/ingestion/source/powerbi/config.py +14 -21
- datahub/ingestion/source/preset.py +1 -1
- datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
- datahub/ingestion/source/redash.py +1 -1
- datahub/ingestion/source/redshift/config.py +6 -3
- datahub/ingestion/source/redshift/query.py +23 -19
- datahub/ingestion/source/s3/source.py +26 -24
- datahub/ingestion/source/salesforce.py +13 -9
- datahub/ingestion/source/schema/json_schema.py +14 -14
- datahub/ingestion/source/sigma/data_classes.py +3 -0
- datahub/ingestion/source/snaplogic/__init__.py +0 -0
- datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
- datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
- datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
- datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
- datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
- datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
- datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
- datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
- datahub/ingestion/source/sql/athena.py +2 -1
- datahub/ingestion/source/sql/clickhouse.py +12 -7
- datahub/ingestion/source/sql/cockroachdb.py +5 -3
- datahub/ingestion/source/sql/druid.py +2 -2
- datahub/ingestion/source/sql/hive.py +4 -3
- datahub/ingestion/source/sql/hive_metastore.py +7 -9
- datahub/ingestion/source/sql/mssql/source.py +2 -2
- datahub/ingestion/source/sql/mysql.py +2 -2
- datahub/ingestion/source/sql/oracle.py +3 -3
- datahub/ingestion/source/sql/presto.py +2 -1
- datahub/ingestion/source/sql/teradata.py +4 -4
- datahub/ingestion/source/sql/trino.py +2 -1
- datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
- datahub/ingestion/source/sql/vertica.py +1 -1
- datahub/ingestion/source/sql_queries.py +6 -6
- datahub/ingestion/source/state/checkpoint.py +5 -1
- datahub/ingestion/source/state/entity_removal_state.py +5 -2
- datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
- datahub/ingestion/source/superset.py +122 -15
- datahub/ingestion/source/tableau/tableau.py +68 -14
- datahub/ingestion/source/tableau/tableau_common.py +5 -0
- datahub/ingestion/source/tableau/tableau_constant.py +1 -0
- datahub/ingestion/source/tableau/tableau_server_wrapper.py +3 -0
- datahub/ingestion/source/unity/config.py +7 -3
- datahub/ingestion/source/usage/usage_common.py +3 -3
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/set_browse_path.py +112 -0
- datahub/metadata/_internal_schema_classes.py +728 -528
- datahub/metadata/_urns/urn_defs.py +1702 -1702
- datahub/metadata/com/linkedin/pegasus2avro/common/__init__.py +2 -0
- datahub/metadata/com/linkedin/pegasus2avro/settings/global/__init__.py +4 -0
- datahub/metadata/schema.avsc +17434 -17732
- datahub/metadata/schemas/GlobalSettingsInfo.avsc +72 -0
- datahub/metadata/schemas/InstitutionalMemory.avsc +22 -0
- datahub/metadata/schemas/LogicalParent.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +22 -0
- datahub/sdk/_shared.py +126 -0
- datahub/sdk/chart.py +87 -30
- datahub/sdk/dashboard.py +79 -34
- datahub/sdk/entity_client.py +11 -4
- datahub/sdk/lineage_client.py +3 -3
- datahub/sdk/search_filters.py +1 -7
- datahub/sql_parsing/split_statements.py +13 -0
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.9rc2.dist-info → acryl_datahub-1.2.0.10.dist-info}/top_level.txt +0 -0
datahub/ingestion/source/redshift/query.py

@@ -89,7 +89,7 @@ class RedshiftCommonQuery:
     ) -> str:
         # NOTE: it looks like description is available only in pg_description
         # So this remains preferrred way
-        tables_query = """
+        tables_query = f"""
         SELECT CASE c.relkind
             WHEN 'r' THEN 'TABLE'
             WHEN 'v' THEN 'VIEW'
@@ -120,6 +120,7 @@ class RedshiftCommonQuery:
         LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
         LEFT JOIN pg_class_info as ci on c.oid = ci.reloid
         LEFT JOIN pg_catalog.pg_description pgd ON pgd.objsubid = 0 AND pgd.objoid = c.oid
+        JOIN svv_redshift_schemas rs ON rs.schema_name = n.nspname AND rs.database_name = '{database}'
         WHERE c.relkind IN ('r','v','m','S','f')
         AND n.nspname !~ '^pg_'
         AND n.nspname != 'information_schema'
@@ -128,23 +129,24 @@ class RedshiftCommonQuery:
         external_tables_query = f"""
         SELECT 'EXTERNAL_TABLE' as tabletype,
             NULL AS "schema_oid",
-            schemaname AS "schema",
+            t.schemaname AS "schema",
             NULL AS "rel_oid",
-            tablename AS "relname",
+            t.tablename AS "relname",
             NULL as "creation_time",
             NULL AS "diststyle",
             NULL AS "owner_id",
             NULL AS "owner_name",
             NULL AS "view_definition",
             NULL AS "privileges",
-            "location",
-            parameters,
-            input_format,
-            output_format,
-            serde_parameters,
+            t."location",
+            t.parameters,
+            t.input_format,
+            t.output_format,
+            t.serde_parameters,
             NULL as table_description
-        FROM pg_catalog.svv_external_tables
-
+        FROM pg_catalog.svv_external_tables t
+        JOIN SVV_EXTERNAL_SCHEMAS s ON t.schemaname = s.schemaname
+        WHERE t.redshift_database_name='{database}'
         ORDER BY "schema",
                  "relname"
         """
@@ -232,11 +234,12 @@ class RedshiftCommonQuery:
             ON att.attrelid = c.oid
         LEFT JOIN pg_catalog.pg_attrdef ad
             ON (att.attrelid, att.attnum) = (ad.adrelid, ad.adnum)
+        JOIN svv_redshift_schemas rs ON rs.schema_name = n.nspname AND rs.database_name = '{database_name}'
         WHERE n.nspname !~ '^pg_'
             AND n.nspname != 'information_schema'
             AND att.attnum > 0
             AND NOT att.attisdropped
-            and
+            and n.nspname = '{schema_name}'
         UNION
         SELECT
             view_schema as "schema",
@@ -263,26 +266,27 @@ class RedshiftCommonQuery:
         WHERE 1 and schema = '{schema_name}'
         UNION
         SELECT
-            schemaname as "schema",
-            tablename as "table_name",
-            columnname as "name",
+            c.schemaname as "schema",
+            c.tablename as "table_name",
+            c.columnname as "name",
             null as "encode",
             -- Spectrum represents data types differently.
             -- Standardize, so we can infer types.
-            external_type AS "type",
+            c.external_type AS "type",
             null as "distkey",
             0 as "sortkey",
             null as "notnull",
             null as "comment",
             null as "adsrc",
             null as "attnum",
-            external_type AS "format_type",
+            c.external_type AS "format_type",
             null as "default",
             null as "schema_oid",
             null as "table_oid"
-        FROM SVV_EXTERNAL_COLUMNS
-
-
+        FROM SVV_EXTERNAL_COLUMNS c
+        JOIN SVV_EXTERNAL_SCHEMAS s ON c.schemaname = s.schemaname
+        WHERE c.schemaname = '{schema_name}'
+            AND c.redshift_database_name = '{database_name}'
         ORDER BY "schema", "table_name", "attnum"
         """

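Note on the redshift/query.py hunks above: the internal-catalog and Spectrum queries are now rendered as f-strings and joined against svv_redshift_schemas / SVV_EXTERNAL_SCHEMAS, so results are scoped to the connected database and schema. A minimal sketch of what that interpolation implies (simplified query and an illustrative render_tables_query helper, not the DataHub code):

```python
# Sketch only: render a database-scoped tables query the way the f-string above implies.
tables_query_template = """
SELECT c.relname
FROM pg_catalog.pg_class c
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
JOIN svv_redshift_schemas rs
  ON rs.schema_name = n.nspname AND rs.database_name = '{database}'
WHERE c.relkind IN ('r', 'v', 'm', 'S', 'f')
"""

def render_tables_query(database: str) -> str:
    # The join keeps only schemas registered for the requested database,
    # which is what scopes results in cross-database / datashare setups.
    return tables_query_template.format(database=database)

print(render_tables_query("dev"))
```

Interpolating the database name directly into SQL like this assumes it comes from trusted configuration rather than user input.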
datahub/ingestion/source/s3/source.py

@@ -115,14 +115,7 @@ profiling_flags_to_report = [
     "include_field_sample_values",
 ]

-
-# LOCAL_BROWSE_PATH_TRANSFORMER_CONFIG = AddDatasetBrowsePathConfig(
-#     path_templates=["/ENV/PLATFORMDATASET_PARTS"], replace_existing=True
-# )
-#
-# LOCAL_BROWSE_PATH_TRANSFORMER = AddDatasetBrowsePathTransformer(
-#     ctx=None, config=LOCAL_BROWSE_PATH_TRANSFORMER_CONFIG
-# )
+URI_SCHEME_REGEX = re.compile(r"^[a-z0-9]+://")


 def partitioned_folder_comparator(folder1: str, folder2: str) -> int:
@@ -448,9 +441,8 @@ class S3Source(StatefulIngestionSourceBase):
                 self.source_config.verify_ssl
             )

-
-
-            )
+            path = re.sub(URI_SCHEME_REGEX, "s3://", table_data.full_path)
+            file = smart_open(path, "rb", transport_params={"client": s3_client})
         else:
             # We still use smart_open here to take advantage of the compression
             # capabilities of smart_open.
@@ -668,11 +660,9 @@ class S3Source(StatefulIngestionSourceBase):
         aspects: List[Optional[_Aspect]] = []

         logger.info(f"Extracting table schema from file: {table_data.full_path}")
-
-
-
-            else table_data.table_path.strip("/")
-        )
+
+        # remove protocol and any leading or trailing slashes
+        browse_path = re.sub(URI_SCHEME_REGEX, "", table_data.table_path).strip("/")

         data_platform_urn = make_data_platform_urn(self.source_config.platform)
         logger.info(f"Creating dataset urn with name: {browse_path}")
@@ -806,10 +796,20 @@ class S3Source(StatefulIngestionSourceBase):
         else:
             return relative_path

-    def
-
-
-
+    def extract_table_name_and_path(
+        self, path_spec: PathSpec, path: str
+    ) -> Tuple[str, str]:
+        # Extract the table name and base path from a path that's been normalized back to the
+        # "s3://" scheme that matches the path_spec
+        table_name, table_path = path_spec.extract_table_name_and_path(
+            self._normalize_uri_for_pattern_matching(path)
+        )
+        # Then convert the table base path back to the original scheme
+        scheme = re.match(URI_SCHEME_REGEX, path)
+        if scheme:
+            table_path = re.sub(URI_SCHEME_REGEX, scheme[0], table_path)
+
+        return table_name, table_path

     def extract_table_data(
         self,
@@ -819,7 +819,7 @@ class S3Source(StatefulIngestionSourceBase):
         path = browse_path.file
         partitions = browse_path.partitions
         logger.debug(f"Getting table data for path: {path}")
-        table_name, table_path =
+        table_name, table_path = self.extract_table_name_and_path(path_spec, path)
         return TableData(
             display_name=table_name,
             is_s3=self.is_s3_platform(),
@@ -992,7 +992,9 @@ class S3Source(StatefulIngestionSourceBase):
             )

             # If partition_id is None, it means the folder is not a partition
-            partition_id = path_spec.get_partition_from_path(
+            partition_id = path_spec.get_partition_from_path(
+                self._normalize_uri_for_pattern_matching(max_file_s3_path)
+            )

             yield Folder(
                 partition_id=partition_id,
@@ -1143,8 +1145,8 @@ class S3Source(StatefulIngestionSourceBase):

             # Extract table name using the ORIGINAL path spec pattern matching (not the modified one)
             # This uses the compiled regex pattern to extract the table name from the full path
-            table_name,
-                table_s3_path
+            table_name, _ = self.extract_table_name_and_path(
+                path_spec, table_s3_path
             )

             # Apply table name filtering if configured
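Note on the s3/source.py hunks above: URI_SCHEME_REGEX and the new extract_table_name_and_path helper normalize any object-store scheme (s3a://, gs://, abfss://, ...) to s3:// before path-spec matching, then restore the original scheme afterwards. A rough sketch of that round trip, with illustrative helper names rather than the source's API:

```python
import re

# Matches any "<scheme>://" prefix at the start of a URI.
URI_SCHEME_REGEX = re.compile(r"^[a-z0-9]+://")

def normalize_for_matching(path: str) -> str:
    # Rewrite the scheme to "s3://" so a path spec written for s3:// still matches.
    return re.sub(URI_SCHEME_REGEX, "s3://", path)

def restore_scheme(original: str, normalized: str) -> str:
    # Put the original scheme back on a path that was normalized for matching.
    scheme = re.match(URI_SCHEME_REGEX, original)
    return re.sub(URI_SCHEME_REGEX, scheme[0], normalized) if scheme else normalized

p = "s3a://my-bucket/tables/orders/year=2024/part-0.parquet"
assert normalize_for_matching(p).startswith("s3://")
assert restore_scheme(p, normalize_for_matching(p)) == p
```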
datahub/ingestion/source/salesforce.py

@@ -110,30 +110,33 @@ class SalesforceConfig(
     auth: SalesforceAuthType = SalesforceAuthType.USERNAME_PASSWORD

     # Username, Password Auth
-    username: Optional[str] = Field(description="Salesforce username")
-    password: Optional[str] = Field(description="Password for Salesforce user")
+    username: Optional[str] = Field(None, description="Salesforce username")
+    password: Optional[str] = Field(None, description="Password for Salesforce user")
     consumer_key: Optional[str] = Field(
-        description="Consumer key for Salesforce JSON web token access"
+        None, description="Consumer key for Salesforce JSON web token access"
     )
     private_key: Optional[str] = Field(
-        description="Private key as a string for Salesforce JSON web token access"
+        None, description="Private key as a string for Salesforce JSON web token access"
     )
     security_token: Optional[str] = Field(
-        description="Security token for Salesforce username"
+        None, description="Security token for Salesforce username"
    )
     # client_id, client_secret not required

     # Direct - Instance URL, Access Token Auth
     instance_url: Optional[str] = Field(
-
+        None,
+        description="Salesforce instance url. e.g. https://MyDomainName.my.salesforce.com",
     )
     # Flag to indicate whether the instance is production or sandbox
     is_sandbox: bool = Field(
         default=False, description="Connect to Sandbox instance of your Salesforce"
     )
-    access_token: Optional[str] = Field(
+    access_token: Optional[str] = Field(
+        None, description="Access token for instance url"
+    )

-    ingest_tags:
+    ingest_tags: bool = Field(
         default=False,
         description="Ingest Tags from source. This will override Tags entered from UI",
     )
@@ -147,7 +150,8 @@ class SalesforceConfig(
         description='Regex patterns for tables/schemas to describe domain_key domain key (domain_key can be any string like "sales".) There can be multiple domain keys specified.',
     )
     api_version: Optional[str] = Field(
-
+        None,
+        description="If specified, overrides default version used by the Salesforce package. Example value: '59.0'",
     )

     profiling: SalesforceProfilingConfig = SalesforceProfilingConfig()
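Note on the salesforce.py hunks above: under pydantic v2, an Optional annotation no longer implies a default, so `Field(description=...)` without a default makes the field required; passing `None` as the first argument restores the optional behaviour these credentials need. A minimal sketch of the difference, using a toy AuthConfig model (assuming pydantic v2, not the DataHub config class):

```python
from typing import Optional
from pydantic import BaseModel, Field, ValidationError

class AuthConfig(BaseModel):
    # Required despite the Optional annotation: no default was given.
    required_token: Optional[str] = Field(description="token")
    # Truly optional: None is the explicit default.
    optional_token: Optional[str] = Field(None, description="token")

try:
    AuthConfig()
except ValidationError as e:
    print(e)  # reports that required_token is missing

print(AuthConfig(required_token=None).optional_token)  # None
```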
datahub/ingestion/source/schema/json_schema.py

@@ -4,7 +4,6 @@ import logging
 import os
 import tempfile
 import unittest
-import urllib.request
 from dataclasses import dataclass
 from os.path import basename, dirname
 from pathlib import Path
@@ -12,6 +11,7 @@ from typing import Any, Iterable, List, Optional, Union
 from urllib.parse import urlparse

 import jsonref
+import requests
 from pydantic import AnyHttpUrl, DirectoryPath, FilePath, validator
 from pydantic.fields import Field

@@ -91,19 +91,18 @@ class JsonSchemaSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMixin
     )

     @validator("path")
-    def download_http_url_to_temp_file(v):
+    def download_http_url_to_temp_file(cls, v):
         if isinstance(v, AnyHttpUrl):
             try:
-
-
-
-
-
-
-                )
-
-
-                return tmp_file.name
+                response = requests.get(str(v))
+                response.raise_for_status()
+                schema_dict = response.json()
+                if not JsonSchemaTranslator._get_id_from_any_schema(schema_dict):
+                    schema_dict["$id"] = str(v)
+                with tempfile.NamedTemporaryFile(mode="w", delete=False) as tmp_file:
+                    tmp_file.write(json.dumps(schema_dict))
+                    tmp_file.flush()
+                    return tmp_file.name
             except Exception as e:
                 logger.error(
                     f"Failed to localize url {v} due to {e}. Run with --debug to get full stacktrace"
@@ -353,7 +352,7 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
         if self.config.platform_instance:
             browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}"

-        if
+        if isinstance(self.config.path, Path) and self.config.path.is_dir():
             for root, _, files in os.walk(self.config.path, topdown=False):
                 for file_name in [f for f in files if f.endswith(".json")]:
                     try:
@@ -373,10 +372,11 @@ class JsonSchemaSource(StatefulIngestionSourceBase):

         else:
             try:
+                assert isinstance(self.config.path, Path)
                 yield from self._load_one_file(
                     ref_loader,
                     browse_prefix=browse_prefix,
-                    root_dir=
+                    root_dir=self.config.path.parent,
                     file_name=str(self.config.path),
                 )
             except Exception as e:
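Note on the json_schema.py hunks above: the path validator gains the implicit `cls` argument and now downloads remote schemas with requests, injects an `$id` when the schema lacks one, and writes the result to a temporary file for later parsing. A standalone sketch of that localize step, using a hypothetical localize_json_schema helper that is not part of the source:

```python
import json
import tempfile
import requests

def localize_json_schema(url: str) -> str:
    # Fetch a JSON Schema over HTTP and persist it locally for offline parsing.
    response = requests.get(url)
    response.raise_for_status()
    schema = response.json()
    # Record where the schema came from so $ref resolution stays stable.
    schema.setdefault("$id", url)
    with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as tmp:
        json.dump(schema, tmp)
        return tmp.name
```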
datahub/ingestion/source/sigma/data_classes.py

@@ -1,3 +1,4 @@
+from copy import deepcopy
 from datetime import datetime
 from typing import Dict, List, Optional

@@ -23,6 +24,8 @@ class Workspace(BaseModel):

     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         # Update name if presonal workspace
         if values["name"] == "User Folder":
             values["name"] = "My documents"
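Note on the sigma/data_classes.py hunk above: the pre root_validator now works on a deep copy of its input, so renaming the personal workspace can never leak back into the dict the caller passed in. A toy sketch of the behaviour (illustrative model, not the source classes):

```python
from copy import deepcopy
from typing import Dict
from pydantic import BaseModel, root_validator

class Workspace(BaseModel):
    name: str

    @root_validator(pre=True)
    def update_values(cls, values: Dict) -> Dict:
        # Work on a copy so the caller's dict is guaranteed to stay untouched.
        values = deepcopy(values)
        if values["name"] == "User Folder":
            values["name"] = "My documents"
        return values

raw = {"name": "User Folder"}
ws = Workspace.parse_obj(raw)
print(ws.name)  # "My documents"
print(raw)      # still {"name": "User Folder"}
```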