acryl-datahub 1.2.0.10rc3__py3-none-any.whl → 1.2.0.10rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This version of acryl-datahub has been flagged as potentially problematic.

Files changed (94)
  1. {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/METADATA +2513 -2571
  2. {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/RECORD +94 -87
  3. {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/entry_points.txt +2 -0
  4. datahub/_version.py +1 -1
  5. datahub/api/entities/assertion/assertion.py +1 -1
  6. datahub/api/entities/corpgroup/corpgroup.py +1 -1
  7. datahub/api/entities/dataproduct/dataproduct.py +6 -3
  8. datahub/api/entities/dataset/dataset.py +9 -18
  9. datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
  10. datahub/api/graphql/operation.py +10 -6
  11. datahub/cli/docker_check.py +2 -2
  12. datahub/configuration/common.py +29 -1
  13. datahub/configuration/connection_resolver.py +5 -2
  14. datahub/configuration/import_resolver.py +7 -4
  15. datahub/configuration/pydantic_migration_helpers.py +0 -9
  16. datahub/configuration/source_common.py +3 -2
  17. datahub/configuration/validate_field_deprecation.py +5 -2
  18. datahub/configuration/validate_field_removal.py +5 -2
  19. datahub/configuration/validate_field_rename.py +6 -5
  20. datahub/configuration/validate_multiline_string.py +5 -2
  21. datahub/ingestion/autogenerated/capability_summary.json +33 -1
  22. datahub/ingestion/run/pipeline_config.py +2 -2
  23. datahub/ingestion/source/azure/azure_common.py +1 -1
  24. datahub/ingestion/source/bigquery_v2/bigquery_config.py +28 -14
  25. datahub/ingestion/source/bigquery_v2/queries_extractor.py +4 -5
  26. datahub/ingestion/source/common/gcp_credentials_config.py +3 -1
  27. datahub/ingestion/source/data_lake_common/path_spec.py +16 -16
  28. datahub/ingestion/source/datahub/config.py +8 -9
  29. datahub/ingestion/source/delta_lake/config.py +1 -1
  30. datahub/ingestion/source/dremio/dremio_config.py +3 -4
  31. datahub/ingestion/source/feast.py +8 -10
  32. datahub/ingestion/source/fivetran/config.py +1 -1
  33. datahub/ingestion/source/ge_profiling_config.py +26 -22
  34. datahub/ingestion/source/grafana/grafana_config.py +2 -2
  35. datahub/ingestion/source/grafana/models.py +12 -14
  36. datahub/ingestion/source/hex/hex.py +6 -1
  37. datahub/ingestion/source/iceberg/iceberg_profiler.py +4 -2
  38. datahub/ingestion/source/kafka_connect/common.py +2 -2
  39. datahub/ingestion/source/looker/looker_common.py +1 -1
  40. datahub/ingestion/source/looker/looker_config.py +15 -4
  41. datahub/ingestion/source/looker/looker_source.py +52 -3
  42. datahub/ingestion/source/looker/lookml_config.py +1 -1
  43. datahub/ingestion/source/metadata/business_glossary.py +7 -7
  44. datahub/ingestion/source/metadata/lineage.py +1 -1
  45. datahub/ingestion/source/mode.py +13 -5
  46. datahub/ingestion/source/nifi.py +1 -1
  47. datahub/ingestion/source/powerbi/config.py +14 -21
  48. datahub/ingestion/source/preset.py +1 -1
  49. datahub/ingestion/source/qlik_sense/data_classes.py +28 -8
  50. datahub/ingestion/source/redshift/config.py +6 -3
  51. datahub/ingestion/source/salesforce.py +13 -9
  52. datahub/ingestion/source/schema/json_schema.py +14 -14
  53. datahub/ingestion/source/sigma/data_classes.py +3 -0
  54. datahub/ingestion/source/snaplogic/__init__.py +0 -0
  55. datahub/ingestion/source/snaplogic/snaplogic.py +355 -0
  56. datahub/ingestion/source/snaplogic/snaplogic_config.py +37 -0
  57. datahub/ingestion/source/snaplogic/snaplogic_lineage_extractor.py +107 -0
  58. datahub/ingestion/source/snaplogic/snaplogic_parser.py +168 -0
  59. datahub/ingestion/source/snaplogic/snaplogic_utils.py +31 -0
  60. datahub/ingestion/source/snowflake/snowflake_config.py +12 -15
  61. datahub/ingestion/source/snowflake/snowflake_connection.py +8 -3
  62. datahub/ingestion/source/snowflake/snowflake_lineage_v2.py +15 -2
  63. datahub/ingestion/source/snowflake/snowflake_queries.py +4 -5
  64. datahub/ingestion/source/sql/athena.py +2 -1
  65. datahub/ingestion/source/sql/clickhouse.py +12 -7
  66. datahub/ingestion/source/sql/cockroachdb.py +5 -3
  67. datahub/ingestion/source/sql/druid.py +2 -2
  68. datahub/ingestion/source/sql/hive.py +4 -3
  69. datahub/ingestion/source/sql/hive_metastore.py +7 -9
  70. datahub/ingestion/source/sql/mssql/source.py +2 -2
  71. datahub/ingestion/source/sql/mysql.py +2 -2
  72. datahub/ingestion/source/sql/oracle.py +3 -3
  73. datahub/ingestion/source/sql/presto.py +2 -1
  74. datahub/ingestion/source/sql/teradata.py +4 -4
  75. datahub/ingestion/source/sql/trino.py +2 -1
  76. datahub/ingestion/source/sql/two_tier_sql_source.py +2 -3
  77. datahub/ingestion/source/sql/vertica.py +1 -1
  78. datahub/ingestion/source/sql_queries.py +6 -6
  79. datahub/ingestion/source/state/checkpoint.py +5 -1
  80. datahub/ingestion/source/state/entity_removal_state.py +5 -2
  81. datahub/ingestion/source/state/stateful_ingestion_base.py +5 -8
  82. datahub/ingestion/source/superset.py +1 -2
  83. datahub/ingestion/source/tableau/tableau.py +20 -6
  84. datahub/ingestion/source/unity/config.py +7 -3
  85. datahub/ingestion/source/usage/usage_common.py +3 -3
  86. datahub/ingestion/source_config/pulsar.py +3 -1
  87. datahub/ingestion/transformer/set_browse_path.py +112 -0
  88. datahub/sdk/_shared.py +126 -0
  89. datahub/sdk/chart.py +87 -30
  90. datahub/sdk/dashboard.py +79 -32
  91. datahub/sdk/search_filters.py +1 -7
  92. {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/WHEEL +0 -0
  93. {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/licenses/LICENSE +0 -0
  94. {acryl_datahub-1.2.0.10rc3.dist-info → acryl_datahub-1.2.0.10rc5.dist-info}/top_level.txt +0 -0

datahub/ingestion/source/metadata/business_glossary.py
@@ -5,11 +5,11 @@ import time
 from dataclasses import dataclass, field
 from typing import Any, Dict, Iterable, List, Optional, TypeVar, Union
 
-from pydantic import validator
+import pydantic
 from pydantic.fields import Field
 
 import datahub.metadata.schema_classes as models
-from datahub.configuration.common import ConfigModel
+from datahub.configuration.common import ConfigModel, LaxStr
 from datahub.configuration.config_loader import load_config_file
 from datahub.emitter.mce_builder import (
     datahub_guid,
@@ -66,7 +66,7 @@ class GlossaryTermConfig(ConfigModel):
     contains: Optional[List[str]] = None
     values: Optional[List[str]] = None
     related_terms: Optional[List[str]] = None
-    custom_properties: Optional[Dict[str, str]] = None
+    custom_properties: Optional[Dict[str, LaxStr]] = None
     knowledge_links: Optional[List[KnowledgeCard]] = None
     domain: Optional[str] = None
 
@@ -82,7 +82,7 @@ class GlossaryNodeConfig(ConfigModel):
     terms: Optional[List["GlossaryTermConfig"]] = None
     nodes: Optional[List["GlossaryNodeConfig"]] = None
     knowledge_links: Optional[List[KnowledgeCard]] = None
-    custom_properties: Optional[Dict[str, str]] = None
+    custom_properties: Optional[Dict[str, LaxStr]] = None
 
     # Private fields.
     _urn: str
@@ -108,12 +108,12 @@ class BusinessGlossarySourceConfig(ConfigModel):
 
 
 class BusinessGlossaryConfig(DefaultConfig):
-    version: str
+    version: LaxStr
     terms: Optional[List["GlossaryTermConfig"]] = None
     nodes: Optional[List["GlossaryNodeConfig"]] = None
 
-    @validator("version")
-    def version_must_be_1(cls, v):
+    @pydantic.field_validator("version", mode="after")
+    def version_must_be_1(cls, v: str) -> str:
         if v != "1":
             raise ValueError("Only version 1 is supported")
         return v
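
Note on the business_glossary.py hunks above: moving custom_properties values and version from str to LaxStr lets glossary YAML that the YAML parser reads as numbers (e.g. version: 1, or numeric custom property values) still satisfy string-typed fields, and the version check moves to pydantic v2's field_validator. The real LaxStr is defined in datahub/configuration/common.py (also changed in this release) and is not shown in this diff; the following is only a rough stand-in sketch of the idea, not the package's implementation:

from typing import Annotated, Dict, Optional

from pydantic import BaseModel, BeforeValidator, field_validator

# Assumed stand-in for datahub.configuration.common.LaxStr: coerce the ints/floats
# that YAML produces into their string form before normal str validation runs.
LaxStr = Annotated[
    str, BeforeValidator(lambda v: str(v) if isinstance(v, (int, float)) else v)
]


class GlossarySketch(BaseModel):
    version: LaxStr
    custom_properties: Optional[Dict[str, LaxStr]] = None

    @field_validator("version", mode="after")
    @classmethod
    def version_must_be_1(cls, v: str) -> str:
        if v != "1":
            raise ValueError("Only version 1 is supported")
        return v


# `version: 1` in a glossary file arrives as an int; LaxStr turns it into "1" before the check.
print(GlossarySketch(version=1, custom_properties={"tier": 2}))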

datahub/ingestion/source/metadata/lineage.py
@@ -49,7 +49,7 @@ class EntityConfig(EnvConfigMixin):
     name: str
     type: str
     platform: str
-    platform_instance: Optional[str]
+    platform_instance: Optional[str] = None
 
     @validator("type")
     def type_must_be_supported(cls, v: str) -> str:
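
The one-line lineage.py change above (and the Field(None, ...) additions in salesforce.py further down) reflects a pydantic v2 behavior difference: in v1 an Optional[...] annotation implied a default of None, while in v2 a field without an explicit default is required even when it is Optional. A small illustration, not package code:

from typing import Optional

from pydantic import BaseModel, ValidationError


class WithoutDefault(BaseModel):
    platform_instance: Optional[str]  # required under pydantic v2, despite being Optional


class WithDefault(BaseModel):
    platform_instance: Optional[str] = None  # explicit default keeps the field truly optional


print(WithDefault())  # platform_instance=None

try:
    WithoutDefault()
except ValidationError as e:
    print(e.errors()[0]["type"])  # "missing"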

datahub/ingestion/source/mode.py
@@ -7,7 +7,16 @@ from dataclasses import dataclass
 from datetime import datetime, timezone
 from functools import lru_cache
 from json import JSONDecodeError
-from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple, Union
+from typing import (
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    Union,
+)
 
 import dateutil.parser as dp
 import psutil
@@ -24,7 +33,7 @@ from requests.models import HTTPBasicAuth, HTTPError
 from tenacity import retry_if_exception_type, stop_after_attempt, wait_exponential
 
 import datahub.emitter.mce_builder as builder
-from datahub.configuration.common import AllowDenyPattern, ConfigModel
+from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFromDocs
 from datahub.configuration.source_common import (
     DatasetLineageProviderConfigBase,
 )
@@ -200,10 +209,9 @@ class ModeConfig(
         default=True, description="Tag measures and dimensions in the schema"
     )
 
-    items_per_page: int = Field(
-        default=DEFAULT_API_ITEMS_PER_PAGE,
+    items_per_page: HiddenFromDocs[int] = Field(
+        DEFAULT_API_ITEMS_PER_PAGE,
         description="Number of items per page for paginated API requests.",
-        hidden_from_docs=True,
     )
 
     @validator("connect_uri")

datahub/ingestion/source/nifi.py
@@ -166,7 +166,7 @@ class NifiSourceConfig(StatefulIngestionConfigBase, EnvConfigMixin):
     )
 
     @root_validator(skip_on_failure=True)
-    def validate_auth_params(cla, values):
+    def validate_auth_params(cls, values):
         if values.get("auth") is NifiAuthType.CLIENT_CERT and not values.get(
             "client_cert_file"
         ):

datahub/ingestion/source/powerbi/config.py
@@ -4,11 +4,10 @@ from enum import Enum
 from typing import Dict, List, Literal, Optional, Union
 
 import pydantic
-from pydantic import validator
-from pydantic.class_validators import root_validator
+from pydantic import root_validator, validator
 
 import datahub.emitter.mce_builder as builder
-from datahub.configuration.common import AllowDenyPattern, ConfigModel
+from datahub.configuration.common import AllowDenyPattern, ConfigModel, HiddenFromDocs
 from datahub.configuration.source_common import DatasetSourceConfigMixin, PlatformDetail
 from datahub.configuration.validate_field_deprecation import pydantic_field_deprecated
 from datahub.ingestion.api.incremental_lineage_helper import (
@@ -291,22 +290,18 @@ class PowerBiProfilingConfig(ConfigModel):
 class PowerBiDashboardSourceConfig(
     StatefulIngestionConfigBase, DatasetSourceConfigMixin, IncrementalLineageConfigMixin
 ):
-    platform_name: str = pydantic.Field(
-        default=Constant.PLATFORM_NAME, hidden_from_docs=True
-    )
+    platform_name: HiddenFromDocs[str] = pydantic.Field(default=Constant.PLATFORM_NAME)
 
-    platform_urn: str = pydantic.Field(
+    platform_urn: HiddenFromDocs[str] = pydantic.Field(
         default=builder.make_data_platform_urn(platform=Constant.PLATFORM_NAME),
-        hidden_from_docs=True,
     )
 
     # Organization Identifier
     tenant_id: str = pydantic.Field(description="PowerBI tenant identifier")
     # PowerBi workspace identifier
-    workspace_id: Optional[str] = pydantic.Field(
+    workspace_id: HiddenFromDocs[Optional[str]] = pydantic.Field(
         default=None,
         description="[deprecated] Use workspace_id_pattern instead",
-        hidden_from_docs=True,
     )
     # PowerBi workspace identifier
     workspace_id_pattern: AllowDenyPattern = pydantic.Field(
@@ -326,15 +321,14 @@ class PowerBiDashboardSourceConfig(
     # Dataset type mapping PowerBI support many type of data-sources. Here user needs to define what type of PowerBI
     # DataSource needs to be mapped to corresponding DataHub Platform DataSource. For example, PowerBI `Snowflake` is
     # mapped to DataHub `snowflake` PowerBI `PostgreSQL` is mapped to DataHub `postgres` and so on.
-    dataset_type_mapping: Union[Dict[str, str], Dict[str, PlatformDetail]] = (
-        pydantic.Field(
-            default_factory=default_for_dataset_type_mapping,
-            description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
-            "DataHub supported datasources."
-            "You can configured platform instance for dataset lineage. "
-            "See Quickstart Recipe for mapping",
-            hidden_from_docs=True,
-        )
+    dataset_type_mapping: HiddenFromDocs[
+        Union[Dict[str, str], Dict[str, PlatformDetail]]
+    ] = pydantic.Field(
+        default_factory=default_for_dataset_type_mapping,
+        description="[deprecated] Use server_to_platform_instance instead. Mapping of PowerBI datasource type to "
+        "DataHub supported datasources."
+        "You can configured platform instance for dataset lineage. "
+        "See Quickstart Recipe for mapping",
     )
     # PowerBI datasource's server to platform instance mapping
     server_to_platform_instance: Dict[
@@ -541,10 +535,9 @@ class PowerBiDashboardSourceConfig(
         "Increase this value if you encounter the 'M-Query Parsing Timeout' message in the connector report.",
     )
 
-    metadata_api_timeout: int = pydantic.Field(
+    metadata_api_timeout: HiddenFromDocs[int] = pydantic.Field(
         default=30,
         description="timeout in seconds for Metadata Rest Api.",
-        hidden_from_docs=True,
     )
 
     @root_validator(skip_on_failure=True)

datahub/ingestion/source/preset.py
@@ -2,7 +2,7 @@ import logging
 from typing import Dict, Optional
 
 import requests
-from pydantic.class_validators import root_validator, validator
+from pydantic import root_validator, validator
 from pydantic.fields import Field
 
 from datahub.emitter.mce_builder import DEFAULT_ENV

datahub/ingestion/source/qlik_sense/data_classes.py
@@ -1,8 +1,9 @@
+from copy import deepcopy
 from datetime import datetime
 from enum import Enum
 from typing import Dict, List, Optional, Type, Union
 
-from pydantic import BaseModel, Field, root_validator
+from pydantic import BaseModel, ConfigDict, Field, root_validator
 
 from datahub.emitter.mcp_builder import ContainerKey
 from datahub.ingestion.source.qlik_sense.config import QLIK_DATETIME_FORMAT, Constant
@@ -78,7 +79,11 @@ PERSONAL_SPACE_DICT = {
 }
 
 
-class Space(BaseModel):
+class _QlikBaseModel(BaseModel):
+    model_config = ConfigDict(coerce_numbers_to_str=True)
+
+
+class Space(_QlikBaseModel):
     id: str
     name: str
     description: str
@@ -89,6 +94,9 @@ class Space(BaseModel):
 
     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
+
         values[Constant.CREATEDAT] = datetime.strptime(
             values[Constant.CREATEDAT], QLIK_DATETIME_FORMAT
         )
@@ -98,7 +106,7 @@ class Space(BaseModel):
         return values
 
 
-class Item(BaseModel):
+class Item(_QlikBaseModel):
     id: str
     description: str = ""
     ownerId: str
@@ -107,7 +115,7 @@ class Item(BaseModel):
     updatedAt: datetime
 
 
-class SchemaField(BaseModel):
+class SchemaField(_QlikBaseModel):
     name: str
     dataType: Optional[str] = None
     primaryKey: Optional[bool] = None
@@ -115,6 +123,8 @@ class SchemaField(BaseModel):
 
     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.DATATYPE] = values.get(Constant.DATATYPE, {}).get(Constant.TYPE)
         return values
 
@@ -130,6 +140,8 @@ class QlikDataset(Item):
 
     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         # Update str time to datetime
         values[Constant.CREATEDAT] = datetime.strptime(
             values[Constant.CREATEDTIME], QLIK_DATETIME_FORMAT
@@ -148,13 +160,13 @@ class QlikDataset(Item):
         return values
 
 
-class AxisProperty(BaseModel):
+class AxisProperty(_QlikBaseModel):
     Title: str = Field(alias="qFallbackTitle")
     Min: str = Field(alias="qMin")
     Max: str = Field(alias="qMax")
 
 
-class Chart(BaseModel):
+class Chart(_QlikBaseModel):
     qId: str
     visualization: str
     title: str
@@ -164,13 +176,15 @@ class Chart(BaseModel):
 
     @root_validator(pre=True)
    def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.QID] = values[Constant.QINFO][Constant.QID]
         values["qDimension"] = values[Constant.HYPERCUBE]["qDimensionInfo"]
         values["qMeasure"] = values[Constant.HYPERCUBE]["qMeasureInfo"]
         return values
 
 
-class Sheet(BaseModel):
+class Sheet(_QlikBaseModel):
     id: str
     title: str
     description: str
@@ -181,6 +195,8 @@ class Sheet(BaseModel):
 
     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.CREATEDAT] = datetime.strptime(
             values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
         )
@@ -190,7 +206,7 @@ class Sheet(BaseModel):
         return values
 
 
-class QlikTable(BaseModel):
+class QlikTable(_QlikBaseModel):
     tableName: str
     type: BoxType = Field(alias="boxType")
     tableAlias: str
@@ -206,6 +222,8 @@ class QlikTable(BaseModel):
 
     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.DATACONNECTORID] = values[Constant.CONNECTIONINFO][Constant.ID]
         values[Constant.DATACONNECTORPLATFORM] = values[Constant.CONNECTIONINFO][
             Constant.SOURCECONNECTORID
@@ -223,6 +241,8 @@ class App(Item):
 
     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         values[Constant.CREATEDAT] = datetime.strptime(
             values[Constant.CREATEDDATE], QLIK_DATETIME_FORMAT
         )
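
The recurring change across the Qlik models above (and in redshift/config.py and sigma/data_classes.py below) is a deepcopy(values) at the top of each @root_validator(pre=True) hook. Under pydantic v2 a pre/"before" validator receives the caller's mapping itself, so rewriting keys in place leaks the converted values back into the input payload; parsing the same dict twice, or sharing fixture data across tests, then sees already-transformed values. A minimal reproduction with a made-up model (assuming pydantic v2 and the same deprecated v1-style root_validator API the source uses):

from copy import deepcopy
from datetime import datetime

from pydantic import BaseModel, root_validator

TIME_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"  # stand-in for QLIK_DATETIME_FORMAT
RAW = {"createdAt": "2024-01-01T00:00:00.000Z"}


class MutatingModel(BaseModel):
    createdAt: datetime

    @root_validator(pre=True)
    def update_values(cls, values):
        # Rewrites the caller's dict in place.
        values["createdAt"] = datetime.strptime(values["createdAt"], TIME_FORMAT)
        return values


class CopyingModel(BaseModel):
    createdAt: datetime

    @root_validator(pre=True)
    def update_values(cls, values):
        values = deepcopy(values)  # leave the input payload untouched
        values["createdAt"] = datetime.strptime(values["createdAt"], TIME_FORMAT)
        return values


payload = dict(RAW)
CopyingModel.parse_obj(payload)
CopyingModel.parse_obj(payload)  # fine: payload["createdAt"] is still the raw string

payload = dict(RAW)
MutatingModel.parse_obj(payload)
MutatingModel.parse_obj(payload)  # fails: the first parse already replaced the string with a datetime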

datahub/ingestion/source/redshift/config.py
@@ -1,4 +1,5 @@
 import logging
+from copy import deepcopy
 from enum import Enum
 from typing import Any, Dict, List, Optional
 
@@ -6,7 +7,7 @@ from pydantic import root_validator
 from pydantic.fields import Field
 
 from datahub.configuration import ConfigModel
-from datahub.configuration.common import AllowDenyPattern
+from datahub.configuration.common import AllowDenyPattern, HiddenFromDocs
 from datahub.configuration.source_common import DatasetLineageProviderConfigBase
 from datahub.configuration.validate_field_removal import pydantic_removed_field
 from datahub.configuration.validate_field_rename import pydantic_renamed_field
@@ -95,10 +96,9 @@ class RedshiftConfig(
     # Because of this behavior, it uses dramatically fewer round trips for
     # large Redshift warehouses. As an example, see this query for the columns:
     # https://github.com/sqlalchemy-redshift/sqlalchemy-redshift/blob/60b4db04c1d26071c291aeea52f1dcb5dd8b0eb0/sqlalchemy_redshift/dialect.py#L745.
-    scheme: str = Field(
+    scheme: HiddenFromDocs[str] = Field(
         default="redshift+redshift_connector",
         description="",
-        hidden_from_docs=True,
     )
 
     _database_alias_removed = pydantic_removed_field("database_alias")
@@ -216,6 +216,9 @@ class RedshiftConfig(
 
     @root_validator(skip_on_failure=True)
     def connection_config_compatibility_set(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
+
         if (
             ("options" in values and "connect_args" in values["options"])
             and "extra_client_options" in values

datahub/ingestion/source/salesforce.py
@@ -110,30 +110,33 @@ class SalesforceConfig(
     auth: SalesforceAuthType = SalesforceAuthType.USERNAME_PASSWORD
 
     # Username, Password Auth
-    username: Optional[str] = Field(description="Salesforce username")
-    password: Optional[str] = Field(description="Password for Salesforce user")
+    username: Optional[str] = Field(None, description="Salesforce username")
+    password: Optional[str] = Field(None, description="Password for Salesforce user")
     consumer_key: Optional[str] = Field(
-        description="Consumer key for Salesforce JSON web token access"
+        None, description="Consumer key for Salesforce JSON web token access"
     )
     private_key: Optional[str] = Field(
-        description="Private key as a string for Salesforce JSON web token access"
+        None, description="Private key as a string for Salesforce JSON web token access"
     )
     security_token: Optional[str] = Field(
-        description="Security token for Salesforce username"
+        None, description="Security token for Salesforce username"
     )
     # client_id, client_secret not required
 
     # Direct - Instance URL, Access Token Auth
     instance_url: Optional[str] = Field(
-        description="Salesforce instance url. e.g. https://MyDomainName.my.salesforce.com"
+        None,
+        description="Salesforce instance url. e.g. https://MyDomainName.my.salesforce.com",
     )
     # Flag to indicate whether the instance is production or sandbox
     is_sandbox: bool = Field(
         default=False, description="Connect to Sandbox instance of your Salesforce"
     )
-    access_token: Optional[str] = Field(description="Access token for instance url")
+    access_token: Optional[str] = Field(
+        None, description="Access token for instance url"
+    )
 
-    ingest_tags: Optional[bool] = Field(
+    ingest_tags: bool = Field(
         default=False,
         description="Ingest Tags from source. This will override Tags entered from UI",
     )
@@ -147,7 +150,8 @@ class SalesforceConfig(
         description='Regex patterns for tables/schemas to describe domain_key domain key (domain_key can be any string like "sales".) There can be multiple domain keys specified.',
     )
     api_version: Optional[str] = Field(
-        description="If specified, overrides default version used by the Salesforce package. Example value: '59.0'"
+        None,
+        description="If specified, overrides default version used by the Salesforce package. Example value: '59.0'",
     )
 
     profiling: SalesforceProfilingConfig = SalesforceProfilingConfig()

datahub/ingestion/source/schema/json_schema.py
@@ -4,7 +4,6 @@ import logging
 import os
 import tempfile
 import unittest
-import urllib.request
 from dataclasses import dataclass
 from os.path import basename, dirname
 from pathlib import Path
@@ -12,6 +11,7 @@ from typing import Any, Iterable, List, Optional, Union
 from urllib.parse import urlparse
 
 import jsonref
+import requests
 from pydantic import AnyHttpUrl, DirectoryPath, FilePath, validator
 from pydantic.fields import Field
 
@@ -91,19 +91,18 @@ class JsonSchemaSourceConfig(StatefulIngestionConfigBase, DatasetSourceConfigMix
     )
 
     @validator("path")
-    def download_http_url_to_temp_file(v):
+    def download_http_url_to_temp_file(cls, v):
         if isinstance(v, AnyHttpUrl):
             try:
-                with urllib.request.urlopen(v) as response:
-                    schema_dict = json.load(response)
-                if not JsonSchemaTranslator._get_id_from_any_schema(schema_dict):
-                    schema_dict["$id"] = str(v)
-                with tempfile.NamedTemporaryFile(
-                    mode="w", delete=False
-                ) as tmp_file:
-                    tmp_file.write(json.dumps(schema_dict))
-                    tmp_file.flush()
-                    return tmp_file.name
+                response = requests.get(str(v))
+                response.raise_for_status()
+                schema_dict = response.json()
+                if not JsonSchemaTranslator._get_id_from_any_schema(schema_dict):
+                    schema_dict["$id"] = str(v)
+                with tempfile.NamedTemporaryFile(mode="w", delete=False) as tmp_file:
+                    tmp_file.write(json.dumps(schema_dict))
+                    tmp_file.flush()
+                    return tmp_file.name
             except Exception as e:
                 logger.error(
                     f"Failed to localize url {v} due to {e}. Run with --debug to get full stacktrace"
@@ -353,7 +352,7 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
         if self.config.platform_instance:
             browse_prefix = f"/{self.config.env.lower()}/{self.config.platform}/{self.config.platform_instance}"
 
-        if os.path.isdir(self.config.path):
+        if isinstance(self.config.path, Path) and self.config.path.is_dir():
             for root, _, files in os.walk(self.config.path, topdown=False):
                 for file_name in [f for f in files if f.endswith(".json")]:
                     try:
@@ -373,10 +372,11 @@ class JsonSchemaSource(StatefulIngestionSourceBase):
 
         else:
             try:
+                assert isinstance(self.config.path, Path)
                 yield from self._load_one_file(
                     ref_loader,
                     browse_prefix=browse_prefix,
-                    root_dir=Path(os.path.dirname(Path(self.config.path))),
+                    root_dir=self.config.path.parent,
                     file_name=str(self.config.path),
                 )
             except Exception as e:

datahub/ingestion/source/sigma/data_classes.py
@@ -1,3 +1,4 @@
+from copy import deepcopy
 from datetime import datetime
 from typing import Dict, List, Optional
 
@@ -23,6 +24,8 @@ class Workspace(BaseModel):
 
     @root_validator(pre=True)
     def update_values(cls, values: Dict) -> Dict:
+        # Create a copy to avoid modifying the input dictionary, preventing state contamination in tests
+        values = deepcopy(values)
         # Update name if presonal workspace
         if values["name"] == "User Folder":
             values["name"] = "My documents"