acryl-datahub 0.15.0.6rc2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic.
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/METADATA +2522 -2493
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/RECORD +205 -192
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/WHEEL +1 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/api/entities/common/serialized_value.py +4 -3
- datahub/api/entities/dataset/dataset.py +731 -42
- datahub/api/entities/structuredproperties/structuredproperties.py +2 -2
- datahub/cli/check_cli.py +72 -19
- datahub/cli/docker_cli.py +3 -3
- datahub/cli/iceberg_cli.py +31 -7
- datahub/cli/ingest_cli.py +30 -93
- datahub/cli/lite_cli.py +4 -2
- datahub/cli/specific/dataproduct_cli.py +1 -1
- datahub/cli/specific/dataset_cli.py +128 -14
- datahub/configuration/common.py +10 -2
- datahub/configuration/git.py +1 -3
- datahub/configuration/kafka.py +1 -1
- datahub/emitter/mce_builder.py +28 -13
- datahub/emitter/mcp_builder.py +4 -1
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +323 -10
- datahub/ingestion/api/decorators.py +1 -1
- datahub/ingestion/api/source_helpers.py +4 -0
- datahub/ingestion/fs/s3_fs.py +2 -2
- datahub/ingestion/glossary/classification_mixin.py +1 -5
- datahub/ingestion/graph/client.py +41 -22
- datahub/ingestion/graph/entity_versioning.py +3 -3
- datahub/ingestion/graph/filters.py +64 -37
- datahub/ingestion/reporting/datahub_ingestion_run_summary_provider.py +1 -6
- datahub/ingestion/run/pipeline.py +112 -148
- datahub/ingestion/run/sink_callback.py +77 -0
- datahub/ingestion/sink/datahub_rest.py +8 -0
- datahub/ingestion/source/abs/config.py +2 -4
- datahub/ingestion/source/bigquery_v2/bigquery_audit.py +1 -1
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/bigquery_v2/bigquery_schema.py +6 -1
- datahub/ingestion/source/bigquery_v2/bigquery_schema_gen.py +7 -4
- datahub/ingestion/source/cassandra/cassandra.py +152 -233
- datahub/ingestion/source/cassandra/cassandra_api.py +13 -5
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/common/subtypes.py +12 -0
- datahub/ingestion/source/csv_enricher.py +3 -3
- datahub/ingestion/source/data_lake_common/path_spec.py +1 -3
- datahub/ingestion/source/dbt/dbt_common.py +8 -5
- datahub/ingestion/source/dbt/dbt_core.py +11 -9
- datahub/ingestion/source/dbt/dbt_tests.py +4 -8
- datahub/ingestion/source/delta_lake/config.py +8 -1
- datahub/ingestion/source/delta_lake/report.py +4 -2
- datahub/ingestion/source/delta_lake/source.py +20 -5
- datahub/ingestion/source/dremio/dremio_api.py +4 -8
- datahub/ingestion/source/dremio/dremio_aspects.py +3 -5
- datahub/ingestion/source/dynamodb/dynamodb.py +6 -0
- datahub/ingestion/source/elastic_search.py +26 -6
- datahub/ingestion/source/feast.py +27 -8
- datahub/ingestion/source/file.py +6 -3
- datahub/ingestion/source/gc/dataprocess_cleanup.py +1 -1
- datahub/ingestion/source/gc/execution_request_cleanup.py +2 -1
- datahub/ingestion/source/ge_data_profiler.py +12 -15
- datahub/ingestion/source/iceberg/iceberg.py +46 -12
- datahub/ingestion/source/iceberg/iceberg_common.py +71 -21
- datahub/ingestion/source/identity/okta.py +37 -7
- datahub/ingestion/source/kafka/kafka.py +1 -1
- datahub/ingestion/source/kafka_connect/common.py +2 -7
- datahub/ingestion/source/kafka_connect/kafka_connect.py +97 -4
- datahub/ingestion/source/kafka_connect/sink_connectors.py +2 -2
- datahub/ingestion/source/kafka_connect/source_connectors.py +6 -9
- datahub/ingestion/source/looker/looker_common.py +6 -5
- datahub/ingestion/source/looker/looker_file_loader.py +2 -2
- datahub/ingestion/source/looker/looker_lib_wrapper.py +2 -1
- datahub/ingestion/source/looker/looker_source.py +1 -1
- datahub/ingestion/source/looker/looker_template_language.py +4 -2
- datahub/ingestion/source/looker/lookml_source.py +3 -2
- datahub/ingestion/source/metabase.py +57 -35
- datahub/ingestion/source/metadata/business_glossary.py +45 -3
- datahub/ingestion/source/metadata/lineage.py +2 -2
- datahub/ingestion/source/mlflow.py +365 -35
- datahub/ingestion/source/mode.py +18 -8
- datahub/ingestion/source/neo4j/neo4j_source.py +27 -7
- datahub/ingestion/source/nifi.py +37 -11
- datahub/ingestion/source/openapi.py +1 -1
- datahub/ingestion/source/openapi_parser.py +49 -17
- datahub/ingestion/source/powerbi/m_query/parser.py +3 -2
- datahub/ingestion/source/powerbi/m_query/tree_function.py +2 -1
- datahub/ingestion/source/powerbi/powerbi.py +1 -3
- datahub/ingestion/source/powerbi/rest_api_wrapper/data_resolver.py +2 -1
- datahub/ingestion/source/powerbi_report_server/report_server.py +26 -7
- datahub/ingestion/source/powerbi_report_server/report_server_domain.py +1 -1
- datahub/ingestion/source/preset.py +7 -4
- datahub/ingestion/source/pulsar.py +3 -2
- datahub/ingestion/source/qlik_sense/websocket_connection.py +4 -2
- datahub/ingestion/source/redash.py +31 -7
- datahub/ingestion/source/redshift/config.py +4 -0
- datahub/ingestion/source/redshift/datashares.py +236 -0
- datahub/ingestion/source/redshift/lineage.py +6 -2
- datahub/ingestion/source/redshift/lineage_v2.py +24 -9
- datahub/ingestion/source/redshift/profile.py +1 -1
- datahub/ingestion/source/redshift/query.py +133 -33
- datahub/ingestion/source/redshift/redshift.py +46 -73
- datahub/ingestion/source/redshift/redshift_schema.py +186 -6
- datahub/ingestion/source/redshift/report.py +3 -0
- datahub/ingestion/source/s3/config.py +5 -5
- datahub/ingestion/source/s3/source.py +20 -41
- datahub/ingestion/source/salesforce.py +550 -275
- datahub/ingestion/source/schema_inference/object.py +1 -1
- datahub/ingestion/source/sigma/sigma.py +1 -1
- datahub/ingestion/source/slack/slack.py +31 -10
- datahub/ingestion/source/snowflake/snowflake_connection.py +2 -2
- datahub/ingestion/source/snowflake/snowflake_queries.py +19 -13
- datahub/ingestion/source/snowflake/snowflake_query.py +6 -4
- datahub/ingestion/source/snowflake/snowflake_schema.py +3 -4
- datahub/ingestion/source/snowflake/snowflake_v2.py +1 -1
- datahub/ingestion/source/sql/athena.py +10 -16
- datahub/ingestion/source/sql/druid.py +1 -5
- datahub/ingestion/source/sql/hive.py +15 -6
- datahub/ingestion/source/sql/hive_metastore.py +3 -2
- datahub/ingestion/source/sql/mssql/job_models.py +29 -0
- datahub/ingestion/source/sql/mssql/source.py +11 -5
- datahub/ingestion/source/sql/oracle.py +127 -63
- datahub/ingestion/source/sql/sql_common.py +16 -18
- datahub/ingestion/source/sql/sql_types.py +2 -2
- datahub/ingestion/source/sql/teradata.py +19 -5
- datahub/ingestion/source/sql/trino.py +2 -2
- datahub/ingestion/source/state/stale_entity_removal_handler.py +4 -8
- datahub/ingestion/source/superset.py +222 -62
- datahub/ingestion/source/tableau/tableau.py +22 -6
- datahub/ingestion/source/tableau/tableau_common.py +3 -2
- datahub/ingestion/source/unity/ge_profiler.py +2 -1
- datahub/ingestion/source/unity/source.py +11 -1
- datahub/ingestion/source/vertexai.py +697 -0
- datahub/ingestion/source_config/pulsar.py +3 -1
- datahub/ingestion/transformer/pattern_cleanup_ownership.py +25 -7
- datahub/lite/duckdb_lite.py +3 -10
- datahub/lite/lite_local.py +1 -1
- datahub/lite/lite_util.py +4 -3
- datahub/metadata/_schema_classes.py +714 -417
- datahub/metadata/_urns/urn_defs.py +1673 -1649
- datahub/metadata/com/linkedin/pegasus2avro/incident/__init__.py +4 -0
- datahub/metadata/schema.avsc +16438 -16603
- datahub/metadata/schemas/AssertionInfo.avsc +3 -1
- datahub/metadata/schemas/BusinessAttributeInfo.avsc +6 -2
- datahub/metadata/schemas/BusinessAttributes.avsc +6 -0
- datahub/metadata/schemas/ChartInfo.avsc +1 -0
- datahub/metadata/schemas/CorpGroupKey.avsc +2 -1
- datahub/metadata/schemas/CorpUserInfo.avsc +13 -0
- datahub/metadata/schemas/CorpUserKey.avsc +2 -1
- datahub/metadata/schemas/DataHubIngestionSourceInfo.avsc +8 -3
- datahub/metadata/schemas/DataProcessInstanceInput.avsc +129 -1
- datahub/metadata/schemas/DataProcessInstanceOutput.avsc +131 -3
- datahub/metadata/schemas/DataProcessKey.avsc +2 -1
- datahub/metadata/schemas/DataProductKey.avsc +2 -1
- datahub/metadata/schemas/DomainKey.avsc +2 -1
- datahub/metadata/schemas/EditableSchemaMetadata.avsc +6 -2
- datahub/metadata/schemas/GlossaryNodeKey.avsc +3 -1
- datahub/metadata/schemas/GlossaryTermKey.avsc +2 -1
- datahub/metadata/schemas/GlossaryTerms.avsc +3 -1
- datahub/metadata/schemas/IncidentInfo.avsc +130 -46
- datahub/metadata/schemas/InputFields.avsc +3 -1
- datahub/metadata/schemas/MLFeatureKey.avsc +2 -1
- datahub/metadata/schemas/MLFeatureTableKey.avsc +2 -1
- datahub/metadata/schemas/MLModelDeploymentKey.avsc +2 -1
- datahub/metadata/schemas/MLModelGroupKey.avsc +3 -1
- datahub/metadata/schemas/MLModelKey.avsc +3 -1
- datahub/metadata/schemas/MLPrimaryKeyKey.avsc +2 -1
- datahub/metadata/schemas/MetadataChangeEvent.avsc +20 -2
- datahub/metadata/schemas/PostKey.avsc +2 -1
- datahub/metadata/schemas/SchemaFieldKey.avsc +2 -1
- datahub/metadata/schemas/SchemaMetadata.avsc +3 -1
- datahub/metadata/schemas/StructuredPropertyDefinition.avsc +14 -0
- datahub/metadata/schemas/VersionProperties.avsc +18 -0
- datahub/metadata/schemas/VersionSetProperties.avsc +5 -0
- datahub/pydantic/__init__.py +0 -0
- datahub/pydantic/compat.py +58 -0
- datahub/sdk/__init__.py +30 -12
- datahub/sdk/_all_entities.py +1 -1
- datahub/sdk/_attribution.py +4 -0
- datahub/sdk/_shared.py +258 -16
- datahub/sdk/_utils.py +35 -0
- datahub/sdk/container.py +30 -6
- datahub/sdk/dataset.py +118 -20
- datahub/sdk/{_entity.py → entity.py} +24 -1
- datahub/sdk/entity_client.py +1 -1
- datahub/sdk/main_client.py +23 -0
- datahub/sdk/resolver_client.py +17 -29
- datahub/sdk/search_client.py +50 -0
- datahub/sdk/search_filters.py +374 -0
- datahub/specific/dataset.py +3 -4
- datahub/sql_parsing/_sqlglot_patch.py +2 -10
- datahub/sql_parsing/schema_resolver.py +1 -1
- datahub/sql_parsing/split_statements.py +220 -126
- datahub/sql_parsing/sql_parsing_common.py +7 -0
- datahub/sql_parsing/sqlglot_lineage.py +1 -1
- datahub/sql_parsing/sqlglot_utils.py +1 -4
- datahub/testing/check_sql_parser_result.py +5 -6
- datahub/testing/compare_metadata_json.py +7 -6
- datahub/testing/pytest_hooks.py +56 -0
- datahub/upgrade/upgrade.py +2 -2
- datahub/utilities/file_backed_collections.py +3 -14
- datahub/utilities/ingest_utils.py +106 -0
- datahub/utilities/mapping.py +1 -1
- datahub/utilities/memory_footprint.py +3 -2
- datahub/utilities/sentinels.py +22 -0
- datahub/utilities/unified_diff.py +5 -1
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/LICENSE +0 -0
- {acryl_datahub-0.15.0.6rc2.dist-info → acryl_datahub-1.0.0.dist-info}/top_level.txt +0 -0
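
Before relying on behavior described in the diff below, it can be worth confirming which version of the package is actually installed, since this release jumps from a release candidate (0.15.0.6rc2) to 1.0.0. A minimal sketch using only the standard library; the distribution name `acryl-datahub` is taken from the wheel metadata above:

```python
# Minimal sketch: check the installed acryl-datahub version with the standard library.
from importlib.metadata import PackageNotFoundError, version

try:
    print("acryl-datahub", version("acryl-datahub"))
except PackageNotFoundError:
    print("acryl-datahub is not installed in this environment")
```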
datahub/ingestion/source/salesforce.py (+550 -275)

@@ -4,7 +4,7 @@ import time
 from dataclasses import dataclass, field as dataclass_field
 from datetime import datetime
 from enum import Enum
-from typing import Any, Dict, Iterable, List, Optional
+from typing import Any, Dict, Iterable, List, Literal, Optional, TypedDict

 import requests
 from pydantic import Field, validator
@@ -17,7 +17,9 @@ from datahub.configuration.common import (
     ConfigModel,
     ConfigurationError,
 )
-from datahub.configuration.source_common import DatasetSourceConfigMixin
+from datahub.configuration.source_common import (
+    DatasetSourceConfigMixin,
+)
 from datahub.emitter.mcp import MetadataChangeProposalWrapper
 from datahub.emitter.mcp_builder import add_domain_to_entity_wu
 from datahub.ingestion.api.common import PipelineContext
@@ -29,9 +31,17 @@ from datahub.ingestion.api.decorators import (
     platform_name,
     support_status,
 )
-from datahub.ingestion.api.source import
+from datahub.ingestion.api.source import MetadataWorkUnitProcessor, SourceReport
 from datahub.ingestion.api.workunit import MetadataWorkUnit
 from datahub.ingestion.source.common.subtypes import DatasetSubTypes
+from datahub.ingestion.source.state.stale_entity_removal_handler import (
+    StaleEntityRemovalHandler,
+    StaleEntityRemovalSourceReport,
+)
+from datahub.ingestion.source.state.stateful_ingestion_base import (
+    StatefulIngestionConfigBase,
+    StatefulIngestionSourceBase,
+)
 from datahub.ingestion.source_config.operation_config import (
     OperationConfig,
     is_profiling_enabled,
@@ -41,6 +51,7 @@ from datahub.metadata.schema_classes import (
     BooleanTypeClass,
     BytesTypeClass,
     DataPlatformInstanceClass,
+    DatasetLineageTypeClass,
     DatasetProfileClass,
     DatasetPropertiesClass,
     DateTypeClass,
@@ -59,6 +70,8 @@ from datahub.metadata.schema_classes import (
     StringTypeClass,
     SubTypesClass,
     TagAssociationClass,
+    UpstreamClass,
+    UpstreamLineageClass,
 )
 from datahub.utilities import config_clean
 from datahub.utilities.lossy_collections import LossyList
@@ -85,7 +98,10 @@ class SalesforceProfilingConfig(ConfigModel):
     # TODO - support field level profiling


-class SalesforceConfig(DatasetSourceConfigMixin):
+class SalesforceConfig(
+    StatefulIngestionConfigBase,
+    DatasetSourceConfigMixin,
+):
     platform: str = "salesforce"

     auth: SalesforceAuthType = SalesforceAuthType.USERNAME_PASSWORD
@@ -138,6 +154,12 @@ class SalesforceConfig(DatasetSourceConfigMixin):
         description="Regex patterns for profiles to filter in ingestion, allowed by the `object_pattern`.",
     )

+    # Given lack of ERD visual graph view support, this alternate is useful.
+    use_referenced_entities_as_upstreams: bool = Field(
+        default=False,
+        description="(Experimental) If enabled, referenced entities will be treated as upstream entities.",
+    )
+
     def is_profiling_enabled(self) -> bool:
         return self.profiling.enabled and is_profiling_enabled(
             self.profiling.operation_config
@@ -149,9 +171,15 @@ class SalesforceConfig(DatasetSourceConfigMixin):


 @dataclass
-class SalesforceSourceReport(SourceReport):
+class SalesforceSourceReport(StaleEntityRemovalSourceReport):
     filtered: LossyList[str] = dataclass_field(default_factory=LossyList)

+    objects_with_calculated_field: LossyList[str] = dataclass_field(
+        default_factory=LossyList
+    )
+
+    num_objects_missing_formula: int = 0
+
     def report_dropped(self, ent_name: str) -> None:
         self.filtered.append(ent_name)

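SalesforceConfig now mixes in StatefulIngestionConfigBase (so the source can participate in stateful ingestion and stale-entity removal) and gains the experimental use_referenced_entities_as_upstreams flag, while the report class is rebased onto StaleEntityRemovalSourceReport with counters for calculated fields. A sketch of how the new flag might be switched on from a Python-driven recipe; the overall recipe shape, the placeholder credentials, and the datahub-rest sink are assumptions based on the usual DataHub pipeline layout, not something this diff shows:

```python
# Hypothetical recipe sketch for the new experimental flag; the structure and sink
# are assumed from the standard DataHub pipeline layout, not taken from this diff.
from datahub.ingestion.run.pipeline import Pipeline

recipe = {
    "source": {
        "type": "salesforce",
        "config": {
            "username": "ingest@example.com",  # placeholder credentials
            "password": "********",
            "security_token": "********",
            # New in this release: emit referenced sObjects as upstream datasets.
            "use_referenced_entities_as_upstreams": True,
            # Enabled by the new StatefulIngestionConfigBase base class.
            "stateful_ingestion": {"enabled": True},
        },
    },
    "sink": {"type": "datahub-rest", "config": {"server": "http://localhost:8080"}},
}

pipeline = Pipeline.create(recipe)
pipeline.run()
pipeline.raise_from_status()
```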
@@ -186,6 +214,310 @@ FIELD_TYPE_MAPPING = {
 }


+class EntityDefinition(TypedDict):
+    DurableId: str
+    QualifiedApiName: str
+    DeveloperName: str
+    Label: str
+    PluralLabel: str
+    InternalSharingModel: str
+    ExternalSharingModel: str
+    DeploymentStatus: Literal[
+        "Deployed", "InDevelopment"
+    ]  # Common values for DeploymentStatus
+
+
+class UserInfo(TypedDict):
+    Username: str
+
+
+class FieldDefinition(TypedDict):
+    DataType: str
+    LastModifiedDate: str
+    LastModifiedBy: UserInfo
+    IsIndexed: bool
+    ComplianceGroup: Optional[str]
+    Description: Optional[str]
+
+
+class ReferenceTo(TypedDict):
+    referenceTo: List[str]
+
+
+class EntityParticle(TypedDict):
+    QualifiedApiName: str
+    DeveloperName: str
+    Label: str
+    DataType: str
+    Precision: Optional[int]
+    Scale: Optional[int]
+    Length: Optional[int]
+    Digits: Optional[int]
+    IsUnique: bool
+    IsCompound: bool
+    IsComponent: bool
+    ReferenceTo: Optional[ReferenceTo]
+    RelationshipName: Optional[str]
+    IsNillable: bool
+    InlineHelpText: Optional[str]
+    IsCalculated: bool
+    FieldDefinition: FieldDefinition
+
+
+class CustomObject(TypedDict):
+    Description: Optional[str]
+    Language: str
+    ManageableState: Literal["unmanaged", "installed", "beta", "released"]
+    CreatedDate: str
+    CreatedBy: UserInfo
+    LastModifiedDate: str
+    LastModifiedBy: UserInfo
+
+
+class CustomField(TypedDict):
+    DeveloperName: str
+    CreatedDate: str
+    CreatedBy: UserInfo
+    InlineHelpText: Optional[str]
+    LastModifiedDate: str
+    LastModifiedBy: UserInfo
+
+
+class SObjectRecordCount(TypedDict):
+    count: int
+    name: str
+
+
+class SObjectField(TypedDict):
+    name: str
+    calculatedFormula: Optional[str]
+
+
+class SObjectDescribe(TypedDict):
+    fields: List[SObjectField]
+
+
+class SalesforceApi:
+    def __init__(
+        self, sf: Salesforce, config: SalesforceConfig, report: SalesforceSourceReport
+    ) -> None:
+        self.config = config
+        self.report = report
+        self.sf = sf
+        self.base_url = "https://{instance}/services/data/v{sf_version}/".format(
+            instance=self.sf.sf_instance, sf_version=self.sf.sf_version
+        )
+
+    @staticmethod
+    def create_salesforce_client(config: SalesforceConfig) -> Salesforce:
+        common_args: Dict[str, Any] = {
+            "domain": "test" if config.is_sandbox else None,
+            "session": requests.Session(),
+        }
+        if config.api_version:
+            common_args["version"] = config.api_version
+
+        if config.auth is SalesforceAuthType.DIRECT_ACCESS_TOKEN:
+            logger.debug("Access Token Provided in Config")
+            assert config.access_token is not None, (
+                "Config access_token is required for DIRECT_ACCESS_TOKEN auth"
+            )
+            assert config.instance_url is not None, (
+                "Config instance_url is required for DIRECT_ACCESS_TOKEN auth"
+            )
+
+            sf = Salesforce(
+                instance_url=config.instance_url,
+                session_id=config.access_token,
+                **common_args,
+            )
+        elif config.auth is SalesforceAuthType.USERNAME_PASSWORD:
+            logger.debug("Username/Password Provided in Config")
+            assert config.username is not None, (
+                "Config username is required for USERNAME_PASSWORD auth"
+            )
+            assert config.password is not None, (
+                "Config password is required for USERNAME_PASSWORD auth"
+            )
+            assert config.security_token is not None, (
+                "Config security_token is required for USERNAME_PASSWORD auth"
+            )
+
+            sf = Salesforce(
+                username=config.username,
+                password=config.password,
+                security_token=config.security_token,
+                **common_args,
+            )
+
+        elif config.auth is SalesforceAuthType.JSON_WEB_TOKEN:
+            logger.debug("Json Web Token provided in the config")
+            assert config.username is not None, (
+                "Config username is required for JSON_WEB_TOKEN auth"
+            )
+            assert config.consumer_key is not None, (
+                "Config consumer_key is required for JSON_WEB_TOKEN auth"
+            )
+            assert config.private_key is not None, (
+                "Config private_key is required for JSON_WEB_TOKEN auth"
+            )
+
+            sf = Salesforce(
+                username=config.username,
+                consumer_key=config.consumer_key,
+                privatekey=config.private_key,
+                **common_args,
+            )
+
+        SalesforceApi.update_salesforce_api_version(config, sf)
+
+        return sf
+
+    @staticmethod
+    def update_salesforce_api_version(config: SalesforceConfig, sf: Salesforce) -> None:
+        if not config.api_version:
+            # List all REST API versions and use latest one
+            versions_url = "https://{instance}/services/data/".format(
+                instance=sf.sf_instance,
+            )
+            versions_response = sf._call_salesforce("GET", versions_url).json()
+            latest_version = versions_response[-1]
+            version = latest_version["version"]
+            # we could avoid setting the version like below (after the Salesforce object has been already initiated
+            # above), since, according to the docs:
+            # https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_versions.htm
+            # we don't need to be authenticated to list the versions (so we could perform this call before even
+            # authenticating)
+            sf.sf_version = version
+        logger.debug(
+            "Using Salesforce REST API version: {version}".format(version=sf.sf_version)
+        )
+
+    def list_objects(self) -> List[EntityDefinition]:
+        # Using Describe Global REST API returns many more objects than required.
+        # Response does not have the attribute ("customizable") that can be used
+        # to filter out entities not on ObjectManager UI. Hence SOQL on EntityDefinition
+        # object is used instead, as suggested by salesforce support.
+
+        query_url = (
+            self.base_url
+            + "tooling/query/?q=SELECT DurableId,QualifiedApiName,DeveloperName,"
+            + "Label,PluralLabel,InternalSharingModel,ExternalSharingModel,DeploymentStatus "
+            + "FROM EntityDefinition WHERE IsCustomizable = true"
+        )
+        entities_response = self.sf._call_salesforce("GET", query_url).json()
+        logger.debug(
+            "Salesforce EntityDefinition query returned {count} sObjects".format(
+                count=len(entities_response["records"])
+            )
+        )
+        return entities_response["records"]
+
+    def describe_object(self, sObjectName: str) -> SObjectDescribe:
+        logger.debug(f"Querying Salesforce {sObjectName} describe REST API")
+
+        describe_endpoint = f"{self.base_url}sobjects/{sObjectName}/describe/"
+        response = self.sf._call_salesforce("GET", describe_endpoint)
+
+        logger.debug(f"Received Salesforce {sObjectName} describe respone")
+        return {"fields": response.json()["fields"]}
+
+    def get_custom_object_details(
+        self, sObjectDeveloperName: str
+    ) -> Optional[CustomObject]:
+        query_url = (
+            self.base_url
+            + "tooling/query/?q=SELECT Description, Language, ManageableState, "
+            + "CreatedDate, CreatedBy.Username, LastModifiedDate, LastModifiedBy.Username "
+            + f"FROM CustomObject where DeveloperName='{sObjectDeveloperName}'"
+        )
+        custom_objects_response = self.sf._call_salesforce("GET", query_url).json()
+        if len(custom_objects_response["records"]) > 0:
+            logger.debug("Salesforce CustomObject query returned with details")
+            return custom_objects_response["records"][0]
+        return None
+
+    def get_fields_for_object(
+        self, sObjectName: str, sObjectDurableId: str
+    ) -> List[EntityParticle]:
+        sObject_fields_query_url = (
+            self.base_url
+            + "tooling/query?q=SELECT "
+            + "QualifiedApiName,DeveloperName,Label, FieldDefinition.DataType, DataType,"
+            + "FieldDefinition.LastModifiedDate, FieldDefinition.LastModifiedBy.Username,"
+            + "Precision, Scale, Length, Digits ,FieldDefinition.IsIndexed, IsUnique,"
+            + "IsCompound, IsComponent, ReferenceTo, FieldDefinition.ComplianceGroup,"
+            + "RelationshipName, IsNillable, FieldDefinition.Description, InlineHelpText, "
+            + "IsCalculated FROM EntityParticle WHERE EntityDefinitionId='{}'".format(
+                sObjectDurableId
+            )
+        )
+
+        sObject_fields_response = self.sf._call_salesforce(
+            "GET", sObject_fields_query_url
+        ).json()
+
+        logger.debug(f"Received Salesforce {sObjectName} fields response")
+
+        all_fields = sObject_fields_response["records"]
+        return all_fields
+
+    def get_custom_fields_for_object(
+        self, sObjectName: str, sObjectDurableId: str
+    ) -> Dict[str, CustomField]:
+        sObject_custom_fields_query_url = (
+            self.base_url
+            + "tooling/query?q=SELECT "
+            + "DeveloperName,CreatedDate,CreatedBy.Username,InlineHelpText,"
+            + "LastModifiedDate,LastModifiedBy.Username "
+            + "FROM CustomField WHERE EntityDefinitionId='{}'".format(sObjectDurableId)
+        )
+
+        customFields: Dict[str, CustomField] = {}
+        try:
+            sObject_custom_fields_response = self.sf._call_salesforce(
+                "GET", sObject_custom_fields_query_url
+            ).json()
+
+            logger.debug(
+                "Received Salesforce {sObject} custom fields response".format(
+                    sObject=sObjectName
+                )
+            )
+
+        except Exception as e:
+            error = "Salesforce CustomField query failed. "
+            if "sObject type 'CustomField' is not supported." in str(e):
+                # https://github.com/afawcett/apex-toolingapi/issues/19
+                error += "Please verify if user has 'View All Data' permission."
+
+            self.report.warning(message=error, exc=e)
+        else:
+            customFields = {
+                record["DeveloperName"]: record
+                for record in sObject_custom_fields_response["records"]
+            }
+
+        return customFields
+
+    def get_approximate_record_count(self, sObjectName: str) -> SObjectRecordCount:
+        sObject_records_count_url = (
+            f"{self.base_url}limits/recordCount?sObjects={sObjectName}"
+        )
+
+        sObject_record_count_response = self.sf._call_salesforce(
+            "GET", sObject_records_count_url
+        ).json()
+
+        logger.debug(
+            "Received Salesforce {sObject} record count response".format(
+                sObject=sObjectName
+            )
+        )
+        sobject_record_counts = sObject_record_count_response.get("sObjects", [])
+        return sobject_record_counts[0]
+
+
 @platform_name("Salesforce")
 @config_class(SalesforceConfig)
 @support_status(SupportStatus.INCUBATING)
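The new SalesforceApi class above concentrates all REST and Tooling API access that the source previously performed inline: client construction with per-auth-type assertions, automatic REST API version discovery, and the EntityDefinition, EntityParticle, CustomField, describe, and recordCount queries. An illustrative sketch of how those pieces compose, calling only methods defined in this hunk; constructing the config directly with placeholder credentials (rather than through an ingestion recipe) is an assumption made for brevity:

```python
# Illustrative composition of the SalesforceApi helper introduced above.
# Placeholder credentials; running this outside SalesforceSource is an assumption.
config = SalesforceConfig(
    username="ingest@example.com",
    password="********",
    security_token="********",
)
report = SalesforceSourceReport()

sf = SalesforceApi.create_salesforce_client(config)  # simple-salesforce client
api = SalesforceApi(sf, config, report)

for entity in api.list_objects():  # SOQL against EntityDefinition
    name, durable_id = entity["QualifiedApiName"], entity["DurableId"]
    fields = api.get_fields_for_object(name, durable_id)
    record_count = api.get_approximate_record_count(name)
    print(name, len(fields), record_count["count"])
```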
@@ -214,123 +546,44 @@ FIELD_TYPE_MAPPING = {
     capability_name=SourceCapability.TAGS,
     description="Enabled by default",
 )
-class SalesforceSource(Source):
-    base_url: str
-    config: SalesforceConfig
-    report: SalesforceSourceReport
-    session: requests.Session
-    sf: Salesforce
-    fieldCounts: Dict[str, int]
-
+class SalesforceSource(StatefulIngestionSourceBase):
     def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
-        super().__init__(ctx)
+        super().__init__(config, ctx)
+        self.ctx = ctx
         self.config = config
-        self.report = SalesforceSourceReport()
-        self.session = requests.Session()
+        self.report: SalesforceSourceReport = SalesforceSourceReport()
         self.platform: str = "salesforce"
-        self.fieldCounts = {}
-
-
-
-
-
-
+        self.fieldCounts: Dict[str, int] = {}
+
+    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
+        return [
+            *super().get_workunit_processors(),
+            StaleEntityRemovalHandler.create(
+                self, self.config, self.ctx
+            ).workunit_processor,
+        ]

+    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         try:
-
-            logger.debug("Access Token Provided in Config")
-            assert self.config.access_token is not None, (
-                "Config access_token is required for DIRECT_ACCESS_TOKEN auth"
-            )
-            assert self.config.instance_url is not None, (
-                "Config instance_url is required for DIRECT_ACCESS_TOKEN auth"
-            )
-
-            self.sf = Salesforce(
-                instance_url=self.config.instance_url,
-                session_id=self.config.access_token,
-                **common_args,
-            )
-        elif self.config.auth is SalesforceAuthType.USERNAME_PASSWORD:
-            logger.debug("Username/Password Provided in Config")
-            assert self.config.username is not None, (
-                "Config username is required for USERNAME_PASSWORD auth"
-            )
-            assert self.config.password is not None, (
-                "Config password is required for USERNAME_PASSWORD auth"
-            )
-            assert self.config.security_token is not None, (
-                "Config security_token is required for USERNAME_PASSWORD auth"
-            )
-
-            self.sf = Salesforce(
-                username=self.config.username,
-                password=self.config.password,
-                security_token=self.config.security_token,
-                **common_args,
-            )
-
-        elif self.config.auth is SalesforceAuthType.JSON_WEB_TOKEN:
-            logger.debug("Json Web Token provided in the config")
-            assert self.config.username is not None, (
-                "Config username is required for JSON_WEB_TOKEN auth"
-            )
-            assert self.config.consumer_key is not None, (
-                "Config consumer_key is required for JSON_WEB_TOKEN auth"
-            )
-            assert self.config.private_key is not None, (
-                "Config private_key is required for JSON_WEB_TOKEN auth"
-            )
-
-            self.sf = Salesforce(
-                username=self.config.username,
-                consumer_key=self.config.consumer_key,
-                privatekey=self.config.private_key,
-                **common_args,
-            )
-
+            sf = SalesforceApi.create_salesforce_client(self.config)
         except SalesforceAuthenticationFailed as e:
-            logger.error(e)
             if "API_CURRENTLY_DISABLED" in str(e):
                 # https://help.salesforce.com/s/articleView?id=001473830&type=1
-                error = "
+                error = "Please make sure user has API Enabled Access."
             else:
-                error = "
+                error = "Please verify your credentials."
             if (
                 self.config.instance_url
                 and "sandbox" in self.config.instance_url.lower()
             ):
                 error += "Please set `is_sandbox: True` in recipe if this is sandbox account."
-
-
-            if not self.config.api_version:
-                # List all REST API versions and use latest one
-                versions_url = "https://{instance}/services/data/".format(
-                    instance=self.sf.sf_instance,
-                )
-                versions_response = self.sf._call_salesforce("GET", versions_url).json()
-                latest_version = versions_response[-1]
-                version = latest_version["version"]
-                # we could avoid setting the version like below (after the Salesforce object has been already initiated
-                # above), since, according to the docs:
-                # https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_versions.htm
-                # we don't need to be authenticated to list the versions (so we could perform this call before even
-                # authenticating)
-                self.sf.sf_version = version
-
-            self.base_url = "https://{instance}/services/data/v{sf_version}/".format(
-                instance=self.sf.sf_instance, sf_version=self.sf.sf_version
-            )
+            self.report.failure(title="Salesforce login failed", message=error, exc=e)
+            return

-
-            "Using Salesforce REST API version: {version}".format(
-                version=self.sf.sf_version
-            )
-        )
+        self.sf_api = SalesforceApi(sf, self.config, self.report)

-    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
         try:
-            sObjects = self.
+            sObjects = self.sf_api.list_objects()
         except Exception as e:
             if "sObject type 'EntityDefinition' is not supported." in str(e):
                 # https://developer.salesforce.com/docs/atlas.en-us.api_tooling.meta/api_tooling/tooling_api_objects_entitydefinition.htm
@@ -344,7 +597,7 @@ class SalesforceSource(Source):
             yield from self.get_salesforce_object_workunits(sObject)

     def get_salesforce_object_workunits(
-        self, sObject:
+        self, sObject: EntityDefinition
     ) -> Iterable[MetadataWorkUnit]:
         sObjectName = sObject["QualifiedApiName"]

@@ -364,19 +617,50 @@ class SalesforceSource(Source):
             self.config.env,
         )

-        customObject =
+        customObject = None
         if sObjectName.endswith("__c"):  # Is Custom Object
-            customObject = self.get_custom_object_details(
+            customObject = self.sf_api.get_custom_object_details(
+                sObject["DeveloperName"]
+            )

             # Table Created, LastModified is available for Custom Object
             yield from self.get_operation_workunit(customObject, datasetUrn)

         yield self.get_properties_workunit(sObject, customObject, datasetUrn)

+        allFields = self.sf_api.get_fields_for_object(sObjectName, sObject["DurableId"])
+
+        customFields = self.sf_api.get_custom_fields_for_object(
+            sObjectName, sObject["DurableId"]
+        )
+
+        if any(field["IsCalculated"] for field in allFields):
+            # Although formula is present in Metadata column of CustomField entity,
+            # we can not use it as it allows querying only for one field at a time
+            # and that would not be performant
+            calculated_field_formulae = self.get_calculated_field_formulae(sObjectName)
+            if calculated_field_formulae:
+                self.report.objects_with_calculated_field.append(sObjectName)
+            else:
+                # For some objects, although some fields are calculated, formula is absent
+                # These are typically salesforce system calculated fields whose formula
+                # is not exposed
+                self.report.num_objects_missing_formula += 1
+        else:
+            calculated_field_formulae = {}
+
         yield from self.get_schema_metadata_workunit(
-            sObjectName,
+            sObjectName,
+            allFields,
+            customFields,
+            customObject,
+            datasetUrn,
+            calculated_field_formulae,
         )

+        if self.config.use_referenced_entities_as_upstreams:
+            yield from self.get_upstream_workunit(datasetUrn, allFields)
+
         yield self.get_subtypes_workunit(sObjectName, datasetUrn)

         if self.config.platform_instance is not None:
@@ -390,39 +674,33 @@ class SalesforceSource(Source):
         ):
             yield from self.get_profile_workunit(sObjectName, datasetUrn)

-    def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-        logger.debug(
-            "Salesforce EntityDefinition query returned {count} sObjects".format(
-                count=len(entities_response["records"])
-            )
-        )
-        return entities_response["records"]
+    def get_upstream_workunit(
+        self, datasetUrn: str, allFields: List[EntityParticle]
+    ) -> Iterable[MetadataWorkUnit]:
+        upstreams: List[UpstreamClass] = []
+        for field in allFields:
+            if (
+                field["DataType"] == "reference"
+                and field["ReferenceTo"]
+                and field["ReferenceTo"]["referenceTo"]
+            ):
+                for referenced_sObjectName in field["ReferenceTo"]["referenceTo"]:
+                    upstreams.append(
+                        UpstreamClass(
+                            dataset=builder.make_dataset_urn_with_platform_instance(
+                                self.platform,
+                                referenced_sObjectName,
+                                self.config.platform_instance,
+                                self.config.env,
+                            ),
+                            type=DatasetLineageTypeClass.TRANSFORMED,
+                        )
+                    )
+
+        if upstreams:
+            yield MetadataChangeProposalWrapper(
+                entityUrn=datasetUrn, aspect=UpstreamLineageClass(upstreams=upstreams)
+            ).as_workunit()

     def get_domain_workunit(
         self, dataset_name: str, datasetUrn: str
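When use_referenced_entities_as_upstreams is enabled, get_upstream_workunit turns every reference-typed field into one UpstreamClass entry per referenced sObject and emits them as a single UpstreamLineageClass aspect on the dataset. A hedged sketch of the aspect this produces for a hypothetical Contact object whose AccountId field references Account; the object and field names are illustrative, not taken from the diff:

```python
# Hypothetical aspect emitted for a Contact sObject with an AccountId reference
# field; object and field names are illustrative.
import datahub.emitter.mce_builder as builder
from datahub.metadata.schema_classes import (
    DatasetLineageTypeClass,
    UpstreamClass,
    UpstreamLineageClass,
)

contact_urn = builder.make_dataset_urn("salesforce", "Contact", env="PROD")
account_urn = builder.make_dataset_urn("salesforce", "Account", env="PROD")

expected_aspect = UpstreamLineageClass(
    upstreams=[
        UpstreamClass(dataset=account_urn, type=DatasetLineageTypeClass.TRANSFORMED),
    ]
)
```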
@@ -452,11 +730,15 @@ class SalesforceSource(Source):
         ).as_workunit()

     def get_operation_workunit(
-        self, customObject:
+        self, customObject: Optional[CustomObject], datasetUrn: str
     ) -> Iterable[MetadataWorkUnit]:
         reported_time: int = int(time.time() * 1000)

-        if
+        if (
+            customObject
+            and customObject.get("CreatedBy")
+            and customObject.get("CreatedDate")
+        ):
             timestamp = self.get_time_from_salesforce_timestamp(
                 customObject["CreatedDate"]
             )
@@ -499,7 +781,10 @@ class SalesforceSource(Source):
         )

     def get_properties_workunit(
-        self,
+        self,
+        sObject: EntityDefinition,
+        customObject: Optional[CustomObject],
+        datasetUrn: str,
     ) -> MetadataWorkUnit:
         propertyLabels = {
             # from EntityDefinition
@@ -520,17 +805,18 @@ class SalesforceSource(Source):
             for k, v in sObject.items()
             if k in propertyLabels and v is not None
         }
-
-
-
-
-
-
-
+        if customObject:
+            sObjectProperties.update(
+                {
+                    propertyLabels[k]: str(v)
+                    for k, v in customObject.items()
+                    if k in propertyLabels and v is not None
+                }
+            )

         datasetProperties = DatasetPropertiesClass(
             name=sObject["Label"],
-            description=customObject.get("Description"),
+            description=customObject.get("Description") if customObject else None,
             customProperties=sObjectProperties,
         )
         return MetadataChangeProposalWrapper(
@@ -555,58 +841,58 @@ class SalesforceSource(Source):
     ) -> Iterable[MetadataWorkUnit]:
         # Here approximate record counts as returned by recordCount API are used as rowCount
         # In future, count() SOQL query may be used instead, if required, might be more expensive
-
-            f"{self.base_url}limits/recordCount?sObjects={sObjectName}"
-        )
-
-        sObject_record_count_response = self.sf._call_salesforce(
-            "GET", sObject_records_count_url
-        ).json()
+        sobject_record_count = self.sf_api.get_approximate_record_count(sObjectName)

-
-
-
-
+        datasetProfile = DatasetProfileClass(
+            timestampMillis=int(time.time() * 1000),
+            rowCount=sobject_record_count["count"],
+            columnCount=self.fieldCounts[sObjectName],
         )
-
-
-
-            timestampMillis=int(time.time() * 1000),
-            rowCount=entry["count"],
-            columnCount=self.fieldCounts[sObjectName],
-        )
-        yield MetadataChangeProposalWrapper(
-            entityUrn=datasetUrn, aspect=datasetProfile
-        ).as_workunit()
+        yield MetadataChangeProposalWrapper(
+            entityUrn=datasetUrn, aspect=datasetProfile
+        ).as_workunit()

     # Here field description is created from label, description and inlineHelpText
-    def _get_field_description(
-
-
-
-
-
-
+    def _get_field_description(
+        self,
+        field: EntityParticle,
+        customField: Optional[CustomField],
+        formula: Optional[str],
+    ) -> str:
+        description_parts: List[str] = []
+
+        if field.get("Label") and field["Label"].startswith("#"):
+            description_parts.append("\\" + field["Label"])
+        elif field.get("Label"):
+            description_parts.append(field["Label"])

         text = field.get("FieldDefinition", {}).get("Description", None)
         if text:
             prefix = "\\" if text.startswith("#") else ""
-
+            description_parts.append(f"{prefix}{text}")

-        text = field.get("InlineHelpText"
+        text = field.get("InlineHelpText")
         if text:
             prefix = "\\" if text.startswith("#") else ""
-
+            description_parts.append(f"{prefix}{text}")
+
+        if formula:
+            description_parts.append(f"Formula: {formula}")

-        return
+        return "\n\n".join(description_parts)

     # Here jsonProps is used to add additional salesforce field level properties.
-    def _get_field_json_props(
+    def _get_field_json_props(
+        self, field: EntityParticle, customField: Optional[CustomField]
+    ) -> str:
         jsonProps = {}

         if field.get("IsUnique"):
             jsonProps["IsUnique"] = True

+        if field.get("IsCalculated"):
+            jsonProps["IsCalculated"] = True
+
         return json.dumps(jsonProps)

     def _get_schema_field(
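_get_field_description now builds the field documentation from up to four parts — the label, the FieldDefinition description, the inline help text, and (new in this release) the calculated-field formula — escaping a leading "#" so it is not rendered as a heading, and joins the parts with blank lines. A small worked example with illustrative values showing the string this assembly produces:

```python
# Worked example of the description assembly; the field values are illustrative.
label = "Days Since Last Activity"
definition_description = "Computed age of the record's last activity."
inline_help = "Shown next to the field on the page layout."
formula = "TODAY() - LastActivityDate"

description = "\n\n".join(
    [label, definition_description, inline_help, f"Formula: {formula}"]
)
# -> four paragraphs separated by blank lines, ending with
#    "Formula: TODAY() - LastActivityDate"
```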
@@ -614,8 +900,9 @@ class SalesforceSource(Source):
         sObjectName: str,
         fieldName: str,
         fieldType: str,
-        field:
-        customField:
+        field: EntityParticle,
+        customField: Optional[CustomField],
+        formula: Optional[str] = None,
     ) -> SchemaFieldClass:
         fieldPath = fieldName

@@ -629,7 +916,7 @@ class SalesforceSource(Source):

         fieldTags: List[str] = self.get_field_tags(fieldName, field)

-        description = self._get_field_description(field, customField)
+        description = self._get_field_description(field, customField, formula)

         schemaField = SchemaFieldClass(
             fieldPath=fieldPath,
@@ -644,11 +931,19 @@ class SalesforceSource(Source):
         )

         # Created and LastModified Date and Actor are available for Custom Fields only
-        if
+        if (
+            customField
+            and customField.get("CreatedDate")
+            and customField.get("CreatedBy")
+        ):
             schemaField.created = self.get_audit_stamp(
                 customField["CreatedDate"], customField["CreatedBy"]["Username"]
             )
-        if
+        if (
+            customField
+            and customField.get("LastModifiedDate")
+            and customField.get("LastModifiedBy")
+        ):
             schemaField.lastModified = self.get_audit_stamp(
                 customField["LastModifiedDate"],
                 customField["LastModifiedBy"]["Username"],
@@ -656,7 +951,7 @@ class SalesforceSource(Source):

         return schemaField

-    def get_field_tags(self, fieldName: str, field:
+    def get_field_tags(self, fieldName: str, field: EntityParticle) -> List[str]:
         fieldTags: List[str] = []

         if fieldName.endswith("__c"):
@@ -689,69 +984,39 @@ class SalesforceSource(Source):
             actor=builder.make_user_urn(username),
         )

-    def
-
-
-        sObject_fields_query_url = (
-            self.base_url
-            + "tooling/query?q=SELECT "
-            + "QualifiedApiName,DeveloperName,Label, FieldDefinition.DataType, DataType,"
-            + "FieldDefinition.LastModifiedDate, FieldDefinition.LastModifiedBy.Username,"
-            + "Precision, Scale, Length, Digits ,FieldDefinition.IsIndexed, IsUnique,"
-            + "IsCompound, IsComponent, ReferenceTo, FieldDefinition.ComplianceGroup,"
-            + "RelationshipName, IsNillable, FieldDefinition.Description, InlineHelpText "
-            + "FROM EntityParticle WHERE EntityDefinitionId='{}'".format(
-                sObject["DurableId"]
-            )
-        )
+    def get_calculated_field_formulae(self, sObjectName: str) -> Dict[str, str]:
+        # extract field wise formula and return response
+        # Includes entries for calculated fields only

-
-            "GET", sObject_fields_query_url
-        ).json()
-
-        logger.debug(f"Received Salesforce {sObjectName} fields response")
-
-        sObject_custom_fields_query_url = (
-            self.base_url
-            + "tooling/query?q=SELECT "
-            + "DeveloperName,CreatedDate,CreatedBy.Username,InlineHelpText,"
-            + "LastModifiedDate,LastModifiedBy.Username "
-            + "FROM CustomField WHERE EntityDefinitionId='{}'".format(
-                sObject["DurableId"]
-            )
-        )
-
-        customFields: Dict[str, Dict] = {}
+        calculated_fields = {}
         try:
-
-
-
-
-            logger.debug(
-                "Received Salesforce {sObject} custom fields response".format(
-                    sObject=sObjectName
-                )
-            )
-
+            describe_object_result = self.sf_api.describe_object(sObjectName)
+            for field in describe_object_result["fields"]:
+                if field["calculatedFormula"]:
+                    calculated_fields[field["name"]] = field["calculatedFormula"]
         except Exception as e:
-
-
-
-
-
-
-        else:
-            customFields = {
-                record["DeveloperName"]: record
-                for record in sObject_custom_fields_response["records"]
-            }
+            self.report.warning(
+                message="Failed to get calculated field formulae",
+                context=sObjectName,
+                exc=e,
+            )
+        return calculated_fields

+    def get_schema_metadata_workunit(
+        self,
+        sObjectName: str,
+        all_fields: List[EntityParticle],
+        custom_fields: Dict[str, CustomField],
+        customObject: Optional[CustomObject],
+        datasetUrn: str,
+        calculated_field_formulae: Dict[str, str],
+    ) -> Iterable[MetadataWorkUnit]:
         fields: List[SchemaFieldClass] = []
         primaryKeys: List[str] = []
         foreignKeys: List[ForeignKeyConstraintClass] = []

-        for field in
-            customField =
+        for field in all_fields:
+            customField = custom_fields.get(field["DeveloperName"])

             fieldName = field["QualifiedApiName"]
             fieldType = field["DataType"]
@@ -761,20 +1026,21 @@ class SalesforceSource(Source):
                 continue

             schemaField: SchemaFieldClass = self._get_schema_field(
-                sObjectName,
+                sObjectName,
+                fieldName,
+                fieldType,
+                field,
+                customField,
+                calculated_field_formulae.get(fieldName),
             )
             fields.append(schemaField)

             if fieldType == "id":
                 primaryKeys.append(fieldName)

-
-
-
-            ):
-                foreignKeys.extend(
-                    list(self.get_foreign_keys_from_field(fieldName, field, datasetUrn))
-                )
+            foreignKeys.extend(
+                list(self.get_foreign_keys_from_field(fieldName, field, datasetUrn))
+            )

         schemaMetadata = SchemaMetadataClass(
             schemaName="",
@@ -788,7 +1054,11 @@ class SalesforceSource(Source):
         )

         # Created Date and Actor are available for Custom Object only
-        if
+        if (
+            customObject
+            and customObject.get("CreatedDate")
+            and customObject.get("CreatedBy")
+        ):
             schemaMetadata.created = self.get_audit_stamp(
                 customObject["CreatedDate"], customObject["CreatedBy"]["Username"]
             )
@@ -799,26 +1069,31 @@ class SalesforceSource(Source):
         ).as_workunit()

     def get_foreign_keys_from_field(
-        self, fieldName: str, field:
+        self, fieldName: str, field: EntityParticle, datasetUrn: str
     ) -> Iterable[ForeignKeyConstraintClass]:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        if (
+            field["DataType"] == "reference"
+            and field["ReferenceTo"]
+            and field["ReferenceTo"]["referenceTo"] is not None
+        ):
+            # https://developer.salesforce.com/docs/atlas.en-us.object_reference.meta/object_reference/field_types.htm#i1435823
+            foreignDatasets = [
+                builder.make_dataset_urn_with_platform_instance(
+                    self.platform,
+                    fsObject,
+                    self.config.platform_instance,
+                    self.config.env,
+                )
+                for fsObject in field["ReferenceTo"]["referenceTo"]
+            ]
+
+            for foreignDataset in foreignDatasets:
+                yield ForeignKeyConstraintClass(
+                    name=field["RelationshipName"] if field["RelationshipName"] else "",
+                    foreignDataset=foreignDataset,
+                    foreignFields=[builder.make_schema_field_urn(foreignDataset, "Id")],
+                    sourceFields=[builder.make_schema_field_urn(datasetUrn, fieldName)],
+                )

     def get_report(self) -> SourceReport:
         return self.report