acryl-datahub 1.0.0rc15__py3-none-any.whl → 1.0.0rc17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.0.0rc15.dist-info → acryl_datahub-1.0.0rc17.dist-info}/METADATA +2499 -2469
- {acryl_datahub-1.0.0rc15.dist-info → acryl_datahub-1.0.0rc17.dist-info}/RECORD +18 -15
- {acryl_datahub-1.0.0rc15.dist-info → acryl_datahub-1.0.0rc17.dist-info}/entry_points.txt +1 -0
- datahub/_version.py +1 -1
- datahub/cli/ingest_cli.py +1 -1
- datahub/configuration/common.py +8 -0
- datahub/emitter/response_helper.py +145 -0
- datahub/emitter/rest_emitter.py +161 -3
- datahub/ingestion/graph/client.py +3 -0
- datahub/ingestion/sink/datahub_rest.py +4 -0
- datahub/ingestion/source/bigquery_v2/bigquery_config.py +2 -46
- datahub/ingestion/source/common/gcp_credentials_config.py +53 -0
- datahub/ingestion/source/salesforce.py +529 -276
- datahub/ingestion/source/sql/hive.py +13 -0
- datahub/ingestion/source/vertexai.py +697 -0
- {acryl_datahub-1.0.0rc15.dist-info → acryl_datahub-1.0.0rc17.dist-info}/LICENSE +0 -0
- {acryl_datahub-1.0.0rc15.dist-info → acryl_datahub-1.0.0rc17.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.0.0rc15.dist-info → acryl_datahub-1.0.0rc17.dist-info}/top_level.txt +0 -0
|
@@ -4,7 +4,7 @@ import time
|
|
|
4
4
|
from dataclasses import dataclass, field as dataclass_field
|
|
5
5
|
from datetime import datetime
|
|
6
6
|
from enum import Enum
|
|
7
|
-
from typing import Any, Dict, Iterable, List, Optional
|
|
7
|
+
from typing import Any, Dict, Iterable, List, Literal, Optional, TypedDict
|
|
8
8
|
|
|
9
9
|
import requests
|
|
10
10
|
from pydantic import Field, validator
|
|
@@ -51,6 +51,7 @@ from datahub.metadata.schema_classes import (
|
|
|
51
51
|
BooleanTypeClass,
|
|
52
52
|
BytesTypeClass,
|
|
53
53
|
DataPlatformInstanceClass,
|
|
54
|
+
DatasetLineageTypeClass,
|
|
54
55
|
DatasetProfileClass,
|
|
55
56
|
DatasetPropertiesClass,
|
|
56
57
|
DateTypeClass,
|
|
@@ -69,6 +70,8 @@ from datahub.metadata.schema_classes import (
|
|
|
69
70
|
StringTypeClass,
|
|
70
71
|
SubTypesClass,
|
|
71
72
|
TagAssociationClass,
|
|
73
|
+
UpstreamClass,
|
|
74
|
+
UpstreamLineageClass,
|
|
72
75
|
)
|
|
73
76
|
from datahub.utilities import config_clean
|
|
74
77
|
from datahub.utilities.lossy_collections import LossyList
|
|
@@ -151,6 +154,12 @@ class SalesforceConfig(
|
|
|
151
154
|
description="Regex patterns for profiles to filter in ingestion, allowed by the `object_pattern`.",
|
|
152
155
|
)
|
|
153
156
|
|
|
157
|
+
# Given lack of ERD visual graph view support, this alternate is useful.
|
|
158
|
+
use_referenced_entities_as_upstreams: bool = Field(
|
|
159
|
+
default=False,
|
|
160
|
+
description="(Experimental) If enabled, referenced entities will be treated as upstream entities.",
|
|
161
|
+
)
|
|
162
|
+
|
|
154
163
|
def is_profiling_enabled(self) -> bool:
|
|
155
164
|
return self.profiling.enabled and is_profiling_enabled(
|
|
156
165
|
self.profiling.operation_config
|
|
@@ -165,6 +174,12 @@ class SalesforceConfig(
|
|
|
165
174
|
class SalesforceSourceReport(StaleEntityRemovalSourceReport):
|
|
166
175
|
filtered: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
167
176
|
|
|
177
|
+
objects_with_calculated_field: LossyList[str] = dataclass_field(
|
|
178
|
+
default_factory=LossyList
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
num_objects_missing_formula: int = 0
|
|
182
|
+
|
|
168
183
|
def report_dropped(self, ent_name: str) -> None:
|
|
169
184
|
self.filtered.append(ent_name)
|
|
170
185
|
|
|
@@ -199,6 +214,310 @@ FIELD_TYPE_MAPPING = {
|
|
|
199
214
|
}
|
|
200
215
|
|
|
201
216
|
|
|
217
|
+
class EntityDefinition(TypedDict):
    """One record of the Tooling API ``EntityDefinition`` query.

    The keys mirror the columns selected by ``SalesforceApi.list_objects``.
    """

    DurableId: str
    QualifiedApiName: str
    DeveloperName: str
    Label: str
    PluralLabel: str
    InternalSharingModel: str
    ExternalSharingModel: str
    # Common values for DeploymentStatus
    DeploymentStatus: Literal["Deployed", "InDevelopment"]
|
|
228
|
+
|
|
229
|
+
|
|
230
|
+
class UserInfo(TypedDict):
    """Nested user reference carrying only the ``Username`` column.

    Used for the ``CreatedBy`` / ``LastModifiedBy`` relationships that the
    Tooling API queries in this module select as ``<relation>.Username``.
    """

    Username: str
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
class FieldDefinition(TypedDict):
    """Nested ``FieldDefinition`` relationship of an ``EntityParticle`` record.

    The keys mirror the ``FieldDefinition.<column>`` entries selected by
    ``SalesforceApi.get_fields_for_object``.
    """

    DataType: str
    LastModifiedDate: str
    LastModifiedBy: UserInfo
    IsIndexed: bool
    ComplianceGroup: Optional[str]
    Description: Optional[str]
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class ReferenceTo(TypedDict):
    """Value of the ``ReferenceTo`` column of an ``EntityParticle`` record.

    ``referenceTo`` lists the sObject names a reference field points at.
    """

    referenceTo: List[str]
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class EntityParticle(TypedDict):
    """One record of the Tooling API ``EntityParticle`` query (one field).

    The keys mirror the columns selected by
    ``SalesforceApi.get_fields_for_object``.
    """

    # Identification
    QualifiedApiName: str
    DeveloperName: str
    Label: str

    # Type information
    DataType: str
    Precision: Optional[int]
    Scale: Optional[int]
    Length: Optional[int]
    Digits: Optional[int]

    # Flags and relationships
    IsUnique: bool
    IsCompound: bool
    IsComponent: bool
    # The annotation refers to the module-level ReferenceTo TypedDict.
    ReferenceTo: Optional[ReferenceTo]
    RelationshipName: Optional[str]
    IsNillable: bool
    InlineHelpText: Optional[str]
    IsCalculated: bool
    # The annotation refers to the module-level FieldDefinition TypedDict.
    FieldDefinition: FieldDefinition
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
class CustomObject(TypedDict):
    """One record of the Tooling API ``CustomObject`` query.

    The keys mirror the columns selected by
    ``SalesforceApi.get_custom_object_details``.
    """

    Description: Optional[str]
    Language: str
    ManageableState: Literal["unmanaged", "installed", "beta", "released"]

    # Audit columns
    CreatedDate: str
    CreatedBy: UserInfo
    LastModifiedDate: str
    LastModifiedBy: UserInfo
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class CustomField(TypedDict):
    """One record of the Tooling API ``CustomField`` query.

    The keys mirror the columns selected by
    ``SalesforceApi.get_custom_fields_for_object``.
    """

    DeveloperName: str
    CreatedDate: str
    CreatedBy: UserInfo
    InlineHelpText: Optional[str]
    LastModifiedDate: str
    LastModifiedBy: UserInfo
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
class SObjectRecordCount(TypedDict):
    """One entry of the ``sObjects`` list in a ``limits/recordCount`` response.

    Returned by ``SalesforceApi.get_approximate_record_count``.
    """

    count: int
    name: str
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
class SObjectField(TypedDict):
    """One element of the ``fields`` list in an sObject describe response.

    Only the two keys declared here are typed; ``calculatedFormula`` may be
    ``None``.
    """

    name: str
    calculatedFormula: Optional[str]
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
class SObjectDescribe(TypedDict):
    """Trimmed describe result returned by ``SalesforceApi.describe_object``.

    Only the ``fields`` list of the describe response is kept.
    """

    fields: List[SObjectField]
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
class SalesforceApi:
    """Groups the Salesforce REST / Tooling API calls used by the source.

    Every request goes through ``sf._call_salesforce("GET", url)`` and the
    decoded JSON payload (or a slice of it) is returned to the caller.
    """

    def __init__(
        self, sf: Salesforce, config: SalesforceConfig, report: SalesforceSourceReport
    ) -> None:
        """Store the client, config and report and derive the versioned base URL.

        Args:
            sf: Authenticated client; ``sf_instance`` and ``sf_version`` are read.
            config: Source configuration.
            report: Report that receives warnings raised by the API helpers.
        """
        self.config = config
        self.report = report
        self.sf = sf
        self.base_url = "https://{instance}/services/data/v{sf_version}/".format(
            instance=self.sf.sf_instance, sf_version=self.sf.sf_version
        )

    @staticmethod
    def create_salesforce_client(config: SalesforceConfig) -> Salesforce:
        """Build a ``Salesforce`` client for the auth type selected in ``config``.

        After the client is created, ``update_salesforce_api_version`` is
        called so the client uses the latest API version when none is
        configured.

        Raises:
            AssertionError: If a credential required by the auth type is None.
            ValueError: If ``config.auth`` is not one of the handled auth types.
        """
        common_args: Dict[str, Any] = {
            "domain": "test" if config.is_sandbox else None,
            "session": requests.Session(),
        }
        if config.api_version:
            common_args["version"] = config.api_version

        if config.auth is SalesforceAuthType.DIRECT_ACCESS_TOKEN:
            logger.debug("Access Token Provided in Config")
            assert config.access_token is not None, (
                "Config access_token is required for DIRECT_ACCESS_TOKEN auth"
            )
            assert config.instance_url is not None, (
                "Config instance_url is required for DIRECT_ACCESS_TOKEN auth"
            )

            sf = Salesforce(
                instance_url=config.instance_url,
                session_id=config.access_token,
                **common_args,
            )
        elif config.auth is SalesforceAuthType.USERNAME_PASSWORD:
            logger.debug("Username/Password Provided in Config")
            assert config.username is not None, (
                "Config username is required for USERNAME_PASSWORD auth"
            )
            assert config.password is not None, (
                "Config password is required for USERNAME_PASSWORD auth"
            )
            assert config.security_token is not None, (
                "Config security_token is required for USERNAME_PASSWORD auth"
            )

            sf = Salesforce(
                username=config.username,
                password=config.password,
                security_token=config.security_token,
                **common_args,
            )

        elif config.auth is SalesforceAuthType.JSON_WEB_TOKEN:
            logger.debug("Json Web Token provided in the config")
            assert config.username is not None, (
                "Config username is required for JSON_WEB_TOKEN auth"
            )
            assert config.consumer_key is not None, (
                "Config consumer_key is required for JSON_WEB_TOKEN auth"
            )
            assert config.private_key is not None, (
                "Config private_key is required for JSON_WEB_TOKEN auth"
            )

            sf = Salesforce(
                username=config.username,
                consumer_key=config.consumer_key,
                privatekey=config.private_key,
                **common_args,
            )
        else:
            # Without this branch an unhandled auth type would surface later as
            # an UnboundLocalError on `sf`, hiding the real cause.
            raise ValueError(f"Unsupported Salesforce auth type: {config.auth}")

        SalesforceApi.update_salesforce_api_version(config, sf)

        return sf

    @staticmethod
    def update_salesforce_api_version(config: SalesforceConfig, sf: Salesforce) -> None:
        """Point ``sf`` at the latest REST API version when none is configured.

        When ``config.api_version`` is falsy, the versions listing is fetched
        and the ``version`` of its last entry is written to ``sf.sf_version``.
        The version in use is logged at debug level.
        """
        if not config.api_version:
            # List all REST API versions and use latest one
            versions_url = "https://{instance}/services/data/".format(
                instance=sf.sf_instance,
            )
            versions_response = sf._call_salesforce("GET", versions_url).json()
            latest_version = versions_response[-1]
            version = latest_version["version"]
            # we could avoid setting the version like below (after the Salesforce object has been already initiated
            # above), since, according to the docs:
            # https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_versions.htm
            # we don't need to be authenticated to list the versions (so we could perform this call before even
            # authenticating)
            sf.sf_version = version
        logger.debug(
            "Using Salesforce REST API version: {version}".format(version=sf.sf_version)
        )

    def list_objects(self) -> List[EntityDefinition]:
        """Return the ``records`` of the customizable-EntityDefinition query."""
        # Using Describe Global REST API returns many more objects than required.
        # Response does not have the attribute ("customizable") that can be used
        # to filter out entities not on ObjectManager UI. Hence SOQL on EntityDefinition
        # object is used instead, as suggested by salesforce support.

        query_url = (
            self.base_url
            + "tooling/query/?q=SELECT DurableId,QualifiedApiName,DeveloperName,"
            + "Label,PluralLabel,InternalSharingModel,ExternalSharingModel,DeploymentStatus "
            + "FROM EntityDefinition WHERE IsCustomizable = true"
        )
        entities_response = self.sf._call_salesforce("GET", query_url).json()
        logger.debug(
            "Salesforce EntityDefinition query returned {count} sObjects".format(
                count=len(entities_response["records"])
            )
        )
        return entities_response["records"]

    def describe_object(self, sObjectName: str) -> SObjectDescribe:
        """Call the describe endpoint for ``sObjectName`` and keep only ``fields``."""
        logger.debug(f"Querying Salesforce {sObjectName} describe REST API")

        describe_endpoint = f"{self.base_url}sobjects/{sObjectName}/describe/"
        response = self.sf._call_salesforce("GET", describe_endpoint)

        logger.debug(f"Received Salesforce {sObjectName} describe response")
        return {"fields": response.json()["fields"]}

    def get_custom_object_details(
        self, sObjectDeveloperName: str
    ) -> Optional[CustomObject]:
        """Return the first CustomObject record for the developer name, or None.

        None is returned when the query yields no records.
        """
        query_url = (
            self.base_url
            + "tooling/query/?q=SELECT Description, Language, ManageableState, "
            + "CreatedDate, CreatedBy.Username, LastModifiedDate, LastModifiedBy.Username "
            + f"FROM CustomObject where DeveloperName='{sObjectDeveloperName}'"
        )
        custom_objects_response = self.sf._call_salesforce("GET", query_url).json()
        records = custom_objects_response["records"]
        if records:
            logger.debug("Salesforce CustomObject query returned with details")
            return records[0]
        return None

    def get_fields_for_object(
        self, sObjectName: str, sObjectDurableId: str
    ) -> List[EntityParticle]:
        """Return the EntityParticle records of the entity ``sObjectDurableId``.

        ``sObjectName`` is only used in the debug log message.
        """
        sObject_fields_query_url = (
            self.base_url
            + "tooling/query?q=SELECT "
            + "QualifiedApiName,DeveloperName,Label, FieldDefinition.DataType, DataType,"
            + "FieldDefinition.LastModifiedDate, FieldDefinition.LastModifiedBy.Username,"
            + "Precision, Scale, Length, Digits ,FieldDefinition.IsIndexed, IsUnique,"
            + "IsCompound, IsComponent, ReferenceTo, FieldDefinition.ComplianceGroup,"
            + "RelationshipName, IsNillable, FieldDefinition.Description, InlineHelpText, "
            + "IsCalculated FROM EntityParticle WHERE EntityDefinitionId='{}'".format(
                sObjectDurableId
            )
        )

        sObject_fields_response = self.sf._call_salesforce(
            "GET", sObject_fields_query_url
        ).json()

        logger.debug(f"Received Salesforce {sObjectName} fields response")

        return sObject_fields_response["records"]

    def get_custom_fields_for_object(
        self, sObjectName: str, sObjectDurableId: str
    ) -> Dict[str, CustomField]:
        """Return CustomField records of the entity keyed by ``DeveloperName``.

        A failing query is reported as a warning on ``self.report`` and an
        empty dict is returned instead of raising.
        """
        sObject_custom_fields_query_url = (
            self.base_url
            + "tooling/query?q=SELECT "
            + "DeveloperName,CreatedDate,CreatedBy.Username,InlineHelpText,"
            + "LastModifiedDate,LastModifiedBy.Username "
            + "FROM CustomField WHERE EntityDefinitionId='{}'".format(sObjectDurableId)
        )

        customFields: Dict[str, CustomField] = {}
        try:
            sObject_custom_fields_response = self.sf._call_salesforce(
                "GET", sObject_custom_fields_query_url
            ).json()

            logger.debug(
                "Received Salesforce {sObject} custom fields response".format(
                    sObject=sObjectName
                )
            )

        except Exception as e:
            # Deliberate best-effort: custom-field details are optional extras.
            error = "Salesforce CustomField query failed. "
            if "sObject type 'CustomField' is not supported." in str(e):
                # https://github.com/afawcett/apex-toolingapi/issues/19
                error += "Please verify if user has 'View All Data' permission."

            self.report.warning(message=error, exc=e)
        else:
            customFields = {
                record["DeveloperName"]: record
                for record in sObject_custom_fields_response["records"]
            }

        return customFields

    def get_approximate_record_count(self, sObjectName: str) -> SObjectRecordCount:
        """Return the first ``sObjects`` entry of the recordCount response."""
        sObject_records_count_url = (
            f"{self.base_url}limits/recordCount?sObjects={sObjectName}"
        )

        sObject_record_count_response = self.sf._call_salesforce(
            "GET", sObject_records_count_url
        ).json()

        logger.debug(
            "Received Salesforce {sObject} record count response".format(
                sObject=sObjectName
            )
        )
        sobject_record_counts = sObject_record_count_response.get("sObjects", [])
        # NOTE(review): raises IndexError when the response carries no entry
        # for this object; confirm whether callers should skip profiling instead.
        return sobject_record_counts[0]
|
|
519
|
+
|
|
520
|
+
|
|
202
521
|
@platform_name("Salesforce")
|
|
203
522
|
@config_class(SalesforceConfig)
|
|
204
523
|
@support_status(SupportStatus.INCUBATING)
|
|
@@ -228,131 +547,43 @@ FIELD_TYPE_MAPPING = {
|
|
|
228
547
|
description="Enabled by default",
|
|
229
548
|
)
|
|
230
549
|
class SalesforceSource(StatefulIngestionSourceBase):
|
|
231
|
-
base_url: str
|
|
232
|
-
config: SalesforceConfig
|
|
233
|
-
report: SalesforceSourceReport
|
|
234
|
-
session: requests.Session
|
|
235
|
-
sf: Salesforce
|
|
236
|
-
fieldCounts: Dict[str, int]
|
|
237
|
-
|
|
238
550
|
def __init__(self, config: SalesforceConfig, ctx: PipelineContext) -> None:
|
|
239
551
|
super().__init__(config, ctx)
|
|
240
552
|
self.ctx = ctx
|
|
241
553
|
self.config = config
|
|
242
|
-
self.report = SalesforceSourceReport()
|
|
243
|
-
self.session = requests.Session()
|
|
554
|
+
self.report: SalesforceSourceReport = SalesforceSourceReport()
|
|
244
555
|
self.platform: str = "salesforce"
|
|
245
|
-
self.fieldCounts = {}
|
|
246
|
-
common_args: Dict[str, Any] = {
|
|
247
|
-
"domain": "test" if self.config.is_sandbox else None,
|
|
248
|
-
"session": self.session,
|
|
249
|
-
}
|
|
250
|
-
if self.config.api_version:
|
|
251
|
-
common_args["version"] = self.config.api_version
|
|
252
|
-
|
|
253
|
-
try:
|
|
254
|
-
if self.config.auth is SalesforceAuthType.DIRECT_ACCESS_TOKEN:
|
|
255
|
-
logger.debug("Access Token Provided in Config")
|
|
256
|
-
assert self.config.access_token is not None, (
|
|
257
|
-
"Config access_token is required for DIRECT_ACCESS_TOKEN auth"
|
|
258
|
-
)
|
|
259
|
-
assert self.config.instance_url is not None, (
|
|
260
|
-
"Config instance_url is required for DIRECT_ACCESS_TOKEN auth"
|
|
261
|
-
)
|
|
556
|
+
self.fieldCounts: Dict[str, int] = {}
|
|
262
557
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
assert self.config.username is not None, (
|
|
271
|
-
"Config username is required for USERNAME_PASSWORD auth"
|
|
272
|
-
)
|
|
273
|
-
assert self.config.password is not None, (
|
|
274
|
-
"Config password is required for USERNAME_PASSWORD auth"
|
|
275
|
-
)
|
|
276
|
-
assert self.config.security_token is not None, (
|
|
277
|
-
"Config security_token is required for USERNAME_PASSWORD auth"
|
|
278
|
-
)
|
|
279
|
-
|
|
280
|
-
self.sf = Salesforce(
|
|
281
|
-
username=self.config.username,
|
|
282
|
-
password=self.config.password,
|
|
283
|
-
security_token=self.config.security_token,
|
|
284
|
-
**common_args,
|
|
285
|
-
)
|
|
286
|
-
|
|
287
|
-
elif self.config.auth is SalesforceAuthType.JSON_WEB_TOKEN:
|
|
288
|
-
logger.debug("Json Web Token provided in the config")
|
|
289
|
-
assert self.config.username is not None, (
|
|
290
|
-
"Config username is required for JSON_WEB_TOKEN auth"
|
|
291
|
-
)
|
|
292
|
-
assert self.config.consumer_key is not None, (
|
|
293
|
-
"Config consumer_key is required for JSON_WEB_TOKEN auth"
|
|
294
|
-
)
|
|
295
|
-
assert self.config.private_key is not None, (
|
|
296
|
-
"Config private_key is required for JSON_WEB_TOKEN auth"
|
|
297
|
-
)
|
|
298
|
-
|
|
299
|
-
self.sf = Salesforce(
|
|
300
|
-
username=self.config.username,
|
|
301
|
-
consumer_key=self.config.consumer_key,
|
|
302
|
-
privatekey=self.config.private_key,
|
|
303
|
-
**common_args,
|
|
304
|
-
)
|
|
558
|
+
def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
    """Return the inherited processors followed by the stale-entity-removal one."""
    # Evaluate the parent's processors first, matching the original order.
    processors = list(super().get_workunit_processors())
    stale_entity_handler = StaleEntityRemovalHandler.create(
        self, self.config, self.ctx
    )
    processors.append(stale_entity_handler.workunit_processor)
    return processors
|
|
305
565
|
|
|
566
|
+
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
567
|
+
try:
|
|
568
|
+
sf = SalesforceApi.create_salesforce_client(self.config)
|
|
306
569
|
except SalesforceAuthenticationFailed as e:
|
|
307
|
-
logger.error(e)
|
|
308
570
|
if "API_CURRENTLY_DISABLED" in str(e):
|
|
309
571
|
# https://help.salesforce.com/s/articleView?id=001473830&type=1
|
|
310
|
-
error = "
|
|
572
|
+
error = "Please make sure user has API Enabled Access."
|
|
311
573
|
else:
|
|
312
|
-
error = "
|
|
574
|
+
error = "Please verify your credentials."
|
|
313
575
|
if (
|
|
314
576
|
self.config.instance_url
|
|
315
577
|
and "sandbox" in self.config.instance_url.lower()
|
|
316
578
|
):
|
|
317
579
|
error += "Please set `is_sandbox: True` in recipe if this is sandbox account."
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
if not self.config.api_version:
|
|
321
|
-
# List all REST API versions and use latest one
|
|
322
|
-
versions_url = "https://{instance}/services/data/".format(
|
|
323
|
-
instance=self.sf.sf_instance,
|
|
324
|
-
)
|
|
325
|
-
versions_response = self.sf._call_salesforce("GET", versions_url).json()
|
|
326
|
-
latest_version = versions_response[-1]
|
|
327
|
-
version = latest_version["version"]
|
|
328
|
-
# we could avoid setting the version like below (after the Salesforce object has been already initiated
|
|
329
|
-
# above), since, according to the docs:
|
|
330
|
-
# https://developer.salesforce.com/docs/atlas.en-us.api_rest.meta/api_rest/dome_versions.htm
|
|
331
|
-
# we don't need to be authenticated to list the versions (so we could perform this call before even
|
|
332
|
-
# authenticating)
|
|
333
|
-
self.sf.sf_version = version
|
|
334
|
-
|
|
335
|
-
self.base_url = "https://{instance}/services/data/v{sf_version}/".format(
|
|
336
|
-
instance=self.sf.sf_instance, sf_version=self.sf.sf_version
|
|
337
|
-
)
|
|
338
|
-
|
|
339
|
-
logger.debug(
|
|
340
|
-
"Using Salesforce REST API version: {version}".format(
|
|
341
|
-
version=self.sf.sf_version
|
|
342
|
-
)
|
|
343
|
-
)
|
|
580
|
+
self.report.failure(title="Salesforce login failed", message=error, exc=e)
|
|
581
|
+
return
|
|
344
582
|
|
|
345
|
-
|
|
346
|
-
return [
|
|
347
|
-
*super().get_workunit_processors(),
|
|
348
|
-
StaleEntityRemovalHandler.create(
|
|
349
|
-
self, self.config, self.ctx
|
|
350
|
-
).workunit_processor,
|
|
351
|
-
]
|
|
583
|
+
self.sf_api = SalesforceApi(sf, self.config, self.report)
|
|
352
584
|
|
|
353
|
-
def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
|
|
354
585
|
try:
|
|
355
|
-
sObjects = self.
|
|
586
|
+
sObjects = self.sf_api.list_objects()
|
|
356
587
|
except Exception as e:
|
|
357
588
|
if "sObject type 'EntityDefinition' is not supported." in str(e):
|
|
358
589
|
# https://developer.salesforce.com/docs/atlas.en-us.api_tooling.meta/api_tooling/tooling_api_objects_entitydefinition.htm
|
|
@@ -366,7 +597,7 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
366
597
|
yield from self.get_salesforce_object_workunits(sObject)
|
|
367
598
|
|
|
368
599
|
def get_salesforce_object_workunits(
|
|
369
|
-
self, sObject:
|
|
600
|
+
self, sObject: EntityDefinition
|
|
370
601
|
) -> Iterable[MetadataWorkUnit]:
|
|
371
602
|
sObjectName = sObject["QualifiedApiName"]
|
|
372
603
|
|
|
@@ -386,19 +617,50 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
386
617
|
self.config.env,
|
|
387
618
|
)
|
|
388
619
|
|
|
389
|
-
customObject =
|
|
620
|
+
customObject = None
|
|
390
621
|
if sObjectName.endswith("__c"): # Is Custom Object
|
|
391
|
-
customObject = self.get_custom_object_details(
|
|
622
|
+
customObject = self.sf_api.get_custom_object_details(
|
|
623
|
+
sObject["DeveloperName"]
|
|
624
|
+
)
|
|
392
625
|
|
|
393
626
|
# Table Created, LastModified is available for Custom Object
|
|
394
627
|
yield from self.get_operation_workunit(customObject, datasetUrn)
|
|
395
628
|
|
|
396
629
|
yield self.get_properties_workunit(sObject, customObject, datasetUrn)
|
|
397
630
|
|
|
631
|
+
allFields = self.sf_api.get_fields_for_object(sObjectName, sObject["DurableId"])
|
|
632
|
+
|
|
633
|
+
customFields = self.sf_api.get_custom_fields_for_object(
|
|
634
|
+
sObjectName, sObject["DurableId"]
|
|
635
|
+
)
|
|
636
|
+
|
|
637
|
+
if any(field["IsCalculated"] for field in allFields):
|
|
638
|
+
# Although formula is present in Metadata column of CustomField entity,
|
|
639
|
+
# we can not use it as it allows querying only for one field at a time
|
|
640
|
+
# and that would not be performant
|
|
641
|
+
calculated_field_formulae = self.get_calculated_field_formulae(sObjectName)
|
|
642
|
+
if calculated_field_formulae:
|
|
643
|
+
self.report.objects_with_calculated_field.append(sObjectName)
|
|
644
|
+
else:
|
|
645
|
+
# For some objects, although some fields are calculated, formula is absent
|
|
646
|
+
# These are typically salesforce system calculated fields whose formula
|
|
647
|
+
# is not exposed
|
|
648
|
+
self.report.num_objects_missing_formula += 1
|
|
649
|
+
else:
|
|
650
|
+
calculated_field_formulae = {}
|
|
651
|
+
|
|
398
652
|
yield from self.get_schema_metadata_workunit(
|
|
399
|
-
sObjectName,
|
|
653
|
+
sObjectName,
|
|
654
|
+
allFields,
|
|
655
|
+
customFields,
|
|
656
|
+
customObject,
|
|
657
|
+
datasetUrn,
|
|
658
|
+
calculated_field_formulae,
|
|
400
659
|
)
|
|
401
660
|
|
|
661
|
+
if self.config.use_referenced_entities_as_upstreams:
|
|
662
|
+
yield from self.get_upstream_workunit(datasetUrn, allFields)
|
|
663
|
+
|
|
402
664
|
yield self.get_subtypes_workunit(sObjectName, datasetUrn)
|
|
403
665
|
|
|
404
666
|
if self.config.platform_instance is not None:
|
|
@@ -412,39 +674,33 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
412
674
|
):
|
|
413
675
|
yield from self.get_profile_workunit(sObjectName, datasetUrn)
|
|
414
676
|
|
|
415
|
-
def
|
|
416
|
-
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
logger.debug(
|
|
443
|
-
"Salesforce EntityDefinition query returned {count} sObjects".format(
|
|
444
|
-
count=len(entities_response["records"])
|
|
445
|
-
)
|
|
446
|
-
)
|
|
447
|
-
return entities_response["records"]
|
|
677
|
+
def get_upstream_workunit(
    self, datasetUrn: str, allFields: List[EntityParticle]
) -> Iterable[MetadataWorkUnit]:
    """Emit one upstream-lineage work unit built from the reference fields.

    Every sObject named in the ``ReferenceTo.referenceTo`` list of a field
    whose ``DataType`` is ``"reference"`` becomes an upstream of
    ``datasetUrn``. Nothing is yielded when no such object is found.

    Args:
        datasetUrn: URN of the dataset that receives the lineage aspect.
        allFields: EntityParticle records of the object.
    """
    # Several fields may reference the same object; a dict keeps first-seen
    # order while ensuring each upstream appears once in the aspect.
    referenced_names: Dict[str, None] = {}
    for field in allFields:
        if field["DataType"] != "reference":
            continue
        reference = field["ReferenceTo"]
        if not reference:
            continue
        for referenced_sObjectName in reference["referenceTo"] or ():
            referenced_names.setdefault(referenced_sObjectName, None)

    upstreams: List[UpstreamClass] = [
        UpstreamClass(
            dataset=builder.make_dataset_urn_with_platform_instance(
                self.platform,
                referenced_sObjectName,
                self.config.platform_instance,
                self.config.env,
            ),
            type=DatasetLineageTypeClass.TRANSFORMED,
        )
        for referenced_sObjectName in referenced_names
    ]

    if upstreams:
        yield MetadataChangeProposalWrapper(
            entityUrn=datasetUrn, aspect=UpstreamLineageClass(upstreams=upstreams)
        ).as_workunit()
|
|
448
704
|
|
|
449
705
|
def get_domain_workunit(
|
|
450
706
|
self, dataset_name: str, datasetUrn: str
|
|
@@ -474,11 +730,15 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
474
730
|
).as_workunit()
|
|
475
731
|
|
|
476
732
|
def get_operation_workunit(
|
|
477
|
-
self, customObject:
|
|
733
|
+
self, customObject: Optional[CustomObject], datasetUrn: str
|
|
478
734
|
) -> Iterable[MetadataWorkUnit]:
|
|
479
735
|
reported_time: int = int(time.time() * 1000)
|
|
480
736
|
|
|
481
|
-
if
|
|
737
|
+
if (
|
|
738
|
+
customObject
|
|
739
|
+
and customObject.get("CreatedBy")
|
|
740
|
+
and customObject.get("CreatedDate")
|
|
741
|
+
):
|
|
482
742
|
timestamp = self.get_time_from_salesforce_timestamp(
|
|
483
743
|
customObject["CreatedDate"]
|
|
484
744
|
)
|
|
@@ -521,7 +781,10 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
521
781
|
)
|
|
522
782
|
|
|
523
783
|
def get_properties_workunit(
|
|
524
|
-
self,
|
|
784
|
+
self,
|
|
785
|
+
sObject: EntityDefinition,
|
|
786
|
+
customObject: Optional[CustomObject],
|
|
787
|
+
datasetUrn: str,
|
|
525
788
|
) -> MetadataWorkUnit:
|
|
526
789
|
propertyLabels = {
|
|
527
790
|
# from EntityDefinition
|
|
@@ -542,17 +805,18 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
542
805
|
for k, v in sObject.items()
|
|
543
806
|
if k in propertyLabels and v is not None
|
|
544
807
|
}
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
808
|
+
if customObject:
|
|
809
|
+
sObjectProperties.update(
|
|
810
|
+
{
|
|
811
|
+
propertyLabels[k]: str(v)
|
|
812
|
+
for k, v in customObject.items()
|
|
813
|
+
if k in propertyLabels and v is not None
|
|
814
|
+
}
|
|
815
|
+
)
|
|
552
816
|
|
|
553
817
|
datasetProperties = DatasetPropertiesClass(
|
|
554
818
|
name=sObject["Label"],
|
|
555
|
-
description=customObject.get("Description"),
|
|
819
|
+
description=customObject.get("Description") if customObject else None,
|
|
556
820
|
customProperties=sObjectProperties,
|
|
557
821
|
)
|
|
558
822
|
return MetadataChangeProposalWrapper(
|
|
@@ -577,58 +841,58 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
577
841
|
) -> Iterable[MetadataWorkUnit]:
|
|
578
842
|
# Here approximate record counts as returned by recordCount API are used as rowCount
|
|
579
843
|
# In future, count() SOQL query may be used instead, if required, might be more expensive
|
|
580
|
-
|
|
581
|
-
f"{self.base_url}limits/recordCount?sObjects={sObjectName}"
|
|
582
|
-
)
|
|
583
|
-
|
|
584
|
-
sObject_record_count_response = self.sf._call_salesforce(
|
|
585
|
-
"GET", sObject_records_count_url
|
|
586
|
-
).json()
|
|
844
|
+
sobject_record_count = self.sf_api.get_approximate_record_count(sObjectName)
|
|
587
845
|
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
846
|
+
datasetProfile = DatasetProfileClass(
|
|
847
|
+
timestampMillis=int(time.time() * 1000),
|
|
848
|
+
rowCount=sobject_record_count["count"],
|
|
849
|
+
columnCount=self.fieldCounts[sObjectName],
|
|
592
850
|
)
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
timestampMillis=int(time.time() * 1000),
|
|
597
|
-
rowCount=entry["count"],
|
|
598
|
-
columnCount=self.fieldCounts[sObjectName],
|
|
599
|
-
)
|
|
600
|
-
yield MetadataChangeProposalWrapper(
|
|
601
|
-
entityUrn=datasetUrn, aspect=datasetProfile
|
|
602
|
-
).as_workunit()
|
|
851
|
+
yield MetadataChangeProposalWrapper(
|
|
852
|
+
entityUrn=datasetUrn, aspect=datasetProfile
|
|
853
|
+
).as_workunit()
|
|
603
854
|
|
|
604
855
|
# Here field description is created from label, description and inlineHelpText
|
|
605
|
-
def _get_field_description(
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
856
|
+
def _get_field_description(
|
|
857
|
+
self,
|
|
858
|
+
field: EntityParticle,
|
|
859
|
+
customField: Optional[CustomField],
|
|
860
|
+
formula: Optional[str],
|
|
861
|
+
) -> str:
|
|
862
|
+
description_parts: List[str] = []
|
|
863
|
+
|
|
864
|
+
if field.get("Label") and field["Label"].startswith("#"):
|
|
865
|
+
description_parts.append("\\" + field["Label"])
|
|
866
|
+
elif field.get("Label"):
|
|
867
|
+
description_parts.append(field["Label"])
|
|
612
868
|
|
|
613
869
|
text = field.get("FieldDefinition", {}).get("Description", None)
|
|
614
870
|
if text:
|
|
615
871
|
prefix = "\\" if text.startswith("#") else ""
|
|
616
|
-
|
|
872
|
+
description_parts.append(f"{prefix}{text}")
|
|
617
873
|
|
|
618
874
|
text = field.get("InlineHelpText")
|
|
619
875
|
if text:
|
|
620
876
|
prefix = "\\" if text.startswith("#") else ""
|
|
621
|
-
|
|
877
|
+
description_parts.append(f"{prefix}{text}")
|
|
878
|
+
|
|
879
|
+
if formula:
|
|
880
|
+
description_parts.append(f"Formula: {formula}")
|
|
622
881
|
|
|
623
|
-
return
|
|
882
|
+
return "\n\n".join(description_parts)
|
|
624
883
|
|
|
625
884
|
# Here jsonProps is used to add additional salesforce field level properties.
|
|
626
|
-
def _get_field_json_props(
|
|
885
|
+
def _get_field_json_props(
|
|
886
|
+
self, field: EntityParticle, customField: Optional[CustomField]
|
|
887
|
+
) -> str:
|
|
627
888
|
jsonProps = {}
|
|
628
889
|
|
|
629
890
|
if field.get("IsUnique"):
|
|
630
891
|
jsonProps["IsUnique"] = True
|
|
631
892
|
|
|
893
|
+
if field.get("IsCalculated"):
|
|
894
|
+
jsonProps["IsCalculated"] = True
|
|
895
|
+
|
|
632
896
|
return json.dumps(jsonProps)
|
|
633
897
|
|
|
634
898
|
def _get_schema_field(
|
|
@@ -636,8 +900,9 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
636
900
|
sObjectName: str,
|
|
637
901
|
fieldName: str,
|
|
638
902
|
fieldType: str,
|
|
639
|
-
field:
|
|
640
|
-
customField:
|
|
903
|
+
field: EntityParticle,
|
|
904
|
+
customField: Optional[CustomField],
|
|
905
|
+
formula: Optional[str] = None,
|
|
641
906
|
) -> SchemaFieldClass:
|
|
642
907
|
fieldPath = fieldName
|
|
643
908
|
|
|
@@ -651,7 +916,7 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
651
916
|
|
|
652
917
|
fieldTags: List[str] = self.get_field_tags(fieldName, field)
|
|
653
918
|
|
|
654
|
-
description = self._get_field_description(field, customField)
|
|
919
|
+
description = self._get_field_description(field, customField, formula)
|
|
655
920
|
|
|
656
921
|
schemaField = SchemaFieldClass(
|
|
657
922
|
fieldPath=fieldPath,
|
|
@@ -666,11 +931,19 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
666
931
|
)
|
|
667
932
|
|
|
668
933
|
# Created and LastModified Date and Actor are available for Custom Fields only
|
|
669
|
-
if
|
|
934
|
+
if (
|
|
935
|
+
customField
|
|
936
|
+
and customField.get("CreatedDate")
|
|
937
|
+
and customField.get("CreatedBy")
|
|
938
|
+
):
|
|
670
939
|
schemaField.created = self.get_audit_stamp(
|
|
671
940
|
customField["CreatedDate"], customField["CreatedBy"]["Username"]
|
|
672
941
|
)
|
|
673
|
-
if
|
|
942
|
+
if (
|
|
943
|
+
customField
|
|
944
|
+
and customField.get("LastModifiedDate")
|
|
945
|
+
and customField.get("LastModifiedBy")
|
|
946
|
+
):
|
|
674
947
|
schemaField.lastModified = self.get_audit_stamp(
|
|
675
948
|
customField["LastModifiedDate"],
|
|
676
949
|
customField["LastModifiedBy"]["Username"],
|
|
@@ -678,7 +951,7 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
678
951
|
|
|
679
952
|
return schemaField
|
|
680
953
|
|
|
681
|
-
def get_field_tags(self, fieldName: str, field:
|
|
954
|
+
def get_field_tags(self, fieldName: str, field: EntityParticle) -> List[str]:
|
|
682
955
|
fieldTags: List[str] = []
|
|
683
956
|
|
|
684
957
|
if fieldName.endswith("__c"):
|
|
@@ -711,69 +984,39 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
711
984
|
actor=builder.make_user_urn(username),
|
|
712
985
|
)
|
|
713
986
|
|
|
714
|
-
def
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
sObject_fields_query_url = (
|
|
718
|
-
self.base_url
|
|
719
|
-
+ "tooling/query?q=SELECT "
|
|
720
|
-
+ "QualifiedApiName,DeveloperName,Label, FieldDefinition.DataType, DataType,"
|
|
721
|
-
+ "FieldDefinition.LastModifiedDate, FieldDefinition.LastModifiedBy.Username,"
|
|
722
|
-
+ "Precision, Scale, Length, Digits ,FieldDefinition.IsIndexed, IsUnique,"
|
|
723
|
-
+ "IsCompound, IsComponent, ReferenceTo, FieldDefinition.ComplianceGroup,"
|
|
724
|
-
+ "RelationshipName, IsNillable, FieldDefinition.Description, InlineHelpText "
|
|
725
|
-
+ "FROM EntityParticle WHERE EntityDefinitionId='{}'".format(
|
|
726
|
-
sObject["DurableId"]
|
|
727
|
-
)
|
|
728
|
-
)
|
|
729
|
-
|
|
730
|
-
sObject_fields_response = self.sf._call_salesforce(
|
|
731
|
-
"GET", sObject_fields_query_url
|
|
732
|
-
).json()
|
|
733
|
-
|
|
734
|
-
logger.debug(f"Received Salesforce {sObjectName} fields response")
|
|
735
|
-
|
|
736
|
-
sObject_custom_fields_query_url = (
|
|
737
|
-
self.base_url
|
|
738
|
-
+ "tooling/query?q=SELECT "
|
|
739
|
-
+ "DeveloperName,CreatedDate,CreatedBy.Username,InlineHelpText,"
|
|
740
|
-
+ "LastModifiedDate,LastModifiedBy.Username "
|
|
741
|
-
+ "FROM CustomField WHERE EntityDefinitionId='{}'".format(
|
|
742
|
-
sObject["DurableId"]
|
|
743
|
-
)
|
|
744
|
-
)
|
|
987
|
+
def get_calculated_field_formulae(self, sObjectName: str) -> Dict[str, str]:
|
|
988
|
+
# extract field wise formula and return response
|
|
989
|
+
# Includes entries for calculated fields only
|
|
745
990
|
|
|
746
|
-
|
|
991
|
+
calculated_fields = {}
|
|
747
992
|
try:
|
|
748
|
-
|
|
749
|
-
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
logger.debug(
|
|
753
|
-
"Received Salesforce {sObject} custom fields response".format(
|
|
754
|
-
sObject=sObjectName
|
|
755
|
-
)
|
|
756
|
-
)
|
|
757
|
-
|
|
993
|
+
describe_object_result = self.sf_api.describe_object(sObjectName)
|
|
994
|
+
for field in describe_object_result["fields"]:
|
|
995
|
+
if field["calculatedFormula"]:
|
|
996
|
+
calculated_fields[field["name"]] = field["calculatedFormula"]
|
|
758
997
|
except Exception as e:
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
else:
|
|
766
|
-
customFields = {
|
|
767
|
-
record["DeveloperName"]: record
|
|
768
|
-
for record in sObject_custom_fields_response["records"]
|
|
769
|
-
}
|
|
998
|
+
self.report.warning(
|
|
999
|
+
message="Failed to get calculated field formulae",
|
|
1000
|
+
context=sObjectName,
|
|
1001
|
+
exc=e,
|
|
1002
|
+
)
|
|
1003
|
+
return calculated_fields
|
|
770
1004
|
|
|
1005
|
+
def get_schema_metadata_workunit(
|
|
1006
|
+
self,
|
|
1007
|
+
sObjectName: str,
|
|
1008
|
+
all_fields: List[EntityParticle],
|
|
1009
|
+
custom_fields: Dict[str, CustomField],
|
|
1010
|
+
customObject: Optional[CustomObject],
|
|
1011
|
+
datasetUrn: str,
|
|
1012
|
+
calculated_field_formulae: Dict[str, str],
|
|
1013
|
+
) -> Iterable[MetadataWorkUnit]:
|
|
771
1014
|
fields: List[SchemaFieldClass] = []
|
|
772
1015
|
primaryKeys: List[str] = []
|
|
773
1016
|
foreignKeys: List[ForeignKeyConstraintClass] = []
|
|
774
1017
|
|
|
775
|
-
for field in
|
|
776
|
-
customField =
|
|
1018
|
+
for field in all_fields:
|
|
1019
|
+
customField = custom_fields.get(field["DeveloperName"])
|
|
777
1020
|
|
|
778
1021
|
fieldName = field["QualifiedApiName"]
|
|
779
1022
|
fieldType = field["DataType"]
|
|
@@ -783,20 +1026,21 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
783
1026
|
continue
|
|
784
1027
|
|
|
785
1028
|
schemaField: SchemaFieldClass = self._get_schema_field(
|
|
786
|
-
sObjectName,
|
|
1029
|
+
sObjectName,
|
|
1030
|
+
fieldName,
|
|
1031
|
+
fieldType,
|
|
1032
|
+
field,
|
|
1033
|
+
customField,
|
|
1034
|
+
calculated_field_formulae.get(fieldName),
|
|
787
1035
|
)
|
|
788
1036
|
fields.append(schemaField)
|
|
789
1037
|
|
|
790
1038
|
if fieldType == "id":
|
|
791
1039
|
primaryKeys.append(fieldName)
|
|
792
1040
|
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
):
|
|
797
|
-
foreignKeys.extend(
|
|
798
|
-
list(self.get_foreign_keys_from_field(fieldName, field, datasetUrn))
|
|
799
|
-
)
|
|
1041
|
+
foreignKeys.extend(
|
|
1042
|
+
list(self.get_foreign_keys_from_field(fieldName, field, datasetUrn))
|
|
1043
|
+
)
|
|
800
1044
|
|
|
801
1045
|
schemaMetadata = SchemaMetadataClass(
|
|
802
1046
|
schemaName="",
|
|
@@ -810,7 +1054,11 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
810
1054
|
)
|
|
811
1055
|
|
|
812
1056
|
# Created Date and Actor are available for Custom Object only
|
|
813
|
-
if
|
|
1057
|
+
if (
|
|
1058
|
+
customObject
|
|
1059
|
+
and customObject.get("CreatedDate")
|
|
1060
|
+
and customObject.get("CreatedBy")
|
|
1061
|
+
):
|
|
814
1062
|
schemaMetadata.created = self.get_audit_stamp(
|
|
815
1063
|
customObject["CreatedDate"], customObject["CreatedBy"]["Username"]
|
|
816
1064
|
)
|
|
@@ -821,26 +1069,31 @@ class SalesforceSource(StatefulIngestionSourceBase):
|
|
|
821
1069
|
).as_workunit()
|
|
822
1070
|
|
|
823
1071
|
def get_foreign_keys_from_field(
|
|
824
|
-
self, fieldName: str, field:
|
|
1072
|
+
self, fieldName: str, field: EntityParticle, datasetUrn: str
|
|
825
1073
|
) -> Iterable[ForeignKeyConstraintClass]:
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
831
|
-
|
|
832
|
-
|
|
833
|
-
|
|
834
|
-
|
|
835
|
-
|
|
836
|
-
|
|
837
|
-
|
|
838
|
-
|
|
839
|
-
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
1074
|
+
if (
|
|
1075
|
+
field["DataType"] == "reference"
|
|
1076
|
+
and field["ReferenceTo"]
|
|
1077
|
+
and field["ReferenceTo"]["referenceTo"] is not None
|
|
1078
|
+
):
|
|
1079
|
+
# https://developer.salesforce.com/docs/atlas.en-us.object_reference.meta/object_reference/field_types.htm#i1435823
|
|
1080
|
+
foreignDatasets = [
|
|
1081
|
+
builder.make_dataset_urn_with_platform_instance(
|
|
1082
|
+
self.platform,
|
|
1083
|
+
fsObject,
|
|
1084
|
+
self.config.platform_instance,
|
|
1085
|
+
self.config.env,
|
|
1086
|
+
)
|
|
1087
|
+
for fsObject in field["ReferenceTo"]["referenceTo"]
|
|
1088
|
+
]
|
|
1089
|
+
|
|
1090
|
+
for foreignDataset in foreignDatasets:
|
|
1091
|
+
yield ForeignKeyConstraintClass(
|
|
1092
|
+
name=field["RelationshipName"] if field["RelationshipName"] else "",
|
|
1093
|
+
foreignDataset=foreignDataset,
|
|
1094
|
+
foreignFields=[builder.make_schema_field_urn(foreignDataset, "Id")],
|
|
1095
|
+
sourceFields=[builder.make_schema_field_urn(datasetUrn, fieldName)],
|
|
1096
|
+
)
|
|
844
1097
|
|
|
845
1098
|
def get_report(self) -> SourceReport:
|
|
846
1099
|
return self.report
|