acryl-datahub 1.0.0.1rc1__py3-none-any.whl → 1.0.0.1rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of acryl-datahub might be problematic.

Files changed (62)
  1. {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/METADATA +2471 -2470
  2. {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/RECORD +61 -46
  3. {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/entry_points.txt +2 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/dataset/dataset.py +1 -28
  7. datahub/emitter/request_helper.py +19 -14
  8. datahub/ingestion/api/source.py +6 -2
  9. datahub/ingestion/api/source_helpers.py +6 -2
  10. datahub/ingestion/extractor/schema_util.py +1 -0
  11. datahub/ingestion/source/common/data_platforms.py +23 -0
  12. datahub/ingestion/source/common/gcp_credentials_config.py +6 -0
  13. datahub/ingestion/source/common/subtypes.py +15 -0
  14. datahub/ingestion/source/data_lake_common/path_spec.py +21 -1
  15. datahub/ingestion/source/dbt/dbt_common.py +6 -4
  16. datahub/ingestion/source/dbt/dbt_core.py +4 -6
  17. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  18. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  19. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  20. datahub/ingestion/source/dremio/dremio_source.py +96 -117
  21. datahub/ingestion/source/hex/__init__.py +0 -0
  22. datahub/ingestion/source/hex/api.py +394 -0
  23. datahub/ingestion/source/hex/constants.py +3 -0
  24. datahub/ingestion/source/hex/hex.py +167 -0
  25. datahub/ingestion/source/hex/mapper.py +372 -0
  26. datahub/ingestion/source/hex/model.py +68 -0
  27. datahub/ingestion/source/iceberg/iceberg.py +62 -66
  28. datahub/ingestion/source/mlflow.py +198 -7
  29. datahub/ingestion/source/mode.py +11 -1
  30. datahub/ingestion/source/openapi.py +69 -34
  31. datahub/ingestion/source/powerbi/powerbi.py +29 -23
  32. datahub/ingestion/source/s3/source.py +11 -0
  33. datahub/ingestion/source/slack/slack.py +399 -82
  34. datahub/ingestion/source/superset.py +15 -6
  35. datahub/ingestion/source/vertexai/__init__.py +0 -0
  36. datahub/ingestion/source/vertexai/vertexai.py +1055 -0
  37. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  38. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +68 -0
  39. datahub/metadata/_schema_classes.py +472 -1
  40. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  41. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  42. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  43. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  44. datahub/metadata/schema.avsc +307 -0
  45. datahub/metadata/schemas/CorpUserEditableInfo.avsc +14 -0
  46. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  47. datahub/metadata/schemas/CorpUserSettings.avsc +95 -0
  48. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  49. datahub/metadata/schemas/MetadataChangeEvent.avsc +30 -0
  50. datahub/metadata/schemas/QueryProperties.avsc +20 -0
  51. datahub/metadata/schemas/Siblings.avsc +2 -0
  52. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  53. datahub/sdk/dataset.py +122 -0
  54. datahub/sdk/entity.py +99 -3
  55. datahub/sdk/entity_client.py +27 -3
  56. datahub/sdk/main_client.py +22 -0
  57. datahub/sdk/search_filters.py +4 -4
  58. datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
  59. datahub/sql_parsing/tool_meta_extractor.py +27 -2
  60. datahub/ingestion/source/vertexai.py +0 -695
  61. {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info/licenses}/LICENSE +0 -0
  62. {acryl_datahub-1.0.0.1rc1.dist-info → acryl_datahub-1.0.0.1rc2.dist-info}/top_level.txt +0 -0
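
The largest addition is the new Vertex AI source package datahub/ingestion/source/vertexai/ (entries 35-38), which replaces the deleted single-module datahub/ingestion/source/vertexai.py (entry 60). The full text of the new vertexai/vertexai.py follows. For orientation only, a source like this is normally driven through a DataHub ingestion pipeline; the sketch below is illustrative and not part of the diff. The project, region, and sink values are placeholders, and the accepted config keys are defined by VertexAIConfig in vertexai_config.py.

# Illustrative sketch only (not part of this release): running the new
# "vertexai" source through a DataHub ingestion pipeline. "my-gcp-project",
# "us-west2", and the sink server address are placeholder values.
from datahub.ingestion.run.pipeline import Pipeline

pipeline = Pipeline.create(
    {
        "source": {
            "type": "vertexai",
            "config": {"project_id": "my-gcp-project", "region": "us-west2"},
        },
        "sink": {
            "type": "datahub-rest",
            "config": {"server": "http://localhost:8080"},
        },
    }
)
pipeline.run()
pipeline.raise_from_status()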
datahub/ingestion/source/vertexai/vertexai.py (new file)
@@ -0,0 +1,1055 @@
+ import dataclasses
+ import logging
+ from typing import Dict, Iterable, List, Optional, Tuple, TypeVar, Union
+
+ from google.api_core.exceptions import GoogleAPICallError
+ from google.cloud import aiplatform
+ from google.cloud.aiplatform import (
+     AutoMLForecastingTrainingJob,
+     AutoMLImageTrainingJob,
+     AutoMLTabularTrainingJob,
+     AutoMLTextTrainingJob,
+     AutoMLVideoTrainingJob,
+     Endpoint,
+     ExperimentRun,
+ )
+ from google.cloud.aiplatform.base import VertexAiResourceNoun
+ from google.cloud.aiplatform.metadata.execution import Execution
+ from google.cloud.aiplatform.metadata.experiment_resources import Experiment
+ from google.cloud.aiplatform.models import Model, VersionInfo
+ from google.cloud.aiplatform.training_jobs import _TrainingJob
+ from google.oauth2 import service_account
+
+ import datahub.emitter.mce_builder as builder
+ from datahub.emitter.mcp import MetadataChangeProposalWrapper
+ from datahub.emitter.mcp_builder import ContainerKey, ProjectIdKey, gen_containers
+ from datahub.ingestion.api.common import PipelineContext
+ from datahub.ingestion.api.decorators import (
+     SupportStatus,
+     capability,
+     config_class,
+     platform_name,
+     support_status,
+ )
+ from datahub.ingestion.api.source import Source, SourceCapability, SourceReport
+ from datahub.ingestion.api.source_helpers import auto_workunit
+ from datahub.ingestion.api.workunit import MetadataWorkUnit
+ from datahub.ingestion.source.common.subtypes import MLAssetSubTypes
+ from datahub.ingestion.source.vertexai.vertexai_config import VertexAIConfig
+ from datahub.ingestion.source.vertexai.vertexai_result_type_utils import (
+     get_execution_result_status,
+     get_job_result_status,
+     is_status_for_run_event_class,
+ )
+ from datahub.metadata.com.linkedin.pegasus2avro.dataprocess import (
+     DataProcessInstanceRelationships,
+ )
+ from datahub.metadata.com.linkedin.pegasus2avro.ml.metadata import (
+     MLTrainingRunProperties,
+ )
+ from datahub.metadata.schema_classes import (
+     AuditStampClass,
+     ContainerClass,
+     DataPlatformInstanceClass,
+     DataProcessInstanceInputClass,
+     DataProcessInstanceOutputClass,
+     DataProcessInstancePropertiesClass,
+     DataProcessInstanceRunEventClass,
+     DataProcessInstanceRunResultClass,
+     DataProcessRunStatusClass,
+     DatasetPropertiesClass,
+     EdgeClass,
+     MetadataAttributionClass,
+     MLHyperParamClass,
+     MLMetricClass,
+     MLModelDeploymentPropertiesClass,
+     MLModelGroupPropertiesClass,
+     MLModelPropertiesClass,
+     MLTrainingRunPropertiesClass,
+     RunResultTypeClass,
+     SubTypesClass,
+     TimeStampClass,
+     VersionPropertiesClass,
+     VersionTagClass,
+ )
+ from datahub.metadata.urns import DataPlatformUrn, MlModelUrn, VersionSetUrn
+ from datahub.utilities.time import datetime_to_ts_millis
+
+ T = TypeVar("T")
+
+ logger = logging.getLogger(__name__)
+
+
+ @dataclasses.dataclass
+ class TrainingJobMetadata:
+     job: VertexAiResourceNoun
+     input_dataset: Optional[VertexAiResourceNoun] = None
+     output_model: Optional[Model] = None
+     output_model_version: Optional[VersionInfo] = None
+
+
+ @dataclasses.dataclass
+ class ModelMetadata:
+     model: Model
+     model_version: VersionInfo
+     training_job_urn: Optional[str] = None
+     endpoints: Optional[List[Endpoint]] = None
+
+
+ class ContainerKeyWithId(ContainerKey):
+     id: str
+
+
+ @platform_name("Vertex AI", id="vertexai")
+ @config_class(VertexAIConfig)
+ @support_status(SupportStatus.TESTING)
+ @capability(
+     SourceCapability.DESCRIPTIONS,
+     "Extract descriptions for Vertex AI Registered Models and Model Versions",
+ )
+ @capability(SourceCapability.TAGS, "Extract tags for Vertex AI Registered Model Stages")
+ class VertexAISource(Source):
+     platform: str = "vertexai"
+
+     def __init__(self, ctx: PipelineContext, config: VertexAIConfig):
+         super().__init__(ctx)
+         self.config = config
+         self.report = SourceReport()
+
+         creds = self.config.get_credentials()
+         credentials = (
+             service_account.Credentials.from_service_account_info(creds)
+             if creds
+             else None
+         )
+
+         aiplatform.init(
+             project=config.project_id, location=config.region, credentials=credentials
+         )
+         self.client = aiplatform
+         self.endpoints: Optional[Dict[str, List[Endpoint]]] = None
+         self.datasets: Optional[Dict[str, VertexAiResourceNoun]] = None
+         self.experiments: Optional[List[Experiment]] = None
+
+     def get_report(self) -> SourceReport:
+         return self.report
+
+     def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
+         """
+         Main function to fetch and yield MCPs for the various Vertex AI resources:
+         - Models and Model Versions from the Model Registry
+         - Training Jobs
+         - Experiments and Experiment Runs
+         """
+
+         # Ingest the project container
+         yield from self._gen_project_workunits()
+         # Fetch and ingest Models and Model Versions from the Model Registry
+         yield from auto_workunit(self._get_ml_models_mcps())
+         # Fetch and ingest Training Jobs
+         yield from auto_workunit(self._get_training_jobs_mcps())
+         # Fetch and ingest Experiments
+         yield from self._get_experiments_workunits()
+         # Fetch and ingest Experiment Runs
+         yield from auto_workunit(self._get_experiment_runs_mcps())
+
+     def _get_experiments_workunits(self) -> Iterable[MetadataWorkUnit]:
+         # List all experiments
+         self.experiments = aiplatform.Experiment.list()
+
+         logger.info("Fetching experiments from VertexAI server")
+         for experiment in self.experiments:
+             yield from self._gen_experiment_workunits(experiment)
+
+     def _get_experiment_runs_mcps(self) -> Iterable[MetadataChangeProposalWrapper]:
+         if self.experiments is None:
+             self.experiments = aiplatform.Experiment.list()
+         for experiment in self.experiments:
+             logger.info(f"Fetching experiment runs for experiment {experiment.name}")
+             experiment_runs = aiplatform.ExperimentRun.list(experiment=experiment.name)
+             for run in experiment_runs:
+                 yield from self._gen_experiment_run_mcps(experiment, run)
+
+     def _gen_experiment_workunits(
+         self, experiment: Experiment
+     ) -> Iterable[MetadataWorkUnit]:
+         yield from gen_containers(
+             parent_container_key=self._get_project_container(),
+             container_key=ContainerKeyWithId(
+                 platform=self.platform,
+                 id=self._make_vertexai_experiment_name(experiment.name),
+             ),
+             name=experiment.name,
+             sub_types=[MLAssetSubTypes.VERTEX_EXPERIMENT],
+             extra_properties={
+                 "name": experiment.name,
+                 "resourceName": experiment.resource_name,
+                 "dashboardURL": experiment.dashboard_url
+                 if experiment.dashboard_url
+                 else "",
+             },
+             external_url=self._make_experiment_external_url(experiment),
+         )
+
+     def _get_experiment_run_params(self, run: ExperimentRun) -> List[MLHyperParamClass]:
+         return [
+             MLHyperParamClass(name=k, value=str(v)) for k, v in run.get_params().items()
+         ]
+
+     def _get_experiment_run_metrics(self, run: ExperimentRun) -> List[MLMetricClass]:
+         return [
+             MLMetricClass(name=k, value=str(v)) for k, v in run.get_metrics().items()
+         ]
+
+     def _get_run_timestamps(
+         self, run: ExperimentRun
+     ) -> Tuple[Optional[int], Optional[int]]:
+         executions = run.get_executions()
+         if len(executions) == 1:
+             create_time = executions[0].create_time
+             update_time = executions[0].update_time
+             duration = update_time.timestamp() * 1000 - create_time.timestamp() * 1000
+             return int(create_time.timestamp() * 1000), int(duration)
+         else:
+             # When no execution context has started, the start time and duration are
+             # not available; when multiple execution contexts have started on a run,
+             # there is no way to know which one to use for create_time and duration.
+             return None, None
+
+     def _get_run_result_status(self, status: str) -> Union[str, RunResultTypeClass]:
+         if status == "COMPLETE":
+             return RunResultTypeClass.SUCCESS
+         elif status == "FAILED":
+             return RunResultTypeClass.FAILURE
+         elif status == "RUNNING":  # No corresponding RunResultTypeClass for RUNNING
+             return "RUNNING"
+         else:
+             return "UNKNOWN"
+
+     def _make_custom_properties_for_run(
+         self, experiment: Experiment, run: ExperimentRun
+     ) -> dict:
+         properties: Dict[str, str] = dict()
+         properties["externalUrl"] = self._make_experiment_run_external_url(
+             experiment, run
+         )
+         for execution in run.get_executions():
+             exec_name = execution.name
+             properties[f"created time ({exec_name})"] = str(execution.create_time)
+             properties[f"update time ({exec_name})"] = str(execution.update_time)
+         return properties
+
+     def _make_custom_properties_for_execution(self, execution: Execution) -> dict:
+         properties: Dict[str, Optional[str]] = dict()
+         for input_artifact in execution.get_input_artifacts():
+             properties[f"input artifact ({input_artifact.name})"] = input_artifact.uri
+         for output_artifact in execution.get_output_artifacts():
+             properties[f"output artifact ({output_artifact.name})"] = output_artifact.uri
+
+         return properties
+
+     def _gen_run_execution(
+         self, execution: Execution, run: ExperimentRun, exp: Experiment
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         create_time = execution.create_time
+         update_time = execution.update_time
+         duration = datetime_to_ts_millis(update_time) - datetime_to_ts_millis(
+             create_time
+         )
+         result_status: Union[str, RunResultTypeClass] = get_execution_result_status(
+             execution.state
+         )
+         execution_urn = builder.make_data_process_instance_urn(
+             self._make_vertexai_run_execution_name(execution.name)
+         )
+
+         yield from MetadataChangeProposalWrapper.construct_many(
+             entityUrn=str(execution_urn),
+             aspects=[
+                 DataProcessInstancePropertiesClass(
+                     name=execution.name,
+                     created=AuditStampClass(
+                         time=datetime_to_ts_millis(create_time),
+                         actor="urn:li:corpuser:datahub",
+                     ),
+                     externalUrl=self._make_artifact_external_url(
+                         experiment=exp, run=run
+                     ),
+                     customProperties=self._make_custom_properties_for_execution(
+                         execution
+                     ),
+                 ),
+                 DataPlatformInstanceClass(platform=str(DataPlatformUrn(self.platform))),
+                 SubTypesClass(typeNames=[MLAssetSubTypes.VERTEX_EXECUTION]),
+                 (
+                     DataProcessInstanceRunEventClass(
+                         status=DataProcessRunStatusClass.COMPLETE,
+                         timestampMillis=datetime_to_ts_millis(create_time),
+                         result=DataProcessInstanceRunResultClass(
+                             type=result_status,
+                             nativeResultType=self.platform,
+                         ),
+                         durationMillis=int(duration),
+                     )
+                     if is_status_for_run_event_class(result_status) and duration
+                     else None
+                 ),
+                 DataProcessInstanceRelationships(
+                     upstreamInstances=[self._make_experiment_run_urn(exp, run)],
+                     parentInstance=self._make_experiment_run_urn(exp, run),
+                 ),
+                 DataProcessInstanceInputClass(
+                     inputs=[],
+                     inputEdges=[
+                         EdgeClass(
+                             destinationUrn=self._make_experiment_run_urn(exp, run)
+                         ),
+                     ],
+                 ),
+             ],
+         )
+
+     def _gen_experiment_run_mcps(
+         self, experiment: Experiment, run: ExperimentRun
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         experiment_key = ContainerKeyWithId(
+             platform=self.platform,
+             id=self._make_vertexai_experiment_name(experiment.name),
+         )
+         run_urn = self._make_experiment_run_urn(experiment, run)
+         created_time, duration = self._get_run_timestamps(run)
+         created_actor = "urn:li:corpuser:datahub"
+         run_result_type = self._get_run_result_status(run.get_state())
+
+         # Generate MCPs for each execution of the run
+         for execution in run.get_executions():
+             yield from self._gen_run_execution(
+                 execution=execution, exp=experiment, run=run
+             )
+
+         # Generate MCPs for the run itself
+         yield from MetadataChangeProposalWrapper.construct_many(
+             entityUrn=run_urn,
+             aspects=[
+                 DataProcessInstancePropertiesClass(
+                     name=run.name,
+                     created=AuditStampClass(
+                         time=created_time if created_time else 0,
+                         actor=created_actor,
+                     ),
+                     externalUrl=self._make_experiment_run_external_url(experiment, run),
+                     customProperties=self._make_custom_properties_for_run(
+                         experiment, run
+                     ),
+                 ),
+                 ContainerClass(container=experiment_key.as_urn()),
+                 MLTrainingRunPropertiesClass(
+                     hyperParams=self._get_experiment_run_params(run),
+                     trainingMetrics=self._get_experiment_run_metrics(run),
+                     externalUrl=self._make_experiment_run_external_url(experiment, run),
+                     id=f"{experiment.name}-{run.name}",
+                 ),
+                 DataPlatformInstanceClass(platform=str(DataPlatformUrn(self.platform))),
+                 SubTypesClass(typeNames=[MLAssetSubTypes.VERTEX_EXPERIMENT_RUN]),
+                 (
+                     DataProcessInstanceRunEventClass(
+                         status=DataProcessRunStatusClass.COMPLETE,
+                         timestampMillis=created_time
+                         if created_time
+                         else 0,  # None is not allowed; default to 0
+                         result=DataProcessInstanceRunResultClass(
+                             type=run_result_type,
+                             nativeResultType=self.platform,
+                         ),
+                         durationMillis=duration if duration else None,
+                     )
+                     if is_status_for_run_event_class(run_result_type)
+                     else None
+                 ),
+             ],
+         )
+
+     def _gen_project_workunits(self) -> Iterable[MetadataWorkUnit]:
+         yield from gen_containers(
+             container_key=self._get_project_container(),
+             name=self.config.project_id,
+             sub_types=[MLAssetSubTypes.VERTEX_PROJECT],
+         )
+
+     def _get_ml_models_mcps(self) -> Iterable[MetadataChangeProposalWrapper]:
+         """
+         Fetch the list of Models in the Model Registry and generate corresponding MCPs.
+         """
+         registered_models = self.client.Model.list()
+         for model in registered_models:
+             # Create an MCP for the Model Group (= Model in Vertex AI)
+             yield from self._gen_ml_group_mcps(model)
+             model_versions = model.versioning_registry.list_versions()
+             for model_version in model_versions:
+                 # Create an MCP for the Model (= Model Version in Vertex AI)
+                 logger.info(
+                     f"Ingesting a model (name: {model.display_name} id:{model.name})"
+                 )
+                 yield from self._get_ml_model_mcps(
+                     model=model, model_version=model_version
+                 )
+
+     def _get_ml_model_mcps(
+         self, model: Model, model_version: VersionInfo
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         model_meta: ModelMetadata = self._get_ml_model_metadata(model, model_version)
+         # Create the ML Model entity
+         yield from self._gen_ml_model_mcps(model_meta)
+         # Create the Endpoint entity
+         yield from self._gen_endpoints_mcps(model_meta)
+
+     def _get_ml_model_metadata(
+         self, model: Model, model_version: VersionInfo
+     ) -> ModelMetadata:
+         model_meta = ModelMetadata(model=model, model_version=model_version)
+         # Search for endpoints associated with the model
+         endpoints = self._search_endpoint(model)
+         model_meta.endpoints = endpoints
+         return model_meta
+
+     def _get_training_jobs_mcps(self) -> Iterable[MetadataChangeProposalWrapper]:
+         """
+         Fetch training jobs from Vertex AI and generate corresponding MCPs.
+         This method retrieves various types of training jobs from Vertex AI, including
+         CustomJob, CustomTrainingJob, CustomContainerTrainingJob, CustomPythonPackageTrainingJob,
+         AutoMLTabularTrainingJob, AutoMLTextTrainingJob, AutoMLImageTrainingJob, AutoMLVideoTrainingJob,
+         and AutoMLForecastingTrainingJob. For each job, it generates MCPs containing metadata
+         about the job, its inputs, and its outputs.
+         """
+         class_names = [
+             "CustomJob",
+             "CustomTrainingJob",
+             "CustomContainerTrainingJob",
+             "CustomPythonPackageTrainingJob",
+             "AutoMLTabularTrainingJob",
+             "AutoMLTextTrainingJob",
+             "AutoMLImageTrainingJob",
+             "AutoMLVideoTrainingJob",
+             "AutoMLForecastingTrainingJob",
+         ]
+         # Iterate over the class names and call each class's list() function
+         for class_name in class_names:
+             logger.info(f"Fetching a list of {class_name}s from VertexAI server")
+             for job in getattr(self.client, class_name).list():
+                 yield from self._get_training_job_mcps(job)
+
+     def _get_training_job_mcps(
+         self, job: VertexAiResourceNoun
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         job_meta: TrainingJobMetadata = self._get_training_job_metadata(job)
+         # Create a DataProcessInstance for the training job
+         yield from self._gen_training_job_mcps(job_meta)
+         # Create a Dataset entity for the input dataset of the training job
+         yield from self._get_input_dataset_mcps(job_meta)
+         # Create an ML Model entity for the output model of the training job
+         yield from self._gen_output_model_mcps(job_meta)
+
+     def _gen_output_model_mcps(
+         self, job_meta: TrainingJobMetadata
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         if job_meta.output_model and job_meta.output_model_version:
+             job = job_meta.job
+             job_urn = builder.make_data_process_instance_urn(
+                 self._make_vertexai_job_name(entity_id=job.name)
+             )
+
+             yield from self._gen_ml_model_mcps(
+                 ModelMetadata(
+                     model=job_meta.output_model,
+                     model_version=job_meta.output_model_version,
+                     training_job_urn=job_urn,
+                 )
+             )
+
+     def _get_job_duration_millis(self, job: VertexAiResourceNoun) -> Optional[int]:
+         create_time = job.create_time
+         duration = None
+         if isinstance(job, _TrainingJob) and job.create_time and job.end_time:
+             end_time = job.end_time
+             duration = datetime_to_ts_millis(end_time) - datetime_to_ts_millis(
+                 create_time
+             )
+
+         return int(duration) if duration else None
+
+     def _gen_training_job_mcps(
+         self, job_meta: TrainingJobMetadata
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         """
+         Generate MCPs for a Vertex AI training job.
+         """
+         job = job_meta.job
+         job_id = self._make_vertexai_job_name(entity_id=job.name)
+         job_urn = builder.make_data_process_instance_urn(job_id)
+
+         created_time = datetime_to_ts_millis(job.create_time) if job.create_time else 0
+         duration = self._get_job_duration_millis(job)
+
+         # URN of the input dataset, if the training job has one
+         dataset_urn = (
+             builder.make_dataset_urn(
+                 platform=self.platform,
+                 name=self._make_vertexai_dataset_name(
+                     entity_id=job_meta.input_dataset.name
+                 ),
+                 env=self.config.env,
+             )
+             if job_meta.input_dataset
+             else None
+         )
+         # URN of the output model, if the training job has one
+         model_urn = (
+             self._make_ml_model_urn(
+                 model_version=job_meta.output_model_version,
+                 model_name=self._make_vertexai_model_name(
+                     entity_id=job_meta.output_model.name
+                 ),
+             )
+             if job_meta.output_model and job_meta.output_model_version
+             else None
+         )
+
+         result_type = get_job_result_status(job)
+
+         yield from MetadataChangeProposalWrapper.construct_many(
+             job_urn,
+             aspects=[
+                 DataProcessInstancePropertiesClass(
+                     name=job.display_name,
+                     created=AuditStampClass(
+                         time=created_time,
+                         actor="urn:li:corpuser:datahub",
+                     ),
+                     externalUrl=self._make_job_external_url(job),
+                     customProperties={
+                         "jobType": job.__class__.__name__,
+                     },
+                 ),
+                 MLTrainingRunProperties(
+                     externalUrl=self._make_job_external_url(job), id=job.name
+                 ),
+                 SubTypesClass(typeNames=[MLAssetSubTypes.VERTEX_TRAINING_JOB]),
+                 ContainerClass(container=self._get_project_container().as_urn()),
+                 DataPlatformInstanceClass(platform=str(DataPlatformUrn(self.platform))),
+                 (
+                     DataProcessInstanceInputClass(
+                         inputs=[],
+                         inputEdges=[
+                             EdgeClass(destinationUrn=dataset_urn),
+                         ],
+                     )
+                     if dataset_urn
+                     else None
+                 ),
+                 (
+                     DataProcessInstanceOutputClass(
+                         outputs=[],
+                         outputEdges=[
+                             EdgeClass(destinationUrn=model_urn),
+                         ],
+                     )
+                     if model_urn
+                     else None
+                 ),
+                 (
+                     DataProcessInstanceRunEventClass(
+                         status=DataProcessRunStatusClass.COMPLETE,
+                         timestampMillis=created_time,
+                         result=DataProcessInstanceRunResultClass(
+                             type=result_type,
+                             nativeResultType=self.platform,
+                         ),
+                         durationMillis=duration,
+                     )
+                     if is_status_for_run_event_class(result_type) and duration
+                     else None
+                 ),
+             ],
+         )
+
+     def _gen_ml_group_mcps(
+         self,
+         model: Model,
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         """
+         Generate an MLModelGroup MCP for a Vertex AI Model.
+         """
+         ml_model_group_urn = self._make_ml_model_group_urn(model)
+
+         yield from MetadataChangeProposalWrapper.construct_many(
+             ml_model_group_urn,
+             aspects=[
+                 MLModelGroupPropertiesClass(
+                     name=model.display_name,
+                     description=model.description,
+                     created=(
+                         TimeStampClass(
+                             time=datetime_to_ts_millis(model.create_time),
+                             actor="urn:li:corpuser:datahub",
+                         )
+                         if model.create_time
+                         else None
+                     ),
+                     lastModified=(
+                         TimeStampClass(
+                             time=datetime_to_ts_millis(model.update_time),
+                             actor="urn:li:corpuser:datahub",
+                         )
+                         if model.update_time
+                         else None
+                     ),
+                     customProperties=None,
+                 ),
+                 SubTypesClass(typeNames=[MLAssetSubTypes.VERTEX_MODEL_GROUP]),
+                 ContainerClass(container=self._get_project_container().as_urn()),
+                 DataPlatformInstanceClass(platform=str(DataPlatformUrn(self.platform))),
+             ],
+         )
+
+     def _make_ml_model_group_urn(self, model: Model) -> str:
+         urn = builder.make_ml_model_group_urn(
+             platform=self.platform,
+             group_name=self._make_vertexai_model_group_name(model.name),
+             env=self.config.env,
+         )
+         return urn
+
+     def _get_project_container(self) -> ProjectIdKey:
+         return ProjectIdKey(project_id=self.config.project_id, platform=self.platform)
+
+     def _is_automl_job(self, job: VertexAiResourceNoun) -> bool:
+         return isinstance(
+             job,
+             (
+                 AutoMLTabularTrainingJob,
+                 AutoMLTextTrainingJob,
+                 AutoMLImageTrainingJob,
+                 AutoMLVideoTrainingJob,
+                 AutoMLForecastingTrainingJob,
+             ),
+         )
+
+     def _search_model_version(
+         self, model: Model, version_id: str
+     ) -> Optional[VersionInfo]:
+         for version in model.versioning_registry.list_versions():
+             if version.version_id == version_id:
+                 return version
+         return None
+
+     def _search_dataset(self, dataset_id: str) -> Optional[VertexAiResourceNoun]:
+         """
+         Search for a dataset by its ID in Vertex AI.
+         This method iterates through different types of datasets (Text, Tabular, Image,
+         TimeSeries, and Video) to find a dataset that matches the given dataset ID.
+         """
+
+         dataset_types = [
+             "TextDataset",
+             "TabularDataset",
+             "ImageDataset",
+             "TimeSeriesDataset",
+             "VideoDataset",
+         ]
+
+         if self.datasets is None:
+             self.datasets = {}
+
+             for dtype in dataset_types:
+                 dataset_class = getattr(self.client.datasets, dtype)
+                 for ds in dataset_class.list():
+                     self.datasets[ds.name] = ds
+
+         return self.datasets.get(dataset_id)
+
+     def _get_input_dataset_mcps(
+         self, job_meta: TrainingJobMetadata
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         """
+         Create a DatasetPropertiesClass aspect for a given Vertex AI dataset.
+         """
+         ds = job_meta.input_dataset
+
+         if ds:
+             # Create the URN of the input dataset for the training job
+             dataset_name = self._make_vertexai_dataset_name(entity_id=ds.name)
+             dataset_urn = builder.make_dataset_urn(
+                 platform=self.platform,
+                 name=dataset_name,
+                 env=self.config.env,
+             )
+
+             yield from MetadataChangeProposalWrapper.construct_many(
+                 dataset_urn,
+                 aspects=[
+                     DatasetPropertiesClass(
+                         name=ds.display_name,
+                         created=(
+                             TimeStampClass(time=datetime_to_ts_millis(ds.create_time))
+                             if ds.create_time
+                             else None
+                         ),
+                         description=ds.display_name,
+                         customProperties={
+                             "resourceName": ds.resource_name,
+                         },
+                         qualifiedName=ds.resource_name,
+                     ),
+                     SubTypesClass(typeNames=[MLAssetSubTypes.VERTEX_DATASET]),
+                     ContainerClass(container=self._get_project_container().as_urn()),
+                     DataPlatformInstanceClass(
+                         platform=str(DataPlatformUrn(self.platform))
+                     ),
+                 ],
+             )
+
+     def _get_training_job_metadata(
+         self, job: VertexAiResourceNoun
+     ) -> TrainingJobMetadata:
+         """
+         Retrieve metadata for a given Vertex AI training job.
+         This method extracts metadata for a Vertex AI training job, including input datasets
+         and output models. It checks if the job is an AutoML job and retrieves the relevant
+         input dataset and output model information.
+         """
+         job_meta = TrainingJobMetadata(job=job)
+         # Check if the job is an AutoML job
+         if self._is_automl_job(job):
+             job_conf = job.to_dict()
+             # Check if an input dataset is present in the job configuration
+             if (
+                 "inputDataConfig" in job_conf
+                 and "datasetId" in job_conf["inputDataConfig"]
+             ):
+                 # Create the URN of the input dataset for the training job
+                 dataset_id = job_conf["inputDataConfig"]["datasetId"]
+                 logger.info(
+                     f"Found input dataset (id: {dataset_id}) for training job ({job.display_name})"
+                 )
+
+                 if dataset_id:
+                     input_ds = self._search_dataset(dataset_id)
+                     if input_ds:
+                         logger.info(
+                             f"Found the name of input dataset ({input_ds.display_name}) with dataset id ({dataset_id})"
+                         )
+                         job_meta.input_dataset = input_ds
+
+             # Check if an output model is present in the job configuration
+             if (
+                 "modelToUpload" in job_conf
+                 and "name" in job_conf["modelToUpload"]
+                 and job_conf["modelToUpload"]["name"]
+                 and job_conf["modelToUpload"]["versionId"]
+             ):
+                 model_name = job_conf["modelToUpload"]["name"]
+                 model_version_str = job_conf["modelToUpload"]["versionId"]
+                 try:
+                     model = Model(model_name=model_name)
+                     model_version = self._search_model_version(model, model_version_str)
+                     if model and model_version:
+                         logger.info(
+                             f"Found output model (name:{model.display_name} id:{model_version_str}) "
+                             f"for training job: {job.display_name}"
+                         )
+                         job_meta.output_model = model
+                         job_meta.output_model_version = model_version
+                 except GoogleAPICallError as e:
+                     self.report.report_failure(
+                         title="Unable to fetch model and model version",
+                         message="Encountered an error while fetching the output model and model version that the training job generates",
+                         exc=e,
+                     )
+
+         return job_meta
+
+     def _gen_endpoints_mcps(
+         self, model_meta: ModelMetadata
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         model: Model = model_meta.model
+         model_version: VersionInfo = model_meta.model_version
+
+         if model_meta.endpoints:
+             for endpoint in model_meta.endpoints:
+                 endpoint_urn = builder.make_ml_model_deployment_urn(
+                     platform=self.platform,
+                     deployment_name=self._make_vertexai_endpoint_name(
+                         entity_id=endpoint.name
+                     ),
+                     env=self.config.env,
+                 )
+
+                 yield from MetadataChangeProposalWrapper.construct_many(
+                     entityUrn=endpoint_urn,
+                     aspects=[
+                         MLModelDeploymentPropertiesClass(
+                             description=model.description,
+                             createdAt=datetime_to_ts_millis(endpoint.create_time),
+                             version=VersionTagClass(
+                                 versionTag=str(model_version.version_id)
+                             ),
+                             customProperties={"displayName": endpoint.display_name},
+                         ),
+                         ContainerClass(
+                             container=self._get_project_container().as_urn()
+                         ),
+                         # TODO: add a subtype once MLModelDeployment metadata supports it
+                         # SubTypesClass(typeNames=[MLTypes.ENDPOINT])
+                     ],
+                 )
+
+     def _gen_ml_model_mcps(
+         self, model_meta: ModelMetadata
+     ) -> Iterable[MetadataChangeProposalWrapper]:
+         """
+         Generate MLModel and Endpoint MCPs for a Vertex AI Model Version.
+         """
+
+         model: Model = model_meta.model
+         model_version: VersionInfo = model_meta.model_version
+         training_job_urn: Optional[str] = model_meta.training_job_urn
+         endpoints: Optional[List[Endpoint]] = model_meta.endpoints
+         endpoint_urns: List[str] = list()
+
+         logger.info(f"generating model mcp for {model.name}")
+
+         # Generate the list of endpoint URNs
+         if endpoints:
+             for endpoint in endpoints:
+                 logger.info(
+                     f"found endpoint ({endpoint.display_name}) for model ({model.resource_name})"
+                 )
+                 endpoint_urns.append(
+                     builder.make_ml_model_deployment_urn(
+                         platform=self.platform,
+                         deployment_name=self._make_vertexai_endpoint_name(
+                             entity_id=endpoint.display_name
+                         ),
+                         env=self.config.env,
+                     )
+                 )
+
+         # Create URNs for the Model and Model Version
+         model_group_urn = self._make_ml_model_group_urn(model)
+         model_name = self._make_vertexai_model_name(entity_id=model.name)
+         model_urn = self._make_ml_model_urn(model_version, model_name=model_name)
+
+         yield from MetadataChangeProposalWrapper.construct_many(
+             entityUrn=model_urn,
+             aspects=[
+                 MLModelPropertiesClass(
+                     name=f"{model.display_name}_{model_version.version_id}",
+                     description=model_version.version_description,
+                     customProperties={
+                         "versionId": f"{model_version.version_id}",
+                         "resourceName": model.resource_name,
+                     },
+                     created=(
+                         TimeStampClass(
+                             time=datetime_to_ts_millis(
+                                 model_version.version_create_time
+                             ),
+                             actor="urn:li:corpuser:datahub",
+                         )
+                         if model_version.version_create_time
+                         else None
+                     ),
+                     lastModified=(
+                         TimeStampClass(
+                             time=datetime_to_ts_millis(
+                                 model_version.version_update_time
+                             ),
+                             actor="urn:li:corpuser:datahub",
+                         )
+                         if model_version.version_update_time
+                         else None
+                     ),
+                     version=VersionTagClass(versionTag=str(model_version.version_id)),
+                     groups=[model_group_urn],  # link the model version to its model group
+                     trainingJobs=(
+                         [training_job_urn] if training_job_urn else None
+                     ),  # link to the training job
+                     deployments=endpoint_urns,
+                     externalUrl=self._make_model_version_external_url(model),
+                     type="ML Model",
+                 ),
+                 ContainerClass(
+                     container=self._get_project_container().as_urn(),
+                 ),
+                 SubTypesClass(typeNames=[MLAssetSubTypes.VERTEX_MODEL]),
+                 DataPlatformInstanceClass(platform=str(DataPlatformUrn(self.platform))),
+                 VersionPropertiesClass(
+                     version=VersionTagClass(
+                         versionTag=str(model_version.version_id),
+                         metadataAttribution=(
+                             MetadataAttributionClass(
+                                 time=int(
+                                     model_version.version_create_time.timestamp() * 1000
+                                 ),
+                                 actor="urn:li:corpuser:datahub",
+                             )
+                             if model_version.version_create_time
+                             else None
+                         ),
+                     ),
+                     versionSet=str(self._get_version_set_urn(model)),
+                     sortId=str(model_version.version_id).zfill(10),
+                     aliases=None,
+                 ),
+             ],
+         )
+
+     def _get_version_set_urn(self, model: Model) -> VersionSetUrn:
+         guid_dict = {"platform": self.platform, "name": model.name}
+         version_set_urn = VersionSetUrn(
+             id=builder.datahub_guid(guid_dict),
+             entity_type=MlModelUrn.ENTITY_TYPE,
+         )
+         return version_set_urn
+
+     def _search_endpoint(self, model: Model) -> List[Endpoint]:
+         """
+         Search for endpoints associated with the model.
+         """
+         if self.endpoints is None:
+             endpoint_dict: Dict[str, List[Endpoint]] = {}
+             for endpoint in self.client.Endpoint.list():
+                 for resource in endpoint.list_models():
+                     if resource.model not in endpoint_dict:
+                         endpoint_dict[resource.model] = []
+                     endpoint_dict[resource.model].append(endpoint)
+             self.endpoints = endpoint_dict
+
+         return self.endpoints.get(model.resource_name, [])
+
+     def _make_experiment_run_urn(
+         self, experiment: Experiment, run: ExperimentRun
+     ) -> str:
+         return builder.make_data_process_instance_urn(
+             self._make_vertexai_experiment_run_name(
+                 entity_id=f"{experiment.name}-{run.name}"
+             )
+         )
+
+     def _make_ml_model_urn(self, model_version: VersionInfo, model_name: str) -> str:
+         urn = builder.make_ml_model_urn(
+             platform=self.platform,
+             model_name=f"{model_name}_{model_version.version_id}",
+             env=self.config.env,
+         )
+         return urn
+
+     def _make_training_job_urn(self, job: VertexAiResourceNoun) -> str:
+         job_id = self._make_vertexai_job_name(entity_id=job.name)
+         urn = builder.make_data_process_instance_urn(dataProcessInstanceId=job_id)
+         return urn
+
+     def _make_vertexai_model_group_name(
+         self,
+         entity_id: str,
+     ) -> str:
+         return f"{self.config.project_id}.model_group.{entity_id}"
+
+     def _make_vertexai_endpoint_name(self, entity_id: str) -> str:
+         return f"{self.config.project_id}.endpoint.{entity_id}"
+
+     def _make_vertexai_model_name(self, entity_id: str) -> str:
+         return f"{self.config.project_id}.model.{entity_id}"
+
+     def _make_vertexai_dataset_name(self, entity_id: str) -> str:
+         return f"{self.config.project_id}.dataset.{entity_id}"
+
+     def _make_vertexai_job_name(
+         self,
+         entity_id: Optional[str],
+     ) -> str:
+         return f"{self.config.project_id}.job.{entity_id}"
+
+     def _make_vertexai_experiment_name(self, entity_id: Optional[str]) -> str:
+         return f"{self.config.project_id}.experiment.{entity_id}"
+
+     def _make_vertexai_experiment_run_name(self, entity_id: Optional[str]) -> str:
+         return f"{self.config.project_id}.experiment_run.{entity_id}"
+
+     def _make_vertexai_run_execution_name(self, entity_id: Optional[str]) -> str:
+         return f"{self.config.project_id}.execution.{entity_id}"
+
+     def _make_artifact_external_url(
+         self, experiment: Experiment, run: ExperimentRun
+     ) -> str:
+         """
+         Experiment run artifacts external URL in Vertex AI
+         Sample URL:
+         https://console.cloud.google.com/vertex-ai/experiments/locations/us-west2/experiments/test-experiment-job-metadata/runs/test-experiment-job-metadata-run-3/artifacts?project=acryl-poc
+         """
+         external_url: str = (
+             f"{self.config.vertexai_url}/experiments/locations/{self.config.region}/experiments/{experiment.name}/runs/{experiment.name}-{run.name}/artifacts"
+             f"?project={self.config.project_id}"
+         )
+         return external_url
+
+     def _make_job_external_url(self, job: VertexAiResourceNoun) -> str:
+         """
+         Training job external URL in Vertex AI
+         Sample URL:
+         https://console.cloud.google.com/vertex-ai/training/training-pipelines?project=acryl-poc&trainingPipelineId=5401695018589093888
+         """
+         external_url: str = (
+             f"{self.config.vertexai_url}/training/training-pipelines?trainingPipelineId={job.name}"
+             f"&project={self.config.project_id}"
+         )
+         return external_url
+
+     def _make_model_external_url(self, model: Model) -> str:
+         """
+         Model external URL in Vertex AI
+         Sample URL:
+         https://console.cloud.google.com/vertex-ai/models/locations/us-west2/models/812468724182286336?project=acryl-poc
+         """
+         external_url: str = (
+             f"{self.config.vertexai_url}/models/locations/{self.config.region}/models/{model.name}"
+             f"?project={self.config.project_id}"
+         )
+         return external_url
+
+     def _make_model_version_external_url(self, model: Model) -> str:
+         """
+         Model Version external URL in Vertex AI
+         Sample URL:
+         https://console.cloud.google.com/vertex-ai/models/locations/us-west2/models/812468724182286336/versions/1?project=acryl-poc
+         """
+         external_url: str = (
+             f"{self.config.vertexai_url}/models/locations/{self.config.region}/models/{model.name}"
+             f"/versions/{model.version_id}"
+             f"?project={self.config.project_id}"
+         )
+         return external_url
+
+     def _make_experiment_external_url(self, experiment: Experiment) -> str:
+         """
+         Experiment external URL in Vertex AI
+         Sample URL:
+         https://console.cloud.google.com/vertex-ai/experiments/locations/us-west2/experiments/experiment-run-with-automljob-1/runs?project=acryl-poc
+         """
+         external_url: str = (
+             f"{self.config.vertexai_url}/experiments/locations/{self.config.region}/experiments/{experiment.name}"
+             f"/runs?project={self.config.project_id}"
+         )
+         return external_url
+
+     def _make_experiment_run_external_url(
+         self, experiment: Experiment, run: ExperimentRun
+     ) -> str:
+         """
+         Experiment run external URL in Vertex AI
+         Sample URL:
+         https://console.cloud.google.com/vertex-ai/experiments/locations/us-west2/experiments/experiment-run-with-automljob-1/runs/experiment-run-with-automljob-1-automl-job-with-run-1/charts?project=acryl-poc
+         """
+         external_url: str = (
+             f"{self.config.vertexai_url}/experiments/locations/{self.config.region}/experiments/{experiment.name}"
+             f"/runs/{experiment.name}-{run.name}/charts?project={self.config.project_id}"
+         )
+         return external_url
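
One pattern worth calling out in the new source: the aspect lists passed to MetadataChangeProposalWrapper.construct_many contain conditional entries that evaluate to None (the DataProcessInstanceRunEventClass entries above, for example), and construct_many emits a proposal only for the non-None aspects. A minimal sketch of that pattern, using a made-up URN for illustration:

# Minimal sketch of the conditional-aspect pattern used throughout the new
# source. The URN is a made-up example; construct_many pairs the entity URN
# with each aspect and skips entries that evaluated to None.
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.metadata.schema_classes import StatusClass, SubTypesClass

have_subtype = False  # stand-in for a runtime condition, as in the source above

mcps = MetadataChangeProposalWrapper.construct_many(
    entityUrn="urn:li:dataProcessInstance:example-run",
    aspects=[
        StatusClass(removed=False),
        SubTypesClass(typeNames=["Example"]) if have_subtype else None,
    ],
)
assert len(mcps) == 1  # only the non-None aspect produced a proposal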