acryl-datahub 1.0.0rc17__py3-none-any.whl → 1.0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

Files changed (107) hide show
  1. {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/METADATA +2426 -2427
  2. {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/RECORD +106 -89
  3. {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/WHEEL +1 -1
  4. {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/entry_points.txt +2 -1
  5. datahub/_version.py +1 -1
  6. datahub/api/entities/dataset/dataset.py +1 -28
  7. datahub/cli/specific/dataset_cli.py +26 -10
  8. datahub/emitter/mce_builder.py +1 -3
  9. datahub/emitter/mcp_builder.py +8 -0
  10. datahub/emitter/request_helper.py +19 -14
  11. datahub/emitter/response_helper.py +25 -18
  12. datahub/emitter/rest_emitter.py +23 -7
  13. datahub/errors.py +8 -0
  14. datahub/ingestion/api/source.py +7 -2
  15. datahub/ingestion/api/source_helpers.py +14 -2
  16. datahub/ingestion/extractor/schema_util.py +1 -0
  17. datahub/ingestion/graph/client.py +26 -20
  18. datahub/ingestion/graph/filters.py +62 -17
  19. datahub/ingestion/sink/datahub_rest.py +2 -2
  20. datahub/ingestion/source/cassandra/cassandra.py +1 -10
  21. datahub/ingestion/source/common/data_platforms.py +23 -0
  22. datahub/ingestion/source/common/gcp_credentials_config.py +6 -0
  23. datahub/ingestion/source/common/subtypes.py +17 -1
  24. datahub/ingestion/source/data_lake_common/path_spec.py +21 -1
  25. datahub/ingestion/source/dbt/dbt_common.py +6 -4
  26. datahub/ingestion/source/dbt/dbt_core.py +4 -6
  27. datahub/ingestion/source/dbt/dbt_tests.py +8 -6
  28. datahub/ingestion/source/dremio/dremio_datahub_source_mapping.py +1 -1
  29. datahub/ingestion/source/dremio/dremio_entities.py +6 -5
  30. datahub/ingestion/source/dremio/dremio_source.py +96 -117
  31. datahub/ingestion/source/gc/soft_deleted_entity_cleanup.py +101 -104
  32. datahub/ingestion/source/ge_data_profiler.py +11 -1
  33. datahub/ingestion/source/hex/__init__.py +0 -0
  34. datahub/ingestion/source/hex/api.py +394 -0
  35. datahub/ingestion/source/hex/constants.py +3 -0
  36. datahub/ingestion/source/hex/hex.py +167 -0
  37. datahub/ingestion/source/hex/mapper.py +372 -0
  38. datahub/ingestion/source/hex/model.py +68 -0
  39. datahub/ingestion/source/iceberg/iceberg.py +193 -140
  40. datahub/ingestion/source/iceberg/iceberg_profiler.py +21 -18
  41. datahub/ingestion/source/mlflow.py +217 -8
  42. datahub/ingestion/source/mode.py +11 -1
  43. datahub/ingestion/source/openapi.py +69 -34
  44. datahub/ingestion/source/powerbi/config.py +31 -4
  45. datahub/ingestion/source/powerbi/m_query/data_classes.py +1 -0
  46. datahub/ingestion/source/powerbi/m_query/pattern_handler.py +111 -10
  47. datahub/ingestion/source/powerbi/m_query/resolver.py +10 -0
  48. datahub/ingestion/source/powerbi/powerbi.py +41 -24
  49. datahub/ingestion/source/powerbi/rest_api_wrapper/powerbi_api.py +11 -11
  50. datahub/ingestion/source/redshift/lineage_v2.py +9 -1
  51. datahub/ingestion/source/redshift/query.py +1 -1
  52. datahub/ingestion/source/s3/source.py +11 -0
  53. datahub/ingestion/source/sigma/config.py +3 -4
  54. datahub/ingestion/source/sigma/sigma.py +10 -6
  55. datahub/ingestion/source/slack/slack.py +399 -82
  56. datahub/ingestion/source/snowflake/constants.py +1 -0
  57. datahub/ingestion/source/snowflake/snowflake_config.py +14 -1
  58. datahub/ingestion/source/snowflake/snowflake_queries.py +16 -13
  59. datahub/ingestion/source/snowflake/snowflake_query.py +17 -0
  60. datahub/ingestion/source/snowflake/snowflake_report.py +3 -0
  61. datahub/ingestion/source/snowflake/snowflake_schema.py +29 -0
  62. datahub/ingestion/source/snowflake/snowflake_schema_gen.py +112 -42
  63. datahub/ingestion/source/snowflake/snowflake_utils.py +25 -1
  64. datahub/ingestion/source/sql/mssql/job_models.py +15 -1
  65. datahub/ingestion/source/sql/mssql/source.py +8 -4
  66. datahub/ingestion/source/sql/oracle.py +51 -4
  67. datahub/ingestion/source/sql/stored_procedures/__init__.py +0 -0
  68. datahub/ingestion/source/sql/stored_procedures/base.py +242 -0
  69. datahub/ingestion/source/sql/{mssql/stored_procedure_lineage.py → stored_procedures/lineage.py} +1 -29
  70. datahub/ingestion/source/superset.py +291 -35
  71. datahub/ingestion/source/usage/usage_common.py +0 -65
  72. datahub/ingestion/source/vertexai/__init__.py +0 -0
  73. datahub/ingestion/source/vertexai/vertexai.py +1055 -0
  74. datahub/ingestion/source/vertexai/vertexai_config.py +29 -0
  75. datahub/ingestion/source/vertexai/vertexai_result_type_utils.py +68 -0
  76. datahub/metadata/_schema_classes.py +472 -1
  77. datahub/metadata/com/linkedin/pegasus2avro/dataplatform/slack/__init__.py +15 -0
  78. datahub/metadata/com/linkedin/pegasus2avro/event/__init__.py +11 -0
  79. datahub/metadata/com/linkedin/pegasus2avro/event/notification/__init__.py +15 -0
  80. datahub/metadata/com/linkedin/pegasus2avro/event/notification/settings/__init__.py +19 -0
  81. datahub/metadata/schema.avsc +313 -2
  82. datahub/metadata/schemas/CorpUserEditableInfo.avsc +14 -0
  83. datahub/metadata/schemas/CorpUserKey.avsc +2 -1
  84. datahub/metadata/schemas/CorpUserSettings.avsc +95 -0
  85. datahub/metadata/schemas/DataProcessInstanceInput.avsc +2 -1
  86. datahub/metadata/schemas/DataProcessInstanceOutput.avsc +2 -1
  87. datahub/metadata/schemas/Deprecation.avsc +2 -0
  88. datahub/metadata/schemas/MLModelGroupProperties.avsc +16 -0
  89. datahub/metadata/schemas/MetadataChangeEvent.avsc +32 -0
  90. datahub/metadata/schemas/QueryProperties.avsc +20 -0
  91. datahub/metadata/schemas/Siblings.avsc +2 -0
  92. datahub/metadata/schemas/SlackUserInfo.avsc +160 -0
  93. datahub/sdk/__init__.py +1 -0
  94. datahub/sdk/dataset.py +122 -0
  95. datahub/sdk/entity.py +99 -3
  96. datahub/sdk/entity_client.py +27 -3
  97. datahub/sdk/main_client.py +24 -1
  98. datahub/sdk/search_client.py +81 -8
  99. datahub/sdk/search_filters.py +94 -37
  100. datahub/sql_parsing/split_statements.py +17 -3
  101. datahub/sql_parsing/sql_parsing_aggregator.py +6 -0
  102. datahub/sql_parsing/tool_meta_extractor.py +27 -2
  103. datahub/testing/mcp_diff.py +1 -18
  104. datahub/utilities/threaded_iterator_executor.py +16 -3
  105. datahub/ingestion/source/vertexai.py +0 -697
  106. {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info/licenses}/LICENSE +0 -0
  107. {acryl_datahub-1.0.0rc17.dist-info → acryl_datahub-1.0.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,394 @@
1
+ import logging
2
+ from dataclasses import dataclass
3
+ from datetime import datetime, timezone
4
+ from typing import Any, Dict, Generator, List, Optional, Union
5
+
6
+ import requests
7
+ from pydantic import BaseModel, Field, ValidationError, validator
8
+ from typing_extensions import assert_never
9
+
10
+ from datahub.ingestion.api.source import SourceReport
11
+ from datahub.ingestion.source.hex.constants import (
12
+ HEX_API_BASE_URL_DEFAULT,
13
+ HEX_API_PAGE_SIZE_DEFAULT,
14
+ )
15
+ from datahub.ingestion.source.hex.model import (
16
+ Analytics,
17
+ Category,
18
+ Collection,
19
+ Component,
20
+ Owner,
21
+ Project,
22
+ Status,
23
+ )
24
+ from datahub.utilities.str_enum import StrEnum
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+ # The following models were Claude-generated from the Hex API OpenAPI definition: https://static.hex.site/openapi.json
29
+ # They are to be used exclusively internally, for deserializing the API response
30
+
31
+
32
class HexApiAppViewStats(BaseModel):
    """App view counters from the Hex API.

    Every counter is optional and is read from the camelCase key given in
    its ``alias``.
    """

    all_time: Optional[int] = Field(alias="allTime", default=None)
    last_seven_days: Optional[int] = Field(alias="lastSevenDays", default=None)
    last_fourteen_days: Optional[int] = Field(alias="lastFourteenDays", default=None)
    last_thirty_days: Optional[int] = Field(alias="lastThirtyDays", default=None)
39
+
40
+
41
class HexApiProjectAnalytics(BaseModel):
    """Analytics section of a project/component returned by the Hex API."""

    app_views: Optional[HexApiAppViewStats] = Field(alias="appViews", default=None)
    last_viewed_at: Optional[datetime] = Field(alias="lastViewedAt", default=None)
    published_results_updated_at: Optional[datetime] = Field(
        alias="publishedResultsUpdatedAt", default=None
    )

    @validator("last_viewed_at", "published_results_updated_at", pre=True)
    def parse_datetime(cls, value):
        """Turn a ``%Y-%m-%dT%H:%M:%S.%fZ`` string into a UTC-aware datetime.

        Anything that is not a string (``None`` included) is returned as-is
        and left to pydantic's regular field validation.
        """
        if not isinstance(value, str):
            return value
        # NOTE(review): strptime raises ValueError for timestamps that lack
        # the fractional-seconds part - confirm the API always sends it.
        parsed = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ")
        return parsed.replace(tzinfo=timezone.utc)
59
+
60
+
61
class HexApiProjectStatus(BaseModel):
    """Status object attached to a Hex project or component."""

    # Status label as returned by the API.
    name: str
65
+
66
+
67
class HexApiCategory(BaseModel):
    """Category entry of a Hex project or component."""

    name: str
    # Free-text description; absent in the payload means ``None``.
    description: Optional[str] = Field(default=None)
72
+
73
+
74
class HexApiReviews(BaseModel):
    """Review settings of a project; only the ``required`` flag is modelled."""

    required: bool
78
+
79
+
80
class HexApiUser(BaseModel):
    """Hex user reference; only the e-mail address is modelled."""

    email: str
84
+
85
+
86
class HexApiAccessType(StrEnum):
    """Access levels that may appear in the ``access`` field of sharing entries."""

    NONE = "NONE"
    VIEW = "VIEW"
    EDIT = "EDIT"
    FULL_ACCESS = "FULL_ACCESS"
93
+
94
+
95
class HexApiUserAccess(BaseModel):
    """Sharing entry that pairs a user with an optional access level."""

    user: HexApiUser
    access: Optional[HexApiAccessType] = Field(default=None)
100
+
101
+
102
class HexApiCollectionData(BaseModel):
    """Hex collection reference; only the collection name is modelled."""

    name: str
106
+
107
+
108
class HexApiCollectionAccess(BaseModel):
    """Sharing entry that pairs a collection with an optional access level."""

    collection: HexApiCollectionData
    access: Optional[HexApiAccessType] = Field(default=None)
113
+
114
+
115
class HexApiAccessSettings(BaseModel):
    """Access level wrapper used for the workspace, public-web and support scopes."""

    access: Optional[HexApiAccessType] = Field(default=None)
119
+
120
+
121
class HexApiWeeklySchedule(BaseModel):
    """Payload of the ``weekly`` entry of a schedule."""

    # Read from the camelCase ``dayOfWeek`` key.
    day_of_week: str = Field(alias="dayOfWeek")
    hour: int
    minute: int
    timezone: str
128
+
129
+
130
class HexApiSchedule(BaseModel):
    """Schedule attached to a Hex project.

    Only the ``weekly`` payload has a typed model; the remaining cadence
    payloads are kept as untyped values.
    """

    cadence: str
    enabled: bool
    hourly: Optional[Any] = Field(default=None)
    daily: Optional[Any] = Field(default=None)
    weekly: Optional[HexApiWeeklySchedule] = Field(default=None)
    monthly: Optional[Any] = Field(default=None)
    custom: Optional[Any] = Field(default=None)
140
+
141
+
142
class HexApiSharing(BaseModel):
    """Sharing section of a project: per-user, per-collection and scope-wide access."""

    class Config:
        # Keys in the JSON that are not declared below are ignored.
        extra = "ignore"

    users: Optional[List[HexApiUserAccess]] = []
    collections: Optional[List[HexApiCollectionAccess]] = []
    # Group entries are not modelled; they are kept as raw values.
    groups: Optional[List[Any]] = []
    workspace: Optional[HexApiAccessSettings] = Field(default=None)
    public_web: Optional[HexApiAccessSettings] = Field(alias="publicWeb", default=None)
    support: Optional[HexApiAccessSettings] = Field(default=None)
154
+
155
+
156
class HexApiItemType(StrEnum):
    """Discriminator for items of the list-projects response."""

    PROJECT = "PROJECT"
    COMPONENT = "COMPONENT"
161
+
162
+
163
class HexApiProjectApiResource(BaseModel):
    """One item of the list-projects response.

    Projects and components share this shape; ``type`` tells them apart.
    """

    class Config:
        # Keys in the JSON that are not declared below are ignored.
        extra = "ignore"

    id: str
    title: str
    description: Optional[str] = Field(default=None)
    type: HexApiItemType
    creator: Optional[HexApiUser] = Field(default=None)
    owner: Optional[HexApiUser] = Field(default=None)
    status: Optional[HexApiProjectStatus] = Field(default=None)
    categories: Optional[List[HexApiCategory]] = []
    reviews: Optional[HexApiReviews] = Field(default=None)
    analytics: Optional[HexApiProjectAnalytics] = Field(default=None)
    last_edited_at: Optional[datetime] = Field(alias="lastEditedAt", default=None)
    last_published_at: Optional[datetime] = Field(alias="lastPublishedAt", default=None)
    created_at: Optional[datetime] = Field(alias="createdAt", default=None)
    archived_at: Optional[datetime] = Field(alias="archivedAt", default=None)
    trashed_at: Optional[datetime] = Field(alias="trashedAt", default=None)
    schedules: Optional[List[HexApiSchedule]] = []
    sharing: Optional[HexApiSharing] = Field(default=None)

    @validator(
        "created_at",
        "last_edited_at",
        "last_published_at",
        "archived_at",
        "trashed_at",
        pre=True,
    )
    def parse_datetime(cls, value):
        """Turn a ``%Y-%m-%dT%H:%M:%S.%fZ`` string into a UTC-aware datetime.

        Anything that is not a string (``None`` included) is returned as-is
        and left to pydantic's regular field validation.
        """
        if not isinstance(value, str):
            return value
        # NOTE(review): strptime raises ValueError for timestamps that lack
        # the fractional-seconds part - confirm the API always sends it.
        parsed = datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ")
        return parsed.replace(tzinfo=timezone.utc)
203
+
204
+
205
class HexApiPageCursors(BaseModel):
    """Pagination cursors returned alongside a page of results."""

    # HexApi feeds this value back as the ``after`` query parameter.
    after: Optional[str] = Field(default=None)
    before: Optional[str] = Field(default=None)
210
+
211
+
212
class HexApiProjectsListResponse(BaseModel):
    """Top-level body of the list-projects endpoint: one page plus cursors."""

    class Config:
        # Keys in the JSON that are not declared below are ignored.
        extra = "ignore"

    values: List[HexApiProjectApiResource]
    pagination: Optional[HexApiPageCursors] = Field(default=None)
220
+
221
+
222
@dataclass
class HexApiReport(SourceReport):
    """Source report extended with counters for the list-projects endpoint."""

    # Number of page requests attempted by HexApi.
    fetch_projects_page_calls: int = 0
    # Total number of items contained in the pages that were parsed.
    fetch_projects_page_items: int = 0
226
+
227
+
228
class HexApi:
    """Minimal client for the Hex public API.

    https://learn.hex.tech/docs/api/api-reference
    """

    def __init__(
        self,
        token: str,
        report: HexApiReport,
        base_url: str = HEX_API_BASE_URL_DEFAULT,
        page_size: int = HEX_API_PAGE_SIZE_DEFAULT,
    ):
        """Store the connection settings.

        Args:
            token: API token, sent as a ``Bearer`` authorization header.
            report: Report that receives call/item counters, warnings and failures.
            base_url: API root; ``/projects`` is appended for listing.
            page_size: Value sent as the ``limit`` query parameter.
        """
        self.token = token
        self.base_url = base_url
        self.report = report
        self.page_size = page_size

    def _list_projects_url(self) -> str:
        """Return the URL of the list-projects endpoint."""
        return f"{self.base_url}/projects"

    def _auth_header(self) -> Dict[str, str]:
        """Return the authorization header for API requests."""
        return {"Authorization": f"Bearer {self.token}"}

    def fetch_projects(
        self,
        include_components: bool = True,
        include_archived: bool = False,
        include_trashed: bool = False,
    ) -> Generator[Union[Project, Component], None, None]:
        """Fetch all projects and components

        https://learn.hex.tech/docs/api/api-reference#operation/ListProjects

        Pages are requested one after another, following the ``after`` cursor
        of each response. Pagination stops when a response carries no cursor
        or when a page cannot be fetched or parsed (the failure is recorded
        in the report).
        """
        params: Dict[str, Any] = {
            "includeComponents": include_components,
            "includeArchived": include_archived,
            "includeTrashed": include_trashed,
            "includeSharing": True,
            "limit": self.page_size,
            "after": None,
            "before": None,
            "sortBy": "CREATED_AT",
            "sortDirection": "ASC",
        }
        # The first page is always requested; _fetch_projects_page updates
        # params["after"] with the cursor of the next page (or None).
        while True:
            yield from self._fetch_projects_page(params)
            if not params["after"]:
                break

    def _fetch_projects_page(
        self, params: Dict[str, Any]
    ) -> Generator[Union[Project, Component], None, None]:
        """Request one page and yield its items mapped to domain models.

        Side effect: ``params["after"]`` is set to the cursor of the next
        page, or to ``None`` when there is no next page or the page failed.
        Resetting the cursor on failure keeps the caller's pagination loop
        from requesting the same failing page indefinitely.
        """
        logger.debug(f"Fetching projects page with params: {params}")
        self.report.fetch_projects_page_calls += 1

        try:
            response = requests.get(
                url=self._list_projects_url(),
                headers=self._auth_header(),
                params=params,
                timeout=30,
            )
            response.raise_for_status()
            payload = response.json()
        except Exception as e:
            # Capture the failing request parameters before clearing the cursor.
            failed_params = str(params)
            params["after"] = None
            self.report.failure(
                title="Listing Projects and Components API request error",
                message="Error fetching Projects and Components and halting metadata ingestion",
                context=failed_params,
                exc=e,
            )
            return

        try:
            api_response = HexApiProjectsListResponse.parse_obj(payload)
        except ValidationError as e:
            params["after"] = None
            self.report.failure(
                title="Listing Projects and Components API response parsing error",
                message="Error parsing API response and halting metadata ingestion",
                context=str(payload),
                exc=e,
            )
            return

        logger.info(f"Fetched {len(api_response.values)} items")
        params["after"] = (
            api_response.pagination.after if api_response.pagination else None
        )
        self.report.fetch_projects_page_items += len(api_response.values)

        for item in api_response.values:
            try:
                mapped = self._map_data_from_model(item)
            except Exception as e:
                # A single bad item must not abort the rest of the page.
                self.report.warning(
                    title="Incomplete metadata",
                    message="Incomplete metadata because of error mapping item",
                    context=str(item),
                    exc=e,
                )
                continue
            yield mapped

    def _map_data_from_model(
        self, hex_item: HexApiProjectApiResource
    ) -> Union[Project, Component]:
        """
        Maps a HexApi pydantic model parsed from the API to our domain model

        Returns a ``Project`` or a ``Component`` depending on ``hex_item.type``;
        both receive the same set of attributes.
        """
        status = Status(name=hex_item.status.name) if hex_item.status else None

        categories = [
            Category(name=cat.name, description=cat.description)
            for cat in (hex_item.categories or [])
        ]

        collections = []
        if hex_item.sharing and hex_item.sharing.collections:
            collections = [
                Collection(name=col.collection.name)
                for col in hex_item.sharing.collections
            ]

        creator = Owner(email=hex_item.creator.email) if hex_item.creator else None
        owner = Owner(email=hex_item.owner.email) if hex_item.owner else None

        # Analytics are only mapped when app view counters are present.
        analytics = None
        if hex_item.analytics and hex_item.analytics.app_views:
            app_views = hex_item.analytics.app_views
            analytics = Analytics(
                appviews_all_time=app_views.all_time,
                appviews_last_7_days=app_views.last_seven_days,
                appviews_last_14_days=app_views.last_fourteen_days,
                appviews_last_30_days=app_views.last_thirty_days,
                last_viewed_at=hex_item.analytics.last_viewed_at,
            )

        # Attributes shared by both domain models.
        common: Dict[str, Any] = dict(
            id=hex_item.id,
            title=hex_item.title,
            description=hex_item.description,
            created_at=hex_item.created_at,
            last_edited_at=hex_item.last_edited_at,
            status=status,
            categories=categories,
            collections=collections,
            creator=creator,
            owner=owner,
            analytics=analytics,
        )

        if hex_item.type == HexApiItemType.PROJECT:
            return Project(**common)
        elif hex_item.type == HexApiItemType.COMPONENT:
            return Component(**common)
        else:
            assert_never(hex_item.type)
@@ -0,0 +1,3 @@
1
# Platform identifier used by the Hex ingestion source.
HEX_PLATFORM_NAME = "hex"
# Default root URL of the Hex API (used when no base_url is configured).
HEX_API_BASE_URL_DEFAULT = "https://app.hex.tech/api/v1"
# Default number of items requested per API page.
HEX_API_PAGE_SIZE_DEFAULT = 100
@@ -0,0 +1,167 @@
1
+ from typing import Any, Dict, Iterable, List, Optional
2
+
3
+ from pydantic import Field, SecretStr
4
+ from typing_extensions import assert_never
5
+
6
+ from datahub.configuration.common import AllowDenyPattern
7
+ from datahub.configuration.source_common import (
8
+ EnvConfigMixin,
9
+ PlatformInstanceConfigMixin,
10
+ )
11
+ from datahub.ingestion.api.common import PipelineContext
12
+ from datahub.ingestion.api.decorators import (
13
+ SourceCapability,
14
+ SupportStatus,
15
+ capability,
16
+ config_class,
17
+ platform_name,
18
+ support_status,
19
+ )
20
+ from datahub.ingestion.api.source import MetadataWorkUnitProcessor
21
+ from datahub.ingestion.api.workunit import MetadataWorkUnit
22
+ from datahub.ingestion.source.hex.api import HexApi, HexApiReport
23
+ from datahub.ingestion.source.hex.constants import (
24
+ HEX_API_BASE_URL_DEFAULT,
25
+ HEX_API_PAGE_SIZE_DEFAULT,
26
+ HEX_PLATFORM_NAME,
27
+ )
28
+ from datahub.ingestion.source.hex.mapper import Mapper
29
+ from datahub.ingestion.source.hex.model import Component, Project
30
+ from datahub.ingestion.source.state.stale_entity_removal_handler import (
31
+ StaleEntityRemovalHandler,
32
+ StaleEntityRemovalSourceReport,
33
+ StatefulStaleMetadataRemovalConfig,
34
+ )
35
+ from datahub.ingestion.source.state.stateful_ingestion_base import (
36
+ StatefulIngestionConfigBase,
37
+ StatefulIngestionReport,
38
+ StatefulIngestionSourceBase,
39
+ )
40
+
41
+
42
class HexSourceConfig(
    StatefulIngestionConfigBase, PlatformInstanceConfigMixin, EnvConfigMixin
):
    """Configuration of the Hex ingestion source."""

    workspace_name: str = Field(
        description="Hex workspace name. You can find this name in your Hex home page URL: https://app.hex.tech/<workspace_name>",
    )
    token: SecretStr = Field(
        description="Hex API token; either PAT or Workflow token - https://learn.hex.tech/docs/api/api-overview#authentication",
    )
    base_url: str = Field(
        default=HEX_API_BASE_URL_DEFAULT,
        description="Hex API base URL. For most Hex users, this will be https://app.hex.tech/api/v1. "
        "Single-tenant app users should replace this with the URL they use to access Hex.",
    )
    stateful_ingestion: Optional[StatefulStaleMetadataRemovalConfig] = Field(
        default=None,
        description="Configuration for stateful ingestion and stale metadata removal.",
    )
    # The keyword was previously misspelled ("desciption"), which left this
    # field without a description.
    include_components: bool = Field(
        default=True,
        description="Include Hex Components in the ingestion",
    )
    page_size: int = Field(
        default=HEX_API_PAGE_SIZE_DEFAULT,
        description="Number of items to fetch per Hex API call.",
    )
    patch_metadata: bool = Field(
        default=False,
        description="Emit metadata as patch events",
    )
    collections_as_tags: bool = Field(
        default=True,
        description="Emit Hex Collections as tags",
    )
    status_as_tag: bool = Field(
        default=True,
        description="Emit Hex Status as tags",
    )
    categories_as_tags: bool = Field(
        default=True,
        description="Emit Hex Category as tags",
    )
    project_title_pattern: AllowDenyPattern = Field(
        default=AllowDenyPattern.allow_all(),
        description="Regex pattern for project titles to filter in ingestion.",
    )
    component_title_pattern: AllowDenyPattern = Field(
        default=AllowDenyPattern.allow_all(),
        description="Regex pattern for component titles to filter in ingestion.",
    )
    set_ownership_from_email: bool = Field(
        default=True,
        description="Set ownership identity from owner/creator email",
    )
96
+
97
+
98
class HexReport(StaleEntityRemovalSourceReport, HexApiReport):
    """Report of the Hex source: stale-entity removal report plus Hex API counters."""
100
+
101
+
102
@platform_name("Hex")
@config_class(HexSourceConfig)
@support_status(SupportStatus.TESTING)
@capability(SourceCapability.DESCRIPTIONS, "Supported by default")
@capability(SourceCapability.OWNERSHIP, "Supported by default")
@capability(SourceCapability.PLATFORM_INSTANCE, "Enabled by default")
@capability(SourceCapability.CONTAINERS, "Enabled by default")
class HexSource(StatefulIngestionSourceBase):
    """Ingestion source that emits work units for a Hex workspace.

    Items are listed through ``HexApi`` and converted to work units by
    ``Mapper``; projects and components are filtered by their title patterns.
    """

    def __init__(self, config: HexSourceConfig, ctx: PipelineContext):
        """Build the API client and the mapper from ``config``."""
        super().__init__(config, ctx)
        self.source_config = config
        self.report = HexReport()
        self.platform = HEX_PLATFORM_NAME
        self.hex_api = HexApi(
            report=self.report,
            token=self.source_config.token.get_secret_value(),
            base_url=self.source_config.base_url,
            page_size=self.source_config.page_size,
        )
        self.mapper = Mapper(
            workspace_name=self.source_config.workspace_name,
            platform_instance=self.source_config.platform_instance,
            env=self.source_config.env,
            base_url=self.source_config.base_url,
            patch_metadata=self.source_config.patch_metadata,
            collections_as_tags=self.source_config.collections_as_tags,
            status_as_tag=self.source_config.status_as_tag,
            categories_as_tags=self.source_config.categories_as_tags,
            set_ownership_from_email=self.source_config.set_ownership_from_email,
        )

    @classmethod
    def create(cls, config_dict: Dict[str, Any], ctx: PipelineContext) -> "HexSource":
        """Parse ``config_dict`` into a ``HexSourceConfig`` and build the source."""
        config = HexSourceConfig.parse_obj(config_dict)
        return cls(config, ctx)

    def get_workunit_processors(self) -> List[Optional[MetadataWorkUnitProcessor]]:
        """Return the base processors plus the stale-entity removal processor."""
        return [
            *super().get_workunit_processors(),
            StaleEntityRemovalHandler.create(
                self, self.source_config, self.ctx
            ).workunit_processor,
        ]

    def get_report(self) -> StatefulIngestionReport:
        """Return the report shared by the source and its API client."""
        return self.report

    def get_workunits_internal(self) -> Iterable[MetadataWorkUnit]:
        """Yield work units for the workspace, then for each allowed item."""
        yield from self.mapper.map_workspace()

        # Forward the configured flag so that components are not requested
        # from the API when they would be dropped below anyway.
        items = self.hex_api.fetch_projects(
            include_components=self.source_config.include_components
        )
        for project_or_component in items:
            if isinstance(project_or_component, Project):
                if self.source_config.project_title_pattern.allowed(
                    project_or_component.title
                ):
                    yield from self.mapper.map_project(project=project_or_component)
            elif isinstance(project_or_component, Component):
                # The local check stays as a safeguard in case the API
                # returns components regardless of the request flag.
                if (
                    self.source_config.include_components
                    and self.source_config.component_title_pattern.allowed(
                        project_or_component.title
                    )
                ):
                    yield from self.mapper.map_component(component=project_or_component)
            else:
                assert_never(project_or_component)