acryl-datahub 1.2.0.10rc8__py3-none-any.whl → 1.2.0.11rc2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of acryl-datahub might be problematic. Click here for more details.
- {acryl_datahub-1.2.0.10rc8.dist-info → acryl_datahub-1.2.0.11rc2.dist-info}/METADATA +2624 -2624
- {acryl_datahub-1.2.0.10rc8.dist-info → acryl_datahub-1.2.0.11rc2.dist-info}/RECORD +22 -20
- datahub/_version.py +1 -1
- datahub/configuration/validate_field_removal.py +3 -0
- datahub/ingestion/source/looker/looker_common.py +6 -0
- datahub/ingestion/source/looker/looker_constant.py +4 -0
- datahub/ingestion/source/looker/looker_lib_wrapper.py +36 -3
- datahub/ingestion/source/looker/looker_view_id_cache.py +1 -1
- datahub/ingestion/source/looker/lookml_concept_context.py +1 -1
- datahub/ingestion/source/looker/lookml_config.py +30 -2
- datahub/ingestion/source/looker/lookml_refinement.py +1 -1
- datahub/ingestion/source/looker/lookml_source.py +42 -29
- datahub/ingestion/source/looker/view_upstream.py +494 -1
- datahub/sdk/search_filters.py +122 -1
- datahub/secret/datahub_secret_store.py +3 -0
- datahub/secret/environment_secret_store.py +29 -0
- datahub/secret/file_secret_store.py +49 -0
- datahub/utilities/file_backed_collections.py +7 -8
- {acryl_datahub-1.2.0.10rc8.dist-info → acryl_datahub-1.2.0.11rc2.dist-info}/WHEEL +0 -0
- {acryl_datahub-1.2.0.10rc8.dist-info → acryl_datahub-1.2.0.11rc2.dist-info}/entry_points.txt +0 -0
- {acryl_datahub-1.2.0.10rc8.dist-info → acryl_datahub-1.2.0.11rc2.dist-info}/licenses/LICENSE +0 -0
- {acryl_datahub-1.2.0.10rc8.dist-info → acryl_datahub-1.2.0.11rc2.dist-info}/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
acryl_datahub-1.2.0.
|
|
1
|
+
acryl_datahub-1.2.0.11rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
|
|
2
2
|
datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
|
|
3
3
|
datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
|
|
4
|
-
datahub/_version.py,sha256=
|
|
4
|
+
datahub/_version.py,sha256=wA3SrSgI7aUwEw5r0jMdQQHnFvzLaVy7PuHAjLJoDJI,324
|
|
5
5
|
datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
|
|
6
6
|
datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
|
|
7
7
|
datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -115,7 +115,7 @@ datahub/configuration/source_common.py,sha256=G4fkFw-dG0zVMSSsXOZn1ua_w4QJBpVMMq
|
|
|
115
115
|
datahub/configuration/time_window_config.py,sha256=c4mbrgmTobt4t_j6unDeYvmGSlbRB2hAgAst6yq4nHA,5412
|
|
116
116
|
datahub/configuration/toml.py,sha256=Ohc5sAWLPoAinPYL8njyheZ3ak81fC2Sp8IbBbESPGg,380
|
|
117
117
|
datahub/configuration/validate_field_deprecation.py,sha256=szzs0130AXcK6aoYMmvEK5oWXPKcnf-EqRqvE6SuW-U,1377
|
|
118
|
-
datahub/configuration/validate_field_removal.py,sha256=
|
|
118
|
+
datahub/configuration/validate_field_removal.py,sha256=LWVmUD8aDS0WkuNCqBCuIG_wOSZ3G-ANv8WyvKOEdDo,1285
|
|
119
119
|
datahub/configuration/validate_field_rename.py,sha256=o_MPYuVLiLRalhDa-p2pUxzx4_rBU-tQ-3wZFHXAOTg,2287
|
|
120
120
|
datahub/configuration/validate_host_port.py,sha256=dgR9XPreNV_fABOmv2UHYF-OSN6AHD92Zi2nKsfdTiE,867
|
|
121
121
|
datahub/configuration/validate_multiline_string.py,sha256=j5EABUvFOpV7c21eBWZSwy2ryIA2PI67lN5ebyrHv00,1370
|
|
@@ -383,26 +383,26 @@ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=QTMY0FmOHkTxfIC
|
|
|
383
383
|
datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=A9q-u5IoV35swvoyMrzT75FVV9-SBeYGhLKDYRge-IQ,23845
|
|
384
384
|
datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
385
385
|
datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
|
|
386
|
-
datahub/ingestion/source/looker/looker_common.py,sha256=
|
|
386
|
+
datahub/ingestion/source/looker/looker_common.py,sha256=vXD6w1VAhxKlFp283WZkRcccGQt1uKij175o5oqO1HM,67912
|
|
387
387
|
datahub/ingestion/source/looker/looker_config.py,sha256=4aYK9FQvH2BOH3IdtcL93QreFKqNC8_gm5OPRjcT2XA,13905
|
|
388
388
|
datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
|
|
389
|
-
datahub/ingestion/source/looker/looker_constant.py,sha256=
|
|
389
|
+
datahub/ingestion/source/looker/looker_constant.py,sha256=6v8DeeLOgnWXaz5t6Ghl514PYoqe0DTXykX_MJ2kYf0,530
|
|
390
390
|
datahub/ingestion/source/looker/looker_dataclasses.py,sha256=MrDeZ4Nd0wQnJbCoI1qePYlYeObnUw5dvpWcmhKuNgc,12346
|
|
391
391
|
datahub/ingestion/source/looker/looker_file_loader.py,sha256=gb2Z97_w28MsybYe01JFMMqlvBbn2occyUEknf_mYMA,4882
|
|
392
|
-
datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=
|
|
392
|
+
datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=M6dVvTLWi58f7WRY1kFkKwA3vRQO_QgtKYvjIjXlRBQ,12708
|
|
393
393
|
datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=27WnOuTghayaH-HL4lLoq0IcHvNm1UybMqMnoaxN8Cs,5383
|
|
394
394
|
datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
|
|
395
395
|
datahub/ingestion/source/looker/looker_source.py,sha256=7mRlIJq2DoM1h2y-heNdNoqok8sNl7Qmpwsx0dQsYP8,67273
|
|
396
396
|
datahub/ingestion/source/looker/looker_template_language.py,sha256=5fZFPKFP3IYbJg3jLifjaji4wWg8wRy-1XDvc8Qucus,17949
|
|
397
397
|
datahub/ingestion/source/looker/looker_usage.py,sha256=qFBX7OHtIcarYIqFe0jQMrDV8MMPV_nN4PZrZRUznTw,23029
|
|
398
|
-
datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=
|
|
399
|
-
datahub/ingestion/source/looker/lookml_concept_context.py,sha256=
|
|
400
|
-
datahub/ingestion/source/looker/lookml_config.py,sha256=
|
|
401
|
-
datahub/ingestion/source/looker/lookml_refinement.py,sha256=
|
|
402
|
-
datahub/ingestion/source/looker/lookml_source.py,sha256=
|
|
398
|
+
datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=smgH0Z3OlugaDKU0xSlSA4cggZVhuX8p5TNbeqX24II,4447
|
|
399
|
+
datahub/ingestion/source/looker/lookml_concept_context.py,sha256=s3eSNKchikE9gg30rdW-kOmV2uki_wYUldhW4SgrISU,18066
|
|
400
|
+
datahub/ingestion/source/looker/lookml_config.py,sha256=Z6sqKMwuJWJzE4pIX9Pm1ERtUgx5v0g-PM-SME4Q06M,13138
|
|
401
|
+
datahub/ingestion/source/looker/lookml_refinement.py,sha256=_FV7-8zdZJhntkAaS3FuWXjibq7LQa91fQuolAdav4c,9559
|
|
402
|
+
datahub/ingestion/source/looker/lookml_source.py,sha256=aaFntJjISMV1QeQqzEpMxjvFod0-CX9ONEaAfFvAxOA,43475
|
|
403
403
|
datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz7X3GrO951BkwSbF2afo,766
|
|
404
404
|
datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
|
|
405
|
-
datahub/ingestion/source/looker/view_upstream.py,sha256=
|
|
405
|
+
datahub/ingestion/source/looker/view_upstream.py,sha256=Ajc9G1NKWTS7neuN5OOgg9maePrla8aKc1tokiNs7Ic,47489
|
|
406
406
|
datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
407
407
|
datahub/ingestion/source/metadata/business_glossary.py,sha256=sGOw_5IQxlLxqw7NvvQX4QeCaursuDg4aOyoAN973g4,19709
|
|
408
408
|
datahub/ingestion/source/metadata/lineage.py,sha256=8jtlZqlgrHOfk1SpJ0lXWcyNfNxr0uYB_kvsqTyqFHo,9618
|
|
@@ -980,10 +980,12 @@ datahub/sdk/mlmodel.py,sha256=cO5R8BYVljmQ0w33RIOuZmj4nq8OJCDVAZGTQI6YFS8,12628
|
|
|
980
980
|
datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8194
|
|
981
981
|
datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
|
|
982
982
|
datahub/sdk/search_client.py,sha256=f2L_aOy-pPB9Mk7WdLSJ6Htp3OT4cEgDIrqnCweNtM8,3592
|
|
983
|
-
datahub/sdk/search_filters.py,sha256=
|
|
983
|
+
datahub/sdk/search_filters.py,sha256=EAIsO12rwTDJexQcxiiCsuk_TjKRk8Cx8G1ENqYYnUc,22056
|
|
984
984
|
datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
985
|
-
datahub/secret/datahub_secret_store.py,sha256=
|
|
985
|
+
datahub/secret/datahub_secret_store.py,sha256=qW7JI_k2xJdTZyluiJDaG5B3C68VnDpQk_M_L-Uz2vY,2583
|
|
986
986
|
datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
|
|
987
|
+
datahub/secret/environment_secret_store.py,sha256=JbTAswfmYycjZc7xQyVOELb-3nc0Kswme_Ml7pjMpic,824
|
|
988
|
+
datahub/secret/file_secret_store.py,sha256=At_EdN_KXNpPh3_zJVa9FPBBxUbA2D43Sua36MXvpL8,1614
|
|
987
989
|
datahub/secret/secret_common.py,sha256=g4anQtYPm7cI6kEJUZHjpBqeCyiUKIim2rJQByaeOoY,2864
|
|
988
990
|
datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
|
|
989
991
|
datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
|
|
@@ -1044,7 +1046,7 @@ datahub/utilities/dedup_list.py,sha256=dUSpe1AajfuwlHVJKNv-CzDXSCkaw0HgSMOsxqUkQ
|
|
|
1044
1046
|
datahub/utilities/delayed_iter.py,sha256=XlsI0DCXkVVejFKOW_uMT0E8DTqqOHQN3Ooak4EcULE,645
|
|
1045
1047
|
datahub/utilities/delta.py,sha256=hkpF8W7Lvg2gUJBQR3mmIzOxsRQ6i5cchRPFlAVoV10,1128
|
|
1046
1048
|
datahub/utilities/docs_build.py,sha256=uFMK3z1d4BExpsrvguHunidbEDAzQ8hoOP7iQ0A_IVw,211
|
|
1047
|
-
datahub/utilities/file_backed_collections.py,sha256=
|
|
1049
|
+
datahub/utilities/file_backed_collections.py,sha256=eOW7_8CzopvzFk1IATVuGGzQvS4yLQzHR_HU3h6T4kY,21675
|
|
1048
1050
|
datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
|
|
1049
1051
|
datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
|
|
1050
1052
|
datahub/utilities/hive_schema_to_avro.py,sha256=YCdq3jNUTij8ehWgX9v6CiOrf5aTCXr4DERcp_-wBbo,11608
|
|
@@ -1121,8 +1123,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
|
|
|
1121
1123
|
datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
|
|
1122
1124
|
datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
|
|
1123
1125
|
datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
|
|
1124
|
-
acryl_datahub-1.2.0.
|
|
1125
|
-
acryl_datahub-1.2.0.
|
|
1126
|
-
acryl_datahub-1.2.0.
|
|
1127
|
-
acryl_datahub-1.2.0.
|
|
1128
|
-
acryl_datahub-1.2.0.
|
|
1126
|
+
acryl_datahub-1.2.0.11rc2.dist-info/METADATA,sha256=7a2zx4CgdIqVcUawhQNKvcgQmDg6sxLzleaTZ1mnWkA,184162
|
|
1127
|
+
acryl_datahub-1.2.0.11rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
1128
|
+
acryl_datahub-1.2.0.11rc2.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
|
|
1129
|
+
acryl_datahub-1.2.0.11rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
|
|
1130
|
+
acryl_datahub-1.2.0.11rc2.dist-info/RECORD,,
|
datahub/_version.py
CHANGED
|
@@ -24,6 +24,9 @@ def pydantic_removed_field(
|
|
|
24
24
|
values.pop(field)
|
|
25
25
|
return values
|
|
26
26
|
|
|
27
|
+
# Mark the function as handling a removed field for doc generation
|
|
28
|
+
_validate_field_removal._doc_removed_field = field # type: ignore[attr-defined]
|
|
29
|
+
|
|
27
30
|
# Hack: Pydantic maintains a unique list of validators keyed by each validator's __name__.
|
|
28
31
|
# https://github.com/pydantic/pydantic/blob/v1.10.9/pydantic/main.py#L264
|
|
29
32
|
# This hack ensures that multiple field removals do not overwrite each other.
|
|
@@ -307,6 +307,12 @@ class ViewFieldType(Enum):
|
|
|
307
307
|
UNKNOWN = "Unknown"
|
|
308
308
|
|
|
309
309
|
|
|
310
|
+
class ViewFieldDimensionGroupType(Enum):
|
|
311
|
+
# Ref: https://cloud.google.com/looker/docs/reference/param-field-dimension-group
|
|
312
|
+
TIME = "time"
|
|
313
|
+
DURATION = "duration"
|
|
314
|
+
|
|
315
|
+
|
|
310
316
|
class ViewFieldValue(Enum):
|
|
311
317
|
NOT_AVAILABLE = "NotAvailable"
|
|
312
318
|
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
import json
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
5
|
+
from enum import Enum
|
|
5
6
|
from functools import lru_cache
|
|
6
7
|
from typing import Dict, List, MutableMapping, Optional, Sequence, Set, Union, cast
|
|
7
8
|
|
|
@@ -31,6 +32,14 @@ from datahub.configuration.common import ConfigurationError
|
|
|
31
32
|
logger = logging.getLogger(__name__)
|
|
32
33
|
|
|
33
34
|
|
|
35
|
+
class LookerQueryResponseFormat(Enum):
|
|
36
|
+
# result_format - Ref: https://cloud.google.com/looker/docs/reference/looker-api/latest/methods/Query/run_inline_query
|
|
37
|
+
JSON = "json"
|
|
38
|
+
SQL = (
|
|
39
|
+
"sql" # Note: This does not execute the query, it only generates the SQL query.
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
34
43
|
class TransportOptionsConfig(ConfigModel):
|
|
35
44
|
timeout: int
|
|
36
45
|
headers: MutableMapping[str, str]
|
|
@@ -69,6 +78,7 @@ class LookerAPIStats(BaseModel):
|
|
|
69
78
|
search_looks_calls: int = 0
|
|
70
79
|
search_dashboards_calls: int = 0
|
|
71
80
|
all_user_calls: int = 0
|
|
81
|
+
generate_sql_query_calls: int = 0
|
|
72
82
|
|
|
73
83
|
|
|
74
84
|
class LookerAPI:
|
|
@@ -170,17 +180,40 @@ class LookerAPI:
|
|
|
170
180
|
logger.debug(f"Executing query {write_query}")
|
|
171
181
|
self.client_stats.query_calls += 1
|
|
172
182
|
|
|
173
|
-
|
|
174
|
-
result_format=
|
|
183
|
+
response = self.client.run_inline_query(
|
|
184
|
+
result_format=LookerQueryResponseFormat.JSON.value,
|
|
175
185
|
body=write_query,
|
|
176
186
|
transport_options=self.transport_options,
|
|
177
187
|
)
|
|
178
188
|
|
|
189
|
+
data = json.loads(response)
|
|
190
|
+
|
|
179
191
|
logger.debug("=================Response=================")
|
|
180
|
-
data = json.loads(response_json)
|
|
181
192
|
logger.debug("Length of response: %d", len(data))
|
|
182
193
|
return data
|
|
183
194
|
|
|
195
|
+
def generate_sql_query(
|
|
196
|
+
self, write_query: WriteQuery, use_cache: bool = False
|
|
197
|
+
) -> str:
|
|
198
|
+
"""
|
|
199
|
+
Generates a SQL query string for a given WriteQuery.
|
|
200
|
+
|
|
201
|
+
Note: This does not execute the query, it only generates the SQL query.
|
|
202
|
+
"""
|
|
203
|
+
logger.debug(f"Generating SQL query for {write_query}")
|
|
204
|
+
self.client_stats.generate_sql_query_calls += 1
|
|
205
|
+
|
|
206
|
+
response = self.client.run_inline_query(
|
|
207
|
+
result_format=LookerQueryResponseFormat.SQL.value,
|
|
208
|
+
body=write_query,
|
|
209
|
+
transport_options=self.transport_options,
|
|
210
|
+
cache=use_cache,
|
|
211
|
+
)
|
|
212
|
+
|
|
213
|
+
logger.debug("=================Response=================")
|
|
214
|
+
logger.debug("Length of SQL response: %d", len(response))
|
|
215
|
+
return str(response)
|
|
216
|
+
|
|
184
217
|
def dashboard(self, dashboard_id: str, fields: Union[str, List[str]]) -> Dashboard:
|
|
185
218
|
self.client_stats.dashboard_calls += 1
|
|
186
219
|
return self.client.dashboard(
|
|
@@ -3,11 +3,11 @@ from typing import Dict, List, Optional
|
|
|
3
3
|
|
|
4
4
|
from datahub.ingestion.source.looker.looker_common import LookerViewId, ViewFieldValue
|
|
5
5
|
from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
|
|
6
|
+
from datahub.ingestion.source.looker.looker_constant import NAME
|
|
6
7
|
from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
|
|
7
8
|
from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
|
|
8
9
|
from datahub.ingestion.source.looker.lookml_config import (
|
|
9
10
|
BASE_PROJECT_NAME,
|
|
10
|
-
NAME,
|
|
11
11
|
LookMLSourceReport,
|
|
12
12
|
)
|
|
13
13
|
|
|
@@ -12,12 +12,12 @@ from datahub.ingestion.source.looker.looker_constant import (
|
|
|
12
12
|
DIMENSION_GROUPS,
|
|
13
13
|
DIMENSIONS,
|
|
14
14
|
MEASURES,
|
|
15
|
+
NAME,
|
|
15
16
|
)
|
|
16
17
|
from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
|
|
17
18
|
from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
|
|
18
19
|
from datahub.ingestion.source.looker.lookml_config import (
|
|
19
20
|
DERIVED_VIEW_SUFFIX,
|
|
20
|
-
NAME,
|
|
21
21
|
LookMLSourceReport,
|
|
22
22
|
)
|
|
23
23
|
from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver
|
|
@@ -28,11 +28,10 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
|
|
|
28
28
|
StatefulIngestionConfigBase,
|
|
29
29
|
)
|
|
30
30
|
from datahub.utilities.lossy_collections import LossyList
|
|
31
|
+
from datahub.utilities.stats_collections import TopKDict, float_top_k_dict
|
|
31
32
|
|
|
32
33
|
logger = logging.getLogger(__name__)
|
|
33
34
|
|
|
34
|
-
NAME: str = "name"
|
|
35
|
-
|
|
36
35
|
BASE_PROJECT_NAME = "__BASE"
|
|
37
36
|
|
|
38
37
|
EXPLORE_FILE_EXTENSION = ".explore.lkml"
|
|
@@ -47,6 +46,9 @@ DERIVED_VIEW_PATTERN: str = r"\$\{([^}]*)\}"
|
|
|
47
46
|
@dataclass
|
|
48
47
|
class LookMLSourceReport(StaleEntityRemovalSourceReport):
|
|
49
48
|
git_clone_latency: Optional[timedelta] = None
|
|
49
|
+
looker_query_api_latency_seconds: TopKDict[str, float] = dataclass_field(
|
|
50
|
+
default_factory=float_top_k_dict
|
|
51
|
+
)
|
|
50
52
|
models_discovered: int = 0
|
|
51
53
|
models_dropped: LossyList[str] = dataclass_field(default_factory=LossyList)
|
|
52
54
|
views_discovered: int = 0
|
|
@@ -81,6 +83,11 @@ class LookMLSourceReport(StaleEntityRemovalSourceReport):
|
|
|
81
83
|
self.api_stats = self._looker_api.compute_stats()
|
|
82
84
|
return super().compute_stats()
|
|
83
85
|
|
|
86
|
+
def report_looker_query_api_latency(
|
|
87
|
+
self, view_urn: str, latency: timedelta
|
|
88
|
+
) -> None:
|
|
89
|
+
self.looker_query_api_latency_seconds[view_urn] = latency.total_seconds()
|
|
90
|
+
|
|
84
91
|
|
|
85
92
|
class LookMLSourceConfig(
|
|
86
93
|
LookerCommonConfig, StatefulIngestionConfigBase, EnvConfigMixin
|
|
@@ -122,6 +129,16 @@ class LookMLSourceConfig(
|
|
|
122
129
|
description="List of regex patterns for LookML views to include in the extraction.",
|
|
123
130
|
)
|
|
124
131
|
parse_table_names_from_sql: bool = Field(True, description="See note below.")
|
|
132
|
+
use_api_for_view_lineage: bool = Field(
|
|
133
|
+
False,
|
|
134
|
+
description="When enabled, uses Looker API to get SQL representation of views for lineage parsing instead of parsing LookML files directly. Requires 'api' configuration to be provided. "
|
|
135
|
+
"Coverage of regex based lineage extraction has limitations; it only supports ${TABLE}.column_name syntax. See (https://cloud.google.com/looker/docs/reference/param-field-sql#sql_for_dimensions) to "
|
|
136
|
+
"understand the other substitutions and cross-references allowed in LookML.",
|
|
137
|
+
)
|
|
138
|
+
use_api_cache_for_view_lineage: bool = Field(
|
|
139
|
+
False,
|
|
140
|
+
description="When enabled, uses Looker API server-side caching for query execution. Requires 'api' configuration to be provided.",
|
|
141
|
+
)
|
|
125
142
|
api: Optional[LookerAPIConfig] = None
|
|
126
143
|
project_name: Optional[str] = Field(
|
|
127
144
|
None,
|
|
@@ -239,6 +256,17 @@ class LookMLSourceConfig(
|
|
|
239
256
|
)
|
|
240
257
|
return values
|
|
241
258
|
|
|
259
|
+
@root_validator(skip_on_failure=True)
|
|
260
|
+
def check_api_provided_for_view_lineage(cls, values):
|
|
261
|
+
"""Validate that we must have an api credential to use Looker API for view's column lineage"""
|
|
262
|
+
if not values.get("api") and values.get("use_api_for_view_lineage"):
|
|
263
|
+
raise ValueError(
|
|
264
|
+
"API credential was not found. LookML source requires api credentials "
|
|
265
|
+
"for Looker to use Looker APIs for view's column lineage extraction. "
|
|
266
|
+
"Set `use_api_for_view_lineage` to False to skip using Looker APIs."
|
|
267
|
+
)
|
|
268
|
+
return values
|
|
269
|
+
|
|
242
270
|
@validator("base_folder", always=True)
|
|
243
271
|
def check_base_folder_if_not_provided(
|
|
244
272
|
cls, v: Optional[pydantic.DirectoryPath], values: Dict[str, Any]
|
|
@@ -4,10 +4,10 @@ import logging
|
|
|
4
4
|
from typing import ClassVar, Dict, List, Set
|
|
5
5
|
|
|
6
6
|
from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
|
|
7
|
+
from datahub.ingestion.source.looker.looker_constant import NAME
|
|
7
8
|
from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
|
|
8
9
|
from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
|
|
9
10
|
from datahub.ingestion.source.looker.lookml_config import (
|
|
10
|
-
NAME,
|
|
11
11
|
LookMLSourceConfig,
|
|
12
12
|
LookMLSourceReport,
|
|
13
13
|
)
|
|
@@ -142,6 +142,8 @@ class LookerView:
|
|
|
142
142
|
ctx: PipelineContext,
|
|
143
143
|
extract_col_level_lineage: bool = False,
|
|
144
144
|
populate_sql_logic_in_descriptions: bool = False,
|
|
145
|
+
looker_client: Optional[LookerAPI] = None,
|
|
146
|
+
view_to_explore_map: Optional[Dict[str, str]] = None,
|
|
145
147
|
) -> Optional["LookerView"]:
|
|
146
148
|
view_name = view_context.name()
|
|
147
149
|
|
|
@@ -160,6 +162,8 @@ class LookerView:
|
|
|
160
162
|
config=config,
|
|
161
163
|
ctx=ctx,
|
|
162
164
|
reporter=reporter,
|
|
165
|
+
looker_client=looker_client,
|
|
166
|
+
view_to_explore_map=view_to_explore_map,
|
|
163
167
|
)
|
|
164
168
|
|
|
165
169
|
field_type_vs_raw_fields = OrderedDict(
|
|
@@ -705,6 +709,11 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
705
709
|
# Value: Tuple(model file name, connection name)
|
|
706
710
|
view_connection_map: Dict[str, Tuple[str, str]] = {}
|
|
707
711
|
|
|
712
|
+
# Map of view name to explore name for API-based view lineage
|
|
713
|
+
# A view can be referenced by multiple explores, we only need one of the explores to use Looker Query API
|
|
714
|
+
# Key: view_name, Value: explore_name
|
|
715
|
+
view_to_explore_map: Dict[str, str] = {}
|
|
716
|
+
|
|
708
717
|
# The ** means "this directory and all subdirectories", and hence should
|
|
709
718
|
# include all the files we want.
|
|
710
719
|
model_files = sorted(
|
|
@@ -759,37 +768,37 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
759
768
|
)
|
|
760
769
|
)
|
|
761
770
|
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
continue
|
|
771
|
+
model_explores_map = {d["name"]: d for d in model.explores}
|
|
772
|
+
for explore_dict in model.explores:
|
|
773
|
+
try:
|
|
774
|
+
if LookerRefinementResolver.is_refinement(explore_dict["name"]):
|
|
775
|
+
continue
|
|
768
776
|
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
for view_name in explore.upstream_views:
|
|
777
|
+
explore_dict = looker_refinement_resolver.apply_explore_refinement(
|
|
778
|
+
explore_dict
|
|
779
|
+
)
|
|
780
|
+
explore: LookerExplore = LookerExplore.from_dict(
|
|
781
|
+
model_name,
|
|
782
|
+
explore_dict,
|
|
783
|
+
model.resolved_includes,
|
|
784
|
+
viewfile_loader,
|
|
785
|
+
self.reporter,
|
|
786
|
+
model_explores_map,
|
|
787
|
+
)
|
|
788
|
+
if explore.upstream_views:
|
|
789
|
+
for view_name in explore.upstream_views:
|
|
790
|
+
if self.source_config.emit_reachable_views_only:
|
|
784
791
|
explore_reachable_views.add(view_name.include)
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
792
|
+
# Build view to explore mapping for API-based view lineage
|
|
793
|
+
view_to_explore_map[view_name.include] = explore.name
|
|
794
|
+
except Exception as e:
|
|
795
|
+
self.reporter.report_warning(
|
|
796
|
+
title="Failed to process explores",
|
|
797
|
+
message="Failed to process explore dictionary.",
|
|
798
|
+
context=f"Explore Details: {explore_dict}",
|
|
799
|
+
exc=e,
|
|
800
|
+
)
|
|
801
|
+
logger.debug("Failed to process explore", exc_info=e)
|
|
793
802
|
|
|
794
803
|
processed_view_files = processed_view_map.setdefault(
|
|
795
804
|
model.connection, set()
|
|
@@ -878,6 +887,10 @@ class LookMLSource(StatefulIngestionSourceBase):
|
|
|
878
887
|
populate_sql_logic_in_descriptions=self.source_config.populate_sql_logic_for_missing_descriptions,
|
|
879
888
|
config=self.source_config,
|
|
880
889
|
ctx=self.ctx,
|
|
890
|
+
looker_client=self.looker_client,
|
|
891
|
+
view_to_explore_map=view_to_explore_map
|
|
892
|
+
if view_to_explore_map
|
|
893
|
+
else None,
|
|
881
894
|
)
|
|
882
895
|
except Exception as e:
|
|
883
896
|
self.reporter.report_warning(
|