acryl-datahub 1.2.0.10rc8__py3-none-any.whl → 1.2.0.11rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of acryl-datahub might be problematic. Click here for more details.

@@ -1,7 +1,7 @@
1
- acryl_datahub-1.2.0.10rc8.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
1
+ acryl_datahub-1.2.0.11rc2.dist-info/licenses/LICENSE,sha256=9xNHpsD0uYF5ONzXsKDCuHHB-xbiCrSbueWXqrTNsxk,11365
2
2
  datahub/__init__.py,sha256=aq_i5lVREmoLfYIqcx_pEQicO855YlhD19tWc1eZZNI,59
3
3
  datahub/__main__.py,sha256=pegIvQ9hzK7IhqVeUi1MeADSZ2QlP-D3K0OQdEg55RU,106
4
- datahub/_version.py,sha256=xDxMP7YG6O7bgAgZ4LVjgQh_jfbuV9rXJU1zFdpjQEA,324
4
+ datahub/_version.py,sha256=wA3SrSgI7aUwEw5r0jMdQQHnFvzLaVy7PuHAjLJoDJI,324
5
5
  datahub/entrypoints.py,sha256=9Qf-37rNnTzbGlx8S75OCDazIclFp6zWNcCEL1zCZto,9015
6
6
  datahub/errors.py,sha256=p5rFAdAGVCk4Lqolol1YvthceadUSwpaCxLXRcyCCFQ,676
7
7
  datahub/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -115,7 +115,7 @@ datahub/configuration/source_common.py,sha256=G4fkFw-dG0zVMSSsXOZn1ua_w4QJBpVMMq
115
115
  datahub/configuration/time_window_config.py,sha256=c4mbrgmTobt4t_j6unDeYvmGSlbRB2hAgAst6yq4nHA,5412
116
116
  datahub/configuration/toml.py,sha256=Ohc5sAWLPoAinPYL8njyheZ3ak81fC2Sp8IbBbESPGg,380
117
117
  datahub/configuration/validate_field_deprecation.py,sha256=szzs0130AXcK6aoYMmvEK5oWXPKcnf-EqRqvE6SuW-U,1377
118
- datahub/configuration/validate_field_removal.py,sha256=6f3nzdjsukVqPQ8weaLCXXKmN7mBF-LoWsVe7cxu-DA,1128
118
+ datahub/configuration/validate_field_removal.py,sha256=LWVmUD8aDS0WkuNCqBCuIG_wOSZ3G-ANv8WyvKOEdDo,1285
119
119
  datahub/configuration/validate_field_rename.py,sha256=o_MPYuVLiLRalhDa-p2pUxzx4_rBU-tQ-3wZFHXAOTg,2287
120
120
  datahub/configuration/validate_host_port.py,sha256=dgR9XPreNV_fABOmv2UHYF-OSN6AHD92Zi2nKsfdTiE,867
121
121
  datahub/configuration/validate_multiline_string.py,sha256=j5EABUvFOpV7c21eBWZSwy2ryIA2PI67lN5ebyrHv00,1370
@@ -383,26 +383,26 @@ datahub/ingestion/source/kafka_connect/sink_connectors.py,sha256=QTMY0FmOHkTxfIC
383
383
  datahub/ingestion/source/kafka_connect/source_connectors.py,sha256=A9q-u5IoV35swvoyMrzT75FVV9-SBeYGhLKDYRge-IQ,23845
384
384
  datahub/ingestion/source/looker/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
385
385
  datahub/ingestion/source/looker/lkml_patched.py,sha256=XShEU7Wbz0DubDhYMjKf9wjKZrBJa2XPg9MIjp8rPhk,733
386
- datahub/ingestion/source/looker/looker_common.py,sha256=0V0nGejI3Do4KvE_nG0KpBD6yTKbM21vECel24T1xX0,67739
386
+ datahub/ingestion/source/looker/looker_common.py,sha256=vXD6w1VAhxKlFp283WZkRcccGQt1uKij175o5oqO1HM,67912
387
387
  datahub/ingestion/source/looker/looker_config.py,sha256=4aYK9FQvH2BOH3IdtcL93QreFKqNC8_gm5OPRjcT2XA,13905
388
388
  datahub/ingestion/source/looker/looker_connection.py,sha256=yDmC6lDsHmL2e_Pw8ULylwOIHPWPp_6gT1iyLvD0fTw,2075
389
- datahub/ingestion/source/looker/looker_constant.py,sha256=GMKYtNXlpojPxa9azridKfcGLSJwKdUCTesp7U8dIrQ,402
389
+ datahub/ingestion/source/looker/looker_constant.py,sha256=6v8DeeLOgnWXaz5t6Ghl514PYoqe0DTXykX_MJ2kYf0,530
390
390
  datahub/ingestion/source/looker/looker_dataclasses.py,sha256=MrDeZ4Nd0wQnJbCoI1qePYlYeObnUw5dvpWcmhKuNgc,12346
391
391
  datahub/ingestion/source/looker/looker_file_loader.py,sha256=gb2Z97_w28MsybYe01JFMMqlvBbn2occyUEknf_mYMA,4882
392
- datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=6smUt_Ya7ZJMHWdGZl3TnhM7XHZVpYQ6gz2i5hHejZ4,11547
392
+ datahub/ingestion/source/looker/looker_lib_wrapper.py,sha256=M6dVvTLWi58f7WRY1kFkKwA3vRQO_QgtKYvjIjXlRBQ,12708
393
393
  datahub/ingestion/source/looker/looker_liquid_tag.py,sha256=27WnOuTghayaH-HL4lLoq0IcHvNm1UybMqMnoaxN8Cs,5383
394
394
  datahub/ingestion/source/looker/looker_query_model.py,sha256=N0jBbFruiCIIGT6sJn6tNeppeQ78KGTkOwTLirhxFNc,2144
395
395
  datahub/ingestion/source/looker/looker_source.py,sha256=7mRlIJq2DoM1h2y-heNdNoqok8sNl7Qmpwsx0dQsYP8,67273
396
396
  datahub/ingestion/source/looker/looker_template_language.py,sha256=5fZFPKFP3IYbJg3jLifjaji4wWg8wRy-1XDvc8Qucus,17949
397
397
  datahub/ingestion/source/looker/looker_usage.py,sha256=qFBX7OHtIcarYIqFe0jQMrDV8MMPV_nN4PZrZRUznTw,23029
398
- datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=92gDy6NONhJYBp92z_IBzDVZvezmUIkaBCZY1bdk6mE,4392
399
- datahub/ingestion/source/looker/lookml_concept_context.py,sha256=eDaze9S7cgO5eFP7-0azUMEJyR3EfMjmfj5pMPjpm8c,18066
400
- datahub/ingestion/source/looker/lookml_config.py,sha256=_iMHpRDrlg2FvOkgqjfJIaW5NQM-C2hAgCeC2yZRNsc,11402
401
- datahub/ingestion/source/looker/lookml_refinement.py,sha256=MkVreI0BylaCFyDHihDHaCcXyDSP84eF9p1h5d-ZHnM,9504
402
- datahub/ingestion/source/looker/lookml_source.py,sha256=aNvEoW0njzqQSoj1KeHiDOBbEGYWWDEcjX-a2RYgqOY,42735
398
+ datahub/ingestion/source/looker/looker_view_id_cache.py,sha256=smgH0Z3OlugaDKU0xSlSA4cggZVhuX8p5TNbeqX24II,4447
399
+ datahub/ingestion/source/looker/lookml_concept_context.py,sha256=s3eSNKchikE9gg30rdW-kOmV2uki_wYUldhW4SgrISU,18066
400
+ datahub/ingestion/source/looker/lookml_config.py,sha256=Z6sqKMwuJWJzE4pIX9Pm1ERtUgx5v0g-PM-SME4Q06M,13138
401
+ datahub/ingestion/source/looker/lookml_refinement.py,sha256=_FV7-8zdZJhntkAaS3FuWXjibq7LQa91fQuolAdav4c,9559
402
+ datahub/ingestion/source/looker/lookml_source.py,sha256=aaFntJjISMV1QeQqzEpMxjvFod0-CX9ONEaAfFvAxOA,43475
403
403
  datahub/ingestion/source/looker/str_functions.py,sha256=zceEX2ka_4WaWwWgEdyknUSz7X3GrO951BkwSbF2afo,766
404
404
  datahub/ingestion/source/looker/urn_functions.py,sha256=4VvqEfGvIMq3rNHHps0-HlPurMPnpqdxNtDAOOHIZww,528
405
- datahub/ingestion/source/looker/view_upstream.py,sha256=4FCjZaU6p2G7npB2RJpP4Gv2yLjbvbsYWEbAg55IvjY,26110
405
+ datahub/ingestion/source/looker/view_upstream.py,sha256=Ajc9G1NKWTS7neuN5OOgg9maePrla8aKc1tokiNs7Ic,47489
406
406
  datahub/ingestion/source/metadata/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
407
407
  datahub/ingestion/source/metadata/business_glossary.py,sha256=sGOw_5IQxlLxqw7NvvQX4QeCaursuDg4aOyoAN973g4,19709
408
408
  datahub/ingestion/source/metadata/lineage.py,sha256=8jtlZqlgrHOfk1SpJ0lXWcyNfNxr0uYB_kvsqTyqFHo,9618
@@ -980,10 +980,12 @@ datahub/sdk/mlmodel.py,sha256=cO5R8BYVljmQ0w33RIOuZmj4nq8OJCDVAZGTQI6YFS8,12628
980
980
  datahub/sdk/mlmodelgroup.py,sha256=wlZZHny0UORpF0fRYuVkWLSQwIHX_fWl5lPb1NKR6dM,8194
981
981
  datahub/sdk/resolver_client.py,sha256=nKMAZJt2tRSGfKSzoREIh43PXqjM3umLiYkYHJjo1io,3243
982
982
  datahub/sdk/search_client.py,sha256=f2L_aOy-pPB9Mk7WdLSJ6Htp3OT4cEgDIrqnCweNtM8,3592
983
- datahub/sdk/search_filters.py,sha256=99D-dve11eh0Km60tWjkAZRFjWLQHREjjUpKnuA1pMs,18063
983
+ datahub/sdk/search_filters.py,sha256=EAIsO12rwTDJexQcxiiCsuk_TjKRk8Cx8G1ENqYYnUc,22056
984
984
  datahub/secret/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
985
- datahub/secret/datahub_secret_store.py,sha256=xyNAZY62d6KSz_kYF9wN7RDMLvNhu2ayOzcYvubOX1E,2519
985
+ datahub/secret/datahub_secret_store.py,sha256=qW7JI_k2xJdTZyluiJDaG5B3C68VnDpQk_M_L-Uz2vY,2583
986
986
  datahub/secret/datahub_secrets_client.py,sha256=nDmhziKdvseJHlaDVUcAwK8Fv8maeAaG-ktZtWG2b70,1316
987
+ datahub/secret/environment_secret_store.py,sha256=JbTAswfmYycjZc7xQyVOELb-3nc0Kswme_Ml7pjMpic,824
988
+ datahub/secret/file_secret_store.py,sha256=At_EdN_KXNpPh3_zJVa9FPBBxUbA2D43Sua36MXvpL8,1614
987
989
  datahub/secret/secret_common.py,sha256=g4anQtYPm7cI6kEJUZHjpBqeCyiUKIim2rJQByaeOoY,2864
988
990
  datahub/secret/secret_store.py,sha256=2VP_Vd336Cy7C-2kwp4rx8MAqtYgtwv8XyzzNTXE5x8,1124
989
991
  datahub/specific/__init__.py,sha256=r5RYM5mDnskLzin3vc87HV-9GSz3P6uQw8AlsN14LaI,88
@@ -1044,7 +1046,7 @@ datahub/utilities/dedup_list.py,sha256=dUSpe1AajfuwlHVJKNv-CzDXSCkaw0HgSMOsxqUkQ
1044
1046
  datahub/utilities/delayed_iter.py,sha256=XlsI0DCXkVVejFKOW_uMT0E8DTqqOHQN3Ooak4EcULE,645
1045
1047
  datahub/utilities/delta.py,sha256=hkpF8W7Lvg2gUJBQR3mmIzOxsRQ6i5cchRPFlAVoV10,1128
1046
1048
  datahub/utilities/docs_build.py,sha256=uFMK3z1d4BExpsrvguHunidbEDAzQ8hoOP7iQ0A_IVw,211
1047
- datahub/utilities/file_backed_collections.py,sha256=4_11YQUaIdjr7SUV4AHczArBi8OrrlzuX15ldR1GhKA,21673
1049
+ datahub/utilities/file_backed_collections.py,sha256=eOW7_8CzopvzFk1IATVuGGzQvS4yLQzHR_HU3h6T4kY,21675
1048
1050
  datahub/utilities/global_warning_util.py,sha256=adrEl3WhetQ-bymrPINjd976ZFndhbvk3QosUYGsos8,261
1049
1051
  datahub/utilities/groupby.py,sha256=pe6rP4ZCttYB98yjbs0Aey8C32aLb7rq-NJ_BFky0H4,524
1050
1052
  datahub/utilities/hive_schema_to_avro.py,sha256=YCdq3jNUTij8ehWgX9v6CiOrf5aTCXr4DERcp_-wBbo,11608
@@ -1121,8 +1123,8 @@ datahub_provider/operators/datahub_assertion_operator.py,sha256=uvTQ-jk2F0sbqqxp
1121
1123
  datahub_provider/operators/datahub_assertion_sensor.py,sha256=lCBj_3x1cf5GMNpHdfkpHuyHfVxsm6ff5x2Z5iizcAo,140
1122
1124
  datahub_provider/operators/datahub_operation_operator.py,sha256=aevDp2FzX7FxGlXrR0khoHNbxbhKR2qPEX5e8O2Jyzw,174
1123
1125
  datahub_provider/operators/datahub_operation_sensor.py,sha256=8fcdVBCEPgqy1etTXgLoiHoJrRt_nzFZQMdSzHqSG7M,168
1124
- acryl_datahub-1.2.0.10rc8.dist-info/METADATA,sha256=FnAZxap4iq7bDMvUTCEiquA1YbX_NclQz6-LeoenjG0,184162
1125
- acryl_datahub-1.2.0.10rc8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1126
- acryl_datahub-1.2.0.10rc8.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1127
- acryl_datahub-1.2.0.10rc8.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1128
- acryl_datahub-1.2.0.10rc8.dist-info/RECORD,,
1126
+ acryl_datahub-1.2.0.11rc2.dist-info/METADATA,sha256=7a2zx4CgdIqVcUawhQNKvcgQmDg6sxLzleaTZ1mnWkA,184162
1127
+ acryl_datahub-1.2.0.11rc2.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
1128
+ acryl_datahub-1.2.0.11rc2.dist-info/entry_points.txt,sha256=pzsBoTx-D-iTcmpX8oCGCyzlHP2112EygUMzZWz56M8,10105
1129
+ acryl_datahub-1.2.0.11rc2.dist-info/top_level.txt,sha256=iLjSrLK5ox1YVYcglRUkcvfZPvKlobBWx7CTUXx8_GI,25
1130
+ acryl_datahub-1.2.0.11rc2.dist-info/RECORD,,
datahub/_version.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # Published at https://pypi.org/project/acryl-datahub/.
2
2
  __package_name__ = "acryl-datahub"
3
- __version__ = "1.2.0.10rc8"
3
+ __version__ = "1.2.0.11rc2"
4
4
 
5
5
 
6
6
  def is_dev_mode() -> bool:
@@ -24,6 +24,9 @@ def pydantic_removed_field(
24
24
  values.pop(field)
25
25
  return values
26
26
 
27
+ # Mark the function as handling a removed field for doc generation
28
+ _validate_field_removal._doc_removed_field = field # type: ignore[attr-defined]
29
+
27
30
  # Hack: Pydantic maintains unique list of validators by referring its __name__.
28
31
  # https://github.com/pydantic/pydantic/blob/v1.10.9/pydantic/main.py#L264
29
32
  # This hack ensures that multiple field removals do not overwrite each other.
@@ -307,6 +307,12 @@ class ViewFieldType(Enum):
307
307
  UNKNOWN = "Unknown"
308
308
 
309
309
 
310
+ class ViewFieldDimensionGroupType(Enum):
311
+ # Ref: https://cloud.google.com/looker/docs/reference/param-field-dimension-group
312
+ TIME = "time"
313
+ DURATION = "duration"
314
+
315
+
310
316
  class ViewFieldValue(Enum):
311
317
  NOT_AVAILABLE = "NotAvailable"
312
318
 
@@ -11,3 +11,7 @@ prod = "prod"
11
11
  dev = "dev"
12
12
  NAME = "name"
13
13
  DERIVED_DOT_SQL = "derived.sql"
14
+
15
+ VIEW_FIELD_TYPE_ATTRIBUTE = "type"
16
+ VIEW_FIELD_INTERVALS_ATTRIBUTE = "intervals"
17
+ VIEW_FIELD_TIMEFRAMES_ATTRIBUTE = "timeframes"
@@ -2,6 +2,7 @@
2
2
  import json
3
3
  import logging
4
4
  import os
5
+ from enum import Enum
5
6
  from functools import lru_cache
6
7
  from typing import Dict, List, MutableMapping, Optional, Sequence, Set, Union, cast
7
8
 
@@ -31,6 +32,14 @@ from datahub.configuration.common import ConfigurationError
31
32
  logger = logging.getLogger(__name__)
32
33
 
33
34
 
35
+ class LookerQueryResponseFormat(Enum):
36
+ # result_format - Ref: https://cloud.google.com/looker/docs/reference/looker-api/latest/methods/Query/run_inline_query
37
+ JSON = "json"
38
+ SQL = (
39
+ "sql" # Note: This does not execute the query, it only generates the SQL query.
40
+ )
41
+
42
+
34
43
  class TransportOptionsConfig(ConfigModel):
35
44
  timeout: int
36
45
  headers: MutableMapping[str, str]
@@ -69,6 +78,7 @@ class LookerAPIStats(BaseModel):
69
78
  search_looks_calls: int = 0
70
79
  search_dashboards_calls: int = 0
71
80
  all_user_calls: int = 0
81
+ generate_sql_query_calls: int = 0
72
82
 
73
83
 
74
84
  class LookerAPI:
@@ -170,17 +180,40 @@ class LookerAPI:
170
180
  logger.debug(f"Executing query {write_query}")
171
181
  self.client_stats.query_calls += 1
172
182
 
173
- response_json = self.client.run_inline_query(
174
- result_format="json",
183
+ response = self.client.run_inline_query(
184
+ result_format=LookerQueryResponseFormat.JSON.value,
175
185
  body=write_query,
176
186
  transport_options=self.transport_options,
177
187
  )
178
188
 
189
+ data = json.loads(response)
190
+
179
191
  logger.debug("=================Response=================")
180
- data = json.loads(response_json)
181
192
  logger.debug("Length of response: %d", len(data))
182
193
  return data
183
194
 
195
+ def generate_sql_query(
196
+ self, write_query: WriteQuery, use_cache: bool = False
197
+ ) -> str:
198
+ """
199
+ Generates a SQL query string for a given WriteQuery.
200
+
201
+ Note: This does not execute the query, it only generates the SQL query.
202
+ """
203
+ logger.debug(f"Generating SQL query for {write_query}")
204
+ self.client_stats.generate_sql_query_calls += 1
205
+
206
+ response = self.client.run_inline_query(
207
+ result_format=LookerQueryResponseFormat.SQL.value,
208
+ body=write_query,
209
+ transport_options=self.transport_options,
210
+ cache=use_cache,
211
+ )
212
+
213
+ logger.debug("=================Response=================")
214
+ logger.debug("Length of SQL response: %d", len(response))
215
+ return str(response)
216
+
184
217
  def dashboard(self, dashboard_id: str, fields: Union[str, List[str]]) -> Dashboard:
185
218
  self.client_stats.dashboard_calls += 1
186
219
  return self.client.dashboard(
@@ -3,11 +3,11 @@ from typing import Dict, List, Optional
3
3
 
4
4
  from datahub.ingestion.source.looker.looker_common import LookerViewId, ViewFieldValue
5
5
  from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
6
+ from datahub.ingestion.source.looker.looker_constant import NAME
6
7
  from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
7
8
  from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
8
9
  from datahub.ingestion.source.looker.lookml_config import (
9
10
  BASE_PROJECT_NAME,
10
- NAME,
11
11
  LookMLSourceReport,
12
12
  )
13
13
 
@@ -12,12 +12,12 @@ from datahub.ingestion.source.looker.looker_constant import (
12
12
  DIMENSION_GROUPS,
13
13
  DIMENSIONS,
14
14
  MEASURES,
15
+ NAME,
15
16
  )
16
17
  from datahub.ingestion.source.looker.looker_dataclasses import LookerViewFile
17
18
  from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
18
19
  from datahub.ingestion.source.looker.lookml_config import (
19
20
  DERIVED_VIEW_SUFFIX,
20
- NAME,
21
21
  LookMLSourceReport,
22
22
  )
23
23
  from datahub.ingestion.source.looker.lookml_refinement import LookerRefinementResolver
@@ -28,11 +28,10 @@ from datahub.ingestion.source.state.stateful_ingestion_base import (
28
28
  StatefulIngestionConfigBase,
29
29
  )
30
30
  from datahub.utilities.lossy_collections import LossyList
31
+ from datahub.utilities.stats_collections import TopKDict, float_top_k_dict
31
32
 
32
33
  logger = logging.getLogger(__name__)
33
34
 
34
- NAME: str = "name"
35
-
36
35
  BASE_PROJECT_NAME = "__BASE"
37
36
 
38
37
  EXPLORE_FILE_EXTENSION = ".explore.lkml"
@@ -47,6 +46,9 @@ DERIVED_VIEW_PATTERN: str = r"\$\{([^}]*)\}"
47
46
  @dataclass
48
47
  class LookMLSourceReport(StaleEntityRemovalSourceReport):
49
48
  git_clone_latency: Optional[timedelta] = None
49
+ looker_query_api_latency_seconds: TopKDict[str, float] = dataclass_field(
50
+ default_factory=float_top_k_dict
51
+ )
50
52
  models_discovered: int = 0
51
53
  models_dropped: LossyList[str] = dataclass_field(default_factory=LossyList)
52
54
  views_discovered: int = 0
@@ -81,6 +83,11 @@ class LookMLSourceReport(StaleEntityRemovalSourceReport):
81
83
  self.api_stats = self._looker_api.compute_stats()
82
84
  return super().compute_stats()
83
85
 
86
+ def report_looker_query_api_latency(
87
+ self, view_urn: str, latency: timedelta
88
+ ) -> None:
89
+ self.looker_query_api_latency_seconds[view_urn] = latency.total_seconds()
90
+
84
91
 
85
92
  class LookMLSourceConfig(
86
93
  LookerCommonConfig, StatefulIngestionConfigBase, EnvConfigMixin
@@ -122,6 +129,16 @@ class LookMLSourceConfig(
122
129
  description="List of regex patterns for LookML views to include in the extraction.",
123
130
  )
124
131
  parse_table_names_from_sql: bool = Field(True, description="See note below.")
132
+ use_api_for_view_lineage: bool = Field(
133
+ False,
134
+ description="When enabled, uses Looker API to get SQL representation of views for lineage parsing instead of parsing LookML files directly. Requires 'api' configuration to be provided."
135
+ " Coverage of regex based lineage extraction has limitations: it only supports ${TABLE}.column_name syntax. See (https://cloud.google.com/looker/docs/reference/param-field-sql#sql_for_dimensions) to "
136
+ "understand the other substitutions and cross-references allowed in LookML.",
137
+ )
138
+ use_api_cache_for_view_lineage: bool = Field(
139
+ False,
140
+ description="When enabled, uses Looker API server-side caching for query execution. Requires 'api' configuration to be provided.",
141
+ )
125
142
  api: Optional[LookerAPIConfig] = None
126
143
  project_name: Optional[str] = Field(
127
144
  None,
@@ -239,6 +256,17 @@ class LookMLSourceConfig(
239
256
  )
240
257
  return values
241
258
 
259
+ @root_validator(skip_on_failure=True)
260
+ def check_api_provided_for_view_lineage(cls, values):
261
+ """Validate that API credentials are provided when the Looker API is used for view column lineage."""
262
+ if not values.get("api") and values.get("use_api_for_view_lineage"):
263
+ raise ValueError(
264
+ "API credential was not found. LookML source requires api credentials "
265
+ "for Looker to use Looker APIs for view's column lineage extraction. "
266
+ "Set `use_api_for_view_lineage` to False to skip using Looker APIs."
267
+ )
268
+ return values
269
+
242
270
  @validator("base_folder", always=True)
243
271
  def check_base_folder_if_not_provided(
244
272
  cls, v: Optional[pydantic.DirectoryPath], values: Dict[str, Any]
@@ -4,10 +4,10 @@ import logging
4
4
  from typing import ClassVar, Dict, List, Set
5
5
 
6
6
  from datahub.ingestion.source.looker.looker_config import LookerConnectionDefinition
7
+ from datahub.ingestion.source.looker.looker_constant import NAME
7
8
  from datahub.ingestion.source.looker.looker_dataclasses import LookerModel
8
9
  from datahub.ingestion.source.looker.looker_file_loader import LookerViewFileLoader
9
10
  from datahub.ingestion.source.looker.lookml_config import (
10
- NAME,
11
11
  LookMLSourceConfig,
12
12
  LookMLSourceReport,
13
13
  )
@@ -142,6 +142,8 @@ class LookerView:
142
142
  ctx: PipelineContext,
143
143
  extract_col_level_lineage: bool = False,
144
144
  populate_sql_logic_in_descriptions: bool = False,
145
+ looker_client: Optional[LookerAPI] = None,
146
+ view_to_explore_map: Optional[Dict[str, str]] = None,
145
147
  ) -> Optional["LookerView"]:
146
148
  view_name = view_context.name()
147
149
 
@@ -160,6 +162,8 @@ class LookerView:
160
162
  config=config,
161
163
  ctx=ctx,
162
164
  reporter=reporter,
165
+ looker_client=looker_client,
166
+ view_to_explore_map=view_to_explore_map,
163
167
  )
164
168
 
165
169
  field_type_vs_raw_fields = OrderedDict(
@@ -705,6 +709,11 @@ class LookMLSource(StatefulIngestionSourceBase):
705
709
  # Value: Tuple(model file name, connection name)
706
710
  view_connection_map: Dict[str, Tuple[str, str]] = {}
707
711
 
712
+ # Map of view name to explore name for API-based view lineage
713
+ # A view can be referenced by multiple explores; we only need one of them to use the Looker Query API
714
+ # Key: view_name, Value: explore_name
715
+ view_to_explore_map: Dict[str, str] = {}
716
+
708
717
  # The ** means "this directory and all subdirectories", and hence should
709
718
  # include all the files we want.
710
719
  model_files = sorted(
@@ -759,37 +768,37 @@ class LookMLSource(StatefulIngestionSourceBase):
759
768
  )
760
769
  )
761
770
 
762
- if self.source_config.emit_reachable_views_only:
763
- model_explores_map = {d["name"]: d for d in model.explores}
764
- for explore_dict in model.explores:
765
- try:
766
- if LookerRefinementResolver.is_refinement(explore_dict["name"]):
767
- continue
771
+ model_explores_map = {d["name"]: d for d in model.explores}
772
+ for explore_dict in model.explores:
773
+ try:
774
+ if LookerRefinementResolver.is_refinement(explore_dict["name"]):
775
+ continue
768
776
 
769
- explore_dict = (
770
- looker_refinement_resolver.apply_explore_refinement(
771
- explore_dict
772
- )
773
- )
774
- explore: LookerExplore = LookerExplore.from_dict(
775
- model_name,
776
- explore_dict,
777
- model.resolved_includes,
778
- viewfile_loader,
779
- self.reporter,
780
- model_explores_map,
781
- )
782
- if explore.upstream_views:
783
- for view_name in explore.upstream_views:
777
+ explore_dict = looker_refinement_resolver.apply_explore_refinement(
778
+ explore_dict
779
+ )
780
+ explore: LookerExplore = LookerExplore.from_dict(
781
+ model_name,
782
+ explore_dict,
783
+ model.resolved_includes,
784
+ viewfile_loader,
785
+ self.reporter,
786
+ model_explores_map,
787
+ )
788
+ if explore.upstream_views:
789
+ for view_name in explore.upstream_views:
790
+ if self.source_config.emit_reachable_views_only:
784
791
  explore_reachable_views.add(view_name.include)
785
- except Exception as e:
786
- self.reporter.report_warning(
787
- title="Failed to process explores",
788
- message="Failed to process explore dictionary.",
789
- context=f"Explore Details: {explore_dict}",
790
- exc=e,
791
- )
792
- logger.debug("Failed to process explore", exc_info=e)
792
+ # Build view to explore mapping for API-based view lineage
793
+ view_to_explore_map[view_name.include] = explore.name
794
+ except Exception as e:
795
+ self.reporter.report_warning(
796
+ title="Failed to process explores",
797
+ message="Failed to process explore dictionary.",
798
+ context=f"Explore Details: {explore_dict}",
799
+ exc=e,
800
+ )
801
+ logger.debug("Failed to process explore", exc_info=e)
793
802
 
794
803
  processed_view_files = processed_view_map.setdefault(
795
804
  model.connection, set()
@@ -878,6 +887,10 @@ class LookMLSource(StatefulIngestionSourceBase):
878
887
  populate_sql_logic_in_descriptions=self.source_config.populate_sql_logic_for_missing_descriptions,
879
888
  config=self.source_config,
880
889
  ctx=self.ctx,
890
+ looker_client=self.looker_client,
891
+ view_to_explore_map=view_to_explore_map
892
+ if view_to_explore_map
893
+ else None,
881
894
  )
882
895
  except Exception as e:
883
896
  self.reporter.report_warning(