castor-extractor 0.22.0__py3-none-any.whl → 0.22.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of castor-extractor might be problematic. Click here for more details.

Files changed (38) hide show
  1. CHANGELOG.md +20 -0
  2. castor_extractor/utils/__init__.py +1 -0
  3. castor_extractor/utils/time.py +4 -0
  4. castor_extractor/utils/time_test.py +8 -1
  5. castor_extractor/visualization/looker_studio/__init__.py +6 -0
  6. castor_extractor/visualization/looker_studio/assets.py +6 -0
  7. castor_extractor/visualization/looker_studio/client/__init__.py +3 -0
  8. castor_extractor/visualization/looker_studio/client/admin_sdk_client.py +90 -0
  9. castor_extractor/visualization/looker_studio/client/client.py +37 -0
  10. castor_extractor/visualization/looker_studio/client/credentials.py +20 -0
  11. castor_extractor/visualization/looker_studio/client/endpoints.py +18 -0
  12. castor_extractor/visualization/looker_studio/client/enums.py +8 -0
  13. castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py +102 -0
  14. castor_extractor/visualization/looker_studio/client/pagination.py +31 -0
  15. castor_extractor/visualization/looker_studio/client/scopes.py +6 -0
  16. castor_extractor/visualization/sigma/client/client.py +64 -10
  17. castor_extractor/visualization/thoughtspot/assets.py +3 -1
  18. castor_extractor/visualization/thoughtspot/client/client.py +67 -14
  19. castor_extractor/visualization/thoughtspot/client/utils.py +10 -4
  20. castor_extractor/visualization/thoughtspot/client/utils_test.py +22 -4
  21. castor_extractor/warehouse/databricks/api_client.py +2 -60
  22. castor_extractor/warehouse/databricks/client.py +4 -47
  23. castor_extractor/warehouse/databricks/client_test.py +1 -35
  24. castor_extractor/warehouse/databricks/credentials.py +4 -6
  25. castor_extractor/warehouse/databricks/enums.py +15 -0
  26. castor_extractor/warehouse/databricks/extract.py +13 -11
  27. castor_extractor/warehouse/databricks/lineage.py +47 -119
  28. castor_extractor/warehouse/databricks/lineage_test.py +86 -31
  29. castor_extractor/warehouse/databricks/sql_client.py +23 -8
  30. castor_extractor/warehouse/databricks/types.py +0 -7
  31. castor_extractor/warehouse/salesforce/format.py +12 -5
  32. castor_extractor/warehouse/salesforce/format_test.py +22 -6
  33. {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/METADATA +23 -1
  34. {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/RECORD +37 -26
  35. castor_extractor/warehouse/databricks/test_constants.py +0 -79
  36. {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/LICENCE +0 -0
  37. {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/WHEEL +0 -0
  38. {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/entry_points.txt +0 -0
@@ -1,34 +1,89 @@
1
- from .lineage import LineageLinks
2
- from .test_constants import (
3
- CLOSER_DATE,
4
- OLDER_DATE,
5
- )
1
+ from .enums import LineageEntity
2
+ from .lineage import LineageProcessor, valid_lineage
3
+
4
+ _OLDER_DATE = "2025-01-01 00:00:01.0"
5
+ _CLOSER_DATE = "2025-01-01 02:02:02.0"
6
+
7
+ _TABLE_LINEAGES = [
8
+ {
9
+ "source_table_full_name": "a.b.source",
10
+ "target_table_full_name": "a.b.target",
11
+ "event_time": _CLOSER_DATE,
12
+ "other": "more recent stuff",
13
+ },
14
+ {
15
+ "source_table_full_name": "a.b.source",
16
+ "target_table_full_name": "a.b.target",
17
+ "event_time": _OLDER_DATE,
18
+ "other": "stuff that's too old",
19
+ },
20
+ {
21
+ "source_table_full_name": "no target",
22
+ "target_table_full_name": None,
23
+ "event_time": _CLOSER_DATE,
24
+ },
25
+ {
26
+ "source_table_full_name": None,
27
+ "target_table_full_name": "no source",
28
+ "event_time": _CLOSER_DATE,
29
+ },
30
+ ]
31
+
32
+
33
+ _COLUMN_LINEAGES = [
34
+ {
35
+ "source_table_full_name": "a.b.source",
36
+ "source_column_name": "src_col",
37
+ "target_table_full_name": "a.b.target",
38
+ "target_column_name": "trgt_col",
39
+ "event_time": _OLDER_DATE,
40
+ "other": "old stuff",
41
+ },
42
+ {
43
+ "source_table_full_name": "a.b.source",
44
+ "source_column_name": "src_col",
45
+ "target_table_full_name": "a.b.target",
46
+ "target_column_name": "trgt_col",
47
+ "event_time": _CLOSER_DATE,
48
+ "other": "newer stuff",
49
+ },
50
+ {
51
+ "source_table_full_name": "a.b.toto",
52
+ "source_column_name": "toto_col",
53
+ "target_table_full_name": "a.b.tata",
54
+ "target_column_name": "tata_col",
55
+ "event_time": _OLDER_DATE,
56
+ },
57
+ {
58
+ "source_table_full_name": "a.b.source",
59
+ "source_column_name": "a.b.source",
60
+ "target_table_full_name": None,
61
+ "target_column_name": None,
62
+ "event_time": _CLOSER_DATE,
63
+ },
64
+ ]
65
+
66
+
67
+ def test_valid_lineage():
68
+ table_links = valid_lineage(_TABLE_LINEAGES, LineageEntity.TABLE)
69
+
70
+ assert len(table_links) == 1
71
+ assert table_links[0]["source_table_full_name"] == "a.b.source"
72
+ assert table_links[0]["target_table_full_name"] == "a.b.target"
73
+ assert table_links[0]["event_time"] == _CLOSER_DATE
74
+ assert table_links[0]["other"] == "more recent stuff"
6
75
 
7
76
 
8
77
  def test_LineageLinks_add():
9
- links = LineageLinks()
10
- timestamped_link = ("parent", "child", None)
11
- expected_key = ("parent", "child")
12
-
13
- links.add(timestamped_link)
14
-
15
- assert expected_key in links.lineage
16
- assert links.lineage[expected_key] is None
17
-
18
- # we replace None by an actual timestamp
19
- timestamped_link = ("parent", "child", OLDER_DATE)
20
- links.add(timestamped_link)
21
- assert expected_key in links.lineage
22
- assert links.lineage[expected_key] == OLDER_DATE
23
-
24
- # we update with the more recent timestamp
25
- timestamped_link = ("parent", "child", CLOSER_DATE)
26
- links.add(timestamped_link)
27
- assert expected_key in links.lineage
28
- assert links.lineage[expected_key] == CLOSER_DATE
29
-
30
- # we keep the more recent timestamp
31
- timestamped_link = ("parent", "child", OLDER_DATE)
32
- links.add(timestamped_link)
33
- assert expected_key in links.lineage
34
- assert links.lineage[expected_key] == CLOSER_DATE
78
+ deduplicated_lineage = LineageProcessor(LineageEntity.COLUMN)
79
+ for link in _COLUMN_LINEAGES:
80
+ deduplicated_lineage.add(link)
81
+
82
+ lineage = deduplicated_lineage.lineage
83
+ assert len(lineage) == 2
84
+ assert ("a.b.source.src_col", "a.b.target.trgt_col") in lineage
85
+ assert ("a.b.toto.toto_col", "a.b.tata.tata_col") in lineage
86
+ assert (
87
+ lineage[("a.b.source.src_col", "a.b.target.trgt_col")]["other"]
88
+ == "newer stuff"
89
+ )
@@ -1,24 +1,24 @@
1
1
  import logging
2
2
  from collections import defaultdict
3
- from enum import Enum
3
+ from datetime import date
4
4
  from typing import Optional
5
5
 
6
6
  from databricks import sql # type: ignore
7
7
 
8
8
  from .credentials import DatabricksCredentials
9
+ from .enums import LineageEntity, TagEntity
9
10
  from .format import TagMapping
11
+ from .lineage import valid_lineage
10
12
  from .utils import build_path, tag_label
11
13
 
12
14
  logger = logging.getLogger(__name__)
13
15
 
14
16
  _INFORMATION_SCHEMA_SQL = "SELECT * FROM system.information_schema"
15
17
 
16
-
17
- class TagEntity(Enum):
18
- """Entities that can be tagged in Databricks"""
19
-
20
- COLUMN = "COLUMN"
21
- TABLE = "TABLE"
18
+ _LINEAGE_SQL_TPL = """
19
+ SELECT * FROM system.access.{table_name}
20
+ WHERE event_date = :day
21
+ """
22
22
 
23
23
 
24
24
  class DatabricksSQLClient:
@@ -71,7 +71,6 @@ class DatabricksSQLClient:
71
71
  https://docs.databricks.com/en/sql/language-manual/information-schema/column_tags.html
72
72
  """
73
73
  if not self._needs_extraction(entity):
74
- # extracting tags require additional credentials (http_path)
75
74
  return dict()
76
75
 
77
76
  table = f"{entity.value.lower()}_tags"
@@ -88,3 +87,19 @@ class DatabricksSQLClient:
88
87
  mapping[path].append(label)
89
88
 
90
89
  return mapping
90
+
91
+ def get_lineage(
92
+ self, lineage_entity: LineageEntity, day: date
93
+ ) -> list[dict]:
94
+ """
95
+ Fetch {TABLE|COLUMN} lineage of the given day, via system tables
96
+ https://docs.databricks.com/en/admin/system-tables/lineage.html
97
+ """
98
+ table_name = f"{lineage_entity.value.lower()}_lineage"
99
+ query = _LINEAGE_SQL_TPL.format(table_name=table_name)
100
+ params = {"day": day}
101
+ result = self.execute_sql(query, params)
102
+ data = []
103
+ for row in result:
104
+ data.append(row.asDict())
105
+ return valid_lineage(data, lineage_entity)
@@ -1,8 +1 @@
1
- from typing import Optional
2
-
3
- Link = tuple[str, str]
4
1
  TablesColumns = tuple[list[dict], list[dict]]
5
- Ostr = Optional[str]
6
- TimestampedLink = tuple[str, str, Ostr]
7
-
8
- OTimestampedLink = Optional[TimestampedLink]
@@ -4,7 +4,7 @@ from typing import Any
4
4
  from ...utils import group_by
5
5
  from .constants import SCHEMA_NAME
6
6
 
7
- _HAS_DUPLICATE_KEY = "#has_duplicate"
7
+ _HAS_DUPLICATE_KEY = "#has_duplicate_label"
8
8
 
9
9
 
10
10
  def _clean(raw: str) -> str:
@@ -70,9 +70,15 @@ def _to_table_payload(sobject: dict) -> dict:
70
70
  }
71
71
 
72
72
 
73
- def _detect_duplicates(sobjects: list[dict]) -> list[dict]:
73
+ def _remove_duplicates(sobjects: list[dict]) -> list[dict]:
74
+ """only keep one object per QualifiedApiName"""
75
+ by_name = group_by("QualifiedApiName", sobjects)
76
+ return [objects[0] for _, objects in by_name.items()]
77
+
78
+
79
+ def _detect_duplicate_labels(sobjects: list[dict]) -> list[dict]:
74
80
  """
75
- enrich the given data with "has_duplicate" flag:
81
+ enrich the given data with "has_duplicate_label" flag:
76
82
  - True when another asset has the same Label in the list
77
83
  - False otherwise
78
84
  """
@@ -94,7 +100,8 @@ class SalesforceFormatter:
94
100
  """
95
101
  formats the raw list of sobjects to tables
96
102
  """
97
- sobjects = _detect_duplicates(sobjects)
103
+ sobjects = _remove_duplicates(sobjects)
104
+ sobjects = _detect_duplicate_labels(sobjects)
98
105
  for sobject in sobjects:
99
106
  yield _to_table_payload(sobject)
100
107
 
@@ -102,6 +109,6 @@ class SalesforceFormatter:
102
109
  def columns(sobject_fields: dict[str, list[dict]]) -> Iterator[dict]:
103
110
  """formats the raw list of sobject fields to columns"""
104
111
  for table_name, fields in sobject_fields.items():
105
- fields = _detect_duplicates(fields)
112
+ fields = _detect_duplicate_labels(fields)
106
113
  for index, field in enumerate(fields):
107
114
  yield _to_column_payload(field, index, table_name)
@@ -1,9 +1,10 @@
1
1
  from .format import (
2
2
  _HAS_DUPLICATE_KEY,
3
3
  SalesforceFormatter,
4
- _detect_duplicates,
4
+ _detect_duplicate_labels,
5
5
  _field_description,
6
6
  _name,
7
+ _remove_duplicates,
7
8
  )
8
9
 
9
10
 
@@ -11,9 +12,10 @@ def _tables_sobjects() -> tuple[dict[str, str], ...]:
11
12
  """Returns 4 sobjects with 2 sharing the same label"""
12
13
  a = {"Label": "a", "QualifiedApiName": "a_one"}
13
14
  b = {"Label": "b", "QualifiedApiName": "b"}
14
- c = {"Label": "c", "QualifiedApiName": "c"}
15
+ c = {"Label": "c", "QualifiedApiName": "c_unique_so_doesnt_matter"}
15
16
  a_prime = {"Label": "a", "QualifiedApiName": "a_two"}
16
- return a, b, c, a_prime
17
+ b_exact_duplicate = {"Label": "b", "QualifiedApiName": "b"}
18
+ return a, b, c, a_prime, b_exact_duplicate
17
19
 
18
20
 
19
21
  def _columns_sobjects() -> dict[str, list[dict]]:
@@ -79,14 +81,14 @@ def test__name():
79
81
  assert _name(empty_label_sobject) == "empty_label"
80
82
 
81
83
 
82
- def test__detect_duplicates():
84
+ def test__detect_duplicate_labels():
83
85
  objects = [
84
86
  {"Label": "Foo"},
85
87
  {"Label": "Bar"},
86
88
  {"Label": "Foo"},
87
89
  ]
88
90
 
89
- objects = _detect_duplicates(objects)
91
+ objects = _detect_duplicate_labels(objects)
90
92
  assert objects == [
91
93
  {"Label": "Foo", _HAS_DUPLICATE_KEY: True},
92
94
  {"Label": "Bar", _HAS_DUPLICATE_KEY: False},
@@ -94,11 +96,25 @@ def test__detect_duplicates():
94
96
  ]
95
97
 
96
98
 
99
+ def test__remove_duplicates():
100
+ objects = [
101
+ {"QualifiedApiName": "Foo"},
102
+ {"QualifiedApiName": "Bar"},
103
+ {"QualifiedApiName": "Foo"},
104
+ ]
105
+
106
+ objects = _remove_duplicates(objects)
107
+ assert len(objects) == 2
108
+ names = {sobject["QualifiedApiName"] for sobject in objects}
109
+ assert names == {"Foo", "Bar"}
110
+
111
+
97
112
  def test_salesforce_formatter_tables():
98
113
  sobjects = [*_tables_sobjects()]
99
- tables = SalesforceFormatter.tables(sobjects)
114
+ tables = [t for t in SalesforceFormatter.tables(sobjects)]
100
115
  expected_names = {"a (a_one)", "a (a_two)", "b", "c"}
101
116
  payload_names = {t["table_name"] for t in tables}
117
+ assert len(tables) == 4 # we only keep one "b"
102
118
  assert payload_names == expected_names
103
119
 
104
120
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: castor-extractor
3
- Version: 0.22.0
3
+ Version: 0.22.5
4
4
  Summary: Extract your metadata assets.
5
5
  Home-page: https://www.castordoc.com/
6
6
  License: EULA
@@ -19,6 +19,7 @@ Provides-Extra: bigquery
19
19
  Provides-Extra: databricks
20
20
  Provides-Extra: dbt
21
21
  Provides-Extra: looker
22
+ Provides-Extra: lookerstudio
22
23
  Provides-Extra: metabase
23
24
  Provides-Extra: mysql
24
25
  Provides-Extra: postgres
@@ -31,6 +32,7 @@ Provides-Extra: tableau
31
32
  Requires-Dist: cryptography (>=43.0.0,<44.0.0) ; extra == "snowflake"
32
33
  Requires-Dist: databricks-sql-connector (>=3.2.0,<4.0.0) ; extra == "databricks" or extra == "all"
33
34
  Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
35
+ Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
34
36
  Requires-Dist: google-auth (>=2,<3)
35
37
  Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
36
38
  Requires-Dist: google-cloud-storage (>=2,<3)
@@ -205,6 +207,26 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
205
207
 
206
208
  # Changelog
207
209
 
210
+ ## 0.22.5 - 2025-01-09
211
+
212
+ * Databricks: validate and deduplicate lineage links
213
+
214
+ ## 0.22.4 - 2025-01-08
215
+
216
+ * ThoughtSpot: extract answers
217
+
218
+ ## 0.22.3 - 2024-12-10
219
+
220
+ * Databricks: extract lineage from system tables
221
+
222
+ ## 0.22.2 - 2024-12-06
223
+
224
+ * Sigma: multithreading to retrieve lineage
225
+
226
+ ## 0.22.1 - 2024-12-05
227
+
228
+ * Salesforce: deduplicate tables
229
+
208
230
  ## 0.22.0 - 2024-12-04
209
231
 
210
232
  * Stop supporting python3.8
@@ -1,4 +1,4 @@
1
- CHANGELOG.md,sha256=hLifRdD-7Mm2l8gfTHM6y37ld5FLwhMAsmm8FVVQdks,15000
1
+ CHANGELOG.md,sha256=JzTJEZxIMP9F_aePVfIvqLt0OuG0jYcDygsLyfTAV84,15335
2
2
  Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
3
3
  DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
4
4
  LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
@@ -76,7 +76,7 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
76
76
  castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
77
77
  castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
78
78
  castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
79
- castor_extractor/utils/__init__.py,sha256=jyYquzC2-R-UYl3VTP49ZDHB0IErGogTPMy3GfScbaA,1524
79
+ castor_extractor/utils/__init__.py,sha256=X7WOOgrpGf7Vh8r-7eNGjuC0rKs0g9GTO3d7hZ18gwo,1550
80
80
  castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
81
81
  castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
82
82
  castor_extractor/utils/client/__init__.py,sha256=h5gm8UNNCCkAqhjYK5f6BY7k0cHFOyAvkmlktqwpir0,392
@@ -135,8 +135,8 @@ castor_extractor/utils/salesforce/pagination.py,sha256=wJq0rKLdacFRggyHwB6Fh3K6i
135
135
  castor_extractor/utils/store.py,sha256=hnyrFwCsL48e9QrsBns-n8FospujZrkUy1P2YHAh_C0,2067
136
136
  castor_extractor/utils/string.py,sha256=IQqNum7CJwuSvDGPbTAmz46YwtYDYgJKeXY7iixdjI4,2370
137
137
  castor_extractor/utils/string_test.py,sha256=u3P2tAPhyfCLvD19rH_JcpHhPuWTHUdg0z_N_-Kxwno,2501
138
- castor_extractor/utils/time.py,sha256=Mv-wTbh1uONXNEd09nb_B8wB5mP8DjDUfPg0S3xmo9Y,1619
139
- castor_extractor/utils/time_test.py,sha256=pEwpcHI7wGPnfgwrH1DNHEbPz3HEAryNF5yPL7Dqkp8,448
138
+ castor_extractor/utils/time.py,sha256=jmP1QWg4lv21Jp_Oy71lfJ47hjNOSgHiBOFf964RMPU,1732
139
+ castor_extractor/utils/time_test.py,sha256=pH8DSosNlwDYZXZNNjYDcL0WbmZc_c212LEEn88Oqew,647
140
140
  castor_extractor/utils/type.py,sha256=Sd8JlEgbGkBUZnRqCUDtREeBkOMTXtlNMyCph90_J0Q,328
141
141
  castor_extractor/utils/validation.py,sha256=kQAFtqt3gfy7YqYQ0u-60vyNYUF_96he5QDVUQnZmDo,1896
142
142
  castor_extractor/utils/validation_test.py,sha256=aSetitOCkH_K-Wto9ISOVGso5jGfTUOBLm3AZnvavO8,1181
@@ -168,6 +168,17 @@ castor_extractor/visualization/looker/extract.py,sha256=O_hzRftww3Cw1cgijL-K-8gh
168
168
  castor_extractor/visualization/looker/fields.py,sha256=7oC7p-3Wp7XHBP_FT_D1wH3kINFRnc_qGVeH1a4UNZY,623
169
169
  castor_extractor/visualization/looker/fields_test.py,sha256=7Cwq8Qky6aTZg8nCHp1gmPJtd9pGNB4QeMIRRWdHo5w,782
170
170
  castor_extractor/visualization/looker/multithreading.py,sha256=Muuh3usBLqtv3sfHoyPYJ6jJ7V5ajR6N9ZJ_F-bNc60,2608
171
+ castor_extractor/visualization/looker_studio/__init__.py,sha256=p3mTWz7Yk1_m9vYohxCqwxnuE7SUYbU--TH2ezhf734,142
172
+ castor_extractor/visualization/looker_studio/assets.py,sha256=_ir4L2RTmGDb1WetAm6-EZ6W4tPXxi0kNppNBlmy9QE,135
173
+ castor_extractor/visualization/looker_studio/client/__init__.py,sha256=YkQaVDJa-7KSwdOLjtgKJMRiafbGNKC_46YVx0hYZ1Q,129
174
+ castor_extractor/visualization/looker_studio/client/admin_sdk_client.py,sha256=hYKdU6TlWKkXx07r6HsZ4Wbxhasx8DP_jO6iDCjHjgk,3508
175
+ castor_extractor/visualization/looker_studio/client/client.py,sha256=AYdR46NOdn_ITK_wPAASROW0gJjx-iA0Gi43QeuU5BU,1302
176
+ castor_extractor/visualization/looker_studio/client/credentials.py,sha256=yzTaiJQ5cArTnbybUPF6fZZXbX9XQ0SBq-jVI2ECovA,521
177
+ castor_extractor/visualization/looker_studio/client/endpoints.py,sha256=5eY-ffqNDdlDBOOpiF7LpjyHMrzeClJktidCr1pTDUs,669
178
+ castor_extractor/visualization/looker_studio/client/enums.py,sha256=fHgemTaQpnwee8cw1YQVDsVnH--vTyFwT4Px8aVYYHQ,167
179
+ castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=Oqu_bGBEqYRR_aitBFyvfCZnx0kSZf4qGEI16tIRnhw,3482
180
+ castor_extractor/visualization/looker_studio/client/pagination.py,sha256=9HQ3Rkdiz2VB6AvYtZ0F-WouiD0pMmdZyAmkv-3wh08,783
181
+ castor_extractor/visualization/looker_studio/client/scopes.py,sha256=824cqqgZuGq4L-rPNoHJe0ibXsxkRwB0CLG_kqw9Q0g,256
171
182
  castor_extractor/visualization/metabase/__init__.py,sha256=3E36cmkMyEgBB6Ot5rWk-N75i0G-7k24QTlc-Iol4pM,193
172
183
  castor_extractor/visualization/metabase/assets.py,sha256=nu3FwQBU_hdS2DBvgXAwQlEEi76QiNK2tMKEtMyctaY,2874
173
184
  castor_extractor/visualization/metabase/client/__init__.py,sha256=KBvaPMofBRV3m_sZAnKNCrJGr-Z88EbpdzEzWPQ_uBk,99
@@ -241,7 +252,7 @@ castor_extractor/visualization/salesforce_reporting/extract.py,sha256=ScStilebLG
241
252
  castor_extractor/visualization/sigma/__init__.py,sha256=GINql4yJLtjfOJgjHaWNpE13cMtnKNytiFRomwav27Q,114
242
253
  castor_extractor/visualization/sigma/assets.py,sha256=JZ1Cpxnml8P3mIJoTUM57hvylB18ErECQXaP5FF63O4,268
243
254
  castor_extractor/visualization/sigma/client/__init__.py,sha256=YQv06FBBQHvBMFg_tN0nUcmUp2NCL2s-eFTXG8rXaBg,74
244
- castor_extractor/visualization/sigma/client/client.py,sha256=nT61lN2yRpKd6jeqwR0NVOAUVpA5KAQyHkEGTl7n00A,6283
255
+ castor_extractor/visualization/sigma/client/client.py,sha256=d9CpE7vRZAPGzck0jFn37LY_6E_Njz9D1sCnFVGJSWk,8006
245
256
  castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
246
257
  castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
247
258
  castor_extractor/visualization/sigma/client/pagination.py,sha256=kNEhNq08tTGbypyMjxs0w4uvDtQc_iaWpOZweaa_FsU,690
@@ -295,13 +306,13 @@ castor_extractor/visualization/tableau_revamp/client/rest_fields.py,sha256=3kvaq
295
306
  castor_extractor/visualization/tableau_revamp/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
296
307
  castor_extractor/visualization/tableau_revamp/extract.py,sha256=HqnBypuNGx_xKk-68WEOy_ucD15LuRF4t2xXf0XKPE0,1370
297
308
  castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
298
- castor_extractor/visualization/thoughtspot/assets.py,sha256=lPRvXk0PKybgLv1AcDVxg-ssf4XLTs0biRqLrqC2TzU,196
309
+ castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
299
310
  castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
300
- castor_extractor/visualization/thoughtspot/client/client.py,sha256=RHOaJjvlWcSdASXzvlgMbmsSU9oTIixPhH8g0NgyIbc,3719
311
+ castor_extractor/visualization/thoughtspot/client/client.py,sha256=mtwMCPI1-1tyZb1gSYYr-O2QZMTFQwNgillU6ycsOU4,5552
301
312
  castor_extractor/visualization/thoughtspot/client/credentials.py,sha256=fp4YHiZy-dstWiLr5c4kFU9SyPK5rd2nCeh8k5sVRpM,462
302
313
  castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=u3FRkmG6j5OIMEeXWZcgRObP8JeC4EutIJEeitNV44c,330
303
- castor_extractor/visualization/thoughtspot/client/utils.py,sha256=ua7-10HKpFHYRDBVGLJ5hIEfuUA7ryIH9tl0sBjl0MU,883
304
- castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256=-5ZaEYpQSrIp1-Sx-ViQOLPlv2LoOajEs2mE5YNi_tU,1887
314
+ castor_extractor/visualization/thoughtspot/client/utils.py,sha256=3LgbIWoG1e39VW8rYaV4ot_0EFipziwf3rFAZKxrlEY,1072
315
+ castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256=2XysRU7a58KA2JgNwU2j4GPrN0rkN7Gvk8kQCJlYXVk,2469
305
316
  castor_extractor/visualization/thoughtspot/extract.py,sha256=mcXS0jGFpa50td98AVbbTqxchyI5wDCpB-v1o5iRc3g,1354
306
317
  castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
307
318
  castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
@@ -329,21 +340,21 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
329
340
  castor_extractor/warehouse/bigquery/query.py,sha256=FEekxlkrfAXzsT8Kj1AIqYd5mURB5MlZIkbFVXVqEhU,4762
330
341
  castor_extractor/warehouse/bigquery/types.py,sha256=rfKkKA13Et7TM4I0uVaXkLfuaBXkv51bNTp4AO0QSdw,57
331
342
  castor_extractor/warehouse/databricks/__init__.py,sha256=YG3YSIJgCFRjjI8eExy9T7qGnfnjWhMFh8c15KTs_BA,184
332
- castor_extractor/warehouse/databricks/api_client.py,sha256=1E3t8uCi3b8xVXLCodwlH5y8FIGmu9otORvA7ZqcGKE,8283
343
+ castor_extractor/warehouse/databricks/api_client.py,sha256=kLcUGSgrfybZUrpt0tE7qe2OoSSN7IK4myyB7c0czOY,6260
333
344
  castor_extractor/warehouse/databricks/api_client_test.py,sha256=YTWC-X7L-XAfK5b39TUgTmR1ifv0QrY5tvLNoSbpmjg,466
334
- castor_extractor/warehouse/databricks/client.py,sha256=K3RafGL_UerFAGmRKK2Cp2IXzalQYqkneQFvgsYdOZY,4993
335
- castor_extractor/warehouse/databricks/client_test.py,sha256=UKr_D3M8mhqV1oL2_3y_6pEzAFLVE3FHDNZh4omFLK4,2286
336
- castor_extractor/warehouse/databricks/credentials.py,sha256=iphbVynVTQXMEbJy4QaT5fer-GpOi7QtbAlg8R7-Lj4,598
345
+ castor_extractor/warehouse/databricks/client.py,sha256=H6vcKfos7op5AKSQF9qduG4afx-GZgBdyGE7waS6__o,3292
346
+ castor_extractor/warehouse/databricks/client_test.py,sha256=hOuSPh45z6m9T1hjuqpOayby_q8bYdJVdq5qiwkiXrg,1370
347
+ castor_extractor/warehouse/databricks/credentials.py,sha256=ExtVcl2NpMXTx1Lg8vHQdzQtSEm2aqpg3D1BJrNAUjI,528
337
348
  castor_extractor/warehouse/databricks/endpoints.py,sha256=qPoL9CtPFJdwVuW9rJ37nmeMd-nChOBouEVYb4SlaUE,670
338
- castor_extractor/warehouse/databricks/extract.py,sha256=G_-78-vrvEyn8rcKXXDXlxjad4Ot-Ko4vnhvEcOzjJQ,7389
349
+ castor_extractor/warehouse/databricks/enums.py,sha256=3T6BbVvbWvfWkD23krsYT1x0kKh1qRzNPl6WpcXe300,274
350
+ castor_extractor/warehouse/databricks/extract.py,sha256=Z4VTEIf0QMiua0QGAlJdQ86kxmGAXekQ304aCKme6IY,7358
339
351
  castor_extractor/warehouse/databricks/format.py,sha256=FUBMrFFWSa_lX5PtixJCDR3eRYycqeMw0oKHt7AkA4o,6732
340
352
  castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0zdca7Pms4seCHimbw8NAoAI,3322
341
- castor_extractor/warehouse/databricks/lineage.py,sha256=RUCcKz19R0dJVab6JUSUbGx4L5Vyb4sVoTAwLbfgjxo,4700
342
- castor_extractor/warehouse/databricks/lineage_test.py,sha256=EejO4qKH_kJlJSrIap6GvkUi9E55RFvfiySKazAh0_A,1048
353
+ castor_extractor/warehouse/databricks/lineage.py,sha256=jwiRXrgqBAtzQt5EgErYrN8YRyviEEHmyrSbw8TSPq4,2105
354
+ castor_extractor/warehouse/databricks/lineage_test.py,sha256=PyBn1eAoxLm4Bz5M0F4zmaxFX2mXRTM_uug5OKbQPQs,2684
343
355
  castor_extractor/warehouse/databricks/pagination.py,sha256=sM1G0sN1pf1TPpI0Y3Oew378UGEKVkMRc2Mlu9tDjLo,545
344
- castor_extractor/warehouse/databricks/sql_client.py,sha256=KBP0rmMQBWw3jshDfv_NpFW8HqPxGfcBkS4d9T9aXvE,2977
345
- castor_extractor/warehouse/databricks/test_constants.py,sha256=Hm96yq_ltVAKv7WYhYz637r4Cuj-1cCdyOuxMEe3J-Q,2246
346
- castor_extractor/warehouse/databricks/types.py,sha256=-qO5y-uI95B666iDhyNM0TL8WlwYC-3Q4xZuolh3PwE,205
356
+ castor_extractor/warehouse/databricks/sql_client.py,sha256=5isGsRL0MW1lu_E_xTyCvSj_rwaJ2nh-kPlhvTvDy_w,3566
357
+ castor_extractor/warehouse/databricks/types.py,sha256=-TFX4jS6_c3wQLOpJTKpLeGS21YIPjKDjISnzeUPdCc,46
347
358
  castor_extractor/warehouse/databricks/utils.py,sha256=5CKn6Me1Tus97H_qDEz_5tkhd4ARmwk2qiC3GndjyCc,1969
348
359
  castor_extractor/warehouse/databricks/utils_test.py,sha256=_guTuzRWRTZdDY7ils0X1K8jhI9T877MEtw3x_YDg9I,2415
349
360
  castor_extractor/warehouse/mysql/__init__.py,sha256=2KFDogo9GNbApHqw3Vm5t_uNmIRjdp76nmP_WQQMfQY,116
@@ -390,8 +401,8 @@ castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29dee
390
401
  castor_extractor/warehouse/salesforce/client.py,sha256=067ZyccmIYoY6VwLTSneefOJqUpobtnoEzxJMY2oSPs,3268
391
402
  castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
392
403
  castor_extractor/warehouse/salesforce/extract.py,sha256=BUQ1ZxGGSq9wWCJfRbKIzIBBeth_YXg8YSV72lbz2lc,3417
393
- castor_extractor/warehouse/salesforce/format.py,sha256=TUQrxkVEbgs9GDXDI4gsR8LqRmwmVs9Xs-Q5R36ibuQ,3385
394
- castor_extractor/warehouse/salesforce/format_test.py,sha256=z1Jwo5W74YfBHw_e_DFAJTTCN2ltbdPI294dj4LoGcg,3228
404
+ castor_extractor/warehouse/salesforce/format.py,sha256=M5uGA8aURL_Nt27T8R2tDfbU5ZUM3ECG4fGalEkWkYA,3688
405
+ castor_extractor/warehouse/salesforce/format_test.py,sha256=puTL-Co84jE2SQzKFKGLYU9rey4Ja_Ox8xiKy4iOjeo,3780
395
406
  castor_extractor/warehouse/salesforce/pagination.py,sha256=m1S9JRNf6Oe-6dDghYUY5wwTzGzKW5H9pE60PCXMha0,920
396
407
  castor_extractor/warehouse/salesforce/soql.py,sha256=XB8ohKwHFfC4Xger7Y84DXLW17IJDye_bZ3FL6DCcOI,1188
397
408
  castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
@@ -425,8 +436,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
425
436
  castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
426
437
  castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
427
438
  castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
428
- castor_extractor-0.22.0.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
429
- castor_extractor-0.22.0.dist-info/METADATA,sha256=qDZUEOjIUsdzWwEQI5MLVA7wTdqgX3LeCcFeP11Zb-4,21885
430
- castor_extractor-0.22.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
431
- castor_extractor-0.22.0.dist-info/entry_points.txt,sha256=7aVSxc-_2dicp28Ow-S4y0p4wGoTm9zGmVptMvfLdw8,1649
432
- castor_extractor-0.22.0.dist-info/RECORD,,
439
+ castor_extractor-0.22.5.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
440
+ castor_extractor-0.22.5.dist-info/METADATA,sha256=11A9xI9Bd6Uu1Na_AJngfTbkt-ECXjsabWNTppaZsOk,22352
441
+ castor_extractor-0.22.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
442
+ castor_extractor-0.22.5.dist-info/entry_points.txt,sha256=7aVSxc-_2dicp28Ow-S4y0p4wGoTm9zGmVptMvfLdw8,1649
443
+ castor_extractor-0.22.5.dist-info/RECORD,,
@@ -1,79 +0,0 @@
1
- OLDER_DATE = "2024-04-18 20:20:20.0"
2
- CLOSER_DATE = "2024-04-19 20:20:20.0"
3
-
4
- MOCK_TABLES_FOR_TABLE_LINEAGE = [
5
- {
6
- "id": "f51ba2ca-8cc3-4de6-8f8b-730359e8f40f",
7
- "schema_id": "dev.silver",
8
- "table_name": "analytics",
9
- },
10
- {
11
- "id": "4e140bdc-a67c-4b68-8a07-c684657d8b44",
12
- "schema_id": "dev.silver",
13
- "table_name": "pre_analytics",
14
- },
15
- {
16
- "id": "7d403198-55ea-4a40-9995-6ee2f4c79dfa",
17
- "schema_id": "dev.bronze",
18
- "table_name": "analytics",
19
- },
20
- ]
21
-
22
- _RAW_LINEAGE_DEV_SILVER_ANALYTICS = {
23
- "upstreams": [
24
- { # there could be other keys: jobInfos, notebookInfos, queryInfos
25
- "tableInfo": {
26
- "name": "pre_analytics",
27
- "catalog_name": "dev",
28
- "schema_name": "silver",
29
- "table_type": "PERSISTED_VIEW", # not used
30
- "lineage_timestamp": OLDER_DATE,
31
- }
32
- },
33
- {
34
- "tableInfo": {
35
- "name": "analytics",
36
- "catalog_name": "dev",
37
- "schema_name": "bronze",
38
- "table_type": "PERSISTED_VIEW", # not used
39
- "lineage_timestamp": CLOSER_DATE,
40
- }
41
- },
42
- ],
43
- "downstreams": [],
44
- }
45
- _RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS = {
46
- "upstreams": [],
47
- "downstreams": [
48
- {
49
- "tableInfo": {
50
- "name": "analytics",
51
- "catalog_name": "dev",
52
- "schema_name": "silver",
53
- "table_type": "PERSISTED_VIEW", # not used
54
- "lineage_timestamp": OLDER_DATE,
55
- }
56
- },
57
- ],
58
- }
59
- _RAW_LINEAGE_DEV_BRONZE_ANALYTICS = {
60
- "upstreams": [],
61
- "downstreams": [
62
- {
63
- "tableInfo": {
64
- "name": "analytics",
65
- "catalog_name": "dev",
66
- "schema_name": "silver",
67
- "table_type": "PERSISTED_VIEW", # not used
68
- "lineage_timestamp": OLDER_DATE,
69
- }
70
- },
71
- ],
72
- }
73
-
74
- # should be in the same order as MOCK_TABLES_FOR_TABLE_LINEAGE
75
- TABLE_LINEAGE_SIDE_EFFECT: tuple = (
76
- _RAW_LINEAGE_DEV_SILVER_ANALYTICS,
77
- _RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS,
78
- _RAW_LINEAGE_DEV_BRONZE_ANALYTICS,
79
- )