castor-extractor 0.22.0__py3-none-any.whl → 0.22.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of castor-extractor might be problematic. Click here for more details.
- CHANGELOG.md +20 -0
- castor_extractor/utils/__init__.py +1 -0
- castor_extractor/utils/time.py +4 -0
- castor_extractor/utils/time_test.py +8 -1
- castor_extractor/visualization/looker_studio/__init__.py +6 -0
- castor_extractor/visualization/looker_studio/assets.py +6 -0
- castor_extractor/visualization/looker_studio/client/__init__.py +3 -0
- castor_extractor/visualization/looker_studio/client/admin_sdk_client.py +90 -0
- castor_extractor/visualization/looker_studio/client/client.py +37 -0
- castor_extractor/visualization/looker_studio/client/credentials.py +20 -0
- castor_extractor/visualization/looker_studio/client/endpoints.py +18 -0
- castor_extractor/visualization/looker_studio/client/enums.py +8 -0
- castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py +102 -0
- castor_extractor/visualization/looker_studio/client/pagination.py +31 -0
- castor_extractor/visualization/looker_studio/client/scopes.py +6 -0
- castor_extractor/visualization/sigma/client/client.py +64 -10
- castor_extractor/visualization/thoughtspot/assets.py +3 -1
- castor_extractor/visualization/thoughtspot/client/client.py +67 -14
- castor_extractor/visualization/thoughtspot/client/utils.py +10 -4
- castor_extractor/visualization/thoughtspot/client/utils_test.py +22 -4
- castor_extractor/warehouse/databricks/api_client.py +2 -60
- castor_extractor/warehouse/databricks/client.py +4 -47
- castor_extractor/warehouse/databricks/client_test.py +1 -35
- castor_extractor/warehouse/databricks/credentials.py +4 -6
- castor_extractor/warehouse/databricks/enums.py +15 -0
- castor_extractor/warehouse/databricks/extract.py +13 -11
- castor_extractor/warehouse/databricks/lineage.py +47 -119
- castor_extractor/warehouse/databricks/lineage_test.py +86 -31
- castor_extractor/warehouse/databricks/sql_client.py +23 -8
- castor_extractor/warehouse/databricks/types.py +0 -7
- castor_extractor/warehouse/salesforce/format.py +12 -5
- castor_extractor/warehouse/salesforce/format_test.py +22 -6
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/METADATA +23 -1
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/RECORD +37 -26
- castor_extractor/warehouse/databricks/test_constants.py +0 -79
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/LICENCE +0 -0
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/WHEEL +0 -0
- {castor_extractor-0.22.0.dist-info → castor_extractor-0.22.5.dist-info}/entry_points.txt +0 -0
|
@@ -1,34 +1,89 @@
|
|
|
1
|
-
from .
|
|
2
|
-
from .
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
1
|
+
from .enums import LineageEntity
|
|
2
|
+
from .lineage import LineageProcessor, valid_lineage
|
|
3
|
+
|
|
4
|
+
_OLDER_DATE = "2025-01-01 00:00:01.0"
|
|
5
|
+
_CLOSER_DATE = "2025-01-01 02:02:02.0"
|
|
6
|
+
|
|
7
|
+
_TABLE_LINEAGES = [
|
|
8
|
+
{
|
|
9
|
+
"source_table_full_name": "a.b.source",
|
|
10
|
+
"target_table_full_name": "a.b.target",
|
|
11
|
+
"event_time": _CLOSER_DATE,
|
|
12
|
+
"other": "more recent stuff",
|
|
13
|
+
},
|
|
14
|
+
{
|
|
15
|
+
"source_table_full_name": "a.b.source",
|
|
16
|
+
"target_table_full_name": "a.b.target",
|
|
17
|
+
"event_time": _OLDER_DATE,
|
|
18
|
+
"other": "stuff that's too old",
|
|
19
|
+
},
|
|
20
|
+
{
|
|
21
|
+
"source_table_full_name": "no target",
|
|
22
|
+
"target_table_full_name": None,
|
|
23
|
+
"event_time": _CLOSER_DATE,
|
|
24
|
+
},
|
|
25
|
+
{
|
|
26
|
+
"source_table_full_name": None,
|
|
27
|
+
"target_table_full_name": "no source",
|
|
28
|
+
"event_time": _CLOSER_DATE,
|
|
29
|
+
},
|
|
30
|
+
]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
_COLUMN_LINEAGES = [
|
|
34
|
+
{
|
|
35
|
+
"source_table_full_name": "a.b.source",
|
|
36
|
+
"source_column_name": "src_col",
|
|
37
|
+
"target_table_full_name": "a.b.target",
|
|
38
|
+
"target_column_name": "trgt_col",
|
|
39
|
+
"event_time": _OLDER_DATE,
|
|
40
|
+
"other": "old stuff",
|
|
41
|
+
},
|
|
42
|
+
{
|
|
43
|
+
"source_table_full_name": "a.b.source",
|
|
44
|
+
"source_column_name": "src_col",
|
|
45
|
+
"target_table_full_name": "a.b.target",
|
|
46
|
+
"target_column_name": "trgt_col",
|
|
47
|
+
"event_time": _CLOSER_DATE,
|
|
48
|
+
"other": "newer stuff",
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"source_table_full_name": "a.b.toto",
|
|
52
|
+
"source_column_name": "toto_col",
|
|
53
|
+
"target_table_full_name": "a.b.tata",
|
|
54
|
+
"target_column_name": "tata_col",
|
|
55
|
+
"event_time": _OLDER_DATE,
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
"source_table_full_name": "a.b.source",
|
|
59
|
+
"source_column_name": "a.b.source",
|
|
60
|
+
"target_table_full_name": None,
|
|
61
|
+
"target_column_name": None,
|
|
62
|
+
"event_time": _CLOSER_DATE,
|
|
63
|
+
},
|
|
64
|
+
]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_valid_lineage():
|
|
68
|
+
table_links = valid_lineage(_TABLE_LINEAGES, LineageEntity.TABLE)
|
|
69
|
+
|
|
70
|
+
assert len(table_links) == 1
|
|
71
|
+
assert table_links[0]["source_table_full_name"] == "a.b.source"
|
|
72
|
+
assert table_links[0]["target_table_full_name"] == "a.b.target"
|
|
73
|
+
assert table_links[0]["event_time"] == _CLOSER_DATE
|
|
74
|
+
assert table_links[0]["other"] == "more recent stuff"
|
|
6
75
|
|
|
7
76
|
|
|
8
77
|
def test_LineageLinks_add():
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
assert
|
|
16
|
-
assert
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
assert expected_key in links.lineage
|
|
22
|
-
assert links.lineage[expected_key] == OLDER_DATE
|
|
23
|
-
|
|
24
|
-
# we update with the more recent timestamp
|
|
25
|
-
timestamped_link = ("parent", "child", CLOSER_DATE)
|
|
26
|
-
links.add(timestamped_link)
|
|
27
|
-
assert expected_key in links.lineage
|
|
28
|
-
assert links.lineage[expected_key] == CLOSER_DATE
|
|
29
|
-
|
|
30
|
-
# we keep the more recent timestamp
|
|
31
|
-
timestamped_link = ("parent", "child", OLDER_DATE)
|
|
32
|
-
links.add(timestamped_link)
|
|
33
|
-
assert expected_key in links.lineage
|
|
34
|
-
assert links.lineage[expected_key] == CLOSER_DATE
|
|
78
|
+
deduplicated_lineage = LineageProcessor(LineageEntity.COLUMN)
|
|
79
|
+
for link in _COLUMN_LINEAGES:
|
|
80
|
+
deduplicated_lineage.add(link)
|
|
81
|
+
|
|
82
|
+
lineage = deduplicated_lineage.lineage
|
|
83
|
+
assert len(lineage) == 2
|
|
84
|
+
assert ("a.b.source.src_col", "a.b.target.trgt_col") in lineage
|
|
85
|
+
assert ("a.b.toto.toto_col", "a.b.tata.tata_col") in lineage
|
|
86
|
+
assert (
|
|
87
|
+
lineage[("a.b.source.src_col", "a.b.target.trgt_col")]["other"]
|
|
88
|
+
== "newer stuff"
|
|
89
|
+
)
|
|
@@ -1,24 +1,24 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
from collections import defaultdict
|
|
3
|
-
from
|
|
3
|
+
from datetime import date
|
|
4
4
|
from typing import Optional
|
|
5
5
|
|
|
6
6
|
from databricks import sql # type: ignore
|
|
7
7
|
|
|
8
8
|
from .credentials import DatabricksCredentials
|
|
9
|
+
from .enums import LineageEntity, TagEntity
|
|
9
10
|
from .format import TagMapping
|
|
11
|
+
from .lineage import valid_lineage
|
|
10
12
|
from .utils import build_path, tag_label
|
|
11
13
|
|
|
12
14
|
logger = logging.getLogger(__name__)
|
|
13
15
|
|
|
14
16
|
_INFORMATION_SCHEMA_SQL = "SELECT * FROM system.information_schema"
|
|
15
17
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
COLUMN = "COLUMN"
|
|
21
|
-
TABLE = "TABLE"
|
|
18
|
+
_LINEAGE_SQL_TPL = """
|
|
19
|
+
SELECT * FROM system.access.{table_name}
|
|
20
|
+
WHERE event_date = :day
|
|
21
|
+
"""
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class DatabricksSQLClient:
|
|
@@ -71,7 +71,6 @@ class DatabricksSQLClient:
|
|
|
71
71
|
https://docs.databricks.com/en/sql/language-manual/information-schema/column_tags.html
|
|
72
72
|
"""
|
|
73
73
|
if not self._needs_extraction(entity):
|
|
74
|
-
# extracting tags require additional credentials (http_path)
|
|
75
74
|
return dict()
|
|
76
75
|
|
|
77
76
|
table = f"{entity.value.lower()}_tags"
|
|
@@ -88,3 +87,19 @@ class DatabricksSQLClient:
|
|
|
88
87
|
mapping[path].append(label)
|
|
89
88
|
|
|
90
89
|
return mapping
|
|
90
|
+
|
|
91
|
+
def get_lineage(
|
|
92
|
+
self, lineage_entity: LineageEntity, day: date
|
|
93
|
+
) -> list[dict]:
|
|
94
|
+
"""
|
|
95
|
+
Fetch {TABLE|COLUMN} lineage of the given day, via system tables
|
|
96
|
+
https://docs.databricks.com/en/admin/system-tables/lineage.html
|
|
97
|
+
"""
|
|
98
|
+
table_name = f"{lineage_entity.value.lower()}_lineage"
|
|
99
|
+
query = _LINEAGE_SQL_TPL.format(table_name=table_name)
|
|
100
|
+
params = {"day": day}
|
|
101
|
+
result = self.execute_sql(query, params)
|
|
102
|
+
data = []
|
|
103
|
+
for row in result:
|
|
104
|
+
data.append(row.asDict())
|
|
105
|
+
return valid_lineage(data, lineage_entity)
|
|
@@ -4,7 +4,7 @@ from typing import Any
|
|
|
4
4
|
from ...utils import group_by
|
|
5
5
|
from .constants import SCHEMA_NAME
|
|
6
6
|
|
|
7
|
-
_HAS_DUPLICATE_KEY = "#
|
|
7
|
+
_HAS_DUPLICATE_KEY = "#has_duplicate_label"
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def _clean(raw: str) -> str:
|
|
@@ -70,9 +70,15 @@ def _to_table_payload(sobject: dict) -> dict:
|
|
|
70
70
|
}
|
|
71
71
|
|
|
72
72
|
|
|
73
|
-
def
|
|
73
|
+
def _remove_duplicates(sobjects: list[dict]) -> list[dict]:
|
|
74
|
+
"""only keep one object per QualifiedApiName"""
|
|
75
|
+
by_name = group_by("QualifiedApiName", sobjects)
|
|
76
|
+
return [objects[0] for _, objects in by_name.items()]
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _detect_duplicate_labels(sobjects: list[dict]) -> list[dict]:
|
|
74
80
|
"""
|
|
75
|
-
enrich the given data with "
|
|
81
|
+
enrich the given data with "has_duplicate_label" flag:
|
|
76
82
|
- True when another asset has the same Label in the list
|
|
77
83
|
- False otherwise
|
|
78
84
|
"""
|
|
@@ -94,7 +100,8 @@ class SalesforceFormatter:
|
|
|
94
100
|
"""
|
|
95
101
|
formats the raw list of sobjects to tables
|
|
96
102
|
"""
|
|
97
|
-
sobjects =
|
|
103
|
+
sobjects = _remove_duplicates(sobjects)
|
|
104
|
+
sobjects = _detect_duplicate_labels(sobjects)
|
|
98
105
|
for sobject in sobjects:
|
|
99
106
|
yield _to_table_payload(sobject)
|
|
100
107
|
|
|
@@ -102,6 +109,6 @@ class SalesforceFormatter:
|
|
|
102
109
|
def columns(sobject_fields: dict[str, list[dict]]) -> Iterator[dict]:
|
|
103
110
|
"""formats the raw list of sobject fields to columns"""
|
|
104
111
|
for table_name, fields in sobject_fields.items():
|
|
105
|
-
fields =
|
|
112
|
+
fields = _detect_duplicate_labels(fields)
|
|
106
113
|
for index, field in enumerate(fields):
|
|
107
114
|
yield _to_column_payload(field, index, table_name)
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
from .format import (
|
|
2
2
|
_HAS_DUPLICATE_KEY,
|
|
3
3
|
SalesforceFormatter,
|
|
4
|
-
|
|
4
|
+
_detect_duplicate_labels,
|
|
5
5
|
_field_description,
|
|
6
6
|
_name,
|
|
7
|
+
_remove_duplicates,
|
|
7
8
|
)
|
|
8
9
|
|
|
9
10
|
|
|
@@ -11,9 +12,10 @@ def _tables_sobjects() -> tuple[dict[str, str], ...]:
|
|
|
11
12
|
"""Returns 4 sobjects with 2 sharing the same label"""
|
|
12
13
|
a = {"Label": "a", "QualifiedApiName": "a_one"}
|
|
13
14
|
b = {"Label": "b", "QualifiedApiName": "b"}
|
|
14
|
-
c = {"Label": "c", "QualifiedApiName": "
|
|
15
|
+
c = {"Label": "c", "QualifiedApiName": "c_unique_so_doesnt_matter"}
|
|
15
16
|
a_prime = {"Label": "a", "QualifiedApiName": "a_two"}
|
|
16
|
-
|
|
17
|
+
b_exact_duplicate = {"Label": "b", "QualifiedApiName": "b"}
|
|
18
|
+
return a, b, c, a_prime, b_exact_duplicate
|
|
17
19
|
|
|
18
20
|
|
|
19
21
|
def _columns_sobjects() -> dict[str, list[dict]]:
|
|
@@ -79,14 +81,14 @@ def test__name():
|
|
|
79
81
|
assert _name(empty_label_sobject) == "empty_label"
|
|
80
82
|
|
|
81
83
|
|
|
82
|
-
def
|
|
84
|
+
def test__detect_duplicate_labels():
|
|
83
85
|
objects = [
|
|
84
86
|
{"Label": "Foo"},
|
|
85
87
|
{"Label": "Bar"},
|
|
86
88
|
{"Label": "Foo"},
|
|
87
89
|
]
|
|
88
90
|
|
|
89
|
-
objects =
|
|
91
|
+
objects = _detect_duplicate_labels(objects)
|
|
90
92
|
assert objects == [
|
|
91
93
|
{"Label": "Foo", _HAS_DUPLICATE_KEY: True},
|
|
92
94
|
{"Label": "Bar", _HAS_DUPLICATE_KEY: False},
|
|
@@ -94,11 +96,25 @@ def test__detect_duplicates():
|
|
|
94
96
|
]
|
|
95
97
|
|
|
96
98
|
|
|
99
|
+
def test__remove_duplicates():
|
|
100
|
+
objects = [
|
|
101
|
+
{"QualifiedApiName": "Foo"},
|
|
102
|
+
{"QualifiedApiName": "Bar"},
|
|
103
|
+
{"QualifiedApiName": "Foo"},
|
|
104
|
+
]
|
|
105
|
+
|
|
106
|
+
objects = _remove_duplicates(objects)
|
|
107
|
+
assert len(objects) == 2
|
|
108
|
+
names = {sobject["QualifiedApiName"] for sobject in objects}
|
|
109
|
+
assert names == {"Foo", "Bar"}
|
|
110
|
+
|
|
111
|
+
|
|
97
112
|
def test_salesforce_formatter_tables():
|
|
98
113
|
sobjects = [*_tables_sobjects()]
|
|
99
|
-
tables = SalesforceFormatter.tables(sobjects)
|
|
114
|
+
tables = [t for t in SalesforceFormatter.tables(sobjects)]
|
|
100
115
|
expected_names = {"a (a_one)", "a (a_two)", "b", "c"}
|
|
101
116
|
payload_names = {t["table_name"] for t in tables}
|
|
117
|
+
assert len(tables) == 4 # we only keep one "b"
|
|
102
118
|
assert payload_names == expected_names
|
|
103
119
|
|
|
104
120
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: castor-extractor
|
|
3
|
-
Version: 0.22.
|
|
3
|
+
Version: 0.22.5
|
|
4
4
|
Summary: Extract your metadata assets.
|
|
5
5
|
Home-page: https://www.castordoc.com/
|
|
6
6
|
License: EULA
|
|
@@ -19,6 +19,7 @@ Provides-Extra: bigquery
|
|
|
19
19
|
Provides-Extra: databricks
|
|
20
20
|
Provides-Extra: dbt
|
|
21
21
|
Provides-Extra: looker
|
|
22
|
+
Provides-Extra: lookerstudio
|
|
22
23
|
Provides-Extra: metabase
|
|
23
24
|
Provides-Extra: mysql
|
|
24
25
|
Provides-Extra: postgres
|
|
@@ -31,6 +32,7 @@ Provides-Extra: tableau
|
|
|
31
32
|
Requires-Dist: cryptography (>=43.0.0,<44.0.0) ; extra == "snowflake"
|
|
32
33
|
Requires-Dist: databricks-sql-connector (>=3.2.0,<4.0.0) ; extra == "databricks" or extra == "all"
|
|
33
34
|
Requires-Dist: google-api-core (>=2.1.1,<3.0.0)
|
|
35
|
+
Requires-Dist: google-api-python-client (>=2.121.0,<3.0.0) ; extra == "lookerstudio" or extra == "all"
|
|
34
36
|
Requires-Dist: google-auth (>=2,<3)
|
|
35
37
|
Requires-Dist: google-cloud-core (>=2.1.0,<3.0.0)
|
|
36
38
|
Requires-Dist: google-cloud-storage (>=2,<3)
|
|
@@ -205,6 +207,26 @@ For any questions or bug report, contact us at [support@castordoc.com](mailto:su
|
|
|
205
207
|
|
|
206
208
|
# Changelog
|
|
207
209
|
|
|
210
|
+
## 0.22.5 - 2025-01-09
|
|
211
|
+
|
|
212
|
+
* Databricks: validate and deduplicate lineage links
|
|
213
|
+
|
|
214
|
+
## 0.22.4 - 2025-01-08
|
|
215
|
+
|
|
216
|
+
* ThoughtSpot: extract answers
|
|
217
|
+
|
|
218
|
+
## 0.22.3 - 2024-12-10
|
|
219
|
+
|
|
220
|
+
* Databricks: extract lineage from system tables
|
|
221
|
+
|
|
222
|
+
## 0.22.2 - 2024-12-06
|
|
223
|
+
|
|
224
|
+
* Sigma: multithreading to retrieve lineage
|
|
225
|
+
|
|
226
|
+
## 0.22.1 - 2024-12-05
|
|
227
|
+
|
|
228
|
+
* Salesforce: deduplicate tables
|
|
229
|
+
|
|
208
230
|
## 0.22.0 - 2024-12-04
|
|
209
231
|
|
|
210
232
|
* Stop supporting python3.8
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
CHANGELOG.md,sha256=
|
|
1
|
+
CHANGELOG.md,sha256=JzTJEZxIMP9F_aePVfIvqLt0OuG0jYcDygsLyfTAV84,15335
|
|
2
2
|
Dockerfile,sha256=xQ05-CFfGShT3oUqaiumaldwA288dj9Yb_pxofQpufg,301
|
|
3
3
|
DockerfileUsage.md,sha256=2hkJQF-5JuuzfPZ7IOxgM6QgIQW7l-9oRMFVwyXC4gE,998
|
|
4
4
|
LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
@@ -76,7 +76,7 @@ castor_extractor/uploader/settings.py,sha256=3MvOX-UFRqrLZoiT7wYn9jUGro7NX4RCafY
|
|
|
76
76
|
castor_extractor/uploader/upload.py,sha256=PSQfkO_7LSE0WBo9Tm_hlS2ONepKeB0cBFdJXySnues,4310
|
|
77
77
|
castor_extractor/uploader/upload_test.py,sha256=7fwstdQe7FjuwGilsCdFpEQr1qLoR2WTRUzyy93fISw,402
|
|
78
78
|
castor_extractor/uploader/utils.py,sha256=otAaySj5aeem6f0CTd0Te6ioJ6uP2J1p348j-SdIwDI,802
|
|
79
|
-
castor_extractor/utils/__init__.py,sha256=
|
|
79
|
+
castor_extractor/utils/__init__.py,sha256=X7WOOgrpGf7Vh8r-7eNGjuC0rKs0g9GTO3d7hZ18gwo,1550
|
|
80
80
|
castor_extractor/utils/argument_parser.py,sha256=S4EcIh3wNDjs3fOrQnttCcPsAmG8m_Txl7xvEh0Q37s,283
|
|
81
81
|
castor_extractor/utils/argument_parser_test.py,sha256=wnyLFJ74iEiPxxLSbwFtckR7FIHxsFOVU38ljs9gqRA,633
|
|
82
82
|
castor_extractor/utils/client/__init__.py,sha256=h5gm8UNNCCkAqhjYK5f6BY7k0cHFOyAvkmlktqwpir0,392
|
|
@@ -135,8 +135,8 @@ castor_extractor/utils/salesforce/pagination.py,sha256=wJq0rKLdacFRggyHwB6Fh3K6i
|
|
|
135
135
|
castor_extractor/utils/store.py,sha256=hnyrFwCsL48e9QrsBns-n8FospujZrkUy1P2YHAh_C0,2067
|
|
136
136
|
castor_extractor/utils/string.py,sha256=IQqNum7CJwuSvDGPbTAmz46YwtYDYgJKeXY7iixdjI4,2370
|
|
137
137
|
castor_extractor/utils/string_test.py,sha256=u3P2tAPhyfCLvD19rH_JcpHhPuWTHUdg0z_N_-Kxwno,2501
|
|
138
|
-
castor_extractor/utils/time.py,sha256=
|
|
139
|
-
castor_extractor/utils/time_test.py,sha256=
|
|
138
|
+
castor_extractor/utils/time.py,sha256=jmP1QWg4lv21Jp_Oy71lfJ47hjNOSgHiBOFf964RMPU,1732
|
|
139
|
+
castor_extractor/utils/time_test.py,sha256=pH8DSosNlwDYZXZNNjYDcL0WbmZc_c212LEEn88Oqew,647
|
|
140
140
|
castor_extractor/utils/type.py,sha256=Sd8JlEgbGkBUZnRqCUDtREeBkOMTXtlNMyCph90_J0Q,328
|
|
141
141
|
castor_extractor/utils/validation.py,sha256=kQAFtqt3gfy7YqYQ0u-60vyNYUF_96he5QDVUQnZmDo,1896
|
|
142
142
|
castor_extractor/utils/validation_test.py,sha256=aSetitOCkH_K-Wto9ISOVGso5jGfTUOBLm3AZnvavO8,1181
|
|
@@ -168,6 +168,17 @@ castor_extractor/visualization/looker/extract.py,sha256=O_hzRftww3Cw1cgijL-K-8gh
|
|
|
168
168
|
castor_extractor/visualization/looker/fields.py,sha256=7oC7p-3Wp7XHBP_FT_D1wH3kINFRnc_qGVeH1a4UNZY,623
|
|
169
169
|
castor_extractor/visualization/looker/fields_test.py,sha256=7Cwq8Qky6aTZg8nCHp1gmPJtd9pGNB4QeMIRRWdHo5w,782
|
|
170
170
|
castor_extractor/visualization/looker/multithreading.py,sha256=Muuh3usBLqtv3sfHoyPYJ6jJ7V5ajR6N9ZJ_F-bNc60,2608
|
|
171
|
+
castor_extractor/visualization/looker_studio/__init__.py,sha256=p3mTWz7Yk1_m9vYohxCqwxnuE7SUYbU--TH2ezhf734,142
|
|
172
|
+
castor_extractor/visualization/looker_studio/assets.py,sha256=_ir4L2RTmGDb1WetAm6-EZ6W4tPXxi0kNppNBlmy9QE,135
|
|
173
|
+
castor_extractor/visualization/looker_studio/client/__init__.py,sha256=YkQaVDJa-7KSwdOLjtgKJMRiafbGNKC_46YVx0hYZ1Q,129
|
|
174
|
+
castor_extractor/visualization/looker_studio/client/admin_sdk_client.py,sha256=hYKdU6TlWKkXx07r6HsZ4Wbxhasx8DP_jO6iDCjHjgk,3508
|
|
175
|
+
castor_extractor/visualization/looker_studio/client/client.py,sha256=AYdR46NOdn_ITK_wPAASROW0gJjx-iA0Gi43QeuU5BU,1302
|
|
176
|
+
castor_extractor/visualization/looker_studio/client/credentials.py,sha256=yzTaiJQ5cArTnbybUPF6fZZXbX9XQ0SBq-jVI2ECovA,521
|
|
177
|
+
castor_extractor/visualization/looker_studio/client/endpoints.py,sha256=5eY-ffqNDdlDBOOpiF7LpjyHMrzeClJktidCr1pTDUs,669
|
|
178
|
+
castor_extractor/visualization/looker_studio/client/enums.py,sha256=fHgemTaQpnwee8cw1YQVDsVnH--vTyFwT4Px8aVYYHQ,167
|
|
179
|
+
castor_extractor/visualization/looker_studio/client/looker_studio_api_client.py,sha256=Oqu_bGBEqYRR_aitBFyvfCZnx0kSZf4qGEI16tIRnhw,3482
|
|
180
|
+
castor_extractor/visualization/looker_studio/client/pagination.py,sha256=9HQ3Rkdiz2VB6AvYtZ0F-WouiD0pMmdZyAmkv-3wh08,783
|
|
181
|
+
castor_extractor/visualization/looker_studio/client/scopes.py,sha256=824cqqgZuGq4L-rPNoHJe0ibXsxkRwB0CLG_kqw9Q0g,256
|
|
171
182
|
castor_extractor/visualization/metabase/__init__.py,sha256=3E36cmkMyEgBB6Ot5rWk-N75i0G-7k24QTlc-Iol4pM,193
|
|
172
183
|
castor_extractor/visualization/metabase/assets.py,sha256=nu3FwQBU_hdS2DBvgXAwQlEEi76QiNK2tMKEtMyctaY,2874
|
|
173
184
|
castor_extractor/visualization/metabase/client/__init__.py,sha256=KBvaPMofBRV3m_sZAnKNCrJGr-Z88EbpdzEzWPQ_uBk,99
|
|
@@ -241,7 +252,7 @@ castor_extractor/visualization/salesforce_reporting/extract.py,sha256=ScStilebLG
|
|
|
241
252
|
castor_extractor/visualization/sigma/__init__.py,sha256=GINql4yJLtjfOJgjHaWNpE13cMtnKNytiFRomwav27Q,114
|
|
242
253
|
castor_extractor/visualization/sigma/assets.py,sha256=JZ1Cpxnml8P3mIJoTUM57hvylB18ErECQXaP5FF63O4,268
|
|
243
254
|
castor_extractor/visualization/sigma/client/__init__.py,sha256=YQv06FBBQHvBMFg_tN0nUcmUp2NCL2s-eFTXG8rXaBg,74
|
|
244
|
-
castor_extractor/visualization/sigma/client/client.py,sha256=
|
|
255
|
+
castor_extractor/visualization/sigma/client/client.py,sha256=d9CpE7vRZAPGzck0jFn37LY_6E_Njz9D1sCnFVGJSWk,8006
|
|
245
256
|
castor_extractor/visualization/sigma/client/credentials.py,sha256=XddAuQSmCKpxJ70TQgRnOj0vMPYVtiStk_lMMQ1AiNM,693
|
|
246
257
|
castor_extractor/visualization/sigma/client/endpoints.py,sha256=DBFphbgoH78_MZUGM_bKBAq28Nl7LWSZ6VRsbxrxtDg,1162
|
|
247
258
|
castor_extractor/visualization/sigma/client/pagination.py,sha256=kNEhNq08tTGbypyMjxs0w4uvDtQc_iaWpOZweaa_FsU,690
|
|
@@ -295,13 +306,13 @@ castor_extractor/visualization/tableau_revamp/client/rest_fields.py,sha256=3kvaq
|
|
|
295
306
|
castor_extractor/visualization/tableau_revamp/constants.py,sha256=lHGB50FgVNO2nXeIhkvQKivD8ZFBIjDrflgD5cTXKJw,104
|
|
296
307
|
castor_extractor/visualization/tableau_revamp/extract.py,sha256=HqnBypuNGx_xKk-68WEOy_ucD15LuRF4t2xXf0XKPE0,1370
|
|
297
308
|
castor_extractor/visualization/thoughtspot/__init__.py,sha256=NhTGUk5Kdt54oCjHYoAt0cLBmVLys5lFYiRANL6wCmI,150
|
|
298
|
-
castor_extractor/visualization/thoughtspot/assets.py,sha256=
|
|
309
|
+
castor_extractor/visualization/thoughtspot/assets.py,sha256=SAQWPKaD2NTSDg7-GSkcRSSEkKSws0MJfOVcHkdeTSg,276
|
|
299
310
|
castor_extractor/visualization/thoughtspot/client/__init__.py,sha256=svrE2rMxR-OXctjPeAHMEPePlfcra-9KDevTMcHunAA,86
|
|
300
|
-
castor_extractor/visualization/thoughtspot/client/client.py,sha256=
|
|
311
|
+
castor_extractor/visualization/thoughtspot/client/client.py,sha256=mtwMCPI1-1tyZb1gSYYr-O2QZMTFQwNgillU6ycsOU4,5552
|
|
301
312
|
castor_extractor/visualization/thoughtspot/client/credentials.py,sha256=fp4YHiZy-dstWiLr5c4kFU9SyPK5rd2nCeh8k5sVRpM,462
|
|
302
313
|
castor_extractor/visualization/thoughtspot/client/endpoints.py,sha256=u3FRkmG6j5OIMEeXWZcgRObP8JeC4EutIJEeitNV44c,330
|
|
303
|
-
castor_extractor/visualization/thoughtspot/client/utils.py,sha256=
|
|
304
|
-
castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256
|
|
314
|
+
castor_extractor/visualization/thoughtspot/client/utils.py,sha256=3LgbIWoG1e39VW8rYaV4ot_0EFipziwf3rFAZKxrlEY,1072
|
|
315
|
+
castor_extractor/visualization/thoughtspot/client/utils_test.py,sha256=2XysRU7a58KA2JgNwU2j4GPrN0rkN7Gvk8kQCJlYXVk,2469
|
|
305
316
|
castor_extractor/visualization/thoughtspot/extract.py,sha256=mcXS0jGFpa50td98AVbbTqxchyI5wDCpB-v1o5iRc3g,1354
|
|
306
317
|
castor_extractor/warehouse/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
307
318
|
castor_extractor/warehouse/abstract/__init__.py,sha256=Fdfa026tgOo64MvzVRLHM_F2G-JmcehrF0mh3dHgb7s,419
|
|
@@ -329,21 +340,21 @@ castor_extractor/warehouse/bigquery/queries/view_ddl.sql,sha256=obCm-IN9V8_YSZTw
|
|
|
329
340
|
castor_extractor/warehouse/bigquery/query.py,sha256=FEekxlkrfAXzsT8Kj1AIqYd5mURB5MlZIkbFVXVqEhU,4762
|
|
330
341
|
castor_extractor/warehouse/bigquery/types.py,sha256=rfKkKA13Et7TM4I0uVaXkLfuaBXkv51bNTp4AO0QSdw,57
|
|
331
342
|
castor_extractor/warehouse/databricks/__init__.py,sha256=YG3YSIJgCFRjjI8eExy9T7qGnfnjWhMFh8c15KTs_BA,184
|
|
332
|
-
castor_extractor/warehouse/databricks/api_client.py,sha256=
|
|
343
|
+
castor_extractor/warehouse/databricks/api_client.py,sha256=kLcUGSgrfybZUrpt0tE7qe2OoSSN7IK4myyB7c0czOY,6260
|
|
333
344
|
castor_extractor/warehouse/databricks/api_client_test.py,sha256=YTWC-X7L-XAfK5b39TUgTmR1ifv0QrY5tvLNoSbpmjg,466
|
|
334
|
-
castor_extractor/warehouse/databricks/client.py,sha256=
|
|
335
|
-
castor_extractor/warehouse/databricks/client_test.py,sha256=
|
|
336
|
-
castor_extractor/warehouse/databricks/credentials.py,sha256=
|
|
345
|
+
castor_extractor/warehouse/databricks/client.py,sha256=H6vcKfos7op5AKSQF9qduG4afx-GZgBdyGE7waS6__o,3292
|
|
346
|
+
castor_extractor/warehouse/databricks/client_test.py,sha256=hOuSPh45z6m9T1hjuqpOayby_q8bYdJVdq5qiwkiXrg,1370
|
|
347
|
+
castor_extractor/warehouse/databricks/credentials.py,sha256=ExtVcl2NpMXTx1Lg8vHQdzQtSEm2aqpg3D1BJrNAUjI,528
|
|
337
348
|
castor_extractor/warehouse/databricks/endpoints.py,sha256=qPoL9CtPFJdwVuW9rJ37nmeMd-nChOBouEVYb4SlaUE,670
|
|
338
|
-
castor_extractor/warehouse/databricks/
|
|
349
|
+
castor_extractor/warehouse/databricks/enums.py,sha256=3T6BbVvbWvfWkD23krsYT1x0kKh1qRzNPl6WpcXe300,274
|
|
350
|
+
castor_extractor/warehouse/databricks/extract.py,sha256=Z4VTEIf0QMiua0QGAlJdQ86kxmGAXekQ304aCKme6IY,7358
|
|
339
351
|
castor_extractor/warehouse/databricks/format.py,sha256=FUBMrFFWSa_lX5PtixJCDR3eRYycqeMw0oKHt7AkA4o,6732
|
|
340
352
|
castor_extractor/warehouse/databricks/format_test.py,sha256=ls0IcOElqp_qecAzNbK0zdca7Pms4seCHimbw8NAoAI,3322
|
|
341
|
-
castor_extractor/warehouse/databricks/lineage.py,sha256=
|
|
342
|
-
castor_extractor/warehouse/databricks/lineage_test.py,sha256=
|
|
353
|
+
castor_extractor/warehouse/databricks/lineage.py,sha256=jwiRXrgqBAtzQt5EgErYrN8YRyviEEHmyrSbw8TSPq4,2105
|
|
354
|
+
castor_extractor/warehouse/databricks/lineage_test.py,sha256=PyBn1eAoxLm4Bz5M0F4zmaxFX2mXRTM_uug5OKbQPQs,2684
|
|
343
355
|
castor_extractor/warehouse/databricks/pagination.py,sha256=sM1G0sN1pf1TPpI0Y3Oew378UGEKVkMRc2Mlu9tDjLo,545
|
|
344
|
-
castor_extractor/warehouse/databricks/sql_client.py,sha256=
|
|
345
|
-
castor_extractor/warehouse/databricks/
|
|
346
|
-
castor_extractor/warehouse/databricks/types.py,sha256=-qO5y-uI95B666iDhyNM0TL8WlwYC-3Q4xZuolh3PwE,205
|
|
356
|
+
castor_extractor/warehouse/databricks/sql_client.py,sha256=5isGsRL0MW1lu_E_xTyCvSj_rwaJ2nh-kPlhvTvDy_w,3566
|
|
357
|
+
castor_extractor/warehouse/databricks/types.py,sha256=-TFX4jS6_c3wQLOpJTKpLeGS21YIPjKDjISnzeUPdCc,46
|
|
347
358
|
castor_extractor/warehouse/databricks/utils.py,sha256=5CKn6Me1Tus97H_qDEz_5tkhd4ARmwk2qiC3GndjyCc,1969
|
|
348
359
|
castor_extractor/warehouse/databricks/utils_test.py,sha256=_guTuzRWRTZdDY7ils0X1K8jhI9T877MEtw3x_YDg9I,2415
|
|
349
360
|
castor_extractor/warehouse/mysql/__init__.py,sha256=2KFDogo9GNbApHqw3Vm5t_uNmIRjdp76nmP_WQQMfQY,116
|
|
@@ -390,8 +401,8 @@ castor_extractor/warehouse/salesforce/__init__.py,sha256=NR4aNea5jeE1xYqeZ_29dee
|
|
|
390
401
|
castor_extractor/warehouse/salesforce/client.py,sha256=067ZyccmIYoY6VwLTSneefOJqUpobtnoEzxJMY2oSPs,3268
|
|
391
402
|
castor_extractor/warehouse/salesforce/constants.py,sha256=GusduVBCPvwpk_Im6F3bDvXeNQ7hRnCMdIAjIg65RnE,52
|
|
392
403
|
castor_extractor/warehouse/salesforce/extract.py,sha256=BUQ1ZxGGSq9wWCJfRbKIzIBBeth_YXg8YSV72lbz2lc,3417
|
|
393
|
-
castor_extractor/warehouse/salesforce/format.py,sha256=
|
|
394
|
-
castor_extractor/warehouse/salesforce/format_test.py,sha256=
|
|
404
|
+
castor_extractor/warehouse/salesforce/format.py,sha256=M5uGA8aURL_Nt27T8R2tDfbU5ZUM3ECG4fGalEkWkYA,3688
|
|
405
|
+
castor_extractor/warehouse/salesforce/format_test.py,sha256=puTL-Co84jE2SQzKFKGLYU9rey4Ja_Ox8xiKy4iOjeo,3780
|
|
395
406
|
castor_extractor/warehouse/salesforce/pagination.py,sha256=m1S9JRNf6Oe-6dDghYUY5wwTzGzKW5H9pE60PCXMha0,920
|
|
396
407
|
castor_extractor/warehouse/salesforce/soql.py,sha256=XB8ohKwHFfC4Xger7Y84DXLW17IJDye_bZ3FL6DCcOI,1188
|
|
397
408
|
castor_extractor/warehouse/snowflake/__init__.py,sha256=TEGXTyxWp4Tr9gIHb-UFVTRKj6YWmrRtqHruiKSZGiY,174
|
|
@@ -425,8 +436,8 @@ castor_extractor/warehouse/sqlserver/queries/table.sql,sha256=kbBQP-TdG5px1IVgyx
|
|
|
425
436
|
castor_extractor/warehouse/sqlserver/queries/user.sql,sha256=gOrZsMVypusR2dc4vwVs4E1a-CliRsr_UjnD2EbXs-A,94
|
|
426
437
|
castor_extractor/warehouse/sqlserver/query.py,sha256=g0hPT-RmeGi2DyenAi3o72cTlQsLToXIFYojqc8E5fQ,533
|
|
427
438
|
castor_extractor/warehouse/synapse/queries/column.sql,sha256=lNcFoIW3Y0PFOqoOzJEXmPvZvfAsY0AP63Mu2LuPzPo,1351
|
|
428
|
-
castor_extractor-0.22.
|
|
429
|
-
castor_extractor-0.22.
|
|
430
|
-
castor_extractor-0.22.
|
|
431
|
-
castor_extractor-0.22.
|
|
432
|
-
castor_extractor-0.22.
|
|
439
|
+
castor_extractor-0.22.5.dist-info/LICENCE,sha256=sL-IGa4hweyya1HgzMskrRdybbIa2cktzxb5qmUgDg8,8254
|
|
440
|
+
castor_extractor-0.22.5.dist-info/METADATA,sha256=11A9xI9Bd6Uu1Na_AJngfTbkt-ECXjsabWNTppaZsOk,22352
|
|
441
|
+
castor_extractor-0.22.5.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
|
442
|
+
castor_extractor-0.22.5.dist-info/entry_points.txt,sha256=7aVSxc-_2dicp28Ow-S4y0p4wGoTm9zGmVptMvfLdw8,1649
|
|
443
|
+
castor_extractor-0.22.5.dist-info/RECORD,,
|
|
@@ -1,79 +0,0 @@
|
|
|
1
|
-
OLDER_DATE = "2024-04-18 20:20:20.0"
|
|
2
|
-
CLOSER_DATE = "2024-04-19 20:20:20.0"
|
|
3
|
-
|
|
4
|
-
MOCK_TABLES_FOR_TABLE_LINEAGE = [
|
|
5
|
-
{
|
|
6
|
-
"id": "f51ba2ca-8cc3-4de6-8f8b-730359e8f40f",
|
|
7
|
-
"schema_id": "dev.silver",
|
|
8
|
-
"table_name": "analytics",
|
|
9
|
-
},
|
|
10
|
-
{
|
|
11
|
-
"id": "4e140bdc-a67c-4b68-8a07-c684657d8b44",
|
|
12
|
-
"schema_id": "dev.silver",
|
|
13
|
-
"table_name": "pre_analytics",
|
|
14
|
-
},
|
|
15
|
-
{
|
|
16
|
-
"id": "7d403198-55ea-4a40-9995-6ee2f4c79dfa",
|
|
17
|
-
"schema_id": "dev.bronze",
|
|
18
|
-
"table_name": "analytics",
|
|
19
|
-
},
|
|
20
|
-
]
|
|
21
|
-
|
|
22
|
-
_RAW_LINEAGE_DEV_SILVER_ANALYTICS = {
|
|
23
|
-
"upstreams": [
|
|
24
|
-
{ # there could be other keys: jobInfos, notebookInfos, queryInfos
|
|
25
|
-
"tableInfo": {
|
|
26
|
-
"name": "pre_analytics",
|
|
27
|
-
"catalog_name": "dev",
|
|
28
|
-
"schema_name": "silver",
|
|
29
|
-
"table_type": "PERSISTED_VIEW", # not used
|
|
30
|
-
"lineage_timestamp": OLDER_DATE,
|
|
31
|
-
}
|
|
32
|
-
},
|
|
33
|
-
{
|
|
34
|
-
"tableInfo": {
|
|
35
|
-
"name": "analytics",
|
|
36
|
-
"catalog_name": "dev",
|
|
37
|
-
"schema_name": "bronze",
|
|
38
|
-
"table_type": "PERSISTED_VIEW", # not used
|
|
39
|
-
"lineage_timestamp": CLOSER_DATE,
|
|
40
|
-
}
|
|
41
|
-
},
|
|
42
|
-
],
|
|
43
|
-
"downstreams": [],
|
|
44
|
-
}
|
|
45
|
-
_RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS = {
|
|
46
|
-
"upstreams": [],
|
|
47
|
-
"downstreams": [
|
|
48
|
-
{
|
|
49
|
-
"tableInfo": {
|
|
50
|
-
"name": "analytics",
|
|
51
|
-
"catalog_name": "dev",
|
|
52
|
-
"schema_name": "silver",
|
|
53
|
-
"table_type": "PERSISTED_VIEW", # not used
|
|
54
|
-
"lineage_timestamp": OLDER_DATE,
|
|
55
|
-
}
|
|
56
|
-
},
|
|
57
|
-
],
|
|
58
|
-
}
|
|
59
|
-
_RAW_LINEAGE_DEV_BRONZE_ANALYTICS = {
|
|
60
|
-
"upstreams": [],
|
|
61
|
-
"downstreams": [
|
|
62
|
-
{
|
|
63
|
-
"tableInfo": {
|
|
64
|
-
"name": "analytics",
|
|
65
|
-
"catalog_name": "dev",
|
|
66
|
-
"schema_name": "silver",
|
|
67
|
-
"table_type": "PERSISTED_VIEW", # not used
|
|
68
|
-
"lineage_timestamp": OLDER_DATE,
|
|
69
|
-
}
|
|
70
|
-
},
|
|
71
|
-
],
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
# should be in the same order as MOCK_TABLES_FOR_TABLE_LINEAGE
|
|
75
|
-
TABLE_LINEAGE_SIDE_EFFECT: tuple = (
|
|
76
|
-
_RAW_LINEAGE_DEV_SILVER_ANALYTICS,
|
|
77
|
-
_RAW_LINEAGE_DEV_SILVER_PRE_ANALYTICS,
|
|
78
|
-
_RAW_LINEAGE_DEV_BRONZE_ANALYTICS,
|
|
79
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|