ingestr 0.13.23__py3-none-any.whl → 0.13.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/conftest.py +63 -0
- ingestr/main.py +4 -2
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/filters.py +21 -0
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- {ingestr-0.13.23.dist-info → ingestr-0.13.24.dist-info}/METADATA +5 -4
- {ingestr-0.13.23.dist-info → ingestr-0.13.24.dist-info}/RECORD +12 -11
- {ingestr-0.13.23.dist-info → ingestr-0.13.24.dist-info}/WHEEL +0 -0
- {ingestr-0.13.23.dist-info → ingestr-0.13.24.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.23.dist-info → ingestr-0.13.24.dist-info}/licenses/LICENSE.md +0 -0
ingestr/conftest.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import tempfile
|
|
3
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
4
|
+
|
|
5
|
+
from main_test import DESTINATIONS, SOURCES # type: ignore
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def pytest_configure(config):
|
|
9
|
+
if is_master(config):
|
|
10
|
+
config.shared_directory = tempfile.mkdtemp()
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def pytest_configure_node(node):
|
|
14
|
+
"""xdist hook"""
|
|
15
|
+
node.workerinput["shared_directory"] = node.config.shared_directory
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def is_master(config):
|
|
19
|
+
"""True if the code running the given pytest.config object is running in a xdist master
|
|
20
|
+
node or not running xdist at all.
|
|
21
|
+
"""
|
|
22
|
+
return not hasattr(config, "workerinput")
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def start_containers(config):
|
|
26
|
+
if hasattr(config, "workerinput"):
|
|
27
|
+
return
|
|
28
|
+
|
|
29
|
+
unique_containers = set(SOURCES.values()) | set(DESTINATIONS.values())
|
|
30
|
+
for container in unique_containers:
|
|
31
|
+
container.container_lock_dir = config.shared_directory
|
|
32
|
+
|
|
33
|
+
with ThreadPoolExecutor() as executor:
|
|
34
|
+
for container in unique_containers:
|
|
35
|
+
executor.submit(container.start_fully)
|
|
36
|
+
# futures = [
|
|
37
|
+
# executor.submit(container.start_fully) for container in unique_containers
|
|
38
|
+
# ]
|
|
39
|
+
# # Wait for all futures to complete
|
|
40
|
+
# for future in futures:
|
|
41
|
+
# future.result()
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def stop_containers(config):
|
|
45
|
+
if hasattr(config, "workerinput"):
|
|
46
|
+
return
|
|
47
|
+
|
|
48
|
+
should_manage_containers = os.environ.get("PYTEST_XDIST_WORKER", "gw0") == "gw0"
|
|
49
|
+
if not should_manage_containers:
|
|
50
|
+
return
|
|
51
|
+
|
|
52
|
+
unique_containers = set(SOURCES.values()) | set(DESTINATIONS.values())
|
|
53
|
+
|
|
54
|
+
for container in unique_containers:
|
|
55
|
+
container.stop_fully()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def pytest_sessionstart(session):
|
|
59
|
+
start_containers(session.config)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def pytest_sessionfinish(session, exitstatus):
|
|
63
|
+
stop_containers(session.config)
|
ingestr/main.py
CHANGED
|
@@ -11,7 +11,7 @@ from typing_extensions import Annotated
|
|
|
11
11
|
import ingestr.src.partition as partition
|
|
12
12
|
import ingestr.src.resource as resource
|
|
13
13
|
from ingestr.src.destinations import AthenaDestination
|
|
14
|
-
from ingestr.src.filters import cast_set_to_list
|
|
14
|
+
from ingestr.src.filters import cast_set_to_list, handle_mysql_empty_dates
|
|
15
15
|
from ingestr.src.telemetry.event import track
|
|
16
16
|
|
|
17
17
|
app = typer.Typer(
|
|
@@ -35,7 +35,7 @@ DATE_FORMATS = [
|
|
|
35
35
|
|
|
36
36
|
# https://dlthub.com/docs/dlt-ecosystem/file-formats/parquet#supported-destinations
|
|
37
37
|
PARQUET_SUPPORTED_DESTINATIONS = [
|
|
38
|
-
"
|
|
38
|
+
"athenabigquery",
|
|
39
39
|
"duckdb",
|
|
40
40
|
"snowflake",
|
|
41
41
|
"databricks",
|
|
@@ -553,6 +553,8 @@ def ingest(
|
|
|
553
553
|
)
|
|
554
554
|
|
|
555
555
|
resource.for_each(dlt_source, lambda x: x.add_map(cast_set_to_list))
|
|
556
|
+
if factory.source_scheme.startswith("mysql"):
|
|
557
|
+
resource.for_each(dlt_source, lambda x: x.add_map(handle_mysql_empty_dates))
|
|
556
558
|
|
|
557
559
|
def col_h(x):
|
|
558
560
|
if column_hints:
|
ingestr/src/buildinfo.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
version = "v0.13.23"
|
|
1
|
+
version = "v0.13.24"
|
ingestr/src/filters.py
CHANGED
|
@@ -10,6 +10,27 @@ def cast_set_to_list(row):
|
|
|
10
10
|
return row
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
def handle_mysql_empty_dates(row):
|
|
14
|
+
# MySQL returns empty dates as 0000-00-00, which is not a valid date, we handle them here.
|
|
15
|
+
if not isinstance(row, dict):
|
|
16
|
+
return row
|
|
17
|
+
|
|
18
|
+
for key in row.keys():
|
|
19
|
+
if not isinstance(row[key], str):
|
|
20
|
+
continue
|
|
21
|
+
|
|
22
|
+
if row[key] == "0000-00-00":
|
|
23
|
+
from datetime import date
|
|
24
|
+
|
|
25
|
+
row[key] = date(1970, 1, 1)
|
|
26
|
+
|
|
27
|
+
elif row[key] == "0000-00-00 00:00:00":
|
|
28
|
+
from datetime import datetime
|
|
29
|
+
|
|
30
|
+
row[key] = datetime(1970, 1, 1, 0, 0, 0)
|
|
31
|
+
return row
|
|
32
|
+
|
|
33
|
+
|
|
13
34
|
def table_adapter_exclude_columns(cols: list[str]):
|
|
14
35
|
def excluder(table: Table):
|
|
15
36
|
cols_to_remove = [col for col in table._columns if col.name in cols] # type: ignore
|
ingestr/src/github/helpers.py
CHANGED
|
@@ -103,9 +103,9 @@ def get_reactions_data(
|
|
|
103
103
|
|
|
104
104
|
|
|
105
105
|
def _extract_top_connection(data: StrAny, node_type: str) -> StrAny:
|
|
106
|
-
assert (
|
|
107
|
-
|
|
108
|
-
)
|
|
106
|
+
assert isinstance(data, dict) and len(data) == 1, (
|
|
107
|
+
f"The data with list of {node_type} must be a dictionary and contain only one element"
|
|
108
|
+
)
|
|
109
109
|
data = next(iter(data.values()))
|
|
110
110
|
return data[node_type] # type: ignore
|
|
111
111
|
|
|
@@ -158,7 +158,7 @@ def _get_graphql_pages(
|
|
|
158
158
|
)
|
|
159
159
|
items_count += len(data_items)
|
|
160
160
|
print(
|
|
161
|
-
f
|
|
161
|
+
f"Got {len(data_items)}/{items_count} {node_type}s, query cost {rate_limit['cost']}, remaining credits: {rate_limit['remaining']}"
|
|
162
162
|
)
|
|
163
163
|
if data_items:
|
|
164
164
|
yield data_items
|
|
@@ -187,7 +187,7 @@ def _get_comment_reaction(comment_ids: List[str], access_token: str) -> StrAny:
|
|
|
187
187
|
# print(query)
|
|
188
188
|
page, rate_limit = _run_graphql_query(access_token, query, {})
|
|
189
189
|
print(
|
|
190
|
-
f
|
|
190
|
+
f"Got {len(page)} comments, query cost {rate_limit['cost']}, remaining credits: {rate_limit['remaining']}"
|
|
191
191
|
)
|
|
192
192
|
data.update(page)
|
|
193
193
|
return data
|
|
@@ -70,9 +70,9 @@ def google_spreadsheet(
|
|
|
70
70
|
spreadsheet_id=spreadsheet_id,
|
|
71
71
|
range_names=list(all_range_names),
|
|
72
72
|
)
|
|
73
|
-
assert len(all_range_names) == len(
|
|
74
|
-
|
|
75
|
-
)
|
|
73
|
+
assert len(all_range_names) == len(all_range_data), (
|
|
74
|
+
"Google Sheets API must return values for all requested ranges"
|
|
75
|
+
)
|
|
76
76
|
|
|
77
77
|
# get metadata for two first rows of each range
|
|
78
78
|
# first should contain headers
|
|
@@ -126,7 +126,7 @@ def google_spreadsheet(
|
|
|
126
126
|
headers = get_range_headers(headers_metadata, name)
|
|
127
127
|
if headers is None:
|
|
128
128
|
# generate automatic headers and treat the first row as data
|
|
129
|
-
headers = [f"col_{idx+1}" for idx in range(len(headers_metadata))]
|
|
129
|
+
headers = [f"col_{idx + 1}" for idx in range(len(headers_metadata))]
|
|
130
130
|
data_row_metadata = headers_metadata
|
|
131
131
|
rows_data = values[0:]
|
|
132
132
|
logger.warning(
|
|
@@ -149,12 +149,12 @@ def get_range_headers(headers_metadata: List[DictStrAny], range_name: str) -> Li
|
|
|
149
149
|
header_val = str(f"col_{idx + 1}")
|
|
150
150
|
else:
|
|
151
151
|
logger.warning(
|
|
152
|
-
f"In range {range_name}, header value: {header_val} at position {idx+1} is not a string!"
|
|
152
|
+
f"In range {range_name}, header value: {header_val} at position {idx + 1} is not a string!"
|
|
153
153
|
)
|
|
154
154
|
return None
|
|
155
155
|
else:
|
|
156
156
|
logger.warning(
|
|
157
|
-
f"In range {range_name}, header at position {idx+1} is not missing!"
|
|
157
|
+
f"In range {range_name}, header at position {idx + 1} is not missing!"
|
|
158
158
|
)
|
|
159
159
|
return None
|
|
160
160
|
headers.append(header_val)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.23
|
|
3
|
+
Version: 0.13.24
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -46,10 +46,10 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
|
|
|
46
46
|
Requires-Dist: dataclasses-json==0.6.7
|
|
47
47
|
Requires-Dist: decorator==5.2.1
|
|
48
48
|
Requires-Dist: deprecation==2.1.0
|
|
49
|
-
Requires-Dist: dlt==1.
|
|
49
|
+
Requires-Dist: dlt==1.9.0
|
|
50
50
|
Requires-Dist: dnspython==2.7.0
|
|
51
|
-
Requires-Dist: duckdb-engine==0.
|
|
52
|
-
Requires-Dist: duckdb==1.2.
|
|
51
|
+
Requires-Dist: duckdb-engine==0.17.0
|
|
52
|
+
Requires-Dist: duckdb==1.2.1
|
|
53
53
|
Requires-Dist: et-xmlfile==2.0.0
|
|
54
54
|
Requires-Dist: facebook-business==20.0.0
|
|
55
55
|
Requires-Dist: filelock==3.17.0
|
|
@@ -168,6 +168,7 @@ Requires-Dist: sqlalchemy-hana==2.0.0
|
|
|
168
168
|
Requires-Dist: sqlalchemy-redshift==0.8.14
|
|
169
169
|
Requires-Dist: sqlalchemy2-stubs==0.0.2a38
|
|
170
170
|
Requires-Dist: sqlalchemy==1.4.52
|
|
171
|
+
Requires-Dist: sqlglot==26.12.1
|
|
171
172
|
Requires-Dist: stripe==10.7.0
|
|
172
173
|
Requires-Dist: tenacity==9.0.0
|
|
173
174
|
Requires-Dist: thrift==0.16.0
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
ingestr/
|
|
1
|
+
ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
|
|
2
|
+
ingestr/main.py,sha256=wvbRCJ2--M0Zw2cYtSH874TxTtlD0wadHREeLG3anOY,25618
|
|
2
3
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
3
4
|
ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
|
|
4
|
-
ingestr/src/buildinfo.py,sha256=
|
|
5
|
+
ingestr/src/buildinfo.py,sha256=x-bxDOuFDQ1rgDJf03eHD1bXb9Yfo3wX39XyaBE0LkU,21
|
|
5
6
|
ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
|
|
6
7
|
ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
|
|
7
8
|
ingestr/src/factory.py,sha256=1jqcLv_QUUGeyg1OYN3ywrRdcDZyDRtMOongwyjDapU,5268
|
|
8
|
-
ingestr/src/filters.py,sha256=
|
|
9
|
+
ingestr/src/filters.py,sha256=5LNpBgm8FJXdrFHGyM7dLVyphKykSpPk7yuQAZ8GML4,1133
|
|
9
10
|
ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
|
|
10
11
|
ingestr/src/partition.py,sha256=E0WHqh1FTheQAIVK_-jWUx0dgyYZCD1VxlAm362gao4,964
|
|
11
12
|
ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
|
|
@@ -41,7 +42,7 @@ ingestr/src/filesystem/__init__.py,sha256=zkIwbRr0ir0EUdniI25p2zGiVc-7M9EmR351Aj
|
|
|
41
42
|
ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
|
|
42
43
|
ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
|
|
43
44
|
ingestr/src/github/__init__.py,sha256=xVijF-Wi4p88hkVJnKH-oTixismjD3aUcGqGa6Wr4e4,5889
|
|
44
|
-
ingestr/src/github/helpers.py,sha256=
|
|
45
|
+
ingestr/src/github/helpers.py,sha256=rpv_3HzuOl4PQ-FUeA66pev-pgze9SaE8RUHIPYfZ_A,6759
|
|
45
46
|
ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
|
|
46
47
|
ingestr/src/github/settings.py,sha256=N5ahWrDIQ_4IWV9i-hTXxyYduqY9Ym2BTwqsWxcDdJ8,258
|
|
47
48
|
ingestr/src/google_ads/__init__.py,sha256=bH0TtnRWcOUESezpvoA7VEUHAq_0ITGQeX4GGVBfl1I,3725
|
|
@@ -52,10 +53,10 @@ ingestr/src/google_ads/reports.py,sha256=AVY1pPt5yaIFskQe1k5VW2Dhlux3bzewsHlDrdG
|
|
|
52
53
|
ingestr/src/google_analytics/__init__.py,sha256=8Evpmoy464YpNbCI_NmvFHIzWCu7J7SjJw-RrPZ6AL8,3674
|
|
53
54
|
ingestr/src/google_analytics/helpers.py,sha256=vLmFyQ_IEJEK5LlxBJQeJw0VHaE5gRRZdBa54U72CaQ,5965
|
|
54
55
|
ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
|
|
55
|
-
ingestr/src/google_sheets/__init__.py,sha256=
|
|
56
|
+
ingestr/src/google_sheets/__init__.py,sha256=CL0HfY74uxX8-ge0ucI0VhWMYZVAfoX7WRPBitRi-CI,6647
|
|
56
57
|
ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
|
|
57
58
|
ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9kowZvQUCUxz0G6SlI,5404
|
|
58
|
-
ingestr/src/google_sheets/helpers/data_processing.py,sha256=
|
|
59
|
+
ingestr/src/google_sheets/helpers/data_processing.py,sha256=RNt2MYfdJhk4bRahnQVezpNg2x9z0vx60YFq2ukZ8vI,11004
|
|
59
60
|
ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
|
|
60
61
|
ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
|
|
61
62
|
ingestr/src/hubspot/__init__.py,sha256=NYgSIAPXQh2Qp1eKun7TgcerKogq6pWtNkr-_f0FXbI,9464
|
|
@@ -118,8 +119,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
118
119
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
119
120
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
120
121
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
121
|
-
ingestr-0.13.
|
|
122
|
-
ingestr-0.13.
|
|
123
|
-
ingestr-0.13.
|
|
124
|
-
ingestr-0.13.
|
|
125
|
-
ingestr-0.13.
|
|
122
|
+
ingestr-0.13.24.dist-info/METADATA,sha256=qzKyEOTPIb6cTD49Q6zC-bqSn7ax45JmCKcGh0jtJRw,13659
|
|
123
|
+
ingestr-0.13.24.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
124
|
+
ingestr-0.13.24.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
125
|
+
ingestr-0.13.24.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
126
|
+
ingestr-0.13.24.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|