ingestr 0.3.2__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +53 -26
- ingestr/src/factory.py +1 -0
- ingestr/src/version.py +1 -1
- ingestr/testdata/test_append.db +0 -0
- ingestr/testdata/test_create_replace.db +0 -0
- ingestr/testdata/test_delete_insert_with_timerange.db +0 -0
- ingestr/testdata/test_delete_insert_without_primary_key.db +0 -0
- ingestr/testdata/test_merge_with_primary_key.db +0 -0
- {ingestr-0.3.2.dist-info → ingestr-0.4.0.dist-info}/METADATA +7 -1
- {ingestr-0.3.2.dist-info → ingestr-0.4.0.dist-info}/RECORD +13 -13
- {ingestr-0.3.2.dist-info → ingestr-0.4.0.dist-info}/WHEEL +0 -0
- {ingestr-0.3.2.dist-info → ingestr-0.4.0.dist-info}/entry_points.txt +0 -0
- {ingestr-0.3.2.dist-info → ingestr-0.4.0.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
from datetime import datetime
|
|
3
|
+
from enum import Enum
|
|
3
4
|
from typing import Optional
|
|
4
5
|
|
|
5
6
|
import dlt
|
|
@@ -83,6 +84,30 @@ class SpinnerCollector(Collector):
|
|
|
83
84
|
self.status.stop()
|
|
84
85
|
|
|
85
86
|
|
|
87
|
+
class IncrementalStrategy(str, Enum):
|
|
88
|
+
create_replace = "replace"
|
|
89
|
+
append = "append"
|
|
90
|
+
delete_insert = "delete+insert"
|
|
91
|
+
merge = "merge"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class LoaderFileFormat(str, Enum):
|
|
95
|
+
jsonl = "jsonl"
|
|
96
|
+
parquet = "parquet"
|
|
97
|
+
insert_values = "insert_values"
|
|
98
|
+
csv = "csv"
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class SqlBackend(str, Enum):
|
|
102
|
+
sqlalchemy = "sqlalchemy"
|
|
103
|
+
pyarrow = "pyarrow"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class Progress(str, Enum):
|
|
107
|
+
interactive = "interactive"
|
|
108
|
+
log = "log"
|
|
109
|
+
|
|
110
|
+
|
|
86
111
|
@app.command()
|
|
87
112
|
def ingest(
|
|
88
113
|
source_uri: Annotated[
|
|
@@ -117,12 +142,12 @@ def ingest(
|
|
|
117
142
|
),
|
|
118
143
|
] = None, # type: ignore
|
|
119
144
|
incremental_strategy: Annotated[
|
|
120
|
-
|
|
145
|
+
IncrementalStrategy,
|
|
121
146
|
typer.Option(
|
|
122
|
-
help="The incremental strategy to use
|
|
147
|
+
help="The incremental strategy to use",
|
|
123
148
|
envvar="INCREMENTAL_STRATEGY",
|
|
124
149
|
),
|
|
125
|
-
] =
|
|
150
|
+
] = IncrementalStrategy.create_replace, # type: ignore
|
|
126
151
|
interval_start: Annotated[
|
|
127
152
|
Optional[datetime],
|
|
128
153
|
typer.Option(
|
|
@@ -161,26 +186,26 @@ def ingest(
|
|
|
161
186
|
),
|
|
162
187
|
] = False, # type: ignore
|
|
163
188
|
progress: Annotated[
|
|
164
|
-
|
|
189
|
+
Progress,
|
|
165
190
|
typer.Option(
|
|
166
191
|
help="The progress display type, must be one of 'interactive', 'log'",
|
|
167
192
|
envvar="PROGRESS",
|
|
168
193
|
),
|
|
169
|
-
] =
|
|
194
|
+
] = Progress.interactive, # type: ignore
|
|
170
195
|
sql_backend: Annotated[
|
|
171
|
-
|
|
196
|
+
SqlBackend,
|
|
172
197
|
typer.Option(
|
|
173
|
-
help="The SQL backend to use
|
|
198
|
+
help="The SQL backend to use",
|
|
174
199
|
envvar="SQL_BACKEND",
|
|
175
200
|
),
|
|
176
|
-
] =
|
|
201
|
+
] = SqlBackend.pyarrow, # type: ignore
|
|
177
202
|
loader_file_format: Annotated[
|
|
178
|
-
Optional[
|
|
203
|
+
Optional[LoaderFileFormat],
|
|
179
204
|
typer.Option(
|
|
180
|
-
help="The file format to use when loading data
|
|
205
|
+
help="The file format to use when loading data",
|
|
181
206
|
envvar="LOADER_FILE_FORMAT",
|
|
182
207
|
),
|
|
183
|
-
] =
|
|
208
|
+
] = None, # type: ignore
|
|
184
209
|
):
|
|
185
210
|
track(
|
|
186
211
|
"command_triggered",
|
|
@@ -210,15 +235,15 @@ def ingest(
|
|
|
210
235
|
original_incremental_strategy = incremental_strategy
|
|
211
236
|
|
|
212
237
|
merge_key = None
|
|
213
|
-
if incremental_strategy ==
|
|
238
|
+
if incremental_strategy == IncrementalStrategy.delete_insert:
|
|
214
239
|
merge_key = incremental_key
|
|
215
|
-
incremental_strategy =
|
|
240
|
+
incremental_strategy = IncrementalStrategy.merge
|
|
216
241
|
|
|
217
242
|
m = hashlib.sha256()
|
|
218
243
|
m.update(dest_table.encode("utf-8"))
|
|
219
244
|
|
|
220
245
|
progressInstance: Collector = SpinnerCollector()
|
|
221
|
-
if progress ==
|
|
246
|
+
if progress == Progress.log:
|
|
222
247
|
progressInstance = LogCollector()
|
|
223
248
|
|
|
224
249
|
pipeline = dlt.pipeline(
|
|
@@ -240,7 +265,7 @@ def ingest(
|
|
|
240
265
|
f"[bold yellow] Destination:[/bold yellow] {factory.destination_scheme} / {dest_table}"
|
|
241
266
|
)
|
|
242
267
|
print(
|
|
243
|
-
f"[bold yellow] Incremental Strategy:[/bold yellow] {incremental_strategy}"
|
|
268
|
+
f"[bold yellow] Incremental Strategy:[/bold yellow] {incremental_strategy.value}"
|
|
244
269
|
)
|
|
245
270
|
print(
|
|
246
271
|
f"[bold yellow] Incremental Key:[/bold yellow] {incremental_key if incremental_key else 'None'}"
|
|
@@ -266,24 +291,24 @@ def ingest(
|
|
|
266
291
|
merge_key=merge_key,
|
|
267
292
|
interval_start=interval_start,
|
|
268
293
|
interval_end=interval_end,
|
|
269
|
-
sql_backend=sql_backend,
|
|
294
|
+
sql_backend=sql_backend.value,
|
|
270
295
|
)
|
|
271
296
|
|
|
272
|
-
if original_incremental_strategy ==
|
|
297
|
+
if original_incremental_strategy == IncrementalStrategy.delete_insert:
|
|
273
298
|
dlt_source.incremental.primary_key = ()
|
|
274
299
|
|
|
275
300
|
if (
|
|
276
301
|
factory.destination_scheme in PARQUET_SUPPORTED_DESTINATIONS
|
|
277
|
-
and loader_file_format
|
|
302
|
+
and loader_file_format is None
|
|
278
303
|
):
|
|
279
|
-
loader_file_format =
|
|
304
|
+
loader_file_format = LoaderFileFormat.parquet
|
|
280
305
|
|
|
281
306
|
# if the source is a JSON returning source, we cannot use Parquet loader for BigQuery
|
|
282
|
-
if
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
307
|
+
if (
|
|
308
|
+
factory.destination_scheme == "bigquery"
|
|
309
|
+
and factory.source_scheme in JSON_RETURNING_SOURCES
|
|
310
|
+
):
|
|
311
|
+
loader_file_format = None
|
|
287
312
|
|
|
288
313
|
run_info = pipeline.run(
|
|
289
314
|
dlt_source,
|
|
@@ -291,9 +316,11 @@ def ingest(
|
|
|
291
316
|
uri=dest_uri,
|
|
292
317
|
table=dest_table,
|
|
293
318
|
),
|
|
294
|
-
write_disposition=incremental_strategy, # type: ignore
|
|
319
|
+
write_disposition=incremental_strategy.value, # type: ignore
|
|
295
320
|
primary_key=(primary_key if primary_key and len(primary_key) > 0 else None), # type: ignore
|
|
296
|
-
loader_file_format=loader_file_format
|
|
321
|
+
loader_file_format=loader_file_format.value
|
|
322
|
+
if loader_file_format is not None
|
|
323
|
+
else None, # type: ignore
|
|
297
324
|
)
|
|
298
325
|
|
|
299
326
|
destination.post_load()
|
ingestr/src/factory.py
CHANGED
ingestr/src/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.
|
|
1
|
+
__version__ = "0.4.0"
|
ingestr/testdata/test_append.db
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -31,6 +31,7 @@ Requires-Dist: rich==13.7.1
|
|
|
31
31
|
Requires-Dist: rudder-sdk-python==2.1.0
|
|
32
32
|
Requires-Dist: snowflake-sqlalchemy==1.5.3
|
|
33
33
|
Requires-Dist: sqlalchemy-bigquery==1.11.0
|
|
34
|
+
Requires-Dist: sqlalchemy-hana==2.0.0
|
|
34
35
|
Requires-Dist: sqlalchemy-redshift==0.8.14
|
|
35
36
|
Requires-Dist: sqlalchemy2-stubs==0.0.2a38
|
|
36
37
|
Requires-Dist: sqlalchemy==1.4.52
|
|
@@ -150,6 +151,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
150
151
|
<td>✅</td>
|
|
151
152
|
<td>❌</td>
|
|
152
153
|
</tr>
|
|
154
|
+
<tr>
|
|
155
|
+
<td>SAP Hana</td>
|
|
156
|
+
<td>✅</td>
|
|
157
|
+
<td>❌</td>
|
|
158
|
+
</tr>
|
|
153
159
|
<tr>
|
|
154
160
|
<td>SQLite</td>
|
|
155
161
|
<td>✅</td>
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
ingestr/main.py,sha256=
|
|
1
|
+
ingestr/main.py,sha256=tXw5VRr9yyGTvd_Wp5HBdSlTt3oe61vcFvnsyljFd7g,13853
|
|
2
2
|
ingestr/main_test.py,sha256=fXZw1qZd5CElrFSRnsI5003813LcIMGpGCMnGNFVhNI,19946
|
|
3
3
|
ingestr/src/destinations.py,sha256=_PIoAU-_tDEyX_-vDOgGB5eqXoGhPwtCRApHufj1ae4,6350
|
|
4
4
|
ingestr/src/destinations_test.py,sha256=rgEk8EpAntFbSOwXovC4prv3RA22mwq8pIO6sZ_rYzg,4212
|
|
5
|
-
ingestr/src/factory.py,sha256=
|
|
5
|
+
ingestr/src/factory.py,sha256=RYdl1etpU0Q7UDzcaCjKriaUvDiMpoYJZqATQW2sums,3126
|
|
6
6
|
ingestr/src/factory_test.py,sha256=X9sFkvNByWChIcyeDt1QiIPMIzGNKb7M5A_GUE0-nnI,664
|
|
7
7
|
ingestr/src/sources.py,sha256=Bl8Q1gXGqYWAZtwyF2CoaPB5CgAwoctXHwW9OZYK__8,3978
|
|
8
8
|
ingestr/src/sources_test.py,sha256=t94u1lYAspxzfe-DkxVtq5vw6xrLWphipvwntrwrzqg,3930
|
|
9
|
-
ingestr/src/version.py,sha256=
|
|
9
|
+
ingestr/src/version.py,sha256=42STGor_9nKYXumfeV5tiyD_M8VdcddX7CEexmibPBk,22
|
|
10
10
|
ingestr/src/mongodb/__init__.py,sha256=E7SDeCyYNkYZZ_RFhjCRDZUGpKtaxpPG5sFSmKJV62U,4336
|
|
11
11
|
ingestr/src/mongodb/helpers.py,sha256=80vtAeNyUn1iMN0CeLrTlKqYN6I6fHF81Kd2UuE8Kns,5653
|
|
12
12
|
ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
|
|
@@ -21,13 +21,13 @@ ingestr/src/sql_database/schema_types.py,sha256=foGHh4iGagGLfS7nF3uGYhBjqgX0jlrj
|
|
|
21
21
|
ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
|
|
22
22
|
ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
|
|
23
23
|
ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
|
|
24
|
-
ingestr/testdata/test_append.db,sha256=
|
|
25
|
-
ingestr/testdata/test_create_replace.db,sha256=
|
|
26
|
-
ingestr/testdata/test_delete_insert_with_timerange.db,sha256
|
|
27
|
-
ingestr/testdata/test_delete_insert_without_primary_key.db,sha256=
|
|
28
|
-
ingestr/testdata/test_merge_with_primary_key.db,sha256=
|
|
29
|
-
ingestr-0.
|
|
30
|
-
ingestr-0.
|
|
31
|
-
ingestr-0.
|
|
32
|
-
ingestr-0.
|
|
33
|
-
ingestr-0.
|
|
24
|
+
ingestr/testdata/test_append.db,sha256=rJUEAoJMQL5G1WETOHPcWt-ircGQJDg6NMDxMloux8g,798720
|
|
25
|
+
ingestr/testdata/test_create_replace.db,sha256=wwbu-ByA_JNKcRzgKZNGiXmSjrD1kaHcHixmvYht5Tc,798720
|
|
26
|
+
ingestr/testdata/test_delete_insert_with_timerange.db,sha256=-ITVU-6b2XeSBsnckjfziGGVnWnGobzXgU78ikzHw4g,1585152
|
|
27
|
+
ingestr/testdata/test_delete_insert_without_primary_key.db,sha256=KkcYdlKwB-pd9_N1bo6b2MuhLBD5s77muFGvAWarMjk,1847296
|
|
28
|
+
ingestr/testdata/test_merge_with_primary_key.db,sha256=Jc94_5H4q3liUDroyWQExrWT_3lSpWFoz6vne_s6KHI,1847296
|
|
29
|
+
ingestr-0.4.0.dist-info/METADATA,sha256=BhqFBbVuQdkJ4mm706Pe55WNHBLGPH6n_x7QMrEqsDQ,5434
|
|
30
|
+
ingestr-0.4.0.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
|
|
31
|
+
ingestr-0.4.0.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
32
|
+
ingestr-0.4.0.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
33
|
+
ingestr-0.4.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|