ingestr 0.0.3__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ingestr might be problematic.
- ingestr/main.py +180 -115
- ingestr/main_test.py +579 -0
- ingestr/src/factory.py +5 -3
- ingestr/src/sources.py +7 -3
- ingestr/src/sources_test.py +4 -2
- ingestr/src/sql_database/__init__.py +3 -47
- ingestr/src/sql_database/helpers.py +0 -1
- ingestr/src/telemetry/event.py +14 -0
- ingestr/testdata/test_append.db +0 -0
- ingestr/testdata/test_create_replace.db +0 -0
- ingestr/testdata/test_delete_insert_with_timerange.db +0 -0
- ingestr/testdata/test_delete_insert_without_primary_key.db +0 -0
- ingestr/testdata/test_merge_with_primary_key.db +0 -0
- {ingestr-0.0.3.dist-info → ingestr-0.0.4.dist-info}/METADATA +19 -6
- ingestr-0.0.4.dist-info/RECORD +23 -0
- ingestr-0.0.3.dist-info/RECORD +0 -16
- {ingestr-0.0.3.dist-info → ingestr-0.0.4.dist-info}/WHEEL +0 -0
- {ingestr-0.0.3.dist-info → ingestr-0.0.4.dist-info}/entry_points.txt +0 -0
- {ingestr-0.0.3.dist-info → ingestr-0.0.4.dist-info}/licenses/LICENSE.md +0 -0
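The headline change in 0.0.4 is the richer incremental-loading surface of the `ingest` command: `--incremental-strategy` now accepts `delete+insert` alongside `replace`, `append` and `merge`, and new `--interval-start`, `--interval-end` and `--primary-key` options control what gets loaded. A minimal sketch of exercising the new flags through typer's test runner, mirroring the pattern used in the added `main_test.py`; the DuckDB URI and table names below are placeholders, not part of the release:

# Sketch only: drives the new 0.0.4 flags via typer's CliRunner, as main_test.py does.
from typer.testing import CliRunner

from ingestr.main import app

runner = CliRunner()
result = runner.invoke(
    app,
    [
        "ingest",
        "--source-uri", "duckdb:///example.db",          # placeholder URI
        "--source-table", "testschema.input",
        "--dest-uri", "duckdb:///example.db",
        "--dest-table", "testschema.output",
        "--incremental-strategy", "delete+insert",        # new: mapped internally to a dlt merge
        "--incremental-key", "updated_at",
        "--interval-start", "2022-01-01T00:00:00Z",       # new in 0.0.4
        "--interval-end", "2022-01-02T00:00:00Z",         # end of the interval is exclusive
    ],
    input="y\n",                                          # confirm the "continue?" prompt
    env={"DISABLE_TELEMETRY": "true"},                    # skip the new telemetry events
)
assert result.exit_code == 0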
ingestr/main.py
CHANGED
@@ -1,209 +1,274 @@
 import hashlib
+from datetime import datetime
+from typing import Optional
 
 import dlt
+import humanize
 import typer
-
-from ingestr.src.factory import SourceDestinationFactory
 from rich.console import Console
-from dlt.common.pipeline import LoadInfo
-import humanize
 from typing_extensions import Annotated
 
+from ingestr.src.factory import SourceDestinationFactory
+from ingestr.src.telemetry.event import track
+
 app = typer.Typer(
     name="ingestr",
     help="ingestr is the CLI tool to ingest data from one source to another",
     rich_markup_mode="rich",
 )
 
-
 console = Console()
 print = console.print
 
+DATE_FORMATS = [
+    "%Y-%m-%d",
+    "%Y-%m-%dT%H:%M:%S",
+    "%Y-%m-%dT%H:%M:%S%z",
+    "%Y-%m-%d %H:%M:%S",
+    "%Y-%m-%dT%H:%M:%S.%f",
+    "%Y-%m-%dT%H:%M:%S.%f%z",
+]
+
 
 @app.command()
 def ingest(
-    source_uri: Annotated[
-
-
-
-
-
-
-
-
-
-
-
-
-
-    factory = SourceDestinationFactory(source_uri, dest_uri)
-    source = factory.get_source()
-    destination = factory.get_destination()
-
-    m = hashlib.sha256()
-    m.update(dest_table.encode("utf-8"))
-    pipeline_name = m.hexdigest()
-    short_pipeline_name = pipeline_name[:8]
-
-    pipeline = dlt.pipeline(
-        pipeline_name=pipeline_name,
-        destination=destination.dlt_dest(
-            uri=dest_uri,
+    source_uri: Annotated[
+        str, typer.Option(help="The URI of the [green]source[/green]")
+    ],  # type: ignore
+    dest_uri: Annotated[
+        str, typer.Option(help="The URI of the [cyan]destination[/cyan]")
+    ],  # type: ignore
+    source_table: Annotated[
+        str, typer.Option(help="The table name in the [green]source[/green] to fetch")
+    ],  # type: ignore
+    dest_table: Annotated[
+        str,
+        typer.Option(
+            help="The table in the [cyan]destination[/cyan] to save the data into"
         ),
-
-
-
-
-
-
-
-
-
-
-
-        f"[bold yellow] Destination:[/bold yellow] {factory.destination_scheme} / {dest_table}"
-    )
-    print(f"[bold yellow] Incremental Strategy:[/bold yellow] {incremental_strategy}")
-    print(
-        f"[bold yellow] Incremental Key:[/bold yellow] {incremental_key if incremental_key else 'None'}"
-    )
-    print()
-
-    continuePipeline = typer.confirm("Are you sure you would like to continue?")
-    if not continuePipeline:
-        raise typer.Abort()
-
-    print()
-    print(f"[bold green]Starting the ingestion...[/bold green]")
-    print()
-
-    incremental = []
-    if incremental_key:
-        incremental = [incremental_key]
-
-    run_info = pipeline.run(
-        source.dlt_source(
-            uri=source_uri,
-            table=source_table,
-            incremental_key=incremental_key,
-            incremental_strategy=incremental_strategy,
+    ] = None,  # type: ignore
+    incremental_key: Annotated[
+        str,
+        typer.Option(
+            help="The incremental key from the table to be used for incremental strategies"
+        ),
+    ] = None,  # type: ignore
+    incremental_strategy: Annotated[
+        str,
+        typer.Option(
+            help="The incremental strategy to use, must be one of 'replace', 'append', 'delete+insert', or 'merge'"
         ),
-
-
-
+    ] = "replace",  # type: ignore
+    interval_start: Annotated[
+        Optional[datetime],
+        typer.Option(
+            help="The start of the interval the incremental key will cover",
+            formats=DATE_FORMATS,
         ),
-
-
+    ] = None,  # type: ignore
+    interval_end: Annotated[
+        Optional[datetime],
+        typer.Option(
+            help="The end of the interval the incremental key will cover",
+            formats=DATE_FORMATS,
+        ),
+    ] = None,  # type: ignore
+    primary_key: Annotated[Optional[list[str]], typer.Option(help="The merge ")] = None,  # type: ignore
+):
+    track(
+        "command_triggered",
+        {
+            "command": "ingest",
+        },
     )
 
-
-
-
-
+    try:
+        if not dest_table:
+            print()
+            print(
+                "[yellow]Destination table is not given, defaulting to the source table.[/yellow]"
+            )
+            dest_table = source_table
+
+        merge_key = None
+        if incremental_strategy == "delete+insert":
+            merge_key = incremental_key
+            incremental_strategy = "merge"
+
+        factory = SourceDestinationFactory(source_uri, dest_uri)
+        source = factory.get_source()
+        destination = factory.get_destination()
+
+        m = hashlib.sha256()
+        m.update(dest_table.encode("utf-8"))
+
+        pipeline = dlt.pipeline(
+            pipeline_name=m.hexdigest(),
+            destination=destination.dlt_dest(
+                uri=dest_uri,
+            ),
+            progress=dlt.progress.log(dump_system_stats=False),
+            pipelines_dir="pipeline_data",
+            dataset_name="testschema",
+        )
 
-
-
-
+        print()
+        print("[bold green]Initiated the pipeline with the following:[/bold green]")
+        print(
+            f"[bold yellow] Source:[/bold yellow] {factory.source_scheme} / {source_table}"
+        )
+        print(
+            f"[bold yellow] Destination:[/bold yellow] {factory.destination_scheme} / {dest_table}"
+        )
+        print(
+            f"[bold yellow] Incremental Strategy:[/bold yellow] {incremental_strategy}"
+        )
+        print(
+            f"[bold yellow] Incremental Key:[/bold yellow] {incremental_key if incremental_key else 'None'}"
+        )
+        print()
 
-
+        continuePipeline = typer.confirm("Are you sure you would like to continue?")
+        if not continuePipeline:
+            track("command_finished", {"command": "ingest", "status": "aborted"})
+            raise typer.Abort()
 
+        print()
+        print("[bold green]Starting the ingestion...[/bold green]")
+        print()
 
-
-
-
-
-
+        run_info = pipeline.run(
+            source.dlt_source(
+                uri=source_uri,
+                table=source_table,
+                incremental_key=incremental_key,
+                merge_key=merge_key,
+                interval_start=interval_start,
+                interval_end=interval_end,
+            ),
+            **destination.dlt_run_params(
+                uri=dest_uri,
+                table=dest_table,
+            ),
+            write_disposition=incremental_strategy,  # type: ignore
+            primary_key=(primary_key if primary_key and len(primary_key) > 0 else None),  # type: ignore
        )
 
-
-
-
+        elapsedHuman = ""
+        if run_info.started_at:
+            elapsed = run_info.finished_at - run_info.started_at
+            elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
 
-
-            f" {connector} {len(info.loads_ids)} load package{'s were' if len(info.loads_ids) > 1 else ' was'} loaded to destination [bold cyan]{info.destination_name}[/bold cyan] and into dataset [bold cyan]{info.dataset_name}[/bold cyan]",
-            highlight=False,
-        )
-        if info.staging_name:
+        print()
         print(
-            f"
+            f"[bold green]Successfully finished loading data from '{factory.source_scheme}' to '{factory.destination_scheme}' {elapsedHuman} [/bold green]"
        )
+        print()
+        track(
+            "command_finished",
+            {
+                "command": "ingest",
+                "status": "success",
+            },
+        )
+
+    except Exception as e:
+        track(
+            "command_finished",
+            {"command": "ingest", "status": "failed", "error": str(e)},
+        )
+        raise
 
 
 @app.command()
 def example_uris():
+    track(
+        "command_triggered",
+        {
+            "command": "example-uris",
+        },
+    )
+
     print()
     typer.echo(
-
+        "Following are some example URI formats for supported sources and destinations:"
     )
 
     print()
     print(
-
+        "[bold green]Postgres:[/bold green] [white]postgres://user:password@host:port/dbname?sslmode=require [/white]"
     )
     print(
-
+        "[white dim]└── https://docs.sqlalchemy.org/en/20/core/engines.html#postgresql[/white dim]"
     )
 
     print()
     print(
-
+        "[bold green]BigQuery:[/bold green] [white]bigquery://project-id?credentials_path=/path/to/credentials.json&location=US [/white]"
     )
     print(
-
+        "[white dim]└── https://github.com/googleapis/python-bigquery-sqlalchemy?tab=readme-ov-file#connection-string-parameters[/white dim]"
     )
 
     print()
     print(
-
+        "[bold green]Snowflake:[/bold green] [white]snowflake://user:password@account/dbname?warehouse=COMPUTE_WH [/white]"
     )
     print(
-
+        "[white dim]└── https://docs.snowflake.com/en/developer-guide/python-connector/sqlalchemy#connection-parameters"
     )
 
     print()
     print(
-
+        "[bold green]Redshift:[/bold green] [white]redshift://user:password@host:port/dbname?sslmode=require [/white]"
     )
     print(
-
+        "[white dim]└── https://aws.amazon.com/blogs/big-data/use-the-amazon-redshift-sqlalchemy-dialect-to-interact-with-amazon-redshift/[/white dim]"
     )
 
     print()
     print(
-
+        "[bold green]Databricks:[/bold green] [white]databricks://token:<access_token>@<server_hostname>?http_path=<http_path>&catalog=<catalog>&schema=<schema>[/white]"
     )
-    print(
+    print("[white dim]└── https://docs.databricks.com/en/dev-tools/sqlalchemy.html")
 
     print()
     print(
-
+        "[bold green]Microsoft SQL Server:[/bold green] [white]mssql://user:password@host:port/dbname?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=yes [/white]"
     )
     print(
-
+        "[white dim]└── https://docs.sqlalchemy.org/en/20/core/engines.html#microsoft-sql-server"
     )
 
     print()
     print(
-
+        "[bold green]MySQL:[/bold green] [white]mysql://user:password@host:port/dbname [/white]"
     )
     print(
-
+        "[white dim]└── https://docs.sqlalchemy.org/en/20/core/engines.html#mysql[/white dim]"
     )
 
     print()
-    print(
-    print(
+    print("[bold green]DuckDB:[/bold green] [white]duckdb://path/to/database [/white]")
+    print("[white dim]└── https://github.com/Mause/duckdb_engine[/white dim]")
 
     print()
-    print(
+    print("[bold green]SQLite:[/bold green] [white]sqlite://path/to/database [/white]")
     print(
-
+        "[white dim]└── https://docs.sqlalchemy.org/en/20/core/engines.html#sqlite[/white dim]"
     )
 
     print()
     typer.echo(
         "These are all coming from SQLAlchemy's URI format, so they should be familiar to most users."
     )
+    track(
+        "command_finished",
+        {
+            "command": "example-uris",
+            "status": "success",
+        },
+    )
 
 
 def main():
ingestr/main_test.py
ADDED
@@ -0,0 +1,579 @@
import os
import shutil

import duckdb
import pytest
from typer.testing import CliRunner

from ingestr.main import app

runner = CliRunner()


def get_abs_path(relative_path):
    return os.path.abspath(os.path.join(os.path.dirname(__file__), relative_path))


def invoke_ingest_command(
    source_uri,
    source_table,
    dest_uri,
    dest_table,
    inc_strategy=None,
    inc_key=None,
    primary_key=None,
    merge_key=None,
    interval_start=None,
    interval_end=None,
):
    args = [
        "ingest",
        "--source-uri",
        source_uri,
        "--source-table",
        source_table,
        "--dest-uri",
        dest_uri,
        "--dest-table",
        dest_table,
    ]

    if inc_strategy:
        args.append("--incremental-strategy")
        args.append(inc_strategy)

    if inc_key:
        args.append("--incremental-key")
        args.append(inc_key)

    if primary_key:
        args.append("--primary-key")
        args.append(primary_key)

    if merge_key:
        args.append("--merge-key")
        args.append(merge_key)

    if interval_start:
        args.append("--interval-start")
        args.append(interval_start)

    if interval_end:
        args.append("--interval-end")
        args.append(interval_end)

    result = runner.invoke(
        app,
        args,
        input="y\n",
        env={"DISABLE_TELEMETRY": "true"},
    )
    return result


def test_create_replace():
    abs_db_path = get_abs_path("./testdata/test_create_replace.db")
    rel_db_path_to_command = "ingestr/testdata/test_create_replace.db"

    conn = duckdb.connect(abs_db_path)
    conn.execute("DROP SCHEMA IF EXISTS testschema CASCADE")
    conn.execute("CREATE SCHEMA testschema")
    conn.execute(
        "CREATE TABLE testschema.input (id INTEGER, val VARCHAR, updated_at TIMESTAMP)"
    )
    conn.execute("INSERT INTO testschema.input VALUES (1, 'val1', '2022-01-01')")
    conn.execute("INSERT INTO testschema.input VALUES (2, 'val2', '2022-02-01')")

    res = conn.sql("select count(*) from testschema.input").fetchall()
    assert res[0][0] == 2

    result = invoke_ingest_command(
        f"duckdb:///{rel_db_path_to_command}",
        "testschema.input",
        f"duckdb:///{rel_db_path_to_command}",
        "testschema.output",
    )

    assert result.exit_code == 0

    res = conn.sql(
        "select id, val, strftime(updated_at, '%Y-%m-%d') as updated_at from testschema.output"
    ).fetchall()
    assert len(res) == 2
    assert res[0] == (1, "val1", "2022-01-01")
    assert res[1] == (2, "val2", "2022-02-01")


@pytest.mark.skip(
    reason="this doesn't work at the moment due to a bug with dlt: https://github.com/dlt-hub/dlt/issues/971"
)
def test_append():
    try:
        shutil.rmtree(get_abs_path("../pipeline_data"))
    except Exception:
        pass

    abs_db_path = get_abs_path("./testdata/test_append.db")
    rel_db_path_to_command = "ingestr/testdata/test_append.db"
    uri = f"duckdb:///{rel_db_path_to_command}"

    conn = duckdb.connect(abs_db_path)
    conn.execute("DROP SCHEMA IF EXISTS testschema_append CASCADE")
    conn.execute("CHECKPOINT")

    conn.execute("CREATE SCHEMA testschema_append")
    conn.execute(
        "CREATE TABLE testschema_append.input (id INTEGER, val VARCHAR, updated_at DATE)"
    )
    conn.execute(
        "INSERT INTO testschema_append.input VALUES (1, 'val1', '2022-01-01'), (2, 'val2', '2022-01-02')"
    )
    conn.execute("CHECKPOINT")

    res = conn.sql("select count(*) from testschema_append.input").fetchall()
    assert res[0][0] == 2

    def run():
        res = invoke_ingest_command(
            uri,
            "testschema_append.input",
            uri,
            "testschema_append.output",
            "append",
            "updated_at",
        )
        assert res.exit_code == 0

    def get_output_table():
        conn.execute("CHECKPOINT")
        return conn.sql(
            "select id, val, strftime(updated_at, '%Y-%m-%d') as updated_at from testschema_append.output"
        ).fetchall()

    run()

    res = get_output_table()
    assert len(res) == 2
    assert res[0] == (1, "val1", "2022-01-01")
    assert res[1] == (2, "val2", "2022-01-02")

    # # run again, nothing should be inserted into the output table
    run()

    res = get_output_table()
    assert len(res) == 2
    assert res[0] == (1, "val1", "2022-01-01")
    assert res[1] == (2, "val2", "2022-02-01")


def test_merge_with_primary_key():
    try:
        shutil.rmtree(get_abs_path("../pipeline_data"))
    except Exception:
        pass

    abs_db_path = get_abs_path("./testdata/test_merge_with_primary_key.db")
    rel_db_path_to_command = "ingestr/testdata/test_merge_with_primary_key.db"
    uri = f"duckdb:///{rel_db_path_to_command}"

    conn = duckdb.connect(abs_db_path)
    conn.execute("DROP SCHEMA IF EXISTS testschema_merge CASCADE")
    conn.execute("CREATE SCHEMA testschema_merge")
    conn.execute(
        "CREATE TABLE testschema_merge.input (id INTEGER, val VARCHAR, updated_at TIMESTAMP)"
    )
    conn.execute("INSERT INTO testschema_merge.input VALUES (1, 'val1', '2022-01-01')")
    conn.execute("INSERT INTO testschema_merge.input VALUES (2, 'val2', '2022-02-01')")

    res = conn.sql("select count(*) from testschema_merge.input").fetchall()
    assert res[0][0] == 2

    def run():
        res = invoke_ingest_command(
            uri,
            "testschema_merge.input",
            uri,
            "testschema_merge.output",
            "merge",
            "updated_at",
            "id",
        )
        assert res.exit_code == 0
        return res

    def get_output_rows():
        conn.execute("CHECKPOINT")
        return conn.sql(
            "select id, val, strftime(updated_at, '%Y-%m-%d') as updated_at from testschema_merge.output order by id asc"
        ).fetchall()

    def assert_output_equals(expected):
        res = get_output_rows()
        assert len(res) == len(expected)
        for i, row in enumerate(expected):
            assert res[i] == row

    run()
    assert_output_equals([(1, "val1", "2022-01-01"), (2, "val2", "2022-02-01")])

    first_run_id = conn.sql(
        "select _dlt_load_id from testschema_merge.output limit 1"
    ).fetchall()[0][0]

    ##############################
    # we'll run again, we don't expect any changes since the data hasn't changed
    run()
    assert_output_equals([(1, "val1", "2022-01-01"), (2, "val2", "2022-02-01")])

    # we also ensure that the other rows were not touched
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_merge.output group by 1"
    ).fetchall()
    assert len(count_by_run_id) == 1
    assert count_by_run_id[0][1] == 2
    assert count_by_run_id[0][0] == first_run_id
    ##############################

    ##############################
    # now we'll modify the source data but not the updated at, the output table should not be updated
    conn.execute("UPDATE testschema_merge.input SET val = 'val1_modified' WHERE id = 2")

    run()
    assert_output_equals([(1, "val1", "2022-01-01"), (2, "val2", "2022-02-01")])

    # we also ensure that the other rows were not touched
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_merge.output group by 1"
    ).fetchall()
    assert len(count_by_run_id) == 1
    assert count_by_run_id[0][1] == 2
    assert count_by_run_id[0][0] == first_run_id
    ##############################

    ##############################
    # now we'll insert a new row but with an old date, the new row will not show up
    conn.execute("INSERT INTO testschema_merge.input VALUES (3, 'val3', '2022-01-01')")

    run()
    assert_output_equals([(1, "val1", "2022-01-01"), (2, "val2", "2022-02-01")])

    # we also ensure that the other rows were not touched
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_merge.output group by 1"
    ).fetchall()
    assert len(count_by_run_id) == 1
    assert count_by_run_id[0][1] == 2
    assert count_by_run_id[0][0] == first_run_id
    ##############################

    ##############################
    # now we'll insert a new row but with a new date, the new row will show up
    conn.execute("INSERT INTO testschema_merge.input VALUES (3, 'val3', '2022-02-02')")

    run()
    assert_output_equals(
        [
            (1, "val1", "2022-01-01"),
            (2, "val2", "2022-02-01"),
            (3, "val3", "2022-02-02"),
        ]
    )

    # we have a new run that inserted rows to this table, so the run count should be 2
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_merge.output group by 1 order by 2 desc"
    ).fetchall()
    assert len(count_by_run_id) == 2
    assert count_by_run_id[0][1] == 2
    assert count_by_run_id[0][0] == first_run_id
    # we don't care about the run ID
    assert count_by_run_id[1][1] == 1
    ##############################

    ##############################
    # lastly, let's try modifying the updated_at of an old column, it should be updated in the output table
    conn.execute(
        "UPDATE testschema_merge.input SET val='val2_modified', updated_at = '2022-02-03' WHERE id = 2"
    )

    run()
    assert_output_equals(
        [
            (1, "val1", "2022-01-01"),
            (2, "val2_modified", "2022-02-03"),
            (3, "val3", "2022-02-02"),
        ]
    )

    # we have a new run that inserted rows to this table, so the run count should be 2
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_merge.output group by 1 order by 2 desc, 1 asc"
    ).fetchall()
    assert len(count_by_run_id) == 3
    assert count_by_run_id[0][1] == 1
    assert count_by_run_id[0][0] == first_run_id
    # we don't care about the rest of the run IDs
    assert count_by_run_id[1][1] == 1
    assert count_by_run_id[2][1] == 1
    ##############################


def test_delete_insert_without_primary_key():
    try:
        shutil.rmtree(get_abs_path("../pipeline_data"))
    except Exception:
        pass

    abs_db_path = get_abs_path("./testdata/test_delete_insert_without_primary_key.db")
    rel_db_path_to_command = (
        "ingestr/testdata/test_delete_insert_without_primary_key.db"
    )
    uri = f"duckdb:///{rel_db_path_to_command}"

    conn = duckdb.connect(abs_db_path)
    conn.execute("DROP SCHEMA IF EXISTS testschema_delete_insert CASCADE")
    conn.execute("CREATE SCHEMA testschema_delete_insert")
    conn.execute(
        "CREATE TABLE testschema_delete_insert.input (id INTEGER, val VARCHAR, updated_at TIMESTAMP)"
    )
    conn.execute(
        "INSERT INTO testschema_delete_insert.input VALUES (1, 'val1', '2022-01-01')"
    )
    conn.execute(
        "INSERT INTO testschema_delete_insert.input VALUES (2, 'val2', '2022-02-01')"
    )

    res = conn.sql("select count(*) from testschema_delete_insert.input").fetchall()
    assert res[0][0] == 2

    def run():
        res = invoke_ingest_command(
            uri,
            "testschema_delete_insert.input",
            uri,
            "testschema_delete_insert.output",
            inc_strategy="delete+insert",
            inc_key="updated_at",
        )
        assert res.exit_code == 0
        return res

    def get_output_rows():
        conn.execute("CHECKPOINT")
        return conn.sql(
            "select id, val, strftime(updated_at, '%Y-%m-%d') as updated_at from testschema_delete_insert.output order by id asc"
        ).fetchall()

    def assert_output_equals(expected):
        res = get_output_rows()
        assert len(res) == len(expected)
        for i, row in enumerate(expected):
            assert res[i] == row

    run()
    assert_output_equals([(1, "val1", "2022-01-01"), (2, "val2", "2022-02-01")])

    first_run_id = conn.sql(
        "select _dlt_load_id from testschema_delete_insert.output limit 1"
    ).fetchall()[0][0]

    ##############################
    # we'll run again, since this is a delete+insert, we expect the run ID to change for the last one
    run()
    assert_output_equals([(1, "val1", "2022-01-01"), (2, "val2", "2022-02-01")])

    # we ensure that one of the rows is updated with a new run
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_delete_insert.output group by 1 order by 1 asc"
    ).fetchall()
    assert len(count_by_run_id) == 2
    assert count_by_run_id[0][0] == first_run_id
    assert count_by_run_id[0][1] == 1
    assert count_by_run_id[1][0] != first_run_id
    assert count_by_run_id[1][1] == 1
    ##############################

    ##############################
    # now we'll insert a few more lines for the same day, the new rows should show up
    conn.execute(
        "INSERT INTO testschema_delete_insert.input VALUES (3, 'val3', '2022-02-01'), (4, 'val4', '2022-02-01')"
    )
    conn.execute("CHECKPOINT")

    run()
    assert_output_equals(
        [
            (1, "val1", "2022-01-01"),
            (2, "val2", "2022-02-01"),
            (3, "val3", "2022-02-01"),
            (4, "val4", "2022-02-01"),
        ]
    )

    # the new rows should have a new run ID, there should be 2 distinct runs now
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_delete_insert.output group by 1 order by 2 desc, 1 asc"
    ).fetchall()
    assert len(count_by_run_id) == 2
    assert count_by_run_id[0][0] != first_run_id
    assert count_by_run_id[0][1] == 3  # 2 new rows + 1 old row
    assert count_by_run_id[1][0] == first_run_id
    assert count_by_run_id[1][1] == 1
    ##############################


def test_delete_insert_with_timerange():
    try:
        shutil.rmtree(get_abs_path("../pipeline_data"))
    except Exception:
        pass

    abs_db_path = get_abs_path("./testdata/test_delete_insert_with_timerange.db")
    rel_db_path_to_command = "ingestr/testdata/test_delete_insert_with_timerange.db"
    uri = f"duckdb:///{rel_db_path_to_command}"

    conn = duckdb.connect(abs_db_path)
    conn.execute("DROP SCHEMA IF EXISTS testschema_delete_insert_timerange CASCADE")
    conn.execute("CREATE SCHEMA testschema_delete_insert_timerange")
    conn.execute(
        "CREATE TABLE testschema_delete_insert_timerange.input (id INTEGER, val VARCHAR, updated_at TIMESTAMP)"
    )
    conn.execute(
        """INSERT INTO testschema_delete_insert_timerange.input VALUES
            (1, 'val1', '2022-01-01'),
            (2, 'val2', '2022-01-01'),
            (3, 'val3', '2022-01-02'),
            (4, 'val4', '2022-01-02'),
            (5, 'val5', '2022-01-03'),
            (6, 'val6', '2022-01-03')
        """
    )

    res = conn.sql(
        "select count(*) from testschema_delete_insert_timerange.input"
    ).fetchall()
    assert res[0][0] == 6

    def run(start_date: str, end_date: str):
        res = invoke_ingest_command(
            uri,
            "testschema_delete_insert_timerange.input",
            uri,
            "testschema_delete_insert_timerange.output",
            inc_strategy="delete+insert",
            inc_key="updated_at",
            interval_start=start_date,
            interval_end=end_date,
        )
        assert res.exit_code == 0
        return res

    def get_output_rows():
        conn.execute("CHECKPOINT")
        return conn.sql(
            "select id, val, strftime(updated_at, '%Y-%m-%d') as updated_at from testschema_delete_insert_timerange.output order by id asc"
        ).fetchall()

    def assert_output_equals(expected):
        res = get_output_rows()
        assert len(res) == len(expected)
        for i, row in enumerate(expected):
            assert res[i] == row

    run(
        "2022-01-01T00:00:00Z", "2022-01-02T00:00:00Z"
    )  # dlt runs them with the end date exclusive
    assert_output_equals([(1, "val1", "2022-01-01"), (2, "val2", "2022-01-01")])

    first_run_id = conn.sql(
        "select _dlt_load_id from testschema_delete_insert_timerange.output limit 1"
    ).fetchall()[0][0]

    ##############################
    # we'll run again, since this is a delete+insert, we expect the run ID to change for the last one
    run(
        "2022-01-01T00:00:00Z", "2022-01-02T00:00:00Z"
    )  # dlt runs them with the end date exclusive
    assert_output_equals([(1, "val1", "2022-01-01"), (2, "val2", "2022-01-01")])

    # both rows should have a new run ID
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_delete_insert_timerange.output group by 1 order by 1 asc"
    ).fetchall()
    assert len(count_by_run_id) == 1
    assert count_by_run_id[0][0] != first_run_id
    assert count_by_run_id[0][1] == 2
    ##############################

    ##############################
    # now run for the day after, new rows should land
    run("2022-01-02T00:00:00Z", "2022-01-03T00:00:00Z")
    assert_output_equals(
        [
            (1, "val1", "2022-01-01"),
            (2, "val2", "2022-01-01"),
            (3, "val3", "2022-01-02"),
            (4, "val4", "2022-01-02"),
        ]
    )

    # there should be 4 rows with 2 distinct run IDs
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_delete_insert_timerange.output group by 1 order by 1 asc"
    ).fetchall()
    assert len(count_by_run_id) == 2
    assert count_by_run_id[0][1] == 2
    assert count_by_run_id[1][1] == 2
    ##############################

    ##############################
    # let's bring in the rows for the third day
    run("2022-01-03T00:00:00Z", "2022-01-04T00:00:00Z")
    assert_output_equals(
        [
            (1, "val1", "2022-01-01"),
            (2, "val2", "2022-01-01"),
            (3, "val3", "2022-01-02"),
            (4, "val4", "2022-01-02"),
            (5, "val5", "2022-01-03"),
            (6, "val6", "2022-01-03"),
        ]
    )

    # there should be 6 rows with 3 distinct run IDs
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_delete_insert_timerange.output group by 1 order by 1 asc"
    ).fetchall()
    assert len(count_by_run_id) == 3
    assert count_by_run_id[0][1] == 2
    assert count_by_run_id[1][1] == 2
    assert count_by_run_id[2][1] == 2
    ##############################

    ##############################
    # now let's do a backfill for the first day again, the rows should be updated
    conn.execute(
        "UPDATE testschema_delete_insert_timerange.input SET val = 'val1_modified' WHERE id = 1"
    )

    run("2022-01-01T00:00:00Z", "2022-01-02T00:00:00Z")
    assert_output_equals(
        [
            (1, "val1_modified", "2022-01-01"),
            (2, "val2", "2022-01-01"),
            (3, "val3", "2022-01-02"),
            (4, "val4", "2022-01-02"),
            (5, "val5", "2022-01-03"),
            (6, "val6", "2022-01-03"),
        ]
    )

    # there should still be 6 rows with 3 distinct run IDs
    count_by_run_id = conn.sql(
        "select _dlt_load_id, count(*) from testschema_delete_insert_timerange.output group by 1 order by 1 asc"
    ).fetchall()
    assert len(count_by_run_id) == 3
    assert count_by_run_id[0][1] == 2
    assert count_by_run_id[1][1] == 2
    assert count_by_run_id[2][1] == 2
    ##############################
ingestr/src/factory.py
CHANGED
@@ -3,6 +3,7 @@ from urllib.parse import urlparse
 
 from ingestr.src.destinations import (
     BigQueryDestination,
+    DatabricksDestination,
     DuckDBDestination,
     MsSQLDestination,
     PostgresDestination,
@@ -59,12 +60,13 @@ class SourceDestinationFactory:
     def get_destination(self) -> DestinationProtocol:
         match: dict[str, DestinationProtocol] = {
             "bigquery": BigQueryDestination(),
+            "databricks": DatabricksDestination(),
+            "duckdb": DuckDBDestination(),
+            "mssql": MsSQLDestination(),
             "postgres": PostgresDestination(),
             "postgresql": PostgresDestination(),
-            "snowflake": SnowflakeDestination(),
             "redshift": RedshiftDestination(),
-            "
-            "mssql": MsSQLDestination(),
+            "snowflake": SnowflakeDestination(),
         }
 
         if self.destination_scheme in match:
ingestr/src/sources.py
CHANGED
@@ -1,7 +1,6 @@
 from typing import Callable
 
 import dlt
-import pendulum
 
 from ingestr.src.sql_database import sql_table
 
@@ -19,10 +18,14 @@ class SqlSource:
 
         incremental = None
         if kwargs.get("incremental_key"):
+            start_value = kwargs.get("interval_start")
+            end_value = kwargs.get("interval_end")
+
             incremental = dlt.sources.incremental(
                 kwargs.get("incremental_key", ""),
-                primary_key=(),
-                initial_value=
+                # primary_key=(),
+                initial_value=start_value,
+                end_value=end_value,
             )
 
         table_instance = self.table_builder(
@@ -30,6 +33,7 @@ class SqlSource:
             schema=table_fields[-2],
             table=table_fields[-1],
             incremental=incremental,
+            merge_key=kwargs.get("merge_key"),
         )
 
         return table_instance
ingestr/src/sources_test.py
CHANGED
@@ -22,11 +22,12 @@ class SqlSourceTest(unittest.TestCase):
         table = "schema.table"
 
         # monkey patch the sql_table function
-        def sql_table(credentials, schema, table, incremental):
+        def sql_table(credentials, schema, table, incremental, merge_key):
             self.assertEqual(credentials, uri)
             self.assertEqual(schema, "schema")
             self.assertEqual(table, "table")
             self.assertIsNone(incremental)
+            self.assertIsNone(merge_key)
             return dlt.resource()
 
         source = SqlSource(table_builder=sql_table)
@@ -39,12 +40,13 @@ class SqlSourceTest(unittest.TestCase):
         incremental_key = "id"
 
         # monkey patch the sql_table function
-        def sql_table(credentials, schema, table, incremental):
+        def sql_table(credentials, schema, table, incremental, merge_key):
             self.assertEqual(credentials, uri)
             self.assertEqual(schema, "schema")
             self.assertEqual(table, "table")
             self.assertIsInstance(incremental, dlt.sources.incremental)
             self.assertEqual(incremental.cursor_path, incremental_key)
+            self.assertIsNone(merge_key)
             return dlt.resource()
 
         source = SqlSource(table_builder=sql_table)
ingestr/src/sql_database/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 """Source that loads tables form any SQLAlchemy supported database, supports batching requests and incremental loads."""
 
-from typing import Any,
+from typing import Any, Optional, Union
 
 import dlt
 from dlt.sources import DltResource
@@ -9,7 +9,6 @@ from sqlalchemy import MetaData, Table
 from sqlalchemy.engine import Engine
 
 from .helpers import (
-    SqlDatabaseTableConfiguration,
     engine_from_credentials,
     get_primary_key,
     table_rows,
@@ -17,51 +16,6 @@ from .helpers import (
 from .schema_types import table_to_columns
 
 
-@dlt.source
-def sql_database(
-    credentials: Union[ConnectionStringCredentials, Engine, str] = dlt.secrets.value,
-    schema: Optional[str] = dlt.config.value,
-    metadata: Optional[MetaData] = None,
-    table_names: Optional[List[str]] = dlt.config.value,
-    detect_precision_hints: Optional[bool] = dlt.config.value,
-) -> Iterable[DltResource]:
-    """
-    A DLT source which loads data from an SQL database using SQLAlchemy.
-    Resources are automatically created for each table in the schema or from the given list of tables.
-
-    Args:
-        credentials (Union[ConnectionStringCredentials, Engine, str]): Database credentials or an `sqlalchemy.Engine` instance.
-        schema (Optional[str]): Name of the database schema to load (if different from default).
-        metadata (Optional[MetaData]): Optional `sqlalchemy.MetaData` instance. `schema` argument is ignored when this is used.
-        table_names (Optional[List[str]]): A list of table names to load. By default, all tables in the schema are loaded.
-        detect_precision_hints (bool): Set column precision and scale hints for supported data types in the target schema based on the columns in the source tables.
-            This is disabled by default.
-    Returns:
-        Iterable[DltResource]: A list of DLT resources for each table to be loaded.
-    """
-
-    # set up alchemy engine
-    engine = engine_from_credentials(credentials)
-    engine.execution_options(stream_results=True)
-    metadata = metadata or MetaData(schema=schema)
-
-    # use provided tables or all tables
-    if table_names:
-        tables = [Table(name, metadata, autoload_with=engine) for name in table_names]
-    else:
-        metadata.reflect(bind=engine)
-        tables = list(metadata.tables.values())
-
-    for table in tables:
-        yield dlt.resource(
-            table_rows,
-            name=table.name,
-            primary_key=get_primary_key(table),
-            spec=SqlDatabaseTableConfiguration,
-            columns=table_to_columns(table) if detect_precision_hints else None,
-        )(engine, table)
-
-
 def sql_table(
     credentials: Union[ConnectionStringCredentials, Engine, str] = dlt.secrets.value,
     table: str = dlt.config.value,
@@ -69,6 +23,7 @@ def sql_table(
     metadata: Optional[MetaData] = None,
     incremental: Optional[dlt.sources.incremental[Any]] = None,
     detect_precision_hints: Optional[bool] = dlt.config.value,
+    merge_key: Optional[str] = None,
 ) -> DltResource:
     """
     A dlt resource which loads data from an SQL database table using SQLAlchemy.
@@ -101,4 +56,5 @@
         name=table_obj.name,
         primary_key=get_primary_key(table_obj),
         columns=table_to_columns(table_obj) if detect_precision_hints else None,
+        merge_key=merge_key,  # type: ignore
     )(engine, table_obj, incremental=incremental)
ingestr/src/telemetry/event.py
ADDED
@@ -0,0 +1,14 @@
import os

import machineid
import rudderstack.analytics as rudder_analytics  # type: ignore

rudder_analytics.write_key = "2cUr13DDQcX2x2kAfMEfdrKvrQa"
rudder_analytics.dataPlaneUrl = "https://getbruinbumlky.dataplane.rudderstack.com"


def track(event_name, event_properties):
    if os.environ.get("DISABLE_TELEMETRY", False):
        return

    rudder_analytics.track(machineid.hashed_id(), event_name, event_properties)
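The new telemetry module above reports events through RudderStack keyed by a hashed machine ID, and skips reporting entirely when the DISABLE_TELEMETRY environment variable is set; the new test suite relies on this. A minimal sketch of opting out when driving ingestr from Python, using the event name and properties that main.py itself sends:

# Sketch only: any non-empty value for DISABLE_TELEMETRY makes track() return early.
import os

os.environ["DISABLE_TELEMETRY"] = "true"

from ingestr.src.telemetry.event import track

track("command_triggered", {"command": "ingest"})  # returns immediately, nothing is sent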
ingestr/testdata/test_append.db
Binary file
ingestr/testdata/test_create_replace.db
Binary file
ingestr/testdata/test_delete_insert_with_timerange.db
Binary file
ingestr/testdata/test_delete_insert_without_primary_key.db
Binary file
ingestr/testdata/test_merge_with_primary_key.db
Binary file
{ingestr-0.0.3.dist-info → ingestr-0.0.4.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ingestr
-Version: 0.0.
+Version: 0.0.4
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -16,11 +16,14 @@ Classifier: Topic :: Database
 Requires-Python: >=3.9
 Requires-Dist: databricks-sql-connector==2.9.3
 Requires-Dist: dlt==0.4.3
+Requires-Dist: duckdb-engine==0.11.1
 Requires-Dist: duckdb==0.9.2
+Requires-Dist: google-cloud-bigquery-storage
 Requires-Dist: pendulum==3.0.0
 Requires-Dist: psycopg2==2.9.9
 Requires-Dist: pyodbc==5.1.0
 Requires-Dist: rich==13.7.0
+Requires-Dist: rudder-sdk-python==2.0.2
 Requires-Dist: snowflake-sqlalchemy==1.5.1
 Requires-Dist: sqlalchemy-bigquery==1.9.0
 Requires-Dist: sqlalchemy2-stubs==0.0.2a38
@@ -32,18 +35,19 @@ Description-Content-Type: text/markdown
 <div align="center">
 <img src="./resources/ingestr.svg" width="500" />
 <p>Ingest & copy data from any source to any destination without any code</p>
+<img src="./resources/demo.gif" width="500" />
 </div>
 
+
 -----
 
 Ingestr is a command-line application that allows you to ingest data from any source into any destination using simple command-line flags, no code necessary.
 
-- ✨ copy data from your
-- ➕ incremental loading
+- ✨ copy data from your database into any destination
+- ➕ incremental loading: `append`, `merge` or `delete+insert`
 - 🐍 single-command installation
-- 💅 Docker image for easy installation & usage
 
-ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the
+ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
 
 
 ## Installation
@@ -67,6 +71,10 @@ This command will:
 - get the table `public.some_data` from the Postgres instance.
 - upload this data to your BigQuery warehouse under the schema `ingestr` and table `some_data`.
 
+## Documentation
+You can see the full documentation [here](https://bruindata.github.com/ingestr).
+
+
 ## Supported Sources & Destinations
 
 | Database | Source | Destination |
@@ -79,4 +87,9 @@ This command will:
 | DuckDB | ✅ | ✅ |
 | Microsoft SQL Server | ✅ | ✅ |
 | SQLite | ✅ | ❌ |
-| MySQL | ✅ | ❌ |
+| MySQL | ✅ | ❌ |
+
+More to come soon!
+
+## Acknowledgements
+This project would not have been possible without the amazing work done by the [SQLAlchemy](https://www.sqlalchemy.org/) and [dlt](https://dlthub.com/) teams. We relied on their work to connect to various sources and destinations, and built `ingestr` as a simple, opinionated wrapper around their work.
ingestr-0.0.4.dist-info/RECORD
ADDED
@@ -0,0 +1,23 @@
ingestr/main.py,sha256=fogv6KlxO2Y0fYEl36s0YVTF1S7TCmksQvL_siVI7rE,8703
ingestr/main_test.py,sha256=IQx2bjTnuixoY9ndgErBTaP3QE5AWvsLtv7ZPi8vJtA,19285
ingestr/src/destinations.py,sha256=LyA_26S3tHMeJiFwxX3XYV39lLOKqKACL0wWn3IGyP4,2673
ingestr/src/destinations_test.py,sha256=rgEk8EpAntFbSOwXovC4prv3RA22mwq8pIO6sZ_rYzg,4212
ingestr/src/factory.py,sha256=iBmp2spbUkkvOfwdRf6uo_5j9fasTTSWdS79Kc4jRQw,2141
ingestr/src/sources.py,sha256=WdbkY0S54H3rNy8kgOH3VBfE5oB0TSsOCCY5GBDU8Ss,1130
ingestr/src/sources_test.py,sha256=l_Obs7z3_WulTVa2ZeCDYOTT97Mj1nd1AgA3P5OivV0,1929
ingestr/src/sql_database/__init__.py,sha256=_hOl_RpOzzd2Sf6r0ETzcjr8gD9fjXRfrgr8QM4pe6w,2606
ingestr/src/sql_database/helpers.py,sha256=1yw-E9uTr4_6VnFxYpFBZkA76nC3UmXYwFxoxMRnhC0,4441
ingestr/src/sql_database/schema_types.py,sha256=PCIdLrT5Xc4vmoaf6OJSeXLlyN05alwgcQ-TDXd8hbQ,2153
ingestr/src/sql_database/settings.py,sha256=PaLPayAb1QGHHcPlrZ7eJ1fonDA6-sOGh-ZiueIFhRg,76
ingestr/src/telemetry/event.py,sha256=ByPdlu5YYRXKM6hnZa48uAKvvz5jOq4bP-OAn3Nc7bQ,426
ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
ingestr/testdata/test_append.db,sha256=OI0K5lwvwJpbTKdv3MkIq1RKiGA4SHclCXThPW0q4Xo,1060864
ingestr/testdata/test_create_replace.db,sha256=-ByzuQxPW5wa7hiVUHxvckGgEn1NpGZgN2zogsg80-U,536576
ingestr/testdata/test_delete_insert_with_timerange.db,sha256=ClL0WO4f3lq7kaEDHs8LTKKl5pdJUB9HncvamRZqsjY,1585152
ingestr/testdata/test_delete_insert_without_primary_key.db,sha256=feNVnNzfJY-DXQF84eBmW62YNKfB8WxXXt66Or9HqZ4,1847296
ingestr/testdata/test_merge_with_primary_key.db,sha256=t5fm_kKLtl4G2l8PLdXqOs3eAwEv8keS-sD7fwlRQDQ,1847296
ingestr-0.0.4.dist-info/METADATA,sha256=lgJ-p08vbVwbgCpWPhpWFFJoaTPeKSKrOVqIvlzHjso,3563
ingestr-0.0.4.dist-info/WHEEL,sha256=TJPnKdtrSue7xZ_AVGkp9YXcvDrobsjBds1du3Nx6dc,87
ingestr-0.0.4.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
ingestr-0.0.4.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
ingestr-0.0.4.dist-info/RECORD,,
ingestr-0.0.3.dist-info/RECORD
DELETED
@@ -1,16 +0,0 @@
ingestr/main.py,sha256=KheVlifq6v4I5KCd_l1fi2mpk8kfYE4delBXHH_SVxY,7537
ingestr/src/destinations.py,sha256=LyA_26S3tHMeJiFwxX3XYV39lLOKqKACL0wWn3IGyP4,2673
ingestr/src/destinations_test.py,sha256=rgEk8EpAntFbSOwXovC4prv3RA22mwq8pIO6sZ_rYzg,4212
ingestr/src/factory.py,sha256=YMIZSWY9ojrrl-M5_VC5eDR7TdBRF9Wem9pDCOw5DbU,2063
ingestr/src/sources.py,sha256=8UeHfi1XY0C8us0fHQObkjLXDkWA_fAWhfBltUW2hTQ,956
ingestr/src/sources_test.py,sha256=gcoEFpakLeYSl9cliwoSAakzr1R9pVml5V4H9ibMCZA,1825
ingestr/src/sql_database/__init__.py,sha256=mLiaDGaz3h5LRCnmjPTUNhp1fIjgU5rCcBOQ2r3Iu1Q,4643
ingestr/src/sql_database/helpers.py,sha256=r3b5MDpMjpV4U94gCKSOUH9t7YdP0OCAOqgpjbe7iSs,4442
ingestr/src/sql_database/schema_types.py,sha256=PCIdLrT5Xc4vmoaf6OJSeXLlyN05alwgcQ-TDXd8hbQ,2153
ingestr/src/sql_database/settings.py,sha256=PaLPayAb1QGHHcPlrZ7eJ1fonDA6-sOGh-ZiueIFhRg,76
ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
ingestr-0.0.3.dist-info/METADATA,sha256=EeURFoj26cB2ivj3hfdAu07AL4ZWKwLE70zDosxQjaQ,2972
ingestr-0.0.3.dist-info/WHEEL,sha256=TJPnKdtrSue7xZ_AVGkp9YXcvDrobsjBds1du3Nx6dc,87
ingestr-0.0.3.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
ingestr-0.0.3.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
ingestr-0.0.3.dist-info/RECORD,,
{ingestr-0.0.3.dist-info → ingestr-0.0.4.dist-info}/WHEEL
File without changes
{ingestr-0.0.3.dist-info → ingestr-0.0.4.dist-info}/entry_points.txt
File without changes
{ingestr-0.0.3.dist-info → ingestr-0.0.4.dist-info}/licenses/LICENSE.md
File without changes