ingestr 0.0.1__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.0.1 → ingestr-0.0.3}/Makefile +7 -5
- {ingestr-0.0.1 → ingestr-0.0.3}/PKG-INFO +18 -1
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/main.py +69 -46
- {ingestr-0.0.1 → ingestr-0.0.3}/pyproject.toml +10 -2
- ingestr-0.0.3/requirements-dev.txt +7 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/requirements.txt +0 -6
- ingestr-0.0.3/resources/demo.gif +0 -0
- ingestr-0.0.3/resources/demo.tape +32 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/.gitignore +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/LICENSE.md +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/README.md +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/destinations.py +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/destinations_test.py +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/factory.py +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/sources.py +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/sources_test.py +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/sql_database/helpers.py +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/sql_database/schema_types.py +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/sql_database/settings.py +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.0.1 → ingestr-0.0.3}/resources/ingestr.svg +0 -0
|
@@ -1,15 +1,15 @@
|
|
|
1
1
|
.ONESHELL:
|
|
2
|
-
.PHONY: test lint format ftl test-ci lint-ci build
|
|
2
|
+
.PHONY: test lint format ftl test-ci lint-ci build upload-release
|
|
3
3
|
|
|
4
4
|
venv: venv/touchfile
|
|
5
5
|
|
|
6
|
-
venv/touchfile: requirements.txt
|
|
6
|
+
venv/touchfile: requirements-dev.txt
|
|
7
7
|
test -d venv || python3 -m venv venv
|
|
8
|
-
. venv/bin/activate; pip install -r requirements.txt
|
|
8
|
+
. venv/bin/activate; pip install -r requirements-dev.txt
|
|
9
9
|
touch venv/touchfile
|
|
10
10
|
|
|
11
|
-
|
|
12
|
-
pip install -r requirements.txt
|
|
11
|
+
deps:
|
|
12
|
+
pip install -r requirements-dev.txt
|
|
13
13
|
|
|
14
14
|
test-ci:
|
|
15
15
|
pytest -rP -vv --tb=short --cov=ingestr --no-cov-on-fail
|
|
@@ -32,3 +32,5 @@ tl: test lint
|
|
|
32
32
|
build:
|
|
33
33
|
python3 -m build
|
|
34
34
|
|
|
35
|
+
upload-release:
|
|
36
|
+
twine upload dist/*
|
|
@@ -1,15 +1,32 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.0.1
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
7
7
|
Author-email: Burak Karakan <burak.karakan@getbruin.com>
|
|
8
8
|
License-File: LICENSE.md
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
9
12
|
Classifier: License :: OSI Approved :: MIT License
|
|
10
13
|
Classifier: Operating System :: OS Independent
|
|
11
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Database
|
|
12
16
|
Requires-Python: >=3.9
|
|
17
|
+
Requires-Dist: databricks-sql-connector==2.9.3
|
|
18
|
+
Requires-Dist: dlt==0.4.3
|
|
19
|
+
Requires-Dist: duckdb==0.9.2
|
|
20
|
+
Requires-Dist: pendulum==3.0.0
|
|
21
|
+
Requires-Dist: psycopg2==2.9.9
|
|
22
|
+
Requires-Dist: pyodbc==5.1.0
|
|
23
|
+
Requires-Dist: rich==13.7.0
|
|
24
|
+
Requires-Dist: snowflake-sqlalchemy==1.5.1
|
|
25
|
+
Requires-Dist: sqlalchemy-bigquery==1.9.0
|
|
26
|
+
Requires-Dist: sqlalchemy2-stubs==0.0.2a38
|
|
27
|
+
Requires-Dist: sqlalchemy==1.4.51
|
|
28
|
+
Requires-Dist: tqdm==4.66.2
|
|
29
|
+
Requires-Dist: typer==0.9.0
|
|
13
30
|
Description-Content-Type: text/markdown
|
|
14
31
|
|
|
15
32
|
<div align="center">
|
|
@@ -4,38 +4,37 @@ import dlt
|
|
|
4
4
|
import typer
|
|
5
5
|
|
|
6
6
|
from ingestr.src.factory import SourceDestinationFactory
|
|
7
|
-
from rich import print
|
|
7
|
+
from rich.console import Console
|
|
8
8
|
from dlt.common.pipeline import LoadInfo
|
|
9
9
|
import humanize
|
|
10
|
+
from typing_extensions import Annotated
|
|
10
11
|
|
|
11
|
-
app = typer.Typer(
|
|
12
|
+
app = typer.Typer(
|
|
13
|
+
name="ingestr",
|
|
14
|
+
help="ingestr is the CLI tool to ingest data from one source to another",
|
|
15
|
+
rich_markup_mode="rich",
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
console = Console()
|
|
20
|
+
print = console.print
|
|
12
21
|
|
|
13
22
|
|
|
14
23
|
@app.command()
|
|
15
24
|
def ingest(
|
|
16
|
-
source_uri: str =
|
|
17
|
-
dest_uri: str =
|
|
18
|
-
source_table: str =
|
|
19
|
-
dest_table: str = None, # type: ignore
|
|
20
|
-
incremental_key: str = None, # type: ignore
|
|
21
|
-
incremental_strategy: str = "replace", # type: ignore
|
|
25
|
+
source_uri: Annotated[str, typer.Option(help="The URI of the [green]source[/green]")], # type: ignore
|
|
26
|
+
dest_uri: Annotated[str, typer.Option(help="The URI of the [cyan]destination[/cyan]")], # type: ignore
|
|
27
|
+
source_table: Annotated[str, typer.Option(help="The table name in the [green]source[/green] to fetch")], # type: ignore
|
|
28
|
+
dest_table: Annotated[str, typer.Option(help="The table in the [cyan]destination[/cyan] to save the data into")] = None, # type: ignore
|
|
29
|
+
incremental_key: Annotated[str, typer.Option(help="The incremental key from the table to be used for incremental strategies")] = None, # type: ignore
|
|
30
|
+
incremental_strategy: Annotated[str, typer.Option(help="The incremental strategy to use, must be one of 'replace', 'append' or 'merge'")] = "replace", # type: ignore
|
|
22
31
|
):
|
|
23
|
-
if not source_uri:
|
|
24
|
-
typer.echo("Please provide a source URI")
|
|
25
|
-
raise typer.Abort()
|
|
26
|
-
|
|
27
|
-
if not dest_uri:
|
|
28
|
-
typer.echo("Please provide a destination URI")
|
|
29
|
-
raise typer.Abort()
|
|
30
|
-
|
|
31
|
-
if not source_table:
|
|
32
|
-
print("[bold red]Please provide a source table [\red bold]")
|
|
33
|
-
raise typer.Abort()
|
|
34
|
-
|
|
35
32
|
if not dest_table:
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
33
|
+
print()
|
|
34
|
+
print(
|
|
35
|
+
"[yellow]Destination table is not given, defaulting to the source table.[/yellow]"
|
|
36
|
+
)
|
|
37
|
+
dest_table = source_table
|
|
39
38
|
|
|
40
39
|
factory = SourceDestinationFactory(source_uri, dest_uri)
|
|
41
40
|
source = factory.get_source()
|
|
@@ -43,9 +42,11 @@ def ingest(
|
|
|
43
42
|
|
|
44
43
|
m = hashlib.sha256()
|
|
45
44
|
m.update(dest_table.encode("utf-8"))
|
|
45
|
+
pipeline_name = m.hexdigest()
|
|
46
|
+
short_pipeline_name = pipeline_name[:8]
|
|
46
47
|
|
|
47
48
|
pipeline = dlt.pipeline(
|
|
48
|
-
pipeline_name=
|
|
49
|
+
pipeline_name=pipeline_name,
|
|
49
50
|
destination=destination.dlt_dest(
|
|
50
51
|
uri=dest_uri,
|
|
51
52
|
),
|
|
@@ -54,7 +55,26 @@ def ingest(
|
|
|
54
55
|
)
|
|
55
56
|
|
|
56
57
|
print()
|
|
57
|
-
print(f"[bold green]Initiated pipeline
|
|
58
|
+
print(f"[bold green]Initiated the pipeline with the following:[/bold green]")
|
|
59
|
+
print(f"[bold yellow] Pipeline ID:[/bold yellow] {short_pipeline_name}")
|
|
60
|
+
print(
|
|
61
|
+
f"[bold yellow] Source:[/bold yellow] {factory.source_scheme} / {source_table}"
|
|
62
|
+
)
|
|
63
|
+
print(
|
|
64
|
+
f"[bold yellow] Destination:[/bold yellow] {factory.destination_scheme} / {dest_table}"
|
|
65
|
+
)
|
|
66
|
+
print(f"[bold yellow] Incremental Strategy:[/bold yellow] {incremental_strategy}")
|
|
67
|
+
print(
|
|
68
|
+
f"[bold yellow] Incremental Key:[/bold yellow] {incremental_key if incremental_key else 'None'}"
|
|
69
|
+
)
|
|
70
|
+
print()
|
|
71
|
+
|
|
72
|
+
continuePipeline = typer.confirm("Are you sure you would like to continue?")
|
|
73
|
+
if not continuePipeline:
|
|
74
|
+
raise typer.Abort()
|
|
75
|
+
|
|
76
|
+
print()
|
|
77
|
+
print(f"[bold green]Starting the ingestion...[/bold green]")
|
|
58
78
|
print()
|
|
59
79
|
|
|
60
80
|
incremental = []
|
|
@@ -76,34 +96,37 @@ def ingest(
|
|
|
76
96
|
primary_key=incremental,
|
|
77
97
|
)
|
|
78
98
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
99
|
+
elapsedHuman = ""
|
|
100
|
+
if run_info.started_at:
|
|
101
|
+
elapsed = run_info.finished_at - run_info.started_at
|
|
102
|
+
elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
|
|
103
|
+
|
|
104
|
+
print(
|
|
105
|
+
f"[bold green]Successfully finished loading data from '{factory.source_scheme}' to '{factory.destination_scheme}' {elapsedHuman} [/bold green]"
|
|
106
|
+
)
|
|
82
107
|
|
|
108
|
+
# printLoadInfo(short_pipeline_name, run_info)
|
|
83
109
|
|
|
84
|
-
|
|
85
|
-
|
|
110
|
+
|
|
111
|
+
def printLoadInfo(short_pipeline_name: str, info: LoadInfo):
|
|
86
112
|
if info.started_at:
|
|
87
113
|
elapsed = info.finished_at - info.started_at
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
msg += "---"
|
|
91
|
-
msg += (
|
|
92
|
-
f"\n{len(info.loads_ids)} load package(s) were loaded to destination"
|
|
93
|
-
f" {info.destination_name} and into dataset {info.dataset_name}\n"
|
|
94
|
-
)
|
|
95
|
-
if info.staging_name:
|
|
96
|
-
msg += (
|
|
97
|
-
f"The {info.staging_name} staging destination used"
|
|
98
|
-
f" {info.staging_displayable_credentials} location to stage data\n"
|
|
114
|
+
print(
|
|
115
|
+
f" ├── Pipeline {short_pipeline_name} load step completed in [bold green]{humanize.precisedelta(elapsed)}[/bold green]"
|
|
99
116
|
)
|
|
100
117
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
118
|
+
connector = "└──"
|
|
119
|
+
if info.staging_name:
|
|
120
|
+
connector = "├──"
|
|
121
|
+
|
|
122
|
+
print(
|
|
123
|
+
f" {connector} {len(info.loads_ids)} load package{'s were' if len(info.loads_ids) > 1 else ' was'} loaded to destination [bold cyan]{info.destination_name}[/bold cyan] and into dataset [bold cyan]{info.dataset_name}[/bold cyan]",
|
|
124
|
+
highlight=False,
|
|
104
125
|
)
|
|
105
|
-
|
|
106
|
-
|
|
126
|
+
if info.staging_name:
|
|
127
|
+
print(
|
|
128
|
+
f" └── The [bold cyan]{info.staging_name}[/bold cyan] staging destination used [bold cyan]{info.staging_displayable_credentials}[/bold cyan] location to stage data"
|
|
129
|
+
)
|
|
107
130
|
|
|
108
131
|
|
|
109
132
|
@app.command()
|
|
@@ -65,22 +65,27 @@ exclude = ['venv']
|
|
|
65
65
|
|
|
66
66
|
|
|
67
67
|
[build-system]
|
|
68
|
-
requires = ["hatchling"]
|
|
68
|
+
requires = ["hatchling", "hatch-requirements-txt"]
|
|
69
69
|
build-backend = "hatchling.build"
|
|
70
70
|
|
|
71
71
|
[project]
|
|
72
72
|
name = "ingestr"
|
|
73
|
-
version = "0.0.1"
|
|
73
|
+
version = "0.0.3"
|
|
74
74
|
authors = [
|
|
75
75
|
{ name="Burak Karakan", email="burak.karakan@getbruin.com" },
|
|
76
76
|
]
|
|
77
77
|
description = "ingestr is a command-line application that ingests data from various sources and stores them in any database."
|
|
78
78
|
readme = "README.md"
|
|
79
79
|
requires-python = ">=3.9"
|
|
80
|
+
dynamic = ["dependencies"]
|
|
80
81
|
classifiers = [
|
|
81
82
|
"Programming Language :: Python :: 3",
|
|
82
83
|
"License :: OSI Approved :: MIT License",
|
|
83
84
|
"Operating System :: OS Independent",
|
|
85
|
+
"Development Status :: 4 - Beta",
|
|
86
|
+
"Environment :: Console",
|
|
87
|
+
"Intended Audience :: Developers",
|
|
88
|
+
"Topic :: Database"
|
|
84
89
|
]
|
|
85
90
|
|
|
86
91
|
[project.urls]
|
|
@@ -92,3 +97,6 @@ packages = ["ingestr"]
|
|
|
92
97
|
|
|
93
98
|
[project.scripts]
|
|
94
99
|
ingestr = "ingestr.main:main"
|
|
100
|
+
|
|
101
|
+
[tool.hatch.metadata.hooks.requirements_txt]
|
|
102
|
+
files = ["requirements.txt"]
|
|
@@ -1,16 +1,10 @@
|
|
|
1
1
|
databricks-sql-connector==2.9.3
|
|
2
|
-
deepdiff==6.7.1
|
|
3
2
|
dlt==0.4.3
|
|
4
3
|
duckdb==0.9.2
|
|
5
|
-
isort==5.13.2
|
|
6
|
-
mypy==1.8.0
|
|
7
4
|
pendulum==3.0.0
|
|
8
5
|
psycopg2==2.9.9
|
|
9
6
|
pyodbc==5.1.0
|
|
10
|
-
pytest-cov==4.1.0
|
|
11
|
-
pytest==8.0.0
|
|
12
7
|
rich==13.7.0
|
|
13
|
-
ruff==0.2.1
|
|
14
8
|
snowflake-sqlalchemy==1.5.1
|
|
15
9
|
sqlalchemy-bigquery==1.9.0
|
|
16
10
|
SQLAlchemy==1.4.51
|
|
Binary file
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
# Where should we write the GIF?
|
|
2
|
+
Output demo.gif
|
|
3
|
+
|
|
4
|
+
# Set up a 1200x900 terminal with a 20px font.
|
|
5
|
+
Set FontSize 20
|
|
6
|
+
Set Width 1200
|
|
7
|
+
Set Height 900
|
|
8
|
+
Set LineHeight 1.8
|
|
9
|
+
Set TypingSpeed 0.01
|
|
10
|
+
Set Framerate 60
|
|
11
|
+
|
|
12
|
+
Type "ingestr ingest \"
|
|
13
|
+
Ctrl+Enter
|
|
14
|
+
|
|
15
|
+
Type " --source-uri $POSTGRES_URI \"
|
|
16
|
+
Ctrl+Enter
|
|
17
|
+
|
|
18
|
+
Type " --source-table 'testschema.table_name' \"
|
|
19
|
+
Ctrl+Enter
|
|
20
|
+
|
|
21
|
+
Type " --dest-uri $MSSQL_URI\"
|
|
22
|
+
Ctrl+Enter
|
|
23
|
+
|
|
24
|
+
Type " --dest-table 'testschema.table_name'"
|
|
25
|
+
Enter
|
|
26
|
+
|
|
27
|
+
Sleep 2s
|
|
28
|
+
|
|
29
|
+
Type "y"
|
|
30
|
+
Ctrl+Enter
|
|
31
|
+
|
|
32
|
+
Sleep 1s
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|