ingestr 0.0.1__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

@@ -1,15 +1,15 @@
1
1
  .ONESHELL:
2
- .PHONY: test lint format ftl test-ci lint-ci build
2
+ .PHONY: test lint format ftl test-ci lint-ci build upload-release
3
3
 
4
4
  venv: venv/touchfile
5
5
 
6
- venv/touchfile: requirements.txt
6
+ venv/touchfile: requirements-dev.txt
7
7
  test -d venv || python3 -m venv venv
8
- . venv/bin/activate; pip install -r requirements.txt
8
+ . venv/bin/activate; pip install -r requirements-dev.txt
9
9
  touch venv/touchfile
10
10
 
11
- install-deps:
12
- pip install -r requirements.txt
11
+ deps:
12
+ pip install -r requirements-dev.txt
13
13
 
14
14
  test-ci:
15
15
  pytest -rP -vv --tb=short --cov=ingestr --no-cov-on-fail
@@ -32,3 +32,5 @@ tl: test lint
32
32
  build:
33
33
  python3 -m build
34
34
 
35
+ upload-release:
36
+ twine upload dist/*
@@ -1,15 +1,32 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ingestr
3
- Version: 0.0.1
3
+ Version: 0.0.3
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
7
7
  Author-email: Burak Karakan <burak.karakan@getbruin.com>
8
8
  License-File: LICENSE.md
9
+ Classifier: Development Status :: 4 - Beta
10
+ Classifier: Environment :: Console
11
+ Classifier: Intended Audience :: Developers
9
12
  Classifier: License :: OSI Approved :: MIT License
10
13
  Classifier: Operating System :: OS Independent
11
14
  Classifier: Programming Language :: Python :: 3
15
+ Classifier: Topic :: Database
12
16
  Requires-Python: >=3.9
17
+ Requires-Dist: databricks-sql-connector==2.9.3
18
+ Requires-Dist: dlt==0.4.3
19
+ Requires-Dist: duckdb==0.9.2
20
+ Requires-Dist: pendulum==3.0.0
21
+ Requires-Dist: psycopg2==2.9.9
22
+ Requires-Dist: pyodbc==5.1.0
23
+ Requires-Dist: rich==13.7.0
24
+ Requires-Dist: snowflake-sqlalchemy==1.5.1
25
+ Requires-Dist: sqlalchemy-bigquery==1.9.0
26
+ Requires-Dist: sqlalchemy2-stubs==0.0.2a38
27
+ Requires-Dist: sqlalchemy==1.4.51
28
+ Requires-Dist: tqdm==4.66.2
29
+ Requires-Dist: typer==0.9.0
13
30
  Description-Content-Type: text/markdown
14
31
 
15
32
  <div align="center">
@@ -4,38 +4,37 @@ import dlt
4
4
  import typer
5
5
 
6
6
  from ingestr.src.factory import SourceDestinationFactory
7
- from rich import print
7
+ from rich.console import Console
8
8
  from dlt.common.pipeline import LoadInfo
9
9
  import humanize
10
+ from typing_extensions import Annotated
10
11
 
11
- app = typer.Typer(name="ingestr")
12
+ app = typer.Typer(
13
+ name="ingestr",
14
+ help="ingestr is the CLI tool to ingest data from one source to another",
15
+ rich_markup_mode="rich",
16
+ )
17
+
18
+
19
+ console = Console()
20
+ print = console.print
12
21
 
13
22
 
14
23
  @app.command()
15
24
  def ingest(
16
- source_uri: str = None, # type: ignore
17
- dest_uri: str = None, # type: ignore
18
- source_table: str = None, # type: ignore
19
- dest_table: str = None, # type: ignore
20
- incremental_key: str = None, # type: ignore
21
- incremental_strategy: str = "replace", # type: ignore
25
+ source_uri: Annotated[str, typer.Option(help="The URI of the [green]source[/green]")], # type: ignore
26
+ dest_uri: Annotated[str, typer.Option(help="The URI of the [cyan]destination[/cyan]")], # type: ignore
27
+ source_table: Annotated[str, typer.Option(help="The table name in the [green]source[/green] to fetch")], # type: ignore
28
+ dest_table: Annotated[str, typer.Option(help="The table in the [cyan]destination[/cyan] to save the data into")] = None, # type: ignore
29
+ incremental_key: Annotated[str, typer.Option(help="The incremental key from the table to be used for incremental strategies")] = None, # type: ignore
30
+ incremental_strategy: Annotated[str, typer.Option(help="The incremental strategy to use, must be one of 'replace', 'append' or 'merge'")] = "replace", # type: ignore
22
31
  ):
23
- if not source_uri:
24
- typer.echo("Please provide a source URI")
25
- raise typer.Abort()
26
-
27
- if not dest_uri:
28
- typer.echo("Please provide a destination URI")
29
- raise typer.Abort()
30
-
31
- if not source_table:
32
- print("[bold red]Please provide a source table [\red bold]")
33
- raise typer.Abort()
34
-
35
32
  if not dest_table:
36
- typer.echo("Please provide a destination table")
37
- raise typer.Abort()
38
-
33
+ print()
34
+ print(
35
+ "[yellow]Destination table is not given, defaulting to the source table.[/yellow]"
36
+ )
37
+ dest_table = source_table
39
38
 
40
39
  factory = SourceDestinationFactory(source_uri, dest_uri)
41
40
  source = factory.get_source()
@@ -43,9 +42,11 @@ def ingest(
43
42
 
44
43
  m = hashlib.sha256()
45
44
  m.update(dest_table.encode("utf-8"))
45
+ pipeline_name = m.hexdigest()
46
+ short_pipeline_name = pipeline_name[:8]
46
47
 
47
48
  pipeline = dlt.pipeline(
48
- pipeline_name=m.hexdigest(),
49
+ pipeline_name=pipeline_name,
49
50
  destination=destination.dlt_dest(
50
51
  uri=dest_uri,
51
52
  ),
@@ -54,7 +55,26 @@ def ingest(
54
55
  )
55
56
 
56
57
  print()
57
- print(f"[bold green]Initiated pipeline, starting...[/bold green]")
58
+ print(f"[bold green]Initiated the pipeline with the following:[/bold green]")
59
+ print(f"[bold yellow] Pipeline ID:[/bold yellow] {short_pipeline_name}")
60
+ print(
61
+ f"[bold yellow] Source:[/bold yellow] {factory.source_scheme} / {source_table}"
62
+ )
63
+ print(
64
+ f"[bold yellow] Destination:[/bold yellow] {factory.destination_scheme} / {dest_table}"
65
+ )
66
+ print(f"[bold yellow] Incremental Strategy:[/bold yellow] {incremental_strategy}")
67
+ print(
68
+ f"[bold yellow] Incremental Key:[/bold yellow] {incremental_key if incremental_key else 'None'}"
69
+ )
70
+ print()
71
+
72
+ continuePipeline = typer.confirm("Are you sure you would like to continue?")
73
+ if not continuePipeline:
74
+ raise typer.Abort()
75
+
76
+ print()
77
+ print(f"[bold green]Starting the ingestion...[/bold green]")
58
78
  print()
59
79
 
60
80
  incremental = []
@@ -76,34 +96,37 @@ def ingest(
76
96
  primary_key=incremental,
77
97
  )
78
98
 
79
- print()
80
- print(f"[bold green]Successfully finished loading data from '{factory.source_scheme}' to '{factory.destination_scheme}'. [/bold green]")
81
- # typer.echo(printLoadInfo(run_info))
99
+ elapsedHuman = ""
100
+ if run_info.started_at:
101
+ elapsed = run_info.finished_at - run_info.started_at
102
+ elapsedHuman = f"in {humanize.precisedelta(elapsed)}"
103
+
104
+ print(
105
+ f"[bold green]Successfully finished loading data from '{factory.source_scheme}' to '{factory.destination_scheme}' {elapsedHuman} [/bold green]"
106
+ )
82
107
 
108
+ # printLoadInfo(short_pipeline_name, run_info)
83
109
 
84
- def printLoadInfo(info: LoadInfo):
85
- msg = f"Pipeline {info.pipeline.pipeline_name} load step completed in "
110
+
111
+ def printLoadInfo(short_pipeline_name: str, info: LoadInfo):
86
112
  if info.started_at:
87
113
  elapsed = info.finished_at - info.started_at
88
- msg += humanize.precisedelta(elapsed)
89
- else:
90
- msg += "---"
91
- msg += (
92
- f"\n{len(info.loads_ids)} load package(s) were loaded to destination"
93
- f" {info.destination_name} and into dataset {info.dataset_name}\n"
94
- )
95
- if info.staging_name:
96
- msg += (
97
- f"The {info.staging_name} staging destination used"
98
- f" {info.staging_displayable_credentials} location to stage data\n"
114
+ print(
115
+ f" ├── Pipeline {short_pipeline_name} load step completed in [bold green]{humanize.precisedelta(elapsed)}[/bold green]"
99
116
  )
100
117
 
101
- msg += (
102
- f"The {info.destination_name} destination used"
103
- f" {info.destination_displayable_credentials} location to store data"
118
+ connector = "└──"
119
+ if info.staging_name:
120
+ connector = "├──"
121
+
122
+ print(
123
+ f" {connector} {len(info.loads_ids)} load package{'s were' if len(info.loads_ids) > 1 else ' was'} loaded to destination [bold cyan]{info.destination_name}[/bold cyan] and into dataset [bold cyan]{info.dataset_name}[/bold cyan]",
124
+ highlight=False,
104
125
  )
105
- msg += info._load_packages_asstr(info.load_packages, 0)
106
- return msg
126
+ if info.staging_name:
127
+ print(
128
+ f" └── The [bold cyan]{info.staging_name}[/bold cyan] staging destination used [bold cyan]{info.staging_displayable_credentials}[/bold cyan] location to stage data"
129
+ )
107
130
 
108
131
 
109
132
  @app.command()
@@ -65,22 +65,27 @@ exclude = ['venv']
65
65
 
66
66
 
67
67
  [build-system]
68
- requires = ["hatchling"]
68
+ requires = ["hatchling", "hatch-requirements-txt"]
69
69
  build-backend = "hatchling.build"
70
70
 
71
71
  [project]
72
72
  name = "ingestr"
73
- version = "0.0.1"
73
+ version = "0.0.3"
74
74
  authors = [
75
75
  { name="Burak Karakan", email="burak.karakan@getbruin.com" },
76
76
  ]
77
77
  description = "ingestr is a command-line application that ingests data from various sources and stores them in any database."
78
78
  readme = "README.md"
79
79
  requires-python = ">=3.9"
80
+ dynamic = ["dependencies"]
80
81
  classifiers = [
81
82
  "Programming Language :: Python :: 3",
82
83
  "License :: OSI Approved :: MIT License",
83
84
  "Operating System :: OS Independent",
85
+ "Development Status :: 4 - Beta",
86
+ "Environment :: Console",
87
+ "Intended Audience :: Developers",
88
+ "Topic :: Database"
84
89
  ]
85
90
 
86
91
  [project.urls]
@@ -92,3 +97,6 @@ packages = ["ingestr"]
92
97
 
93
98
  [project.scripts]
94
99
  ingestr = "ingestr.main:main"
100
+
101
+ [tool.hatch.metadata.hooks.requirements_txt]
102
+ files = ["requirements.txt"]
@@ -0,0 +1,7 @@
1
+ -r requirements.txt
2
+
3
+ isort==5.13.2
4
+ mypy==1.8.0
5
+ pytest-cov==4.1.0
6
+ pytest==8.0.0
7
+ ruff==0.2.1
@@ -1,16 +1,10 @@
1
1
  databricks-sql-connector==2.9.3
2
- deepdiff==6.7.1
3
2
  dlt==0.4.3
4
3
  duckdb==0.9.2
5
- isort==5.13.2
6
- mypy==1.8.0
7
4
  pendulum==3.0.0
8
5
  psycopg2==2.9.9
9
6
  pyodbc==5.1.0
10
- pytest-cov==4.1.0
11
- pytest==8.0.0
12
7
  rich==13.7.0
13
- ruff==0.2.1
14
8
  snowflake-sqlalchemy==1.5.1
15
9
  sqlalchemy-bigquery==1.9.0
16
10
  SQLAlchemy==1.4.51
Binary file
@@ -0,0 +1,32 @@
1
+ # Where should we write the GIF?
2
+ Output demo.gif
3
+
4
+ # Set up a 1200x900 terminal with a 20px font.
5
+ Set FontSize 20
6
+ Set Width 1200
7
+ Set Height 900
8
+ Set LineHeight 1.8
9
+ Set TypingSpeed 0.01
10
+ Set Framerate 60
11
+
12
+ Type "ingestr ingest \"
13
+ Ctrl+Enter
14
+
15
+ Type " --source-uri $POSTGRES_URI \"
16
+ Ctrl+Enter
17
+
18
+ Type " --source-table 'testschema.table_name' \"
19
+ Ctrl+Enter
20
+
21
+ Type " --dest-uri $MSSQL_URI\"
22
+ Ctrl+Enter
23
+
24
+ Type " --dest-table 'testschema.table_name'"
25
+ Enter
26
+
27
+ Sleep 2s
28
+
29
+ Type "y"
30
+ Ctrl+Enter
31
+
32
+ Sleep 1s
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes