ingestr 0.7.0__tar.gz → 0.7.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (88)
  1. ingestr-0.7.0/.github/workflows/docker.yml → ingestr-0.7.3/.github/workflows/tests.yml +18 -14
  2. ingestr-0.7.3/Dockerfile +45 -0
  3. {ingestr-0.7.0 → ingestr-0.7.3}/PKG-INFO +2 -1
  4. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/main.py +16 -3
  5. ingestr-0.7.3/ingestr/src/version.py +1 -0
  6. {ingestr-0.7.0 → ingestr-0.7.3}/requirements.txt +4 -3
  7. ingestr-0.7.0/.github/workflows/tests.yml +0 -36
  8. ingestr-0.7.0/Dockerfile +0 -19
  9. ingestr-0.7.0/ingestr/src/version.py +0 -1
  10. {ingestr-0.7.0 → ingestr-0.7.3}/.dockerignore +0 -0
  11. {ingestr-0.7.0 → ingestr-0.7.3}/.github/workflows/deploy-docs.yml +0 -0
  12. {ingestr-0.7.0 → ingestr-0.7.3}/.gitignore +0 -0
  13. {ingestr-0.7.0 → ingestr-0.7.3}/.python-version +0 -0
  14. {ingestr-0.7.0 → ingestr-0.7.3}/LICENSE.md +0 -0
  15. {ingestr-0.7.0 → ingestr-0.7.3}/Makefile +0 -0
  16. {ingestr-0.7.0 → ingestr-0.7.3}/README.md +0 -0
  17. {ingestr-0.7.0 → ingestr-0.7.3}/docs/.vitepress/config.mjs +0 -0
  18. {ingestr-0.7.0 → ingestr-0.7.3}/docs/.vitepress/theme/custom.css +0 -0
  19. {ingestr-0.7.0 → ingestr-0.7.3}/docs/.vitepress/theme/index.js +0 -0
  20. {ingestr-0.7.0 → ingestr-0.7.3}/docs/commands/example-uris.md +0 -0
  21. {ingestr-0.7.0 → ingestr-0.7.3}/docs/commands/ingest.md +0 -0
  22. {ingestr-0.7.0 → ingestr-0.7.3}/docs/getting-started/core-concepts.md +0 -0
  23. {ingestr-0.7.0 → ingestr-0.7.3}/docs/getting-started/incremental-loading.md +0 -0
  24. {ingestr-0.7.0 → ingestr-0.7.3}/docs/getting-started/quickstart.md +0 -0
  25. {ingestr-0.7.0 → ingestr-0.7.3}/docs/getting-started/telemetry.md +0 -0
  26. {ingestr-0.7.0 → ingestr-0.7.3}/docs/index.md +0 -0
  27. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/bigquery.md +0 -0
  28. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/csv.md +0 -0
  29. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/databricks.md +0 -0
  30. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/duckdb.md +0 -0
  31. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/gorgias.md +0 -0
  32. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/gsheets.md +0 -0
  33. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/mongodb.md +0 -0
  34. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/mssql.md +0 -0
  35. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/mysql.md +0 -0
  36. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/notion.md +0 -0
  37. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/oracle.md +0 -0
  38. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/overview.md +0 -0
  39. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/postgres.md +0 -0
  40. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/redshift.md +0 -0
  41. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/sap-hana.md +0 -0
  42. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/shopify.md +0 -0
  43. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/snowflake.md +0 -0
  44. {ingestr-0.7.0 → ingestr-0.7.3}/docs/supported-sources/sqlite.md +0 -0
  45. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/destinations.py +0 -0
  46. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/factory.py +0 -0
  47. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/google_sheets/README.md +0 -0
  48. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/google_sheets/__init__.py +0 -0
  49. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  50. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  51. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  52. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/gorgias/__init__.py +0 -0
  53. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/gorgias/helpers.py +0 -0
  54. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/mongodb/__init__.py +0 -0
  55. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/mongodb/helpers.py +0 -0
  56. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/notion/__init__.py +0 -0
  57. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/notion/helpers/__init__.py +0 -0
  58. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/notion/helpers/client.py +0 -0
  59. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/notion/helpers/database.py +0 -0
  60. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/notion/settings.py +0 -0
  61. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/shopify/__init__.py +0 -0
  62. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/shopify/exceptions.py +0 -0
  63. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/shopify/helpers.py +0 -0
  64. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/shopify/settings.py +0 -0
  65. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/sources.py +0 -0
  66. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/sql_database/__init__.py +0 -0
  67. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/sql_database/arrow_helpers.py +0 -0
  68. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/sql_database/helpers.py +0 -0
  69. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/sql_database/override.py +0 -0
  70. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/sql_database/schema_types.py +0 -0
  71. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/table_definition.py +0 -0
  72. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/telemetry/event.py +0 -0
  73. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  74. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/testdata/.gitignore +0 -0
  75. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/testdata/create_replace.csv +0 -0
  76. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/testdata/delete_insert_expected.csv +0 -0
  77. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/testdata/delete_insert_part1.csv +0 -0
  78. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/testdata/delete_insert_part2.csv +0 -0
  79. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/testdata/merge_expected.csv +0 -0
  80. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/testdata/merge_part1.csv +0 -0
  81. {ingestr-0.7.0 → ingestr-0.7.3}/ingestr/testdata/merge_part2.csv +0 -0
  82. {ingestr-0.7.0 → ingestr-0.7.3}/package-lock.json +0 -0
  83. {ingestr-0.7.0 → ingestr-0.7.3}/package.json +0 -0
  84. {ingestr-0.7.0 → ingestr-0.7.3}/pyproject.toml +0 -0
  85. {ingestr-0.7.0 → ingestr-0.7.3}/requirements-dev.txt +0 -0
  86. {ingestr-0.7.0 → ingestr-0.7.3}/resources/demo.gif +0 -0
  87. {ingestr-0.7.0 → ingestr-0.7.3}/resources/demo.tape +0 -0
  88. {ingestr-0.7.0 → ingestr-0.7.3}/resources/ingestr.svg +0 -0
@@ -5,39 +5,43 @@ on:
5
5
  tags:
6
6
  - "*"
7
7
  workflow_dispatch: {}
8
+ pull_request:
9
+ branches:
10
+ - main
8
11
 
9
12
  concurrency:
10
- group: docker-${{ github.ref }}
11
- cancel-in-progress: false
13
+ group: ${{ github.workflow }}-${{ github.ref }}
14
+ cancel-in-progress: true
12
15
 
13
16
  env:
14
17
  REGISTRY: ghcr.io
15
18
  IMAGE_NAME: ${{ github.repository }}
16
19
 
20
+
17
21
  jobs:
18
22
  tests:
19
23
  runs-on: ubuntu-latest
20
- strategy:
21
- matrix:
22
- python-version: ["3.9", "3.10", "3.11"]
23
24
  steps:
24
25
  - uses: actions/checkout@v4
25
- - name: Set up Python ${{ matrix.python-version }}
26
- uses: actions/setup-python@v4
26
+ - name: install Microsoft ODBC
27
+ run: sudo ACCEPT_EULA=Y apt-get install msodbcsql18 -y
28
+ - uses: actions/setup-python@v4
27
29
  with:
28
- python-version: ${{ matrix.python-version }}
30
+ python-version: '3.11'
31
+ cache: 'pip'
29
32
  - name: Cache dependencies
30
33
  uses: actions/cache@v3
31
34
  id: cache
32
35
  with:
33
36
  path: ${{ env.pythonLocation }}
34
- key: ${{ env.pythonLocation }}-${{ matrix.python-version }}-${{ hashFiles('requirements.txt') }}
35
- - name: install uv
36
- run: curl -LsSf https://astral.sh/uv/install.sh | sh
37
+ key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
38
+ - name: Install pip dependencies
39
+ if: steps.cache.outputs.cache-hit != 'true'
40
+ run: make deps-ci
37
41
  - name: run tests
38
- run: make test
42
+ run: make test-ci
39
43
  - name: check the formatting
40
- run: make lint
44
+ run: make lint-ci
41
45
 
42
46
  build-and-push-image:
43
47
  needs: tests
@@ -68,7 +72,7 @@ jobs:
68
72
  uses: docker/build-push-action@v5
69
73
  with:
70
74
  context: .
71
- platforms: linux/amd64,linux/arm64,linux/arm64/v8
75
+ platforms: linux/amd64
72
76
  push: true
73
77
  tags: ${{ steps.meta.outputs.tags }}
74
78
  labels: ${{ steps.meta.outputs.labels }}
@@ -0,0 +1,45 @@
1
+ FROM python:3.11-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY ./requirements.txt /app/requirements.txt
6
+
7
+ # Setup dependencies for pyodbc
8
+ RUN \
9
+ export ACCEPT_EULA='Y' && \
10
+ export MYSQL_CONNECTOR='mysql-connector-odbc-8.0.33-linux-glibc2.28-x86-64bit' && \
11
+ export MYSQL_CONNECTOR_CHECKSUM='41d03d5df0c631f8071cc697f7714620' && \
12
+ # Install build dependencies
13
+ apt-get update && \
14
+ apt-get install -y curl gcc libpq-dev build-essential unixodbc-dev g++ apt-transport-https && \
15
+ # Install pyodbc db drivers for MSSQL, PG and MySQL
16
+ curl -sSL https://packages.microsoft.com/keys/microsoft.asc | gpg --dearmor > /usr/share/keyrings/microsoft-prod.gpg && \
17
+ curl -sSL https://packages.microsoft.com/config/debian/12/prod.list | tee /etc/apt/sources.list.d/mssql-release.list && \
18
+ # install the mysql connector
19
+ curl -L -o ${MYSQL_CONNECTOR}.tar.gz https://dev.mysql.com/get/Downloads/Connector-ODBC/8.0/${MYSQL_CONNECTOR}.tar.gz && \
20
+ echo "${MYSQL_CONNECTOR_CHECKSUM} ${MYSQL_CONNECTOR}.tar.gz" | md5sum -c - && \
21
+ gunzip ${MYSQL_CONNECTOR}.tar.gz && tar xvf ${MYSQL_CONNECTOR}.tar && \
22
+ cp -r ${MYSQL_CONNECTOR}/bin/* /usr/local/bin && cp -r ${MYSQL_CONNECTOR}/lib/* /usr/local/lib && \
23
+ myodbc-installer -a -d -n "MySQL ODBC 8.0.33 Driver" -t "Driver=/usr/local/lib/libmyodbc8w.so" && \
24
+ myodbc-installer -a -d -n "MySQL ODBC 8.0.33" -t "Driver=/usr/local/lib/libmyodbc8a.so" && \
25
+ # install the rest of them
26
+ apt-get update && \
27
+ ACCEPT_EULA=Y apt-get install -y msodbcsql17 msodbcsql18 odbc-postgresql && \
28
+ # Update odbcinst.ini to make sure full path to driver is listed, and set CommLog to 0. i.e disables any communication logs to be written to files
29
+ sed 's/Driver=psql/Driver=\/usr\/lib\/x86_64-linux-gnu\/odbc\/psql/;s/CommLog=1/CommLog=0/' /etc/odbcinst.ini > /tmp/temp.ini && \
30
+ mv -f /tmp/temp.ini /etc/odbcinst.ini && \
31
+ # Cleanup build dependencies
32
+ rm -rf ${MYSQL_CONNECTOR}*
33
+
34
+
35
+ ENV VIRTUAL_ENV=/usr/local
36
+ ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
37
+ RUN /install.sh && rm /install.sh
38
+
39
+ RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
40
+
41
+ COPY . /app
42
+
43
+ RUN pip3 install -e .
44
+
45
+ ENTRYPOINT ["ingestr"]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.7.0
3
+ Version: 0.7.3
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -14,6 +14,7 @@ Classifier: Operating System :: OS Independent
14
14
  Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Topic :: Database
16
16
  Requires-Python: >=3.9
17
+ Requires-Dist: connectorx==0.3.3
17
18
  Requires-Dist: cx-oracle==8.3.0
18
19
  Requires-Dist: databricks-sql-connector==2.9.3
19
20
  Requires-Dist: dlt==0.5.1
@@ -104,6 +104,7 @@ class LoaderFileFormat(str, Enum):
104
104
  class SqlBackend(str, Enum):
105
105
  sqlalchemy = "sqlalchemy"
106
106
  pyarrow = "pyarrow"
107
+ connectorx = "connectorx"
107
108
 
108
109
 
109
110
  class Progress(str, Enum):
@@ -111,6 +112,11 @@ class Progress(str, Enum):
111
112
  log = "log"
112
113
 
113
114
 
115
+ class SchemaNaming(str, Enum):
116
+ default = "default"
117
+ direct = "direct"
118
+
119
+
114
120
  @app.command()
115
121
  def ingest(
116
122
  source_uri: Annotated[
@@ -223,6 +229,13 @@ def ingest(
223
229
  envvar="LOADER_FILE_SIZE",
224
230
  ),
225
231
  ] = 100000, # type: ignore
232
+ schema_naming: Annotated[
233
+ SchemaNaming,
234
+ typer.Option(
235
+ help="The naming convention to use when moving the tables from source to destination. The default behavior is explained here: https://dlthub.com/docs/general-usage/schema#naming-convention",
236
+ envvar="SCHEMA_NAMING",
237
+ ),
238
+ ] = SchemaNaming.default, # type: ignore
226
239
  ):
227
240
  track(
228
241
  "command_triggered",
@@ -231,9 +244,9 @@ def ingest(
231
244
  },
232
245
  )
233
246
 
234
- dlt.config["normalize.parquet_normalizer.add_dlt_load_id"] = True
235
- dlt.config["normalize.parquet_normalizer.add_dlt_id"] = True
236
247
  dlt.config["data_writer.file_max_items"] = loader_file_size
248
+ if schema_naming != SchemaNaming.default:
249
+ dlt.config["schema.naming"] = schema_naming.value
237
250
 
238
251
  try:
239
252
  if not dest_table:
@@ -265,7 +278,7 @@ def ingest(
265
278
 
266
279
  progressInstance: Collector = SpinnerCollector()
267
280
  if progress == Progress.log:
268
- progressInstance = LogCollector()
281
+ progressInstance = LogCollector(dump_system_stats=False)
269
282
 
270
283
  pipeline = dlt.pipeline(
271
284
  pipeline_name=m.hexdigest(),
@@ -0,0 +1 @@
1
+ __version__ = "0.7.3"
@@ -1,10 +1,12 @@
1
+ connectorx==0.3.3
1
2
  cx_Oracle==8.3.0
2
3
  databricks-sql-connector==2.9.3
3
4
  dlt==0.5.1
4
5
  duckdb_engine==0.11.5
5
6
  duckdb==0.10.2
6
- google-cloud-bigquery-storage==2.24.0
7
7
  google-api-python-client==2.130.0
8
+ google-cloud-bigquery-storage==2.24.0
9
+ mysql-connector-python==9.0.0
8
10
  pendulum==3.0.0
9
11
  psycopg2-binary==2.9.9
10
12
  py-machineid==0.5.1
@@ -17,10 +19,9 @@ rich==13.7.1
17
19
  rudder-sdk-python==2.1.0
18
20
  snowflake-sqlalchemy==1.5.3
19
21
  sqlalchemy-bigquery==1.11.0
22
+ sqlalchemy-hana==2.0.0
20
23
  sqlalchemy-redshift==0.8.14
21
24
  SQLAlchemy==1.4.52
22
25
  sqlalchemy2-stubs==0.0.2a38
23
26
  tqdm==4.66.2
24
27
  typer==0.12.3
25
- sqlalchemy-hana==2.0.0
26
- mysql-connector-python==9.0.0
@@ -1,36 +0,0 @@
1
- on:
2
- push:
3
- branches:
4
- - main
5
- pull_request:
6
- branches:
7
- - main
8
-
9
- concurrency:
10
- group: ${{ github.workflow }}-${{ github.ref }}
11
- cancel-in-progress: true
12
-
13
- jobs:
14
- tests:
15
- runs-on: ubuntu-latest
16
- steps:
17
- - uses: actions/checkout@v4
18
- - name: install Microsoft ODBC
19
- run: sudo ACCEPT_EULA=Y apt-get install msodbcsql18 -y
20
- - uses: actions/setup-python@v4
21
- with:
22
- python-version: '3.11'
23
- cache: 'pip'
24
- - name: Cache dependencies
25
- uses: actions/cache@v3
26
- id: cache
27
- with:
28
- path: ${{ env.pythonLocation }}
29
- key: ${{ env.pythonLocation }}-${{ hashFiles('requirements.txt') }}
30
- - name: Install pip dependencies
31
- if: steps.cache.outputs.cache-hit != 'true'
32
- run: make deps-ci
33
- - name: run tests
34
- run: make test-ci
35
- - name: check the formatting
36
- run: make lint-ci
ingestr-0.7.0/Dockerfile DELETED
@@ -1,19 +0,0 @@
1
- FROM python:3.11-slim
2
-
3
- WORKDIR /app
4
-
5
- COPY ./requirements.txt /app/requirements.txt
6
-
7
- RUN apt-get update && apt-get -y install libpq-dev gcc curl g++
8
-
9
- ENV VIRTUAL_ENV=/usr/local
10
- ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
11
- RUN /install.sh && rm /install.sh
12
-
13
- RUN /root/.cargo/bin/uv pip install --system --no-cache -r requirements.txt
14
-
15
- COPY . /app
16
-
17
- RUN pip3 install -e .
18
-
19
- ENTRYPOINT ["ingestr"]
@@ -1 +0,0 @@
1
- __version__ = "0.7.0"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes