ingestr 0.9.0__tar.gz → 0.9.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.9.0 → ingestr-0.9.2}/PKG-INFO +5 -4
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/mssql.md +8 -0
- ingestr-0.9.2/ingestr/src/arrow/__init__.py +77 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/factory.py +3 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/mongodb/__init__.py +1 -1
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/mongodb/helpers.py +1 -1
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/sources.py +52 -6
- ingestr-0.9.2/ingestr/src/version.py +1 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/pyproject.toml +8 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/requirements.txt +0 -2
- ingestr-0.9.0/ingestr/src/version.py +0 -1
- {ingestr-0.9.0 → ingestr-0.9.2}/.dockerignore +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/.github/workflows/tests.yml +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/.gitignore +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/.python-version +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/Dockerfile +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/LICENSE.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/Makefile +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/README.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/.vitepress/config.mjs +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/commands/example-uris.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/commands/ingest.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/index.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/main.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/.gitignore +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/adjust/_init_.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/adjust/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/destinations.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/sql_database/arrow_helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/sql_database/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/sql_database/override.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/sql_database/schema_types.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/package-lock.json +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/package.json +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/requirements-dev.txt +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/resources/demo.gif +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/resources/demo.tape +0 -0
- {ingestr-0.9.0 → ingestr-0.9.2}/resources/ingestr.svg +0 -0
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.9.0
|
|
3
|
+
Version: 0.9.2
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
7
7
|
Author-email: Burak Karakan <burak.karakan@getbruin.com>
|
|
8
|
-
License-File: LICENSE.md
|
|
9
8
|
Classifier: Development Status :: 4 - Beta
|
|
10
9
|
Classifier: Environment :: Console
|
|
11
10
|
Classifier: Intended Audience :: Developers
|
|
@@ -15,7 +14,6 @@ Classifier: Programming Language :: Python :: 3
|
|
|
15
14
|
Classifier: Topic :: Database
|
|
16
15
|
Requires-Python: >=3.9
|
|
17
16
|
Requires-Dist: confluent-kafka>=2.3.0
|
|
18
|
-
Requires-Dist: cx-oracle==8.3.0
|
|
19
17
|
Requires-Dist: databricks-sql-connector==2.9.3
|
|
20
18
|
Requires-Dist: dlt==0.5.1
|
|
21
19
|
Requires-Dist: duckdb-engine==0.11.5
|
|
@@ -30,7 +28,6 @@ Requires-Dist: py-machineid==0.5.1
|
|
|
30
28
|
Requires-Dist: pyairtable==2.3.3
|
|
31
29
|
Requires-Dist: pymongo==4.6.3
|
|
32
30
|
Requires-Dist: pymysql==1.1.0
|
|
33
|
-
Requires-Dist: pyodbc==5.1.0
|
|
34
31
|
Requires-Dist: pyrate-limiter==3.6.1
|
|
35
32
|
Requires-Dist: redshift-connector==2.1.0
|
|
36
33
|
Requires-Dist: rich==13.7.1
|
|
@@ -46,6 +43,10 @@ Requires-Dist: stripe==10.7.0
|
|
|
46
43
|
Requires-Dist: tqdm==4.66.2
|
|
47
44
|
Requires-Dist: typer==0.12.3
|
|
48
45
|
Requires-Dist: types-requests==2.32.0.20240907
|
|
46
|
+
Provides-Extra: odbc
|
|
47
|
+
Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
|
|
48
|
+
Provides-Extra: oracle
|
|
49
|
+
Requires-Dist: cx-oracle==8.3.0; extra == 'oracle'
|
|
49
50
|
Description-Content-Type: text/markdown
|
|
50
51
|
|
|
51
52
|
<div align="center">
|
|
@@ -3,6 +3,14 @@ Microsoft SQL Server is a relational database management system developed by Mic
|
|
|
3
3
|
|
|
4
4
|
ingestr supports Microsoft SQL Server as both a source and destination.
|
|
5
5
|
|
|
6
|
+
## Installation
|
|
7
|
+
|
|
8
|
+
To use Microsoft SQL Server with ingestr, you need to install the `pyodbc` add-on as well. You can do this by running:
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install ingestr[odbc]
|
|
12
|
+
```
|
|
13
|
+
|
|
6
14
|
## URI Format
|
|
7
15
|
The URI format for Microsoft SQL Server is as follows:
|
|
8
16
|
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Source that loads tables form Airtable.
|
|
2
|
+
Supports whitelisting of tables or loading of all tables from a specified base.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
import dlt
|
|
8
|
+
from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns
|
|
9
|
+
from dlt.extract.items import TTableHintTemplate
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def memory_mapped_arrow(
|
|
13
|
+
path: str,
|
|
14
|
+
columns: Optional[TTableSchemaColumns] = None,
|
|
15
|
+
primary_key: Optional[TTableHintTemplate[TColumnNames]] = None,
|
|
16
|
+
merge_key: Optional[TTableHintTemplate[TColumnNames]] = None,
|
|
17
|
+
incremental: Optional[dlt.sources.incremental[Any]] = None,
|
|
18
|
+
):
|
|
19
|
+
@dlt.resource(
|
|
20
|
+
name="arrow_mmap",
|
|
21
|
+
columns=columns, # type: ignore
|
|
22
|
+
primary_key=primary_key, # type: ignore
|
|
23
|
+
merge_key=merge_key, # type: ignore
|
|
24
|
+
)
|
|
25
|
+
def arrow_mmap(
|
|
26
|
+
incremental: Optional[dlt.sources.incremental[Any]] = incremental,
|
|
27
|
+
):
|
|
28
|
+
import pyarrow as pa # type: ignore
|
|
29
|
+
import pyarrow.ipc as ipc # type: ignore
|
|
30
|
+
|
|
31
|
+
with pa.memory_map(path, "rb") as mmap:
|
|
32
|
+
reader: ipc.RecordBatchFileReader = ipc.open_file(mmap)
|
|
33
|
+
table = reader.read_all()
|
|
34
|
+
|
|
35
|
+
last_value = None
|
|
36
|
+
end_value = None
|
|
37
|
+
if incremental:
|
|
38
|
+
if incremental.cursor_path not in table.column_names:
|
|
39
|
+
raise KeyError(
|
|
40
|
+
f"Cursor column '{incremental.cursor_path}' does not exist in table"
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
last_value = incremental.last_value
|
|
44
|
+
end_value = incremental.end_value
|
|
45
|
+
|
|
46
|
+
if last_value is not None:
|
|
47
|
+
# Check if the column is a date type
|
|
48
|
+
if pa.types.is_temporal(table.schema.field(incremental.cursor_path).type): # type: ignore
|
|
49
|
+
if not isinstance(last_value, pa.TimestampScalar):
|
|
50
|
+
last_value = pa.scalar(last_value, type=pa.timestamp("ns"))
|
|
51
|
+
|
|
52
|
+
table = table.filter(
|
|
53
|
+
pa.compute.field(incremental.cursor_path) > last_value # type: ignore
|
|
54
|
+
)
|
|
55
|
+
else:
|
|
56
|
+
# For non-date types, use direct comparison
|
|
57
|
+
table = table.filter(
|
|
58
|
+
pa.compute.field(incremental.cursor_path) > last_value # type: ignore
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
if end_value is not None:
|
|
62
|
+
if pa.types.is_timestamp(table.schema.field(incremental.cursor_path).type): # type: ignore
|
|
63
|
+
# Convert end_value to timestamp if it's not already
|
|
64
|
+
if not isinstance(end_value, pa.TimestampScalar):
|
|
65
|
+
end_value = pa.scalar(end_value, type=pa.timestamp("ns"))
|
|
66
|
+
table = table.filter(
|
|
67
|
+
pa.compute.field(incremental.cursor_path) < end_value # type: ignore
|
|
68
|
+
)
|
|
69
|
+
else:
|
|
70
|
+
# For non-date types, use direct comparison
|
|
71
|
+
table = table.filter(
|
|
72
|
+
pa.compute.field(incremental.cursor_path) < end_value # type: ignore
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
yield table
|
|
76
|
+
|
|
77
|
+
return arrow_mmap
|
|
@@ -18,6 +18,7 @@ from ingestr.src.sources import (
|
|
|
18
18
|
AdjustSource,
|
|
19
19
|
AirtableSource,
|
|
20
20
|
AppsflyerSource,
|
|
21
|
+
ArrowMemoryMappedSource,
|
|
21
22
|
ChessSource,
|
|
22
23
|
FacebookAdsSource,
|
|
23
24
|
GoogleSheetsSource,
|
|
@@ -136,6 +137,8 @@ class SourceDestinationFactory:
|
|
|
136
137
|
return AdjustSource()
|
|
137
138
|
elif self.source_scheme == "zendesk":
|
|
138
139
|
return ZendeskSource()
|
|
140
|
+
elif self.source_scheme == "mmap":
|
|
141
|
+
return ArrowMemoryMappedSource()
|
|
139
142
|
elif self.source_scheme == "s3":
|
|
140
143
|
return S3Source()
|
|
141
144
|
else:
|
|
@@ -65,7 +65,7 @@ def mongodb(
|
|
|
65
65
|
sections=("sources", "mongodb"), spec=MongoDbCollectionResourceConfiguration
|
|
66
66
|
)
|
|
67
67
|
def mongodb_collection(
|
|
68
|
-
connection_url: str = dlt.secrets.value,
|
|
68
|
+
connection_url: str = dlt.config.value,
|
|
69
69
|
database: Optional[str] = dlt.config.value,
|
|
70
70
|
collection: str = dlt.config.value,
|
|
71
71
|
incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg]
|
|
@@ -155,7 +155,7 @@ class MongoDbCollectionConfiguration(BaseConfiguration):
|
|
|
155
155
|
|
|
156
156
|
@configspec
|
|
157
157
|
class MongoDbCollectionResourceConfiguration(BaseConfiguration):
|
|
158
|
-
connection_url: str = dlt.secrets.value
|
|
158
|
+
connection_url: str = dlt.config.value
|
|
159
159
|
database: Optional[str] = dlt.config.value
|
|
160
160
|
collection: str = dlt.config.value
|
|
161
161
|
incremental: Optional[dlt.sources.incremental] = None # type: ignore[type-arg]
|
|
@@ -12,6 +12,7 @@ from dlt.common.typing import TSecretStrValue
|
|
|
12
12
|
from ingestr.src.adjust._init_ import adjust_source
|
|
13
13
|
from ingestr.src.airtable import airtable_source
|
|
14
14
|
from ingestr.src.appsflyer._init_ import appsflyer_source
|
|
15
|
+
from ingestr.src.arrow import memory_mapped_arrow
|
|
15
16
|
from ingestr.src.chess import source
|
|
16
17
|
from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
|
|
17
18
|
from ingestr.src.filesystem import readers
|
|
@@ -75,6 +76,51 @@ class SqlSource:
|
|
|
75
76
|
return table_instance
|
|
76
77
|
|
|
77
78
|
|
|
79
|
+
class ArrowMemoryMappedSource:
|
|
80
|
+
table_builder: Callable
|
|
81
|
+
|
|
82
|
+
def __init__(self, table_builder=memory_mapped_arrow) -> None:
|
|
83
|
+
self.table_builder = table_builder
|
|
84
|
+
|
|
85
|
+
def handles_incrementality(self) -> bool:
|
|
86
|
+
return False
|
|
87
|
+
|
|
88
|
+
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
89
|
+
import os
|
|
90
|
+
|
|
91
|
+
incremental = None
|
|
92
|
+
if kwargs.get("incremental_key"):
|
|
93
|
+
start_value = kwargs.get("interval_start")
|
|
94
|
+
end_value = kwargs.get("interval_end")
|
|
95
|
+
|
|
96
|
+
incremental = dlt.sources.incremental(
|
|
97
|
+
kwargs.get("incremental_key", ""),
|
|
98
|
+
initial_value=start_value,
|
|
99
|
+
end_value=end_value,
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
file_path = uri.split("://")[1]
|
|
103
|
+
if not os.path.exists(file_path):
|
|
104
|
+
raise ValueError(f"File at path {file_path} does not exist")
|
|
105
|
+
|
|
106
|
+
if os.path.isdir(file_path):
|
|
107
|
+
raise ValueError(
|
|
108
|
+
f"Path {file_path} is a directory, it should be an Arrow memory mapped file"
|
|
109
|
+
)
|
|
110
|
+
|
|
111
|
+
primary_key = kwargs.get("primary_key")
|
|
112
|
+
merge_key = kwargs.get("merge_key")
|
|
113
|
+
|
|
114
|
+
table_instance = self.table_builder(
|
|
115
|
+
path=file_path,
|
|
116
|
+
incremental=incremental,
|
|
117
|
+
merge_key=merge_key,
|
|
118
|
+
primary_key=primary_key,
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
return table_instance
|
|
122
|
+
|
|
123
|
+
|
|
78
124
|
class MongoDbSource:
|
|
79
125
|
table_builder: Callable
|
|
80
126
|
|
|
@@ -656,12 +702,12 @@ class KafkaSource:
|
|
|
656
702
|
credentials=KafkaCredentials(
|
|
657
703
|
bootstrap_servers=bootstrap_servers[0],
|
|
658
704
|
group_id=group_id[0],
|
|
659
|
-
security_protocol=
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
sasl_mechanisms=
|
|
663
|
-
|
|
664
|
-
|
|
705
|
+
security_protocol=(
|
|
706
|
+
security_protocol[0] if len(security_protocol) > 0 else None
|
|
707
|
+
), # type: ignore
|
|
708
|
+
sasl_mechanisms=(
|
|
709
|
+
sasl_mechanisms[0] if len(sasl_mechanisms) > 0 else None
|
|
710
|
+
), # type: ignore
|
|
665
711
|
sasl_username=sasl_username[0] if len(sasl_username) > 0 else None, # type: ignore
|
|
666
712
|
sasl_password=sasl_password[0] if len(sasl_password) > 0 else None, # type: ignore
|
|
667
713
|
),
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.9.2"
|
|
@@ -128,6 +128,14 @@ classifiers = [
|
|
|
128
128
|
"Topic :: Database"
|
|
129
129
|
]
|
|
130
130
|
|
|
131
|
+
[project.optional-dependencies]
|
|
132
|
+
oracle = [
|
|
133
|
+
"cx_Oracle==8.3.0",
|
|
134
|
+
]
|
|
135
|
+
odbc = [
|
|
136
|
+
"pyodbc==5.1.0",
|
|
137
|
+
]
|
|
138
|
+
|
|
131
139
|
[project.urls]
|
|
132
140
|
Homepage = "https://github.com/bruin-data/ingestr"
|
|
133
141
|
Issues = "https://github.com/bruin-data/ingestr/issues"
|
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
cx_Oracle==8.3.0
|
|
2
1
|
confluent-kafka>=2.3.0
|
|
3
2
|
databricks-sql-connector==2.9.3
|
|
4
3
|
dlt==0.5.1
|
|
@@ -12,7 +11,6 @@ psycopg2-binary==2.9.9
|
|
|
12
11
|
py-machineid==0.5.1
|
|
13
12
|
pymongo==4.6.3
|
|
14
13
|
pymysql==1.1.0
|
|
15
|
-
pyodbc==5.1.0
|
|
16
14
|
pyrate-limiter==3.6.1
|
|
17
15
|
redshift-connector==2.1.0
|
|
18
16
|
rich==13.7.1
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.9.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|