ingestr 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.3.1 → ingestr-0.3.2}/PKG-INFO +1 -1
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/notion.md +1 -18
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/main.py +8 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/notion/__init__.py +1 -1
- ingestr-0.3.2/ingestr/src/version.py +1 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/testdata/test_append.db +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/testdata/test_create_replace.db +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/testdata/test_delete_insert_with_timerange.db +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/testdata/test_delete_insert_without_primary_key.db +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/testdata/test_merge_with_primary_key.db +0 -0
- ingestr-0.3.1/docs/supported-sources/images/notion_example.png +0 -0
- ingestr-0.3.1/ingestr/src/version.py +0 -1
- {ingestr-0.3.1 → ingestr-0.3.2}/.dockerignore +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/.github/workflows/docker.yml +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/.gitignore +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/Dockerfile +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/LICENSE.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/Makefile +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/README.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/.vitepress/config.mjs +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/commands/example-uris.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/commands/ingest.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/index.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/overview.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/main_test.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/destinations.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/destinations_test.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/factory.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/factory_test.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/sources.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/sources_test.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/sql_database/helpers.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/sql_database/override.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/sql_database/schema_types.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/package-lock.json +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/package.json +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/pyproject.toml +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/requirements-dev.txt +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/requirements.txt +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/resources/demo.gif +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/resources/demo.tape +0 -0
- {ingestr-0.3.1 → ingestr-0.3.2}/resources/ingestr.svg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -25,24 +25,7 @@ Once you complete the guide, you should have an API key, and the table ID to con
|
|
|
25
25
|
ingestr ingest --source-uri 'notion://?api_key=secret_12345' --source-table 'bfeaafc0c25f40a9asdasd672a9456f3' --dest-uri duckdb:///notion.duckdb --dest-table 'notion.output'
|
|
26
26
|
```
|
|
27
27
|
|
|
28
|
-
The result of this command will be a
|
|
29
|
-
|
|
30
|
-
Take a look at the following Notion table:
|
|
31
|
-

|
|
32
|
-
|
|
33
|
-
Ingesting this table using ingestr will create a bunch of new tables with quite a lot of details in them. The following query is a reconstruction of the table as it looks on Notion:
|
|
34
|
-
|
|
35
|
-
```sql
|
|
36
|
-
select n.text__content, s.text__content, o.properties__numerical_value__number, r.text__content
|
|
37
|
-
from notion.output o
|
|
38
|
-
join notion.output__properties__name__title n on n._dlt_parent_id = o._dlt_id
|
|
39
|
-
join notion.output__properties__another_col__rich_text r on r._dlt_parent_id = o._dlt_id
|
|
40
|
-
join notion.output__properties__second_value__rich_text s on s._dlt_parent_id = o._dlt_id
|
|
41
|
-
order by 1;
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
Take this as a starting point and play around with the data.
|
|
45
|
-
|
|
28
|
+
The result of this command will be a table in the `notion.duckdb` database with JSON columns.
|
|
46
29
|
|
|
47
30
|
> [!CAUTION]
|
|
48
31
|
> Notion does not support incremental loading, which means every time you run the command, it will copy the entire table from Notion to the destination. This can be slow for large tables.
|
|
@@ -41,6 +41,9 @@ PARQUET_SUPPORTED_DESTINATIONS = [
|
|
|
41
41
|
"synapse",
|
|
42
42
|
]
|
|
43
43
|
|
|
44
|
+
# these sources would return a JSON for sure, which means they cannot be used with Parquet loader for BigQuery
|
|
45
|
+
JSON_RETURNING_SOURCES = ["notion"]
|
|
46
|
+
|
|
44
47
|
|
|
45
48
|
class SpinnerCollector(Collector):
|
|
46
49
|
status: Status
|
|
@@ -274,6 +277,11 @@ def ingest(
|
|
|
274
277
|
and loader_file_format == "default"
|
|
275
278
|
):
|
|
276
279
|
loader_file_format = "parquet"
|
|
280
|
+
|
|
281
|
+
# if the source is a JSON returning source, we cannot use Parquet loader for BigQuery
|
|
282
|
+
if factory.destination_scheme == 'bigquery' and factory.source_scheme in JSON_RETURNING_SOURCES:
|
|
283
|
+
loader_file_format = "jsonl"
|
|
284
|
+
|
|
277
285
|
elif loader_file_format == "default":
|
|
278
286
|
loader_file_format = "jsonl"
|
|
279
287
|
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.3.2"
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.3.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|