ingestr 0.2.6__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (77) hide show
  1. {ingestr-0.2.6 → ingestr-0.3.1}/Makefile +1 -1
  2. {ingestr-0.2.6 → ingestr-0.3.1}/PKG-INFO +87 -23
  3. {ingestr-0.2.6 → ingestr-0.3.1}/README.md +78 -14
  4. {ingestr-0.2.6 → ingestr-0.3.1}/docs/.vitepress/config.mjs +29 -20
  5. ingestr-0.3.1/docs/supported-sources/images/notion_example.png +0 -0
  6. ingestr-0.3.1/docs/supported-sources/notion.md +49 -0
  7. ingestr-0.3.1/docs/supported-sources/overview.md +84 -0
  8. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/snowflake.md +3 -2
  9. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/main.py +46 -4
  10. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/main_test.py +31 -11
  11. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/destinations.py +2 -1
  12. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/factory.py +3 -1
  13. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/mongodb/__init__.py +1 -1
  14. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/mongodb/helpers.py +5 -5
  15. ingestr-0.3.1/ingestr/src/notion/__init__.py +55 -0
  16. ingestr-0.3.1/ingestr/src/notion/helpers/__init__.py +0 -0
  17. ingestr-0.3.1/ingestr/src/notion/helpers/client.py +164 -0
  18. ingestr-0.3.1/ingestr/src/notion/helpers/database.py +78 -0
  19. ingestr-0.3.1/ingestr/src/notion/settings.py +3 -0
  20. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/sources.py +25 -0
  21. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/sources_test.py +4 -2
  22. ingestr-0.3.1/ingestr/src/sql_database/__init__.py +172 -0
  23. ingestr-0.3.1/ingestr/src/sql_database/helpers.py +258 -0
  24. ingestr-0.3.1/ingestr/src/sql_database/override.py +9 -0
  25. ingestr-0.3.1/ingestr/src/sql_database/schema_types.py +162 -0
  26. ingestr-0.3.1/ingestr/src/version.py +1 -0
  27. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/testdata/test_append.db +0 -0
  28. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/testdata/test_create_replace.db +0 -0
  29. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/testdata/test_delete_insert_with_timerange.db +0 -0
  30. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/testdata/test_delete_insert_without_primary_key.db +0 -0
  31. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/testdata/test_merge_with_primary_key.db +0 -0
  32. {ingestr-0.2.6 → ingestr-0.3.1}/pyproject.toml +12 -1
  33. ingestr-0.3.1/requirements-dev.txt +9 -0
  34. {ingestr-0.2.6 → ingestr-0.3.1}/requirements.txt +8 -8
  35. ingestr-0.2.6/docs/supported-sources/overview.md +0 -19
  36. ingestr-0.2.6/ingestr/src/sql_database/__init__.py +0 -60
  37. ingestr-0.2.6/ingestr/src/sql_database/helpers.py +0 -128
  38. ingestr-0.2.6/ingestr/src/sql_database/schema_types.py +0 -54
  39. ingestr-0.2.6/ingestr/src/sql_database/settings.py +0 -3
  40. ingestr-0.2.6/ingestr/src/version.py +0 -1
  41. ingestr-0.2.6/requirements-dev.txt +0 -10
  42. {ingestr-0.2.6 → ingestr-0.3.1}/.dockerignore +0 -0
  43. {ingestr-0.2.6 → ingestr-0.3.1}/.github/workflows/deploy-docs.yml +0 -0
  44. {ingestr-0.2.6 → ingestr-0.3.1}/.github/workflows/docker.yml +0 -0
  45. {ingestr-0.2.6 → ingestr-0.3.1}/.gitignore +0 -0
  46. {ingestr-0.2.6 → ingestr-0.3.1}/Dockerfile +0 -0
  47. {ingestr-0.2.6 → ingestr-0.3.1}/LICENSE.md +0 -0
  48. {ingestr-0.2.6 → ingestr-0.3.1}/docs/.vitepress/theme/custom.css +0 -0
  49. {ingestr-0.2.6 → ingestr-0.3.1}/docs/.vitepress/theme/index.js +0 -0
  50. {ingestr-0.2.6 → ingestr-0.3.1}/docs/commands/example-uris.md +0 -0
  51. {ingestr-0.2.6 → ingestr-0.3.1}/docs/commands/ingest.md +0 -0
  52. {ingestr-0.2.6 → ingestr-0.3.1}/docs/getting-started/core-concepts.md +0 -0
  53. {ingestr-0.2.6 → ingestr-0.3.1}/docs/getting-started/incremental-loading.md +0 -0
  54. {ingestr-0.2.6 → ingestr-0.3.1}/docs/getting-started/quickstart.md +0 -0
  55. {ingestr-0.2.6 → ingestr-0.3.1}/docs/getting-started/telemetry.md +0 -0
  56. {ingestr-0.2.6 → ingestr-0.3.1}/docs/index.md +0 -0
  57. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/bigquery.md +0 -0
  58. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/csv.md +0 -0
  59. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/databricks.md +0 -0
  60. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/duckdb.md +0 -0
  61. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/mongodb.md +0 -0
  62. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/mssql.md +0 -0
  63. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/mysql.md +0 -0
  64. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/oracle.md +0 -0
  65. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/postgres.md +0 -0
  66. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/redshift.md +0 -0
  67. {ingestr-0.2.6 → ingestr-0.3.1}/docs/supported-sources/sqlite.md +0 -0
  68. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/destinations_test.py +0 -0
  69. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/factory_test.py +0 -0
  70. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/telemetry/event.py +0 -0
  71. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  72. {ingestr-0.2.6 → ingestr-0.3.1}/ingestr/testdata/.gitignore +0 -0
  73. {ingestr-0.2.6 → ingestr-0.3.1}/package-lock.json +0 -0
  74. {ingestr-0.2.6 → ingestr-0.3.1}/package.json +0 -0
  75. {ingestr-0.2.6 → ingestr-0.3.1}/resources/demo.gif +0 -0
  76. {ingestr-0.2.6 → ingestr-0.3.1}/resources/demo.tape +0 -0
  77. {ingestr-0.2.6 → ingestr-0.3.1}/resources/ingestr.svg +0 -0
@@ -22,7 +22,7 @@ test-specific: venv
22
22
 
23
23
  lint-ci:
24
24
  ruff check ingestr --fix && ruff format ingestr
25
- mypy --explicit-package-bases ingestr --config-file pyproject.toml
25
+ mypy --config-file pyproject.toml --explicit-package-bases ingestr
26
26
 
27
27
  lint: venv
28
28
  . venv/bin/activate; $(MAKE) lint-ci
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.2.6
3
+ Version: 0.3.1
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -16,26 +16,26 @@ Classifier: Topic :: Database
16
16
  Requires-Python: >=3.9
17
17
  Requires-Dist: cx-oracle==8.3.0
18
18
  Requires-Dist: databricks-sql-connector==2.9.3
19
- Requires-Dist: dlt==0.4.6
20
- Requires-Dist: duckdb-engine==0.11.2
21
- Requires-Dist: duckdb==0.10.1
19
+ Requires-Dist: dlt==0.4.8
20
+ Requires-Dist: duckdb-engine==0.11.5
21
+ Requires-Dist: duckdb==0.10.2
22
22
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
23
23
  Requires-Dist: pendulum==3.0.0
24
24
  Requires-Dist: psycopg2-binary==2.9.9
25
25
  Requires-Dist: py-machineid==0.5.1
26
- Requires-Dist: pymongo==4.6.2
26
+ Requires-Dist: pymongo==4.6.3
27
27
  Requires-Dist: pymysql==1.1.0
28
28
  Requires-Dist: pyodbc==5.1.0
29
29
  Requires-Dist: redshift-connector==2.1.0
30
- Requires-Dist: rich==13.7.0
30
+ Requires-Dist: rich==13.7.1
31
31
  Requires-Dist: rudder-sdk-python==2.1.0
32
- Requires-Dist: snowflake-sqlalchemy==1.5.1
33
- Requires-Dist: sqlalchemy-bigquery==1.10.0
32
+ Requires-Dist: snowflake-sqlalchemy==1.5.3
33
+ Requires-Dist: sqlalchemy-bigquery==1.11.0
34
34
  Requires-Dist: sqlalchemy-redshift==0.8.14
35
35
  Requires-Dist: sqlalchemy2-stubs==0.0.2a38
36
36
  Requires-Dist: sqlalchemy==1.4.52
37
37
  Requires-Dist: tqdm==4.66.2
38
- Requires-Dist: typer==0.9.0
38
+ Requires-Dist: typer==0.12.3
39
39
  Description-Content-Type: text/markdown
40
40
 
41
41
  <div align="center">
@@ -91,20 +91,84 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
91
91
 
92
92
  ## Supported Sources & Destinations
93
93
 
94
- | Database | Source | Destination |
95
- |----------------------|--------|-------------|
96
- | Postgres | ✅ | ✅ |
97
- | BigQuery | ✅ | ✅ |
98
- | Snowflake | ✅ | ✅ |
99
- | Redshift | ✅ | ✅ |
100
- | Databricks | ✅ | ✅ |
101
- | DuckDB | ✅ | ✅ |
102
- | Microsoft SQL Server | ✅ | ✅ |
103
- | Local CSV file | ✅ | ✅ |
104
- | MongoDB | ✅ | ❌ |
105
- | Oracle | ✅ | ❌ |
106
- | SQLite | ✅ | ❌ |
107
- | MySQL | ✅ | ❌ |
94
+ <table>
95
+ <tr>
96
+ <th></th>
97
+ <th>Source</th>
98
+ <th>Destination</th>
99
+ </tr>
100
+ <tr>
101
+ <td colspan="3" style='text-align:center;'><strong>Databases</strong></td>
102
+ </tr>
103
+ <tr>
104
+ <td>Postgres</td>
105
+ <td>✅</td>
106
+ <td>✅</td>
107
+ </tr>
108
+ <tr>
109
+ <td>BigQuery</td>
110
+ <td>✅</td>
111
+ <td>✅</td>
112
+ </tr>
113
+ <tr>
114
+ <td>Snowflake</td>
115
+ <td>✅</td>
116
+ <td>✅</td>
117
+ </tr>
118
+ <tr>
119
+ <td>Redshift</td>
120
+ <td>✅</td>
121
+ <td>✅</td>
122
+ </tr>
123
+ <tr>
124
+ <td>Databricks</td>
125
+ <td>✅</td>
126
+ <td>✅</td>
127
+ </tr>
128
+ <tr>
129
+ <td>DuckDB</td>
130
+ <td>✅</td>
131
+ <td>✅</td>
132
+ </tr>
133
+ <tr>
134
+ <td>Microsoft SQL Server</td>
135
+ <td>✅</td>
136
+ <td>✅</td>
137
+ </tr>
138
+ <tr>
139
+ <td>Local CSV file</td>
140
+ <td>✅</td>
141
+ <td>✅</td>
142
+ </tr>
143
+ <tr>
144
+ <td>MongoDB</td>
145
+ <td>✅</td>
146
+ <td>❌</td>
147
+ </tr>
148
+ <tr>
149
+ <td>Oracle</td>
150
+ <td>✅</td>
151
+ <td>❌</td>
152
+ </tr>
153
+ <tr>
154
+ <td>SQLite</td>
155
+ <td>✅</td>
156
+ <td>❌</td>
157
+ </tr>
158
+ <tr>
159
+ <td>MySQL</td>
160
+ <td>✅</td>
161
+ <td>❌</td>
162
+ </tr>
163
+ <tr>
164
+ <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
165
+ </tr>
166
+ <tr>
167
+ <td>Notion</td>
168
+ <td>✅</td>
169
+ <td>❌</td>
170
+ </tr>
171
+ </table>
108
172
 
109
173
  More to come soon!
110
174
 
@@ -51,20 +51,84 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
51
51
 
52
52
  ## Supported Sources & Destinations
53
53
 
54
- | Database | Source | Destination |
55
- |----------------------|--------|-------------|
56
- | Postgres | ✅ | ✅ |
57
- | BigQuery | ✅ | ✅ |
58
- | Snowflake | ✅ | ✅ |
59
- | Redshift | ✅ | ✅ |
60
- | Databricks | ✅ | ✅ |
61
- | DuckDB | ✅ | ✅ |
62
- | Microsoft SQL Server | ✅ | ✅ |
63
- | Local CSV file | ✅ | ✅ |
64
- | MongoDB | ✅ | ❌ |
65
- | Oracle | ✅ | ❌ |
66
- | SQLite | ✅ | ❌ |
67
- | MySQL | ✅ | ❌ |
54
+ <table>
55
+ <tr>
56
+ <th></th>
57
+ <th>Source</th>
58
+ <th>Destination</th>
59
+ </tr>
60
+ <tr>
61
+ <td colspan="3" style='text-align:center;'><strong>Databases</strong></td>
62
+ </tr>
63
+ <tr>
64
+ <td>Postgres</td>
65
+ <td>✅</td>
66
+ <td>✅</td>
67
+ </tr>
68
+ <tr>
69
+ <td>BigQuery</td>
70
+ <td>✅</td>
71
+ <td>✅</td>
72
+ </tr>
73
+ <tr>
74
+ <td>Snowflake</td>
75
+ <td>✅</td>
76
+ <td>✅</td>
77
+ </tr>
78
+ <tr>
79
+ <td>Redshift</td>
80
+ <td>✅</td>
81
+ <td>✅</td>
82
+ </tr>
83
+ <tr>
84
+ <td>Databricks</td>
85
+ <td>✅</td>
86
+ <td>✅</td>
87
+ </tr>
88
+ <tr>
89
+ <td>DuckDB</td>
90
+ <td>✅</td>
91
+ <td>✅</td>
92
+ </tr>
93
+ <tr>
94
+ <td>Microsoft SQL Server</td>
95
+ <td>✅</td>
96
+ <td>✅</td>
97
+ </tr>
98
+ <tr>
99
+ <td>Local CSV file</td>
100
+ <td>✅</td>
101
+ <td>✅</td>
102
+ </tr>
103
+ <tr>
104
+ <td>MongoDB</td>
105
+ <td>✅</td>
106
+ <td>❌</td>
107
+ </tr>
108
+ <tr>
109
+ <td>Oracle</td>
110
+ <td>✅</td>
111
+ <td>❌</td>
112
+ </tr>
113
+ <tr>
114
+ <td>SQLite</td>
115
+ <td>✅</td>
116
+ <td>❌</td>
117
+ </tr>
118
+ <tr>
119
+ <td>MySQL</td>
120
+ <td>✅</td>
121
+ <td>❌</td>
122
+ </tr>
123
+ <tr>
124
+ <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
125
+ </tr>
126
+ <tr>
127
+ <td>Notion</td>
128
+ <td>✅</td>
129
+ <td>❌</td>
130
+ </tr>
131
+ </table>
68
132
 
69
133
  More to come soon!
70
134
 
@@ -4,20 +4,17 @@ import { defineConfig } from "vitepress";
4
4
  export default defineConfig({
5
5
  title: "ingestr",
6
6
  description: "Ingest & copy data between any source and any destination",
7
- base: '/ingestr/',
7
+ base: "/ingestr/",
8
8
  head: [
9
+ ["script", { async: "", src: "https://www.googletagmanager.com/gtag/js?id=G-MZJ20PP4MJ" }],
9
10
  [
10
- 'script',
11
- { async: '', src: 'https://www.googletagmanager.com/gtag/js?id=G-MZJ20PP4MJ' }
12
- ],
13
- [
14
- 'script',
11
+ "script",
15
12
  {},
16
13
  `window.dataLayer = window.dataLayer || [];
17
14
  function gtag(){dataLayer.push(arguments);}
18
15
  gtag('js', new Date());
19
- gtag('config', 'G-MZJ20PP4MJ');`
20
- ]
16
+ gtag('config', 'G-MZJ20PP4MJ');`,
17
+ ],
21
18
  ],
22
19
  themeConfig: {
23
20
  // https://vitepress.dev/reference/default-theme-config
@@ -46,19 +43,31 @@ export default defineConfig({
46
43
  {
47
44
  text: "Sources & Destinations",
48
45
  items: [
49
- { text: "AWS Redshift", link: "/supported-sources/redshift.md" },
50
- { text: "Databricks", link: "/supported-sources/databricks.md" },
51
- { text: "DuckDB", link: "/supported-sources/duckdb.md" },
52
- { text: "Google BigQuery", link: "/supported-sources/bigquery.md" },
53
- { text: "Local CSV Files", link: "/supported-sources/csv.md" },
54
- { text: "Microsoft SQL Server", link: "/supported-sources/mssql.md" },
55
- { text: "MongoDB", link: "/supported-sources/mongodb.md" },
56
- { text: "MySQL", link: "/supported-sources/mysql.md" },
57
- { text: "Oracle", link: "/supported-sources/oracle.md" },
58
46
  { text: "Overview", link: "/supported-sources/overview.md" },
59
- { text: "Postgres", link: "/supported-sources/postgres.md" },
60
- { text: "Snowflake", link: "/supported-sources/snowflake.md" },
61
- { text: "SQLite", link: "/supported-sources/sqlite.md" },
47
+ {
48
+ text: "Databases",
49
+ collapsed: false,
50
+ items: [
51
+ { text: "AWS Redshift", link: "/supported-sources/redshift.md" },
52
+ { text: "Databricks", link: "/supported-sources/databricks.md" },
53
+ { text: "DuckDB", link: "/supported-sources/duckdb.md" },
54
+ { text: "Google BigQuery", link: "/supported-sources/bigquery.md" },
55
+ { text: "Local CSV Files", link: "/supported-sources/csv.md" },
56
+ { text: "Microsoft SQL Server", link: "/supported-sources/mssql.md" },
57
+ { text: "MongoDB", link: "/supported-sources/mongodb.md" },
58
+ { text: "MySQL", link: "/supported-sources/mysql.md" },
59
+ { text: "Oracle", link: "/supported-sources/oracle.md" },
60
+ { text: "Postgres", link: "/supported-sources/postgres.md" },
61
+ { text: "Snowflake", link: "/supported-sources/snowflake.md" },
62
+ { text: "SQLite", link: "/supported-sources/sqlite.md" },
63
+ ],
64
+ },
65
+
66
+ {
67
+ text: "Platforms",
68
+ collapsed: false,
69
+ items: [{ text: "Notion", link: "/supported-sources/notion.md" }],
70
+ },
62
71
  ],
63
72
  },
64
73
  ],
@@ -0,0 +1,49 @@
1
+ # Notion
2
+ [Notion](https://www.notion.so/) is an all-in-one workspace for note-taking, project management, and database management.
3
+
4
+ ingestr supports Notion as a source.
5
+
6
+ ## URI Format
7
+ The URI format for Notion is as follows:
8
+
9
+ ```plaintext
10
+ notion://?api_key=token
11
+ ```
12
+
13
+ URI parameters:
14
+ - `api_key`: the integration token used for authentication with the Notion API
15
+
16
+ The URI is used to connect to the Notion API for extracting data. More details on setting up Notion integrations can be found [here](https://developers.notion.com/docs/getting-started).
17
+
18
+ ## Setting up a Notion Integration
19
+
20
+ Notion requires a few steps to set up an integration; please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/notion#setup-guide).
21
+
22
+ Once you complete the guide, you should have an API key and the table ID to connect to. Let's say your API key is `secret_12345` and the database you'd like to connect to is `bfeaafc0c25f40a9asdasd672a9456f3`. Here's a sample command that will copy the data from the Notion table into a duckdb database:
23
+
24
+ ```sh
25
+ ingestr ingest --source-uri 'notion://?api_key=secret_12345' --source-table 'bfeaafc0c25f40a9asdasd672a9456f3' --dest-uri duckdb:///notion.duckdb --dest-table 'notion.output'
26
+ ```
27
+
28
+ The result of this command will be a bunch of tables in the `notion.duckdb` database. The Notion integration creates a bunch of extra tables in the schema to keep track of additional information about every field in a database. You should take some time to play around with the data and understand how it's structured, and take a good look at the `_dlt_parent_id` column in the tables to understand the relationships between tables.
29
+
30
+ Take a look at the following Notion table:
31
+ ![an example Notion database](./images/notion_example.png)
32
+
33
+ Ingesting this table using ingestr will create a bunch of new tables with quite a lot of details in them. The following query is a reconstruction of the table as it looks on Notion:
34
+
35
+ ```sql
36
+ select n.text__content, s.text__content, o.properties__numerical_value__number, r.text__content
37
+ from notion.output o
38
+ join notion.output__properties__name__title n on n._dlt_parent_id = o._dlt_id
39
+ join notion.output__properties__another_col__rich_text r on r._dlt_parent_id = o._dlt_id
40
+ join notion.output__properties__second_value__rich_text s on s._dlt_parent_id = o._dlt_id
41
+ order by 1;
42
+ ```
43
+
44
+ Take this as a starting point and play around with the data.
45
+
46
+
47
+ > [!CAUTION]
48
+ > Notion does not support incremental loading, which means every time you run the command, it will copy the entire table from Notion to the destination. This can be slow for large tables.
49
+
@@ -0,0 +1,84 @@
1
+ # Supported Sources & Destinations
2
+ ingestr supports the following sources and destinations:
3
+
4
+
5
+ <table>
6
+ <tr>
7
+ <th></th>
8
+ <th>Source</th>
9
+ <th>Destination</th>
10
+ </tr>
11
+ <tr>
12
+ <td colspan="3" style='text-align:center;'><strong>Databases</strong></td>
13
+ </tr>
14
+ <tr>
15
+ <td>Postgres</td>
16
+ <td>✅</td>
17
+ <td>✅</td>
18
+ </tr>
19
+ <tr>
20
+ <td>BigQuery</td>
21
+ <td>✅</td>
22
+ <td>✅</td>
23
+ </tr>
24
+ <tr>
25
+ <td>Snowflake</td>
26
+ <td>✅</td>
27
+ <td>✅</td>
28
+ </tr>
29
+ <tr>
30
+ <td>Redshift</td>
31
+ <td>✅</td>
32
+ <td>✅</td>
33
+ </tr>
34
+ <tr>
35
+ <td>Databricks</td>
36
+ <td>✅</td>
37
+ <td>✅</td>
38
+ </tr>
39
+ <tr>
40
+ <td>DuckDB</td>
41
+ <td>✅</td>
42
+ <td>✅</td>
43
+ </tr>
44
+ <tr>
45
+ <td>Microsoft SQL Server</td>
46
+ <td>✅</td>
47
+ <td>✅</td>
48
+ </tr>
49
+ <tr>
50
+ <td>Local CSV file</td>
51
+ <td>✅</td>
52
+ <td>✅</td>
53
+ </tr>
54
+ <tr>
55
+ <td>MongoDB</td>
56
+ <td>✅</td>
57
+ <td>❌</td>
58
+ </tr>
59
+ <tr>
60
+ <td>Oracle</td>
61
+ <td>✅</td>
62
+ <td>❌</td>
63
+ </tr>
64
+ <tr>
65
+ <td>SQLite</td>
66
+ <td>✅</td>
67
+ <td>❌</td>
68
+ </tr>
69
+ <tr>
70
+ <td>MySQL</td>
71
+ <td>✅</td>
72
+ <td>❌</td>
73
+ </tr>
74
+ <tr>
75
+ <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
76
+ </tr>
77
+ <tr>
78
+ <td>Notion</td>
79
+ <td>✅</td>
80
+ <td>❌</td>
81
+ </tr>
82
+ </table>
83
+
84
+ More to come soon!
@@ -7,7 +7,7 @@ ingestr supports Snowflake as both a source and destination.
7
7
  The URI format for Snowflake is as follows:
8
8
 
9
9
  ```plaintext
10
- snowflake://user:password@account/dbname?warehouse=COMPUTE_WH
10
+ snowflake://user:password@account/dbname?warehouse=COMPUTE_WH&role=data_scientist
11
11
  ```
12
12
 
13
13
  URI parameters:
@@ -15,6 +15,7 @@ URI parameters:
15
15
  - `password`: the password for the user
16
16
  - `account`: your Snowflake account identifier
17
17
  - `dbname`: the name of the database to connect to
18
- - `warehouse`: the name of the warehouse to use
18
+ - `warehouse`: the name of the warehouse to use (optional)
19
+ - `role`: the name of the role to use (optional)
19
20
 
20
21
  The same URI structure can be used both for sources and destinations. You can read more about SQLAlchemy's Snowflake dialect [here](https://docs.snowflake.com/en/developer-guide/python-connector/sqlalchemy#connection-parameters).
@@ -5,7 +5,7 @@ from typing import Optional
5
5
  import dlt
6
6
  import humanize
7
7
  import typer
8
- from dlt.common.runtime.collector import Collector
8
+ from dlt.common.runtime.collector import Collector, LogCollector
9
9
  from rich.console import Console
10
10
  from rich.status import Status
11
11
  from typing_extensions import Annotated
@@ -32,10 +32,17 @@ DATE_FORMATS = [
32
32
  "%Y-%m-%dT%H:%M:%S.%f%z",
33
33
  ]
34
34
 
35
+ # https://dlthub.com/docs/dlt-ecosystem/file-formats/parquet#supported-destinations
36
+ PARQUET_SUPPORTED_DESTINATIONS = [
37
+ "bigquery",
38
+ "duckdb",
39
+ "snowflake",
40
+ "databricks",
41
+ "synapse",
42
+ ]
35
43
 
36
- class SpinnerCollector(Collector):
37
- """A Collector that shows progress with `tqdm` progress bars"""
38
44
 
45
+ class SpinnerCollector(Collector):
39
46
  status: Status
40
47
  current_step: str
41
48
  started: bool
@@ -150,6 +157,27 @@ def ingest(
150
157
  envvar="FULL_REFRESH",
151
158
  ),
152
159
  ] = False, # type: ignore
160
+ progress: Annotated[
161
+ Optional[str],
162
+ typer.Option(
163
+ help="The progress display type, must be one of 'interactive', 'log'",
164
+ envvar="PROGRESS",
165
+ ),
166
+ ] = "interactive", # type: ignore
167
+ sql_backend: Annotated[
168
+ Optional[str],
169
+ typer.Option(
170
+ help="The SQL backend to use, must be one of 'sqlalchemy', 'pyarrow'",
171
+ envvar="SQL_BACKEND",
172
+ ),
173
+ ] = "pyarrow", # type: ignore
174
+ loader_file_format: Annotated[
175
+ Optional[str],
176
+ typer.Option(
177
+ help="The file format to use when loading data, must be one of 'jsonl', 'parquet', 'default'",
178
+ envvar="LOADER_FILE_FORMAT",
179
+ ),
180
+ ] = "default", # type: ignore
153
181
  ):
154
182
  track(
155
183
  "command_triggered",
@@ -186,12 +214,16 @@ def ingest(
186
214
  m = hashlib.sha256()
187
215
  m.update(dest_table.encode("utf-8"))
188
216
 
217
+ progressInstance: Collector = SpinnerCollector()
218
+ if progress == "log":
219
+ progressInstance = LogCollector()
220
+
189
221
  pipeline = dlt.pipeline(
190
222
  pipeline_name=m.hexdigest(),
191
223
  destination=destination.dlt_dest(
192
224
  uri=dest_uri,
193
225
  ),
194
- progress=SpinnerCollector(),
226
+ progress=progressInstance,
195
227
  pipelines_dir="pipeline_data",
196
228
  full_refresh=full_refresh,
197
229
  )
@@ -231,11 +263,20 @@ def ingest(
231
263
  merge_key=merge_key,
232
264
  interval_start=interval_start,
233
265
  interval_end=interval_end,
266
+ sql_backend=sql_backend,
234
267
  )
235
268
 
236
269
  if original_incremental_strategy == "delete+insert":
237
270
  dlt_source.incremental.primary_key = ()
238
271
 
272
+ if (
273
+ factory.destination_scheme in PARQUET_SUPPORTED_DESTINATIONS
274
+ and loader_file_format == "default"
275
+ ):
276
+ loader_file_format = "parquet"
277
+ elif loader_file_format == "default":
278
+ loader_file_format = "jsonl"
279
+
239
280
  run_info = pipeline.run(
240
281
  dlt_source,
241
282
  **destination.dlt_run_params(
@@ -244,6 +285,7 @@ def ingest(
244
285
  ),
245
286
  write_disposition=incremental_strategy, # type: ignore
246
287
  primary_key=(primary_key if primary_key and len(primary_key) > 0 else None), # type: ignore
288
+ loader_file_format=loader_file_format, # type: ignore
247
289
  )
248
290
 
249
291
  destination.post_load()