ingestr 0.7.6__tar.gz → 0.7.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.7.6 → ingestr-0.7.8}/.gitignore +2 -1
- {ingestr-0.7.6 → ingestr-0.7.8}/PKG-INFO +31 -5
- {ingestr-0.7.6 → ingestr-0.7.8}/README.md +27 -4
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/.vitepress/config.mjs +8 -0
- ingestr-0.7.8/docs/supported-sources/airtable.md +37 -0
- ingestr-0.7.8/docs/supported-sources/facebook-ads.md +51 -0
- ingestr-0.7.8/docs/supported-sources/kafka.md +38 -0
- ingestr-0.7.8/docs/supported-sources/klaviyo.md +64 -0
- ingestr-0.7.8/docs/supported-sources/slack.md +42 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/main.py +11 -1
- ingestr-0.7.8/ingestr/src/.gitignore +10 -0
- ingestr-0.7.8/ingestr/src/airtable/__init__.py +69 -0
- ingestr-0.7.8/ingestr/src/facebook_ads/__init__.py +197 -0
- ingestr-0.7.8/ingestr/src/facebook_ads/exceptions.py +5 -0
- ingestr-0.7.8/ingestr/src/facebook_ads/helpers.py +255 -0
- ingestr-0.7.8/ingestr/src/facebook_ads/settings.py +208 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/factory.py +15 -0
- ingestr-0.7.8/ingestr/src/kafka/__init__.py +103 -0
- ingestr-0.7.8/ingestr/src/kafka/helpers.py +227 -0
- ingestr-0.7.8/ingestr/src/klaviyo/_init_.py +173 -0
- ingestr-0.7.8/ingestr/src/klaviyo/client.py +212 -0
- ingestr-0.7.8/ingestr/src/klaviyo/helpers.py +19 -0
- ingestr-0.7.8/ingestr/src/slack/__init__.py +272 -0
- ingestr-0.7.8/ingestr/src/slack/helpers.py +204 -0
- ingestr-0.7.8/ingestr/src/slack/settings.py +22 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sources.py +222 -1
- ingestr-0.7.8/ingestr/src/version.py +1 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/pyproject.toml +14 -4
- {ingestr-0.7.6 → ingestr-0.7.8}/requirements.txt +3 -0
- ingestr-0.7.6/ingestr/src/version.py +0 -1
- {ingestr-0.7.6 → ingestr-0.7.8}/.dockerignore +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/.github/workflows/tests.yml +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/.python-version +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/Dockerfile +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/LICENSE.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/Makefile +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/commands/example-uris.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/commands/ingest.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/index.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/destinations.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/arrow_helpers.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/helpers.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/override.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/schema_types.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/package-lock.json +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/package.json +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/requirements-dev.txt +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/resources/demo.gif +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/resources/demo.tape +0 -0
- {ingestr-0.7.6 → ingestr-0.7.8}/resources/ingestr.svg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.7.6
|
|
3
|
+
Version: 0.7.8
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -14,17 +14,20 @@ Classifier: Operating System :: OS Independent
|
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
15
|
Classifier: Topic :: Database
|
|
16
16
|
Requires-Python: >=3.9
|
|
17
|
+
Requires-Dist: confluent-kafka>=2.3.0
|
|
17
18
|
Requires-Dist: cx-oracle==8.3.0
|
|
18
19
|
Requires-Dist: databricks-sql-connector==2.9.3
|
|
19
20
|
Requires-Dist: dlt==0.5.1
|
|
20
21
|
Requires-Dist: duckdb-engine==0.11.5
|
|
21
22
|
Requires-Dist: duckdb==0.10.2
|
|
23
|
+
Requires-Dist: facebook-business==20.0.0
|
|
22
24
|
Requires-Dist: google-api-python-client==2.130.0
|
|
23
25
|
Requires-Dist: google-cloud-bigquery-storage==2.24.0
|
|
24
26
|
Requires-Dist: mysql-connector-python==9.0.0
|
|
25
27
|
Requires-Dist: pendulum==3.0.0
|
|
26
28
|
Requires-Dist: psycopg2-binary==2.9.9
|
|
27
29
|
Requires-Dist: py-machineid==0.5.1
|
|
30
|
+
Requires-Dist: pyairtable==2.3.3
|
|
28
31
|
Requires-Dist: pymongo==4.6.3
|
|
29
32
|
Requires-Dist: pymysql==1.1.0
|
|
30
33
|
Requires-Dist: pyodbc==5.1.0
|
|
@@ -55,7 +58,7 @@ Description-Content-Type: text/markdown
|
|
|
55
58
|
</a>
|
|
56
59
|
</div>
|
|
57
60
|
|
|
58
|
-
|
|
61
|
+
---
|
|
59
62
|
|
|
60
63
|
Ingestr is a command-line application that allows you to ingest data from any source into any destination using simple command-line flags, no code necessary.
|
|
61
64
|
|
|
@@ -65,8 +68,8 @@ Ingestr is a command-line application that allows you to ingest data from any so
|
|
|
65
68
|
|
|
66
69
|
ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
|
|
67
70
|
|
|
68
|
-
|
|
69
71
|
## Installation
|
|
72
|
+
|
|
70
73
|
```
|
|
71
74
|
pip install ingestr
|
|
72
75
|
```
|
|
@@ -84,15 +87,17 @@ ingestr ingest \
|
|
|
84
87
|
That's it.
|
|
85
88
|
|
|
86
89
|
This command will:
|
|
90
|
+
|
|
87
91
|
- get the table `public.some_data` from the Postgres instance.
|
|
88
92
|
- upload this data to your BigQuery warehouse under the schema `ingestr` and table `some_data`.
|
|
89
93
|
|
|
90
94
|
## Documentation
|
|
95
|
+
|
|
91
96
|
You can see the full documentation [here](https://bruin-data.github.io/ingestr/getting-started/quickstart.html).
|
|
92
97
|
|
|
93
98
|
## Community
|
|
94
|
-
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
95
99
|
|
|
100
|
+
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
96
101
|
|
|
97
102
|
## Supported Sources & Destinations
|
|
98
103
|
|
|
@@ -173,10 +178,20 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
173
178
|
<tr>
|
|
174
179
|
<td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
|
|
175
180
|
</tr>
|
|
181
|
+
<tr>
|
|
182
|
+
<td>Airtable</td>
|
|
183
|
+
<td>✅</td>
|
|
184
|
+
<td>-</td>
|
|
185
|
+
</tr>
|
|
176
186
|
<tr>
|
|
177
187
|
<td>Chess.com</td>
|
|
178
188
|
<td>✅</td>
|
|
179
189
|
<td>-</td>
|
|
190
|
+
</tr>
|
|
191
|
+
<tr>
|
|
192
|
+
<td>Facebook Ads</td>
|
|
193
|
+
<td>✅</td>
|
|
194
|
+
<td>-</td>
|
|
180
195
|
</tr>
|
|
181
196
|
<tr>
|
|
182
197
|
<td>Gorgias</td>
|
|
@@ -192,6 +207,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
192
207
|
<td>HubSpot</td>
|
|
193
208
|
<td>✅</td>
|
|
194
209
|
<td>-</td>
|
|
210
|
+
</tr>
|
|
211
|
+
<tr>
|
|
212
|
+
<td>Klaviyo</td>
|
|
213
|
+
<td>✅</td>
|
|
214
|
+
<td>-</td>
|
|
195
215
|
</tr>
|
|
196
216
|
<tr>
|
|
197
217
|
<td>Notion</td>
|
|
@@ -202,6 +222,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
202
222
|
<td>Shopify</td>
|
|
203
223
|
<td>✅</td>
|
|
204
224
|
<td>-</td>
|
|
225
|
+
</tr>
|
|
226
|
+
<tr>
|
|
227
|
+
<td>Slack</td>
|
|
228
|
+
<td>✅</td>
|
|
229
|
+
<td>-</td>
|
|
205
230
|
</tr>
|
|
206
231
|
<tr>
|
|
207
232
|
<td>Stripe</td>
|
|
@@ -213,4 +238,5 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
213
238
|
More to come soon!
|
|
214
239
|
|
|
215
240
|
## Acknowledgements
|
|
216
|
-
|
|
241
|
+
|
|
242
|
+
This project would not have been possible without the amazing work done by the [SQLAlchemy](https://www.sqlalchemy.org/) and [dlt](https://dlthub.com/) teams. We relied on their work to connect to various sources and destinations, and built `ingestr` as a simple, opinionated wrapper around their work.
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
</a>
|
|
11
11
|
</div>
|
|
12
12
|
|
|
13
|
-
|
|
13
|
+
---
|
|
14
14
|
|
|
15
15
|
Ingestr is a command-line application that allows you to ingest data from any source into any destination using simple command-line flags, no code necessary.
|
|
16
16
|
|
|
@@ -20,8 +20,8 @@ Ingestr is a command-line application that allows you to ingest data from any so
|
|
|
20
20
|
|
|
21
21
|
ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
|
|
22
22
|
|
|
23
|
-
|
|
24
23
|
## Installation
|
|
24
|
+
|
|
25
25
|
```
|
|
26
26
|
pip install ingestr
|
|
27
27
|
```
|
|
@@ -39,15 +39,17 @@ ingestr ingest \
|
|
|
39
39
|
That's it.
|
|
40
40
|
|
|
41
41
|
This command will:
|
|
42
|
+
|
|
42
43
|
- get the table `public.some_data` from the Postgres instance.
|
|
43
44
|
- upload this data to your BigQuery warehouse under the schema `ingestr` and table `some_data`.
|
|
44
45
|
|
|
45
46
|
## Documentation
|
|
47
|
+
|
|
46
48
|
You can see the full documentation [here](https://bruin-data.github.io/ingestr/getting-started/quickstart.html).
|
|
47
49
|
|
|
48
50
|
## Community
|
|
49
|
-
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
50
51
|
|
|
52
|
+
Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
|
|
51
53
|
|
|
52
54
|
## Supported Sources & Destinations
|
|
53
55
|
|
|
@@ -128,10 +130,20 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
128
130
|
<tr>
|
|
129
131
|
<td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
|
|
130
132
|
</tr>
|
|
133
|
+
<tr>
|
|
134
|
+
<td>Airtable</td>
|
|
135
|
+
<td>✅</td>
|
|
136
|
+
<td>-</td>
|
|
137
|
+
</tr>
|
|
131
138
|
<tr>
|
|
132
139
|
<td>Chess.com</td>
|
|
133
140
|
<td>✅</td>
|
|
134
141
|
<td>-</td>
|
|
142
|
+
</tr>
|
|
143
|
+
<tr>
|
|
144
|
+
<td>Facebook Ads</td>
|
|
145
|
+
<td>✅</td>
|
|
146
|
+
<td>-</td>
|
|
135
147
|
</tr>
|
|
136
148
|
<tr>
|
|
137
149
|
<td>Gorgias</td>
|
|
@@ -147,6 +159,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
147
159
|
<td>HubSpot</td>
|
|
148
160
|
<td>✅</td>
|
|
149
161
|
<td>-</td>
|
|
162
|
+
</tr>
|
|
163
|
+
<tr>
|
|
164
|
+
<td>Klaviyo</td>
|
|
165
|
+
<td>✅</td>
|
|
166
|
+
<td>-</td>
|
|
150
167
|
</tr>
|
|
151
168
|
<tr>
|
|
152
169
|
<td>Notion</td>
|
|
@@ -157,6 +174,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
157
174
|
<td>Shopify</td>
|
|
158
175
|
<td>✅</td>
|
|
159
176
|
<td>-</td>
|
|
177
|
+
</tr>
|
|
178
|
+
<tr>
|
|
179
|
+
<td>Slack</td>
|
|
180
|
+
<td>✅</td>
|
|
181
|
+
<td>-</td>
|
|
160
182
|
</tr>
|
|
161
183
|
<tr>
|
|
162
184
|
<td>Stripe</td>
|
|
@@ -168,4 +190,5 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
|
|
|
168
190
|
More to come soon!
|
|
169
191
|
|
|
170
192
|
## Acknowledgements
|
|
171
|
-
|
|
193
|
+
|
|
194
|
+
This project would not have been possible without the amazing work done by the [SQLAlchemy](https://www.sqlalchemy.org/) and [dlt](https://dlthub.com/) teams. We relied on their work to connect to various sources and destinations, and built `ingestr` as a simple, opinionated wrapper around their work.
|
|
@@ -64,6 +64,7 @@ export default defineConfig({
|
|
|
64
64
|
text: "Google BigQuery",
|
|
65
65
|
link: "/supported-sources/bigquery.md",
|
|
66
66
|
},
|
|
67
|
+
{ text: "Kafka", link: "/supported-sources/kafka.md" },
|
|
67
68
|
{ text: "Local CSV Files", link: "/supported-sources/csv.md" },
|
|
68
69
|
{
|
|
69
70
|
text: "Microsoft SQL Server",
|
|
@@ -83,12 +84,19 @@ export default defineConfig({
|
|
|
83
84
|
text: "Platforms",
|
|
84
85
|
collapsed: false,
|
|
85
86
|
items: [
|
|
87
|
+
{ text: "Airtable", link: "/supported-sources/airtable.md" },
|
|
86
88
|
{ text: "Chess.com", link: "/supported-sources/chess.md" },
|
|
89
|
+
{
|
|
90
|
+
text: "Facebook Ads",
|
|
91
|
+
link: "/supported-sources/facebook-ads.md",
|
|
92
|
+
},
|
|
87
93
|
{ text: "Google Sheets", link: "/supported-sources/gsheets.md" },
|
|
88
94
|
{ text: "Gorgias", link: "/supported-sources/gorgias.md" },
|
|
89
95
|
{ text: "HubSpot", link: "/supported-sources/hubspot.md" },
|
|
96
|
+
{ text: "Klaviyo", link: "/supported-sources/klaviyo.md" },
|
|
90
97
|
{ text: "Notion", link: "/supported-sources/notion.md" },
|
|
91
98
|
{ text: "Shopify", link: "/supported-sources/shopify.md" },
|
|
99
|
+
{ text: "Slack", link: "/supported-sources/slack.md" },
|
|
92
100
|
{ text: "Stripe", link: "/supported-sources/stripe.md" },
|
|
93
101
|
],
|
|
94
102
|
},
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
# Airtable
|
|
2
|
+
|
|
3
|
+
[Airtable](https://airtable.com/) is a cloud-based platform that combines spreadsheet and database functionalities, designed for data management and collaboration.
|
|
4
|
+
|
|
5
|
+
ingestr supports Airtable as a source.
|
|
6
|
+
|
|
7
|
+
## URI Format
|
|
8
|
+
|
|
9
|
+
The URI format for Airtable is as follows:
|
|
10
|
+
|
|
11
|
+
```plaintext
|
|
12
|
+
airtable://?access_token=<access_token>&base_id=<base_id>
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
URI parameters:
|
|
16
|
+
|
|
17
|
+
- `base_id`: A unique identifier for an Airtable base.
|
|
18
|
+
- `access_token`: A personal access token for authentication with the Airtable API.
|
|
19
|
+
|
|
20
|
+
The URI is used to connect to the Airtable API for extracting data. More details on setting up Airtable integrations can be found [here](https://airtable.com/developers/web/api).
|
|
21
|
+
|
|
22
|
+
## Setting up an Airtable Integration
|
|
23
|
+
|
|
24
|
+
Airtable requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/airtable#setup-guide).
|
|
25
|
+
|
|
26
|
+
Once you complete the guide, you should have an Access Token and Base Id. Let's say your Access Token is `patr123.abc` and Base Id is `appXYZ`, here's a sample command that will copy the data from Airtable into a duckdb database:
|
|
27
|
+
|
|
28
|
+
```sh
|
|
29
|
+
ingestr ingest --source-uri 'airtable://?base_id=appXYZ&access_token=patr123.abc' --source-table 'employee' --dest-uri 'duckdb:///airtable.duckdb' --dest-table 'des.employee'
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
The result of this command will be an `employee` table containing data from the `employee` source in the `airtable.duckdb` database.
|
|
33
|
+
|
|
34
|
+
The `source-table` can include multiple table names that share the same `base_id` (e.g. `--source-table 'employee,users'`), but this will merge all the data from the specified tables into a single destination table.
|
|
35
|
+
|
|
36
|
+
> [!CAUTION]
|
|
37
|
+
> Airtable does not support incremental loading, which means every time you run the command, the entire table will be copied from Airtable to the destination. This can be slow for large tables.
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# Facebook Ads
|
|
2
|
+
|
|
3
|
+
Facebook Ads is the advertising platform that helps users to create targeted ads on Facebook, Instagram and Messenger.
|
|
4
|
+
|
|
5
|
+
ingestr supports Facebook Ads as a source.
|
|
6
|
+
|
|
7
|
+
## URI Format
|
|
8
|
+
|
|
9
|
+
The URI format for Facebook Ads is as follows:
|
|
10
|
+
|
|
11
|
+
```plaintext
|
|
12
|
+
facebookads://?access_token=<access_token>&account_id=<account_id>
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
URI parameters:
|
|
16
|
+
|
|
17
|
+
- `access_token` is associated with Business Facebook App.
|
|
18
|
+
- `account_id` is associated with Ad manager.
|
|
19
|
+
|
|
20
|
+
Both are used for authentication with Facebook Ads API.
|
|
21
|
+
|
|
22
|
+
The URI is used to connect to Facebook Ads API for extracting data.
|
|
23
|
+
|
|
24
|
+
## Setting up a Facebook Ads Integration
|
|
25
|
+
|
|
26
|
+
Facebook Ads requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/facebook_ads#setup-guide).
|
|
27
|
+
|
|
28
|
+
Once you complete the guide, you should have an access token and an account ID. Let's say your `access_token` is `abcdef` and your `account_id` is `1234`; here's a sample command that will copy the data from Facebook Ads into a DuckDB database:
|
|
29
|
+
|
|
30
|
+
```sh
|
|
31
|
+
ingestr ingest \
|
|
32
|
+
--source-uri 'facebookads://?access_token=abcdef&account_id=1234' \
|
|
33
|
+
--source-table 'campaigns' \
|
|
34
|
+
--dest-uri 'duckdb:///facebook.duckdb' \
|
|
35
|
+
--dest-table 'dest.campaigns'
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
The result of this command will be a table in the `facebook.duckdb` database.
|
|
39
|
+
|
|
40
|
+
## Available Tables
|
|
41
|
+
|
|
42
|
+
Facebook Ads source allows ingesting the following sources into separate tables:
|
|
43
|
+
|
|
44
|
+
- `campaigns`: Retrieves all DEFAULT_CAMPAIGN_FIELDS.
|
|
45
|
+
- `ad_sets`: Retrieves all DEFAULT_ADSET_FIELDS.
|
|
46
|
+
- `leads`: Retrieves all DEFAULT_LEAD_FIELDS.
|
|
47
|
+
- `ads_creatives`: Retrieves all DEFAULT_ADCREATIVE_FIELDS.
|
|
48
|
+
- `ads`: Retrieves all DEFAULT_ADS_FIELDS.
|
|
49
|
+
- `facebook_insights`: Retrieves all DEFAULT_INSIGHTS_FIELDS.
|
|
50
|
+
|
|
51
|
+
Use these as `--source-table` parameter in the `ingestr ingest` command.
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# Apache Kafka
|
|
2
|
+
[Apache Kafka](https://kafka.apache.org/) is a distributed event streaming platform used by thousands of companies for high-performance data pipelines, streaming analytics, data integration, and mission-critical applications.
|
|
3
|
+
|
|
4
|
+
ingestr supports Apache Kafka as a source.
|
|
5
|
+
|
|
6
|
+
## URI Format
|
|
7
|
+
The URI format for Apache Kafka is as follows:
|
|
8
|
+
|
|
9
|
+
```plaintext
|
|
10
|
+
kafka://?bootstrap_servers=localhost:9092&group_id=test_group&security_protocol=SASL_SSL&sasl_mechanisms=PLAIN&sasl_username=example_username&sasl_password=example_secret&batch_size=1000&batch_timeout=3
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
URI parameters:
|
|
14
|
+
- `bootstrap_servers`: The Kafka server(s) to connect to, typically in the form of a host and port (e.g., `localhost:9092`).
|
|
15
|
+
- `group_id`: The consumer group ID used for identifying the client when consuming messages.
|
|
16
|
+
- `security_protocol`: The protocol used to communicate with brokers (e.g., `SASL_SSL` for secure communication).
|
|
17
|
+
- `sasl_mechanisms`: The SASL mechanism to be used for authentication (e.g., `PLAIN`).
|
|
18
|
+
- `sasl_username`: The username for SASL authentication.
|
|
19
|
+
- `sasl_password`: The password for SASL authentication.
|
|
20
|
+
- `batch_size`: The number of messages to fetch in a single batch, defaults to 3000.
|
|
21
|
+
- `batch_timeout`: The maximum time to wait for messages, defaults to 3 seconds.
|
|
22
|
+
|
|
23
|
+
The URI is used to connect to the Kafka brokers for ingesting messages.
|
|
24
|
+
|
|
25
|
+
### Group ID
|
|
26
|
+
The group ID is used to identify the consumer group that reads messages from a topic. Kafka uses the group ID to manage consumer offsets and assign partitions to consumers, which means that the group ID is the key to reading messages from the correct partition and position in the topic.
|
|
27
|
+
|
|
28
|
+
Once you have your Kafka server, credentials, and group ID set up, here's a sample command to ingest messages from a Kafka topic into a duckdb database:
|
|
29
|
+
|
|
30
|
+
```sh
|
|
31
|
+
ingestr ingest \
|
|
32
|
+
--source-uri 'kafka://?bootstrap_servers=localhost:9092' \
|
|
33
|
+
--source-table 'my-topic' \
|
|
34
|
+
--dest-uri duckdb:///kafka.duckdb \
|
|
35
|
+
--dest-table 'kafka.my_topic'
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
The result of this command will be a table in the `kafka.duckdb` database with JSON columns.
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# Klaviyo
|
|
2
|
+
|
|
3
|
+
[Klaviyo](https://www.klaviyo.com/) is a marketing automation platform that helps businesses build and manage smarter digital relationships with their customers by connecting through personalized email and enhancing customer loyalty.
|
|
4
|
+
|
|
5
|
+
ingestr supports Klaviyo as a source.
|
|
6
|
+
|
|
7
|
+
## URI Format
|
|
8
|
+
|
|
9
|
+
The URI format for Klaviyo is as follows:
|
|
10
|
+
|
|
11
|
+
```plaintext
|
|
12
|
+
klaviyo://?api_key=<api-key>
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
URI parameters:
|
|
16
|
+
|
|
17
|
+
- `api_key`: The API key used for authentication with the Klaviyo API.
|
|
18
|
+
|
|
19
|
+
The URI is used to connect to the Klaviyo API for extracting data.
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
ingestr ingest --source-table 'events' --source-uri 'klaviyo://?api_key=pk_test' --dest-uri duckdb:///klaviyo.duckdb --interval-start 2022-01-01 --dest-table 'klaviyo.events' --extract-parallelism 20
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
This command fetches all the events that were created or updated since 2022-01-01 and writes them to the `klaviyo.events` table in DuckDB, using 20 parallel threads to improve performance and handle large volumes of data efficiently.
|
|
26
|
+
|
|
27
|
+
## Available Tables
|
|
28
|
+
|
|
29
|
+
Klaviyo source allows ingesting the following sources into separate tables:
|
|
30
|
+
|
|
31
|
+
[events](https://developers.klaviyo.com/en/reference/events_api_overview): Retrieves all events in an account where each event represents an action taken by a profile such as a password reset or a product order.
|
|
32
|
+
|
|
33
|
+
[profiles](https://developers.klaviyo.com/en/reference/profiles_api_overview): Retrieves all profiles in an account where each profile includes details like organization, job title, email and other attributes.
|
|
34
|
+
|
|
35
|
+
[campaigns](https://developers.klaviyo.com/en/reference/campaigns_api_overview): Retrieves all campaigns in an account where each campaign is a targeted message sent to a specific audience.
|
|
36
|
+
|
|
37
|
+
[metrics](https://developers.klaviyo.com/en/reference/metrics_api_overview): Retrieves all metrics in an account where each metric represents a category of events or actions a person can take.
|
|
38
|
+
|
|
39
|
+
[tags](https://developers.klaviyo.com/en/reference/get_tags): Retrieves all tags in an account.
|
|
40
|
+
|
|
41
|
+
[coupons](https://developers.klaviyo.com/en/reference/get_coupons): Retrieves all coupons in an account.
|
|
42
|
+
|
|
43
|
+
[catalog-variants](https://developers.klaviyo.com/en/reference/get_catalog_variants): Retrieves all variants in an account.
|
|
44
|
+
|
|
45
|
+
[catalog-categories](https://developers.klaviyo.com/en/reference/get_catalog_categories): Retrieves all catalog categories in an account.
|
|
46
|
+
|
|
47
|
+
[catalog-items](https://developers.klaviyo.com/en/reference/get_catalog_items): Retrieves all catalog items in an account.
|
|
48
|
+
|
|
49
|
+
[flows](https://developers.klaviyo.com/en/reference/get_flows): Retrieves all flows in an account where flow is a sequence of automated actions that is triggered when a person performs a specific action.
|
|
50
|
+
|
|
51
|
+
[lists](https://developers.klaviyo.com/en/reference/get_lists): Retrieves all lists in an account.
|
|
52
|
+
|
|
53
|
+
[images](https://developers.klaviyo.com/en/reference/get_images): Retrieves all images in an account.
|
|
54
|
+
|
|
55
|
+
[segments](https://developers.klaviyo.com/en/reference/get_segments): Retrieves all segments in an account where segment is a dynamic list that contains profiles meeting a certain set of conditions.
|
|
56
|
+
|
|
57
|
+
[forms](https://developers.klaviyo.com/en/reference/get_forms): Retrieves all forms in an account.
|
|
58
|
+
|
|
59
|
+
[templates](https://developers.klaviyo.com/en/reference/get_templates): Retrieves all templates in an account.
|
|
60
|
+
|
|
61
|
+
Use these as `--source-table` parameter in the `ingestr ingest` command.
|
|
62
|
+
|
|
63
|
+
> [!WARNING]
|
|
64
|
+
> Klaviyo does not support incremental loading for many endpoints in its APIs, which means ingestr will load endpoints incrementally if they support it, and do a full-refresh if not.
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# Slack
|
|
2
|
+
|
|
3
|
+
[Slack](https://www.slack.com/) is a messaging platform for teams and organizations where they can collaborate, share ideas and information.
|
|
4
|
+
|
|
5
|
+
ingestr supports Slack as a source.
|
|
6
|
+
|
|
7
|
+
## URI Format
|
|
8
|
+
|
|
9
|
+
The URI format for Slack is as follows:
|
|
10
|
+
|
|
11
|
+
```plaintext
|
|
12
|
+
slack://?api_key=<api-key-here>
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
URI parameters:
|
|
16
|
+
|
|
17
|
+
- `api_key`: The API key used for authentication with the Slack API.
|
|
18
|
+
|
|
19
|
+
The URI is used to connect to the Slack API for extracting data.
|
|
20
|
+
|
|
21
|
+
## Setting up a Slack Integration
|
|
22
|
+
|
|
23
|
+
Slack requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/slack#setup-guide).
|
|
24
|
+
|
|
25
|
+
Once you complete the guide, you should have an API key with the necessary permissions as mentioned in the guide. Let's say your API key is axb-test-564. Here's a sample command that will copy the data from Slack into a DuckDB database:
|
|
26
|
+
|
|
27
|
+
```sh
|
|
28
|
+
ingestr ingest --source-uri 'slack://?api_key=axb-test-564' --source-table 'channels' --dest-uri duckdb:///slack.duckdb --dest-table 'dest.channels'
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
The result of this command will be a table in the `slack.duckdb` database.
|
|
32
|
+
|
|
33
|
+
## Available Tables
|
|
34
|
+
|
|
35
|
+
Slack source allows ingesting the following sources into separate tables:
|
|
36
|
+
|
|
37
|
+
- `channels`: Retrieves information about all the channels.
|
|
38
|
+
- `users`: Retrieves information about all the users.
|
|
39
|
+
- `messages:chan1,chan2`: Retrieves messages from the specified channels, where `chan1` and `chan2` stand for channel names in your workspace (e.g., `general`, `memes`). Multiple channels can be listed, separated by commas.
|
|
40
|
+
- `access_logs`: Retrieves all the access logs.
|
|
41
|
+
|
|
42
|
+
Use these as `--source-table` parameter in the `ingestr ingest` command.
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import hashlib
|
|
2
|
+
import tempfile
|
|
2
3
|
from datetime import datetime
|
|
3
4
|
from enum import Enum
|
|
4
|
-
import tempfile
|
|
5
5
|
from typing import Optional
|
|
6
6
|
|
|
7
7
|
import dlt
|
|
@@ -244,6 +244,13 @@ def ingest(
|
|
|
244
244
|
envvar="PIPELINES_DIR",
|
|
245
245
|
),
|
|
246
246
|
] = None, # type: ignore
|
|
247
|
+
extract_parallelism: Annotated[
|
|
248
|
+
Optional[int],
|
|
249
|
+
typer.Option(
|
|
250
|
+
help="The number of parallel jobs to run for extracting data from the source, only applicable for certain sources",
|
|
251
|
+
envvar="EXTRACT_PARALLELISM",
|
|
252
|
+
),
|
|
253
|
+
] = 5, # type: ignore
|
|
247
254
|
):
|
|
248
255
|
track(
|
|
249
256
|
"command_triggered",
|
|
@@ -253,6 +260,8 @@ def ingest(
|
|
|
253
260
|
)
|
|
254
261
|
|
|
255
262
|
dlt.config["data_writer.file_max_items"] = loader_file_size
|
|
263
|
+
dlt.config["extract.workers"] = extract_parallelism
|
|
264
|
+
dlt.config["extract.max_parallel_items"] = extract_parallelism
|
|
256
265
|
if schema_naming != SchemaNaming.default:
|
|
257
266
|
dlt.config["schema.naming"] = schema_naming.value
|
|
258
267
|
|
|
@@ -413,6 +422,7 @@ def ingest(
|
|
|
413
422
|
# remove the pipelines_dir folder if it was created by ingestr
|
|
414
423
|
if is_pipelines_dir_temp:
|
|
415
424
|
import shutil
|
|
425
|
+
|
|
416
426
|
shutil.rmtree(pipelines_dir)
|
|
417
427
|
|
|
418
428
|
print(
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Source that loads tables from Airtable.
|
|
2
|
+
Supports whitelisting of tables or loading of all tables from a specified base.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Dict, Iterable, Iterator, List, Optional
|
|
6
|
+
|
|
7
|
+
import dlt
|
|
8
|
+
import pyairtable
|
|
9
|
+
from dlt.sources import DltResource
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dlt.source
def airtable_source(
    base_id: str = dlt.config.value,
    table_names: Optional[List[str]] = dlt.config.value,
    access_token: str = dlt.secrets.value,
) -> Iterable[DltResource]:
    """
    Yield one dlt resource per selected table of a single Airtable base.

    The table metadata of the base is fetched from the
    ``meta/bases/{base_id}/tables`` endpoint and every table that passes the
    ``table_names`` filter is wrapped with ``airtable_resource``.

    Args:
        base_id (str): The id of the base. Obtain it e.g. from the URL in your web browser.
            It starts with "app". See https://support.airtable.com/docs/finding-airtable-ids
        table_names (Optional[List[str]]): Table IDs or table names to load. A table is
            selected when either its id or its name appears in this list. When the list
            is empty or not given, every table returned for the base is loaded.
            Names are freely user-defined. IDs start with "tbl". See https://support.airtable.com/docs/finding-airtable-ids
        access_token (str): The personal access token.
            See https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens#personal-access-tokens-basic-actions

    Yields:
        DltResource: One resource for each selected table.
    """
    client = pyairtable.Api(access_token)
    schema_url = client.build_url(f"meta/bases/{base_id}/tables")
    schema_response = client.request(method="GET", url=schema_url)

    for table_meta in schema_response.get("tables"):
        # No filter configured means "load everything"; otherwise match on id or name.
        is_selected = (
            not table_names
            or table_meta.get("id") in table_names
            or table_meta.get("name") in table_names
        )
        if not is_selected:
            continue
        yield airtable_resource(client, base_id, table_meta)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def airtable_resource(
    api: pyairtable.Api,
    base_id: str,
    table: Dict[str, Any],
) -> DltResource:
    """
    Build the dlt resource for a single Airtable table.

    The resource is named after the table, uses the name of the table's
    primary field as its primary key and is loaded with the "replace"
    write disposition.

    Args:
        api (pyairtable.Api): The API connection object.
        base_id (str): The id of the base. Obtain it e.g. from the URL in your web browser.
            It starts with "app". See https://support.airtable.com/docs/finding-airtable-ids
        table (Dict[str, Any]): Metadata about the table; it does not contain the actual
            records. The keys "primaryFieldId", "fields", "name" and "id" are read.

    Returns:
        DltResource: A resource wrapping the record iterator of the table.

    Raises:
        IndexError: If no entry of ``table["fields"]`` has the primary field id.
    """
    pk_field_id = table["primaryFieldId"]
    # Indexing [0] keeps the failure mode explicit (IndexError) when the
    # metadata lists no field with the primary field id.
    pk_field = [col for col in table["fields"] if col["id"] == pk_field_id][0]

    resource_name: str = table["name"]
    pk_columns: List[str] = [pk_field["name"]]

    # Table.iterate() supports rich customization options, such as chunk size,
    # fields, cell format, timezone, locale, and view.
    records: Iterator[List[Any]] = api.table(base_id, table["id"]).iterate()

    return dlt.resource(
        records,
        name=resource_name,
        primary_key=pk_columns,
        write_disposition="replace",
    )
|