ingestr 0.7.6__tar.gz → 0.7.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (116) hide show
  1. {ingestr-0.7.6 → ingestr-0.7.8}/.gitignore +2 -1
  2. {ingestr-0.7.6 → ingestr-0.7.8}/PKG-INFO +31 -5
  3. {ingestr-0.7.6 → ingestr-0.7.8}/README.md +27 -4
  4. {ingestr-0.7.6 → ingestr-0.7.8}/docs/.vitepress/config.mjs +8 -0
  5. ingestr-0.7.8/docs/supported-sources/airtable.md +37 -0
  6. ingestr-0.7.8/docs/supported-sources/facebook-ads.md +51 -0
  7. ingestr-0.7.8/docs/supported-sources/kafka.md +38 -0
  8. ingestr-0.7.8/docs/supported-sources/klaviyo.md +64 -0
  9. ingestr-0.7.8/docs/supported-sources/slack.md +42 -0
  10. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/main.py +11 -1
  11. ingestr-0.7.8/ingestr/src/.gitignore +10 -0
  12. ingestr-0.7.8/ingestr/src/airtable/__init__.py +69 -0
  13. ingestr-0.7.8/ingestr/src/facebook_ads/__init__.py +197 -0
  14. ingestr-0.7.8/ingestr/src/facebook_ads/exceptions.py +5 -0
  15. ingestr-0.7.8/ingestr/src/facebook_ads/helpers.py +255 -0
  16. ingestr-0.7.8/ingestr/src/facebook_ads/settings.py +208 -0
  17. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/factory.py +15 -0
  18. ingestr-0.7.8/ingestr/src/kafka/__init__.py +103 -0
  19. ingestr-0.7.8/ingestr/src/kafka/helpers.py +227 -0
  20. ingestr-0.7.8/ingestr/src/klaviyo/_init_.py +173 -0
  21. ingestr-0.7.8/ingestr/src/klaviyo/client.py +212 -0
  22. ingestr-0.7.8/ingestr/src/klaviyo/helpers.py +19 -0
  23. ingestr-0.7.8/ingestr/src/slack/__init__.py +272 -0
  24. ingestr-0.7.8/ingestr/src/slack/helpers.py +204 -0
  25. ingestr-0.7.8/ingestr/src/slack/settings.py +22 -0
  26. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sources.py +222 -1
  27. ingestr-0.7.8/ingestr/src/version.py +1 -0
  28. {ingestr-0.7.6 → ingestr-0.7.8}/pyproject.toml +14 -4
  29. {ingestr-0.7.6 → ingestr-0.7.8}/requirements.txt +3 -0
  30. ingestr-0.7.6/ingestr/src/version.py +0 -1
  31. {ingestr-0.7.6 → ingestr-0.7.8}/.dockerignore +0 -0
  32. {ingestr-0.7.6 → ingestr-0.7.8}/.github/workflows/deploy-docs.yml +0 -0
  33. {ingestr-0.7.6 → ingestr-0.7.8}/.github/workflows/tests.yml +0 -0
  34. {ingestr-0.7.6 → ingestr-0.7.8}/.python-version +0 -0
  35. {ingestr-0.7.6 → ingestr-0.7.8}/Dockerfile +0 -0
  36. {ingestr-0.7.6 → ingestr-0.7.8}/LICENSE.md +0 -0
  37. {ingestr-0.7.6 → ingestr-0.7.8}/Makefile +0 -0
  38. {ingestr-0.7.6 → ingestr-0.7.8}/docs/.vitepress/theme/custom.css +0 -0
  39. {ingestr-0.7.6 → ingestr-0.7.8}/docs/.vitepress/theme/index.js +0 -0
  40. {ingestr-0.7.6 → ingestr-0.7.8}/docs/commands/example-uris.md +0 -0
  41. {ingestr-0.7.6 → ingestr-0.7.8}/docs/commands/ingest.md +0 -0
  42. {ingestr-0.7.6 → ingestr-0.7.8}/docs/getting-started/core-concepts.md +0 -0
  43. {ingestr-0.7.6 → ingestr-0.7.8}/docs/getting-started/incremental-loading.md +0 -0
  44. {ingestr-0.7.6 → ingestr-0.7.8}/docs/getting-started/quickstart.md +0 -0
  45. {ingestr-0.7.6 → ingestr-0.7.8}/docs/getting-started/telemetry.md +0 -0
  46. {ingestr-0.7.6 → ingestr-0.7.8}/docs/index.md +0 -0
  47. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/bigquery.md +0 -0
  48. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/chess.md +0 -0
  49. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/csv.md +0 -0
  50. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/databricks.md +0 -0
  51. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/duckdb.md +0 -0
  52. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/gorgias.md +0 -0
  53. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/gsheets.md +0 -0
  54. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/hubspot.md +0 -0
  55. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/mongodb.md +0 -0
  56. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/mssql.md +0 -0
  57. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/mysql.md +0 -0
  58. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/notion.md +0 -0
  59. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/oracle.md +0 -0
  60. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/postgres.md +0 -0
  61. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/redshift.md +0 -0
  62. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/sap-hana.md +0 -0
  63. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/shopify.md +0 -0
  64. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/snowflake.md +0 -0
  65. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/sqlite.md +0 -0
  66. {ingestr-0.7.6 → ingestr-0.7.8}/docs/supported-sources/stripe.md +0 -0
  67. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/chess/__init__.py +0 -0
  68. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/chess/helpers.py +0 -0
  69. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/chess/settings.py +0 -0
  70. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/destinations.py +0 -0
  71. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/README.md +0 -0
  72. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/__init__.py +0 -0
  73. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  74. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  75. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  76. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/gorgias/__init__.py +0 -0
  77. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/gorgias/helpers.py +0 -0
  78. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/hubspot/__init__.py +0 -0
  79. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/hubspot/helpers.py +0 -0
  80. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/hubspot/settings.py +0 -0
  81. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/mongodb/__init__.py +0 -0
  82. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/mongodb/helpers.py +0 -0
  83. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/__init__.py +0 -0
  84. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/helpers/__init__.py +0 -0
  85. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/helpers/client.py +0 -0
  86. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/helpers/database.py +0 -0
  87. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/notion/settings.py +0 -0
  88. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/shopify/__init__.py +0 -0
  89. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/shopify/exceptions.py +0 -0
  90. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/shopify/helpers.py +0 -0
  91. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/shopify/settings.py +0 -0
  92. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/__init__.py +0 -0
  93. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/arrow_helpers.py +0 -0
  94. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/helpers.py +0 -0
  95. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/override.py +0 -0
  96. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/sql_database/schema_types.py +0 -0
  97. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/stripe_analytics/__init__.py +0 -0
  98. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/stripe_analytics/helpers.py +0 -0
  99. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/stripe_analytics/settings.py +0 -0
  100. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/table_definition.py +0 -0
  101. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/telemetry/event.py +0 -0
  102. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  103. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/.gitignore +0 -0
  104. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/create_replace.csv +0 -0
  105. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/delete_insert_expected.csv +0 -0
  106. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/delete_insert_part1.csv +0 -0
  107. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/delete_insert_part2.csv +0 -0
  108. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/merge_expected.csv +0 -0
  109. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/merge_part1.csv +0 -0
  110. {ingestr-0.7.6 → ingestr-0.7.8}/ingestr/testdata/merge_part2.csv +0 -0
  111. {ingestr-0.7.6 → ingestr-0.7.8}/package-lock.json +0 -0
  112. {ingestr-0.7.6 → ingestr-0.7.8}/package.json +0 -0
  113. {ingestr-0.7.6 → ingestr-0.7.8}/requirements-dev.txt +0 -0
  114. {ingestr-0.7.6 → ingestr-0.7.8}/resources/demo.gif +0 -0
  115. {ingestr-0.7.6 → ingestr-0.7.8}/resources/demo.tape +0 -0
  116. {ingestr-0.7.6 → ingestr-0.7.8}/resources/ingestr.svg +0 -0
@@ -13,4 +13,5 @@ pipeline_data
13
13
  dist
14
14
  docs/.vitepress/dist
15
15
  docs/.vitepress/cache
16
- node_modules
16
+ node_modules
17
+ *.duckdb
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.7.6
3
+ Version: 0.7.8
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -14,17 +14,20 @@ Classifier: Operating System :: OS Independent
14
14
  Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Topic :: Database
16
16
  Requires-Python: >=3.9
17
+ Requires-Dist: confluent-kafka>=2.3.0
17
18
  Requires-Dist: cx-oracle==8.3.0
18
19
  Requires-Dist: databricks-sql-connector==2.9.3
19
20
  Requires-Dist: dlt==0.5.1
20
21
  Requires-Dist: duckdb-engine==0.11.5
21
22
  Requires-Dist: duckdb==0.10.2
23
+ Requires-Dist: facebook-business==20.0.0
22
24
  Requires-Dist: google-api-python-client==2.130.0
23
25
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
24
26
  Requires-Dist: mysql-connector-python==9.0.0
25
27
  Requires-Dist: pendulum==3.0.0
26
28
  Requires-Dist: psycopg2-binary==2.9.9
27
29
  Requires-Dist: py-machineid==0.5.1
30
+ Requires-Dist: pyairtable==2.3.3
28
31
  Requires-Dist: pymongo==4.6.3
29
32
  Requires-Dist: pymysql==1.1.0
30
33
  Requires-Dist: pyodbc==5.1.0
@@ -55,7 +58,7 @@ Description-Content-Type: text/markdown
55
58
  </a>
56
59
  </div>
57
60
 
58
- -----
61
+ ---
59
62
 
60
63
  Ingestr is a command-line application that allows you to ingest data from any source into any destination using simple command-line flags, no code necessary.
61
64
 
@@ -65,8 +68,8 @@ Ingestr is a command-line application that allows you to ingest data from any so
65
68
 
66
69
  ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
67
70
 
68
-
69
71
  ## Installation
72
+
70
73
  ```
71
74
  pip install ingestr
72
75
  ```
@@ -84,15 +87,17 @@ ingestr ingest \
84
87
  That's it.
85
88
 
86
89
  This command will:
90
+
87
91
  - get the table `public.some_data` from the Postgres instance.
88
92
  - upload this data to your BigQuery warehouse under the schema `ingestr` and table `some_data`.
89
93
 
90
94
  ## Documentation
95
+
91
96
  You can see the full documentation [here](https://bruin-data.github.io/ingestr/getting-started/quickstart.html).
92
97
 
93
98
  ## Community
94
- Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
95
99
 
100
+ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
96
101
 
97
102
  ## Supported Sources & Destinations
98
103
 
@@ -173,10 +178,20 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
173
178
  <tr>
174
179
  <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
175
180
  </tr>
181
+ <tr>
182
+ <td>Airtable</td>
183
+ <td>✅</td>
184
+ <td>-</td>
185
+ </tr>
176
186
  <tr>
177
187
  <td>Chess.com</td>
178
188
  <td>✅</td>
179
189
  <td>-</td>
190
+ </tr>
191
+ <tr>
192
+ <td>Facebook Ads</td>
193
+ <td>✅</td>
194
+ <td>-</td>
180
195
  </tr>
181
196
  <tr>
182
197
  <td>Gorgias</td>
@@ -192,6 +207,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
192
207
  <td>HubSpot</td>
193
208
  <td>✅</td>
194
209
  <td>-</td>
210
+ </tr>
211
+ <tr>
212
+ <td>Klaviyo</td>
213
+ <td>✅</td>
214
+ <td>-</td>
195
215
  </tr>
196
216
  <tr>
197
217
  <td>Notion</td>
@@ -202,6 +222,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
202
222
  <td>Shopify</td>
203
223
  <td>✅</td>
204
224
  <td>-</td>
225
+ </tr>
226
+ <tr>
227
+ <td>Slack</td>
228
+ <td>✅</td>
229
+ <td>-</td>
205
230
  </tr>
206
231
  <tr>
207
232
  <td>Stripe</td>
@@ -213,4 +238,5 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
213
238
  More to come soon!
214
239
 
215
240
  ## Acknowledgements
216
- This project would not have been possible without the amazing work done by the [SQLAlchemy](https://www.sqlalchemy.org/) and [dlt](https://dlthub.com/) teams. We relied on their work to connect to various sources and destinations, and built `ingestr` as a simple, opinionated wrapper around their work.
241
+
242
+ This project would not have been possible without the amazing work done by the [SQLAlchemy](https://www.sqlalchemy.org/) and [dlt](https://dlthub.com/) teams. We relied on their work to connect to various sources and destinations, and built `ingestr` as a simple, opinionated wrapper around their work.
@@ -10,7 +10,7 @@
10
10
  </a>
11
11
  </div>
12
12
 
13
- -----
13
+ ---
14
14
 
15
15
  Ingestr is a command-line application that allows you to ingest data from any source into any destination using simple command-line flags, no code necessary.
16
16
 
@@ -20,8 +20,8 @@ Ingestr is a command-line application that allows you to ingest data from any so
20
20
 
21
21
  ingestr takes away the complexity of managing any backend or writing any code for ingesting data, simply run the command and watch the data land on its destination.
22
22
 
23
-
24
23
  ## Installation
24
+
25
25
  ```
26
26
  pip install ingestr
27
27
  ```
@@ -39,15 +39,17 @@ ingestr ingest \
39
39
  That's it.
40
40
 
41
41
  This command will:
42
+
42
43
  - get the table `public.some_data` from the Postgres instance.
43
44
  - upload this data to your BigQuery warehouse under the schema `ingestr` and table `some_data`.
44
45
 
45
46
  ## Documentation
47
+
46
48
  You can see the full documentation [here](https://bruin-data.github.io/ingestr/getting-started/quickstart.html).
47
49
 
48
50
  ## Community
49
- Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
50
51
 
52
+ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shared_invite/zt-2dl2i8foy-bVsuMUauHeN9M2laVm3ZVg).
51
53
 
52
54
  ## Supported Sources & Destinations
53
55
 
@@ -128,10 +130,20 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
128
130
  <tr>
129
131
  <td colspan="3" style='text-align:center;'><strong>Platforms</strong></td>
130
132
  </tr>
133
+ <tr>
134
+ <td>Airtable</td>
135
+ <td>✅</td>
136
+ <td>-</td>
137
+ </tr>
131
138
  <tr>
132
139
  <td>Chess.com</td>
133
140
  <td>✅</td>
134
141
  <td>-</td>
142
+ </tr>
143
+ <tr>
144
+ <td>Facebook Ads</td>
145
+ <td>✅</td>
146
+ <td>-</td>
135
147
  </tr>
136
148
  <tr>
137
149
  <td>Gorgias</td>
@@ -147,6 +159,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
147
159
  <td>HubSpot</td>
148
160
  <td>✅</td>
149
161
  <td>-</td>
162
+ </tr>
163
+ <tr>
164
+ <td>Klaviyo</td>
165
+ <td>✅</td>
166
+ <td>-</td>
150
167
  </tr>
151
168
  <tr>
152
169
  <td>Notion</td>
@@ -157,6 +174,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
157
174
  <td>Shopify</td>
158
175
  <td>✅</td>
159
176
  <td>-</td>
177
+ </tr>
178
+ <tr>
179
+ <td>Slack</td>
180
+ <td>✅</td>
181
+ <td>-</td>
160
182
  </tr>
161
183
  <tr>
162
184
  <td>Stripe</td>
@@ -168,4 +190,5 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
168
190
  More to come soon!
169
191
 
170
192
  ## Acknowledgements
171
- This project would not have been possible without the amazing work done by the [SQLAlchemy](https://www.sqlalchemy.org/) and [dlt](https://dlthub.com/) teams. We relied on their work to connect to various sources and destinations, and built `ingestr` as a simple, opinionated wrapper around their work.
193
+
194
+ This project would not have been possible without the amazing work done by the [SQLAlchemy](https://www.sqlalchemy.org/) and [dlt](https://dlthub.com/) teams. We relied on their work to connect to various sources and destinations, and built `ingestr` as a simple, opinionated wrapper around their work.
@@ -64,6 +64,7 @@ export default defineConfig({
64
64
  text: "Google BigQuery",
65
65
  link: "/supported-sources/bigquery.md",
66
66
  },
67
+ { text: "Kafka", link: "/supported-sources/kafka.md" },
67
68
  { text: "Local CSV Files", link: "/supported-sources/csv.md" },
68
69
  {
69
70
  text: "Microsoft SQL Server",
@@ -83,12 +84,19 @@ export default defineConfig({
83
84
  text: "Platforms",
84
85
  collapsed: false,
85
86
  items: [
87
+ { text: "Airtable", link: "/supported-sources/airtable.md" },
86
88
  { text: "Chess.com", link: "/supported-sources/chess.md" },
89
+ {
90
+ text: "Facebook Ads",
91
+ link: "/supported-sources/facebook-ads.md",
92
+ },
87
93
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
88
94
  { text: "Gorgias", link: "/supported-sources/gorgias.md" },
89
95
  { text: "HubSpot", link: "/supported-sources/hubspot.md" },
96
+ { text: "Klaviyo", link: "/supported-sources/klaviyo.md" },
90
97
  { text: "Notion", link: "/supported-sources/notion.md" },
91
98
  { text: "Shopify", link: "/supported-sources/shopify.md" },
99
+ { text: "Slack", link: "/supported-sources/slack.md" },
92
100
  { text: "Stripe", link: "/supported-sources/stripe.md" },
93
101
  ],
94
102
  },
@@ -0,0 +1,37 @@
1
+ # Airtable
2
+
3
+ [Airtable](https://airtable.com/) is a cloud-based platform that combines spreadsheet and database functionalities, designed for data management and collaboration.
4
+
5
+ ingestr supports Airtable as a source.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for Airtable is as follows:
10
+
11
+ ```plaintext
12
+ airtable://?access_token=<access_token>&base_id=<base_id>
13
+ ```
14
+
15
+ URI parameters:
16
+
17
+ - `base_id`: A unique identifier for an Airtable base.
18
+ - `access_token`: A personal access token for authentication with the Airtable API.
19
+
20
+ The URI is used to connect to the Airtable API for extracting data. More details on setting up Airtable integrations can be found [here](https://airtable.com/developers/web/api).
21
+
22
+ ## Setting up an Airtable Integration
23
+
24
+ Airtable requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/airtable#setup-guide).
25
+
26
+ Once you complete the guide, you should have an Access Token and Base Id. Let's say your Access Token is `patr123.abc` and Base Id is `appXYZ`, here's a sample command that will copy the data from Airtable into a duckdb database:
27
+
28
+ ```sh
29
+ ingestr ingest --source-uri 'airtable://?base_id=appXYZ&access_token=patr123.abc' --source-table 'employee' --dest-uri 'duckdb:///airtable.duckdb' --dest-table 'des.employee'
30
+ ```
31
+
32
+ The result of this command will be an `employee` table containing data from the `employee` source in the `airtable.duckdb` database.
33
+
34
+ The `source-table` can include multiple table names that share the same `base_id` (e.g. `--source-table 'employee,users'`), but this will merge all the data from the specified tables into a single destination table.
35
+
36
+ > [!CAUTION]
37
+ > Airtable does not support incremental loading, which means every time you run the command, the entire table will be copied from Airtable to the destination. This can be slow for large tables.
@@ -0,0 +1,51 @@
1
+ # Facebook Ads
2
+
3
+ Facebook Ads is the advertising platform that helps users to create targeted ads on Facebook, Instagram and Messenger.
4
+
5
+ ingestr supports Facebook Ads as a source.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for Facebook Ads is as follows:
10
+
11
+ ```plaintext
12
+ facebookads://?access_token=<access_token>&account_id=<account_id>
13
+ ```
14
+
15
+ URI parameters:
16
+
17
+ - `access_token` is associated with Business Facebook App.
18
+ - `account_id` is associated with Ad manager.
19
+
20
+ Both are used for authentication with Facebook Ads API.
21
+
22
+ The URI is used to connect to Facebook Ads API for extracting data.
23
+
24
+ ## Setting up a Facebook Ads Integration
25
+
26
+ Facebook Ads requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/facebook_ads#setup-guide).
27
+
28
+ Once you complete the guide, you should have an access token and an account ID. Let's say your `access_token` is `abcdef` and your `account_id` is `1234`; here's a sample command that will copy the data from Facebook Ads into a duckdb database:
29
+
30
+ ```sh
31
+ ingestr ingest \
32
+ --source-uri 'facebookads://?access_token=abcdef&account_id=1234' \
33
+ --source-table 'campaigns' \
34
+ --dest-uri 'duckdb:///facebook.duckdb' \
35
+ --dest-table 'dest.campaigns'
36
+ ```
37
+
38
+ The result of this command will be a table in the `facebook.duckdb` database.
39
+
40
+ ## Available Tables
41
+
42
+ Facebook Ads source allows ingesting the following sources into separate tables:
43
+
44
+ - `campaigns`: Retrieves all DEFAULT_CAMPAIGN_FIELDS.
45
+ - `ad_sets`: Retrieves all DEFAULT_ADSET_FIELDS.
46
+ - `leads`: Retrieves all DEFAULT_LEAD_FIELDS.
47
+ - `ads_creatives`: Retrieves all DEFAULT_ADCREATIVE_FIELDS.
48
+ - `ads`: Retrieves all DEFAULT_ADS_FIELDS.
49
+ - `facebook_insights`: Retrieves all DEFAULT_INSIGHTS_FIELDS.
50
+
51
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
@@ -0,0 +1,38 @@
1
+ # Apache Kafka
2
+ [Apache Kafka](https://kafka.apache.org/) is a distributed event streaming platform used by thousands of companies for high-performance data pipelines, streaming analytics, data integration, and mission-critical applications.
3
+
4
+ ingestr supports Apache Kafka as a source.
5
+
6
+ ## URI Format
7
+ The URI format for Apache Kafka is as follows:
8
+
9
+ ```plaintext
10
+ kafka://?bootstrap_servers=localhost:9092&group_id=test_group&security_protocol=SASL_SSL&sasl_mechanisms=PLAIN&sasl_username=example_username&sasl_password=example_secret&batch_size=1000&batch_timeout=3
11
+ ```
12
+
13
+ URI parameters:
14
+ - `bootstrap_servers`: The Kafka server(s) to connect to, typically in the form of a host and port (e.g., `localhost:9092`).
15
+ - `group_id`: The consumer group ID used for identifying the client when consuming messages.
16
+ - `security_protocol`: The protocol used to communicate with brokers (e.g., `SASL_SSL` for secure communication).
17
+ - `sasl_mechanisms`: The SASL mechanism to be used for authentication (e.g., `PLAIN`).
18
+ - `sasl_username`: The username for SASL authentication.
19
+ - `sasl_password`: The password for SASL authentication.
20
+ - `batch_size`: The number of messages to fetch in a single batch, defaults to 3000.
21
+ - `batch_timeout`: The maximum time to wait for messages, defaults to 3 seconds.
22
+
23
+ The URI is used to connect to the Kafka brokers for ingesting messages.
24
+
25
+ ### Group ID
26
+ The group ID is used to identify the consumer group that reads messages from a topic. Kafka uses the group ID to manage consumer offsets and assign partitions to consumers, which means that the group ID is the key to reading messages from the correct partition and position in the topic.
27
+
28
+ Once you have your Kafka server, credentials, and group ID set up, here's a sample command to ingest messages from a Kafka topic into a duckdb database:
29
+
30
+ ```sh
31
+ ingestr ingest \
32
+ --source-uri 'kafka://?bootstrap_servers=localhost:9092' \
33
+ --source-table 'my-topic' \
34
+ --dest-uri duckdb:///kafka.duckdb \
35
+ --dest-table 'kafka.my_topic'
36
+ ```
37
+
38
+ The result of this command will be a table in the `kafka.duckdb` database with JSON columns.
@@ -0,0 +1,64 @@
1
+ # Klaviyo
2
+
3
+ [Klaviyo](https://www.klaviyo.com/) is a marketing automation platform that helps businesses build and manage smarter digital relationships with their customers by connecting through personalized email and enhancing customer loyalty.
4
+
5
+ ingestr supports Klaviyo as a source.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for Klaviyo is as follows:
10
+
11
+ ```plaintext
12
+ klaviyo://?api_key=<api-key>
13
+ ```
14
+
15
+ URI parameters:
16
+
17
+ - `api_key`: The API key used for authentication with the Klaviyo API.
18
+
19
+ The URI is used to connect to the Klaviyo API for extracting data.
20
+
21
+ ```bash
22
+ ingestr ingest --source-table 'events' --source-uri 'klaviyo://?api_key=pk_test' --dest-uri duckdb:///klaviyo.duckdb --interval-start 2022-01-01 --dest-table 'klaviyo.events' --extract-parallelism 20
23
+ ```
24
+
25
+ This command fetches all the events that were created/updated since 2022-01-01 and writes them to the `klaviyo.events` table on DuckDB, using 20 parallel threads to improve performance and efficiently handle large amounts of data.
26
+
27
+ ## Available Tables
28
+
29
+ Klaviyo source allows ingesting the following sources into separate tables:
30
+
31
+ [events](https://developers.klaviyo.com/en/reference/events_api_overview): Retrieves all events in an account where each event represents an action taken by a profile such as a password reset or a product order.
32
+
33
+ [profiles](https://developers.klaviyo.com/en/reference/profiles_api_overview): Retrieves all profiles in an account where each profile includes details like organization, job title, email and other attributes.
34
+
35
+ [campaigns](https://developers.klaviyo.com/en/reference/campaigns_api_overview): Retrieves all campaigns in an account where each campaign is a targeted message sent to a specific audience.
36
+
37
+ [metrics](https://developers.klaviyo.com/en/reference/metrics_api_overview): Retrieves all metrics in an account where each metric represents a category of events or actions a person can take.
38
+
39
+ [tags](https://developers.klaviyo.com/en/reference/get_tags): Retrieves all tags in an account.
40
+
41
+ [coupons](https://developers.klaviyo.com/en/reference/get_coupons): Retrieves all coupons in an account.
42
+
43
+ [catalog-variants](https://developers.klaviyo.com/en/reference/get_catalog_variants): Retrieves all variants in an account.
44
+
45
+ [catalog-categories](https://developers.klaviyo.com/en/reference/get_catalog_categories): Retrieves all catalog categories in an account.
46
+
47
+ [catalog-items](https://developers.klaviyo.com/en/reference/get_catalog_items): Retrieves all catalog items in an account.
48
+
49
+ [flows](https://developers.klaviyo.com/en/reference/get_flows): Retrieves all flows in an account where flow is a sequence of automated actions that is triggered when a person performs a specific action.
50
+
51
+ [lists](https://developers.klaviyo.com/en/reference/get_lists): Retrieves all lists in an account.
52
+
53
+ [images](https://developers.klaviyo.com/en/reference/get_images): Retrieves all images in an account.
54
+
55
+ [segments](https://developers.klaviyo.com/en/reference/get_segments): Retrieves all segments in an account where segment is a dynamic list that contains profiles meeting a certain set of conditions.
56
+
57
+ [forms](https://developers.klaviyo.com/en/reference/get_forms): Retrieves all forms in an account.
58
+
59
+ [templates](https://developers.klaviyo.com/en/reference/get_templates): Retrieves all templates in an account.
60
+
61
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
62
+
63
+ > [!WARNING]
64
+ > Klaviyo does not support incremental loading for many endpoints in its APIs, which means ingestr will load endpoints incrementally if they support it, and do a full-refresh if not.
@@ -0,0 +1,42 @@
1
+ # Slack
2
+
3
+ [Slack](https://www.Slack.com/) is a messaging platform for teams and organizations where they can collaborate, share ideas and information.
4
+
5
+ ingestr supports Slack as a source.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for Slack is as follows:
10
+
11
+ ```plaintext
12
+ slack://?api_key=<api-key-here>
13
+ ```
14
+
15
+ URI parameters:
16
+
17
+ - `api_key`: The API key used for authentication with the Slack API.
18
+
19
+ The URI is used to connect to the Slack API for extracting data.
20
+
21
+ ## Setting up a Slack Integration
22
+
23
+ Slack requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/slack#setup-guide).
24
+
25
+ Once you complete the guide, you should have an API key with the necessary permissions as mentioned in the guide. Let's say your API key is axb-test-564. Here's a sample command that will copy the data from Slack into a DuckDB database:
26
+
27
+ ```sh
28
+ ingestr ingest --source-uri 'slack://?api_key=axb-test-564' --source-table 'channels' --dest-uri duckdb:///slack.duckdb --dest-table 'dest.channels'
29
+ ```
30
+
31
+ The result of this command will be a table in the `slack.duckdb` database.
32
+
33
+ ## Available Tables
34
+
35
+ Slack source allows ingesting the following sources into separate tables:
36
+
37
+ - `channels`: Retrieves information about all the channels.
38
+ - `users`: Retrieves information about all the users.
39
+ - `messages:chan1,chan2`: Retrieves messages from specified channels, where chan1 and chan2 represent user-defined channels (e.g. general, memes). Multiple channels can be listed.
40
+ - `access_logs`: Retrieves all the access logs.
41
+
42
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
@@ -1,7 +1,7 @@
1
1
  import hashlib
2
+ import tempfile
2
3
  from datetime import datetime
3
4
  from enum import Enum
4
- import tempfile
5
5
  from typing import Optional
6
6
 
7
7
  import dlt
@@ -244,6 +244,13 @@ def ingest(
244
244
  envvar="PIPELINES_DIR",
245
245
  ),
246
246
  ] = None, # type: ignore
247
+ extract_parallelism: Annotated[
248
+ Optional[int],
249
+ typer.Option(
250
+ help="The number of parallel jobs to run for extracting data from the source, only applicable for certain sources",
251
+ envvar="EXTRACT_PARALLELISM",
252
+ ),
253
+ ] = 5, # type: ignore
247
254
  ):
248
255
  track(
249
256
  "command_triggered",
@@ -253,6 +260,8 @@ def ingest(
253
260
  )
254
261
 
255
262
  dlt.config["data_writer.file_max_items"] = loader_file_size
263
+ dlt.config["extract.workers"] = extract_parallelism
264
+ dlt.config["extract.max_parallel_items"] = extract_parallelism
256
265
  if schema_naming != SchemaNaming.default:
257
266
  dlt.config["schema.naming"] = schema_naming.value
258
267
 
@@ -413,6 +422,7 @@ def ingest(
413
422
  # remove the pipelines_dir folder if it was created by ingestr
414
423
  if is_pipelines_dir_temp:
415
424
  import shutil
425
+
416
426
  shutil.rmtree(pipelines_dir)
417
427
 
418
428
  print(
@@ -0,0 +1,10 @@
1
+ # ignore secrets, virtual environments and typical python compilation artifacts
2
+ secrets.toml
3
+ # ignore basic python artifacts
4
+ .env
5
+ **/__pycache__/
6
+ **/*.py[cod]
7
+ **/*$py.class
8
+ # ignore duckdb
9
+ *.duckdb
10
+ *.wal
@@ -0,0 +1,69 @@
1
+ """Source that loads tables from Airtable.
2
+ Supports whitelisting of tables or loading of all tables from a specified base.
3
+ """
4
+
5
+ from typing import Any, Dict, Iterable, Iterator, List, Optional
6
+
7
+ import dlt
8
+ import pyairtable
9
+ from dlt.sources import DltResource
10
+
11
+
12
+ @dlt.source
13
+ def airtable_source(
14
+ base_id: str = dlt.config.value,
15
+ table_names: Optional[List[str]] = dlt.config.value,
16
+ access_token: str = dlt.secrets.value,
17
+ ) -> Iterable[DltResource]:
18
+ """
19
+ Represents tables for a single Airtable base.
20
+ Args:
21
+ base_id (str): The id of the base. Obtain it e.g. from the URL in your webbrowser.
22
+ It starts with "app". See https://support.airtable.com/docs/finding-airtable-ids
23
+ table_names (Optional[List[str]]): A list of table IDs or table names to load.
24
+ Unless specified otherwise, all tables in the schema are loaded.
25
+ Names are freely user-defined. IDs start with "tbl". See https://support.airtable.com/docs/finding-airtable-ids
26
+ access_token (str): The personal access token.
27
+ See https://support.airtable.com/docs/creating-and-using-api-keys-and-access-tokens#personal-access-tokens-basic-actions
28
+ """
29
+ api = pyairtable.Api(access_token)
30
+ all_tables_url = api.build_url(f"meta/bases/{base_id}/tables")
31
+ tables = api.request(method="GET", url=all_tables_url).get("tables")
32
+ for t in tables:
33
+ if table_names:
34
+ if t.get("id") in table_names or t.get("name") in table_names:
35
+ yield airtable_resource(api, base_id, t)
36
+ else:
37
+ yield airtable_resource(api, base_id, t)
38
+
39
+
40
+ def airtable_resource(
41
+ api: pyairtable.Api,
42
+ base_id: str,
43
+ table: Dict[str, Any],
44
+ ) -> DltResource:
45
+ """
46
+ Represents a single airtable.
47
+ Args:
48
+ api (pyairtable.Api): The API connection object
49
+ base_id (str): The id of the base. Obtain it e.g. from the URL in your webbrowser.
50
+ It starts with "app". See https://support.airtable.com/docs/finding-airtable-ids
51
+ table (Dict[str, Any]): Metadata about an airtable, does not contain the actual records
52
+ """
53
+ primary_key_id = table["primaryFieldId"]
54
+ primary_key_field = [
55
+ field for field in table["fields"] if field["id"] == primary_key_id
56
+ ][0]
57
+ table_name: str = table["name"]
58
+ primary_key: List[str] = [primary_key_field["name"]]
59
+ air_table = api.table(base_id, table["id"])
60
+
61
+ # Table.iterate() supports rich customization options, such as chunk size, fields, cell format, timezone, locale, and view
62
+ air_table_generator: Iterator[List[Any]] = air_table.iterate()
63
+
64
+ return dlt.resource(
65
+ air_table_generator,
66
+ name=table_name,
67
+ primary_key=primary_key,
68
+ write_disposition="replace",
69
+ )