ingestr 0.7.7__tar.gz → 0.7.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

Files changed (116) hide show
  1. {ingestr-0.7.7 → ingestr-0.7.8}/.gitignore +2 -1
  2. {ingestr-0.7.7 → ingestr-0.7.8}/PKG-INFO +13 -1
  3. {ingestr-0.7.7 → ingestr-0.7.8}/README.md +10 -0
  4. {ingestr-0.7.7 → ingestr-0.7.8}/docs/.vitepress/config.mjs +6 -0
  5. ingestr-0.7.8/docs/supported-sources/facebook-ads.md +51 -0
  6. ingestr-0.7.8/docs/supported-sources/kafka.md +38 -0
  7. ingestr-0.7.8/docs/supported-sources/klaviyo.md +64 -0
  8. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/main.py +9 -0
  9. ingestr-0.7.8/ingestr/src/.gitignore +10 -0
  10. ingestr-0.7.8/ingestr/src/facebook_ads/__init__.py +197 -0
  11. ingestr-0.7.8/ingestr/src/facebook_ads/exceptions.py +5 -0
  12. ingestr-0.7.8/ingestr/src/facebook_ads/helpers.py +255 -0
  13. ingestr-0.7.8/ingestr/src/facebook_ads/settings.py +208 -0
  14. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/factory.py +9 -0
  15. ingestr-0.7.8/ingestr/src/kafka/__init__.py +103 -0
  16. ingestr-0.7.8/ingestr/src/kafka/helpers.py +227 -0
  17. ingestr-0.7.8/ingestr/src/klaviyo/_init_.py +173 -0
  18. ingestr-0.7.8/ingestr/src/klaviyo/client.py +212 -0
  19. ingestr-0.7.8/ingestr/src/klaviyo/helpers.py +19 -0
  20. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/sources.py +141 -0
  21. ingestr-0.7.8/ingestr/src/version.py +1 -0
  22. {ingestr-0.7.7 → ingestr-0.7.8}/pyproject.toml +6 -1
  23. {ingestr-0.7.7 → ingestr-0.7.8}/requirements.txt +2 -0
  24. ingestr-0.7.7/ingestr/src/version.py +0 -1
  25. {ingestr-0.7.7 → ingestr-0.7.8}/.dockerignore +0 -0
  26. {ingestr-0.7.7 → ingestr-0.7.8}/.github/workflows/deploy-docs.yml +0 -0
  27. {ingestr-0.7.7 → ingestr-0.7.8}/.github/workflows/tests.yml +0 -0
  28. {ingestr-0.7.7 → ingestr-0.7.8}/.python-version +0 -0
  29. {ingestr-0.7.7 → ingestr-0.7.8}/Dockerfile +0 -0
  30. {ingestr-0.7.7 → ingestr-0.7.8}/LICENSE.md +0 -0
  31. {ingestr-0.7.7 → ingestr-0.7.8}/Makefile +0 -0
  32. {ingestr-0.7.7 → ingestr-0.7.8}/docs/.vitepress/theme/custom.css +0 -0
  33. {ingestr-0.7.7 → ingestr-0.7.8}/docs/.vitepress/theme/index.js +0 -0
  34. {ingestr-0.7.7 → ingestr-0.7.8}/docs/commands/example-uris.md +0 -0
  35. {ingestr-0.7.7 → ingestr-0.7.8}/docs/commands/ingest.md +0 -0
  36. {ingestr-0.7.7 → ingestr-0.7.8}/docs/getting-started/core-concepts.md +0 -0
  37. {ingestr-0.7.7 → ingestr-0.7.8}/docs/getting-started/incremental-loading.md +0 -0
  38. {ingestr-0.7.7 → ingestr-0.7.8}/docs/getting-started/quickstart.md +0 -0
  39. {ingestr-0.7.7 → ingestr-0.7.8}/docs/getting-started/telemetry.md +0 -0
  40. {ingestr-0.7.7 → ingestr-0.7.8}/docs/index.md +0 -0
  41. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/airtable.md +0 -0
  42. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/bigquery.md +0 -0
  43. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/chess.md +0 -0
  44. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/csv.md +0 -0
  45. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/databricks.md +0 -0
  46. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/duckdb.md +0 -0
  47. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/gorgias.md +0 -0
  48. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/gsheets.md +0 -0
  49. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/hubspot.md +0 -0
  50. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/mongodb.md +0 -0
  51. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/mssql.md +0 -0
  52. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/mysql.md +0 -0
  53. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/notion.md +0 -0
  54. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/oracle.md +0 -0
  55. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/postgres.md +0 -0
  56. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/redshift.md +0 -0
  57. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/sap-hana.md +0 -0
  58. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/shopify.md +0 -0
  59. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/slack.md +0 -0
  60. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/snowflake.md +0 -0
  61. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/sqlite.md +0 -0
  62. {ingestr-0.7.7 → ingestr-0.7.8}/docs/supported-sources/stripe.md +0 -0
  63. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/airtable/__init__.py +0 -0
  64. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/chess/__init__.py +0 -0
  65. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/chess/helpers.py +0 -0
  66. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/chess/settings.py +0 -0
  67. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/destinations.py +0 -0
  68. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/google_sheets/README.md +0 -0
  69. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/google_sheets/__init__.py +0 -0
  70. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
  71. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
  72. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
  73. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/gorgias/__init__.py +0 -0
  74. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/gorgias/helpers.py +0 -0
  75. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/hubspot/__init__.py +0 -0
  76. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/hubspot/helpers.py +0 -0
  77. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/hubspot/settings.py +0 -0
  78. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/mongodb/__init__.py +0 -0
  79. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/mongodb/helpers.py +0 -0
  80. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/notion/__init__.py +0 -0
  81. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/notion/helpers/__init__.py +0 -0
  82. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/notion/helpers/client.py +0 -0
  83. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/notion/helpers/database.py +0 -0
  84. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/notion/settings.py +0 -0
  85. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/shopify/__init__.py +0 -0
  86. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/shopify/exceptions.py +0 -0
  87. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/shopify/helpers.py +0 -0
  88. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/shopify/settings.py +0 -0
  89. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/slack/__init__.py +0 -0
  90. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/slack/helpers.py +0 -0
  91. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/slack/settings.py +0 -0
  92. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/sql_database/__init__.py +0 -0
  93. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/sql_database/arrow_helpers.py +0 -0
  94. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/sql_database/helpers.py +0 -0
  95. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/sql_database/override.py +0 -0
  96. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/sql_database/schema_types.py +0 -0
  97. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/stripe_analytics/__init__.py +0 -0
  98. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/stripe_analytics/helpers.py +0 -0
  99. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/stripe_analytics/settings.py +0 -0
  100. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/table_definition.py +0 -0
  101. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/telemetry/event.py +0 -0
  102. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/src/testdata/fakebqcredentials.json +0 -0
  103. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/testdata/.gitignore +0 -0
  104. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/testdata/create_replace.csv +0 -0
  105. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/testdata/delete_insert_expected.csv +0 -0
  106. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/testdata/delete_insert_part1.csv +0 -0
  107. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/testdata/delete_insert_part2.csv +0 -0
  108. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/testdata/merge_expected.csv +0 -0
  109. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/testdata/merge_part1.csv +0 -0
  110. {ingestr-0.7.7 → ingestr-0.7.8}/ingestr/testdata/merge_part2.csv +0 -0
  111. {ingestr-0.7.7 → ingestr-0.7.8}/package-lock.json +0 -0
  112. {ingestr-0.7.7 → ingestr-0.7.8}/package.json +0 -0
  113. {ingestr-0.7.7 → ingestr-0.7.8}/requirements-dev.txt +0 -0
  114. {ingestr-0.7.7 → ingestr-0.7.8}/resources/demo.gif +0 -0
  115. {ingestr-0.7.7 → ingestr-0.7.8}/resources/demo.tape +0 -0
  116. {ingestr-0.7.7 → ingestr-0.7.8}/resources/ingestr.svg +0 -0
@@ -13,4 +13,5 @@ pipeline_data
13
13
  dist
14
14
  docs/.vitepress/dist
15
15
  docs/.vitepress/cache
16
- node_modules
16
+ node_modules
17
+ *.duckdb
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.7.7
3
+ Version: 0.7.8
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -14,11 +14,13 @@ Classifier: Operating System :: OS Independent
14
14
  Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Topic :: Database
16
16
  Requires-Python: >=3.9
17
+ Requires-Dist: confluent-kafka>=2.3.0
17
18
  Requires-Dist: cx-oracle==8.3.0
18
19
  Requires-Dist: databricks-sql-connector==2.9.3
19
20
  Requires-Dist: dlt==0.5.1
20
21
  Requires-Dist: duckdb-engine==0.11.5
21
22
  Requires-Dist: duckdb==0.10.2
23
+ Requires-Dist: facebook-business==20.0.0
22
24
  Requires-Dist: google-api-python-client==2.130.0
23
25
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
24
26
  Requires-Dist: mysql-connector-python==9.0.0
@@ -185,6 +187,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
185
187
  <td>Chess.com</td>
186
188
  <td>✅</td>
187
189
  <td>-</td>
190
+ </tr>
191
+ <tr>
192
+ <td>Facebook Ads</td>
193
+ <td>✅</td>
194
+ <td>-</td>
188
195
  </tr>
189
196
  <tr>
190
197
  <td>Gorgias</td>
@@ -200,6 +207,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
200
207
  <td>HubSpot</td>
201
208
  <td>✅</td>
202
209
  <td>-</td>
210
+ </tr>
211
+ <tr>
212
+ <td>Klaviyo</td>
213
+ <td>✅</td>
214
+ <td>-</td>
203
215
  </tr>
204
216
  <tr>
205
217
  <td>Notion</td>
@@ -139,6 +139,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
139
139
  <td>Chess.com</td>
140
140
  <td>✅</td>
141
141
  <td>-</td>
142
+ </tr>
143
+ <tr>
144
+ <td>Facebook Ads</td>
145
+ <td>✅</td>
146
+ <td>-</td>
142
147
  </tr>
143
148
  <tr>
144
149
  <td>Gorgias</td>
@@ -154,6 +159,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
154
159
  <td>HubSpot</td>
155
160
  <td>✅</td>
156
161
  <td>-</td>
162
+ </tr>
163
+ <tr>
164
+ <td>Klaviyo</td>
165
+ <td>✅</td>
166
+ <td>-</td>
157
167
  </tr>
158
168
  <tr>
159
169
  <td>Notion</td>
@@ -64,6 +64,7 @@ export default defineConfig({
64
64
  text: "Google BigQuery",
65
65
  link: "/supported-sources/bigquery.md",
66
66
  },
67
+ { text: "Kafka", link: "/supported-sources/kafka.md" },
67
68
  { text: "Local CSV Files", link: "/supported-sources/csv.md" },
68
69
  {
69
70
  text: "Microsoft SQL Server",
@@ -85,9 +86,14 @@ export default defineConfig({
85
86
  items: [
86
87
  { text: "Airtable", link: "/supported-sources/airtable.md" },
87
88
  { text: "Chess.com", link: "/supported-sources/chess.md" },
89
+ {
90
+ text: "Facebook Ads",
91
+ link: "/supported-sources/facebook-ads.md",
92
+ },
88
93
  { text: "Google Sheets", link: "/supported-sources/gsheets.md" },
89
94
  { text: "Gorgias", link: "/supported-sources/gorgias.md" },
90
95
  { text: "HubSpot", link: "/supported-sources/hubspot.md" },
96
+ { text: "Klaviyo", link: "/supported-sources/klaviyo.md" },
91
97
  { text: "Notion", link: "/supported-sources/notion.md" },
92
98
  { text: "Shopify", link: "/supported-sources/shopify.md" },
93
99
  { text: "Slack", link: "/supported-sources/slack.md" },
@@ -0,0 +1,51 @@
1
+ # Facebook Ads
2
+
3
+ Facebook Ads is the advertising platform that helps users to create targeted ads on Facebook, Instagram and Messenger.
4
+
5
+ ingestr supports Facebook Ads as a source.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for Facebook Ads is as follows:
10
+
11
+ ```plaintext
12
+ facebookads://?access_token=<access_token>&account_id=<account_id>
13
+ ```
14
+
15
+ URI parameters:
16
+
17
+ - `access_token` is associated with Business Facebook App.
18
+ - `account_id` is associated with Ad manager.
19
+
20
+ Both are used for authentication with Facebook Ads API.
21
+
22
+ The URI is used to connect to Facebook Ads API for extracting data.
23
+
24
+ ## Setting up a Facebook Ads Integration
25
+
26
+ Facebook Ads requires a few steps to set up an integration, please follow the guide dltHub [has built here](https://dlthub.com/docs/dlt-ecosystem/verified-sources/facebook_ads#setup-guide).
27
+
28
+ Once you complete the guide, you should have an access token and an account ID. Let's say your access_token is `abcdef` and account_id is `1234`; here's a sample command that will copy the data from Facebook Ads into a duckdb database:
29
+
30
+ ```sh
31
+ ingestr ingest \
32
+ --source-uri 'facebookads://?access_token=abcdef&account_id=1234' \
33
+ --source-table 'campaigns' \
34
+ --dest-uri 'duckdb:///facebook.duckdb' \
35
+ --dest-table 'dest.campaigns'
36
+ ```
37
+
38
+ The result of this command will be a table in the `facebook.duckdb` database.
39
+
40
+ ## Available Tables
41
+
42
+ Facebook Ads source allows ingesting the following sources into separate tables:
43
+
44
+ - `campaigns`: Retrieves all DEFAULT_CAMPAIGN_FIELDS.
45
+ - `ad_sets`: Retrieves all DEFAULT_ADSET_FIELDS.
46
+ - `leads`: Retrieves all DEFAULT_LEAD_FIELDS.
47
+ - `ads_creatives`: Retrieves all DEFAULT_ADCREATIVE_FIELDS.
48
+ - `ads`: Retrieves all DEFAULT_AD_FIELDS.
49
+ - `facebook_insights`: Retrieves all DEFAULT_INSIGHT_FIELDS.
50
+
51
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
@@ -0,0 +1,38 @@
1
+ # Apache Kafka
2
+ [Apache Kafka](https://kafka.apache.org/) is a distributed event streaming platform used by thousands of companies for high-performance data pipelines, streaming analytics, data integration, and mission-critical applications.
3
+
4
+ ingestr supports Apache Kafka as a source.
5
+
6
+ ## URI Format
7
+ The URI format for Apache Kafka is as follows:
8
+
9
+ ```plaintext
10
+ kafka://?bootstrap_servers=localhost:9092&group_id=test_group&security_protocol=SASL_SSL&sasl_mechanisms=PLAIN&sasl_username=example_username&sasl_password=example_secret&batch_size=1000&batch_timeout=3
11
+ ```
12
+
13
+ URI parameters:
14
+ - `bootstrap_servers`: The Kafka server(s) to connect to, typically in the form of a host and port (e.g., `localhost:9092`).
15
+ - `group_id`: The consumer group ID used for identifying the client when consuming messages.
16
+ - `security_protocol`: The protocol used to communicate with brokers (e.g., `SASL_SSL` for secure communication).
17
+ - `sasl_mechanisms`: The SASL mechanism to be used for authentication (e.g., `PLAIN`).
18
+ - `sasl_username`: The username for SASL authentication.
19
+ - `sasl_password`: The password for SASL authentication.
20
+ - `batch_size`: The number of messages to fetch in a single batch, defaults to 3000.
21
+ - `batch_timeout`: The maximum time to wait for messages, defaults to 3 seconds.
22
+
23
+ The URI is used to connect to the Kafka brokers for ingesting messages.
24
+
25
+ ### Group ID
26
+ The group ID is used to identify the consumer group that reads messages from a topic. Kafka uses the group ID to manage consumer offsets and assign partitions to consumers, which means that the group ID is the key to reading messages from the correct partition and position in the topic.
27
+
28
+ Once you have your Kafka server, credentials, and group ID set up, here's a sample command to ingest messages from a Kafka topic into a duckdb database:
29
+
30
+ ```sh
31
+ ingestr ingest \
32
+ --source-uri 'kafka://?bootstrap_servers=localhost:9092' \
33
+ --source-table 'my-topic' \
34
+ --dest-uri duckdb:///kafka.duckdb \
35
+ --dest-table 'kafka.my_topic'
36
+ ```
37
+
38
+ The result of this command will be a table in the `kafka.duckdb` database with JSON columns.
@@ -0,0 +1,64 @@
1
+ # Klaviyo
2
+
3
+ [Klaviyo](https://www.klaviyo.com/) is a marketing automation platform that helps businesses build and manage smarter digital relationships with their customers by connecting through personalized email and enhancing customer loyalty.
4
+
5
+ ingestr supports Klaviyo as a source.
6
+
7
+ ## URI Format
8
+
9
+ The URI format for Klaviyo is as follows:
10
+
11
+ ```plaintext
12
+ klaviyo://?api_key=<api-key>
13
+ ```
14
+
15
+ URI parameters:
16
+
17
+ - `api_key`: The API key used for authentication with the Klaviyo API.
18
+
19
+ The URI is used to connect to the Klaviyo API for extracting data.
20
+
21
+ ```bash
22
+ ingestr ingest --source-table 'events' --source-uri 'klaviyo://?api_key=pk_test' --dest-uri duckdb:///klaviyo.duckdb --interval-start 2022-01-01 --dest-table 'klaviyo.events' --extract-parallelism 20
23
+ ```
24
+
25
+ This command fetches all the events that have been created or updated since 2022-01-01 and writes them to the `klaviyo.events` table on DuckDB, using 20 parallel threads to improve performance and efficiently handle large volumes of data.
26
+
27
+ ## Available Tables
28
+
29
+ Klaviyo source allows ingesting the following sources into separate tables:
30
+
31
+ [events](https://developers.klaviyo.com/en/reference/events_api_overview): Retrieves all events in an account where each event represents an action taken by a profile such as a password reset or a product order.
32
+
33
+ [profiles](https://developers.klaviyo.com/en/reference/profiles_api_overview): Retrieves all profiles in an account where each profile includes details like organization, job title, email and other attributes.
34
+
35
+ [campaigns](https://developers.klaviyo.com/en/reference/campaigns_api_overview): Retrieves all campaigns in an account where each campaign is a targeted message sent to a specific audience.
36
+
37
+ [metrics](https://developers.klaviyo.com/en/reference/metrics_api_overview): Retrieves all metrics in an account where each metric represents a category of events or actions a person can take.
38
+
39
+ [tags](https://developers.klaviyo.com/en/reference/get_tags): Retrieves all tags in an account.
40
+
41
+ [coupons](https://developers.klaviyo.com/en/reference/get_coupons): Retrieves all coupons in an account.
42
+
43
+ [catalog-variants](https://developers.klaviyo.com/en/reference/get_catalog_variants): Retrieves all variants in an account.
44
+
45
+ [catalog-categories](https://developers.klaviyo.com/en/reference/get_catalog_categories): Retrieves all catalog categories in an account.
46
+
47
+ [catalog-items](https://developers.klaviyo.com/en/reference/get_catalog_items): Retrieves all catalog items in an account.
48
+
49
+ [flows](https://developers.klaviyo.com/en/reference/get_flows): Retrieves all flows in an account where flow is a sequence of automated actions that is triggered when a person performs a specific action.
50
+
51
+ [lists](https://developers.klaviyo.com/en/reference/get_lists): Retrieves all lists in an account.
52
+
53
+ [images](https://developers.klaviyo.com/en/reference/get_images): Retrieves all images in an account.
54
+
55
+ [segments](https://developers.klaviyo.com/en/reference/get_segments): Retrieves all segments in an account where segment is a dynamic list that contains profiles meeting a certain set of conditions.
56
+
57
+ [forms](https://developers.klaviyo.com/en/reference/get_forms): Retrieves all forms in an account.
58
+
59
+ [templates](https://developers.klaviyo.com/en/reference/get_templates): Retrieves all templates in an account.
60
+
61
+ Use these as `--source-table` parameter in the `ingestr ingest` command.
62
+
63
+ > [!WARNING]
64
+ > Klaviyo does not support incremental loading for many endpoints in its APIs, which means ingestr will load endpoints incrementally if they support it, and do a full-refresh if not.
@@ -244,6 +244,13 @@ def ingest(
244
244
  envvar="PIPELINES_DIR",
245
245
  ),
246
246
  ] = None, # type: ignore
247
+ extract_parallelism: Annotated[
248
+ Optional[int],
249
+ typer.Option(
250
+ help="The number of parallel jobs to run for extracting data from the source, only applicable for certain sources",
251
+ envvar="EXTRACT_PARALLELISM",
252
+ ),
253
+ ] = 5, # type: ignore
247
254
  ):
248
255
  track(
249
256
  "command_triggered",
@@ -253,6 +260,8 @@ def ingest(
253
260
  )
254
261
 
255
262
  dlt.config["data_writer.file_max_items"] = loader_file_size
263
+ dlt.config["extract.workers"] = extract_parallelism
264
+ dlt.config["extract.max_parallel_items"] = extract_parallelism
256
265
  if schema_naming != SchemaNaming.default:
257
266
  dlt.config["schema.naming"] = schema_naming.value
258
267
 
@@ -0,0 +1,10 @@
1
+ # ignore secrets, virtual environments and typical python compilation artifacts
2
+ secrets.toml
3
+ # ignore basic python artifacts
4
+ .env
5
+ **/__pycache__/
6
+ **/*.py[cod]
7
+ **/*$py.class
8
+ # ignore duckdb
9
+ *.duckdb
10
+ *.wal
@@ -0,0 +1,197 @@
1
+ """Loads campaigns, ad sets, ads, leads and insight data from Facebook Marketing API"""
2
+
3
+ from typing import Iterator, Sequence
4
+
5
+ import dlt
6
+ from dlt.common import pendulum
7
+ from dlt.common.typing import TDataItems
8
+ from dlt.sources import DltResource
9
+ from facebook_business.adobjects.ad import Ad
10
+
11
+ from .helpers import (
12
+ execute_job,
13
+ get_ads_account,
14
+ get_data_chunked,
15
+ get_start_date,
16
+ process_report_item,
17
+ )
18
+ from .settings import (
19
+ ALL_ACTION_ATTRIBUTION_WINDOWS,
20
+ ALL_ACTION_BREAKDOWNS,
21
+ DEFAULT_AD_FIELDS,
22
+ DEFAULT_ADCREATIVE_FIELDS,
23
+ DEFAULT_ADSET_FIELDS,
24
+ DEFAULT_CAMPAIGN_FIELDS,
25
+ DEFAULT_INSIGHT_FIELDS,
26
+ DEFAULT_LEAD_FIELDS,
27
+ INSIGHT_FIELDS_TYPES,
28
+ INSIGHTS_BREAKDOWNS_OPTIONS,
29
+ INSIGHTS_PRIMARY_KEY,
30
+ INVALID_INSIGHTS_FIELDS,
31
+ TInsightsBreakdownOptions,
32
+ TInsightsLevels,
33
+ )
34
+
35
+
36
+ @dlt.source(name="facebook_ads", max_table_nesting=0)
37
+ def facebook_ads_source(
38
+ account_id: str = dlt.config.value,
39
+ access_token: str = dlt.secrets.value,
40
+ chunk_size: int = 50,
41
+ request_timeout: float = 300.0,
42
+ app_api_version: str = "v20.0",
43
+ ) -> Sequence[DltResource]:
44
+ """Returns a list of resources to load campaigns, ad sets, ads, creatives and ad leads data from Facebook Marketing API.
45
+
46
+ All the resources have `replace` write disposition by default and define primary keys. Resources are parametrized and allow the user
47
+ to change the set of fields that will be loaded from the API and the object statuses that will be loaded. See the demonstration script for details.
48
+
49
+ You can convert the source into merge resource to keep the deleted objects. Currently Marketing API does not return deleted objects. See the demo script.
50
+
51
+ We also provide a transformation `enrich_ad_objects` that you can add to any of the resources to get additional data per object via `object.get_api`
52
+
53
+ Args:
54
+ account_id (str, optional): Account id associated with ad manager. See README.md
55
+ access_token (str, optional): Access token associated with the Business Facebook App. See README.md
56
+ chunk_size (int, optional): A size of the page and batch request. You may need to decrease it if you request a lot of fields. Defaults to 50.
57
+ request_timeout (float, optional): Connection timeout. Defaults to 300.0.
58
+ app_api_version(str, optional): A version of the facebook api required by the app for which the access tokens were issued ie. 'v17.0'. Defaults to "v20.0".
59
+
60
+ Returns:
61
+ Sequence[DltResource]: campaigns, ads, ad_sets, ad_creatives, leads
62
+ """
63
+ account = get_ads_account(
64
+ account_id, access_token, request_timeout, app_api_version
65
+ )
66
+
67
+ @dlt.resource(primary_key="id", write_disposition="replace")
68
+ def campaigns(
69
+ fields: Sequence[str] = DEFAULT_CAMPAIGN_FIELDS, states: Sequence[str] = None
70
+ ) -> Iterator[TDataItems]:
71
+ yield get_data_chunked(account.get_campaigns, fields, states, chunk_size)
72
+
73
+ @dlt.resource(primary_key="id", write_disposition="replace")
74
+ def ads(
75
+ fields: Sequence[str] = DEFAULT_AD_FIELDS, states: Sequence[str] = None
76
+ ) -> Iterator[TDataItems]:
77
+ yield get_data_chunked(account.get_ads, fields, states, chunk_size)
78
+
79
+ @dlt.resource(primary_key="id", write_disposition="replace")
80
+ def ad_sets(
81
+ fields: Sequence[str] = DEFAULT_ADSET_FIELDS, states: Sequence[str] = None
82
+ ) -> Iterator[TDataItems]:
83
+ yield get_data_chunked(account.get_ad_sets, fields, states, chunk_size)
84
+
85
+ @dlt.transformer(primary_key="id", write_disposition="replace", selected=True)
86
+ def leads(
87
+ items: TDataItems,
88
+ fields: Sequence[str] = DEFAULT_LEAD_FIELDS,
89
+ states: Sequence[str] = None,
90
+ ) -> Iterator[TDataItems]:
91
+ for item in items:
92
+ ad = Ad(item["id"])
93
+ yield get_data_chunked(ad.get_leads, fields, states, chunk_size)
94
+
95
+ @dlt.resource(primary_key="id", write_disposition="replace")
96
+ def ad_creatives(
97
+ fields: Sequence[str] = DEFAULT_ADCREATIVE_FIELDS, states: Sequence[str] = None
98
+ ) -> Iterator[TDataItems]:
99
+ yield get_data_chunked(account.get_ad_creatives, fields, states, chunk_size)
100
+
101
+ return campaigns, ads, ad_sets, ad_creatives, ads | leads
102
+
103
+
104
+ @dlt.source(name="facebook_ads", max_table_nesting=0)
105
+ def facebook_insights_source(
106
+ account_id: str = dlt.config.value,
107
+ access_token: str = dlt.secrets.value,
108
+ initial_load_past_days: int = 1,
109
+ fields: Sequence[str] = DEFAULT_INSIGHT_FIELDS,
110
+ attribution_window_days_lag: int = 7,
111
+ time_increment_days: int = 1,
112
+ breakdowns: TInsightsBreakdownOptions = "ads_insights",
113
+ action_breakdowns: Sequence[str] = ALL_ACTION_BREAKDOWNS,
114
+ level: TInsightsLevels = "ad",
115
+ action_attribution_windows: Sequence[str] = ALL_ACTION_ATTRIBUTION_WINDOWS,
116
+ batch_size: int = 50,
117
+ request_timeout: int = 300,
118
+ app_api_version: str = None,
119
+ ) -> DltResource:
120
+ """Incrementally loads insight reports with defined granularity level, fields, breakdowns etc.
121
+
122
+ By default, the reports are generated one by one for each day, starting with today - attribution_window_days_lag. On subsequent runs, only the reports
123
+ from the last report date until today are loaded (incremental load). The reports from last 7 days (`attribution_window_days_lag`) are refreshed on each load to
124
+ account for changes during attribution window.
125
+
126
+ Mind that each report is a job and takes some time to execute.
127
+
128
+ Args:
129
+ account_id (str, optional): Account id associated with ad manager.
130
+ access_token (str, optional): Access token associated with the Business Facebook App.
131
+ initial_load_past_days (int, optional): How many past days (starting from today) to initially load. Defaults to 1.
132
+ fields (Sequence[str], optional): A list of fields to include in each reports. Note that `breakdowns` option adds fields automatically. Defaults to DEFAULT_INSIGHT_FIELDS.
133
+ attribution_window_days_lag (int, optional): Attribution window in days. The reports in attribution window are refreshed on each run. Defaults to 7.
134
+ time_increment_days (int, optional): The report aggregation window in days. use 7 for weekly aggregation. Defaults to 1.
135
+ breakdowns (TInsightsBreakdownOptions, optional): A preset with common aggregations. See settings.py for details. Defaults to "ads_insights".
136
+ action_breakdowns (Sequence[str], optional): Action aggregation types. See settings.py for details. Defaults to ALL_ACTION_BREAKDOWNS.
137
+ level (TInsightsLevels, optional): The granularity level. Defaults to "ad".
138
+ action_attribution_windows (Sequence[str], optional): Attribution windows for actions. Defaults to ALL_ACTION_ATTRIBUTION_WINDOWS.
139
+ batch_size (int, optional): Page size when reading data from particular report. Defaults to 50.
140
+ request_timeout (int, optional): Connection timeout. Defaults to 300.
141
+ app_api_version(str, optional): A version of the facebook api required by the app for which the access tokens were issued ie. 'v17.0'. Defaults to the facebook_business library default version
142
+
143
+ Returns:
144
+ DltResource: facebook_insights
145
+
146
+ """
147
+ account = get_ads_account(
148
+ account_id, access_token, request_timeout, app_api_version
149
+ )
150
+
151
+ # we load with a defined lag
152
+ initial_load_start_date = pendulum.today().subtract(days=initial_load_past_days)
153
+ initial_load_start_date_str = initial_load_start_date.isoformat()
154
+
155
+ @dlt.resource(
156
+ primary_key=INSIGHTS_PRIMARY_KEY,
157
+ write_disposition="merge",
158
+ columns=INSIGHT_FIELDS_TYPES,
159
+ )
160
+ def facebook_insights(
161
+ date_start: dlt.sources.incremental[str] = dlt.sources.incremental(
162
+ "date_start", initial_value=initial_load_start_date_str
163
+ ),
164
+ ) -> Iterator[TDataItems]:
165
+ start_date = get_start_date(date_start, attribution_window_days_lag)
166
+ end_date = pendulum.now()
167
+
168
+ # fetch insights in incremental day steps
169
+ while start_date <= end_date:
170
+ query = {
171
+ "level": level,
172
+ "action_breakdowns": list(action_breakdowns),
173
+ "breakdowns": list(
174
+ INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["breakdowns"]
175
+ ),
176
+ "limit": batch_size,
177
+ "fields": list(
178
+ set(fields)
179
+ .union(INSIGHTS_BREAKDOWNS_OPTIONS[breakdowns]["fields"])
180
+ .difference(INVALID_INSIGHTS_FIELDS)
181
+ ),
182
+ "time_increment": time_increment_days,
183
+ "action_attribution_windows": list(action_attribution_windows),
184
+ "time_ranges": [
185
+ {
186
+ "since": start_date.to_date_string(),
187
+ "until": start_date.add(
188
+ days=time_increment_days - 1
189
+ ).to_date_string(),
190
+ }
191
+ ],
192
+ }
193
+ job = execute_job(account.get_insights(params=query, is_async=True))
194
+ yield list(map(process_report_item, job.get_result()))
195
+ start_date = start_date.add(days=time_increment_days)
196
+
197
+ return facebook_insights
@@ -0,0 +1,5 @@
1
+ from dlt.extract.exceptions import DltResourceException
2
+
3
+
4
+ class InsightsJobTimeout(DltResourceException):
5
+ pass