ingestr 0.12.1__tar.gz → 0.12.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.12.1 → ingestr-0.12.3}/PKG-INFO +1 -1
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/.vitepress/config.mjs +7 -0
- ingestr-0.12.3/docs/media/tiktok.png +0 -0
- ingestr-0.12.3/docs/supported-sources/custom_queries.md +50 -0
- ingestr-0.12.3/docs/supported-sources/tiktok-ads.md +58 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/factory.py +2 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/sources.py +195 -28
- ingestr-0.12.3/ingestr/src/tiktok_ads/__init__.py +139 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/tiktok_ads/tiktok_helpers.py +32 -13
- ingestr-0.12.3/ingestr/src/version.py +1 -0
- ingestr-0.12.1/ingestr/src/tiktok_ads/__init__.py +0 -106
- ingestr-0.12.1/ingestr/src/version.py +0 -1
- {ingestr-0.12.1 → ingestr-0.12.3}/.dockerignore +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/.githooks/pre-commit-hook.sh +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/.github/workflows/secrets-scan.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/.github/workflows/tests.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/.gitignore +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/.gitleaksignore +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/.python-version +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/.vale.ini +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/Dockerfile +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/LICENSE.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/Makefile +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/README.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/commands/example-uris.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/commands/ingest.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/index.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/media/athena.png +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/adjust.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/asana.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/athena.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/dynamodb.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/kafka.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/mssql.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/main.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/.gitignore +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/adjust/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/adjust/adjust_helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/arrow/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/asana_source/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/asana_source/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/asana_source/settings.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/destinations.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/dynamodb/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/filters.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/mongodb/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/mongodb/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/time.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/package-lock.json +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/package.json +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/pyproject.toml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/requirements-dev.txt +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/requirements.txt +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/resources/demo.gif +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/resources/demo.tape +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/resources/ingestr.svg +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/AMPM.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Acronyms.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Colons.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Contractions.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/DateFormat.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Ellipses.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/EmDash.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Exclamation.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/FirstPerson.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Gender.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/GenderBias.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/HeadingPunctuation.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Headings.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Latin.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/LyHyphens.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/OptionalPlurals.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Ordinal.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/OxfordComma.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Parens.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Passive.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Periods.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Quotes.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Ranges.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Semicolons.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Slang.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Spacing.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Spelling.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Units.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/We.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/Will.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/WordList.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/meta.json +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/Google/vocab.txt +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/bruin/Ingestr.yml +0 -0
- {ingestr-0.12.1 → ingestr-0.12.3}/styles/config/vocabularies/bruin/accept.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.12.
|
|
3
|
+
Version: 0.12.3
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -78,6 +78,12 @@ export default defineConfig({
|
|
|
78
78
|
{ text: "SAP Hana", link: "/supported-sources/sap-hana.md" },
|
|
79
79
|
{ text: "Snowflake", link: "/supported-sources/snowflake.md" },
|
|
80
80
|
{ text: "SQLite", link: "/supported-sources/sqlite.md" },
|
|
81
|
+
{
|
|
82
|
+
text: "Experimental",
|
|
83
|
+
items: [
|
|
84
|
+
{ text: "Custom Queries", link: "/supported-sources/custom_queries.md" },
|
|
85
|
+
],
|
|
86
|
+
},
|
|
81
87
|
],
|
|
82
88
|
},
|
|
83
89
|
|
|
@@ -104,6 +110,7 @@ export default defineConfig({
|
|
|
104
110
|
{ text: "Shopify", link: "/supported-sources/shopify.md" },
|
|
105
111
|
{ text: "Slack", link: "/supported-sources/slack.md" },
|
|
106
112
|
{ text: "Stripe", link: "/supported-sources/stripe.md" },
|
|
113
|
+
{ text: "TikTok Ads", link: "/supported-sources/tiktok-ads.md" },
|
|
107
114
|
{ text: "Zendesk", link: "/supported-sources/zendesk.md" },
|
|
108
115
|
],
|
|
109
116
|
},
|
|
Binary file
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# Custom Queries for SQL Sources
|
|
2
|
+
|
|
3
|
+
ingestr has primarily supported table replication for SQL sources due to that being a common use case. However, there are certain scenarios where loading a table only is not possible:
|
|
4
|
+
- you might want to load a subset of rows from a table
|
|
5
|
+
- you might want to load a table that has a complex query that cannot be expressed as a simple table
|
|
6
|
+
- you could technically create a view in the database, but sometimes you don't have access/permissions to do so.
|
|
7
|
+
- you might want to do incremental loads but the table you want to load does not have an incremental key, so it needs to be joined with another table that does.
|
|
8
|
+
|
|
9
|
+
In order to support these scenarios, ingestr has added experimental support for custom queries.
|
|
10
|
+
|
|
11
|
+
> [!DANGER]
|
|
12
|
+
> This is an experimental feature, so do not expect it to work for all use cases. Please create an issue if you find a use case that doesn't work.
|
|
13
|
+
|
|
14
|
+
## How to use custom queries
|
|
15
|
+
|
|
16
|
+
To use a custom query, you can pass a `query:` prefix to the source name:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
ingestr ingest \
|
|
20
|
+
--source-uri $POSTGRES_URI \
|
|
21
|
+
--dest-uri "duckdb:///mydb.db" \
|
|
22
|
+
--dest-table "public.output" \
|
|
23
|
+
--source-table "query:select oi.*, o.updated_at from order_items oi join orders o on oi.order_id = o.id"
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Ingestr uses SQLAlchemy to run the queries, therefore you can use any valid SQLAlchemy query.
|
|
27
|
+
|
|
28
|
+
### Incremental loads
|
|
29
|
+
|
|
30
|
+
Custom queries support incremental loads, but there are some caveats:
|
|
31
|
+
- the incremental key must be a column that is returned by the query
|
|
32
|
+
- the incremental key must be a datetime/timestamp column
|
|
33
|
+
- you must do your own filtering in the query for the incremental load
|
|
34
|
+
- you can use the `interval_start` and `interval_end` variables to filter the data
|
|
35
|
+
|
|
36
|
+
Here's an example of how to do an incremental load:
|
|
37
|
+
|
|
38
|
+
```bash
|
|
39
|
+
ingestr ingest \
|
|
40
|
+
--source-uri $POSTGRES_URI \
|
|
41
|
+
--dest-uri "duckdb:///mydb.db" \
|
|
42
|
+
--dest-table "public.output" \
|
|
43
|
+
--source-table "query:select oi.*, o.updated_at from order_items oi join orders o on oi.order_id = o.id where o.updated_at > :interval_start" \
|
|
44
|
+
--incremental-key updated_at \
|
|
45
|
+
--incremental-strategy merge \
|
|
46
|
+
--primary-key id
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
In this example, the query is filtering the data to only include rows where the `updated_at` column is greater than the `interval_start` variable.
|
|
50
|
+
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# TikTok Ads
|
|
2
|
+
TikTok Ads is an advertising platform that enables businesses and marketers to create, manage, and analyze ad campaigns targeting TikTok's user base.
|
|
3
|
+
|
|
4
|
+
Ingestr supports TikTok Ads as a Source.
|
|
5
|
+
|
|
6
|
+
## URI format
|
|
7
|
+
The URI format for TikTok Ads as a Source is as follows:
|
|
8
|
+
|
|
9
|
+
```plaintext
|
|
10
|
+
tiktok://?access_token=<ACCESS_TOKEN>&advertiser_ids=<advertiser_ids>&timezone=<timezone>
|
|
11
|
+
```
|
|
12
|
+
## URI parameters:
|
|
13
|
+
- `access_token` (required): Used for authentication and is necessary to access reports through the TikTok Marketing API.
|
|
14
|
+
- `advertiser_ids` (required): The comma-separated list of advertiser IDs to retrieve data for.
|
|
15
|
+
- `timezone` (optional): The timezone to use for the data retrieval, you should set this value to the timezone of the advertiser account. Defaults to `UTC`.
|
|
16
|
+
|
|
17
|
+
TikTok requires an `access_token` and `advertiser_ids` to retrieve reports from the TikTok marketing API. Please follow the guide to obtain the [credentials](https://business-api.tiktok.com/portal/docs?id=1738373141733378).
|
|
18
|
+
|
|
19
|
+
## Table: Custom Reports
|
|
20
|
+
Custom reports allow you to retrieve data based on specific `dimensions`, `metrics`, and `filters`.
|
|
21
|
+
|
|
22
|
+
Custom Table Format:
|
|
23
|
+
```
|
|
24
|
+
custom:<dimensions>:<metrics>[:<filter_name,filter_values>]
|
|
25
|
+
```
|
|
26
|
+
### Parameters:
|
|
27
|
+
- `dimensions`(required): A comma-separated list of [dimensions](https://business-api.tiktok.com/portal/docs?id=1751443956638721) to retrieve.
|
|
28
|
+
- `metrics`(required): A comma-separated list of [metrics](https://business-api.tiktok.com/portal/docs?id=1751443967255553) to retrieve.
|
|
29
|
+
- `filters` (optional): Filters are specified in the format `<filter_name=filter_values>`.
|
|
30
|
+
- `filter_name`: The name of the filter (e.g. `campaign_ids`).
|
|
31
|
+
- `filter_values`: A comma-separated list of one or more values associated with the filter name (e.g., `camp_id123,camp_id456`). Only the `IN` filter type is supported. Learn more about [filters](https://business-api.tiktok.com/portal/docs?id=1751443975608321.).
|
|
32
|
+
|
|
33
|
+
> [!NOTE]
|
|
34
|
+
> Ingestr will fetch data for the last 30 days and use the default page size of `1000`. You can override this by specifying the `interval_start` and `interval_end` parameters.
|
|
35
|
+
|
|
36
|
+
### Example
|
|
37
|
+
|
|
38
|
+
Retrieve data for campaigns with `campaign_ids` camp_id123 and camp_id456:
|
|
39
|
+
```sh
|
|
40
|
+
ingestr ingest \
|
|
41
|
+
--source-uri "tiktok://?access_token=token_123&advertiser_ids=0594720014,0594720015" \
|
|
42
|
+
--source-table "custom:campaign_id,stat_time_day:clicks,cpc" \
|
|
43
|
+
--dest-uri "duckdb:///campaigns.duckdb" \
|
|
44
|
+
--dest-table "dest.clicks"
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
The applied parameters for the report are:
|
|
48
|
+
- dimensions: `campaign_id` and `country_code`
|
|
49
|
+
- metrics: `clicks` and `cpc`
|
|
50
|
+
- filters: `campaign_ids` for `camp_id123` and `camp_id456`
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
This command will retrieve data for the specified date range and save it to the `dest.clicks` table in the DuckDB database.
|
|
54
|
+
|
|
55
|
+
<img alt="titok_ads_img" src="../media/tiktok.png" />
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
|
|
@@ -20,6 +20,7 @@ from ingestr.src.sources import (
|
|
|
20
20
|
AirtableSource,
|
|
21
21
|
AppsflyerSource,
|
|
22
22
|
ArrowMemoryMappedSource,
|
|
23
|
+
AsanaSource,
|
|
23
24
|
ChessSource,
|
|
24
25
|
DynamoDBSource,
|
|
25
26
|
FacebookAdsSource,
|
|
@@ -115,6 +116,7 @@ class SourceDestinationFactory:
|
|
|
115
116
|
"mmap": ArrowMemoryMappedSource,
|
|
116
117
|
"s3": S3Source,
|
|
117
118
|
"dynamodb": DynamoDBSource,
|
|
119
|
+
"asana": AsanaSource,
|
|
118
120
|
"tiktok": TikTokSource,
|
|
119
121
|
}
|
|
120
122
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
@@ -3,17 +3,42 @@ import csv
|
|
|
3
3
|
import json
|
|
4
4
|
import os
|
|
5
5
|
import re
|
|
6
|
-
from datetime import date
|
|
7
|
-
from typing import
|
|
6
|
+
from datetime import date, datetime
|
|
7
|
+
from typing import (
|
|
8
|
+
Any,
|
|
9
|
+
Callable,
|
|
10
|
+
Dict,
|
|
11
|
+
Iterator,
|
|
12
|
+
List,
|
|
13
|
+
Literal,
|
|
14
|
+
Optional,
|
|
15
|
+
Union,
|
|
16
|
+
)
|
|
8
17
|
from urllib.parse import ParseResult, parse_qs, quote, urlparse
|
|
9
18
|
|
|
10
19
|
import dlt
|
|
11
20
|
import pendulum
|
|
12
|
-
|
|
21
|
+
import sqlalchemy
|
|
22
|
+
from dlt.common.configuration.specs import (
|
|
23
|
+
AwsCredentials,
|
|
24
|
+
)
|
|
25
|
+
from dlt.common.libs.sql_alchemy import (
|
|
26
|
+
Engine,
|
|
27
|
+
MetaData,
|
|
28
|
+
)
|
|
13
29
|
from dlt.common.time import ensure_pendulum_datetime
|
|
14
|
-
from dlt.common.typing import TSecretStrValue
|
|
30
|
+
from dlt.common.typing import TDataItem, TSecretStrValue
|
|
31
|
+
from dlt.extract import Incremental
|
|
15
32
|
from dlt.sources.credentials import ConnectionStringCredentials
|
|
16
33
|
from dlt.sources.sql_database import sql_table
|
|
34
|
+
from dlt.sources.sql_database.helpers import TableLoader
|
|
35
|
+
from dlt.sources.sql_database.schema_types import (
|
|
36
|
+
ReflectionLevel,
|
|
37
|
+
SelectAny,
|
|
38
|
+
Table,
|
|
39
|
+
TTypeAdapter,
|
|
40
|
+
)
|
|
41
|
+
from sqlalchemy import Column
|
|
17
42
|
from sqlalchemy import types as sa
|
|
18
43
|
from sqlalchemy.dialects import mysql
|
|
19
44
|
|
|
@@ -39,7 +64,7 @@ from ingestr.src.notion import notion_databases
|
|
|
39
64
|
from ingestr.src.shopify import shopify_source
|
|
40
65
|
from ingestr.src.slack import slack_source
|
|
41
66
|
from ingestr.src.stripe_analytics import stripe_source
|
|
42
|
-
from ingestr.src.table_definition import table_string_to_dataclass
|
|
67
|
+
from ingestr.src.table_definition import TableDefinition, table_string_to_dataclass
|
|
43
68
|
from ingestr.src.tiktok_ads import tiktok_source
|
|
44
69
|
from ingestr.src.time import isotime
|
|
45
70
|
from ingestr.src.zendesk import zendesk_chat, zendesk_support, zendesk_talk
|
|
@@ -48,6 +73,9 @@ from ingestr.src.zendesk.helpers.credentials import (
|
|
|
48
73
|
ZendeskCredentialsToken,
|
|
49
74
|
)
|
|
50
75
|
|
|
76
|
+
TableBackend = Literal["sqlalchemy", "pyarrow", "pandas", "connectorx"]
|
|
77
|
+
TQueryAdapter = Callable[[SelectAny, Table], SelectAny]
|
|
78
|
+
|
|
51
79
|
|
|
52
80
|
class SqlSource:
|
|
53
81
|
table_builder: Callable
|
|
@@ -59,7 +87,9 @@ class SqlSource:
|
|
|
59
87
|
return False
|
|
60
88
|
|
|
61
89
|
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
62
|
-
table_fields =
|
|
90
|
+
table_fields = TableDefinition(dataset="custom", table="custom")
|
|
91
|
+
if not table.startswith("query:"):
|
|
92
|
+
table_fields = table_string_to_dataclass(table)
|
|
63
93
|
|
|
64
94
|
incremental = None
|
|
65
95
|
if kwargs.get("incremental_key"):
|
|
@@ -87,6 +117,110 @@ class SqlSource:
|
|
|
87
117
|
query = query.order_by(kwargs.get("incremental_key"))
|
|
88
118
|
return query
|
|
89
119
|
|
|
120
|
+
defer_table_reflect = False
|
|
121
|
+
sql_backend = kwargs.get("sql_backend", "sqlalchemy")
|
|
122
|
+
if table.startswith("query:"):
|
|
123
|
+
if kwargs.get("sql_limit"):
|
|
124
|
+
raise ValueError(
|
|
125
|
+
"sql_limit is not supported for custom queries, please apply the limit in the query instead"
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
sql_backend = "sqlalchemy"
|
|
129
|
+
defer_table_reflect = True
|
|
130
|
+
query_value = table.split(":", 1)[1]
|
|
131
|
+
|
|
132
|
+
# this is a very hacky version of the table_rows function. it is built this way to go around the dlt's table loader.
|
|
133
|
+
# I didn't want to write a full fledged sqlalchemy source for now, and wanted to benefit from the existing stuff to begin with.
|
|
134
|
+
# this is by no means a production ready solution, but it works for now.
|
|
135
|
+
# the core idea behind this implementation is to create a mock table instance with the columns that are absolutely necessary for the incremental load to work.
|
|
136
|
+
# the table loader will then use the query adapter callback to apply the actual query and load the rows.
|
|
137
|
+
def table_rows(
|
|
138
|
+
engine: Engine,
|
|
139
|
+
table: Union[Table, str],
|
|
140
|
+
metadata: MetaData,
|
|
141
|
+
chunk_size: int,
|
|
142
|
+
backend: TableBackend,
|
|
143
|
+
incremental: Optional[Incremental[Any]] = None,
|
|
144
|
+
table_adapter_callback: Callable[[Table], None] = None, # type: ignore
|
|
145
|
+
reflection_level: ReflectionLevel = "minimal",
|
|
146
|
+
backend_kwargs: Dict[str, Any] = None, # type: ignore
|
|
147
|
+
type_adapter_callback: Optional[TTypeAdapter] = None,
|
|
148
|
+
included_columns: Optional[List[str]] = None,
|
|
149
|
+
query_adapter_callback: Optional[TQueryAdapter] = None,
|
|
150
|
+
resolve_foreign_keys: bool = False,
|
|
151
|
+
) -> Iterator[TDataItem]:
|
|
152
|
+
hints = { # type: ignore
|
|
153
|
+
"columns": [],
|
|
154
|
+
}
|
|
155
|
+
cols = [] # type: ignore
|
|
156
|
+
|
|
157
|
+
if incremental:
|
|
158
|
+
switchDict = {
|
|
159
|
+
int: sa.INTEGER,
|
|
160
|
+
datetime: sa.TIMESTAMP,
|
|
161
|
+
pendulum.Date: sa.DATE,
|
|
162
|
+
pendulum.DateTime: sa.TIMESTAMP,
|
|
163
|
+
}
|
|
164
|
+
|
|
165
|
+
if incremental.last_value is not None:
|
|
166
|
+
cols.append(
|
|
167
|
+
Column(
|
|
168
|
+
incremental.cursor_path,
|
|
169
|
+
switchDict[type(incremental.last_value)], # type: ignore
|
|
170
|
+
)
|
|
171
|
+
)
|
|
172
|
+
else:
|
|
173
|
+
cols.append(Column(incremental.cursor_path, sa.TIMESTAMP)) # type: ignore
|
|
174
|
+
|
|
175
|
+
table = Table(
|
|
176
|
+
"query_result",
|
|
177
|
+
metadata,
|
|
178
|
+
*cols,
|
|
179
|
+
)
|
|
180
|
+
|
|
181
|
+
loader = TableLoader(
|
|
182
|
+
engine,
|
|
183
|
+
backend,
|
|
184
|
+
table,
|
|
185
|
+
hints["columns"], # type: ignore
|
|
186
|
+
incremental=incremental,
|
|
187
|
+
chunk_size=chunk_size,
|
|
188
|
+
query_adapter_callback=query_adapter_callback,
|
|
189
|
+
)
|
|
190
|
+
try:
|
|
191
|
+
yield from loader.load_rows(backend_kwargs)
|
|
192
|
+
finally:
|
|
193
|
+
if getattr(engine, "may_dispose_after_use", False):
|
|
194
|
+
engine.dispose()
|
|
195
|
+
|
|
196
|
+
dlt.sources.sql_database.table_rows = table_rows
|
|
197
|
+
|
|
198
|
+
def query_adapter_callback(query, table, incremental=None, engine=None):
|
|
199
|
+
params = {}
|
|
200
|
+
if incremental:
|
|
201
|
+
params["interval_start"] = (
|
|
202
|
+
incremental.last_value
|
|
203
|
+
if incremental.last_value is not None
|
|
204
|
+
else datetime(year=1, month=1, day=1)
|
|
205
|
+
)
|
|
206
|
+
if incremental.end_value is not None:
|
|
207
|
+
params["interval_end"] = incremental.end_value
|
|
208
|
+
else:
|
|
209
|
+
if ":interval_start" in query_value:
|
|
210
|
+
params["interval_start"] = (
|
|
211
|
+
datetime.min
|
|
212
|
+
if kwargs.get("interval_start") is None
|
|
213
|
+
else kwargs.get("interval_start")
|
|
214
|
+
)
|
|
215
|
+
if ":interval_end" in query_value:
|
|
216
|
+
params["interval_end"] = (
|
|
217
|
+
datetime.max
|
|
218
|
+
if kwargs.get("interval_end") is None
|
|
219
|
+
else kwargs.get("interval_end")
|
|
220
|
+
)
|
|
221
|
+
|
|
222
|
+
return sqlalchemy.text(query_value).bindparams(**params)
|
|
223
|
+
|
|
90
224
|
def type_adapter_callback(sql_type):
|
|
91
225
|
if isinstance(sql_type, mysql.SET):
|
|
92
226
|
return sa.JSON
|
|
@@ -97,7 +231,7 @@ class SqlSource:
|
|
|
97
231
|
schema=table_fields.dataset,
|
|
98
232
|
table=table_fields.table,
|
|
99
233
|
incremental=incremental,
|
|
100
|
-
backend=
|
|
234
|
+
backend=sql_backend,
|
|
101
235
|
chunk_size=kwargs.get("page_size", None),
|
|
102
236
|
reflection_level=reflection_level,
|
|
103
237
|
query_adapter_callback=query_adapter_callback,
|
|
@@ -105,6 +239,7 @@ class SqlSource:
|
|
|
105
239
|
table_adapter_callback=table_adapter_exclude_columns(
|
|
106
240
|
kwargs.get("sql_exclude_columns", [])
|
|
107
241
|
),
|
|
242
|
+
defer_table_reflect=defer_table_reflect,
|
|
108
243
|
)
|
|
109
244
|
|
|
110
245
|
return builder_res
|
|
@@ -1015,29 +1150,28 @@ class TikTokSource:
|
|
|
1015
1150
|
if not access_token:
|
|
1016
1151
|
raise ValueError("access_token is required to connect to TikTok")
|
|
1017
1152
|
|
|
1018
|
-
|
|
1153
|
+
timezone = "UTC"
|
|
1154
|
+
if source_fields.get("timezone") is not None:
|
|
1155
|
+
timezone = source_fields.get("timezone")[0] # type: ignore
|
|
1156
|
+
|
|
1157
|
+
advertiser_ids = source_fields.get("advertiser_ids")
|
|
1158
|
+
if not advertiser_ids:
|
|
1159
|
+
raise ValueError("advertiser_ids is required to connect to TikTok")
|
|
1019
1160
|
|
|
1020
|
-
|
|
1021
|
-
if not advertiser_id:
|
|
1022
|
-
raise ValueError("advertiser_id is required to connect to TikTok")
|
|
1161
|
+
advertiser_ids = advertiser_ids[0].replace(" ", "").split(",")
|
|
1023
1162
|
|
|
1024
|
-
start_date = pendulum.now().subtract(days=
|
|
1025
|
-
end_date = ensure_pendulum_datetime(pendulum.now()).in_tz(
|
|
1163
|
+
start_date = pendulum.now().subtract(days=30).in_tz(timezone)
|
|
1164
|
+
end_date = ensure_pendulum_datetime(pendulum.now()).in_tz(timezone)
|
|
1026
1165
|
|
|
1027
1166
|
interval_start = kwargs.get("interval_start")
|
|
1028
1167
|
if interval_start is not None:
|
|
1029
|
-
start_date = ensure_pendulum_datetime(interval_start).in_tz(
|
|
1168
|
+
start_date = ensure_pendulum_datetime(interval_start).in_tz(timezone)
|
|
1030
1169
|
|
|
1031
1170
|
interval_end = kwargs.get("interval_end")
|
|
1032
1171
|
if interval_end is not None:
|
|
1033
|
-
end_date = ensure_pendulum_datetime(interval_end).in_tz(
|
|
1172
|
+
end_date = ensure_pendulum_datetime(interval_end).in_tz(timezone)
|
|
1034
1173
|
|
|
1035
|
-
page_size = kwargs.get("page_size")
|
|
1036
|
-
if page_size is not None and not isinstance(page_size, int):
|
|
1037
|
-
page_size = int(page_size)
|
|
1038
|
-
|
|
1039
|
-
if page_size > 1000:
|
|
1040
|
-
page_size = 1000
|
|
1174
|
+
page_size = min(1000, kwargs.get("page_size", 1000))
|
|
1041
1175
|
|
|
1042
1176
|
if table.startswith("custom:"):
|
|
1043
1177
|
fields = table.split(":", 3)
|
|
@@ -1049,28 +1183,61 @@ class TikTokSource:
|
|
|
1049
1183
|
dimensions = fields[1].replace(" ", "").split(",")
|
|
1050
1184
|
if (
|
|
1051
1185
|
"campaign_id" not in dimensions
|
|
1052
|
-
and "advertiser_id" not in dimensions
|
|
1053
1186
|
and "adgroup_id" not in dimensions
|
|
1054
1187
|
and "ad_id" not in dimensions
|
|
1055
1188
|
):
|
|
1056
1189
|
raise ValueError(
|
|
1057
|
-
"
|
|
1190
|
+
"TikTok API requires at least one ID dimension, please use one of the following dimensions: [campaign_id, adgroup_id, ad_id]"
|
|
1058
1191
|
)
|
|
1059
1192
|
|
|
1193
|
+
if "advertiser_id" in dimensions:
|
|
1194
|
+
dimensions.remove("advertiser_id")
|
|
1195
|
+
|
|
1060
1196
|
metrics = fields[2].replace(" ", "").split(",")
|
|
1061
|
-
|
|
1197
|
+
filtering_param = False
|
|
1198
|
+
filter_name = ""
|
|
1199
|
+
filter_value = []
|
|
1062
1200
|
if len(fields) == 4:
|
|
1063
|
-
|
|
1201
|
+
|
|
1202
|
+
def parse_filters(filters_raw: str) -> dict:
    """Parse a filter string like ``key1=value1,key2=value2,value3`` into a dict.

    A comma-separated item containing ``=`` starts a new key; items without
    ``=`` are appended as additional values to the most recent key. Keys that
    end up with exactly one value are flattened to that scalar value,
    multi-value keys stay lists. Items appearing before any ``key=value``
    pair are silently ignored (no current key to attach them to).
    """
    filters: dict = {}
    current_key = None

    for item in filters_raw.split(","):
        if "=" in item:
            # Start of a new key-value pair. Split only on the FIRST "=" so
            # that values may themselves contain "=" (the bare split("=")
            # would raise ValueError on e.g. "key=a=b").
            key, value = item.split("=", 1)
            filters[key] = [value]  # always accumulate into a list first
            current_key = key
        elif current_key is not None:
            # Additional value for the current key.
            filters[current_key].append(item)

    # Convert single-item lists to simple values.
    return {k: v[0] if len(v) == 1 else v for k, v in filters.items()}
|
|
1219
|
+
|
|
1220
|
+
filtering_param = True
|
|
1221
|
+
filters = parse_filters(fields[3])
|
|
1222
|
+
if len(filters) > 1:
|
|
1223
|
+
raise ValueError(
|
|
1224
|
+
"Only one filter is allowed for TikTok custom reports"
|
|
1225
|
+
)
|
|
1226
|
+
filter_name = list(filters.keys())[0]
|
|
1227
|
+
filter_value = list(map(int, filters[list(filters.keys())[0]]))
|
|
1228
|
+
|
|
1064
1229
|
return tiktok_source(
|
|
1065
1230
|
start_date=start_date,
|
|
1066
1231
|
end_date=end_date,
|
|
1067
1232
|
access_token=access_token[0],
|
|
1068
|
-
|
|
1069
|
-
|
|
1233
|
+
advertiser_ids=advertiser_ids,
|
|
1234
|
+
timezone=timezone,
|
|
1070
1235
|
dimensions=dimensions,
|
|
1071
1236
|
metrics=metrics,
|
|
1072
|
-
filters=filters,
|
|
1073
1237
|
page_size=page_size,
|
|
1238
|
+
filter_name=filter_name,
|
|
1239
|
+
filter_value=filter_value,
|
|
1240
|
+
filtering_param=filtering_param,
|
|
1074
1241
|
).with_resources(endpoint)
|
|
1075
1242
|
|
|
1076
1243
|
|
|
@@ -0,0 +1,139 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pendulum
|
|
5
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
6
|
+
from dlt.common.typing import TDataItem
|
|
7
|
+
from dlt.sources import DltResource
|
|
8
|
+
|
|
9
|
+
from .tiktok_helpers import TikTokAPI
|
|
10
|
+
|
|
11
|
+
# Mapping of known TikTok report field names to dlt column data types.
# Consumed by ``tiktok_source`` below: any requested dimension or metric that
# appears here gets an explicit column type hint (passed to the resource's
# ``columns``) instead of relying on dlt's type inference. Count-like fields
# are hinted as bigint; monetary values and rates as decimal.
KNOWN_TYPE_HINTS = {
    "spend": {"data_type": "decimal"},
    "billed_cost": {"data_type": "decimal"},
    "cash_spend": {"data_type": "decimal"},
    "voucher_spend": {"data_type": "decimal"},
    "cpc": {"data_type": "decimal"},
    "cpm": {"data_type": "decimal"},
    "impressions": {"data_type": "bigint"},
    "gross_impressions": {"data_type": "bigint"},
    "clicks": {"data_type": "bigint"},
    "ctr": {"data_type": "decimal"},
    "reach": {"data_type": "bigint"},
    "cost_per_1000_reached": {"data_type": "decimal"},
    "frequency": {"data_type": "decimal"},
    "conversion": {"data_type": "bigint"},
    "cost_per_conversion": {"data_type": "decimal"},
    "conversion_rate": {"data_type": "decimal"},
    "conversion_rate_v2": {"data_type": "decimal"},
    "real_time_conversion": {"data_type": "bigint"},
    "real_time_cost_per_conversion": {"data_type": "decimal"},
    "real_time_conversion_rate": {"data_type": "decimal"},
    "real_time_conversion_rate_v2": {"data_type": "decimal"},
    "result": {"data_type": "bigint"},
    "cost_per_result": {"data_type": "decimal"},
    "result_rate": {"data_type": "decimal"},
    "real_time_result": {"data_type": "bigint"},
    "real_time_cost_per_result": {"data_type": "decimal"},
    "real_time_result_rate": {"data_type": "decimal"},
    "secondary_goal_result": {"data_type": "bigint"},
    "cost_per_secondary_goal_result": {"data_type": "decimal"},
    "secondary_goal_result_rate": {"data_type": "decimal"},
}
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def find_intervals(
    current_date: pendulum.DateTime,
    end_date: pendulum.DateTime,
    interval_days: int,
):
    """Split ``[current_date, end_date]`` into consecutive (start, end) windows.

    Each window spans at most ``interval_days`` days beyond its start; the
    next window begins the day after the previous one ends, so windows never
    overlap. Returns an empty list when ``current_date`` is already past
    ``end_date``.
    """
    windows = []
    cursor = current_date
    while cursor <= end_date:
        # Cap the window at the overall end date.
        window_end = cursor.add(days=interval_days)
        if window_end > end_date:
            window_end = end_date
        windows.append((cursor, window_end))
        cursor = window_end.add(days=1)
    return windows
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@dlt.source(max_table_nesting=0)
def tiktok_source(
    start_date: pendulum.DateTime,
    end_date: pendulum.DateTime,
    access_token: str,
    advertiser_ids: list[str],
    timezone: str,
    page_size: int,
    filtering_param: bool,
    filter_name: str,
    filter_value: list[int],
    dimensions: list[str],
    metrics: list[str],
) -> DltResource:
    """dlt source exposing a single ``custom_reports`` resource of TikTok ads data.

    Fetches report pages via :class:`TikTokAPI` for the requested dimensions
    and metrics between ``start_date`` and ``end_date``. When the dimensions
    include ``stat_time_day`` or ``stat_time_hour``, the resource loads
    incrementally on that column; otherwise it performs a full load.
    """
    api = TikTokAPI(
        access_token=access_token,
        timezone=timezone,
        page_size=page_size,
        filtering_param=filtering_param,
        filter_name=filter_name,
        filter_value=filter_value,
    )

    # Time-granularity dimensions enable incremental loading; the interval
    # width controls how the overall date range is chunked per request.
    # stat_time_hour takes precedence over stat_time_day when both appear.
    if "stat_time_hour" in dimensions:
        incremental_loading_param = "stat_time_hour"
        is_incremental = True
        interval_days = 0
    elif "stat_time_day" in dimensions:
        incremental_loading_param = "stat_time_day"
        is_incremental = True
        interval_days = 30
    else:
        incremental_loading_param = ""
        is_incremental = False
        interval_days = 365

    # Explicit column type hints for any requested field with a known type;
    # dimensions are applied before metrics, matching the request order.
    type_hints: dict = {"advertiser_id": {"data_type": "text"}}
    for field in dimensions + metrics:
        if field in KNOWN_TYPE_HINTS:
            type_hints[field] = KNOWN_TYPE_HINTS[field]

    @dlt.resource(
        write_disposition="merge",
        primary_key=dimensions + ["advertiser_id"],
        columns=type_hints,
        parallelized=True,
    )
    def custom_reports(
        datetime=(
            dlt.sources.incremental(incremental_loading_param, start_date)
            if is_incremental
            else None
        ),
    ) -> Iterable[TDataItem]:
        # Resume from the incremental cursor when one exists, otherwise
        # start from the configured start date.
        if datetime is None:
            window_start = start_date.in_tz(timezone)
        else:
            window_start = ensure_pendulum_datetime(datetime.last_value).in_tz(
                timezone
            )

        for chunk_start, chunk_end in find_intervals(
            current_date=window_start,
            end_date=end_date,
            interval_days=interval_days,
        ):
            yield api.fetch_pages(
                advertiser_ids=advertiser_ids,
                start_time=chunk_start,
                end_time=chunk_end,
                dimensions=dimensions,
                metrics=metrics,
            )

    return custom_reports
|