ingestr 0.9.1__tar.gz → 0.9.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- {ingestr-0.9.1 → ingestr-0.9.3}/.gitignore +1 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/Dockerfile +1 -1
- {ingestr-0.9.1 → ingestr-0.9.3}/PKG-INFO +3 -2
- ingestr-0.9.3/docs/supported-sources/adjust.md +84 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/kafka.md +3 -3
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/mssql.md +8 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/main.py +3 -1
- ingestr-0.9.3/ingestr/src/adjust/__init__.py +100 -0
- ingestr-0.9.1/ingestr/src/adjust/helpers.py → ingestr-0.9.3/ingestr/src/adjust/adjust_helpers.py +46 -23
- ingestr-0.9.3/ingestr/src/arrow/__init__.py +77 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/factory.py +3 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/mongodb/__init__.py +1 -1
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/mongodb/helpers.py +1 -1
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/sources.py +109 -23
- ingestr-0.9.3/ingestr/src/version.py +1 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/pyproject.toml +3 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/requirements.txt +0 -1
- ingestr-0.9.1/docs/supported-sources/adjust.md +0 -30
- ingestr-0.9.1/ingestr/src/adjust/_init_.py +0 -31
- ingestr-0.9.1/ingestr/src/version.py +0 -1
- {ingestr-0.9.1 → ingestr-0.9.3}/.dockerignore +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/.github/workflows/deploy-docs.yml +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/.github/workflows/tests.yml +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/.python-version +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/LICENSE.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/Makefile +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/README.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/.vitepress/config.mjs +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/.vitepress/theme/custom.css +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/.vitepress/theme/index.js +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/commands/example-uris.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/commands/ingest.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/getting-started/core-concepts.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/getting-started/incremental-loading.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/getting-started/quickstart.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/getting-started/telemetry.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/index.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/airtable.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/appsflyer.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/bigquery.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/chess.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/csv.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/databricks.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/duckdb.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/facebook-ads.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/gorgias.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/gsheets.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/hubspot.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/klaviyo.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/mongodb.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/mysql.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/notion.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/oracle.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/postgres.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/redshift.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/s3.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/sap-hana.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/shopify.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/slack.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/snowflake.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/sqlite.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/stripe.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/docs/supported-sources/zendesk.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/.gitignore +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/airtable/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/appsflyer/_init_.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/appsflyer/client.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/chess/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/chess/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/chess/settings.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/destinations.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/facebook_ads/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/facebook_ads/exceptions.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/facebook_ads/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/facebook_ads/settings.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/filesystem/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/filesystem/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/filesystem/readers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/google_sheets/README.md +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/google_sheets/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/google_sheets/helpers/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/google_sheets/helpers/api_calls.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/google_sheets/helpers/data_processing.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/gorgias/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/gorgias/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/hubspot/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/hubspot/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/hubspot/settings.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/kafka/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/kafka/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/klaviyo/_init_.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/klaviyo/client.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/klaviyo/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/notion/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/notion/helpers/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/notion/helpers/client.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/notion/helpers/database.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/notion/settings.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/shopify/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/shopify/exceptions.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/shopify/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/shopify/settings.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/slack/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/slack/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/slack/settings.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/sql_database/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/sql_database/arrow_helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/sql_database/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/sql_database/override.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/sql_database/schema_types.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/stripe_analytics/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/stripe_analytics/helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/stripe_analytics/settings.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/table_definition.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/telemetry/event.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/testdata/fakebqcredentials.json +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/zendesk/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/zendesk/helpers/__init__.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/zendesk/helpers/api_helpers.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/zendesk/helpers/credentials.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/zendesk/helpers/talk_api.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/src/zendesk/settings.py +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/testdata/.gitignore +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/testdata/create_replace.csv +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/testdata/delete_insert_expected.csv +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/testdata/delete_insert_part1.csv +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/testdata/delete_insert_part2.csv +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/testdata/merge_expected.csv +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/testdata/merge_part1.csv +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/ingestr/testdata/merge_part2.csv +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/package-lock.json +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/package.json +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/requirements-dev.txt +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/resources/demo.gif +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/resources/demo.tape +0 -0
- {ingestr-0.9.1 → ingestr-0.9.3}/resources/ingestr.svg +0 -0
|
@@ -28,7 +28,7 @@ ENV VIRTUAL_ENV=/usr/local
|
|
|
28
28
|
ADD --chmod=755 https://astral.sh/uv/install.sh /install.sh
|
|
29
29
|
RUN /install.sh && rm /install.sh
|
|
30
30
|
|
|
31
|
-
RUN
|
|
31
|
+
RUN $HOME/.local/bin/uv pip install --system --no-cache -r requirements.txt
|
|
32
32
|
|
|
33
33
|
COPY . /app
|
|
34
34
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.9.
|
|
3
|
+
Version: 0.9.3
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -28,7 +28,6 @@ Requires-Dist: py-machineid==0.5.1
|
|
|
28
28
|
Requires-Dist: pyairtable==2.3.3
|
|
29
29
|
Requires-Dist: pymongo==4.6.3
|
|
30
30
|
Requires-Dist: pymysql==1.1.0
|
|
31
|
-
Requires-Dist: pyodbc==5.1.0
|
|
32
31
|
Requires-Dist: pyrate-limiter==3.6.1
|
|
33
32
|
Requires-Dist: redshift-connector==2.1.0
|
|
34
33
|
Requires-Dist: rich==13.7.1
|
|
@@ -44,6 +43,8 @@ Requires-Dist: stripe==10.7.0
|
|
|
44
43
|
Requires-Dist: tqdm==4.66.2
|
|
45
44
|
Requires-Dist: typer==0.12.3
|
|
46
45
|
Requires-Dist: types-requests==2.32.0.20240907
|
|
46
|
+
Provides-Extra: odbc
|
|
47
|
+
Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
|
|
47
48
|
Provides-Extra: oracle
|
|
48
49
|
Requires-Dist: cx-oracle==8.3.0; extra == 'oracle'
|
|
49
50
|
Description-Content-Type: text/markdown
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# Adjust
|
|
2
|
+
|
|
3
|
+
[Adjust](https://www.adjust.com/) is a mobile marketing analytics platform that provides solutions for measuring and optimizing campaigns, as well as protecting user data.
|
|
4
|
+
|
|
5
|
+
ingestr supports Adjust as a source.
|
|
6
|
+
|
|
7
|
+
## URI Format
|
|
8
|
+
|
|
9
|
+
The URI format for Adjust is as follows:
|
|
10
|
+
|
|
11
|
+
```plaintext
|
|
12
|
+
adjust://?api_key=<api-key-here>
|
|
13
|
+
```
|
|
14
|
+
Parameters:
|
|
15
|
+
- `api_key`: Required. The API key for the Adjust account.
|
|
16
|
+
- `lookback_days`: Optional. The number of days to look back before the given start date when fetching data. Defaults to 30 days.
|
|
17
|
+
|
|
18
|
+
An API token is required to retrieve reports from the Adjust reporting API. Please follow the guide to [obtain an API key](https://dev.adjust.com/en/api/rs-api/authentication/).
|
|
19
|
+
|
|
20
|
+
Once you complete the guide, you should have an API key. Let's say your API key is `nr_123`, here's a sample command that will copy the data from Adjust into a DuckDB database:
|
|
21
|
+
|
|
22
|
+
```sh
|
|
23
|
+
ingestr ingest --source-uri 'adjust://?api_key=nr_123' --source-table 'campaigns' --dest-uri duckdb:///adjust.duckdb --dest-table 'adjust.output'
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
The result of this command will be a table in the `adjust.duckdb` database.
|
|
27
|
+
|
|
28
|
+
### Lookback Days
|
|
29
|
+
|
|
30
|
+
Adjust data may change retroactively, which means you would otherwise need to move your start date back to pick up the latest data. The `lookback_days` parameter lets you specify how many days to go back when calculating the start date, so the start date is adjusted automatically and the past data is fetched again as well. It defaults to 30 days.
|
|
31
|
+
|
|
32
|
+
## Tables
|
|
33
|
+
The Adjust source allows ingesting data from the following tables:
|
|
34
|
+
|
|
35
|
+
- `campaigns`: Retrieves data for a campaign, showing the app's revenue and network costs over multiple days.
|
|
36
|
+
- `creatives`: Retrieves data for a creative asset, detailing the app's revenue and network costs across multiple days.
|
|
37
|
+
- `custom`: Retrieves custom data based on the dimensions and metrics specified.
|
|
38
|
+
|
|
39
|
+
### Custom Table: `custom:<dimensions>:<metrics>[:<filters>]`
|
|
40
|
+
|
|
41
|
+
The custom table allows you to retrieve data based on specific dimensions and metrics, and apply filters to the data.
|
|
42
|
+
|
|
43
|
+
The format for the custom table is:
|
|
44
|
+
```plaintext
|
|
45
|
+
custom:<dimensions>:<metrics>[:<filters>]
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Parameters:
|
|
49
|
+
- `dimensions`: A comma-separated list of [dimensions](https://dev.adjust.com/en/api/rs-api/reports#dimensions) to retrieve.
|
|
50
|
+
- `metrics`: A comma-separated list of [metrics](https://dev.adjust.com/en/api/rs-api/reports#metrics) to retrieve.
|
|
51
|
+
- `filters`: A comma-separated list of [filters](https://dev.adjust.com/en/api/rs-api/reports#filters) to apply to the data.
|
|
52
|
+
- Parsing the `filters` key is smart enough to handle filters that contain commas inside them.
|
|
53
|
+
|
|
54
|
+
> [!WARNING]
|
|
55
|
+
> Custom tables require a time-based dimension for efficient operation, such as `hour`, `day`, `week`, `month`, or `year`.
|
|
56
|
+
|
|
57
|
+
## Examples
|
|
58
|
+
|
|
59
|
+
Copy campaigns data from Adjust into a DuckDB database:
|
|
60
|
+
```sh
|
|
61
|
+
ingestr ingest \
|
|
62
|
+
--source-uri 'adjust://?api_key=nr_123' \
|
|
63
|
+
--source-table 'campaigns' \
|
|
64
|
+
--dest-uri duckdb:///adjust.duckdb \
|
|
65
|
+
--dest-table 'adjust.output'
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
Copy creatives data from Adjust into a DuckDB database:
|
|
69
|
+
```sh
|
|
70
|
+
ingestr ingest \
|
|
71
|
+
--source-uri 'adjust://?api_key=nr_123' \
|
|
72
|
+
--source-table 'creatives' \
|
|
73
|
+
--dest-uri duckdb:///adjust.duckdb \
|
|
74
|
+
--dest-table 'adjust.output'
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
Copy custom data from Adjust into a DuckDB database:
|
|
78
|
+
```sh
|
|
79
|
+
ingestr ingest \
|
|
80
|
+
--source-uri "adjust://?api_key=nr_123&lookback_days=2" \
|
|
81
|
+
--source-table "custom:hour,app,store_id,channel,os_name,country_code,campaign_network,campaign_id_network,adgroup_network,adgroup_id_network,creative_network,creative_id_network:impressions,clicks,cost,network_cost,installs,ad_revenue,all_revenue" \
|
|
82
|
+
--dest-uri duckdb:///adjust.db \
|
|
83
|
+
--dest-table "mat.example"
|
|
84
|
+
```
|
|
@@ -11,8 +11,8 @@ kafka://?bootstrap_servers=localhost:9092&group_id=test_group&security_protocol=
|
|
|
11
11
|
```
|
|
12
12
|
|
|
13
13
|
URI parameters:
|
|
14
|
-
- `bootstrap_servers
|
|
15
|
-
- `group_id
|
|
14
|
+
- `bootstrap_servers` (required): The Kafka server(s) to connect to, typically in the form of a host and port (e.g., `localhost:9092`).
|
|
15
|
+
- `group_id` (required): The consumer group ID used for identifying the client when consuming messages.
|
|
16
16
|
- `security_protocol`: The protocol used to communicate with brokers (e.g., `SASL_SSL` for secure communication).
|
|
17
17
|
- `sasl_mechanisms`: The SASL mechanism to be used for authentication (e.g., `PLAIN`).
|
|
18
18
|
- `sasl_username`: The username for SASL authentication.
|
|
@@ -29,7 +29,7 @@ Once you have your Kafka server, credentials, and group ID set up, here's a samp
|
|
|
29
29
|
|
|
30
30
|
```sh
|
|
31
31
|
ingestr ingest \
|
|
32
|
-
--source-uri 'kafka://?bootstrap_servers=localhost:9092' \
|
|
32
|
+
--source-uri 'kafka://?bootstrap_servers=localhost:9092&group_id=test_group' \
|
|
33
33
|
--source-table 'my-topic' \
|
|
34
34
|
--dest-uri duckdb:///kafka.duckdb \
|
|
35
35
|
--dest-table 'kafka.my_topic'
|
|
@@ -3,6 +3,14 @@ Microsoft SQL Server is a relational database management system developed by Mic
|
|
|
3
3
|
|
|
4
4
|
ingestr supports Microsoft SQL Server as both a source and destination.
|
|
5
5
|
|
|
6
|
+
## Installation
|
|
7
|
+
|
|
8
|
+
To use Microsoft SQL Server with ingestr, you need to install the `pyodbc` add-on as well. You can do this by running:
|
|
9
|
+
|
|
10
|
+
```bash
|
|
11
|
+
pip install ingestr[odbc]
|
|
12
|
+
```
|
|
13
|
+
|
|
6
14
|
## URI Format
|
|
7
15
|
The URI format for Microsoft SQL Server is as follows:
|
|
8
16
|
|
|
@@ -323,10 +323,12 @@ def ingest(
|
|
|
323
323
|
else "Platform-specific"
|
|
324
324
|
)
|
|
325
325
|
|
|
326
|
+
source_table_print = source_table.split(":")[0]
|
|
327
|
+
|
|
326
328
|
print()
|
|
327
329
|
print("[bold green]Initiated the pipeline with the following:[/bold green]")
|
|
328
330
|
print(
|
|
329
|
-
f"[bold yellow] Source:[/bold yellow] {factory.source_scheme} / {
|
|
331
|
+
f"[bold yellow] Source:[/bold yellow] {factory.source_scheme} / {source_table_print}"
|
|
330
332
|
)
|
|
331
333
|
print(
|
|
332
334
|
f"[bold yellow] Destination:[/bold yellow] {factory.destination_scheme} / {dest_table}"
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from typing import Optional, Sequence
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pendulum
|
|
5
|
+
from dlt.sources import DltResource
|
|
6
|
+
|
|
7
|
+
from .adjust_helpers import DEFAULT_DIMENSIONS, DEFAULT_METRICS, AdjustAPI
|
|
8
|
+
|
|
9
|
+
REQUIRED_CUSTOM_DIMENSIONS = [
|
|
10
|
+
"hour",
|
|
11
|
+
"day",
|
|
12
|
+
"week",
|
|
13
|
+
"month",
|
|
14
|
+
"quarter",
|
|
15
|
+
"year",
|
|
16
|
+
]
|
|
17
|
+
KNOWN_TYPE_HINTS = {
|
|
18
|
+
"hour": {"data_type": "timestamp"},
|
|
19
|
+
"day": {"data_type": "date"},
|
|
20
|
+
"week": {"data_type": "text"},
|
|
21
|
+
"month": {"data_type": "text"},
|
|
22
|
+
"quarter": {"data_type": "text"},
|
|
23
|
+
"year": {"data_type": "text"},
|
|
24
|
+
"campaign": {"data_type": "text"},
|
|
25
|
+
"adgroup": {"data_type": "text"},
|
|
26
|
+
"creative": {"data_type": "text"},
|
|
27
|
+
# metrics
|
|
28
|
+
"installs": {"data_type": "bigint"},
|
|
29
|
+
"clicks": {"data_type": "bigint"},
|
|
30
|
+
"cost": {"data_type": "decimal"},
|
|
31
|
+
"network_cost": {"data_type": "decimal"},
|
|
32
|
+
"impressions": {"data_type": "bigint"},
|
|
33
|
+
"ad_revenue": {"data_type": "decimal"},
|
|
34
|
+
"all_revenue": {"data_type": "decimal"},
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dlt.source(max_table_nesting=0)
def adjust_source(
    start_date: pendulum.DateTime,
    end_date: pendulum.DateTime,
    api_key: str,
    dimensions: Optional[list[str]] = None,
    metrics: Optional[list[str]] = None,
    merge_key: Optional[str] = None,
    filters: Optional[dict] = None,
) -> Sequence[DltResource]:
    """Build the Adjust dlt source with its three report resources.

    Resources returned:
      * ``campaigns`` - report over ``DEFAULT_DIMENSIONS`` / ``DEFAULT_METRICS``,
        merged on ``day``.
      * ``creatives`` - same as ``campaigns`` with the ``adgroup`` and
        ``creative`` dimensions added, merged on ``day``.
      * ``custom`` - report over the caller-supplied ``dimensions`` and
        ``metrics``, loaded with the ``delete+insert`` merge strategy.

    Args:
        start_date: First day of the report period.
        end_date: Last day of the report period.
        api_key: Adjust API key handed to ``AdjustAPI``.
        dimensions: Dimensions for the ``custom`` resource. ``None`` falls
            back to ``DEFAULT_DIMENSIONS``.
        metrics: Metrics for the ``custom`` resource. ``None`` falls back to
            ``DEFAULT_METRICS``.
        merge_key: Merge key for the ``custom`` resource; only used when none
            of ``REQUIRED_CUSTOM_DIMENSIONS`` is present in ``dimensions``.
        filters: Extra report filters forwarded to every resource.

    Returns:
        The ``campaigns``, ``creatives`` and ``custom`` resources.
    """
    # The signature advertises None as a valid default, but the hint/merge-key
    # computation below iterates both lists. Fall back to the same defaults
    # that AdjustAPI.fetch_report_data itself uses instead of raising TypeError.
    if dimensions is None:
        dimensions = list(DEFAULT_DIMENSIONS)
    if metrics is None:
        metrics = list(DEFAULT_METRICS)

    @dlt.resource(write_disposition="merge", merge_key="day")
    def campaigns():
        adjust_api = AdjustAPI(api_key=api_key)
        yield from adjust_api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=DEFAULT_DIMENSIONS,
            metrics=DEFAULT_METRICS,
            filters=filters,
        )

    @dlt.resource(write_disposition="merge", merge_key="day")
    def creatives():
        adjust_api = AdjustAPI(api_key=api_key)
        yield from adjust_api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=DEFAULT_DIMENSIONS + ["adgroup", "creative"],
            metrics=DEFAULT_METRICS,
            filters=filters,
        )

    # The first time-based dimension present (in REQUIRED_CUSTOM_DIMENSIONS
    # order) wins over the merge key passed by the caller.
    merge_key = next(
        (
            dimension
            for dimension in REQUIRED_CUSTOM_DIMENSIONS
            if dimension in dimensions
        ),
        merge_key,
    )

    # Column type hints for every requested dimension/metric we know about.
    type_hints = {
        name: KNOWN_TYPE_HINTS[name]
        for name in [*dimensions, *metrics]
        if name in KNOWN_TYPE_HINTS
    }

    @dlt.resource(
        write_disposition={"disposition": "merge", "strategy": "delete+insert"},
        merge_key=merge_key,
        primary_key=dimensions,
        columns=type_hints,
    )
    def custom():
        adjust_api = AdjustAPI(api_key=api_key)
        yield from adjust_api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=dimensions,
            metrics=metrics,
            filters=filters,
        )

    return campaigns, creatives, custom
|
ingestr-0.9.1/ingestr/src/adjust/helpers.py → ingestr-0.9.3/ingestr/src/adjust/adjust_helpers.py
RENAMED
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
import pendulum
|
|
1
4
|
import requests
|
|
2
5
|
from dlt.sources.helpers.requests import Client
|
|
3
6
|
from requests.exceptions import HTTPError
|
|
@@ -32,33 +35,32 @@ class AdjustAPI:
|
|
|
32
35
|
|
|
33
36
|
def fetch_report_data(
|
|
34
37
|
self,
|
|
35
|
-
start_date,
|
|
36
|
-
end_date,
|
|
38
|
+
start_date: pendulum.DateTime,
|
|
39
|
+
end_date: pendulum.DateTime,
|
|
37
40
|
dimensions=DEFAULT_DIMENSIONS,
|
|
38
41
|
metrics=DEFAULT_METRICS,
|
|
39
|
-
|
|
40
|
-
ad_spend_mode="network",
|
|
41
|
-
attribution_source="first",
|
|
42
|
-
attribution_type="all",
|
|
43
|
-
cohort_maturity="immature",
|
|
44
|
-
reattributed="all",
|
|
45
|
-
sandbox="false",
|
|
42
|
+
filters: Optional[dict] = None,
|
|
46
43
|
):
|
|
47
44
|
headers = {"Authorization": f"Bearer {self.api_key}"}
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
"
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
45
|
+
params = {}
|
|
46
|
+
|
|
47
|
+
if filters:
|
|
48
|
+
for key, value in filters.items():
|
|
49
|
+
if isinstance(value, list):
|
|
50
|
+
params[key] = ",".join(value)
|
|
51
|
+
else:
|
|
52
|
+
params[key] = value
|
|
53
|
+
|
|
54
|
+
params["date_period"] = (
|
|
55
|
+
f"{start_date.format('YYYY-MM-DD')}:{end_date.format('YYYY-MM-DD')}"
|
|
56
|
+
)
|
|
57
|
+
params["dimensions"] = ",".join(dimensions)
|
|
58
|
+
params["metrics"] = ",".join(metrics)
|
|
59
|
+
|
|
60
|
+
if start_date > end_date:
|
|
61
|
+
raise ValueError(
|
|
62
|
+
f"Invalid date range: Start date ({start_date}) must be earlier than end date ({end_date})."
|
|
63
|
+
)
|
|
62
64
|
|
|
63
65
|
def retry_on_limit(
|
|
64
66
|
response: requests.Response, exception: BaseException
|
|
@@ -80,3 +82,24 @@ class AdjustAPI:
|
|
|
80
82
|
yield items
|
|
81
83
|
else:
|
|
82
84
|
raise HTTPError(f"Request failed with status code: {response.status_code}")
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def parse_filters(filters_raw: str) -> dict:
    """Parse a comma-separated filter string into a dictionary.

    The input looks like ``"key1=value1,key2=value2,value3,value4"``. Every
    item containing ``=`` starts a new key; items without ``=`` are treated as
    additional values of the most recent key, which is how values that
    themselves contain commas are supported. Items that appear before any key
    are ignored.

    Args:
        filters_raw: The raw filter string.

    Returns:
        A dict mapping each key to a single string when it has exactly one
        value, or to a list of strings when it has several.
    """
    filters: dict = {}
    current_key = None

    for item in filters_raw.split(","):
        if "=" in item:
            # Split on the first "=" only, so a value may itself contain "="
            # without breaking the two-element unpacking.
            key, value = item.split("=", 1)
            filters[key] = [value]  # Always start with a list
            current_key = key
        elif current_key is not None:
            # Additional value for the current key
            filters[current_key].append(item)

    # Convert single-item lists to simple values
    return {k: v[0] if len(v) == 1 else v for k, v in filters.items()}
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Source that loads tables from Airtable.
|
|
2
|
+
Supports whitelisting of tables or loading of all tables from a specified base.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Optional
|
|
6
|
+
|
|
7
|
+
import dlt
|
|
8
|
+
from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns
|
|
9
|
+
from dlt.extract.items import TTableHintTemplate
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def memory_mapped_arrow(
    path: str,
    columns: Optional[TTableSchemaColumns] = None,
    primary_key: Optional[TTableHintTemplate[TColumnNames]] = None,
    merge_key: Optional[TTableHintTemplate[TColumnNames]] = None,
    incremental: Optional[dlt.sources.incremental[Any]] = None,
):
    """Create a dlt resource that yields an Arrow IPC file as a single table.

    The file at ``path`` is opened through a memory map and read in full.
    When ``incremental`` is given, rows are kept only if the cursor column is
    strictly greater than ``incremental.last_value`` and strictly less than
    ``incremental.end_value`` (each bound is applied only when it is set).

    Raises:
        KeyError: if the incremental cursor column is not in the table.
    """

    @dlt.resource(
        name="arrow_mmap",
        columns=columns,  # type: ignore
        primary_key=primary_key,  # type: ignore
        merge_key=merge_key,  # type: ignore
    )
    def arrow_mmap(
        incremental: Optional[dlt.sources.incremental[Any]] = incremental,
    ):
        import pyarrow as pa  # type: ignore
        import pyarrow.ipc as ipc  # type: ignore

        with pa.memory_map(path, "rb") as mmap:
            reader: ipc.RecordBatchFileReader = ipc.open_file(mmap)
            table = reader.read_all()

        lower_bound = None
        upper_bound = None
        if incremental:
            if incremental.cursor_path not in table.column_names:
                raise KeyError(
                    f"Cursor column '{incremental.cursor_path}' does not exist in table"
                )
            lower_bound, upper_bound = incremental.last_value, incremental.end_value

        if lower_bound is not None:
            cursor_type = table.schema.field(incremental.cursor_path).type  # type: ignore
            # NOTE(review): the lower bound is converted for any temporal
            # column while the upper bound below only for timestamp columns -
            # confirm this asymmetry is intended.
            if pa.types.is_temporal(cursor_type) and not isinstance(
                lower_bound, pa.TimestampScalar
            ):
                lower_bound = pa.scalar(lower_bound, type=pa.timestamp("ns"))
            table = table.filter(
                pa.compute.field(incremental.cursor_path) > lower_bound  # type: ignore
            )

        if upper_bound is not None:
            cursor_type = table.schema.field(incremental.cursor_path).type  # type: ignore
            # Convert the bound to a timestamp scalar if it is not one already.
            if pa.types.is_timestamp(cursor_type) and not isinstance(
                upper_bound, pa.TimestampScalar
            ):
                upper_bound = pa.scalar(upper_bound, type=pa.timestamp("ns"))
            table = table.filter(
                pa.compute.field(incremental.cursor_path) < upper_bound  # type: ignore
            )

        yield table

    return arrow_mmap
|
|
@@ -18,6 +18,7 @@ from ingestr.src.sources import (
|
|
|
18
18
|
AdjustSource,
|
|
19
19
|
AirtableSource,
|
|
20
20
|
AppsflyerSource,
|
|
21
|
+
ArrowMemoryMappedSource,
|
|
21
22
|
ChessSource,
|
|
22
23
|
FacebookAdsSource,
|
|
23
24
|
GoogleSheetsSource,
|
|
@@ -136,6 +137,8 @@ class SourceDestinationFactory:
|
|
|
136
137
|
return AdjustSource()
|
|
137
138
|
elif self.source_scheme == "zendesk":
|
|
138
139
|
return ZendeskSource()
|
|
140
|
+
elif self.source_scheme == "mmap":
|
|
141
|
+
return ArrowMemoryMappedSource()
|
|
139
142
|
elif self.source_scheme == "s3":
|
|
140
143
|
return S3Source()
|
|
141
144
|
else:
|
|
@@ -65,7 +65,7 @@ def mongodb(
|
|
|
65
65
|
sections=("sources", "mongodb"), spec=MongoDbCollectionResourceConfiguration
|
|
66
66
|
)
|
|
67
67
|
def mongodb_collection(
|
|
68
|
-
connection_url: str = dlt.
|
|
68
|
+
connection_url: str = dlt.config.value,
|
|
69
69
|
database: Optional[str] = dlt.config.value,
|
|
70
70
|
collection: str = dlt.config.value,
|
|
71
71
|
incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg]
|
|
@@ -155,7 +155,7 @@ class MongoDbCollectionConfiguration(BaseConfiguration):
|
|
|
155
155
|
|
|
156
156
|
@configspec
|
|
157
157
|
class MongoDbCollectionResourceConfiguration(BaseConfiguration):
|
|
158
|
-
connection_url: str = dlt.
|
|
158
|
+
connection_url: str = dlt.config.value
|
|
159
159
|
database: Optional[str] = dlt.config.value
|
|
160
160
|
collection: str = dlt.config.value
|
|
161
161
|
incremental: Optional[dlt.sources.incremental] = None # type: ignore[type-arg]
|
|
@@ -1,17 +1,21 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import csv
|
|
3
3
|
import json
|
|
4
|
-
from datetime import date
|
|
4
|
+
from datetime import date
|
|
5
5
|
from typing import Any, Callable, Optional
|
|
6
6
|
from urllib.parse import parse_qs, urlparse
|
|
7
7
|
|
|
8
8
|
import dlt
|
|
9
|
+
import pendulum
|
|
9
10
|
from dlt.common.configuration.specs import AwsCredentials
|
|
11
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
10
12
|
from dlt.common.typing import TSecretStrValue
|
|
11
13
|
|
|
12
|
-
from ingestr.src.adjust
|
|
14
|
+
from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
|
|
15
|
+
from ingestr.src.adjust.adjust_helpers import parse_filters
|
|
13
16
|
from ingestr.src.airtable import airtable_source
|
|
14
17
|
from ingestr.src.appsflyer._init_ import appsflyer_source
|
|
18
|
+
from ingestr.src.arrow import memory_mapped_arrow
|
|
15
19
|
from ingestr.src.chess import source
|
|
16
20
|
from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
|
|
17
21
|
from ingestr.src.filesystem import readers
|
|
@@ -75,6 +79,51 @@ class SqlSource:
|
|
|
75
79
|
return table_instance
|
|
76
80
|
|
|
77
81
|
|
|
82
|
+
class ArrowMemoryMappedSource:
    """Source for the ``mmap`` scheme: a local Arrow memory-mapped file."""

    # Factory that turns a file path plus hints into a dlt resource.
    table_builder: Callable

    def __init__(self, table_builder=memory_mapped_arrow) -> None:
        self.table_builder = table_builder

    def handles_incrementality(self) -> bool:
        return False

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Build the resource for the file named by ``uri``.

        Reads ``incremental_key``, ``interval_start``, ``interval_end``,
        ``primary_key`` and ``merge_key`` from ``kwargs``.

        Raises:
            ValueError: if the path does not exist or is a directory.
        """
        import os

        cursor_column = kwargs.get("incremental_key")
        incremental = None
        if cursor_column:
            # Only build an incremental cursor when a key was actually given.
            incremental = dlt.sources.incremental(
                cursor_column,
                initial_value=kwargs.get("interval_start"),
                end_value=kwargs.get("interval_end"),
            )

        # Everything after the scheme separator is taken as the file path.
        file_path = uri.split("://")[1]

        if not os.path.exists(file_path):
            raise ValueError(f"File at path {file_path} does not exist")
        if os.path.isdir(file_path):
            raise ValueError(
                f"Path {file_path} is a directory, it should be an Arrow memory mapped file"
            )

        return self.table_builder(
            path=file_path,
            incremental=incremental,
            merge_key=kwargs.get("merge_key"),
            primary_key=kwargs.get("primary_key"),
        )
|
|
125
|
+
|
|
126
|
+
|
|
78
127
|
class MongoDbSource:
|
|
79
128
|
table_builder: Callable
|
|
80
129
|
|
|
@@ -656,12 +705,12 @@ class KafkaSource:
|
|
|
656
705
|
credentials=KafkaCredentials(
|
|
657
706
|
bootstrap_servers=bootstrap_servers[0],
|
|
658
707
|
group_id=group_id[0],
|
|
659
|
-
security_protocol=
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
sasl_mechanisms=
|
|
663
|
-
|
|
664
|
-
|
|
708
|
+
security_protocol=(
|
|
709
|
+
security_protocol[0] if len(security_protocol) > 0 else None
|
|
710
|
+
), # type: ignore
|
|
711
|
+
sasl_mechanisms=(
|
|
712
|
+
sasl_mechanisms[0] if len(sasl_mechanisms) > 0 else None
|
|
713
|
+
), # type: ignore
|
|
665
714
|
sasl_username=sasl_username[0] if len(sasl_username) > 0 else None, # type: ignore
|
|
666
715
|
sasl_password=sasl_password[0] if len(sasl_password) > 0 else None, # type: ignore
|
|
667
716
|
),
|
|
@@ -673,10 +722,10 @@ class KafkaSource:
|
|
|
673
722
|
|
|
674
723
|
class AdjustSource:
|
|
675
724
|
def handles_incrementality(self) -> bool:
|
|
676
|
-
return
|
|
725
|
+
return False
|
|
677
726
|
|
|
678
727
|
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
679
|
-
if kwargs.get("incremental_key"):
|
|
728
|
+
if kwargs.get("incremental_key") and not table.startswith("custom:"):
|
|
680
729
|
raise ValueError(
|
|
681
730
|
"Adjust takes care of incrementality on its own, you should not provide incremental_key"
|
|
682
731
|
)
|
|
@@ -688,25 +737,62 @@ class AdjustSource:
|
|
|
688
737
|
if not api_key:
|
|
689
738
|
raise ValueError("api_key in the URI is required to connect to Adjust")
|
|
690
739
|
|
|
691
|
-
|
|
692
|
-
interval_end = kwargs.get("interval_end")
|
|
740
|
+
lookback_days = int(source_params.get("lookback_days", [30])[0])
|
|
693
741
|
|
|
694
742
|
start_date = (
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
interval_end.strftime("%Y-%m-%d")
|
|
699
|
-
if interval_end
|
|
700
|
-
else datetime.now().strftime("%Y-%m-%d")
|
|
743
|
+
pendulum.now()
|
|
744
|
+
.replace(hour=0, minute=0, second=0, microsecond=0)
|
|
745
|
+
.subtract(days=lookback_days)
|
|
701
746
|
)
|
|
747
|
+
if kwargs.get("interval_start"):
|
|
748
|
+
start_date = (
|
|
749
|
+
ensure_pendulum_datetime(str(kwargs.get("interval_start")))
|
|
750
|
+
.replace(hour=0, minute=0, second=0, microsecond=0)
|
|
751
|
+
.subtract(days=lookback_days)
|
|
752
|
+
)
|
|
702
753
|
|
|
703
|
-
|
|
704
|
-
if
|
|
705
|
-
|
|
754
|
+
end_date = pendulum.now()
|
|
755
|
+
if kwargs.get("interval_end"):
|
|
756
|
+
end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
|
|
757
|
+
|
|
758
|
+
dimensions = None
|
|
759
|
+
metrics = None
|
|
760
|
+
filters = []
|
|
761
|
+
if table.startswith("custom:"):
|
|
762
|
+
fields = table.split(":")
|
|
763
|
+
if len(fields) != 3 and len(fields) != 4:
|
|
764
|
+
raise ValueError(
|
|
765
|
+
"Invalid Adjust custom table format. Expected format: custom:<dimensions>,<metrics> or custom:<dimensions>:<metrics>:<filters>"
|
|
766
|
+
)
|
|
767
|
+
|
|
768
|
+
dimensions = fields[1].split(",")
|
|
769
|
+
metrics = fields[2].split(",")
|
|
770
|
+
table = "custom"
|
|
771
|
+
|
|
772
|
+
found = False
|
|
773
|
+
for dimension in dimensions:
|
|
774
|
+
if dimension in REQUIRED_CUSTOM_DIMENSIONS:
|
|
775
|
+
found = True
|
|
776
|
+
break
|
|
777
|
+
|
|
778
|
+
if not found:
|
|
779
|
+
raise ValueError(
|
|
780
|
+
f"At least one of the required dimensions is missing for custom Adjust report: {REQUIRED_CUSTOM_DIMENSIONS}"
|
|
781
|
+
)
|
|
782
|
+
|
|
783
|
+
if len(fields) == 4:
|
|
784
|
+
filters_raw = fields[3]
|
|
785
|
+
filters = parse_filters(filters_raw)
|
|
706
786
|
|
|
707
787
|
return adjust_source(
|
|
708
|
-
start_date=start_date,
|
|
709
|
-
|
|
788
|
+
start_date=start_date,
|
|
789
|
+
end_date=end_date,
|
|
790
|
+
api_key=api_key[0],
|
|
791
|
+
dimensions=dimensions,
|
|
792
|
+
metrics=metrics,
|
|
793
|
+
merge_key=kwargs.get("merge_key"),
|
|
794
|
+
filters=filters,
|
|
795
|
+
).with_resources(table)
|
|
710
796
|
|
|
711
797
|
|
|
712
798
|
class AppsflyerSource:
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "0.9.3"
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
# Adjust
|
|
2
|
-
|
|
3
|
-
[Adjust](https://www.adjust.com/) is a mobile marketing analytics platform that provides solutions for measuring and optimizing campaigns, as well as protecting user data.
|
|
4
|
-
|
|
5
|
-
ingestr supports Adjust as a source.
|
|
6
|
-
|
|
7
|
-
## URI Format
|
|
8
|
-
|
|
9
|
-
The URI format for Adjust is as follows:
|
|
10
|
-
|
|
11
|
-
```plaintext
|
|
12
|
-
adjust://?api_key=<api-key-here>
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
An API token is required to retrieve reports from the Adjust reporting API. Please follow the guide to [obtain an API key](https://dev.adjust.com/en/api/rs-api/authentication/).
|
|
16
|
-
|
|
17
|
-
Once you complete the guide, you should have an API key. Let's say your API key is `nr_123`; here's a sample command that will copy the data from Adjust into a DuckDB database:
|
|
18
|
-
|
|
19
|
-
```sh
|
|
20
|
-
ingestr ingest --source-uri 'adjust://?api_key=nr_123' --source-table 'campaigns' --dest-uri duckdb:///adjust.duckdb --dest-table 'adjust.output' --interval-start '2024-09-05' --interval-end '2024-09-08'
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
The result of this command will be a table in the `adjust.duckdb` database
|
|
24
|
-
|
|
25
|
-
Available Source Table:
|
|
26
|
-
The Adjust source allows ingesting the following sources into separate tables:
|
|
27
|
-
|
|
28
|
-
- `Campaigns`: Retrieves data for a campaign, showing the app's revenue and network costs over multiple days.
|
|
29
|
-
|
|
30
|
-
- `Creatives`: Retrieves data for a creative asset, detailing the app's revenue and network costs across multiple days.
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
from typing import Sequence
|
|
2
|
-
|
|
3
|
-
import dlt
|
|
4
|
-
from dlt.sources import DltResource
|
|
5
|
-
|
|
6
|
-
from .helpers import DEFAULT_DIMENSIONS, AdjustAPI
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
@dlt.source(max_table_nesting=0)
|
|
10
|
-
def adjust_source(
|
|
11
|
-
start_date: str,
|
|
12
|
-
end_date: str,
|
|
13
|
-
api_key: str,
|
|
14
|
-
) -> Sequence[DltResource]:
|
|
15
|
-
@dlt.resource(write_disposition="merge", merge_key="day")
|
|
16
|
-
def campaigns():
|
|
17
|
-
adjust_api = AdjustAPI(api_key=api_key)
|
|
18
|
-
yield from adjust_api.fetch_report_data(
|
|
19
|
-
start_date=start_date,
|
|
20
|
-
end_date=end_date,
|
|
21
|
-
)
|
|
22
|
-
|
|
23
|
-
@dlt.resource(write_disposition="merge", merge_key="day")
|
|
24
|
-
def creatives():
|
|
25
|
-
dimensions = DEFAULT_DIMENSIONS + ["adgroup", "creative"]
|
|
26
|
-
adjust_api = AdjustAPI(api_key=api_key)
|
|
27
|
-
yield from adjust_api.fetch_report_data(
|
|
28
|
-
start_date=start_date, end_date=end_date, dimensions=dimensions
|
|
29
|
-
)
|
|
30
|
-
|
|
31
|
-
return campaigns, creatives
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "0.9.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|