ingestr 0.7.8__py3-none-any.whl → 0.8.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/main.py CHANGED
@@ -259,6 +259,7 @@ def ingest(
259
259
  },
260
260
  )
261
261
 
262
+ dlt.config["data_writer.buffer_max_items"] = page_size
262
263
  dlt.config["data_writer.file_max_items"] = loader_file_size
263
264
  dlt.config["extract.workers"] = extract_parallelism
264
265
  dlt.config["extract.max_parallel_items"] = extract_parallelism
@@ -0,0 +1,31 @@
1
+ from typing import Sequence
2
+
3
+ import dlt
4
+ from dlt.sources import DltResource
5
+
6
+ from .helpers import DEFAULT_DIMENSIONS, AdjustAPI
7
+
8
+
9
@dlt.source(max_table_nesting=0)
def adjust_source(
    start_date: str,
    end_date: str,
    api_key: str,
) -> Sequence[DltResource]:
    """Build the dlt source that exposes Adjust report data.

    Args:
        start_date: First day of the report period, forwarded unchanged to
            ``AdjustAPI.fetch_report_data``.
        end_date: Last day of the report period, forwarded unchanged.
        api_key: Adjust API token handed to every ``AdjustAPI`` instance.

    Returns:
        The ``campaigns`` and ``creatives`` resources, in that order.
    """

    @dlt.resource(write_disposition="merge", merge_key="day")
    def campaigns():
        """Yield the report fetched with the helper's default dimensions."""
        api = AdjustAPI(api_key=api_key)
        report = api.fetch_report_data(start_date=start_date, end_date=end_date)
        yield from report

    @dlt.resource(write_disposition="merge", merge_key="day")
    def creatives():
        """Yield the report additionally broken down by adgroup and creative."""
        creative_dimensions = [*DEFAULT_DIMENSIONS, "adgroup", "creative"]
        api = AdjustAPI(api_key=api_key)
        report = api.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=creative_dimensions,
        )
        yield from report

    return campaigns, creatives
@@ -0,0 +1,82 @@
1
+ import requests
2
+ from dlt.sources.helpers.requests import Client
3
+ from requests.exceptions import HTTPError
4
+
5
# Columns the Adjust report is grouped by when the caller does not override them.
DEFAULT_DIMENSIONS = [
    "campaign",
    "day",
    "app",
    "store_type",
    "channel",
    "country",
]

# Cohort days for which all three revenue metrics are requested.
_REVENUE_COHORT_DAYS = (0, 1, 3, 7, 14)
# Metric name prefixes; each one is combined with a "_d<day>" suffix.
_REVENUE_KINDS = ("all_revenue_total", "ad_revenue_total", "revenue_total")

# Metrics requested by default: network cost plus the revenue metrics per cohort day.
DEFAULT_METRICS = [
    "network_cost",
    *(f"{kind}_d{day}" for day in _REVENUE_COHORT_DAYS for kind in _REVENUE_KINDS),
    # NOTE(review): for day 21 only the all_revenue_total metric is listed --
    # confirm that omitting the ad_revenue/revenue variants is intentional.
    "all_revenue_total_d21",
]
26
+
27
+
28
class AdjustAPI:
    """Thin client for the Adjust report-service endpoint."""

    def __init__(self, api_key):
        """Store the API token and the report endpoint URL.

        Args:
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
        """
        self.api_key = api_key
        self.uri = "https://automate.adjust.com/reports-service/report"

    def fetch_report_data(
        self,
        start_date,
        end_date,
        dimensions=DEFAULT_DIMENSIONS,
        metrics=DEFAULT_METRICS,
        utc_offset="+00:00",
        ad_spend_mode="network",
        attribution_source="first",
        attribution_type="all",
        cohort_maturity="immature",
        reattributed="all",
        sandbox="false",
    ):
        """Request one report and yield its rows.

        Args:
            start_date: Start of the period; sent as the left half of
                ``date_period`` (``"<start_date>:<end_date>"``).
            end_date: End of the period; sent as the right half of
                ``date_period``.
            dimensions: Iterable of dimension names, joined with commas.
            metrics: Iterable of metric names, joined with commas.
            utc_offset: Forwarded as the ``utc_offset`` query parameter.
            ad_spend_mode: Forwarded as ``ad_spend_mode``.
            attribution_source: Forwarded as ``attribution_source``.
            attribution_type: Forwarded as ``attribution_type``.
            cohort_maturity: Forwarded as ``cohort_maturity``.
            reattributed: Forwarded as ``reattributed``.
            sandbox: Forwarded as ``sandbox`` (a string, not a bool).

        Yields:
            Once: the value stored under ``"rows"`` in the JSON response, or
            an empty list when that key is missing.

        Raises:
            HTTPError: If the final response status is anything other than 200.
        """
        from typing import Optional

        headers = {"Authorization": f"Bearer {self.api_key}"}
        params = {
            "date_period": f"{start_date}:{end_date}",
            "dimensions": ",".join(dimensions),
            "metrics": ",".join(metrics),
            "utc_offset": utc_offset,
            "ad_spend_mode": ad_spend_mode,
            "attribution_source": attribution_source,
            "attribution_type": attribution_type,
            "cohort_maturity": cohort_maturity,
            "reattributed": reattributed,
            "sandbox": sandbox,
        }

        def retry_on_limit(
            response: Optional[requests.Response],
            exception: Optional[BaseException],
        ) -> bool:
            # The retry hook is also invoked when the attempt ended in an
            # exception, in which case there is no response object; guard
            # against that instead of failing with AttributeError on None.
            return (
                isinstance(response, requests.Response)
                and response.status_code == 429
            )

        # raise_for_status=False: status handling is done explicitly below.
        request_client = Client(
            request_timeout=8.0,
            raise_for_status=False,
            retry_condition=retry_on_limit,
            request_max_attempts=12,
            request_backoff_factor=2,
        ).session

        response = request_client.get(self.uri, headers=headers, params=params)
        if response.status_code != 200:
            raise HTTPError(f"Request failed with status code: {response.status_code}")

        result = response.json()
        yield result.get("rows", [])
@@ -0,0 +1,24 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ from dlt.common.typing import TDataItem
5
+ from dlt.sources import DltResource
6
+
7
+ from ingestr.src.appsflyer.client import AppsflyerClient
8
+
9
+
10
@dlt.source(max_table_nesting=0)
def appsflyer_source(
    api_key: str, start_date: str, end_date: str
) -> Iterable[DltResource]:
    """Build the dlt source that exposes AppsFlyer aggregate data.

    Args:
        api_key: Token used to construct the shared ``AppsflyerClient``.
        start_date: Start of the reporting window, forwarded to the client.
        end_date: End of the reporting window, forwarded to the client.

    Returns:
        The ``campaigns`` and ``creatives`` resources, in that order.
    """
    # One client instance is shared by both resources.
    appsflyer = AppsflyerClient(api_key)

    @dlt.resource(write_disposition="merge", merge_key="install_time")
    def campaigns() -> Iterable[TDataItem]:
        """Yield whatever the client's campaign fetch produces."""
        campaign_items = appsflyer.fetch_campaigns(start_date, end_date)
        yield from campaign_items

    @dlt.resource(write_disposition="merge", merge_key="install_time")
    def creatives() -> Iterable[TDataItem]:
        """Yield whatever the client's creative fetch produces."""
        creative_items = appsflyer.fetch_creatives(start_date, end_date)
        yield from creative_items

    return campaigns, creatives
@@ -0,0 +1,106 @@
1
+ from typing import Optional
2
+
3
+ import requests
4
+ from dlt.sources.helpers.requests import Client
5
+ from requests.exceptions import HTTPError
6
+
7
# Grouping columns requested by default.
# NOTE(review): "c" presumably stands for campaign -- confirm against the API docs.
DEFAULT_GROUPING = [
    "c",
    "geo",
    "app_id",
    "install_time",
]

# KPI columns requested by default.
DEFAULT_KPIS = [
    # volume
    "impressions", "clicks", "installs",
    # money
    "cost", "revenue", "average_ecpi",
    # engagement / outcome
    "loyal_users", "uninstalls", "roi",
]
19
+
20
+
21
class AppsflyerClient:
    """Thin client for the AppsFlyer master aggregate-data endpoint."""

    def __init__(self, api_key: str):
        """Store the API token and the endpoint URL.

        Args:
            api_key: Value sent verbatim in the ``Authorization`` header.
        """
        self.api_key = api_key
        self.uri = "https://hq1.appsflyer.com/api/master-agg-data/v4/app/all"

    def __get_headers(self):
        """Return the request headers (authorization plus accepted type)."""
        return {
            "Authorization": f"{self.api_key}",
            "accept": "text/json",
        }

    def _fetch_data(
        self,
        from_date: str,
        to_date: str,
        maximum_rows=1000000,
        dimensions=DEFAULT_GROUPING,
        metrics=DEFAULT_KPIS,
    ):
        """Request one aggregate report and yield the decoded JSON body.

        Args:
            from_date: Sent as the ``from`` query parameter.
            to_date: Sent as the ``to`` query parameter.
            maximum_rows: Sent as the ``maximum_rows`` query parameter.
            dimensions: Iterable of grouping names, joined with commas into
                ``groupings``.
            metrics: Iterable of KPI names, joined with commas into ``kpis``.

        Yields:
            Once: the parsed JSON body of the response.

        Raises:
            HTTPError: If the request itself fails, if the body cannot be
                decoded (when the decoder raises a ``RequestException``), or
                if the final response status is anything other than 200.
        """
        params = {
            "from": from_date,
            "to": to_date,
            "groupings": ",".join(dimensions),
            "kpis": ",".join(metrics),
            "format": "json",
            "maximum_rows": maximum_rows,
        }

        def retry_on_limit(
            response: Optional[requests.Response], exception: Optional[BaseException]
        ) -> bool:
            # response may be None when the attempt ended in an exception.
            return (
                isinstance(response, requests.Response) and response.status_code == 429
            )

        # raise_for_status=False: status handling is done explicitly below.
        request_client = Client(
            request_timeout=10.0,
            raise_for_status=False,
            retry_condition=retry_on_limit,
            request_max_attempts=12,
            request_backoff_factor=2,
        ).session

        # Only the network call is guarded here. The status-code HTTPError
        # below is itself a RequestException, so raising it inside this try
        # would get it caught and re-wrapped with a doubled message.
        try:
            response = request_client.get(
                url=self.uri, headers=self.__get_headers(), params=params
            )
        except requests.RequestException as e:
            raise HTTPError(f"Request failed: {e}") from e

        if response.status_code != 200:
            raise HTTPError(
                f"Request failed with status code: {response.status_code}"
            )

        # Decoding failures that surface as RequestException keep being
        # reported as HTTPError, now with the original error chained.
        try:
            result = response.json()
        except requests.RequestException as e:
            raise HTTPError(f"Request failed: {e}") from e

        yield result

    def fetch_campaigns(
        self,
        start_date: str,
        end_date: str,
    ):
        """Return a generator over the campaign report.

        Uses the default groupings and extends the default KPIs with cohort
        revenue-per-user metrics and day-7 retention.
        """
        metrics = DEFAULT_KPIS + [
            "cohort_day_1_revenue_per_user",
            "cohort_day_1_total_revenue_per_user",
            "cohort_day_3_revenue_per_user",
            "cohort_day_3_total_revenue_per_user",
            "cohort_day_7_total_revenue_per_user",
            "cohort_day_7_revenue_per_user",
            "cohort_day_14_total_revenue_per_user",
            "cohort_day_14_revenue_per_user",
            "cohort_day_21_total_revenue_per_user",
            "cohort_day_21_revenue_per_user",
            "retention_day_7",
        ]
        return self._fetch_data(start_date, end_date, metrics=metrics)

    def fetch_creatives(
        self,
        start_date: str,
        end_date: str,
    ):
        """Return a generator over the creative report.

        Uses the default KPIs and extends the default groupings with the
        ad-set and ad identifiers.
        """
        dimensions = DEFAULT_GROUPING + ["af_adset_id", "af_adset", "af_ad_id"]
        return self._fetch_data(start_date, end_date, dimensions=dimensions)
ingestr/src/factory.py CHANGED
@@ -15,7 +15,9 @@ from ingestr.src.destinations import (
15
15
  SynapseDestination,
16
16
  )
17
17
  from ingestr.src.sources import (
18
+ AdjustSource,
18
19
  AirtableSource,
20
+ AppsflyerSource,
19
21
  ChessSource,
20
22
  FacebookAdsSource,
21
23
  GoogleSheetsSource,
@@ -124,8 +126,12 @@ class SourceDestinationFactory:
124
126
  return AirtableSource()
125
127
  elif self.source_scheme == "klaviyo":
126
128
  return KlaviyoSource()
129
+ elif self.source_scheme == "appsflyer":
130
+ return AppsflyerSource()
127
131
  elif self.source_scheme == "kafka":
128
132
  return KafkaSource()
133
+ elif self.source_scheme == "adjust":
134
+ return AdjustSource()
129
135
  else:
130
136
  raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
131
137