ingestr 0.13.11__py3-none-any.whl → 0.13.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ from datetime import timedelta
1
2
  from typing import Iterator
2
3
 
3
4
  import dlt
@@ -11,20 +12,23 @@ from pendulum.date import Date
11
12
 
12
13
  @dlt.source(max_table_nesting=0)
13
14
  def applovin_max_source(
14
- start_date: str,
15
- application: str,
15
+ start_date: Date,
16
+ applications: list[str],
16
17
  api_key: str,
17
- end_date: str | None,
18
+ end_date: Date | None,
18
19
  ) -> DltResource:
19
20
  @dlt.resource(
20
- name="ad_revenue",
21
+ name="user_ad_revenue",
21
22
  write_disposition="merge",
22
- merge_key="_partition_date",
23
+ merge_key="partition_date",
24
+ columns={
25
+ "partition_date": {"data_type": "date", "partition": True},
26
+ },
23
27
  )
24
28
  def fetch_ad_revenue_report(
25
29
  dateTime=(
26
30
  dlt.sources.incremental(
27
- "_partition_date",
31
+ "partition_date",
28
32
  initial_value=start_date,
29
33
  end_value=end_date,
30
34
  range_start="closed",
@@ -33,18 +37,31 @@ def applovin_max_source(
33
37
  ),
34
38
  ) -> Iterator[dict]:
35
39
  url = "https://r.applovin.com/max/userAdRevenueReport"
36
- start_date = pendulum.from_format(dateTime.last_value, "YYYY-MM-DD").date()
40
+ start_date = dateTime.last_value
41
+
37
42
  if dateTime.end_value is None:
38
43
  end_date = (pendulum.yesterday("UTC")).date()
39
44
  else:
40
- end_date = pendulum.from_format(dateTime.end_value, "YYYY-MM-DD").date()
41
- yield get_data(
42
- url=url,
43
- start_date=start_date,
44
- end_date=end_date,
45
- application=application,
46
- api_key=api_key,
47
- )
45
+ end_date = dateTime.end_value
46
+
47
+ client = create_client()
48
+ platforms = ["ios", "android", "fireos"]
49
+
50
+ for app in applications:
51
+ current_date = start_date
52
+ while current_date <= end_date:
53
+ for platform in platforms:
54
+ df = get_data(
55
+ url=url,
56
+ current_date=current_date,
57
+ application=app,
58
+ api_key=api_key,
59
+ client=client,
60
+ platform=platform,
61
+ )
62
+ if df is not None:
63
+ yield df
64
+ current_date = current_date + timedelta(days=1)
48
65
 
49
66
  return fetch_ad_revenue_report
50
67
 
@@ -67,33 +84,32 @@ def retry_on_limit(
67
84
 
68
85
 
69
86
  def get_data(
70
- url: str, start_date: Date, end_date: Date, application: str, api_key: str
87
+ url: str,
88
+ current_date: Date,
89
+ application: str,
90
+ api_key: str,
91
+ platform: str,
92
+ client: requests.Session,
71
93
  ):
72
- client = create_client()
73
- platforms = ["ios", "android", "fireos"]
74
- current_date = start_date
75
- while current_date <= end_date:
76
- for platform in platforms:
77
- params = {
78
- "api_key": api_key,
79
- "date": current_date.strftime("%Y-%m-%d"),
80
- "platform": platform,
81
- "application": application,
82
- "aggregated": "false",
83
- }
84
-
85
- response = client.get(url=url, params=params)
86
-
87
- if response.status_code == 400:
88
- raise ValueError(response.text)
89
-
90
- if response.status_code != 200:
91
- continue
94
+ params = {
95
+ "api_key": api_key,
96
+ "date": current_date.isoformat(),
97
+ "platform": platform,
98
+ "application": application,
99
+ "aggregated": "false",
100
+ }
92
101
 
93
- response_url = response.json().get("ad_revenue_report_url")
94
- df = pd.read_csv(response_url)
95
- df["Date"] = pd.to_datetime(df["Date"])
96
- df["_partition_date"] = df["Date"].dt.strftime("%Y-%m-%d")
97
- yield df
102
+ response = client.get(url=url, params=params)
98
103
 
99
- current_date = current_date.add(days=1)
104
+ if response.status_code != 200:
105
+ if response.status_code == 404:
106
+ if "No Mediation App Id found for platform" in response.text:
107
+ return None
108
+ error_message = f"AppLovin MAX API error (status {response.status_code}): {response.text}"
109
+ raise requests.HTTPError(error_message)
110
+
111
+ response_url = response.json().get("ad_revenue_report_url")
112
+ df = pd.read_csv(response_url)
113
+ df["Date"] = pd.to_datetime(df["Date"])
114
+ df["partition_date"] = df["Date"].dt.date
115
+ return df
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.11"
1
+ version = "v0.13.13"
ingestr/src/factory.py CHANGED
@@ -41,6 +41,7 @@ from ingestr.src.sources import (
41
41
  LocalCsvSource,
42
42
  MongoDbSource,
43
43
  NotionSource,
44
+ PersonioSource,
44
45
  S3Source,
45
46
  SalesforceSource,
46
47
  ShopifySource,
@@ -49,7 +50,6 @@ from ingestr.src.sources import (
49
50
  StripeAnalyticsSource,
50
51
  TikTokSource,
51
52
  ZendeskSource,
52
- PersonioSource,
53
53
  )
54
54
 
55
55
  SQL_SOURCE_SCHEMES = [
@@ -165,7 +165,7 @@ def personio_source(
165
165
  Returns:
166
166
  Iterable: A generator of attendances.
167
167
  """
168
-
168
+
169
169
  end_date = end_date or pendulum.now()
170
170
  if updated_at.last_value:
171
171
  updated_iso = updated_at.last_value.format("YYYY-MM-DDTHH:mm:ss")
@@ -1,4 +1,5 @@
1
1
  """Personio source helpers"""
2
+
2
3
  from typing import Any, Iterable, Optional
3
4
  from urllib.parse import urljoin
4
5
 
ingestr/src/sources.py CHANGED
@@ -402,6 +402,7 @@ class LocalCsvSource:
402
402
  if inc_value < incremental.start_value:
403
403
  continue
404
404
 
405
+ dictionary = self.remove_empty_columns(dictionary)
405
406
  page.append(dictionary)
406
407
  current_items += 1
407
408
  else:
@@ -425,6 +426,9 @@ class LocalCsvSource:
425
426
  )
426
427
  )
427
428
 
429
+ def remove_empty_columns(self, row: Dict[str, str]) -> Dict[str, str]:
430
+ return {k: v for k, v in row.items() if v.strip() != ""}
431
+
428
432
 
429
433
  class NotionSource:
430
434
  table_builder: Callable
@@ -1829,6 +1833,9 @@ class AppLovinSource:
1829
1833
 
1830
1834
 
1831
1835
  class ApplovinMaxSource:
1836
+ #expected uri format: applovinmax://?api_key=<api_key>
1837
+ #expected table format: user_ad_revenue:app_id_1,app_id_2
1838
+
1832
1839
  def handles_incrementality(self) -> bool:
1833
1840
  return True
1834
1841
 
@@ -1839,38 +1846,54 @@ class ApplovinMaxSource:
1839
1846
  api_key = params.get("api_key")
1840
1847
  if api_key is None:
1841
1848
  raise ValueError("api_key is required to connect to AppLovin Max API.")
1849
+
1850
+ AVAILABLE_TABLES = ["user_ad_revenue"]
1842
1851
 
1843
- application = params.get("application")
1844
- if application is None:
1845
- raise ValueError("application is required to connect to AppLovin Max API.")
1846
-
1847
- interval_start = kwargs.get("interval_start")
1848
- interval_end = kwargs.get("interval_end")
1852
+ table_fields = table.split(":")
1853
+ requested_table = table_fields[0]
1849
1854
 
1850
- if "ad_revenue" in table:
1851
- table = "ad_revenue"
1852
- else:
1855
+ if len(table_fields) != 2:
1856
+ raise ValueError(
1857
+ "Invalid table format. Expected format is user_ad_revenue:app_id_1,app_id_2"
1858
+ )
1859
+
1860
+ if requested_table not in AVAILABLE_TABLES:
1861
+ raise ValueError(
1862
+ f"Table name '{requested_table}' is not supported for AppLovin Max source yet."
1863
+ f"Only '{AVAILABLE_TABLES}' are currently supported. "
1864
+ "If you need additional tables, please create a GitHub issue at "
1865
+ "https://github.com/bruin-data/ingestr"
1866
+ )
1867
+
1868
+ applications = [i for i in table_fields[1].replace(" ", "").split(",") if i.strip()]
1869
+ if len(applications) == 0:
1853
1870
  raise ValueError(
1854
- f"Table name '{table}' is not supported for AppLovin Max source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
1871
+ "At least one application id is required"
1855
1872
  )
1873
+
1874
+ if len(applications) != len(set(applications)):
1875
+ raise ValueError(
1876
+ "Application ids must be unique."
1877
+ )
1878
+
1879
+ interval_start = kwargs.get("interval_start")
1880
+ interval_end = kwargs.get("interval_end")
1856
1881
 
1857
1882
  now = pendulum.now("UTC")
1858
1883
  default_start = now.subtract(days=30).date()
1859
1884
 
1860
1885
  start_date = (
1861
- interval_start if interval_start is not None else default_start
1862
- ).strftime("%Y-%m-%d")
1863
-
1864
- end_date = (
1865
- interval_end.strftime("%Y-%m-%d") if interval_end is not None else None
1886
+ interval_start.date() if interval_start is not None else default_start
1866
1887
  )
1867
1888
 
1889
+ end_date = interval_end.date() if interval_end is not None else None
1890
+
1868
1891
  return applovin_max_source(
1869
1892
  start_date=start_date,
1870
1893
  end_date=end_date,
1871
1894
  api_key=api_key[0],
1872
- application=application[0],
1873
- ).with_resources(table)
1895
+ applications=applications,
1896
+ ).with_resources(requested_table)
1874
1897
 
1875
1898
 
1876
1899
  class SalesforceSource:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.11
3
+ Version: 0.13.13
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -57,6 +57,7 @@ Requires-Dist: stripe==10.7.0
57
57
  Requires-Dist: tqdm==4.67.1
58
58
  Requires-Dist: typer==0.13.1
59
59
  Requires-Dist: types-requests==2.32.0.20240907
60
+ Requires-Dist: zstd==1.5.6.1
60
61
  Provides-Extra: odbc
61
62
  Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
62
63
  Provides-Extra: oracle
@@ -149,7 +150,7 @@ Pull requests are welcome. However, please open an issue first to discuss what y
149
150
  </tr>
150
151
  <tr>
151
152
  <td>ClickHouse</td>
152
- <td>❌</td>
153
+ <td>✅</td>
153
154
  <td>✅</td>
154
155
  </tr>
155
156
  <tr>
@@ -1,13 +1,13 @@
1
1
  ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/blob.py,sha256=LtEZWoUhm5i2aKerdgEpLtNCf3fdhGGMM4td-LRZVbY,1407
4
- ingestr/src/buildinfo.py,sha256=PnFKBMVizeXpYaYJ6rkY9m_oU0QCJzbLAOJyEQ8gyRg,21
4
+ ingestr/src/buildinfo.py,sha256=hLtrn_2NNb0KhD_VpjlyF4uvE7jjsD2W10llc0aDpxE,21
5
5
  ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
6
6
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
7
- ingestr/src/factory.py,sha256=dOdY4fzeQ-2dgFBGIDFD5ilxpYNfCVqQOureuWzOL-w,5127
7
+ ingestr/src/factory.py,sha256=lvq-RTdzCqok-F0IVCZN0Q9ALVnuhNT06T1ju3lnPn8,5127
8
8
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
9
9
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
10
- ingestr/src/sources.py,sha256=YlWokgTZoeMQ6PVb9UVU3I99R0cdhkYjEzPf5LNGs30,68582
10
+ ingestr/src/sources.py,sha256=vCZ20jVD5jCCNyTxR7VxNXpJFCE218t0ydbaZwls48M,69510
11
11
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
12
12
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
13
13
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -15,7 +15,7 @@ ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3
15
15
  ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
16
16
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
17
17
  ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
18
- ingestr/src/applovin_max/__init__.py,sha256=1NUOeJzRyZZQ95KEirbrlSrk-8SNc9JrlM_5pGgBgHg,2878
18
+ ingestr/src/applovin_max/__init__.py,sha256=o0aL4jBZqwK528MVw9dS1G5EZbF4tx6_Ef0IfqkhAT0,3294
19
19
  ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
20
20
  ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
21
21
  ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
@@ -74,8 +74,8 @@ ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMO
74
74
  ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
75
  ingestr/src/notion/helpers/client.py,sha256=QXuudkf5Zzff98HRsCqA1g1EZWIrnfn1falPrnKg_y4,5500
76
76
  ingestr/src/notion/helpers/database.py,sha256=gigPibTeVefP3lA-8w4aOwX67pj7RlciPk5koDs1ry8,2737
77
- ingestr/src/personio/__init__.py,sha256=CQ8XX8Q8BG-wgoen3emhe_r8Cx414Fux7P8jQNawWvY,11646
78
- ingestr/src/personio/helpers.py,sha256=OmeMzfg4MVtpI7f75D3-9OGZb8SDsKyz0svNm1zJLTw,2900
77
+ ingestr/src/personio/__init__.py,sha256=sHYpoV-rg-kA1YsflctChis0hKcTrL6mka9O0CHV4zA,11638
78
+ ingestr/src/personio/helpers.py,sha256=EKmBN0Lf4R0lc3yqqs7D-RjoZ75E8gPcctt59xwHxrY,2901
79
79
  ingestr/src/salesforce/__init__.py,sha256=2hik5pRrxVODdDTlUEMoyccNC07zozjnxkMHcjMT1qA,4558
80
80
  ingestr/src/salesforce/helpers.py,sha256=QTdazBt-qRTBbCQMZnyclIaDQFmBixBy_RDKD00Lt-8,2492
81
81
  ingestr/src/shopify/__init__.py,sha256=PF_6VQnS065Br1UzSIekTVXBu3WtrMQL_v5CfbfaX5Y,63151
@@ -108,8 +108,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
108
108
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
109
109
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
110
110
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
111
- ingestr-0.13.11.dist-info/METADATA,sha256=8vjvshEDHgAZEMt3ykbUSlEl_Ky0KtHf6p6vjT6RDGI,9171
112
- ingestr-0.13.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
113
- ingestr-0.13.11.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
114
- ingestr-0.13.11.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
115
- ingestr-0.13.11.dist-info/RECORD,,
111
+ ingestr-0.13.13.dist-info/METADATA,sha256=D8jPk86Pl8XTtLvx2VFSbjC-wqCz9X0r8K74q29T26o,9200
112
+ ingestr-0.13.13.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
113
+ ingestr-0.13.13.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
114
+ ingestr-0.13.13.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
115
+ ingestr-0.13.13.dist-info/RECORD,,