ingestr 0.12.2__py3-none-any.whl → 0.12.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

@@ -1,4 +1,4 @@
1
- from typing import Iterable, Optional
1
+ from typing import Iterable
2
2
 
3
3
  import dlt
4
4
  import pendulum
@@ -8,6 +8,39 @@ from dlt.sources import DltResource
8
8
 
9
9
  from .tiktok_helpers import TikTokAPI
10
10
 
11
+ KNOWN_TYPE_HINTS = {
12
+ "spend": {"data_type": "decimal"},
13
+ "billed_cost": {"data_type": "decimal"},
14
+ "cash_spend": {"data_type": "decimal"},
15
+ "voucher_spend": {"data_type": "decimal"},
16
+ "cpc": {"data_type": "decimal"},
17
+ "cpm": {"data_type": "decimal"},
18
+ "impressions": {"data_type": "bigint"},
19
+ "gross_impressions": {"data_type": "bigint"},
20
+ "clicks": {"data_type": "bigint"},
21
+ "ctr": {"data_type": "decimal"},
22
+ "reach": {"data_type": "bigint"},
23
+ "cost_per_1000_reached": {"data_type": "decimal"},
24
+ "frequency": {"data_type": "decimal"},
25
+ "conversion": {"data_type": "bigint"},
26
+ "cost_per_conversion": {"data_type": "decimal"},
27
+ "conversion_rate": {"data_type": "decimal"},
28
+ "conversion_rate_v2": {"data_type": "decimal"},
29
+ "real_time_conversion": {"data_type": "bigint"},
30
+ "real_time_cost_per_conversion": {"data_type": "decimal"},
31
+ "real_time_conversion_rate": {"data_type": "decimal"},
32
+ "real_time_conversion_rate_v2": {"data_type": "decimal"},
33
+ "result": {"data_type": "bigint"},
34
+ "cost_per_result": {"data_type": "decimal"},
35
+ "result_rate": {"data_type": "decimal"},
36
+ "real_time_result": {"data_type": "bigint"},
37
+ "real_time_cost_per_result": {"data_type": "decimal"},
38
+ "real_time_result_rate": {"data_type": "decimal"},
39
+ "secondary_goal_result": {"data_type": "bigint"},
40
+ "cost_per_secondary_goal_result": {"data_type": "decimal"},
41
+ "secondary_goal_result_rate": {"data_type": "decimal"},
42
+ }
43
+
11
44
 
12
45
  def find_intervals(
13
46
  current_date: pendulum.DateTime,
@@ -23,42 +56,27 @@ def find_intervals(
23
56
  return intervals
24
57
 
25
58
 
26
- def fetch_tiktok_reports(
27
- tiktok_api: TikTokAPI,
28
- current_date: pendulum.DateTime,
29
- interval_end: pendulum.DateTime,
30
- advertiser_id: str,
31
- dimensions: list[str],
32
- metrics: list[str],
33
- filters: Optional[dict] | None,
34
- ) -> Iterable[TDataItem]:
35
- try:
36
- yield from tiktok_api.fetch_pages(
37
- advertiser_id=advertiser_id,
38
- start_time=current_date,
39
- end_time=interval_end,
40
- dimensions=dimensions,
41
- metrics=metrics,
42
- filters=None,
43
- )
44
- except Exception as e:
45
- raise RuntimeError(f"Error fetching TikTok report: {e}")
46
-
47
-
48
59
  @dlt.source(max_table_nesting=0)
49
60
  def tiktok_source(
50
61
  start_date: pendulum.DateTime,
51
62
  end_date: pendulum.DateTime,
52
63
  access_token: str,
53
- advertiser_id: str,
54
- time_zone: str,
64
+ advertiser_ids: list[str],
65
+ timezone: str,
55
66
  page_size: int,
67
+ filtering_param: bool,
68
+ filter_name: str,
69
+ filter_value: list[int],
56
70
  dimensions: list[str],
57
71
  metrics: list[str],
58
- filters=None,
59
72
  ) -> DltResource:
60
73
  tiktok_api = TikTokAPI(
61
- access_token=access_token, time_zone=time_zone, page_size=page_size
74
+ access_token=access_token,
75
+ timezone=timezone,
76
+ page_size=page_size,
77
+ filtering_param=filtering_param,
78
+ filter_name=filter_name,
79
+ filter_value=filter_value,
62
80
  )
63
81
  incremental_loading_param = ""
64
82
  is_incremental = False
@@ -74,17 +92,34 @@ def tiktok_source(
74
92
  is_incremental = True
75
93
  interval_days = 0
76
94
 
77
- @dlt.resource(write_disposition="merge", primary_key=dimensions)
95
+ type_hints = {
96
+ "advertiser_id": {"data_type": "text"},
97
+ }
98
+ for dimension in dimensions:
99
+ if dimension in KNOWN_TYPE_HINTS:
100
+ type_hints[dimension] = KNOWN_TYPE_HINTS[dimension]
101
+ for metric in metrics:
102
+ if metric in KNOWN_TYPE_HINTS:
103
+ type_hints[metric] = KNOWN_TYPE_HINTS[metric]
104
+
105
+ @dlt.resource(
106
+ write_disposition="merge",
107
+ primary_key=dimensions + ["advertiser_id"],
108
+ columns=type_hints,
109
+ parallelized=True,
110
+ )
78
111
  def custom_reports(
79
- datetime=dlt.sources.incremental(incremental_loading_param, start_date)
80
- if is_incremental
81
- else None,
112
+ datetime=(
113
+ dlt.sources.incremental(incremental_loading_param, start_date)
114
+ if is_incremental
115
+ else None
116
+ ),
82
117
  ) -> Iterable[TDataItem]:
83
- current_date = start_date.in_tz(time_zone)
118
+ current_date = start_date.in_tz(timezone)
84
119
 
85
120
  if datetime is not None:
86
121
  datetime_str = datetime.last_value
87
- current_date = ensure_pendulum_datetime(datetime_str).in_tz(time_zone)
122
+ current_date = ensure_pendulum_datetime(datetime_str).in_tz(timezone)
88
123
 
89
124
  list_of_interval = find_intervals(
90
125
  current_date=current_date,
@@ -93,14 +128,12 @@ def tiktok_source(
93
128
  )
94
129
 
95
130
  for start, end in list_of_interval:
96
- yield from fetch_tiktok_reports(
97
- tiktok_api=tiktok_api,
98
- current_date=start,
99
- interval_end=end,
100
- advertiser_id=advertiser_id,
131
+ yield tiktok_api.fetch_pages(
132
+ advertiser_ids=advertiser_ids,
133
+ start_time=start,
134
+ end_time=end,
101
135
  dimensions=dimensions,
102
136
  metrics=metrics,
103
- filters=None,
104
137
  )
105
138
 
106
139
  return custom_reports
@@ -25,17 +25,17 @@ def create_client() -> requests.Session:
25
25
  ).session
26
26
 
27
27
 
28
- def flat_structure(items, time_zone="UTC"):
28
+ def flat_structure(items, timezone="UTC"):
29
29
  for item in items:
30
30
  if "dimensions" in item:
31
31
  for key, value in item["dimensions"].items():
32
32
  if key == "stat_time_day":
33
33
  item["stat_time_day"] = ensure_pendulum_datetime(value).in_tz(
34
- time_zone
34
+ timezone
35
35
  )
36
36
  elif key == "stat_time_hour":
37
37
  item["stat_time_hour"] = ensure_pendulum_datetime(value).in_tz(
38
- time_zone
38
+ timezone
39
39
  )
40
40
  else:
41
41
  item[key] = value
@@ -49,15 +49,26 @@ def flat_structure(items, time_zone="UTC"):
49
49
 
50
50
 
51
51
  class TikTokAPI:
52
- def __init__(self, access_token, time_zone, page_size):
52
+ def __init__(
53
+ self,
54
+ access_token,
55
+ timezone,
56
+ page_size,
57
+ filtering_param,
58
+ filter_name,
59
+ filter_value,
60
+ ):
53
61
  self.headers = {
54
62
  "Access-Token": access_token,
55
63
  }
56
- self.time_zone = time_zone
64
+ self.timezone = timezone
57
65
  self.page_size = page_size
66
+ self.filtering_param = filtering_param
67
+ self.filter_name = filter_name
68
+ self.filter_value = filter_value
58
69
 
59
70
  def fetch_pages(
60
- self, advertiser_id: str, start_time, end_time, dimensions, metrics, filters
71
+ self, advertiser_ids: list[str], start_time, end_time, dimensions, metrics
61
72
  ):
62
73
  data_level_mapping = {
63
74
  "advertiser_id": "AUCTION_ADVERTISER",
@@ -75,8 +86,15 @@ class TikTokAPI:
75
86
  start_time = ensure_pendulum_datetime(start_time).to_date_string()
76
87
  end_time = ensure_pendulum_datetime(end_time).to_date_string()
77
88
 
78
- self.params = {
79
- "advertiser_id": advertiser_id,
89
+ filtering = [
90
+ {
91
+ "field_name": self.filter_name,
92
+ "filter_type": "IN",
93
+ "filter_value": json.dumps(self.filter_value),
94
+ }
95
+ ]
96
+ params = {
97
+ "advertiser_ids": json.dumps(advertiser_ids),
80
98
  "report_type": "BASIC",
81
99
  "data_level": data_level,
82
100
  "start_date": start_time,
@@ -85,12 +103,13 @@ class TikTokAPI:
85
103
  "dimensions": json.dumps(dimensions),
86
104
  "metrics": json.dumps(metrics),
87
105
  }
106
+
107
+ if self.filtering_param:
108
+ params["filtering"] = json.dumps(filtering)
88
109
  client = create_client()
89
110
  while True:
90
- self.params["page"] = current_page
91
- response = client.get(
92
- url=BASE_URL, headers=self.headers, params=self.params
93
- )
111
+ params["page"] = current_page
112
+ response = client.get(url=BASE_URL, headers=self.headers, params=params)
94
113
 
95
114
  result = response.json()
96
115
  if result.get("message") != "OK":
@@ -99,7 +118,7 @@ class TikTokAPI:
99
118
  result_data = result.get("data", {})
100
119
  items = result_data.get("list", [])
101
120
 
102
- flat_structure(items=items, time_zone=self.time_zone)
121
+ flat_structure(items=items, timezone=self.timezone)
103
122
 
104
123
  yield items
105
124
 
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.12.2"
1
+ __version__ = "0.12.4"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.2
3
+ Version: 0.12.4
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -21,6 +21,7 @@ Requires-Dist: dlt==1.4.0
21
21
  Requires-Dist: duckdb-engine==0.13.5
22
22
  Requires-Dist: duckdb==1.1.3
23
23
  Requires-Dist: facebook-business==20.0.0
24
+ Requires-Dist: google-analytics-data==0.18.15
24
25
  Requires-Dist: google-api-python-client==2.130.0
25
26
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
26
27
  Requires-Dist: mysql-connector-python==9.1.0
@@ -1,12 +1,12 @@
1
- ingestr/main.py,sha256=wkU2uLMy1q8YarJ9mXNfJepeRjp6AuPDeNDOmMUt6n0,22309
1
+ ingestr/main.py,sha256=AG6ycOEpCyBN1qEOzW3j8sKK8KX0mrBAL-A25MdRldY,24712
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
4
- ingestr/src/factory.py,sha256=UyE1TzTHn_V8JZno5SSYfQsho1eFYzzvOylogw4S49E,4389
4
+ ingestr/src/factory.py,sha256=aE7TjHzONb4DKYcfh_6-CJJfvs4lmw7iUySvSm4yQbM,4516
5
5
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
6
- ingestr/src/sources.py,sha256=QCyfkhLl5jgmosZUeh4BTrmqHk74Vus7zLgk_MBdPhc,41096
6
+ ingestr/src/sources.py,sha256=zkK24y3jyucbrW2MU3i0Rx1SImZWatM9_A_8Wa7ExCM,51887
7
7
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
8
8
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
9
- ingestr/src/version.py,sha256=NJQQPiZZfrBXFMqZlsia0JrhloS2PexbdxYYUs0c2Us,23
9
+ ingestr/src/version.py,sha256=DoMS9KOhsApLyuLYhLEsd5nmoLFQ_IvVkEs_jKRzFk8,23
10
10
  ingestr/src/adjust/__init__.py,sha256=NaRNwDhItG8Q7vUHw7zQvyfWjmT32M0CSc5ufjmBM9U,3067
11
11
  ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
12
12
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
@@ -27,6 +27,13 @@ ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_
27
27
  ingestr/src/filesystem/__init__.py,sha256=wHHaKFuAjsR_ZRjl6g_Flf6FhVs9qhwREthTr03_7cc,4162
28
28
  ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
29
29
  ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
30
+ ingestr/src/github/__init__.py,sha256=csA2VcjOxXrVrvp7zY-JodO9Lpy98bJ4AqRdHCLTcGM,5838
31
+ ingestr/src/github/helpers.py,sha256=Tmnik9811zBWNO6cJwV9PFQxEx2j32LHAQCvNbubsEI,6759
32
+ ingestr/src/github/queries.py,sha256=W34C02jUEdjFmOE7f7u9xvYyBNDMfVZAu0JIRZI2mkU,2302
33
+ ingestr/src/github/settings.py,sha256=N5ahWrDIQ_4IWV9i-hTXxyYduqY9Ym2BTwqsWxcDdJ8,258
34
+ ingestr/src/google_analytics/__init__.py,sha256=HjA13wfJm2MGfy3h_DiM5ekkNqM2dgwYCKJ3pprnDtI,2482
35
+ ingestr/src/google_analytics/helpers/__init__.py,sha256=y_q7dinlEwNBEpq6kCzjTa8lAhe2bb23bDPP0fcy7fY,2744
36
+ ingestr/src/google_analytics/helpers/data_processing.py,sha256=fIdEKr9CmZN_s1T2i9BL8IYTPPqNoK6Vaquq2y8StfE,6072
30
37
  ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
31
38
  ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
32
39
  ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
@@ -61,8 +68,8 @@ ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu6
61
68
  ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
62
69
  ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
63
70
  ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
64
- ingestr/src/tiktok_ads/__init__.py,sha256=vJjVxEw3W1Rvc2QDQbox_8Ma0Cp1RT7iKsQ9MAv6Cgc,3036
65
- ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=lY7yWl_aJh5Hj-bVvt07MHvhfvXnghaGOLhGHF5gLh4,3444
71
+ ingestr/src/tiktok_ads/__init__.py,sha256=U4ZHPUW0c4LpKx4hjT2Lz5hgWFgwQSbAAkkYIrxYHZo,4469
72
+ ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=cfdPflCeR_mCk5fxq0v4d7pzlvZDiAoz8bWQJYqKALM,3935
66
73
  ingestr/src/zendesk/__init__.py,sha256=C7HkN195DGdOHId2_Sa_kAlcBrUmnVYZUa_tPkiyf1Q,17564
67
74
  ingestr/src/zendesk/settings.py,sha256=Vdj706nTJFQ-3KH4nO97iYCQuba3dV3E9gfnmLK6xwU,2294
68
75
  ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1Ydq5FDH_0c,888
@@ -77,8 +84,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
77
84
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
78
85
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
79
86
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
80
- ingestr-0.12.2.dist-info/METADATA,sha256=SAZJKqigL1ARQdv3eGX4RZVigZwYJCEcCt36lpvZtsQ,7910
81
- ingestr-0.12.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
82
- ingestr-0.12.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
83
- ingestr-0.12.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
84
- ingestr-0.12.2.dist-info/RECORD,,
87
+ ingestr-0.12.4.dist-info/METADATA,sha256=VN9cqnH_rmALlSxePi6XOxOxndDGLYWTW0K6eafYVDw,7956
88
+ ingestr-0.12.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
89
+ ingestr-0.12.4.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
90
+ ingestr-0.12.4.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
91
+ ingestr-0.12.4.dist-info/RECORD,,