ingestr 0.12.2__py3-none-any.whl → 0.12.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of ingestr might be problematic.
- ingestr/src/sources.py +195 -28
- ingestr/src/tiktok_ads/__init__.py +72 -39
- ingestr/src/tiktok_ads/tiktok_helpers.py +32 -13
- ingestr/src/version.py +1 -1
- {ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/METADATA +1 -1
- {ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/RECORD +9 -9
- {ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/WHEEL +0 -0
- {ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/sources.py
CHANGED
@@ -3,17 +3,42 @@ import csv
 import json
 import os
 import re
-from datetime import date
-from typing import
+from datetime import date, datetime
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    Iterator,
+    List,
+    Literal,
+    Optional,
+    Union,
+)
 from urllib.parse import ParseResult, parse_qs, quote, urlparse
 
 import dlt
 import pendulum
-
+import sqlalchemy
+from dlt.common.configuration.specs import (
+    AwsCredentials,
+)
+from dlt.common.libs.sql_alchemy import (
+    Engine,
+    MetaData,
+)
 from dlt.common.time import ensure_pendulum_datetime
-from dlt.common.typing import TSecretStrValue
+from dlt.common.typing import TDataItem, TSecretStrValue
+from dlt.extract import Incremental
 from dlt.sources.credentials import ConnectionStringCredentials
 from dlt.sources.sql_database import sql_table
+from dlt.sources.sql_database.helpers import TableLoader
+from dlt.sources.sql_database.schema_types import (
+    ReflectionLevel,
+    SelectAny,
+    Table,
+    TTypeAdapter,
+)
+from sqlalchemy import Column
 from sqlalchemy import types as sa
 from sqlalchemy.dialects import mysql
@@ -39,7 +64,7 @@ from ingestr.src.notion import notion_databases
 from ingestr.src.shopify import shopify_source
 from ingestr.src.slack import slack_source
 from ingestr.src.stripe_analytics import stripe_source
-from ingestr.src.table_definition import table_string_to_dataclass
+from ingestr.src.table_definition import TableDefinition, table_string_to_dataclass
 from ingestr.src.tiktok_ads import tiktok_source
 from ingestr.src.time import isotime
 from ingestr.src.zendesk import zendesk_chat, zendesk_support, zendesk_talk
@@ -48,6 +73,9 @@ from ingestr.src.zendesk.helpers.credentials import (
     ZendeskCredentialsToken,
 )
 
+TableBackend = Literal["sqlalchemy", "pyarrow", "pandas", "connectorx"]
+TQueryAdapter = Callable[[SelectAny, Table], SelectAny]
+
 
 class SqlSource:
     table_builder: Callable
@@ -59,7 +87,9 @@ class SqlSource:
         return False
 
     def dlt_source(self, uri: str, table: str, **kwargs):
-        table_fields =
+        table_fields = TableDefinition(dataset="custom", table="custom")
+        if not table.startswith("query:"):
+            table_fields = table_string_to_dataclass(table)
 
         incremental = None
         if kwargs.get("incremental_key"):
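Aside: the new `query:` prefix lets the table argument carry a raw SQL statement instead of a `schema.table` reference, in which case a placeholder TableDefinition is used because there is no table name to reflect. A minimal sketch of the string convention (the SQL text itself is hypothetical):

table = "query:SELECT id, updated_at FROM events WHERE updated_at > :interval_start"
if table.startswith("query:"):
    query_value = table.split(":", 1)[1]
    # query_value == "SELECT id, updated_at FROM events WHERE updated_at > :interval_start"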
@@ -87,6 +117,110 @@ class SqlSource:
                 query = query.order_by(kwargs.get("incremental_key"))
             return query
 
+        defer_table_reflect = False
+        sql_backend = kwargs.get("sql_backend", "sqlalchemy")
+        if table.startswith("query:"):
+            if kwargs.get("sql_limit"):
+                raise ValueError(
+                    "sql_limit is not supported for custom queries, please apply the limit in the query instead"
+                )
+
+            sql_backend = "sqlalchemy"
+            defer_table_reflect = True
+            query_value = table.split(":", 1)[1]
+
+            # this is a very hacky version of the table_rows function. it is built this way to go around the dlt's table loader.
+            # I didn't want to write a full fledged sqlalchemy source for now, and wanted to benefit from the existing stuff to begin with.
+            # this is by no means a production ready solution, but it works for now.
+            # the core idea behind this implementation is to create a mock table instance with the columns that are absolutely necessary for the incremental load to work.
+            # the table loader will then use the query adapter callback to apply the actual query and load the rows.
+            def table_rows(
+                engine: Engine,
+                table: Union[Table, str],
+                metadata: MetaData,
+                chunk_size: int,
+                backend: TableBackend,
+                incremental: Optional[Incremental[Any]] = None,
+                table_adapter_callback: Callable[[Table], None] = None,  # type: ignore
+                reflection_level: ReflectionLevel = "minimal",
+                backend_kwargs: Dict[str, Any] = None,  # type: ignore
+                type_adapter_callback: Optional[TTypeAdapter] = None,
+                included_columns: Optional[List[str]] = None,
+                query_adapter_callback: Optional[TQueryAdapter] = None,
+                resolve_foreign_keys: bool = False,
+            ) -> Iterator[TDataItem]:
+                hints = {  # type: ignore
+                    "columns": [],
+                }
+                cols = []  # type: ignore
+
+                if incremental:
+                    switchDict = {
+                        int: sa.INTEGER,
+                        datetime: sa.TIMESTAMP,
+                        pendulum.Date: sa.DATE,
+                        pendulum.DateTime: sa.TIMESTAMP,
+                    }
+
+                    if incremental.last_value is not None:
+                        cols.append(
+                            Column(
+                                incremental.cursor_path,
+                                switchDict[type(incremental.last_value)],  # type: ignore
+                            )
+                        )
+                    else:
+                        cols.append(Column(incremental.cursor_path, sa.TIMESTAMP))  # type: ignore
+
+                table = Table(
+                    "query_result",
+                    metadata,
+                    *cols,
+                )
+
+                loader = TableLoader(
+                    engine,
+                    backend,
+                    table,
+                    hints["columns"],  # type: ignore
+                    incremental=incremental,
+                    chunk_size=chunk_size,
+                    query_adapter_callback=query_adapter_callback,
+                )
+                try:
+                    yield from loader.load_rows(backend_kwargs)
+                finally:
+                    if getattr(engine, "may_dispose_after_use", False):
+                        engine.dispose()
+
+            dlt.sources.sql_database.table_rows = table_rows
+
+            def query_adapter_callback(query, table, incremental=None, engine=None):
+                params = {}
+                if incremental:
+                    params["interval_start"] = (
+                        incremental.last_value
+                        if incremental.last_value is not None
+                        else datetime(year=1, month=1, day=1)
+                    )
+                    if incremental.end_value is not None:
+                        params["interval_end"] = incremental.end_value
+                else:
+                    if ":interval_start" in query_value:
+                        params["interval_start"] = (
+                            datetime.min
+                            if kwargs.get("interval_start") is None
+                            else kwargs.get("interval_start")
+                        )
+                    if ":interval_end" in query_value:
+                        params["interval_end"] = (
+                            datetime.max
+                            if kwargs.get("interval_end") is None
+                            else kwargs.get("interval_end")
+                        )
+
+                return sqlalchemy.text(query_value).bindparams(**params)
+
         def type_adapter_callback(sql_type):
             if isinstance(sql_type, mysql.SET):
                 return sa.JSON
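The custom-query path ultimately reduces to binding the `:interval_start` / `:interval_end` placeholders into a textual SQLAlchemy statement. A standalone sketch of that mechanism, assuming a throwaway SQLite engine and a made-up `events` table:

from datetime import datetime

import sqlalchemy

engine = sqlalchemy.create_engine("sqlite:///:memory:")
query_value = "SELECT * FROM events WHERE updated_at > :interval_start"

# bindparams() attaches values to the named placeholders in the raw SQL
stmt = sqlalchemy.text(query_value).bindparams(interval_start=datetime.min)
# with engine.connect() as conn:
#     rows = conn.execute(stmt).fetchall()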
@@ -97,7 +231,7 @@ class SqlSource:
             schema=table_fields.dataset,
             table=table_fields.table,
             incremental=incremental,
-            backend=
+            backend=sql_backend,
             chunk_size=kwargs.get("page_size", None),
             reflection_level=reflection_level,
             query_adapter_callback=query_adapter_callback,
@@ -105,6 +239,7 @@ class SqlSource:
             table_adapter_callback=table_adapter_exclude_columns(
                 kwargs.get("sql_exclude_columns", [])
             ),
+            defer_table_reflect=defer_table_reflect,
         )
 
         return builder_res
@@ -1015,29 +1150,28 @@ class TikTokSource:
         if not access_token:
             raise ValueError("access_token is required to connect to TikTok")
 
-
+        timezone = "UTC"
+        if source_fields.get("timezone") is not None:
+            timezone = source_fields.get("timezone")[0]  # type: ignore
+
+        advertiser_ids = source_fields.get("advertiser_ids")
+        if not advertiser_ids:
+            raise ValueError("advertiser_ids is required to connect to TikTok")
 
-
-        if not advertiser_id:
-            raise ValueError("advertiser_id is required to connect to TikTok")
+        advertiser_ids = advertiser_ids[0].replace(" ", "").split(",")
 
-        start_date = pendulum.now().subtract(days=
-        end_date = ensure_pendulum_datetime(pendulum.now()).in_tz(
+        start_date = pendulum.now().subtract(days=30).in_tz(timezone)
+        end_date = ensure_pendulum_datetime(pendulum.now()).in_tz(timezone)
 
         interval_start = kwargs.get("interval_start")
         if interval_start is not None:
-            start_date = ensure_pendulum_datetime(interval_start).in_tz(
+            start_date = ensure_pendulum_datetime(interval_start).in_tz(timezone)
 
         interval_end = kwargs.get("interval_end")
         if interval_end is not None:
-            end_date = ensure_pendulum_datetime(interval_end).in_tz(
+            end_date = ensure_pendulum_datetime(interval_end).in_tz(timezone)
 
-        page_size = kwargs.get("page_size")
-        if page_size is not None and not isinstance(page_size, int):
-            page_size = int(page_size)
-
-        if page_size > 1000:
-            page_size = 1000
+        page_size = min(1000, kwargs.get("page_size", 1000))
 
         if table.startswith("custom:"):
             fields = table.split(":", 3)
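Two behavioral notes on this hunk: `advertiser_ids` now accepts a comma-separated list (whitespace is tolerated), and the page size is clamped to the API maximum of 1000 in a single expression. A quick illustration with made-up values:

raw = "123, 456,789"
advertiser_ids = raw.replace(" ", "").split(",")
assert advertiser_ids == ["123", "456", "789"]

page_size = min(1000, 5000)  # a requested 5000 is clamped to 1000
assert page_size == 1000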
@@ -1049,28 +1183,61 @@ class TikTokSource:
             dimensions = fields[1].replace(" ", "").split(",")
             if (
                 "campaign_id" not in dimensions
-                and "advertiser_id" not in dimensions
                 and "adgroup_id" not in dimensions
                 and "ad_id" not in dimensions
             ):
                 raise ValueError(
-                    "
+                    "TikTok API requires at least one ID dimension, please use one of the following dimensions: [campaign_id, adgroup_id, ad_id]"
                 )
 
+            if "advertiser_id" in dimensions:
+                dimensions.remove("advertiser_id")
+
             metrics = fields[2].replace(" ", "").split(",")
-
+            filtering_param = False
+            filter_name = ""
+            filter_value = []
             if len(fields) == 4:
-
+
+                def parse_filters(filters_raw: str) -> dict:
+                    # Parse filter string like "key1=value1,key2=value2,value3,value4"
+                    filters = {}
+                    current_key = None
+
+                    for item in filters_raw.split(","):
+                        if "=" in item:
+                            # Start of a new key-value pair
+                            key, value = item.split("=")
+                            filters[key] = [value]  # Always start with a list
+                            current_key = key
+                        elif current_key is not None:
+                            # Additional value for the current key
+                            filters[current_key].append(item)
+
+                    # Convert single-item lists to simple values
+                    return {k: v[0] if len(v) == 1 else v for k, v in filters.items()}
+
+                filtering_param = True
+                filters = parse_filters(fields[3])
+                if len(filters) > 1:
+                    raise ValueError(
+                        "Only one filter is allowed for TikTok custom reports"
+                    )
+                filter_name = list(filters.keys())[0]
+                filter_value = list(map(int, filters[list(filters.keys())[0]]))
+
         return tiktok_source(
             start_date=start_date,
             end_date=end_date,
             access_token=access_token[0],
-
-
+            advertiser_ids=advertiser_ids,
+            timezone=timezone,
             dimensions=dimensions,
             metrics=metrics,
-            filters=filters,
             page_size=page_size,
+            filter_name=filter_name,
+            filter_value=filter_value,
+            filtering_param=filtering_param,
         ).with_resources(endpoint)
 
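`parse_filters` groups comma-separated values under the most recent `key=` seen, so a single filter can carry several IDs. A self-contained copy of the same logic with worked examples (the input strings are hypothetical):

def parse_filters(filters_raw: str) -> dict:
    filters = {}
    current_key = None
    for item in filters_raw.split(","):
        if "=" in item:
            key, value = item.split("=")
            filters[key] = [value]
            current_key = key
        elif current_key is not None:
            filters[current_key].append(item)
    # single-item lists collapse to a scalar value
    return {k: v[0] if len(v) == 1 else v for k, v in filters.items()}

assert parse_filters("campaign_id=111") == {"campaign_id": "111"}
assert parse_filters("campaign_id=111,222,333") == {"campaign_id": ["111", "222", "333"]}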
ingestr/src/tiktok_ads/__init__.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Iterable
+from typing import Iterable
 
 import dlt
 import pendulum
@@ -8,6 +8,39 @@ from dlt.sources import DltResource
 
 from .tiktok_helpers import TikTokAPI
 
+KNOWN_TYPE_HINTS = {
+    "spend": {"data_type": "decimal"},
+    "billed_cost": {"data_type": "decimal"},
+    "cash_spend": {"data_type": "decimal"},
+    "voucher_spend": {"data_type": "decimal"},
+    "cpc": {"data_type": "decimal"},
+    "cpm": {"data_type": "decimal"},
+    "impressions": {"data_type": "bigint"},
+    "gross_impressions": {"data_type": "bigint"},
+    "clicks": {"data_type": "bigint"},
+    "ctr": {"data_type": "decimal"},
+    "reach": {"data_type": "bigint"},
+    "cost_per_1000_reached": {"data_type": "decimal"},
+    "frequency": {"data_type": "decimal"},
+    "conversion": {"data_type": "bigint"},
+    "cost_per_conversion": {"data_type": "decimal"},
+    "conversion_rate": {"data_type": "decimal"},
+    "conversion_rate_v2": {"data_type": "decimal"},
+    "real_time_conversion": {"data_type": "bigint"},
+    "real_time_cost_per_conversion": {"data_type": "decimal"},
+    "real_time_conversion_rate": {"data_type": "decimal"},
+    "real_time_conversion_rate_v2": {"data_type": "decimal"},
+    "result": {"data_type": "bigint"},
+    "cost_per_result": {"data_type": "decimal"},
+    "result_rate": {"data_type": "decimal"},
+    "real_time_result": {"data_type": "bigint"},
+    "real_time_cost_per_result": {"data_type": "decimal"},
+    "real_time_result_rate": {"data_type": "decimal"},
+    "secondary_goal_result": {"data_type": "bigint"},
+    "cost_per_secondary_goal_result": {"data_type": "decimal"},
+    "secondary_goal_result_rate": {"data_type": "decimal"},
+}
+
 
 def find_intervals(
     current_date: pendulum.DateTime,
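These entries are ordinary dlt column hints; when passed to a resource's `columns` argument they pin the destination types even though the TikTok API returns every metric as a string. A minimal sketch with a made-up resource:

import dlt

@dlt.resource(columns={"spend": {"data_type": "decimal"}, "clicks": {"data_type": "bigint"}})
def sample_report():
    # values arrive as strings from the API but land typed in the destination
    yield {"spend": "12.34", "clicks": "567"}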
@@ -23,42 +56,27 @@ def find_intervals(
     return intervals
 
 
-def fetch_tiktok_reports(
-    tiktok_api: TikTokAPI,
-    current_date: pendulum.DateTime,
-    interval_end: pendulum.DateTime,
-    advertiser_id: str,
-    dimensions: list[str],
-    metrics: list[str],
-    filters: Optional[dict] | None,
-) -> Iterable[TDataItem]:
-    try:
-        yield from tiktok_api.fetch_pages(
-            advertiser_id=advertiser_id,
-            start_time=current_date,
-            end_time=interval_end,
-            dimensions=dimensions,
-            metrics=metrics,
-            filters=None,
-        )
-    except Exception as e:
-        raise RuntimeError(f"Error fetching TikTok report: {e}")
-
-
 @dlt.source(max_table_nesting=0)
 def tiktok_source(
     start_date: pendulum.DateTime,
     end_date: pendulum.DateTime,
     access_token: str,
-
-
+    advertiser_ids: list[str],
+    timezone: str,
     page_size: int,
+    filtering_param: bool,
+    filter_name: str,
+    filter_value: list[int],
     dimensions: list[str],
     metrics: list[str],
-    filters=None,
 ) -> DltResource:
     tiktok_api = TikTokAPI(
-        access_token=access_token,
+        access_token=access_token,
+        timezone=timezone,
+        page_size=page_size,
+        filtering_param=filtering_param,
+        filter_name=filter_name,
+        filter_value=filter_value,
     )
     incremental_loading_param = ""
     is_incremental = False
@@ -74,17 +92,34 @@ def tiktok_source(
         is_incremental = True
     interval_days = 0
 
-
+    type_hints = {
+        "advertiser_id": {"data_type": "text"},
+    }
+    for dimension in dimensions:
+        if dimension in KNOWN_TYPE_HINTS:
+            type_hints[dimension] = KNOWN_TYPE_HINTS[dimension]
+    for metric in metrics:
+        if metric in KNOWN_TYPE_HINTS:
+            type_hints[metric] = KNOWN_TYPE_HINTS[metric]
+
+    @dlt.resource(
+        write_disposition="merge",
+        primary_key=dimensions + ["advertiser_id"],
+        columns=type_hints,
+        parallelized=True,
+    )
     def custom_reports(
-        datetime=
-
-
+        datetime=(
+            dlt.sources.incremental(incremental_loading_param, start_date)
+            if is_incremental
+            else None
+        ),
     ) -> Iterable[TDataItem]:
-        current_date = start_date.in_tz(
+        current_date = start_date.in_tz(timezone)
 
         if datetime is not None:
             datetime_str = datetime.last_value
-            current_date = ensure_pendulum_datetime(datetime_str).in_tz(
+            current_date = ensure_pendulum_datetime(datetime_str).in_tz(timezone)
 
         list_of_interval = find_intervals(
             current_date=current_date,
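With `write_disposition="merge"` and the dimension columns as primary key, re-running an overlapping date window upserts instead of duplicating rows, which is what makes the incremental `stat_time_day` cursor safe to replay. A toy pipeline showing the same pattern (names and destination are illustrative):

import dlt

@dlt.resource(write_disposition="merge", primary_key=["stat_time_day", "campaign_id"])
def daily_stats():
    yield {"stat_time_day": "2024-01-01", "campaign_id": 1, "spend": "10.0"}

pipeline = dlt.pipeline(pipeline_name="tiktok_demo", destination="duckdb")
# running this twice would merge on the primary key rather than append:
# pipeline.run(daily_stats())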
@@ -93,14 +128,12 @@ def tiktok_source(
         )
 
         for start, end in list_of_interval:
-            yield
-
-
-
-                advertiser_id=advertiser_id,
+            yield tiktok_api.fetch_pages(
+                advertiser_ids=advertiser_ids,
+                start_time=start,
+                end_time=end,
                 dimensions=dimensions,
                 metrics=metrics,
-                filters=None,
             )
 
     return custom_reports
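Note that `custom_reports` now yields the generator returned by `fetch_pages` without consuming it; combined with `parallelized=True` on the resource, dlt can evaluate the per-interval fetches concurrently. The shape of the pattern, stripped of the API details:

def fetch(interval):
    # stand-in for a network call returning rows for one interval
    yield [{"interval": interval}]

def resource():
    for interval in range(3):
        # yielding the generator itself defers the work to dlt's scheduler
        yield fetch(interval)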
ingestr/src/tiktok_ads/tiktok_helpers.py
CHANGED
@@ -25,17 +25,17 @@ def create_client() -> requests.Session:
     ).session
 
 
-def flat_structure(items,
+def flat_structure(items, timezone="UTC"):
     for item in items:
         if "dimensions" in item:
             for key, value in item["dimensions"].items():
                 if key == "stat_time_day":
                     item["stat_time_day"] = ensure_pendulum_datetime(value).in_tz(
-
+                        timezone
                     )
                 elif key == "stat_time_hour":
                     item["stat_time_hour"] = ensure_pendulum_datetime(value).in_tz(
-
+                        timezone
                     )
                 else:
                     item[key] = value
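`flat_structure` lifts the nested `dimensions` object onto the top-level item and localizes the stat timestamps into the configured timezone. A rough standalone sketch, using pendulum directly in place of dlt's `ensure_pendulum_datetime`:

import pendulum

item = {"dimensions": {"stat_time_day": "2024-05-01T00:00:00", "campaign_id": 1}}
for key, value in item["dimensions"].items():
    if key == "stat_time_day":
        item[key] = pendulum.parse(value).in_tz("UTC")
    else:
        item[key] = value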
@@ -49,15 +49,26 @@ def flat_structure(items, time_zone="UTC"):
 
 
 class TikTokAPI:
-    def __init__(
+    def __init__(
+        self,
+        access_token,
+        timezone,
+        page_size,
+        filtering_param,
+        filter_name,
+        filter_value,
+    ):
         self.headers = {
             "Access-Token": access_token,
         }
-        self.
+        self.timezone = timezone
         self.page_size = page_size
+        self.filtering_param = filtering_param
+        self.filter_name = filter_name
+        self.filter_value = filter_value
 
     def fetch_pages(
-        self,
+        self, advertiser_ids: list[str], start_time, end_time, dimensions, metrics
     ):
         data_level_mapping = {
             "advertiser_id": "AUCTION_ADVERTISER",
@@ -75,8 +86,15 @@ class TikTokAPI:
         start_time = ensure_pendulum_datetime(start_time).to_date_string()
         end_time = ensure_pendulum_datetime(end_time).to_date_string()
 
-
-
+        filtering = [
+            {
+                "field_name": self.filter_name,
+                "filter_type": "IN",
+                "filter_value": json.dumps(self.filter_value),
+            }
+        ]
+        params = {
+            "advertiser_ids": json.dumps(advertiser_ids),
             "report_type": "BASIC",
             "data_level": data_level,
             "start_date": start_time,
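The reporting endpoint expects `filtering` to be double-encoded: `filter_value` is itself a JSON string inside the JSON-encoded list. An illustration with hypothetical IDs:

import json

filtering = [
    {
        "field_name": "campaign_id",
        "filter_type": "IN",
        "filter_value": json.dumps([111, 222]),  # inner JSON string
    }
]
print(json.dumps(filtering))
# [{"field_name": "campaign_id", "filter_type": "IN", "filter_value": "[111, 222]"}]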
@@ -85,12 +103,13 @@ class TikTokAPI:
             "dimensions": json.dumps(dimensions),
             "metrics": json.dumps(metrics),
         }
+
+        if self.filtering_param:
+            params["filtering"] = json.dumps(filtering)
         client = create_client()
         while True:
-
-            response = client.get(
-                url=BASE_URL, headers=self.headers, params=self.params
-            )
+            params["page"] = current_page
+            response = client.get(url=BASE_URL, headers=self.headers, params=params)
 
             result = response.json()
             if result.get("message") != "OK":
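`current_page` is initialized outside the lines shown here; the fix is that the loop now mutates a local `params` dict and passes it on each request, where the old code referenced a `self.params` that the new constructor never sets. A generic sketch of the same page-based pagination, with a hypothetical endpoint and stop condition:

import requests

def fetch_all(url, headers, params):
    page = 1
    while True:
        params["page"] = page
        payload = requests.get(url, headers=headers, params=params).json()
        items = payload.get("data", {}).get("list", [])
        if not items:
            break  # an empty page signals the end of the result set
        yield items
        page += 1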
@@ -99,7 +118,7 @@ class TikTokAPI:
             result_data = result.get("data", {})
             items = result_data.get("list", [])
 
-            flat_structure(items=items,
+            flat_structure(items=items, timezone=self.timezone)
 
             yield items
 
ingestr/src/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.12.2"
+__version__ = "0.12.3"
{ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.12.2
+Version: 0.12.3
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
{ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/RECORD
CHANGED
@@ -3,10 +3,10 @@ ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
 ingestr/src/factory.py,sha256=UyE1TzTHn_V8JZno5SSYfQsho1eFYzzvOylogw4S49E,4389
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=Jy1N5EfbxfTae0L7PiZmPVxVYWLvOuLlw3kJ6vbT50M,48027
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
-ingestr/src/version.py,sha256=
+ingestr/src/version.py,sha256=Qu8-91hLcRe7wfW37PwNdivTonHHKLrqtJPOAq3Jvhc,23
 ingestr/src/adjust/__init__.py,sha256=NaRNwDhItG8Q7vUHw7zQvyfWjmT32M0CSc5ufjmBM9U,3067
 ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
@@ -61,8 +61,8 @@ ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu6
 ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
 ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
 ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
-ingestr/src/tiktok_ads/__init__.py,sha256=
-ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=
+ingestr/src/tiktok_ads/__init__.py,sha256=U4ZHPUW0c4LpKx4hjT2Lz5hgWFgwQSbAAkkYIrxYHZo,4469
+ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=cfdPflCeR_mCk5fxq0v4d7pzlvZDiAoz8bWQJYqKALM,3935
 ingestr/src/zendesk/__init__.py,sha256=C7HkN195DGdOHId2_Sa_kAlcBrUmnVYZUa_tPkiyf1Q,17564
 ingestr/src/zendesk/settings.py,sha256=Vdj706nTJFQ-3KH4nO97iYCQuba3dV3E9gfnmLK6xwU,2294
 ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1Ydq5FDH_0c,888
@@ -77,8 +77,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.12.2.dist-info/METADATA,sha256=
-ingestr-0.12.2.dist-info/WHEEL,sha256=
-ingestr-0.12.2.dist-info/entry_points.txt,sha256=
-ingestr-0.12.2.dist-info/licenses/LICENSE.md,sha256=
-ingestr-0.12.2.dist-info/RECORD,,
+ingestr-0.12.3.dist-info/METADATA,sha256=Kh5v7a3mzxqmekWFp8ebU-uHKwzHRrN0HkNHPRV3_5U,7910
+ingestr-0.12.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.12.3.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.12.3.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.12.3.dist-info/RECORD,,
{ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/WHEEL
File without changes
{ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/entry_points.txt
File without changes
{ingestr-0.12.2.dist-info → ingestr-0.12.3.dist-info}/licenses/LICENSE.md
File without changes