ingestr 0.12.9__py3-none-any.whl → 0.12.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

@@ -0,0 +1,380 @@
1
+ from typing import Dict, List
2
+
3
+ from . import field
4
+
5
+
6
+ class Report:
7
+ resource: str
8
+ unfilterable: bool
9
+ dimensions: List[str]
10
+ metrics: List[str]
11
+ segments: List[str]
12
+
13
+ def __init__(
14
+ self,
15
+ resource: str = "",
16
+ dimensions: List[str] = [],
17
+ metrics: List[str] = [],
18
+ segments: List[str] = [],
19
+ unfilterable: bool = False,
20
+ ):
21
+ self.resource = resource
22
+ self.dimensions = dimensions
23
+ self.metrics = metrics
24
+ self.segments = segments
25
+ self.unfilterable = unfilterable
26
+
27
+ def primary_keys(self) -> List[str]:
28
+ return [field.to_column(k) for k in self.dimensions + self.segments]
29
+
30
+ @classmethod
31
+ def from_spec(cls, spec: str):
32
+ """
33
+ Parse a report specification string into a Report object.
34
+ The expected format is:
35
+ custom:{resource}:{dimensions}:{metrics}
36
+
37
+ Example:
38
+ custom:ad_group_ad_asset_view:ad_group.id,campaign.id:clicks,conversions
39
+ """
40
+ if spec.count(":") != 3:
41
+ raise ValueError(
42
+ "Invalid report specification format. Expected daily:{resource}:{dimensions}:{metrics}"
43
+ )
44
+
45
+ _, resource, dimensions, metrics = spec.split(":")
46
+
47
+ report = cls()
48
+ report.segments = ["segments.date"]
49
+ report.resource = resource
50
+ if dimensions.strip() != "":
51
+ report.dimensions = [
52
+ d for d in map(cls._parse_dimension, dimensions.split(","))
53
+ ]
54
+ if metrics.strip() != "":
55
+ report.metrics = [m for m in map(cls._parse_metric, metrics.split(","))]
56
+ return report
57
+
58
+ @classmethod
59
+ def _parse_dimension(self, dim: str):
60
+ dim = dim.strip()
61
+ if dim.count(".") == 0:
62
+ raise ValueError("Invalid dimension format. Expected {resource}.{field}")
63
+ if dim.startswith("segments."):
64
+ raise ValueError(
65
+ "Invalid dimension format. Segments are not allowed in dimensions."
66
+ )
67
+ return dim
68
+
69
+ @classmethod
70
+ def _parse_metric(self, metric: str):
71
+ metric = metric.strip()
72
+ if not metric.startswith("metrics."):
73
+ metric = f"metrics.{metric.strip()}"
74
+ return metric
75
+
76
+
77
# Metrics requested by the account, campaign, ad-group, ad, audience and
# keyword reports. Kept as a tuple and copied with list() at each use so every
# Report still owns an independent list.
_PERFORMANCE_METRICS = (
    "metrics.active_view_impressions",
    "metrics.active_view_measurability",
    "metrics.active_view_measurable_cost_micros",
    "metrics.active_view_measurable_impressions",
    "metrics.active_view_viewability",
    "metrics.clicks",
    "metrics.conversions",
    "metrics.conversions_value",
    "metrics.cost_micros",
    "metrics.impressions",
    "metrics.interactions",
    "metrics.interaction_event_types",
    "metrics.view_through_conversions",
)

# Segments requested by the same six reports.
_PERFORMANCE_SEGMENTS = (
    "segments.date",
    "segments.ad_network_type",
    "segments.device",
)

# Predefined reports keyed by report name.
# NOTE(review): the click, landing-page, search-keyword and search-term
# reports list "segments.date" under `dimensions` rather than `segments`;
# preserved as-is -- confirm this is intentional.
BUILTIN_REPORTS: Dict[str, Report] = {
    "account_report_daily": Report(
        resource="campaign",
        dimensions=[
            "customer.id",
        ],
        metrics=list(_PERFORMANCE_METRICS),
        segments=list(_PERFORMANCE_SEGMENTS),
    ),
    "campaign_report_daily": Report(
        resource="campaign",
        dimensions=[
            "campaign.id",
            "customer.id",
        ],
        metrics=list(_PERFORMANCE_METRICS),
        segments=list(_PERFORMANCE_SEGMENTS),
    ),
    "ad_group_report_daily": Report(
        resource="ad_group",
        dimensions=[
            "ad_group.id",
            "customer.id",
            "campaign.id",
        ],
        metrics=list(_PERFORMANCE_METRICS),
        segments=list(_PERFORMANCE_SEGMENTS),
    ),
    "ad_report_daily": Report(
        resource="ad_group_ad",
        dimensions=[
            "ad_group.id",
            "ad_group_ad.ad.id",
            "customer.id",
            "campaign.id",
        ],
        segments=list(_PERFORMANCE_SEGMENTS),
        metrics=list(_PERFORMANCE_METRICS),
    ),
    "audience_report_daily": Report(
        resource="ad_group_audience_view",
        dimensions=[
            "ad_group.id",
            "customer.id",
            "campaign.id",
            "ad_group_criterion.criterion_id",
        ],
        segments=list(_PERFORMANCE_SEGMENTS),
        metrics=list(_PERFORMANCE_METRICS),
    ),
    "keyword_report_daily": Report(
        resource="keyword_view",
        dimensions=[
            "ad_group.id",
            "customer.id",
            "campaign.id",
            "ad_group_criterion.criterion_id",
        ],
        segments=list(_PERFORMANCE_SEGMENTS),
        metrics=list(_PERFORMANCE_METRICS),
    ),
    "click_report_daily": Report(
        resource="click_view",
        dimensions=[
            "click_view.gclid",
            "customer.id",
            "ad_group.id",
            "campaign.id",
            "segments.date",
        ],
        metrics=[
            "metrics.clicks",
        ],
    ),
    "landing_page_report_daily": Report(
        resource="landing_page_view",
        dimensions=[
            "landing_page_view.unexpanded_final_url",
            "landing_page_view.resource_name",
            "customer.id",
            "ad_group.id",
            "campaign.id",
            "segments.date",
        ],
        metrics=[
            "metrics.average_cpc",
            "metrics.clicks",
            "metrics.cost_micros",
            "metrics.ctr",
            "metrics.impressions",
            "metrics.mobile_friendly_clicks_percentage",
            "metrics.speed_score",
            "metrics.valid_accelerated_mobile_pages_clicks_percentage",
        ],
    ),
    "search_keyword_report_daily": Report(
        resource="keyword_view",
        dimensions=[
            "customer.id",
            "ad_group.id",
            "campaign.id",
            "keyword_view.resource_name",
            "ad_group_criterion.criterion_id",
            "segments.date",
        ],
        metrics=[
            "metrics.absolute_top_impression_percentage",
            "metrics.average_cpc",
            "metrics.average_cpm",
            "metrics.clicks",
            "metrics.conversions_from_interactions_rate",
            "metrics.conversions_value",
            "metrics.cost_micros",
            "metrics.ctr",
            "metrics.impressions",
            "metrics.top_impression_percentage",
            "metrics.view_through_conversions",
        ],
    ),
    "search_term_report_daily": Report(
        resource="search_term_view",
        dimensions=[
            "customer.id",
            "ad_group.id",
            "campaign.id",
            "search_term_view.resource_name",
            "search_term_view.search_term",
            "search_term_view.status",
            "segments.date",
        ],
        segments=[
            "segments.search_term_match_type",
        ],
        metrics=[
            "metrics.absolute_top_impression_percentage",
            "metrics.average_cpc",
            "metrics.clicks",
            "metrics.conversions",
            "metrics.conversions_from_interactions_rate",
            "metrics.conversions_from_interactions_value_per_interaction",
            "metrics.cost_micros",
            "metrics.ctr",
            "metrics.impressions",
            "metrics.top_impression_percentage",
            "metrics.view_through_conversions",
        ],
    ),
    "lead_form_submission_data_report_daily": Report(
        resource="lead_form_submission_data",
        dimensions=[
            "lead_form_submission_data.gclid",
            "lead_form_submission_data.submission_date_time",
            "lead_form_submission_data.lead_form_submission_fields",
            "lead_form_submission_data.custom_lead_form_submission_fields",
            "lead_form_submission_data.resource_name",
            "customer.id",
            "ad_group_ad.ad.id",
            "ad_group.id",
            "campaign.id",
        ],
        unfilterable=True,
    ),
    "local_services_lead_report_daily": Report(
        resource="local_services_lead",
        dimensions=[
            "customer.id",
            "local_services_lead.creation_date_time",
            "local_services_lead.contact_details",
            "local_services_lead.credit_details.credit_state",
            "local_services_lead.credit_details.credit_state_last_update_date_time",
            "local_services_lead.lead_charged",
            "local_services_lead.lead_status",
            "local_services_lead.lead_type",
            "local_services_lead.locale",
            "local_services_lead.note.description",
            "local_services_lead.note.edit_date_time",
            "local_services_lead.service_id",
        ],
        unfilterable=True,
    ),
    "local_services_lead_conversations_report_daily": Report(
        resource="local_services_lead_conversation",
        dimensions=[
            "customer.id",
            "local_services_lead_conversation.id",
            "local_services_lead_conversation.event_date_time",
            "local_services_lead_conversation.conversation_channel",
            "local_services_lead_conversation.message_details.attachment_urls",
            "local_services_lead_conversation.message_details.text",
            "local_services_lead_conversation.participant_type",
            "local_services_lead_conversation.phone_call_details.call_duration_millis",
            "local_services_lead_conversation.phone_call_details.call_recording_url",
        ],
        unfilterable=True,
    ),
}
@@ -0,0 +1,63 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.typing import TDataItem
6
+ from dlt.sources import DltResource
7
+ from pendulum import Date
8
+
9
+ from .dimension_time_enum import Dimension, TimeGranularity
10
+ from .helpers import LinkedInAdsAPI, find_intervals
11
+
12
+
13
@dlt.source(max_table_nesting=0)
def linked_in_ads_source(
    start_date: Date,
    end_date: Date | None,
    access_token: str,
    account_ids: list[str],
    dimension: Dimension,
    metrics: list[str],
    time_granularity: TimeGranularity,
) -> DltResource:
    """Build the dlt source exposing LinkedIn Ads analytics as ``custom_reports``.

    The resource is merged on a primary key made of the pivot dimension plus
    the date column(s): ``date`` for daily granularity, otherwise
    ``start_date`` and ``end_date``. Incremental loading tracks ``date`` or
    ``start_date`` respectively, starting at ``start_date`` and bounded by
    ``end_date`` (today when ``end_date`` is None).
    """
    # Choose the incremental cursor column and merge key for the granularity.
    if time_granularity == TimeGranularity.daily:
        cursor_column = "date"
        key_columns = [dimension.value, "date"]
    else:
        cursor_column = "start_date"
        key_columns = [dimension.value, "start_date", "end_date"]

    api = LinkedInAdsAPI(
        access_token=access_token,
        account_ids=account_ids,
        dimension=dimension,
        metrics=metrics,
        time_granularity=time_granularity,
    )

    @dlt.resource(write_disposition="merge", primary_key=key_columns)
    def custom_reports(
        dateTime=dlt.sources.incremental(
            cursor_column,
            initial_value=start_date,
            end_value=end_date,
            range_start="closed",
            range_end="closed",
        ),
    ) -> Iterable[TDataItem]:
        # With no explicit upper bound, load up to today.
        upper_bound = dateTime.end_value
        if upper_bound is None:
            upper_bound = pendulum.now().date()

        # The range is split into windows and each window is fetched separately.
        windows = find_intervals(
            start_date=dateTime.last_value,
            end_date=upper_bound,
            time_granularity=time_granularity,
        )
        for window_start, window_end in windows:
            yield api.fetch_pages(window_start, window_end)

    return custom_reports
@@ -0,0 +1,12 @@
1
+ from enum import Enum
2
+
3
+
4
class Dimension(Enum):
    """Pivot dimension for a LinkedIn Ads analytics report.

    The member value is upper-cased for the ``pivot`` query parameter and
    lower-cased for the column that holds the pivot value in each row.
    """

    campaign = "campaign"
    creative = "creative"
    account = "account"
8
+
9
+
10
class TimeGranularity(Enum):
    """Reporting granularity; the value is sent as ``timeGranularity`` in the URL."""

    daily = "DAILY"
    monthly = "MONTHLY"
@@ -0,0 +1,148 @@
1
+ from urllib.parse import quote
2
+
3
+ import pendulum
4
+ import requests
5
+ from dlt.sources.helpers.requests import Client
6
+ from pendulum import Date
7
+
8
+ from .dimension_time_enum import Dimension, TimeGranularity
9
+
10
+
11
def retry_on_limit(
    response: "requests.Response | None", exception: "BaseException | None"
) -> bool:
    """Retry predicate: True only when a response arrived with HTTP status 429.

    ``exception`` is accepted to match the retry-condition signature but is
    not consulted; a missing response is never retried.
    """
    return response is not None and response.status_code == 429
17
+
18
+
19
def create_client() -> requests.Session:
    """Return the session of a retrying client configured for this API.

    Settings: 10-second request timeout, no automatic raise on error status,
    up to 12 attempts, retrying only when ``retry_on_limit`` says so.
    """
    retrying_client = Client(
        request_timeout=10.0,
        raise_for_status=False,
        retry_condition=retry_on_limit,
        request_max_attempts=12,
    )
    return retrying_client.session
26
+
27
+
28
def flat_structure(items, pivot: Dimension, time_granularity: TimeGranularity):
    """Flatten the nested pivot and date fields of each item, in place.

    For every item:
      * ``pivotValues`` is replaced by a key named after the lower-cased
        pivot value: the single element when there is exactly one, the whole
        list when there are several.
      * ``dateRange`` is replaced by ``date`` (daily granularity) or by
        ``start_date`` and ``end_date`` (any other granularity).

    Items lacking either key are left without the corresponding columns
    rather than raising ``KeyError``.

    Args:
        items: Mutable mappings to flatten; they are modified in place.
        pivot: Dimension whose value names the pivot column.
        time_granularity: Decides which date columns are produced.

    Returns:
        The same ``items`` object that was passed in.
    """
    pivot_key = pivot.value.lower()
    is_daily = time_granularity == TimeGranularity.daily

    for item in items:
        # pop() with a default removes the nested key when present and
        # tolerates its absence, matching the guarded reads.
        pivot_values = item.pop("pivotValues", None)
        if pivot_values is not None:
            if len(pivot_values) > 1:
                item[pivot_key] = pivot_values
            else:
                item[pivot_key] = pivot_values[0]

        date_range = item.pop("dateRange", None)
        if date_range is not None:
            start = date_range["start"]
            start_dt = pendulum.date(
                year=start["year"],
                month=start["month"],
                day=start["day"],
            )
            if is_daily:
                item["date"] = start_dt
            else:
                end = date_range["end"]
                end_dt = pendulum.date(
                    year=end["year"],
                    month=end["month"],
                    day=end["day"],
                )
                item["start_date"] = start_dt
                item["end_date"] = end_dt

    return items
58
+
59
+
60
def find_intervals(start_date: Date, end_date: Date, time_granularity: TimeGranularity):
    """Split ``[start_date, end_date]`` into consecutive inclusive windows.

    Each window ends at most six months (daily granularity) or two years
    (any other granularity) after it starts, capped at ``end_date``; the next
    window starts the day after the previous one ends.

    Returns:
        A list of ``(window_start, window_end)`` tuples.

    Raises:
        ValueError: if ``start_date`` is after ``end_date``. Equal dates are
            accepted and give a single one-day window.
    """
    if start_date > end_date:
        raise ValueError("Start date must be less than end date")

    # Maximum window length, as keyword arguments for Date.add().
    if time_granularity == TimeGranularity.daily:
        max_span = {"months": 6}
    else:
        max_span = {"years": 2}

    windows = []
    cursor = start_date
    while cursor <= end_date:
        window_end = min(cursor.add(**max_span), end_date)
        windows.append((cursor, window_end))
        cursor = window_end.add(days=1)

    return windows
77
+
78
+
79
def construct_url(
    start: Date,
    end: Date,
    account_ids: list[str],
    metrics: list[str],
    dimension: Dimension,
    time_granularity: TimeGranularity,
):
    """Build the adAnalytics request URL for one date window.

    Account ids are wrapped as ``urn:li:sponsoredAccount:{id}`` and
    percent-encoded; the date range, pivot and metric names are inserted
    verbatim, so no other part of the query string is escaped.

    Returns:
        The full request URL as a string.
    """

    def as_date_tuple(day):
        # Date literal in the parenthesised form used by the query string.
        return f"(year:{day.year},month:{day.month},day:{day.day})"

    account_urns = ",".join(
        quote(f"urn:li:sponsoredAccount:{account_id}") for account_id in account_ids
    )
    fields = ",".join(metrics)

    query = "&".join(
        (
            "q=analytics",
            f"timeGranularity={time_granularity.value}",
            f"dateRange=(start:{as_date_tuple(start)},end:{as_date_tuple(end)})",
            f"accounts=List({account_urns})",
            f"pivot={dimension.value.upper()}",
            f"fields={fields}",
        )
    )
    return f"https://api.linkedin.com/rest/adAnalytics?{query}"
105
+
106
+
107
class LinkedInAdsAPI:
    """Client for the LinkedIn adAnalytics endpoint with a fixed report shape.

    The accounts, pivot dimension, metrics and time granularity are set once
    at construction; ``fetch_pages`` then only varies the date window.
    """

    def __init__(
        self,
        access_token,
        time_granularity,
        account_ids,
        dimension,
        metrics,
    ):
        """Store the report configuration and prepare the request headers."""
        self.time_granularity: TimeGranularity = time_granularity
        self.account_ids: list[str] = account_ids
        self.dimension: Dimension = dimension
        self.metrics: list[str] = metrics
        self.headers = {
            "Authorization": f"Bearer {access_token}",
            "Linkedin-Version": "202411",
            "X-Restli-Protocol-Version": "2.0.0",
        }

    @staticmethod
    def _error_message(response) -> str:
        """Describe a failed response without assuming its body is JSON.

        Uses the ``message`` field of a JSON object body when present;
        otherwise falls back to the HTTP status code and raw body text.
        """
        try:
            payload = response.json()
        except ValueError:
            # JSON decoding errors are ValueError subclasses; an error page
            # from a proxy or gateway lands here.
            payload = None

        detail = payload.get("message") if isinstance(payload, dict) else None
        if detail is None:
            detail = f"HTTP {response.status_code}: {response.text}"
        return f"LinkedIn API Error: {detail}"

    def fetch_pages(self, start: Date, end: Date):
        """Fetch the analytics for one date window and yield the flattened rows.

        This is a generator: the request is sent on first iteration. Despite
        the name, a single request is made and a single list is yielded; no
        pagination is followed here.

        Raises:
            ValueError: if the response status is not 200.
        """
        client = create_client()
        url = construct_url(
            start=start,
            end=end,
            account_ids=self.account_ids,
            metrics=self.metrics,
            dimension=self.dimension,
            time_granularity=self.time_granularity,
        )
        response = client.get(url=url, headers=self.headers)

        if response.status_code != 200:
            raise ValueError(self._error_message(response))

        items = response.json().get("elements", [])
        yield flat_structure(
            items=items,
            pivot=self.dimension,
            time_granularity=self.time_granularity,
        )