ingestr 0.12.9__py3-none-any.whl → 0.12.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic; consult the advisory linked from the registry listing for more details.

ingestr/src/factory.py CHANGED
@@ -33,6 +33,7 @@ from ingestr.src.sources import (
33
33
  HubspotSource,
34
34
  KafkaSource,
35
35
  KlaviyoSource,
36
+ LinkedInAdsSource,
36
37
  LocalCsvSource,
37
38
  MongoDbSource,
38
39
  NotionSource,
@@ -126,6 +127,7 @@ class SourceDestinationFactory:
126
127
  "googleanalytics": GoogleAnalyticsSource,
127
128
  "appstore": AppleAppStoreSource,
128
129
  "gs": GCSSource,
130
+ "linkedinads": LinkedInAdsSource,
129
131
  }
130
132
  destinations: Dict[str, Type[DestinationProtocol]] = {
131
133
  "bigquery": BigQueryDestination,
@@ -0,0 +1,63 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.typing import TDataItem
6
+ from dlt.sources import DltResource
7
+ from pendulum import Date
8
+
9
+ from .dimension_time_enum import Dimension, TimeGranularity
10
+ from .helpers import LinkedInAdsAPI, find_intervals
11
+
12
+
13
@dlt.source(max_table_nesting=0)
def linked_in_ads_source(
    start_date: Date,
    end_date: Date | None,
    access_token: str,
    account_ids: list[str],
    dimension: Dimension,
    metrics: list[str],
    time_granularity: TimeGranularity,
) -> DltResource:
    """dlt source that produces LinkedIn Ads analytics reports.

    Args:
        start_date: first day of the reporting window (inclusive).
        end_date: last day of the window; ``None`` means "up to now",
            resolved at extraction time inside the resource.
        access_token: LinkedIn OAuth bearer token.
        account_ids: sponsored-account ids to query.
        dimension: pivot dimension (campaign / creative / account).
        metrics: analytics field names to request.
        time_granularity: DAILY or MONTHLY reporting buckets.

    Returns:
        The ``custom_reports`` dlt resource configured for merge loading.
    """
    linkedin_api = LinkedInAdsAPI(
        access_token=access_token,
        account_ids=account_ids,
        dimension=dimension,
        metrics=metrics,
        time_granularity=time_granularity,
    )

    # The merge key must match the columns flat_structure() emits:
    # daily rows carry a single "date" column, monthly rows carry a
    # "start_date"/"end_date" pair. The incremental cursor follows suit.
    if time_granularity == TimeGranularity.daily:
        primary_key = [dimension.value, "date"]
        incremental_loading_param = "date"
    else:
        primary_key = [dimension.value, "start_date", "end_date"]
        incremental_loading_param = "start_date"

    @dlt.resource(write_disposition="merge", primary_key=primary_key)
    def custom_reports(
        dateTime=(
            # Closed range on both ends: boundary days are re-fetched and
            # deduplicated by the merge write disposition.
            dlt.sources.incremental(
                incremental_loading_param,
                initial_value=start_date,
                end_value=end_date,
                range_start="closed",
                range_end="closed",
            )
        ),
    ) -> Iterable[TDataItem]:
        # An open-ended window is clamped to "today" at extraction time.
        if dateTime.end_value is None:
            end_date = pendulum.now().date()
        else:
            end_date = dateTime.end_value

        # Resume from the incremental cursor and split the remaining window
        # into API-sized chunks (see find_intervals), fetching each in turn.
        list_of_interval = find_intervals(
            start_date=dateTime.last_value,
            end_date=end_date,
            time_granularity=time_granularity,
        )
        for start, end in list_of_interval:
            # NOTE(review): fetch_pages is a generator that is yielded as-is;
            # presumably dlt unrolls nested iterators — confirm against dlt docs.
            yield linkedin_api.fetch_pages(start, end)

    return custom_reports
@@ -0,0 +1,12 @@
1
+ from enum import Enum
2
+
3
+
4
class Dimension(Enum):
    """Pivot dimension for LinkedIn Ads analytics reports.

    Values are lowercase; callers upper-case them when building the
    ``pivot=`` query parameter.
    """

    campaign = "campaign"
    creative = "creative"
    account = "account"


class TimeGranularity(Enum):
    """Reporting bucket size accepted by the adAnalytics endpoint."""

    daily = "DAILY"
    monthly = "MONTHLY"
@@ -0,0 +1,148 @@
1
+ from urllib.parse import quote
2
+
3
+ import pendulum
4
+ import requests
5
+ from dlt.sources.helpers.requests import Client
6
+ from pendulum import Date
7
+
8
+ from .dimension_time_enum import Dimension, TimeGranularity
9
+
10
+
11
def retry_on_limit(
    response: requests.Response | None, exception: BaseException | None
) -> bool:
    """Decide whether the retrying client should retry a request.

    Only HTTP 429 (rate limited) triggers a retry; a missing response
    (e.g. a transport-level failure) is never retried.
    """
    return response is not None and response.status_code == 429
17
+
18
+
19
def create_client() -> requests.Session:
    """Build a requests session that retries rate-limited calls.

    Configures dlt's retrying Client with a 10s timeout and up to 12
    attempts, retrying only when retry_on_limit() approves (HTTP 429);
    non-2xx statuses are returned to the caller instead of raising.
    """
    retrying_client = Client(
        request_max_attempts=12,
        request_timeout=10.0,
        retry_condition=retry_on_limit,
        raise_for_status=False,
    )
    return retrying_client.session
26
+
27
+
28
def flat_structure(items, pivot: "Dimension", time_granularity: "TimeGranularity"):
    """Flatten LinkedIn adAnalytics rows in place and return the same list.

    Each row's ``pivotValues`` is rewritten into a column named after the
    pivot dimension, and ``dateRange`` is expanded into ``date`` (daily
    granularity) or ``start_date``/``end_date`` (monthly).

    Fix over the original: ``dateRange``/``pivotValues`` were deleted
    unconditionally, raising KeyError on rows missing either key, and an
    empty ``pivotValues`` list raised IndexError. Both are now handled.
    """
    for item in items:
        # pop() removes the raw key and tolerates its absence.
        pivot_values = item.pop("pivotValues", None)
        if pivot_values is not None:
            if len(pivot_values) == 1:
                # A single URN is stored as a scalar for convenience.
                item[pivot.value.lower()] = pivot_values[0]
            else:
                # Zero or multiple URNs stay a list.
                item[pivot.value.lower()] = pivot_values

        date_range = item.pop("dateRange", None)
        if date_range is not None:
            start_date = date_range["start"]
            start_dt = pendulum.date(
                year=start_date["year"],
                month=start_date["month"],
                day=start_date["day"],
            )
            if time_granularity == TimeGranularity.daily:
                item["date"] = start_dt
            else:
                end_date = date_range["end"]
                end_dt = pendulum.date(
                    year=end_date["year"],
                    month=end_date["month"],
                    day=end_date["day"],
                )
                item["start_date"] = start_dt
                item["end_date"] = end_dt

    return items
58
+
59
+
60
def find_intervals(start_date: Date, end_date: Date, time_granularity: TimeGranularity):
    """Split the inclusive [start_date, end_date] window into API-sized chunks.

    Daily reports are chunked into at-most-six-month windows, monthly
    reports into at-most-two-year windows; consecutive chunks never overlap.

    Raises:
        ValueError: when start_date is after end_date.
    """
    if start_date > end_date:
        raise ValueError("Start date must be less than end date")

    is_daily = time_granularity == TimeGranularity.daily
    intervals = []
    cursor = start_date
    while cursor <= end_date:
        chunk_end = cursor.add(months=6) if is_daily else cursor.add(years=2)
        # Clamp the final chunk to the requested window.
        if chunk_end > end_date:
            chunk_end = end_date
        intervals.append((cursor, chunk_end))
        # Next chunk starts the day after, so boundaries are not repeated.
        cursor = chunk_end.add(days=1)

    return intervals
77
+
78
+
79
def construct_url(
    start: "Date",
    end: "Date",
    account_ids: list[str],
    metrics: list[str],
    dimension: "Dimension",
    time_granularity: "TimeGranularity",
):
    """Build the LinkedIn adAnalytics finder URL for one date interval.

    Account URNs are percent-encoded individually and wrapped in the
    Rest.li 2.0 ``List(...)`` literal; the commas separating list elements
    must stay unencoded, which is why joining happens after quoting.

    Returns:
        The fully assembled request URL as a string.
    """
    date_range = f"(start:(year:{start.year},month:{start.month},day:{start.day})"
    date_range += f",end:(year:{end.year},month:{end.month},day:{end.day}))"
    accounts = ",".join(
        quote(f"urn:li:sponsoredAccount:{account_id}") for account_id in account_ids
    )
    encoded_accounts = f"List({accounts})"
    dimension_str = dimension.value.upper()
    time_granularity_str = time_granularity.value
    # Metric names are used verbatim; join them directly.
    metrics_str = ",".join(metrics)

    url = (
        f"https://api.linkedin.com/rest/adAnalytics?"
        f"q=analytics&timeGranularity={time_granularity_str}&"
        f"dateRange={date_range}&accounts={encoded_accounts}&"
        f"pivot={dimension_str}&fields={metrics_str}"
    )

    return url
105
+
106
+
107
class LinkedInAdsAPI:
    """Thin client for the LinkedIn `adAnalytics` REST endpoint."""

    def __init__(
        self,
        access_token,
        time_granularity,
        account_ids,
        dimension,
        metrics,
    ):
        # Query parameters reused for every fetch.
        self.time_granularity: TimeGranularity = time_granularity
        self.account_ids: list[str] = account_ids
        self.dimension: Dimension = dimension
        self.metrics: list[str] = metrics
        # The Rest.li 2.0 protocol header is required for the List(...)
        # syntax produced by construct_url; "Linkedin-Version" pins the
        # monthly API release this integration was written against.
        self.headers = {
            "Authorization": f"Bearer {access_token}",
            "Linkedin-Version": "202411",
            "X-Restli-Protocol-Version": "2.0.0",
        }

    def fetch_pages(self, start: Date, end: Date):
        """Yield flattened analytics rows for one [start, end] interval.

        Raises:
            ValueError: when the API responds with a non-200 status.

        NOTE(review): despite the name, this issues a single request per
        interval and follows no pagination tokens — confirm the endpoint
        returns all elements for a <=6-month / <=2-year window.
        """
        client = create_client()  # retrying session (handles HTTP 429)
        url = construct_url(
            start=start,
            end=end,
            account_ids=self.account_ids,
            metrics=self.metrics,
            dimension=self.dimension,
            time_granularity=self.time_granularity,
        )
        response = client.get(url=url, headers=self.headers)

        if response.status_code != 200:
            # assumes error bodies are JSON with a "message" field — a
            # non-JSON error body would raise from .json() instead. TODO confirm.
            error_data = response.json()
            raise ValueError(f"LinkedIn API Error: {error_data.get('message')}")

        result = response.json()
        items = result.get("elements", [])
        yield flat_structure(
            items=items,
            pivot=self.dimension,
            time_granularity=self.time_granularity,
        )
ingestr/src/sources.py CHANGED
@@ -69,6 +69,11 @@ from ingestr.src.hubspot import hubspot
69
69
  from ingestr.src.kafka import kafka_consumer
70
70
  from ingestr.src.kafka.helpers import KafkaCredentials
71
71
  from ingestr.src.klaviyo._init_ import klaviyo_source
72
+ from ingestr.src.linkedin_ads import linked_in_ads_source
73
+ from ingestr.src.linkedin_ads.dimension_time_enum import (
74
+ Dimension,
75
+ TimeGranularity,
76
+ )
72
77
  from ingestr.src.mongodb import mongodb_collection
73
78
  from ingestr.src.notion import notion_databases
74
79
  from ingestr.src.shopify import shopify_source
@@ -1569,3 +1574,80 @@ class GCSSource:
1569
1574
  return readers(
1570
1575
  bucket_url, fs, path_to_file
1571
1576
  ).with_resources(endpoint)
1577
+
1578
+
1579
class LinkedInAdsSource:
    """Factory-facing wrapper that builds a LinkedIn Ads dlt source.

    URI shape:   ``linkedinads://?access_token=...&account_ids=id1,id2``
    Table shape: ``custom:<dimensions>:<metrics>`` where ``<dimensions>``
    contains one pivot ('campaign', 'creative' or 'account') plus a time
    bucket ('date' for daily, 'month' for monthly granularity).
    """

    # Names accepted as the pivot dimension (mirrors the Dimension enum).
    _PIVOT_DIMENSIONS = ("campaign", "creative", "account")

    def handles_incrementality(self) -> bool:
        # Incremental state is managed by the dlt resource itself.
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Parse the URI/table spec and return the configured dlt source.

        Raises:
            ValueError: on missing credentials or a malformed table spec.
        """
        parsed_uri = urlparse(uri)
        source_fields = parse_qs(parsed_uri.query)

        access_token = source_fields.get("access_token")
        if not access_token:
            raise ValueError("access_token is required to connect to LinkedIn Ads")

        account_ids = source_fields.get("account_ids")
        if not account_ids:
            raise ValueError("account_ids is required to connect to LinkedIn Ads")
        account_ids = account_ids[0].replace(" ", "").split(",")

        # Default window starts at 2018-01-01; an open end means "until now".
        interval_start = kwargs.get("interval_start")
        interval_end = kwargs.get("interval_end")
        start_date = (
            ensure_pendulum_datetime(interval_start).date()
            if interval_start
            else pendulum.datetime(2018, 1, 1).date()
        )
        end_date = (
            ensure_pendulum_datetime(interval_end).date() if interval_end else None
        )

        fields = table.split(":")
        if len(fields) != 3:
            raise ValueError(
                "Invalid table format. Expected format: custom:<dimensions>:<metrics>"
            )

        dimensions = fields[1].replace(" ", "").split(",")
        dimensions = [item for item in dimensions if item.strip()]

        # Fix over the original: select the first *valid* pivot name instead
        # of blindly indexing dimensions[0], so an unrecognized leading entry
        # (e.g. "foo,campaign,date") cannot crash with a KeyError below.
        pivots = [d for d in dimensions if d in self._PIVOT_DIMENSIONS]
        if not pivots:
            raise ValueError(
                "'campaign', 'creative' or 'account' is required to connect to LinkedIn Ads, please provide at least one of these dimensions."
            )
        if "date" not in dimensions and "month" not in dimensions:
            raise ValueError(
                "'date' or 'month' is required to connect to LinkedIn Ads, please provide at least one of these dimensions."
            )

        # 'date' wins over 'month' when both are present (daily granularity).
        if "date" in dimensions:
            time_granularity = TimeGranularity.daily
        else:
            time_granularity = TimeGranularity.monthly

        dimension = Dimension[pivots[0]]

        metrics = fields[2].replace(" ", "").split(",")
        metrics = [item for item in metrics if item.strip()]
        # These two fields are always required by flat_structure downstream.
        if "dateRange" not in metrics:
            metrics.append("dateRange")
        if "pivotValues" not in metrics:
            metrics.append("pivotValues")

        return linked_in_ads_source(
            start_date=start_date,
            end_date=end_date,
            access_token=access_token[0],
            account_ids=account_ids,
            dimension=dimension,
            metrics=metrics,
            time_granularity=time_granularity,
        ).with_resources("custom_reports")
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.12.9"
1
+ __version__ = "0.12.10"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.9
3
+ Version: 0.12.10
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -2,12 +2,12 @@ ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
4
4
  ingestr/src/errors.py,sha256=MrdLY5Gpr3g3qbYjl-U8-m8kxBJQOJo4ZVOsQpQbRR8,447
5
- ingestr/src/factory.py,sha256=oNF9dovovLG34xLgRZ5fbyA_XSHxEuTW27s1cb35KDM,4622
5
+ ingestr/src/factory.py,sha256=kzbJ10fF1xySzarhDfJ2l4_Hm925cglsvzk3MAIMkaI,4687
6
6
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
7
- ingestr/src/sources.py,sha256=JoO-IQ_eB4Ia1fC1GWs6N74l9A3tXQT-Fj0uNBiSI_Y,55978
7
+ ingestr/src/sources.py,sha256=qUs9s_0kCurUJUmbrBCTiPzyLpEtaO-yCOnOYvsftUY,58965
8
8
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
9
9
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
10
- ingestr/src/version.py,sha256=FSGqM7DffUSCa5R2rqVlNo-yNzBd6cgAXS1_0tElLy0,23
10
+ ingestr/src/version.py,sha256=037G21EIHmneVX5BgQiyUajkoMsqfZoVvjyP3_6MaDs,24
11
11
  ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
12
12
  ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
13
13
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
@@ -54,6 +54,9 @@ ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,
54
54
  ingestr/src/klaviyo/_init_.py,sha256=ucWHqBe8DQvXVpbmxKFAV5ljpCFb4ps_2QTD0OSiWxY,7905
55
55
  ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
56
56
  ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
57
+ ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
58
+ ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
59
+ ingestr/src/linkedin_ads/helpers.py,sha256=6jSIp4DF0iUafJWU3Y7DbIJGKRH6hrx4S7zCTDOjNuE,4528
57
60
  ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
58
61
  ingestr/src/mongodb/helpers.py,sha256=y9rYKR8eyIqam_eNsZmwSYevgi8mghh7Zp8qhTHl65s,5652
59
62
  ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
@@ -91,8 +94,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
91
94
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
92
95
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
93
96
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
94
- ingestr-0.12.9.dist-info/METADATA,sha256=p7RGcw0cnHPU93RLIPWOkMtj36Ax9BnA7bPSKIQ3pfg,8056
95
- ingestr-0.12.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
96
- ingestr-0.12.9.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
97
- ingestr-0.12.9.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
98
- ingestr-0.12.9.dist-info/RECORD,,
97
+ ingestr-0.12.10.dist-info/METADATA,sha256=_jr6Mv4lUktQkO2MP3q4RBeHe2RnCy1-4WWGXva6Qbo,8057
98
+ ingestr-0.12.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
99
+ ingestr-0.12.10.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
100
+ ingestr-0.12.10.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
101
+ ingestr-0.12.10.dist-info/RECORD,,