ingestr 0.7.7__py3-none-any.whl → 0.7.8__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of ingestr might be problematic.
- ingestr/main.py +9 -0
- ingestr/src/.gitignore +10 -0
- ingestr/src/facebook_ads/__init__.py +197 -0
- ingestr/src/facebook_ads/exceptions.py +5 -0
- ingestr/src/facebook_ads/helpers.py +255 -0
- ingestr/src/facebook_ads/settings.py +208 -0
- ingestr/src/factory.py +9 -0
- ingestr/src/kafka/__init__.py +103 -0
- ingestr/src/kafka/helpers.py +227 -0
- ingestr/src/klaviyo/_init_.py +173 -0
- ingestr/src/klaviyo/client.py +212 -0
- ingestr/src/klaviyo/helpers.py +19 -0
- ingestr/src/sources.py +141 -0
- ingestr/src/version.py +1 -1
- {ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/METADATA +13 -1
- {ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/RECORD +19 -9
- {ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/WHEEL +0 -0
- {ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/entry_points.txt +0 -0
- {ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/klaviyo/_init_.py
ADDED
@@ -0,0 +1,173 @@
+from typing import Iterable
+
+import dlt
+import pendulum
+import requests
+from dlt.common.time import ensure_pendulum_datetime
+from dlt.common.typing import TAnyDateTime, TDataItem
+from dlt.sources import DltResource
+from dlt.sources.helpers.requests import Client
+
+from ingestr.src.klaviyo.client import KlaviyoClient
+from ingestr.src.klaviyo.helpers import split_date_range
+
+
+def retry_on_limit(response: requests.Response, exception: BaseException) -> bool:
+    return response.status_code == 429
+
+
+def create_client() -> requests.Session:
+    return Client(
+        request_timeout=10.0,
+        raise_for_status=False,
+        retry_condition=retry_on_limit,
+        request_max_attempts=12,
+        request_backoff_factor=2,
+    ).session
+
+
+@dlt.source(max_table_nesting=0)
+def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResource]:
+    start_date_obj = ensure_pendulum_datetime(start_date)
+    client = KlaviyoClient(api_key)
+
+    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    def events(
+        datetime=dlt.sources.incremental("datetime", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        intervals = split_date_range(
+            pendulum.parse(datetime.start_value), pendulum.now()
+        )
+
+        for start, end in intervals:
+            yield lambda s=start, e=end: client.fetch_events(create_client(), s, e)
+
+    @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
+    def profiles(
+        updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        intervals = split_date_range(
+            pendulum.parse(updated.start_value), pendulum.now()
+        )
+
+        for start, end in intervals:
+            yield lambda s=start, e=end: client.fetch_profiles(create_client(), s, e)
+
+    @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
+    def campaigns(
+        updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        intervals = split_date_range(
+            pendulum.parse(updated_at.start_value), pendulum.now()
+        )
+
+        for campaign_type in ["email", "sms"]:
+            for start, end in intervals:
+                yield lambda s=start, e=end, ct=campaign_type: client.fetch_campaigns(
+                    create_client(), s, e, ct
+                )
+
+    @dlt.resource(write_disposition="merge", primary_key="id")
+    def metrics(
+        updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        yield from client.fetch_metrics(create_client(), updated.start_value)
+
+    @dlt.resource(write_disposition="replace", primary_key="id")
+    def tags() -> Iterable[TDataItem]:
+        yield from client.fetch_tag(create_client())
+
+    @dlt.resource(write_disposition="replace", primary_key="id")
+    def coupons() -> Iterable[TDataItem]:
+        yield from client.fetch_coupons(create_client())
+
+    @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-variants")
+    def catalog_variants(
+        updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        yield from client.fetch_catalog_variant(create_client(), updated.start_value)
+
+    @dlt.resource(
+        write_disposition="merge", primary_key="id", name="catalog-categories"
+    )
+    def catalog_categories(
+        updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        yield from client.fetch_catalog_categories(create_client(), updated.start_value)
+
+    @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-items")
+    def catalog_items(
+        updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        yield from client.fetch_catalog_item(create_client(), updated.start_value)
+
+    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    def forms(
+        updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        intervals = split_date_range(
+            pendulum.parse(updated_at.start_value), pendulum.now()
+        )
+
+        for start, end in intervals:
+            yield lambda s=start, e=end: client.fetch_forms(create_client(), s, e)
+
+    @dlt.resource(write_disposition="merge", primary_key="id")
+    def lists(
+        updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        yield from client.fetch_lists(create_client(), updated.start_value)
+
+    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    def images(
+        updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        intervals = split_date_range(
+            pendulum.parse(updated_at.start_value), pendulum.now()
+        )
+        for start, end in intervals:
+            yield lambda s=start, e=end: client.fetch_images(create_client(), s, e)
+
+    @dlt.resource(write_disposition="merge", primary_key="id")
+    def segments(
+        updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        yield from client.fetch_segments(create_client(), updated.start_value)
+
+    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    def flows(
+        updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        intervals = split_date_range(
+            pendulum.parse(updated.start_value), pendulum.now()
+        )
+        for start, end in intervals:
+            yield lambda s=start, e=end: client.fetch_flows(create_client(), s, e)
+
+    @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+    def templates(
+        updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+    ) -> Iterable[TDataItem]:
+        intervals = split_date_range(
+            pendulum.parse(updated.start_value), pendulum.now()
+        )
+        for start, end in intervals:
+            yield lambda s=start, e=end: client.fetch_templates(create_client(), s, e)
+
+    return (
+        events,
+        profiles,
+        campaigns,
+        metrics,
+        tags,
+        coupons,
+        catalog_variants,
+        catalog_categories,
+        catalog_items,
+        forms,
+        lists,
+        images,
+        segments,
+        flows,
+        templates,
+    )
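For orientation, a minimal sketch of how this source could be run through a dlt pipeline, assuming only the signatures shown above; the pipeline and dataset names are illustrative, not part of this release:

# Sketch only: load the "events" resource of the new Klaviyo source.
import dlt

from ingestr.src.klaviyo._init_ import klaviyo_source

pipeline = dlt.pipeline(
    pipeline_name="klaviyo_demo",  # illustrative name
    destination="duckdb",
    dataset_name="klaviyo_raw",  # illustrative name
)
load_info = pipeline.run(
    klaviyo_source(api_key="pk_...", start_date="2024-01-01").with_resources("events")
)
print(load_info)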
ingestr/src/klaviyo/client.py
ADDED
@@ -0,0 +1,212 @@
+from urllib.parse import urlencode
+
+import pendulum
+import requests
+
+BASE_URL = "https://a.klaviyo.com/api"
+
+
+class KlaviyoClient:
+    def __init__(self, api_key: str):
+        self.api_key = api_key
+
+    def __get_headers(self):
+        return {
+            "Authorization": f"Klaviyo-API-Key {self.api_key}",
+            "accept": "application/json",
+            "revision": "2024-07-15",
+        }
+
+    def _flatten_attributes(self, items: list):
+        for event in items:
+            if "attributes" not in event:
+                continue
+
+            for attribute_key in event["attributes"]:
+                event[attribute_key] = event["attributes"][attribute_key]
+
+            del event["attributes"]
+        return items
+
+    def _fetch_pages(
+        self, session: requests.Session, url: str, flat: bool = True
+    ) -> list:
+        all_items = []
+        while True:
+            response = session.get(url=url, headers=self.__get_headers())
+            result = response.json()
+            items = result.get("data", [])
+
+            if flat:
+                items = self._flatten_attributes(items)
+
+            all_items.extend(items)
+            nextURL = result.get("links", {}).get("next")
+            if nextURL is None:
+                break
+
+            url = nextURL
+
+        return all_items
+
+    def fetch_events(
+        self,
+        session: requests.Session,
+        start_date: str,
+        end_date: str,
+    ):
+        print(f"Fetching events for {start_date} to {end_date}")
+        url = f"{BASE_URL}/events/?sort=-datetime&filter=and(greater-or-equal(datetime,{start_date}),less-than(datetime,{end_date}))"
+        return self._fetch_pages(session, url)
+
+    def fetch_metrics(
+        self,
+        session: requests.Session,
+        last_updated: str,
+    ):
+        print(f"Fetching metrics since {last_updated}")
+        url = f"{BASE_URL}/metrics"
+        items = self._fetch_pages(session, url)
+
+        last_updated_obj = pendulum.parse(last_updated)
+        for item in items:
+            updated_at = pendulum.parse(item["updated"])
+            if updated_at > last_updated_obj:
+                yield item
+
+    def fetch_profiles(
+        self,
+        session: requests.Session,
+        start_date: str,
+        end_date: str,
+    ):
+        pendulum_start_date = pendulum.parse(start_date)
+        pendulum_start_date = pendulum_start_date.subtract(seconds=1)
+        url = f"{BASE_URL}/profiles/?sort=updated&filter=and(greater-than(updated,{pendulum_start_date.isoformat()}),less-than(updated,{end_date}))"
+        return self._fetch_pages(session, url)
+
+    def fetch_campaigns(
+        self,
+        session: requests.Session,
+        start_date: str,
+        end_date: str,
+        campaign_type: str,
+    ):
+        print(f"Fetching {campaign_type} campaigns for {start_date} to {end_date}")
+
+        base_url = f"{BASE_URL}/campaigns/"
+        params = {
+            "sort": "updated_at",
+            "filter": f"and(equals(messages.channel,'{campaign_type}'),greater-or-equal(updated_at,{start_date}),less-than(updated_at,{end_date}))",
+        }
+        url = f"{base_url}?{urlencode(params)}"
+        pages = self._fetch_pages(session, url)
+        for page in pages:
+            page["campaign_type"] = campaign_type
+
+        return pages
+
+    def fetch_tag(self, session: requests.Session):
+        url = f"{BASE_URL}/tags"
+        return self._fetch_pages(session, url, False)
+
+    def fetch_catalog_variant(
+        self,
+        session: requests.Session,
+        last_updated: str,
+    ):
+        url = f"{BASE_URL}/catalog-variants"
+        items = self._fetch_pages(session, url)
+        last_updated_obj = pendulum.parse(last_updated)
+
+        for item in items:
+            updated_at = pendulum.parse(item["updated"])
+            if updated_at > last_updated_obj:
+                yield item
+
+    def fetch_coupons(self, session: requests.Session):
+        url = f"{BASE_URL}/coupons"
+        return self._fetch_pages(session, url, False)
+
+    def fetch_catalog_categories(
+        self,
+        session: requests.Session,
+        last_updated: str,
+    ):
+        url = f"{BASE_URL}/catalog-categories"
+        items = self._fetch_pages(session, url)
+        last_updated_obj = pendulum.parse(last_updated)
+
+        for item in items:
+            updated_at = pendulum.parse(item["updated"])
+            if updated_at > last_updated_obj:
+                yield item
+
+    def fetch_catalog_item(
+        self,
+        session: requests.Session,
+        last_updated: str,
+    ):
+        url = f"{BASE_URL}/catalog-items"
+        items = self._fetch_pages(session, url)
+        last_updated_obj = pendulum.parse(last_updated)
+
+        for item in items:
+            updated_at = pendulum.parse(item["updated"])
+            if updated_at > last_updated_obj:
+                yield item
+
+    def fetch_forms(
+        self,
+        session: requests.Session,
+        start_date: str,
+        end_date: str,
+    ):
+        print(f"Fetching forms for {start_date} to {end_date}")
+        url = f"{BASE_URL}/forms/?sort=-updated_at&filter=and(greater-or-equal(updated_at,{start_date}),less-than(updated_at,{end_date}))"
+        return self._fetch_pages(session, url)
+
+    def fetch_lists(
+        self,
+        session: requests.Session,
+        updated_date: str,
+    ):
+        # https://a.klaviyo.com/api/lists/?sort=-updated&filter=greater-than(updated,2024-02-01 00:00:00+00:00)
+        url = f"{BASE_URL}/lists/?sort=-updated&filter=greater-than(updated,{updated_date})"
+        return self._fetch_pages(session, url)
+
+    def fetch_images(self, session: requests.Session, start_date: str, end_date: str):
+        # https://a.klaviyo.com/api/images/?sort=-updated_at&filter=greater-or-equal(updated_at,2024-06-01 00:00:00+00:00),less-than(updated_at,2024-09-01 00:00:00+00:00)
+        url = f"{BASE_URL}/images/?sort=-updated_at&filter=and(greater-or-equal(updated_at,{start_date}),less-than(updated_at,{end_date}))"
+        return self._fetch_pages(session, url)
+
+    def fetch_segments(
+        self,
+        session: requests.Session,
+        updated_date: str,
+    ):
+        # https://a.klaviyo.com/api/segments/?sort=-updated&filter=greater-than(updated,2024-04-01 00:00:00+00:00)
+        url = f"{BASE_URL}/segments/?sort=-updated&filter=greater-than(updated,{updated_date})"
+        print("url", url)
+        return self._fetch_pages(session, url)
+
+    def fetch_flows(
+        self,
+        session: requests.Session,
+        start_date: str,
+        end_date: str,
+    ):
+        print(f"Fetching flows for {start_date} to {end_date}")
+        # https://a.klaviyo.com/api/flows/?sort=-updated&filter=and(greater-or-equal(updated,2024-06-01 00:00:00+00:00),less-than(updated,2024-09-01 00:00:00+00:00))
+        url = f"{BASE_URL}/flows/?sort=-updated&filter=and(greater-or-equal(updated,{start_date}),less-than(updated,{end_date}))"
+        return self._fetch_pages(session, url)
+
+    def fetch_templates(
+        self,
+        session: requests.Session,
+        start_date: str,
+        end_date: str,
+    ):
+        # https://a.klaviyo.com/api/templates/?sort=-updated&filter=and(greater-or-equal(updated,2024-06-01 00:00:00+00:00),less-than(updated,2024-09-01 00:00:00+00:00))
+        url = f"{BASE_URL}/templates/?sort=-updated&filter=and(greater-or-equal(updated,{start_date}),less-than(updated,{end_date}))"
+        return self._fetch_pages(session, url)
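The client pages through results by following the links.next cursor until it is exhausted, flattening each record's attributes dict into the top level by default. A small sketch of driving it directly, assuming the class above; the API key and date window are illustrative:

import requests

from ingestr.src.klaviyo.client import KlaviyoClient

client = KlaviyoClient(api_key="pk_...")  # illustrative key
events = client.fetch_events(
    requests.Session(),
    "2024-06-01T00:00:00+00:00",
    "2024-06-02T00:00:00+00:00",
)
print(len(events))  # flattened event records for the one-day window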
ingestr/src/klaviyo/helpers.py
ADDED
@@ -0,0 +1,19 @@
+from typing import List
+
+import pendulum
+
+
+def split_date_range(
+    start_date: pendulum.DateTime, end_date: pendulum.DateTime
+) -> List[tuple]:
+    interval = "days"
+    if (end_date - start_date).days <= 1:
+        interval = "hours"
+
+    intervals = []
+    current = start_date
+    while current < end_date:
+        next_date = min(current.add(**{interval: 1}), end_date)
+        intervals.append((current.isoformat(), next_date.isoformat()))
+        current = next_date
+    return intervals
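A worked example of the helper above: a window longer than one day is chunked into day-long intervals (a window of one day or less is chunked into hours), and the boundaries come back as ISO-8601 strings:

import pendulum

from ingestr.src.klaviyo.helpers import split_date_range

# (end - start).days == 2, so the interval unit is "days":
print(split_date_range(pendulum.datetime(2024, 1, 1), pendulum.datetime(2024, 1, 3)))
# [('2024-01-01T00:00:00+00:00', '2024-01-02T00:00:00+00:00'),
#  ('2024-01-02T00:00:00+00:00', '2024-01-03T00:00:00+00:00')]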
ingestr/src/sources.py
CHANGED
@@ -9,9 +9,13 @@ import dlt
 
 from ingestr.src.airtable import airtable_source
 from ingestr.src.chess import source
+from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
 from ingestr.src.google_sheets import google_spreadsheet
 from ingestr.src.gorgias import gorgias_source
 from ingestr.src.hubspot import hubspot
+from ingestr.src.kafka import kafka_consumer
+from ingestr.src.kafka.helpers import KafkaCredentials
+from ingestr.src.klaviyo._init_ import klaviyo_source
 from ingestr.src.mongodb import mongodb_collection
 from ingestr.src.notion import notion_databases
 from ingestr.src.shopify import shopify_source
@@ -405,6 +409,48 @@ class StripeAnalyticsSource:
         ).with_resources(endpoint)
 
 
+class FacebookAdsSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        # facebook_ads://?access_token=abcd&account_id=1234
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "Facebook Ads takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        access_token = None
+        account_id = None
+        source_field = urlparse(uri)
+        source_params = parse_qs(source_field.query)
+        access_token = source_params.get("access_token")
+        account_id = source_params.get("account_id")
+
+        if not access_token or not account_id:
+            raise ValueError(
+                "access_token and account_id are required to connect to Facebook Ads."
+            )
+
+        endpoint = None
+        if table in ["campaigns", "ad_sets", "ad_creatives", "ads", "leads"]:
+            endpoint = table
+        elif table == "facebook_insights":
+            return facebook_insights_source(
+                access_token=access_token[0],
+                account_id=account_id[0],
+            ).with_resources("facebook_insights")
+        else:
+            raise ValueError(
+                f"Resource '{table}' is not supported for Facebook Ads source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
+            )
+
+        return facebook_ads_source(
+            access_token=access_token[0],
+            account_id=account_id[0],
+        ).with_resources(endpoint)
+
+
 class SlackSource:
     def handles_incrementality(self) -> bool:
         return True
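The URI convention in the comment above resolves with nothing beyond the standard library; a short sketch of the parsing step this class (and the other URI-based sources below) relies on:

from urllib.parse import parse_qs, urlparse

uri = "facebook_ads://?access_token=abcd&account_id=1234"
params = parse_qs(urlparse(uri).query)
# parse_qs yields lists, which is why the source indexes with [0]:
print(params["access_token"][0], params["account_id"][0])  # abcd 1234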
@@ -511,3 +557,98 @@ class AirtableSource:
         return airtable_source(
             base_id=base_id[0], table_names=tables, access_token=access_token[0]
         )
+
+
+class KlaviyoSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key"):
+            raise ValueError(
+                "klaviyo_source takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        source_fields = urlparse(uri)
+        source_params = parse_qs(source_fields.query)
+        api_key = source_params.get("api_key")
+
+        if not api_key:
+            raise ValueError("api_key in the URI is required to connect to klaviyo")
+
+        resource = None
+        if table in [
+            "events",
+            "profiles",
+            "campaigns",
+            "metrics",
+            "tags",
+            "coupons",
+            "catalog-variants",
+            "catalog-categories",
+            "catalog-items",
+            "forms",
+            "lists",
+            "images",
+            "segments",
+            "flows",
+            "templates",
+        ]:
+            resource = table
+        else:
+            raise ValueError(
+                f"Resource '{table}' is not supported for Klaviyo source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
+            )
+
+        start_date = kwargs.get("interval_start") or "2000-01-01"
+        return klaviyo_source(
+            api_key=api_key[0],
+            start_date=start_date,
+        ).with_resources(resource)
+
+
+class KafkaSource:
+    def handles_incrementality(self) -> bool:
+        return False
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        # kafka://?bootstrap_servers=localhost:9092&group_id=test_group&security_protocol=SASL_SSL&sasl_mechanisms=PLAIN&sasl_username=example_username&sasl_password=example_secret
+        source_fields = urlparse(uri)
+        source_params = parse_qs(source_fields.query)
+
+        bootstrap_servers = source_params.get("bootstrap_servers")
+        group_id = source_params.get("group_id")
+        security_protocol = source_params.get("security_protocol", [])
+        sasl_mechanisms = source_params.get("sasl_mechanisms", [])
+        sasl_username = source_params.get("sasl_username", [])
+        sasl_password = source_params.get("sasl_password", [])
+        batch_size = source_params.get("batch_size", [3000])
+        batch_timeout = source_params.get("batch_timeout", [3])
+
+        if not bootstrap_servers:
+            raise ValueError(
+                "bootstrap_servers in the URI is required to connect to kafka"
+            )
+
+        if not group_id:
+            raise ValueError("group_id in the URI is required to connect to kafka")
+
+        start_date = kwargs.get("interval_start")
+        return kafka_consumer(
+            topics=[table],
+            credentials=KafkaCredentials(
+                bootstrap_servers=bootstrap_servers[0],
+                group_id=group_id[0],
+                security_protocol=security_protocol[0]
+                if len(security_protocol) > 0
+                else None,  # type: ignore
+                sasl_mechanisms=sasl_mechanisms[0]
+                if len(sasl_mechanisms) > 0
+                else None,  # type: ignore
+                sasl_username=sasl_username[0] if len(sasl_username) > 0 else None,  # type: ignore
+                sasl_password=sasl_password[0] if len(sasl_password) > 0 else None,  # type: ignore
+            ),
+            start_from=start_date,
+            batch_size=int(batch_size[0]),
+            batch_timeout=int(batch_timeout[0]),
+        )
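A sketch of invoking the new Kafka source directly, based on the URI shape in the comment above; the broker address, group id, and topic are illustrative. When batch_size and batch_timeout are absent from the URI, they default to 3000 and 3 respectively:

from ingestr.src.sources import KafkaSource

source = KafkaSource().dlt_source(
    uri="kafka://?bootstrap_servers=localhost:9092&group_id=test_group",
    table="my_topic",  # the table argument becomes the Kafka topic to consume
)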
ingestr/src/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.7.7"
+__version__ = "0.7.8"
{ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ingestr
-Version: 0.7.7
+Version: 0.7.8
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -14,11 +14,13 @@ Classifier: Operating System :: OS Independent
 Classifier: Programming Language :: Python :: 3
 Classifier: Topic :: Database
 Requires-Python: >=3.9
+Requires-Dist: confluent-kafka>=2.3.0
 Requires-Dist: cx-oracle==8.3.0
 Requires-Dist: databricks-sql-connector==2.9.3
 Requires-Dist: dlt==0.5.1
 Requires-Dist: duckdb-engine==0.11.5
 Requires-Dist: duckdb==0.10.2
+Requires-Dist: facebook-business==20.0.0
 Requires-Dist: google-api-python-client==2.130.0
 Requires-Dist: google-cloud-bigquery-storage==2.24.0
 Requires-Dist: mysql-connector-python==9.0.0
@@ -185,6 +187,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
     <td>Chess.com</td>
     <td>✅</td>
     <td>-</td>
+  </tr>
+  <tr>
+    <td>Facebook Ads</td>
+    <td>✅</td>
+    <td>-</td>
   </tr>
   <tr>
     <td>Gorgias</td>
@@ -200,6 +207,11 @@ Join our Slack community [here](https://join.slack.com/t/bruindatacommunity/shar
     <td>HubSpot</td>
     <td>✅</td>
     <td>-</td>
+  </tr>
+  <tr>
+    <td>Klaviyo</td>
+    <td>✅</td>
+    <td>-</td>
   </tr>
   <tr>
     <td>Notion</td>
{ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/RECORD
CHANGED
@@ -1,13 +1,18 @@
-ingestr/main.py,sha256=
+ingestr/main.py,sha256=Hlcb8mUAWoGZr4ZKtQnoEhjLkjroiwx2-J86C6fN37E,17596
+ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
-ingestr/src/factory.py,sha256=
-ingestr/src/sources.py,sha256=
+ingestr/src/factory.py,sha256=CTVaFeMVgZO1fC9AKOqx-Wu89l5_YL6GlmvDF-FkAew,4442
+ingestr/src/sources.py,sha256=BlMsajIMcu_oqmU38uqlasXz2vtN_J8yXa24NHFcwJA,22696
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
-ingestr/src/version.py,sha256=
+ingestr/src/version.py,sha256=uC8wB9mRblQ0jUBAOUyCQLUQJ39MC2xybVLB_8ZsevU,22
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
 ingestr/src/chess/__init__.py,sha256=PaxT2DObudOGlhyoENE5LjR6rTdsxiqKKpAZeyzVLCA,6791
 ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
 ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
+ingestr/src/facebook_ads/__init__.py,sha256=ZZyogV48gmhDcC3CYQEsC4qT3Q6JI9IOnMff2NS1M-A,9207
+ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
+ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
+ingestr/src/facebook_ads/settings.py,sha256=1IxZeP_4rN3IBvAncNHOoqpzAirx0Hz-MUK_tl6UTFk,4881
 ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
 ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
 ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
@@ -18,6 +23,11 @@ ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOIN
 ingestr/src/hubspot/__init__.py,sha256=eSD_lEIEd16YijAtUATFG8FGO8YGPm-MtAk94KKsx6o,9740
 ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KFE,6727
 ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
+ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
+ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
+ingestr/src/klaviyo/_init_.py,sha256=nq2T1p3Xc7yiwGabsZBp2Jy2fa8_n5oxqxBnUGhKOgg,6592
+ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
+ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
 ingestr/src/mongodb/__init__.py,sha256=E7SDeCyYNkYZZ_RFhjCRDZUGpKtaxpPG5sFSmKJV62U,4336
 ingestr/src/mongodb/helpers.py,sha256=80vtAeNyUn1iMN0CeLrTlKqYN6I6fHF81Kd2UuE8Kns,5653
 ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
@@ -50,8 +60,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.7.7.dist-info/METADATA,sha256=
-ingestr-0.7.7.dist-info/WHEEL,sha256=
-ingestr-0.7.7.dist-info/entry_points.txt,sha256=
-ingestr-0.7.7.dist-info/licenses/LICENSE.md,sha256=
-ingestr-0.7.7.dist-info/RECORD,,
+ingestr-0.7.8.dist-info/METADATA,sha256=JGJ_76vC0icT_tJSYDkbtRXuc_63sgHXJYYIksTSyOE,6561
+ingestr-0.7.8.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+ingestr-0.7.8.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.7.8.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.7.8.dist-info/RECORD,,
{ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/WHEEL
File without changes
{ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/entry_points.txt
File without changes
{ingestr-0.7.7.dist-info → ingestr-0.7.8.dist-info}/licenses/LICENSE.md
File without changes