ingestr 0.7.7__py3-none-any.whl → 0.8.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,227 @@
+ from typing import Any, Dict, List, Optional
+
+ from confluent_kafka import Consumer, Message, TopicPartition # type: ignore
+ from confluent_kafka.admin import TopicMetadata # type: ignore
+ from dlt import config
+ from dlt.common import pendulum
+ from dlt.common.configuration import configspec
+ from dlt.common.configuration.specs import CredentialsConfiguration
+ from dlt.common.time import ensure_pendulum_datetime
+ from dlt.common.typing import DictStrAny, TSecretValue
+ from dlt.common.utils import digest128
+
+
+ def default_msg_processor(msg: Message) -> Dict[str, Any]:
+     """Basic Kafka message processor.
+
+     Returns the message value and metadata. The timestamp is a pair of
+     (timestamp type, timestamp), where the type is one of the
+     confluent_kafka constants:
+         TIMESTAMP_NOT_AVAILABLE - Timestamps not supported by broker.
+         TIMESTAMP_CREATE_TIME - Message creation time (or source / producer time).
+         TIMESTAMP_LOG_APPEND_TIME - Broker receive time.
+
+     Args:
+         msg (confluent_kafka.Message): A single Kafka message.
+
+     Returns:
+         dict: Processed Kafka message.
+     """
+     ts = msg.timestamp()
+     topic = msg.topic()
+     partition = msg.partition()
+     key = msg.key()
+     if key is not None:
+         key = key.decode("utf-8")
+
+     return {
+         "_kafka": {
+             "partition": partition,
+             "topic": topic,
+             "key": key,
+             "offset": msg.offset(),
+             "ts": {
+                 "type": ts[0],
+                 "value": ensure_pendulum_datetime(ts[1] / 1e3),
+             },
+             "data": msg.value().decode("utf-8"),
+         },
+         "_kafka_msg_id": digest128(topic + str(partition) + str(key)),
+     }
+
+
+ class OffsetTracker(dict):  # type: ignore
+     """Object to control offsets of the given topics.
+
+     Tracks all the partitions of the given topics with two params:
+     current offset and maximum offset (partition length).
+
+     Args:
+         consumer (confluent_kafka.Consumer): Kafka consumer.
+         topic_names (List): Names of topics to track.
+         pl_state (DictStrAny): Pipeline current state.
+         start_from (Optional[pendulum.DateTime]): A timestamp, after which messages
+             are read. Older messages are ignored.
+     """
+
+     def __init__(
+         self,
+         consumer: Consumer,
+         topic_names: List[str],
+         pl_state: DictStrAny,
+         start_from: pendulum.DateTime = None,  # type: ignore
+     ):
+         super().__init__()
+
+         self._consumer = consumer
+         self._topics = self._read_topics(topic_names)
+
+         # read/init current offsets
+         self._cur_offsets = pl_state.setdefault(
+             "offsets", {t_name: {} for t_name in topic_names}
+         )
+
+         self._init_partition_offsets(start_from)
+
+     def _read_topics(self, topic_names: List[str]) -> Dict[str, TopicMetadata]:
+         """Read the given topics metadata from Kafka.
+
+         Reads all the topics at once, instead of requesting
+         each in a separate call. Returns only those needed.
+
+         Args:
+             topic_names (list): Names of topics to be read.
+
+         Returns:
+             dict: Metadata of the given topics.
+         """
+         tracked_topics = {}
+         topics = self._consumer.list_topics().topics
+
+         for t_name in topic_names:
+             tracked_topics[t_name] = topics[t_name]
+
+         return tracked_topics
+
+     def _init_partition_offsets(self, start_from: pendulum.DateTime) -> None:
+         """Designate current and maximum offsets for every partition.
+
+         Current offsets are read from the state, if present. Set equal
+         to the partition beginning otherwise.
+
+         Args:
+             start_from (pendulum.DateTime): A timestamp, at which to start
+                 reading. Older messages are ignored.
+         """
+         all_parts = []
+         for t_name, topic in self._topics.items():
+             self[t_name] = {}
+
+             # init all the topic partitions from the partitions' metadata
+             parts = [
+                 TopicPartition(
+                     t_name,
+                     part,
+                     start_from.int_timestamp * 1000 if start_from is not None else 0,
+                 )
+                 for part in topic.partitions
+             ]
+
+             # get offsets for the timestamp, if given
+             if start_from is not None:
+                 ts_offsets = self._consumer.offsets_for_times(parts)
+
+             # designate current and maximum offsets for every partition
+             for i, part in enumerate(parts):
+                 max_offset = self._consumer.get_watermark_offsets(part)[1]
+
+                 if start_from is not None:
+                     if ts_offsets[i].offset != -1:
+                         cur_offset = ts_offsets[i].offset
+                     else:
+                         cur_offset = max_offset - 1
+                 else:
+                     cur_offset = (
+                         self._cur_offsets[t_name].get(str(part.partition), -1) + 1
+                     )
+
+                 self[t_name][str(part.partition)] = {
+                     "cur": cur_offset,
+                     "max": max_offset,
+                 }
+
+                 parts[i].offset = cur_offset
+
+             all_parts += parts
+
+         # assign the current offsets to the consumer
+         self._consumer.assign(all_parts)
+
+     @property
+     def has_unread(self) -> bool:
+         """Check if there are unread messages in the tracked topics.
+
+         Returns:
+             bool: True, if there are messages to read, False if all
+                 the current offsets are equal to their maximums.
+         """
+         for parts in self.values():
+             for part in parts.values():
+                 if part["cur"] + 1 < part["max"]:
+                     return True
+
+         return False
+
+     def renew(self, msg: Message) -> None:
+         """Update partition offset from the given message.
+
+         Args:
+             msg (confluent_kafka.Message): A read Kafka message.
+         """
+         topic = msg.topic()
+         partition = str(msg.partition())
+
+         offset = self[topic][partition]
+         offset["cur"] = msg.offset()
+
+         self._cur_offsets[topic][partition] = msg.offset()
+
+
+ @configspec
+ class KafkaCredentials(CredentialsConfiguration):
+     """Kafka source credentials.
+
+     NOTE: native Kafka configuration keys are written with a period, e.g.
+     bootstrap.servers. However, KafkaCredentials expects underscores
+     instead, e.g. bootstrap_servers.
+     """
+
+     bootstrap_servers: str = config.value
+     group_id: str = config.value
+     security_protocol: Optional[str] = None
+     sasl_mechanisms: Optional[str] = None
+     sasl_username: Optional[str] = None
+     sasl_password: Optional[TSecretValue] = None
+
+     def init_consumer(self) -> Consumer:
+         """Initialize a Kafka consumer from these credentials.
+
+         Returns:
+             confluent_kafka.Consumer: an initialized consumer.
+         """
+         config = {
+             "bootstrap.servers": self.bootstrap_servers,
+             "group.id": self.group_id,
+             "auto.offset.reset": "earliest",
+         }
+
+         if self.security_protocol:
+             config["security.protocol"] = self.security_protocol
+         if self.sasl_mechanisms:
+             config["sasl.mechanisms"] = self.sasl_mechanisms
+         if self.sasl_username:
+             config["sasl.username"] = self.sasl_username
+         if self.sasl_password:
+             config["sasl.password"] = self.sasl_password
+
+         return Consumer(config)
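
A minimal sketch of how these credentials could be wired into a consumer loop, assuming KafkaCredentials can be instantiated directly with keyword arguments; the broker address, group id, topic name, and poll timeout below are illustrative placeholders, not values taken from the package:

creds = KafkaCredentials(
    bootstrap_servers="localhost:9092",  # illustrative broker address
    group_id="ingestr-demo",  # illustrative consumer group
)
consumer = creds.init_consumer()
consumer.subscribe(["my_topic"])  # illustrative topic name
msg = consumer.poll(timeout=1.0)
if msg is not None and msg.error() is None:
    row = default_msg_processor(msg)  # dict with "_kafka" metadata and "_kafka_msg_id"
consumer.close()
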
@@ -0,0 +1,173 @@
+ from typing import Iterable
+
+ import dlt
+ import pendulum
+ import requests
+ from dlt.common.time import ensure_pendulum_datetime
+ from dlt.common.typing import TAnyDateTime, TDataItem
+ from dlt.sources import DltResource
+ from dlt.sources.helpers.requests import Client
+
+ from ingestr.src.klaviyo.client import KlaviyoClient
+ from ingestr.src.klaviyo.helpers import split_date_range
+
+
+ def retry_on_limit(response: requests.Response, exception: BaseException) -> bool:
+     return response.status_code == 429
+
+
+ def create_client() -> requests.Session:
+     return Client(
+         request_timeout=10.0,
+         raise_for_status=False,
+         retry_condition=retry_on_limit,
+         request_max_attempts=12,
+         request_backoff_factor=2,
+     ).session
+
+
+ @dlt.source(max_table_nesting=0)
+ def klaviyo_source(api_key: str, start_date: TAnyDateTime) -> Iterable[DltResource]:
+     start_date_obj = ensure_pendulum_datetime(start_date)
+     client = KlaviyoClient(api_key)
+
+     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+     def events(
+         datetime=dlt.sources.incremental("datetime", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         intervals = split_date_range(
+             pendulum.parse(datetime.start_value), pendulum.now()
+         )
+
+         for start, end in intervals:
+             yield lambda s=start, e=end: client.fetch_events(create_client(), s, e)
+
+     @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
+     def profiles(
+         updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         intervals = split_date_range(
+             pendulum.parse(updated.start_value), pendulum.now()
+         )
+
+         for start, end in intervals:
+             yield lambda s=start, e=end: client.fetch_profiles(create_client(), s, e)
+
+     @dlt.resource(write_disposition="merge", primary_key="id", parallelized=True)
+     def campaigns(
+         updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         intervals = split_date_range(
+             pendulum.parse(updated_at.start_value), pendulum.now()
+         )
+
+         for campaign_type in ["email", "sms"]:
+             for start, end in intervals:
+                 yield lambda s=start, e=end, ct=campaign_type: client.fetch_campaigns(
+                     create_client(), s, e, ct
+                 )
+
+     @dlt.resource(write_disposition="merge", primary_key="id")
+     def metrics(
+         updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         yield from client.fetch_metrics(create_client(), updated.start_value)
+
+     @dlt.resource(write_disposition="replace", primary_key="id")
+     def tags() -> Iterable[TDataItem]:
+         yield from client.fetch_tag(create_client())
+
+     @dlt.resource(write_disposition="replace", primary_key="id")
+     def coupons() -> Iterable[TDataItem]:
+         yield from client.fetch_coupons(create_client())
+
+     @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-variants")
+     def catalog_variants(
+         updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         yield from client.fetch_catalog_variant(create_client(), updated.start_value)
+
+     @dlt.resource(
+         write_disposition="merge", primary_key="id", name="catalog-categories"
+     )
+     def catalog_categories(
+         updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         yield from client.fetch_catalog_categories(create_client(), updated.start_value)
+
+     @dlt.resource(write_disposition="merge", primary_key="id", name="catalog-items")
+     def catalog_items(
+         updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         yield from client.fetch_catalog_item(create_client(), updated.start_value)
+
+     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+     def forms(
+         updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         intervals = split_date_range(
+             pendulum.parse(updated_at.start_value), pendulum.now()
+         )
+
+         for start, end in intervals:
+             yield lambda s=start, e=end: client.fetch_forms(create_client(), s, e)
+
+     @dlt.resource(write_disposition="merge", primary_key="id")
+     def lists(
+         updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         yield from client.fetch_lists(create_client(), updated.start_value)
+
+     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+     def images(
+         updated_at=dlt.sources.incremental("updated_at", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         intervals = split_date_range(
+             pendulum.parse(updated_at.start_value), pendulum.now()
+         )
+         for start, end in intervals:
+             yield lambda s=start, e=end: client.fetch_images(create_client(), s, e)
+
+     @dlt.resource(write_disposition="merge", primary_key="id")
+     def segments(
+         updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         yield from client.fetch_segments(create_client(), updated.start_value)
+
+     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+     def flows(
+         updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         intervals = split_date_range(
+             pendulum.parse(updated.start_value), pendulum.now()
+         )
+         for start, end in intervals:
+             yield lambda s=start, e=end: client.fetch_flows(create_client(), s, e)
+
+     @dlt.resource(write_disposition="append", primary_key="id", parallelized=True)
+     def templates(
+         updated=dlt.sources.incremental("updated", start_date_obj.isoformat()),
+     ) -> Iterable[TDataItem]:
+         intervals = split_date_range(
+             pendulum.parse(updated.start_value), pendulum.now()
+         )
+         for start, end in intervals:
+             yield lambda s=start, e=end: client.fetch_templates(create_client(), s, e)
+
+     return (
+         events,
+         profiles,
+         campaigns,
+         metrics,
+         tags,
+         coupons,
+         catalog_variants,
+         catalog_categories,
+         catalog_items,
+         forms,
+         lists,
+         images,
+         segments,
+         flows,
+         templates,
+     )
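
For orientation, a minimal sketch of running this source in a dlt pipeline, assuming the module is importable as ingestr.src.klaviyo (as its imports suggest); the pipeline name, destination, dataset name, API key, and start date below are illustrative placeholders:

import dlt

from ingestr.src.klaviyo import klaviyo_source

pipeline = dlt.pipeline(
    pipeline_name="klaviyo_demo",  # illustrative name
    destination="duckdb",  # illustrative destination
    dataset_name="klaviyo_raw",  # illustrative dataset
)
source = klaviyo_source(api_key="<your-klaviyo-api-key>", start_date="2024-01-01")
load_info = pipeline.run(source.with_resources("events", "lists"))
print(load_info)
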
@@ -0,0 +1,212 @@
+ from urllib.parse import urlencode
+
+ import pendulum
+ import requests
+
+ BASE_URL = "https://a.klaviyo.com/api"
+
+
+ class KlaviyoClient:
+     def __init__(self, api_key: str):
+         self.api_key = api_key
+
+     def __get_headers(self):
+         return {
+             "Authorization": f"Klaviyo-API-Key {self.api_key}",
+             "accept": "application/json",
+             "revision": "2024-07-15",
+         }
+
+     def _flatten_attributes(self, items: list):
+         for event in items:
+             if "attributes" not in event:
+                 continue
+
+             for attribute_key in event["attributes"]:
+                 event[attribute_key] = event["attributes"][attribute_key]
+
+             del event["attributes"]
+         return items
+
+     def _fetch_pages(
+         self, session: requests.Session, url: str, flat: bool = True
+     ) -> list:
+         all_items = []
+         while True:
+             response = session.get(url=url, headers=self.__get_headers())
+             result = response.json()
+             items = result.get("data", [])
+
+             if flat:
+                 items = self._flatten_attributes(items)
+
+             all_items.extend(items)
+             nextURL = result.get("links", {}).get("next")
+             if nextURL is None:
+                 break
+
+             url = nextURL
+
+         return all_items
+
+     def fetch_events(
+         self,
+         session: requests.Session,
+         start_date: str,
+         end_date: str,
+     ):
+         print(f"Fetching events for {start_date} to {end_date}")
+         url = f"{BASE_URL}/events/?sort=-datetime&filter=and(greater-or-equal(datetime,{start_date}),less-than(datetime,{end_date}))"
+         return self._fetch_pages(session, url)
+
+     def fetch_metrics(
+         self,
+         session: requests.Session,
+         last_updated: str,
+     ):
+         print(f"Fetching metrics since {last_updated}")
+         url = f"{BASE_URL}/metrics"
+         items = self._fetch_pages(session, url)
+
+         last_updated_obj = pendulum.parse(last_updated)
+         for item in items:
+             updated_at = pendulum.parse(item["updated"])
+             if updated_at > last_updated_obj:
+                 yield item
+
+     def fetch_profiles(
+         self,
+         session: requests.Session,
+         start_date: str,
+         end_date: str,
+     ):
+         pendulum_start_date = pendulum.parse(start_date)
+         pendulum_start_date = pendulum_start_date.subtract(seconds=1)
+         url = f"{BASE_URL}/profiles/?sort=updated&filter=and(greater-than(updated,{pendulum_start_date.isoformat()}),less-than(updated,{end_date}))"
+         return self._fetch_pages(session, url)
+
+     def fetch_campaigns(
+         self,
+         session: requests.Session,
+         start_date: str,
+         end_date: str,
+         campaign_type: str,
+     ):
+         print(f"Fetching {campaign_type} campaigns for {start_date} to {end_date}")
+
+         base_url = f"{BASE_URL}/campaigns/"
+         params = {
+             "sort": "updated_at",
+             "filter": f"and(equals(messages.channel,'{campaign_type}'),greater-or-equal(updated_at,{start_date}),less-than(updated_at,{end_date}))",
+         }
+         url = f"{base_url}?{urlencode(params)}"
+         pages = self._fetch_pages(session, url)
+         for page in pages:
+             page["campaign_type"] = campaign_type
+
+         return pages
+
+     def fetch_tag(self, session: requests.Session):
+         url = f"{BASE_URL}/tags"
+         return self._fetch_pages(session, url, False)
+
+     def fetch_catalog_variant(
+         self,
+         session: requests.Session,
+         last_updated: str,
+     ):
+         url = f"{BASE_URL}/catalog-variants"
+         items = self._fetch_pages(session, url)
+         last_updated_obj = pendulum.parse(last_updated)
+
+         for item in items:
+             updated_at = pendulum.parse(item["updated"])
+             if updated_at > last_updated_obj:
+                 yield item
+
+     def fetch_coupons(self, session: requests.Session):
+         url = f"{BASE_URL}/coupons"
+         return self._fetch_pages(session, url, False)
+
+     def fetch_catalog_categories(
+         self,
+         session: requests.Session,
+         last_updated: str,
+     ):
+         url = f"{BASE_URL}/catalog-categories"
+         items = self._fetch_pages(session, url)
+         last_updated_obj = pendulum.parse(last_updated)
+
+         for item in items:
+             updated_at = pendulum.parse(item["updated"])
+             if updated_at > last_updated_obj:
+                 yield item
+
+     def fetch_catalog_item(
+         self,
+         session: requests.Session,
+         last_updated: str,
+     ):
+         url = f"{BASE_URL}/catalog-items"
+         items = self._fetch_pages(session, url)
+         last_updated_obj = pendulum.parse(last_updated)
+
+         for item in items:
+             updated_at = pendulum.parse(item["updated"])
+             if updated_at > last_updated_obj:
+                 yield item
+
+     def fetch_forms(
+         self,
+         session: requests.Session,
+         start_date: str,
+         end_date: str,
+     ):
+         print(f"Fetching forms for {start_date} to {end_date}")
+         url = f"{BASE_URL}/forms/?sort=-updated_at&filter=and(greater-or-equal(updated_at,{start_date}),less-than(updated_at,{end_date}))"
+         return self._fetch_pages(session, url)
+
+     def fetch_lists(
+         self,
+         session: requests.Session,
+         updated_date: str,
+     ):
+         # https://a.klaviyo.com/api/lists/?sort=-updated&filter=greater-than(updated,2024-02-01 00:00:00+00:00)
+         url = f"{BASE_URL}/lists/?sort=-updated&filter=greater-than(updated,{updated_date})"
+         return self._fetch_pages(session, url)
+
+     def fetch_images(self, session: requests.Session, start_date: str, end_date: str):
+         # https://a.klaviyo.com/api/images/?sort=-updated_at&filter=greater-or-equal(updated_at,2024-06-01 00:00:00+00:00),less-than(updated_at,2024-09-01 00:00:00+00:00)
+         url = f"{BASE_URL}/images/?sort=-updated_at&filter=and(greater-or-equal(updated_at,{start_date}),less-than(updated_at,{end_date}))"
+         return self._fetch_pages(session, url)
+
+     def fetch_segments(
+         self,
+         session: requests.Session,
+         updated_date: str,
+     ):
+         # https://a.klaviyo.com/api/segments/?sort=-updated&filter=greater-than(updated,2024-04-01 00:00:00+00:00)
+         url = f"{BASE_URL}/segments/?sort=-updated&filter=greater-than(updated,{updated_date})"
+         print("url", url)
+         return self._fetch_pages(session, url)
+
+     def fetch_flows(
+         self,
+         session: requests.Session,
+         start_date: str,
+         end_date: str,
+     ):
+         print(f"Fetching flows for {start_date} to {end_date}")
+         # https://a.klaviyo.com/api/flows/?sort=-updated&filter=and(greater-or-equal(updated,2024-06-01 00:00:00+00:00),less-than(updated,2024-09-01 00:00:00+00:00))
+         url = f"{BASE_URL}/flows/?sort=-updated&filter=and(greater-or-equal(updated,{start_date}),less-than(updated,{end_date}))"
+         return self._fetch_pages(session, url)
+
+     def fetch_templates(
+         self,
+         session: requests.Session,
+         start_date: str,
+         end_date: str,
+     ):
+         # https://a.klaviyo.com/api/templates/?sort=-updated&filter=and(greater-or-equal(updated,2024-06-01 00:00:00+00:00),less-than(updated,2024-09-01 00:00:00+00:00))
+         url = f"{BASE_URL}/templates/?sort=-updated&filter=and(greater-or-equal(updated,{start_date}),less-than(updated,{end_date}))"
+         return self._fetch_pages(session, url)
@@ -0,0 +1,19 @@
+ from typing import List
+
+ import pendulum
+
+
+ def split_date_range(
+     start_date: pendulum.DateTime, end_date: pendulum.DateTime
+ ) -> List[tuple]:
+     interval = "days"
+     if (end_date - start_date).days <= 1:
+         interval = "hours"
+
+     intervals = []
+     current = start_date
+     while current < end_date:
+         next_date = min(current.add(**{interval: 1}), end_date)
+         intervals.append((current.isoformat(), next_date.isoformat()))
+         current = next_date
+     return intervals
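
To illustrate the chunking behavior, a small sketch of calling the helper directly (the dates are arbitrary examples): a range longer than a day is split into one-day (start, end) ISO-8601 pairs, while a range of a day or less is split hour by hour.

import pendulum

from ingestr.src.klaviyo.helpers import split_date_range

# Three-day range -> three (start, end) pairs, one per day.
daily = split_date_range(pendulum.datetime(2024, 6, 1), pendulum.datetime(2024, 6, 4))

# Six-hour range -> six pairs, one per hour.
hourly = split_date_range(pendulum.datetime(2024, 6, 1, 0), pendulum.datetime(2024, 6, 1, 6))

print(len(daily), len(hourly))  # 3 6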