ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/masking.py ADDED
@@ -0,0 +1,344 @@
1
+ import hashlib
2
+ import hmac
3
+ import random
4
+ import re
5
+ import string
6
+ import uuid
7
+ from datetime import date, datetime, timedelta
8
+ from typing import Any, Callable, Dict, Optional, Tuple, Union
9
+
10
+
11
class MaskingEngine:
    """Factory for per-value masking callables.

    A mask is configured as ``column:algorithm[:param]``. ``parse_mask_config``
    splits that string and ``get_masking_function`` returns a one-argument
    callable implementing the requested algorithm. Tokenization algorithms keep
    state on the instance so equal inputs map to equal tokens.
    """

    def __init__(self):
        # str(value) -> UUID token handed out by the "uuid" algorithm.
        self.token_cache: Dict[str, Union[str, int]] = {}
        # str(value) -> integer token handed out by the "sequential" algorithm.
        # Kept separate from token_cache: with a shared cache, a value first
        # tokenized as a UUID would later be fed to int() and raise ValueError.
        self.sequential_cache: Dict[str, int] = {}
        self.sequential_counter = 0

    def parse_mask_config(self, config: str) -> Tuple[str, str, Optional[str]]:
        """Split ``column:algorithm[:param]`` into its parts.

        Returns:
            ``(column, algorithm, param)`` where ``param`` is ``None`` when
            the configuration has only two fields.

        Raises:
            ValueError: if the configuration does not have two or three
                colon-separated fields.
        """
        parts = config.split(":")
        if len(parts) == 2:
            return parts[0], parts[1], None
        elif len(parts) == 3:
            return parts[0], parts[1], parts[2]
        else:
            raise ValueError(
                f"Invalid mask configuration: {config}. Expected format: 'column:algorithm[:param]'"
            )

    def get_masking_function(
        self, algorithm: str, param: Optional[str] = None
    ) -> Callable:
        """Return the one-argument masking callable for ``algorithm``.

        Args:
            algorithm: algorithm name, matched case-insensitively.
            param: optional algorithm parameter (HMAC key, fixed replacement,
                number of visible characters, precision, bucket size, noise
                level or maximum day shift, depending on the algorithm).

        Raises:
            ValueError: if the algorithm is unknown, if ``param`` cannot be
                converted to the number the algorithm needs, or if a zero
                precision / bucket size is given for ``round`` / ``range``.
        """
        algorithm = algorithm.lower()

        # Hash-based masking
        if algorithm == "hash" or algorithm == "sha256":
            return self._hash_sha256
        elif algorithm == "md5":
            return self._hash_md5
        elif algorithm == "hmac":
            # NOTE: without an explicit key this falls back to the literal
            # "default-key", which is visible to anyone reading this source.
            return lambda x: self._hash_hmac(x, param or "default-key")

        # Format-preserving masking
        elif algorithm == "email":
            return self._mask_email
        elif algorithm == "phone":
            return self._mask_phone
        elif algorithm == "credit_card":
            return self._mask_credit_card
        elif algorithm == "ssn":
            return self._mask_ssn

        # Redaction strategies
        elif algorithm == "redact":
            return lambda x: "REDACTED"
        elif algorithm == "stars":
            return lambda x: "*" * len(str(x)) if x else ""
        elif algorithm == "fixed":
            return lambda x: param or "MASKED"
        elif algorithm == "random":
            return self._random_replace

        # Partial masking
        elif algorithm == "partial":
            chars = int(param) if param else 2
            return lambda x: self._partial_mask(x, chars)
        elif algorithm == "first_letter":
            return self._first_letter_mask

        # Tokenization
        elif algorithm == "uuid":
            return self._tokenize_uuid
        elif algorithm == "sequential":
            return self._tokenize_sequential

        # Numeric masking
        elif algorithm == "round":
            precision = int(param) if param else 10
            if precision == 0:
                # A zero precision would divide by zero on every value.
                raise ValueError("Invalid mask parameter: round precision must not be 0")
            return lambda x: self._round_number(x, precision)
        elif algorithm == "range":
            bucket_size = int(param) if param else 100
            if bucket_size == 0:
                # A zero bucket size would divide by zero on every value.
                raise ValueError("Invalid mask parameter: range bucket size must not be 0")
            return lambda x: self._range_mask(x, bucket_size)
        elif algorithm == "noise":
            noise_level = float(param) if param else 0.1
            return lambda x: self._add_noise(x, noise_level)

        # Date masking
        elif algorithm == "date_shift":
            max_days = int(param) if param else 30
            return lambda x: self._date_shift(x, max_days)
        elif algorithm == "year_only":
            return self._year_only
        elif algorithm == "month_year":
            return self._month_year

        else:
            raise ValueError(f"Unknown masking algorithm: {algorithm}")

    # Hash functions
    def _hash_sha256(self, value: Any) -> Optional[str]:
        """Return the SHA-256 hex digest of ``str(value)``; ``None`` stays ``None``."""
        if value is None:
            return None
        return hashlib.sha256(str(value).encode()).hexdigest()

    def _hash_md5(self, value: Any) -> Optional[str]:
        """Return the MD5 hex digest of ``str(value)``; ``None`` stays ``None``."""
        if value is None:
            return None
        return hashlib.md5(str(value).encode()).hexdigest()

    def _hash_hmac(self, value: Any, key: str) -> Optional[str]:
        """Return the HMAC-SHA256 hex digest of ``str(value)`` under ``key``."""
        if value is None:
            return None
        return hmac.new(key.encode(), str(value).encode(), hashlib.sha256).hexdigest()

    # Format-preserving masks
    def _mask_email(self, value: Any) -> Any:
        """Star out the local part of an e-mail address, keeping the domain.

        Local parts longer than two characters keep their first and last
        character. Values without ``@`` are partially masked instead. Falsy
        values are returned unchanged.
        """
        if not value:
            return value
        email_str = str(value)
        if "@" not in email_str:
            return self._partial_mask(email_str, 2)

        local, domain = email_str.split("@", 1)
        if len(local) <= 2:
            masked_local = "*" * len(local)
        else:
            masked_local = local[0] + "*" * (len(local) - 2) + local[-1]
        return f"{masked_local}@{domain}"

    def _mask_phone(self, value: Any) -> Any:
        """Keep the first three digits of a phone number and mask the rest.

        Non-digit characters are dropped first. Numbers with fewer than ten
        digits are fully starred. Falsy values are returned unchanged.
        """
        if not value:
            return value
        digits = re.sub(r"\D", "", str(value))
        if len(digits) < 10:
            return "*" * len(digits)
        return digits[:3] + "-***-****"

    def _mask_credit_card(self, value: Any) -> Any:
        """Star every digit of a card number except the last four.

        Non-digit characters are dropped first. Numbers with fewer than twelve
        digits are fully starred. Falsy values are returned unchanged.
        """
        if not value:
            return value
        cc_str = re.sub(r"\D", "", str(value))
        if len(cc_str) < 12:
            return "*" * len(cc_str)
        return "*" * (len(cc_str) - 4) + cc_str[-4:]

    def _mask_ssn(self, value: Any) -> Any:
        """Render a nine-digit SSN as ``***-**-NNNN``.

        Inputs that do not contain exactly nine digits are fully starred.
        Falsy values are returned unchanged.
        """
        if not value:
            return value
        ssn_str = re.sub(r"\D", "", str(value))
        if len(ssn_str) != 9:
            return "*" * len(ssn_str)
        return "***-**-" + ssn_str[-4:]

    # Partial masking
    def _partial_mask(self, value: Any, chars_to_show: int) -> Any:
        """Keep ``chars_to_show`` characters at each end and star the middle.

        The value is fully starred when it is too short to hide anything or
        when ``chars_to_show`` is zero or negative. Falsy values are returned
        unchanged.
        """
        if not value:
            return value
        val_str = str(value)
        # chars_to_show <= 0 must be handled explicitly: val_str[-0:] is the
        # whole string, so slicing would append the unmasked value.
        if chars_to_show <= 0 or len(val_str) <= chars_to_show * 2:
            return "*" * len(val_str)
        return (
            val_str[:chars_to_show]
            + "*" * (len(val_str) - chars_to_show * 2)
            + val_str[-chars_to_show:]
        )

    def _first_letter_mask(self, value: Any) -> Any:
        """Keep the first character and star the rest; falsy values pass through."""
        if not value:
            return value
        val_str = str(value)
        if len(val_str) <= 1:
            return val_str
        return val_str[0] + "*" * (len(val_str) - 1)

    # Random replacement
    def _random_replace(self, value: Any) -> Any:
        """Replace a value with random data of a similar shape.

        Integers become a random positive integer with the same digit count,
        floats a random float in ``[0, 2 * abs(value)]``, strings a random
        alphanumeric string of the same length. Any other type is returned as
        ``str(value)``; ``None`` stays ``None``.
        """
        if value is None:
            return value

        if isinstance(value, (int, float)):
            if isinstance(value, int):
                magnitude = len(str(abs(value)))
                return random.randint(10 ** (magnitude - 1), 10**magnitude - 1)
            else:
                return random.uniform(0, abs(value) * 2)
        elif isinstance(value, str):
            return "".join(
                random.choices(string.ascii_letters + string.digits, k=len(value))
            )
        else:
            return str(value)

    # Tokenization
    def _tokenize_uuid(self, value: Any) -> Optional[str]:
        """Map ``str(value)`` to a random UUID, stable for this engine instance."""
        if value is None:
            return None
        val_str = str(value)
        if val_str not in self.token_cache:
            self.token_cache[val_str] = str(uuid.uuid4())
        return str(self.token_cache[val_str])

    def _tokenize_sequential(self, value: Any) -> Optional[int]:
        """Map ``str(value)`` to a 1-based counter, stable for this engine instance."""
        if value is None:
            return None
        val_str = str(value)
        if val_str not in self.sequential_cache:
            self.sequential_counter += 1
            self.sequential_cache[val_str] = self.sequential_counter
        return self.sequential_cache[val_str]

    # Numeric masking
    def _round_number(self, value: Any, precision: int) -> Any:
        """Round a number to the nearest multiple of ``precision``.

        Values that cannot be converted or rounded (non-numeric, NaN,
        infinity) are returned unchanged.
        """
        if value is None:
            return value
        try:
            num = float(value)
            return round(num / precision) * precision
        except (ValueError, TypeError, OverflowError):
            # round() raises OverflowError for infinities and ValueError for NaN.
            return value

    def _range_mask(self, value: Any, bucket_size: int) -> Any:
        """Replace a number with the ``"lower-upper"`` bucket that contains it.

        Values that cannot be converted to a finite number are returned
        unchanged.
        """
        if value is None:
            return value
        try:
            num = float(value)
            lower = int(num // bucket_size) * bucket_size
            upper = lower + bucket_size
            return f"{lower}-{upper}"
        except (ValueError, TypeError, OverflowError):
            return value

    def _add_noise(self, value: Any, noise_level: float) -> Any:
        """Perturb a number by a random fraction of up to ``noise_level``.

        Integer inputs yield integers. Values that cannot be converted to a
        float are returned unchanged.
        """
        if value is None:
            return value
        try:
            num = float(value)
            noise = random.uniform(-noise_level, noise_level) * abs(num)
            result = num + noise
            if isinstance(value, int):
                return int(result)
            return result
        except (ValueError, TypeError, OverflowError):
            return value

    # Date masking
    def _date_shift(self, value: Any, max_days: int) -> Any:
        """Shift a date by a random number of days within ``±max_days``.

        ``date``/``datetime`` inputs keep their type. Other inputs are parsed
        with ``dateutil``; string inputs come back formatted as
        ``YYYY-MM-DD``. Anything that cannot be parsed is returned unchanged.
        """
        if value is None:
            return value

        if isinstance(value, (date, datetime)):
            shift_days = random.randint(-max_days, max_days)
            return value + timedelta(days=shift_days)

        # Best effort for non-date inputs: any failure (including a missing
        # dateutil) leaves the value as it was.
        try:
            from dateutil import parser  # type: ignore

            dt = parser.parse(str(value))
            shift_days = random.randint(-max_days, max_days)
            result = dt + timedelta(days=shift_days)
            if isinstance(value, str):
                return result.strftime("%Y-%m-%d")
            return result
        except Exception:
            return value

    def _year_only(self, value: Any) -> Any:
        """Reduce a date to its year; unparseable values are returned unchanged."""
        if value is None:
            return value

        if isinstance(value, (date, datetime)):
            return value.year

        # Best effort for non-date inputs, as in _date_shift.
        try:
            from dateutil import parser

            dt = parser.parse(str(value))
            return dt.year
        except Exception:
            return value

    def _month_year(self, value: Any) -> Any:
        """Reduce a date to ``YYYY-MM``; unparseable values are returned unchanged."""
        if value is None:
            return value

        if isinstance(value, (date, datetime)):
            return f"{value.year}-{value.month:02d}"

        # Best effort for non-date inputs, as in _date_shift.
        try:
            from dateutil import parser

            dt = parser.parse(str(value))
            return f"{dt.year}-{dt.month:02d}"
        except Exception:
            return value
301
+
302
+
303
def create_masking_mapper(mask_configs: list[str]) -> Callable:
    """Build a mapper that applies the configured masks to a data item.

    Args:
        mask_configs: mask specifications in ``column:algorithm[:param]``
            form. When a column is listed more than once the last entry wins.

    Returns:
        A callable taking one item. PyArrow tables are masked via a pandas
        round trip and returned as a new table; dictionaries are masked in
        place and returned; any other type is returned untouched.

    Raises:
        ValueError: if a configuration is malformed or names an unknown
            algorithm.
    """
    engine = MaskingEngine()

    # Parse all configurations up front so bad configs fail before any data flows.
    masks = {}
    for config in mask_configs:
        column, algorithm, param = engine.parse_mask_config(config)
        masks[column] = engine.get_masking_function(algorithm, param)

    # Resolve pyarrow once. Only the import itself is guarded: an ImportError
    # raised later while converting a table (e.g. pandas missing) must not be
    # swallowed, otherwise the table would be passed through unmasked.
    try:
        import pyarrow as pa  # type: ignore
    except ImportError:
        pa = None

    def apply_masks(data: Any) -> Any:
        # Handle PyArrow tables
        if pa is not None and isinstance(data, pa.Table):
            # Convert to pandas for easier manipulation
            df = data.to_pandas()

            for column, mask_func in masks.items():
                if column in df.columns:
                    df[column] = df[column].apply(mask_func)

            # Convert back to PyArrow table
            return pa.Table.from_pandas(df)

        # Handle dictionaries (original behavior)
        if isinstance(data, dict):
            for column, mask_func in masks.items():
                if column in data:
                    try:
                        data[column] = mask_func(data[column])
                    except Exception as e:
                        # NOTE(review): on failure the original value is left
                        # in place, i.e. unmasked — confirm this best-effort
                        # behavior is intended.
                        print(f"Warning: Failed to mask column {column}: {e}")
            return data

        # Return as-is if not a supported type
        return data

    return apply_masks
ingestr/src/mixpanel/__init__.py ADDED
@@ -0,0 +1,62 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.typing import TDataItem
6
+ from dlt.sources import DltResource
7
+
8
+ from .client import MixpanelClient
9
+
10
+
11
@dlt.source(max_table_nesting=0)
def mixpanel_source(
    username: str,
    password: str,
    project_id: str,
    server: str,
    start_date: pendulum.DateTime,
    end_date: pendulum.DateTime | None = None,
) -> Iterable[DltResource]:
    """Define the Mixpanel ``events`` and ``profiles`` resources.

    Both resources are incremental and merge on ``distinct_id``. ``events``
    tracks the integer ``time`` cursor, ``profiles`` tracks ``last_seen``.
    When no end value is set, the current UTC time closes the window.

    Args:
        username: user name for the Mixpanel client.
        password: password for the Mixpanel client.
        project_id: Mixpanel project to read from.
        server: region selector handed to ``MixpanelClient``.
        start_date: initial lower bound for both cursors.
        end_date: optional upper bound for both cursors.

    Returns:
        The ``events`` and ``profiles`` resources.
    """
    client = MixpanelClient(username, password, project_id, server)

    # The events cursor is an integer timestamp, so the bounds are converted here.
    events_end_value = end_date.int_timestamp if end_date else None

    # NOTE(review): events are merged on distinct_id; if several events can
    # share a distinct_id, merge would keep only one of them — confirm the key.
    @dlt.resource(write_disposition="merge", name="events", primary_key="distinct_id")
    def events(
        date=dlt.sources.incremental(
            "time",
            initial_value=start_date.int_timestamp,
            end_value=events_end_value,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        window_end = (
            pendulum.now(tz="UTC")
            if date.end_value is None
            else pendulum.from_timestamp(date.end_value)
        )
        window_start = pendulum.from_timestamp(date.last_value)

        yield from client.fetch_events(window_start, window_end)

    @dlt.resource(write_disposition="merge", primary_key="distinct_id", name="profiles")
    def profiles(
        last_seen=dlt.sources.incremental(
            "last_seen",
            initial_value=start_date,
            end_value=end_date,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        window_end = (
            pendulum.now(tz="UTC")
            if last_seen.end_value is None
            else last_seen.end_value
        )
        window_start = last_seen.last_value

        yield from client.fetch_profiles(window_start, window_end)

    return events, profiles
ingestr/src/mixpanel/client.py ADDED
@@ -0,0 +1,99 @@
1
+ import json
2
+ from typing import Iterable
3
+
4
+ import pendulum
5
+ from dlt.sources.helpers.requests import Client
6
+
7
+
8
class MixpanelClient:
    """HTTP client for the Mixpanel raw export and engage (profiles) endpoints."""

    def __init__(self, username: str, password: str, project_id: str, server: str):
        self.username = username
        self.password = password
        self.project_id = project_id
        # Region selector: "us" and "in" are recognised, anything else is
        # treated as the EU region by the fetch methods.
        self.server = server
        # Status codes are checked explicitly after each request instead.
        self.session = Client(raise_for_status=False).session

    def fetch_events(
        self, start_date: pendulum.DateTime, end_date: pendulum.DateTime
    ) -> Iterable[dict]:
        """Yield exported events between two dates, one dict per event.

        The request is limited by calendar date (``YYYY-MM-DD``). Each event's
        ``properties`` mapping is flattened into the event itself, with a
        leading ``$`` stripped from property names.

        Raises:
            requests.HTTPError: if the export request fails.
        """
        server = {"us": "data", "in": "data-in"}.get(self.server, "data-eu")

        url = f"https://{server}.mixpanel.com/api/2.0/export/"
        params = {
            "project_id": self.project_id,
            "from_date": start_date.format("YYYY-MM-DD"),
            "to_date": end_date.format("YYYY-MM-DD"),
        }
        headers = {
            "accept": "text/plain",
        }
        from requests.auth import HTTPBasicAuth

        auth = HTTPBasicAuth(self.username, self.password)
        # Stream the body: the export is newline-delimited JSON, so lines can
        # be consumed as they arrive instead of loading the whole response.
        resp = self.session.get(
            url, params=params, headers=headers, auth=auth, stream=True
        )
        try:
            resp.raise_for_status()
            for line in resp.iter_lines():
                if not line:
                    continue
                data = json.loads(line.decode())
                if "properties" in data:
                    for key, value in data["properties"].items():
                        if key.startswith("$"):
                            data[key[1:]] = value
                        else:
                            data[key] = value
                    del data["properties"]
                yield data
        finally:
            # Release the connection even if the consumer stops early.
            resp.close()

    def fetch_profiles(
        self, start_date: pendulum.DateTime, end_date: pendulum.DateTime
    ) -> Iterable[dict]:
        """Yield user profiles whose ``$last_seen`` lies within the window.

        Pages through the engage endpoint, reusing the ``session_id`` returned
        by the API. ``$``-prefixed properties are copied onto the profile with
        the prefix stripped, and ``$last_seen`` is parsed into ``last_seen``.

        Raises:
            requests.HTTPError: if a page request fails.
        """
        server = {"us": "", "in": "in."}.get(self.server, "eu.")
        url = f"https://{server}mixpanel.com/api/query/engage"
        headers = {
            "accept": "application/json",
            "content-type": "application/x-www-form-urlencoded",
        }
        from requests.auth import HTTPBasicAuth

        auth = HTTPBasicAuth(self.username, self.password)

        # The filter does not change between pages, so build it once.
        start_str = start_date.format("YYYY-MM-DDTHH:mm:ss")
        end_str = end_date.format("YYYY-MM-DDTHH:mm:ss")
        where = f'properties["$last_seen"] >= "{start_str}" and properties["$last_seen"] <= "{end_str}"'

        page = 0
        session_id = None
        while True:
            params = {"project_id": self.project_id, "page": str(page)}
            if session_id:
                params["session_id"] = session_id
            params["where"] = where
            resp = self.session.post(url, params=params, headers=headers, auth=auth)

            resp.raise_for_status()
            data = resp.json()
            results = data.get("results") or []

            for result in results:
                # NOTE(review): properties without a "$" prefix are not copied
                # onto the profile, unlike fetch_events — confirm intended.
                for key, value in result["$properties"].items():
                    if key.startswith("$"):
                        if key == "$last_seen":
                            result["last_seen"] = pendulum.parse(value)
                        else:
                            result[key[1:]] = value
                result["distinct_id"] = result["$distinct_id"]
                del result["$properties"]
                del result["$distinct_id"]
                yield result

            if not results:
                break
            # A short page is the last one; stopping here saves the extra
            # request that would only return an empty result list.
            page_size = data.get("page_size")
            if isinstance(page_size, int) and len(results) < page_size:
                break
            session_id = data.get("session_id", session_id)

            page += 1