cohorly 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cohorly/__init__.py +462 -0
- cohorly/py.typed +0 -0
- cohorly-0.1.0.dist-info/METADATA +184 -0
- cohorly-0.1.0.dist-info/RECORD +6 -0
- cohorly-0.1.0.dist-info/WHEEL +5 -0
- cohorly-0.1.0.dist-info/top_level.txt +1 -0
cohorly/__init__.py
ADDED
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
"""Cohorly server-side Python SDK.
|
|
2
|
+
|
|
3
|
+
Mirrors the mixpanel-python API surface, targeting a self-hosted Cohorly
|
|
4
|
+
server (`/track`, `/engage`, `/alias`).
|
|
5
|
+
|
|
6
|
+
Basic usage::
|
|
7
|
+
|
|
8
|
+
from cohorly import Cohorly
|
|
9
|
+
|
|
10
|
+
ch = Cohorly("YOUR_PROJECT_TOKEN", api_host="http://localhost:4000")
|
|
11
|
+
ch.track("user-1", "Signed Up", {"plan": "pro"})
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import datetime
|
|
17
|
+
import json
|
|
18
|
+
import random
|
|
19
|
+
import time as _time_module
|
|
20
|
+
import urllib.error
|
|
21
|
+
import urllib.request
|
|
22
|
+
import uuid
|
|
23
|
+
from typing import Any, Callable, Dict, List, Optional, Type
|
|
24
|
+
|
|
25
|
+
__version__ = "0.1.0"
|
|
26
|
+
|
|
27
|
+
__all__ = [
|
|
28
|
+
"Cohorly",
|
|
29
|
+
"Consumer",
|
|
30
|
+
"BufferedConsumer",
|
|
31
|
+
"CohorlyException",
|
|
32
|
+
"DatetimeSerializer",
|
|
33
|
+
"__version__",
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
# Base URL used when the caller does not pass ``api_host``.
DEFAULT_API_HOST = "http://localhost:4000"

# --- Retry contract shared by all Cohorly SDKs ---------------------------
# First backoff delay in seconds; doubled for each consecutive failure.
BASE_RETRY_DELAY_SECONDS = 2.0
# Upper bound for any single backoff delay: 10 minutes, in seconds.
MAX_RETRY_DELAY_SECONDS = 10 * 60.0
# Backoff delays are randomized by +/- this fraction.
RETRY_JITTER = 0.2
# Default per-endpoint cap on buffered messages.
MAX_QUEUE_SIZE = 1000
# Largest batch the server accepts (it rejects batches over 500 events).
MAX_BATCH_SIZE = 500
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
class CohorlyException(Exception):
    """Error raised by the SDK when delivery to the Cohorly server fails.

    Raised for an unknown endpoint name, for a request that still fails after
    the allowed retries, and for a batch or message the server permanently
    rejects.
    """
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class DatetimeSerializer(json.JSONEncoder):
    """``json.JSONEncoder`` that renders ``datetime``/``date`` values as ISO-8601 text.

    Every other unsupported value is handed to the base encoder, which raises
    ``TypeError``.
    """

    def default(self, o: Any) -> Any:
        """Return ``o.isoformat()`` for date-like values, else defer to the base class."""
        is_temporal = isinstance(o, (datetime.date, datetime.datetime))
        if not is_temporal:
            return super().default(o)
        return o.isoformat()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def json_dumps(data: Any, cls: Optional[Type[json.JSONEncoder]] = None) -> str:
    """Serialize ``data`` to compact JSON text.

    :param data: value to serialize.
    :param cls: optional ``json.JSONEncoder`` subclass; ``None`` selects the
        standard encoder.
    :return: JSON with no whitespace after ``,`` or ``:``.
    """
    compact_separators = (",", ":")
    return json.dumps(data, cls=cls, separators=compact_separators)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _now_ms() -> int:
|
|
66
|
+
return int(_time_module.time() * 1000)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class _Response:
|
|
70
|
+
"""Outcome of a single HTTP POST (no exception escapes the transport)."""
|
|
71
|
+
|
|
72
|
+
__slots__ = ("status", "body", "retry_after")
|
|
73
|
+
|
|
74
|
+
def __init__(
|
|
75
|
+
self,
|
|
76
|
+
status: Optional[int],
|
|
77
|
+
body: str = "",
|
|
78
|
+
retry_after: Optional[float] = None,
|
|
79
|
+
) -> None:
|
|
80
|
+
self.status = status # None means network error / timeout
|
|
81
|
+
self.body = body
|
|
82
|
+
self.retry_after = retry_after
|
|
83
|
+
|
|
84
|
+
@property
|
|
85
|
+
def ok(self) -> bool:
|
|
86
|
+
return self.status is not None and 200 <= self.status < 300
|
|
87
|
+
|
|
88
|
+
@property
|
|
89
|
+
def retryable(self) -> bool:
|
|
90
|
+
"""429, any 5xx, or a network error."""
|
|
91
|
+
return self.status is None or self.status == 429 or self.status >= 500
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _parse_retry_after(value: Optional[str]) -> Optional[float]:
|
|
95
|
+
if not value:
|
|
96
|
+
return None
|
|
97
|
+
try:
|
|
98
|
+
return max(float(value), 0.0)
|
|
99
|
+
except ValueError:
|
|
100
|
+
return None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _backoff_delay(
    consecutive_failures: int,
    retry_after: Optional[float],
    jitter_fn: Callable[[], float],
) -> float:
    """Delay before the next attempt, honoring Retry-After when present.

    Exponential: base 2s, doubling per consecutive failure, capped at 10 min,
    with +/-20% jitter. Retry-After (when given) wins, capped at 10 min.

    :param consecutive_failures: number of failures in a row so far; values
        below 1 are treated as the first failure.
    :param retry_after: server-requested delay in seconds, or ``None``.
    :param jitter_fn: returns the fractional jitter applied to the
        exponential delay (not applied when ``retry_after`` is given).
    :return: delay in seconds, never above ``MAX_RETRY_DELAY_SECONDS``.
    """
    if retry_after is not None:
        return min(retry_after, MAX_RETRY_DELAY_SECONDS)
    # Cap the exponent before exponentiating. 2**exponent is an exact int, and
    # multiplying a float by an int beyond the float range raises
    # OverflowError, which an unbounded failure counter would eventually hit.
    # 2**64 doublings is far beyond any delay cap, so results are unchanged.
    max_exponent = 64
    exponent = min(max(consecutive_failures - 1, 0), max_exponent)
    delay = min(BASE_RETRY_DELAY_SECONDS * (2**exponent), MAX_RETRY_DELAY_SECONDS)
    return min(delay * (1.0 + jitter_fn()), MAX_RETRY_DELAY_SECONDS)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class Consumer:
    """Synchronous consumer: sends every message immediately.

    On 429/5xx/network errors it retries inline with the Cohorly backoff
    schedule (base 2s doubling, +/-20% jitter, Retry-After honored) up to
    ``retry_limit`` times, then raises :class:`CohorlyException`. Any other
    HTTP error raises immediately.

    :param api_host: base URL of the Cohorly server; trailing slashes are
        stripped.
    :param request_timeout: per-request timeout in seconds.
    :param retry_limit: maximum number of inline retries after the first
        attempt.
    """

    # Logical endpoint name -> URL path appended to ``api_host``.
    _ENDPOINTS = {
        "events": "/track",
        "people": "/engage",
        "alias": "/alias",
    }

    def __init__(
        self,
        api_host: str = DEFAULT_API_HOST,
        request_timeout: float = 10.0,
        retry_limit: int = 4,
    ) -> None:
        self._api_host = api_host.rstrip("/")
        self._request_timeout = request_timeout
        self._retry_limit = retry_limit
        # Injection points for tests.
        self._sleep: Callable[[float], None] = _time_module.sleep
        self._jitter: Callable[[], float] = lambda: random.uniform(
            -RETRY_JITTER, RETRY_JITTER
        )

    def _request_url(self, endpoint: str) -> str:
        """Return the full URL for a known endpoint name."""
        return self._api_host + self._ENDPOINTS[endpoint]

    def _check_endpoint(self, endpoint: str) -> None:
        """Raise :class:`CohorlyException` if ``endpoint`` is not a known name."""
        if endpoint not in self._ENDPOINTS:
            raise CohorlyException(
                "unknown endpoint {!r} (valid: {})".format(
                    endpoint, ", ".join(sorted(self._ENDPOINTS))
                )
            )

    def send(self, endpoint: str, json_message: str) -> None:
        """Immediately deliver one JSON message to the given endpoint.

        :param endpoint: one of the keys of ``_ENDPOINTS``.
        :param json_message: already-serialized JSON text to POST.
        :raises CohorlyException: for an unknown endpoint, a non-retryable
            response, or a retryable failure once ``retry_limit`` retries
            have been used.
        """
        self._check_endpoint(endpoint)
        url = self._request_url(endpoint)
        failures = 0
        while True:
            response = self._post(url, json_message)
            if response.ok:
                return
            if response.retryable and failures < self._retry_limit:
                failures += 1
                self._sleep(_backoff_delay(failures, response.retry_after, self._jitter))
                continue
            raise CohorlyException(
                "POST {} failed (status={}): {}".format(
                    url, response.status, response.body
                )
            )

    def flush(self) -> bool:
        """No-op for the synchronous consumer (messages are never buffered)."""
        return True

    def _post(self, url: str, payload: str) -> _Response:
        """POST ``payload`` as JSON and describe the outcome as a ``_Response``.

        Transport-level failures are reported as a response with
        ``status=None`` instead of being raised, so callers can apply the
        retry contract uniformly.
        """
        # Local import: http.client is already loaded by urllib.request, but
        # the file's import block does not bind the name.
        import http.client

        data = payload.encode("utf-8")
        request = urllib.request.Request(
            url,
            data=data,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        try:
            with urllib.request.urlopen(request, timeout=self._request_timeout) as raw:
                return _Response(raw.status, raw.read().decode("utf-8", "replace"))
        except urllib.error.HTTPError as exc:
            try:
                body = exc.read().decode("utf-8", "replace")
            except Exception:
                # Best effort: the error body is only used for diagnostics.
                body = ""
            headers = exc.headers
            retry_after = (
                _parse_retry_after(headers.get("Retry-After"))
                if headers is not None
                else None
            )
            return _Response(exc.code, body, retry_after)
        except (
            urllib.error.URLError,
            http.client.HTTPException,
            OSError,
            ValueError,
        ) as exc:
            # http.client.HTTPException (e.g. BadStatusLine, IncompleteRead) is
            # not an OSError and urllib does not wrap it; without catching it
            # here a malformed or truncated response would escape the transport.
            return _Response(None, str(exc))
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class BufferedConsumer(Consumer):
    """Batching consumer with the full Cohorly retry contract.

    Messages accumulate in per-endpoint buffers and are sent in batches of up
    to ``max_size`` (default 50, hard cap 500 - the server batch limit). A
    buffer auto-flushes when it reaches ``max_size`` (or ``max_queue_size``
    if that is smaller); call :meth:`flush` to drain everything (e.g. before
    process exit).

    Retry contract:

    - 429 / 5xx / network error: queue kept; exponential backoff (base 2s,
      doubling, cap 10 min, +/-20% jitter), Retry-After honored when present.
      Flush attempts before the backoff deadline are no-ops.
    - 413: flush batch size halved (floor 1), queue kept, retried immediately.
      A single message over the server body cap is dropped (raises).
    - 400: that batch is dropped permanently (raises CohorlyException).
    - 401: queue kept, backoff forced to the maximum delay.
    - Buffers capped at ``max_queue_size`` (default 1000) messages each; the
      OLDEST message is dropped on overflow.

    :param max_size: messages per batch; clamped to ``1..MAX_BATCH_SIZE``.
    :param api_host: base URL of the Cohorly server.
    :param request_timeout: per-request timeout in seconds.
    :param max_queue_size: per-endpoint buffer cap; values below 1 become 1.
    """

    def __init__(
        self,
        max_size: int = 50,
        api_host: str = DEFAULT_API_HOST,
        request_timeout: float = 10.0,
        max_queue_size: int = MAX_QUEUE_SIZE,
    ) -> None:
        # retry_limit=0: retries are driven by the backoff deadline below
        # rather than by the inline retry loop of the synchronous consumer.
        super().__init__(
            api_host=api_host, request_timeout=request_timeout, retry_limit=0
        )
        self._max_size = min(max(max_size, 1), MAX_BATCH_SIZE)
        self._max_queue_size = max(max_queue_size, 1)
        self._buffers: Dict[str, List[str]] = {name: [] for name in self._ENDPOINTS}
        # Current batch size; shrinks when the server answers 413.
        self._flush_batch_size = self._max_size
        # Backoff state shared by all endpoints.
        self._consecutive_failures = 0
        self._next_attempt_at = 0.0
        # Injection point for tests.
        # NOTE(review): this is wall-clock time, so system clock adjustments
        # shift the backoff deadline; a monotonic clock may be preferable.
        self._clock: Callable[[], float] = _time_module.time

    def send(self, endpoint: str, json_message: str) -> None:
        """Buffer one JSON message; auto-flushes when the buffer is full.

        :param endpoint: one of the keys of ``_ENDPOINTS``.
        :param json_message: already-serialized JSON text.
        :raises CohorlyException: for an unknown endpoint, or when the
            triggered flush has a batch permanently rejected.
        """
        self._check_endpoint(endpoint)
        buf = self._buffers[endpoint]
        buf.append(json_message)
        overflow = len(buf) - self._max_queue_size
        if overflow > 0:
            del buf[:overflow]  # drop oldest
        # The buffer never grows past max_queue_size, so the auto-flush
        # threshold must not exceed it; otherwise a queue cap smaller than
        # max_size would never flush and would only ever drop messages.
        if len(buf) >= min(self._max_size, self._max_queue_size):
            self._flush_endpoint(endpoint)

    def flush(self) -> bool:
        """Attempt to drain all buffers.

        Returns True when every buffer is empty afterwards; False when
        messages remain queued (backoff window active or delivery failing).
        Raises CohorlyException when the server permanently rejects a batch
        (the rejected batch is dropped, the rest of the queue is kept).
        """
        drained = True
        for endpoint in self._ENDPOINTS:
            drained = self._flush_endpoint(endpoint) and drained
        return drained

    # -- internals ---------------------------------------------------------

    def _flush_endpoint(self, endpoint: str) -> bool:
        """Send batches from one endpoint's buffer until it is empty or blocked.

        :return: True when the buffer is empty; False when messages remain
            because a backoff window is active or a retryable/401 failure
            occurred.
        :raises CohorlyException: when a batch or a single oversized message
            is dropped as permanently undeliverable.
        """
        buf = self._buffers[endpoint]
        url = self._request_url(endpoint)
        while buf:
            if self._clock() < self._next_attempt_at:
                return False  # inside a backoff window; keep the queue
            if endpoint == "alias":
                batch_len = 1  # /alias takes a single object, not an array
                payload = buf[0]
            else:
                batch_len = min(len(buf), max(self._flush_batch_size, 1))
                # Messages are already JSON text, so join them into an array.
                payload = "[" + ",".join(buf[:batch_len]) + "]"
            response = self._post(url, payload)
            if response.ok:
                del buf[:batch_len]
                self._consecutive_failures = 0
                self._next_attempt_at = 0.0
                continue
            if response.retryable:
                self._register_failure(retry_after=response.retry_after)
                return False
            if response.status == 401:
                # Invalid token: keep the queue, back off at the max delay.
                self._register_failure(delay=MAX_RETRY_DELAY_SECONDS)
                return False
            if response.status == 413:
                if batch_len > 1:
                    # Halve the batch size (floor 1) and retry immediately.
                    self._flush_batch_size = max(batch_len // 2, 1)
                    continue
                # A single message is over the server body cap; it can never
                # succeed, so drop it rather than wedging the queue.
                del buf[:1]
                raise CohorlyException(
                    "message exceeds the server payload limit and was dropped"
                )
            # 400 (or another non-retryable client error): drop the batch.
            del buf[:batch_len]
            raise CohorlyException(
                "POST {} rejected batch (status={}): {}".format(
                    url, response.status, response.body
                )
            )
        return True

    def _register_failure(
        self,
        retry_after: Optional[float] = None,
        delay: Optional[float] = None,
    ) -> None:
        """Count one failure and set the deadline before which flushes are no-ops.

        :param retry_after: server-requested delay, used when ``delay`` is
            not given.
        :param delay: explicit delay in seconds that bypasses the backoff
            computation.
        """
        self._consecutive_failures += 1
        if delay is None:
            delay = _backoff_delay(
                self._consecutive_failures, retry_after, self._jitter
            )
        self._next_attempt_at = self._clock() + delay
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
class Cohorly:
    """Cohorly tracking client (mirrors the ``Mixpanel`` class).

    :param token: project token (from the Cohorly dashboard).
    :param api_host: base URL of the Cohorly server; only used to build the
        default consumer.
    :param consumer: message consumer; when omitted a synchronous
        :class:`Consumer` pointed at ``api_host`` is created. Pass a
        :class:`BufferedConsumer` for batching.
    :param serializer: JSON encoder class used to serialize messages.
    """

    def __init__(
        self,
        token: str,
        api_host: str = DEFAULT_API_HOST,
        consumer: Optional[Consumer] = None,
        serializer: Type[json.JSONEncoder] = DatetimeSerializer,
    ) -> None:
        if consumer is None:
            consumer = Consumer(api_host)
        self._consumer = consumer
        self._serializer = serializer
        self._token = token

    # -- events ------------------------------------------------------------

    def track(
        self,
        distinct_id: str,
        event_name: str,
        properties: Optional[Dict[str, Any]] = None,
        meta: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Record an event.

        The defaults ``token``, ``distinct_id``, ``time`` (unix ms),
        ``$insert_id``, ``$lib`` and ``$lib_version`` are written first and
        the caller's ``properties`` are merged over them, so a custom
        ``time`` or ``$insert_id`` (for deduplication) may be supplied.
        ``meta`` keys are merged into the top level of the event.
        """
        stamped: Dict[str, Any] = {
            "token": self._token,
            "distinct_id": distinct_id,
            "time": _now_ms(),
            "$insert_id": uuid.uuid4().hex,
            "$lib": "python",
            "$lib_version": __version__,
        }
        stamped.update(properties or {})
        envelope: Dict[str, Any] = {"event": event_name, "properties": stamped}
        envelope.update(meta or {})
        serialized = json_dumps(envelope, cls=self._serializer)
        self._consumer.send("events", serialized)

    def import_data(
        self,
        distinct_id: str,
        event_name: str,
        timestamp: int,
        properties: Optional[Dict[str, Any]] = None,
        meta: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Record a historical event with an explicit timestamp.

        ``timestamp`` is unix **milliseconds** (Cohorly convention - note this
        differs from mixpanel-python's seconds). The event goes through
        :meth:`track`, i.e. the same ``/track`` endpoint; Cohorly has no
        separate import endpoint or api_key.
        """
        # Copy so the caller's dict is not mutated by the ``time`` override.
        historical = {} if not properties else dict(properties)
        historical["time"] = int(timestamp)
        self.track(distinct_id, event_name, historical, meta)

    def alias(self, alias_id: str, original: str) -> None:
        """Link ``alias_id`` to the ``original`` distinct_id."""
        link = {
            "alias": alias_id,
            "distinct_id": original,
            "token": self._token,
        }
        serialized = json_dumps(link, cls=self._serializer)
        self._consumer.send("alias", serialized)

    # -- people ------------------------------------------------------------

    def people_set(
        self,
        distinct_id: str,
        properties: Dict[str, Any],
        meta: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Set profile properties (overwrites existing values)."""
        operation = {"distinct_id": distinct_id, "$set": properties}
        self.people_update(operation, meta)

    def people_set_once(
        self,
        distinct_id: str,
        properties: Dict[str, Any],
        meta: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Set profile properties only if they are not already set."""
        operation = {"distinct_id": distinct_id, "$set_once": properties}
        self.people_update(operation, meta)

    def people_increment(
        self,
        distinct_id: str,
        properties: Dict[str, float],
        meta: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Increment numeric profile properties by the given amounts."""
        operation = {"distinct_id": distinct_id, "$add": properties}
        self.people_update(operation, meta)

    def people_unset(
        self,
        distinct_id: str,
        properties: List[str],
        meta: Optional[Dict[str, Any]] = None,
    ) -> None:
        """Remove the named properties from a profile."""
        operation = {"distinct_id": distinct_id, "$unset": properties}
        self.people_update(operation, meta)

    def people_delete(
        self, distinct_id: str, meta: Optional[Dict[str, Any]] = None
    ) -> None:
        """Delete a profile entirely."""
        operation = {"distinct_id": distinct_id, "$delete": True}
        self.people_update(operation, meta)

    def people_update(
        self, message: Dict[str, Any], meta: Optional[Dict[str, Any]] = None
    ) -> None:
        """Send a raw engage operation (must include ``distinct_id``).

        The project token is written first, then ``message`` and finally
        ``meta`` are merged over it.
        """
        envelope: Dict[str, Any] = {"token": self._token}
        envelope.update(message)
        envelope.update(meta or {})
        serialized = json_dumps(envelope, cls=self._serializer)
        self._consumer.send("people", serialized)
|
cohorly/py.typed
ADDED
|
File without changes
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: cohorly
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Official Cohorly server-side Python SDK (Mixpanel-style product analytics, self-hosted)
|
|
5
|
+
Author: Cohorly
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Keywords: analytics,cohorly,mixpanel,tracking,events
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
+
Classifier: Operating System :: OS Independent
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
14
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
15
|
+
Classifier: Typing :: Typed
|
|
16
|
+
Requires-Python: >=3.8
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
Provides-Extra: dev
|
|
19
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
20
|
+
|
|
21
|
+
# Cohorly Python SDK
|
|
22
|
+
|
|
23
|
+
The official server-side Python SDK for [Cohorly](../../README.md), a
|
|
24
|
+
self-hosted Mixpanel-style product analytics platform. The API mirrors
|
|
25
|
+
`mixpanel-python`, so migrating existing code is mostly a matter of swapping
|
|
26
|
+
the import and pointing at your Cohorly server.
|
|
27
|
+
|
|
28
|
+
- Zero runtime dependencies (stdlib `urllib` only)
|
|
29
|
+
- Python 3.8+, fully typed (`py.typed`)
|
|
30
|
+
- Synchronous `Consumer` and batching `BufferedConsumer` with the Cohorly
|
|
31
|
+
retry contract (exponential backoff, `Retry-After`, bounded queue)
|
|
32
|
+
|
|
33
|
+
## Installation
|
|
34
|
+
|
|
35
|
+
```bash
|
|
36
|
+
pip install cohorly
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
Or from this repo:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
pip install ./sdks/python
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Quickstart
|
|
46
|
+
|
|
47
|
+
```python
|
|
48
|
+
from cohorly import Cohorly
|
|
49
|
+
|
|
50
|
+
ch = Cohorly("YOUR_PROJECT_TOKEN", api_host="http://localhost:4000")
|
|
51
|
+
|
|
52
|
+
# Track an event
|
|
53
|
+
ch.track("user-1", "Signed Up", {"plan": "pro", "source": "landing"})
|
|
54
|
+
|
|
55
|
+
# Link an alias to an existing distinct_id
|
|
56
|
+
ch.alias("user-1", "anon-7f3a")
|
|
57
|
+
|
|
58
|
+
# Update a user profile
|
|
59
|
+
ch.people_set("user-1", {"$first_name": "Ada", "plan": "pro"})
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
The project token comes from your Cohorly dashboard (Settings -> Projects).
|
|
63
|
+
`api_host` is the base URL of your Cohorly server (default
|
|
64
|
+
`http://localhost:4000`).
|
|
65
|
+
|
|
66
|
+
## Tracking events
|
|
67
|
+
|
|
68
|
+
`track(distinct_id, event_name, properties=None, meta=None)` stamps these
|
|
69
|
+
default properties before sending:
|
|
70
|
+
|
|
71
|
+
| Property | Value |
|
|
72
|
+
| -------------- | --------------------------------------- |
|
|
73
|
+
| `distinct_id` | the id you pass |
|
|
74
|
+
| `time` | current unix time in **milliseconds** |
|
|
75
|
+
| `$insert_id` | random uuid4 hex (server-side dedup) |
|
|
76
|
+
| `$lib` | `"python"` |
|
|
77
|
+
| `$lib_version` | SDK version |
|
|
78
|
+
| `token` | your project token (stripped by server) |
|
|
79
|
+
|
|
80
|
+
Your `properties` merge over the defaults, so you may supply a custom `time`
|
|
81
|
+
or `$insert_id` (e.g. for idempotent re-sends):
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
ch.track("user-1", "Order Completed", {
|
|
85
|
+
"amount": 42.5,
|
|
86
|
+
"$insert_id": f"order-{order.id}", # dedup key
|
|
87
|
+
})
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Historical imports
|
|
91
|
+
|
|
92
|
+
Use `import_data` to record events with an explicit timestamp (unix
|
|
93
|
+
milliseconds - Cohorly's convention throughout):
|
|
94
|
+
|
|
95
|
+
```python
|
|
96
|
+
ch.import_data("user-1", "Legacy Signup", 1600000000000, {"source": "csv"})
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
Unlike Mixpanel there is no separate import endpoint, API secret, or 5-day
|
|
100
|
+
cutoff - it is the same `/track` pipeline.
|
|
101
|
+
|
|
102
|
+
## User profiles (people)
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
ch.people_set("user-1", {"plan": "pro"}) # set/overwrite
|
|
106
|
+
ch.people_set_once("user-1", {"created": "..."}) # only if unset
|
|
107
|
+
ch.people_increment("user-1", {"logins": 1}) # numeric add
|
|
108
|
+
ch.people_unset("user-1", ["plan"]) # remove properties
|
|
109
|
+
ch.people_delete("user-1") # delete the profile
|
|
110
|
+
ch.people_update({"distinct_id": "user-1", "$set": {"x": 1}}) # raw op
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
These map to the Cohorly `/engage` operations `$set`, `$set_once`, `$add`,
|
|
114
|
+
`$unset`, `$delete`.
|
|
115
|
+
|
|
116
|
+
## Consumers
|
|
117
|
+
|
|
118
|
+
By default every call sends immediately via a synchronous `Consumer`. For
|
|
119
|
+
higher throughput use `BufferedConsumer`, which batches messages (default 50
|
|
120
|
+
per request, server max 500) and implements the Cohorly retry contract:
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from cohorly import Cohorly, BufferedConsumer
|
|
124
|
+
|
|
125
|
+
consumer = BufferedConsumer(max_size=50, api_host="http://localhost:4000")
|
|
126
|
+
ch = Cohorly("YOUR_PROJECT_TOKEN", consumer=consumer)
|
|
127
|
+
|
|
128
|
+
for user in users:
|
|
129
|
+
ch.track(user.id, "Backfill Event", {"batch": True})
|
|
130
|
+
|
|
131
|
+
consumer.flush() # IMPORTANT: drain remaining messages before exit
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Retry behavior (BufferedConsumer):
|
|
135
|
+
|
|
136
|
+
- **429 / 5xx / network error** - the queue is kept and retried with
|
|
137
|
+
exponential backoff: base 2s, doubling per consecutive failure, capped at
|
|
138
|
+
10 minutes, +/-20% jitter. A `Retry-After` header is honored when present.
|
|
139
|
+
- **413** - the flush batch size is halved (floor 1) and retried immediately; a single message that is still too large is dropped and `CohorlyException` is raised.
|
|
140
|
+
- **400** (or any other non-retryable client error) - the rejected batch is dropped and `CohorlyException` is raised.
|
|
141
|
+
- **401** (invalid token) - the queue is kept; backoff at the maximum delay.
|
|
142
|
+
- The in-memory queue is capped at 1000 messages per endpoint; the oldest
|
|
143
|
+
message is dropped on overflow.
|
|
144
|
+
|
|
145
|
+
Cohorly batch rejections are atomic (nothing partially inserted), so retrying
|
|
146
|
+
the same payload is always safe.
|
|
147
|
+
|
|
148
|
+
The synchronous `Consumer(api_host, request_timeout=10, retry_limit=4)`
|
|
149
|
+
retries 429/5xx/network errors inline with the same backoff schedule up to
|
|
150
|
+
`retry_limit` times, then raises `CohorlyException`.
|
|
151
|
+
|
|
152
|
+
Because the token travels with each message, several `Cohorly` instances with
|
|
153
|
+
different project tokens can share one consumer.
|
|
154
|
+
|
|
155
|
+
## Error handling
|
|
156
|
+
|
|
157
|
+
Delivery failures raise `cohorly.CohorlyException`:
|
|
158
|
+
|
|
159
|
+
```python
|
|
160
|
+
from cohorly import Cohorly, CohorlyException
|
|
161
|
+
|
|
162
|
+
try:
|
|
163
|
+
ch.track("user-1", "event")
|
|
164
|
+
except CohorlyException as exc:
|
|
165
|
+
log.warning("cohorly delivery failed: %s", exc)
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
## Serialization
|
|
169
|
+
|
|
170
|
+
Messages are JSON. `datetime`/`date` values are serialized to ISO-8601 by the
|
|
171
|
+
default `DatetimeSerializer`; pass your own `json.JSONEncoder` subclass via
|
|
172
|
+
`Cohorly(..., serializer=MyEncoder)` for custom types.
|
|
173
|
+
|
|
174
|
+
## Development
|
|
175
|
+
|
|
176
|
+
```bash
|
|
177
|
+
cd sdks/python
|
|
178
|
+
python3 -m venv .venv
|
|
179
|
+
.venv/bin/pip install pytest
|
|
180
|
+
.venv/bin/pytest
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
Tests run against the source tree (no install needed) and use a mocked
|
|
184
|
+
transport - no network required.
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
cohorly/__init__.py,sha256=7n0CshKraW3uetT0q4Dk96gtSo3skzR5sQYNR7z-aGA,16527
|
|
2
|
+
cohorly/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
cohorly-0.1.0.dist-info/METADATA,sha256=PxNbH0LLOihrEqCtLcTMpt3ZoEV31fvCX9kCmAZ27FE,6015
|
|
4
|
+
cohorly-0.1.0.dist-info/WHEEL,sha256=K260EYznzXsJYBQGqmI8VTxEdiZYNvDZwW9cBh9-_MA,91
|
|
5
|
+
cohorly-0.1.0.dist-info/top_level.txt,sha256=tcBbDP05wFRN4DPIvKLX-pdAW-3HJ_9HINVujBruodE,8
|
|
6
|
+
cohorly-0.1.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
cohorly
|