probalytics 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,31 @@
1
+ from probalytics._frames import FrameKind
2
+ from probalytics.client import ProbalyticsClient
3
+ from probalytics.clickhouse import ClickHouseClient
4
+ from probalytics.models import (
5
+ Fill,
6
+ Market,
7
+ MarketStatus,
8
+ MarketType,
9
+ OrderSide,
10
+ OrderbookSnapshot,
11
+ Outcome,
12
+ Platform,
13
+ Resolution,
14
+ ResolutionType,
15
+ )
16
+
17
+ __all__ = [
18
+ "Fill",
19
+ "FrameKind",
20
+ "Market",
21
+ "MarketStatus",
22
+ "MarketType",
23
+ "OrderSide",
24
+ "OrderbookSnapshot",
25
+ "Outcome",
26
+ "ClickHouseClient",
27
+ "Platform",
28
+ "ProbalyticsClient",
29
+ "Resolution",
30
+ "ResolutionType",
31
+ ]
@@ -0,0 +1,27 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Sequence
4
+ from uuid import UUID
5
+
6
+ from probalytics.models import Market
7
+
8
+ StringFilter = str | Sequence[str] | None
9
+ IDFilter = str | UUID | Sequence[str | UUID] | None
10
+
11
+
12
+ def market_filter(
13
+ market: Market | str | UUID | None,
14
+ market_id: IDFilter,
15
+ market_platform_id: StringFilter,
16
+ platform: StringFilter,
17
+ ) -> tuple[IDFilter, StringFilter, StringFilter]:
18
+ if isinstance(market, Market):
19
+ return market.id, market.platform_id, market.platform
20
+ if isinstance(market, UUID):
21
+ return market, market_platform_id, platform
22
+ if isinstance(market, str):
23
+ try:
24
+ return UUID(market), market_platform_id, platform
25
+ except ValueError:
26
+ return market_id, market, platform
27
+ return market_id, market_platform_id, platform
probalytics/_frames.py ADDED
@@ -0,0 +1,62 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+ from datetime import datetime, timezone
5
+ from typing import Any, Literal, cast
6
+
7
# Dataframe libraries a result can be returned as.
FrameKind = Literal["polars", "pandas"]
VALID_FRAMES = ("polars", "pandas")


def validate_frame(frame: str) -> FrameKind:
    """Return ``frame`` unchanged when it names a supported dataframe library.

    Raises:
        ValueError: If ``frame`` is not one of ``VALID_FRAMES``.
    """
    if frame in VALID_FRAMES:
        # cast() only narrows the static type; the value itself is returned.
        return cast(FrameKind, frame)
    raise ValueError("frame must be 'polars' or 'pandas'")
15
+
16
+
17
+ def normalize_time(value: datetime | str | None) -> datetime | None:
18
+ if value is None:
19
+ return None
20
+ if isinstance(value, str):
21
+ original = value
22
+ value = value.strip()
23
+ try:
24
+ value = datetime.fromisoformat(value.replace("Z", "+00:00").replace("z", "+00:00"))
25
+ except ValueError as error:
26
+ raise ValueError(f"invalid datetime string: {original!r}") from error
27
+ if value.tzinfo is None:
28
+ value = value.replace(tzinfo=timezone.utc)
29
+ return value.astimezone(timezone.utc)
30
+
31
+
32
def dataframe_to_frame(df: Any, frame: FrameKind = "polars") -> Any:
    """Return ``df`` as a polars or pandas ``DataFrame``.

    Args:
        df: An existing polars/pandas dataframe, or any data accepted by the
            target library's ``DataFrame`` constructor.
        frame: ``"polars"`` (default) or ``"pandas"``.

    Returns:
        ``df`` itself when it already has the requested type, otherwise a
        converted or newly constructed dataframe.

    Raises:
        ValueError: If ``frame`` is not a supported kind.
        ImportError: If ``"pandas"`` is requested but pandas is not installed.
    """
    kind = validate_frame(frame)

    if kind == "pandas":
        try:
            import pandas as pd
        except ImportError as error:
            raise ImportError("pandas support requires installing probalytics[pandas]") from error

        import polars as pl

        if isinstance(df, pd.DataFrame):
            return df
        return df.to_pandas() if isinstance(df, pl.DataFrame) else pd.DataFrame(df)

    if kind == "polars":
        import polars as pl

        if isinstance(df, pl.DataFrame):
            return df
        # pandas is only consulted when something else has already imported
        # it, so asking for polars never triggers a pandas import here.
        pandas_cls = _loaded_class("pandas", "DataFrame")
        if pandas_cls is not None and isinstance(df, pandas_cls):
            return pl.from_pandas(df)
        return pl.DataFrame(df)

    raise ValueError("frame must be 'polars' or 'pandas'")
57
+
58
+
59
+ def _loaded_class(module_name: str, class_name: str) -> type[Any] | None:
60
+ module = sys.modules.get(module_name)
61
+ value = getattr(module, class_name, None)
62
+ return value if isinstance(value, type) else None
@@ -0,0 +1,441 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Sequence
4
+ from datetime import datetime
5
+ from enum import Enum
6
+ from typing import Any
7
+ from uuid import UUID
8
+
9
+ from probalytics._frames import FrameKind, dataframe_to_frame, normalize_time, validate_frame
10
+ from probalytics._filters import IDFilter, StringFilter, market_filter
11
+ from probalytics.models import Fill, Market
12
+
13
# ClickHouse named-tuple types that the frame queries CAST nested values to.
_OUTCOME_TUPLE_TYPE = "Tuple(id String, platform_id String, name String, index UInt8)"
_PAYOUT_TUPLE_TYPE = "Tuple(outcome_id String, payout Decimal128(18))"

# Single ``outcome`` column, with its id rendered through toString().
OUTCOME_FRAME_EXPR = (
    "CAST((toString(outcome.id), outcome.platform_id, outcome.name, outcome.index), "
    f"'{_OUTCOME_TUPLE_TYPE}')"
)
# The same cast applied to every element of the ``outcomes`` array.
OUTCOMES_FRAME_EXPR = f"arrayMap(outcome -> {OUTCOME_FRAME_EXPR}, outcomes)"
# Every element of ``resolution_outcome_payouts`` with its outcome id as a string.
RESOLUTION_PAYOUTS_FRAME_EXPR = (
    "arrayMap(payout -> "
    f"CAST((toString(payout.outcome_id), payout.payout), '{_PAYOUT_TUPLE_TYPE}'), "
    "resolution_outcome_payouts)"
)
27
+
28
+
29
+ class ClickHouseClient:
30
+ def __init__(
31
+ self,
32
+ *,
33
+ host: str,
34
+ username: str,
35
+ password: str,
36
+ database: str = "probalytics",
37
+ secure: bool = True,
38
+ port: int | None = None,
39
+ compression: bool | str = "lz4",
40
+ tcp_keepalive: bool | tuple[int, int, int] = True,
41
+ **kwargs: Any,
42
+ ) -> None:
43
+ from clickhouse_driver import Client
44
+
45
+ settings = kwargs.pop("settings", {}).copy()
46
+ settings.setdefault("allow_experimental_object_type", 1)
47
+ settings.setdefault("namedtuple_as_json", True)
48
+ client_kwargs = {
49
+ "host": host,
50
+ "user": username,
51
+ "password": password,
52
+ "database": database,
53
+ "secure": secure,
54
+ "compression": compression,
55
+ "tcp_keepalive": tcp_keepalive,
56
+ "settings": settings,
57
+ **kwargs,
58
+ }
59
+ if port is not None:
60
+ client_kwargs["port"] = port
61
+ self.client = Client(**client_kwargs)
62
+
63
+ def close(self) -> None:
64
+ self.client.disconnect()
65
+
66
+ def __enter__(self) -> "ClickHouseClient":
67
+ return self
68
+
69
+ def __exit__(self, exc_type: Any, exc: Any, traceback: Any) -> None:
70
+ self.close()
71
+
72
+ def query(
73
+ self,
74
+ sql: str,
75
+ *,
76
+ parameters: dict[str, Any] | None = None,
77
+ frame: FrameKind = "polars",
78
+ ) -> Any:
79
+ return self._query_frame(
80
+ sql,
81
+ parameters or {},
82
+ validate_frame(frame),
83
+ )
84
+
85
+ def markets(
86
+ self,
87
+ *,
88
+ start_time: datetime | str | None = None,
89
+ end_time: datetime | str | None = None,
90
+ status: StringFilter = None,
91
+ platform: StringFilter = None,
92
+ market_id: IDFilter = None,
93
+ market_platform_id: StringFilter = None,
94
+ limit: int = 1000,
95
+ max_rows: int | None = None,
96
+ ) -> list[Market]:
97
+ limit = _limit(limit, max_rows)
98
+ where, params = _where(
99
+ [
100
+ ("created_at >= %(start_time)s", "start_time", normalize_time(start_time)),
101
+ ("created_at <= %(end_time)s", "end_time", normalize_time(end_time)),
102
+ _filter("status", "status", status),
103
+ _filter("platform", "platform", platform),
104
+ _filter("id", "market_id", market_id),
105
+ _filter("platform_id", "market_platform_id", market_platform_id),
106
+ ]
107
+ )
108
+ params["limit"] = limit
109
+ query = f"""
110
+ SELECT *
111
+ FROM markets FINAL
112
+ {where}
113
+ ORDER BY created_at DESC, id DESC
114
+ LIMIT %(limit)s
115
+ """
116
+ return [Market.model_validate(_market_row(row)) for row in self._json_rows(query, params)]
117
+
118
+ def markets_frame(self, *, frame: FrameKind = "polars", **filters: Any) -> Any:
119
+ return self._markets_arrow(frame=frame, **filters)
120
+
121
+ def fills(
122
+ self,
123
+ *,
124
+ start_time: datetime | str | None = None,
125
+ end_time: datetime | str | None = None,
126
+ platform: StringFilter = None,
127
+ market: Market | str | UUID | None = None,
128
+ market_id: IDFilter = None,
129
+ market_platform_id: StringFilter = None,
130
+ taker_side: StringFilter = None,
131
+ trader_id: StringFilter = None,
132
+ limit: int = 1000,
133
+ max_rows: int | None = None,
134
+ ) -> list[Fill]:
135
+ limit = _limit(limit, max_rows)
136
+ market_id, market_platform_id, platform = market_filter(market, market_id, market_platform_id, platform)
137
+ where, params = _where(
138
+ [
139
+ ("timestamp >= %(start_time)s", "start_time", normalize_time(start_time)),
140
+ ("timestamp <= %(end_time)s", "end_time", normalize_time(end_time)),
141
+ _filter("platform", "platform", platform),
142
+ _filter("market_id", "market_id", market_id),
143
+ _filter("market_platform_id", "market_platform_id", market_platform_id),
144
+ _filter("taker_side", "taker_side", taker_side),
145
+ _filter(
146
+ "trader_id",
147
+ "trader_id",
148
+ trader_id,
149
+ eq_sql="(taker_id = %(trader_id)s OR maker_id = %(trader_id)s)",
150
+ in_sql="(taker_id IN %(trader_id)s OR maker_id IN %(trader_id)s)",
151
+ ),
152
+ ]
153
+ )
154
+ params["limit"] = limit
155
+ query = f"""
156
+ SELECT
157
+ id, market_id, market_platform_id, platform, platform_id, outcome,
158
+ size, price, normalized_price, taker_side, taker_cash_flow,
159
+ maker_cash_flow, taker_id, maker_id, fee, timestamp
160
+ FROM fills
161
+ {where}
162
+ ORDER BY timestamp ASC, id ASC
163
+ LIMIT %(limit)s
164
+ """
165
+ return [Fill.model_validate(_fill_row(row)) for row in self._json_rows(query, params)]
166
+
167
+ def fills_frame(self, *, frame: FrameKind = "polars", **filters: Any) -> Any:
168
+ return self._fills_arrow(frame=frame, **filters)
169
+
170
+ def orderbook_snapshots(
171
+ self,
172
+ *,
173
+ start_time: datetime | str | None = None,
174
+ end_time: datetime | str | None = None,
175
+ platform: StringFilter = None,
176
+ market: Market | str | UUID | None = None,
177
+ market_id: IDFilter = None,
178
+ market_platform_id: StringFilter = None,
179
+ limit: int = 1000,
180
+ frame: FrameKind = "polars",
181
+ ) -> Any:
182
+ limit = _limit(limit, None)
183
+ market_id, market_platform_id, platform = market_filter(market, market_id, market_platform_id, platform)
184
+ where, params = _where(
185
+ [
186
+ ("timestamp >= %(start_time)s", "start_time", normalize_time(start_time)),
187
+ ("timestamp <= %(end_time)s", "end_time", normalize_time(end_time)),
188
+ _filter("platform", "platform", platform),
189
+ _filter("market_id", "market_id", market_id),
190
+ _filter("market_platform_id", "market_platform_id", market_platform_id),
191
+ ]
192
+ )
193
+ params["limit"] = limit
194
+ query = f"""
195
+ SELECT
196
+ market_id, market_platform_id, platform,
197
+ {OUTCOME_FRAME_EXPR} AS outcome,
198
+ bids, asks, timestamp
199
+ FROM orderbook_snapshots
200
+ {where}
201
+ ORDER BY timestamp ASC
202
+ LIMIT %(limit)s
203
+ """
204
+ return self._query_frame(query, params, frame)
205
+
206
+ def _markets_arrow(
207
+ self,
208
+ *,
209
+ start_time: datetime | str | None = None,
210
+ end_time: datetime | str | None = None,
211
+ status: StringFilter = None,
212
+ platform: StringFilter = None,
213
+ market_id: IDFilter = None,
214
+ market_platform_id: StringFilter = None,
215
+ limit: int = 1000,
216
+ max_rows: int | None = None,
217
+ frame: FrameKind = "polars",
218
+ ) -> Any:
219
+ limit = _limit(limit, max_rows)
220
+ where, params = _where(
221
+ [
222
+ ("created_at >= %(start_time)s", "start_time", normalize_time(start_time)),
223
+ ("created_at <= %(end_time)s", "end_time", normalize_time(end_time)),
224
+ _filter("status", "status", status),
225
+ _filter("platform", "platform", platform),
226
+ _filter("id", "market_id", market_id),
227
+ _filter("platform_id", "market_platform_id", market_platform_id),
228
+ ]
229
+ )
230
+ params["limit"] = limit
231
+ query = f"""
232
+ SELECT
233
+ id, platform, platform_id, slug, url, title, description,
234
+ category, tags, market_type, {OUTCOMES_FRAME_EXPR} AS outcomes,
235
+ created_at, opened_at, closes_at, resolves_at, end_date, reset_at,
236
+ resolution_type, resolution_winning_outcome_id,
237
+ {RESOLUTION_PAYOUTS_FRAME_EXPR} AS resolution_outcome_payouts,
238
+ resolution_resolved_by, resolution_resolved_at,
239
+ resolution_source_block_number, resolution_source_tx_hash,
240
+ status, source_block_number, source_tx_hash, indexed_at
241
+ FROM markets FINAL
242
+ {where}
243
+ ORDER BY created_at DESC, id DESC
244
+ LIMIT %(limit)s
245
+ """
246
+ return self._query_frame(query, params, frame)
247
+
248
+ def _fills_arrow(
249
+ self,
250
+ *,
251
+ start_time: datetime | str | None = None,
252
+ end_time: datetime | str | None = None,
253
+ platform: StringFilter = None,
254
+ market: Market | str | UUID | None = None,
255
+ market_id: IDFilter = None,
256
+ market_platform_id: StringFilter = None,
257
+ taker_side: StringFilter = None,
258
+ trader_id: StringFilter = None,
259
+ limit: int = 1000,
260
+ max_rows: int | None = None,
261
+ frame: FrameKind = "polars",
262
+ ) -> Any:
263
+ limit = _limit(limit, max_rows)
264
+ market_id, market_platform_id, platform = market_filter(market, market_id, market_platform_id, platform)
265
+ where, params = _where(
266
+ [
267
+ ("timestamp >= %(start_time)s", "start_time", normalize_time(start_time)),
268
+ ("timestamp <= %(end_time)s", "end_time", normalize_time(end_time)),
269
+ _filter("platform", "platform", platform),
270
+ _filter("market_id", "market_id", market_id),
271
+ _filter("market_platform_id", "market_platform_id", market_platform_id),
272
+ _filter("taker_side", "taker_side", taker_side),
273
+ _filter(
274
+ "trader_id",
275
+ "trader_id",
276
+ trader_id,
277
+ eq_sql="(taker_id = %(trader_id)s OR maker_id = %(trader_id)s)",
278
+ in_sql="(taker_id IN %(trader_id)s OR maker_id IN %(trader_id)s)",
279
+ ),
280
+ ]
281
+ )
282
+ params["limit"] = limit
283
+ query = f"""
284
+ SELECT
285
+ id, market_id, market_platform_id, platform, platform_id,
286
+ {OUTCOME_FRAME_EXPR} AS outcome,
287
+ size, price, normalized_price, taker_side, taker_cash_flow,
288
+ maker_cash_flow, taker_id, maker_id, fee, timestamp
289
+ FROM fills
290
+ {where}
291
+ ORDER BY timestamp ASC, id ASC
292
+ LIMIT %(limit)s
293
+ """
294
+ return self._query_frame(query, params, frame)
295
+
296
+ def _json_rows(self, query: str, params: dict[str, Any]) -> list[dict[str, Any]]:
297
+ rows, columns = self.client.execute(query, params=params, with_column_types=True)
298
+ names = [col[0] for col in columns]
299
+ return [dict(zip(names, row, strict=True)) for row in rows]
300
+
301
+ def _query_frame(
302
+ self,
303
+ query: str,
304
+ params: dict[str, Any],
305
+ frame: FrameKind,
306
+ ) -> Any:
307
+ frame = validate_frame(frame)
308
+ columns_data, columns = self.client.execute(
309
+ query,
310
+ params=params,
311
+ with_column_types=True,
312
+ columnar=True,
313
+ )
314
+ data = {column[0]: values for column, values in zip(columns, columns_data, strict=True)}
315
+ return dataframe_to_frame(data, frame)
316
+
317
+
318
+ def _where(parts: list[tuple[str, str, Any]]) -> tuple[str, dict[str, Any]]:
319
+ clauses: list[str] = []
320
+ params: dict[str, Any] = {}
321
+ for sql, name, value in parts:
322
+ if value is None or value == "" or value == ():
323
+ continue
324
+ clauses.append(sql)
325
+ params[name] = value
326
+ return ("WHERE " + " AND ".join(clauses) if clauses else ""), params
327
+
328
+
329
+ def _limit(limit: int, max_rows: int | None) -> int:
330
+ if limit < 1:
331
+ raise ValueError("limit must be at least 1")
332
+ if max_rows is not None and max_rows < 1:
333
+ raise ValueError("max_rows must be at least 1")
334
+ return min(limit, max_rows) if max_rows is not None else limit
335
+
336
+
337
def _filter(
    column: str,
    name: str,
    value: Any,
    *,
    eq_sql: str | None = None,
    in_sql: str | None = None,
) -> tuple[str, str, Any]:
    """Build one ``(sql, parameter name, value)`` condition for ``_where``.

    Args:
        column: Column compared by the default SQL templates.
        name: Placeholder name used in the SQL and as the parameter key.
        value: Raw filter value; normalised with ``_filter_value``.
        eq_sql: SQL to use instead of ``column = %(name)s`` for one value.
        in_sql: SQL to use instead of ``column IN %(name)s`` for many values.

    Returns:
        The triple for the normalised value. The ``IN`` form is chosen when
        the normalised value is a tuple, the equality form otherwise
        (including when the value normalises to ``None``).
    """
    normalized = _filter_value(value)
    if isinstance(normalized, tuple):
        template = in_sql or f"{column} IN %({name})s"
    else:
        template = eq_sql or f"{column} = %({name})s"
    return template, name, normalized
349
+
350
+
351
def _filter_value(value: Any) -> Any:
    """Normalise a raw filter argument into a query parameter.

    Returns:
        ``None`` for ``None``, ``""`` or a collection left empty after its
        ``None``/``""`` items are removed; a tuple of converted items for a
        multi-value filter; otherwise the single converted value.
    """
    if value is None or value == "":
        return None
    if not _is_multi_filter(value):
        return _scalar_filter_value(value)
    kept = tuple(
        _scalar_filter_value(item)
        for item in value
        if item is not None and item != ""
    )
    return kept if kept else None
358
+
359
+
360
+ def _is_multi_filter(value: Any) -> bool:
361
+ return isinstance(value, Sequence | set | frozenset) and not isinstance(value, str | bytes | bytearray)
362
+
363
+
364
+ def _scalar_filter_value(value: Any) -> Any:
365
+ if isinstance(value, UUID):
366
+ return str(value)
367
+ if isinstance(value, Enum):
368
+ return value.value
369
+ return value
370
+
371
+
372
+ def _market_row(row: dict[str, Any]) -> dict[str, Any]:
373
+ resolution = None
374
+ if row.get("resolution_type"):
375
+ resolution = {
376
+ "type": row.get("resolution_type"),
377
+ "winning_outcome_id": row.get("resolution_winning_outcome_id"),
378
+ "outcome_payouts": [_outcome_payout(value) for value in row.get("resolution_outcome_payouts") or []],
379
+ "resolved_by": row.get("resolution_resolved_by") or "",
380
+ "resolved_at": row.get("resolution_resolved_at"),
381
+ }
382
+ return {
383
+ "id": row["id"],
384
+ "platform": row["platform"],
385
+ "platform_id": row["platform_id"],
386
+ "slug": row.get("slug", ""),
387
+ "url": row.get("url", ""),
388
+ "title": row.get("title", ""),
389
+ "description": row.get("description", ""),
390
+ "category": row.get("category", ""),
391
+ "tags": row.get("tags", []),
392
+ "market_type": row.get("market_type", "UNKNOWN"),
393
+ "outcomes": [_outcome(value) for value in row.get("outcomes", [])],
394
+ "status": row["status"],
395
+ "created_at": row["created_at"],
396
+ "opened_at": row.get("opened_at"),
397
+ "closes_at": row.get("closes_at"),
398
+ "resolves_at": row.get("resolves_at"),
399
+ "end_date": row.get("end_date"),
400
+ "reset_at": row.get("reset_at"),
401
+ "resolution": resolution,
402
+ }
403
+
404
+
405
def _fill_row(row: dict[str, Any]) -> dict[str, Any]:
    """Return a copy of ``row`` whose ``outcome`` is normalised by ``_outcome``.

    Raises:
        KeyError: If ``row`` has no ``outcome`` key.
    """
    outcome = _outcome(row["outcome"])
    converted = dict(row)
    converted["outcome"] = outcome
    return converted
410
+
411
+
412
+ def _outcome(value: Any) -> dict[str, Any]:
413
+ if isinstance(value, dict):
414
+ return {
415
+ "id": value.get("id"),
416
+ "platform_id": value.get("platform_id"),
417
+ "name": value.get("name"),
418
+ "index": value.get("index"),
419
+ }
420
+ if hasattr(value, "_asdict"):
421
+ return _outcome(value._asdict())
422
+ return {
423
+ "id": value[0],
424
+ "platform_id": value[1],
425
+ "name": value[2],
426
+ "index": value[3],
427
+ }
428
+
429
+
430
+ def _outcome_payout(value: Any) -> dict[str, Any]:
431
+ if isinstance(value, dict):
432
+ return {
433
+ "outcome_id": value.get("outcome_id"),
434
+ "payout": value.get("payout"),
435
+ }
436
+ if hasattr(value, "_asdict"):
437
+ return _outcome_payout(value._asdict())
438
+ return {
439
+ "outcome_id": value[0],
440
+ "payout": value[1],
441
+ }