prediction-market-agent-tooling 0.64.12.dev660__py3-none-any.whl → 0.65.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51)
  1. prediction_market_agent_tooling/benchmark/agents.py +19 -16
  2. prediction_market_agent_tooling/benchmark/benchmark.py +94 -84
  3. prediction_market_agent_tooling/benchmark/utils.py +8 -9
  4. prediction_market_agent_tooling/deploy/agent.py +85 -125
  5. prediction_market_agent_tooling/deploy/agent_example.py +20 -10
  6. prediction_market_agent_tooling/deploy/betting_strategy.py +222 -96
  7. prediction_market_agent_tooling/deploy/constants.py +4 -0
  8. prediction_market_agent_tooling/jobs/jobs_models.py +15 -4
  9. prediction_market_agent_tooling/jobs/omen/omen_jobs.py +3 -3
  10. prediction_market_agent_tooling/markets/agent_market.py +145 -50
  11. prediction_market_agent_tooling/markets/blockchain_utils.py +10 -1
  12. prediction_market_agent_tooling/markets/data_models.py +83 -17
  13. prediction_market_agent_tooling/markets/manifold/api.py +18 -7
  14. prediction_market_agent_tooling/markets/manifold/data_models.py +23 -16
  15. prediction_market_agent_tooling/markets/manifold/manifold.py +18 -18
  16. prediction_market_agent_tooling/markets/manifold/utils.py +7 -12
  17. prediction_market_agent_tooling/markets/markets.py +2 -1
  18. prediction_market_agent_tooling/markets/metaculus/metaculus.py +29 -4
  19. prediction_market_agent_tooling/markets/omen/data_models.py +17 -32
  20. prediction_market_agent_tooling/markets/omen/omen.py +65 -108
  21. prediction_market_agent_tooling/markets/omen/omen_contracts.py +2 -5
  22. prediction_market_agent_tooling/markets/omen/omen_resolving.py +13 -13
  23. prediction_market_agent_tooling/markets/omen/omen_subgraph_handler.py +18 -12
  24. prediction_market_agent_tooling/markets/polymarket/data_models.py +7 -3
  25. prediction_market_agent_tooling/markets/polymarket/data_models_web.py +7 -3
  26. prediction_market_agent_tooling/markets/polymarket/polymarket.py +5 -4
  27. prediction_market_agent_tooling/markets/seer/data_models.py +0 -83
  28. prediction_market_agent_tooling/markets/seer/price_manager.py +44 -30
  29. prediction_market_agent_tooling/markets/seer/seer.py +105 -105
  30. prediction_market_agent_tooling/markets/seer/seer_subgraph_handler.py +34 -41
  31. prediction_market_agent_tooling/tools/betting_strategies/kelly_criterion.py +1 -1
  32. prediction_market_agent_tooling/tools/cow/cow_order.py +10 -3
  33. prediction_market_agent_tooling/tools/is_predictable.py +2 -3
  34. prediction_market_agent_tooling/tools/langfuse_client_utils.py +4 -4
  35. prediction_market_agent_tooling/tools/omen/sell_positions.py +3 -2
  36. prediction_market_agent_tooling/tools/utils.py +26 -13
  37. {prediction_market_agent_tooling-0.64.12.dev660.dist-info → prediction_market_agent_tooling-0.65.0.dist-info}/METADATA +2 -2
  38. {prediction_market_agent_tooling-0.64.12.dev660.dist-info → prediction_market_agent_tooling-0.65.0.dist-info}/RECORD +41 -51
  39. prediction_market_agent_tooling/monitor/financial_metrics/financial_metrics.py +0 -68
  40. prediction_market_agent_tooling/monitor/markets/manifold.py +0 -90
  41. prediction_market_agent_tooling/monitor/markets/metaculus.py +0 -43
  42. prediction_market_agent_tooling/monitor/markets/omen.py +0 -88
  43. prediction_market_agent_tooling/monitor/markets/polymarket.py +0 -49
  44. prediction_market_agent_tooling/monitor/monitor.py +0 -406
  45. prediction_market_agent_tooling/monitor/monitor_app.py +0 -149
  46. prediction_market_agent_tooling/monitor/monitor_settings.py +0 -27
  47. prediction_market_agent_tooling/tools/betting_strategies/market_moving.py +0 -146
  48. prediction_market_agent_tooling/tools/betting_strategies/minimum_bet_to_win.py +0 -12
  49. {prediction_market_agent_tooling-0.64.12.dev660.dist-info → prediction_market_agent_tooling-0.65.0.dist-info}/LICENSE +0 -0
  50. {prediction_market_agent_tooling-0.64.12.dev660.dist-info → prediction_market_agent_tooling-0.65.0.dist-info}/WHEEL +0 -0
  51. {prediction_market_agent_tooling-0.64.12.dev660.dist-info → prediction_market_agent_tooling-0.65.0.dist-info}/entry_points.txt +0 -0
@@ -1,11 +1,13 @@
1
1
  import random
2
2
  import typing as t
3
3
 
4
- from prediction_market_agent_tooling.benchmark.utils import (
5
- OutcomePrediction,
6
- Prediction,
7
- )
4
+ from prediction_market_agent_tooling.benchmark.utils import Prediction
8
5
  from prediction_market_agent_tooling.gtypes import Probability
6
+ from prediction_market_agent_tooling.markets.agent_market import AgentMarket
7
+ from prediction_market_agent_tooling.markets.data_models import (
8
+ CategoricalProbabilisticAnswer,
9
+ ProbabilisticAnswer,
10
+ )
9
11
  from prediction_market_agent_tooling.tools.utils import DatetimeUTC
10
12
 
11
13
 
@@ -64,20 +66,20 @@ class AbstractBenchmarkedAgent:
64
66
 
65
67
  def check_and_predict_restricted(
66
68
  self,
67
- market_question: str,
69
+ market: AgentMarket,
68
70
  time_restriction_up_to: DatetimeUTC,
69
71
  ) -> Prediction:
70
72
  """
71
73
  Data used must be restricted to the time_restriction_up_to.
72
74
  """
73
75
  is_predictable = self.is_predictable_restricted(
74
- market_question=market_question,
76
+ market_question=market.question,
75
77
  time_restriction_up_to=time_restriction_up_to,
76
78
  )
77
79
  if not is_predictable:
78
80
  return Prediction(is_predictable=is_predictable)
79
81
  return self.predict_restricted(
80
- market_question=market_question,
82
+ market_question=market.question,
81
83
  time_restriction_up_to=time_restriction_up_to,
82
84
  )
83
85
 
@@ -85,11 +87,10 @@ class AbstractBenchmarkedAgent:
85
87
  class RandomAgent(AbstractBenchmarkedAgent):
86
88
  def predict(self, market_question: str) -> Prediction:
87
89
  p_yes, confidence = random.random(), random.random()
90
+
88
91
  return Prediction(
89
- outcome_prediction=OutcomePrediction(
90
- p_yes=Probability(p_yes),
91
- confidence=confidence,
92
- info_utility=None,
92
+ outcome_prediction=CategoricalProbabilisticAnswer.from_probabilistic_answer(
93
+ ProbabilisticAnswer(p_yes=Probability(p_yes), confidence=confidence),
93
94
  ),
94
95
  )
95
96
 
@@ -108,12 +109,14 @@ class FixedAgent(AbstractBenchmarkedAgent):
108
109
 
109
110
  def predict(self, market_question: str) -> Prediction:
110
111
  p_yes, confidence = 1.0 if self.fixed_answer else 0.0, 1.0
112
+
111
113
  return Prediction(
112
- outcome_prediction=OutcomePrediction(
113
- p_yes=Probability(p_yes),
114
- confidence=confidence,
115
- info_utility=None,
116
- ),
114
+ outcome_prediction=CategoricalProbabilisticAnswer.from_probabilistic_answer(
115
+ ProbabilisticAnswer(
116
+ p_yes=Probability(p_yes),
117
+ confidence=confidence,
118
+ )
119
+ )
117
120
  )
118
121
 
119
122
  def predict_restricted(
@@ -1,6 +1,7 @@
1
1
  import concurrent.futures
2
2
  import os
3
3
  import typing as t
4
+ from collections import defaultdict
4
5
 
5
6
  import numpy as np
6
7
  import pandas as pd
@@ -8,11 +9,8 @@ from sklearn.metrics import precision_score, recall_score
8
9
  from tqdm import tqdm
9
10
 
10
11
  from prediction_market_agent_tooling.benchmark.agents import AbstractBenchmarkedAgent
11
- from prediction_market_agent_tooling.benchmark.utils import (
12
- Prediction,
13
- PredictionsCache,
14
- Resolution,
15
- )
12
+ from prediction_market_agent_tooling.benchmark.utils import Prediction, PredictionsCache
13
+ from prediction_market_agent_tooling.gtypes import OutcomeStr
16
14
  from prediction_market_agent_tooling.markets.agent_market import AgentMarket
17
15
  from prediction_market_agent_tooling.tools.costs import openai_costs
18
16
  from prediction_market_agent_tooling.tools.utils import (
@@ -72,37 +70,36 @@ class Benchmarker:
72
70
  "MSE for `p_yes`": self._compute_mse,
73
71
  "Mean confidence": self._compute_mean_confidence,
74
72
  "% within +-0.05": lambda predictions, markets: self._compute_percentage_within_range(
75
- predictions, markets, tolerance=0.05
73
+ predictions, markets, average_error_tolerance=0.05
76
74
  ),
77
75
  "% within +-0.1": lambda predictions, markets: self._compute_percentage_within_range(
78
- predictions, markets, tolerance=0.1
76
+ predictions, markets, average_error_tolerance=0.1
79
77
  ),
80
78
  "% within +-0.2": lambda predictions, markets: self._compute_percentage_within_range(
81
- predictions, markets, tolerance=0.2
79
+ predictions, markets, average_error_tolerance=0.2
82
80
  ),
83
81
  "% correct outcome": self._compute_correct_outcome_percentage,
84
82
  "% precision for `yes`": lambda predictions, markets: self._compute_precision_and_recall_percentages(
85
- predictions, markets, pos_label=1
83
+ predictions, markets
86
84
  )[
87
85
  0
88
86
  ],
89
87
  "% precision for `no`": lambda predictions, markets: self._compute_precision_and_recall_percentages(
90
- predictions, markets, pos_label=0
88
+ predictions, markets
91
89
  )[
92
90
  0
93
91
  ],
94
92
  "% recall for `yes`": lambda predictions, markets: self._compute_precision_and_recall_percentages(
95
- predictions, markets, pos_label=1
93
+ predictions, markets
96
94
  )[
97
95
  1
98
96
  ],
99
97
  "% recall for `no`": lambda predictions, markets: self._compute_precision_and_recall_percentages(
100
- predictions, markets, pos_label=0
98
+ predictions, markets
101
99
  )[
102
100
  1
103
101
  ],
104
102
  "confidence/p_yes error correlation": self._compute_confidence_p_yes_error_correlation,
105
- "Mean info_utility": self._compute_mean_info_utility,
106
103
  "Proportion answerable": self._compute_ratio_evaluated_as_answerable,
107
104
  "Proportion answered": self._compute_ratio_answered,
108
105
  "Mean cost ($)": self._compute_mean_cost,
@@ -146,7 +143,7 @@ class Benchmarker:
146
143
  if not market.is_resolved()
147
144
  else (
148
145
  agent.check_and_predict_restricted(
149
- market_question=market.question,
146
+ market=market,
150
147
  time_restriction_up_to=market.created_time, # TODO: Add support for resolved_at and any time in between.
151
148
  )
152
149
  if market.created_time is not None
@@ -198,20 +195,42 @@ class Benchmarker:
198
195
  )
199
196
  if not predictions:
200
197
  return None
201
- mse = sum(
202
- [
203
- (check_not_none(p.outcome_prediction).p_yes - m.current_p_yes) ** 2
204
- for p, m in zip(predictions, markets)
205
- ]
206
- ) / len(predictions)
207
- return mse
198
+
199
+ total_squared_errors = 0.0
200
+ for prediction, market in zip(predictions, markets):
201
+ squared_errors = self.calculate_squared_errors(prediction, market)
202
+ total_squared_errors += squared_errors
203
+
204
+ return total_squared_errors
205
+
206
+ @staticmethod
207
+ def calculate_errors_between_prediction_and_market(
208
+ prediction: Prediction, market: AgentMarket
209
+ ) -> list[float]:
210
+ pred_probs = check_not_none(prediction.outcome_prediction).probabilities
211
+ market_probs = market.probabilities
212
+
213
+ # Get common outcomes between prediction and market
214
+ common_outcomes = set(pred_probs.keys()) & set(market_probs.keys())
215
+
216
+ errors = [
217
+ (pred_probs[outcome] - market_probs[outcome]) for outcome in common_outcomes
218
+ ]
219
+
220
+ return errors
221
+
222
+ @staticmethod
223
+ def calculate_squared_errors(prediction: Prediction, market: AgentMarket) -> float:
224
+ errors = Benchmarker.calculate_errors_between_prediction_and_market(
225
+ prediction, market
226
+ )
227
+ squared_errors = sum([err**2 for err in errors], 0.0)
228
+ return squared_errors
208
229
 
209
230
  def _compute_mean_confidence(
210
231
  self, predictions: t.List[Prediction], markets: t.Sequence[AgentMarket]
211
232
  ) -> float | None:
212
- predictions, markets = self.filter_predictions_for_answered(
213
- predictions, markets
214
- )
233
+ predictions, _ = self.filter_predictions_for_answered(predictions, markets)
215
234
  if not predictions:
216
235
  return None
217
236
  mean_confidence = sum(
@@ -219,32 +238,11 @@ class Benchmarker:
219
238
  ) / len(predictions)
220
239
  return mean_confidence
221
240
 
222
- def _compute_mean_info_utility(
223
- self, predictions: t.List[Prediction], markets: t.Sequence[AgentMarket]
224
- ) -> float | None:
225
- predictions, markets = self.filter_predictions_for_answered(
226
- predictions, markets
227
- )
228
- predictions_with_info_utility = [
229
- p
230
- for p in predictions
231
- if check_not_none(p.outcome_prediction).info_utility is not None
232
- ]
233
- if not predictions_with_info_utility:
234
- return None
235
- mean_info_utility = sum(
236
- [
237
- check_not_none(check_not_none(p.outcome_prediction).info_utility)
238
- for p in predictions_with_info_utility
239
- ]
240
- ) / len(predictions_with_info_utility)
241
- return mean_info_utility
242
-
243
241
  def _compute_percentage_within_range(
244
242
  self,
245
243
  predictions: t.List[Prediction],
246
244
  markets: t.Sequence[AgentMarket],
247
- tolerance: float = 0.05,
245
+ average_error_tolerance: float = 0.05,
248
246
  ) -> float | None:
249
247
  predictions, markets = self.filter_predictions_for_answered(
250
248
  predictions, markets
@@ -252,15 +250,13 @@ class Benchmarker:
252
250
  if not predictions:
253
251
  return None
254
252
 
255
- within_range_count = 0
256
- for p, m in zip(predictions, markets):
257
- if (
258
- abs(check_not_none(p.outcome_prediction).p_yes - m.current_p_yes)
259
- <= tolerance
260
- ):
261
- within_range_count += 1
253
+ predictions_within_range = 0.0
254
+ for prediction, market in zip(predictions, markets):
255
+ squared_errors = self.calculate_squared_errors(prediction, market)
256
+ if squared_errors <= (average_error_tolerance**2):
257
+ predictions_within_range += 1
262
258
 
263
- return (100 * within_range_count) / len(predictions)
259
+ return (100 * predictions_within_range) / len(predictions)
264
260
 
265
261
  def _compute_correct_outcome_percentage(
266
262
  self, predictions: t.List[Prediction], markets: t.Sequence[AgentMarket]
@@ -285,7 +281,6 @@ class Benchmarker:
285
281
  self,
286
282
  predictions: t.List[Prediction],
287
283
  markets: t.Sequence[AgentMarket],
288
- pos_label: int,
289
284
  ) -> tuple[float | None, float | None]:
290
285
  predictions, markets = self.filter_predictions_for_answered(
291
286
  predictions, markets
@@ -294,25 +289,31 @@ class Benchmarker:
294
289
  return None, None
295
290
 
296
291
  ground_truth = [
297
- (1 if m.probable_resolution == Resolution.YES else 0) for m in markets
292
+ m.probable_resolution.outcome if m.probable_resolution else None
293
+ for m in markets
298
294
  ]
299
295
  y_pred = [
300
- (
301
- 1
302
- if check_not_none(p.outcome_prediction).probable_resolution
303
- == Resolution.YES
304
- else 0
305
- )
296
+ p.outcome_prediction.probable_resolution.outcome
297
+ if p.outcome_prediction is not None
298
+ else None
306
299
  for p in predictions
307
300
  ]
308
301
 
302
+ # Filter out None values
303
+ valid_indices = [
304
+ i
305
+ for i, (gt, pred) in enumerate(zip(ground_truth, y_pred))
306
+ if gt is not None and pred is not None
307
+ ]
308
+ if not valid_indices:
309
+ return None, None
310
+
311
+ ground_truth = [ground_truth[i] for i in valid_indices]
312
+ y_pred = [y_pred[i] for i in valid_indices]
309
313
  precision = precision_score(
310
- ground_truth, y_pred, pos_label=pos_label, zero_division=0.0
311
- )
312
- recall = recall_score(
313
- ground_truth, y_pred, pos_label=pos_label, zero_division=0.0
314
+ ground_truth, y_pred, average="micro", zero_division=0.0
314
315
  )
315
-
316
+ recall = recall_score(ground_truth, y_pred, average="micro", zero_division=0.0)
316
317
  return precision * 100, recall * 100
317
318
 
318
319
  def _compute_confidence_p_yes_error_correlation(
@@ -324,10 +325,12 @@ class Benchmarker:
324
325
  if not predictions:
325
326
  return None
326
327
 
327
- p_yes_errors = [
328
- abs(check_not_none(p.outcome_prediction).p_yes - m.current_p_yes)
329
- for p, m in zip(predictions, markets)
330
- ]
328
+ p_yes_errors = []
329
+ for p, m in zip(predictions, markets):
330
+ errors = self.calculate_errors_between_prediction_and_market(p, m)
331
+ mean_error = sum([abs(i) for i in errors]) / len(errors)
332
+ p_yes_errors.append(mean_error)
333
+
331
334
  confidences = [
332
335
  check_not_none(p.outcome_prediction).confidence for p in predictions
333
336
  ]
@@ -396,7 +399,7 @@ class Benchmarker:
396
399
  ]
397
400
  markets_summary[f"{agent} p_yes"] = [
398
401
  (
399
- f"{p.outcome_prediction.p_yes:.2f} [{p.outcome_prediction.probable_resolution.value}]"
402
+ f"{p.outcome_prediction.probabilities} [{p.outcome_prediction.probable_resolution}]"
400
403
  if p.is_predictable
401
404
  and p.outcome_prediction # Is answerable and answered
402
405
  else (
@@ -414,25 +417,32 @@ class Benchmarker:
414
417
  )
415
418
  for p in agent_predictions
416
419
  ]
417
- markets_summary[f"reference p_yes"] = [
418
- f"{m.current_p_yes:.2f} [{m.probable_resolution}]" for m in self.markets
420
+ markets_summary[f"reference probabilities"] = [
421
+ f"{m.probabilities} [{m.probable_resolution}]" for m in self.markets
419
422
  ]
420
423
  return markets_summary
421
424
 
422
425
  def get_markets_results(self) -> dict[str, list[str | float]]:
426
+ outcome_counts: dict[OutcomeStr, int] = defaultdict(int)
427
+ total_markets = len(self.markets)
428
+
429
+ for market in self.markets:
430
+ resolution = market.probable_resolution
431
+ if resolution.outcome:
432
+ outcome_counts[resolution.outcome] += 1
433
+
434
+ proportions = {
435
+ outcome: count / total_markets for outcome, count in outcome_counts.items()
436
+ }
423
437
  return {
424
- "Number of markets": [len(self.markets)],
438
+ "Number of markets": [total_markets],
425
439
  "Proportion resolved": [
426
- sum(1 for m in self.markets if m.is_resolved()) / len(self.markets)
427
- ],
428
- "Proportion YES": [
429
- sum(1 for m in self.markets if m.probable_resolution == Resolution.YES)
430
- / len(self.markets)
431
- ],
432
- "Proportion NO": [
433
- sum(1 for m in self.markets if m.probable_resolution == Resolution.NO)
434
- / len(self.markets)
440
+ sum(1 for m in self.markets if m.is_resolved()) / total_markets
435
441
  ],
442
+ **{
443
+ f"Proportion {outcome}": [proportions.get(outcome, 0)]
444
+ for outcome in outcome_counts
445
+ },
436
446
  }
437
447
 
438
448
  def generate_markdown_report(self) -> str:
@@ -3,23 +3,22 @@ import typing as t
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
6
+ from prediction_market_agent_tooling.gtypes import OutcomeStr, Probability
6
7
  from prediction_market_agent_tooling.markets.data_models import (
7
- ProbabilisticAnswer,
8
- Resolution,
8
+ CategoricalProbabilisticAnswer,
9
9
  )
10
10
 
11
11
 
12
- class OutcomePrediction(ProbabilisticAnswer):
13
- info_utility: t.Optional[float]
14
-
15
- @property
16
- def probable_resolution(self) -> Resolution:
17
- return Resolution.YES if self.p_yes > 0.5 else Resolution.NO
12
+ def get_most_probable_outcome(
13
+ probability_map: dict[OutcomeStr, Probability],
14
+ ) -> OutcomeStr:
15
+ """Returns most probable outcome. If tied, returns first."""
16
+ return max(probability_map, key=lambda k: float(probability_map[k]))
18
17
 
19
18
 
20
19
  class Prediction(BaseModel):
21
20
  is_predictable: bool = True
22
- outcome_prediction: t.Optional[OutcomePrediction] = None
21
+ outcome_prediction: t.Optional[CategoricalProbabilisticAnswer] = None
23
22
 
24
23
  time: t.Optional[float] = None
25
24
  cost: t.Optional[float] = None