prediction-market-agent-tooling 0.51.0__py3-none-any.whl → 0.51.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -66,6 +66,7 @@ from prediction_market_agent_tooling.monitor.monitor_app import (
66
66
  )
67
67
  from prediction_market_agent_tooling.tools.hexbytes_custom import HexBytes
68
68
  from prediction_market_agent_tooling.tools.ipfs.ipfs_handler import IPFSHandler
69
+ from prediction_market_agent_tooling.tools.is_invalid import is_invalid
69
70
  from prediction_market_agent_tooling.tools.is_predictable import is_predictable_binary
70
71
  from prediction_market_agent_tooling.tools.langfuse_ import langfuse_context, observe
71
72
  from prediction_market_agent_tooling.tools.utils import DatetimeUTC, utcnow
@@ -295,6 +296,7 @@ class DeployableTraderAgent(DeployableAgent):
295
296
  bet_on_n_markets_per_run: int = 1
296
297
  min_required_balance_to_operate: xDai | None = xdai_type(1)
297
298
  min_balance_to_keep_in_native_currency: xDai | None = xdai_type(0.1)
299
+ allow_invalid_questions: bool = False
298
300
 
299
301
  def __init__(
300
302
  self,
@@ -403,6 +405,9 @@ class DeployableTraderAgent(DeployableAgent):
403
405
  if not is_predictable_binary(market.question):
404
406
  return False
405
407
 
408
+ if not self.allow_invalid_questions and is_invalid(market.question):
409
+ return False
410
+
406
411
  return True
407
412
 
408
413
  def answer_binary_market(self, market: AgentMarket) -> ProbabilisticAnswer | None:
@@ -3,93 +3,100 @@ from typing import Any
3
3
 
4
4
  from pydantic import BaseModel
5
5
 
6
+ from prediction_market_agent_tooling.gtypes import Probability
6
7
  from prediction_market_agent_tooling.tools.utils import DatetimeUTC
7
8
 
8
9
 
9
10
  class QuestionType(str, Enum):
10
- forecast = "forecast"
11
- notebook = "notebook"
12
- discussion = "discussion"
13
- claim = "claim"
14
- group = "group"
15
- conditional_group = "conditional_group"
16
- multiple_choice = "multiple_choice"
11
+ binary = "binary"
17
12
 
18
13
 
19
- class CommunityPrediction(BaseModel):
20
- y: list[float]
21
- q1: float | None = None
22
- q2: float | None = None
23
- q3: float | None = None
14
+ class AggregationItem(BaseModel):
15
+ start_time: DatetimeUTC
16
+ end_time: DatetimeUTC | None
17
+ forecast_values: list[float] | None
18
+ forecaster_count: int
19
+ interval_lower_bounds: list[float] | None
20
+ centers: list[float] | None
21
+ interval_upper_bounds: list[float] | None
22
+ means: list[float] | None
23
+ histogram: list[float] | None
24
24
 
25
- @property
26
- def p_yes(self) -> float:
27
- """
28
- q2 corresponds to the median, or 'second quartile' of the distribution.
29
25
 
30
- If no value is provided (i.e. the question is new and has not been
31
- answered yet), we default to 0.5.
32
- """
33
- return self.q2 if self.q2 is not None else 0.5
26
+ class Aggregation(BaseModel):
27
+ history: list[AggregationItem]
28
+ latest: AggregationItem | None
29
+ score_data: dict[str, Any]
34
30
 
35
31
 
36
- class Prediction(BaseModel):
37
- t: DatetimeUTC
38
- x: float
32
+ class Aggregations(BaseModel):
33
+ recency_weighted: Aggregation
34
+ unweighted: Aggregation
35
+ single_aggregation: Aggregation
36
+ metaculus_prediction: Aggregation
39
37
 
40
38
 
41
- class UserPredictions(BaseModel):
42
- id: int
43
- predictions: list[Prediction]
44
- points_won: float | None = None
45
- user: int
46
- username: str
47
- question: int
39
+ class MyForecast(BaseModel):
40
+ start_time: DatetimeUTC
41
+ end_time: DatetimeUTC | None
42
+ forecast_values: list[float] | None
43
+ interval_lower_bounds: list[float] | None
44
+ centers: list[float] | None
45
+ interval_upper_bounds: list[float] | None
48
46
 
49
47
 
50
- class CommunityPredictionStats(BaseModel):
51
- full: CommunityPrediction
52
- unweighted: CommunityPrediction
48
+ class MyAggregation(BaseModel):
49
+ history: list[MyForecast]
50
+ latest: MyForecast | None
51
+ score_data: dict[str, Any]
53
52
 
54
53
 
55
- class MetaculusQuestion(BaseModel):
56
- """
57
- https://www.metaculus.com/api2/schema/redoc/#tag/questions/operation/questions_retrieve
58
- """
54
+ class Question(BaseModel):
55
+ aggregations: Aggregations
56
+ my_forecasts: MyAggregation
57
+ type: QuestionType
58
+ possibilities: dict[str, str] | None
59
59
 
60
- active_state: Any
61
- url: str
62
- page_url: str
60
+
61
+ class MetaculusQuestion(BaseModel):
63
62
  id: int
64
- author: int
65
- author_name: str
66
63
  author_id: int
64
+ author_username: str
67
65
  title: str
68
- title_short: str
69
- group_label: str | None = None
70
- resolution: int | None
71
- resolved_option: int | None
72
- created_time: DatetimeUTC
73
- publish_time: DatetimeUTC | None = None
74
- close_time: DatetimeUTC | None = None
75
- effected_close_time: DatetimeUTC | None
76
- resolve_time: DatetimeUTC | None = None
77
- possibilities: dict[Any, Any] | None = None
78
- scoring: dict[Any, Any] = {}
79
- type: QuestionType | None = None
80
- user_perms: Any
81
- weekly_movement: float | None
82
- weekly_movement_direction: int | None = None
83
- cp_reveal_time: DatetimeUTC | None = None
84
- edited_time: DatetimeUTC
85
- last_activity_time: DatetimeUTC
86
- activity: float
66
+ created_at: DatetimeUTC
67
+ published_at: DatetimeUTC
68
+ scheduled_close_time: DatetimeUTC
69
+ scheduled_resolve_time: DatetimeUTC
70
+ user_permission: str
87
71
  comment_count: int
88
- votes: int
89
- community_prediction: CommunityPredictionStats
90
- my_predictions: UserPredictions | None = None
72
+ question: Question
91
73
  # TODO add the rest of the fields https://github.com/gnosis/prediction-market-agent-tooling/issues/301
92
74
 
75
+ @property
76
+ def page_url(self) -> str:
77
+ return f"https://www.metaculus.com/questions/{self.id}/"
78
+
79
+ @property
80
+ def p_yes(self) -> Probability:
81
+ if self.question.type != QuestionType.binary:
82
+ raise ValueError(f"Only binary markets can have p_yes.")
83
+ if (
84
+ self.question.aggregations.recency_weighted is None
85
+ or self.question.aggregations.recency_weighted.latest is None
86
+ or self.question.aggregations.recency_weighted.latest.forecast_values
87
+ is None
88
+ ):
89
+ # If no value is provided (i.e. the question is new and has not been answered yet), we default to 0.5.
90
+ return Probability(0.5)
91
+ if len(self.question.aggregations.recency_weighted.latest.forecast_values) != 2:
92
+ raise ValueError(
93
+ f"Invalid logic, assumed that binary markets will have two forecasts, got: {self.question.aggregations.recency_weighted.latest.forecast_values}"
94
+ )
95
+ # Experimentally figured out that they store "Yes" at index 1.
96
+ return Probability(
97
+ self.question.aggregations.recency_weighted.latest.forecast_values[1]
98
+ )
99
+
93
100
 
94
101
  class MetaculusQuestions(BaseModel):
95
102
  next: str | None
@@ -37,13 +37,12 @@ class MetaculusAgentMarket(AgentMarket):
37
37
  question=model.title,
38
38
  outcomes=[],
39
39
  resolution=None,
40
- current_p_yes=Probability(model.community_prediction.full.p_yes),
41
- created_time=model.created_time,
42
- close_time=model.close_time,
43
- url=model.url,
40
+ current_p_yes=model.p_yes,
41
+ created_time=model.created_at,
42
+ close_time=model.scheduled_close_time,
43
+ url=model.page_url,
44
44
  volume=None,
45
- have_predicted=model.my_predictions is not None
46
- and len(model.my_predictions.predictions) > 0,
45
+ have_predicted=model.question.my_forecasts.latest is not None,
47
46
  outcome_token_pool=None,
48
47
  )
49
48
 
@@ -0,0 +1,92 @@
1
+ import tenacity
2
+
3
+ from prediction_market_agent_tooling.config import APIKeys
4
+ from prediction_market_agent_tooling.loggers import logger
5
+ from prediction_market_agent_tooling.tools.cache import persistent_inmemory_cache
6
+ from prediction_market_agent_tooling.tools.is_predictable import (
7
+ parse_decision_yes_no_completion,
8
+ )
9
+ from prediction_market_agent_tooling.tools.langfuse_ import (
10
+ get_langfuse_langchain_config,
11
+ observe,
12
+ )
13
+ from prediction_market_agent_tooling.tools.utils import (
14
+ LLM_SEED,
15
+ LLM_SUPER_LOW_TEMPERATURE,
16
+ )
17
+
18
+ # I tried to make it return a JSON, but it didn't work well in combo with asking it to do chain of thought.
19
+ # Rules are almost copy-pasted from https://cdn.kleros.link/ipfs/QmZM12kkguXFk2C94ykrKpambt4iUVKsVsxGxDEdLS68ws/omen-rules.pdf,
20
+ # with some small prompting mods, and I removed the point about "The outcome of the market must be known by its Resolution Date.", because that cannot be verified beforehand,
21
+ # and also the points about "in which none of the answers are valid will resolve as invalid" and "in which multiple answers are valid will resolve as invalid.", because we cannot know beforehand whether one of the outcomes happened or not.
22
+ QUESTION_IS_INVALID_PROMPT = """Main signs about an invalid question (sometimes referred to as a "market"):
23
+ - The market's question is about immoral violence, death or assassination.
24
+ - The violent event can be caused by a single conscious being.
25
+ - The violent event is done illegally.
26
+ - The market should not directly incentivize immoral violent (such as murder, rape or unjust imprisonment) actions which could likely be performed by any participant.
27
+ - Invalid: Will Donald Trump be alive on the 01/12/2021? (Anyone could bet on “No” and kill him for a guaranteed profit. Anyone could bet on “Yes” to effectively put a bounty on his head).
28
+ - Invalid: Will Hera be a victim of swatting in 2020? (Anyone could falsely call the emergency services on him in order to win the bet)
29
+ - This does not prevent markets:
30
+ - Whose topics are violent events not caused by conscious beings.
31
+ - Valid: How many people will die from COVID19 in 2020? (Viruses don’t use prediction markets).
32
+ - Whose main source of uncertainty is not related to a potential violent action.
33
+ - Valid: Will Trump win the 2020 US presidential election? (The main source of uncertainty is the vote of US citizens, not a potential murder of a presidential candidate).
34
+ - Which could give an incentive only to specific participants to commit an immoral violent action, but are in practice unlikely.
35
+ - Valid: Will the US be engaged in a military conflict with a UN member state in 2021? (It’s unlikely for the US to declare war in order to win a bet on this market).
36
+ - Valid: Will Derek Chauvin go to jail for the murder of George Flyod? (It’s unlikely that the jurors would collude to make a wrong verdict in order to win this market).
37
+ - Questions with relative dates will resolve as invalid. Dates must be stated in absolute terms, not relative depending on the current time.
38
+ - Invalid: Who will be the president of the United States in 6 months? (“in 6 months depends on the current time”).
39
+ - Questions about moral values and not facts will be resolved as invalid.
40
+ - Invalid: “Is it ethical to eat meat?”.
41
+
42
+ Follow a chain of thought to evaluate if the question is invalid:
43
+
44
+ First, write the parts of the following question:
45
+
46
+ "{question}"
47
+
48
+ Then, write down what is the future event of the question, what it refers to and when that event will happen if the question contains it.
49
+
50
+ Then, explain why do you think it is or isn't invalid.
51
+
52
+ Finally, write your final decision, write `decision: ` followed by either "yes it is invalid" or "no it isn't invalid" about the question. Don't write anything else after that. You must include "yes" or "no".
53
+ """
54
+
55
+
56
+ @persistent_inmemory_cache
57
+ @tenacity.retry(stop=tenacity.stop_after_attempt(3), wait=tenacity.wait_fixed(1))
58
+ @observe()
59
+ def is_invalid(
60
+ question: str,
61
+ engine: str = "gpt-4o",
62
+ temperature: float = LLM_SUPER_LOW_TEMPERATURE,
63
+ seed: int = LLM_SEED,
64
+ prompt_template: str = QUESTION_IS_INVALID_PROMPT,
65
+ max_tokens: int = 1024,
66
+ ) -> bool:
67
+ """
68
+ Evaluate if the question is invalid according to the rules listed in the prompt template.
69
+ """
70
+ try:
71
+ from langchain.prompts import ChatPromptTemplate
72
+ from langchain_openai import ChatOpenAI
73
+ except ImportError:
74
+ logger.error("langchain not installed, skipping is_invalid")
75
+ return True
76
+
77
+ llm = ChatOpenAI(
78
+ model=engine,
79
+ temperature=temperature,
80
+ seed=seed,
81
+ api_key=APIKeys().openai_api_key_secretstr_v1,
82
+ )
83
+
84
+ prompt = ChatPromptTemplate.from_template(template=prompt_template)
85
+ messages = prompt.format_messages(question=question)
86
+ completion = str(
87
+ llm.invoke(
88
+ messages, max_tokens=max_tokens, config=get_langfuse_langchain_config()
89
+ ).content
90
+ )
91
+
92
+ return parse_decision_yes_no_completion(question, completion)
@@ -22,8 +22,10 @@ from prediction_market_agent_tooling.loggers import logger
22
22
  T = TypeVar("T")
23
23
 
24
24
  # t=0 is mathematically impossible and it's not clear how OpenAI (and others) handle it, as a result, even with t=0, gpt-4-turbo produces very different outputs,
25
- # it seems that using a very low temperature is the best way to have as consistent outputs as possible: https://community.openai.com/t/why-the-api-output-is-inconsistent-even-after-the-temperature-is-set-to-0/329541/12
25
+ # see this experiment to figure out if you should use LLM_SUPER_LOW_TEMPERATURE or just 0: https://github.com/gnosis/prediction-market-agent/pull/438.
26
26
  LLM_SUPER_LOW_TEMPERATURE = 0.00000001
27
+ # For consistent results, also include a seed for models that support it.
28
+ LLM_SEED = 0
27
29
 
28
30
 
29
31
  def check_not_none(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: prediction-market-agent-tooling
3
- Version: 0.51.0
3
+ Version: 0.51.1
4
4
  Summary: Tools to benchmark, deploy and monitor prediction market agents.
5
5
  Author: Gnosis
6
6
  Requires-Python: >=3.10,<3.12
@@ -17,7 +17,7 @@ prediction_market_agent_tooling/benchmark/agents.py,sha256=B1-uWdyeN4GGKMWGK_-Cc
17
17
  prediction_market_agent_tooling/benchmark/benchmark.py,sha256=MqTiaaJ3cYiOLUVR7OyImLWxcEya3Rl5JyFYW-K0lwM,17097
18
18
  prediction_market_agent_tooling/benchmark/utils.py,sha256=D0MfUkVZllmvcU0VOurk9tcKT7JTtwwOp-63zuCBVuc,2880
19
19
  prediction_market_agent_tooling/config.py,sha256=WC30Nr16RGueTafA9i67OIB-6KDHZRryhiLPzebg9_I,6740
20
- prediction_market_agent_tooling/deploy/agent.py,sha256=c9ovjd5UUk1Qw64Si7q0OO7SvM6I6Wne1fySpu0oWco,22005
20
+ prediction_market_agent_tooling/deploy/agent.py,sha256=wGrjwWIh27xPLhA1fFzR1ytq-tT_9TmeJIjLzEZkB5E,22222
21
21
  prediction_market_agent_tooling/deploy/agent_example.py,sha256=dIIdZashExWk9tOdyDjw87AuUcGyM7jYxNChYrVK2dM,1001
22
22
  prediction_market_agent_tooling/deploy/betting_strategy.py,sha256=cOPznMX0jd380qHw06A-l1XUyoicV54AXBghirtPw0Q,12127
23
23
  prediction_market_agent_tooling/deploy/constants.py,sha256=M5ty8URipYMGe_G-RzxRydK3AFL6CyvmqCraJUrLBnE,82
@@ -40,8 +40,8 @@ prediction_market_agent_tooling/markets/manifold/manifold.py,sha256=JEEIPz9-U3av
40
40
  prediction_market_agent_tooling/markets/manifold/utils.py,sha256=cPPFWXm3vCYH1jy7_ctJZuQH9ZDaPL4_AgAYzGWkoow,513
41
41
  prediction_market_agent_tooling/markets/markets.py,sha256=_3nV9QTT48G2oJ2egkuWA1UzrTOGY6x3mXqIRgDaVIo,3245
42
42
  prediction_market_agent_tooling/markets/metaculus/api.py,sha256=4TRPGytQQbSdf42DCg2M_JWYPAuNjqZ3eBqaQBLkNks,2736
43
- prediction_market_agent_tooling/markets/metaculus/data_models.py,sha256=PIRN2FAQ32vNj78JRZPB1pXK61B0w2RBJvJSJ7dvvrg,2514
44
- prediction_market_agent_tooling/markets/metaculus/metaculus.py,sha256=S-XpK6Ij5AZgGMFMdXXcHm7hH2o8BRU-a_XqaW4xC54,3674
43
+ prediction_market_agent_tooling/markets/metaculus/data_models.py,sha256=2wDZ0BmK9O5Lud-q-FCzgW0tsK9GxMU0rUMlcPxSS04,3184
44
+ prediction_market_agent_tooling/markets/metaculus/metaculus.py,sha256=ctRcGm1G8qmUB5RMPoQ_C_GN3Ct24BWDB1gJyOhH_vE,3604
45
45
  prediction_market_agent_tooling/markets/omen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
46
  prediction_market_agent_tooling/markets/omen/data_models.py,sha256=yYPHpZM4fthC1EhyXkXC8iyDQ1ITNBBY9nOHeKD6XSw,27283
47
47
  prediction_market_agent_tooling/markets/omen/omen.py,sha256=6tBhn7qxtsdrk0xgDSqWy44f_m1qmhfXjvQUWXtW3TI,47974
@@ -78,6 +78,7 @@ prediction_market_agent_tooling/tools/httpx_cached_client.py,sha256=0-N1r0zcGKlY
78
78
  prediction_market_agent_tooling/tools/image_gen/image_gen.py,sha256=HzRwBx62hOXBOmrtpkXaP9Qq1Ku03uUGdREocyjLQ_k,1266
79
79
  prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py,sha256=8A3U2uxsCsOfLjru-6R_PPIAuiKY4qFkWp_GSBPV6-s,1280
80
80
  prediction_market_agent_tooling/tools/ipfs/ipfs_handler.py,sha256=CTTMfTvs_8PH4kAtlQby2aeEKwgpmxtuGbd4oYIdJ2A,1201
81
+ prediction_market_agent_tooling/tools/is_invalid.py,sha256=Lc5fWB4fmx7tFvRakmUOzo0Oq5EizorddZ2xjesEopY,4984
81
82
  prediction_market_agent_tooling/tools/is_predictable.py,sha256=NIoR2bTNMmADcyNY2aKNMWkiDw7Z_9kZMcFXEdyewy4,6771
82
83
  prediction_market_agent_tooling/tools/langfuse_.py,sha256=jI_4ROxqo41CCnWGS1vN_AeDVhRzLMaQLxH3kxDu3L8,1153
83
84
  prediction_market_agent_tooling/tools/langfuse_client_utils.py,sha256=7H9EzTA_q5TmuqoIeMpdVU2efF_RyttQEoTPLyS-ld4,5579
@@ -88,10 +89,10 @@ prediction_market_agent_tooling/tools/singleton.py,sha256=CiIELUiI-OeS7U7eeHEt0r
88
89
  prediction_market_agent_tooling/tools/streamlit_user_login.py,sha256=NXEqfjT9Lc9QtliwSGRASIz1opjQ7Btme43H4qJbzgE,3010
89
90
  prediction_market_agent_tooling/tools/tavily_storage/tavily_models.py,sha256=99S7w8BvnJRMOnUArGN0g4GVRoG8M0C-XyIFU8HnLn0,6374
90
91
  prediction_market_agent_tooling/tools/tavily_storage/tavily_storage.py,sha256=xrtQH9v5pXycBRyc5j45pWqkSffkoc9efNIU1_G633Q,3706
91
- prediction_market_agent_tooling/tools/utils.py,sha256=JZj_xM4VbsDSiDlbn2lZFZPEOBSYhqqxd1Y2NpCqnJ4,7117
92
+ prediction_market_agent_tooling/tools/utils.py,sha256=PGmh-9aeEJe_YcHVIiHMLybx31ppng7tVBlMaIfnWy8,7135
92
93
  prediction_market_agent_tooling/tools/web3_utils.py,sha256=dkcjG-LtuaWRh7WEMzRGmZ5B5rsxZTlliFOI6fj-EJ8,11842
93
- prediction_market_agent_tooling-0.51.0.dist-info/LICENSE,sha256=6or154nLLU6bELzjh0mCreFjt0m2v72zLi3yHE0QbeE,7650
94
- prediction_market_agent_tooling-0.51.0.dist-info/METADATA,sha256=G5RtTj92IHwgLBO6VKf6xCz4WWva0BNo77Xhbfl2lVs,8056
95
- prediction_market_agent_tooling-0.51.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
96
- prediction_market_agent_tooling-0.51.0.dist-info/entry_points.txt,sha256=m8PukHbeH5g0IAAmOf_1Ahm-sGAMdhSSRQmwtpmi2s8,81
97
- prediction_market_agent_tooling-0.51.0.dist-info/RECORD,,
94
+ prediction_market_agent_tooling-0.51.1.dist-info/LICENSE,sha256=6or154nLLU6bELzjh0mCreFjt0m2v72zLi3yHE0QbeE,7650
95
+ prediction_market_agent_tooling-0.51.1.dist-info/METADATA,sha256=rGMwRfY6ulZ8GTMKjPP7EWN1nrwTxxazh4aj4vf7Qoc,8056
96
+ prediction_market_agent_tooling-0.51.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
97
+ prediction_market_agent_tooling-0.51.1.dist-info/entry_points.txt,sha256=m8PukHbeH5g0IAAmOf_1Ahm-sGAMdhSSRQmwtpmi2s8,81
98
+ prediction_market_agent_tooling-0.51.1.dist-info/RECORD,,