PyPI - prediction-market-agent-tooling - Versions diffs - 0.51.0__py3-none-any.whl → 0.51.1__py3-none-any.whl - Mend

prediction-market-agent-tooling 0.51.0__py3-none-any.whl → 0.51.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

prediction_market_agent_tooling/deploy/agent.py CHANGED Viewed

@@ -66,6 +66,7 @@ from prediction_market_agent_tooling.monitor.monitor_app import (
 )
 from prediction_market_agent_tooling.tools.hexbytes_custom import HexBytes
 from prediction_market_agent_tooling.tools.ipfs.ipfs_handler import IPFSHandler
+from prediction_market_agent_tooling.tools.is_invalid import is_invalid
 from prediction_market_agent_tooling.tools.is_predictable import is_predictable_binary
 from prediction_market_agent_tooling.tools.langfuse_ import langfuse_context, observe
 from prediction_market_agent_tooling.tools.utils import DatetimeUTC, utcnow
@@ -295,6 +296,7 @@ class DeployableTraderAgent(DeployableAgent):
     bet_on_n_markets_per_run: int = 1
     min_required_balance_to_operate: xDai | None = xdai_type(1)
     min_balance_to_keep_in_native_currency: xDai | None = xdai_type(0.1)
+    allow_invalid_questions: bool = False
     def __init__(
         self,
@@ -403,6 +405,9 @@ class DeployableTraderAgent(DeployableAgent):
         if not is_predictable_binary(market.question):
             return False
+        if not self.allow_invalid_questions and is_invalid(market.question):
+            return False
         return True
     def answer_binary_market(self, market: AgentMarket) -> ProbabilisticAnswer | None:

prediction_market_agent_tooling/markets/metaculus/data_models.py CHANGED Viewed

@@ -3,93 +3,100 @@ from typing import Any
 from pydantic import BaseModel
+from prediction_market_agent_tooling.gtypes import Probability
 from prediction_market_agent_tooling.tools.utils import DatetimeUTC
 class QuestionType(str, Enum):
-    forecast = "forecast"
-    notebook = "notebook"
-    discussion = "discussion"
-    claim = "claim"
-    group = "group"
-    conditional_group = "conditional_group"
-    multiple_choice = "multiple_choice"
+    binary = "binary"
-class CommunityPrediction(BaseModel):
-    y: list[float]
-    q1: float | None = None
-    q2: float | None = None
-    q3: float | None = None
+class AggregationItem(BaseModel):
+    start_time: DatetimeUTC
+    end_time: DatetimeUTC | None
+    forecast_values: list[float] | None
+    forecaster_count: int
+    interval_lower_bounds: list[float] | None
+    centers: list[float] | None
+    interval_upper_bounds: list[float] | None
+    means: list[float] | None
+    histogram: list[float] | None
-    @property
-    def p_yes(self) -> float:
-        """
-        q2 corresponds to the median, or 'second quartile' of the distribution.
-        If no value is provided (i.e. the question is new and has not been
-        answered yet), we default to 0.5.
-        """
-        return self.q2 if self.q2 is not None else 0.5
+class Aggregation(BaseModel):
+    history: list[AggregationItem]
+    latest: AggregationItem | None
+    score_data: dict[str, Any]
-class Prediction(BaseModel):
-    t: DatetimeUTC
-    x: float
+class Aggregations(BaseModel):
+    recency_weighted: Aggregation
+    unweighted: Aggregation
+    single_aggregation: Aggregation
+    metaculus_prediction: Aggregation
-class UserPredictions(BaseModel):
-    id: int
-    predictions: list[Prediction]
-    points_won: float | None = None
-    user: int
-    username: str
-    question: int
+class MyForecast(BaseModel):
+    start_time: DatetimeUTC
+    end_time: DatetimeUTC | None
+    forecast_values: list[float] | None
+    interval_lower_bounds: list[float] | None
+    centers: list[float] | None
+    interval_upper_bounds: list[float] | None
-class CommunityPredictionStats(BaseModel):
-    full: CommunityPrediction
-    unweighted: CommunityPrediction
+class MyAggregation(BaseModel):
+    history: list[MyForecast]
+    latest: MyForecast | None
+    score_data: dict[str, Any]
-class MetaculusQuestion(BaseModel):
-    """
-    https://www.metaculus.com/api2/schema/redoc/#tag/questions/operation/questions_retrieve
-    """
+class Question(BaseModel):
+    aggregations: Aggregations
+    my_forecasts: MyAggregation
+    type: QuestionType
+    possibilities: dict[str, str] | None
-    active_state: Any
-    url: str
-    page_url: str
+class MetaculusQuestion(BaseModel):
     id: int
-    author: int
-    author_name: str
     author_id: int
+    author_username: str
     title: str
-    title_short: str
-    group_label: str | None = None
-    resolution: int | None
-    resolved_option: int | None
-    created_time: DatetimeUTC
-    publish_time: DatetimeUTC | None = None
-    close_time: DatetimeUTC | None = None
-    effected_close_time: DatetimeUTC | None
-    resolve_time: DatetimeUTC | None = None
-    possibilities: dict[Any, Any] | None = None
-    scoring: dict[Any, Any] = {}
-    type: QuestionType | None = None
-    user_perms: Any
-    weekly_movement: float | None
-    weekly_movement_direction: int | None = None
-    cp_reveal_time: DatetimeUTC | None = None
-    edited_time: DatetimeUTC
-    last_activity_time: DatetimeUTC
-    activity: float
+    created_at: DatetimeUTC
+    published_at: DatetimeUTC
+    scheduled_close_time: DatetimeUTC
+    scheduled_resolve_time: DatetimeUTC
+    user_permission: str
     comment_count: int
-    votes: int
-    community_prediction: CommunityPredictionStats
-    my_predictions: UserPredictions | None = None
+    question: Question
     # TODO add the rest of the fields https://github.com/gnosis/prediction-market-agent-tooling/issues/301
+    @property
+    def page_url(self) -> str:
+        return f"https://www.metaculus.com/questions/{self.id}/"
+    @property
+    def p_yes(self) -> Probability:
+        if self.question.type != QuestionType.binary:
+            raise ValueError(f"Only binary markets can have p_yes.")
+        if (
+            self.question.aggregations.recency_weighted is None
+            or self.question.aggregations.recency_weighted.latest is None
+            or self.question.aggregations.recency_weighted.latest.forecast_values
+            is None
+        ):
+            # If no value is provided (i.e. the question is new and has not been answered yet), we default to 0.5.
+            return Probability(0.5)
+        if len(self.question.aggregations.recency_weighted.latest.forecast_values) != 2:
+            raise ValueError(
+                f"Invalid logic, assumed that binary markets will have two forecasts, got: {self.question.aggregations.recency_weighted.latest.forecast_values}"
+            )
+        # Experimentally figured out that they store "Yes" at index 1.
+        return Probability(
+            self.question.aggregations.recency_weighted.latest.forecast_values[1]
+        )
 class MetaculusQuestions(BaseModel):
     next: str | None

prediction_market_agent_tooling/markets/metaculus/metaculus.py CHANGED Viewed

@@ -37,13 +37,12 @@ class MetaculusAgentMarket(AgentMarket):
             question=model.title,
             outcomes=[],
             resolution=None,
-            current_p_yes=Probability(model.community_prediction.full.p_yes),
-            created_time=model.created_time,
-            close_time=model.close_time,
-            url=model.url,
+            current_p_yes=model.p_yes,
+            created_time=model.created_at,
+            close_time=model.scheduled_close_time,
+            url=model.page_url,
             volume=None,
-            have_predicted=model.my_predictions is not None
-            and len(model.my_predictions.predictions) > 0,
+            have_predicted=model.question.my_forecasts.latest is not None,
             outcome_token_pool=None,
         )

prediction_market_agent_tooling/tools/is_invalid.py ADDED Viewed

@@ -0,0 +1,92 @@
+import tenacity
+from prediction_market_agent_tooling.config import APIKeys
+from prediction_market_agent_tooling.loggers import logger
+from prediction_market_agent_tooling.tools.cache import persistent_inmemory_cache
+from prediction_market_agent_tooling.tools.is_predictable import (
+    parse_decision_yes_no_completion,
+)
+from prediction_market_agent_tooling.tools.langfuse_ import (
+    get_langfuse_langchain_config,
+    observe,
+)
+from prediction_market_agent_tooling.tools.utils import (
+    LLM_SEED,
+    LLM_SUPER_LOW_TEMPERATURE,
+)
+# I tried to make it return a JSON, but it didn't work well in combo with asking it to do chain of thought.
+# Rules are almost copy-pasted from https://cdn.kleros.link/ipfs/QmZM12kkguXFk2C94ykrKpambt4iUVKsVsxGxDEdLS68ws/omen-rules.pdf,
+# with some small prompting mods and I removed the point about "The outcome of the market must be known by its Resolution Date.", because that can not be verified before-hand.
+# and also point about "in which none of the answers are valid will resolve as invalid" and "in which multiple answers are valid will resolve as invalid.", because before hand we can not know if one of the outcomes happened or not.
+QUESTION_IS_INVALID_PROMPT = """Main signs about an invalid question (sometimes referred to as a "market"):
+- The market's question is about immoral violence, death or assassination.
+- The violent event can be caused by a single conscious being.
+- The violent event is done illegally.
+- The market should not directly incentivize immoral violent (such as murder, rape or unjust imprisonment) actions which could likely be performed by any participant.
+- Invalid: Will Donald Trump be alive on the 01/12/2021? (Anyone could bet on “No” and kill him for a guaranteed profit. Anyone could bet on “Yes” to effectively put a bounty on his head).
+- Invalid: Will Hera be a victim of swatting in 2020? (Anyone could falsely call the emergency services on him in order to win the bet)
+- This does not prevent markets:
+  - Whose topics are violent events not caused by conscious beings.
+  - Valid: How many people will die from COVID19 in 2020? (Viruses don’t use prediction markets).
+  - Whose main source of uncertainty is not related to a potential violent action.
+  - Valid: Will Trump win the 2020 US presidential election? (The main source of uncertainty is the vote of US citizens, not a potential murder of a presidential candidate).
+  - Which could give an incentive only to specific participants to commit an immoral violent action, but are in practice unlikely.
+  - Valid: Will the US be engaged in a military conflict with a UN member state in 2021? (It’s unlikely for the US to declare war in order to win a bet on this market).
+  - Valid: Will Derek Chauvin go to jail for the murder of George Flyod? (It’s unlikely that the jurors would collude to make a wrong verdict in order to win this market).
+- Questions with relative dates will resolve as invalid. Dates must be stated in absolute terms, not relative depending on the current time.
+- Invalid: Who will be the president of the United States in 6 months? (“in 6 months depends on the current time”).
+- Questions about moral values and not facts will be resolved as invalid.
+- Invalid: “Is it ethical to eat meat?”.
+Follow a chain of thought to evaluate if the question is invalid:
+First, write the parts of the following question:
+"{question}"
+Then, write down what is the future event of the question, what it refers to and when that event will happen if the question contains it.
+Then, explain why do you think it is or isn't invalid.
+Finally, write your final decision, write `decision: ` followed by either "yes it is invalid" or "no it isn't invalid" about the question. Don't write anything else after that. You must include "yes" or "no".
+"""
+@persistent_inmemory_cache
+@tenacity.retry(stop=tenacity.stop_after_attempt(3), wait=tenacity.wait_fixed(1))
+@observe()
+def is_invalid(
+    question: str,
+    engine: str = "gpt-4o",
+    temperature: float = LLM_SUPER_LOW_TEMPERATURE,
+    seed: int = LLM_SEED,
+    prompt_template: str = QUESTION_IS_INVALID_PROMPT,
+    max_tokens: int = 1024,
+) -> bool:
+    """
+    Evaluate if the question is actually answerable.
+    """
+    try:
+        from langchain.prompts import ChatPromptTemplate
+        from langchain_openai import ChatOpenAI
+    except ImportError:
+        logger.error("langchain not installed, skipping is_invalid")
+        return True
+    llm = ChatOpenAI(
+        model=engine,
+        temperature=temperature,
+        seed=seed,
+        api_key=APIKeys().openai_api_key_secretstr_v1,
+    )
+    prompt = ChatPromptTemplate.from_template(template=prompt_template)
+    messages = prompt.format_messages(question=question)
+    completion = str(
+        llm.invoke(
+            messages, max_tokens=max_tokens, config=get_langfuse_langchain_config()
+        ).content
+    )
+    return parse_decision_yes_no_completion(question, completion)

prediction_market_agent_tooling/tools/utils.py CHANGED Viewed

@@ -22,8 +22,10 @@ from prediction_market_agent_tooling.loggers import logger
 T = TypeVar("T")
 # t=0 is mathematically impossible and it's not clear how OpenAI (and others) handle it, as a result, even with t=0, gpt-4-turbo produces very different outputs,
-# it seems that using a very low temperature is the best way to have as consistent outputs as possible: https://community.openai.com/t/why-the-api-output-is-inconsistent-even-after-the-temperature-is-set-to-0/329541/12
+# see this experiment to figure out if you should use LLM_SUPER_LOW_TEMPERATURE or just 0: https://github.com/gnosis/prediction-market-agent/pull/438.
 LLM_SUPER_LOW_TEMPERATURE = 0.00000001
+# For consistent results, also include seed for models that supports it.
+LLM_SEED = 0
 def check_not_none(

{prediction_market_agent_tooling-0.51.0.dist-info → prediction_market_agent_tooling-0.51.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: prediction-market-agent-tooling
-Version: 0.51.0
+Version: 0.51.1
 Summary: Tools to benchmark, deploy and monitor prediction market agents.
 Author: Gnosis
 Requires-Python: >=3.10,<3.12

{prediction_market_agent_tooling-0.51.0.dist-info → prediction_market_agent_tooling-0.51.1.dist-info}/RECORD RENAMED Viewed

@@ -17,7 +17,7 @@ prediction_market_agent_tooling/benchmark/agents.py,sha256=B1-uWdyeN4GGKMWGK_-Cc
 prediction_market_agent_tooling/benchmark/benchmark.py,sha256=MqTiaaJ3cYiOLUVR7OyImLWxcEya3Rl5JyFYW-K0lwM,17097
 prediction_market_agent_tooling/benchmark/utils.py,sha256=D0MfUkVZllmvcU0VOurk9tcKT7JTtwwOp-63zuCBVuc,2880
 prediction_market_agent_tooling/config.py,sha256=WC30Nr16RGueTafA9i67OIB-6KDHZRryhiLPzebg9_I,6740
-prediction_market_agent_tooling/deploy/agent.py,sha256=c9ovjd5UUk1Qw64Si7q0OO7SvM6I6Wne1fySpu0oWco,22005
+prediction_market_agent_tooling/deploy/agent.py,sha256=wGrjwWIh27xPLhA1fFzR1ytq-tT_9TmeJIjLzEZkB5E,22222
 prediction_market_agent_tooling/deploy/agent_example.py,sha256=dIIdZashExWk9tOdyDjw87AuUcGyM7jYxNChYrVK2dM,1001
 prediction_market_agent_tooling/deploy/betting_strategy.py,sha256=cOPznMX0jd380qHw06A-l1XUyoicV54AXBghirtPw0Q,12127
 prediction_market_agent_tooling/deploy/constants.py,sha256=M5ty8URipYMGe_G-RzxRydK3AFL6CyvmqCraJUrLBnE,82
@@ -40,8 +40,8 @@ prediction_market_agent_tooling/markets/manifold/manifold.py,sha256=JEEIPz9-U3av
 prediction_market_agent_tooling/markets/manifold/utils.py,sha256=cPPFWXm3vCYH1jy7_ctJZuQH9ZDaPL4_AgAYzGWkoow,513
 prediction_market_agent_tooling/markets/markets.py,sha256=_3nV9QTT48G2oJ2egkuWA1UzrTOGY6x3mXqIRgDaVIo,3245
 prediction_market_agent_tooling/markets/metaculus/api.py,sha256=4TRPGytQQbSdf42DCg2M_JWYPAuNjqZ3eBqaQBLkNks,2736
-prediction_market_agent_tooling/markets/metaculus/data_models.py,sha256=PIRN2FAQ32vNj78JRZPB1pXK61B0w2RBJvJSJ7dvvrg,2514
-prediction_market_agent_tooling/markets/metaculus/metaculus.py,sha256=S-XpK6Ij5AZgGMFMdXXcHm7hH2o8BRU-a_XqaW4xC54,3674
+prediction_market_agent_tooling/markets/metaculus/data_models.py,sha256=2wDZ0BmK9O5Lud-q-FCzgW0tsK9GxMU0rUMlcPxSS04,3184
+prediction_market_agent_tooling/markets/metaculus/metaculus.py,sha256=ctRcGm1G8qmUB5RMPoQ_C_GN3Ct24BWDB1gJyOhH_vE,3604
 prediction_market_agent_tooling/markets/omen/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 prediction_market_agent_tooling/markets/omen/data_models.py,sha256=yYPHpZM4fthC1EhyXkXC8iyDQ1ITNBBY9nOHeKD6XSw,27283
 prediction_market_agent_tooling/markets/omen/omen.py,sha256=6tBhn7qxtsdrk0xgDSqWy44f_m1qmhfXjvQUWXtW3TI,47974
@@ -78,6 +78,7 @@ prediction_market_agent_tooling/tools/httpx_cached_client.py,sha256=0-N1r0zcGKlY
 prediction_market_agent_tooling/tools/image_gen/image_gen.py,sha256=HzRwBx62hOXBOmrtpkXaP9Qq1Ku03uUGdREocyjLQ_k,1266
 prediction_market_agent_tooling/tools/image_gen/market_thumbnail_gen.py,sha256=8A3U2uxsCsOfLjru-6R_PPIAuiKY4qFkWp_GSBPV6-s,1280
 prediction_market_agent_tooling/tools/ipfs/ipfs_handler.py,sha256=CTTMfTvs_8PH4kAtlQby2aeEKwgpmxtuGbd4oYIdJ2A,1201
+prediction_market_agent_tooling/tools/is_invalid.py,sha256=Lc5fWB4fmx7tFvRakmUOzo0Oq5EizorddZ2xjesEopY,4984
 prediction_market_agent_tooling/tools/is_predictable.py,sha256=NIoR2bTNMmADcyNY2aKNMWkiDw7Z_9kZMcFXEdyewy4,6771
 prediction_market_agent_tooling/tools/langfuse_.py,sha256=jI_4ROxqo41CCnWGS1vN_AeDVhRzLMaQLxH3kxDu3L8,1153
 prediction_market_agent_tooling/tools/langfuse_client_utils.py,sha256=7H9EzTA_q5TmuqoIeMpdVU2efF_RyttQEoTPLyS-ld4,5579
@@ -88,10 +89,10 @@ prediction_market_agent_tooling/tools/singleton.py,sha256=CiIELUiI-OeS7U7eeHEt0r
 prediction_market_agent_tooling/tools/streamlit_user_login.py,sha256=NXEqfjT9Lc9QtliwSGRASIz1opjQ7Btme43H4qJbzgE,3010
 prediction_market_agent_tooling/tools/tavily_storage/tavily_models.py,sha256=99S7w8BvnJRMOnUArGN0g4GVRoG8M0C-XyIFU8HnLn0,6374
 prediction_market_agent_tooling/tools/tavily_storage/tavily_storage.py,sha256=xrtQH9v5pXycBRyc5j45pWqkSffkoc9efNIU1_G633Q,3706
-prediction_market_agent_tooling/tools/utils.py,sha256=JZj_xM4VbsDSiDlbn2lZFZPEOBSYhqqxd1Y2NpCqnJ4,7117
+prediction_market_agent_tooling/tools/utils.py,sha256=PGmh-9aeEJe_YcHVIiHMLybx31ppng7tVBlMaIfnWy8,7135
 prediction_market_agent_tooling/tools/web3_utils.py,sha256=dkcjG-LtuaWRh7WEMzRGmZ5B5rsxZTlliFOI6fj-EJ8,11842
-prediction_market_agent_tooling-0.51.0.dist-info/LICENSE,sha256=6or154nLLU6bELzjh0mCreFjt0m2v72zLi3yHE0QbeE,7650
-prediction_market_agent_tooling-0.51.0.dist-info/METADATA,sha256=G5RtTj92IHwgLBO6VKf6xCz4WWva0BNo77Xhbfl2lVs,8056
-prediction_market_agent_tooling-0.51.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-prediction_market_agent_tooling-0.51.0.dist-info/entry_points.txt,sha256=m8PukHbeH5g0IAAmOf_1Ahm-sGAMdhSSRQmwtpmi2s8,81
-prediction_market_agent_tooling-0.51.0.dist-info/RECORD,,
+prediction_market_agent_tooling-0.51.1.dist-info/LICENSE,sha256=6or154nLLU6bELzjh0mCreFjt0m2v72zLi3yHE0QbeE,7650
+prediction_market_agent_tooling-0.51.1.dist-info/METADATA,sha256=rGMwRfY6ulZ8GTMKjPP7EWN1nrwTxxazh4aj4vf7Qoc,8056
+prediction_market_agent_tooling-0.51.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+prediction_market_agent_tooling-0.51.1.dist-info/entry_points.txt,sha256=m8PukHbeH5g0IAAmOf_1Ahm-sGAMdhSSRQmwtpmi2s8,81
+prediction_market_agent_tooling-0.51.1.dist-info/RECORD,,

{prediction_market_agent_tooling-0.51.0.dist-info → prediction_market_agent_tooling-0.51.1.dist-info}/LICENSE RENAMED Viewed

File without changes

{prediction_market_agent_tooling-0.51.0.dist-info → prediction_market_agent_tooling-0.51.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{prediction_market_agent_tooling-0.51.0.dist-info → prediction_market_agent_tooling-0.51.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

prediction-market-agent-tooling 0.51.0__py3-none-any.whl → 0.51.1__py3-none-any.whl

prediction-market-agent-tooling 0.51.0__py3-none-any.whl → 0.51.1__py3-none-any.whl