certora-cli-beta-mirror 8.2.0__py3-none-macosx_10_9_universal2.whl → 8.2.1__py3-none-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- certora_cli/Mutate/mutateValidate.py +12 -4
- {certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/METADATA +2 -2
- {certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/RECORD +10 -11
- certora_jars/ASTExtraction.jar +0 -0
- certora_jars/CERTORA-CLI-VERSION-METADATA.json +1 -1
- certora_jars/Typechecker.jar +0 -0
- certora_cli/CertoraProver/concordance.py +0 -939
- {certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/LICENSE +0 -0
- {certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/WHEEL +0 -0
- {certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/entry_points.txt +0 -0
- {certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/top_level.txt +0 -0
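For readers who want to reproduce a listing like this locally, the sketch below compares two downloaded wheels (e.g. fetched with `pip download certora-cli-beta-mirror==<version>`) using only the Python standard library. The wheel file names and the `members` helper are illustrative assumptions, not part of any Certora tooling; note also that the `dist-info` directory name changes between versions, so the renamed metadata files show up as an add/remove pair rather than a rename.

```python
import difflib
import zipfile

# Hypothetical local copies of the two wheels being compared (names are illustrative).
OLD = "certora_cli_beta_mirror-8.2.0-py3-none-macosx_10_9_universal2.whl"
NEW = "certora_cli_beta_mirror-8.2.1-py3-none-macosx_10_9_universal2.whl"

def members(path: str) -> dict[str, bytes]:
    """Read every archive member into memory, keyed by its path inside the wheel."""
    with zipfile.ZipFile(path) as zf:
        return {name: zf.read(name) for name in zf.namelist()}

old, new = members(OLD), members(NEW)
for name in sorted(set(old) | set(new)):
    a, b = old.get(name), new.get(name)
    if a == b:
        continue  # unchanged member
    try:
        a_lines = a.decode("utf-8").splitlines(keepends=True) if a is not None else []
        b_lines = b.decode("utf-8").splitlines(keepends=True) if b is not None else []
    except UnicodeDecodeError:
        print(f"Binary file changed: {name}")  # e.g. the bundled .jar files
        continue
    for line in difflib.unified_diff(a_lines, b_lines,
                                     fromfile=f"8.2.0/{name}", tofile=f"8.2.1/{name}"):
        print(line, end="")
```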
certora_cli/Mutate/mutateValidate.py
CHANGED
@@ -69,10 +69,18 @@ class MutateValidator:
             except Exception as e:
                 raise Util.CertoraUserInputError(f"Invalid file_to_mutate in manual mutant: {mutant[Constants.FILE_TO_MUTATE]}", e)

-
-
-
-
+            mutants_location = mutant[Constants.MUTANTS_LOCATION]
+            if Path(mutants_location).is_dir():
+                try:
+                    Vf.validate_dir(mutants_location)
+                except Exception as e:
+                    raise Util.CertoraUserInputError(f"Invalid directory for mutants location {mutants_location}",
+                                                     e)
+            else:
+                try:
+                    Vf.validate_readable_file(mutants_location, Util.SOL_EXT)
+                except Exception as e:
+                    raise Util.CertoraUserInputError(f"Invalid file for mutants location {mutants_location}", e)

     def mutation_attribute_in_prover(self) -> None:
         gambit_attrs = ['filename', 'contract', 'functions', 'seed', 'num_mutants']
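The net effect of this hunk is that `mutants_location` may now point either at a directory of mutants or at a single `.sol` file, with a distinct error message for each case. A minimal standalone sketch of the same dispatch follows; `validate_mutants_location` and `SOL_EXT` are illustrative stand-ins and not the package's actual helpers (the real code delegates to `Vf.validate_dir` and `Vf.validate_readable_file`):

```python
import os
from pathlib import Path

SOL_EXT = ".sol"  # assumption: stands in for Util.SOL_EXT used by the real validator

def validate_mutants_location(mutants_location: str) -> None:
    """Standalone sketch of the new branch: a mutants location may be either a
    directory of pre-generated mutants or a single readable Solidity file."""
    path = Path(mutants_location)
    if path.is_dir():
        # Directory case: the real code calls Vf.validate_dir here.
        return
    # File case: the real code calls Vf.validate_readable_file(..., SOL_EXT).
    if not (path.is_file() and path.suffix == SOL_EXT and os.access(path, os.R_OK)):
        raise ValueError(f"Invalid file for mutants location {mutants_location}")
```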
{certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/METADATA
RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: certora-cli-beta-mirror
-Version: 8.2.0
+Version: 8.2.1
 Summary: Runner for the Certora Prover
 Home-page: https://pypi.org/project/certora-cli-beta-mirror
 Author: Certora
@@ -39,4 +39,4 @@ Dynamic: requires-dist
 Dynamic: requires-python
 Dynamic: summary

-Commit
+Commit a27f12a. Build and Run scripts for executing the Certora Prover on Solidity smart contracts.

{certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/RECORD
RENAMED
@@ -37,7 +37,6 @@ certora_cli/CertoraProver/certoraProjectScanner.py,sha256=jT7FeWzcy8o83LrZRwsg_L
 certora_cli/CertoraProver/certoraSourceFinders.py,sha256=qwJtwrQq3NUNYmdmn1UmANN4lmJFIUh4M-St2x1FJ2Y,19038
 certora_cli/CertoraProver/certoraType.py,sha256=inwaLkMVwtJnwkyQhDJs-wRxoyytu2Xa_BJ5MdGlZqY,29737
 certora_cli/CertoraProver/certoraVerifyGenerator.py,sha256=YMuzGj2RNOnADOx8UnV2ys1ptw_-2mermgC9ZLMWceo,11052
-certora_cli/CertoraProver/concordance.py,sha256=CCDN7hT9rnlU-ZKJ7hVxHR2fS8ZXwKVdE59jtMTtY0M,38666
 certora_cli/CertoraProver/erc7201.py,sha256=BME5kBZsDx6lgqLn7EE91I1cEOZtsnZ8BlRVF62eEBE,1660
 certora_cli/CertoraProver/splitRules.py,sha256=dNhy05ShB_-rWYTnJH5m-Xc5A4HGStAvwLRs1BTu1GA,7627
 certora_cli/CertoraProver/storageExtension.py,sha256=nrCrbH8ne-yCYSDFzh3J9A7Q6h96WxhEfLbfxGSUCSc,14363
@@ -61,7 +60,7 @@ certora_cli/Mutate/mutateApp.py,sha256=UY4TWn9f318b2upVAFUL0cqcgzlyTDZ5XeNW3Mjof
 certora_cli/Mutate/mutateAttributes.py,sha256=2onGaPmztwmHg5V_X7BUG4HcQCThhqYzGYKBy695Izc,10587
 certora_cli/Mutate/mutateConstants.py,sha256=LRrz3wMM8WpPYSshkc-PLYqT0nexcWQeBNsehip-LOE,3945
 certora_cli/Mutate/mutateUtil.py,sha256=B7MCIFtZBetjR4MMxU6F5ikYsaot1wTG7XYMjgVXl4k,2287
-certora_cli/Mutate/mutateValidate.py,sha256=
+certora_cli/Mutate/mutateValidate.py,sha256=6DRYfnE-HCMvMxbb0v1dx2BTSd-x8YeFRVp35KplVPc,9760
 certora_cli/Shared/ExpectedComparator.py,sha256=eyRR-jni4WJoa6j2TK2lnZ89Tyb8U99wT2PNdu4se8w,18457
 certora_cli/Shared/__init__.py,sha256=s0dhvolFtsS4sRNzPVhC_rlw8mm194rCZ0WhOxInY40,1025
 certora_cli/Shared/certoraAttrUtil.py,sha256=Nw8ban5Axp6c6dT-KJfCD9i9tKnGk1DbvRDDNH3--DU,8574
@@ -69,13 +68,13 @@ certora_cli/Shared/certoraLogging.py,sha256=cV2UQMhQ5j8crGXgeq9CEamI-Lk4HgdiA3HC
 certora_cli/Shared/certoraUtils.py,sha256=buUE95PnbpDEbYEZss_jcP7pWCRJhXnfV-eOA6-zvYM,58407
 certora_cli/Shared/certoraValidateFuncs.py,sha256=BPLuVsS3yAcYIuCvkXtDuFQKf2qaT74TIddB0lM84yM,42508
 certora_cli/Shared/proverCommon.py,sha256=uZkl9PDLPj81kKRnBnlPUmvhMZovNP25_74No_7jaQ4,11215
-certora_jars/ASTExtraction.jar,sha256=
-certora_jars/CERTORA-CLI-VERSION-METADATA.json,sha256=
-certora_jars/Typechecker.jar,sha256
+certora_jars/ASTExtraction.jar,sha256=tRIhYz0x7agtytWLH0ta8zZMR6maGEzM13fOMrRBMMw,17614868
+certora_jars/CERTORA-CLI-VERSION-METADATA.json,sha256=RHCGld9OX7ZGd559dhshflPnahIJShAHjF8GAaAzl1I,143
+certora_jars/Typechecker.jar,sha256=u4KLY6doS3MNtQSPhF6BGgim7RViZ5jfXjSBMC7XVAs,17577025
 certora_jars/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-certora_cli_beta_mirror-8.2.
-certora_cli_beta_mirror-8.2.
-certora_cli_beta_mirror-8.2.
-certora_cli_beta_mirror-8.2.
-certora_cli_beta_mirror-8.2.
-certora_cli_beta_mirror-8.2.
+certora_cli_beta_mirror-8.2.1.dist-info/LICENSE,sha256=UGKSKIJSetF8m906JLKqNLkUS2CL60XfQdNvxBvpQXo,620
+certora_cli_beta_mirror-8.2.1.dist-info/METADATA,sha256=sqpw-Q34_jE5LItMjWt-BRiLsYxn7-eKSj9Wgv_2lBs,1286
+certora_cli_beta_mirror-8.2.1.dist-info/WHEEL,sha256=9Ig2YBzm5cpS_YWKLeuYxVAxcKv_uDQsCzy9XJbRZ_g,110
+certora_cli_beta_mirror-8.2.1.dist-info/entry_points.txt,sha256=ClZiFkCYDdK25_ufxZvnE2Rx_kNk1_4vj7KpgYUKxGM,509
+certora_cli_beta_mirror-8.2.1.dist-info/top_level.txt,sha256=8C77w3JLanY0-NW45vpJsjRssyCqVP-qmPiN9FjWiX4,38
+certora_cli_beta_mirror-8.2.1.dist-info/RECORD,,

certora_jars/ASTExtraction.jar
CHANGED
Binary file

certora_jars/CERTORA-CLI-VERSION-METADATA.json
CHANGED
@@ -1 +1 @@
-{"name": "certora-cli-beta-mirror", "tag": "8.2.
+{"name": "certora-cli-beta-mirror", "tag": "8.2.1", "branch": "", "commit": "a27f12a", "timestamp": "20250828.9.26.267405", "version": "8.2.1"}

certora_jars/Typechecker.jar
CHANGED
Binary file

certora_cli/CertoraProver/concordance.py
DELETED
@@ -1,939 +0,0 @@
-# The Certora Prover
-# Copyright (C) 2025 Certora Ltd.
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation, version 3 of the License.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <https://www.gnu.org/licenses/>.
-
-# ============================================================================
-# IMPORTS
-# ============================================================================
-
-from langchain_anthropic import ChatAnthropic
-from typing import Optional, List, TypedDict, Annotated, Literal, Required, TypeVar, Type, Protocol, Union, Any
-from langchain_core.messages import ToolMessage, AnyMessage, SystemMessage, HumanMessage, BaseMessage
-from langchain_core.tools import tool, InjectedToolCallId, BaseTool
-from langchain_core.language_models.base import LanguageModelInput
-from langchain_core.language_models.chat_models import BaseChatModel
-from langchain_core.runnables import Runnable, RunnableConfig
-from langgraph.graph import StateGraph, START, MessagesState
-from langgraph.graph.state import CompiledStateGraph
-from langgraph._internal._typing import StateLike
-from langgraph.graph.message import add_messages
-from langgraph.types import Command, interrupt
-from langgraph.prebuilt import ToolNode
-from langgraph.checkpoint.memory import MemorySaver
-from pydantic import BaseModel, Field
-import os
-import tempfile
-import json
-import subprocess
-import sys
-import logging
-import argparse
-
-# ============================================================================
-# LOGGING SETUP
-# ============================================================================
-
-logger = logging.getLogger("concordance")
-response_logger = logger.getChild("response")
-tool_logger = logger.getChild("tools")
-
-
-# ============================================================================
-# SHARED UTILITIES
-# ============================================================================
-
-
-class GraphInput(TypedDict):
-    code_input: str
-
-
-class WithToolCallId(BaseModel):
-    tool_call_id: Annotated[str, InjectedToolCallId]
-
-
-def tool_return(
-    tool_call_id: str,
-    content: str
-) -> Command:
-    """
-    Create a LangGraph Command for tool responses that need to continue processing.
-
-    Used by tools that want to return a result and continue the workflow by routing
-    back to the tool_result node for LLM processing.
-
-    Args:
-        tool_call_id: The ID of the tool call being responded to
-        content: The response content from the tool execution
-
-    Returns:
-        Command that updates messages and continues workflow
-    """
-    return Command(
-        update={
-            "messages": [ToolMessage(tool_call_id=tool_call_id, content=content)]
-        }
-    )
-
-
-def tool_output(tool_call_id: str, res: dict) -> Command:
-    """
-    Create a LangGraph Command for final tool outputs that update workflow state.
-
-    Used by completion tools (like harness_output, rewrite_output) to set final
-    results in the workflow state. The workflow's conditional edge will detect
-    these state updates and route to completion.
-
-    Args:
-        tool_call_id: The ID of the tool call being responded to
-        res: Dictionary containing the final workflow results to merge into state
-
-    Returns:
-        Command that updates state with final results and a success message
-    """
-    return Command(update={
-        **res,
-        "messages": [ToolMessage(
-            tool_call_id=tool_call_id,
-            content="Success"
-        )]
-    })
-
-def pretty_print_messages(messages: list[AnyMessage]) -> str:
-    """Format a list of AnyMessage objects for readable debug output."""
-    formatted_lines = []
-    for i, msg in enumerate(messages):
-        msg_type = type(msg).__name__
-
-        # Get message role if available
-        role = getattr(msg, 'role', 'unknown')
-
-        role = getattr(msg, 'type', 'unknown')
-        # Get content preview (handle both string and list content)
-        if hasattr(msg, 'content') and msg.content:
-            if isinstance(msg.content, list):
-                # For list content, show count and first item preview
-                content_preview = \
-                    f"[{len(msg.content)} items: {str(msg.content[0])[:50] if msg.content else 'empty'}...]"
-            else:
-                content_preview = str(msg.content)[:100]
-                if len(str(msg.content)) > 100:
-                    content_preview += "..."
-        else:
-            content_preview = "<empty>"
-
-        # Format tool calls if present
-        tool_info = ""
-        if tool_calls := getattr(msg, 'tool_calls', None):
-            tool_names = [tc.get('name', 'unknown') for tc in tool_calls]
-            tool_info = f" | Tools: {', '.join(tool_names)}"
-
-        formatted_lines.append(f" [{i}] {msg_type} (role: {role}): {content_preview}{tool_info}")
-
-    return "\n" + "\n".join(formatted_lines) if formatted_lines else " <no messages>"
-
-
-class InitNodeFunction(Protocol):
-    """Protocol defining the signature for LangGraph node functions."""
-    def __call__(self, state: GraphInput) -> dict[str, List[BaseMessage]]:
-        ...
-
-
-class ChatNodeFunction(Protocol):
-    def __call__(self, state: MessagesState) -> dict[str, List[BaseMessage]]:
-        ...
-
-
-def tool_result_generator(llm: Runnable[LanguageModelInput, BaseMessage]) -> ChatNodeFunction:
-    """
-    Create a LangGraph node function that processes tool results by sending
-    the current message history to the LLM for the next response.
-
-    Args:
-        llm: The LLM bound with tools to invoke for generating responses
-
-    Returns:
-        A node function that takes MessagesState and returns updated messages
-    """
-    def tool_result(state: MessagesState) -> dict[str, List[BaseMessage]]:
-        logger.debug("Tool result state messages:%s", pretty_print_messages(state["messages"]))
-        return {"messages": [llm.invoke(state["messages"])]}
-    return tool_result
-
-def initial_node(sys_prompt: str, initial_prompt: str, llm: Runnable[LanguageModelInput, BaseMessage]) -> InitNodeFunction:
-    """
-    Create a LangGraph node function that initializes a workflow with system and human messages,
-    then gets the first LLM response.
-
-    Args:
-        sys_prompt: System message content to set the LLM's role and context
-        initial_prompt: Human message template to start the conversation
-        llm: The LLM bound with tools to invoke for generating the initial response
-
-    Returns:
-        A node function that takes GraphInput and returns initial message history
-    """
-    def to_return(state: GraphInput) -> dict[str, List[BaseMessage]]:
-        initial_messages : List[BaseMessage] = [
-            SystemMessage(
-                sys_prompt
-            ),
-            HumanMessage(
-                content=[initial_prompt, state["code_input"]]
-            )
-        ]
-        initial_messages.append(
-            llm.invoke(initial_messages)
-        )
-        return {"messages": initial_messages}
-    return to_return
-
-
-# TypeVars for generic typing
-StateT = TypeVar('StateT', bound=StateLike)
-OutputT = TypeVar('OutputT', bound=StateLike)
-
-
-def build_workflow(
-    state_class: Type[StateT],
-    tools_list: List[BaseTool],
-    sys_prompt: str,
-    initial_prompt: str,
-    output_key: str,
-    unbound_llm: BaseChatModel,
-    output_schema: Optional[Type[OutputT]] = None,
-) -> StateGraph[StateT, None, GraphInput, OutputT]:
-    """
-    Build a standard workflow with initial node -> tools -> tool_result pattern.
-    Uses fixed GraphInput schema and explicit LLM currying.
-    """
-    # Node name constants
-    INITIAL_NODE = "initial"
-    TOOLS_NODE = "tools"
-    TOOL_RESULT_NODE = "tool_result"
-
-    def should_end(state: StateT) -> Literal["__end__", "tool_result"]:
-        """Check if workflow should end based on output key being defined."""
-        assert isinstance(state, dict)
-        if state.get(output_key, None) is not None:
-            return "__end__"
-        return TOOL_RESULT_NODE
-
-    llm = unbound_llm.bind_tools(tools_list)
-
-    # Create initial node and tool node with curried LLM
-    init_node = initial_node(sys_prompt=sys_prompt, initial_prompt=initial_prompt, llm=llm)
-    tool_node = ToolNode(tools_list)
-    tool_result_node = tool_result_generator(llm)
-
-    # Build the graph with fixed input schema, no context
-    builder = StateGraph(
-        state_class,
-        input_schema=GraphInput,
-        output_schema=output_schema
-    )
-    builder.add_node(INITIAL_NODE, init_node)
-    builder.add_edge(START, INITIAL_NODE)
-    builder.add_node(TOOLS_NODE, tool_node)
-    builder.add_edge(INITIAL_NODE, TOOLS_NODE)
-    builder.add_node(TOOL_RESULT_NODE, tool_result_node)
-    builder.add_edge(TOOL_RESULT_NODE, TOOLS_NODE)
-
-    # Add conditional edge from tools
-    builder.add_conditional_edges(
-        TOOLS_NODE,
-        should_end
-    )
-
-    return builder
-
-
-# ============================================================================
-# SOLIDITY COMPILER TOOL (SHARED)
-# ============================================================================
-
-
-class SolidityCompilerInput(BaseModel):
-    """
-    A Solidity compiler capable of compiling a single, Solidity file into EVM bytecode. The compiler
-    also performs typechecking and will flag any syntax errors. The compiler comes from the official
-    distribution channels for Solidity and understands all the Solidity language and features.
-    """
-    compiler_version: str = \
-        Field(description=
-              "The compiler version string to use for compilation. Compiler versions are taken from the known compiler "
-              "releases (e.g., 0.8.2), but with the leading '0.' dropped (e.g., 8.2)."
-              )
-
-    source: str = Field(description="The Solidity source to be compiled")
-
-
-@tool(args_schema=SolidityCompilerInput)
-def solidity_compiler(source: str, compiler_version: str) -> str:
-    compiler_input = {
-        "language": "Solidity",
-        "sources": {
-            "harness.sol": {
-                "content": source
-            }
-        },
-        "settings": {
-            "outputSelection": {
-                "*": {
-                    "*": []
-                }
-            }
-        }
-    }
-    compile_result = subprocess.run(
-        [f'solc{compiler_version}', "--standard-json"],
-        input=json.dumps(compiler_input),
-        text=True,
-        encoding="utf-8",
-        capture_output=True
-    )
-    res = f"Return code was: {compile_result.returncode}\nStdout:\n{compile_result.stdout}"
-    return res
-
-
-# ============================================================================
-# HARNESSING WORKFLOW
-# ============================================================================
-
-
-harness_system_prompt = """
-You are an expert Solidity developer, with several years of experience writing smart contracts. You also
-have a deep understanding of the EVM and how the Solidity language is ultimately compiled to the EVM bytecode.
-This lets you understand why certain programs written in Solidity language are invalid and rejected by the
-compiler. For example, you know that an `external` function in a `contract` cannot accept a reference type
-marked as `storage`: you know that this is only allowed in a `library` which is always accessed with a delegatecall.
-
-You also understand the subtleties around ABI encoding and decoding, and the translation of high-level types to
-an ABI signature. For example, you know that a struct with two uint fields is represented in an ABI signature as
-`(uint256,uint256)`.
-"""
-
-harnessing_prompt = """
-Create an external 'harness contract' which provides a minimal way to execute the given 'internal' Solidity function
-via an external function wrapper.
-The external function wrapper should simply pass its arguments to the internal function, and return the result back
-to the caller. The internal function being harnessed
-should be included in the contract. You MAY include extra type definitions, but only if absolutely necessary for the
-code to compile; definitions solely for documentation or explanation
-purposes should NOT be included."
-The external harness should be type correct and syntax correct. To ensure this, use the Solidity compiler and
-incorporate its feedback to fix any type/syntax errors.
-"""
-
-
-class HarnessedOutput(TypedDict):
-    harness_definition: Optional[str]
-
-
-class HarnessingState(TypedDict):
-    GraphInput: str
-    harness_definition: Optional[str]
-    messages: Annotated[list[AnyMessage], add_messages]
-
-
-class HarnessOutputSchema(WithToolCallId):
-    """
-    Used to communicate the results of harness generation, which is the minimal contract to exercise an internal
-    function, along with the ABI signature of the method which is the external entry point and the name of the contract.
-    Used only for successfully validated (type correct, syntax correct) harnesses.
-    """
-    source_code: str = \
-        Field(description=
-              "The self-contained Solidity source code which wraps the provided internal function"
-              )
-
-    contract_name: str = \
-        Field(description=
-              "The name of the Solidity contract containing the external method that wraps the internal function"
-              )
-
-    abi_signature: str = \
-        Field(description=
-              "The ABI signature of the external function generated as the internal function wrapper. "
-              "Includes parameter types (but not return types)"
-              )
-
-
-@tool(args_schema=HarnessOutputSchema)
-def harness_output(source_code: str, tool_call_id: Annotated[str, InjectedToolCallId], contract_name: str, abi_signature: str) -> Command:
-    return tool_output(tool_call_id=tool_call_id, res={"harness_definition": source_code})
-
-
-# Harness workflow setup
-HARNESS_TOOLS = [harness_output, solidity_compiler]
-
-# ============================================================================
-# REWRITE WORKFLOW
-# ============================================================================
-
-simplification_system_prompt = """
-You are an expert Solidity developer with several years of experience writing smart contracts and
-optimizing them for gas costs. You know all about low-level assembly and how to use it to directly
-access EVM internal representations to optimize execution. This means you are also familiar with how
-the EVM works on a deep, fundamental level. You are also intimately familiar with how Solidity
-lays out memory, and its allocation pattern. Among other facts, you know that it uses a bump allocator
-with a free pointer whose value resides in memory at slot 0x40. You also know that memory in the range 0x0 - 0x40
-is considered "scratch" and is freely usable, as is all memory after the current value of the free pointer. You
-know that arrays are allocated to include an extra word at the beginning of the allocated block to hold the length
-of the memory, followed by elements of the array. `bytes` and `string` arrays pack their elements tightly (1 byte
-per element), whereas all other arrays use 32 bytes per element.
-
-You also hold a PhD in static analysis, and are an expert in the field of points to analyses and memory
-safety analyses. You help maintain a static analysis which attempts to recover the pointer relationships
-between stack values and memory locations in compiled EVM bytecode. For soundness, this analysis
-must be able to prove that every access to memory is either in the scratch areas OR said access can be
-attributed to a field of some object. Accesses to memory which cannot be proven to satisfy one of these two conditions
-cause the entire analysis to fail. The analysis is partially path sensitive, and can understand that
-`i < array.length` means that `i` is a valid index into `array`. The analysis uses these facts to prove
-accesses are safe AND which object's fields are being accessed by each memory operation.
-"""
-
-rewriting_prompt = """
-<context>
-The following contract "harnesses" a problematic internal function which causes a pointer analysis on EVM bytecode to fail.
-This may be due to non-standard operations on memory that occcurs in memory blocks, or due to imprecision in the
-pointer analysis.
-</context>
-
-<task>
-Rewrite the *internal* function so that it is semantically equivalent but is more amenable to static analysis.
-Common problems include:
-- Inline assembly with direct memory manipulation
-- Unchecked array/memory access
-- Pointer arithmetic that the analyzer cannot track
-- Non-standard memory layout assumptions
-Your rewrite should satisfy the following constraints:
-- It must be semantically equivalent to the original function.
-- Wherever possible, eschew the use of inline assembly in favor of more straightforward, standard Solidity
-- You may ignore the gas implications of any code you write: code that is accepted by the pointer analysis is
-preferable to gas efficient code. However, you should consider that the original code may by optimized for gas
-consumption, which should inform your understanding of its intent
-"Semantic equivalence" means the following:
-- Functions produce the same return value
-- The functions have exactly the same observable effects. These external effects are:
-- Reverting (including the revert data)
-- EVM level returns (that is, the return opcode)
-- External calls
-- Changes to storage
-- emitted logs/events
-
-In other words, if the original function reverts, the rewritten function must also revert with
-the same buffer.
-For the purposes of this rewrite, you can ignore the possibility of out-of-gas exceptions.
-Similarly, the rewrite must emit the same log events (if any) and in the same order.
-The rewrite must also make the same external calls, and make the same modifications to storage.
-However, remember that if both functions revert, any other side effects (external calls, storage changes, etc.)
-are mooted.
-</task>
-
-<algorithm>
-<input>An "original harness" around a "problematic internal function"</input>
-<output>The rewritten "better function"</output>
-<steps>
-1. Analyze the "problematic internal function" in the "original harness" to understand its behavior.
-Pay close attention to its revert conditions and side effects
-2. Generate a rewrite of the internal function called the "better function", which uses straight-forward
-solidity while preserving equivalence to the "problematic internal function"
-a. Keep track of and remember any extra definitions required for this "better function" rewrite.
-3. Adapt the "original harness" into a "rewrite harness" by replacing the "problematic internal function" with the
-"better function" generated in step 2 and changing the name of the "original harness" contract.
-Incorporate any definitions generated by step 2.a
-4. Check that the "rewrite harness" is type correct and syntax correct using the solidity compiler
-5. Check that the "rewrite harness" and "original harness" are semantically equivalent using the equivalence checker.
-6. Interpret the results of the equivalence checker:
-a. If the result is 'Equivalent', then go to step 7
-b. Otherwise, examine the explanation provided by the equivalence checker for why the two functions are not
-equivalent. Incorporating this feedback, adjust the definition of "better function" within the
-"rewrite harness", and go to step 5.
-7. Output the definition of the "better function" along with any of the extra definitions that are necessary.
-</steps>
-</algorithm>
-
-<guidance>
-<important>
-When invoking the equivalence checker, you *may not* change the external entry point of
-either the "original harness" or the "rewrite harness"
-</important>
-<important>
-You *MAY NOT* change the "original harness" in any way: you must pass it to the equivalence checker without
-modification.
-</important>
-<important>
-The task is complete only when the equivalence checker says the implementations are 'Equivalent'
-</important>
-<soft_requirement>4
-You should *not* add additional error/interface declarations unless absolutely necessary
-for your rewrite to compile.
-</soft_requirement>
-<soft_requirement>
-Inline assembly should be absolutely avoided unless you have no other option to preserve semantic equivalence.
-If you have no choice but to use inline assembly, the inline assembly should hew as closely as possible to
-standard Solidity memory access patterns; array accesses should be "guarded" by length checks, and so on.
-</soft_requirement>
-<reminder>
-When adapting the "original harness" to check equivalence, you **should** change the name of the harnessing
-contract.
-</reminder>
-<tool_advice>
-You **should** check that your rewrite harness is type and syntax correct using the solidity compiler.
-</tool_advice>
-<tool_advice>
-You are an automated tool, and should only use the the human_in_the_loop tool as a last resort to get "unstuck".
-Be sure to iterate on a particular issue a few times before asking the user for help.
-</tool_advice>
-</guidance>
-"""
-
-
-class EquivalenceCheckerSchema(BaseModel):
-    """
-    A formal verification tool that is able to compare the behavior of two external methods in two different contracts
-    on all possible inputs, and judges whether they have the same side effects.
-    A side effect includes: changes to storage, external calls, logs, and returns/reverts.
-
-    If the equivalence checker thinks the external contracts exhibit different behaviors, it will respond with
-    a concrete example demonstrating the difference in behaviors. Otherwise it will respond with just 'Equivalent'.
-
-    IMPORTANT: The name of the two contracts containing the external methods *must* be different and the external
-    methods *must* have the same ABI signature.
-    """
-
-    contract1: str = \
-        Field(description=
-              "Solidity source code of the first contract to compare for equivalence. This source code must be s"
-              "elf-contained, and must be compilable with a standard solidity compiler. It must be type correct and "
-              "syntactically correct."
-              )
-
-    contract1_name: str = \
-        Field(description=
-              "The name of the contract defined in the `contract1` param. For example, if `contract1` contains the "
-              "source `contract Foo { ... }` this parameter should be `Foo`"
-              )
-
-    contract2: str = \
-        Field(description=
-              "Solidity source code of the second contract to compare for equivalence. The source code must be "
-              "self-contained, and must be compilable with a standard solidity compiler. It must therefore be type "
-              "correct and syntactically correct."
-              )
-
-    contract2_name: str = \
-        Field(description=
-              "The name of the contract defined in the `contract2` param. MUST be different from the value of "
-              "`contract1-name`. For example, if `contract2` contains the source code "
-              "`contract Bar { ... }` this parameter should be `Bar`."
-              )
-
-    abi_signature: str = \
-        Field(description=
-              "The ABI signature (name and parameter types) of the external method to compare between "
-              "contract1 and contract2"
-              )
-
-    compiler_version: str = \
-        Field(description=
-              "The compiler version string to use for compiling contract1 and contract2. Compiler versions are taken "
-              "from the known compiler releases (e.g., 0.8.2), but with the leading '0.' dropped (e.g., 8.2)."
-              )
-
-    loop_bound: int = \
-        Field(description=
-              "When verifying equivalence of looping code, how many times to unroll the loop for bounded verification. "
-              "For performance reasons, this should be set as small as possible while still demonstrating non-trivial "
-              "behavior. While values above 3 are supported, performance gets exponentially worse above these values, "
-              "and they should be avoided if possible."
-              )
-
-
-@tool(args_schema=EquivalenceCheckerSchema)
-def equivalence_check(
-    contract1: str,
-    contract1_name: str,
-    contract2: str,
-    contract2_name: str,
-    abi_signature: str,
-    loop_bound: int,
-    compiler_version: str
-) -> str:
-    print("Running the equivalence checker...")
-
-    # Create temporary files - result in current directory, trace anywhere
-    with tempfile.NamedTemporaryFile(mode='w', dir=".", suffix='.sol') as f1, \
-         tempfile.NamedTemporaryFile(mode='w', dir=".", suffix='.sol') as f2, \
-         tempfile.NamedTemporaryFile(mode='w') as trace, \
-         tempfile.NamedTemporaryFile(mode='w', dir='.', suffix=".json") as result:
-
-        # Write contract bodies to files
-        f1.write(contract1)
-        f1.flush()
-
-        f2.write(contract2)
-        f2.flush()
-
-        # Build the command
-        command = [
-            'certoraRun.py',
-            f'{f1.name}:{contract1_name}',
-            f'{f2.name}:{contract2_name}',
-            '--equivalence_contracts', f'{contract1_name}={contract2_name}',
-            '--method', abi_signature,
-            '--prover_args', f'-equivalenceCheck true -maxHeuristicFoldingDepth 5 -equivTraceFile {trace.name}',
-            '--tool_output', os.path.basename(result.name),
-            '--loop_iter', str(loop_bound),
-            "--optimistic_hashing",
-            "--optimistic_loop",
-            '--solc', 'solc8.29'
-        ]
-
-        # Run the command without assuming success
-        result_process = subprocess.run(command,
-                                        capture_output=True,
-                                        text=True,
-                                        env={**os.environ, "DONT_USE_VERIFICATION_RESULTS_FOR_EXITCODE": "1"}
-                                        )
-
-        # If non-zero exit, just return
-        if result_process.returncode != 0:
-            return f"The equivalence checker failed with returncode {result_process.returncode}. " \
-                   "It's possible something in your code wasn't handled. " \
-                   "Try a few more times, and then ask for assistance"
-
-        # Load and parse result JSON
-        with open(result.name, 'r') as result_file:
-            result_data = json.load(result_file)
-
-        # Extract the rules dictionary
-        rules_dict = result_data['rules']
-
-        # Get the single key-value pair (since it's a singleton)
-        _, rule_value = next(iter(rules_dict.items()))
-
-        # Check if SUCCESS
-        if rule_value == "SUCCESS":
-            print("Equivalence check passed")
-            return "Equivalent"
-        else:
-            print("Divergent behavior found; returning for refinement")
-            # Read and return trace contents
-            with open(trace.name, 'r') as trace_file:
-                to_return = trace_file.read()
-                tool_logger.info("Trace was:\n%s", to_return)
-                return to_return
-
-
-class ExtraDefinition(BaseModel):
-    definition: str = \
-        Field(description=
-              "A snippet of Solidity that defines some type/error/interface etc. that is needed for the rewrite to work"
-              )
-
-    where: str = \
-        Field(description=
-              "Human readable description of where this definition should be placed. If there is no strong "
-              "guidance/requirement for where the definition lives, 'Nearby' is an acceptable answer"
-              )
-
-    justification: str = \
-        Field(description=
-              "Explanation for why this additional definition is necessary."
-              )
-
-
-class RewriteResultSchema(WithToolCallId):
-    """
-    Used to communicate the successful rewrite to the client. Should only be invoked once the problematic rewritten function has been
-    successfully validated using the equivalence checker; that is, it has returned "Equivalent".
-    """
-    rewrite: str = \
-        Field(description=
-              "The validated; rewritten function. Should consist only of the internal function definition; "
-              "the surrounding external harness should NOT be included."
-              )
-
-    extra_definitions: List[ExtraDefinition] = \
-        Field(description="Any extra definitions that are necessary for the rewrite.")
-
-    remarks: str = \
-        Field(description=
-              "Any explanation of the rewrite. In particular, be sure to justify the use of any inline assembly or "
-              "extra type definitions included"
-              )
-
-
-@tool(args_schema=RewriteResultSchema)
-def rewrite_output(rewrite: str, extra_definitions: List[ExtraDefinition], remarks: str,
-                   tool_call_id: Annotated[str, InjectedToolCallId]) -> Command:
-    return tool_output(
-        tool_call_id=tool_call_id,
-        res={
-            "result": RewriteResultSchema(
-                tool_call_id=tool_call_id,
-                extra_definitions=extra_definitions,
-                remarks=remarks,
-                rewrite=rewrite
-            )
-        }
-    )
-
-
-class HumanInTheLoopSchema(WithToolCallId):
-    """
-    A tool that allows the LLM agent to request human assistance when encountering divergent behaviors
-    during the rewriting process. This tool should be used when the equivalence checker reports
-    differences between the original and rewritten functions that the agent cannot resolve automatically.
-    """
-    question: str = Field(description="The specific question or problem the agent needs help with")
-
-    context: str = \
-        Field(description=
-              "Relevant context about the divergent behavior, including equivalence checker output, "
-              "and what has been tried before (and what didn't work)"
-              )
-
-    original_function: str = Field(description="The original problematic function being rewritten")
-    attempted_rewrite: str = Field(description="The current attempted rewrite that shows divergent behavior")
-
-
-@tool(args_schema=HumanInTheLoopSchema)
-def human_in_the_loop(
-    question: str,
-    context: str,
-    original_function: str,
-    attempted_rewrite: str,
-    tool_call_id: Annotated[str, InjectedToolCallId]
-) -> Command[Literal["tool_result", "error"]]:
-    """
-    Request human assistance to resolve divergent behaviors during rewriting.
-    """
-    # Use LangGraph's interrupt mechanism to pause execution and request human input
-    human_guidance = interrupt({
-        "question": question,
-        "context": context,
-        "original_function": original_function,
-        "attempted_rewrite": attempted_rewrite
-    })
-
-    return tool_return(
-        tool_call_id=tool_call_id,
-        content=f"Human guidance: {human_guidance}"
-    )
-
-
-class ToolError(TypedDict, total=False):
-    error_message: Required[str]
-    tool_stdout: str
-    tool_stderr: str
-
-
-class RewriterState(TypedDict):
-    messages: Annotated[list[AnyMessage], add_messages]
-    code_input: str
-    error: Optional[ToolError]
-    result: Optional[RewriteResultSchema]
-
-
-# Rewrite workflow setup
-rewrite_tools = [
-    rewrite_output,
-    solidity_compiler,
-    equivalence_check,
-    human_in_the_loop
-]
-
-
-# ============================================================================
-# APPLICATION ORCHESTRATION AND CLI INTERFACE
-# ============================================================================
-
-def setup_argument_parser() -> argparse.ArgumentParser:
-    """Configure command line argument parser."""
-    parser = argparse.ArgumentParser(description="Certora Concordance Tool for Solidity Function Rewriting")
-    parser.add_argument("input_file", help="Input Solidity file containing the function to process")
-    parser.add_argument("--harness-model", default="claude-sonnet-4-20250514",
-                        help="Model to use for harness generation (default: claude-sonnet-4-20250514)")
-    parser.add_argument("--rewrite-model", default="claude-opus-4-20250514",
-                        help="Model to use for function rewriting (default: claude-opus-4-20250514)")
-    parser.add_argument("--harness-tokens", type=int, default=1024,
-                        help="Token budget for harness generation (default: 1024)")
-    parser.add_argument("--rewrite-tokens", type=int, default=4096,
-                        help="Token budget for function rewriting (default: 4096)")
-    parser.add_argument("--thinking-tokens", type=int, default=2048,
-                        help="Token budget for thinking in rewriting (default: 2048)")
-    parser.add_argument("--debug", action="store_true",
-                        help="Enable debug logging output")
-    return parser
-
-
-def setup_logging(debug: bool) -> None:
-    """Configure logging based on debug flag."""
-    if debug:
-        logger.setLevel(logging.DEBUG)
-        if not logger.handlers:
-            handler = logging.StreamHandler()
-            handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
-            logger.addHandler(handler)
-
-
-def create_harness_llm(args: argparse.Namespace) -> BaseChatModel:
-    """Create and configure the harness generation LLM."""
-    return ChatAnthropic(
-        model_name=args.harness_model,
-        max_tokens_to_sample=args.harness_tokens,
-        temperature=0,
-        timeout=None,
-        max_retries=2,
-        stop=None
-    )
-
-
-def create_rewrite_llm(args: argparse.Namespace) -> BaseChatModel:
-    """Create and configure the rewrite LLM."""
-    return ChatAnthropic(
-        model_name=args.rewrite_model,
-        max_tokens_to_sample=args.rewrite_tokens,
-        temperature=1,
-        timeout=None,
-        max_retries=2,
-        stop=None,
-        thinking={"type": "enabled", "budget_tokens": args.thinking_tokens}
-    )
-
-
-def generate_harness(harness_llm: BaseChatModel, input_file: str) -> str:
-    """Generate harness for the input function."""
-    runner = build_workflow(
-        state_class=HarnessingState,
-        tools_list=HARNESS_TOOLS,
-        sys_prompt=harness_system_prompt,
-        initial_prompt=harnessing_prompt,
-        output_key="harness_definition",
-        output_schema=HarnessedOutput,
-        unbound_llm=harness_llm
-    ).compile()
-
-    # Read input file
-    with open(input_file, "r") as f:
-        f_def = f.read()
-
-    # Generate harness
-    return runner.invoke(
-        input=GraphInput(code_input=f_def),
-    )["harness_definition"]
-
-def handle_human_interrupt(interrupt_data: dict) -> str:
-    """Handle human-in-the-loop interrupts and get user input."""
-    print("\n" + "=" * 80)
-    print("HUMAN ASSISTANCE REQUESTED")
-    print("=" * 80)
-    print(f"Question: {interrupt_data.get('question', 'N/A')}")
-    print(f"Context: {interrupt_data.get('context', 'N/A')}")
-    print(f"Original Function:\n{interrupt_data.get('original_function', 'N/A')}")
-    print(f"Attempted Rewrite:\n{interrupt_data.get('attempted_rewrite', 'N/A')}")
-    print("-" * 80)
-    return input("Please provide guidance: ")

-def display_rewrite_result(result: RewriteResultSchema) -> None:
-    """Display the final rewrite results to the user."""
-    print("\n" + "=" * 80)
-    print("REWRITE COMPLETED")
-    print("=" * 80)
-    print(f"Rewritten Function:\n{result.rewrite}")
-
-    # Format extra definitions nicely
-    if result.extra_definitions:
-        print("\nExtra Definitions:")
-        for i, extra_def in enumerate(result.extra_definitions, 1):
-            print(f" {i}. {extra_def.definition}")
-            print(f" Where: {extra_def.where}")
-            print(f" Justification: {extra_def.justification}")
-            if i < len(result.extra_definitions): # Add spacing between definitions
-                print()
-
-    print(f"\nRemarks: {result.remarks}")
-
-def execute_rewrite_workflow(rewrite_llm: BaseChatModel, harness: str) -> int:
-    """Execute the rewrite workflow with interrupt handling."""
-    # Add checkpointer for interrupt functionality
-    checkpointer = MemorySaver()
-    rewriter_exec: CompiledStateGraph[RewriterState, None, GraphInput, Any] = build_workflow(
-        state_class=RewriterState,
-        tools_list=rewrite_tools,
-        sys_prompt=simplification_system_prompt,
-        initial_prompt=rewriting_prompt,
-        output_key="result",
-        unbound_llm=rewrite_llm
-    ).compile(checkpointer=checkpointer)
-
-    # Execute rewrite workflow with interrupt handling
-    thread_id = "rewrite_session"
-    config: RunnableConfig = {"configurable": {"thread_id": thread_id}}
-
-    # Start with initial input
-    current_input: Union[None, Command, GraphInput] = GraphInput(code_input=harness)
-
-    while True:
-        assert current_input is not None
-        # Stream execution
-        interrupted = False
-        r = current_input
-        current_input = None
-        for event in rewriter_exec.stream(input=r, config=config):
-            logger.debug("Stream event: %s", event)
-
-            # Check if we hit an interrupt
-            if "__interrupt__" in event:
-                interrupt_data = event["__interrupt__"][0].value
-                human_response = handle_human_interrupt(interrupt_data)
-
-                # Set up for resumption
-                current_input = Command(resume=human_response)
-                interrupted = True
-                break
-
-        # If we were interrupted, continue the loop to resume
-        if interrupted:
-            continue
-
-        state = rewriter_exec.get_state(config)
-        result = state.values.get("result", None)
-        if result is None or not isinstance(result, RewriteResultSchema):
-            return 1
-
-        display_rewrite_result(result)
-        return 0 # Success
-
-def main() -> int:
-    """Main entry point for the concordance tool."""
-    parser = setup_argument_parser()
-    args = parser.parse_args()
-
-    setup_logging(args.debug)
-
-    # Create configured LLMs
-    harness_llm = create_harness_llm(args)
-    rewrite_llm = create_rewrite_llm(args)
-
-    # Generate harness
-    harness = generate_harness(harness_llm, args.input_file)
-
-    # Execute rewrite workflow
-    return execute_rewrite_workflow(rewrite_llm, harness)
-
-if __name__ == "__main__":
-    sys.exit(main())

{certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/LICENSE
RENAMED
File without changes

{certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/WHEEL
RENAMED
File without changes

{certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/entry_points.txt
RENAMED
File without changes

{certora_cli_beta_mirror-8.2.0.dist-info → certora_cli_beta_mirror-8.2.1.dist-info}/top_level.txt
RENAMED
File without changes