certora-cli-beta-mirror 8.2.0__py3-none-macosx_10_9_universal2.whl → 8.2.1__py3-none-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,10 +69,18 @@ class MutateValidator:
69
69
  except Exception as e:
70
70
  raise Util.CertoraUserInputError(f"Invalid file_to_mutate in manual mutant: {mutant[Constants.FILE_TO_MUTATE]}", e)
71
71
 
72
- try:
73
- Vf.validate_dir(mutant[Constants.MUTANTS_LOCATION])
74
- except Exception as e:
75
- raise Util.CertoraUserInputError(f"Invalid mutants location {mutant[Constants.MUTANTS_LOCATION]}", e)
72
+ mutants_location = mutant[Constants.MUTANTS_LOCATION]
73
+ if Path(mutants_location).is_dir():
74
+ try:
75
+ Vf.validate_dir(mutants_location)
76
+ except Exception as e:
77
+ raise Util.CertoraUserInputError(f"Invalid directory for mutants location {mutants_location}",
78
+ e)
79
+ else:
80
+ try:
81
+ Vf.validate_readable_file(mutants_location, Util.SOL_EXT)
82
+ except Exception as e:
83
+ raise Util.CertoraUserInputError(f"Invalid file for mutants location {mutants_location}", e)
76
84
 
77
85
  def mutation_attribute_in_prover(self) -> None:
78
86
  gambit_attrs = ['filename', 'contract', 'functions', 'seed', 'num_mutants']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: certora-cli-beta-mirror
3
- Version: 8.2.0
3
+ Version: 8.2.1
4
4
  Summary: Runner for the Certora Prover
5
5
  Home-page: https://pypi.org/project/certora-cli-beta-mirror
6
6
  Author: Certora
@@ -39,4 +39,4 @@ Dynamic: requires-dist
39
39
  Dynamic: requires-python
40
40
  Dynamic: summary
41
41
 
42
- Commit 03cec31. Build and Run scripts for executing the Certora Prover on Solidity smart contracts.
42
+ Commit a27f12a. Build and Run scripts for executing the Certora Prover on Solidity smart contracts.
@@ -37,7 +37,6 @@ certora_cli/CertoraProver/certoraProjectScanner.py,sha256=jT7FeWzcy8o83LrZRwsg_L
37
37
  certora_cli/CertoraProver/certoraSourceFinders.py,sha256=qwJtwrQq3NUNYmdmn1UmANN4lmJFIUh4M-St2x1FJ2Y,19038
38
38
  certora_cli/CertoraProver/certoraType.py,sha256=inwaLkMVwtJnwkyQhDJs-wRxoyytu2Xa_BJ5MdGlZqY,29737
39
39
  certora_cli/CertoraProver/certoraVerifyGenerator.py,sha256=YMuzGj2RNOnADOx8UnV2ys1ptw_-2mermgC9ZLMWceo,11052
40
- certora_cli/CertoraProver/concordance.py,sha256=CCDN7hT9rnlU-ZKJ7hVxHR2fS8ZXwKVdE59jtMTtY0M,38666
41
40
  certora_cli/CertoraProver/erc7201.py,sha256=BME5kBZsDx6lgqLn7EE91I1cEOZtsnZ8BlRVF62eEBE,1660
42
41
  certora_cli/CertoraProver/splitRules.py,sha256=dNhy05ShB_-rWYTnJH5m-Xc5A4HGStAvwLRs1BTu1GA,7627
43
42
  certora_cli/CertoraProver/storageExtension.py,sha256=nrCrbH8ne-yCYSDFzh3J9A7Q6h96WxhEfLbfxGSUCSc,14363
@@ -61,7 +60,7 @@ certora_cli/Mutate/mutateApp.py,sha256=UY4TWn9f318b2upVAFUL0cqcgzlyTDZ5XeNW3Mjof
61
60
  certora_cli/Mutate/mutateAttributes.py,sha256=2onGaPmztwmHg5V_X7BUG4HcQCThhqYzGYKBy695Izc,10587
62
61
  certora_cli/Mutate/mutateConstants.py,sha256=LRrz3wMM8WpPYSshkc-PLYqT0nexcWQeBNsehip-LOE,3945
63
62
  certora_cli/Mutate/mutateUtil.py,sha256=B7MCIFtZBetjR4MMxU6F5ikYsaot1wTG7XYMjgVXl4k,2287
64
- certora_cli/Mutate/mutateValidate.py,sha256=vL3YxVkHIGoxolAcvPhK2xitUV0frQ4jTdTN270yeQ4,9298
63
+ certora_cli/Mutate/mutateValidate.py,sha256=6DRYfnE-HCMvMxbb0v1dx2BTSd-x8YeFRVp35KplVPc,9760
65
64
  certora_cli/Shared/ExpectedComparator.py,sha256=eyRR-jni4WJoa6j2TK2lnZ89Tyb8U99wT2PNdu4se8w,18457
66
65
  certora_cli/Shared/__init__.py,sha256=s0dhvolFtsS4sRNzPVhC_rlw8mm194rCZ0WhOxInY40,1025
67
66
  certora_cli/Shared/certoraAttrUtil.py,sha256=Nw8ban5Axp6c6dT-KJfCD9i9tKnGk1DbvRDDNH3--DU,8574
@@ -69,13 +68,13 @@ certora_cli/Shared/certoraLogging.py,sha256=cV2UQMhQ5j8crGXgeq9CEamI-Lk4HgdiA3HC
69
68
  certora_cli/Shared/certoraUtils.py,sha256=buUE95PnbpDEbYEZss_jcP7pWCRJhXnfV-eOA6-zvYM,58407
70
69
  certora_cli/Shared/certoraValidateFuncs.py,sha256=BPLuVsS3yAcYIuCvkXtDuFQKf2qaT74TIddB0lM84yM,42508
71
70
  certora_cli/Shared/proverCommon.py,sha256=uZkl9PDLPj81kKRnBnlPUmvhMZovNP25_74No_7jaQ4,11215
72
- certora_jars/ASTExtraction.jar,sha256=yGLGQYChi03jYAh5xcEdCDtJNrKJUaUvnKCvqPNEIoI,17614869
73
- certora_jars/CERTORA-CLI-VERSION-METADATA.json,sha256=7erl5giUb6YUO0NSTkPTRLlt1Ji4iJDxlLrnsCrPlzo,144
74
- certora_jars/Typechecker.jar,sha256=-EbNEH1eheFLgb1vVSlG5o1g1bAmK2Q6iBKFkKMaND0,17577026
71
+ certora_jars/ASTExtraction.jar,sha256=tRIhYz0x7agtytWLH0ta8zZMR6maGEzM13fOMrRBMMw,17614868
72
+ certora_jars/CERTORA-CLI-VERSION-METADATA.json,sha256=RHCGld9OX7ZGd559dhshflPnahIJShAHjF8GAaAzl1I,143
73
+ certora_jars/Typechecker.jar,sha256=u4KLY6doS3MNtQSPhF6BGgim7RViZ5jfXjSBMC7XVAs,17577025
75
74
  certora_jars/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
76
- certora_cli_beta_mirror-8.2.0.dist-info/LICENSE,sha256=UGKSKIJSetF8m906JLKqNLkUS2CL60XfQdNvxBvpQXo,620
77
- certora_cli_beta_mirror-8.2.0.dist-info/METADATA,sha256=bU4nWKle__qGpvqqT8bsiiLCehI_qEtlpBFNi-2tcX4,1286
78
- certora_cli_beta_mirror-8.2.0.dist-info/WHEEL,sha256=9Ig2YBzm5cpS_YWKLeuYxVAxcKv_uDQsCzy9XJbRZ_g,110
79
- certora_cli_beta_mirror-8.2.0.dist-info/entry_points.txt,sha256=ClZiFkCYDdK25_ufxZvnE2Rx_kNk1_4vj7KpgYUKxGM,509
80
- certora_cli_beta_mirror-8.2.0.dist-info/top_level.txt,sha256=8C77w3JLanY0-NW45vpJsjRssyCqVP-qmPiN9FjWiX4,38
81
- certora_cli_beta_mirror-8.2.0.dist-info/RECORD,,
75
+ certora_cli_beta_mirror-8.2.1.dist-info/LICENSE,sha256=UGKSKIJSetF8m906JLKqNLkUS2CL60XfQdNvxBvpQXo,620
76
+ certora_cli_beta_mirror-8.2.1.dist-info/METADATA,sha256=sqpw-Q34_jE5LItMjWt-BRiLsYxn7-eKSj9Wgv_2lBs,1286
77
+ certora_cli_beta_mirror-8.2.1.dist-info/WHEEL,sha256=9Ig2YBzm5cpS_YWKLeuYxVAxcKv_uDQsCzy9XJbRZ_g,110
78
+ certora_cli_beta_mirror-8.2.1.dist-info/entry_points.txt,sha256=ClZiFkCYDdK25_ufxZvnE2Rx_kNk1_4vj7KpgYUKxGM,509
79
+ certora_cli_beta_mirror-8.2.1.dist-info/top_level.txt,sha256=8C77w3JLanY0-NW45vpJsjRssyCqVP-qmPiN9FjWiX4,38
80
+ certora_cli_beta_mirror-8.2.1.dist-info/RECORD,,
Binary file
@@ -1 +1 @@
1
- {"name": "certora-cli-beta-mirror", "tag": "8.2.0", "branch": "", "commit": "03cec31", "timestamp": "20250821.16.28.619968", "version": "8.2.0"}
1
+ {"name": "certora-cli-beta-mirror", "tag": "8.2.1", "branch": "", "commit": "a27f12a", "timestamp": "20250828.9.26.267405", "version": "8.2.1"}
Binary file
@@ -1,939 +0,0 @@
1
- # The Certora Prover
2
- # Copyright (C) 2025 Certora Ltd.
3
- #
4
- # This program is free software: you can redistribute it and/or modify
5
- # it under the terms of the GNU General Public License as published by
6
- # the Free Software Foundation, version 3 of the License.
7
- #
8
- # This program is distributed in the hope that it will be useful,
9
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
10
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11
- # GNU General Public License for more details.
12
- #
13
- # You should have received a copy of the GNU General Public License
14
- # along with this program. If not, see <https://www.gnu.org/licenses/>.
15
-
16
- # ============================================================================
17
- # IMPORTS
18
- # ============================================================================
19
-
20
- from langchain_anthropic import ChatAnthropic
21
- from typing import Optional, List, TypedDict, Annotated, Literal, Required, TypeVar, Type, Protocol, Union, Any
22
- from langchain_core.messages import ToolMessage, AnyMessage, SystemMessage, HumanMessage, BaseMessage
23
- from langchain_core.tools import tool, InjectedToolCallId, BaseTool
24
- from langchain_core.language_models.base import LanguageModelInput
25
- from langchain_core.language_models.chat_models import BaseChatModel
26
- from langchain_core.runnables import Runnable, RunnableConfig
27
- from langgraph.graph import StateGraph, START, MessagesState
28
- from langgraph.graph.state import CompiledStateGraph
29
- from langgraph._internal._typing import StateLike
30
- from langgraph.graph.message import add_messages
31
- from langgraph.types import Command, interrupt
32
- from langgraph.prebuilt import ToolNode
33
- from langgraph.checkpoint.memory import MemorySaver
34
- from pydantic import BaseModel, Field
35
- import os
36
- import tempfile
37
- import json
38
- import subprocess
39
- import sys
40
- import logging
41
- import argparse
42
-
43
- # ============================================================================
44
- # LOGGING SETUP
45
- # ============================================================================
46
-
47
- logger = logging.getLogger("concordance")
48
- response_logger = logger.getChild("response")
49
- tool_logger = logger.getChild("tools")
50
-
51
-
52
- # ============================================================================
53
- # SHARED UTILITIES
54
- # ============================================================================
55
-
56
-
57
- class GraphInput(TypedDict):
58
- code_input: str
59
-
60
-
61
- class WithToolCallId(BaseModel):
62
- tool_call_id: Annotated[str, InjectedToolCallId]
63
-
64
-
65
- def tool_return(
66
- tool_call_id: str,
67
- content: str
68
- ) -> Command:
69
- """
70
- Create a LangGraph Command for tool responses that need to continue processing.
71
-
72
- Used by tools that want to return a result and continue the workflow by routing
73
- back to the tool_result node for LLM processing.
74
-
75
- Args:
76
- tool_call_id: The ID of the tool call being responded to
77
- content: The response content from the tool execution
78
-
79
- Returns:
80
- Command that updates messages and continues workflow
81
- """
82
- return Command(
83
- update={
84
- "messages": [ToolMessage(tool_call_id=tool_call_id, content=content)]
85
- }
86
- )
87
-
88
-
89
- def tool_output(tool_call_id: str, res: dict) -> Command:
90
- """
91
- Create a LangGraph Command for final tool outputs that update workflow state.
92
-
93
- Used by completion tools (like harness_output, rewrite_output) to set final
94
- results in the workflow state. The workflow's conditional edge will detect
95
- these state updates and route to completion.
96
-
97
- Args:
98
- tool_call_id: The ID of the tool call being responded to
99
- res: Dictionary containing the final workflow results to merge into state
100
-
101
- Returns:
102
- Command that updates state with final results and a success message
103
- """
104
- return Command(update={
105
- **res,
106
- "messages": [ToolMessage(
107
- tool_call_id=tool_call_id,
108
- content="Success"
109
- )]
110
- })
111
-
112
- def pretty_print_messages(messages: list[AnyMessage]) -> str:
113
- """Format a list of AnyMessage objects for readable debug output."""
114
- formatted_lines = []
115
- for i, msg in enumerate(messages):
116
- msg_type = type(msg).__name__
117
-
118
- # Get message role if available
119
- role = getattr(msg, 'role', 'unknown')
120
-
121
- role = getattr(msg, 'type', 'unknown')
122
- # Get content preview (handle both string and list content)
123
- if hasattr(msg, 'content') and msg.content:
124
- if isinstance(msg.content, list):
125
- # For list content, show count and first item preview
126
- content_preview = \
127
- f"[{len(msg.content)} items: {str(msg.content[0])[:50] if msg.content else 'empty'}...]"
128
- else:
129
- content_preview = str(msg.content)[:100]
130
- if len(str(msg.content)) > 100:
131
- content_preview += "..."
132
- else:
133
- content_preview = "<empty>"
134
-
135
- # Format tool calls if present
136
- tool_info = ""
137
- if tool_calls := getattr(msg, 'tool_calls', None):
138
- tool_names = [tc.get('name', 'unknown') for tc in tool_calls]
139
- tool_info = f" | Tools: {', '.join(tool_names)}"
140
-
141
- formatted_lines.append(f" [{i}] {msg_type} (role: {role}): {content_preview}{tool_info}")
142
-
143
- return "\n" + "\n".join(formatted_lines) if formatted_lines else " <no messages>"
144
-
145
-
146
- class InitNodeFunction(Protocol):
147
- """Protocol defining the signature for LangGraph node functions."""
148
- def __call__(self, state: GraphInput) -> dict[str, List[BaseMessage]]:
149
- ...
150
-
151
-
152
- class ChatNodeFunction(Protocol):
153
- def __call__(self, state: MessagesState) -> dict[str, List[BaseMessage]]:
154
- ...
155
-
156
-
157
- def tool_result_generator(llm: Runnable[LanguageModelInput, BaseMessage]) -> ChatNodeFunction:
158
- """
159
- Create a LangGraph node function that processes tool results by sending
160
- the current message history to the LLM for the next response.
161
-
162
- Args:
163
- llm: The LLM bound with tools to invoke for generating responses
164
-
165
- Returns:
166
- A node function that takes MessagesState and returns updated messages
167
- """
168
- def tool_result(state: MessagesState) -> dict[str, List[BaseMessage]]:
169
- logger.debug("Tool result state messages:%s", pretty_print_messages(state["messages"]))
170
- return {"messages": [llm.invoke(state["messages"])]}
171
- return tool_result
172
-
173
- def initial_node(sys_prompt: str, initial_prompt: str, llm: Runnable[LanguageModelInput, BaseMessage]) -> InitNodeFunction:
174
- """
175
- Create a LangGraph node function that initializes a workflow with system and human messages,
176
- then gets the first LLM response.
177
-
178
- Args:
179
- sys_prompt: System message content to set the LLM's role and context
180
- initial_prompt: Human message template to start the conversation
181
- llm: The LLM bound with tools to invoke for generating the initial response
182
-
183
- Returns:
184
- A node function that takes GraphInput and returns initial message history
185
- """
186
- def to_return(state: GraphInput) -> dict[str, List[BaseMessage]]:
187
- initial_messages : List[BaseMessage] = [
188
- SystemMessage(
189
- sys_prompt
190
- ),
191
- HumanMessage(
192
- content=[initial_prompt, state["code_input"]]
193
- )
194
- ]
195
- initial_messages.append(
196
- llm.invoke(initial_messages)
197
- )
198
- return {"messages": initial_messages}
199
- return to_return
200
-
201
-
202
- # TypeVars for generic typing
203
- StateT = TypeVar('StateT', bound=StateLike)
204
- OutputT = TypeVar('OutputT', bound=StateLike)
205
-
206
-
207
- def build_workflow(
208
- state_class: Type[StateT],
209
- tools_list: List[BaseTool],
210
- sys_prompt: str,
211
- initial_prompt: str,
212
- output_key: str,
213
- unbound_llm: BaseChatModel,
214
- output_schema: Optional[Type[OutputT]] = None,
215
- ) -> StateGraph[StateT, None, GraphInput, OutputT]:
216
- """
217
- Build a standard workflow with initial node -> tools -> tool_result pattern.
218
- Uses fixed GraphInput schema and explicit LLM currying.
219
- """
220
- # Node name constants
221
- INITIAL_NODE = "initial"
222
- TOOLS_NODE = "tools"
223
- TOOL_RESULT_NODE = "tool_result"
224
-
225
- def should_end(state: StateT) -> Literal["__end__", "tool_result"]:
226
- """Check if workflow should end based on output key being defined."""
227
- assert isinstance(state, dict)
228
- if state.get(output_key, None) is not None:
229
- return "__end__"
230
- return TOOL_RESULT_NODE
231
-
232
- llm = unbound_llm.bind_tools(tools_list)
233
-
234
- # Create initial node and tool node with curried LLM
235
- init_node = initial_node(sys_prompt=sys_prompt, initial_prompt=initial_prompt, llm=llm)
236
- tool_node = ToolNode(tools_list)
237
- tool_result_node = tool_result_generator(llm)
238
-
239
- # Build the graph with fixed input schema, no context
240
- builder = StateGraph(
241
- state_class,
242
- input_schema=GraphInput,
243
- output_schema=output_schema
244
- )
245
- builder.add_node(INITIAL_NODE, init_node)
246
- builder.add_edge(START, INITIAL_NODE)
247
- builder.add_node(TOOLS_NODE, tool_node)
248
- builder.add_edge(INITIAL_NODE, TOOLS_NODE)
249
- builder.add_node(TOOL_RESULT_NODE, tool_result_node)
250
- builder.add_edge(TOOL_RESULT_NODE, TOOLS_NODE)
251
-
252
- # Add conditional edge from tools
253
- builder.add_conditional_edges(
254
- TOOLS_NODE,
255
- should_end
256
- )
257
-
258
- return builder
259
-
260
-
261
- # ============================================================================
262
- # SOLIDITY COMPILER TOOL (SHARED)
263
- # ============================================================================
264
-
265
-
266
- class SolidityCompilerInput(BaseModel):
267
- """
268
- A Solidity compiler capable of compiling a single, Solidity file into EVM bytecode. The compiler
269
- also performs typechecking and will flag any syntax errors. The compiler comes from the official
270
- distribution channels for Solidity and understands all the Solidity language and features.
271
- """
272
- compiler_version: str = \
273
- Field(description=
274
- "The compiler version string to use for compilation. Compiler versions are taken from the known compiler "
275
- "releases (e.g., 0.8.2), but with the leading '0.' dropped (e.g., 8.2)."
276
- )
277
-
278
- source: str = Field(description="The Solidity source to be compiled")
279
-
280
-
281
- @tool(args_schema=SolidityCompilerInput)
282
- def solidity_compiler(source: str, compiler_version: str) -> str:
283
- compiler_input = {
284
- "language": "Solidity",
285
- "sources": {
286
- "harness.sol": {
287
- "content": source
288
- }
289
- },
290
- "settings": {
291
- "outputSelection": {
292
- "*": {
293
- "*": []
294
- }
295
- }
296
- }
297
- }
298
- compile_result = subprocess.run(
299
- [f'solc{compiler_version}', "--standard-json"],
300
- input=json.dumps(compiler_input),
301
- text=True,
302
- encoding="utf-8",
303
- capture_output=True
304
- )
305
- res = f"Return code was: {compile_result.returncode}\nStdout:\n{compile_result.stdout}"
306
- return res
307
-
308
-
309
- # ============================================================================
310
- # HARNESSING WORKFLOW
311
- # ============================================================================
312
-
313
-
314
- harness_system_prompt = """
315
- You are an expert Solidity developer, with several years of experience writing smart contracts. You also
316
- have a deep understanding of the EVM and how the Solidity language is ultimately compiled to the EVM bytecode.
317
- This lets you understand why certain programs written in Solidity language are invalid and rejected by the
318
- compiler. For example, you know that an `external` function in a `contract` cannot accept a reference type
319
- marked as `storage`: you know that this is only allowed in a `library` which is always accessed with a delegatecall.
320
-
321
- You also understand the subtleties around ABI encoding and decoding, and the translation of high-level types to
322
- an ABI signature. For example, you know that a struct with two uint fields is represented in an ABI signature as
323
- `(uint256,uint256)`.
324
- """
325
-
326
- harnessing_prompt = """
327
- Create an external 'harness contract' which provides a minimal way to execute the given 'internal' Solidity function
328
- via an external function wrapper.
329
- The external function wrapper should simply pass its arguments to the internal function, and return the result back
330
- to the caller. The internal function being harnessed
331
- should be included in the contract. You MAY include extra type definitions, but only if absolutely necessary for the
332
- code to compile; definitions solely for documentation or explanation
333
- purposes should NOT be included."
334
- The external harness should be type correct and syntax correct. To ensure this, use the Solidity compiler and
335
- incorporate its feedback to fix any type/syntax errors.
336
- """
337
-
338
-
339
- class HarnessedOutput(TypedDict):
340
- harness_definition: Optional[str]
341
-
342
-
343
- class HarnessingState(TypedDict):
344
- GraphInput: str
345
- harness_definition: Optional[str]
346
- messages: Annotated[list[AnyMessage], add_messages]
347
-
348
-
349
- class HarnessOutputSchema(WithToolCallId):
350
- """
351
- Used to communicate the results of harness generation, which is the minimal contract to exercise an internal
352
- function, along with the ABI signature of the method which is the external entry point and the name of the contract.
353
- Used only for successfully validated (type correct, syntax correct) harnesses.
354
- """
355
- source_code: str = \
356
- Field(description=
357
- "The self-contained Solidity source code which wraps the provided internal function"
358
- )
359
-
360
- contract_name: str = \
361
- Field(description=
362
- "The name of the Solidity contract containing the external method that wraps the internal function"
363
- )
364
-
365
- abi_signature: str = \
366
- Field(description=
367
- "The ABI signature of the external function generated as the internal function wrapper. "
368
- "Includes parameter types (but not return types)"
369
- )
370
-
371
-
372
- @tool(args_schema=HarnessOutputSchema)
373
- def harness_output(source_code: str, tool_call_id: Annotated[str, InjectedToolCallId], contract_name: str, abi_signature: str) -> Command:
374
- return tool_output(tool_call_id=tool_call_id, res={"harness_definition": source_code})
375
-
376
-
377
- # Harness workflow setup
378
- HARNESS_TOOLS = [harness_output, solidity_compiler]
379
-
380
- # ============================================================================
381
- # REWRITE WORKFLOW
382
- # ============================================================================
383
-
384
- simplification_system_prompt = """
385
- You are an expert Solidity developer with several years of experience writing smart contracts and
386
- optimizing them for gas costs. You know all about low-level assembly and how to use it to directly
387
- access EVM internal representations to optimize execution. This means you are also familiar with how
388
- the EVM works on a deep, fundamental level. You are also intimately familiar with how Solidity
389
- lays out memory, and its allocation pattern. Among other facts, you know that it uses a bump allocator
390
- with a free pointer whose value resides in memory at slot 0x40. You also know that memory in the range 0x0 - 0x40
391
- is considered "scratch" and is freely usable, as is all memory after the current value of the free pointer. You
392
- know that arrays are allocated to include an extra word at the beginning of the allocated block to hold the length
393
- of the memory, followed by elements of the array. `bytes` and `string` arrays pack their elements tightly (1 byte
394
- per element), whereas all other arrays use 32 bytes per element.
395
-
396
- You also hold a PhD in static analysis, and are an expert in the field of points to analyses and memory
397
- safety analyses. You help maintain a static analysis which attempts to recover the pointer relationships
398
- between stack values and memory locations in compiled EVM bytecode. For soundness, this analysis
399
- must be able to prove that every access to memory is either in the scratch areas OR said access can be
400
- attributed to a field of some object. Accesses to memory which cannot be proven to satisfy one of these two conditions
401
- cause the entire analysis to fail. The analysis is partially path sensitive, and can understand that
402
- `i < array.length` means that `i` is a valid index into `array`. The analysis uses these facts to prove
403
- accesses are safe AND which object's fields are being accessed by each memory operation.
404
- """
405
-
406
- rewriting_prompt = """
407
- <context>
408
- The following contract "harnesses" a problematic internal function which causes a pointer analysis on EVM bytecode to fail.
409
- This may be due to non-standard operations on memory that occcurs in memory blocks, or due to imprecision in the
410
- pointer analysis.
411
- </context>
412
-
413
- <task>
414
- Rewrite the *internal* function so that it is semantically equivalent but is more amenable to static analysis.
415
- Common problems include:
416
- - Inline assembly with direct memory manipulation
417
- - Unchecked array/memory access
418
- - Pointer arithmetic that the analyzer cannot track
419
- - Non-standard memory layout assumptions
420
- Your rewrite should satisfy the following constraints:
421
- - It must be semantically equivalent to the original function.
422
- - Wherever possible, eschew the use of inline assembly in favor of more straightforward, standard Solidity
423
- - You may ignore the gas implications of any code you write: code that is accepted by the pointer analysis is
424
- preferable to gas efficient code. However, you should consider that the original code may by optimized for gas
425
- consumption, which should inform your understanding of its intent
426
- "Semantic equivalence" means the following:
427
- - Functions produce the same return value
428
- - The functions have exactly the same observable effects. These external effects are:
429
- - Reverting (including the revert data)
430
- - EVM level returns (that is, the return opcode)
431
- - External calls
432
- - Changes to storage
433
- - emitted logs/events
434
-
435
- In other words, if the original function reverts, the rewritten function must also revert with
436
- the same buffer.
437
- For the purposes of this rewrite, you can ignore the possibility of out-of-gas exceptions.
438
- Similarly, the rewrite must emit the same log events (if any) and in the same order.
439
- The rewrite must also make the same external calls, and make the same modifications to storage.
440
- However, remember that if both functions revert, any other side effects (external calls, storage changes, etc.)
441
- are mooted.
442
- </task>
443
-
444
- <algorithm>
445
- <input>An "original harness" around a "problematic internal function"</input>
446
- <output>The rewritten "better function"</output>
447
- <steps>
448
- 1. Analyze the "problematic internal function" in the "original harness" to understand its behavior.
449
- Pay close attention to its revert conditions and side effects
450
- 2. Generate a rewrite of the internal function called the "better function", which uses straight-forward
451
- solidity while preserving equivalence to the "problematic internal function"
452
- a. Keep track of and remember any extra definitions required for this "better function" rewrite.
453
- 3. Adapt the "original harness" into a "rewrite harness" by replacing the "problematic internal function" with the
454
- "better function" generated in step 2 and changing the name of the "original harness" contract.
455
- Incorporate any definitions generated by step 2.a
456
- 4. Check that the "rewrite harness" is type correct and syntax correct using the solidity compiler
457
- 5. Check that the "rewrite harness" and "original harness" are semantically equivalent using the equivalence checker.
458
- 6. Interpret the results of the equivalence checker:
459
- a. If the result is 'Equivalent', then go to step 7
460
- b. Otherwise, examine the explanation provided by the equivalence checker for why the two functions are not
461
- equivalent. Incorporating this feedback, adjust the definition of "better function" within the
462
- "rewrite harness", and go to step 5.
463
- 7. Output the definition of the "better function" along with any of the extra definitions that are necessary.
464
- </steps>
465
- </algorithm>
466
-
467
- <guidance>
468
- <important>
469
- When invoking the equivalence checker, you *may not* change the external entry point of
470
- either the "original harness" or the "rewrite harness"
471
- </important>
472
- <important>
473
- You *MAY NOT* change the "original harness" in any way: you must pass it to the equivalence checker without
474
- modification.
475
- </important>
476
- <important>
477
- The task is complete only when the equivalence checker says the implementations are 'Equivalent'
478
- </important>
479
- <soft_requirement>4
480
- You should *not* add additional error/interface declarations unless absolutely necessary
481
- for your rewrite to compile.
482
- </soft_requirement>
483
- <soft_requirement>
484
- Inline assembly should be absolutely avoided unless you have no other option to preserve semantic equivalence.
485
- If you have no choice but to use inline assembly, the inline assembly should hew as closely as possible to
486
- standard Solidity memory access patterns; array accesses should be "guarded" by length checks, and so on.
487
- </soft_requirement>
488
- <reminder>
489
- When adapting the "original harness" to check equivalence, you **should** change the name of the harnessing
490
- contract.
491
- </reminder>
492
- <tool_advice>
493
- You **should** check that your rewrite harness is type and syntax correct using the solidity compiler.
494
- </tool_advice>
495
- <tool_advice>
496
- You are an automated tool, and should only use the the human_in_the_loop tool as a last resort to get "unstuck".
497
- Be sure to iterate on a particular issue a few times before asking the user for help.
498
- </tool_advice>
499
- </guidance>
500
- """
501
-
502
-
503
- class EquivalenceCheckerSchema(BaseModel):
504
- """
505
- A formal verification tool that is able to compare the behavior of two external methods in two different contracts
506
- on all possible inputs, and judges whether they have the same side effects.
507
- A side effect includes: changes to storage, external calls, logs, and returns/reverts.
508
-
509
- If the equivalence checker thinks the external contracts exhibit different behaviors, it will respond with
510
- a concrete example demonstrating the difference in behaviors. Otherwise it will respond with just 'Equivalent'.
511
-
512
- IMPORTANT: The name of the two contracts containing the external methods *must* be different and the external
513
- methods *must* have the same ABI signature.
514
- """
515
-
516
- contract1: str = \
517
- Field(description=
518
- "Solidity source code of the first contract to compare for equivalence. This source code must be s"
519
- "elf-contained, and must be compilable with a standard solidity compiler. It must be type correct and "
520
- "syntactically correct."
521
- )
522
-
523
- contract1_name: str = \
524
- Field(description=
525
- "The name of the contract defined in the `contract1` param. For example, if `contract1` contains the "
526
- "source `contract Foo { ... }` this parameter should be `Foo`"
527
- )
528
-
529
- contract2: str = \
530
- Field(description=
531
- "Solidity source code of the second contract to compare for equivalence. The source code must be "
532
- "self-contained, and must be compilable with a standard solidity compiler. It must therefore be type "
533
- "correct and syntactically correct."
534
- )
535
-
536
- contract2_name: str = \
537
- Field(description=
538
- "The name of the contract defined in the `contract2` param. MUST be different from the value of "
539
- "`contract1-name`. For example, if `contract2` contains the source code "
540
- "`contract Bar { ... }` this parameter should be `Bar`."
541
- )
542
-
543
- abi_signature: str = \
544
- Field(description=
545
- "The ABI signature (name and parameter types) of the external method to compare between "
546
- "contract1 and contract2"
547
- )
548
-
549
- compiler_version: str = \
550
- Field(description=
551
- "The compiler version string to use for compiling contract1 and contract2. Compiler versions are taken "
552
- "from the known compiler releases (e.g., 0.8.2), but with the leading '0.' dropped (e.g., 8.2)."
553
- )
554
-
555
- loop_bound: int = \
556
- Field(description=
557
- "When verifying equivalence of looping code, how many times to unroll the loop for bounded verification. "
558
- "For performance reasons, this should be set as small as possible while still demonstrating non-trivial "
559
- "behavior. While values above 3 are supported, performance gets exponentially worse above these values, "
560
- "and they should be avoided if possible."
561
- )
562
-
563
-
564
@tool(args_schema=EquivalenceCheckerSchema)
def equivalence_check(
    contract1: str,
    contract1_name: str,
    contract2: str,
    contract2_name: str,
    abi_signature: str,
    loop_bound: int,
    compiler_version: str
) -> str:
    """Run the Certora equivalence checker on the given method of two contracts.

    Returns "Equivalent" when the prover reports SUCCESS; otherwise returns
    either the divergence trace (for refinement) or an error message when the
    prover process itself exited non-zero.

    NOTE(review): `compiler_version` is declared but unused here — the command
    line hard-codes `--solc solc8.29` below; confirm this is intentional.
    """
    print("Running the equivalence checker...")

    # Create temporary files - result in current directory, trace anywhere
    with tempfile.NamedTemporaryFile(mode='w', dir=".", suffix='.sol') as f1, \
            tempfile.NamedTemporaryFile(mode='w', dir=".", suffix='.sol') as f2, \
            tempfile.NamedTemporaryFile(mode='w') as trace, \
            tempfile.NamedTemporaryFile(mode='w', dir='.', suffix=".json") as result:

        # Write contract bodies to files (flush so the external process sees them)
        f1.write(contract1)
        f1.flush()

        f2.write(contract2)
        f2.flush()

        # Build the command
        command = [
            'certoraRun.py',
            f'{f1.name}:{contract1_name}',
            f'{f2.name}:{contract2_name}',
            '--equivalence_contracts', f'{contract1_name}={contract2_name}',
            '--method', abi_signature,
            '--prover_args', f'-equivalenceCheck true -maxHeuristicFoldingDepth 5 -equivTraceFile {trace.name}',
            '--tool_output', os.path.basename(result.name),
            '--loop_iter', str(loop_bound),
            "--optimistic_hashing",
            "--optimistic_loop",
            '--solc', 'solc8.29'
        ]

        # Run the command without assuming success
        result_process = subprocess.run(command,
                                        capture_output=True,
                                        text=True,
                                        env={**os.environ, "DONT_USE_VERIFICATION_RESULTS_FOR_EXITCODE": "1"}
                                        )

        # If non-zero exit, just return
        if result_process.returncode != 0:
            return f"The equivalence checker failed with returncode {result_process.returncode}. " \
                   "It's possible something in your code wasn't handled. " \
                   "Try a few more times, and then ask for assistance"

        # Load and parse result JSON
        with open(result.name, 'r') as result_file:
            result_data = json.load(result_file)

        # Extract the rules dictionary
        rules_dict = result_data['rules']

        # Get the single key-value pair (since it's a singleton)
        _, rule_value = next(iter(rules_dict.items()))

        # Check if SUCCESS
        if rule_value == "SUCCESS":
            print("Equivalence check passed")
            return "Equivalent"
        else:
            print("Divergent behavior found; returning for refinement")
            # Read and return trace contents
            with open(trace.name, 'r') as trace_file:
                to_return = trace_file.read()
                tool_logger.info("Trace was:\n%s", to_return)
                return to_return
638
-
639
-
640
class ExtraDefinition(BaseModel):
    # No class docstring on purpose: pydantic folds it into the emitted JSON
    # schema, which would change the tool contract sent to the LLM.

    # The Solidity snippet itself.
    definition: str = Field(
        description=(
            "A snippet of Solidity that defines some type/error/interface etc. that is needed for the rewrite to work"
        )
    )

    # Placement guidance for the snippet.
    where: str = Field(
        description=(
            "Human readable description of where this definition should be placed. If there is no strong "
            "guidance/requirement for where the definition lives, 'Nearby' is an acceptable answer"
        )
    )

    # Why the snippet is required at all.
    justification: str = Field(
        description=(
            "Explanation for why this additional definition is necessary."
        )
    )
656
-
657
-
658
class RewriteResultSchema(WithToolCallId):
    """
    Used to communicate the successful rewrite to the client. Should only be invoked once the problematic rewritten function has been
    successfully validated using the equivalence checker; that is, it has returned "Equivalent".
    """
    # The internal-function rewrite, already proven equivalent.
    rewrite: str = Field(
        description=(
            "The validated; rewritten function. Should consist only of the internal function definition; "
            "the surrounding external harness should NOT be included."
        )
    )

    # Supporting snippets the rewrite needs in order to compile.
    extra_definitions: List[ExtraDefinition] = Field(
        description="Any extra definitions that are necessary for the rewrite."
    )

    # Free-form notes, e.g. justifying inline assembly or new types.
    remarks: str = Field(
        description=(
            "Any explanation of the rewrite. In particular, be sure to justify the use of any inline assembly or "
            "extra type definitions included"
        )
    )
677
-
678
-
679
@tool(args_schema=RewriteResultSchema)
def rewrite_output(rewrite: str, extra_definitions: List[ExtraDefinition], remarks: str,
                   tool_call_id: Annotated[str, InjectedToolCallId]) -> Command:
    """Report the validated rewrite back to the client as the workflow result."""
    # Package the arguments into the result schema before handing them off.
    payload = RewriteResultSchema(
        tool_call_id=tool_call_id,
        extra_definitions=extra_definitions,
        remarks=remarks,
        rewrite=rewrite
    )
    return tool_output(tool_call_id=tool_call_id, res={"result": payload})
693
-
694
-
695
class HumanInTheLoopSchema(WithToolCallId):
    """
    A tool that allows the LLM agent to request human assistance when encountering divergent behaviors
    during the rewriting process. This tool should be used when the equivalence checker reports
    differences between the original and rewritten functions that the agent cannot resolve automatically.
    """
    # What the agent wants answered.
    question: str = Field(description="The specific question or problem the agent needs help with")

    # Background for the human: checker output plus prior attempts.
    context: str = Field(
        description=(
            "Relevant context about the divergent behavior, including equivalence checker output, "
            "and what has been tried before (and what didn't work)"
        )
    )

    # The two code versions under discussion.
    original_function: str = Field(description="The original problematic function being rewritten")
    attempted_rewrite: str = Field(description="The current attempted rewrite that shows divergent behavior")
711
-
712
-
713
@tool(args_schema=HumanInTheLoopSchema)
def human_in_the_loop(
    question: str,
    context: str,
    original_function: str,
    attempted_rewrite: str,
    tool_call_id: Annotated[str, InjectedToolCallId]
) -> Command[Literal["tool_result", "error"]]:
    """
    Request human assistance to resolve divergent behaviors during rewriting.
    """
    # Bundle everything the operator needs to answer the question.
    request_payload = {
        "question": question,
        "context": context,
        "original_function": original_function,
        "attempted_rewrite": attempted_rewrite
    }

    # Pause graph execution via LangGraph's interrupt mechanism; resumes with
    # whatever value the operator supplies.
    human_guidance = interrupt(request_payload)

    return tool_return(tool_call_id=tool_call_id, content=f"Human guidance: {human_guidance}")
736
-
737
-
738
class ToolError(TypedDict, total=False):
    # Human-readable description of the failure; the only required key.
    error_message: Required[str]
    # Captured stdout of the failing tool invocation, when available.
    tool_stdout: str
    # Captured stderr of the failing tool invocation, when available.
    tool_stderr: str
742
-
743
-
744
class RewriterState(TypedDict):
    # Conversation history; `add_messages` merges appended messages.
    messages: Annotated[list[AnyMessage], add_messages]
    # The (harnessed) source code the agent is rewriting.
    code_input: str
    # Set when a tool invocation failed; None otherwise.
    error: Optional[ToolError]
    # Populated via `rewrite_output` once a validated rewrite exists.
    result: Optional[RewriteResultSchema]
749
-
750
-
751
# Rewrite workflow setup
# Tools exposed to the rewrite agent: emit the final result, compile Solidity,
# check behavioral equivalence, and escalate to a human when stuck.
rewrite_tools = [
    rewrite_output,
    solidity_compiler,
    equivalence_check,
    human_in_the_loop
]
758
-
759
-
760
- # ============================================================================
761
- # APPLICATION ORCHESTRATION AND CLI INTERFACE
762
- # ============================================================================
763
-
764
def setup_argument_parser() -> argparse.ArgumentParser:
    """Build and return the CLI parser for the concordance tool."""
    parser = argparse.ArgumentParser(description="Certora Concordance Tool for Solidity Function Rewriting")
    parser.add_argument("input_file", help="Input Solidity file containing the function to process")

    # String-valued model selection flags: (flag, default, help).
    model_flags = (
        ("--harness-model", "claude-sonnet-4-20250514",
         "Model to use for harness generation (default: claude-sonnet-4-20250514)"),
        ("--rewrite-model", "claude-opus-4-20250514",
         "Model to use for function rewriting (default: claude-opus-4-20250514)"),
    )
    for flag, default, help_text in model_flags:
        parser.add_argument(flag, default=default, help=help_text)

    # Integer token-budget flags: (flag, default, help).
    budget_flags = (
        ("--harness-tokens", 1024, "Token budget for harness generation (default: 1024)"),
        ("--rewrite-tokens", 4096, "Token budget for function rewriting (default: 4096)"),
        ("--thinking-tokens", 2048, "Token budget for thinking in rewriting (default: 2048)"),
    )
    for flag, default, help_text in budget_flags:
        parser.add_argument(flag, type=int, default=default, help=help_text)

    parser.add_argument("--debug", action="store_true",
                        help="Enable debug logging output")
    return parser
781
-
782
-
783
def setup_logging(debug: bool) -> None:
    """Raise the module logger to DEBUG (with a stream handler) when requested."""
    if not debug:
        return
    logger.setLevel(logging.DEBUG)
    # Only attach a handler once; repeated calls must not duplicate output.
    if logger.handlers:
        return
    stream_handler = logging.StreamHandler()
    stream_handler.setFormatter(
        logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    )
    logger.addHandler(stream_handler)
791
-
792
-
793
def create_harness_llm(args: argparse.Namespace) -> BaseChatModel:
    """Build the Anthropic chat model used for harness generation."""
    # Deterministic (temperature 0) model, bounded by the CLI token budget.
    settings = dict(
        model_name=args.harness_model,
        max_tokens_to_sample=args.harness_tokens,
        temperature=0,
        timeout=None,
        max_retries=2,
        stop=None,
    )
    return ChatAnthropic(**settings)
803
-
804
-
805
def create_rewrite_llm(args: argparse.Namespace) -> BaseChatModel:
    """Build the Anthropic chat model used for function rewriting."""
    # Extended-thinking budget comes straight from the CLI flag.
    thinking_config = {"type": "enabled", "budget_tokens": args.thinking_tokens}
    settings = dict(
        model_name=args.rewrite_model,
        max_tokens_to_sample=args.rewrite_tokens,
        temperature=1,
        timeout=None,
        max_retries=2,
        stop=None,
        thinking=thinking_config,
    )
    return ChatAnthropic(**settings)
816
-
817
-
818
def generate_harness(harness_llm: BaseChatModel, input_file: str) -> str:
    """Run the harness-generation workflow over the function in `input_file`."""
    workflow = build_workflow(
        state_class=HarnessingState,
        tools_list=HARNESS_TOOLS,
        sys_prompt=harness_system_prompt,
        initial_prompt=harnessing_prompt,
        output_key="harness_definition",
        output_schema=HarnessedOutput,
        unbound_llm=harness_llm
    )
    runner = workflow.compile()

    # Load the Solidity function to be harnessed.
    with open(input_file, "r") as src:
        function_source = src.read()

    # Invoke the graph and pull the harness out of the final state.
    final_state = runner.invoke(input=GraphInput(code_input=function_source))
    return final_state["harness_definition"]
838
-
839
def handle_human_interrupt(interrupt_data: dict) -> str:
    """Display an assistance request to the operator and collect their reply."""
    banner = "=" * 80
    divider = "-" * 80
    print("\n" + banner)
    print("HUMAN ASSISTANCE REQUESTED")
    print(banner)
    # Short fields first, then the two code listings on their own lines.
    for label, key in (("Question", "question"), ("Context", "context")):
        print(f"{label}: {interrupt_data.get(key, 'N/A')}")
    print(f"Original Function:\n{interrupt_data.get('original_function', 'N/A')}")
    print(f"Attempted Rewrite:\n{interrupt_data.get('attempted_rewrite', 'N/A')}")
    print(divider)
    return input("Please provide guidance: ")
850
-
851
def display_rewrite_result(result: RewriteResultSchema) -> None:
    """Pretty-print the final rewrite, its extra definitions, and remarks."""
    banner = "=" * 80
    print("\n" + banner)
    print("REWRITE COMPLETED")
    print(banner)
    print(f"Rewritten Function:\n{result.rewrite}")

    # Format extra definitions nicely, numbered from 1.
    extras = result.extra_definitions
    if extras:
        print("\nExtra Definitions:")
        total = len(extras)
        for index, extra_def in enumerate(extras, 1):
            print(f"  {index}. {extra_def.definition}")
            print(f"     Where: {extra_def.where}")
            print(f"     Justification: {extra_def.justification}")
            # Blank line between definitions, but not after the last one.
            if index < total:
                print()

    print(f"\nRemarks: {result.remarks}")
869
-
870
def execute_rewrite_workflow(rewrite_llm: BaseChatModel, harness: str) -> int:
    """Execute the rewrite workflow with interrupt handling.

    Streams the compiled graph, pausing whenever a `__interrupt__` event is
    emitted to collect human guidance, then resumes with that answer.
    Returns 0 when a validated rewrite is produced, 1 otherwise.
    """
    # Add checkpointer for interrupt functionality (required to resume a thread)
    checkpointer = MemorySaver()
    rewriter_exec: CompiledStateGraph[RewriterState, None, GraphInput, Any] = build_workflow(
        state_class=RewriterState,
        tools_list=rewrite_tools,
        sys_prompt=simplification_system_prompt,
        initial_prompt=rewriting_prompt,
        output_key="result",
        unbound_llm=rewrite_llm
    ).compile(checkpointer=checkpointer)

    # Execute rewrite workflow with interrupt handling; a fixed thread id keys
    # the checkpointer state so resumption targets the same run.
    thread_id = "rewrite_session"
    config: RunnableConfig = {"configurable": {"thread_id": thread_id}}

    # Start with initial input
    current_input: Union[None, Command, GraphInput] = GraphInput(code_input=harness)

    while True:
        assert current_input is not None
        # Stream execution
        interrupted = False
        r = current_input
        current_input = None  # consumed; only reset if we hit an interrupt

        for event in rewriter_exec.stream(input=r, config=config):
            logger.debug("Stream event: %s", event)

            # Check if we hit an interrupt
            if "__interrupt__" in event:
                interrupt_data = event["__interrupt__"][0].value
                human_response = handle_human_interrupt(interrupt_data)

                # Set up for resumption
                current_input = Command(resume=human_response)
                interrupted = True
                break

        # If we were interrupted, continue the loop to resume
        if interrupted:
            continue

        state = rewriter_exec.get_state(config)
        result = state.values.get("result", None)
        # No validated rewrite in the final state -> failure exit code.
        if result is None or not isinstance(result, RewriteResultSchema):
            return 1

        display_rewrite_result(result)
        return 0  # Success
920
-
921
def main() -> int:
    """Main entry point for the concordance tool."""
    parsed = setup_argument_parser().parse_args()
    setup_logging(parsed.debug)

    # Build both models up front so configuration problems surface early.
    harness_llm = create_harness_llm(parsed)
    rewrite_llm = create_rewrite_llm(parsed)

    # Harness the input function, then run the rewrite loop on the harness.
    harness = generate_harness(harness_llm, parsed.input_file)
    return execute_rewrite_workflow(rewrite_llm, harness)
937
-
938
# Script entry point: exit with the workflow's status code.
if __name__ == "__main__":
    sys.exit(main())