morphsdk 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. morphsdk/__init__.py +54 -0
  2. morphsdk/_agent/__init__.py +64 -0
  3. morphsdk/_agent/config.py +52 -0
  4. morphsdk/_agent/explore.py +276 -0
  5. morphsdk/_agent/github.py +57 -0
  6. morphsdk/_agent/helpers.py +133 -0
  7. morphsdk/_agent/parser.py +163 -0
  8. morphsdk/_agent/runner.py +524 -0
  9. morphsdk/_agent/tools.py +171 -0
  10. morphsdk/_agent/types.py +126 -0
  11. morphsdk/_base.py +309 -0
  12. morphsdk/_client.py +245 -0
  13. morphsdk/_config.py +37 -0
  14. morphsdk/_constants.py +53 -0
  15. morphsdk/_errors.py +111 -0
  16. morphsdk/_providers/__init__.py +36 -0
  17. morphsdk/_providers/_filter.py +92 -0
  18. morphsdk/_providers/base.py +94 -0
  19. morphsdk/_providers/code_storage_http.py +104 -0
  20. morphsdk/_providers/local.py +270 -0
  21. morphsdk/_providers/remote.py +161 -0
  22. morphsdk/_version.py +1 -0
  23. morphsdk/adapters/__init__.py +1 -0
  24. morphsdk/adapters/anthropic.py +360 -0
  25. morphsdk/adapters/langchain.py +120 -0
  26. morphsdk/adapters/openai.py +500 -0
  27. morphsdk/py.typed +0 -0
  28. morphsdk/resources/__init__.py +0 -0
  29. morphsdk/resources/browser.py +919 -0
  30. morphsdk/resources/compact.py +133 -0
  31. morphsdk/resources/edit.py +506 -0
  32. morphsdk/resources/explore.py +333 -0
  33. morphsdk/resources/git.py +861 -0
  34. morphsdk/resources/github.py +1214 -0
  35. morphsdk/resources/grep.py +583 -0
  36. morphsdk/resources/mobile.py +134 -0
  37. morphsdk/resources/reflex.py +414 -0
  38. morphsdk/resources/router.py +124 -0
  39. morphsdk/resources/search.py +110 -0
  40. morphsdk/tracing/__init__.py +70 -0
  41. morphsdk/tracing/_otel.py +101 -0
  42. morphsdk/tracing/core.py +249 -0
  43. morphsdk/tracing/interaction.py +284 -0
  44. morphsdk/tracing/otel.py +75 -0
  45. morphsdk/tracing/reflex.py +58 -0
  46. morphsdk/tracing/types.py +163 -0
  47. morphsdk/types/__init__.py +140 -0
  48. morphsdk/types/browser.py +118 -0
  49. morphsdk/types/compact.py +41 -0
  50. morphsdk/types/edit.py +31 -0
  51. morphsdk/types/explore.py +42 -0
  52. morphsdk/types/git.py +25 -0
  53. morphsdk/types/github.py +111 -0
  54. morphsdk/types/grep.py +41 -0
  55. morphsdk/types/mobile.py +25 -0
  56. morphsdk/types/reflex.py +137 -0
  57. morphsdk/types/router.py +21 -0
  58. morphsdk/types/search.py +33 -0
  59. morphsdk-0.2.5.dist-info/METADATA +226 -0
  60. morphsdk-0.2.5.dist-info/RECORD +61 -0
  61. morphsdk-0.2.5.dist-info/WHEEL +4 -0
morphsdk/__init__.py ADDED
@@ -0,0 +1,54 @@
1
+ """Morph SDK -- AI-powered code editing, search, browser automation, and more.
2
+
3
+ Usage::
4
+
5
+ from morphsdk import Morph
6
+
7
+ morph = Morph(api_key="sk-...")
8
+
9
+ # Edit files
10
+ result = morph.edit.file(path="app.py", instruction="Fix bug", code_edit="...")
11
+
12
+ # Search code
13
+ result = morph.search.code(query="authentication", repo_id="my-project")
14
+
15
+ # Browser automation
16
+ result = morph.browser.run(task="Test login", url="https://app.example.com")
17
+
18
+ # Context compression
19
+ result = morph.compact(input="Long text to compress...")
20
+
21
+ # Model routing
22
+ result = morph.router.select_model(input="Explain quicksort")
23
+ """
24
+
25
+ from ._client import AsyncMorph, Morph
26
+ from ._errors import (
27
+ APIConnectionError,
28
+ APITimeoutError,
29
+ AuthenticationError,
30
+ InternalError,
31
+ MorphError,
32
+ NotFoundError,
33
+ PermissionDeniedError,
34
+ RateLimitError,
35
+ ValidationError,
36
+ )
37
+ from ._version import __version__
38
+
39
+ __all__ = [
40
+ "__version__",
41
+ # Clients
42
+ "Morph",
43
+ "AsyncMorph",
44
+ # Errors
45
+ "MorphError",
46
+ "AuthenticationError",
47
+ "PermissionDeniedError",
48
+ "NotFoundError",
49
+ "RateLimitError",
50
+ "ValidationError",
51
+ "APIConnectionError",
52
+ "APITimeoutError",
53
+ "InternalError",
54
+ ]
@@ -0,0 +1,64 @@
1
+ """Agent orchestration internals (WarpGrep multi-turn loop, Explore subagent).
2
+
3
+ The WarpGrep agent core is **async** -- it awaits the async providers and a
4
+ single ``httpx`` chat-completions call per turn. ``run_warp_grep`` /
5
+ ``run_warp_grep_streaming`` are the public entry points consumed by the sync
6
+ ``GrepResource`` (via :func:`asyncio.run` / a thread bridge) and, in a later
7
+ wave, directly by the async client.
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from .explore import (
13
+ DEFAULT_MAX_TURNS as EXPLORE_DEFAULT_MAX_TURNS,
14
+ )
15
+ from .explore import (
16
+ DEFAULT_THOROUGHNESS,
17
+ ExploreContext,
18
+ ExploreMessageEvent,
19
+ ExploreRunResult,
20
+ ExploreStepEvent,
21
+ ExploreThoroughness,
22
+ run_explore,
23
+ run_explore_streaming,
24
+ )
25
+ from .runner import (
26
+ TOOL_SPECS,
27
+ call_model,
28
+ run_warp_grep,
29
+ run_warp_grep_streaming,
30
+ )
31
+ from .types import (
32
+ AgentFinish,
33
+ AgentRunResult,
34
+ ChatMessage,
35
+ FinishFileSpec,
36
+ ResolvedContext,
37
+ ToolCallRef,
38
+ WarpGrepExecutionMetrics,
39
+ WarpGrepStep,
40
+ )
41
+
42
+ __all__ = [
43
+ "run_warp_grep",
44
+ "run_warp_grep_streaming",
45
+ "call_model",
46
+ "TOOL_SPECS",
47
+ "run_explore",
48
+ "run_explore_streaming",
49
+ "ExploreRunResult",
50
+ "ExploreStepEvent",
51
+ "ExploreMessageEvent",
52
+ "ExploreContext",
53
+ "ExploreThoroughness",
54
+ "EXPLORE_DEFAULT_MAX_TURNS",
55
+ "DEFAULT_THOROUGHNESS",
56
+ "AgentRunResult",
57
+ "AgentFinish",
58
+ "ChatMessage",
59
+ "FinishFileSpec",
60
+ "ResolvedContext",
61
+ "ToolCallRef",
62
+ "WarpGrepStep",
63
+ "WarpGrepExecutionMetrics",
64
+ ]
@@ -0,0 +1,52 @@
1
+ """Agent loop configuration constants.
2
+
3
+ Cross-checked against the TypeScript ``AGENT_CONFIG`` (``agent/config.ts``).
4
+ Every value here matches ``_constants.py`` **except the timeout**: the TS default
5
+ is 60_000 ms, while ``_constants.WARP_GREP_TIMEOUT`` is 30.0 s. The TS value is
6
+ authoritative for the agent loop, so we override it module-locally here (the
7
+ shared ``_constants`` module is owned elsewhere and intentionally left untouched).
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import os
13
+
14
+ from morphsdk._constants import (
15
+ WARP_GREP_MAX_CONTEXT_CHARS,
16
+ WARP_GREP_MAX_LIST_DEPTH,
17
+ WARP_GREP_MAX_LIST_RESULTS,
18
+ WARP_GREP_MAX_OUTPUT_LINES,
19
+ WARP_GREP_MAX_READ_LINES,
20
+ WARP_GREP_MAX_TURNS,
21
+ WARP_GREP_MODEL,
22
+ )
23
+
24
+ DEFAULT_MODEL = WARP_GREP_MODEL
25
+ MAX_TURNS = WARP_GREP_MAX_TURNS
26
+ MAX_CONTEXT_CHARS = WARP_GREP_MAX_CONTEXT_CHARS
27
+ MAX_OUTPUT_LINES = WARP_GREP_MAX_OUTPUT_LINES
28
+ MAX_LIST_RESULTS = WARP_GREP_MAX_LIST_RESULTS
29
+ MAX_READ_LINES = WARP_GREP_MAX_READ_LINES
30
+ MAX_LIST_DEPTH = WARP_GREP_MAX_LIST_DEPTH
31
+
32
# The TypeScript AGENT_CONFIG.TIMEOUT_MS default is 60_000 ms (overridable via
# env). The Python SDK expresses timeouts in **seconds**, so the same default is
# stored here as 60.0.
_DEFAULT_TIMEOUT_S = 60.0


def _resolve_timeout_s() -> float:
    """Return the agent-loop timeout in seconds.

    Reads ``MORPH_WARP_GREP_TIMEOUT`` (interpreted as integer milliseconds,
    mirroring TS ``parseEnvTimeout(MORPH_WARP_GREP_TIMEOUT, 60_000)``) and
    converts it to seconds. An unset, empty, non-integer, or non-positive value
    falls back to ``_DEFAULT_TIMEOUT_S``.
    """
    env_value = os.environ.get("MORPH_WARP_GREP_TIMEOUT")
    if env_value:
        try:
            millis = int(env_value)
        except ValueError:
            # Unparseable input is treated like a non-positive value.
            millis = 0
        if millis > 0:
            return millis / 1000.0
    return _DEFAULT_TIMEOUT_S


# Resolved once at import time; later changes to the environment are not seen.
DEFAULT_TIMEOUT_S = _resolve_timeout_s()
50
+
51
+ # Default code-search host for GitHub repo resolution + code-storage commands.
52
+ DEFAULT_CODE_SEARCH_URL = "https://morphllm.com"
@@ -0,0 +1,276 @@
1
+ """Explore subagent — async core.
2
+
3
+ A higher-level codebase-exploration orchestration built directly on the WarpGrep
4
+ agent. The WarpGrep agent runs on Morph's specialized ``morph-warp-grep-v2.1``
5
+ model and already performs a multi-turn search loop (grep / read / list / glob /
6
+ finish), so Explore drives *that* loop rather than a separate reasoning model:
7
+
8
+ 1. Explore runs the WarpGrep agent for the user's query against the chosen
9
+ provider, using the WarpGrep model.
10
+ 2. ``thoroughness`` scales how deep the WarpGrep loop is allowed to go by setting
11
+ its per-run turn cap (quick=3, medium=6, thorough=12).
12
+ 3. Each WarpGrep turn surfaces as an :class:`ExploreStepEvent` while streaming;
13
+ the resolved file contexts and the WarpGrep finish summary become the
14
+ :class:`ExploreRunResult`.
15
+
16
+ Model note
17
+ ----------
18
+ Explore intentionally uses the WarpGrep model end-to-end. The TS SDK runs the
19
+ explore reasoning loop on a caller-supplied Anthropic/Vercel model and only calls
20
+ WarpGrep for each search; the Python SDK has no such caller model, and the
21
+ WarpGrep model is the right Morph-hosted engine for codebase search, so Explore
22
+ delegates to the WarpGrep agent itself.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import time
28
+ from collections.abc import AsyncIterator
29
+ from dataclasses import dataclass, field
30
+ from typing import Any, Literal
31
+
32
+ from morphsdk._providers.base import WarpGrepProvider
33
+
34
+ from .runner import run_warp_grep_streaming
35
+ from .types import AgentRunResult, WarpGrepStep
36
+
37
+ # --- thoroughness ------------------------------------------------------------
38
+
39
ExploreThoroughness = Literal["quick", "medium", "thorough"]

#: Thoroughness used when the caller does not pick one.
DEFAULT_THOROUGHNESS: ExploreThoroughness = "medium"

#: WarpGrep turn cap applied per run for each thoroughness level. A deeper
#: level allows the WarpGrep loop more search/read turns before it must finish.
DEFAULT_MAX_TURNS: dict[ExploreThoroughness, int] = {"quick": 3, "medium": 6, "thorough": 12}

#: Tool-call names that are tallied into :attr:`ExploreRunResult.search_count`.
_SEARCH_TOOLS = frozenset({"grep_search", "glob", "list_directory"})
54
+
55
+
56
+ # --- result / event shapes (mirror subagents/types.ts) -----------------------
57
+
58
+
59
@dataclass
class ExploreContext:
    """One resolved piece of code returned by an exploration.

    The shape mirrors ``WarpGrepContext`` so that Explore carries the same
    context structure as the TS ``ExploreResult.contexts``.
    """

    # Path of the file this context was taken from.
    file: str
    # Text content captured for the file.
    content: str
    # Optional line-range information; ``None`` when not provided.
    lines: Any | None = None
70
+
71
+
72
@dataclass
class ExploreRunResult:
    """Terminal outcome of an exploration run (port of TS ``ExploreResult``)."""

    # True when the run finished successfully.
    success: bool
    # Summary text for the run; may be empty on failure.
    summary: str
    # Code contexts gathered by the run.
    contexts: list[ExploreContext]
    # Number of search-type tool calls made during the run.
    search_count: int
    # Wall-clock duration of the run in milliseconds.
    duration_ms: float
    # Error message for a failed run; ``None`` when there is none.
    error: str | None = None
82
+
83
+
84
@dataclass
class ExploreStepEvent:
    """Streaming progress event (port of TS ``ExploreStep`` / the ``step`` event)."""

    # Turn number this event refers to.
    step: int
    # Human-readable label of what was searched during the turn.
    search_request: str
    # Number of contexts reported with this event.
    contexts_found: int
    # True only for the closing step emitted right before the final result.
    is_final: bool
92
+
93
+
94
@dataclass
class ExploreMessageEvent:
    """Streaming chat-message event (port of the TS ``message`` event).

    Kept so that consumers can keep typing against it. Explore-on-WarpGrep has
    no host round-trips, so the core never produces one of these.
    """

    # Which side authored the message.
    sender: Literal["explore", "host"]
    # Message text.
    content: str
    # Time associated with the message.
    timestamp: float
106
+
107
+
108
def _max_turns(thoroughness: ExploreThoroughness, max_turns: int | None) -> int:
    """Pick the WarpGrep turn cap for a run.

    An explicit ``max_turns`` always wins; otherwise the cap is looked up in
    ``DEFAULT_MAX_TURNS`` by ``thoroughness`` (6 for an unknown level).
    """
    if max_turns is None:
        return DEFAULT_MAX_TURNS.get(thoroughness, 6)
    return max_turns
110
+
111
+
112
def _deduplicate_contexts(contexts: list[ExploreContext]) -> list[ExploreContext]:
    """Collapse contexts to one per file (port of ``deduplicateContexts``).

    When a file appears more than once, the entry with the strictly longest
    ``content`` is kept; on a tie the earlier entry stays. Files come back in
    the order they were first seen.
    """
    longest: dict[str, ExploreContext] = {}
    for candidate in contexts:
        kept = longest.get(candidate.file)
        if kept is not None and len(candidate.content) <= len(kept.content):
            continue
        longest[candidate.file] = candidate
    return list(longest.values())
120
+
121
+
122
def _step_label(step: WarpGrepStep) -> str:
    """Build a concise human label for a WarpGrep turn from its tool calls.

    The result is used as :attr:`ExploreStepEvent.search_request`. Each tool
    call contributes ``"<name>: <detail>"``, where the detail is the first
    truthy value among the ``pattern``, ``query``, ``path`` and ``glob``
    arguments. A call without a usable detail contributes just its name, and a
    call without a name contributes just its detail. Labels are joined with
    ``"; "``; calls that yield nothing are skipped.

    The detail is kept verbatim apart from surrounding whitespace, so a search
    pattern that ends in ``:`` is no longer truncated.
    """
    labels: list[str] = []
    for call in step.tool_calls:
        name = str(call.get("name") or "").strip()
        args = call.get("arguments") or {}
        detail = ""
        if isinstance(args, dict):
            detail = str(
                args.get("pattern")
                or args.get("query")
                or args.get("path")
                or args.get("glob")
                or ""
            ).strip()
        if name and detail:
            labels.append(f"{name}: {detail}")
        else:
            # Whitespace-only details collapse to "", leaving the bare name
            # (or the bare detail when the call carries no name).
            labels.append(name or detail)
    return "; ".join(label for label in labels if label)
140
+
141
+
142
def _count_searches(step: WarpGrepStep) -> int:
    """Return how many tool calls in ``step`` are named in ``_SEARCH_TOOLS``."""
    count = 0
    for call in step.tool_calls:
        if str(call.get("name") or "") in _SEARCH_TOOLS:
            count += 1
    return count
144
+
145
+
146
def _contexts_from_result(result: AgentRunResult) -> list[ExploreContext]:
    """Convert a WarpGrep result's resolved files into Explore contexts.

    Only a run whose ``termination_reason`` is ``"completed"`` and that has a
    non-empty ``resolved`` list produces contexts; anything else gives ``[]``.
    """
    if result.termination_reason == "completed" and result.resolved:
        return [
            ExploreContext(file=entry.path, content=entry.content, lines=entry.ranges)
            for entry in result.resolved
        ]
    return []
153
+
154
+
155
@dataclass
class _LoopState:
    """Mutable bookkeeping accumulated while streaming WarpGrep turns."""

    # Running total of search-type tool calls seen so far.
    search_count: int = 0
    # Turn number of the most recently received step.
    last_turn: int = 0
    # Every step received so far, in arrival order (fresh list per instance).
    steps: list[WarpGrepStep] = field(default_factory=list)
160
+
161
+
162
async def run_explore_streaming(
    *,
    query: str,
    repo_root: str,
    provider: WarpGrepProvider,
    api_key: str,
    api_url: str | None = None,
    thoroughness: ExploreThoroughness = DEFAULT_THOROUGHNESS,
    max_turns: int | None = None,
    timeout: float | None = None,
    max_retries: int = 3,
) -> AsyncIterator[ExploreStepEvent | ExploreMessageEvent | ExploreRunResult]:
    """Run the WarpGrep agent for ``query`` and stream exploration events.

    Every :class:`WarpGrepStep` coming out of ``run_warp_grep_streaming`` is
    re-emitted as an :class:`ExploreStepEvent`. The first non-step item is
    treated as the terminal agent result: it produces one closing step with
    ``is_final=True`` followed by a single :class:`ExploreRunResult`, after
    which the generator stops. Because a Python generator cannot carry a
    separate return value, the result is simply the last item yielded and
    consumers tell the items apart by type.

    Any exception raised while driving the loop is reported as a failed
    :class:`ExploreRunResult` instead of propagating.

    ``max_turns`` overrides the turn cap derived from ``thoroughness``; the
    remaining keyword arguments are forwarded to ``run_warp_grep_streaming``.
    """
    started_at = time.monotonic()
    progress = _LoopState()
    turn_cap = _max_turns(thoroughness, max_turns)

    def elapsed_ms() -> float:
        return (time.monotonic() - started_at) * 1000

    try:
        async for event in run_warp_grep_streaming(
            search_term=query,
            repo_root=repo_root,
            provider=provider,
            api_key=api_key,
            api_url=api_url,
            timeout=timeout,
            max_turns=turn_cap,
            max_retries=max_retries,
        ):
            if not isinstance(event, WarpGrepStep):
                # Terminal AgentRunResult: close the stream with a final step
                # and exactly one run result.
                found = _deduplicate_contexts(_contexts_from_result(event))
                yield ExploreStepEvent(
                    step=progress.last_turn,
                    search_request="",
                    contexts_found=len(found),
                    is_final=True,
                )
                if event.termination_reason == "completed":
                    yield ExploreRunResult(
                        success=True,
                        summary=event.finish_payload or "Exploration completed.",
                        contexts=found,
                        search_count=progress.search_count,
                        duration_ms=elapsed_ms(),
                    )
                else:
                    first_error = event.errors[0]["message"] if event.errors else None
                    yield ExploreRunResult(
                        success=False,
                        summary=event.finish_payload or "",
                        contexts=found,
                        search_count=progress.search_count,
                        duration_ms=elapsed_ms(),
                        error=first_error,
                    )
                return

            # Intermediate turn: update the tallies and surface it as a step.
            progress.search_count += _count_searches(event)
            progress.last_turn = event.turn
            progress.steps.append(event)
            yield ExploreStepEvent(
                step=event.turn,
                search_request=_step_label(event),
                contexts_found=0,
                is_final=False,
            )
    except Exception as err:  # noqa: BLE001 - surface as an error result, like TS
        yield ExploreRunResult(
            success=False,
            summary="",
            contexts=[],
            search_count=progress.search_count,
            duration_ms=elapsed_ms(),
            error=str(err),
        )
245
+
246
+
247
async def run_explore(
    *,
    query: str,
    repo_root: str,
    provider: WarpGrepProvider,
    api_key: str,
    api_url: str | None = None,
    thoroughness: ExploreThoroughness = DEFAULT_THOROUGHNESS,
    max_turns: int | None = None,
    timeout: float | None = None,
    max_retries: int = 3,
) -> ExploreRunResult:
    """Non-streaming convenience wrapper around :func:`run_explore_streaming`.

    Drains the streaming loop, ignoring step events, and returns the last
    :class:`ExploreRunResult` it yielded. All arguments are forwarded unchanged.

    Raises:
        RuntimeError: if the stream ends without yielding any
            :class:`ExploreRunResult`. This is an explicit check rather than an
            ``assert`` so it still fires when Python runs with ``-O``.
    """
    result: ExploreRunResult | None = None
    async for item in run_explore_streaming(
        query=query,
        repo_root=repo_root,
        provider=provider,
        api_key=api_key,
        api_url=api_url,
        thoroughness=thoroughness,
        max_turns=max_turns,
        timeout=timeout,
        max_retries=max_retries,
    ):
        if isinstance(item, ExploreRunResult):
            result = item
    if result is None:
        # The streaming loop is expected to always end with a terminal result.
        raise RuntimeError("run_explore_streaming finished without yielding an ExploreRunResult")
    return result
@@ -0,0 +1,57 @@
1
+ """GitHub repo resolution for WarpGrep code search.
2
+
3
+ Faithful port of ``parseGitHubUrl`` (``utils/github.ts``) and
4
+ ``_resolveGitHubRepo`` (``client.ts``): parse an ``owner/repo`` shorthand or URL,
5
+ then hit ``GET {base}/api/code-search/get-or-create?url=owner/repo`` to obtain the
6
+ indexed ``repoId`` + ``defaultBranch`` used to build the code-storage commands.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import re
12
+
13
+ import httpx
14
+
15
# ``github.com/<owner>/<repo>`` with an optional http(s) scheme; the repo segment
# stops at the next ``/``, ``?`` or ``#``.
_URL_PATTERN = re.compile(r"^(?:https?://)?github\.com/([^/]+)/([^/?#]+)")
# Bare ``<owner>/<repo>`` shorthand: exactly one slash.
_SHORT_PATTERN = re.compile(r"^([^/]+)/([^/]+)$")


def parse_github_url(value: str) -> tuple[str, str]:
    """Split a GitHub reference into ``(owner, repo)``.

    Accepts ``owner/repo``, ``github.com/owner/repo`` or a full ``http(s)``
    URL. Surrounding whitespace is ignored and a trailing ``.git`` is removed
    from the repo name. In the shorthand form an ``owner`` containing ``.`` is
    rejected (mirrors the TS guard).

    Raises:
        ValueError: if ``value`` matches neither accepted form.
    """
    cleaned = value.strip()

    found = _URL_PATTERN.match(cleaned)
    if found is None:
        found = _SHORT_PATTERN.match(cleaned)
        if found is not None and "." in found.group(1):
            # A dotted owner in shorthand form is not accepted.
            found = None

    if found is None:
        raise ValueError(
            f'Invalid GitHub URL or shorthand: "{cleaned}". '
            'Expected "owner/repo" or "https://github.com/owner/repo"'
        )

    owner, repo = found.groups()
    return owner, re.sub(r"\.git$", "", repo)
36
+
37
+
38
async def resolve_github_repo(
    github: str,
    base_url: str,
    *,
    timeout: float | None = None,
) -> tuple[str, str, str]:
    """Resolve a GitHub reference to ``(repo, repo_id, default_branch)``.

    Parses ``github`` with :func:`parse_github_url`, then issues
    ``GET {base_url}/api/code-search/get-or-create?url=owner/repo`` and reads
    ``repoId`` and ``defaultBranch`` from the JSON response (same flow as the
    TS ``_resolveGitHubRepo``).

    Args:
        github: ``owner/repo`` shorthand or a GitHub URL.
        base_url: Root URL of the code-search host. Trailing slashes are
            ignored so the request path never contains ``//``.
        timeout: Request timeout in seconds; a falsy value means 30 seconds.

    Raises:
        ValueError: if ``github`` cannot be parsed.
        RuntimeError: if the endpoint answers with a non-success status.
        KeyError: if the response JSON lacks ``repoId`` or ``defaultBranch``
            (assumes the payload is a JSON object).
    """
    owner, repo = parse_github_url(github)
    url = f"{base_url.rstrip('/')}/api/code-search/get-or-create"
    async with httpx.AsyncClient(timeout=httpx.Timeout(timeout or 30.0)) as client:
        res = await client.get(url, params={"url": f"{owner}/{repo}"})
    if not res.is_success:
        text = res.text or res.reason_phrase
        raise RuntimeError(f"Failed to import repo {owner}/{repo}: {text}")
    data = res.json()
    return repo, str(data["repoId"]), str(data["defaultBranch"])
@@ -0,0 +1,133 @@
1
+ """Conversation-assembly helpers for the WarpGrep agent loop.
2
+
3
+ Faithful port of ``agent/helpers.ts``: the initial-state prompt, the turn-counter
4
+ hint, the context-budget tag, and the hard context-limit enforcement. The exact
5
+ string formats matter -- they match the model's training distribution.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import math
11
+ import os
12
+
13
+ from morphsdk._providers.base import WarpGrepProvider
14
+
15
+ from .config import MAX_CONTEXT_CHARS, MAX_OUTPUT_LINES, MAX_TURNS
16
+ from .types import ChatMessage
17
+
18
+ _TRUNCATED_MARKER = "[truncated for context limit]"
19
+
20
+
21
def _message_size(m: ChatMessage) -> int:
    """Return the character count of a message (mirrors ``getMessageSize``).

    For an assistant message this is the length of its string content (0 when
    the content is not a string) plus the name and argument lengths of every
    tool call. For any other role it is the length of the content, with a
    missing content counted as empty.
    """
    if m.role != "assistant":
        return len(m.content or "")

    text_chars = len(m.content) if isinstance(m.content, str) else 0
    call_chars = 0
    if m.tool_calls:
        call_chars = sum(len(tc.name) + len(tc.arguments) for tc in m.tool_calls)
    return text_chars + call_chars
31
+
32
+
33
def format_turn_message(turns_used: int, max_turns: int) -> str:
    """Return the turn-counter hint that follows each tool round.

    When exactly one turn is left the hint tells the model it must call the
    finish tool now; otherwise it reports the turns used and remaining. The
    wording is fixed because it matches the model's training distribution.
    """
    turns_remaining = max_turns - turns_used
    if turns_remaining != 1:
        plural = "s" if turns_used != 1 else ""
        return f"\nYou have used {turns_used} turn{plural} and have {turns_remaining} remaining"
    return (
        f"\nYou have used {turns_used} turns, you only have 1 turn remaining. "
        "You have run out of turns to explore the code base and MUST call the "
        "finish tool now"
    )
44
+
45
+
46
def calculate_context_budget(messages: list[ChatMessage]) -> str:
    """Render the ``<context_budget>`` tag (mirrors ``calculateContextBudget``).

    The tag reports the summed size of ``messages`` as a whole-number
    percentage of ``MAX_CONTEXT_CHARS`` and as ``used/max`` in thousands of
    characters, all rounded down.
    """
    consumed = sum(map(_message_size, messages))
    limit = MAX_CONTEXT_CHARS
    percent = math.floor((consumed / limit) * 100)
    used_k, max_k = math.floor(consumed / 1000), math.floor(limit / 1000)
    return f"<context_budget>{percent}% ({used_k}K/{max_k}K chars)</context_budget>"
54
+
55
+
56
async def build_initial_state(
    repo_root: str,
    search_term: str,
    provider: WarpGrepProvider,
    *,
    search_type: str | None = None,
    max_turns: int | None = None,
) -> str:
    """Build the first user message: repo structure, search string, budget/turn.

    Port of ``buildInitialState``. The repo structure is a flat list of paths,
    repo root first, each listed entry joined onto ``repo_root``. It is followed
    by the search string, the context-budget tag for an empty conversation and
    the turn tag. If listing the directory fails for any reason, the structure
    falls back to the root alone.

    Args:
        repo_root: Root path printed first and used as the prefix for entries.
        search_term: The search string embedded in ``<search_string>``.
        provider: Provider whose ``list_directory`` supplies the entries.
        search_type: ``"node_modules"`` limits the listing depth to 1;
            anything else uses depth 2.
        max_turns: Turn cap to announce in the turn tag. Defaults to the
            module-wide ``MAX_TURNS`` so existing callers are unaffected;
            callers running with a different per-run cap can pass it so the
            announced number matches the real limit.
    """
    budget = calculate_context_budget([])
    turn_limit = MAX_TURNS if max_turns is None else max_turns
    turn_tag = f"You have used 0 turns and have {turn_limit} remaining"
    tree_depth = 1 if search_type == "node_modules" else 2

    try:
        entries = await provider.list_directory(
            path=".",
            max_results=MAX_OUTPUT_LINES,
            max_depth=tree_depth,
        )
        structure = "\n".join(
            [repo_root, *(os.path.join(repo_root, e.path) for e in entries)]
        )
    except Exception:  # noqa: BLE001 - mirror TS catch -> root-only fallback
        structure = repo_root

    return (
        f"<repo_structure>\n{structure}\n</repo_structure>\n\n"
        f"<search_string>\n{search_term}\n</search_string>\n{budget}\n{turn_tag}"
    )
91
+
92
+
93
def enforce_context_limit(
    messages: list[ChatMessage],
    max_chars: int = MAX_CONTEXT_CHARS,
) -> list[ChatMessage]:
    """Shrink the conversation in place until it fits within *max_chars*.

    Port of ``enforceContextLimit``. The first user message (the search query)
    and every assistant message are left alone. Tool results and later user
    messages are swapped for a truncation marker, oldest first, stopping as
    soon as the total size is within budget. The same list object is returned.
    """

    def current_size() -> int:
        return sum(map(_message_size, messages))

    if current_size() <= max_chars:
        return messages

    # Positions that may be truncated, in conversation order.
    candidates: list[int] = []
    seen_first_user = False
    for position, message in enumerate(messages):
        role = message.role
        if role == "user" and not seen_first_user:
            # The opening user message carries the query and is never touched.
            seen_first_user = True
        elif role in ("tool", "user"):
            candidates.append(position)

    for position in candidates:
        if current_size() <= max_chars:
            break
        message = messages[position]
        if message.content == _TRUNCATED_MARKER:
            continue
        if message.role == "tool":
            # Keep the tool_call_id so the result stays paired with its call.
            messages[position] = ChatMessage(
                role="tool", content=_TRUNCATED_MARKER, tool_call_id=message.tool_call_id
            )
        elif message.role == "user":
            messages[position] = ChatMessage(role="user", content=_TRUNCATED_MARKER)

    return messages