mycode-sdk 0.7.4__tar.gz → 0.7.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mycode-sdk
- Version: 0.7.4
+ Version: 0.7.6
  Summary: Lightweight Python SDK for building AI agents.
  Project-URL: Homepage, https://github.com/legibet/mycode
  Project-URL: Repository, https://github.com/legibet/mycode
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "mycode-sdk"
- version = "0.7.4"
+ version = "0.7.6"
  description = "Lightweight Python SDK for building AI agents."
  readme = "README.md"
  requires-python = ">=3.12"
@@ -183,7 +183,7 @@ class Agent:
  supports_pdf_input=supports_pdf_input,
  )
  self.max_tokens: int = meta.max_output_tokens or 16_384
- self.context_window: int | None = meta.context_window or 128_000
+ self.context_window: int = meta.context_window or 128_000
  self.supports_reasoning: bool | None = meta.supports_reasoning
  self.supports_image_input: bool = bool(meta.supports_image_input)
  self.supports_pdf_input: bool = bool(meta.supports_pdf_input)
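
The annotation can narrow from `int | None` to `int` because the `or 128_000` fallback already guarantees a value; note that `or` also coerces a reported context window of 0 to the default. A minimal illustration of that fallback (values invented):

context_window = None or 128_000       # -> 128_000
context_window = 0 or 128_000          # -> 128_000 (0 is falsy too)
context_window = 1_050_000 or 128_000  # -> 1_050_000
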
@@ -570,62 +570,85 @@ class Agent:
  block["meta"] = {**meta, "duration_ms": thinking_duration_ms}
  break

+ # Stamp context_window onto the persisted assistant message so
+ # rewinds and refreshed clients can render token-usage % without
+ # re-resolving model metadata.
+ meta = cast(dict[str, Any], assistant_message.setdefault("meta", {}))
+ meta["context_window"] = self.context_window
+
  self.messages.append(assistant_message)
  await persist(assistant_message)

- # Phase 2: if the assistant requested tools, execute them locally and
- # append one user-side tool_result message before continuing.
+ total_tokens = meta.get("total_tokens")
+ if total_tokens:
+ payload: dict[str, Any] = {
+ "total_tokens": total_tokens,
+ "model": meta.get("model") or self.model,
+ "provider": meta.get("provider") or self.provider,
+ "context_window": meta["context_window"],
+ }
+ yield Event("usage", payload)
+
  tool_calls = [
  block
  for block in assistant_message.get("content") or []
  if isinstance(block, dict) and block.get("type") == "tool_use"
  ]
- if not tool_calls:
- break
+ if tool_calls:
+ tool_results: list[dict[str, Any]] = []
+ for tool_call in tool_calls:
+ async for event in self._run_tool_call(tool_call):
+ yield event
+
+ if event.type != "tool_done":
+ continue
+
+ d = event.data
+ output = str(d.get("output") or "")
+ metadata = d.get("metadata") if isinstance(d.get("metadata"), dict) else None
+ content = d.get("content")
+ tool_results.append(
+ tool_result_block(
+ tool_use_id=str(d.get("tool_use_id") or ""),
+ output=output,
+ metadata=metadata,
+ is_error=bool(d.get("is_error")),
+ content=content if isinstance(content, list) else None,
+ )
+ )

- tool_results: list[dict[str, Any]] = []
- for tool_call in tool_calls:
- async for event in self._run_tool_call(tool_call):
- yield event
+ if self._cancel_event.is_set():
+ tool_result_message = build_message("user", tool_results)
+ self.messages.append(tool_result_message)
+ await persist(tool_result_message)
+ return

- if event.type != "tool_done":
- continue
+ tool_result_message = build_message("user", tool_results)
+ self.messages.append(tool_result_message)
+ await persist(tool_result_message)

- d = event.data
- output = str(d.get("output") or "")
- metadata = d.get("metadata") if isinstance(d.get("metadata"), dict) else None
- content = d.get("content")
- tool_results.append(
- tool_result_block(
- tool_use_id=str(d.get("tool_use_id") or ""),
- output=output,
- metadata=metadata,
- is_error=bool(d.get("is_error")),
- content=content if isinstance(content, list) else None,
- )
+ if self._cancel_event.is_set():
+ return
+ if should_compact(total_tokens, self.context_window, self.compact_threshold):
+ try:
+ async for event in self._compact(adapter, persist, continue_now=bool(tool_calls)):
+ yield event
+ except asyncio.CancelledError:
+ raise
+ except Exception:
+ logger.warning(
+ "Context compaction failed, continuing without compaction",
+ exc_info=True,
  )

- if self._cancel_event.is_set():
- tool_result_message = build_message("user", tool_results)
- self.messages.append(tool_result_message)
- await persist(tool_result_message)
- return
-
- tool_result_message = build_message("user", tool_results)
- self.messages.append(tool_result_message)
- await persist(tool_result_message)
+ if not tool_calls:
+ break

  else:
  # while loop exhausted max_turns without breaking
  yield Event("error", {"message": "max_turns reached"})
  return

- # Turn completed normally (assistant stopped calling tools).
- # Check whether context compaction is needed.
- if not self._cancel_event.is_set():
- async for event in self._compact_if_needed(adapter, persist):
- yield event
-
  def run(
  self,
  user_input: str | ConversationMessage,
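
The new `usage` event emitted mid-loop above surfaces per-turn token readings to stream consumers. A minimal consumer sketch, assuming an `Agent` instance is already constructed and that `run` yields the same `Event` objects the loop produces (the construction arguments are not part of this diff):

async for event in agent.run("summarize the README"):
    if event.type == "usage":
        data = event.data
        pct = 100 * data["total_tokens"] / data["context_window"]
        print(f"{data['provider']}/{data['model']}: {pct:.1f}% of context used")
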
@@ -657,32 +680,12 @@ class Agent:
  # Context compaction
  # ------------------------------------------------------------------

- async def _compact_if_needed(
- self,
- adapter: ProviderAdapter,
- persist: PersistCallback,
- ) -> AsyncIterator[Event]:
- """Check token usage and run compaction if above threshold."""
-
- usage: dict[str, Any] | None = None
- for message in reversed(self.messages):
- if message.get("role") == "assistant":
- usage = (message.get("meta") or {}).get("usage")
- break
-
- if not should_compact(usage, self.context_window, self.compact_threshold):
- return
-
- try:
- async for event in self._compact(adapter, persist):
- yield event
- except (Exception, asyncio.CancelledError):
- logger.warning("Context compaction failed, continuing without compaction", exc_info=True)
-
  async def _compact(
  self,
  adapter: ProviderAdapter,
  persist: PersistCallback,
+ *,
+ continue_now: bool,
  ) -> AsyncIterator[Event]:
  """Generate a conversation summary and replace in-memory messages."""

@@ -712,29 +715,30 @@ class Agent:
  summary_message = msg

  if not summary_message:
- logger.warning("Compaction produced no response")
- return
+ raise ValueError("compaction produced no response")

  summary_text = flatten_message_text(summary_message, include_thinking=False)
  if not summary_text:
- logger.warning("Compaction produced empty summary")
- return
+ raise ValueError("compaction produced empty summary")

- summary_usage = (summary_message.get("meta") or {}).get("usage")
+ summary_total_tokens = (summary_message.get("meta") or {}).get("total_tokens")
  compact_event = build_compact_event(
  summary_text,
  provider=self.provider,
  model=self.model,
  compacted_count=compacted_count,
- usage=summary_usage,
+ total_tokens=summary_total_tokens,
  )

  # Persist the compact event (append-only — original messages stay in JSONL).
  await persist(compact_event)

- # Rebuild in-memory messages from the compact event.
  self.messages.append(compact_event)
- self.messages = apply_compact(self.messages)
+ self.messages = apply_compact(
+ self.messages,
+ transcript_path=str(self._store.messages_path(self.session_id)) if self._store else None,
+ continue_now=continue_now,
+ )

  yield Event(
  "compact",
@@ -12,7 +12,8 @@ details.
  Metadata contract:

  - assistant message `meta` keeps normalized top-level fields only:
- `provider`, `model`, `provider_message_id`, `stop_reason`, `usage`
+ `provider`, `model`, `provider_message_id`, `stop_reason`, `total_tokens`,
+ `context_window` (see docs/sessions.md for `total_tokens` semantics)
  - provider-specific assistant message extras live under `meta.native`
  - provider-specific block replay hints live under `block.meta.native`
  - local display metadata, such as `block.meta.duration_ms`, is never sent
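
Under the revised contract, a normalized assistant `meta` would look roughly like this (illustrative values only; each adapter decides what lands in `native`):

meta = {
    "provider": "anthropic",
    "model": "claude-sonnet-4-5",
    "provider_message_id": "msg_0123",
    "stop_reason": "end_turn",
    "total_tokens": 4850,
    "context_window": 200000,
    "native": {"service_tier": "standard"},
}
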
@@ -146,7 +147,7 @@ def assistant_message(
  model: str | None = None,
  provider_message_id: str | None = None,
  stop_reason: str | None = None,
- usage: Any = None,
+ total_tokens: int | None = None,
  native_meta: dict[str, Any] | None = None,
  ) -> ConversationMessage:
  """Build a normalized assistant message with shared metadata fields."""
@@ -160,8 +161,8 @@ def assistant_message(
  meta["provider_message_id"] = provider_message_id
  if stop_reason:
  meta["stop_reason"] = stop_reason
- if usage is not None:
- meta["usage"] = usage
+ if total_tokens is not None:
+ meta["total_tokens"] = total_tokens
  if native_meta:
  native = omit_none(native_meta)
  if native:
@@ -794,6 +794,13 @@
  "supports_pdf_input": true,
  "supports_reasoning": true
  },
+ "gpt-5.5-pro": {
+ "context_window": 1050000,
+ "max_output_tokens": 128000,
+ "supports_image_input": true,
+ "supports_pdf_input": true,
+ "supports_reasoning": true
+ },
  "gpt-image-1": {
  "context_window": 0,
  "max_output_tokens": 0,
@@ -1545,6 +1552,13 @@
  "supports_pdf_input": false,
  "supports_reasoning": true
  },
+ "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free": {
+ "context_window": 256000,
+ "max_output_tokens": 65536,
+ "supports_image_input": true,
+ "supports_pdf_input": false,
+ "supports_reasoning": true
+ },
  "nvidia/nemotron-3-super-120b-a12b": {
  "context_window": 262144,
  "max_output_tokens": 262144,
@@ -1755,6 +1769,13 @@
  "supports_pdf_input": true,
  "supports_reasoning": true
  },
+ "openai/gpt-5.5-pro": {
+ "context_window": 1050000,
+ "max_output_tokens": 128000,
+ "supports_image_input": true,
+ "supports_pdf_input": true,
+ "supports_reasoning": true
+ },
  "openai/gpt-oss-120b": {
  "context_window": 131072,
  "max_output_tokens": 32768,
@@ -219,13 +219,24 @@ class AnthropicLikeAdapter(ProviderAdapter):
  native_meta["stop_sequence"] = stop_sequence
  if service_tier := getattr(message, "service_tier", None):
  native_meta["service_tier"] = service_tier
+
+ # No `total_tokens` field — compute it from input + cache + output parts.
+ raw_usage = dump_model(getattr(message, "usage", None)) or {}
+ prompt_tokens = (
+ (raw_usage.get("input_tokens") or 0)
+ + (raw_usage.get("cache_creation_input_tokens") or 0)
+ + (raw_usage.get("cache_read_input_tokens") or 0)
+ )
+ output_tokens = raw_usage.get("output_tokens") or 0
+ total_tokens = prompt_tokens + output_tokens or None
+
  return assistant_message(
  blocks,
  provider=self.provider_id,
  model=getattr(message, "model", None),
  provider_message_id=getattr(message, "id", None),
  stop_reason=getattr(message, "stop_reason", None),
- usage=dump_model(getattr(message, "usage", None)),
+ total_tokens=total_tokens,
  native_meta=native_meta,
  )

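Worked through with invented numbers, the computation above folds both cache buckets into the prompt side (and since `+` binds tighter than `or`, an all-zero payload collapses to None):

raw_usage = {
    "input_tokens": 1_200,
    "cache_creation_input_tokens": 0,
    "cache_read_input_tokens": 3_400,
    "output_tokens": 250,
}
# prompt_tokens = 1200 + 0 + 3400 = 4600
# total_tokens  = (4600 + 250) or None = 4850
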
@@ -92,6 +92,9 @@ class GoogleGeminiAdapter(ProviderAdapter):
  except Exception:
  pass

+ raw_usage = usage or {}
+ total_tokens = raw_usage.get("total_token_count") or None
+
  yield ProviderStreamEvent(
  "message_done",
  {
@@ -101,7 +104,7 @@ class GoogleGeminiAdapter(ProviderAdapter):
  model=response_model or request.model,
  provider_message_id=response_id,
  stop_reason=str(finish_reason) if finish_reason else None,
- usage=usage,
+ total_tokens=total_tokens,
  native_meta={"finish_message": str(finish_message)} if finish_message else None,
  )
  },
@@ -135,13 +135,16 @@ class OpenAIChatAdapter(ProviderAdapter):
  )
  )

+ raw_usage = dump_model(usage) or {}
+ total_tokens = raw_usage.get("total_tokens") or None
+
  final_message = assistant_message(
  blocks,
  provider=self.provider_id,
  model=response_model or request.model,
  provider_message_id=response_id,
  stop_reason=finish_reason,
- usage=dump_model(usage),
+ total_tokens=total_tokens,
  )
  yield ProviderStreamEvent("message_done", {"message": final_message})

@@ -361,12 +361,15 @@ class OpenAIResponsesAdapter(ProviderAdapter):
  )
  )

+ raw_usage = dump_model(getattr(response, "usage", None)) or {}
+ total_tokens = raw_usage.get("total_tokens") or None
+
  return assistant_message(
  blocks,
  provider=self.provider_id,
  model=getattr(response, "model", None),
  provider_message_id=getattr(response, "id", None),
  stop_reason=getattr(response, "status", None),
- usage=dump_model(getattr(response, "usage", None)),
+ total_tokens=total_tokens,
  native_meta={"output_items": dumped_output_items} if dumped_output_items else None,
  )
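
Taken together, the adapter hunks above normalize three differently shaped usage payloads into the single `meta.total_tokens` field. A condensed restatement as a sketch (the field names are the ones each diff reads; the helper itself is not part of the package):

from typing import Any

def normalized_total(provider: str, raw_usage: dict[str, Any]) -> int | None:
    if provider == "anthropic":
        # No ready-made total: sum input, cache-creation, cache-read, and output.
        total = (
            (raw_usage.get("input_tokens") or 0)
            + (raw_usage.get("cache_creation_input_tokens") or 0)
            + (raw_usage.get("cache_read_input_tokens") or 0)
            + (raw_usage.get("output_tokens") or 0)
        )
        return total or None
    if provider == "google":
        # Gemini reports the total directly as total_token_count.
        return raw_usage.get("total_token_count") or None
    # OpenAI chat and responses payloads both carry total_tokens.
    return raw_usage.get("total_tokens") or None
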
@@ -35,26 +35,38 @@ capture everything needed to continue the work seamlessly.

  Include:

- 1. **User Requests**: Every distinct request or instruction the user gave, \
+ 1. **Task and Intent**: Describe the user's overall goal: what is being \
+ built, fixed, or investigated, and why.
+ 2. **Decisions and Constraints**: List the decisions made, constraints \
+ discovered, and approaches chosen or rejected, with the reasoning behind \
+ each.
+ 3. **User Requests**: Every distinct request or instruction the user gave, \
  in chronological order. Preserve the user's original wording for ambiguous \
  or nuanced requests.
- 2. **Completed Work**: What was accomplished — files created, modified, or \
- deleted; bugs fixed; features added. Include file paths and function names.
- 3. **Current State**: The exact state of the work right now — what is working, \
- what is broken, what is partially done.
- 4. **Key Decisions**: Important decisions made, constraints discovered, \
- approaches chosen or rejected, and why.
- 5. **Next Steps**: What remains to be done, any work that was in progress \
- when this summary was generated.
+ 4. **Files and Changes**: Enumerate every file read, modified, or created: \
+ paths, what changed, and any code snippets the next turn will need to \
+ reason about, quoted verbatim.
+ 5. **Errors and Fixes**: List errors encountered, with the original message \
+ verbatim, the cause if known, and the resolution — or that it remains open.
+ 6. **Current State**: What is verified working, what is known broken, what \
+ is in progress.
+ 7. **Next Step**: The next step to take, with a direct quote from the most \
+ recent conversation showing where the work left off.

  Rules:
- - Be specific: include file paths, function names, error messages, and \
- concrete details.
+ - Be specific: reproduce file paths, function names, error messages, and \
+ other identifiers verbatim — never paraphrase them.
  - Do not add suggestions or opinions — only summarize what happened.
  - Keep it concise but complete.\
  """

- _COMPACT_ACK = "Understood. I have the context from the conversation summary and will continue the work."
+ _CONTINUATION_HEADER = "This session is being continued from a previous conversation that was compacted to fit the context window. The summary below covers the earlier portion of the conversation."
+
+ _TRANSCRIPT_HINT = "For verbatim details not captured in this summary (exact code snippets, error messages, or earlier output), read the original conversation log at: {path}"
+
+ _CONTINUATION_FOOTER = 'Resume directly from where the work left off. Do not acknowledge this summary, do not recap, and do not preface with "I\'ll continue" or similar.'
+
+ _COMPACT_ACK = "Acknowledged."


  # ---------------------------------------------------------------------
@@ -67,20 +79,20 @@ def _now() -> str:


  def should_compact(
- last_usage: dict[str, Any] | None,
+ last_total_tokens: int | None,
  context_window: int | None,
  threshold: float,
  ) -> bool:
- """Return True when the last response input tokens exceed the threshold."""
+ """True when the latest call's `total_tokens` ≥ `context_window × threshold`.

- if not last_usage or not context_window or threshold <= 0:
- return False
+ `total_tokens` already covers the next API call's prompt floor, so it is
+ the right input here. The `(1 - threshold)` headroom is reserved for the
+ compact LLM call itself (see docs/sessions.md).
+ """

- # Providers report prompt/input usage under slightly different field names.
- input_tokens = int(
- last_usage.get("input_tokens") or last_usage.get("prompt_tokens") or last_usage.get("prompt_token_count") or 0
- )
- return input_tokens >= context_window * threshold
+ if not last_total_tokens or not context_window or threshold <= 0:
+ return False
+ return last_total_tokens >= context_window * threshold


  def build_compact_event(
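
Concretely, the new predicate reduces to one comparison against a fixed line in the window. A worked example (the numbers and the 0.8 threshold are invented; the default `compact_threshold` is not shown in this diff):

# context_window=200_000, threshold=0.8 -> compaction triggers at 160_000
should_compact(159_999, 200_000, 0.8)  # False: still under the 80% line
should_compact(160_000, 200_000, 0.8)  # True: the remaining 20% is headroom
should_compact(None, 200_000, 0.8)     # False: no token reading yet
should_compact(120_000, None, 0.8)     # False: unknown context window
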
@@ -89,7 +101,7 @@ build_compact_event(
  provider: str,
  model: str,
  compacted_count: int,
- usage: dict[str, Any] | None = None,
+ total_tokens: int | None = None,
  ) -> ConversationMessage:
  """Build the compact event stored in session JSONL."""

@@ -98,13 +110,22 @@
  "model": model,
  "compacted_count": compacted_count,
  }
- if usage is not None:
- meta["usage"] = usage
+ if total_tokens is not None:
+ meta["total_tokens"] = total_tokens
  return build_message("compact", [text_block(summary_text)], meta=meta)


- def apply_compact(messages: list[ConversationMessage]) -> list[ConversationMessage]:
- """Replace the latest compact event with a summary + synthetic ack."""
+ def apply_compact(
+ messages: list[ConversationMessage],
+ *,
+ transcript_path: str | None = None,
+ continue_now: bool | None = None,
+ ) -> list[ConversationMessage]:
+ """Replace the latest compact event with a synthetic summary view.
+
+ ``continue_now`` omits the ack and leaves a user instruction last so the
+ agent loop can immediately request the next assistant response.
+ """

  # Only the newest compact event matters. Older history before it is no
  # longer visible once the summary replaces that earlier conversation.
@@ -122,15 +143,23 @@ def apply_compact(messages: list[ConversationMessage]) -> list[ConversationMessa
  summary_text = str(block.get("text") or "")
  break

- return [
- build_message(
- "user",
- [text_block(f"[Conversation Summary]\n\n{summary_text}")],
- meta={"synthetic": True},
- ),
- build_message("assistant", [text_block(_COMPACT_ACK)], meta={"synthetic": True}),
- *messages[last_compact_index + 1 :],
- ]
+ tail = messages[last_compact_index + 1 :]
+ if continue_now is None:
+ # During live tool-loop compaction the next persisted message is the
+ # assistant continuation. Waiting compaction has no tail yet.
+ continue_now = bool(tail and tail[0].get("role") == "assistant")
+
+ parts = [_CONTINUATION_HEADER, summary_text]
+ if transcript_path:
+ parts.append(_TRANSCRIPT_HINT.format(path=transcript_path))
+ if continue_now:
+ parts.append(_CONTINUATION_FOOTER)
+
+ result = [build_message("user", [text_block("\n\n".join(parts))], meta={"synthetic": True})]
+ if not continue_now:
+ result.append(build_message("assistant", [text_block(_COMPACT_ACK)], meta={"synthetic": True}))
+ result.extend(tail)
+ return result


  def build_rewind_event(rewind_to: int) -> ConversationMessage:
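
The `continue_now` switch changes the shape of the rebuilt history. A sketch of the two outcomes (message contents elided; the bracketed lists stand for the returned message sequence):

# Waiting compaction (continue_now=False): keep the synthetic ack so the
# transcript still alternates and the next real message can be user input.
#   [user(header + summary + hint), assistant("Acknowledged."), *tail]

# Live tool-loop compaction (continue_now=True): the footer instruction is
# last, so the agent loop can immediately request the next assistant turn.
#   [user(header + summary + hint + footer), *tail]
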
@@ -317,7 +346,10 @@ class SessionStore:
  # 2) rewind truncates that visible list by message index
  # Orphan tool_use blocks (e.g. left open by a server crash) are
  # closed by the provider adapter at replay time, not here.
- visible_messages = apply_compact(raw_messages)
+ visible_messages = apply_compact(
+ raw_messages,
+ transcript_path=str(self.messages_path(session_id)),
+ )
  visible_messages = apply_rewind(visible_messages)

  return {"session": self._summary(session_id, meta), "messages": visible_messages}