mycode-sdk 0.7.4__tar.gz → 0.7.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: mycode-sdk
- Version: 0.7.4
+ Version: 0.7.6
  Summary: Lightweight Python SDK for building AI agents.
  Project-URL: Homepage, https://github.com/legibet/mycode
  Project-URL: Repository, https://github.com/legibet/mycode
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"

  [project]
  name = "mycode-sdk"
- version = "0.7.4"
+ version = "0.7.6"
  description = "Lightweight Python SDK for building AI agents."
  readme = "README.md"
  requires-python = ">=3.12"
@@ -183,7 +183,7 @@ class Agent:
  supports_pdf_input=supports_pdf_input,
  )
  self.max_tokens: int = meta.max_output_tokens or 16_384
- self.context_window: int | None = meta.context_window or 128_000
+ self.context_window: int = meta.context_window or 128_000
  self.supports_reasoning: bool | None = meta.supports_reasoning
  self.supports_image_input: bool = bool(meta.supports_image_input)
  self.supports_pdf_input: bool = bool(meta.supports_pdf_input)
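
The annotation can narrow from `int | None` to `int` because the `or 128_000` fallback already guarantees a value; note that `or` also coerces a reported context window of 0 to the default. A minimal illustration of that fallback (values invented):

context_window = None or 128_000       # -> 128_000
context_window = 0 or 128_000          # -> 128_000 (0 is falsy too)
context_window = 1_050_000 or 128_000  # -> 1_050_000
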
@@ -570,62 +570,85 @@ class Agent:
  block["meta"] = {**meta, "duration_ms": thinking_duration_ms}
  break

+ # Stamp context_window onto the persisted assistant message so
+ # rewinds and refreshed clients can render token-usage % without
+ # re-resolving model metadata.
+ meta = cast(dict[str, Any], assistant_message.setdefault("meta", {}))
+ meta["context_window"] = self.context_window
+
  self.messages.append(assistant_message)
  await persist(assistant_message)

- # Phase 2: if the assistant requested tools, execute them locally and
- # append one user-side tool_result message before continuing.
+ total_tokens = meta.get("total_tokens")
+ if total_tokens:
+ payload: dict[str, Any] = {
+ "total_tokens": total_tokens,
+ "model": meta.get("model") or self.model,
+ "provider": meta.get("provider") or self.provider,
+ "context_window": meta["context_window"],
+ }
+ yield Event("usage", payload)
+
  tool_calls = [
  block
  for block in assistant_message.get("content") or []
  if isinstance(block, dict) and block.get("type") == "tool_use"
  ]
- if not tool_calls:
- break
+ if tool_calls:
+ tool_results: list[dict[str, Any]] = []
+ for tool_call in tool_calls:
+ async for event in self._run_tool_call(tool_call):
+ yield event
+
+ if event.type != "tool_done":
+ continue
+
+ d = event.data
+ output = str(d.get("output") or "")
+ metadata = d.get("metadata") if isinstance(d.get("metadata"), dict) else None
+ content = d.get("content")
+ tool_results.append(
+ tool_result_block(
+ tool_use_id=str(d.get("tool_use_id") or ""),
+ output=output,
+ metadata=metadata,
+ is_error=bool(d.get("is_error")),
+ content=content if isinstance(content, list) else None,
+ )
+ )

- tool_results: list[dict[str, Any]] = []
- for tool_call in tool_calls:
- async for event in self._run_tool_call(tool_call):
- yield event
+ if self._cancel_event.is_set():
+ tool_result_message = build_message("user", tool_results)
+ self.messages.append(tool_result_message)
+ await persist(tool_result_message)
+ return

- if event.type != "tool_done":
- continue
+ tool_result_message = build_message("user", tool_results)
+ self.messages.append(tool_result_message)
+ await persist(tool_result_message)

- d = event.data
- output = str(d.get("output") or "")
- metadata = d.get("metadata") if isinstance(d.get("metadata"), dict) else None
- content = d.get("content")
- tool_results.append(
- tool_result_block(
- tool_use_id=str(d.get("tool_use_id") or ""),
- output=output,
- metadata=metadata,
- is_error=bool(d.get("is_error")),
- content=content if isinstance(content, list) else None,
- )
+ if self._cancel_event.is_set():
+ return
+ if should_compact(total_tokens, self.context_window, self.compact_threshold):
+ try:
+ async for event in self._compact(adapter, persist, continue_now=bool(tool_calls)):
+ yield event
+ except asyncio.CancelledError:
+ raise
+ except Exception:
+ logger.warning(
+ "Context compaction failed, continuing without compaction",
+ exc_info=True,
  )

- if self._cancel_event.is_set():
- tool_result_message = build_message("user", tool_results)
- self.messages.append(tool_result_message)
- await persist(tool_result_message)
- return
-
- tool_result_message = build_message("user", tool_results)
- self.messages.append(tool_result_message)
- await persist(tool_result_message)
+ if not tool_calls:
+ break

  else:
  # while loop exhausted max_turns without breaking
  yield Event("error", {"message": "max_turns reached"})
  return

- # Turn completed normally (assistant stopped calling tools).
- # Check whether context compaction is needed.
- if not self._cancel_event.is_set():
- async for event in self._compact_if_needed(adapter, persist):
- yield event
-
  def run(
  self,
  user_input: str | ConversationMessage,
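
The new `usage` event emitted mid-loop above surfaces per-turn token readings to stream consumers. A minimal consumer sketch, assuming an `Agent` instance is already constructed and that `run` yields the same `Event` objects the loop produces (the construction arguments are not part of this diff):

async for event in agent.run("summarize the README"):
    if event.type == "usage":
        data = event.data
        pct = 100 * data["total_tokens"] / data["context_window"]
        print(f"{data['provider']}/{data['model']}: {pct:.1f}% of context used")
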
@@ -657,32 +680,12 @@ class Agent:
  # Context compaction
  # ------------------------------------------------------------------

- async def _compact_if_needed(
- self,
- adapter: ProviderAdapter,
- persist: PersistCallback,
- ) -> AsyncIterator[Event]:
- """Check token usage and run compaction if above threshold."""
-
- usage: dict[str, Any] | None = None
- for message in reversed(self.messages):
- if message.get("role") == "assistant":
- usage = (message.get("meta") or {}).get("usage")
- break
-
- if not should_compact(usage, self.context_window, self.compact_threshold):
- return
-
- try:
- async for event in self._compact(adapter, persist):
- yield event
- except (Exception, asyncio.CancelledError):
- logger.warning("Context compaction failed, continuing without compaction", exc_info=True)
-
  async def _compact(
  self,
  adapter: ProviderAdapter,
  persist: PersistCallback,
+ *,
+ continue_now: bool,
  ) -> AsyncIterator[Event]:
  """Generate a conversation summary and replace in-memory messages."""

@@ -712,29 +715,30 @@ class Agent:
  summary_message = msg

  if not summary_message:
- logger.warning("Compaction produced no response")
- return
+ raise ValueError("compaction produced no response")

  summary_text = flatten_message_text(summary_message, include_thinking=False)
  if not summary_text:
- logger.warning("Compaction produced empty summary")
- return
+ raise ValueError("compaction produced empty summary")

- summary_usage = (summary_message.get("meta") or {}).get("usage")
+ summary_total_tokens = (summary_message.get("meta") or {}).get("total_tokens")
  compact_event = build_compact_event(
  summary_text,
  provider=self.provider,
  model=self.model,
  compacted_count=compacted_count,
- usage=summary_usage,
+ total_tokens=summary_total_tokens,
  )

  # Persist the compact event (append-only — original messages stay in JSONL).
  await persist(compact_event)

- # Rebuild in-memory messages from the compact event.
  self.messages.append(compact_event)
- self.messages = apply_compact(self.messages)
+ self.messages = apply_compact(
+ self.messages,
+ transcript_path=str(self._store.messages_path(self.session_id)) if self._store else None,
+ continue_now=continue_now,
+ )

  yield Event(
  "compact",
@@ -12,7 +12,8 @@ details.
  Metadata contract:

  - assistant message `meta` keeps normalized top-level fields only:
- `provider`, `model`, `provider_message_id`, `stop_reason`, `usage`
+ `provider`, `model`, `provider_message_id`, `stop_reason`, `total_tokens`,
+ `context_window` (see docs/sessions.md for `total_tokens` semantics)
  - provider-specific assistant message extras live under `meta.native`
  - provider-specific block replay hints live under `block.meta.native`
  - local display metadata, such as `block.meta.duration_ms`, is never sent
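
Under the revised contract, a normalized assistant `meta` would look roughly like this (illustrative values only; each adapter decides what lands in `native`):

meta = {
    "provider": "anthropic",
    "model": "claude-sonnet-4-5",
    "provider_message_id": "msg_0123",
    "stop_reason": "end_turn",
    "total_tokens": 4850,
    "context_window": 200000,
    "native": {"service_tier": "standard"},
}
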
@@ -146,7 +147,7 @@ def assistant_message(
  model: str | None = None,
  provider_message_id: str | None = None,
  stop_reason: str | None = None,
- usage: Any = None,
+ total_tokens: int | None = None,
  native_meta: dict[str, Any] | None = None,
  ) -> ConversationMessage:
  """Build a normalized assistant message with shared metadata fields."""
@@ -160,8 +161,8 @@ def assistant_message(
  meta["provider_message_id"] = provider_message_id
  if stop_reason:
  meta["stop_reason"] = stop_reason
- if usage is not None:
- meta["usage"] = usage
+ if total_tokens is not None:
+ meta["total_tokens"] = total_tokens
  if native_meta:
  native = omit_none(native_meta)
  if native:
@@ -794,6 +794,13 @@
  "supports_pdf_input": true,
  "supports_reasoning": true
  },
+ "gpt-5.5-pro": {
+ "context_window": 1050000,
+ "max_output_tokens": 128000,
+ "supports_image_input": true,
+ "supports_pdf_input": true,
+ "supports_reasoning": true
+ },
  "gpt-image-1": {
  "context_window": 0,
  "max_output_tokens": 0,
@@ -1545,6 +1552,13 @@
  "supports_pdf_input": false,
  "supports_reasoning": true
  },
+ "nvidia/nemotron-3-nano-omni-30b-a3b-reasoning:free": {
+ "context_window": 256000,
+ "max_output_tokens": 65536,
+ "supports_image_input": true,
+ "supports_pdf_input": false,
+ "supports_reasoning": true
+ },
  "nvidia/nemotron-3-super-120b-a12b": {
  "context_window": 262144,
  "max_output_tokens": 262144,
@@ -1755,6 +1769,13 @@
  "supports_pdf_input": true,
  "supports_reasoning": true
  },
+ "openai/gpt-5.5-pro": {
+ "context_window": 1050000,
+ "max_output_tokens": 128000,
+ "supports_image_input": true,
+ "supports_pdf_input": true,
+ "supports_reasoning": true
+ },
  "openai/gpt-oss-120b": {
  "context_window": 131072,
  "max_output_tokens": 32768,
@@ -219,13 +219,24 @@ class AnthropicLikeAdapter(ProviderAdapter):
  native_meta["stop_sequence"] = stop_sequence
  if service_tier := getattr(message, "service_tier", None):
  native_meta["service_tier"] = service_tier
+
+ # No `total_tokens` field — compute it from input + cache + output parts.
+ raw_usage = dump_model(getattr(message, "usage", None)) or {}
+ prompt_tokens = (
+ (raw_usage.get("input_tokens") or 0)
+ + (raw_usage.get("cache_creation_input_tokens") or 0)
+ + (raw_usage.get("cache_read_input_tokens") or 0)
+ )
+ output_tokens = raw_usage.get("output_tokens") or 0
+ total_tokens = prompt_tokens + output_tokens or None
+
  return assistant_message(
  blocks,
  provider=self.provider_id,
  model=getattr(message, "model", None),
  provider_message_id=getattr(message, "id", None),
  stop_reason=getattr(message, "stop_reason", None),
- usage=dump_model(getattr(message, "usage", None)),
+ total_tokens=total_tokens,
  native_meta=native_meta,
  )

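Worked through with invented numbers, the computation above folds both cache buckets into the prompt side (and since `+` binds tighter than `or`, an all-zero payload collapses to None):

raw_usage = {
    "input_tokens": 1_200,
    "cache_creation_input_tokens": 0,
    "cache_read_input_tokens": 3_400,
    "output_tokens": 250,
}
# prompt_tokens = 1200 + 0 + 3400 = 4600
# total_tokens  = (4600 + 250) or None = 4850
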
@@ -92,6 +92,9 @@ class GoogleGeminiAdapter(ProviderAdapter):
  except Exception:
  pass

+ raw_usage = usage or {}
+ total_tokens = raw_usage.get("total_token_count") or None
+
  yield ProviderStreamEvent(
  "message_done",
  {
@@ -101,7 +104,7 @@ class GoogleGeminiAdapter(ProviderAdapter):
  model=response_model or request.model,
  provider_message_id=response_id,
  stop_reason=str(finish_reason) if finish_reason else None,
- usage=usage,
+ total_tokens=total_tokens,
  native_meta={"finish_message": str(finish_message)} if finish_message else None,
  )
  },
@@ -135,13 +135,16 @@ class OpenAIChatAdapter(ProviderAdapter):
  )
  )

+ raw_usage = dump_model(usage) or {}
+ total_tokens = raw_usage.get("total_tokens") or None
+
  final_message = assistant_message(
  blocks,
  provider=self.provider_id,
  model=response_model or request.model,
  provider_message_id=response_id,
  stop_reason=finish_reason,
- usage=dump_model(usage),
+ total_tokens=total_tokens,
  )
  yield ProviderStreamEvent("message_done", {"message": final_message})

@@ -361,12 +361,15 @@ class OpenAIResponsesAdapter(ProviderAdapter):
  )
  )

+ raw_usage = dump_model(getattr(response, "usage", None)) or {}
+ total_tokens = raw_usage.get("total_tokens") or None
+
  return assistant_message(
  blocks,
  provider=self.provider_id,
  model=getattr(response, "model", None),
  provider_message_id=getattr(response, "id", None),
  stop_reason=getattr(response, "status", None),
- usage=dump_model(getattr(response, "usage", None)),
+ total_tokens=total_tokens,
  native_meta={"output_items": dumped_output_items} if dumped_output_items else None,
  )
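
Taken together, the adapter hunks above normalize three differently shaped usage payloads into the single `meta.total_tokens` field. A condensed restatement as a sketch (the field names are the ones each diff reads; the helper itself is not part of the package):

from typing import Any

def normalized_total(provider: str, raw_usage: dict[str, Any]) -> int | None:
    if provider == "anthropic":
        # No ready-made total: sum input, cache-creation, cache-read, and output.
        total = (
            (raw_usage.get("input_tokens") or 0)
            + (raw_usage.get("cache_creation_input_tokens") or 0)
            + (raw_usage.get("cache_read_input_tokens") or 0)
            + (raw_usage.get("output_tokens") or 0)
        )
        return total or None
    if provider == "google":
        # Gemini reports the total directly as total_token_count.
        return raw_usage.get("total_token_count") or None
    # OpenAI chat and responses payloads both carry total_tokens.
    return raw_usage.get("total_tokens") or None
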
@@ -35,26 +35,38 @@ capture everything needed to continue the work seamlessly.

  Include:

- 1. **User Requests**: Every distinct request or instruction the user gave, \
+ 1. **Task and Intent**: Describe the user's overall goal: what is being \
+ built, fixed, or investigated, and why.
+ 2. **Decisions and Constraints**: List the decisions made, constraints \
+ discovered, and approaches chosen or rejected, with the reasoning behind \
+ each.
+ 3. **User Requests**: Every distinct request or instruction the user gave, \
  in chronological order. Preserve the user's original wording for ambiguous \
  or nuanced requests.
- 2. **Completed Work**: What was accomplished — files created, modified, or \
- deleted; bugs fixed; features added. Include file paths and function names.
- 3. **Current State**: The exact state of the work right now — what is working, \
- what is broken, what is partially done.
- 4. **Key Decisions**: Important decisions made, constraints discovered, \
- approaches chosen or rejected, and why.
- 5. **Next Steps**: What remains to be done, any work that was in progress \
- when this summary was generated.
+ 4. **Files and Changes**: Enumerate every file read, modified, or created: \
+ paths, what changed, and any code snippets the next turn will need to \
+ reason about, quoted verbatim.
+ 5. **Errors and Fixes**: List errors encountered, with the original message \
+ verbatim, the cause if known, and the resolution — or that it remains open.
+ 6. **Current State**: What is verified working, what is known broken, what \
+ is in progress.
+ 7. **Next Step**: The next step to take, with a direct quote from the most \
+ recent conversation showing where the work left off.

  Rules:
- - Be specific: include file paths, function names, error messages, and \
- concrete details.
+ - Be specific: reproduce file paths, function names, error messages, and \
+ other identifiers verbatim — never paraphrase them.
  - Do not add suggestions or opinions — only summarize what happened.
  - Keep it concise but complete.\
  """

- _COMPACT_ACK = "Understood. I have the context from the conversation summary and will continue the work."
+ _CONTINUATION_HEADER = "This session is being continued from a previous conversation that was compacted to fit the context window. The summary below covers the earlier portion of the conversation."
+
+ _TRANSCRIPT_HINT = "For verbatim details not captured in this summary (exact code snippets, error messages, or earlier output), read the original conversation log at: {path}"
+
+ _CONTINUATION_FOOTER = 'Resume directly from where the work left off. Do not acknowledge this summary, do not recap, and do not preface with "I\'ll continue" or similar.'
+
+ _COMPACT_ACK = "Acknowledged."


  # ---------------------------------------------------------------------
@@ -67,20 +79,20 @@ def _now() -> str:


  def should_compact(
- last_usage: dict[str, Any] | None,
+ last_total_tokens: int | None,
  context_window: int | None,
  threshold: float,
  ) -> bool:
- """Return True when the last response input tokens exceed the threshold."""
+ """True when the latest call's `total_tokens` ≥ `context_window × threshold`.

- if not last_usage or not context_window or threshold <= 0:
- return False
+ `total_tokens` already covers the next API call's prompt floor, so it is
+ the right input here. The `(1 - threshold)` headroom is reserved for the
+ compact LLM call itself (see docs/sessions.md).
+ """

- # Providers report prompt/input usage under slightly different field names.
- input_tokens = int(
- last_usage.get("input_tokens") or last_usage.get("prompt_tokens") or last_usage.get("prompt_token_count") or 0
- )
- return input_tokens >= context_window * threshold
+ if not last_total_tokens or not context_window or threshold <= 0:
+ return False
+ return last_total_tokens >= context_window * threshold


  def build_compact_event(
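
Concretely, the new predicate reduces to one comparison against a fixed line in the window. A worked example (the numbers and the 0.8 threshold are invented; the default `compact_threshold` is not shown in this diff):

# context_window=200_000, threshold=0.8 -> compaction triggers at 160_000
should_compact(159_999, 200_000, 0.8)  # False: still under the 80% line
should_compact(160_000, 200_000, 0.8)  # True: the remaining 20% is headroom
should_compact(None, 200_000, 0.8)     # False: no token reading yet
should_compact(120_000, None, 0.8)     # False: unknown context window
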
@@ -89,7 +101,7 @@ build_compact_event(
  provider: str,
  model: str,
  compacted_count: int,
- usage: dict[str, Any] | None = None,
+ total_tokens: int | None = None,
  ) -> ConversationMessage:
  """Build the compact event stored in session JSONL."""

@@ -98,13 +110,22 @@
  "model": model,
  "compacted_count": compacted_count,
  }
- if usage is not None:
- meta["usage"] = usage
+ if total_tokens is not None:
+ meta["total_tokens"] = total_tokens
  return build_message("compact", [text_block(summary_text)], meta=meta)


- def apply_compact(messages: list[ConversationMessage]) -> list[ConversationMessage]:
- """Replace the latest compact event with a summary + synthetic ack."""
+ def apply_compact(
+ messages: list[ConversationMessage],
+ *,
+ transcript_path: str | None = None,
+ continue_now: bool | None = None,
+ ) -> list[ConversationMessage]:
+ """Replace the latest compact event with a synthetic summary view.
+
+ ``continue_now`` omits the ack and leaves a user instruction last so the
+ agent loop can immediately request the next assistant response.
+ """

  # Only the newest compact event matters. Older history before it is no
  # longer visible once the summary replaces that earlier conversation.
@@ -122,15 +143,23 @@ def apply_compact(messages: list[ConversationMessage]) -> list[ConversationMessa
  summary_text = str(block.get("text") or "")
  break

- return [
- build_message(
- "user",
- [text_block(f"[Conversation Summary]\n\n{summary_text}")],
- meta={"synthetic": True},
- ),
- build_message("assistant", [text_block(_COMPACT_ACK)], meta={"synthetic": True}),
- *messages[last_compact_index + 1 :],
- ]
+ tail = messages[last_compact_index + 1 :]
+ if continue_now is None:
+ # During live tool-loop compaction the next persisted message is the
+ # assistant continuation. Waiting compaction has no tail yet.
+ continue_now = bool(tail and tail[0].get("role") == "assistant")
+
+ parts = [_CONTINUATION_HEADER, summary_text]
+ if transcript_path:
+ parts.append(_TRANSCRIPT_HINT.format(path=transcript_path))
+ if continue_now:
+ parts.append(_CONTINUATION_FOOTER)
+
+ result = [build_message("user", [text_block("\n\n".join(parts))], meta={"synthetic": True})]
+ if not continue_now:
+ result.append(build_message("assistant", [text_block(_COMPACT_ACK)], meta={"synthetic": True}))
+ result.extend(tail)
+ return result


  def build_rewind_event(rewind_to: int) -> ConversationMessage:
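
The `continue_now` switch changes the shape of the rebuilt history. A sketch of the two outcomes (message contents elided; the bracketed lists stand for the returned message sequence):

# Waiting compaction (continue_now=False): keep the synthetic ack so the
# transcript still alternates and the next real message can be user input.
#   [user(header + summary + hint), assistant("Acknowledged."), *tail]

# Live tool-loop compaction (continue_now=True): the footer instruction is
# last, so the agent loop can immediately request the next assistant turn.
#   [user(header + summary + hint + footer), *tail]
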
@@ -317,7 +346,10 @@ class SessionStore:
  # 2) rewind truncates that visible list by message index
  # Orphan tool_use blocks (e.g. left open by a server crash) are
  # closed by the provider adapter at replay time, not here.
- visible_messages = apply_compact(raw_messages)
+ visible_messages = apply_compact(
+ raw_messages,
+ transcript_path=str(self.messages_path(session_id)),
+ )
  visible_messages = apply_rewind(visible_messages)

  return {"session": self._summary(session_id, meta), "messages": visible_messages}