hdsp-jupyter-extension 2.0.25__py3-none-any.whl → 2.0.27__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. agent_server/langchain/agent_factory.py +14 -14
  2. agent_server/langchain/agent_prompts/planner_prompt.py +17 -27
  3. agent_server/langchain/custom_middleware.py +83 -17
  4. agent_server/langchain/middleware/code_history_middleware.py +126 -37
  5. agent_server/langchain/middleware/subagent_middleware.py +24 -2
  6. agent_server/langchain/models/gpt_oss_chat.py +26 -13
  7. agent_server/langchain/prompts.py +11 -8
  8. agent_server/langchain/tools/jupyter_tools.py +43 -0
  9. agent_server/routers/langchain_agent.py +235 -23
  10. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
  11. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
  12. hdsp_jupyter_extension-2.0.25.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.ffc2b4bc8e6cb300e1e1.js → hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4ab73bb5068405670214.js +2 -2
  13. jupyter_ext/labextension/static/remoteEntry.ffc2b4bc8e6cb300e1e1.js.map → hdsp_jupyter_extension-2.0.27.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4ab73bb5068405670214.js.map +1 -1
  14. {hdsp_jupyter_extension-2.0.25.dist-info → hdsp_jupyter_extension-2.0.27.dist-info}/METADATA +1 -1
  15. {hdsp_jupyter_extension-2.0.25.dist-info → hdsp_jupyter_extension-2.0.27.dist-info}/RECORD +45 -45
  16. jupyter_ext/_version.py +1 -1
  17. jupyter_ext/labextension/build_log.json +1 -1
  18. jupyter_ext/labextension/package.json +2 -2
  19. jupyter_ext/labextension/static/{remoteEntry.ffc2b4bc8e6cb300e1e1.js → remoteEntry.4ab73bb5068405670214.js} +2 -2
  20. hdsp_jupyter_extension-2.0.25.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.ffc2b4bc8e6cb300e1e1.js.map → jupyter_ext/labextension/static/remoteEntry.4ab73bb5068405670214.js.map +1 -1
  21. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
  22. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
  23. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js +0 -0
  24. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.b5e4416b4e07ec087aad.js.map +0 -0
  25. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js +0 -0
  26. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.67505497667f9c0a763d.js.map +0 -0
  27. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
  28. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
  29. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
  30. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
  31. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
  32. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
  33. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
  34. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
  35. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
  36. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
  37. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
  38. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
  39. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
  40. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
  41. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
  42. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
  43. {hdsp_jupyter_extension-2.0.25.data → hdsp_jupyter_extension-2.0.27.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
  44. {hdsp_jupyter_extension-2.0.25.dist-info → hdsp_jupyter_extension-2.0.27.dist-info}/WHEEL +0 -0
  45. {hdsp_jupyter_extension-2.0.25.dist-info → hdsp_jupyter_extension-2.0.27.dist-info}/licenses/LICENSE +0 -0
@@ -315,24 +315,24 @@ def create_main_agent(
315
315
  except Exception as e:
316
316
  logger.warning(f"Failed to add SummarizationMiddleware: {e}")
317
317
 
318
- # Build system prompt - FORCE default prompt for testing
319
- # TODO: Remove this override after frontend localStorage is cleared
320
- # Original priority: system_prompt_override > agent_prompts.planner > default
321
- # DEBUG: Log all prompt sources to find root cause of MALFORMED_FUNCTION_CALL
318
+ # Build system prompt with priority: system_prompt_override > agent_prompts.planner > default
322
319
  logger.info(
323
- "DEBUG Main Agent prompt sources: system_prompt_override=%s, "
324
- "agent_prompts.planner=%s, using=DEFAULT",
320
+ "Main Agent prompt sources: system_prompt_override=%s (len=%d), "
321
+ "agent_prompts.planner=%s",
325
322
  bool(system_prompt_override),
323
+ len(system_prompt_override) if system_prompt_override else 0,
326
324
  bool(agent_prompts.get("planner") if agent_prompts else None),
327
325
  )
328
- if agent_prompts:
329
- logger.info(
330
- "DEBUG: agent_prompts keys=%s, planner prompt length=%d",
331
- list(agent_prompts.keys()),
332
- len(agent_prompts.get("planner", "") or ""),
333
- )
334
- system_prompt = PLANNER_SYSTEM_PROMPT
335
- logger.info("Using PLANNER_SYSTEM_PROMPT (length=%d)", len(system_prompt))
326
+
327
+ if system_prompt_override and system_prompt_override.strip():
328
+ system_prompt = system_prompt_override.strip()
329
+ logger.info("Using system_prompt_override (length=%d)", len(system_prompt))
330
+ elif agent_prompts and agent_prompts.get("planner"):
331
+ system_prompt = agent_prompts["planner"]
332
+ logger.info("Using agent_prompts.planner (length=%d)", len(system_prompt))
333
+ else:
334
+ system_prompt = PLANNER_SYSTEM_PROMPT
335
+ logger.info("Using PLANNER_SYSTEM_PROMPT (length=%d)", len(system_prompt))
336
336
 
337
337
  # Log provider info for debugging
338
338
  provider = llm_config.get("provider", "")
@@ -5,9 +5,10 @@ Main Agent (Supervisor) System Prompt for Multi-Agent Mode
5
5
  PLANNER_SYSTEM_PROMPT = """당신은 작업을 조율하는 Main Agent입니다. 한국어로 응답하세요.
6
6
 
7
7
  # 핵심 원칙
8
- 2. 3단계 이상의 복잡한 작업을 요청받은 경우에만 write_todos 로 작업 목록 관리
9
- 3. **직접 코드, 쿼리 작성 금지** - 모든 코드/쿼리 생성은 task_tool로 서브에이전트에게 위임
8
+ 1. 3단계 이상의 복잡한 작업을 요청받은 경우에만 write_todos 로 작업 목록 관리
9
+ 2. **직접 코드, 쿼리 작성 금지** - 모든 코드/쿼리 생성은 task_tool로 서브에이전트에게 위임
10
10
  3. 서브에이전트가 반환한 코드를 적절한 도구로 실행
11
+ 4. 모든 응답 content는 2~3줄 내외로 핵심만 명확하게 전달
11
12
 
12
13
  # 작업 흐름
13
14
 
@@ -55,33 +56,22 @@ PLANNER_SYSTEM_PROMPT = """당신은 작업을 조율하는 Main Agent입니다.
55
56
  - content에 도구(tool)명 언급 금지
56
57
  - **[필수] 마지막 todo는 반드시 "작업 요약 및 다음 단계 제시"**
57
58
 
58
- # "작업 요약 및 다음 단계 제시" todo 작업 순서 [필수]
59
- 1. "작업 요약 및 다음 단계 제시"를 **in_progress**로 변경 (write_todos 호출)
60
- 2. **같은 응답에서** 아래 JSON을 텍스트로 출력:
61
- {
62
- "summary": "완료된 작업 요약",
63
- "next_items": [
64
- {
65
- "subject": "제목",
66
- "description": "설명"
67
- }
68
- ]
69
- }
70
- 3. JSON 출력과 함께 "작업 요약 및 다음 단계 제시"를 **completed**로 변경
71
- **중요**: JSON은 반드시 in_progress 상태일 때 출력! completed 먼저 표시 금지!
72
- - next_items 3개 이상 필수
73
- - **summary JSON 없이 종료 금지**
74
- - **주의**: JSON은 todo 항목이 아닌 일반 텍스트 응답으로 출력
75
-
76
-
77
- # 도구 사용시 주의할 점
59
+ # "작업 요약 및 다음 단계 제시" todo 완료 [필수]
60
+ 1. "작업 요약 및 다음 단계 제시"를 **in_progress**로 변경 (write_todos 호출)
61
+ 2. **반드시 final_summary_tool 호출**:
62
+ final_summary_tool(
63
+ summary="완료된 작업 요약",
64
+ next_items=[{"subject": "제목", "description": "설명"}, ...]
65
+ )
66
+
67
+ 3. final_summary_tool 호출 후 "작업 요약 및 다음 단계 제시" **completed**로 변경
78
68
 
79
- ## 서브에이전트 호출 (코드/쿼리 생성 시 필수)
80
- - task_tool: 서브에이전트에게 작업 위임
69
+ - next_items 3개 이상 필수
70
+ - **final_summary_tool 호출 없이 종료 금지**
81
71
 
82
- ## 탐색 (⚠️ 파일 위치 모를 때: search_files_tool → list_workspace_tool → 재검색 → ask_user_tool 순서로!)
83
- - list_workspace_tool: 파일/디렉토리 목록
84
- - search_files_tool: 파일 내용 검색 (regex 지원, 예: "titanic|error|*.csv")
72
+ # 도구 사용시 주의할
73
+ ## 파일 위치 모를 때 탐색 순서: search_files_tool → list_workspace_tool 재검색 → ask_user_tool 순서로!)
74
+ ## list_workspace_tool로 전체 디렉토리 파일 목록 검색 금지! 최대한 pattern 으로 drill down 해서 검색할 것
85
75
 
86
76
  # 금지 사항
87
77
  - 직접 코드/SQL 작성 (반드시 task_tool 사용)
@@ -165,6 +165,22 @@ def try_extract_tool_calls_from_additional_kwargs(
165
165
  if not raw_tool_calls:
166
166
  return None
167
167
 
168
+ # IMPORTANT: Only use the first tool_call to prevent parallel execution issues
169
+ # LLM sometimes generates multiple tool_calls despite prompt instructions
170
+ if len(raw_tool_calls) > 1:
171
+ first_tc = raw_tool_calls[0]
172
+ first_name = first_tc.get("function", {}).get("name", "unknown")
173
+ ignored_names = [
174
+ tc.get("function", {}).get("name", "unknown") for tc in raw_tool_calls[1:]
175
+ ]
176
+ logger.warning(
177
+ "Multiple tool_calls in additional_kwargs (%d), using only first one: %s. Ignored: %s",
178
+ len(raw_tool_calls),
179
+ first_name,
180
+ ignored_names,
181
+ )
182
+ raw_tool_calls = raw_tool_calls[:1]
183
+
168
184
  repaired_tool_calls = []
169
185
  for tc in raw_tool_calls:
170
186
  func = tc.get("function", {})
@@ -316,8 +332,7 @@ def create_handle_empty_response_middleware(wrap_model_call):
316
332
  summary_todo_completed = all_todos_completed and last_todo_is_summary
317
333
 
318
334
  if not summary_todo_completed and any(
319
- t.get("status") == "completed"
320
- and "작업 요약" in t.get("content", "")
335
+ t.get("status") == "completed" and "작업 요약" in t.get("content", "")
321
336
  for t in todos
322
337
  ):
323
338
  logger.debug(
@@ -439,15 +454,16 @@ def create_handle_empty_response_middleware(wrap_model_call):
439
454
  content = " ".join(str(p) for p in content)
440
455
 
441
456
  # Check if content contains summary JSON pattern
442
- has_summary_pattern = ('"summary"' in content or "'summary'" in content) and (
443
- '"next_items"' in content or "'next_items'" in content
444
- )
457
+ has_summary_pattern = (
458
+ '"summary"' in content or "'summary'" in content
459
+ ) and ('"next_items"' in content or "'next_items'" in content)
445
460
 
446
461
  if has_summary_pattern:
447
462
  # Check if pending todos exist - if so, don't force complete
448
463
  current_todos = request.state.get("todos", [])
449
464
  pending_todos = [
450
- t for t in current_todos
465
+ t
466
+ for t in current_todos
451
467
  if isinstance(t, dict) and t.get("status") == "pending"
452
468
  ]
453
469
  if pending_todos:
@@ -463,7 +479,12 @@ def create_handle_empty_response_middleware(wrap_model_call):
463
479
  try:
464
480
  # Try to find JSON object containing summary
465
481
  import re
466
- json_match = re.search(r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}', content, re.DOTALL)
482
+
483
+ json_match = re.search(
484
+ r'\{[^{}]*"summary"[^{}]*"next_items"[^{}]*\}',
485
+ content,
486
+ re.DOTALL,
487
+ )
467
488
  if json_match:
468
489
  repaired_summary = repair_json(
469
490
  json_match.group(), return_objects=True
@@ -503,7 +524,9 @@ def create_handle_empty_response_middleware(wrap_model_call):
503
524
  )
504
525
  return response
505
526
  except Exception as e:
506
- logger.debug(f"Failed to extract summary JSON from mixed content: {e}")
527
+ logger.debug(
528
+ f"Failed to extract summary JSON from mixed content: {e}"
529
+ )
507
530
 
508
531
  # Fallback: accept as-is if repair failed but looks like summary
509
532
  logger.info(
@@ -543,7 +566,9 @@ def create_handle_empty_response_middleware(wrap_model_call):
543
566
  is_summary_todo = any(
544
567
  kw in current_todo.get("content", "") for kw in summary_keywords
545
568
  )
546
- if is_summary_todo and ('"summary"' in content or "'summary'" in content):
569
+ if is_summary_todo and (
570
+ '"summary"' in content or "'summary'" in content
571
+ ):
547
572
  # This is a summary todo with summary content - accept it
548
573
  logger.info(
549
574
  "Summary todo with summary content detected - accepting"
@@ -785,7 +810,7 @@ def _create_synthetic_completion(request, response_message, has_content):
785
810
  logger.warning(
786
811
  "Force-completing %d pending todos that were never started: %s",
787
812
  pending_count,
788
- [t.get("content") for t in todos if t.get("status") == "pending"]
813
+ [t.get("content") for t in todos if t.get("status") == "pending"],
789
814
  )
790
815
 
791
816
  # Mark all todos as completed
@@ -858,6 +883,31 @@ def create_limit_tool_calls_middleware(wrap_model_call):
858
883
  )
859
884
  msg.tool_calls = [tool_calls[0]]
860
885
 
886
+ # Remove additional_kwargs["tool_calls"] entirely when
887
+ # msg.tool_calls exists. ChatOpenAI duplicates tool_calls
888
+ # into additional_kwargs, and leftover entries pollute the
889
+ # conversation context - LLM sees them and assumes all
890
+ # listed tool calls were executed.
891
+ additional_kwargs = getattr(msg, "additional_kwargs", {})
892
+ if msg.tool_calls and additional_kwargs.get("tool_calls"):
893
+ removed_count = len(additional_kwargs["tool_calls"])
894
+ del additional_kwargs["tool_calls"]
895
+ logger.info(
896
+ "Removed %d tool_calls from additional_kwargs "
897
+ "(canonical source: msg.tool_calls)",
898
+ removed_count,
899
+ )
900
+
901
+ # Clear content when tool_calls exist to avoid duplicate information
902
+ # Some models return both content and tool_calls, causing redundant
903
+ # "thinking" text in the conversation history
904
+ if msg.tool_calls and msg.content:
905
+ logger.info(
906
+ "Clearing AIMessage content (len=%d) because tool_calls exist",
907
+ len(msg.content),
908
+ )
909
+ msg.content = ""
910
+
861
911
  return response
862
912
 
863
913
  return limit_tool_calls_to_one
@@ -1026,29 +1076,45 @@ def create_normalize_tool_args_middleware(wrap_model_call, tools=None):
1026
1076
 
1027
1077
  # Validate: "작업 요약 및 다음 단계 제시" cannot be in_progress if pending todos exist
1028
1078
  # This prevents LLM from skipping pending tasks
1029
- summary_keywords = ["작업 요약", "다음 단계 제시"]
1079
+ summary_keywords = [
1080
+ "작업 요약",
1081
+ "다음 단계 제시",
1082
+ ]
1030
1083
  for i, todo in enumerate(todos):
1031
1084
  if not isinstance(todo, dict):
1032
1085
  continue
1033
1086
  content = todo.get("content", "")
1034
- is_summary_todo = any(kw in content for kw in summary_keywords)
1087
+ is_summary_todo = any(
1088
+ kw in content for kw in summary_keywords
1089
+ )
1035
1090
 
1036
- if is_summary_todo and todo.get("status") == "in_progress":
1091
+ if (
1092
+ is_summary_todo
1093
+ and todo.get("status") == "in_progress"
1094
+ ):
1037
1095
  # Check if there are pending todos before this one
1038
1096
  pending_before = [
1039
- t for t in todos[:i]
1040
- if isinstance(t, dict) and t.get("status") == "pending"
1097
+ t
1098
+ for t in todos[:i]
1099
+ if isinstance(t, dict)
1100
+ and t.get("status") == "pending"
1041
1101
  ]
1042
1102
  if pending_before:
1043
1103
  # Revert summary todo to pending
1044
1104
  todo["status"] = "pending"
1045
1105
  # Set the first pending todo to in_progress
1046
1106
  for t in todos:
1047
- if isinstance(t, dict) and t.get("status") == "pending":
1107
+ if (
1108
+ isinstance(t, dict)
1109
+ and t.get("status")
1110
+ == "pending"
1111
+ ):
1048
1112
  t["status"] = "in_progress"
1049
1113
  logger.warning(
1050
1114
  "Reverted summary todo to pending, set '%s' to in_progress (pending todos exist)",
1051
- t.get("content", "")[:30],
1115
+ t.get("content", "")[
1116
+ :30
1117
+ ],
1052
1118
  )
1053
1119
  break
1054
1120
  break
@@ -12,11 +12,12 @@ Features:
12
12
 
13
13
  import logging
14
14
  import threading
15
- import tiktoken
16
15
  from dataclasses import dataclass, field
17
16
  from datetime import datetime
18
17
  from typing import Any, Dict, List, Optional
19
18
 
19
+ import tiktoken
20
+
20
21
  logger = logging.getLogger(__name__)
21
22
 
22
23
  # Token limit for context (including system prompt)
@@ -31,7 +32,9 @@ PYTHON_DEV_SYSTEM_PROMPT_TOKENS = 2000
31
32
  class CodeHistoryEntry:
32
33
  """Represents a single code execution or file operation."""
33
34
 
34
- tool_name: str # jupyter_cell_tool, write_file_tool, edit_file_tool, multiedit_file_tool
35
+ tool_name: (
36
+ str # jupyter_cell_tool, write_file_tool, edit_file_tool, multiedit_file_tool
37
+ )
35
38
  timestamp: datetime = field(default_factory=datetime.now)
36
39
 
37
40
  # For jupyter_cell_tool
@@ -50,7 +53,9 @@ class CodeHistoryEntry:
50
53
  timestamp_str = self.timestamp.strftime("%H:%M:%S")
51
54
 
52
55
  if self.tool_name == "jupyter_cell_tool":
53
- output_preview = self._truncate(self.output, 500) if self.output else "(no output)"
56
+ output_preview = (
57
+ self._truncate(self.output, 500) if self.output else "(no output)"
58
+ )
54
59
  return f"""## Cell ({timestamp_str})
55
60
  ```python
56
61
  {self.code}
@@ -86,7 +91,7 @@ Changes: {edit_count} edits applied"""
86
91
  if self.tool_name == "jupyter_cell_tool":
87
92
  # Extract first meaningful line of code
88
93
  if self.code:
89
- first_line = self.code.strip().split('\n')[0][:60]
94
+ first_line = self.code.strip().split("\n")[0][:60]
90
95
  return f"- Cell: {first_line}..."
91
96
  return "- Cell: (empty)"
92
97
 
@@ -156,7 +161,9 @@ class CodeHistoryTracker:
156
161
  output=output,
157
162
  )
158
163
  self._history.append(entry)
159
- logger.info(f"CodeHistory: Added jupyter_cell (total: {len(self._history)})")
164
+ logger.info(
165
+ f"CodeHistory: Added jupyter_cell (total: {len(self._history)})"
166
+ )
160
167
 
161
168
  def add_write_file(self, file_path: str, content: str) -> None:
162
169
  """Track a write_file_tool execution."""
@@ -167,7 +174,9 @@ class CodeHistoryTracker:
167
174
  content=content,
168
175
  )
169
176
  self._history.append(entry)
170
- logger.info(f"CodeHistory: Added write_file {file_path} (total: {len(self._history)})")
177
+ logger.info(
178
+ f"CodeHistory: Added write_file {file_path} (total: {len(self._history)})"
179
+ )
171
180
 
172
181
  def add_edit_file(self, file_path: str, old_content: str, new_content: str) -> None:
173
182
  """Track an edit_file_tool execution."""
@@ -179,7 +188,9 @@ class CodeHistoryTracker:
179
188
  new_content=new_content,
180
189
  )
181
190
  self._history.append(entry)
182
- logger.info(f"CodeHistory: Added edit_file {file_path} (total: {len(self._history)})")
191
+ logger.info(
192
+ f"CodeHistory: Added edit_file {file_path} (total: {len(self._history)})"
193
+ )
183
194
 
184
195
  def add_multiedit_file(self, file_path: str, edits: List[Dict[str, str]]) -> None:
185
196
  """Track a multiedit_file_tool execution."""
@@ -190,7 +201,9 @@ class CodeHistoryTracker:
190
201
  edits=edits,
191
202
  )
192
203
  self._history.append(entry)
193
- logger.info(f"CodeHistory: Added multiedit_file {file_path} (total: {len(self._history)})")
204
+ logger.info(
205
+ f"CodeHistory: Added multiedit_file {file_path} (total: {len(self._history)})"
206
+ )
194
207
 
195
208
  def get_context_for_subagent(
196
209
  self,
@@ -216,8 +229,12 @@ class CodeHistoryTracker:
216
229
  return existing_context or ""
217
230
 
218
231
  # Calculate available tokens for history
219
- existing_tokens = self._count_tokens(existing_context) if existing_context else 0
220
- available_tokens = max_tokens - system_prompt_tokens - existing_tokens - 500 # 500 buffer
232
+ existing_tokens = (
233
+ self._count_tokens(existing_context) if existing_context else 0
234
+ )
235
+ available_tokens = (
236
+ max_tokens - system_prompt_tokens - existing_tokens - 500
237
+ ) # 500 buffer
221
238
 
222
239
  # Build full history string
223
240
  full_history = self._build_full_history()
@@ -311,49 +328,110 @@ class CodeHistoryTracker:
311
328
  return len(self._history)
312
329
 
313
330
 
314
- # Global tracker instance (per-thread tracking could be added if needed)
315
- _code_history_tracker: Optional[CodeHistoryTracker] = None
331
+ # Global tracker instances per threadId
332
+ _code_history_trackers: Dict[str, CodeHistoryTracker] = {}
333
+ _trackers_lock = threading.Lock()
334
+
335
+
336
+ def get_code_history_tracker(thread_id: Optional[str] = None) -> CodeHistoryTracker:
337
+ """
338
+ Get the CodeHistoryTracker instance for the given thread_id.
339
+
340
+ Args:
341
+ thread_id: Thread ID for session isolation. If None, returns a temporary tracker.
316
342
 
343
+ Returns:
344
+ CodeHistoryTracker instance for the thread
345
+ """
346
+ if thread_id is None:
347
+ logger.warning(
348
+ "get_code_history_tracker called without thread_id - using temporary tracker"
349
+ )
350
+ return CodeHistoryTracker()
317
351
 
318
- def get_code_history_tracker() -> CodeHistoryTracker:
319
- """Get the global CodeHistoryTracker instance."""
320
- global _code_history_tracker
321
- if _code_history_tracker is None:
322
- _code_history_tracker = CodeHistoryTracker()
323
- return _code_history_tracker
352
+ with _trackers_lock:
353
+ if thread_id not in _code_history_trackers:
354
+ _code_history_trackers[thread_id] = CodeHistoryTracker()
355
+ logger.info(f"CodeHistory: Created new tracker for thread_id={thread_id}")
356
+ return _code_history_trackers[thread_id]
324
357
 
325
358
 
326
- def track_jupyter_cell(code: str, output: str) -> None:
359
+ def track_jupyter_cell(code: str, output: str, thread_id: Optional[str] = None) -> None:
327
360
  """Convenience function to track jupyter_cell_tool execution."""
328
- get_code_history_tracker().add_jupyter_cell(code, output)
361
+ get_code_history_tracker(thread_id).add_jupyter_cell(code, output)
329
362
 
330
363
 
331
- def track_write_file(file_path: str, content: str) -> None:
364
+ def track_write_file(
365
+ file_path: str, content: str, thread_id: Optional[str] = None
366
+ ) -> None:
332
367
  """Convenience function to track write_file_tool execution."""
333
- get_code_history_tracker().add_write_file(file_path, content)
368
+ get_code_history_tracker(thread_id).add_write_file(file_path, content)
334
369
 
335
370
 
336
- def track_edit_file(file_path: str, old_content: str, new_content: str) -> None:
371
+ def track_edit_file(
372
+ file_path: str,
373
+ old_content: str,
374
+ new_content: str,
375
+ thread_id: Optional[str] = None,
376
+ ) -> None:
337
377
  """Convenience function to track edit_file_tool execution."""
338
- get_code_history_tracker().add_edit_file(file_path, old_content, new_content)
378
+ get_code_history_tracker(thread_id).add_edit_file(
379
+ file_path, old_content, new_content
380
+ )
339
381
 
340
382
 
341
- def track_multiedit_file(file_path: str, edits: List[Dict[str, str]]) -> None:
383
+ def track_multiedit_file(
384
+ file_path: str,
385
+ edits: List[Dict[str, str]],
386
+ thread_id: Optional[str] = None,
387
+ ) -> None:
342
388
  """Convenience function to track multiedit_file_tool execution."""
343
- get_code_history_tracker().add_multiedit_file(file_path, edits)
389
+ get_code_history_tracker(thread_id).add_multiedit_file(file_path, edits)
344
390
 
345
391
 
346
- def get_context_with_history(existing_context: Optional[str] = None) -> str:
392
+ def get_context_with_history(
393
+ existing_context: Optional[str] = None,
394
+ thread_id: Optional[str] = None,
395
+ ) -> str:
347
396
  """Get context string with code history injected."""
348
- return get_code_history_tracker().get_context_for_subagent(existing_context)
397
+ return get_code_history_tracker(thread_id).get_context_for_subagent(
398
+ existing_context
399
+ )
400
+
349
401
 
402
+ def clear_code_history(thread_id: Optional[str] = None) -> None:
403
+ """
404
+ Clear code history for a specific thread or all threads.
350
405
 
351
- def clear_code_history() -> None:
352
- """Clear all code history."""
353
- get_code_history_tracker().clear()
406
+ Args:
407
+ thread_id: Thread ID to clear. If None, clears all threads.
408
+ """
409
+ if thread_id is None:
410
+ # Clear all trackers
411
+ with _trackers_lock:
412
+ for tid, tracker in _code_history_trackers.items():
413
+ tracker.clear()
414
+ logger.info(f"CodeHistory: Cleared history for thread_id={tid}")
415
+ _code_history_trackers.clear()
416
+ logger.info("CodeHistory: Cleared all thread trackers")
417
+ else:
418
+ # Clear specific thread
419
+ with _trackers_lock:
420
+ if thread_id in _code_history_trackers:
421
+ _code_history_trackers[thread_id].clear()
422
+ del _code_history_trackers[thread_id]
423
+ logger.info(
424
+ f"CodeHistory: Cleared and removed tracker for thread_id={thread_id}"
425
+ )
426
+ else:
427
+ logger.info(f"CodeHistory: No tracker found for thread_id={thread_id}")
354
428
 
355
429
 
356
- def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
430
+ def track_tool_execution(
431
+ tool_name: str,
432
+ args: Dict[str, Any],
433
+ thread_id: Optional[str] = None,
434
+ ) -> None:
357
435
  """
358
436
  Track a tool execution from HITL decision processing.
359
437
 
@@ -363,6 +441,7 @@ def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
363
441
  Args:
364
442
  tool_name: Name of the tool (jupyter_cell_tool, write_file_tool, etc.)
365
443
  args: Tool arguments including execution_result
444
+ thread_id: Thread ID for session isolation
366
445
  """
367
446
  if not args:
368
447
  return
@@ -371,21 +450,26 @@ def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
371
450
  if not execution_result:
372
451
  return
373
452
 
374
- tracker = get_code_history_tracker()
453
+ tracker = get_code_history_tracker(thread_id)
375
454
 
376
455
  if tool_name == "jupyter_cell_tool":
377
456
  code = args.get("code", "")
378
457
  output = execution_result.get("output", "")
379
458
  if code:
380
459
  tracker.add_jupyter_cell(code, output)
381
- logger.info(f"CodeHistory: Tracked jupyter_cell execution (code len={len(code)})")
460
+ logger.info(
461
+ f"CodeHistory: Tracked jupyter_cell execution "
462
+ f"(code len={len(code)}, thread_id={thread_id})"
463
+ )
382
464
 
383
465
  elif tool_name == "write_file_tool":
384
466
  file_path = args.get("path", "")
385
467
  content = args.get("content", "")
386
468
  if file_path:
387
469
  tracker.add_write_file(file_path, content)
388
- logger.info(f"CodeHistory: Tracked write_file to {file_path}")
470
+ logger.info(
471
+ f"CodeHistory: Tracked write_file to {file_path} (thread_id={thread_id})"
472
+ )
389
473
 
390
474
  elif tool_name == "edit_file_tool":
391
475
  file_path = args.get("path", "")
@@ -393,7 +477,9 @@ def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
393
477
  new_string = args.get("new_string", "")
394
478
  if file_path:
395
479
  tracker.add_edit_file(file_path, old_string, new_string)
396
- logger.info(f"CodeHistory: Tracked edit_file to {file_path}")
480
+ logger.info(
481
+ f"CodeHistory: Tracked edit_file to {file_path} (thread_id={thread_id})"
482
+ )
397
483
 
398
484
  elif tool_name == "multiedit_file_tool":
399
485
  file_path = args.get("path", "")
@@ -409,4 +495,7 @@ def track_tool_execution(tool_name: str, args: Dict[str, Any]) -> None:
409
495
  elif isinstance(edit, dict):
410
496
  edits_as_dicts.append(edit)
411
497
  tracker.add_multiedit_file(file_path, edits_as_dicts)
412
- logger.info(f"CodeHistory: Tracked multiedit_file to {file_path} ({len(edits)} edits)")
498
+ logger.info(
499
+ f"CodeHistory: Tracked multiedit_file to {file_path} "
500
+ f"({len(edits)} edits, thread_id={thread_id})"
501
+ )
@@ -12,6 +12,7 @@ Key features:
12
12
  - Subagent caching: compiled agents are cached to avoid recompilation overhead
13
13
  """
14
14
 
15
+ import contextvars
15
16
  import hashlib
16
17
  import json
17
18
  import logging
@@ -25,6 +26,11 @@ if TYPE_CHECKING:
25
26
 
26
27
  logger = logging.getLogger(__name__)
27
28
 
29
+ # Context variable to track the current main agent's thread_id
30
+ _current_thread_id: contextvars.ContextVar[Optional[str]] = contextvars.ContextVar(
31
+ "current_thread_id", default=None
32
+ )
33
+
28
34
  # Global registry for subagent factories (set by AgentFactory)
29
35
  _subagent_factory = None
30
36
  _current_llm_config = None
@@ -92,6 +98,17 @@ def clear_subagent_cache():
92
98
  logger.info(f"Subagent cache cleared ({count} entries removed)")
93
99
 
94
100
 
101
+ def set_current_thread_id(thread_id: str) -> None:
102
+ """Set the current main agent's thread_id for code history tracking."""
103
+ _current_thread_id.set(thread_id)
104
+ logger.debug(f"Set current thread_id: {thread_id}")
105
+
106
+
107
+ def get_current_thread_id() -> Optional[str]:
108
+ """Get the current main agent's thread_id."""
109
+ return _current_thread_id.get()
110
+
111
+
95
112
  def create_task_tool(
96
113
  caller_name: str,
97
114
  allowed_subagents: Optional[List[str]] = None,
@@ -232,13 +249,18 @@ def create_task_tool(
232
249
  get_context_with_history,
233
250
  )
234
251
 
235
- tracker = get_code_history_tracker()
252
+ # Get main agent's thread_id for session-scoped history
253
+ main_thread_id = get_current_thread_id()
254
+ tracker = get_code_history_tracker(main_thread_id)
236
255
  if tracker.get_entry_count() > 0:
237
- enhanced_context = get_context_with_history(context)
256
+ enhanced_context = get_context_with_history(
257
+ context, main_thread_id
258
+ )
238
259
  t3 = time.time()
239
260
  logger.info(
240
261
  f"[TIMING] code history injection took {t3-t2:.2f}s "
241
262
  f"(entries={tracker.get_entry_count()}, "
263
+ f"thread_id={main_thread_id}, "
242
264
  f"context_len={len(enhanced_context) if enhanced_context else 0})"
243
265
  )
244
266
  except Exception as e: