hdsp-jupyter-extension 2.0.10__py3-none-any.whl → 2.0.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_server/core/notebook_generator.py +4 -4
- agent_server/langchain/custom_middleware.py +95 -9
- agent_server/langchain/hitl_config.py +5 -0
- agent_server/langchain/llm_factory.py +1 -85
- agent_server/langchain/prompts.py +105 -128
- agent_server/prompts/file_action_prompts.py +8 -8
- agent_server/routers/langchain_agent.py +78 -12
- hdsp_agent_core/__init__.py +46 -47
- hdsp_agent_core/factory.py +6 -10
- hdsp_agent_core/interfaces.py +4 -2
- hdsp_agent_core/knowledge/__init__.py +5 -5
- hdsp_agent_core/knowledge/chunking.py +87 -61
- hdsp_agent_core/knowledge/loader.py +103 -101
- hdsp_agent_core/llm/service.py +192 -107
- hdsp_agent_core/managers/config_manager.py +16 -22
- hdsp_agent_core/managers/session_manager.py +5 -4
- hdsp_agent_core/models/__init__.py +12 -12
- hdsp_agent_core/models/agent.py +15 -8
- hdsp_agent_core/models/common.py +1 -2
- hdsp_agent_core/models/rag.py +48 -111
- hdsp_agent_core/prompts/__init__.py +12 -12
- hdsp_agent_core/prompts/cell_action_prompts.py +9 -7
- hdsp_agent_core/services/agent_service.py +10 -8
- hdsp_agent_core/services/chat_service.py +10 -6
- hdsp_agent_core/services/rag_service.py +3 -6
- hdsp_agent_core/tests/conftest.py +4 -1
- hdsp_agent_core/tests/test_factory.py +2 -2
- hdsp_agent_core/tests/test_services.py +12 -19
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/build_log.json +1 -1
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/package.json +2 -2
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js → hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js +81 -77
- hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.58c1e128ba0b76f41f04.js.map +1 -0
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js → hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js +3 -3
- jupyter_ext/labextension/static/remoteEntry.4a252df3ade74efee8d6.js.map → hdsp_jupyter_extension-2.0.11.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.9da31d1134a53b0c4af5.js.map +1 -1
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/METADATA +1 -1
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/RECORD +68 -68
- jupyter_ext/__init__.py +21 -11
- jupyter_ext/_version.py +1 -1
- jupyter_ext/handlers.py +69 -50
- jupyter_ext/labextension/build_log.json +1 -1
- jupyter_ext/labextension/package.json +2 -2
- jupyter_ext/labextension/static/{lib_index_js.dc6434bee96ab03a0539.js → lib_index_js.58c1e128ba0b76f41f04.js} +81 -77
- jupyter_ext/labextension/static/lib_index_js.58c1e128ba0b76f41f04.js.map +1 -0
- jupyter_ext/labextension/static/{remoteEntry.4a252df3ade74efee8d6.js → remoteEntry.9da31d1134a53b0c4af5.js} +3 -3
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/remoteEntry.4a252df3ade74efee8d6.js.map → jupyter_ext/labextension/static/remoteEntry.9da31d1134a53b0c4af5.js.map +1 -1
- hdsp_jupyter_extension-2.0.10.data/data/share/jupyter/labextensions/hdsp-agent/static/lib_index_js.dc6434bee96ab03a0539.js.map +0 -1
- jupyter_ext/labextension/static/lib_index_js.dc6434bee96ab03a0539.js.map +0 -1
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/etc/jupyter/jupyter_server_config.d/hdsp_jupyter_extension.json +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/install.json +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/frontend_styles_index_js.2d9fb488c82498c45c2d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b80.c095373419d05e6f141a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/node_modules_emotion_use-insertion-effect-with-fallbacks_dist_emotion-use-insertion-effect-wi-3ba6b81.61e75fb98ecff46cf836.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/style.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_babel_runtime_helpers_esm_extends_js-node_modules_emotion_serialize_dist-051195.e2553aab0c3963b83dd7.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_cache_dist_emotion-cache_browser_development_esm_js.24edcc52a1c014a8a5f0.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_react_dist_emotion-react_browser_development_esm_js.19ecf6babe00caff6b8a.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_emotion_styled_dist_emotion-styled_browser_development_esm_js.661fb5836f4978a7c6e1.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_index_js.985697e0162d8d088ca2.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js +0 -0
- {hdsp_jupyter_extension-2.0.10.data → hdsp_jupyter_extension-2.0.11.data}/data/share/jupyter/labextensions/hdsp-agent/static/vendors-node_modules_mui_material_utils_createSvgIcon_js.1f5038488cdfd8b3a85d.js.map +0 -0
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/WHEEL +0 -0
- {hdsp_jupyter_extension-2.0.10.dist-info → hdsp_jupyter_extension-2.0.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -909,6 +909,26 @@ async def stream_agent(request: AgentRequest):
|
|
|
909
909
|
"event": "todos",
|
|
910
910
|
"data": json.dumps({"todos": todos}),
|
|
911
911
|
}
|
|
912
|
+
# Check if all todos are completed - terminate early
|
|
913
|
+
all_completed = all(
|
|
914
|
+
t.get("status") == "completed" for t in todos
|
|
915
|
+
)
|
|
916
|
+
if all_completed and len(todos) > 0:
|
|
917
|
+
logger.info(
|
|
918
|
+
"All %d todos completed in AIMessage tool_calls, auto-terminating",
|
|
919
|
+
len(todos),
|
|
920
|
+
)
|
|
921
|
+
yield {
|
|
922
|
+
"event": "debug_clear",
|
|
923
|
+
"data": json.dumps({}),
|
|
924
|
+
}
|
|
925
|
+
yield {
|
|
926
|
+
"event": "done",
|
|
927
|
+
"data": json.dumps(
|
|
928
|
+
{"reason": "all_todos_completed"}
|
|
929
|
+
),
|
|
930
|
+
}
|
|
931
|
+
return # Exit before executing more tool calls
|
|
912
932
|
for tool_call in tool_calls:
|
|
913
933
|
tool_name = tool_call.get("name", "unknown")
|
|
914
934
|
tool_args = tool_call.get("args", {})
|
|
@@ -1032,16 +1052,31 @@ async def stream_agent(request: AgentRequest):
|
|
|
1032
1052
|
content = "\n".join(text_parts)
|
|
1033
1053
|
|
|
1034
1054
|
# Filter out raw JSON tool responses
|
|
1055
|
+
content_stripped = content.strip() if content else ""
|
|
1056
|
+
|
|
1057
|
+
# Filter out tool call JSON (but allow summary/next_items JSON for frontend rendering)
|
|
1058
|
+
is_json_tool_response = (
|
|
1059
|
+
content_stripped.startswith('{"tool":')
|
|
1060
|
+
or content_stripped.startswith('{ "tool":')
|
|
1061
|
+
or content_stripped.startswith('{"tool" :')
|
|
1062
|
+
or content_stripped.startswith('{"status":')
|
|
1063
|
+
or '"pending_execution"' in content
|
|
1064
|
+
or '"status": "complete"' in content
|
|
1065
|
+
or (
|
|
1066
|
+
'"tool"' in content
|
|
1067
|
+
and '"write_todos"' in content
|
|
1068
|
+
)
|
|
1069
|
+
or (
|
|
1070
|
+
'"tool"' in content
|
|
1071
|
+
and '"arguments"' in content
|
|
1072
|
+
and content_stripped.startswith("{")
|
|
1073
|
+
)
|
|
1074
|
+
)
|
|
1035
1075
|
if (
|
|
1036
1076
|
content
|
|
1037
1077
|
and isinstance(content, str)
|
|
1038
1078
|
and not has_final_answer_tool
|
|
1039
|
-
and not
|
|
1040
|
-
content.strip().startswith('{"tool":')
|
|
1041
|
-
or content.strip().startswith('{"status":')
|
|
1042
|
-
or '"pending_execution"' in content
|
|
1043
|
-
or '"status": "complete"' in content
|
|
1044
|
-
)
|
|
1079
|
+
and not is_json_tool_response
|
|
1045
1080
|
):
|
|
1046
1081
|
# Check if we've already emitted this content (prevents duplicates)
|
|
1047
1082
|
content_hash = hash(content)
|
|
@@ -1814,16 +1849,27 @@ async def resume_agent(request: ResumeRequest):
|
|
|
1814
1849
|
content = "\n".join(text_parts)
|
|
1815
1850
|
|
|
1816
1851
|
# Filter out raw JSON tool responses
|
|
1852
|
+
content_stripped = content.strip() if content else ""
|
|
1853
|
+
# Filter out tool call JSON (but allow summary/next_items JSON for frontend rendering)
|
|
1854
|
+
is_json_tool_response = (
|
|
1855
|
+
content_stripped.startswith('{"tool":')
|
|
1856
|
+
or content_stripped.startswith('{ "tool":')
|
|
1857
|
+
or content_stripped.startswith('{"tool" :')
|
|
1858
|
+
or content_stripped.startswith('{"status":')
|
|
1859
|
+
or '"pending_execution"' in content
|
|
1860
|
+
or '"status": "complete"' in content
|
|
1861
|
+
or ('"tool"' in content and '"write_todos"' in content)
|
|
1862
|
+
or (
|
|
1863
|
+
'"tool"' in content
|
|
1864
|
+
and '"arguments"' in content
|
|
1865
|
+
and content_stripped.startswith("{")
|
|
1866
|
+
)
|
|
1867
|
+
)
|
|
1817
1868
|
if (
|
|
1818
1869
|
content
|
|
1819
1870
|
and isinstance(content, str)
|
|
1820
1871
|
and not has_final_answer_tool
|
|
1821
|
-
and not
|
|
1822
|
-
content.strip().startswith('{"tool":')
|
|
1823
|
-
or content.strip().startswith('{"status":')
|
|
1824
|
-
or '"pending_execution"' in content
|
|
1825
|
-
or '"status": "complete"' in content
|
|
1826
|
-
)
|
|
1872
|
+
and not is_json_tool_response
|
|
1827
1873
|
):
|
|
1828
1874
|
# Check if we've already emitted this content (prevents duplicates)
|
|
1829
1875
|
content_hash = hash(content)
|
|
@@ -1874,6 +1920,26 @@ async def resume_agent(request: ResumeRequest):
|
|
|
1874
1920
|
"event": "todos",
|
|
1875
1921
|
"data": json.dumps({"todos": todos}),
|
|
1876
1922
|
}
|
|
1923
|
+
# Check if all todos are completed - terminate early
|
|
1924
|
+
all_completed = all(
|
|
1925
|
+
t.get("status") == "completed" for t in todos
|
|
1926
|
+
)
|
|
1927
|
+
if all_completed and len(todos) > 0:
|
|
1928
|
+
logger.info(
|
|
1929
|
+
"Resume: All %d todos completed in AIMessage tool_calls, auto-terminating",
|
|
1930
|
+
len(todos),
|
|
1931
|
+
)
|
|
1932
|
+
yield {
|
|
1933
|
+
"event": "debug_clear",
|
|
1934
|
+
"data": json.dumps({}),
|
|
1935
|
+
}
|
|
1936
|
+
yield {
|
|
1937
|
+
"event": "done",
|
|
1938
|
+
"data": json.dumps(
|
|
1939
|
+
{"reason": "all_todos_completed"}
|
|
1940
|
+
),
|
|
1941
|
+
}
|
|
1942
|
+
return # Exit before executing more tool calls
|
|
1877
1943
|
|
|
1878
1944
|
# Process tool calls
|
|
1879
1945
|
for tool_call in new_tool_calls:
|
hdsp_agent_core/__init__.py
CHANGED
|
@@ -15,77 +15,76 @@ Modules:
|
|
|
15
15
|
__version__ = "1.0.0"
|
|
16
16
|
|
|
17
17
|
# Models
|
|
18
|
+
# Knowledge
|
|
19
|
+
from hdsp_agent_core.knowledge import (
|
|
20
|
+
LIBRARY_DESCRIPTIONS,
|
|
21
|
+
DocumentChunker,
|
|
22
|
+
KnowledgeBase,
|
|
23
|
+
KnowledgeLoader,
|
|
24
|
+
LibraryDetector,
|
|
25
|
+
chunk_file,
|
|
26
|
+
get_knowledge_base,
|
|
27
|
+
get_knowledge_loader,
|
|
28
|
+
get_library_detector,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# LLM
|
|
32
|
+
from hdsp_agent_core.llm import (
|
|
33
|
+
LLMService,
|
|
34
|
+
call_llm,
|
|
35
|
+
call_llm_stream,
|
|
36
|
+
)
|
|
37
|
+
|
|
38
|
+
# Managers
|
|
39
|
+
from hdsp_agent_core.managers import (
|
|
40
|
+
ConfigManager,
|
|
41
|
+
SessionManager,
|
|
42
|
+
get_config_manager,
|
|
43
|
+
get_session_manager,
|
|
44
|
+
)
|
|
18
45
|
from hdsp_agent_core.models import (
|
|
19
46
|
# Common
|
|
20
47
|
APIResponse,
|
|
48
|
+
# Chat
|
|
49
|
+
ChatRequest,
|
|
50
|
+
ChatResponse,
|
|
51
|
+
# RAG
|
|
52
|
+
ChunkingConfig,
|
|
53
|
+
EmbeddingConfig,
|
|
21
54
|
ErrorInfo,
|
|
55
|
+
# Agent
|
|
56
|
+
ExecutionPlan,
|
|
22
57
|
GeminiConfig,
|
|
58
|
+
IndexStatusResponse,
|
|
23
59
|
LLMConfig,
|
|
24
60
|
NotebookContext,
|
|
25
61
|
OpenAIConfig,
|
|
26
|
-
ToolCall,
|
|
27
|
-
VLLMConfig,
|
|
28
|
-
# Agent
|
|
29
|
-
ExecutionPlan,
|
|
30
62
|
PlanRequest,
|
|
31
63
|
PlanResponse,
|
|
32
64
|
PlanStep,
|
|
33
|
-
RefineRequest,
|
|
34
|
-
RefineResponse,
|
|
35
|
-
ReplanRequest,
|
|
36
|
-
ReplanResponse,
|
|
37
|
-
ValidationIssue,
|
|
38
|
-
# Chat
|
|
39
|
-
ChatRequest,
|
|
40
|
-
ChatResponse,
|
|
41
|
-
StreamChunk,
|
|
42
|
-
# RAG
|
|
43
|
-
ChunkingConfig,
|
|
44
|
-
EmbeddingConfig,
|
|
45
|
-
IndexStatusResponse,
|
|
46
65
|
QdrantConfig,
|
|
47
66
|
RAGConfig,
|
|
67
|
+
RefineRequest,
|
|
68
|
+
RefineResponse,
|
|
48
69
|
ReindexRequest,
|
|
49
70
|
ReindexResponse,
|
|
71
|
+
ReplanRequest,
|
|
72
|
+
ReplanResponse,
|
|
50
73
|
SearchRequest,
|
|
51
74
|
SearchResponse,
|
|
75
|
+
StreamChunk,
|
|
76
|
+
ToolCall,
|
|
77
|
+
ValidationIssue,
|
|
78
|
+
VLLMConfig,
|
|
52
79
|
WatchdogConfig,
|
|
53
80
|
)
|
|
54
81
|
|
|
55
|
-
# Managers
|
|
56
|
-
from hdsp_agent_core.managers import (
|
|
57
|
-
ConfigManager,
|
|
58
|
-
SessionManager,
|
|
59
|
-
get_config_manager,
|
|
60
|
-
get_session_manager,
|
|
61
|
-
)
|
|
62
|
-
|
|
63
|
-
# LLM
|
|
64
|
-
from hdsp_agent_core.llm import (
|
|
65
|
-
LLMService,
|
|
66
|
-
call_llm,
|
|
67
|
-
call_llm_stream,
|
|
68
|
-
)
|
|
69
|
-
|
|
70
|
-
# Knowledge
|
|
71
|
-
from hdsp_agent_core.knowledge import (
|
|
72
|
-
DocumentChunker,
|
|
73
|
-
KnowledgeBase,
|
|
74
|
-
KnowledgeLoader,
|
|
75
|
-
LibraryDetector,
|
|
76
|
-
chunk_file,
|
|
77
|
-
get_knowledge_base,
|
|
78
|
-
get_knowledge_loader,
|
|
79
|
-
get_library_detector,
|
|
80
|
-
LIBRARY_DESCRIPTIONS,
|
|
81
|
-
)
|
|
82
|
-
|
|
83
82
|
# Prompts
|
|
84
83
|
from hdsp_agent_core.prompts import (
|
|
85
|
-
|
|
84
|
+
ADAPTIVE_REPLAN_PROMPT,
|
|
86
85
|
CODE_GENERATION_PROMPT,
|
|
87
86
|
ERROR_REFINEMENT_PROMPT,
|
|
88
|
-
|
|
87
|
+
PLAN_GENERATION_PROMPT,
|
|
89
88
|
format_plan_prompt,
|
|
90
89
|
format_refine_prompt,
|
|
91
90
|
format_replan_prompt,
|
hdsp_agent_core/factory.py
CHANGED
|
@@ -21,8 +21,9 @@ logger = logging.getLogger(__name__)
|
|
|
21
21
|
|
|
22
22
|
class AgentMode(Enum):
|
|
23
23
|
"""Agent execution mode"""
|
|
24
|
+
|
|
24
25
|
EMBEDDED = "embedded" # Direct in-process execution
|
|
25
|
-
PROXY = "proxy"
|
|
26
|
+
PROXY = "proxy" # HTTP proxy to external server
|
|
26
27
|
|
|
27
28
|
|
|
28
29
|
class ServiceFactory:
|
|
@@ -84,9 +85,7 @@ class ServiceFactory:
|
|
|
84
85
|
elif mode_str == "proxy":
|
|
85
86
|
return AgentMode.PROXY
|
|
86
87
|
else:
|
|
87
|
-
logger.warning(
|
|
88
|
-
f"Unknown HDSP_AGENT_MODE '{mode_str}', defaulting to proxy"
|
|
89
|
-
)
|
|
88
|
+
logger.warning(f"Unknown HDSP_AGENT_MODE '{mode_str}', defaulting to proxy")
|
|
90
89
|
return AgentMode.PROXY
|
|
91
90
|
|
|
92
91
|
@property
|
|
@@ -164,16 +163,13 @@ class ServiceFactory:
|
|
|
164
163
|
|
|
165
164
|
# Create proxy service instances
|
|
166
165
|
self._agent_service = ProxyAgentService(
|
|
167
|
-
base_url=self._server_url,
|
|
168
|
-
timeout=self._timeout
|
|
166
|
+
base_url=self._server_url, timeout=self._timeout
|
|
169
167
|
)
|
|
170
168
|
self._chat_service = ProxyChatService(
|
|
171
|
-
base_url=self._server_url,
|
|
172
|
-
timeout=self._timeout
|
|
169
|
+
base_url=self._server_url, timeout=self._timeout
|
|
173
170
|
)
|
|
174
171
|
self._rag_service = ProxyRAGService(
|
|
175
|
-
base_url=self._server_url,
|
|
176
|
-
timeout=self._timeout
|
|
172
|
+
base_url=self._server_url, timeout=self._timeout
|
|
177
173
|
)
|
|
178
174
|
|
|
179
175
|
# Optionally validate connectivity
|
hdsp_agent_core/interfaces.py
CHANGED
|
@@ -73,7 +73,9 @@ class IAgentService(ABC):
|
|
|
73
73
|
...
|
|
74
74
|
|
|
75
75
|
@abstractmethod
|
|
76
|
-
async def validate_code(
|
|
76
|
+
async def validate_code(
|
|
77
|
+
self, code: str, notebook_context: Optional[Dict] = None
|
|
78
|
+
) -> Dict[str, Any]:
|
|
77
79
|
"""
|
|
78
80
|
Validate code before execution.
|
|
79
81
|
|
|
@@ -154,7 +156,7 @@ class IRAGService(ABC):
|
|
|
154
156
|
self,
|
|
155
157
|
query: str,
|
|
156
158
|
detected_libraries: Optional[List[str]] = None,
|
|
157
|
-
max_results: int = 5
|
|
159
|
+
max_results: int = 5,
|
|
158
160
|
) -> Optional[str]:
|
|
159
161
|
"""
|
|
160
162
|
Get formatted context for a query (for prompt injection).
|
|
@@ -4,18 +4,18 @@ HDSP Agent Core - Knowledge Base
|
|
|
4
4
|
Deterministic library detection and API guide management.
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
|
+
from .chunking import (
|
|
8
|
+
DocumentChunker,
|
|
9
|
+
chunk_file,
|
|
10
|
+
)
|
|
7
11
|
from .loader import (
|
|
12
|
+
LIBRARY_DESCRIPTIONS,
|
|
8
13
|
KnowledgeBase,
|
|
9
14
|
KnowledgeLoader,
|
|
10
15
|
LibraryDetector,
|
|
11
16
|
get_knowledge_base,
|
|
12
17
|
get_knowledge_loader,
|
|
13
18
|
get_library_detector,
|
|
14
|
-
LIBRARY_DESCRIPTIONS,
|
|
15
|
-
)
|
|
16
|
-
from .chunking import (
|
|
17
|
-
DocumentChunker,
|
|
18
|
-
chunk_file,
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
__all__ = [
|
|
@@ -9,10 +9,10 @@ Provides intelligent chunking strategies:
|
|
|
9
9
|
Each strategy preserves context and adds relevant metadata.
|
|
10
10
|
"""
|
|
11
11
|
|
|
12
|
-
import re
|
|
13
12
|
import logging
|
|
14
|
-
|
|
13
|
+
import re
|
|
15
14
|
from pathlib import Path
|
|
15
|
+
from typing import TYPE_CHECKING, Any, Dict, List, Optional
|
|
16
16
|
|
|
17
17
|
if TYPE_CHECKING:
|
|
18
18
|
from hdsp_agent_core.models.rag import ChunkingConfig
|
|
@@ -36,13 +36,14 @@ class DocumentChunker:
|
|
|
36
36
|
|
|
37
37
|
def __init__(self, config: Optional["ChunkingConfig"] = None):
|
|
38
38
|
from hdsp_agent_core.models.rag import ChunkingConfig
|
|
39
|
+
|
|
39
40
|
self._config = config or ChunkingConfig()
|
|
40
41
|
|
|
41
42
|
def chunk_document(
|
|
42
43
|
self,
|
|
43
44
|
content: str,
|
|
44
45
|
metadata: Optional[Dict[str, Any]] = None,
|
|
45
|
-
file_type: Optional[str] = None
|
|
46
|
+
file_type: Optional[str] = None,
|
|
46
47
|
) -> List[Dict[str, Any]]:
|
|
47
48
|
"""
|
|
48
49
|
Chunk document based on content type.
|
|
@@ -75,13 +76,12 @@ class DocumentChunker:
|
|
|
75
76
|
for chunk in chunks:
|
|
76
77
|
chunk_content = chunk["content"].strip()
|
|
77
78
|
if len(chunk_content) >= self._config.min_chunk_size:
|
|
78
|
-
result.append(
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
**metadata,
|
|
82
|
-
**chunk.get("metadata", {})
|
|
79
|
+
result.append(
|
|
80
|
+
{
|
|
81
|
+
"content": chunk_content,
|
|
82
|
+
"metadata": {**metadata, **chunk.get("metadata", {})},
|
|
83
83
|
}
|
|
84
|
-
|
|
84
|
+
)
|
|
85
85
|
|
|
86
86
|
logger.debug(f"Chunked document into {len(result)} chunks (type={file_type})")
|
|
87
87
|
return result
|
|
@@ -108,9 +108,9 @@ class DocumentChunker:
|
|
|
108
108
|
- Respect max chunk size with sub-splitting
|
|
109
109
|
"""
|
|
110
110
|
# Pattern for markdown headers
|
|
111
|
-
header_pattern = r
|
|
111
|
+
header_pattern = r"^(#{1,6})\s+(.+)$"
|
|
112
112
|
|
|
113
|
-
lines = content.split(
|
|
113
|
+
lines = content.split("\n")
|
|
114
114
|
chunks = []
|
|
115
115
|
current_chunk_lines = []
|
|
116
116
|
current_headers = [] # Stack of (level, text)
|
|
@@ -121,13 +121,19 @@ class DocumentChunker:
|
|
|
121
121
|
if header_match:
|
|
122
122
|
# Save current chunk if it has content
|
|
123
123
|
if current_chunk_lines:
|
|
124
|
-
chunk_content =
|
|
124
|
+
chunk_content = "\n".join(current_chunk_lines).strip()
|
|
125
125
|
if chunk_content:
|
|
126
|
-
section_path =
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
126
|
+
section_path = (
|
|
127
|
+
" > ".join(h[1] for h in current_headers)
|
|
128
|
+
if current_headers
|
|
129
|
+
else "Introduction"
|
|
130
|
+
)
|
|
131
|
+
chunks.append(
|
|
132
|
+
{
|
|
133
|
+
"content": chunk_content,
|
|
134
|
+
"metadata": {"section": section_path},
|
|
135
|
+
}
|
|
136
|
+
)
|
|
131
137
|
|
|
132
138
|
# Update header stack
|
|
133
139
|
level = len(header_match.group(1))
|
|
@@ -143,26 +149,35 @@ class DocumentChunker:
|
|
|
143
149
|
current_chunk_lines.append(line)
|
|
144
150
|
|
|
145
151
|
# Check chunk size limit
|
|
146
|
-
chunk_text =
|
|
152
|
+
chunk_text = "\n".join(current_chunk_lines)
|
|
147
153
|
if len(chunk_text) >= self._config.max_chunk_size:
|
|
148
|
-
section_path =
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
154
|
+
section_path = (
|
|
155
|
+
" > ".join(h[1] for h in current_headers)
|
|
156
|
+
if current_headers
|
|
157
|
+
else "Content"
|
|
158
|
+
)
|
|
159
|
+
chunks.append(
|
|
160
|
+
{
|
|
161
|
+
"content": chunk_text.strip(),
|
|
162
|
+
"metadata": {"section": section_path},
|
|
163
|
+
}
|
|
164
|
+
)
|
|
153
165
|
# Keep overlap for context continuity
|
|
154
166
|
overlap_lines = self._get_overlap_lines(current_chunk_lines)
|
|
155
167
|
current_chunk_lines = overlap_lines
|
|
156
168
|
|
|
157
169
|
# Save final chunk
|
|
158
170
|
if current_chunk_lines:
|
|
159
|
-
chunk_content =
|
|
171
|
+
chunk_content = "\n".join(current_chunk_lines).strip()
|
|
160
172
|
if chunk_content:
|
|
161
|
-
section_path =
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
173
|
+
section_path = (
|
|
174
|
+
" > ".join(h[1] for h in current_headers)
|
|
175
|
+
if current_headers
|
|
176
|
+
else "Content"
|
|
177
|
+
)
|
|
178
|
+
chunks.append(
|
|
179
|
+
{"content": chunk_content, "metadata": {"section": section_path}}
|
|
180
|
+
)
|
|
166
181
|
|
|
167
182
|
return chunks
|
|
168
183
|
|
|
@@ -176,9 +191,9 @@ class DocumentChunker:
|
|
|
176
191
|
- Preserve import statements and module docstrings
|
|
177
192
|
"""
|
|
178
193
|
# Pattern for class and function definitions (top-level only)
|
|
179
|
-
def_pattern = r
|
|
194
|
+
def_pattern = r"^(class|def|async\s+def)\s+(\w+)"
|
|
180
195
|
|
|
181
|
-
lines = content.split(
|
|
196
|
+
lines = content.split("\n")
|
|
182
197
|
chunks = []
|
|
183
198
|
current_chunk_lines = []
|
|
184
199
|
current_def = None
|
|
@@ -193,15 +208,21 @@ class DocumentChunker:
|
|
|
193
208
|
def_match = re.match(def_pattern, line)
|
|
194
209
|
|
|
195
210
|
# Check if this is a top-level definition (not indented)
|
|
196
|
-
if
|
|
211
|
+
if (
|
|
212
|
+
def_match
|
|
213
|
+
and not line.startswith((" ", "\t"))
|
|
214
|
+
and not in_multiline_string
|
|
215
|
+
):
|
|
197
216
|
# Save current chunk
|
|
198
217
|
if current_chunk_lines:
|
|
199
|
-
chunk_content =
|
|
218
|
+
chunk_content = "\n".join(current_chunk_lines).strip()
|
|
200
219
|
if chunk_content:
|
|
201
|
-
chunks.append(
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
220
|
+
chunks.append(
|
|
221
|
+
{
|
|
222
|
+
"content": chunk_content,
|
|
223
|
+
"metadata": {"definition": current_def or "module"},
|
|
224
|
+
}
|
|
225
|
+
)
|
|
205
226
|
|
|
206
227
|
current_def = f"{def_match.group(1)} {def_match.group(2)}"
|
|
207
228
|
current_chunk_lines = [line]
|
|
@@ -209,22 +230,26 @@ class DocumentChunker:
|
|
|
209
230
|
current_chunk_lines.append(line)
|
|
210
231
|
|
|
211
232
|
# Check max chunk size
|
|
212
|
-
if len(
|
|
213
|
-
chunks.append(
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
233
|
+
if len("\n".join(current_chunk_lines)) >= self._config.max_chunk_size:
|
|
234
|
+
chunks.append(
|
|
235
|
+
{
|
|
236
|
+
"content": "\n".join(current_chunk_lines).strip(),
|
|
237
|
+
"metadata": {"definition": current_def or "module"},
|
|
238
|
+
}
|
|
239
|
+
)
|
|
217
240
|
overlap_lines = self._get_overlap_lines(current_chunk_lines)
|
|
218
241
|
current_chunk_lines = overlap_lines
|
|
219
242
|
|
|
220
243
|
# Save final chunk
|
|
221
244
|
if current_chunk_lines:
|
|
222
|
-
chunk_content =
|
|
245
|
+
chunk_content = "\n".join(current_chunk_lines).strip()
|
|
223
246
|
if chunk_content:
|
|
224
|
-
chunks.append(
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
247
|
+
chunks.append(
|
|
248
|
+
{
|
|
249
|
+
"content": chunk_content,
|
|
250
|
+
"metadata": {"definition": current_def or "module"},
|
|
251
|
+
}
|
|
252
|
+
)
|
|
228
253
|
|
|
229
254
|
return chunks
|
|
230
255
|
|
|
@@ -251,10 +276,12 @@ class DocumentChunker:
|
|
|
251
276
|
if end >= len(content):
|
|
252
277
|
chunk_content = content[start:].strip()
|
|
253
278
|
if chunk_content:
|
|
254
|
-
chunks.append(
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
279
|
+
chunks.append(
|
|
280
|
+
{
|
|
281
|
+
"content": chunk_content,
|
|
282
|
+
"metadata": {"chunk_index": chunk_index},
|
|
283
|
+
}
|
|
284
|
+
)
|
|
258
285
|
break
|
|
259
286
|
|
|
260
287
|
# Try to find a good break point
|
|
@@ -263,10 +290,9 @@ class DocumentChunker:
|
|
|
263
290
|
|
|
264
291
|
chunk_content = content[start:end].strip()
|
|
265
292
|
if chunk_content:
|
|
266
|
-
chunks.append(
|
|
267
|
-
"content": chunk_content,
|
|
268
|
-
|
|
269
|
-
})
|
|
293
|
+
chunks.append(
|
|
294
|
+
{"content": chunk_content, "metadata": {"chunk_index": chunk_index}}
|
|
295
|
+
)
|
|
270
296
|
|
|
271
297
|
# Move start with overlap
|
|
272
298
|
start = max(end - overlap, start + 1)
|
|
@@ -288,12 +314,12 @@ class DocumentChunker:
|
|
|
288
314
|
search_start = start + (end - start) // 2 # Search in latter half
|
|
289
315
|
|
|
290
316
|
# Try paragraph break (double newline)
|
|
291
|
-
para_break = content.rfind(
|
|
317
|
+
para_break = content.rfind("\n\n", search_start, end)
|
|
292
318
|
if para_break > search_start:
|
|
293
319
|
return para_break + 2
|
|
294
320
|
|
|
295
321
|
# Try sentence break (. or ! or ? followed by space or newline)
|
|
296
|
-
sentence_pattern = r
|
|
322
|
+
sentence_pattern = r"[.!?]\s"
|
|
297
323
|
for match in re.finditer(sentence_pattern, content[search_start:end]):
|
|
298
324
|
last_match_end = search_start + match.end()
|
|
299
325
|
else:
|
|
@@ -301,15 +327,15 @@ class DocumentChunker:
|
|
|
301
327
|
|
|
302
328
|
# Find last sentence break
|
|
303
329
|
for i in range(end - 1, search_start, -1):
|
|
304
|
-
if i + 1 < len(content) and content[i] in
|
|
330
|
+
if i + 1 < len(content) and content[i] in ".!?" and content[i + 1] in " \n":
|
|
305
331
|
return i + 1
|
|
306
332
|
|
|
307
333
|
# Try word break (space or newline)
|
|
308
|
-
space_break = content.rfind(
|
|
334
|
+
space_break = content.rfind(" ", search_start, end)
|
|
309
335
|
if space_break > search_start:
|
|
310
336
|
return space_break + 1
|
|
311
337
|
|
|
312
|
-
newline_break = content.rfind(
|
|
338
|
+
newline_break = content.rfind("\n", search_start, end)
|
|
313
339
|
if newline_break > search_start:
|
|
314
340
|
return newline_break + 1
|
|
315
341
|
|
|
@@ -333,7 +359,7 @@ class DocumentChunker:
|
|
|
333
359
|
def chunk_file(
|
|
334
360
|
file_path: Path,
|
|
335
361
|
config: Optional["ChunkingConfig"] = None,
|
|
336
|
-
base_metadata: Optional[Dict[str, Any]] = None
|
|
362
|
+
base_metadata: Optional[Dict[str, Any]] = None,
|
|
337
363
|
) -> List[Dict[str, Any]]:
|
|
338
364
|
"""
|
|
339
365
|
Convenience function to chunk a file directly.
|