isage-middleware 0.2.4.3__cp311-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. isage_middleware-0.2.4.3.dist-info/METADATA +266 -0
  2. isage_middleware-0.2.4.3.dist-info/RECORD +94 -0
  3. isage_middleware-0.2.4.3.dist-info/WHEEL +5 -0
  4. isage_middleware-0.2.4.3.dist-info/top_level.txt +1 -0
  5. sage/middleware/__init__.py +59 -0
  6. sage/middleware/_version.py +6 -0
  7. sage/middleware/components/__init__.py +30 -0
  8. sage/middleware/components/extensions_compat.py +141 -0
  9. sage/middleware/components/sage_db/__init__.py +116 -0
  10. sage/middleware/components/sage_db/backend.py +136 -0
  11. sage/middleware/components/sage_db/service.py +15 -0
  12. sage/middleware/components/sage_flow/__init__.py +76 -0
  13. sage/middleware/components/sage_flow/python/__init__.py +14 -0
  14. sage/middleware/components/sage_flow/python/micro_service/__init__.py +4 -0
  15. sage/middleware/components/sage_flow/python/micro_service/sage_flow_service.py +88 -0
  16. sage/middleware/components/sage_flow/python/sage_flow.py +30 -0
  17. sage/middleware/components/sage_flow/service.py +14 -0
  18. sage/middleware/components/sage_mem/__init__.py +83 -0
  19. sage/middleware/components/sage_sias/__init__.py +59 -0
  20. sage/middleware/components/sage_sias/continual_learner.py +184 -0
  21. sage/middleware/components/sage_sias/coreset_selector.py +302 -0
  22. sage/middleware/components/sage_sias/types.py +94 -0
  23. sage/middleware/components/sage_tsdb/__init__.py +81 -0
  24. sage/middleware/components/sage_tsdb/python/__init__.py +21 -0
  25. sage/middleware/components/sage_tsdb/python/_sage_tsdb.pyi +17 -0
  26. sage/middleware/components/sage_tsdb/python/algorithms/__init__.py +17 -0
  27. sage/middleware/components/sage_tsdb/python/algorithms/base.py +51 -0
  28. sage/middleware/components/sage_tsdb/python/algorithms/out_of_order_join.py +248 -0
  29. sage/middleware/components/sage_tsdb/python/algorithms/window_aggregator.py +296 -0
  30. sage/middleware/components/sage_tsdb/python/micro_service/__init__.py +7 -0
  31. sage/middleware/components/sage_tsdb/python/micro_service/sage_tsdb_service.py +365 -0
  32. sage/middleware/components/sage_tsdb/python/sage_tsdb.py +523 -0
  33. sage/middleware/components/sage_tsdb/service.py +17 -0
  34. sage/middleware/components/vector_stores/__init__.py +25 -0
  35. sage/middleware/components/vector_stores/chroma.py +483 -0
  36. sage/middleware/components/vector_stores/chroma_adapter.py +185 -0
  37. sage/middleware/components/vector_stores/milvus.py +677 -0
  38. sage/middleware/operators/__init__.py +56 -0
  39. sage/middleware/operators/agent/__init__.py +24 -0
  40. sage/middleware/operators/agent/planning/__init__.py +5 -0
  41. sage/middleware/operators/agent/planning/llm_adapter.py +41 -0
  42. sage/middleware/operators/agent/planning/planner_adapter.py +98 -0
  43. sage/middleware/operators/agent/planning/router.py +107 -0
  44. sage/middleware/operators/agent/runtime.py +296 -0
  45. sage/middleware/operators/agentic/__init__.py +41 -0
  46. sage/middleware/operators/agentic/config.py +254 -0
  47. sage/middleware/operators/agentic/planning_operator.py +125 -0
  48. sage/middleware/operators/agentic/refined_searcher.py +132 -0
  49. sage/middleware/operators/agentic/runtime.py +241 -0
  50. sage/middleware/operators/agentic/timing_operator.py +125 -0
  51. sage/middleware/operators/agentic/tool_selection_operator.py +127 -0
  52. sage/middleware/operators/context/__init__.py +17 -0
  53. sage/middleware/operators/context/critic_evaluation.py +16 -0
  54. sage/middleware/operators/context/model_context.py +565 -0
  55. sage/middleware/operators/context/quality_label.py +12 -0
  56. sage/middleware/operators/context/search_query_results.py +61 -0
  57. sage/middleware/operators/context/search_result.py +42 -0
  58. sage/middleware/operators/context/search_session.py +79 -0
  59. sage/middleware/operators/filters/__init__.py +26 -0
  60. sage/middleware/operators/filters/context_sink.py +387 -0
  61. sage/middleware/operators/filters/context_source.py +376 -0
  62. sage/middleware/operators/filters/evaluate_filter.py +83 -0
  63. sage/middleware/operators/filters/tool_filter.py +74 -0
  64. sage/middleware/operators/llm/__init__.py +18 -0
  65. sage/middleware/operators/llm/sagellm_generator.py +432 -0
  66. sage/middleware/operators/rag/__init__.py +147 -0
  67. sage/middleware/operators/rag/arxiv.py +331 -0
  68. sage/middleware/operators/rag/chunk.py +13 -0
  69. sage/middleware/operators/rag/document_loaders.py +23 -0
  70. sage/middleware/operators/rag/evaluate.py +658 -0
  71. sage/middleware/operators/rag/generator.py +340 -0
  72. sage/middleware/operators/rag/index_builder/__init__.py +48 -0
  73. sage/middleware/operators/rag/index_builder/builder.py +363 -0
  74. sage/middleware/operators/rag/index_builder/manifest.py +101 -0
  75. sage/middleware/operators/rag/index_builder/storage.py +131 -0
  76. sage/middleware/operators/rag/pipeline.py +46 -0
  77. sage/middleware/operators/rag/profiler.py +59 -0
  78. sage/middleware/operators/rag/promptor.py +400 -0
  79. sage/middleware/operators/rag/refiner.py +231 -0
  80. sage/middleware/operators/rag/reranker.py +364 -0
  81. sage/middleware/operators/rag/retriever.py +1308 -0
  82. sage/middleware/operators/rag/searcher.py +37 -0
  83. sage/middleware/operators/rag/types.py +28 -0
  84. sage/middleware/operators/rag/writer.py +80 -0
  85. sage/middleware/operators/tools/__init__.py +71 -0
  86. sage/middleware/operators/tools/arxiv_paper_searcher.py +175 -0
  87. sage/middleware/operators/tools/arxiv_searcher.py +102 -0
  88. sage/middleware/operators/tools/duckduckgo_searcher.py +105 -0
  89. sage/middleware/operators/tools/image_captioner.py +104 -0
  90. sage/middleware/operators/tools/nature_news_fetcher.py +224 -0
  91. sage/middleware/operators/tools/searcher_tool.py +514 -0
  92. sage/middleware/operators/tools/text_detector.py +185 -0
  93. sage/middleware/operators/tools/url_text_extractor.py +104 -0
  94. sage/middleware/py.typed +2 -0
@@ -0,0 +1,79 @@
1
+ import time
2
+ from dataclasses import dataclass, field
3
+ from typing import Any
4
+ from uuid import uuid4
5
+
6
+ from .search_query_results import SearchQueryResults
7
+ from .search_result import SearchResult
8
+
9
+
10
@dataclass
class SearchSession:
    """Collection of all query results gathered during one search session.

    Query results are kept in the order in which they were added. The class
    only relies on each entry exposing ``query``, ``results``,
    ``get_results_count()`` and ``to_dict()``.
    """

    # Unique session identifier; a random UUID4 string unless supplied.
    session_id: str = field(default_factory=lambda: str(uuid4()))
    # Per-query result groups, in insertion order.
    query_results: list[SearchQueryResults] = field(default_factory=list)
    # Creation time in milliseconds since the Unix epoch.
    session_timestamp: int = field(default_factory=lambda: int(time.time() * 1000))
    # The question that started the session.
    original_question: str = ""
    # Free-form metadata attached to the session.
    session_metadata: dict[str, Any] = field(default_factory=dict)

    def add_query_results(self, query_results: SearchQueryResults) -> None:
        """Append the results of one query to the session."""
        self.query_results.append(query_results)

    def get_all_queries(self) -> list[str]:
        """Return the query string of every recorded query, in order."""
        return [qr.query for qr in self.query_results]

    def get_total_results_count(self) -> int:
        """Return the number of results summed over all queries."""
        return sum(qr.get_results_count() for qr in self.query_results)

    def get_all_results(self) -> list[SearchResult]:
        """Return every search result of every query as one flat list."""
        return [result for qr in self.query_results for result in qr.results]

    def get_results_by_query(self, query: str) -> SearchQueryResults | None:
        """Return the first result group whose query equals ``query``.

        Returns:
            The matching ``SearchQueryResults``, or ``None`` if no recorded
            query matches exactly.
        """
        for qr in self.query_results:
            if qr.query == query:
                return qr
        return None

    def get_combined_content(self) -> str:
        """Render all results of all queries as one newline-joined text.

        Each query contributes a ``=== Query i: ... ===`` header followed by
        the title, content and source of each result; every result is
        followed by an empty line. Numbering starts at 1.
        """
        combined_parts: list[str] = []
        for i, query_result in enumerate(self.query_results, 1):
            combined_parts.append(f"=== Query {i}: {query_result.query} ===")
            for j, result in enumerate(query_result.results, 1):
                combined_parts.extend(
                    (
                        f"[Result {j}] {result.title}",
                        f"Content: {result.content}",
                        f"Source: {result.source}",
                        "",
                    )
                )
        return "\n".join(combined_parts)

    def to_dict(self) -> dict[str, Any]:
        """Convert the session to a plain dictionary.

        The metadata dictionary is shallow-copied so the caller cannot mutate
        the session through the returned value.
        """
        return {
            "session_id": self.session_id,
            "query_results": [qr.to_dict() for qr in self.query_results],
            "session_timestamp": self.session_timestamp,
            "original_question": self.original_question,
            "session_metadata": self.session_metadata.copy(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "SearchSession":
        """Create a ``SearchSession`` from a dictionary such as ``to_dict`` output.

        Missing keys fall back to the dataclass defaults (fresh UUID, current
        time, empty question). ``query_results`` and ``session_metadata`` may
        be absent or ``None``; both are then treated as empty.

        Args:
            data: Mapping with any of the keys produced by ``to_dict``.

        Returns:
            A new session that does not share its metadata dict with ``data``.
        """
        # ``or`` also covers an explicit None (e.g. JSON null).
        query_results = [
            SearchQueryResults.from_dict(qr) for qr in (data.get("query_results") or [])
        ]

        # Only pass the scalar fields that are actually present so the
        # dataclass defaults are defined in a single place.
        scalar_fields = ("session_id", "session_timestamp", "original_question")
        present = {key: data[key] for key in scalar_fields if key in data}

        return cls(
            query_results=query_results,
            # Copy so that mutating the session never alters the input dict,
            # mirroring the defensive copy made by ``to_dict``.
            session_metadata=dict(data.get("session_metadata") or {}),
            **present,
        )
@@ -0,0 +1,26 @@
1
+ """
2
+ SAGE Filters - Data Filtering and Transformation
3
+
4
+ Layer: L3 (Core - Algorithm Library)
5
+
6
+ This module provides data filtering, transformation, and routing utilities
7
+ for agent workflows.
8
+
9
+ Available Filters:
10
+ - Tool Filter: Filter and select appropriate tools
11
+ - Evaluate Filter: Evaluate and score outputs
12
+ - Context Source: Context data sources
13
+ - Context Sink: Context data sinks
14
+ """
15
+
16
+ from .context_sink import * # noqa: F403
17
+ from .context_source import * # noqa: F403
18
+ from .evaluate_filter import * # noqa: F403
19
+ from .tool_filter import * # noqa: F403
20
+
21
# Names exported by a star-import of this package. Intentionally empty for
# now: re-exports from the submodules will be listed here as those modules
# are standardized.
__all__: list[str] = []

__version__ = "0.1.0"
@@ -0,0 +1,387 @@
1
+ import json
2
+ import os
3
+ import threading
4
+ import time
5
+ from datetime import datetime
6
+ from pathlib import Path
7
+ from typing import Any
8
+
9
+ from sage.common.core import SinkFunction
10
+ from sage.middleware.operators.context.model_context import ModelContext
11
+
12
+
13
class ContextFileSink(SinkFunction):
    """
    Sink that persists ``ModelContext`` objects to files.

    Supports two file formats ("json", "jsonl") and three directory
    organization strategies ("date", "sequence", "uuid"). When enabled, a
    JSON index file (``template_index.json``) inside the stage directory
    records one entry per saved context.
    """

    @staticmethod
    def get_default_template_directory() -> str:
        """
        Return the default data directory, ``<cwd>/.sage/data/model_context``.

        Follows the SAGE convention that all runtime data lives under
        ``.sage/``. The directory is created if it does not exist yet.
        """
        project_root = Path(os.getcwd())  # current working directory
        template_data_dir = project_root / ".sage" / "data" / "model_context"
        template_data_dir.mkdir(parents=True, exist_ok=True)
        return str(template_data_dir)

    @staticmethod
    def get_default_config() -> dict[str, Any]:
        """
        Return the default configuration.

        Returns:
            Dict[str, Any]: a fresh dictionary holding the default settings.
        """
        return {
            "base_directory": None,  # None means "use the default directory"
            "stage_directory": "general",  # directory name of the processing stage
            "file_format": "json",  # "json", "jsonl"
            "organization": "date",  # "date", "sequence", "uuid"
            "max_files_per_dir": 1000,  # bucket size used by "sequence" organization
            "create_index": True,
            "auto_create_dirs": True,
            "compress_old_files": False,  # compress old files (not read by this class)
            "backup_index": True,  # back up an existing index before re-creating it
        }

    def __init__(self, config: dict[str, Any], **kwargs):
        """
        Initialize the sink.

        Args:
            config: Configuration dictionary; its entries override the defaults.
                - base_directory: base output directory, ``None`` for the default
                - stage_directory: stage directory name, e.g. "questionbot",
                  "retriever", "chief", "critic"
                - file_format: file format ("json", "jsonl")
                - organization: file layout ("date", "sequence", "uuid")
                - max_files_per_dir: maximum number of files per directory
                - create_index: whether to maintain an index file
                - auto_create_dirs: whether to create directories automatically
                - compress_old_files: whether to compress old files
                - backup_index: whether to back up the index file
            **kwargs: Forwarded to the base class. For backward compatibility
                a subset of the settings above may also be passed here.

        Raises:
            TypeError: If ``config`` is not a dict.
        """
        super().__init__(**kwargs)

        # Merge the supplied configuration over the defaults (single update).
        self.config = self.get_default_config()
        if not isinstance(config, dict):
            raise TypeError(f"Expected a dict for config, got {type(config)}")
        self.config.update(config)

        # Backward compatibility: settings passed directly as keyword
        # arguments take precedence over ``config`` when they are not None.
        legacy_keys = (
            "base_directory",
            "file_format",
            "organization",
            "max_files_per_dir",
            "create_index",
            "stage_directory",
        )
        for key in legacy_keys:
            value = kwargs.get(key)
            if value is not None:
                self.config[key] = value

        # Resolve base/stage/full directories.
        self._setup_directories()

        # Index bookkeeping.
        self.index_file = self.full_directory / "template_index.json"
        self.index_lock = threading.Lock()
        self.saved_count = 0

        # Create the index only when it does not exist yet.
        if self.config["create_index"] and not self.index_file.exists():
            self._initialize_index()

    def _setup_directories(self) -> None:
        """Resolve the base, stage and full directories from the configuration."""
        # Base directory: configured value or the default location.
        if self.config["base_directory"] is None:
            base_dir = self.get_default_template_directory()
        else:
            base_dir = self.config["base_directory"]

        self.base_directory = Path(base_dir)

        # Stage directory below the base directory.
        self.stage_directory = self.base_directory / self.config["stage_directory"]

        # The full directory currently equals the stage directory,
        # e.g. ./data/template_data/questionbot/
        self.full_directory = self.stage_directory

        if self.config["auto_create_dirs"]:
            self.full_directory.mkdir(parents=True, exist_ok=True)

    def runtime_init(self, ctx):
        """
        Runtime initialization hook; logs the resolved directories and settings.

        Note: ctx is injected into self.ctx by the framework (BaseFunction property).
        This method logs initialization info after context is available.
        """
        # No need to call super().runtime_init(ctx) - BaseFunction doesn't have this method.
        # The framework injects ctx into self.ctx automatically.
        self.logger.info(f"TemplateFileSink runtime initialized with context: {ctx}")
        self.logger.info(f"Template base directory: {self.base_directory}")
        self.logger.info(f"Template stage directory: {self.stage_directory}")
        self.logger.info(f"Template full directory: {self.full_directory}")
        self.logger.info(f"File organization: {self.config['organization']}")
        self.logger.info(f"File format: {self.config['file_format']}")

    @staticmethod
    def _json_default(value: Any) -> str:
        """Serialize path-like values as strings; reject anything else."""
        if isinstance(value, os.PathLike):
            return str(value)
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    def _write_index(self, index_data: dict[str, Any]) -> None:
        """
        Write ``index_data`` to the index file without ever truncating it.

        The data is serialized to a string first, so a serialization error
        cannot leave a half-written index behind; the text is then written to
        a sibling temporary file which replaces the index in a single step.
        Path-like values (e.g. a ``Path`` given as ``base_directory``) are
        stored as strings.
        """
        payload = json.dumps(
            index_data, ensure_ascii=False, indent=2, default=self._json_default
        )
        tmp_file = self.index_file.with_name(self.index_file.name + ".tmp")
        with open(tmp_file, "w", encoding="utf-8") as f:
            f.write(payload)
        os.replace(tmp_file, self.index_file)

    def _initialize_index(self) -> None:
        """
        Create a fresh, empty index file.

        If ``backup_index`` is enabled and an index already exists, it is
        copied to a timestamped backup first; a failed backup is only logged.
        """
        index_data = {
            "created_at": datetime.now().isoformat(),
            "total_templates": 0,
            "config": self.config.copy(),  # snapshot of the full configuration
            "directory_structure": {
                "base_directory": str(self.base_directory),
                "stage_directory": str(self.stage_directory),
                "full_directory": str(self.full_directory),
            },
            "templates": {},
        }

        # Back up the existing index, if there is one.
        if self.config["backup_index"] and self.index_file.exists():
            backup_file = self.index_file.with_suffix(f".backup_{int(time.time())}.json")
            try:
                import shutil

                shutil.copy2(self.index_file, backup_file)
                self.logger.info(f"Backed up existing index to {backup_file}")
            except Exception as e:
                self.logger.warning(f"Failed to backup index: {e}")

        self._write_index(index_data)

    def _get_file_path(self, template: ModelContext) -> Path:
        """
        Determine the target file path according to the organization strategy.

        Layout: base_directory/stage_directory/organization_structure/filename

        Any ``organization`` value other than "date" or "sequence" is treated
        as "uuid". The target directory is created when ``auto_create_dirs``
        is enabled.

        Args:
            template: The ``ModelContext`` instance to be saved.

        Returns:
            Path: The file path for the template.
        """
        organization = self.config["organization"]
        file_format = self.config["file_format"]
        max_files = self.config["max_files_per_dir"]

        if organization == "date":
            # By date: ./data/template_data/questionbot/2025/01/15/
            # ``template.timestamp`` is divided by 1000 before conversion,
            # i.e. it is handled as milliseconds.
            dt = datetime.fromtimestamp(template.timestamp / 1000)
            org_dir = self.full_directory / f"{dt.year:04d}" / f"{dt.month:02d}" / f"{dt.day:02d}"
            filename = f"template_{template.uuid}.{file_format}"

        elif organization == "sequence":
            # By sequence number: ./data/template_data/questionbot/seq_0000-0999/
            seq_range = (template.sequence // max_files) * max_files
            org_dir = self.full_directory / f"seq_{seq_range:06d}-{seq_range + max_files - 1:06d}"
            filename = f"template_{template.sequence:06d}_{template.uuid[:8]}.{file_format}"

        else:  # uuid organization
            # By UUID prefix: ./data/template_data/questionbot/ab/cd/
            org_dir = self.full_directory / template.uuid[:2] / template.uuid[2:4]
            filename = f"template_{template.uuid}.{file_format}"

        # Make sure the directory exists.
        if self.config["auto_create_dirs"]:
            org_dir.mkdir(parents=True, exist_ok=True)

        return org_dir / filename

    def _update_index(self, template: ModelContext, file_path: Path) -> None:
        """
        Record a saved template in the index file.

        Does nothing when ``create_index`` is disabled. The index is read,
        updated and rewritten while holding ``index_lock``; any failure is
        logged and swallowed so that a broken index never fails a save.

        Args:
            template: The ``ModelContext`` that was saved.
            file_path: Where it was saved; must lie below ``full_directory``.
        """
        if not self.config["create_index"]:
            return

        with self.index_lock:
            try:
                with open(self.index_file, encoding="utf-8") as f:
                    index_data = json.load(f)

                # Update the summary fields.
                index_data["total_templates"] += 1
                index_data["last_updated"] = datetime.now().isoformat()

                # Build the per-template record.
                template_record = {
                    "uuid": template.uuid,
                    "sequence": template.sequence,
                    "timestamp": template.timestamp,
                    "file_path": str(file_path.relative_to(self.full_directory)),
                    "absolute_path": str(file_path),
                    "relative_to_base": str(file_path.relative_to(self.base_directory)),
                    "stage_directory": self.config["stage_directory"],
                    "raw_question_preview": (
                        template.raw_question[:100] if template.raw_question else None
                    ),
                    "has_response": bool(template.response),
                    "response_length": (len(template.response) if template.response else 0),
                    "chunks_count": (
                        len(template.retriver_chunks) if template.retriver_chunks else 0
                    ),
                    "prompts_count": len(template.prompts) if template.prompts else 0,
                    "organization": self.config["organization"],
                    "file_format": self.config["file_format"],
                    "saved_at": datetime.now().isoformat(),
                }

                # Records are keyed by UUID; saving the same UUID again
                # overwrites the earlier record.
                index_data["templates"][template.uuid] = template_record

                # Persist the updated index.
                self._write_index(index_data)

            except Exception as e:
                self.logger.error(f"Failed to update index: {e}")

    def execute(self, template: ModelContext) -> None:
        """
        Save a ``ModelContext`` to a file and record it in the index.

        Errors are logged rather than raised. An unsupported ``file_format``
        is reported as an error, and neither the index nor ``saved_count`` is
        touched in that case.

        Args:
            template: The ``ModelContext`` to save.
        """
        try:
            file_format = self.config["file_format"]
            if file_format not in ("json", "jsonl"):
                # Nothing would be written, so do not count this as a save.
                raise ValueError(f"Unsupported file_format: {file_format!r}")

            # Determine the target path.
            file_path = self._get_file_path(template)

            # Write the template.
            if file_format == "json":
                template.save_to_file(str(file_path))
            else:
                # JSONL: one JSON object per line, appended to the file.
                with open(file_path, "a", encoding="utf-8") as f:
                    f.write(template.to_json().replace("\n", "") + "\n")

            # Update the index.
            self._update_index(template, file_path)

            self.saved_count += 1

            self.logger.debug(f"Saved template {template.uuid} to {file_path}")

            # Log a summary every 10 saved templates.
            if self.saved_count % 10 == 0:
                self.logger.info(
                    f"TemplateFileSink[{self.config['stage_directory']}]: "
                    f"{self.saved_count} templates saved to {self.full_directory}"
                )

        except Exception as e:
            # The template itself may be the problem (e.g. None), so do not
            # assume it has a ``uuid`` while reporting the failure.
            template_id = getattr(template, "uuid", "<unknown>")
            self.logger.error(f"Failed to save template {template_id}: {e}")

    def set_stage_directory(self, stage_name: str):
        """
        Switch to a different stage directory at runtime.

        Re-resolves the directories, points the index at the new stage
        directory and creates a new index there if needed.

        Args:
            stage_name: Name of the new stage directory.
        """
        old_stage = self.config["stage_directory"]
        self.config["stage_directory"] = stage_name
        self._setup_directories()

        # Point the index at the new directory.
        self.index_file = self.full_directory / "template_index.json"

        # Initialize a new index when required.
        if self.config["create_index"] and not self.index_file.exists():
            self._initialize_index()

        self.logger.info(f"Stage directory changed from '{old_stage}' to '{stage_name}'")
        self.logger.info(f"New full directory: {self.full_directory}")

    def get_storage_info(self) -> dict[str, Any]:
        """
        Return the current configuration, directories and runtime counters.

        Returns:
            Dict[str, Any]: storage information with the keys ``config``,
            ``directory_structure`` and ``runtime_stats``.
        """
        return {
            "config": self.config.copy(),
            "directory_structure": {
                "base_directory": str(self.base_directory),
                "stage_directory": str(self.stage_directory),
                "full_directory": str(self.full_directory),
            },
            "runtime_stats": {
                "saved_count": self.saved_count,
                "index_file": str(self.index_file),
                "index_exists": self.index_file.exists(),
                "directory_exists": self.full_directory.exists(),
            },
        }

    def get_stage_statistics(self) -> dict[str, Any]:
        """
        Compute statistics for the current stage from its index file.

        Returns:
            Dict[str, Any]: the statistics, or ``{"error": ...}`` when the
            index is missing or cannot be read. ``avg_response_length`` is
            averaged over templates that have a response; ``date_range``
            holds the smallest and largest recorded ``timestamp`` values.
        """
        try:
            if not self.index_file.exists():
                return {"error": "Index file does not exist"}

            with open(self.index_file, encoding="utf-8") as f:
                index_data = json.load(f)

            templates = list(index_data.get("templates", {}).values())

            stats = {
                "stage_directory": self.config["stage_directory"],
                "total_templates": len(templates),
                "with_response": sum(1 for t in templates if t.get("has_response")),
                "without_response": sum(1 for t in templates if not t.get("has_response")),
                "avg_response_length": 0,
                "avg_chunks": 0,
                "avg_prompts": 0,
                "date_range": {"earliest": None, "latest": None},
            }

            if templates:
                # Averages.
                response_lengths = [
                    t.get("response_length", 0) for t in templates if t.get("has_response")
                ]
                stats["avg_response_length"] = (
                    sum(response_lengths) / len(response_lengths) if response_lengths else 0
                )

                stats["avg_chunks"] = sum(t.get("chunks_count", 0) for t in templates) / len(
                    templates
                )
                stats["avg_prompts"] = sum(t.get("prompts_count", 0) for t in templates) / len(
                    templates
                )

                # Time range.
                timestamps = [t.get("timestamp", 0) for t in templates]
                stats["date_range"]["earliest"] = min(timestamps)
                stats["date_range"]["latest"] = max(timestamps)

            return stats

        except Exception as e:
            self.logger.error(f"Failed to get stage statistics: {e}")
            return {"error": str(e)}