sirchmunk-0.0.1-py3-none-any.whl → sirchmunk-0.0.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sirchmunk/api/__init__.py +1 -0
- sirchmunk/api/chat.py +1123 -0
- sirchmunk/api/components/__init__.py +0 -0
- sirchmunk/api/components/history_storage.py +402 -0
- sirchmunk/api/components/monitor_tracker.py +518 -0
- sirchmunk/api/components/settings_storage.py +353 -0
- sirchmunk/api/history.py +254 -0
- sirchmunk/api/knowledge.py +411 -0
- sirchmunk/api/main.py +120 -0
- sirchmunk/api/monitor.py +219 -0
- sirchmunk/api/run_server.py +54 -0
- sirchmunk/api/search.py +230 -0
- sirchmunk/api/settings.py +309 -0
- sirchmunk/api/tools.py +315 -0
- sirchmunk/cli/__init__.py +11 -0
- sirchmunk/cli/cli.py +789 -0
- sirchmunk/learnings/knowledge_base.py +5 -2
- sirchmunk/llm/prompts.py +12 -1
- sirchmunk/retrieve/text_retriever.py +186 -2
- sirchmunk/scan/file_scanner.py +2 -2
- sirchmunk/schema/knowledge.py +119 -35
- sirchmunk/search.py +384 -26
- sirchmunk/storage/__init__.py +2 -2
- sirchmunk/storage/{knowledge_manager.py → knowledge_storage.py} +265 -60
- sirchmunk/utils/constants.py +7 -5
- sirchmunk/utils/embedding_util.py +217 -0
- sirchmunk/utils/tokenizer_util.py +36 -1
- sirchmunk/version.py +1 -1
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/METADATA +196 -14
- sirchmunk-0.0.2.dist-info/RECORD +69 -0
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/WHEEL +1 -1
- sirchmunk-0.0.2.dist-info/top_level.txt +2 -0
- sirchmunk_mcp/__init__.py +25 -0
- sirchmunk_mcp/cli.py +478 -0
- sirchmunk_mcp/config.py +276 -0
- sirchmunk_mcp/server.py +355 -0
- sirchmunk_mcp/service.py +327 -0
- sirchmunk_mcp/setup.py +15 -0
- sirchmunk_mcp/tools.py +410 -0
- sirchmunk-0.0.1.dist-info/RECORD +0 -45
- sirchmunk-0.0.1.dist-info/top_level.txt +0 -1
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/entry_points.txt +0 -0
- {sirchmunk-0.0.1.dist-info → sirchmunk-0.0.2.dist-info}/licenses/LICENSE +0 -0
sirchmunk/learnings/knowledge_base.py CHANGED

@@ -20,7 +20,7 @@ from sirchmunk.schema.knowledge import (
 )
 from sirchmunk.schema.metadata import FileInfo
 from sirchmunk.schema.request import Request
-from sirchmunk.utils.constants import
+from sirchmunk.utils.constants import DEFAULT_SIRCHMUNK_WORK_PATH
 from sirchmunk.utils.file_utils import StorageStructure, fast_extract
 from sirchmunk.utils import create_logger, LogCallback
 from sirchmunk.utils.utils import extract_fields

@@ -51,7 +51,9 @@ class KnowledgeBase:
         self.llm = llm
         self.metadata_map = metadata_map
         self.work_path: Path = (
-
+            Path(DEFAULT_SIRCHMUNK_WORK_PATH).expanduser().resolve()
+            if work_path is None
+            else Path(work_path).expanduser().resolve()
         )
         self.metadata_path: Path = (
             self.work_path / StorageStructure.CACHE_DIR / StorageStructure.METADATA_DIR

@@ -208,6 +210,7 @@ class KnowledgeBase:

         cluster_id = f"C{hashlib.sha256(cluster_text.encode('utf-8')).hexdigest()[:10]}"

+        # TODO: Adapt cluster attributes based on real scenarios
         cluster = KnowledgeCluster(
             id=cluster_id,
             name=cluster_name,
sirchmunk/llm/prompts.py CHANGED

@@ -169,8 +169,19 @@ Analyze the provided {text_content} and generate a concise summary in the form o
 - **User Input**: {user_input}
 - **Search Result Text**: {text_content}

-###
+### Quality Evaluation
+After generating the summary, evaluate whether this knowledge cluster is worth saving to the persistent cache based on:
+1. Does the search result contain substantial, relevant information for the user input?
+2. Is the content meaningful and not just error messages or "no information found"?
+3. Are there sufficient evidences and context to answer the user's query?
+
+If YES to all above, output "true"; otherwise output "false".
+
+### Output Format
+<SUMMARY>
 [Generate the Markdown Briefing here]
+</SUMMARY>
+<SHOULD_SAVE>true/false</SHOULD_SAVE>
 """

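The new prompt contract wraps the briefing in <SUMMARY> tags and appends a <SHOULD_SAVE> flag, so a caller can recover both with plain tag matching. A minimal, illustrative sketch of such a consumer (the parse_summary_response helper below is hypothetical and not part of the package):

import re
from typing import Optional, Tuple

def parse_summary_response(text: str) -> Tuple[Optional[str], bool]:
    """Extract the Markdown briefing and the save flag from an LLM reply."""
    summary = re.search(r"<SUMMARY>(.*?)</SUMMARY>", text, re.DOTALL)
    save = re.search(r"<SHOULD_SAVE>\s*(true|false)\s*</SHOULD_SAVE>", text, re.IGNORECASE)
    return (
        summary.group(1).strip() if summary else None,
        bool(save) and save.group(1).lower() == "true",
    )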
sirchmunk/retrieve/text_retriever.py CHANGED

@@ -9,7 +9,7 @@ from typing import Any, Dict, List, Literal, Optional, Union

 from loguru import logger

-from ..utils.constants import GREP_CONCURRENT_LIMIT,
+from ..utils.constants import GREP_CONCURRENT_LIMIT, DEFAULT_SIRCHMUNK_WORK_PATH
 from ..utils.file_utils import StorageStructure
 from .base import BaseRetriever

@@ -29,7 +29,7 @@ class GrepRetriever(BaseRetriever):
     def __init__(self, work_path: Union[str, Path] = None, **kwargs):
         super().__init__()

-        self.work_path: Path = Path(work_path or
+        self.work_path: Path = Path(work_path or DEFAULT_SIRCHMUNK_WORK_PATH).expanduser().resolve()
         self.rga_cache: Path = (
             self.work_path / StorageStructure.CACHE_DIR / StorageStructure.GREP_DIR
         )

@@ -688,6 +688,190 @@

         return result["stdout"].strip().splitlines() if result["stdout"].strip() else []

+    async def retrieve_by_filename(
+        self,
+        patterns: Union[str, List[str]],
+        path: Union[str, Path, List[str], List[Path], None] = None,
+        *,
+        case_sensitive: bool = False,
+        max_depth: Optional[int] = None,
+        include: Optional[List[str]] = None,
+        exclude: Optional[List[str]] = None,
+        file_type: Optional[str] = None,
+        rank: bool = True,
+        timeout: float = 60.0,
+    ) -> List[Dict[str, Any]]:
+        """Search for files by filename patterns (fast file name matching).
+
+        This method performs filename-only search without reading file contents,
+        making it significantly faster than content-based search.
+
+        Args:
+            patterns: Single pattern (str) or list of patterns (List[str]) to match filenames.
+                Patterns are treated as regex by default (e.g., "test.*\\.py").
+            path: Single path (str/Path) or multiple paths (List[str]/List[Path]) to search in.
+            case_sensitive: If True, enable case-sensitive filename matching.
+            max_depth: Maximum directory depth to search.
+            include: List of glob patterns to include (e.g., ["*.py", "*.md"]).
+            exclude: List of glob patterns to exclude (e.g., ["*.pyc", "*.log"]).
+            file_type: Search only files of given type (e.g., 'py', 'md').
+            rank: If True, rank results by pattern match quality (e.g., exact match > partial match).
+            timeout: Maximum time in seconds to wait for the search to complete.
+
+        Returns:
+            List of match objects with structure:
+            [
+                {
+                    'path': '/absolute/path/to/file.py',
+                    'filename': 'file.py',
+                    'match_score': 1.0,  # relevance score (0.0-1.0)
+                    'type': 'filename_match'
+                },
+                ...
+            ]
+        """
+        # Normalize patterns
+        if isinstance(patterns, str):
+            patterns = [patterns]
+
+        logger.debug(f"retrieve_by_filename called with patterns: {patterns}, path: {path}, "
+                     f"include: {include}, exclude: {exclude}, max_depth: {max_depth}")
+
+        # Normalize paths
+        if path is None:
+            paths = ["."]
+        elif isinstance(path, (str, Path)):
+            paths = [str(path)]
+        else:
+            paths = [str(p) for p in path]
+
+        # List all files in the specified paths
+        all_files = []
+        for search_path in paths:
+            try:
+                files = await self.list_files(
+                    path=search_path,
+                    max_depth=max_depth,
+                    include=include,
+                    exclude=exclude,
+                    file_type=file_type,
+                )
+                all_files.extend(files)
+            except Exception as e:
+                logger.warning(f"Failed to list files in {search_path}: {e}")
+                continue
+
+        if not all_files:
+            logger.debug("No files found to search")
+            return []
+
+        logger.debug(f"Searching through {len(all_files)} files with patterns: {patterns}")
+
+        # Filter files by patterns
+        results = []
+        for file_path in all_files:
+            # Get both absolute and relative paths for proper handling
+            file_path_obj = Path(file_path)
+            filename = file_path_obj.name
+
+            # Check if filename matches any pattern
+            for pattern in patterns:
+                try:
+                    # Compile regex pattern
+                    flags = 0 if case_sensitive else re.IGNORECASE
+                    regex = re.compile(pattern, flags)
+
+                    match = regex.search(filename)
+                    if match:
+                        logger.debug(f"Pattern '{pattern}' matched file: {filename}")
+
+                        # Calculate match score
+                        match_score = self._calculate_filename_match_score(
+                            filename=filename,
+                            pattern=pattern,
+                            case_sensitive=case_sensitive
+                        )
+
+                        # Use absolute path if file exists, otherwise keep original path
+                        try:
+                            abs_path = str(file_path_obj.resolve())
+                        except (OSError, RuntimeError):
+                            abs_path = str(file_path_obj.absolute()) if file_path_obj.is_absolute() else file_path
+
+                        results.append({
+                            'path': abs_path,
+                            'filename': filename,
+                            'match_score': match_score,
+                            'type': 'filename_match',
+                            'matched_pattern': pattern,
+                        })
+                        break  # Only count each file once (first matching pattern)
+
+                except re.error as e:
+                    logger.warning(f"Invalid regex pattern '{pattern}': {e}")
+                    continue
+
+        logger.debug(f"Found {len(results)} matching files")
+
+        # Rank results by match score if requested
+        if rank and results:
+            results.sort(key=lambda x: x['match_score'], reverse=True)
+
+        return results
+
+    @staticmethod
+    def _calculate_filename_match_score(
+        filename: str,
+        pattern: str,
+        case_sensitive: bool = False
+    ) -> float:
+        """Calculate relevance score for filename pattern match.
+
+        Args:
+            filename: The filename that matched
+            pattern: The regex pattern that was matched
+            case_sensitive: Whether the match was case-sensitive
+
+        Returns:
+            Score between 0.0 and 1.0, where:
+            - 1.0 = exact match (highest priority)
+            - 0.9 = exact match with different case
+            - 0.7-0.8 = starts with pattern
+            - 0.5-0.6 = contains pattern
+            - 0.3-0.4 = partial regex match
+        """
+        # Normalize for comparison
+        fn_lower = filename.lower()
+        pattern_lower = pattern.lower()
+
+        # Remove regex special characters for literal comparison
+        pattern_literal = re.sub(r'[.*+?^${}()|[\]\\]', '', pattern)
+        pattern_literal_lower = pattern_literal.lower()
+
+        # Exact match (case-sensitive)
+        if filename == pattern or filename == pattern_literal:
+            return 1.0
+
+        # Exact match (case-insensitive)
+        if not case_sensitive and (fn_lower == pattern_lower or fn_lower == pattern_literal_lower):
+            return 0.9
+
+        # Starts with pattern
+        if filename.startswith(pattern_literal):
+            return 0.8
+        if fn_lower.startswith(pattern_literal_lower):
+            return 0.75
+
+        # Contains pattern (full)
+        if pattern_literal in filename:
+            return 0.6
+        if pattern_literal_lower in fn_lower:
+            return 0.55
+
+        # Partial match (proportional to match length)
+        match_ratio = len(pattern_literal) / max(len(filename), 1)
+        return 0.3 + (match_ratio * 0.2)  # Score between 0.3 and 0.5
+
     def file_types(self) -> Dict[str, List[str]]:
         """List supported file types and their associated globs/extensions.

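Taken together, retrieve_by_filename lists candidate files, regex-matches their names, scores each hit via _calculate_filename_match_score, and optionally ranks by score. A minimal usage sketch, assuming GrepRetriever is importable from sirchmunk.retrieve.text_retriever as shown above and that the constructor defaults suffice:

import asyncio
from sirchmunk.retrieve.text_retriever import GrepRetriever

async def main() -> None:
    retriever = GrepRetriever()
    # Filename-only search: regex patterns are applied to file names, never contents.
    matches = await retriever.retrieve_by_filename(
        patterns=[r"test_.*\.py", "README"],
        path=".",
        exclude=["*.pyc"],
    )
    for m in matches:  # ranked by match_score (1.0 = exact filename match)
        print(f"{m['match_score']:.2f}  {m['path']}")

asyncio.run(main())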
sirchmunk/scan/file_scanner.py CHANGED

@@ -58,9 +58,9 @@ class FileScanner(BaseScanner):
             corpus_path = [corpus_path]
         self.corpus_paths: List[Path] = [Path(p).resolve() for p in corpus_path]

-        # Set work and metadata paths
+        # Set work and metadata paths (expand ~ and resolve to absolute path)
         self.work_path: Path = (
-            Path.cwd() if work_path is None else Path(work_path).resolve()
+            Path.cwd() if work_path is None else Path(work_path).expanduser().resolve()
         )
         self.metadata_path: Path = (
             self.work_path / StorageStructure.CACHE_DIR / StorageStructure.METADATA_DIR
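The same expanduser().resolve() normalization now appears in knowledge_base.py, text_retriever.py, and file_scanner.py, so a work_path such as "~/sirchmunk" is expanded to the user's home directory and made absolute. A quick standard-library illustration:

from pathlib import Path

# "~/sirchmunk" -> e.g. /home/alice/sirchmunk (absolute, symlinks resolved)
work_path = Path("~/sirchmunk").expanduser().resolve()
print(work_path)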
sirchmunk/schema/knowledge.py CHANGED

@@ -230,6 +230,10 @@ class KnowledgeCluster:
     # Used to track which sources contributed to this knowledge cluster
     search_results: List[str] = None

+    # Historical queries: list of original user input queries that led to this cluster
+    # Used for semantic similarity matching and cluster reuse
+    queries: List[str] = None
+
     def __post_init__(self):
         if self.related_clusters is None:
             self.related_clusters = []

@@ -237,6 +241,9 @@ class KnowledgeCluster:
         if self.search_results is None:
             self.search_results = []

+        if self.queries is None:
+            self.queries = []
+
         if self.create_time is None:
             self.create_time = datetime.now(timezone.utc)

@@ -246,6 +253,117 @@ class KnowledgeCluster:
         if self.version is None:
             self.version = 0

+    def __repr__(self) -> str:
+        """
+        Return a concise representation for debugging.
+        """
+        # Get content length
+        content_len = 0
+        if isinstance(self.content, str):
+            content_len = len(self.content)
+        elif isinstance(self.content, list):
+            content_len = sum(len(c) for c in self.content)
+
+        return (
+            f"KnowledgeCluster(id={self.id!r}, name={self.name!r}, "
+            f"version={self.version}, lifecycle={self.lifecycle.value}, "
+            f"evidences={len(self.evidences)}, queries={len(self.queries)}, "
+            f"content_len={content_len}, search_results={len(self.search_results)})"
+        )
+
+    def __str__(self) -> str:
+        """
+        Return a human-readable string representation.
+        """
+        separator = "─" * 70  # Horizontal separator line
+
+        # Extract description text
+        desc_text = ""
+        if isinstance(self.description, str):
+            desc_text = self.description
+        elif isinstance(self.description, list):
+            desc_preview = []
+            for i, item in enumerate(self.description, 1):
+                desc_preview.append(f"  [{i}] {item}")
+            desc_text = "\n".join(desc_preview)
+
+        # Extract content text
+        content_text = ""
+        if isinstance(self.content, str):
+            content_text = self.content
+        elif isinstance(self.content, list):
+            content_text = self.content[0] if self.content else ""  # Preview first item
+
+        # Build basic info
+        lines = [
+            f"━━━ KnowledgeCluster: {self.name} ━━━",
+            f"ID: {self.id}",
+            f"Description:\n{desc_text}" if desc_text else "Description: N/A",
+            f"Lifecycle: {self.lifecycle.value} | Version: {self.version}",
+            f"Confidence: {self.confidence:.3f}" if self.confidence else "Confidence: N/A",
+        ]
+
+        # Add content preview
+        if content_text:
+            lines.append(separator)
+            lines.append(f"Content Preview:\n{content_text}")
+
+        # Add evidences with preview (max 5)
+        if self.evidences:
+            lines.append(separator)
+            lines.append(f"Evidences ({len(self.evidences)} total):")
+            for i, evidence in enumerate(self.evidences[:5], 1):
+                file_path = str(evidence.file_or_url)
+                # Shorten path if too long
+                if len(file_path) > 60:
+                    file_path = "..." + file_path[-57:]
+                summary_preview = evidence.summary[:80] + "..." if len(evidence.summary) > 80 else evidence.summary
+                lines.append(f"  [{i}] {file_path}")
+                lines.append(f"      {summary_preview}")
+                lines.append(f"      Snippets: {len(evidence.snippets)}, Found: {evidence.is_found}")
+            if len(self.evidences) > 5:
+                lines.append(f"  ... (+{len(self.evidences) - 5} more evidences)")
+
+        # Add optional fields
+        has_optional_fields = False
+        optional_lines = []
+
+        if self.hotness is not None:
+            optional_lines.append(f"Hotness: {self.hotness:.3f}")
+            has_optional_fields = True
+
+        if self.abstraction_level:
+            optional_lines.append(f"Abstraction: {self.abstraction_level.name}")
+            has_optional_fields = True
+
+        if self.queries:
+            queries_preview = ", ".join(f'"{q}"' for q in self.queries[:3])
+            if len(self.queries) > 3:
+                queries_preview += f" (+{len(self.queries) - 3} more)"
+            optional_lines.append(f"Related Queries: {queries_preview}")
+            has_optional_fields = True
+
+        if has_optional_fields:
+            lines.append(separator)
+            lines.extend(optional_lines)
+
+        # Add search results
+        if self.search_results:
+            lines.append(separator)
+            lines.append(f"Search Results ({len(self.search_results)} files):")
+            for i, result in enumerate(self.search_results[:5], 1):
+                result_preview = result[:80] + "..." if len(result) > 80 else result
+                lines.append(f"  [{i}] {result_preview}")
+            if len(self.search_results) > 5:
+                lines.append(f"  ... (+{len(self.search_results) - 5} more)")
+
+        # Add timestamp
+        if self.last_modified:
+            lines.append(separator)
+            lines.append(f"Last Modified: {self.last_modified.strftime('%Y-%m-%d %H:%M:%S')}")
+
+        return "\n".join(lines)
+
     @property
     def primary_evidence_files(self) -> Set[str]:
         """Return set of unique file IDs backing this cluster — useful for evidence-layer prefetch."""

@@ -279,40 +397,6 @@ class KnowledgeCluster:
             "version": self.version,
             "related_clusters": [rc.to_dict() for rc in self.related_clusters],
             "search_results": self.search_results,
+            "queries": self.queries,
         }

-
-if __name__ == "__main__":
-
-    # Create instance
-    cluster = KnowledgeCluster(
-        id="C1001",
-        name="Test Cluster",
-        description=["A desc from perspective A.", "A desc from perspective B."],
-        content="Detailed content of the knowledge cluster.",
-        scripts=["print('Hello World')"],
-        resources=[
-            {"type": "url", "value": "https://example.com"},
-            {"type": "file", "value": "/data/image1.png"},
-        ],
-        patterns=["pattern A", "pattern B"],
-        constraints=[Constraint("x > 0", "low", "x must be positive")],
-        evidences=[
-            EvidenceUnit(
-                doc_id="doc1",
-                file_or_url=Path("/data/file.txt"),
-                segment={"text": "supporting text", "type": "match", "line_number": 10},
-                score=0.9,
-                extracted_at=datetime(2025, 1, 1),
-            )
-        ],
-        confidence=0.85,
-        abstraction_level=AbstractionLevel.PRINCIPLE,
-        landmark_potential=0.6,
-        hotness=0.4,
-        lifecycle=Lifecycle.STABLE,
-        create_time=datetime(2025, 1, 1),
-        last_modified=datetime(2025, 1, 2),
-    )
-
-    print(cluster.to_dict())