memplex 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memnex/__init__.py +31 -0
- memnex/__main__.py +6 -0
- memnex/_plugin/.claude-plugin/plugin.json +24 -0
- memnex/_plugin/.mcp.json +9 -0
- memnex/_plugin/__init__.py +0 -0
- memnex/_plugin/hooks/hooks.json +43 -0
- memnex/_plugin/scripts/hook-runner.py +166 -0
- memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
- memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
- memnex/_plugin/skills/mem-search/SKILL.md +85 -0
- memnex/_plugin/skills/mem-write/SKILL.md +78 -0
- memnex/adapters/__init__.py +14 -0
- memnex/adapters/claude_skill.py +169 -0
- memnex/adapters/cli.py +525 -0
- memnex/adapters/http_api.py +314 -0
- memnex/adapters/mcp_server.py +448 -0
- memnex/compaction.py +563 -0
- memnex/config.py +366 -0
- memnex/core/__init__.py +13 -0
- memnex/core/associator/__init__.py +8 -0
- memnex/core/associator/domain_classifier.py +75 -0
- memnex/core/associator/entity_aligner.py +127 -0
- memnex/core/associator/ref_linker.py +197 -0
- memnex/core/associator/term_mapper.py +77 -0
- memnex/core/dictionaries/__init__.py +50 -0
- memnex/core/engine.py +667 -0
- memnex/core/extractors/__init__.py +15 -0
- memnex/core/extractors/docx.py +97 -0
- memnex/core/extractors/image.py +233 -0
- memnex/core/extractors/markdown.py +139 -0
- memnex/core/extractors/pdf.py +133 -0
- memnex/core/extractors/vision_mapper.py +131 -0
- memnex/core/handlers/__init__.py +7 -0
- memnex/core/handlers/clipboard.py +40 -0
- memnex/core/handlers/file_handler.py +62 -0
- memnex/core/handlers/url_handler.py +132 -0
- memnex/llm/__init__.py +25 -0
- memnex/llm/enhancer.py +226 -0
- memnex/llm/fallback_chain.py +87 -0
- memnex/llm/injection_guard.py +178 -0
- memnex/llm/provider.py +130 -0
- memnex/llm/providers/__init__.py +22 -0
- memnex/llm/providers/anthropic.py +135 -0
- memnex/llm/providers/local.py +135 -0
- memnex/llm/providers/rule_based.py +68 -0
- memnex/llm/sanitizer.py +67 -0
- memnex/models/__init__.py +68 -0
- memnex/models/feedback.py +42 -0
- memnex/models/graph.py +33 -0
- memnex/models/memory.py +102 -0
- memnex/models/misc.py +185 -0
- memnex/models/paragraph.py +45 -0
- memnex/models/search.py +51 -0
- memnex/models/source.py +23 -0
- memnex/models/task.py +62 -0
- memnex/processing/__init__.py +1 -0
- memnex/processing/graph_builder.py +278 -0
- memnex/processing/merger/__init__.py +6 -0
- memnex/processing/merger/confidence_calculator.py +127 -0
- memnex/processing/merger/conflict_resolver.py +116 -0
- memnex/retrieval/__init__.py +1 -0
- memnex/retrieval/dedup.py +386 -0
- memnex/retrieval/embedding.py +289 -0
- memnex/retrieval/reranker.py +299 -0
- memnex/service.py +902 -0
- memnex/storage/__init__.py +65 -0
- memnex/storage/base.py +132 -0
- memnex/storage/changelog.py +106 -0
- memnex/storage/feedback.py +486 -0
- memnex/storage/lite/__init__.py +5 -0
- memnex/storage/lite/store.py +606 -0
- memnex/storage/vector.py +265 -0
- memnex/wiki/__init__.py +11 -0
- memnex/wiki/community.py +221 -0
- memnex/wiki/compiler.py +545 -0
- memnex/wiki/generator.py +270 -0
- memnex/wiki/search.py +282 -0
- memnex/worker.py +412 -0
- memplex-3.2.0.dist-info/METADATA +37 -0
- memplex-3.2.0.dist-info/RECORD +83 -0
- memplex-3.2.0.dist-info/WHEEL +5 -0
- memplex-3.2.0.dist-info/entry_points.txt +2 -0
- memplex-3.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
"""Map Vision LLM components to L2 Function structures."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Optional, Dict, Any
|
|
4
|
+
from memnex.models.memory import Function
|
|
5
|
+
from memnex.models.misc import FieldValue
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class VisionMapper:
    """Convert Vision LLM output into L2 ``Function`` entities.

    The input is a dict that may carry a ``"components"`` list plus
    ``"layout"`` and ``"page_type"`` entries. Each component is read as a
    dict with optional ``"type"``, ``"label"``, ``"function"`` and ``"data"``
    keys; nothing else is inspected.
    """

    # Raw component type reported by the vision model -> canonical type name.
    # Types missing from this table are passed through unchanged.
    COMPONENT_TYPE_MAP = {
        "button": "button",
        "nav": "navigation",
        "navbar": "navigation",
        "input": "input_field",
        "textfield": "input_field",
        "card": "card",
        "kpi": "metric_card",
        "chart": "chart",
        "graph": "chart",
        "table": "table",
        "form": "form",
        "modal": "modal",
        "dialog": "modal",
        "sidebar": "sidebar",
        "header": "header",
        "footer": "footer",
        "label": "label",
        "text": "text",
        "icon": "icon",
        "image": "image",
        "link": "link",
        "menu": "menu",
        "dropdown": "dropdown",
        "checkbox": "checkbox",
        "radio": "radio",
        "switch": "switch",
        "slider": "slider",
        "tab": "tab",
    }

    def vision_to_functions(
        self,
        vision_result: dict,
        source_id: str = "vision"
    ) -> List[Function]:
        """Convert every usable component of *vision_result* to a Function.

        Args:
            vision_result: Vision LLM output; a missing or null
                ``"components"`` entry is treated as an empty list.
            source_id: Identifier embedded in each generated Function id and
                recorded as its only source paragraph.

        Returns:
            One Function per component that has a label or a function name,
            in component order. Entries that are not dicts are skipped.
        """
        if not isinstance(vision_result, dict):
            return []

        # ``or []`` also covers an explicit ``"components": null``.
        components = vision_result.get("components") or []
        functions = []

        for index, component in enumerate(components):
            # The model output is not validated upstream (as far as this
            # module shows), so tolerate stray non-dict entries.
            if not isinstance(component, dict):
                continue
            func = self._component_to_function(
                component, index, source_id, vision_result
            )
            if func is not None:
                functions.append(func)

        return functions

    def _component_to_function(
        self,
        component: dict,
        index: int,
        source_id: str,
        vision_result: dict
    ) -> Optional[Function]:
        """Convert a single vision component to a Function.

        Returns ``None`` when the component has neither a label nor a
        function name, because there is nothing to name the Function after.
        """
        comp_type = component.get("type") or "unknown"
        label = self._clean_text(component.get("label"))
        function_name = self._clean_text(component.get("function"))
        data = component.get("data", {})

        if not label and not function_name:
            return None

        # The visible label is preferred for display; the function name is
        # the fallback so descriptions never interpolate an empty string.
        name = label or function_name

        if function_name:
            normalized = self._normalize_name(function_name)
            if comp_type in ("button", "nav"):
                trigger_desc = f"点击 {name} 按钮"
            else:
                trigger_desc = f"与 {name} 交互"
        else:
            normalized = self._normalize_name(label)
            trigger_desc = f"查看 {label}"

        action_desc = self._build_action(component)

        return Function(
            # Position in the component list, zero-padded to three digits.
            id=f"vision_{source_id}_{index:03d}",
            name=name,
            name_normalized=normalized,
            source_paragraphs=[source_id],
            trigger=[FieldValue(desc=trigger_desc)],
            condition=[],
            action=[FieldValue(desc=action_desc)],
            benefit=[],
            confidence=0.85,
            attributes={
                "component_type": self.COMPONENT_TYPE_MAP.get(comp_type, comp_type),
                "vision_data": data,
                "layout": vision_result.get("layout"),
                "page_type": vision_result.get("page_type"),
            }
        )

    @staticmethod
    def _clean_text(value) -> str:
        """Return *value* as a stripped string; ``None`` becomes ``""``."""
        if value is None:
            return ""
        return str(value).strip()

    def _normalize_name(self, name: str) -> str:
        """Lower-case *name* and join its word-like runs with underscores.

        Every character other than ASCII letters, digits and the CJK range
        used by the pattern becomes ``_``; leading/trailing underscores are
        removed and repeated underscores are collapsed. camelCase words are
        not split.
        """
        normalized = re.sub(r'[^a-zA-Z0-9一-鿿]', '_', name)
        normalized = normalized.strip('_').lower()
        return re.sub(r'_+', '_', normalized)

    def _build_action(self, component: dict) -> str:
        """Build the action description for *component*.

        The wording depends on the raw component type; unknown types get a
        generic "interact with" description. When the component has no label
        the function name is used instead.
        """
        comp_type = component.get("type") or ""
        label = (
            self._clean_text(component.get("label"))
            or self._clean_text(component.get("function"))
        )

        action_map = {
            "button": f"点击 {label}",
            "nav": f"导航到 {label}",
            "navbar": f"导航到 {label}",
            "input": f"输入 {label}",
            "textfield": f"输入 {label}",
            "form": f"提交 {label} 表单",
            "link": f"跳转 {label}",
            "menu": f"打开 {label} 菜单",
            "dropdown": f"选择 {label}",
            "modal": f"打开 {label} 弹窗",
        }

        return action_map.get(comp_type, f"与 {label} 交互")
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Handle text paste input."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Tuple
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class ClipboardHandler:
    """Classify pasted text as markdown or plain text."""

    def parse(self, content: str) -> List[Tuple[str, str]]:
        """
        Classify pasted content.

        Returns:
            An empty list for empty or whitespace-only input; otherwise a
            single ``(content_type, content)`` tuple where content_type is
            "markdown" or "text" and content has surrounding whitespace
            removed.
        """
        stripped = content.strip() if content else ""
        if not stripped:
            return []

        kind = "markdown" if self._is_markdown(stripped) else "text"
        return [(kind, stripped)]

    def _is_markdown(self, content: str) -> bool:
        """Heuristic: does the text, or any of its lines, open with a marker?"""
        markers = ('#', '```', '- ', '* ', '[ ]', '**', '__')

        # str.startswith accepts a tuple, covering the "first line" case.
        if content.startswith(markers):
            return True
        # A marker right after a newline means some later line starts with it.
        return any('\n' + marker in content for marker in markers)
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
"""Handle local file input."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import List, Tuple, Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FileHandler:
    """Read local files and classify them by file extension."""

    # Lower-case extensions (with leading dot) this handler recognises.
    SUPPORTED_EXTENSIONS = {
        '.md', '.markdown', '.txt',
        '.png', '.jpg', '.jpeg', '.gif', '.bmp', '.pdf',
        '.docx',
    }

    def can_handle(self, path: str) -> bool:
        """Return True when the path's extension (case-insensitive) is supported."""
        return Path(path).suffix.lower() in self.SUPPORTED_EXTENSIONS

    def read(self, path: str) -> Optional[Tuple[str, str]]:
        """
        Read or reference a file according to its extension.

        Returns:
            ``("markdown" | "text", file_content)`` for text files,
            ``("image" | "pdf" | "docx", path)`` for binary formats (the file
            is not opened), or None when the path is not an existing regular
            file or its extension is unsupported.

        Raises:
            UnicodeDecodeError: if a text file is not valid UTF-8.
        """
        # isfile (not exists): a directory named e.g. "notes.md" must not
        # reach open(), which would raise instead of returning None.
        if not os.path.isfile(path):
            return None

        ext = Path(path).suffix.lower()

        if ext in ('.md', '.markdown', '.txt'):
            with open(path, 'r', encoding='utf-8') as f:
                content = f.read()
            return ("markdown" if ext in ('.md', '.markdown') else "text", content)

        if ext in ('.png', '.jpg', '.jpeg', '.gif', '.bmp'):
            return ("image", path)

        if ext == '.pdf':
            return ("pdf", path)

        if ext == '.docx':
            return ("docx", path)

        return None

    def list_files(self, directory: str, recursive: bool = False) -> List[str]:
        """List supported files in *directory*, sorted by path.

        Extension matching is case-insensitive, in line with ``can_handle``.
        Only regular files are returned; a directory whose name happens to
        end in a supported extension is ignored. A missing directory yields
        an empty list.
        """
        root = Path(directory)
        candidates = root.rglob('*') if recursive else root.glob('*')

        # Sorted so the result does not depend on set iteration order or on
        # the order the filesystem returns entries in.
        return sorted(
            str(entry)
            for entry in candidates
            if entry.suffix.lower() in self.SUPPORTED_EXTENSIONS and entry.is_file()
        )
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""Handle remote URL input."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
import tempfile
|
|
5
|
+
import os
|
|
6
|
+
from typing import Optional, Tuple
|
|
7
|
+
from urllib.parse import urlparse
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class URLHandler:
    """Recognise http(s) URLs, guess their content type and fetch them."""

    # Content type -> regexes tried (in this order) against the lower-cased
    # URL path, and then against "path?query". The host is never matched.
    URL_TYPE_PATTERNS = {
        "pdf": [r"\.pdf$", r"/[^/]+\.pdf", r"\?.*\.pdf"],
        "markdown": [r"\.md$", r"\.markdown$", r"\.mdown$"],
        "html": [r"\.html?$", r"\.htm$"],
        "image": [r"\.(png|jpg|jpeg|gif|bmp|webp)$"],
        "docx": [r"\.docx$"],
    }

    # Host key -> parser name. Keys containing a dot are matched as a domain
    # (exact host or any subdomain); other keys match anywhere in the host.
    DOMAIN_PARSERS = {
        "github.com": "github",
        "gist.github.com": "gist",
        "confluence": "confluence",
        "notion.so": "notion",
        "notion.site": "notion",
    }

    # Compiled once at class creation instead of on every can_handle() call.
    # The TLD may be up to 63 letters so long TLDs such as ".photography"
    # are accepted.
    _URL_PATTERN = re.compile(
        r'^https?://'
        r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,63}\.?|'
        r'localhost|'
        r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'
        r'(?::\d+)?'
        r'(?:/?|[/?]\S+)$', re.IGNORECASE)

    def can_handle(self, path: str) -> bool:
        """Return True when *path* looks like an http(s) URL."""
        if not path or not isinstance(path, str):
            return False
        return bool(self._URL_PATTERN.match(path))

    def resolve_type(self, url: str) -> str:
        """Guess the content type of *url* from its path, then its query.

        The path alone is checked first, so ``/img.png?v=2`` is an image and
        a ``.pdf`` inside the host name is not mistaken for a PDF. Returns
        one of the ``URL_TYPE_PATTERNS`` keys, or "html" when nothing matches.
        """
        url_lower = url.lower()
        try:
            parsed = urlparse(url_lower)
        except ValueError:
            # Unparseable URL: fall back to matching the raw string.
            targets = [url_lower]
        else:
            targets = [parsed.path]
            if parsed.query:
                targets.append(f"{parsed.path}?{parsed.query}")

        for target in targets:
            for content_type, patterns in self.URL_TYPE_PATTERNS.items():
                if any(re.search(pattern, target) for pattern in patterns):
                    return content_type

        return "html"

    def get_parser_type(self, url: str) -> str:
        """Return the parser name registered for the URL's host, or "generic"."""
        host = (urlparse(url).hostname or "").lower()

        # Longest key first so "gist.github.com" is not shadowed by
        # "github.com".
        for key in sorted(self.DOMAIN_PARSERS, key=len, reverse=True):
            if "." in key:
                matched = host == key or host.endswith("." + key)
            else:
                matched = key in host
            if matched:
                return self.DOMAIN_PARSERS[key]

        return "generic"

    def extract_filename(self, url: str) -> Optional[str]:
        """Return the last path segment of *url*, or None when there is none."""
        _, slash, filename = urlparse(url).path.rpartition("/")
        if slash and filename:
            return filename
        return None

    def fetch(self, url: str) -> Optional[Tuple[str, str]]:
        """
        Fetch content from an http(s) URL.

        Returns:
            ``("markdown" | "text" | "html", decoded_text)`` for textual
            responses, or ``("image" | "pdf" | "docx", temp_file_path)`` for
            binary ones. Temp files are not deleted here; the caller owns
            them (see ``cleanup_temp_file``). Returns None when the scheme is
            not http/https or the fetch fails; failures are logged.
        """
        import logging
        import urllib.request

        log = logging.getLogger(__name__)

        # urlopen also understands file:// and ftp://; refuse them so a
        # caller-supplied URL cannot be used to read local files.
        try:
            scheme = urlparse(url).scheme.lower()
        except (ValueError, AttributeError):
            scheme = ""
        if scheme not in ("http", "https"):
            log.warning("Refusing to fetch non-http(s) URL %s", url)
            return None

        try:
            req = urllib.request.Request(
                url,
                headers={
                    'User-Agent': 'Mozilla/5.0 (compatible; Content-Extractor/1.0)'
                }
            )
            with urllib.request.urlopen(req, timeout=30) as response:
                content_type = response.headers.get('Content-Type', '').lower()
                charset = response.headers.get_content_charset() or 'utf-8'
                data = response.read()

            resolved_type = self.resolve_type(url)

            if 'text' in content_type or 'markdown' in content_type:
                # The body is text, so only a text-like URL type is honoured.
                # A ".pdf" URL answering with text (e.g. an error page) must
                # not be reported as ("pdf", <text>), whose second element
                # callers would read as a file path.
                kind = 'markdown' if resolved_type == 'markdown' else 'text'
                return (kind, self._decode(data, charset))

            if 'image' in content_type or resolved_type == 'image':
                ext = os.path.splitext(self.extract_filename(url) or 'image.png')[1] or '.png'
                return ("image", self._write_temp_file(data, ext))

            if 'pdf' in content_type or resolved_type == 'pdf':
                return ("pdf", self._write_temp_file(data, '.pdf'))

            # A .docx is a zip archive; decoding it as text yields garbage.
            if 'wordprocessingml' in content_type or resolved_type == 'docx':
                return ("docx", self._write_temp_file(data, '.docx'))

            return ("html", self._decode(data, charset))

        except Exception as e:
            # Best-effort boundary: any failure means "could not fetch".
            log.warning("Failed to fetch URL %s: %s", url, e)
            return None

    @staticmethod
    def _decode(data: bytes, charset: str) -> str:
        """Decode *data* with the declared charset, falling back to UTF-8."""
        try:
            return data.decode(charset, errors='replace')
        except LookupError:
            # The server declared a charset Python does not know.
            return data.decode('utf-8', errors='replace')

    @staticmethod
    def _write_temp_file(data: bytes, suffix: str) -> str:
        """Write *data* to a new persistent temp file and return its path."""
        handle = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
        try:
            with handle:
                handle.write(data)
        except BaseException:
            # Do not leave a partial file behind when the write fails.
            try:
                os.unlink(handle.name)
            except OSError:
                pass
            raise
        return handle.name

    def cleanup_temp_file(self, path: str) -> bool:
        """Delete the file at *path*; return True only if it was removed.

        NOTE(review): the path is not checked to be a temp file, so callers
        should only pass paths previously returned by ``fetch``.
        """
        if not path:
            return False
        try:
            os.unlink(path)
        except (OSError, TypeError, ValueError):
            # Missing file, directory, permission problem or invalid path.
            return False
        return True
|
memnex/llm/__init__.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""MemNex LLM provider layer.
|
|
2
|
+
|
|
3
|
+
Exports:
|
|
4
|
+
LLMProvider -- the protocol all providers must satisfy
|
|
5
|
+
FallbackChain -- chain-of-responsibility fallback provider
|
|
6
|
+
LLMEnhancer -- LLM enhancement manager
|
|
7
|
+
LLMPromptSanitizer -- input sanitization for LLM prompts
|
|
8
|
+
IndirectInjectionGuard -- indirect prompt injection protection
|
|
9
|
+
create_provider -- factory function to instantiate providers by name
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from memnex.llm.provider import LLMProvider, create_provider
|
|
13
|
+
from memnex.llm.fallback_chain import FallbackChain
|
|
14
|
+
from memnex.llm.sanitizer import LLMPromptSanitizer
|
|
15
|
+
from memnex.llm.injection_guard import IndirectInjectionGuard
|
|
16
|
+
from memnex.llm.enhancer import LLMEnhancer
|
|
17
|
+
|
|
18
|
+
__all__ = [
|
|
19
|
+
"LLMProvider",
|
|
20
|
+
"FallbackChain",
|
|
21
|
+
"LLMEnhancer",
|
|
22
|
+
"LLMPromptSanitizer",
|
|
23
|
+
"IndirectInjectionGuard",
|
|
24
|
+
"create_provider",
|
|
25
|
+
]
|
memnex/llm/enhancer.py
ADDED
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
"""LLM enhancement manager: coordinates all LLM-augmented pipeline nodes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
from typing import TYPE_CHECKING, List
|
|
8
|
+
|
|
9
|
+
from memnex.models import (
|
|
10
|
+
EnhancedQuery,
|
|
11
|
+
FieldValue,
|
|
12
|
+
Function,
|
|
13
|
+
Summary,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from memnex.config import LLMConfig
|
|
18
|
+
from memnex.llm.provider import LLMProvider
|
|
19
|
+
|
|
20
|
+
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class LLMEnhancer:
    """Unified LLM enhancement manager.

    Orchestrates the LLM-augmented pipeline nodes with per-feature
    configuration switches. When a feature is disabled, a non-LLM
    fallback is used instead. LLM responses are treated as untrusted:
    missing or malformed fields fall back to safe defaults.

    Parameters
    ----------
    llm_provider:
        An LLMProvider implementation to delegate calls to. Only its
        awaitable ``complete_json(prompt)`` is used here.
    config:
        LLMConfig controlling which enhancements are active. The flags
        read are ``semantic_extraction``, ``query_enhancement``,
        ``conflict_resolution`` and ``summarization``.
    """

    def __init__(self, llm_provider: LLMProvider, config: LLMConfig) -> None:
        self.llm = llm_provider
        self.config = config

    # -- LLM Enhancement 1: Semantic Extraction -------------------------

    async def semantic_extract_trigger(self, paragraph: str) -> List[FieldValue]:
        """Use the LLM to extract trigger conditions from a paragraph.

        Falls back to sentence splitting when semantic extraction is
        disabled. Response items without a usable ``desc`` are skipped.
        Errors raised by the provider propagate to the caller.
        """
        if not self.config.semantic_extraction:
            return self._rule_based_extract(paragraph, "trigger")

        from memnex.llm.sanitizer import LLMPromptSanitizer

        prompt = LLMPromptSanitizer.build_structured_prompt(
            instruction="Extract trigger conditions from the following paragraph, "
            "focusing on user intent rather than simple keyword matching",
            user_input=paragraph,
            output_schema={
                "triggers": [{"desc": "str", "confidence": "float(0-1)"}]
            },
        )
        result = self._as_dict(await self.llm.complete_json(prompt))

        triggers = []
        for item in result.get("triggers") or []:
            # Skip malformed entries instead of failing the whole extraction.
            if not isinstance(item, dict) or not item.get("desc"):
                continue
            triggers.append(
                FieldValue(
                    desc=item["desc"],
                    sources=["llm_semantic"],
                    source_method="llm_semantic",
                    # NOTE(review): the schema sent to the LLM has no
                    # "weight" field, so this is normally the 0.8 default.
                    weight=item.get("weight", 0.8),
                    observation=item.get("confidence", 1.0),
                    # NOTE(review): utcnow() is deprecated since Python 3.12;
                    # kept because a timezone-aware value may not mix with
                    # timestamps created elsewhere -- confirm before changing.
                    created_at=datetime.utcnow(),
                )
            )
        return triggers

    # -- LLM Enhancement 2: Query Enhancement ---------------------------

    async def enhance_query(self, query: str) -> EnhancedQuery:
        """Use the LLM to classify and expand a user query.

        The result always carries at least one expanded query: when the
        LLM returns none, the original query is used.
        """
        if not self.config.query_enhancement:
            return EnhancedQuery(original=query, expanded=[query], intent="search")

        from memnex.llm.sanitizer import LLMPromptSanitizer

        prompt = LLMPromptSanitizer.build_structured_prompt(
            instruction="Analyze the user query intent. Return intent type, "
            "expanded queries, and related concepts",
            user_input=query,
            output_schema={
                "intent": "search|understand|compare|relation",
                "expanded_queries": ["str"],
                "related_concepts": ["str"],
            },
        )
        result = self._as_dict(await self.llm.complete_json(prompt))

        expanded = result.get("expanded_queries")
        if isinstance(expanded, str):
            # A lone string instead of a list is still one usable query.
            expanded = [expanded]
        if not isinstance(expanded, list):
            expanded = []
        expanded = [q for q in expanded if isinstance(q, str) and q.strip()]

        # "related_concepts" is requested from the LLM but not used here.
        return EnhancedQuery(
            original=query,
            expanded=expanded or [query],
            intent=result.get("intent") or "search",
        )

    # -- LLM Enhancement 2.5: HyDE --------------------------------------

    async def enhance_query_hyde_text(self, query: str) -> str:
        """Generate a hypothetical answer text for HyDE embedding.

        Returns the original query when the feature is disabled, when the
        LLM call fails, or when the response holds no non-empty text, so
        the main pipeline is never blocked. Failures are logged at debug
        level only.
        """
        if not self.config.query_enhancement:
            return query

        from memnex.llm.sanitizer import LLMPromptSanitizer

        prompt = LLMPromptSanitizer.build_structured_prompt(
            instruction="Assume a memory entry fully answers the user query. "
            "Describe the core content of that memory in 2-3 sentences",
            user_input=query,
            output_schema={"hypothetical_memory": "str"},
        )
        try:
            result = await self.llm.complete_json(prompt)
        except Exception:
            logger.debug("HyDE generation failed; using the raw query", exc_info=True)
            return query

        text = self._as_dict(result).get("hypothetical_memory")
        return text if isinstance(text, str) and text.strip() else query

    # -- LLM Enhancement 3: Conflict Resolution -------------------------

    async def resolve_conflict(self, func1: Function, func2: Function) -> dict:
        """Use the LLM to compare two conflicting Function versions.

        Only the trigger and condition descriptions of each version are
        sent. Returns a dict with ``decision`` and ``reasoning``; the LLM
        path also includes ``merged_function``. Falls back to source
        authority when conflict resolution is disabled.
        """
        if not self.config.conflict_resolution:
            return self._authority_based_resolve(func1, func2)

        import json

        from memnex.llm.sanitizer import LLMPromptSanitizer

        conflict_data = {
            "v1": {
                "trigger": [fv.desc for fv in func1.trigger],
                "condition": [fv.desc for fv in func1.condition],
            },
            "v2": {
                "trigger": [fv.desc for fv in func2.trigger],
                "condition": [fv.desc for fv in func2.condition],
            },
        }
        prompt = LLMPromptSanitizer.build_structured_prompt(
            instruction="Analyze two conflicting function versions and decide how to merge",
            user_input=json.dumps(conflict_data, ensure_ascii=False),
            output_schema={
                "decision": "keep_v1|keep_v2|merge",
                "reasoning": "str",
                "merged_function": {},
            },
        )
        result = await self.llm.complete_json(prompt)
        return self._parse_resolution(result)

    # -- LLM Enhancement 4: Summarization --------------------------------

    async def summarize(self, memories: list) -> Summary:
        """Generate a summary from a list of memory objects.

        Each memory must expose ``name``; an ``action`` list of objects
        with ``desc`` is used when present. With summarization disabled,
        the key points are simply the memory names.
        """
        if not self.config.summarization:
            return Summary(
                key_points=[m.name for m in memories],
                patterns=[],
                changes=[],
            )

        from memnex.llm.sanitizer import LLMPromptSanitizer

        # Only send structured fields, not raw free text (reduces injection risk)
        summaries = [
            f"{m.name}: {', '.join(fv.desc for fv in getattr(m, 'action', []))}"
            for m in memories
        ]
        prompt = LLMPromptSanitizer.build_structured_prompt(
            instruction="Extract key information from the following memories "
            "and generate a concise summary",
            user_input="\n".join(summaries),
            output_schema={
                "key_points": ["str"],
                "patterns": ["str"],
                "changes": ["str"],
            },
        )
        result = self._as_dict(await self.llm.complete_json(prompt))
        return Summary(
            key_points=result.get("key_points") or [],
            patterns=result.get("patterns") or [],
            changes=result.get("changes") or [],
        )

    # -- Private helpers -------------------------------------------------

    @staticmethod
    def _as_dict(result) -> dict:
        """Return *result* if it is a dict, otherwise an empty dict."""
        return result if isinstance(result, dict) else {}

    @staticmethod
    def _rule_based_extract(paragraph: str, role: str) -> List[FieldValue]:
        """Trivial sentence-split extraction used when the LLM is disabled.

        Splits on the ASCII full stop and the CJK full stop and keeps at
        most the first five non-empty sentences. ``role`` is accepted for
        interface compatibility but does not affect the result.
        """
        # Treat the CJK full stop like "." so Chinese paragraphs are split too.
        pieces = (paragraph or "").replace("。", ".").split(".")
        sentences = [piece.strip() for piece in pieces if piece.strip()]
        return [
            FieldValue(
                desc=sentence,
                sources=["rule_based"],
                source_method="rule_based",
                weight=0.5,
            )
            for sentence in sentences[:5]
        ]

    @staticmethod
    def _authority_based_resolve(func1: Function, func2: Function) -> dict:
        """Fallback conflict resolution based on source authority.

        A missing source type ranks like "wiki"; ties keep the first version.
        """
        priority = {"requirement": 4, "meeting": 3, "code": 2, "wiki": 1}

        def rank(func) -> int:
            source = func.source_type.value if func.source_type else "wiki"
            return priority.get(source, 1)

        if rank(func1) >= rank(func2):
            return {"decision": "keep_v1", "reasoning": "higher source authority"}
        return {"decision": "keep_v2", "reasoning": "higher source authority"}

    @staticmethod
    def _parse_resolution(result: dict) -> dict:
        """Normalize the LLM conflict resolution response.

        Unknown or missing decisions become "keep_v1"; a missing reasoning
        becomes "" and a non-dict merged function becomes ``{}``.
        """
        if not isinstance(result, dict):
            result = {}

        decision = result.get("decision")
        if isinstance(decision, str):
            decision = decision.strip().lower()
        if decision not in ("keep_v1", "keep_v2", "merge"):
            decision = "keep_v1"

        merged = result.get("merged_function")
        return {
            "decision": decision,
            "reasoning": result.get("reasoning") or "",
            "merged_function": merged if isinstance(merged, dict) else {},
        }
|