memplex 3.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- memnex/__init__.py +31 -0
- memnex/__main__.py +6 -0
- memnex/_plugin/.claude-plugin/plugin.json +24 -0
- memnex/_plugin/.mcp.json +9 -0
- memnex/_plugin/__init__.py +0 -0
- memnex/_plugin/hooks/hooks.json +43 -0
- memnex/_plugin/scripts/hook-runner.py +166 -0
- memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
- memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
- memnex/_plugin/skills/mem-search/SKILL.md +85 -0
- memnex/_plugin/skills/mem-write/SKILL.md +78 -0
- memnex/adapters/__init__.py +14 -0
- memnex/adapters/claude_skill.py +169 -0
- memnex/adapters/cli.py +525 -0
- memnex/adapters/http_api.py +314 -0
- memnex/adapters/mcp_server.py +448 -0
- memnex/compaction.py +563 -0
- memnex/config.py +366 -0
- memnex/core/__init__.py +13 -0
- memnex/core/associator/__init__.py +8 -0
- memnex/core/associator/domain_classifier.py +75 -0
- memnex/core/associator/entity_aligner.py +127 -0
- memnex/core/associator/ref_linker.py +197 -0
- memnex/core/associator/term_mapper.py +77 -0
- memnex/core/dictionaries/__init__.py +50 -0
- memnex/core/engine.py +667 -0
- memnex/core/extractors/__init__.py +15 -0
- memnex/core/extractors/docx.py +97 -0
- memnex/core/extractors/image.py +233 -0
- memnex/core/extractors/markdown.py +139 -0
- memnex/core/extractors/pdf.py +133 -0
- memnex/core/extractors/vision_mapper.py +131 -0
- memnex/core/handlers/__init__.py +7 -0
- memnex/core/handlers/clipboard.py +40 -0
- memnex/core/handlers/file_handler.py +62 -0
- memnex/core/handlers/url_handler.py +132 -0
- memnex/llm/__init__.py +25 -0
- memnex/llm/enhancer.py +226 -0
- memnex/llm/fallback_chain.py +87 -0
- memnex/llm/injection_guard.py +178 -0
- memnex/llm/provider.py +130 -0
- memnex/llm/providers/__init__.py +22 -0
- memnex/llm/providers/anthropic.py +135 -0
- memnex/llm/providers/local.py +135 -0
- memnex/llm/providers/rule_based.py +68 -0
- memnex/llm/sanitizer.py +67 -0
- memnex/models/__init__.py +68 -0
- memnex/models/feedback.py +42 -0
- memnex/models/graph.py +33 -0
- memnex/models/memory.py +102 -0
- memnex/models/misc.py +185 -0
- memnex/models/paragraph.py +45 -0
- memnex/models/search.py +51 -0
- memnex/models/source.py +23 -0
- memnex/models/task.py +62 -0
- memnex/processing/__init__.py +1 -0
- memnex/processing/graph_builder.py +278 -0
- memnex/processing/merger/__init__.py +6 -0
- memnex/processing/merger/confidence_calculator.py +127 -0
- memnex/processing/merger/conflict_resolver.py +116 -0
- memnex/retrieval/__init__.py +1 -0
- memnex/retrieval/dedup.py +386 -0
- memnex/retrieval/embedding.py +289 -0
- memnex/retrieval/reranker.py +299 -0
- memnex/service.py +902 -0
- memnex/storage/__init__.py +65 -0
- memnex/storage/base.py +132 -0
- memnex/storage/changelog.py +106 -0
- memnex/storage/feedback.py +486 -0
- memnex/storage/lite/__init__.py +5 -0
- memnex/storage/lite/store.py +606 -0
- memnex/storage/vector.py +265 -0
- memnex/wiki/__init__.py +11 -0
- memnex/wiki/community.py +221 -0
- memnex/wiki/compiler.py +545 -0
- memnex/wiki/generator.py +270 -0
- memnex/wiki/search.py +282 -0
- memnex/worker.py +412 -0
- memplex-3.2.0.dist-info/METADATA +37 -0
- memplex-3.2.0.dist-info/RECORD +83 -0
- memplex-3.2.0.dist-info/WHEEL +5 -0
- memplex-3.2.0.dist-info/entry_points.txt +2 -0
- memplex-3.2.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,197 @@
|
|
|
1
|
+
"""Cross-document reference extraction and linking."""
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from typing import List, Dict, Tuple, Optional
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RefLinker:
    """Extracts and resolves cross-document references.

    ``extract_references`` scans text with the regex tables below and returns
    plain dicts; ``resolve_reference`` and ``resolve_implicit_reference`` map
    such a dict onto an entity ID taken from a ``name -> [entity ids]`` mapping.
    """

    # Reference patterns. When a pattern has a capture group, group 1 becomes
    # the reference target; otherwise the whole match is the target.
    CROSS_DOC_PATTERNS = [
        r"详见[《\"]?(.+?)[》文档手册]",  # "see ... for details"
        r"参见[《\"]?(.+?)[》\]]",  # "refer to <title>"
        r"[《\"]?(.+?)[》\]]\s*[第见]?\s*([0-9.]+)[章]?",  # title followed by a number
        r"如上所述",  # "as mentioned above"
        r"如前所述",  # "as stated earlier"
        r"前述",  # "aforementioned"
        r"同上述([A-Za-z0-9_一-龥]+)",  # "same as the above ..."
        r"同下述([A-Za-z0-9_一-龥]+)",  # "same as the following ..."
        r"同前述([A-Za-z0-9_一-龥]+)",  # "same as the aforementioned ..."
        r"参见([A-Za-z0-9_一-龥-]+)",  # "refer to ..."
        r"依据([A-Za-z0-9_一-龥-]+)",  # "based on ..."
        r"按照([A-Za-z0-9_一-龥-]+)",  # "according to ..."
        r"符合([A-Za-z0-9_一-龥-]+)",  # "conforming to ..."
        r"满足([A-Za-z0-9_一-龥-]+)",  # "satisfying ..."
        r"参照([A-Za-z0-9_一-龥-]+)",  # "with reference to ..."
        r"根据([A-Za-z0-9_一-龥-]+)",  # "according to ..."
        r"RFC-?(\d+)",
    ]

    # In-document section / figure / chapter references.
    SECTION_PATTERNS = [
        r"见第?([0-9.]+)节?",  # "see section N"
        r"如图?([0-9]+(?:\.[0-9]+)?)",  # "as in figure N"
        r"参考第?([0-9.]+)节",  # "refer to section N"
        r"第([一二三四五六七八九十零]+)章"  # "chapter <Chinese numeral>"
    ]

    # Value of each Chinese numeral character accepted by SECTION_PATTERNS.
    CN_DIGIT_MAP = {"一": 1, "二": 2, "三": 3, "四": 4, "五": 5,
                    "六": 6, "七": 7, "八": 8, "九": 9, "十": 10, "零": 0}

    URL_PATTERN = r"https?://[^\s<>\"]+"

    # Words signalling "what comes next"; each hit yields an "implicit_next" ref.
    SEQUENTIAL_PATTERNS = [
        r"之后",  # "afterwards"
        r"随后",  # "subsequently"
        r"接下来",  # "next"
    ]

    # Phrases that point back at earlier content without naming a target.
    BACK_REFERENCE_PATTERNS = [
        "如上所述",
        "如前所述",
        "前述",
    ]

    def _chinese_numeral_to_int(self, numeral: str) -> int:
        """Convert a Chinese numeral such as "三", "十二" or "二十三" to an int.

        Only a single digit on either side of "十" is understood; any piece
        that is not a key of ``CN_DIGIT_MAP`` contributes 0.
        """
        if "十" not in numeral:
            return self.CN_DIGIT_MAP.get(numeral, 0)

        parts = numeral.split("十")
        # A leading "十" means the tens digit is an implicit one ("十二" -> 12).
        value = 10 if parts[0] == "" else self.CN_DIGIT_MAP.get(parts[0], 0) * 10
        if len(parts) > 1 and parts[1]:
            value += self.CN_DIGIT_MAP.get(parts[1], 0)
        return value

    @staticmethod
    def _fuzzy_match(target: str, known_entities: Dict[str, List[str]]) -> Optional[List[str]]:
        """Return the ID list of the first entity whose name overlaps *target*.

        A name overlaps when, ignoring case, it contains the target or the
        target contains it. Blank targets, blank names and entities without
        IDs are skipped: an empty string is a substring of every string, so it
        would otherwise "match" an arbitrary entity.
        """
        needle = target.lower()
        if not needle.strip():
            return None
        for name, ids in known_entities.items():
            candidate = name.lower()
            if not candidate or not ids:
                continue
            if needle in candidate or candidate in needle:
                return ids
        return None

    def extract_references(self, text: str) -> List[Dict]:
        """
        Extract all types of references from text.

        Patterns are applied table by table (cross-document, sequential,
        section, URL), so the result is grouped by table rather than ordered
        by position in the text. Overlapping patterns may report the same
        span more than once.

        Returns:
            List of reference dicts with keys ``type`` ("cross_doc",
            "implicit", "sequential", "section" or "url"), ``target``,
            ``confidence`` and ``match`` (the matched text).
        """
        references = []

        for pattern in self.CROSS_DOC_PATTERNS:
            for match in re.finditer(pattern, text):
                # Prefer the captured name; group-less patterns use the full match.
                if match.lastindex and match.group(1):
                    target = match.group(1).strip()
                else:
                    target = match.group(0).strip()
                ref_type = "implicit" if target in self.BACK_REFERENCE_PATTERNS else "cross_doc"
                references.append({
                    "type": ref_type,
                    "target": target,
                    "confidence": 0.95 if ref_type == "cross_doc" else 0.7,
                    "match": match.group(0)
                })

        for pattern in self.SEQUENTIAL_PATTERNS:
            for match in re.finditer(pattern, text):
                references.append({
                    "type": "sequential",
                    "target": "implicit_next",
                    "confidence": 0.6,
                    "match": match.group(0)
                })

        for pattern in self.SECTION_PATTERNS:
            for match in re.finditer(pattern, text):
                section = match.group(1)
                # Chinese chapter numerals are normalised to Arabic digits.
                if re.match(r"^[一-鿿]+$", section):
                    target = f"section_{self._chinese_numeral_to_int(section)}"
                else:
                    target = f"section_{section}"
                references.append({
                    "type": "section",
                    "target": target,
                    "confidence": 0.9,
                    "match": match.group(0)
                })

        for match in re.finditer(self.URL_PATTERN, text):
            references.append({
                "type": "url",
                "target": match.group(0),
                "confidence": 0.85,
                "match": match.group(0)
            })

        return references

    def resolve_reference(
        self,
        ref: Dict,
        known_entities: Dict[str, List[str]]
    ) -> Optional[str]:
        """Resolve reference to entity ID.

        Args:
            ref: Reference dict; must contain a ``target`` key.
            known_entities: Mapping of entity name to its list of entity IDs.

        Returns:
            The first ID of the exactly named entity, else the first ID of the
            first entity whose name overlaps the target case-insensitively,
            else ``None``. Entities with an empty ID list are never returned.

        Raises:
            KeyError: If *ref* has no ``target`` key.
        """
        target = ref["target"]

        exact_ids = known_entities.get(target)
        if exact_ids:
            return exact_ids[0]

        fuzzy_ids = self._fuzzy_match(target, known_entities)
        return fuzzy_ids[0] if fuzzy_ids else None

    def resolve_implicit_reference(
        self,
        ref: Dict,
        known_entities: Dict[str, List[str]],
        context: Optional[Dict] = None
    ) -> Tuple[Optional[str], float]:
        """Resolve implicit reference to entity ID with confidence score.

        The first applicable rule decides the result:

        1. Back-reference phrase: ``context["previous_entity"]`` if known
           (0.85), else the first entity that has IDs (0.6).
        2. ``RFC-<n>`` target: an entity whose name contains ``rfc_<n>``,
           ``rfc <n>`` or ``rfc-<n>`` (0.9).
        3. Target starting with "同": the entity whose name contains the rest
           of the target and is covered by it the most (0.5 to 0.85).
        4. Sequential ``implicit_next``: ``context["next_entity"]`` if known
           (0.8), else the second known entity (0.5).
        5. ``implicit`` type: case-insensitive name overlap (0.7).

        Args:
            ref: Reference dict as produced by ``extract_references``.
            known_entities: Mapping of entity name to its list of entity IDs.
            context: Optional dict that may carry ``previous_entity`` and
                ``next_entity`` names.

        Returns:
            ``(entity_id, confidence)``. When a rule applies but finds nothing
            the result is ``(None, 0.0)``; when no rule applies it is
            ``(None, ref's own confidence)`` (0.5 if the ref has none).
        """
        target = ref.get("target", "")
        ref_type = ref.get("type", "")
        confidence = ref.get("confidence", 0.5)

        if target in self.BACK_REFERENCE_PATTERNS:
            if context and "previous_entity" in context:
                previous_ids = known_entities.get(context["previous_entity"])
                if previous_ids:
                    return previous_ids[0], 0.85
            # No usable context: fall back to the first entity that has an ID.
            for ids in known_entities.values():
                if ids:
                    return ids[0], 0.6
            return None, 0.0

        rfc_match = re.match(r"RFC-?(\d+)", target, re.IGNORECASE)
        if rfc_match:
            rfc_num = rfc_match.group(1)
            spellings = (f"rfc_{rfc_num}", f"rfc {rfc_num}", f"rfc-{rfc_num}")
            for name, ids in known_entities.items():
                if not ids:
                    continue
                name_lower = name.lower()
                if any(spelling in name_lower for spelling in spellings):
                    return ids[0], 0.9
            return None, 0.0

        if target.startswith("同") and len(target) > 1:
            suffix = target[1:]
            best_match = None
            best_score = 0.0
            for name, ids in known_entities.items():
                if not ids or suffix not in name:
                    continue
                # Fraction of the entity name covered by the suffix.
                score = len(suffix) / max(len(name), 1)
                if score > best_score:
                    best_score = score
                    best_match = ids[0]
            if best_match:
                return best_match, min(0.5 + best_score * 0.4, 0.85)
            return None, 0.0

        if ref_type == "sequential" and target == "implicit_next":
            if context and "next_entity" in context:
                next_ids = known_entities.get(context["next_entity"])
                if next_ids:
                    return next_ids[0], 0.8
            if len(known_entities) > 1:
                # Second entity in the mapping's iteration order.
                second_ids = list(known_entities.values())[1]
                if second_ids:
                    return second_ids[0], 0.5
            return None, 0.0

        if ref_type == "implicit":
            fuzzy_ids = self._fuzzy_match(target, known_entities)
            if fuzzy_ids:
                return fuzzy_ids[0], 0.7

        return None, confidence
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Term-based association using dictionary lookup."""
|
|
2
|
+
|
|
3
|
+
from typing import List, Set, Tuple, Optional
|
|
4
|
+
from memnex.models.memory import Function
|
|
5
|
+
from memnex.models.misc import FieldValue
|
|
6
|
+
from memnex.core.dictionaries import TermDictionary
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TermMapper:
    """Maps terms between documents using dictionary lookup."""

    def __init__(self, dictionary: TermDictionary = None):
        # A falsy argument (including None) is replaced by a default dictionary.
        self.dictionary = dictionary if dictionary else TermDictionary()

    def embed_text(self, text: str) -> Optional[List[float]]:
        """Return an embedding of *text* as a list, or None on ImportError.

        The 'all-MiniLM-L6-v2' sentence-transformers model is created on the
        first call and kept on the instance for later calls.
        """
        try:
            from sentence_transformers import SentenceTransformer
            if hasattr(self, '_embedding_model'):
                model = self._embedding_model
            else:
                model = self._embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
            vectors = model.encode([text])
            return vectors[0].tolist()
        except ImportError:
            return None

    def extract_terms(self, text: str) -> Set[str]:
        """Return the dictionary terms found in *text*."""
        return self.dictionary.find_matching_terms(text)

    def find_associations(
        self,
        source_terms: Set[str],
        target_candidates: List[Function]
    ) -> List[Tuple[Function, float]]:
        """
        Rank candidate functions by how many terms they share with the source.

        Candidates whose overlap score is zero are left out.

        Returns:
            List of (function, confidence) tuples, highest confidence first
        """
        scored = [
            (candidate, self._calculate_term_overlap(source_terms, candidate))
            for candidate in target_candidates
        ]
        ranked = [pair for pair in scored if pair[1] > 0]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked

    def _calculate_term_overlap(self, source_terms: Set[str], func: Function) -> float:
        """Return shared terms divided by all terms of source and function.

        The function's terms come from its name, its normalized name and the
        ``desc`` of every entry in its trigger/condition/action/benefit lists.
        The score is 0.0 when either side has no terms.
        """
        if not source_terms:
            return 0.0

        func_terms = self.extract_terms(func.name)
        func_terms.update(self.extract_terms(func.name_normalized))

        # Multi-value fields (List[FieldValue]) each contribute their descriptions.
        multi_value_fields = (func.trigger, func.condition, func.action, func.benefit)
        for field_values in multi_value_fields:
            for field_value in field_values:
                func_terms.update(self.extract_terms(field_value.desc))

        if not func_terms:
            return 0.0

        shared = source_terms & func_terms
        combined = source_terms | func_terms
        if not combined:
            return 0.0
        return len(shared) / len(combined)

    def build_term_normalized(self, text: str) -> str:
        """Return the sorted dictionary terms of *text* joined by underscores.

        When no dictionary term matches, the text itself is lower-cased and
        its spaces are replaced by underscores.
        """
        terms = self.extract_terms(text)
        if not terms:
            return text.lower().replace(" ", "_")
        return "_".join(sorted(terms))
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Term dictionary for association mapping."""
|
|
2
|
+
|
|
3
|
+
import yaml
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, List, Set
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class TermDictionary:
    """Term dictionary for association mapping.

    Holds a mapping of canonical term -> list of synonyms loaded from a YAML
    file, plus a reverse index from lower-cased synonym to canonical term.
    """

    def __init__(self, base_path: str = None):
        """Load the dictionary from *base_path*.

        Args:
            base_path: Path of the YAML file. Defaults to ``base_terms.yaml``
                next to this module.
        """
        if base_path is None:
            base_path = Path(__file__).parent / "base_terms.yaml"
        self.base_path = Path(base_path)
        self.terms: Dict[str, List[str]] = {}
        self.reverse_map: Dict[str, str] = {}  # synonym -> canonical
        self._load()

    def _load(self):
        """Load dictionary from YAML.

        A missing file, an empty file, or a document whose top level is not a
        mapping leaves the dictionary empty instead of raising.
        """
        if not self.base_path.exists():
            return

        with open(self.base_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        # safe_load yields None for an empty document; only a mapping is usable.
        if not isinstance(data, dict):
            return

        for canonical, synonyms in data.items():
            if synonyms is None:
                # A key written without a value parses as None.
                synonyms = []
            elif isinstance(synonyms, str):
                # A single scalar synonym must not be iterated character by character.
                synonyms = [synonyms]
            self.terms[canonical] = synonyms
            for syn in synonyms:
                self.reverse_map[syn.lower()] = canonical

    def get_canonical(self, term: str) -> str:
        """Get canonical form of a term.

        Returns the canonical term registered for the lower-cased *term*, or
        the lower-cased *term* itself when it is not a known synonym.
        """
        key = term.lower()
        return self.reverse_map.get(key, key)

    def get_synonyms(self, term: str) -> List[str]:
        """Get all synonyms for a term.

        Returns the synonym list of the term's canonical form, or ``[term]``
        when that canonical form has no entry.
        """
        canonical = self.get_canonical(term)
        return self.terms.get(canonical, [term])

    def find_matching_terms(self, text: str) -> Set[str]:
        """Find all matching terms in text.

        A canonical term matches when at least one of its synonyms occurs in
        *text* as a case-insensitive substring. The canonical name itself is
        only checked if it is listed among its own synonyms.
        """
        text_lower = text.lower()
        matches = set()
        for term, synonyms in self.terms.items():
            if any(syn.lower() in text_lower for syn in synonyms):
                matches.add(term)
        return matches
|