memplex 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. memnex/__init__.py +31 -0
  2. memnex/__main__.py +6 -0
  3. memnex/_plugin/.claude-plugin/plugin.json +24 -0
  4. memnex/_plugin/.mcp.json +9 -0
  5. memnex/_plugin/__init__.py +0 -0
  6. memnex/_plugin/hooks/hooks.json +43 -0
  7. memnex/_plugin/scripts/hook-runner.py +166 -0
  8. memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
  9. memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
  10. memnex/_plugin/skills/mem-search/SKILL.md +85 -0
  11. memnex/_plugin/skills/mem-write/SKILL.md +78 -0
  12. memnex/adapters/__init__.py +14 -0
  13. memnex/adapters/claude_skill.py +169 -0
  14. memnex/adapters/cli.py +525 -0
  15. memnex/adapters/http_api.py +314 -0
  16. memnex/adapters/mcp_server.py +448 -0
  17. memnex/compaction.py +563 -0
  18. memnex/config.py +366 -0
  19. memnex/core/__init__.py +13 -0
  20. memnex/core/associator/__init__.py +8 -0
  21. memnex/core/associator/domain_classifier.py +75 -0
  22. memnex/core/associator/entity_aligner.py +127 -0
  23. memnex/core/associator/ref_linker.py +197 -0
  24. memnex/core/associator/term_mapper.py +77 -0
  25. memnex/core/dictionaries/__init__.py +50 -0
  26. memnex/core/engine.py +667 -0
  27. memnex/core/extractors/__init__.py +15 -0
  28. memnex/core/extractors/docx.py +97 -0
  29. memnex/core/extractors/image.py +233 -0
  30. memnex/core/extractors/markdown.py +139 -0
  31. memnex/core/extractors/pdf.py +133 -0
  32. memnex/core/extractors/vision_mapper.py +131 -0
  33. memnex/core/handlers/__init__.py +7 -0
  34. memnex/core/handlers/clipboard.py +40 -0
  35. memnex/core/handlers/file_handler.py +62 -0
  36. memnex/core/handlers/url_handler.py +132 -0
  37. memnex/llm/__init__.py +25 -0
  38. memnex/llm/enhancer.py +226 -0
  39. memnex/llm/fallback_chain.py +87 -0
  40. memnex/llm/injection_guard.py +178 -0
  41. memnex/llm/provider.py +130 -0
  42. memnex/llm/providers/__init__.py +22 -0
  43. memnex/llm/providers/anthropic.py +135 -0
  44. memnex/llm/providers/local.py +135 -0
  45. memnex/llm/providers/rule_based.py +68 -0
  46. memnex/llm/sanitizer.py +67 -0
  47. memnex/models/__init__.py +68 -0
  48. memnex/models/feedback.py +42 -0
  49. memnex/models/graph.py +33 -0
  50. memnex/models/memory.py +102 -0
  51. memnex/models/misc.py +185 -0
  52. memnex/models/paragraph.py +45 -0
  53. memnex/models/search.py +51 -0
  54. memnex/models/source.py +23 -0
  55. memnex/models/task.py +62 -0
  56. memnex/processing/__init__.py +1 -0
  57. memnex/processing/graph_builder.py +278 -0
  58. memnex/processing/merger/__init__.py +6 -0
  59. memnex/processing/merger/confidence_calculator.py +127 -0
  60. memnex/processing/merger/conflict_resolver.py +116 -0
  61. memnex/retrieval/__init__.py +1 -0
  62. memnex/retrieval/dedup.py +386 -0
  63. memnex/retrieval/embedding.py +289 -0
  64. memnex/retrieval/reranker.py +299 -0
  65. memnex/service.py +902 -0
  66. memnex/storage/__init__.py +65 -0
  67. memnex/storage/base.py +132 -0
  68. memnex/storage/changelog.py +106 -0
  69. memnex/storage/feedback.py +486 -0
  70. memnex/storage/lite/__init__.py +5 -0
  71. memnex/storage/lite/store.py +606 -0
  72. memnex/storage/vector.py +265 -0
  73. memnex/wiki/__init__.py +11 -0
  74. memnex/wiki/community.py +221 -0
  75. memnex/wiki/compiler.py +545 -0
  76. memnex/wiki/generator.py +270 -0
  77. memnex/wiki/search.py +282 -0
  78. memnex/worker.py +412 -0
  79. memplex-3.2.0.dist-info/METADATA +37 -0
  80. memplex-3.2.0.dist-info/RECORD +83 -0
  81. memplex-3.2.0.dist-info/WHEEL +5 -0
  82. memplex-3.2.0.dist-info/entry_points.txt +2 -0
  83. memplex-3.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,197 @@
1
+ """Cross-document reference extraction and linking."""
2
+
3
+ import re
4
+ from typing import List, Dict, Tuple, Optional
5
+
6
+
7
class RefLinker:
    """Extracts and resolves cross-document references.

    ``extract_references`` scans text with the class-level pattern tables and
    returns plain dicts. ``resolve_reference`` and
    ``resolve_implicit_reference`` map such a dict onto an entity ID taken
    from a ``name -> [entity ids]`` mapping.
    """

    # Reference patterns. When a pattern has a capture group, group 1 becomes
    # the reference target; patterns without a group use the whole match.
    CROSS_DOC_PATTERNS = [
        r"详见[《\"]?(.+?)[》文档手册]",
        r"参见[《\"]?(.+?)[》\]]",
        r"[《\"]?(.+?)[》\]]\s*[第见]?\s*([0-9.]+)[章]?",
        r"如上所述",
        r"如前所述",
        r"前述",
        r"同上述([A-Za-z0-9_一-龥]+)",
        r"同下述([A-Za-z0-9_一-龥]+)",
        r"同前述([A-Za-z0-9_一-龥]+)",
        r"参见([A-Za-z0-9_一-龥-]+)",
        r"依据([A-Za-z0-9_一-龥-]+)",
        r"按照([A-Za-z0-9_一-龥-]+)",
        r"符合([A-Za-z0-9_一-龥-]+)",
        r"满足([A-Za-z0-9_一-龥-]+)",
        r"参照([A-Za-z0-9_一-龥-]+)",
        r"根据([A-Za-z0-9_一-龥-]+)",
        r"RFC-?(\d+)",
    ]

    # Group 1 of each pattern is the section (or figure) number.
    SECTION_PATTERNS = [
        r"见第?([0-9.]+)节?",
        r"如图?([0-9]+(?:\.[0-9]+)?)",
        r"参考第?([0-9.]+)节",
        r"第([一二三四五六七八九十零]+)章"
    ]

    # Single Chinese numerals used to convert chapter numbers such as "二十三".
    CN_DIGIT_MAP = {"一": 1, "二": 2, "三": 3, "四": 4, "五": 5,
                    "六": 6, "七": 7, "八": 8, "九": 9, "十": 10, "零": 0}

    URL_PATTERN = r"https?://[^\s<>\"]+"

    # Phrases that point at whatever comes next in the document.
    SEQUENTIAL_PATTERNS = [
        r"之后",
        r"随后",
        r"接下来",
    ]

    # Phrases that point back at something mentioned earlier; a cross-doc
    # match whose target is one of these is classified as "implicit".
    BACK_REFERENCE_PATTERNS = [
        "如上所述",
        "如前所述",
        "前述",
    ]

    def extract_references(self, text: str) -> List[Dict]:
        """
        Extract all types of references from text.

        Results are grouped by kind in a fixed order: cross-document (and
        implicit back-references), sequential, section, then URL. Overlapping
        patterns may report the same span more than once.

        Returns:
            List of reference dicts with type, target, confidence and the
            matched text under "match"
        """
        references = []

        for pattern in self.CROSS_DOC_PATTERNS:
            for match in re.finditer(pattern, text):
                # Patterns without a capture group (or with an empty capture)
                # fall back to the whole matched phrase.
                target = match.group(1).strip() if match.lastindex and match.group(1) else match.group(0).strip()
                ref_type = "implicit" if target in self.BACK_REFERENCE_PATTERNS else "cross_doc"
                references.append({
                    "type": ref_type,
                    "target": target,
                    "confidence": 0.95 if ref_type == "cross_doc" else 0.7,
                    "match": match.group(0)
                })

        for pattern in self.SEQUENTIAL_PATTERNS:
            for match in re.finditer(pattern, text):
                references.append({
                    "type": "sequential",
                    "target": "implicit_next",
                    "confidence": 0.6,
                    "match": match.group(0)
                })

        for pattern in self.SECTION_PATTERNS:
            for match in re.finditer(pattern, text):
                references.append({
                    "type": "section",
                    "target": self._section_target(match.group(1)),
                    "confidence": 0.9,
                    "match": match.group(0)
                })

        for match in re.finditer(self.URL_PATTERN, text):
            references.append({
                "type": "url",
                "target": match.group(0),
                "confidence": 0.85,
                "match": match.group(0)
            })

        return references

    def _section_target(self, section: str) -> str:
        """Build the ``section_<n>`` target for a captured section number.

        Arabic numbers (possibly dotted, e.g. "3.2") are used verbatim.
        Chinese numerals are converted to an integer; only a single numeral
        or the "<tens>十<units>" form is understood, anything else maps to 0.
        """
        if not re.match(r"^[一-鿿]+$", section):
            return f"section_{section}"

        if "十" not in section:
            return f"section_{self.CN_DIGIT_MAP.get(section, 0)}"

        parts = section.split("十")
        if parts[0] == "":
            # A bare leading "十" means ten ("十二" -> 12).
            section_num = 10
        else:
            section_num = self.CN_DIGIT_MAP.get(parts[0], 0) * 10
        if len(parts) > 1 and parts[1]:
            section_num += self.CN_DIGIT_MAP.get(parts[1], 0)
        return f"section_{section_num}"

    @staticmethod
    def _fuzzy_lookup(
        target: str,
        known_entities: Dict[str, List[str]]
    ) -> Optional[str]:
        """Return the first entity ID whose name contains, or is contained in, target.

        Matching is case-insensitive. Blank targets, blank entity names and
        entities without IDs are skipped: an empty string is a substring of
        everything and would otherwise "match" an arbitrary entity.
        """
        if not target.strip():
            return None

        target_lower = target.lower()
        for name, ids in known_entities.items():
            name_lower = name.lower()
            if not name_lower or not ids:
                continue
            if target_lower in name_lower or name_lower in target_lower:
                return ids[0]
        return None

    def resolve_reference(
        self,
        ref: Dict,
        known_entities: Dict[str, List[str]]
    ) -> Optional[str]:
        """Resolve reference to entity ID.

        Tries an exact name match first, then a case-insensitive substring
        match in either direction.

        Args:
            ref: Reference dict; must contain "target".
            known_entities: Mapping of entity name to its entity IDs.

        Returns:
            The first ID of the matching entity, or None when nothing matches.

        Raises:
            KeyError: If ``ref`` has no "target" key.
        """
        target = ref["target"]

        # An exact hit with no IDs cannot be returned; fall through to the
        # fuzzy match instead of raising IndexError.
        if target in known_entities and known_entities[target]:
            return known_entities[target][0]

        return self._fuzzy_lookup(target, known_entities)

    def resolve_implicit_reference(
        self,
        ref: Dict,
        known_entities: Dict[str, List[str]],
        context: Optional[Dict] = None
    ) -> Tuple[Optional[str], float]:
        """Resolve implicit reference to entity ID with confidence score.

        Branches are tried in order: back-reference phrase, RFC number,
        "同..." prefix, sequential marker, generic implicit substring match.

        Args:
            ref: Reference dict; "target", "type" and "confidence" are read
                with defaults of "", "" and 0.5.
            known_entities: Mapping of entity name to its entity IDs.
            context: Optional dict; "previous_entity" and "next_entity" name
                the neighbouring entities when known.

        Returns:
            ``(entity_id, confidence)``. A branch that recognises the
            reference but cannot resolve it returns ``(None, 0.0)``; a
            reference no branch recognises returns ``(None, <ref confidence>)``.
        """
        target = ref.get("target", "")
        ref_type = ref.get("type", "")
        confidence = ref.get("confidence", 0.5)

        if target in self.BACK_REFERENCE_PATTERNS:
            if context and "previous_entity" in context:
                prev_ids = known_entities.get(context["previous_entity"])
                if prev_ids:
                    return prev_ids[0], 0.85
            # Without usable context, guess the first entity that has an ID.
            for ids in known_entities.values():
                if ids:
                    return ids[0], 0.6
            return None, 0.0

        rfc_match = re.match(r"RFC-?(\d+)", target, re.IGNORECASE)
        if rfc_match:
            # Require the number to end right there so "RFC-26" does not
            # resolve to an entity named "rfc-2616".
            rfc_name = re.compile(rf"rfc[_ \-]{rfc_match.group(1)}(?!\d)")
            for name, ids in known_entities.items():
                if ids and rfc_name.search(name.lower()):
                    return ids[0], 0.9
            return None, 0.0

        if target.startswith("同") and len(target) > 1:
            suffix = target[1:]
            best_match = None
            best_score = 0.0
            for name, ids in known_entities.items():
                if ids and suffix in name:
                    # Prefer the name the suffix covers the largest share of.
                    score = len(suffix) / max(len(name), 1)
                    if score > best_score:
                        best_score = score
                        best_match = ids[0]
            if best_match:
                return best_match, min(0.5 + best_score * 0.4, 0.85)
            return None, 0.0

        if ref_type == "sequential" and target == "implicit_next":
            if context and "next_entity" in context:
                next_ids = known_entities.get(context["next_entity"])
                if next_ids:
                    return next_ids[0], 0.8
            if len(known_entities) > 1:
                # Without context, guess the second known entity.
                second_ids = known_entities[list(known_entities)[1]]
                if second_ids:
                    return second_ids[0], 0.5
            return None, 0.0

        if ref_type == "implicit":
            entity_id = self._fuzzy_lookup(target, known_entities)
            if entity_id is not None:
                return entity_id, 0.7

        return None, confidence
@@ -0,0 +1,77 @@
1
+ """Term-based association using dictionary lookup."""
2
+
3
+ from typing import List, Set, Tuple, Optional
4
+ from memnex.models.memory import Function
5
+ from memnex.models.misc import FieldValue
6
+ from memnex.core.dictionaries import TermDictionary
7
+
8
+
9
class TermMapper:
    """Associates documents through the dictionary terms they have in common."""

    def __init__(self, dictionary: TermDictionary = None):
        # A default-constructed dictionary is used when none is supplied.
        self.dictionary = dictionary or TermDictionary()

    def embed_text(self, text: str) -> Optional[List[float]]:
        """Return an embedding of ``text`` as a list of floats.

        Returns None when an ImportError is raised anywhere in the process,
        typically because sentence-transformers is not installed.
        """
        try:
            from sentence_transformers import SentenceTransformer
            # The model is created on first use and kept on the instance.
            if not hasattr(self, '_embedding_model'):
                self._embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
            vectors = self._embedding_model.encode([text])
            return vectors[0].tolist()
        except ImportError:
            return None

    def extract_terms(self, text: str) -> Set[str]:
        """Return the dictionary terms that occur in ``text``."""
        return self.dictionary.find_matching_terms(text)

    def find_associations(
        self,
        source_terms: Set[str],
        target_candidates: List[Function]
    ) -> List[Tuple[Function, float]]:
        """
        Rank candidate functions by how many terms they share with the source.

        Candidates whose overlap score is zero are dropped.

        Returns:
            List of (function, confidence) tuples, highest confidence first
        """
        scored = [
            (candidate, self._calculate_term_overlap(source_terms, candidate))
            for candidate in target_candidates
        ]
        ranked = [pair for pair in scored if pair[1] > 0]
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked

    def _calculate_term_overlap(self, source_terms: Set[str], func: Function) -> float:
        """Return the intersection-over-union of source terms and the function's terms."""
        if not source_terms:
            return 0.0

        # Terms found in the function's two name fields.
        func_terms = self.extract_terms(func.name)
        func_terms.update(self.extract_terms(func.name_normalized))

        # Terms found in the descriptions of the multi-value fields (List[FieldValue]).
        multi_value_fields = (func.trigger, func.condition, func.action, func.benefit)
        for field_values in multi_value_fields:
            for field_value in field_values:
                func_terms.update(self.extract_terms(field_value.desc))

        if not func_terms:
            return 0.0

        shared = source_terms & func_terms
        combined = source_terms | func_terms

        return len(shared) / len(combined) if combined else 0.0

    def build_term_normalized(self, text: str) -> str:
        """Return a normalized key for ``text``.

        The key is the sorted matching terms joined by underscores; when no
        term matches, it is the lower-cased text with spaces replaced by
        underscores.
        """
        found = self.extract_terms(text)
        if not found:
            return text.lower().replace(" ", "_")
        return "_".join(sorted(found))
@@ -0,0 +1,50 @@
1
+ """Term dictionary for association mapping."""
2
+
3
+ import yaml
4
+ from pathlib import Path
5
+ from typing import Dict, List, Set
6
+
7
+
8
class TermDictionary:
    """Term dictionary for association mapping.

    Holds a mapping of canonical term -> synonyms loaded from a YAML file,
    plus a lower-cased reverse index from each synonym (and each canonical
    term) back to its canonical term.
    """

    def __init__(self, base_path: str = None):
        """Load the dictionary from ``base_path``.

        Args:
            base_path: Path of the YAML file. Defaults to ``base_terms.yaml``
                next to this module. A missing file yields an empty dictionary.
        """
        if base_path is None:
            base_path = Path(__file__).parent / "base_terms.yaml"
        self.base_path = Path(base_path)
        self.terms: Dict[str, List[str]] = {}
        self.reverse_map: Dict[str, str] = {}  # synonym -> canonical
        self._load()

    def _load(self):
        """Load dictionary from YAML.

        Raises:
            ValueError: If the file's top level is neither empty nor a mapping.
        """
        if not self.base_path.exists():
            return

        with open(self.base_path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f)

        self._register_terms(data)

    def _register_terms(self, data) -> None:
        """Add parsed YAML content to ``terms`` and ``reverse_map``.

        ``data`` is None for an empty file and is treated as "no terms".
        """
        if data is None:
            return
        if not isinstance(data, dict):
            raise ValueError(
                f"Term dictionary {self.base_path} must be a mapping of "
                f"canonical term to synonyms, got {type(data).__name__}"
            )

        for canonical, synonyms in data.items():
            names = self._as_synonym_list(synonyms)
            self.terms[canonical] = names
            # Let the canonical term resolve to itself regardless of case.
            # setdefault keeps any explicit synonym mapping in charge.
            if isinstance(canonical, str):
                self.reverse_map.setdefault(canonical.lower(), canonical)
            for syn in names:
                self.reverse_map[syn.lower()] = canonical

    @staticmethod
    def _as_synonym_list(synonyms) -> List[str]:
        """Normalize a YAML value into a list of synonym strings."""
        if synonyms is None:
            # "term:" with no value.
            return []
        if isinstance(synonyms, str):
            # "term: synonym" - a single synonym, not a sequence of characters.
            return [synonyms]
        if isinstance(synonyms, (list, tuple, set)):
            return [str(syn) for syn in synonyms if syn is not None]
        return [str(synonyms)]

    def get_canonical(self, term: str) -> str:
        """Get canonical form of a term.

        Unknown terms are returned lower-cased.
        """
        return self.reverse_map.get(term.lower(), term.lower())

    def get_synonyms(self, term: str) -> List[str]:
        """Get all synonyms for a term.

        Returns ``[term]`` when the term is not in the dictionary.
        """
        canonical = self.get_canonical(term)
        return self.terms.get(canonical, [term])

    def find_matching_terms(self, text: str) -> Set[str]:
        """Find all matching terms in text.

        A canonical term matches when any of its synonyms occurs in the text
        as a case-insensitive substring.

        Returns:
            A new set of the canonical terms that matched.
        """
        text_lower = text.lower()
        matches = set()
        for term, synonyms in self.terms.items():
            if any(syn.lower() in text_lower for syn in synonyms):
                matches.add(term)
        return matches