memplex 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. memnex/__init__.py +31 -0
  2. memnex/__main__.py +6 -0
  3. memnex/_plugin/.claude-plugin/plugin.json +24 -0
  4. memnex/_plugin/.mcp.json +9 -0
  5. memnex/_plugin/__init__.py +0 -0
  6. memnex/_plugin/hooks/hooks.json +43 -0
  7. memnex/_plugin/scripts/hook-runner.py +166 -0
  8. memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
  9. memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
  10. memnex/_plugin/skills/mem-search/SKILL.md +85 -0
  11. memnex/_plugin/skills/mem-write/SKILL.md +78 -0
  12. memnex/adapters/__init__.py +14 -0
  13. memnex/adapters/claude_skill.py +169 -0
  14. memnex/adapters/cli.py +525 -0
  15. memnex/adapters/http_api.py +314 -0
  16. memnex/adapters/mcp_server.py +448 -0
  17. memnex/compaction.py +563 -0
  18. memnex/config.py +366 -0
  19. memnex/core/__init__.py +13 -0
  20. memnex/core/associator/__init__.py +8 -0
  21. memnex/core/associator/domain_classifier.py +75 -0
  22. memnex/core/associator/entity_aligner.py +127 -0
  23. memnex/core/associator/ref_linker.py +197 -0
  24. memnex/core/associator/term_mapper.py +77 -0
  25. memnex/core/dictionaries/__init__.py +50 -0
  26. memnex/core/engine.py +667 -0
  27. memnex/core/extractors/__init__.py +15 -0
  28. memnex/core/extractors/docx.py +97 -0
  29. memnex/core/extractors/image.py +233 -0
  30. memnex/core/extractors/markdown.py +139 -0
  31. memnex/core/extractors/pdf.py +133 -0
  32. memnex/core/extractors/vision_mapper.py +131 -0
  33. memnex/core/handlers/__init__.py +7 -0
  34. memnex/core/handlers/clipboard.py +40 -0
  35. memnex/core/handlers/file_handler.py +62 -0
  36. memnex/core/handlers/url_handler.py +132 -0
  37. memnex/llm/__init__.py +25 -0
  38. memnex/llm/enhancer.py +226 -0
  39. memnex/llm/fallback_chain.py +87 -0
  40. memnex/llm/injection_guard.py +178 -0
  41. memnex/llm/provider.py +130 -0
  42. memnex/llm/providers/__init__.py +22 -0
  43. memnex/llm/providers/anthropic.py +135 -0
  44. memnex/llm/providers/local.py +135 -0
  45. memnex/llm/providers/rule_based.py +68 -0
  46. memnex/llm/sanitizer.py +67 -0
  47. memnex/models/__init__.py +68 -0
  48. memnex/models/feedback.py +42 -0
  49. memnex/models/graph.py +33 -0
  50. memnex/models/memory.py +102 -0
  51. memnex/models/misc.py +185 -0
  52. memnex/models/paragraph.py +45 -0
  53. memnex/models/search.py +51 -0
  54. memnex/models/source.py +23 -0
  55. memnex/models/task.py +62 -0
  56. memnex/processing/__init__.py +1 -0
  57. memnex/processing/graph_builder.py +278 -0
  58. memnex/processing/merger/__init__.py +6 -0
  59. memnex/processing/merger/confidence_calculator.py +127 -0
  60. memnex/processing/merger/conflict_resolver.py +116 -0
  61. memnex/retrieval/__init__.py +1 -0
  62. memnex/retrieval/dedup.py +386 -0
  63. memnex/retrieval/embedding.py +289 -0
  64. memnex/retrieval/reranker.py +299 -0
  65. memnex/service.py +902 -0
  66. memnex/storage/__init__.py +65 -0
  67. memnex/storage/base.py +132 -0
  68. memnex/storage/changelog.py +106 -0
  69. memnex/storage/feedback.py +486 -0
  70. memnex/storage/lite/__init__.py +5 -0
  71. memnex/storage/lite/store.py +606 -0
  72. memnex/storage/vector.py +265 -0
  73. memnex/wiki/__init__.py +11 -0
  74. memnex/wiki/community.py +221 -0
  75. memnex/wiki/compiler.py +545 -0
  76. memnex/wiki/generator.py +270 -0
  77. memnex/wiki/search.py +282 -0
  78. memnex/worker.py +412 -0
  79. memplex-3.2.0.dist-info/METADATA +37 -0
  80. memplex-3.2.0.dist-info/RECORD +83 -0
  81. memplex-3.2.0.dist-info/WHEEL +5 -0
  82. memplex-3.2.0.dist-info/entry_points.txt +2 -0
  83. memplex-3.2.0.dist-info/top_level.txt +1 -0
memnex/service.py ADDED
@@ -0,0 +1,902 @@
1
+ """MemNexService -- unified user-facing entry point.
2
+
3
+ Orchestrates intent detection -> multi-path retrieval -> Rerank -> return.
4
+ Users call ``service.query(text)`` and never need to know about scopes,
5
+ retrieval paths, or ranking internals.
6
+
7
+ Usage::
8
+
9
+ from memnex import MemNexService
10
+
11
+ svc = MemNexService() # uses default config
12
+ svc.start() # start background worker
13
+
14
+ result = svc.query("登录函数在哪")
15
+ for r in result.results:
16
+ print(r.name, r.relevance_score)
17
+
18
+ svc.stop()
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import asyncio
24
+ import concurrent.futures
25
+ import logging
26
+ from concurrent.futures import ThreadPoolExecutor, as_completed
27
+ from datetime import datetime
28
+ from typing import Any, Dict, List, Optional
29
+
30
+ from memnex.config import MemNexConfig, load_config
31
+ from memnex.core import CoreEngine
32
+ from memnex.retrieval.embedding import EmbeddingService, Vector
33
+ from memnex.processing.graph_builder import GraphBuilder
34
+ from memnex.models import (
35
+ CompactionResult,
36
+ CompactionScope,
37
+ ExtractedData,
38
+ FeedbackVerdict,
39
+ Function,
40
+ GraphData,
41
+ MemoryFeedback,
42
+ MemoryNode,
43
+ PendingReview,
44
+ QueryResult,
45
+ QueryScope,
46
+ SearchResult,
47
+ SourceDocument,
48
+ SourceType,
49
+ UpdateResult,
50
+ )
51
+ from memnex.retrieval.reranker import CrossEncoderReranker, Reranker
52
+ from memnex.storage import MemoryStore, create_store
53
+ from memnex.storage.feedback import FeedbackStore, create_feedback_store
54
+ from memnex.worker import BackgroundWorker
55
+ from memnex.compaction import CompactionPipeline
56
+ from memnex.llm import LLMEnhancer
57
+ from memnex.llm.provider import create_provider
58
+
59
+ logger = logging.getLogger(__name__)
60
+
61
+
62
+ # ── Helper ─────────────────────────────────────────────────────────────
63
+
64
+
65
+ def _detect_memory_type(text: str) -> str:
66
+ """Heuristic: classify text into a memory type.
67
+
68
+ Returns one of ``"function"`` | ``"fact"`` | ``"preference"`` |
69
+ ``"observation"``.
70
+ """
71
+ text_lower = text.lower()
72
+
73
+ # Observation patterns
74
+ obs_keywords = [
75
+ "observe", "observed", "noticed", "happened", "occurred",
76
+ "事件", "观察", "发生", "记录",
77
+ ]
78
+ if any(k in text_lower for k in obs_keywords):
79
+ return "observation"
80
+
81
+ # Preference patterns
82
+ pref_keywords = [
83
+ "prefer", "like", "dislike", "want", "always", "never",
84
+ "喜欢", "偏好", "讨厌", "倾向", "总是", "从不",
85
+ ]
86
+ if any(k in text_lower for k in pref_keywords):
87
+ return "preference"
88
+
89
+ # Fact patterns
90
+ fact_keywords = [
91
+ "is", "are", "means", "defined as", "refers to",
92
+ "是", "意味着", "定义为", "指的是", "事实",
93
+ ]
94
+ if any(k in text_lower for k in fact_keywords):
95
+ return "fact"
96
+
97
+ # Default: function (procedural / action-oriented)
98
+ return "function"
99
+
100
+
101
+ # ── MemNexService ──────────────────────────────────────────────────────
102
+
103
+
104
+ class MemNexService:
105
+ """Unified user-facing entry point for MemNex.
106
+
107
+ Responsibilities:
108
+ Intent detection -> multi-path retrieval -> Rerank -> return.
109
+
110
+ Does **not** hold data; delegates to ``MemoryStore``,
111
+ ``EmbeddingService``, ``Reranker``, ``LLMEnhancer``,
112
+ ``BackgroundWorker``, and ``CompactionPipeline``.
113
+
114
+ Parameters
115
+ ----------
116
+ config:
117
+ Full :class:`MemNexConfig`. When ``None``, loaded via
118
+ :func:`load_config`.
119
+ """
120
+
121
def __init__(self, config: Optional[MemNexConfig] = None) -> None:
    """Build and wire every collaborator the service delegates to.

    Parameters
    ----------
    config:
        Full :class:`MemNexConfig`.  When falsy, the configuration is
        obtained from :func:`load_config`.
    """
    settings = config or load_config()
    self._config = settings

    # Only the "lite" storage backend is implemented; any other configured
    # value is reported and replaced by "lite".
    supported_backends = {"lite"}
    requested_backend = settings.storage.backend
    if requested_backend in supported_backends:
        backend = requested_backend
    else:
        logger.warning(
            "Storage backend %r not available, falling back to 'lite'",
            requested_backend,
        )
        backend = "lite"

    # Storage comes first: most components below receive it.
    self.store: MemoryStore = create_store(
        backend=backend,
        path=settings.storage.path,
    )

    # Embedding service
    self._embedding_service = EmbeddingService(
        model=settings.embedding.model,
        dimension=settings.embedding.dimension,
        storage=self.store,
    )

    # Stage-1 reranker
    self._reranker = Reranker(
        embedding_service=self._embedding_service,
        weights=settings.reranker.weights,
        storage=self.store,
    )

    # Stage-2 cross-encoder reranker (enabled flag comes from config)
    self._cross_reranker = CrossEncoderReranker(
        model_name=settings.reranker.cross_encoder_model,
        enabled=settings.reranker.cross_encoder_enabled,
    )

    # Optional LLM enhancer; ``_init_llm`` leaves it as None on failure.
    self._llm: Optional[LLMEnhancer] = None
    self._init_llm(settings)

    # Feedback store uses the resolved backend name.
    self._feedback_store: FeedbackStore = create_feedback_store(
        backend=backend,
    )

    # Background worker (not started here; see ``start``)
    self._worker = BackgroundWorker()

    # Compaction pipeline
    self._compaction = CompactionPipeline(
        store=self.store,
        embedding_service=self._embedding_service,
        config=settings,
    )

    # Graph builder
    self._graph_builder = GraphBuilder(
        store=self.store,
        config=settings,
    )

    # Core engine (extraction pipeline)
    self._engine = CoreEngine(store=self.store)
188
+
189
+ # ── LLM initialisation ──────────────────────────────────────
190
+
191
def _init_llm(self, cfg: MemNexConfig) -> None:
    """Best-effort construction of the optional LLM enhancer.

    Any exception raised while building the provider or the enhancer is
    logged at INFO level and leaves ``self._llm`` set to ``None``.
    """
    enhancer: Optional[LLMEnhancer]
    try:
        llm_settings = cfg.llm
        backend = create_provider(
            provider=llm_settings.provider,
            anthropic_api_key=llm_settings.anthropic_api_key,
            local_endpoint=llm_settings.local_endpoint,
            local_model=llm_settings.local_model,
            fallback_chain=llm_settings.fallback_chain,
        )
        enhancer = LLMEnhancer(llm_provider=backend, config=cfg.llm)
    except Exception as exc:
        logger.info("LLM enhancer not available (%s); using rule-based fallback", exc)
        enhancer = None
    self._llm = enhancer
205
+
206
+ # ════════════════════════════════════════════════════════════════
207
+ # Core query
208
+ # ════════════════════════════════════════════════════════════════
209
+
210
def query(
    self,
    text: str,
    top_k: int = 10,
    owner: Optional[str] = None,
    max_tokens: int = 4000,
) -> QueryResult:
    """Unified query entry point.

    Pipeline:
        1. Intent detection via ``_detect_scope``.
        2. Parallel multi-path retrieval (ThreadPoolExecutor, 3 workers);
           a failing path is logged and skipped.
        3. Merge + deduplicate by ``func_id`` (keep highest score).
        4. Rerank: bi-encoder stage, then cross-encoder stage.
        5. Increment the store's access counter for each returned result.
        6. Token budget truncation (greedy, in ranked order).

    Parameters
    ----------
    text:
        User query string.
    top_k:
        Maximum results to return.
    owner:
        Optional owner filter.  NOTE(review): accepted for interface
        compatibility but not applied to any retrieval path here.
    max_tokens:
        Token budget for the result set (values <= 0 mean unlimited).
        Each result costs ``max(token_estimate, len(summary) // 4 + 1)``.

    Returns
    -------
    QueryResult
        ``latency_ms`` covers retrieval through the access-count update;
        intent detection and budget truncation are outside the timed span.
    """
    scope = self._detect_scope(text)
    start = datetime.now()

    # Pre-compute the query vector once so every path can reuse it.
    query_vector: Optional[Vector] = None
    if self._embedding_service is not None:
        if self._llm is not None and self._config.embedding.hyde_enabled:
            query_vector = self._compute_hyde_vector(text)
        # Explicit None check: ``or`` would evaluate the vector's truth
        # value, which is ambiguous for array-like vectors.
        if query_vector is None:
            query_vector = self._embedding_service.embed(text)

    # Parallel multi-path retrieval; the scope decides which paths run.
    futures: Dict[concurrent.futures.Future, str] = {}
    with ThreadPoolExecutor(max_workers=3) as pool:
        if scope in (QueryScope.IMMEDIATE, QueryScope.ALL):
            futures[pool.submit(self._rag_search, text, top_k, query_vector)] = "rag"
        if scope in (QueryScope.SYNTHESIS, QueryScope.ALL):
            futures[pool.submit(self._wiki_search, text, top_k)] = "wiki"
        if scope in (QueryScope.RELATION, QueryScope.ALL):
            futures[pool.submit(self._graph_search, text, top_k, query_vector)] = "graph"

        all_results: List[List[SearchResult]] = []
        for future in as_completed(futures):
            try:
                all_results.append(future.result())
            except Exception as exc:
                logger.warning(
                    "Search path %s failed: %s", futures[future], exc
                )

    results = self._merge_multi_path(all_results)

    # Stage 1 rerank keeps 2 * top_k candidates for stage 2.
    if self._reranker is not None:
        results = self._reranker.rerank(text, results, top_k * 2, query_vector)

    # Stage 2: cross-encoder rerank
    if self._cross_reranker is not None:
        results = self._cross_reranker.rerank(text, results)

    results = results[:top_k]

    # Access counts are best-effort, but failures are no longer silent.
    for r in results:
        try:
            self.store.increment_access(r.func_id)
        except Exception as exc:
            logger.debug(
                "Could not increment access count for %s: %s", r.func_id, exc
            )

    latency = int((datetime.now() - start).total_seconds() * 1000)

    # Token budget truncation: walk results in ranked order and keep every
    # one that still fits; later, smaller results may fit after a skip.
    truncated = False
    used = 0
    if max_tokens > 0:
        kept: List[SearchResult] = []
        for r in results:
            est = max(r.token_estimate, len(r.summary) // 4 + 1)
            r.token_estimate = est
            if used + est <= max_tokens:
                kept.append(r)
                used += est
            else:
                truncated = True
        results = kept
    else:
        used = sum(r.token_estimate for r in results)

    return QueryResult(
        results=results,
        scope=scope,
        latency_ms=latency,
        tokens_used=used,
        truncated=truncated,
    )
314
+
315
async def query_async(
    self,
    text: str,
    top_k: int = 10,
    owner: Optional[str] = None,
    max_tokens: int = 4000,
) -> QueryResult:
    """Awaitable wrapper around :meth:`query`.

    The synchronous ``query`` call is handed to the running loop's default
    executor, so the event loop stays free while retrieval runs (intended
    for FastAPI / MCP Server callers).
    """

    def _run_sync() -> QueryResult:
        return self.query(text, top_k=top_k, owner=owner, max_tokens=max_tokens)

    running_loop = asyncio.get_running_loop()
    outcome = await running_loop.run_in_executor(None, _run_sync)
    return outcome
332
+
333
+ # ════════════════════════════════════════════════════════════════
334
+ # Intent detection
335
+ # ════════════════════════════════════════════════════════════════
336
+
337
def _detect_scope(self, text: str) -> QueryScope:
    """Intent detection: LLM path (priority) then keyword fallback.

    LLM path: calls ``enhance_query()`` and maps the returned intent to a
    :class:`QueryScope`; unknown intents map to ``IMMEDIATE``.  Any failure
    (including a timeout) is logged at DEBUG level and falls through to
    the keyword path.

    Keyword path: multi-label scoring; highest score wins.  Ties resolve
    to ``ALL`` (multi-path merge); no hit at all resolves to ``IMMEDIATE``.
    """
    llm = self._llm
    if llm is not None and llm.config.query_enhancement:

        async def _bounded_enhance():
            # wait_for cancels the LLM call on timeout, so the worker
            # thread finishes.  A timeout on Future.result() alone does
            # not help: leaving the executor's ``with`` block waits for
            # the task anyway.
            return await asyncio.wait_for(llm.enhance_query(text), timeout=5.0)

        try:
            # Decide how to run the coroutine first, so a RuntimeError
            # raised by the LLM call is never mistaken for "no loop".
            try:
                asyncio.get_running_loop()
            except RuntimeError:
                inside_loop = False
            else:
                inside_loop = True

            if inside_loop:
                # Inside an existing event loop (FastAPI/MCP): run on a
                # helper thread to avoid nested-loop errors.
                with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
                    enhanced = _pool.submit(asyncio.run, _bounded_enhance()).result()
            else:
                # No running loop (CLI / sync call)
                enhanced = asyncio.run(llm.enhance_query(text))

            intent_map = {
                "search": QueryScope.IMMEDIATE,
                "understand": QueryScope.SYNTHESIS,
                "compare": QueryScope.ALL,
                "relation": QueryScope.RELATION,
            }
            return intent_map.get(enhanced.intent, QueryScope.IMMEDIATE)
        except Exception as exc:
            logger.debug(
                "LLM intent detection failed (%r); using keyword fallback", exc
            )

    # Keyword fallback
    text_lower = text.lower()
    # NOTE(review): this only blanks out the negation token itself; a
    # keyword following the negation still scores.
    negation_prefixes = [
        "不", "没有", "没", "非", "不是", "un", "not", "no ", "non-",
    ]
    cleaned = text_lower
    for neg in negation_prefixes:
        cleaned = cleaned.replace(neg, " ")

    scope_keywords = {
        QueryScope.RELATION: [
            "影响", "依赖", "调用", "关系", "哪些",
            "affect", "depend", "call", "relation", "impact",
        ],
        QueryScope.SYNTHESIS: [
            "设计", "架构", "概述", "整体", "概念", "原理",
            "design", "architecture", "overview", "concept", "how does",
        ],
        QueryScope.IMMEDIATE: [
            "在哪", "定义", "是什么", "查找", "搜索",
            "where", "define", "what is", "find", "search",
        ],
    }

    # One point per keyword found in the cleaned text.
    scores = {
        scope: sum(1 for k in kw if k in cleaned)
        for scope, kw in scope_keywords.items()
    }
    max_score = max(scores.values())
    if max_score == 0:
        return QueryScope.IMMEDIATE
    top_scopes = [s for s, v in scores.items() if v == max_score]
    return QueryScope.ALL if len(top_scopes) > 1 else top_scopes[0]
404
+
405
+ # ════════════════════════════════════════════════════════════════
406
+ # Multi-path retrieval
407
+ # ════════════════════════════════════════════════════════════════
408
+
409
+ def _rag_search(
410
+ self,
411
+ text: str,
412
+ top_k: int,
413
+ query_vector: Optional[Vector] = None,
414
+ ) -> List[SearchResult]:
415
+ """RAG vector + FTS hybrid search with FTS fallback."""
416
+ results = self.store.vector_search(text, top_k)
417
+ # FTS fallback when vector search returns nothing
418
+ if not results:
419
+ results = self.store.fts_search(text, top_k)
420
+ # Pre-fill vector_cache for Reranker reuse
421
+ if query_vector is not None:
422
+ for r in results:
423
+ r.vector_cache = query_vector
424
+ return results
425
+
426
+ def _wiki_search(self, text: str, top_k: int) -> List[SearchResult]:
427
+ """Wiki layer: FTS-based search over compiled wiki pages.
428
+
429
+ Falls back to ``store.fts_search`` when no WikiCompiler is
430
+ available.
431
+ """
432
+ return self.store.fts_search(text, top_k)
433
+
434
+ def _graph_search(
435
+ self,
436
+ text: str,
437
+ top_k: int,
438
+ query_vector: Optional[Vector] = None,
439
+ ) -> List[SearchResult]:
440
+ """Incremental graph traversal search.
441
+
442
+ 1. Vector search to find seed Functions (top_k=3).
443
+ 2. Expand 1-hop neighbours via ``get_neighbors()``.
444
+ 3. Filter to relation-type edges.
445
+ """
446
+ seed_results = self.store.vector_search(text, top_k=3)
447
+ if not seed_results:
448
+ seed_results = self.store.fts_search(text, top_k=3)
449
+ if not seed_results:
450
+ return []
451
+
452
+ results: List[SearchResult] = []
453
+ seen: set = set()
454
+
455
+ for seed in seed_results:
456
+ if seed.func_id in seen:
457
+ continue
458
+ seen.add(seed.func_id)
459
+ if query_vector is not None:
460
+ seed.vector_cache = query_vector
461
+ results.append(seed)
462
+
463
+ # Incremental: only get this seed's neighbours
464
+ try:
465
+ neighbors = self.store.get_neighbors(seed.func_id, max_hops=1)
466
+ except Exception:
467
+ continue
468
+ for neighbor in neighbors:
469
+ if neighbor.id not in seen:
470
+ results.append(SearchResult(
471
+ func_id=neighbor.id,
472
+ name=neighbor.name,
473
+ domain=neighbor.domain or "",
474
+ relevance_score=0.5,
475
+ summary=neighbor.name,
476
+ created_at=(
477
+ datetime.fromisoformat(neighbor.created_at)
478
+ if isinstance(neighbor.created_at, str) and neighbor.created_at
479
+ else neighbor.created_at
480
+ ),
481
+ updated_at=(
482
+ datetime.fromisoformat(neighbor.updated_at)
483
+ if isinstance(neighbor.updated_at, str) and neighbor.updated_at
484
+ else neighbor.updated_at
485
+ ),
486
+ origin=neighbor.origin_session or "",
487
+ ))
488
+ seen.add(neighbor.id)
489
+
490
+ return results[:top_k]
491
+
492
+ @staticmethod
493
+ def _merge_multi_path(
494
+ result_lists: List[List[SearchResult]],
495
+ ) -> List[SearchResult]:
496
+ """Merge multi-path results; deduplicate by ``func_id``, keeping
497
+ the highest ``relevance_score``."""
498
+ seen: Dict[str, SearchResult] = {}
499
+ for results in result_lists:
500
+ for r in results:
501
+ if r.func_id not in seen or r.relevance_score > seen[r.func_id].relevance_score:
502
+ seen[r.func_id] = r
503
+ return sorted(seen.values(), key=lambda x: x.relevance_score, reverse=True)
504
+
505
+ # ════════════════════════════════════════════════════════════════
506
+ # HyDE
507
+ # ════════════════════════════════════════════════════════════════
508
+
509
+ def _compute_hyde_vector(self, text: str) -> Optional[Vector]:
510
+ """Generate a HyDE (Hypothetical Document Embedding) vector.
511
+
512
+ Uses ThreadPoolExecutor to isolate ``asyncio.run`` so it works
513
+ in all environments (with or without a running event loop).
514
+
515
+ Returns ``None`` on failure; the caller falls back to a raw
516
+ query vector.
517
+ """
518
+ if self._llm is None:
519
+ return None
520
+ try:
521
+ with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
522
+ hyde_text = pool.submit(
523
+ asyncio.run,
524
+ self._llm.enhance_query_hyde_text(text),
525
+ ).result(timeout=5.0)
526
+ return self._embedding_service.embed(hyde_text)
527
+ except Exception as exc:
528
+ logger.warning("HyDE failed, falling back to raw query vector: %s", exc)
529
+ return None
530
+
531
+ # ════════════════════════════════════════════════════════════════
532
+ # Write operations
533
+ # ════════════════════════════════════════════════════════════════
534
+
535
def write(self, source: SourceDocument) -> ExtractedData:
    """Write new content: extract Functions -> ``store.merge()`` ->
    queue background indexing.

    Parameters
    ----------
    source:
        The source document to ingest.

    Returns
    -------
    ExtractedData
        Whatever ``CoreEngine.extract`` produced for ``source``.
    """
    # 1. CoreEngine: full extraction pipeline
    extracted = self._engine.extract(source)

    # 2. Merge the extracted graph into the store (only if anything was
    #    extracted).
    if extracted.functions:
        self.store.merge(extracted.graph)

    # Invalidate graph builder cache so the next write sees new data.
    self._graph_builder.invalidate_cache()

    # 3. Queue one "build_index" task per Function.  Queuing is
    #    best-effort, handled per task so one failure does not drop the
    #    remaining tasks, and every failure is logged.
    for func in extracted.functions or ():
        try:
            self._worker.submit(
                "build_index",
                # Bind ``func`` as a default to avoid late-binding closures.
                lambda f=func: self.store.add(f, source),
            )
        except Exception as exc:
            logger.warning(
                "Could not queue build_index for %s: %s",
                getattr(func, "id", "<unknown>"),
                exc,
            )

    return extracted
572
+
573
def write_text(
    self,
    text: str,
    source_type: str = "text",
) -> ExtractedData:
    """Ingest raw text by wrapping it in a :class:`SourceDocument`.

    Parameters
    ----------
    text:
        Raw text to ingest.
    source_type:
        Stored as the document's ``type`` (``"text"`` | ``"file"`` |
        ``"url"`` | ``"clipboard"``).  The document's ``source_type``
        field is always ``SourceType.WIKI``.

    Returns
    -------
    ExtractedData
        The result of :meth:`write` for the wrapped document.
    """
    document_fields = {
        "type": source_type,
        "content": text,
        "source_type": SourceType.WIKI,
    }
    return self.write(SourceDocument(**document_fields))
598
+
599
+ # ── Extraction helper ────────────────────────────────────────
600
+
601
+ def _extract_functions(self, source: SourceDocument) -> list:
602
+ """Rule-based Function extraction from a SourceDocument.
603
+
604
+ Splits content into paragraphs, creates one Function per
605
+ paragraph with detected trigger/action fields.
606
+ """
607
+ content = source.content or ""
608
+ if not content.strip():
609
+ return []
610
+
611
+ paragraphs = [p.strip() for p in content.split("\n\n") if p.strip()]
612
+ functions: list = []
613
+ import hashlib
614
+ import uuid
615
+
616
+ for i, para in enumerate(paragraphs):
617
+ # Generate stable ID from content hash
618
+ content_hash = hashlib.sha256(para.encode()).hexdigest()[:16]
619
+ func_id = f"func_{content_hash}"
620
+
621
+ memory_type = _detect_memory_type(para)
622
+
623
+ # Extract trigger/action via simple heuristics
624
+ sentences = [s.strip() for s in para.split("。") if s.strip()]
625
+ if not sentences:
626
+ sentences = [s.strip() for s in para.split(".") if s.strip()]
627
+
628
+ from memnex.models import FieldValue
629
+ triggers = []
630
+ actions = []
631
+ for s in sentences[:5]:
632
+ fv = FieldValue(
633
+ desc=s,
634
+ sources=[source.type],
635
+ source_method="rule_based",
636
+ weight=0.7,
637
+ )
638
+ # Heuristic: first sentence is trigger, rest are action
639
+ if not triggers:
640
+ triggers.append(fv)
641
+ else:
642
+ actions.append(fv)
643
+
644
+ from memnex.models.memory import Function as Func
645
+ func = Func(
646
+ id=func_id,
647
+ name=para[:50] + ("..." if len(para) > 50 else ""),
648
+ domain=None,
649
+ trigger=triggers,
650
+ action=actions,
651
+ source_type=source.source_type,
652
+ content_hash=hashlib.sha256(para.encode()).hexdigest(),
653
+ )
654
+ functions.append(func)
655
+
656
+ return functions
657
+
658
+ # ════════════════════════════════════════════════════════════════
659
+ # Memory operations
660
+ # ════════════════════════════════════════════════════════════════
661
+
662
def get(self, memory_id: str) -> Optional[Function]:
    """Look up one Function by ID; returns whatever ``store.get`` yields."""
    found = self.store.get(memory_id)
    return found
665
+
666
def update_memory(
    self,
    memory_id: str,
    role: str,
    new_value: str,
) -> UpdateResult:
    """Update a Function's field value.

    The new value is inserted at the front of the role's FieldValue list
    (existing values are kept behind it) and the Function is written back
    through ``store.add``.

    Parameters
    ----------
    memory_id:
        Function ID to update.
    role:
        Which role field to modify (``"trigger"`` | ``"condition"`` |
        ``"action"`` | ``"benefit"``).
    new_value:
        New description text for the FieldValue.

    Returns
    -------
    UpdateResult
        ``success=False`` with an ``error`` message when the Function does
        not exist or ``role`` does not name a list-valued field.
    """
    func = self.store.get(memory_id)
    if func is None:
        return UpdateResult(
            memory_id=memory_id,
            role=role,
            new_value=new_value,
            success=False,
            error="Function not found",
        )

    # ``role`` is caller-supplied: reject any attribute that is not a list
    # (e.g. "id" or "name") instead of crashing on ``.insert`` below.
    values = getattr(func, role, None)
    if not isinstance(values, list):
        return UpdateResult(
            memory_id=memory_id,
            role=role,
            new_value=new_value,
            success=False,
            error=f"Unknown role: {role}",
        )

    old_value = values[0].desc if values else None

    from memnex.models import FieldValue
    values.insert(0, FieldValue(
        desc=new_value,
        sources=["manual"],
        source_method="manual",
        weight=1.0,
    ))

    # Re-add to persist the modified Function.
    self.store.add(
        func,
        SourceDocument(type="manual_update", source_type=SourceType.WIKI),
    )

    return UpdateResult(
        memory_id=memory_id,
        role=role,
        old_value=old_value,
        new_value=new_value,
        version=func.version,
        success=True,
    )
732
+
733
def delete(self, memory_id: str) -> None:
    """Remove a Function by ID by delegating to ``store.delete``.

    NOTE(review): presumably a soft delete -- that is decided by the
    store implementation, not by this method.
    """
    store = self.store
    store.delete(memory_id)
736
+
737
+ # ════════════════════════════════════════════════════════════════
738
+ # Feedback
739
+ # ════════════════════════════════════════════════════════════════
740
+
741
def submit_feedback(
    self,
    memory_id: str,
    field_role: str,
    value_index: int,
    verdict: str,
    reason: Optional[str] = None,
) -> None:
    """Record one piece of user feedback about a memory field value.

    Parameters
    ----------
    memory_id:
        Function ID.
    field_role:
        Role of the field (``"trigger"`` | ``"action"`` | ...).
    value_index:
        Index within the FieldValue list.
    verdict:
        ``"correct"`` | ``"wrong"`` | ``"alternative"``; converted with
        ``FeedbackVerdict(verdict)``.
    reason:
        Optional free-text explanation.
    """
    parsed_verdict = FeedbackVerdict(verdict)
    feedback = MemoryFeedback(
        memory_id=memory_id,
        field_role=field_role,
        value_index=value_index,
        verdict=parsed_verdict,
        reason=reason,
        source="user",  # this entry point always records the source as "user"
    )
    self._feedback_store.record(feedback)
773
+
774
def apply_resolution(
    self,
    memory_id: str,
    field_role: str,
    action: str,
    new_value: Optional[str] = None,
) -> dict:
    """Resolve a pending feedback review.

    The resolution is always forwarded to the feedback store.  For a
    ``"merge"`` action with a non-empty ``new_value``, the memory itself
    is also updated through :meth:`update_memory`.

    Parameters
    ----------
    memory_id:
        Function ID.
    field_role:
        Field role under review.
    action:
        ``"accept"`` | ``"reject"`` | ``"merge"``.
    new_value:
        Replacement value when action is ``"merge"``.

    Returns
    -------
    dict
        ``{"status": "resolved", "action": action}``
    """
    self._feedback_store.resolve(memory_id, field_role, action)

    should_merge = action == "merge" and bool(new_value)
    if should_merge:
        self.update_memory(memory_id, field_role, new_value)

    return dict(status="resolved", action=action)
805
+
806
def get_pending_reviews(
    self,
    owner: Optional[str] = None,
    limit: int = 100,
) -> list:
    """List pending feedback reviews.

    When ``owner`` is given, only reviews whose ``source`` attribute
    equals it are kept.  The list is then cut to the first ``limit``
    entries.

    Returns
    -------
    list[PendingReview]
    """
    reviews = self._feedback_store.get_pending()
    if owner is None:
        return reviews[:limit]
    owned = [item for item in reviews if getattr(item, "source", None) == owner]
    return owned[:limit]
821
+
822
+ # ════════════════════════════════════════════════════════════════
823
+ # Management
824
+ # ════════════════════════════════════════════════════════════════
825
+
826
def health(self) -> dict:
    """Report health / readiness of the service's components.

    Storage is probed with a one-row ``list_functions`` call; embedding,
    reranker and LLM are reported by presence; the worker by its running
    flag.

    Returns
    -------
    dict
        Keys: ``status`` (``"ok"`` or ``"degraded"``),
        ``storage_backend`` (the configured backend name) and
        ``components`` (per-component state strings).
    """

    def _presence(component) -> str:
        return "ok" if component else "disabled"

    try:
        self.store.list_functions(limit=1)
    except Exception as exc:
        storage_state = f"error: {exc}"
    else:
        storage_state = "ok"

    components: Dict[str, str] = {
        "storage": storage_state,
        "embedding": _presence(self._embedding_service),
        "reranker": _presence(self._reranker),
        "llm": _presence(self._llm),
        "worker": "running" if self._worker._running else "stopped",
    }

    # Any other state (an error string, a stopped worker) means degraded.
    healthy_states = ("ok", "running", "disabled")
    overall = "degraded"
    if all(state in healthy_states for state in components.values()):
        overall = "ok"

    return {
        "status": overall,
        "storage_backend": self._config.storage.backend,
        "components": components,
    }
853
+
854
def stats(self) -> dict:
    """Return storage and usage statistics.

    Both counters are best-effort: if the store cannot list functions or
    return the graph, the failure is logged and the counter is reported
    as ``0`` instead of raising.

    Returns
    -------
    dict
        Keys: ``total_functions`` (counted from a listing capped at
        100000 entries), ``total_edges``, ``storage_backend`` and
        ``embedding_model``.
    """
    try:
        total = len(self.store.list_functions(limit=100000))
    except Exception as exc:
        logger.warning("stats: could not count functions: %s", exc)
        total = 0

    # Guarded like the function count above, so a graph failure does not
    # make the whole call fail.
    try:
        total_edges = len(self.store.get_graph().edges)
    except Exception as exc:
        logger.warning("stats: could not count graph edges: %s", exc)
        total_edges = 0

    return {
        "total_functions": total,
        "total_edges": total_edges,
        "storage_backend": self._config.storage.backend,
        "embedding_model": self._config.embedding.model,
    }
871
+
872
def compact(self, scope: str = "project") -> CompactionResult:
    """Run the compaction pipeline and block until it finishes.

    Parameters
    ----------
    scope:
        ``"session"`` | ``"project"`` | ``"global"``; converted with
        ``CompactionScope(scope)``.
    """
    target_scope = CompactionScope(scope)
    # The pipeline's ``run`` is a coroutine; drive it with ``asyncio.run``
    # on a helper thread and wait for the outcome.
    with ThreadPoolExecutor(max_workers=1) as runner:
        job = runner.submit(asyncio.run, self._compaction.run(target_scope))
        outcome = job.result()
    return outcome
884
+
885
def start(self) -> None:
    """Start background processing by calling the worker's ``start()``."""
    worker = self._worker
    worker.start()
888
+
889
def stop(self) -> None:
    """Stop background processing by calling the worker's ``stop()``."""
    worker = self._worker
    worker.stop()
892
+
893
+ # ── Memory type detection ─────────────────────────────────────
894
+
895
@staticmethod
def _detect_memory_type(text: str) -> str:
    """Class-level alias for the module-level ``_detect_memory_type``.

    Returns one of ``"function"`` | ``"fact"`` | ``"preference"`` |
    ``"observation"``.
    """
    memory_type = _detect_memory_type(text)
    return memory_type