PyPI - memplex - Versions diffs - 3.2.0__py3-none-any.whl - Mend

memplex 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (83)

memnex/__init__.py +31 -0
memnex/__main__.py +6 -0
memnex/_plugin/.claude-plugin/plugin.json +24 -0
memnex/_plugin/.mcp.json +9 -0
memnex/_plugin/__init__.py +0 -0
memnex/_plugin/hooks/hooks.json +43 -0
memnex/_plugin/scripts/hook-runner.py +166 -0
memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
memnex/_plugin/skills/mem-search/SKILL.md +85 -0
memnex/_plugin/skills/mem-write/SKILL.md +78 -0
memnex/adapters/__init__.py +14 -0
memnex/adapters/claude_skill.py +169 -0
memnex/adapters/cli.py +525 -0
memnex/adapters/http_api.py +314 -0
memnex/adapters/mcp_server.py +448 -0
memnex/compaction.py +563 -0
memnex/config.py +366 -0
memnex/core/__init__.py +13 -0
memnex/core/associator/__init__.py +8 -0
memnex/core/associator/domain_classifier.py +75 -0
memnex/core/associator/entity_aligner.py +127 -0
memnex/core/associator/ref_linker.py +197 -0
memnex/core/associator/term_mapper.py +77 -0
memnex/core/dictionaries/__init__.py +50 -0
memnex/core/engine.py +667 -0
memnex/core/extractors/__init__.py +15 -0
memnex/core/extractors/docx.py +97 -0
memnex/core/extractors/image.py +233 -0
memnex/core/extractors/markdown.py +139 -0
memnex/core/extractors/pdf.py +133 -0
memnex/core/extractors/vision_mapper.py +131 -0
memnex/core/handlers/__init__.py +7 -0
memnex/core/handlers/clipboard.py +40 -0
memnex/core/handlers/file_handler.py +62 -0
memnex/core/handlers/url_handler.py +132 -0
memnex/llm/__init__.py +25 -0
memnex/llm/enhancer.py +226 -0
memnex/llm/fallback_chain.py +87 -0
memnex/llm/injection_guard.py +178 -0
memnex/llm/provider.py +130 -0
memnex/llm/providers/__init__.py +22 -0
memnex/llm/providers/anthropic.py +135 -0
memnex/llm/providers/local.py +135 -0
memnex/llm/providers/rule_based.py +68 -0
memnex/llm/sanitizer.py +67 -0
memnex/models/__init__.py +68 -0
memnex/models/feedback.py +42 -0
memnex/models/graph.py +33 -0
memnex/models/memory.py +102 -0
memnex/models/misc.py +185 -0
memnex/models/paragraph.py +45 -0
memnex/models/search.py +51 -0
memnex/models/source.py +23 -0
memnex/models/task.py +62 -0
memnex/processing/__init__.py +1 -0
memnex/processing/graph_builder.py +278 -0
memnex/processing/merger/__init__.py +6 -0
memnex/processing/merger/confidence_calculator.py +127 -0
memnex/processing/merger/conflict_resolver.py +116 -0
memnex/retrieval/__init__.py +1 -0
memnex/retrieval/dedup.py +386 -0
memnex/retrieval/embedding.py +289 -0
memnex/retrieval/reranker.py +299 -0
memnex/service.py +902 -0
memnex/storage/__init__.py +65 -0
memnex/storage/base.py +132 -0
memnex/storage/changelog.py +106 -0
memnex/storage/feedback.py +486 -0
memnex/storage/lite/__init__.py +5 -0
memnex/storage/lite/store.py +606 -0
memnex/storage/vector.py +265 -0
memnex/wiki/__init__.py +11 -0
memnex/wiki/community.py +221 -0
memnex/wiki/compiler.py +545 -0
memnex/wiki/generator.py +270 -0
memnex/wiki/search.py +282 -0
memnex/worker.py +412 -0
memplex-3.2.0.dist-info/METADATA +37 -0
memplex-3.2.0.dist-info/RECORD +83 -0
memplex-3.2.0.dist-info/WHEEL +5 -0
memplex-3.2.0.dist-info/entry_points.txt +2 -0
memplex-3.2.0.dist-info/top_level.txt +1 -0

memnex/llm/providers/rule_based.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""Rule-based LLM provider: zero-dependency fallback implementation."""
+import re
+from memnex.models import IntentType
+class RuleBasedProvider:
+    """Pure keyword-rule based LLM provider.
+    Zero external dependencies. Used as the final fallback when no real
+    LLM provider is available. Every method is declared ``async`` but none
+    of them awaits anything.
+    """
+    # Intent classification keyword mapping. Groups are tried in insertion
+    # order, so a query containing keywords from several groups resolves to
+    # the first group listed here.
+    _INTENT_KEYWORDS: dict[str, list[str]] = {
+        "understand": [
+            "what is", "what are", "explain", "how does", "how do",
+            "describe", "define", "tell me about", "什么是", "解释",
+            "描述", "如何", "怎么",
+        ],
+        "compare": [
+            "compare", "difference", "versus", "vs", "contrast",
+            "比较", "对比", "区别", "不同",
+        ],
+        "relation": [
+            "related", "connection", "linked", "between",
+            "关联", "关系", "联系", "连接",
+        ],
+    }
+    # Keyword group -> intent reported to the caller. "compare" is reported
+    # as RELATION, exactly as before.
+    _GROUP_TO_INTENT: dict[str, IntentType] = {
+        "understand": IntentType.SYNTHESIS,
+        "compare": IntentType.RELATION,
+        "relation": IntentType.RELATION,
+    }
+    # ASCII keywords this short collide with ordinary words ("vs" inside
+    # "canvas"), so they only count when they stand alone.
+    _WHOLE_WORD_MAX_LEN: int = 3
+    @classmethod
+    def _keyword_matches(cls, keyword: str, text: str) -> bool:
+        """Return True when *keyword* occurs in the already-lowercased *text*.
+        Short ASCII keywords must not be flanked by ASCII letters or digits;
+        every other keyword (including all CJK ones) is a substring match so
+        inflected forms such as "compared" still hit.
+        """
+        if keyword.isascii() and len(keyword) <= cls._WHOLE_WORD_MAX_LEN:
+            pattern = rf"(?<![a-z0-9]){re.escape(keyword)}(?![a-z0-9])"
+            return re.search(pattern, text) is not None
+        return keyword in text
+    async def classify_intent(
+        self, query: str, context: dict | None = None
+    ) -> IntentType:
+        """Classify intent using keyword matching.
+        Parameters
+        ----------
+        query:
+            User query; matching is case-insensitive.
+        context:
+            Accepted for interface compatibility; not used by the rules.
+        Returns
+        -------
+        The intent of the first keyword group with a hit, or
+        ``IntentType.IMMEDIATE`` when nothing matches.
+        """
+        q = query.lower()
+        for group, keywords in self._INTENT_KEYWORDS.items():
+            if any(self._keyword_matches(kw, q) for kw in keywords):
+                return self._GROUP_TO_INTENT.get(group, IntentType.IMMEDIATE)
+        return IntentType.IMMEDIATE
+    async def summarize(self, content: str, max_tokens: int = 256) -> str:
+        """Truncate content as a trivial summary.
+        ``max_tokens`` is applied as a character budget (no tokenizer is
+        involved). A negative budget is treated as zero instead of slicing
+        characters off the end of the text.
+        """
+        return content[: max(max_tokens, 0)]
+    async def extract_structured(self, prompt: str, schema: dict) -> dict:
+        """Return empty dict -- no structured extraction without an LLM."""
+        return {}
+    async def generate_hypothetical(self, query: str) -> str:
+        """Return query unchanged -- no HyDE without an LLM."""
+        return query
+    async def complete(self, prompt: str) -> str:
+        """Return empty string -- no completion without an LLM."""
+        return ""
+    async def complete_json(self, prompt: str) -> dict:
+        """Return empty dict -- no JSON completion without an LLM."""
+        return {}

memnex/llm/sanitizer.py ADDED Viewed

@@ -0,0 +1,67 @@
+"""LLM input sanitization for safe prompt construction."""
+from __future__ import annotations
+import json
+import re
+import unicodedata
+from typing import Optional
+class LLMPromptSanitizer:
+    """Sanitize and structure LLM inputs to limit injection and token overflow.
+    All public methods are static so the sanitizer can be used without
+    instantiation.
+    """
+    MAX_INPUT_LENGTH: int = 10000
+    # Marker appended to truncated text; it is NOT counted against max_length.
+    _TRUNCATION_SUFFIX: str = "...(truncated)"
+    # Spelled with escapes rather than literal characters: these code points
+    # are invisible and are easily lost when source is copied or re-encoded.
+    _ZERO_WIDTH_RE = re.compile(r"[\u200b\u200c\u200d\ufeff]")
+    @staticmethod
+    def sanitize(text: str, max_length: int = MAX_INPUT_LENGTH) -> str:
+        """Sanitize raw text for LLM consumption.
+        Steps:
+        1. NFKC unicode normalization (folds compatibility forms such as
+           full-width letters and ligatures; it does not map look-alike
+           characters from other scripts).
+        2. Zero-width character removal (U+200B, U+200C, U+200D, U+FEFF).
+        3. Length truncation to prevent token overflow. Truncated text is
+           cut to ``max_length`` characters and then suffixed with
+           ``"...(truncated)"``, so the result may exceed ``max_length``
+           by the length of that marker.
+        """
+        text = unicodedata.normalize("NFKC", text)
+        text = LLMPromptSanitizer._ZERO_WIDTH_RE.sub("", text)
+        if len(text) > max_length:
+            text = text[:max_length] + LLMPromptSanitizer._TRUNCATION_SUFFIX
+        return text
+    @staticmethod
+    def build_structured_prompt(
+        instruction: str,
+        user_input: str,
+        output_schema: Optional[dict] = None,
+        max_length: int = MAX_INPUT_LENGTH,
+    ) -> str:
+        """Build a structured JSON prompt for safe LLM interaction.
+        The user input is embedded as a JSON value so ``json.dumps``
+        automatically escapes special characters, eliminating separator
+        escape and newline injection risks.
+        Parameters
+        ----------
+        instruction:
+            The system-level instruction for the LLM. Embedded as given;
+            only ``user_input`` is sanitized.
+        user_input:
+            Untrusted user content to be safely embedded.
+        output_schema:
+            Optional JSON schema describing the expected output format;
+            emitted under the ``output_format`` key when provided.
+        max_length:
+            Maximum character length for the sanitized user input.
+        Returns
+        -------
+        A JSON object string with ``instruction``, ``user_input`` and,
+        optionally, ``output_format``. Non-ASCII text is kept verbatim
+        (``ensure_ascii=False``).
+        """
+        safe_input = LLMPromptSanitizer.sanitize(user_input, max_length)
+        payload: dict = {
+            "instruction": instruction,
+            "user_input": safe_input,
+        }
+        if output_schema is not None:
+            payload["output_format"] = output_schema
+        return json.dumps(payload, ensure_ascii=False)

memnex/models/__init__.py ADDED Viewed

@@ -0,0 +1,68 @@
+"""MemNex data models."""
+from .source import (
+    SourceType,
+    SourceDocument,
+)
+from .graph import (
+    EdgeType,
+    GraphEdge,
+    GraphData,
+)
+from .search import (
+    QueryScope,
+    SearchResult,
+    SearchFilters,
+    QueryResult,
+)
+from .feedback import (
+    FeedbackVerdict,
+    MemoryFeedback,
+    PendingReview,
+)
+from .task import (
+    BackgroundTask,
+    TaskStatus,
+    TaskInfo,
+    CompactionScope,
+    CompactionStageResult,
+    CompactionResult,
+)
+from .misc import (
+    FieldValue,
+    ExtractedData,
+    MergeResult,
+    BatchResult,
+    ParagraphDelta,
+    DedupResult,
+    RefreshResult,
+    ValidationResult,
+    UpdateResult,
+    StorageStats,
+    ChangelogEvent,
+    IntentType,
+    EnhancedQuery,
+    Summary,
+    IncrementalState,
+    WikiPage,
+    WikiIndex,
+    LintIssue,
+    LintResult,
+    validate_func_id,
+    MAX_FUNC_ID_LENGTH,
+)
+from .memory import (
+    MemoryNode,
+    Function,
+    Fact,
+    Preference,
+    Observation,
+    Memory,
+    create_memory_node,
+)
+from .paragraph import (
+    Sentence,
+    SentenceRelation,
+    Paragraph,
+    ParagraphCollection,
+)

memnex/models/feedback.py ADDED Viewed

@@ -0,0 +1,42 @@
+"""Feedback types: FeedbackVerdict, MemoryFeedback, PendingReview."""
+from datetime import datetime
+from enum import Enum
+from dataclasses import dataclass, field
+from typing import List, Optional
+from memnex.models.misc import FieldValue
+class FeedbackVerdict(Enum):
+    """Verdict carried by a MemoryFeedback record."""
+    CORRECT = "correct"
+    WRONG = "wrong"
+    ALTERNATIVE = "alternative"
+@dataclass
+class MemoryFeedback:
+    """Feedback record addressed by memory_id / field_role / value_index.
+    The first four fields are required; all others have defaults.
+    """
+    memory_id: str
+    field_role: str
+    # presumably an index into the values stored under field_role -- TODO confirm
+    value_index: int
+    verdict: FeedbackVerdict
+    reason: Optional[str] = None
+    source: str = "user"
+    # Defaults to datetime.now() at construction time (naive, local clock).
+    timestamp: datetime = field(default_factory=datetime.now)
+    owner: Optional[str] = None
+    feedback_type: str = "field_value"
+    old_value: Optional[str] = None
+    new_value: Optional[str] = None
+    # New records start out flagged for review.
+    needs_review: bool = True
+    needs_review_until: Optional[datetime] = None
+    resolved_at: Optional[datetime] = None
+    resolution: Optional[str] = None
+@dataclass
+class PendingReview:
+    """Review entry for one field of one memory, holding the values in conflict."""
+    memory_id: str
+    field_role: str
+    # Each instance gets its own empty list by default.
+    conflicting_values: List[FieldValue] = field(default_factory=list)
+    detected_at: Optional[datetime] = None
+    source: str = ""

memnex/models/graph.py ADDED Viewed

@@ -0,0 +1,33 @@
+"""Graph types: EdgeType, GraphEdge, GraphData."""
+from datetime import datetime
+from enum import Enum
+from dataclasses import dataclass, field
+from typing import List, Optional
+class EdgeType(Enum):
+    """Kinds of graph edge; every member's value is identical to its name."""
+    REFERENCES = "REFERENCES"
+    ASSOCIATED_WITH = "ASSOCIATED_WITH"
+    SEMANTIC_SIMILAR = "SEMANTIC_SIMILAR"
+    BELONGS_TO = "BELONGS_TO"
+    IMPLEMENTS = "IMPLEMENTS"
+    DEPENDS_ON = "DEPENDS_ON"
+    CONFLICTS_WITH = "CONFLICTS_WITH"
+    EVOLVED_FROM = "EVOLVED_FROM"
+@dataclass
+class GraphEdge:
+    """Directed, weighted edge from source to target."""
+    source: str
+    target: str
+    # Typed as a plain string, not EdgeType; presumably holds an EdgeType
+    # value such as "DEPENDS_ON" -- TODO confirm against callers.
+    edge_type: str
+    weight: float = 1.0
+    evidence: List[str] = field(default_factory=list)
+    created_at: Optional[datetime] = None
+@dataclass
+class GraphData:
+    """Node list plus edge list; both default to fresh empty lists."""
+    nodes: list = field(default_factory=list)  # List[MemoryNode]
+    edges: List[GraphEdge] = field(default_factory=list)

memnex/models/memory.py ADDED Viewed

@@ -0,0 +1,102 @@
+"""Memory node types: MemoryNode base + Function, Fact, Preference, Observation."""
+from dataclasses import dataclass, field
+from typing import List, Optional, ClassVar, Dict, Any
+from .source import SourceType
+from .misc import FieldValue, validate_func_id
+@dataclass(kw_only=True)
+class MemoryNode:
+    """Abstract base for all memory types.
+    ``kw_only=True`` makes every field declared on this class keyword-only
+    in the generated ``__init__``. Every field has a default.
+    """
+    id: str = ""
+    memory_type: str = ""  # function | fact | preference | observation
+    name: str = ""
+    domain: Optional[str] = None
+    confidence: float = 1.0
+    source_type: SourceType = SourceType.WIKI
+    owner: Optional[str] = None
+    version: int = 1
+    # Timestamps are stored as strings, not datetime objects.
+    created_at: Optional[str] = None
+    updated_at: Optional[str] = None
+    origin_session: Optional[str] = None
+    access_count: int = 0
+    last_accessed_at: Optional[str] = None
+    source_paragraphs: List[str] = field(default_factory=list)
+    needs_review: bool = False
+    needs_review_until: Optional[str] = None
+    content_hash: Optional[str] = None
+@dataclass
+class Function(MemoryNode):
+    """Procedural memory: actions/flows/interfaces with trigger/condition/action/benefit."""
+    memory_type: str = "function"
+    # The four role fields each hold a list of FieldValue entries.
+    trigger: List[FieldValue] = field(default_factory=list)
+    condition: List[FieldValue] = field(default_factory=list)
+    action: List[FieldValue] = field(default_factory=list)
+    benefit: List[FieldValue] = field(default_factory=list)
+    name_normalized: str = ""
+    attributes: Dict[str, Any] = field(default_factory=dict)
+    cross_references: List[Dict] = field(default_factory=list)
+    priority_from_source: Optional[str] = None
+    source_authority: Optional[str] = None
+    # ClassVar, so not a dataclass field / constructor argument.
+    MAX_VALUES_PER_FIELD: ClassVar[int] = 20
+    def __post_init__(self):
+        """Validate ``id`` and fill in missing timestamps.
+        Raises
+        ------
+        ValueError
+            Propagated from ``validate_func_id`` when ``id`` is not a valid
+            function ID.
+        """
+        from datetime import datetime, timezone
+        validate_func_id(self.id)
+        if not self.created_at:
+            # Naive-UTC ISO string, the same format datetime.utcnow()
+            # produced, without calling the deprecated function.
+            now_utc = datetime.now(timezone.utc).replace(tzinfo=None)
+            self.created_at = now_utc.isoformat()
+        if not self.updated_at:
+            self.updated_at = self.created_at
+@dataclass
+class Fact(MemoryNode):
+    """Declarative memory: subject → predicate → object."""
+    memory_type: str = "fact"
+    subject: str = ""
+    predicate: str = ""
+    # Trailing underscore presumably avoids shadowing the builtin ``object``.
+    object_: str = ""
+    valid_until: Optional[str] = None
+@dataclass
+class Preference(MemoryNode):
+    """User/agent preference memory."""
+    memory_type: str = "preference"
+    aspect: str = ""
+    preference: str = ""
+    # presumably identifies whose preference this is -- TODO confirm
+    subject_id: Optional[str] = None
+@dataclass
+class Observation(MemoryNode):
+    """Runtime observation event memory."""
+    memory_type: str = "observation"
+    event: str = ""
+    context: str = ""
+    # Stored as a string, like the other timestamps on MemoryNode.
+    observed_at: Optional[str] = None
+    actor: str = "system"
+# Type alias: Memory is the same class object as MemoryNode, not a subclass
+# (the name emphasizes its role in the compaction pipeline).
+Memory = MemoryNode
+def create_memory_node(memory_type: str, **kwargs) -> MemoryNode:
+    """Factory: instantiate the MemoryNode subclass registered for a type string.
+    Parameters
+    ----------
+    memory_type:
+        One of "function", "fact", "preference", "observation".
+    **kwargs:
+        Forwarded unchanged to the selected class's constructor.
+    Raises
+    ------
+    ValueError
+        If ``memory_type`` is not one of the registered names.
+    """
+    node_classes = {
+        "function": Function,
+        "fact": Fact,
+        "preference": Preference,
+        "observation": Observation,
+    }
+    node_cls = node_classes.get(memory_type)
+    if node_cls:
+        return node_cls(**kwargs)
+    raise ValueError(f"Unknown memory_type: {memory_type!r}")

memnex/models/misc.py ADDED Viewed

@@ -0,0 +1,185 @@
+"""Miscellaneous types: FieldValue, ChangelogEvent, MergeResult, and others."""
+import re
+from datetime import datetime
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict, Any
+from enum import Enum
+from memnex.models.graph import GraphData
+from memnex.models.source import SourceType
+# ── Function ID validation ──────────────────────────────────────
+MAX_FUNC_ID_LENGTH = 128
+_FUNC_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_-]+$')
+def validate_func_id(func_id: str) -> str:
+    """Return ``func_id`` unchanged if it is a well-formed function ID.
+    A valid ID is 1..MAX_FUNC_ID_LENGTH characters drawn from ASCII
+    letters, digits, ``_`` and ``-``. The length check runs first, so an
+    over-long ID reports the length error even if it also contains
+    illegal characters.
+    Raises
+    ------
+    ValueError
+        If the ID is too long, empty, or contains a disallowed character.
+    """
+    id_length = len(func_id)
+    if id_length > MAX_FUNC_ID_LENGTH:
+        raise ValueError(f"Function ID 过长: {id_length} > {MAX_FUNC_ID_LENGTH}")
+    if _FUNC_ID_PATTERN.fullmatch(func_id) is None:
+        raise ValueError(f"Function ID 含非法字符: {func_id!r}")
+    return func_id
+# ── FieldValue (multi-value field entry) ────────────────────────
+@dataclass
+class FieldValue:
+    """One entry of a multi-value field; only ``desc`` is required."""
+    desc: str
+    sources: List[str] = field(default_factory=list)
+    source_method: str = "rule_based"  # rule_based | llm_semantic | manual
+    weight: float = 1.0
+    observation: Optional[float] = None
+    created_at: Optional[datetime] = None
+    status: str = "active"  # active | deprecated | disputed
+# ── Auxiliary types ─────────────────────────────────────────────
+@dataclass
+class ExtractedData:
+    """Extraction output: a node list, a GraphData and a delta flag."""
+    functions: list = field(default_factory=list)  # List[MemoryNode]
+    # A fresh, empty GraphData is created per instance.
+    graph: GraphData = field(default_factory=GraphData)
+    delta: bool = False
+@dataclass
+class MergeResult:
+    """Merge outcome flag plus counters; ``merged`` is the only required field."""
+    merged: bool
+    new_functions: int = 0
+    updated_functions: int = 0
+    new_conflicts: int = 0
+    new_edges: int = 0
+@dataclass
+class BatchResult:
+    """Batch counters plus a list of per-item failure records."""
+    total: int = 0
+    succeeded: int = 0
+    # Dict schema is not defined here -- verify against the producer.
+    failed_items: List[Dict] = field(default_factory=list)
+@dataclass
+class ParagraphDelta:
+    """Three string lists: added, removed and modified (presumably paragraph ids)."""
+    added: List[str] = field(default_factory=list)
+    removed: List[str] = field(default_factory=list)
+    modified: List[str] = field(default_factory=list)
+@dataclass
+class DedupResult:
+    """Deduplication counts; the four counters are required."""
+    original_count: int
+    final_count: int
+    exact_removed: int
+    semantic_removed: int
+    # Untyped list; presumably the surviving items -- TODO confirm.
+    deduplicated: list = field(default_factory=list)
+@dataclass
+class RefreshResult:
+    """Pair of required counters: ``total`` and ``refreshed``."""
+    total: int
+    refreshed: int
+@dataclass
+class ValidationResult:
+    """Validation verdict with an optional issue text and truncated content."""
+    valid: bool
+    issue: Optional[str] = None
+    truncated_content: Optional[str] = None
+@dataclass
+class UpdateResult:
+    """Result of updating one role of one memory.
+    ``success`` defaults to False, so callers must set it explicitly on
+    a successful update.
+    """
+    memory_id: str
+    role: str
+    old_value: Optional[str] = None
+    new_value: str = ""
+    version: int = 0
+    success: bool = False
+    error: Optional[str] = None
+@dataclass
+class StorageStats:
+    """Storage totals; only ``last_compaction`` is optional."""
+    total_functions: int
+    total_edges: int
+    total_observations: int
+    storage_size_mb: float
+    last_compaction: Optional[datetime] = None
+# ── Changelog types ─────────────────────────────────────────────
+@dataclass
+class ChangelogEvent:
+    """One changelog entry for a function ID; every field is required."""
+    func_id: str
+    timestamp: datetime
+    event_type: str  # created | updated | merged | field_added
+    description: str
+    source: str
+    actor: str  # user | ai | system
+# ── LLM types ──────────────────────────────────────────────────
+class IntentType(Enum):
+    """Query intent categories; each value is the lowercase member name."""
+    IMMEDIATE = "immediate"
+    SYNTHESIS = "synthesis"
+    RELATION = "relation"
+    ALL = "all"
+@dataclass
+class EnhancedQuery:
+    """Original query text plus a list of expanded strings and an intent label."""
+    original: str
+    expanded: List[str] = field(default_factory=list)
+    # Plain string defaulting to "search"; it is not typed as IntentType.
+    intent: str = "search"
+@dataclass
+class Summary:
+    """Three string lists: key_points, patterns and changes (all default empty)."""
+    key_points: List[str] = field(default_factory=list)
+    patterns: List[str] = field(default_factory=list)
+    changes: List[str] = field(default_factory=list)
+# ── Incremental types ──────────────────────────────────────────
+@dataclass
+class IncrementalState:
+    """Per-source state record: last hash, last paragraphs, processing time."""
+    source_id: str
+    last_hash: Optional[str] = None
+    last_paragraphs: List[str] = field(default_factory=list)
+    processed_at: Optional[datetime] = None
+# ── Wiki types ─────────────────────────────────────────────────
+@dataclass
+class WikiPage:
+    """A wiki page: id, content, and a free-form metadata dict."""
+    page_id: str
+    content: str
+    metadata: dict = field(default_factory=dict)
+@dataclass
+class WikiIndex:
+    """List of WikiPage objects plus a ``total`` counter."""
+    pages: List[WikiPage] = field(default_factory=list)
+    # Stored separately; not derived from len(pages) by this class.
+    total: int = 0
+@dataclass
+class LintIssue:
+    """One lint finding attached to a page id."""
+    page_id: str
+    severity: str  # error | warning
+    message: str
+@dataclass
+class LintResult:
+    """Lint run summary: page count, issues found, and a pass flag."""
+    total_pages: int
+    issues: List[LintIssue] = field(default_factory=list)
+    # Defaults to True; not recomputed from ``issues`` by this class.
+    passed: bool = True

memnex/models/paragraph.py ADDED Viewed

@@ -0,0 +1,45 @@
+"""L1: Paragraph and Sentence models."""
+from dataclasses import dataclass, field
+from typing import List, Optional
+@dataclass
+class Sentence:
+    """A sentence with an id, its text, and a role label."""
+    id: str
+    text: str
+    role: str  # trigger, condition, action, result
+@dataclass
+class SentenceRelation:
+    """Typed link from one id to another (presumably Sentence ids)."""
+    from_id: str
+    to_id: str
+    type: str  # if_then, cause_effect, etc.
+@dataclass
+class Paragraph:
+    """A source paragraph with its raw text and optional sentence breakdown.
+    ``id``, ``source``, ``section`` and ``raw_text`` are required.
+    """
+    id: str
+    source: str  # "filename.md#3.2.1"
+    section: str
+    raw_text: str
+    semantic_unit: bool = True
+    sentences: List[Sentence] = field(default_factory=list)
+    sentence_relations: List[SentenceRelation] = field(default_factory=list)
+    confidence: float = 1.0
+    needs_review: bool = False
+@dataclass
+class ParagraphCollection:
+    """List-backed container of Paragraph objects, kept in insertion order."""
+    paragraphs: List[Paragraph] = field(default_factory=list)
+    def add(self, paragraph: Paragraph):
+        """Append ``paragraph`` to the end of the collection."""
+        self.paragraphs.append(paragraph)
+    def get_by_id(self, para_id: str) -> Optional[Paragraph]:
+        """Return the first paragraph whose id matches, or None.
+        A paragraph matches when its id equals ``para_id`` either as given
+        or with a ``para_`` prefix added.
+        """
+        accepted_ids = (f"para_{para_id}", para_id)
+        return next(
+            (para for para in self.paragraphs if para.id in accepted_ids),
+            None,
+        )

memnex/models/search.py ADDED Viewed

@@ -0,0 +1,51 @@
+"""Search types: QueryScope, SearchResult, SearchFilters, QueryResult."""
+from datetime import datetime
+from dataclasses import dataclass, field
+from typing import List, Optional, Dict, Any
+from enum import Enum
+from memnex.models.source import SourceType
+class QueryScope(Enum):
+    """Scope of a query; each value is the lowercase member name."""
+    IMMEDIATE = "immediate"
+    SYNTHESIS = "synthesis"
+    RELATION = "relation"
+    ALL = "all"
+@dataclass
+class SearchResult:
+    """One search hit. The first five fields are required."""
+    func_id: str
+    name: str
+    domain: str
+    relevance_score: float
+    summary: str
+    source_type: SourceType = SourceType.WIKI
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+    origin: str = ""
+    # Untyped (Any); presumably a cached embedding -- TODO confirm.
+    vector_cache: Any = None
+    token_estimate: int = 0
+    graph_context: Optional[Dict] = None
+@dataclass
+class SearchFilters:
+    """Optional search constraints; every field defaults to None.
+    NOTE(review): None presumably means "do not filter on this field" --
+    confirm against the code that applies the filters.
+    """
+    domain: Optional[List[str]] = None
+    source_type: Optional[List[SourceType]] = None
+    confidence_min: Optional[float] = None
+    updated_after: Optional[datetime] = None
+    updated_before: Optional[datetime] = None
+    needs_review: Optional[bool] = None
+    owner: Optional[str] = None
+@dataclass
+class QueryResult:
+    """Results of one query together with its scope and timing."""
+    results: List[SearchResult]
+    scope: QueryScope
+    latency_ms: int
+    tokens_used: int = 0
+    truncated: bool = False

memnex/models/source.py ADDED Viewed

@@ -0,0 +1,23 @@
+"""Source types: SourceType, SourceDocument."""
+from enum import Enum
+from dataclasses import dataclass
+from typing import Optional
+class SourceType(Enum):
+    """Category of the source a memory or document came from."""
+    REQUIREMENT = "requirement"
+    MEETING = "meeting"
+    CODE = "code"
+    WIKI = "wiki"
+@dataclass
+class SourceDocument:
+    """An input document; only ``type`` is required.
+    Note that ``type`` (how the input arrived) is a plain string and is
+    distinct from ``source_type`` (a SourceType enum member).
+    """
+    type: str  # text | file | url | clipboard
+    content: Optional[str] = None
+    source_path: Optional[str] = None
+    content_hash: Optional[str] = None
+    url: Optional[str] = None
+    vision: Optional[dict] = None
+    source_type: SourceType = SourceType.WIKI