memplex 3.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. memnex/__init__.py +31 -0
  2. memnex/__main__.py +6 -0
  3. memnex/_plugin/.claude-plugin/plugin.json +24 -0
  4. memnex/_plugin/.mcp.json +9 -0
  5. memnex/_plugin/__init__.py +0 -0
  6. memnex/_plugin/hooks/hooks.json +43 -0
  7. memnex/_plugin/scripts/hook-runner.py +166 -0
  8. memnex/_plugin/skills/mem-explore/SKILL.md +83 -0
  9. memnex/_plugin/skills/mem-manage/SKILL.md +92 -0
  10. memnex/_plugin/skills/mem-search/SKILL.md +85 -0
  11. memnex/_plugin/skills/mem-write/SKILL.md +78 -0
  12. memnex/adapters/__init__.py +14 -0
  13. memnex/adapters/claude_skill.py +169 -0
  14. memnex/adapters/cli.py +525 -0
  15. memnex/adapters/http_api.py +314 -0
  16. memnex/adapters/mcp_server.py +448 -0
  17. memnex/compaction.py +563 -0
  18. memnex/config.py +366 -0
  19. memnex/core/__init__.py +13 -0
  20. memnex/core/associator/__init__.py +8 -0
  21. memnex/core/associator/domain_classifier.py +75 -0
  22. memnex/core/associator/entity_aligner.py +127 -0
  23. memnex/core/associator/ref_linker.py +197 -0
  24. memnex/core/associator/term_mapper.py +77 -0
  25. memnex/core/dictionaries/__init__.py +50 -0
  26. memnex/core/engine.py +667 -0
  27. memnex/core/extractors/__init__.py +15 -0
  28. memnex/core/extractors/docx.py +97 -0
  29. memnex/core/extractors/image.py +233 -0
  30. memnex/core/extractors/markdown.py +139 -0
  31. memnex/core/extractors/pdf.py +133 -0
  32. memnex/core/extractors/vision_mapper.py +131 -0
  33. memnex/core/handlers/__init__.py +7 -0
  34. memnex/core/handlers/clipboard.py +40 -0
  35. memnex/core/handlers/file_handler.py +62 -0
  36. memnex/core/handlers/url_handler.py +132 -0
  37. memnex/llm/__init__.py +25 -0
  38. memnex/llm/enhancer.py +226 -0
  39. memnex/llm/fallback_chain.py +87 -0
  40. memnex/llm/injection_guard.py +178 -0
  41. memnex/llm/provider.py +130 -0
  42. memnex/llm/providers/__init__.py +22 -0
  43. memnex/llm/providers/anthropic.py +135 -0
  44. memnex/llm/providers/local.py +135 -0
  45. memnex/llm/providers/rule_based.py +68 -0
  46. memnex/llm/sanitizer.py +67 -0
  47. memnex/models/__init__.py +68 -0
  48. memnex/models/feedback.py +42 -0
  49. memnex/models/graph.py +33 -0
  50. memnex/models/memory.py +102 -0
  51. memnex/models/misc.py +185 -0
  52. memnex/models/paragraph.py +45 -0
  53. memnex/models/search.py +51 -0
  54. memnex/models/source.py +23 -0
  55. memnex/models/task.py +62 -0
  56. memnex/processing/__init__.py +1 -0
  57. memnex/processing/graph_builder.py +278 -0
  58. memnex/processing/merger/__init__.py +6 -0
  59. memnex/processing/merger/confidence_calculator.py +127 -0
  60. memnex/processing/merger/conflict_resolver.py +116 -0
  61. memnex/retrieval/__init__.py +1 -0
  62. memnex/retrieval/dedup.py +386 -0
  63. memnex/retrieval/embedding.py +289 -0
  64. memnex/retrieval/reranker.py +299 -0
  65. memnex/service.py +902 -0
  66. memnex/storage/__init__.py +65 -0
  67. memnex/storage/base.py +132 -0
  68. memnex/storage/changelog.py +106 -0
  69. memnex/storage/feedback.py +486 -0
  70. memnex/storage/lite/__init__.py +5 -0
  71. memnex/storage/lite/store.py +606 -0
  72. memnex/storage/vector.py +265 -0
  73. memnex/wiki/__init__.py +11 -0
  74. memnex/wiki/community.py +221 -0
  75. memnex/wiki/compiler.py +545 -0
  76. memnex/wiki/generator.py +270 -0
  77. memnex/wiki/search.py +282 -0
  78. memnex/worker.py +412 -0
  79. memplex-3.2.0.dist-info/METADATA +37 -0
  80. memplex-3.2.0.dist-info/RECORD +83 -0
  81. memplex-3.2.0.dist-info/WHEEL +5 -0
  82. memplex-3.2.0.dist-info/entry_points.txt +2 -0
  83. memplex-3.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,68 @@
1
+ """Rule-based LLM provider: zero-dependency fallback implementation."""
2
+
3
+ import re
4
+
5
+ from memnex.models import IntentType
6
+
7
+
8
class RuleBasedProvider:
    """Pure keyword-rule based LLM provider.

    Zero external dependencies. Used as the final fallback when no real
    LLM provider is available. Every method is a coroutine so the class can
    stand in for an asynchronous provider, but none of them awaits anything.
    """

    # Intent classification keyword mapping (group name -> trigger phrases).
    # Groups are tried in insertion order; the first hit wins.
    _INTENT_KEYWORDS: dict[str, list[str]] = {
        "understand": [
            "what is", "what are", "explain", "how does", "how do",
            "describe", "define", "tell me about", "什么是", "解释",
            "描述", "如何", "怎么",
        ],
        "compare": [
            "compare", "difference", "versus", "vs", "contrast",
            "比较", "对比", "区别", "不同",
        ],
        "relation": [
            "related", "connection", "linked", "between",
            "关联", "关系", "联系", "连接",
        ],
    }

    @classmethod
    def _match_intent_name(cls, query: str) -> str | None:
        """Return the name of the first keyword group matching *query*.

        ASCII keywords must start at the beginning of a token (not directly
        after an ASCII letter or digit), so "show documents" no longer
        matches "how do" and "unrelated" no longer matches "related".
        Suffixes are still allowed ("differences" matches "difference").
        Non-ASCII keywords keep plain substring matching because the text
        they appear in has no word separators to anchor on.

        Returns ``None`` when no keyword matches.
        """
        lowered = query.lower()
        for intent_name, keywords in cls._INTENT_KEYWORDS.items():
            for kw in keywords:
                if kw.isascii():
                    # ASCII-only lookbehind (not \b): a CJK character right
                    # before the keyword must still allow a match.
                    if re.search(r"(?<![a-z0-9])" + re.escape(kw), lowered):
                        return intent_name
                elif kw in lowered:
                    return intent_name
        return None

    async def classify_intent(
        self, query: str, context: dict | None = None
    ) -> IntentType:
        """Classify intent using keyword matching.

        Parameters:
            query: Free-text query; matching is case-insensitive.
            context: Accepted for interface compatibility; not used here.

        Returns:
            ``IntentType.SYNTHESIS`` for "understand" keywords,
            ``IntentType.RELATION`` for "compare"/"relation" keywords,
            otherwise ``IntentType.IMMEDIATE``.
        """
        mapping = {
            "understand": IntentType.SYNTHESIS,
            "compare": IntentType.RELATION,
            "relation": IntentType.RELATION,
        }
        return mapping.get(self._match_intent_name(query), IntentType.IMMEDIATE)

    async def summarize(self, content: str, max_tokens: int = 256) -> str:
        """Truncate content as a trivial summary.

        ``max_tokens`` is applied as a character count: content no longer
        than that is returned unchanged, otherwise its first ``max_tokens``
        characters are returned.
        """
        if len(content) <= max_tokens:
            return content
        return content[:max_tokens]

    async def extract_structured(self, prompt: str, schema: dict) -> dict:
        """Return empty dict -- no structured extraction without an LLM."""
        return {}

    async def generate_hypothetical(self, query: str) -> str:
        """Return query unchanged -- no HyDE without an LLM."""
        return query

    async def complete(self, prompt: str) -> str:
        """Return empty string -- no completion without an LLM."""
        return ""

    async def complete_json(self, prompt: str) -> dict:
        """Return empty dict -- no JSON completion without an LLM."""
        return {}
@@ -0,0 +1,67 @@
1
+ """LLM input sanitization for safe prompt construction."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import re
7
+ import unicodedata
8
+ from typing import Optional
9
+
10
+
11
class LLMPromptSanitizer:
    """Sanitize and structure LLM inputs to prevent injection and token overflow.

    All public methods are static so the sanitizer can be used without
    instantiation.
    """

    MAX_INPUT_LENGTH: int = 10000

    # Written with explicit escapes: literal invisible characters in source
    # are easily dropped or altered by editors, diff tools and copy/paste,
    # and cannot be reviewed by eye.
    _ZERO_WIDTH_RE = re.compile("[\u200b\u200c\u200d\ufeff]")

    @staticmethod
    def sanitize(text: str, max_length: int = MAX_INPUT_LENGTH) -> str:
        """Sanitize raw text for LLM consumption.

        Steps:
        1. NFKC unicode normalization (folds compatibility forms such as
           full-width letters into their canonical equivalents).
        2. Zero-width character removal (U+200B, U+200C, U+200D, U+FEFF).
        3. Length truncation to prevent token overflow.

        Parameters:
            text: Raw input text.
            max_length: Maximum number of characters kept from the cleaned
                text before the truncation marker is appended.

        Returns:
            The cleaned text. When truncation happens the result is the
            first ``max_length`` characters followed by ``"...(truncated)"``,
            so it is longer than ``max_length`` by the length of the marker.
        """
        text = unicodedata.normalize("NFKC", text)
        text = LLMPromptSanitizer._ZERO_WIDTH_RE.sub("", text)
        if len(text) > max_length:
            text = text[:max_length] + "...(truncated)"
        return text

    @staticmethod
    def build_structured_prompt(
        instruction: str,
        user_input: str,
        output_schema: Optional[dict] = None,
        max_length: int = MAX_INPUT_LENGTH,
    ) -> str:
        """Build a structured JSON prompt for safe LLM interaction.

        The user input is embedded as a JSON value so ``json.dumps``
        automatically escapes special characters, eliminating separator
        escape and newline injection risks.

        Parameters
        ----------
        instruction:
            The system-level instruction for the LLM. Embedded as-is; only
            ``user_input`` is passed through :meth:`sanitize`.
        user_input:
            Untrusted user content to be safely embedded.
        output_schema:
            Optional JSON schema describing the expected output format.
            Added under the ``"output_format"`` key when not ``None``.
        max_length:
            Maximum character length for the sanitized user input.

        Returns
        -------
        str
            A JSON object string with ``"instruction"``, ``"user_input"``
            and, optionally, ``"output_format"``. Non-ASCII characters are
            emitted unescaped (``ensure_ascii=False``).
        """
        safe_input = LLMPromptSanitizer.sanitize(user_input, max_length)
        payload: dict = {
            "instruction": instruction,
            "user_input": safe_input,
        }
        if output_schema is not None:
            payload["output_format"] = output_schema
        return json.dumps(payload, ensure_ascii=False)
@@ -0,0 +1,68 @@
1
+ """MemNex data models."""
2
+
3
+ from .source import (
4
+ SourceType,
5
+ SourceDocument,
6
+ )
7
+ from .graph import (
8
+ EdgeType,
9
+ GraphEdge,
10
+ GraphData,
11
+ )
12
+ from .search import (
13
+ QueryScope,
14
+ SearchResult,
15
+ SearchFilters,
16
+ QueryResult,
17
+ )
18
+ from .feedback import (
19
+ FeedbackVerdict,
20
+ MemoryFeedback,
21
+ PendingReview,
22
+ )
23
+ from .task import (
24
+ BackgroundTask,
25
+ TaskStatus,
26
+ TaskInfo,
27
+ CompactionScope,
28
+ CompactionStageResult,
29
+ CompactionResult,
30
+ )
31
+ from .misc import (
32
+ FieldValue,
33
+ ExtractedData,
34
+ MergeResult,
35
+ BatchResult,
36
+ ParagraphDelta,
37
+ DedupResult,
38
+ RefreshResult,
39
+ ValidationResult,
40
+ UpdateResult,
41
+ StorageStats,
42
+ ChangelogEvent,
43
+ IntentType,
44
+ EnhancedQuery,
45
+ Summary,
46
+ IncrementalState,
47
+ WikiPage,
48
+ WikiIndex,
49
+ LintIssue,
50
+ LintResult,
51
+ validate_func_id,
52
+ MAX_FUNC_ID_LENGTH,
53
+ )
54
+ from .memory import (
55
+ MemoryNode,
56
+ Function,
57
+ Fact,
58
+ Preference,
59
+ Observation,
60
+ Memory,
61
+ create_memory_node,
62
+ )
63
+ from .paragraph import (
64
+ Sentence,
65
+ SentenceRelation,
66
+ Paragraph,
67
+ ParagraphCollection,
68
+ )
@@ -0,0 +1,42 @@
1
+ """Feedback types: FeedbackVerdict, MemoryFeedback, PendingReview."""
2
+
3
+ from datetime import datetime
4
+ from enum import Enum
5
+ from dataclasses import dataclass, field
6
+ from typing import List, Optional
7
+
8
+ from memnex.models.misc import FieldValue
9
+
10
+
11
class FeedbackVerdict(Enum):
    """Verdict carried by a feedback record: correct, wrong, or alternative."""
    CORRECT = "correct"
    WRONG = "wrong"
    ALTERNATIVE = "alternative"
15
+
16
+
17
@dataclass
class MemoryFeedback:
    """One feedback record about a memory.

    ``memory_id``, ``field_role``, ``value_index`` and ``verdict`` are
    required; every other field has a default.
    """
    memory_id: str
    field_role: str
    # presumably the position of the judged value within that field's
    # value list -- TODO confirm against the feedback storage code
    value_index: int
    verdict: FeedbackVerdict
    reason: Optional[str] = None
    source: str = "user"
    # Filled with datetime.now() at construction: local time, no tzinfo.
    timestamp: datetime = field(default_factory=datetime.now)
    owner: Optional[str] = None
    feedback_type: str = "field_value"
    old_value: Optional[str] = None
    new_value: Optional[str] = None
    # New feedback is flagged for review by default.
    needs_review: bool = True
    needs_review_until: Optional[datetime] = None
    resolved_at: Optional[datetime] = None
    resolution: Optional[str] = None
34
+
35
+
36
@dataclass
class PendingReview:
    """A memory field that holds conflicting values awaiting review."""
    memory_id: str
    field_role: str
    # Each instance gets its own empty list (default_factory).
    conflicting_values: List[FieldValue] = field(default_factory=list)
    detected_at: Optional[datetime] = None
    source: str = ""
memnex/models/graph.py ADDED
@@ -0,0 +1,33 @@
1
+ """Graph types: EdgeType, GraphEdge, GraphData."""
2
+
3
+ from datetime import datetime
4
+ from enum import Enum
5
+ from dataclasses import dataclass, field
6
+ from typing import List, Optional
7
+
8
+
9
class EdgeType(Enum):
    """Kinds of edges in the memory graph.

    Each member's value is identical to its name.
    """
    REFERENCES = "REFERENCES"
    ASSOCIATED_WITH = "ASSOCIATED_WITH"
    SEMANTIC_SIMILAR = "SEMANTIC_SIMILAR"
    BELONGS_TO = "BELONGS_TO"
    IMPLEMENTS = "IMPLEMENTS"
    DEPENDS_ON = "DEPENDS_ON"
    CONFLICTS_WITH = "CONFLICTS_WITH"
    EVOLVED_FROM = "EVOLVED_FROM"
18
+
19
+
20
@dataclass
class GraphEdge:
    """A directed edge between two graph nodes, identified by string keys."""
    source: str
    target: str
    # Stored as a plain string, not an EdgeType member.
    # NOTE(review): presumably one of the EdgeType values -- confirm.
    edge_type: str
    weight: float = 1.0
    evidence: List[str] = field(default_factory=list)
    created_at: Optional[datetime] = None
28
+
29
+
30
@dataclass
class GraphData:
    """Container for a graph: a list of nodes plus a list of edges.

    Both lists default to fresh empty lists per instance.
    """
    nodes: list = field(default_factory=list)  # List[MemoryNode]
    edges: List[GraphEdge] = field(default_factory=list)
@@ -0,0 +1,102 @@
1
+ """Memory node types: MemoryNode base + Function, Fact, Preference, Observation."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import List, Optional, ClassVar, Dict, Any
5
+
6
+ from .source import SourceType
7
+ from .misc import FieldValue, validate_func_id
8
+
9
+
10
@dataclass(kw_only=True)
class MemoryNode:
    """Abstract base for all memory types.

    Declared ``kw_only=True``, so the fields defined here are keyword-only
    in the generated ``__init__``. "Abstract" is a convention only: the
    class does not use ``abc`` and can be instantiated. Every field has a
    default.
    """
    id: str = ""
    memory_type: str = ""  # function | fact | preference | observation
    name: str = ""
    domain: Optional[str] = None
    confidence: float = 1.0
    source_type: SourceType = SourceType.WIKI
    owner: Optional[str] = None
    version: int = 1
    # Timestamps in this class are typed as strings, not datetime objects.
    created_at: Optional[str] = None
    updated_at: Optional[str] = None
    origin_session: Optional[str] = None
    access_count: int = 0
    last_accessed_at: Optional[str] = None
    source_paragraphs: List[str] = field(default_factory=list)
    needs_review: bool = False
    needs_review_until: Optional[str] = None
    content_hash: Optional[str] = None
30
+
31
+
32
@dataclass
class Function(MemoryNode):
    """Procedural memory: actions/flows/interfaces with trigger/condition/action/benefit.

    Construction validates ``id`` with ``validate_func_id`` and fills in
    missing timestamps (see ``__post_init__``).

    Raises:
        ValueError: if ``id`` is rejected by ``validate_func_id``.
            NOTE(review): the inherited default ``id=""`` appears to be
            rejected by the pattern in memnex/models/misc.py (it requires
            at least one character) -- confirm callers always pass an id.
    """
    memory_type: str = "function"
    # The four role fields each hold a list of FieldValue entries.
    trigger: List[FieldValue] = field(default_factory=list)
    condition: List[FieldValue] = field(default_factory=list)
    action: List[FieldValue] = field(default_factory=list)
    benefit: List[FieldValue] = field(default_factory=list)
    name_normalized: str = ""
    attributes: Dict[str, Any] = field(default_factory=dict)
    cross_references: List[Dict] = field(default_factory=list)
    priority_from_source: Optional[str] = None
    source_authority: Optional[str] = None

    # Class-level constant (ClassVar), not a dataclass field. Nothing in
    # this class enforces it.
    MAX_VALUES_PER_FIELD: ClassVar[int] = 20

    def __post_init__(self):
        """Validate the id and default the creation/update timestamps."""
        validate_func_id(self.id)
        if not self.created_at:
            from datetime import datetime, timezone
            # datetime.utcnow() is deprecated since Python 3.12. Dropping
            # tzinfo keeps the stored string in the same naive-UTC ISO
            # format the deprecated call produced.
            self.created_at = (
                datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
            )
        if not self.updated_at:
            self.updated_at = self.created_at
56
+
57
+
58
@dataclass
class Fact(MemoryNode):
    """Declarative memory: subject → predicate → object."""
    memory_type: str = "fact"
    subject: str = ""
    predicate: str = ""
    # Trailing underscore avoids shadowing the ``object`` builtin.
    object_: str = ""
    valid_until: Optional[str] = None
66
+
67
+
68
@dataclass
class Preference(MemoryNode):
    """User/agent preference memory."""
    memory_type: str = "preference"
    aspect: str = ""
    preference: str = ""
    # presumably identifies the user/agent holding the preference -- TODO confirm
    subject_id: Optional[str] = None
75
+
76
+
77
@dataclass
class Observation(MemoryNode):
    """Runtime observation event memory."""
    memory_type: str = "observation"
    event: str = ""
    context: str = ""
    # Typed as a string, like the other timestamps on MemoryNode.
    observed_at: Optional[str] = None
    actor: str = "system"
84
+ actor: str = "system"
85
+
86
+
87
# Type alias: Memory = MemoryNode (emphasizes role in compaction pipeline).
# Both names refer to the same class object, so isinstance checks against
# either are interchangeable.
Memory = MemoryNode
89
+
90
+
91
def create_memory_node(memory_type: str, **kwargs) -> MemoryNode:
    """Factory: build the MemoryNode subclass registered for *memory_type*.

    Parameters:
        memory_type: One of "function", "fact", "preference", "observation".
        **kwargs: Passed straight to the selected class's constructor.

    Returns:
        A new instance of the selected subclass.

    Raises:
        ValueError: if *memory_type* is not one of the known type strings.
    """
    registry = {
        "function": Function,
        "fact": Fact,
        "preference": Preference,
        "observation": Observation,
    }
    if memory_type not in registry:
        raise ValueError(f"Unknown memory_type: {memory_type!r}")
    node_cls = registry[memory_type]
    return node_cls(**kwargs)
memnex/models/misc.py ADDED
@@ -0,0 +1,185 @@
1
+ """Miscellaneous types: FieldValue, ChangelogEvent, MergeResult, and others."""
2
+
3
+ import re
4
+ from datetime import datetime
5
+ from dataclasses import dataclass, field
6
+ from typing import List, Optional, Dict, Any
7
+ from enum import Enum
8
+
9
+ from memnex.models.graph import GraphData
10
+ from memnex.models.source import SourceType
11
+
12
+
13
+ # ── Function ID validation ──────────────────────────────────────
14
+
15
# Upper bound on the number of characters in a function ID.
MAX_FUNC_ID_LENGTH = 128
# Allowed alphabet: ASCII letters, digits, underscore and hyphen; at least
# one character is required.
_FUNC_ID_PATTERN = re.compile(r'^[a-zA-Z0-9_-]+$')


def validate_func_id(func_id: str) -> str:
    """Check *func_id* and hand it back unchanged when it is acceptable.

    The length limit is checked before the character set, so an over-long
    ID always reports the length error.

    Raises:
        ValueError: if the ID is longer than ``MAX_FUNC_ID_LENGTH``, or if
            it is empty or contains a character outside ``[a-zA-Z0-9_-]``.
    """
    id_length = len(func_id)
    if id_length > MAX_FUNC_ID_LENGTH:
        raise ValueError(f"Function ID 过长: {id_length} > {MAX_FUNC_ID_LENGTH}")
    if _FUNC_ID_PATTERN.fullmatch(func_id) is None:
        raise ValueError(f"Function ID 含非法字符: {func_id!r}")
    return func_id
25
+
26
+
27
+ # ── FieldValue (multi-value field entry) ────────────────────────
28
+
29
@dataclass
class FieldValue:
    """One entry of a multi-value field; only ``desc`` is required."""
    desc: str
    sources: List[str] = field(default_factory=list)
    source_method: str = "rule_based"  # rule_based | llm_semantic | manual
    weight: float = 1.0
    # NOTE(review): meaning and range of this float are not visible here -- confirm.
    observation: Optional[float] = None
    created_at: Optional[datetime] = None
    status: str = "active"  # active | deprecated | disputed
38
+
39
+
40
+ # ── Auxiliary types ─────────────────────────────────────────────
41
+
42
@dataclass
class ExtractedData:
    """Result of an extraction: memory nodes plus the graph linking them."""
    functions: list = field(default_factory=list)  # List[MemoryNode]
    # A fresh, empty GraphData is created per instance.
    graph: GraphData = field(default_factory=GraphData)
    # presumably marks an incremental (delta) extraction -- TODO confirm
    delta: bool = False
47
+
48
+
49
@dataclass
class MergeResult:
    """Outcome of a merge; ``merged`` is required, counters default to 0."""
    merged: bool
    new_functions: int = 0
    updated_functions: int = 0
    new_conflicts: int = 0
    new_edges: int = 0
56
+
57
+
58
@dataclass
class BatchResult:
    """Tally for a batch operation: totals plus the items that failed."""
    total: int = 0
    succeeded: int = 0
    # One dict per failed item; the dict schema is not defined here.
    failed_items: List[Dict] = field(default_factory=list)
63
+
64
+
65
@dataclass
class ParagraphDelta:
    """Paragraph-level difference expressed as added/removed/modified lists.

    NOTE(review): whether the strings are paragraph IDs or paragraph text
    is not visible here -- confirm against the producer.
    """
    added: List[str] = field(default_factory=list)
    removed: List[str] = field(default_factory=list)
    modified: List[str] = field(default_factory=list)
70
+
71
+
72
@dataclass
class DedupResult:
    """Counts from a deduplication pass plus the surviving items.

    The four counters are required; ``deduplicated`` defaults to an empty
    list.
    """
    original_count: int
    final_count: int
    exact_removed: int
    semantic_removed: int
    deduplicated: list = field(default_factory=list)
79
+
80
+
81
@dataclass
class RefreshResult:
    """Counts from a refresh operation; both fields are required."""
    total: int
    refreshed: int
85
+
86
+
87
@dataclass
class ValidationResult:
    """Outcome of a validation check; only ``valid`` is required."""
    valid: bool
    issue: Optional[str] = None
    truncated_content: Optional[str] = None
92
+
93
+
94
@dataclass
class UpdateResult:
    """Outcome of updating one role of one memory.

    ``success`` defaults to ``False``, so a result must be explicitly
    marked successful.
    """
    memory_id: str
    role: str
    old_value: Optional[str] = None
    new_value: str = ""
    version: int = 0
    success: bool = False
    error: Optional[str] = None
103
+
104
+
105
@dataclass
class StorageStats:
    """Snapshot of storage counters; all but ``last_compaction`` are required."""
    total_functions: int
    total_edges: int
    total_observations: int
    # Megabytes, per the field name.
    storage_size_mb: float
    last_compaction: Optional[datetime] = None
112
+
113
+
114
+ # ── Changelog types ─────────────────────────────────────────────
115
+
116
@dataclass
class ChangelogEvent:
    """One changelog entry for a function; every field is required."""
    func_id: str
    timestamp: datetime
    event_type: str  # created | updated | merged | field_added
    description: str
    source: str
    actor: str  # user | ai | system
124
+
125
+
126
+ # ── LLM types ──────────────────────────────────────────────────
127
+
128
class IntentType(Enum):
    """Query intent categories produced by intent classification.

    NOTE(review): the members and values mirror ``QueryScope`` in
    memnex/models/search.py -- confirm whether the two are meant to stay
    in sync.
    """
    IMMEDIATE = "immediate"
    SYNTHESIS = "synthesis"
    RELATION = "relation"
    ALL = "all"
133
+
134
+
135
@dataclass
class EnhancedQuery:
    """A query together with its expansions and an intent label."""
    original: str
    expanded: List[str] = field(default_factory=list)
    # A plain string defaulting to "search", not an IntentType member.
    intent: str = "search"
140
+
141
+
142
@dataclass
class Summary:
    """Structured summary split into key points, patterns and changes.

    Every list defaults to a fresh empty list per instance.
    """
    key_points: List[str] = field(default_factory=list)
    patterns: List[str] = field(default_factory=list)
    changes: List[str] = field(default_factory=list)
147
+
148
+
149
+ # ── Incremental types ──────────────────────────────────────────
150
+
151
@dataclass
class IncrementalState:
    """State recorded for one source; only ``source_id`` is required.

    NOTE(review): judging by the field names this is the last-seen hash and
    paragraphs kept for incremental processing -- confirm against the
    caller.
    """
    source_id: str
    last_hash: Optional[str] = None
    last_paragraphs: List[str] = field(default_factory=list)
    processed_at: Optional[datetime] = None
157
+
158
+
159
+ # ── Wiki types ─────────────────────────────────────────────────
160
+
161
@dataclass
class WikiPage:
    """A wiki page: identifier, content and free-form metadata."""
    page_id: str
    content: str
    # Untyped dict; its keys are not defined here.
    metadata: dict = field(default_factory=dict)
166
+
167
+
168
@dataclass
class WikiIndex:
    """A list of wiki pages plus a total count."""
    pages: List[WikiPage] = field(default_factory=list)
    # Stored independently; not derived from len(pages) by this class.
    total: int = 0
172
+
173
+
174
@dataclass
class LintIssue:
    """A single lint finding attached to a wiki page."""
    page_id: str
    severity: str  # error | warning
    message: str
179
+
180
+
181
@dataclass
class LintResult:
    """Aggregate result of a lint run over wiki pages."""
    total_pages: int
    issues: List[LintIssue] = field(default_factory=list)
    # Defaults to True and is not recomputed from ``issues`` by this class;
    # whoever builds the result must set it.
    passed: bool = True
@@ -0,0 +1,45 @@
1
+ """L1: Paragraph and Sentence models."""
2
+
3
+ from dataclasses import dataclass, field
4
+ from typing import List, Optional
5
+
6
+
7
@dataclass
class Sentence:
    """A sentence within a paragraph, tagged with its role."""
    id: str
    text: str
    role: str  # trigger, condition, action, result
12
+
13
+
14
@dataclass
class SentenceRelation:
    """A typed relation from one sentence ID to another."""
    from_id: str
    to_id: str
    type: str  # if_then, cause_effect, etc.
19
+
20
+
21
@dataclass
class Paragraph:
    """A source paragraph with its sentences and their relations.

    ``id``, ``source``, ``section`` and ``raw_text`` are required.
    """
    id: str
    source: str  # "filename.md#3.2.1"
    section: str
    raw_text: str
    semantic_unit: bool = True
    sentences: List[Sentence] = field(default_factory=list)
    sentence_relations: List[SentenceRelation] = field(default_factory=list)
    confidence: float = 1.0
    needs_review: bool = False
32
+
33
+
34
@dataclass
class ParagraphCollection:
    """Ordered, list-backed collection of paragraphs with lookup by ID."""
    paragraphs: List[Paragraph] = field(default_factory=list)

    def add(self, paragraph: Paragraph):
        """Append *paragraph* to the end of the collection."""
        self.paragraphs.append(paragraph)

    def get_by_id(self, para_id: str) -> Optional[Paragraph]:
        """Return the first paragraph matching *para_id*, or ``None``.

        A paragraph matches when its ``id`` equals ``"para_" + para_id`` or
        *para_id* itself, so callers may pass the ID with or without the
        ``para_`` prefix. The search is linear, in insertion order.
        """
        accepted_ids = (f"para_{para_id}", para_id)
        hits = (entry for entry in self.paragraphs if entry.id in accepted_ids)
        return next(hits, None)
@@ -0,0 +1,51 @@
1
+ """Search types: QueryScope, SearchResult, SearchFilters, QueryResult."""
2
+
3
+ from datetime import datetime
4
+ from dataclasses import dataclass, field
5
+ from typing import List, Optional, Dict, Any
6
+ from enum import Enum
7
+
8
+ from memnex.models.source import SourceType
9
+
10
+
11
class QueryScope(Enum):
    """Scope selector attached to a query result.

    NOTE(review): the members and values mirror ``IntentType`` in
    memnex/models/misc.py -- confirm whether the two are meant to stay in
    sync.
    """
    IMMEDIATE = "immediate"
    SYNTHESIS = "synthesis"
    RELATION = "relation"
    ALL = "all"
16
+
17
+
18
@dataclass
class SearchResult:
    """One search hit.

    ``func_id``, ``name``, ``domain``, ``relevance_score`` and ``summary``
    are required; the remaining fields have defaults.
    """
    func_id: str
    name: str
    domain: str
    relevance_score: float
    summary: str
    source_type: SourceType = SourceType.WIKI
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None
    origin: str = ""
    # Untyped slot; NOTE(review): presumably a cached embedding vector -- confirm.
    vector_cache: Any = None
    token_estimate: int = 0
    graph_context: Optional[Dict] = None
32
+
33
+
34
@dataclass
class SearchFilters:
    """Optional search constraints; every field defaults to ``None``.

    NOTE(review): presumably ``None`` means "do not filter on this field"
    -- confirm against the code that applies the filters.
    """
    domain: Optional[List[str]] = None
    source_type: Optional[List[SourceType]] = None
    confidence_min: Optional[float] = None
    updated_after: Optional[datetime] = None
    updated_before: Optional[datetime] = None
    needs_review: Optional[bool] = None
    owner: Optional[str] = None
43
+
44
+
45
@dataclass
class QueryResult:
    """Result of a query: the hits plus scope and accounting data."""
    results: List[SearchResult]
    scope: QueryScope
    # Milliseconds, per the field name.
    latency_ms: int
    tokens_used: int = 0
    truncated: bool = False
@@ -0,0 +1,23 @@
1
+ """Source types: SourceType, SourceDocument."""
2
+
3
+ from enum import Enum
4
+ from dataclasses import dataclass
5
+ from typing import Optional
6
+
7
+
8
class SourceType(Enum):
    """Category of the material a memory or document came from."""
    REQUIREMENT = "requirement"
    MEETING = "meeting"
    CODE = "code"
    WIKI = "wiki"
13
+
14
+
15
@dataclass
class SourceDocument:
    """An input document handed to the system; only ``type`` is required.

    ``type`` describes how the document was supplied, while ``source_type``
    is the separate ``SourceType`` category (default ``SourceType.WIKI``).
    """
    type: str  # text | file | url | clipboard
    content: Optional[str] = None
    source_path: Optional[str] = None
    content_hash: Optional[str] = None
    url: Optional[str] = None
    # Untyped dict; NOTE(review): presumably vision/image extraction output -- confirm.
    vision: Optional[dict] = None
    source_type: SourceType = SourceType.WIKI