memorisdk 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of memorisdk might be problematic. Click here for more details.
- memoriai/__init__.py +140 -0
- memoriai/agents/__init__.py +7 -0
- memoriai/agents/conscious_agent.py +506 -0
- memoriai/agents/memory_agent.py +322 -0
- memoriai/agents/retrieval_agent.py +579 -0
- memoriai/config/__init__.py +14 -0
- memoriai/config/manager.py +281 -0
- memoriai/config/settings.py +287 -0
- memoriai/core/__init__.py +6 -0
- memoriai/core/database.py +966 -0
- memoriai/core/memory.py +1349 -0
- memoriai/database/__init__.py +5 -0
- memoriai/database/connectors/__init__.py +9 -0
- memoriai/database/connectors/mysql_connector.py +159 -0
- memoriai/database/connectors/postgres_connector.py +158 -0
- memoriai/database/connectors/sqlite_connector.py +148 -0
- memoriai/database/queries/__init__.py +15 -0
- memoriai/database/queries/base_queries.py +204 -0
- memoriai/database/queries/chat_queries.py +157 -0
- memoriai/database/queries/entity_queries.py +236 -0
- memoriai/database/queries/memory_queries.py +178 -0
- memoriai/database/templates/__init__.py +0 -0
- memoriai/database/templates/basic_template.py +0 -0
- memoriai/database/templates/schemas/__init__.py +0 -0
- memoriai/integrations/__init__.py +68 -0
- memoriai/integrations/anthropic_integration.py +194 -0
- memoriai/integrations/litellm_integration.py +11 -0
- memoriai/integrations/openai_integration.py +273 -0
- memoriai/scripts/llm_text.py +50 -0
- memoriai/tools/__init__.py +5 -0
- memoriai/tools/memory_tool.py +544 -0
- memoriai/utils/__init__.py +89 -0
- memoriai/utils/exceptions.py +418 -0
- memoriai/utils/helpers.py +433 -0
- memoriai/utils/logging.py +204 -0
- memoriai/utils/pydantic_models.py +258 -0
- memoriai/utils/schemas.py +0 -0
- memoriai/utils/validators.py +339 -0
- memorisdk-1.0.0.dist-info/METADATA +386 -0
- memorisdk-1.0.0.dist-info/RECORD +44 -0
- memorisdk-1.0.0.dist-info/WHEEL +5 -0
- memorisdk-1.0.0.dist-info/entry_points.txt +2 -0
- memorisdk-1.0.0.dist-info/licenses/LICENSE +203 -0
- memorisdk-1.0.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic Models for Structured Memory Processing
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Annotated, Dict, List, Literal, Optional, Tuple
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class MemoryCategoryType(str, Enum):
    """Closed set of primary categories a memory can be filed under.

    Mixes in ``str`` so members compare equal to, and serialize as,
    their plain string values.
    """

    fact = "fact"
    preference = "preference"
    skill = "skill"
    context = "context"
    rule = "rule"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class RetentionType(str, Enum):
    """How long a memory is kept: short-term, long-term, or permanent.

    ``str`` mixin makes members interchangeable with their string values.
    """

    short_term = "short_term"
    long_term = "long_term"
    permanent = "permanent"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class EntityType(str, Enum):
    """Kinds of entities the extraction step can tag.

    ``str`` mixin: members behave as their plain string values.
    """

    person = "person"
    technology = "technology"
    topic = "topic"
    skill = "skill"
    project = "project"
    keyword = "keyword"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# Define constrained types using Annotated
# Reusable constrained field types shared by the models below; pydantic
# enforces the bounds whenever a field annotated with one is validated.
ConfidenceScore = Annotated[float, Field(ge=0.0, le=1.0)]  # inclusive 0.0-1.0
ImportanceScore = Annotated[float, Field(ge=0.0, le=1.0)]  # inclusive 0.0-1.0
RelevanceScore = Annotated[float, Field(ge=0.0, le=1.0)]  # inclusive 0.0-1.0
PriorityLevel = Annotated[int, Field(ge=1, le=10)]  # integer in 1-10
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
class MemoryCategory(BaseModel):
    """Categorization result for a single memory.

    Pairs the chosen primary category with the categorizer's confidence
    and a short textual justification.
    """

    # The top-level category assigned to this memory.
    primary_category: MemoryCategoryType
    # How certain the categorizer is about the choice (0.0-1.0).
    confidence_score: ConfidenceScore = Field(description="Confidence in categorization (0.0-1.0)")
    # Human-readable justification for the chosen category.
    reasoning: str = Field(description="Brief explanation for why this category was chosen")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class ExtractedEntity(BaseModel):
    """One entity pulled out of a conversation, with scoring metadata."""

    # What kind of entity this is (person, technology, topic, ...).
    entity_type: EntityType
    # The entity itself, as extracted text.
    value: str = Field(description="The actual entity value")
    # Relevance of this entity to the memory it came from (0.0-1.0).
    relevance_score: RelevanceScore = Field(description="How relevant this entity is to the memory")
    # Optional free-text context; None when nothing extra was captured.
    context: Optional[str] = Field(default=None, description="Additional context about this entity")
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
class ExtractedEntities(BaseModel):
    """Everything the entity-extraction step found in a conversation.

    The flat ``List[str]`` fields give quick per-type access;
    ``structured_entities`` carries detailed per-entity records
    (presumably mirroring the flat lists — confirm with the extractor).
    """

    people: List[str] = Field(default_factory=list, description="Names of people mentioned")
    technologies: List[str] = Field(default_factory=list, description="Technologies, tools, libraries mentioned")
    topics: List[str] = Field(default_factory=list, description="Main topics or subjects discussed")
    skills: List[str] = Field(default_factory=list, description="Skills, abilities, or competencies mentioned")
    projects: List[str] = Field(default_factory=list, description="Projects, repositories, or initiatives mentioned")
    keywords: List[str] = Field(default_factory=list, description="Important keywords for search")

    # Structured entities with metadata
    structured_entities: List[ExtractedEntity] = Field(default_factory=list, description="Detailed entity extraction")
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class MemoryImportance(BaseModel):
    """Importance scoring plus the resulting retention recommendation."""

    # Overall importance of the memory (0.0-1.0).
    importance_score: ImportanceScore = Field(description="Overall importance score (0.0-1.0)")
    # Which retention bucket the memory should land in.
    retention_type: RetentionType = Field(description="Recommended retention type")
    # Free-text justification for the score and retention choice.
    reasoning: str = Field(description="Explanation for the importance level and retention decision")

    # Additional scoring factors — each defaults to a neutral 0.5.
    novelty_score: RelevanceScore = Field(default=0.5, description="How novel/new this information is")
    relevance_score: RelevanceScore = Field(default=0.5, description="How relevant to user's interests")
    actionability_score: RelevanceScore = Field(default=0.5, description="How actionable this information is")
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
class ProcessedMemory(BaseModel):
    """Fully processed memory: categorization, entities, importance,
    searchable content, and the final storage decision."""

    # Core categorization
    category: MemoryCategory

    # Entity extraction
    entities: ExtractedEntities

    # Importance and retention
    importance: MemoryImportance

    # Content processing
    summary: str = Field(description="Concise, searchable summary of the memory")
    searchable_content: str = Field(description="Content optimized for keyword and semantic search")
    key_insights: List[str] = Field(default_factory=list, description="Key insights or takeaways")

    # Storage decision
    should_store: bool = Field(description="Whether this memory should be stored")
    storage_reasoning: str = Field(description="Why this memory should or shouldn't be stored")

    # Metadata (optional fields)
    # NOTE(review): default_factory=datetime.now yields a naive local-time
    # timestamp — confirm whether UTC/aware datetimes are expected here.
    timestamp: Optional[datetime] = Field(default_factory=datetime.now, description="When this memory was processed")
    processing_metadata: Optional[Dict[str, str]] = Field(default=None, description="Additional processing metadata")
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class MemorySearchQuery(BaseModel):
    """Structured representation of a memory search request.

    Bundles the raw query text and parsed intent together with filters
    and strategy hints for the retrieval layer.
    """

    # Query components
    query_text: str = Field(description="Original query text")
    intent: str = Field(description="Interpreted intent of the query")

    # Search parameters
    entity_filters: List[str] = Field(default_factory=list, description="Specific entities to search for")
    category_filters: List[MemoryCategoryType] = Field(default_factory=list, description="Memory categories to include")
    time_range: Optional[str] = Field(default=None, description="Time range for search (e.g., 'last_week')")
    min_importance: ImportanceScore = Field(default=0.0, description="Minimum importance score")

    # Search strategy
    search_strategy: List[str] = Field(default_factory=list, description="Recommended search strategies")
    expected_result_types: List[str] = Field(default_factory=list, description="Expected types of results")
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
class MemoryRelationship(BaseModel):
    """Directed, typed link between two stored memories."""

    # IDs of the memories at either end of the link.
    source_memory_id: str
    target_memory_id: str
    # Closed vocabulary of supported relationship kinds.
    relationship_type: Literal["builds_on", "contradicts", "supports", "related_to", "prerequisite"]
    # How strongly the two memories are related (0.0-1.0).
    strength: RelevanceScore = Field(description="Strength of the relationship")
    # Free-text explanation of the link.
    reasoning: str = Field(description="Why these memories are related")
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
class UserRule(BaseModel):
    """A user-supplied preference, instruction, constraint, or goal."""

    # The rule itself, expressed in natural language.
    rule_text: str = Field(description="The rule or preference in natural language")
    # Which flavor of rule this is.
    rule_type: Literal["preference", "instruction", "constraint", "goal"]
    # Priority within 1-10; defaults to the midpoint.
    priority: PriorityLevel = Field(default=5, description="Priority level (1-10)")
    # Optional scoping: when/where the rule applies.
    context: Optional[str] = Field(default=None, description="When this rule applies")
    # Rules can be toggled off without being deleted.
    active: bool = Field(default=True, description="Whether this rule is currently active")
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
class ConversationContext(BaseModel):
    """Context carried alongside a conversation for memory processing."""

    # Disable pydantic's protected "model_" namespace so the model_used
    # field below does not trigger a warning.
    model_config = {"protected_namespaces": ()}

    # Identity of the conversation.
    user_id: Optional[str] = Field(default=None)
    session_id: str
    conversation_id: str
    model_used: str

    # User context
    user_preferences: List[str] = Field(default_factory=list)
    current_projects: List[str] = Field(default_factory=list)
    relevant_skills: List[str] = Field(default_factory=list)

    # Conversation metadata
    conversation_length: int = Field(default=1, description="Number of exchanges in this conversation")
    topic_thread: Optional[str] = Field(default=None, description="Main topic thread being discussed")

    # Memory context
    recent_memories: List[str] = Field(default_factory=list, description="IDs of recently accessed memories")
    applied_rules: List[str] = Field(default_factory=list, description="Rules that were applied")
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
class MemoryStats(BaseModel):
    """Aggregate statistics over the stored memory collection."""

    total_memories: int  # count of all stored memories
    memories_by_category: Dict[str, int]  # category name -> count
    memories_by_retention: Dict[str, int]  # retention type -> count
    average_importance: float  # mean importance score
    total_entities: int  # count of all extracted entities
    most_common_entities: List[Tuple[str, int]]  # (entity, occurrence count) pairs
    storage_size_mb: float  # on-disk size in megabytes
    oldest_memory_date: Optional[datetime]  # None when the store is empty
    newest_memory_date: Optional[datetime]  # None when the store is empty
|
|
File without changes
|
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Data validation utilities for Memoriai
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, Union
|
|
8
|
+
|
|
9
|
+
from .exceptions import ValidationError
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class DataValidator:
    """Centralized data validation utilities.

    Every validator is a classmethod that returns the (possibly
    normalized) input value on success and raises ``ValidationError``
    with a field-specific message on failure.
    """

    # Regex patterns
    UUID_PATTERN = re.compile(
        r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$", re.IGNORECASE
    )
    # Deliberately simple email shape check — not a full RFC 5322 parser.
    EMAIL_PATTERN = re.compile(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$")
    # Legacy OpenAI key shape ("sk-" + 48 alphanumerics). Kept for any
    # callers that reference it directly; validate_openai_api_key below
    # intentionally no longer requires this exact shape.
    API_KEY_PATTERN = re.compile(r"^sk-[a-zA-Z0-9]{48}$")

    @classmethod
    def validate_uuid(cls, value: str, field_name: str = "UUID") -> str:
        """Validate that *value* is a canonical hyphenated hex UUID string.

        Returns the value unchanged; raises ValidationError otherwise.
        """
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        if not cls.UUID_PATTERN.match(value):
            raise ValidationError(f"{field_name} must be a valid UUID format")

        return value

    @classmethod
    def validate_email(cls, value: str, field_name: str = "email") -> str:
        """Validate basic email address shape (local@domain.tld)."""
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        if not cls.EMAIL_PATTERN.match(value):
            raise ValidationError(f"{field_name} must be a valid email address")

        return value

    @classmethod
    def validate_openai_api_key(cls, value: str, field_name: str = "API key") -> str:
        """Validate OpenAI API key format.

        Bug fix: the previous exact-51-character check ("sk-" + 48 chars)
        rejected current OpenAI key formats such as project keys
        ("sk-proj-..."), which are longer and variable in length. We now
        require only the "sk-" prefix plus a sane minimum length.
        """
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        if not value.startswith("sk-"):
            raise ValidationError(f"{field_name} must start with 'sk-'")

        if len(value) < 20:
            raise ValidationError(f"{field_name} is too short to be a valid API key")

        return value

    @classmethod
    def validate_namespace(cls, value: str, field_name: str = "namespace") -> str:
        """Validate namespace format: 1-64 chars of [A-Za-z0-9_-]."""
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        if not value:
            raise ValidationError(f"{field_name} cannot be empty")

        if len(value) > 64:
            raise ValidationError(f"{field_name} cannot exceed 64 characters")

        # Allow alphanumeric, underscore, hyphen
        if not re.match(r"^[a-zA-Z0-9_-]+$", value):
            raise ValidationError(
                f"{field_name} can only contain letters, numbers, underscores, and hyphens"
            )

        return value

    @classmethod
    def validate_importance_score(
        cls, value: float, field_name: str = "importance score"
    ) -> float:
        """Validate importance score in [0.0, 1.0]; always returns a float."""
        # bool is a subclass of int, so exclude it explicitly — previously
        # True/False slipped through as 1.0/0.0.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            raise ValidationError(f"{field_name} must be a number")

        if not 0.0 <= value <= 1.0:
            raise ValidationError(f"{field_name} must be between 0.0 and 1.0")

        return float(value)

    @classmethod
    def validate_database_url(cls, value: str, field_name: str = "database URL") -> str:
        """Validate that a connection string uses a supported scheme prefix."""
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        # "sqlite://" also matches "sqlite:///..."; the old list carried a
        # redundant "sqlite:///" entry. startswith accepts a tuple of prefixes.
        valid_schemes = ("sqlite://", "postgresql://", "mysql://")
        if not value.startswith(valid_schemes):
            raise ValidationError(f"{field_name} must use a supported database scheme")

        return value

    @classmethod
    def validate_file_path(
        cls,
        value: Union[str, Path],
        field_name: str = "file path",
        must_exist: bool = False,
    ) -> Path:
        """Validate a file path, normalizing str input to pathlib.Path.

        When must_exist is True, the path must exist on disk.
        """
        if isinstance(value, str):
            path = Path(value)
        elif isinstance(value, Path):
            path = value
        else:
            raise ValidationError(f"{field_name} must be a string or Path object")

        if must_exist and not path.exists():
            raise ValidationError(f"{field_name} does not exist: {path}")

        return path

    @classmethod
    def validate_json_dict(
        cls, value: Any, field_name: str = "JSON data"
    ) -> Dict[str, Any]:
        """Validate that *value* is a JSON-serializable dictionary."""
        if not isinstance(value, dict):
            raise ValidationError(f"{field_name} must be a dictionary")

        try:
            import json

            json.dumps(value)  # Test JSON serialization
        except (TypeError, ValueError) as e:
            raise ValidationError(f"{field_name} must be JSON serializable: {e}") from e

        return value

    @classmethod
    def validate_memory_category(
        cls, value: str, field_name: str = "memory category"
    ) -> str:
        """Validate memory category.

        NOTE: keep this list in sync with MemoryCategoryType in
        pydantic_models.py.
        """
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        valid_categories = ["fact", "preference", "skill", "context", "rule"]
        if value not in valid_categories:
            raise ValidationError(
                f"{field_name} must be one of: {', '.join(valid_categories)}"
            )

        return value

    @classmethod
    def validate_retention_type(
        cls, value: str, field_name: str = "retention type"
    ) -> str:
        """Validate retention type.

        NOTE: keep this list in sync with RetentionType in pydantic_models.py.
        """
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        valid_types = ["short_term", "long_term", "permanent"]
        if value not in valid_types:
            raise ValidationError(
                f"{field_name} must be one of: {', '.join(valid_types)}"
            )

        return value

    @classmethod
    def validate_entity_type(cls, value: str, field_name: str = "entity type") -> str:
        """Validate entity type.

        NOTE: this list is a superset of EntityType in pydantic_models.py
        (it additionally allows "location" and "organization") — confirm
        whether the two are meant to match.
        """
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        valid_types = [
            "person",
            "technology",
            "topic",
            "skill",
            "project",
            "keyword",
            "location",
            "organization",
        ]
        if value not in valid_types:
            raise ValidationError(
                f"{field_name} must be one of: {', '.join(valid_types)}"
            )

        return value

    @classmethod
    def validate_positive_integer(
        cls, value: int, field_name: str = "value", min_value: int = 1
    ) -> int:
        """Validate an integer >= min_value.

        Despite the name, min_value=0 is allowed by callers that accept
        zero (e.g. tokens_used).
        """
        # Exclude bool explicitly — previously True/False validated as 1/0.
        if isinstance(value, bool) or not isinstance(value, int):
            raise ValidationError(f"{field_name} must be an integer")

        if value < min_value:
            raise ValidationError(f"{field_name} must be at least {min_value}")

        return value

    @classmethod
    def validate_text_length(
        cls,
        value: str,
        field_name: str = "text",
        max_length: int = 1000,
        min_length: int = 1,
    ) -> str:
        """Validate that a string's length is within [min_length, max_length]."""
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        if len(value) < min_length:
            raise ValidationError(
                f"{field_name} must be at least {min_length} characters"
            )

        if len(value) > max_length:
            raise ValidationError(f"{field_name} cannot exceed {max_length} characters")

        return value

    @classmethod
    def sanitize_input(cls, value: str, field_name: str = "input") -> str:
        """Sanitize user input for security.

        NOTE(review): this is a heuristic blacklist — it can produce false
        positives on benign text containing words like "update" or "select",
        and is NOT a substitute for parameterized SQL queries at the
        database layer.
        """
        if not isinstance(value, str):
            raise ValidationError(f"{field_name} must be a string")

        # Remove potential SQL injection patterns
        dangerous_patterns = [
            r"(\b(DROP|DELETE|UPDATE|INSERT|CREATE|ALTER|EXEC|EXECUTE)\b)",
            r"(--|#|/\*|\*/)",
            r"(\b(UNION|SELECT)\b.*\b(FROM|WHERE)\b)",
        ]

        for pattern in dangerous_patterns:
            if re.search(pattern, value, re.IGNORECASE):
                raise ValidationError(
                    f"{field_name} contains potentially dangerous content"
                )

        # Basic HTML/script tag removal
        value = re.sub(r"<[^>]*>", "", value)

        return value.strip()
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
class MemoryValidator:
    """Specialized validator for memory-related data.

    Thin orchestration layer: each method walks an input dict and routes
    recognized keys through the matching DataValidator classmethod,
    returning a new dict of validated values.
    """

    @classmethod
    def validate_memory_data(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate complete memory data structure.

        All keys are optional; unrecognized keys are dropped.
        """
        validated: Dict[str, Any] = {}

        # Scalar fields: (key, DataValidator method name, extra positional args).
        scalar_checks = (
            ("memory_id", "validate_uuid", ("memory_id",)),
            ("namespace", "validate_namespace", ()),
            ("importance_score", "validate_importance_score", ()),
            ("category_primary", "validate_memory_category", ()),
            ("retention_type", "validate_retention_type", ()),
        )
        for key, method, extra in scalar_checks:
            if key in data:
                validated[key] = getattr(DataValidator, method)(data[key], *extra)

        # Text fields — searchable_content gets a larger length cap.
        for key in ("summary", "searchable_content"):
            if key in data:
                cap = 5000 if key == "searchable_content" else 1000
                validated[key] = DataValidator.validate_text_length(
                    data[key], key, max_length=cap
                )

        # JSON fields must be JSON-serializable dicts.
        for key in ("processed_data", "metadata"):
            if key in data:
                validated[key] = DataValidator.validate_json_dict(data[key], key)

        return validated

    @classmethod
    def validate_chat_data(cls, data: Dict[str, Any]) -> Dict[str, Any]:
        """Validate chat data structure.

        Raises ValidationError if any required key is absent.
        """
        # Fail fast on missing required keys, in declaration order.
        for required in ("chat_id", "user_input", "ai_output", "model"):
            if required not in data:
                raise ValidationError(f"Required field missing: {required}")

        validated: Dict[str, Any] = {
            "chat_id": DataValidator.validate_uuid(data["chat_id"], "chat_id"),
            "user_input": DataValidator.sanitize_input(data["user_input"], "user_input"),
            "ai_output": DataValidator.sanitize_input(data["ai_output"], "ai_output"),
            "model": DataValidator.validate_text_length(data["model"], "model", max_length=100),
        }

        # Optional fields
        if "namespace" in data:
            validated["namespace"] = DataValidator.validate_namespace(data["namespace"])

        if "tokens_used" in data:
            validated["tokens_used"] = DataValidator.validate_positive_integer(
                data["tokens_used"], "tokens_used", min_value=0
            )

        if "metadata" in data:
            validated["metadata"] = DataValidator.validate_json_dict(
                data["metadata"], "metadata"
            )

        return validated
|