MemoryOS 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (74) hide show
  1. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/METADATA +2 -1
  2. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/RECORD +72 -55
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +156 -65
  5. memos/api/context/context.py +147 -0
  6. memos/api/context/dependencies.py +90 -0
  7. memos/api/product_models.py +5 -1
  8. memos/api/routers/product_router.py +54 -26
  9. memos/configs/graph_db.py +49 -1
  10. memos/configs/internet_retriever.py +6 -0
  11. memos/configs/mem_os.py +5 -0
  12. memos/configs/mem_reader.py +9 -0
  13. memos/configs/mem_scheduler.py +18 -4
  14. memos/configs/mem_user.py +58 -0
  15. memos/graph_dbs/base.py +9 -1
  16. memos/graph_dbs/factory.py +2 -0
  17. memos/graph_dbs/nebular.py +1364 -0
  18. memos/graph_dbs/neo4j.py +4 -4
  19. memos/log.py +1 -1
  20. memos/mem_cube/utils.py +13 -6
  21. memos/mem_os/core.py +140 -30
  22. memos/mem_os/main.py +1 -1
  23. memos/mem_os/product.py +266 -152
  24. memos/mem_os/utils/format_utils.py +314 -67
  25. memos/mem_reader/simple_struct.py +13 -5
  26. memos/mem_scheduler/base_scheduler.py +220 -250
  27. memos/mem_scheduler/general_scheduler.py +193 -73
  28. memos/mem_scheduler/modules/base.py +5 -5
  29. memos/mem_scheduler/modules/dispatcher.py +6 -9
  30. memos/mem_scheduler/modules/misc.py +81 -16
  31. memos/mem_scheduler/modules/monitor.py +52 -41
  32. memos/mem_scheduler/modules/rabbitmq_service.py +9 -7
  33. memos/mem_scheduler/modules/retriever.py +108 -191
  34. memos/mem_scheduler/modules/scheduler_logger.py +255 -0
  35. memos/mem_scheduler/mos_for_test_scheduler.py +16 -19
  36. memos/mem_scheduler/schemas/__init__.py +0 -0
  37. memos/mem_scheduler/schemas/general_schemas.py +43 -0
  38. memos/mem_scheduler/schemas/message_schemas.py +148 -0
  39. memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
  40. memos/mem_scheduler/utils/__init__.py +0 -0
  41. memos/mem_scheduler/utils/filter_utils.py +176 -0
  42. memos/mem_scheduler/utils/misc_utils.py +61 -0
  43. memos/mem_user/factory.py +94 -0
  44. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  45. memos/mem_user/mysql_user_manager.py +500 -0
  46. memos/mem_user/persistent_factory.py +96 -0
  47. memos/mem_user/user_manager.py +4 -4
  48. memos/memories/activation/item.py +4 -0
  49. memos/memories/textual/base.py +1 -1
  50. memos/memories/textual/general.py +35 -91
  51. memos/memories/textual/item.py +5 -33
  52. memos/memories/textual/tree.py +13 -7
  53. memos/memories/textual/tree_text_memory/organize/conflict.py +4 -2
  54. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +47 -43
  55. memos/memories/textual/tree_text_memory/organize/reorganizer.py +8 -5
  56. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  57. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
  58. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  59. memos/memories/textual/tree_text_memory/retrieve/searcher.py +46 -23
  60. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
  61. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  62. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  63. memos/memos_tools/dinding_report_bot.py +422 -0
  64. memos/memos_tools/notification_service.py +44 -0
  65. memos/memos_tools/notification_utils.py +96 -0
  66. memos/settings.py +3 -1
  67. memos/templates/mem_reader_prompts.py +2 -1
  68. memos/templates/mem_scheduler_prompts.py +41 -7
  69. memos/templates/mos_prompts.py +87 -0
  70. memos/mem_scheduler/modules/schemas.py +0 -328
  71. memos/mem_scheduler/utils.py +0 -75
  72. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
  73. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
  74. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,329 @@
1
+ from collections import Counter
2
+ from datetime import datetime
3
+ from pathlib import Path
4
+ from typing import ClassVar
5
+ from uuid import uuid4
6
+
7
+ from pydantic import BaseModel, Field, computed_field, field_validator
8
+
9
+ from memos.log import get_logger
10
+ from memos.mem_scheduler.modules.misc import AutoDroppingQueue, DictConversionMixin
11
+ from memos.mem_scheduler.schemas.general_schemas import (
12
+ DEFAULT_MAX_QUERY_KEY_WORDS,
13
+ DEFAULT_WEIGHT_VECTOR_FOR_RANKING,
14
+ NOT_INITIALIZED,
15
+ )
16
+ from memos.mem_scheduler.utils.filter_utils import transform_name_to_key
17
+ from memos.memories.textual.tree import TextualMemoryItem
18
+
19
+
20
+ logger = get_logger(__name__)
21
+
22
+ FILE_PATH = Path(__file__).absolute()
23
+ BASE_DIR = FILE_PATH.parent.parent.parent.parent.parent
24
+
25
+
26
+ # ============== Queries ==============
27
class QueryMonitorItem(BaseModel, DictConversionMixin):
    """A single monitored user query: its text, extracted keywords, and arrival time."""

    # Auto-generated UUID string identifying this query record.
    item_id: str = Field(
        description="Unique identifier for the query item", default_factory=lambda: str(uuid4())
    )
    query_text: str = Field(
        ...,
        description="The actual user query text content",
        min_length=1,
    )
    keywords: list[str] | None = Field(
        default=None,
        min_length=1,  # If provided, shouldn't be empty
        description="Semantic keywords extracted from the query text",
    )
    # Class-level cap on how many keywords one item may carry; subclass via
    # with_max_keywords() to change it.
    max_keywords: ClassVar[int] = DEFAULT_MAX_QUERY_KEY_WORDS

    timestamp: datetime = Field(
        default_factory=datetime.now, description="Timestamp indicating when query was submitted"
    )

    @field_validator("keywords", mode="before")
    @classmethod
    def validate_keywords(cls, v, values):
        """Reject non-list keyword payloads and truncate over-long lists to max_keywords."""
        if v is None:
            return None

        if not isinstance(v, list):
            raise ValueError("Keywords must be a list")

        if len(v) <= cls.max_keywords:
            return v

        logger.warning(
            f"Keywords list truncated from {len(v)} to {cls.max_keywords} items. "
            f"Configure max_keywords class attribute to adjust this limit."
        )
        return v[: cls.max_keywords]

    @classmethod
    def with_max_keywords(cls, limit: int):
        """Create a new class with custom keywords limit."""
        if not isinstance(limit, int) or limit <= 0:
            raise ValueError("Max keywords limit must be positive integer")

        # Dynamically derive a subclass that only overrides the class-level cap.
        return type(f"{cls.__name__}_MaxKeywords{limit}", (cls,), {"max_keywords": limit})
71
+
72
+
73
class QueryMonitorQueue(AutoDroppingQueue[QueryMonitorItem]):
    """
    A thread-safe queue for monitoring queries with timestamp and keyword tracking.

    Bulk reads below acquire the underlying queue's ``mutex`` before scanning
    ``self.queue`` so each call sees a consistent snapshot.
    """

    def put(self, item: QueryMonitorItem, block: bool = True, timeout: float | None = None) -> None:
        """
        Add a query item to the queue. Ensures the item is of correct type.

        Args:
            item: A QueryMonitorItem instance
            block: Passed through to the base queue's put().
            timeout: Passed through to the base queue's put().

        Raises:
            ValueError: If item is not a QueryMonitorItem.
        """
        if not isinstance(item, QueryMonitorItem):
            raise ValueError("Item must be an instance of QueryMonitorItem")
        super().put(item, block, timeout)

    def get_queries_by_timestamp(
        self, start_time: datetime, end_time: datetime
    ) -> list[QueryMonitorItem]:
        """
        Retrieve queries added between the specified time range (inclusive bounds).
        """
        with self.mutex:
            return [item for item in self.queue if start_time <= item.timestamp <= end_time]

    def get_keywords_collections(self) -> Counter:
        """
        Generate a Counter containing keyword frequencies across all queries.

        Returns:
            Counter object with keyword counts
        """
        with self.mutex:
            # BUG FIX: QueryMonitorItem.keywords defaults to None; iterating None
            # raised TypeError. Treat a missing keyword list as empty instead.
            all_keywords = [kw for item in self.queue for kw in (item.keywords or [])]
            return Counter(all_keywords)

    def get_queries_with_timesort(self, reverse: bool = True) -> list[str]:
        """
        Retrieve all query texts sorted by timestamp.

        Args:
            reverse: If True, sort in descending order (newest first),
                     otherwise sort in ascending order (oldest first)

        Returns:
            List of query texts sorted by timestamp
        """
        with self.mutex:
            return [
                monitor.query_text
                for monitor in sorted(self.queue, key=lambda x: x.timestamp, reverse=reverse)
            ]
126
+
127
+
128
+ # ============== Memories ==============
129
class MemoryMonitorItem(BaseModel, DictConversionMixin):
    """One monitored memory entry plus the scores used to rank it."""

    # Auto-generated UUID string identifying this memory record.
    item_id: str = Field(
        description="Unique identifier for the memory item", default_factory=lambda: str(uuid4())
    )
    memory_text: str = Field(
        ...,
        description="The actual content of the memory",
        min_length=1,
    )
    tree_memory_item: TextualMemoryItem | None = Field(
        default=None, description="Optional textual memory item"
    )
    tree_memory_item_mapping_key: str = Field(
        description="Key generated from memory_text using transform_name_to_key",
    )
    # NOT_INITIALIZED acts as a "not computed yet" sentinel for the three scores below.
    keywords_score: float = Field(
        default=NOT_INITIALIZED,
        description="The score generate by counting keywords in queries",
        ge=NOT_INITIALIZED,  # Sentinel is the minimum allowed value
    )
    sorting_score: float = Field(
        default=NOT_INITIALIZED,
        description="The score generate from rerank process",
        ge=NOT_INITIALIZED,  # Sentinel is the minimum allowed value
    )
    importance_score: float = Field(
        default=NOT_INITIALIZED,
        description="Numerical score representing the memory's importance",
        ge=NOT_INITIALIZED,  # Sentinel is the minimum allowed value
    )
    recording_count: int = Field(
        default=1,
        description="How many times this memory has been recorded",
        ge=1,  # Greater than or equal to 1
    )

    @field_validator("tree_memory_item_mapping_key", mode="before")
    @classmethod
    def generate_mapping_key(cls, v, info):
        """Derive the mapping key from memory_text when the caller passes None.

        BUG FIX: pydantic v2 passes a ValidationInfo object as the third
        argument, not a dict of prior values; the old ``"memory_text" in values``
        raised TypeError. Previously-validated fields live in ``info.data``
        (memory_text is declared before this field, so it is available here).
        """
        if v is None and "memory_text" in info.data:
            return transform_name_to_key(info.data["memory_text"])
        return v

    def get_importance_score(self, weight_vector: list[float] | None = None) -> float:
        """
        Calculate the effective ranking score for the memory item.

        Args:
            weight_vector: Three weights applied to (sorting_score,
                keywords_score, recording_count); must sum to 1. Falls back to
                DEFAULT_WEIGHT_VECTOR_FOR_RANKING when None.

        Returns:
            float: The combined score. Also cached on self.importance_score as
            a side effect (callers such as sort keys rely on the return value).
        """
        if weight_vector is None:
            logger.warning("weight_vector of get_importance_score is None.")
            weight_vector = DEFAULT_WEIGHT_VECTOR_FOR_RANKING
        # NOTE(review): exact float-sum equality is brittle and `assert` is
        # stripped under ``python -O``; consider math.isclose + ValueError.
        assert sum(weight_vector) == 1
        # Clamp the keyword and frequency contributions so neither can dominate.
        normalized_keywords_score = min(self.keywords_score * weight_vector[1], 5)
        normalized_recording_count_score = min(self.recording_count * weight_vector[2], 2)
        self.importance_score = (
            self.sorting_score * weight_vector[0]
            + normalized_keywords_score
            + normalized_recording_count_score
        )
        return self.importance_score
195
+
196
+
197
class MemoryMonitorManager(BaseModel, DictConversionMixin):
    """Holds and maintains the ranked memory set for one (user, mem_cube) pair."""

    user_id: str = Field(..., description="Required user identifier", min_length=1)
    mem_cube_id: str = Field(..., description="Required memory cube identifier", min_length=1)
    memories: list[MemoryMonitorItem] = Field(
        default_factory=list, description="Collection of memory items"
    )
    max_capacity: int | None = Field(
        default=None, description="Maximum number of memories allowed (None for unlimited)", ge=1
    )

    @computed_field
    @property
    def memory_size(self) -> int:
        """Automatically calculated count of memory items."""
        return len(self.memories)

    @property
    def memories_mapping_dict(self) -> dict[str, MemoryMonitorItem]:
        """
        Generate a mapping dictionary for the memories in MemoryMonitorManager,
        using tree_memory_item_mapping_key as the key and MemoryMonitorItem as the value.

        Note: if two memories share a mapping key, the later one silently wins.

        Returns:
            Dict[str, MemoryMonitorItem]: A dictionary where keys are
                tree_memory_item_mapping_key values from MemoryMonitorItem,
                and values are the corresponding MemoryMonitorItem objects.
        """
        mapping_dict = {
            mem_item.tree_memory_item_mapping_key: mem_item for mem_item in self.memories
        }

        logger.debug(
            f"Generated memories mapping dict for user_id={self.user_id}, "
            f"mem_cube_id={self.mem_cube_id}, "
            f"total_items={len(mapping_dict)}, "
            f"source_memory_count={len(self.memories)}"
        )
        return mapping_dict

    def get_sorted_mem_monitors(self, reverse=True) -> list[MemoryMonitorItem]:
        """
        Retrieve memory monitors sorted by importance score (descending by default).

        Side effect: get_importance_score() recomputes and caches
        item.importance_score on every element.

        Returns:
            list[MemoryMonitorItem]: Sorted list of memory monitor items.
        """
        return sorted(
            self.memories,
            key=lambda item: item.get_importance_score(
                weight_vector=DEFAULT_WEIGHT_VECTOR_FOR_RANKING
            ),
            reverse=reverse,
        )

    def update_memories(
        self, new_memory_monitors: list[MemoryMonitorItem], partial_retention_number: int
    ) -> list[MemoryMonitorItem]:
        """
        Merge new monitors into the tracked set, retire stale ones, and enforce capacity.

        Args:
            new_memory_monitors: Fresh monitor items from the current working set.
            partial_retention_number: How many of the highest-scoring stale
                memories to keep (with their scores reset) instead of removing.

        Returns:
            The updated memory list (also stored on self.memories).
            BUG FIX: the return annotation previously said MemoryMonitorItem,
            but a list has always been returned.

        Raises:
            ValueError: If partial_retention_number is negative.
        """

        # Validate partial_retention_number
        if partial_retention_number < 0:
            raise ValueError("partial_retention_number must be non-negative")

        # Step 1: Update existing memories or add new ones
        added_count = 0
        memories_mapping_dict = self.memories_mapping_dict
        new_mem_set = set()
        for memory_monitor in new_memory_monitors:
            if memory_monitor.tree_memory_item_mapping_key in memories_mapping_dict:
                # Refresh scores of the existing entry and bump its frequency
                item: MemoryMonitorItem = memories_mapping_dict[
                    memory_monitor.tree_memory_item_mapping_key
                ]
                item.recording_count += 1
                item.keywords_score = memory_monitor.keywords_score
                item.sorting_score = memory_monitor.sorting_score
            else:
                # Add new memory
                self.memories.append(memory_monitor)
                added_count += 1

            new_mem_set.add(memory_monitor.tree_memory_item_mapping_key)

        # Step 2: Memories absent from the new set are candidates for removal
        old_mem_monitor_list = []
        for mem_monitor in self.memories:
            if mem_monitor.tree_memory_item_mapping_key not in new_mem_set:
                old_mem_monitor_list.append(mem_monitor)

        # Rank stale memories by importance so only the best survive
        # (side effect: caches importance_score on each item)
        sorted_old_mem_monitors = sorted(
            old_mem_monitor_list,
            key=lambda item: item.get_importance_score(
                weight_vector=DEFAULT_WEIGHT_VECTOR_FOR_RANKING
            ),
            reverse=True,
        )

        # Keep the top N old memories
        memories_to_remove = sorted_old_mem_monitors[partial_retention_number:]
        memories_to_change_score = sorted_old_mem_monitors[:partial_retention_number]

        # Step 3: Remove identified memories and reset the scores of retained old ones
        for memory in memories_to_remove:
            self.memories.remove(memory)

        for memory in memories_to_change_score:
            memory.sorting_score = 0
            # NOTE(review): 0 violates the field's ge=1 constraint; it goes
            # unchecked because validate_assignment is not enabled on the model.
            memory.recording_count = 0
            memory.keywords_score = 0

        # Step 4: Enforce max_capacity if set (slicing with None keeps everything)
        sorted_memories = sorted(
            self.memories,
            key=lambda item: item.get_importance_score(
                weight_vector=DEFAULT_WEIGHT_VECTOR_FOR_RANKING
            ),
            reverse=True,
        )
        # Keep only the top max_capacity memories
        self.memories = sorted_memories[: self.max_capacity]

        # Log the update result
        logger.info(
            f"Updated monitor manager for user {self.user_id}, mem_cube {self.mem_cube_id}: "
            f"Total memories: {len(self.memories)}, "
            f"Added/Updated: {added_count}, "
            f"Removed: {len(memories_to_remove)} (excluding top {partial_retention_number} by recording_count)"
        )

        return self.memories
File without changes
@@ -0,0 +1,176 @@
1
+ import re
2
+
3
+ from memos.dependency import require_python_package
4
+ from memos.log import get_logger
5
+
6
+
7
+ logger = get_logger(__name__)
8
+
9
+
10
def transform_name_to_key(name):
    """
    Normalize *name* into a canonical lookup key.

    Punctuation is stripped (word characters, CJK ideographs, and whitespace
    survive), the remaining tokens are joined with underscores, and the result
    is lowercased.

    Args:
        name (str): Input text to be processed

    Returns:
        str: Normalized key string
    """
    # Drop every character that is not \w (letters/digits/underscore),
    # a CJK ideograph (\u4e00-\u9fff), or whitespace.
    cleaned = re.sub(r"[^\w\u4e00-\u9fff\s]", "", name, flags=re.UNICODE)

    # Collapse whitespace runs into single underscores, then lowercase.
    tokens = cleaned.split()
    return "_".join(tokens).lower()
36
+
37
+
38
def is_all_english(input_string: str) -> bool:
    """Determine if the string consists entirely of English characters (including spaces)"""
    # ASCII covers English letters/digits/punctuation; isspace() additionally
    # admits Unicode whitespace. Empty strings are vacuously True.
    for ch in input_string:
        if not (ch.isascii() or ch.isspace()):
            return False
    return True
41
+
42
+
43
def is_all_chinese(input_string: str) -> bool:
    """Return True if every character is a CJK ideograph or whitespace.

    BUG FIX: the supplementary-plane bounds were written as ``"\\u20000"`` etc.;
    ``\\u`` consumes exactly 4 hex digits, so those parsed as two-character
    strings (e.g. ``"\\u2000" + "0"``), wrongly accepting many BMP symbols
    (e.g. U+2020 DAGGER) and rejecting real Extension B-F ideographs. The
    ranges now use 8-digit ``\\U`` escapes.

    NOTE(review): CJK *punctuation* (e.g. U+FF01) is not accepted, despite the
    original docstring's claim — behavior kept, claim corrected.
    """
    return all(
        ("\u4e00" <= char <= "\u9fff")  # CJK Unified Ideographs
        or ("\u3400" <= char <= "\u4dbf")  # Extension A
        or ("\U00020000" <= char <= "\U0002a6df")  # Extension B
        or ("\U0002a700" <= char <= "\U0002b73f")  # Extension C
        or ("\U0002b740" <= char <= "\U0002b81f")  # Extension D
        or ("\U0002b820" <= char <= "\U0002ceaf")  # Extension E
        or ("\U0002f800" <= char <= "\U0002fa1f")  # Compat. Ideographs Supplement
        or char.isspace()  # Spaces
        for char in input_string
    )
56
+
57
+
58
@require_python_package(
    import_name="sklearn",
    install_command="pip install scikit-learn",
    install_link="https://scikit-learn.org/stable/install.html",
)
def filter_similar_memories(
    text_memories: list[str], similarity_threshold: float = 0.75
) -> list[str]:
    """
    Remove near-duplicate memories using TF-IDF cosine similarity.

    Greedy strategy: scanning in order, the earliest memory of each similar
    cluster is kept and every later memory at or above the threshold is dropped.

    Args:
        text_memories: List of text memories to filter. Non-string entries are
            coerced to str IN PLACE (the caller's list is mutated).
        similarity_threshold: Pairwise cosine-similarity cutoff (0.0-1.0);
            higher values mean stricter filtering.

    Returns:
        Filtered list of memories; on any vectorization/similarity error the
        original list is returned unchanged.
    """
    from sklearn.feature_extraction.text import TfidfVectorizer
    from sklearn.metrics.pairwise import cosine_similarity

    if not text_memories:
        logger.warning("Received empty memories list - nothing to filter")
        return []

    # Coerce any non-string entries so TF-IDF vectorization cannot choke.
    for pos, candidate in enumerate(text_memories):
        if not isinstance(candidate, str):
            logger.error(
                f"{candidate} in memories is not a string,"
                f" and now has been transformed to be a string."
            )
            text_memories[pos] = str(candidate)

    try:
        # Vectorize, then compute the full pairwise cosine-similarity matrix.
        vectors = TfidfVectorizer().fit_transform(text_memories)
        sims = cosine_similarity(vectors)

        total = len(sims)
        survivors = set(range(len(text_memories)))  # start by keeping everything
        for anchor in range(total):
            if anchor not in survivors:
                continue  # already dropped as a duplicate of an earlier anchor

            # Later items too similar to this surviving anchor get dropped.
            duplicates = {
                other
                for other in range(anchor + 1, total)
                if sims[anchor][other] >= similarity_threshold and other in survivors
            }
            survivors -= duplicates

        filtered_memories = [text_memories[idx] for idx in sorted(survivors)]
        logger.debug(f"filtered_memories: {filtered_memories}")
        return filtered_memories

    except Exception as e:
        logger.error(f"Error filtering memories: {e!s}")
        return text_memories  # Return original list if error occurs
125
+
126
+
127
def filter_too_short_memories(
    text_memories: list[str], min_length_threshold: int = 20
) -> list[str]:
    """
    Drop memories whose length falls below a minimum threshold.

    The unit of "length" depends on language: word count for pure-English
    text, character count for pure-Chinese or mixed-language text. Blank or
    whitespace-only memories are always dropped.

    Args:
        text_memories: List of text memories to be filtered
        min_length_threshold: Minimum length required to keep a memory
            (words for English, characters otherwise)

    Returns:
        List of memories (original, un-stripped strings) meeting the threshold
    """
    if not text_memories:
        logger.debug("Empty memories list received in short memory filter")
        return []

    kept: list[str] = []
    dropped = 0

    for memory in text_memories:
        trimmed = memory.strip()
        if not trimmed:
            # Empty/whitespace-only entries never survive.
            dropped += 1
            continue

        # Pick the measurement unit based on detected language.
        if is_all_english(trimmed):
            measured = len(trimmed.split())  # Word count for English
        elif is_all_chinese(trimmed):
            measured = len(trimmed)  # Character count for Chinese
        else:
            logger.debug(f"Mixed-language memory, using character count: {trimmed[:50]}...")
            measured = len(trimmed)  # Default to character count

        if measured >= min_length_threshold:
            kept.append(memory)
        else:
            dropped += 1

    if dropped > 0:
        logger.info(
            f"Filtered out {dropped} short memories "
            f"(below {min_length_threshold} units). "
            f"Total remaining: {len(kept)}"
        )

    return kept
@@ -0,0 +1,61 @@
1
+ import json
2
+
3
+ from functools import wraps
4
+ from pathlib import Path
5
+
6
+ import yaml
7
+
8
+ from memos.log import get_logger
9
+
10
+
11
+ logger = get_logger(__name__)
12
+
13
+
14
def extract_json_dict(text: str):
    """
    Parse a JSON value out of (possibly fenced) LLM output.

    Strips common markdown code-fence markers — including some malformed
    variants — before handing the remainder to json.loads.

    Raises:
        json.JSONDecodeError: If the cleaned text is not valid JSON.
    """
    cleaned = text.strip()
    # Order matters: longer/odder fence spellings are removed before the bare "```".
    for fence in ("json```", "```python", "```json", "latex```", "```latex", "```"):
        cleaned = cleaned.replace(fence, "")
    return json.loads(cleaned.strip())
21
+
22
+
23
def parse_yaml(yaml_file: str | Path):
    """
    Load and return the parsed contents of a YAML file.

    Args:
        yaml_file: Path to the YAML file (string or Path).

    Raises:
        FileNotFoundError: If the path does not point to an existing file.
    """
    path = Path(yaml_file)
    if not path.is_file():
        raise FileNotFoundError(f"No such file: {yaml_file}")

    # safe_load avoids constructing arbitrary Python objects from the file.
    with path.open("r", encoding="utf-8") as fr:
        return yaml.safe_load(fr)
32
+
33
+
34
def log_exceptions(logger=logger):
    """
    Exception-catching decorator that automatically logs errors (including stack traces).

    Note: the exception is swallowed after logging, so the wrapped function
    returns None when it raises.

    Args:
        logger: Optional logger object (default: module-level logger)

    Example:
        @log_exceptions()
        def risky_function():
            raise ValueError("Oops!")

        @log_exceptions(logger=custom_logger)
        def another_risky_function():
            might_fail()
    """

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            try:
                return func(*args, **kwargs)
            except Exception as e:
                # exc_info=True attaches the full traceback to the log record.
                logger.error(f"Error in {func.__name__}: {e}", exc_info=True)
                return None

        return wrapper

    return decorator
@@ -0,0 +1,94 @@
1
+ from typing import Any, ClassVar
2
+
3
+ from memos.configs.mem_user import UserManagerConfigFactory
4
+ from memos.mem_user.mysql_user_manager import MySQLUserManager
5
+ from memos.mem_user.user_manager import UserManager
6
+
7
+
8
class UserManagerFactory:
    """Factory class for creating user manager instances."""

    # Registry mapping backend identifiers to their concrete manager classes.
    backend_to_class: ClassVar[dict[str, Any]] = {
        "sqlite": UserManager,
        "mysql": MySQLUserManager,
    }

    @classmethod
    def from_config(
        cls, config_factory: UserManagerConfigFactory
    ) -> UserManager | MySQLUserManager:
        """Create a user manager instance from configuration.

        Args:
            config_factory: Configuration factory containing backend and config

        Returns:
            User manager instance

        Raises:
            ValueError: If backend is not supported
        """
        backend = config_factory.backend
        if backend not in cls.backend_to_class:
            raise ValueError(f"Invalid user manager backend: {backend}")

        manager_cls = cls.backend_to_class[backend]
        # The backend config is a Pydantic model; unpack its fields as kwargs.
        return manager_cls(**config_factory.config.model_dump())

    @classmethod
    def create_sqlite(cls, db_path: str | None = None, user_id: str = "root") -> UserManager:
        """Create SQLite user manager with default configuration.

        Args:
            db_path: Path to SQLite database file
            user_id: Default user ID for initialization

        Returns:
            SQLite user manager instance
        """
        sqlite_config = {"db_path": db_path, "user_id": user_id}
        return cls.from_config(
            UserManagerConfigFactory(backend="sqlite", config=sqlite_config)
        )

    @classmethod
    def create_mysql(
        cls,
        user_id: str = "root",
        host: str = "localhost",
        port: int = 3306,
        username: str = "root",
        password: str = "",
        database: str = "memos_users",
        charset: str = "utf8mb4",
    ) -> MySQLUserManager:
        """Create MySQL user manager with specified configuration.

        Args:
            user_id: Default user ID for initialization
            host: MySQL server host
            port: MySQL server port
            username: MySQL username
            password: MySQL password
            database: MySQL database name
            charset: MySQL charset

        Returns:
            MySQL user manager instance
        """
        mysql_config = {
            "user_id": user_id,
            "host": host,
            "port": port,
            "username": username,
            "password": password,
            "database": database,
            "charset": charset,
        }
        return cls.from_config(UserManagerConfigFactory(backend="mysql", config=mysql_config))