neural-memory 0.1.0 (neural_memory-0.1.0-py3-none-any.whl)

This diff shows the content of a publicly released package version as it appears in its public registry. It is provided for informational purposes only.
Files changed (55)
  1. neural_memory/__init__.py +38 -0
  2. neural_memory/cli/__init__.py +15 -0
  3. neural_memory/cli/__main__.py +6 -0
  4. neural_memory/cli/config.py +176 -0
  5. neural_memory/cli/main.py +2702 -0
  6. neural_memory/cli/storage.py +169 -0
  7. neural_memory/cli/tui.py +471 -0
  8. neural_memory/core/__init__.py +52 -0
  9. neural_memory/core/brain.py +301 -0
  10. neural_memory/core/brain_mode.py +273 -0
  11. neural_memory/core/fiber.py +236 -0
  12. neural_memory/core/memory_types.py +331 -0
  13. neural_memory/core/neuron.py +168 -0
  14. neural_memory/core/project.py +257 -0
  15. neural_memory/core/synapse.py +215 -0
  16. neural_memory/engine/__init__.py +15 -0
  17. neural_memory/engine/activation.py +335 -0
  18. neural_memory/engine/encoder.py +391 -0
  19. neural_memory/engine/retrieval.py +440 -0
  20. neural_memory/extraction/__init__.py +42 -0
  21. neural_memory/extraction/entities.py +547 -0
  22. neural_memory/extraction/parser.py +337 -0
  23. neural_memory/extraction/router.py +396 -0
  24. neural_memory/extraction/temporal.py +428 -0
  25. neural_memory/mcp/__init__.py +9 -0
  26. neural_memory/mcp/__main__.py +6 -0
  27. neural_memory/mcp/server.py +621 -0
  28. neural_memory/py.typed +0 -0
  29. neural_memory/safety/__init__.py +31 -0
  30. neural_memory/safety/freshness.py +238 -0
  31. neural_memory/safety/sensitive.py +304 -0
  32. neural_memory/server/__init__.py +5 -0
  33. neural_memory/server/app.py +99 -0
  34. neural_memory/server/dependencies.py +33 -0
  35. neural_memory/server/models.py +138 -0
  36. neural_memory/server/routes/__init__.py +7 -0
  37. neural_memory/server/routes/brain.py +221 -0
  38. neural_memory/server/routes/memory.py +169 -0
  39. neural_memory/server/routes/sync.py +387 -0
  40. neural_memory/storage/__init__.py +17 -0
  41. neural_memory/storage/base.py +441 -0
  42. neural_memory/storage/factory.py +329 -0
  43. neural_memory/storage/memory_store.py +896 -0
  44. neural_memory/storage/shared_store.py +650 -0
  45. neural_memory/storage/sqlite_store.py +1613 -0
  46. neural_memory/sync/__init__.py +5 -0
  47. neural_memory/sync/client.py +435 -0
  48. neural_memory/unified_config.py +315 -0
  49. neural_memory/utils/__init__.py +5 -0
  50. neural_memory/utils/config.py +98 -0
  51. neural_memory-0.1.0.dist-info/METADATA +314 -0
  52. neural_memory-0.1.0.dist-info/RECORD +55 -0
  53. neural_memory-0.1.0.dist-info/WHEEL +4 -0
  54. neural_memory-0.1.0.dist-info/entry_points.txt +4 -0
  55. neural_memory-0.1.0.dist-info/licenses/LICENSE +21 -0
neural_memory/extraction/entities.py
@@ -0,0 +1,547 @@
+ """Entity extraction from text."""
+
+ from __future__ import annotations
+
+ import re
+ from dataclasses import dataclass
+ from enum import StrEnum
+
+
+ class EntityType(StrEnum):
+     """Types of named entities."""
+
+     PERSON = "person"
+     LOCATION = "location"
+     ORGANIZATION = "organization"
+     PRODUCT = "product"
+     EVENT = "event"
+     UNKNOWN = "unknown"
+
+
+ @dataclass
+ class Entity:
+     """
+     A named entity extracted from text.
+
+     Attributes:
+         text: The original text of the entity
+         type: The entity type
+         start: Start character position in source text
+         end: End character position in source text
+         confidence: Extraction confidence (0.0 - 1.0)
+     """
+
+     text: str
+     type: EntityType
+     start: int
+     end: int
+     confidence: float = 1.0
+
+
+ class EntityExtractor:
+     """
+     Entity extractor using pattern matching.
+
+     For production use, consider using spaCy or underthesea
+     for better entity recognition. This provides basic
+     rule-based extraction as a fallback.
+     """
+
+     # Common Vietnamese person name prefixes
+     VI_PERSON_PREFIXES = {
+         "anh",
+         "chị",
+         "em",
+         "bạn",
+         "cô",
+         "chú",
+         "bác",
+         "ông",
+         "bà",
+         "thầy",
+         "cô giáo",
+         "mr",
+         "mrs",
+         "ms",
+         "miss",
+     }
+
+     # Common location indicators
+     LOCATION_INDICATORS = {
+         # Vietnamese
+         "ở",
+         "tại",
+         "đến",
+         "từ",
+         "quán",
+         "cafe",
+         "cà phê",
+         "nhà hàng",
+         "công ty",
+         "văn phòng",
+         # English
+         "at",
+         "in",
+         "to",
+         "from",
+         "restaurant",
+         "office",
+         "building",
+         "hotel",
+         "shop",
+         "store",
+     }
+
+     # Pattern for capitalized words (potential entities)
+     CAPITALIZED_PATTERN = re.compile(r"\b([A-Z][a-zA-Z]+(?:\s+[A-Z][a-zA-Z]+)*)\b")
+
+     # Pattern for Vietnamese names (words after person prefixes)
+     VI_NAME_PATTERN = re.compile(
+         r"\b(?:anh|chị|em|bạn|cô|chú|bác|ông|bà)\s+([A-ZÀ-Ỹ][a-zà-ỹ]+(?:\s+[A-ZÀ-Ỹ][a-zà-ỹ]+)*)",
+         re.IGNORECASE,
+     )
+
+     def __init__(self, use_nlp: bool = False) -> None:
+         """
+         Initialize the extractor.
+
+         Args:
+             use_nlp: If True, try to load spaCy/underthesea NER models
+         """
+         self._use_nlp = use_nlp
+         self._nlp_en = None
+         self._nlp_vi = None
+
+         if use_nlp:
+             self._init_nlp()
+
+     def _init_nlp(self) -> None:
+         """Initialize NLP models if available."""
+         # Try to load spaCy for English
+         try:
+             import spacy
+
+             self._nlp_en = spacy.load("en_core_web_sm")
+         except (ImportError, OSError):
+             pass
+
+         # Try to load underthesea for Vietnamese
+         try:
+             import underthesea
+
+             self._nlp_vi = underthesea
+         except ImportError:
+             pass
+
+     def extract(
+         self,
+         text: str,
+         language: str = "auto",
+     ) -> list[Entity]:
+         """
+         Extract entities from text.
+
+         Args:
+             text: The text to extract from
+             language: "vi", "en", or "auto"
+
+         Returns:
+             List of Entity objects
+         """
+         entities: list[Entity] = []
+
+         # Try NLP-based extraction first
+         if self._use_nlp:
+             nlp_entities = self._extract_with_nlp(text, language)
+             if nlp_entities:
+                 return nlp_entities
+
+         # Fall back to pattern-based extraction
+         entities.extend(self._extract_vietnamese_names(text))
+         entities.extend(self._extract_capitalized_words(text, entities))
+         entities.extend(self._extract_locations(text, entities))
+
+         # Remove duplicates
+         seen: set[str] = set()
+         unique: list[Entity] = []
+         for entity in entities:
+             key = f"{entity.text.lower()}:{entity.type}"
+             if key not in seen:
+                 seen.add(key)
+                 unique.append(entity)
+
+         return unique
+
+     def _extract_with_nlp(
+         self,
+         text: str,
+         language: str,
+     ) -> list[Entity] | None:
+         """Try to extract using NLP models."""
+         if language in ("en", "auto") and self._nlp_en:
+             doc = self._nlp_en(text)
+             entities = []
+             for ent in doc.ents:
+                 entity_type = self._map_spacy_type(ent.label_)
+                 if entity_type:
+                     entities.append(
+                         Entity(
+                             text=ent.text,
+                             type=entity_type,
+                             start=ent.start_char,
+                             end=ent.end_char,
+                             confidence=0.9,
+                         )
+                     )
+             if entities:
+                 return entities
+
+         if language in ("vi", "auto") and self._nlp_vi:
+             try:
+                 ner_results = self._nlp_vi.ner(text)
+                 entities = []
+                 for word, _pos, _chunk, tag in ner_results:  # underthesea.ner() yields (word, pos, chunk, ner-tag) tuples
+                     if tag.startswith("B-") or tag.startswith("I-"):
+                         entity_type = self._map_underthesea_type(tag[2:])
+                         if entity_type:
+                             # Find position in text
+                             start = text.find(word)
+                             if start >= 0:
+                                 entities.append(
+                                     Entity(
+                                         text=word,
+                                         type=entity_type,
+                                         start=start,
+                                         end=start + len(word),
+                                         confidence=0.85,
+                                     )
+                                 )
+                 if entities:
+                     return entities
+             except Exception:
+                 pass
+
+         return None
+
+     def _map_spacy_type(self, label: str) -> EntityType | None:
+         """Map spaCy NER label to EntityType."""
+         mapping = {
+             "PERSON": EntityType.PERSON,
+             "PER": EntityType.PERSON,
+             "GPE": EntityType.LOCATION,
+             "LOC": EntityType.LOCATION,
+             "FAC": EntityType.LOCATION,
+             "ORG": EntityType.ORGANIZATION,
+             "PRODUCT": EntityType.PRODUCT,
+             "EVENT": EntityType.EVENT,
+         }
+         return mapping.get(label)
+
+     def _map_underthesea_type(self, label: str) -> EntityType | None:
+         """Map underthesea NER label to EntityType."""
+         mapping = {
+             "PER": EntityType.PERSON,
+             "LOC": EntityType.LOCATION,
+             "ORG": EntityType.ORGANIZATION,
+         }
+         return mapping.get(label)
+
+     def _extract_vietnamese_names(self, text: str) -> list[Entity]:
+         """Extract Vietnamese person names."""
+         entities = []
+
+         for match in self.VI_NAME_PATTERN.finditer(text):
+             name = match.group(1)
+             entities.append(
+                 Entity(
+                     text=name,
+                     type=EntityType.PERSON,
+                     start=match.start(1),
+                     end=match.end(1),
+                     confidence=0.8,
+                 )
+             )
+
+         return entities
+
+     def _extract_capitalized_words(
+         self,
+         text: str,
+         existing: list[Entity],
+     ) -> list[Entity]:
+         """Extract capitalized words as potential entities."""
+         entities = []
+         existing_spans = {(e.start, e.end) for e in existing}
+
+         for match in self.CAPITALIZED_PATTERN.finditer(text):
+             # Skip if already extracted
+             if (match.start(), match.end()) in existing_spans:
+                 continue
+
+             word = match.group(1)
+
+             # Skip common words
+             if word.lower() in {"the", "a", "an", "i", "my", "we", "they"}:
+                 continue
+
+             # Skip if at start of sentence (could be just capitalization)
+             if match.start() == 0 or text[match.start() - 1] in ".!?\n":
+                 # Still include if it looks like a proper noun
+                 if len(word.split()) == 1 and len(word) < 4:
+                     continue
+
+             entities.append(
+                 Entity(
+                     text=word,
+                     type=EntityType.UNKNOWN,
+                     start=match.start(),
+                     end=match.end(),
+                     confidence=0.5,
+                 )
+             )
+
+         return entities
+
+     def _extract_locations(
+         self,
+         text: str,
+         existing: list[Entity],
+     ) -> list[Entity]:
+         """Extract locations based on context indicators."""
+         entities = []
+         existing_texts = {e.text.lower() for e in existing}
+
+         # Find words after location indicators
+         for indicator in self.LOCATION_INDICATORS:
+             pattern = re.compile(
+                 rf"\b{re.escape(indicator)}\s+([A-ZÀ-Ỹ][a-zà-ỹA-ZÀ-Ỹ\s]+?)(?:[,.]|\s+(?:với|và|to|with|for)|$)",
+                 re.IGNORECASE,
+             )
+
+             for match in pattern.finditer(text):
+                 location = match.group(1).strip()
+
+                 if location.lower() in existing_texts:
+                     continue
+
+                 if len(location) < 2:
+                     continue
+
+                 entities.append(
+                     Entity(
+                         text=location,
+                         type=EntityType.LOCATION,
+                         start=match.start(1),
+                         end=match.start(1) + len(location),
+                         confidence=0.7,
+                     )
+                 )
+
+         return entities
+
+
+ def extract_keywords(text: str, min_length: int = 2) -> list[str]:
+     """
+     Extract keywords from text (simple word extraction).
+
+     This is a basic keyword extractor. For better results,
+     consider using TF-IDF or other NLP techniques.
+
+     Args:
+         text: The text to extract from
+         min_length: Minimum word length
+
+     Returns:
+         List of keywords
+     """
+     # Common stop words (English + Vietnamese)
+     stop_words = {
+         # English
+         "the",
+         "a",
+         "an",
+         "is",
+         "are",
+         "was",
+         "were",
+         "be",
+         "been",
+         "being",
+         "have",
+         "has",
+         "had",
+         "do",
+         "does",
+         "did",
+         "will",
+         "would",
+         "could",
+         "should",
+         "may",
+         "might",
+         "must",
+         "shall",
+         "can",
+         "need",
+         "dare",
+         "ought",
+         "used",
+         "to",
+         "of",
+         "in",
+         "for",
+         "on",
+         "with",
+         "at",
+         "by",
+         "from",
+         "as",
+         "into",
+         "through",
+         "during",
+         "before",
+         "after",
+         "above",
+         "below",
+         "between",
+         "under",
+         "again",
+         "further",
+         "then",
+         "once",
+         "here",
+         "there",
+         "when",
+         "where",
+         "why",
+         "how",
+         "all",
+         "each",
+         "few",
+         "more",
+         "most",
+         "other",
+         "some",
+         "such",
+         "no",
+         "nor",
+         "not",
+         "only",
+         "own",
+         "same",
+         "so",
+         "than",
+         "too",
+         "very",
+         "just",
+         "and",
+         "but",
+         "if",
+         "or",
+         "because",
+         "until",
+         "while",
+         "this",
+         "that",
+         "these",
+         "those",
+         "i",
+         "me",
+         "my",
+         "myself",
+         "we",
+         "our",
+         "ours",
+         "ourselves",
+         "you",
+         "your",
+         "yours",
+         "yourself",
+         "he",
+         "him",
+         "his",
+         "himself",
+         "she",
+         "her",
+         "hers",
+         "herself",
+         "it",
+         "its",
+         "itself",
+         "they",
+         "them",
+         "their",
+         "theirs",
+         "what",
+         "which",
+         "who",
+         "whom",
+         # Vietnamese
+         "và",
+         "của",
+         "là",
+         "có",
+         "được",
+         "cho",
+         "với",
+         "này",
+         "trong",
+         "để",
+         "các",
+         "những",
+         "một",
+         "đã",
+         "tôi",
+         "bạn",
+         "anh",
+         "chị",
+         "em",
+         "ở",
+         "tại",
+         "khi",
+         "thì",
+         "mà",
+         "nếu",
+         "vì",
+         "cũng",
+         "như",
+         "từ",
+         "đến",
+         "lại",
+         "ra",
+         "vào",
+         "lên",
+         "xuống",
+         "rồi",
+         "sẽ",
+         "đang",
+         "vẫn",
+         "còn",
+         "chỉ",
+         "rất",
+         "quá",
+         "làm",
+         "gì",
+         "sao",
+         "nào",
+         "đâu",
+         "ai",
+         "bao",
+         "nhiêu",
+     }
+
+     # Tokenize (simple split)
+     words = re.findall(r"\b[a-zA-ZÀ-ỹ]+\b", text.lower())
+
+     # Filter
+     keywords = [w for w in words if len(w) >= min_length and w not in stop_words]
+
+     # Remove duplicates while preserving order
+     seen: set[str] = set()
+     unique: list[str] = []
+     for word in keywords:
+         if word not in seen:
+             seen.add(word)
+             unique.append(word)
+
+     return unique
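
For orientation, the pattern-matching fallback above can be exercised without spaCy or underthesea installed. The sketch below assumes the module imports as neural_memory.extraction.entities (per the file list); the sample sentence and the expected values in the comments are illustrative, not taken from the package's tests.

from neural_memory.extraction.entities import EntityExtractor, extract_keywords

extractor = EntityExtractor()  # use_nlp=False: regex and pattern rules only

text = "Met anh Minh at Highlands Coffee in Hanoi."

for entity in extractor.extract(text):
    # VI_NAME_PATTERN should yield "Minh" as PERSON (confidence 0.8);
    # capitalized spans like "Highlands Coffee" and "Hanoi" come back as
    # UNKNOWN candidates (0.5); the "at" indicator yields a coarse LOCATION
    # guess (0.7) spanning "Highlands Coffee in Hanoi".
    print(f"{entity.type.value:>12}  {entity.text!r}  conf={entity.confidence}")

# Keyword extraction drops stop words ("anh", "at", "in") and preserves order:
print(extract_keywords(text))
# expected: ['met', 'minh', 'highlands', 'coffee', 'hanoi']

Note that the duplicate-removal step in extract() keys on lowercased text plus entity type, so the same span reported by two heuristics under different types is kept twice by design.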