memorygraphMCP 0.11.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65):
  1. memorygraph/__init__.py +50 -0
  2. memorygraph/__main__.py +12 -0
  3. memorygraph/advanced_tools.py +509 -0
  4. memorygraph/analytics/__init__.py +46 -0
  5. memorygraph/analytics/advanced_queries.py +727 -0
  6. memorygraph/backends/__init__.py +21 -0
  7. memorygraph/backends/base.py +179 -0
  8. memorygraph/backends/cloud.py +75 -0
  9. memorygraph/backends/cloud_backend.py +858 -0
  10. memorygraph/backends/factory.py +577 -0
  11. memorygraph/backends/falkordb_backend.py +749 -0
  12. memorygraph/backends/falkordblite_backend.py +746 -0
  13. memorygraph/backends/ladybugdb_backend.py +242 -0
  14. memorygraph/backends/memgraph_backend.py +327 -0
  15. memorygraph/backends/neo4j_backend.py +298 -0
  16. memorygraph/backends/sqlite_fallback.py +463 -0
  17. memorygraph/backends/turso.py +448 -0
  18. memorygraph/cli.py +743 -0
  19. memorygraph/cloud_database.py +297 -0
  20. memorygraph/config.py +295 -0
  21. memorygraph/database.py +933 -0
  22. memorygraph/graph_analytics.py +631 -0
  23. memorygraph/integration/__init__.py +69 -0
  24. memorygraph/integration/context_capture.py +426 -0
  25. memorygraph/integration/project_analysis.py +583 -0
  26. memorygraph/integration/workflow_tracking.py +492 -0
  27. memorygraph/intelligence/__init__.py +59 -0
  28. memorygraph/intelligence/context_retrieval.py +447 -0
  29. memorygraph/intelligence/entity_extraction.py +386 -0
  30. memorygraph/intelligence/pattern_recognition.py +420 -0
  31. memorygraph/intelligence/temporal.py +374 -0
  32. memorygraph/migration/__init__.py +27 -0
  33. memorygraph/migration/manager.py +579 -0
  34. memorygraph/migration/models.py +142 -0
  35. memorygraph/migration/scripts/__init__.py +17 -0
  36. memorygraph/migration/scripts/bitemporal_migration.py +595 -0
  37. memorygraph/migration/scripts/multitenancy_migration.py +452 -0
  38. memorygraph/migration_tools_module.py +146 -0
  39. memorygraph/models.py +684 -0
  40. memorygraph/proactive/__init__.py +46 -0
  41. memorygraph/proactive/outcome_learning.py +444 -0
  42. memorygraph/proactive/predictive.py +410 -0
  43. memorygraph/proactive/session_briefing.py +399 -0
  44. memorygraph/relationships.py +668 -0
  45. memorygraph/server.py +883 -0
  46. memorygraph/sqlite_database.py +1876 -0
  47. memorygraph/tools/__init__.py +59 -0
  48. memorygraph/tools/activity_tools.py +262 -0
  49. memorygraph/tools/memory_tools.py +315 -0
  50. memorygraph/tools/migration_tools.py +181 -0
  51. memorygraph/tools/relationship_tools.py +147 -0
  52. memorygraph/tools/search_tools.py +406 -0
  53. memorygraph/tools/temporal_tools.py +339 -0
  54. memorygraph/utils/__init__.py +10 -0
  55. memorygraph/utils/context_extractor.py +429 -0
  56. memorygraph/utils/error_handling.py +151 -0
  57. memorygraph/utils/export_import.py +425 -0
  58. memorygraph/utils/graph_algorithms.py +200 -0
  59. memorygraph/utils/pagination.py +149 -0
  60. memorygraph/utils/project_detection.py +133 -0
  61. memorygraphmcp-0.11.7.dist-info/METADATA +970 -0
  62. memorygraphmcp-0.11.7.dist-info/RECORD +65 -0
  63. memorygraphmcp-0.11.7.dist-info/WHEEL +4 -0
  64. memorygraphmcp-0.11.7.dist-info/entry_points.txt +2 -0
  65. memorygraphmcp-0.11.7.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,386 @@
1
+ """
2
+ Entity Extraction - Automatic entity identification and linking.
3
+
4
+ This module extracts entities from memory content using regex patterns
5
+ and optional NLP models. Supports file paths, functions, classes, errors,
6
+ technologies, concepts, and more.
7
+ """
8
+
9
+ import re
10
+ import logging
11
+ from enum import Enum
12
+ from typing import Optional
13
+ from datetime import datetime
14
+ from pydantic import BaseModel, Field
15
+
16
+ logger = logging.getLogger(__name__)
17
+
18
+
19
class EntityType(Enum):
    """Closed set of categories an extracted entity can be tagged with."""

    # Files and paths, e.g. /path/to/file.py or file.txt
    FILE = "file"
    # Callables, e.g. function_name() or methodName()
    FUNCTION = "function"
    # Type names, e.g. ClassName or ComponentName
    CLASS = "class"
    # Error types, exceptions and error codes
    ERROR = "error"
    # Named technologies, e.g. Python, React, PostgreSQL
    TECHNOLOGY = "technology"
    # Engineering concepts, e.g. authentication, caching, CORS
    CONCEPT = "concept"
    # People, e.g. @username or developer names
    PERSON = "person"
    # Project / repository names
    PROJECT = "project"
    # CLI commands
    COMMAND = "command"
    # npm / pip package names
    PACKAGE = "package"
    # HTTP(S) URLs
    URL = "url"
    # Identifiers, e.g. variable_name or CONSTANT_NAME
    VARIABLE = "variable"
34
+
35
+
36
class Entity(BaseModel):
    """A single entity found in a piece of text, with its type and location."""

    # The matched text itself (required).
    text: str = Field(
        ...,
        description="The extracted entity text",
    )
    # Category assigned to the match (required).
    entity_type: EntityType = Field(
        ...,
        description="Type of the entity",
    )
    # Score constrained to the closed interval [0.0, 1.0]; defaults to 1.0.
    confidence: float = Field(
        default=1.0,
        ge=0.0,
        le=1.0,
        description="Extraction confidence",
    )
    # Optional snippet of text around the match.
    context: Optional[str] = Field(
        default=None,
        description="Surrounding context",
    )
    # Optional character offsets of the match within the analysed text.
    start_pos: Optional[int] = Field(
        default=None,
        description="Start position in text",
    )
    end_pos: Optional[int] = Field(
        default=None,
        description="End position in text",
    )
45
+
46
+
47
class EntityExtractor:
    """Extract entities from text using regex patterns and, optionally, spaCy.

    Each entry in ``PATTERNS`` maps an ``EntityType`` to a list of regular
    expressions. Patterns whose meaning depends on letter case (PascalCase
    class names, UPPER_CASE constants, lower-case function names, ``*Error``
    names) are matched case-sensitively; every other type is matched with
    ``re.IGNORECASE``. Applying ``re.IGNORECASE`` to the case-based patterns
    would make them match ordinary words.
    """

    # Regex patterns for different entity types
    PATTERNS = {
        EntityType.FILE: [
            # Absolute paths: /path/to/file.py
            r"(?:/[\w\-./]+)",
            # Relative paths with extension: src/file.py
            r"(?:[\w\-./]+\.[\w]+)",
            # Windows paths: C:\path\to\file.py
            r"(?:[A-Z]:\\[\w\-\\./]+)",
        ],
        EntityType.FUNCTION: [
            # function_name()
            r"\b([a-z_]\w*)\(\)",
            # methodName()
            r"\b([a-z]\w*[A-Z]\w*)\(\)",
        ],
        EntityType.CLASS: [
            # ClassName, Handler, Service, Manager, etc.
            r"\b([A-Z][\w]*(?:Class|Handler|Service|Manager|Controller|Provider|Factory|Builder|Strategy|Adapter|Facade|Proxy|Decorator|Observer|Singleton|Component|Module|Store|Action|Reducer|Hook|Context))\b",
            # Generic PascalCase
            r"\b([A-Z][a-z]+(?:[A-Z][a-z]+)+)\b",
        ],
        EntityType.ERROR: [
            # *Error, *Exception
            r"\b(\w*(?:Error|Exception))\b",
            # HTTP status codes
            r"\b([45]\d{2})\b",
            # Error codes like ERR_*, E_*
            r"\b(E(?:RR)?_[\w_]+)\b",
        ],
        EntityType.TECHNOLOGY: [
            # Programming languages
            r"\b(Python|JavaScript|TypeScript|Java|Kotlin|Swift|Go|Rust|C\+\+|C#|Ruby|PHP|Scala|Haskell|Elixir|Clojure|Erlang)\b",
            # C++ / C#: the trailing \b above cannot match after "+" or "#"
            # at the end of a token, so these two need their own pattern.
            r"\b(C\+\+|C#)(?![\w+#])",
            # Frameworks
            r"\b(React|Vue|Angular|Django|Flask|FastAPI|Express|Spring|Rails|Laravel|Symfony|Nest\.?js|Next\.?js|Nuxt\.?js|Svelte|Solid)\b",
            # Databases
            r"\b(PostgreSQL|MySQL|MongoDB|Redis|Neo4j|Memgraph|SQLite|DynamoDB|Cassandra|CouchDB|Elasticsearch|MariaDB|Oracle|MSSQL)\b",
            # Cloud/Infrastructure
            r"\b(AWS|Azure|GCP|Docker|Kubernetes|Terraform|Ansible|Jenkins|GitHub|GitLab|CircleCI|Travis)\b",
        ],
        EntityType.CONCEPT: [
            # Common programming concepts
            r"\b(authentication|authorization|caching|logging|testing|debugging|deployment|migration|refactoring|optimization|validation|serialization|deserialization|encryption|decryption|compression|decompression)\b",
            # Architecture patterns
            r"\b(MVC|MVVM|MVP|REST|GraphQL|gRPC|microservices|monolith|serverless|event-driven|CQRS|DDD|hexagonal|clean architecture)\b",
            # Security concepts
            r"\b(CORS|XSS|CSRF|SQL injection|JWT|OAuth|SAML|TLS|SSL|HTTPS|firewall|WAF)\b",
        ],
        EntityType.COMMAND: [
            # Commands in backticks or quotes
            r"`([^`]+)`",
            r'"([^"]+)"' + r'\s*(?:command|cmd|run|exec)',
        ],
        EntityType.PACKAGE: [
            # npm/pip packages
            r"\b((?:@[\w\-]+\/)?[\w\-]+)\b(?=\s*(?:package|library|module|dependency))",
            # Common package patterns
            r"\b(react-\w+|vue-\w+|@types/\w+|webpack-\w+|babel-\w+|eslint-\w+|pytest-\w+)\b",
        ],
        EntityType.URL: [
            # HTTP(S) URLs
            r"https?://[\w\-./]+(?:\?[\w\-=&]*)?",
        ],
        EntityType.VARIABLE: [
            # CONSTANT_NAME
            r"\b([A-Z][A-Z0-9_]{2,})\b",
            # snake_case
            r"\b([a-z_]\w*[a-z]\w*)\b(?=\s*[:=])",
        ],
    }

    # Entity types whose patterns encode meaning in letter case and therefore
    # must NOT be matched with re.IGNORECASE.
    CASE_SENSITIVE_TYPES = frozenset(
        {
            EntityType.FUNCTION,
            EntityType.CLASS,
            EntityType.ERROR,
            EntityType.VARIABLE,
        }
    )

    def __init__(self, enable_nlp: bool = False):
        """
        Initialize the entity extractor.

        Args:
            enable_nlp: Enable NLP-based extraction (requires spaCy). Default: False.
                If spaCy or its ``en_core_web_sm`` model cannot be loaded, a
                warning is logged and the extractor falls back to regex only.
        """
        self.enable_nlp = enable_nlp
        self.nlp_model = None

        if enable_nlp:
            try:
                import spacy  # type: ignore

                self.nlp_model = spacy.load("en_core_web_sm")
                logger.info("NLP entity extraction enabled")
            except (ImportError, OSError):
                logger.warning(
                    "spaCy not available, falling back to regex-only extraction. "
                    "Install with: pip install spacy && python -m spacy download en_core_web_sm"
                )
                self.enable_nlp = False

    def extract(self, text: str, min_confidence: float = 0.5) -> list[Entity]:
        """
        Extract entities from text.

        Args:
            text: Text to extract entities from
            min_confidence: Minimum confidence threshold (0.0-1.0)

        Returns:
            De-duplicated entities whose confidence is at least ``min_confidence``
        """
        entities: list[Entity] = []

        # Extract using regex patterns
        entities.extend(self._extract_with_regex(text))

        # Extract using NLP if enabled
        if self.enable_nlp and self.nlp_model:
            entities.extend(self._extract_with_nlp(text))

        # Deduplicate first so the best-scoring duplicate is the one filtered
        entities = self._deduplicate(entities)
        return [e for e in entities if e.confidence >= min_confidence]

    def _extract_with_regex(self, text: str) -> list[Entity]:
        """Extract entities by running every pattern in ``PATTERNS`` over ``text``."""
        entities: list[Entity] = []

        for entity_type, patterns in self.PATTERNS.items():
            # Case-based patterns (PascalCase, CONSTANT_NAME, ...) must stay
            # case-sensitive, otherwise they match ordinary words.
            flags = 0 if entity_type in self.CASE_SENSITIVE_TYPES else re.IGNORECASE

            for pattern in patterns:
                for match in re.finditer(pattern, text, flags):
                    # Prefer the first capture group when the pattern has one
                    entity_text = match.group(1) if match.groups() else match.group(0)

                    # Skip very short or very long matches
                    if len(entity_text) < 2 or len(entity_text) > 100:
                        continue

                    # Calculate confidence based on pattern specificity
                    confidence = self._calculate_confidence(entity_type, entity_text, text)

                    # Extract context (50 chars before and after the full match)
                    start = max(0, match.start() - 50)
                    end = min(len(text), match.end() + 50)

                    entities.append(
                        Entity(
                            text=entity_text,
                            entity_type=entity_type,
                            confidence=confidence,
                            context=text[start:end],
                            start_pos=match.start(),
                            end_pos=match.end(),
                        )
                    )

        return entities

    def _extract_with_nlp(self, text: str) -> list[Entity]:
        """Extract entities using the loaded spaCy model, if any."""
        entities: list[Entity] = []

        if not self.nlp_model:
            return entities

        doc = self.nlp_model(text)

        # Map spaCy entity labels to our EntityType; other labels are ignored
        nlp_type_mapping = {
            "PERSON": EntityType.PERSON,
            "ORG": EntityType.PROJECT,  # Organizations often map to projects
            "PRODUCT": EntityType.TECHNOLOGY,
            "GPE": EntityType.CONCEPT,  # Geopolitical entities as concepts
        }

        for ent in doc.ents:
            mapped_type = nlp_type_mapping.get(ent.label_)
            if mapped_type is None:
                continue
            entities.append(
                Entity(
                    text=ent.text,
                    entity_type=mapped_type,
                    confidence=0.8,  # Fixed score assigned to every NLP match
                    context=ent.sent.text if ent.sent else None,
                    start_pos=ent.start_char,
                    end_pos=ent.end_char,
                )
            )

        return entities

    def _calculate_confidence(self, entity_type: EntityType, text: str, full_text: str) -> float:
        """
        Calculate extraction confidence based on entity type and matched text.

        Args:
            entity_type: Type of entity
            text: Extracted entity text
            full_text: Full text being analyzed (currently unused)

        Returns:
            Confidence score (0.0-1.0)
        """
        confidence = 0.7  # Base confidence

        # Boost confidence for specific patterns
        if entity_type == EntityType.FILE:
            if text.endswith((".py", ".js", ".ts", ".jsx", ".tsx", ".md", ".txt", ".json", ".yaml", ".yml")):
                confidence = 0.95
            elif "/" in text or "\\" in text:
                confidence = 0.85

        elif entity_type == EntityType.FUNCTION:
            # Every FUNCTION pattern requires a trailing "()", but the captured
            # text excludes it, so the boost cannot be keyed on "()" in text.
            confidence = 0.9

        elif entity_type == EntityType.CLASS:
            # Higher confidence for known suffixes
            if text.endswith(("Handler", "Service", "Manager", "Controller")):
                confidence = 0.95
            else:
                confidence = 0.75

        elif entity_type == EntityType.ERROR:
            if text.endswith(("Error", "Exception")):
                confidence = 0.95
            elif re.match(r"[45]\d{2}", text):  # HTTP status codes
                confidence = 0.9

        elif entity_type == EntityType.TECHNOLOGY:
            # Known technologies have high confidence
            confidence = 0.95

        elif entity_type == EntityType.URL:
            confidence = 0.99

        elif entity_type == EntityType.COMMAND:
            # Commands in backticks are very reliable
            confidence = 0.9

        return min(confidence, 1.0)

    def _deduplicate(self, entities: list[Entity]) -> list[Entity]:
        """Remove duplicates (same lower-cased text and type), keeping highest confidence."""
        seen: dict[tuple[str, EntityType], Entity] = {}

        for entity in entities:
            key = (entity.text.lower(), entity.entity_type)

            # On a tie the first occurrence is kept
            if key not in seen or entity.confidence > seen[key].confidence:
                seen[key] = entity

        return list(seen.values())
298
+
299
+
300
+ # Shared module-level extractor used by extract_entities(); built with the constructor defaults (enable_nlp=False)
301
+ _default_extractor = EntityExtractor()
302
+
303
+
304
def extract_entities(text: str, min_confidence: float = 0.5) -> list[Entity]:
    """
    Run the module-level default extractor over ``text``.

    Args:
        text: Text to extract entities from
        min_confidence: Minimum confidence threshold (0.0-1.0)

    Returns:
        List of extracted entities

    Example (output is illustrative; other matches may also be reported):
        >>> entities = extract_entities("Fixed authentication bug in src/auth.py")
        >>> for entity in entities:
        ...     print(f"{entity.entity_type.value}: {entity.text}")
    """
    found = _default_extractor.extract(text, min_confidence=min_confidence)
    return found
323
+
324
+
325
async def link_entities(
    backend,
    memory_id: str,
    entities: list[Entity],
) -> list[str]:
    """
    Attach entities to a memory, one backend query per entity.

    For each entity the query MERGEs an ``Entity`` node keyed on text and
    type (setting ``occurrence_count`` to 1 on create, incrementing it on
    match), MATCHes the ``Memory`` node by id, and MERGEs a ``MENTIONS``
    relationship from the memory to the entity.

    Args:
        backend: Database backend instance; must provide an awaitable
            ``execute_query(query, params)``
        memory_id: ID of the memory to link entities to
        entities: List of entities to link

    Returns:
        Entity IDs returned by the backend, in input order. Entities whose
        query raised or returned no rows are omitted.

    Example:
        >>> entities = extract_entities("Fixed React hooks issue")
        >>> entity_ids = await link_entities(backend, memory_id, entities)
    """
    # The statement is identical for every entity; only the parameters change.
    merge_query = """
        MERGE (e:Entity {text: $text, type: $type})
        ON CREATE SET
        e.id = randomUUID(),
        e.created_at = datetime(),
        e.occurrence_count = 1
        ON MATCH SET
        e.occurrence_count = e.occurrence_count + 1,
        e.last_seen = datetime()
        WITH e
        MATCH (m:Memory {id: $memory_id})
        MERGE (m)-[r:MENTIONS]->(e)
        ON CREATE SET
        r.confidence = $confidence,
        r.created_at = datetime()
        RETURN e.id as entity_id
        """

    linked_ids: list[str] = []

    for entity in entities:
        bindings = {
            "text": entity.text,
            "type": entity.entity_type.value,
            "memory_id": memory_id,
            "confidence": entity.confidence,
        }

        try:
            rows = await backend.execute_query(merge_query, bindings)
            if rows:
                linked_ids.append(rows[0]["entity_id"])
                logger.debug(
                    f"Linked entity '{entity.text}' ({entity.entity_type.value}) "
                    f"to memory {memory_id}"
                )
        except Exception as exc:
            # Best effort: log the failure and carry on with the next entity.
            logger.error(f"Failed to link entity '{entity.text}': {exc}")

    return linked_ids