alma-memory 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. alma/__init__.py +296 -226
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -430
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -265
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -106
  26. alma/graph/backends/__init__.py +32 -32
  27. alma/graph/backends/kuzu.py +624 -624
  28. alma/graph/backends/memgraph.py +432 -432
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -444
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -509
  55. alma/observability/__init__.py +91 -84
  56. alma/observability/config.py +302 -302
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -424
  59. alma/observability/metrics.py +583 -583
  60. alma/observability/tracing.py +440 -440
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -427
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -90
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1259
  80. alma/storage/base.py +1083 -583
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -103
  83. alma/storage/file_based.py +614 -614
  84. alma/storage/migrations/__init__.py +21 -21
  85. alma/storage/migrations/base.py +321 -321
  86. alma/storage/migrations/runner.py +323 -323
  87. alma/storage/migrations/version_stores.py +337 -337
  88. alma/storage/migrations/versions/__init__.py +11 -11
  89. alma/storage/migrations/versions/v1_0_0.py +373 -373
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1559
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1457
  95. alma/testing/__init__.py +46 -46
  96. alma/testing/factories.py +301 -301
  97. alma/testing/mocks.py +389 -389
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.1.dist-info/RECORD +0 -93
  110. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/retrieval/embeddings.py
@@ -1,202 +1,202 @@
-"""
-ALMA Embedding Providers.
-
-Supports local (sentence-transformers) and Azure OpenAI embeddings.
-"""
-
-import logging
-from abc import ABC, abstractmethod
-from typing import List, Optional
-
-logger = logging.getLogger(__name__)
-
-
-class EmbeddingProvider(ABC):
-    """Abstract base class for embedding providers."""
-
-    @abstractmethod
-    def encode(self, text: str) -> List[float]:
-        """Generate embedding for text."""
-        pass
-
-    @abstractmethod
-    def encode_batch(self, texts: List[str]) -> List[List[float]]:
-        """Generate embeddings for multiple texts."""
-        pass
-
-    @property
-    @abstractmethod
-    def dimension(self) -> int:
-        """Return embedding dimension."""
-        pass
-
-
-class LocalEmbedder(EmbeddingProvider):
-    """
-    Local embeddings using sentence-transformers.
-
-    Default model: all-MiniLM-L6-v2 (384 dimensions, fast, good quality)
-    """
-
-    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
-        """
-        Initialize local embedder.
-
-        Args:
-            model_name: Sentence-transformers model name
-        """
-        self.model_name = model_name
-        self._model = None
-        self._dimension: Optional[int] = None
-
-    def _load_model(self):
-        """Lazy load the model."""
-        if self._model is None:
-            try:
-                from sentence_transformers import SentenceTransformer
-
-                logger.info(f"Loading embedding model: {self.model_name}")
-                self._model = SentenceTransformer(self.model_name)
-                self._dimension = self._model.get_sentence_embedding_dimension()
-                logger.info(f"Model loaded, dimension: {self._dimension}")
-            except ImportError as err:
-                raise ImportError(
-                    "sentence-transformers is required for local embeddings. "
-                    "Install with: pip install sentence-transformers"
-                ) from err
-
-    def encode(self, text: str) -> List[float]:
-        """Generate embedding for text."""
-        self._load_model()
-        embedding = self._model.encode(text, normalize_embeddings=True)
-        return embedding.tolist()
-
-    def encode_batch(self, texts: List[str]) -> List[List[float]]:
-        """Generate embeddings for multiple texts."""
-        self._load_model()
-        embeddings = self._model.encode(texts, normalize_embeddings=True)
-        return [emb.tolist() for emb in embeddings]
-
-    @property
-    def dimension(self) -> int:
-        """Return embedding dimension."""
-        if self._dimension is None:
-            self._load_model()
-        return self._dimension or 384  # Default for all-MiniLM-L6-v2
-
-
-class AzureEmbedder(EmbeddingProvider):
-    """
-    Azure OpenAI embeddings.
-
-    Uses text-embedding-3-small by default (1536 dimensions).
-    """
-
-    def __init__(
-        self,
-        endpoint: Optional[str] = None,
-        api_key: Optional[str] = None,
-        deployment: str = "text-embedding-3-small",
-        api_version: str = "2024-02-01",
-    ):
-        """
-        Initialize Azure OpenAI embedder.
-
-        Args:
-            endpoint: Azure OpenAI endpoint (or use AZURE_OPENAI_ENDPOINT env var)
-            api_key: Azure OpenAI API key (or use AZURE_OPENAI_KEY env var)
-            deployment: Deployment name for embedding model
-            api_version: API version
-        """
-        import os

-        self.endpoint = endpoint or os.environ.get("AZURE_OPENAI_ENDPOINT")
-        self.api_key = api_key or os.environ.get("AZURE_OPENAI_KEY")
-        self.deployment = deployment
-        self.api_version = api_version
-        self._client = None
-        self._dimension = 1536  # Default for text-embedding-3-small
-
-        if not self.endpoint:
-            raise ValueError(
-                "Azure OpenAI endpoint required. Set AZURE_OPENAI_ENDPOINT env var "
-                "or pass endpoint parameter."
-            )
-
-    def _get_client(self):
-        """Get or create Azure OpenAI client."""
-        if self._client is None:
-            try:
-                from openai import AzureOpenAI
-
-                self._client = AzureOpenAI(
-                    azure_endpoint=self.endpoint,
-                    api_key=self.api_key,
-                    api_version=self.api_version,
-                )
-            except ImportError as err:
-                raise ImportError(
-                    "openai is required for Azure embeddings. "
-                    "Install with: pip install openai"
-                ) from err
-        return self._client
-
-    def encode(self, text: str) -> List[float]:
-        """Generate embedding for text."""
-        client = self._get_client()
-        response = client.embeddings.create(
-            input=text,
-            model=self.deployment,
-        )
-        return response.data[0].embedding
-
-    def encode_batch(self, texts: List[str]) -> List[List[float]]:
-        """Generate embeddings for multiple texts."""
-        client = self._get_client()
-        response = client.embeddings.create(
-            input=texts,
-            model=self.deployment,
-        )
-        # Sort by index to ensure order matches input
-        sorted_data = sorted(response.data, key=lambda x: x.index)
-        return [item.embedding for item in sorted_data]
-
-    @property
-    def dimension(self) -> int:
-        """Return embedding dimension."""
-        return self._dimension
-
-
-class MockEmbedder(EmbeddingProvider):
-    """
-    Mock embedder for testing.
-
-    Generates deterministic fake embeddings based on text hash.
-    """
-
-    def __init__(self, dimension: int = 384):
-        """Initialize mock embedder."""
-        self._dimension = dimension
-
-    def encode(self, text: str) -> List[float]:
-        """Generate fake embedding based on text hash."""
-        import hashlib
-
-        # Create deterministic embedding from text hash
-        hash_bytes = hashlib.sha256(text.encode()).digest()
-        # Use first N bytes to create float values
-        embedding = []
-        for i in range(self._dimension):
-            byte_val = hash_bytes[i % len(hash_bytes)]
-            # Normalize to [-1, 1] range
-            embedding.append((byte_val / 127.5) - 1.0)
-        return embedding
-
-    def encode_batch(self, texts: List[str]) -> List[List[float]]:
-        """Generate fake embeddings for multiple texts."""
-        return [self.encode(text) for text in texts]
-
-    @property
-    def dimension(self) -> int:
-        """Return embedding dimension."""
-        return self._dimension
+"""
+ALMA Embedding Providers.
+
+Supports local (sentence-transformers) and Azure OpenAI embeddings.
+"""
+
+import logging
+from abc import ABC, abstractmethod
+from typing import List, Optional
+
+logger = logging.getLogger(__name__)
+
+
+class EmbeddingProvider(ABC):
+    """Abstract base class for embedding providers."""
+
+    @abstractmethod
+    def encode(self, text: str) -> List[float]:
+        """Generate embedding for text."""
+        pass
+
+    @abstractmethod
+    def encode_batch(self, texts: List[str]) -> List[List[float]]:
+        """Generate embeddings for multiple texts."""
+        pass
+
+    @property
+    @abstractmethod
+    def dimension(self) -> int:
+        """Return embedding dimension."""
+        pass
+
+
+class LocalEmbedder(EmbeddingProvider):
+    """
+    Local embeddings using sentence-transformers.
+
+    Default model: all-MiniLM-L6-v2 (384 dimensions, fast, good quality)
+    """
+
+    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
+        """
+        Initialize local embedder.
+
+        Args:
+            model_name: Sentence-transformers model name
+        """
+        self.model_name = model_name
+        self._model = None
+        self._dimension: Optional[int] = None
+
+    def _load_model(self):
+        """Lazy load the model."""
+        if self._model is None:
+            try:
+                from sentence_transformers import SentenceTransformer
+
+                logger.info(f"Loading embedding model: {self.model_name}")
+                self._model = SentenceTransformer(self.model_name)
+                self._dimension = self._model.get_sentence_embedding_dimension()
+                logger.info(f"Model loaded, dimension: {self._dimension}")
+            except ImportError as err:
+                raise ImportError(
+                    "sentence-transformers is required for local embeddings. "
+                    "Install with: pip install sentence-transformers"
+                ) from err
+
+    def encode(self, text: str) -> List[float]:
+        """Generate embedding for text."""
+        self._load_model()
+        embedding = self._model.encode(text, normalize_embeddings=True)
+        return embedding.tolist()
+
+    def encode_batch(self, texts: List[str]) -> List[List[float]]:
+        """Generate embeddings for multiple texts."""
+        self._load_model()
+        embeddings = self._model.encode(texts, normalize_embeddings=True)
+        return [emb.tolist() for emb in embeddings]
+
+    @property
+    def dimension(self) -> int:
+        """Return embedding dimension."""
+        if self._dimension is None:
+            self._load_model()
+        return self._dimension or 384  # Default for all-MiniLM-L6-v2
+
+
+class AzureEmbedder(EmbeddingProvider):
+    """
+    Azure OpenAI embeddings.
+
+    Uses text-embedding-3-small by default (1536 dimensions).
+    """
+
+    def __init__(
+        self,
+        endpoint: Optional[str] = None,
+        api_key: Optional[str] = None,
+        deployment: str = "text-embedding-3-small",
+        api_version: str = "2024-02-01",
+    ):
+        """
+        Initialize Azure OpenAI embedder.
+
+        Args:
+            endpoint: Azure OpenAI endpoint (or use AZURE_OPENAI_ENDPOINT env var)
+            api_key: Azure OpenAI API key (or use AZURE_OPENAI_KEY env var)
+            deployment: Deployment name for embedding model
+            api_version: API version
+        """
+        import os
+
+        self.endpoint = endpoint or os.environ.get("AZURE_OPENAI_ENDPOINT")
+        self.api_key = api_key or os.environ.get("AZURE_OPENAI_KEY")
+        self.deployment = deployment
+        self.api_version = api_version
+        self._client = None
+        self._dimension = 1536  # Default for text-embedding-3-small
+
+        if not self.endpoint:
+            raise ValueError(
+                "Azure OpenAI endpoint required. Set AZURE_OPENAI_ENDPOINT env var "
+                "or pass endpoint parameter."
+            )
+
+    def _get_client(self):
+        """Get or create Azure OpenAI client."""
+        if self._client is None:
+            try:
+                from openai import AzureOpenAI
+
+                self._client = AzureOpenAI(
+                    azure_endpoint=self.endpoint,
+                    api_key=self.api_key,
+                    api_version=self.api_version,
+                )
+            except ImportError as err:
+                raise ImportError(
+                    "openai is required for Azure embeddings. "
+                    "Install with: pip install openai"
+                ) from err
+        return self._client
+
+    def encode(self, text: str) -> List[float]:
+        """Generate embedding for text."""
+        client = self._get_client()
+        response = client.embeddings.create(
+            input=text,
+            model=self.deployment,
+        )
+        return response.data[0].embedding
+
+    def encode_batch(self, texts: List[str]) -> List[List[float]]:
+        """Generate embeddings for multiple texts."""
+        client = self._get_client()
+        response = client.embeddings.create(
+            input=texts,
+            model=self.deployment,
+        )
+        # Sort by index to ensure order matches input
+        sorted_data = sorted(response.data, key=lambda x: x.index)
+        return [item.embedding for item in sorted_data]
+
+    @property
+    def dimension(self) -> int:
+        """Return embedding dimension."""
+        return self._dimension
+
+
+class MockEmbedder(EmbeddingProvider):
+    """
+    Mock embedder for testing.
+
+    Generates deterministic fake embeddings based on text hash.
+    """
+
+    def __init__(self, dimension: int = 384):
+        """Initialize mock embedder."""
+        self._dimension = dimension
+
+    def encode(self, text: str) -> List[float]:
+        """Generate fake embedding based on text hash."""
+        import hashlib
+
+        # Create deterministic embedding from text hash
+        hash_bytes = hashlib.sha256(text.encode()).digest()
+        # Use first N bytes to create float values
+        embedding = []
+        for i in range(self._dimension):
+            byte_val = hash_bytes[i % len(hash_bytes)]
+            # Normalize to [-1, 1] range
+            embedding.append((byte_val / 127.5) - 1.0)
+        return embedding
+
+    def encode_batch(self, texts: List[str]) -> List[List[float]]:
+        """Generate fake embeddings for multiple texts."""
+        return [self.encode(text) for text in texts]
+
+    @property
+    def dimension(self) -> int:
+        """Return embedding dimension."""
+        return self._dimension
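All three implementations above satisfy the same EmbeddingProvider contract, so a test double can replace a real backend without touching calling code. A minimal usage sketch, assuming only the classes shown in this file and the module path alma/retrieval/embeddings.py from the file list; the nearest() helper and the sample strings are illustrative, not part of the package:

from typing import List

from alma.retrieval.embeddings import EmbeddingProvider, LocalEmbedder, MockEmbedder


def nearest(query: str, corpus: List[str], embedder: EmbeddingProvider) -> str:
    """Return the corpus entry most similar to the query by dot product.

    LocalEmbedder returns normalized vectors, so the dot product equals cosine
    similarity there; MockEmbedder does not normalize, so treat this as a sketch.
    """
    query_vec = embedder.encode(query)
    corpus_vecs = embedder.encode_batch(corpus)
    scores = [sum(a * b for a, b in zip(query_vec, vec)) for vec in corpus_vecs]
    return corpus[scores.index(max(scores))]


# MockEmbedder keeps tests deterministic and avoids model downloads;
# LocalEmbedder("all-MiniLM-L6-v2") or AzureEmbedder(...) would slot in unchanged.
embedder: EmbeddingProvider = MockEmbedder(dimension=384)
print(embedder.dimension)  # 384
print(nearest("vector search", ["graph storage", "embedding retrieval"], embedder))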