alma-memory 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- alma/__init__.py +296 -226
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -430
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -265
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -106
- alma/graph/backends/__init__.py +32 -32
- alma/graph/backends/kuzu.py +624 -624
- alma/graph/backends/memgraph.py +432 -432
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -444
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -509
- alma/observability/__init__.py +91 -84
- alma/observability/config.py +302 -302
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -424
- alma/observability/metrics.py +583 -583
- alma/observability/tracing.py +440 -440
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -427
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -90
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1259
- alma/storage/base.py +1083 -583
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -103
- alma/storage/file_based.py +614 -614
- alma/storage/migrations/__init__.py +21 -21
- alma/storage/migrations/base.py +321 -321
- alma/storage/migrations/runner.py +323 -323
- alma/storage/migrations/version_stores.py +337 -337
- alma/storage/migrations/versions/__init__.py +11 -11
- alma/storage/migrations/versions/v1_0_0.py +373 -373
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1559
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1457
- alma/testing/__init__.py +46 -46
- alma/testing/factories.py +301 -301
- alma/testing/mocks.py +389 -389
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.1.dist-info/RECORD +0 -93
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/retrieval/embeddings.py
CHANGED
|
@@ -1,202 +1,202 @@
|
|
|
1
|
-
"""
|
|
2
|
-
ALMA Embedding Providers.
|
|
3
|
-
|
|
4
|
-
Supports local (sentence-transformers) and Azure OpenAI embeddings.
|
|
5
|
-
"""
|
|
6
|
-
|
|
7
|
-
import logging
|
|
8
|
-
from abc import ABC, abstractmethod
|
|
9
|
-
from typing import List, Optional
|
|
10
|
-
|
|
11
|
-
logger = logging.getLogger(__name__)
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class EmbeddingProvider(ABC):
|
|
15
|
-
"""Abstract base class for embedding providers."""
|
|
16
|
-
|
|
17
|
-
@abstractmethod
|
|
18
|
-
def encode(self, text: str) -> List[float]:
|
|
19
|
-
"""Generate embedding for text."""
|
|
20
|
-
pass
|
|
21
|
-
|
|
22
|
-
@abstractmethod
|
|
23
|
-
def encode_batch(self, texts: List[str]) -> List[List[float]]:
|
|
24
|
-
"""Generate embeddings for multiple texts."""
|
|
25
|
-
pass
|
|
26
|
-
|
|
27
|
-
@property
|
|
28
|
-
@abstractmethod
|
|
29
|
-
def dimension(self) -> int:
|
|
30
|
-
"""Return embedding dimension."""
|
|
31
|
-
pass
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
class LocalEmbedder(EmbeddingProvider):
|
|
35
|
-
"""
|
|
36
|
-
Local embeddings using sentence-transformers.
|
|
37
|
-
|
|
38
|
-
Default model: all-MiniLM-L6-v2 (384 dimensions, fast, good quality)
|
|
39
|
-
"""
|
|
40
|
-
|
|
41
|
-
def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
|
|
42
|
-
"""
|
|
43
|
-
Initialize local embedder.
|
|
44
|
-
|
|
45
|
-
Args:
|
|
46
|
-
model_name: Sentence-transformers model name
|
|
47
|
-
"""
|
|
48
|
-
self.model_name = model_name
|
|
49
|
-
self._model = None
|
|
50
|
-
self._dimension: Optional[int] = None
|
|
51
|
-
|
|
52
|
-
def _load_model(self):
|
|
53
|
-
"""Lazy load the model."""
|
|
54
|
-
if self._model is None:
|
|
55
|
-
try:
|
|
56
|
-
from sentence_transformers import SentenceTransformer
|
|
57
|
-
|
|
58
|
-
logger.info(f"Loading embedding model: {self.model_name}")
|
|
59
|
-
self._model = SentenceTransformer(self.model_name)
|
|
60
|
-
self._dimension = self._model.get_sentence_embedding_dimension()
|
|
61
|
-
logger.info(f"Model loaded, dimension: {self._dimension}")
|
|
62
|
-
except ImportError as err:
|
|
63
|
-
raise ImportError(
|
|
64
|
-
"sentence-transformers is required for local embeddings. "
|
|
65
|
-
"Install with: pip install sentence-transformers"
|
|
66
|
-
) from err
|
|
67
|
-
|
|
68
|
-
def encode(self, text: str) -> List[float]:
|
|
69
|
-
"""Generate embedding for text."""
|
|
70
|
-
self._load_model()
|
|
71
|
-
embedding = self._model.encode(text, normalize_embeddings=True)
|
|
72
|
-
return embedding.tolist()
|
|
73
|
-
|
|
74
|
-
def encode_batch(self, texts: List[str]) -> List[List[float]]:
|
|
75
|
-
"""Generate embeddings for multiple texts."""
|
|
76
|
-
self._load_model()
|
|
77
|
-
embeddings = self._model.encode(texts, normalize_embeddings=True)
|
|
78
|
-
return [emb.tolist() for emb in embeddings]
|
|
79
|
-
|
|
80
|
-
@property
|
|
81
|
-
def dimension(self) -> int:
|
|
82
|
-
"""Return embedding dimension."""
|
|
83
|
-
if self._dimension is None:
|
|
84
|
-
self._load_model()
|
|
85
|
-
return self._dimension or 384 # Default for all-MiniLM-L6-v2
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
class AzureEmbedder(EmbeddingProvider):
|
|
89
|
-
"""
|
|
90
|
-
Azure OpenAI embeddings.
|
|
91
|
-
|
|
92
|
-
Uses text-embedding-3-small by default (1536 dimensions).
|
|
93
|
-
"""
|
|
94
|
-
|
|
95
|
-
def __init__(
|
|
96
|
-
self,
|
|
97
|
-
endpoint: Optional[str] = None,
|
|
98
|
-
api_key: Optional[str] = None,
|
|
99
|
-
deployment: str = "text-embedding-3-small",
|
|
100
|
-
api_version: str = "2024-02-01",
|
|
101
|
-
):
|
|
102
|
-
"""
|
|
103
|
-
Initialize Azure OpenAI embedder.
|
|
104
|
-
|
|
105
|
-
Args:
|
|
106
|
-
endpoint: Azure OpenAI endpoint (or use AZURE_OPENAI_ENDPOINT env var)
|
|
107
|
-
api_key: Azure OpenAI API key (or use AZURE_OPENAI_KEY env var)
|
|
108
|
-
deployment: Deployment name for embedding model
|
|
109
|
-
api_version: API version
|
|
110
|
-
"""
|
|
111
|
-
import os
|
|
112
|
-
|
|
113
|
-
self.endpoint = endpoint or os.environ.get("AZURE_OPENAI_ENDPOINT")
|
|
114
|
-
self.api_key = api_key or os.environ.get("AZURE_OPENAI_KEY")
|
|
115
|
-
self.deployment = deployment
|
|
116
|
-
self.api_version = api_version
|
|
117
|
-
self._client = None
|
|
118
|
-
self._dimension = 1536 # Default for text-embedding-3-small
|
|
119
|
-
|
|
120
|
-
if not self.endpoint:
|
|
121
|
-
raise ValueError(
|
|
122
|
-
"Azure OpenAI endpoint required. Set AZURE_OPENAI_ENDPOINT env var "
|
|
123
|
-
"or pass endpoint parameter."
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
def _get_client(self):
|
|
127
|
-
"""Get or create Azure OpenAI client."""
|
|
128
|
-
if self._client is None:
|
|
129
|
-
try:
|
|
130
|
-
from openai import AzureOpenAI
|
|
131
|
-
|
|
132
|
-
self._client = AzureOpenAI(
|
|
133
|
-
azure_endpoint=self.endpoint,
|
|
134
|
-
api_key=self.api_key,
|
|
135
|
-
api_version=self.api_version,
|
|
136
|
-
)
|
|
137
|
-
except ImportError as err:
|
|
138
|
-
raise ImportError(
|
|
139
|
-
"openai is required for Azure embeddings. "
|
|
140
|
-
"Install with: pip install openai"
|
|
141
|
-
) from err
|
|
142
|
-
return self._client
|
|
143
|
-
|
|
144
|
-
def encode(self, text: str) -> List[float]:
|
|
145
|
-
"""Generate embedding for text."""
|
|
146
|
-
client = self._get_client()
|
|
147
|
-
response = client.embeddings.create(
|
|
148
|
-
input=text,
|
|
149
|
-
model=self.deployment,
|
|
150
|
-
)
|
|
151
|
-
return response.data[0].embedding
|
|
152
|
-
|
|
153
|
-
def encode_batch(self, texts: List[str]) -> List[List[float]]:
|
|
154
|
-
"""Generate embeddings for multiple texts."""
|
|
155
|
-
client = self._get_client()
|
|
156
|
-
response = client.embeddings.create(
|
|
157
|
-
input=texts,
|
|
158
|
-
model=self.deployment,
|
|
159
|
-
)
|
|
160
|
-
# Sort by index to ensure order matches input
|
|
161
|
-
sorted_data = sorted(response.data, key=lambda x: x.index)
|
|
162
|
-
return [item.embedding for item in sorted_data]
|
|
163
|
-
|
|
164
|
-
@property
|
|
165
|
-
def dimension(self) -> int:
|
|
166
|
-
"""Return embedding dimension."""
|
|
167
|
-
return self._dimension
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
class MockEmbedder(EmbeddingProvider):
|
|
171
|
-
"""
|
|
172
|
-
Mock embedder for testing.
|
|
173
|
-
|
|
174
|
-
Generates deterministic fake embeddings based on text hash.
|
|
175
|
-
"""
|
|
176
|
-
|
|
177
|
-
def __init__(self, dimension: int = 384):
|
|
178
|
-
"""Initialize mock embedder."""
|
|
179
|
-
self._dimension = dimension
|
|
180
|
-
|
|
181
|
-
def encode(self, text: str) -> List[float]:
|
|
182
|
-
"""Generate fake embedding based on text hash."""
|
|
183
|
-
import hashlib
|
|
184
|
-
|
|
185
|
-
# Create deterministic embedding from text hash
|
|
186
|
-
hash_bytes = hashlib.sha256(text.encode()).digest()
|
|
187
|
-
# Use first N bytes to create float values
|
|
188
|
-
embedding = []
|
|
189
|
-
for i in range(self._dimension):
|
|
190
|
-
byte_val = hash_bytes[i % len(hash_bytes)]
|
|
191
|
-
# Normalize to [-1, 1] range
|
|
192
|
-
embedding.append((byte_val / 127.5) - 1.0)
|
|
193
|
-
return embedding
|
|
194
|
-
|
|
195
|
-
def encode_batch(self, texts: List[str]) -> List[List[float]]:
|
|
196
|
-
"""Generate fake embeddings for multiple texts."""
|
|
197
|
-
return [self.encode(text) for text in texts]
|
|
198
|
-
|
|
199
|
-
@property
|
|
200
|
-
def dimension(self) -> int:
|
|
201
|
-
"""Return embedding dimension."""
|
|
202
|
-
return self._dimension
|
|
1
|
+
"""
|
|
2
|
+
ALMA Embedding Providers.
|
|
3
|
+
|
|
4
|
+
Supports local (sentence-transformers) and Azure OpenAI embeddings.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import logging
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from typing import List, Optional
|
|
10
|
+
|
|
11
|
+
logger = logging.getLogger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class EmbeddingProvider(ABC):
    """Interface that every embedding backend implements.

    Concrete providers must supply single-text and batch encoding, plus
    the dimensionality of the vectors they produce.
    """

    @abstractmethod
    def encode(self, text: str) -> List[float]:
        """Embed a single piece of text as a vector of floats."""
        ...

    @abstractmethod
    def encode_batch(self, texts: List[str]) -> List[List[float]]:
        """Embed several texts at once, preserving input order."""
        ...

    @property
    @abstractmethod
    def dimension(self) -> int:
        """Number of components in each embedding vector."""
        ...
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class LocalEmbedder(EmbeddingProvider):
    """
    Sentence-transformers based embedder that runs entirely on the local machine.

    The default model, all-MiniLM-L6-v2, produces 384-dimensional vectors and
    offers a good speed/quality trade-off.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """
        Create a local embedder.

        Args:
            model_name: Name of the sentence-transformers model to load.
        """
        self.model_name = model_name
        self._model = None
        self._dimension: Optional[int] = None

    def _load_model(self):
        """Import and instantiate the model on first use (lazy loading)."""
        if self._model is not None:
            return
        try:
            from sentence_transformers import SentenceTransformer

            logger.info(f"Loading embedding model: {self.model_name}")
            self._model = SentenceTransformer(self.model_name)
            self._dimension = self._model.get_sentence_embedding_dimension()
            logger.info(f"Model loaded, dimension: {self._dimension}")
        except ImportError as err:
            raise ImportError(
                "sentence-transformers is required for local embeddings. "
                "Install with: pip install sentence-transformers"
            ) from err

    def encode(self, text: str) -> List[float]:
        """Embed one text; the returned vector is L2-normalized."""
        self._load_model()
        return self._model.encode(text, normalize_embeddings=True).tolist()

    def encode_batch(self, texts: List[str]) -> List[List[float]]:
        """Embed many texts; each returned vector is L2-normalized."""
        self._load_model()
        vectors = self._model.encode(texts, normalize_embeddings=True)
        return [vec.tolist() for vec in vectors]

    @property
    def dimension(self) -> int:
        """Vector size reported by the loaded model."""
        if self._dimension is None:
            self._load_model()
        return self._dimension or 384  # fallback matches all-MiniLM-L6-v2
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class AzureEmbedder(EmbeddingProvider):
    """
    Azure OpenAI embeddings.

    Uses text-embedding-3-small by default (1536 dimensions).
    """

    # Output dimensions of the Azure OpenAI embedding models we know about.
    _KNOWN_DIMENSIONS = {
        "text-embedding-3-small": 1536,
        "text-embedding-3-large": 3072,
        "text-embedding-ada-002": 1536,
    }

    def __init__(
        self,
        endpoint: Optional[str] = None,
        api_key: Optional[str] = None,
        deployment: str = "text-embedding-3-small",
        api_version: str = "2024-02-01",
        dimension: Optional[int] = None,
    ):
        """
        Initialize Azure OpenAI embedder.

        Args:
            endpoint: Azure OpenAI endpoint (or use AZURE_OPENAI_ENDPOINT env var)
            api_key: Azure OpenAI API key (or use AZURE_OPENAI_KEY env var)
            deployment: Deployment name for embedding model
            api_version: API version
            dimension: Embedding dimension override. If omitted, it is inferred
                from the deployment name when it matches a known model,
                otherwise 1536.

        Raises:
            ValueError: If no endpoint is provided or configured.
        """
        import os

        self.endpoint = endpoint or os.environ.get("AZURE_OPENAI_ENDPOINT")
        self.api_key = api_key or os.environ.get("AZURE_OPENAI_KEY")
        self.deployment = deployment
        self.api_version = api_version
        self._client = None
        # Previously hard-coded to 1536 regardless of deployment; now inferred
        # so e.g. text-embedding-3-large correctly reports 3072. Unknown
        # deployments keep the old default of 1536.
        if dimension is not None:
            self._dimension = dimension
        else:
            self._dimension = self._KNOWN_DIMENSIONS.get(deployment, 1536)

        if not self.endpoint:
            raise ValueError(
                "Azure OpenAI endpoint required. Set AZURE_OPENAI_ENDPOINT env var "
                "or pass endpoint parameter."
            )

    def _get_client(self):
        """Get or create the Azure OpenAI client (lazy, cached).

        Raises:
            ImportError: If the ``openai`` package is not installed.
        """
        if self._client is None:
            try:
                from openai import AzureOpenAI

                self._client = AzureOpenAI(
                    azure_endpoint=self.endpoint,
                    api_key=self.api_key,
                    api_version=self.api_version,
                )
            except ImportError as err:
                raise ImportError(
                    "openai is required for Azure embeddings. "
                    "Install with: pip install openai"
                ) from err
        return self._client

    def encode(self, text: str) -> List[float]:
        """Generate an embedding for a single text via the Azure API."""
        client = self._get_client()
        response = client.embeddings.create(
            input=text,
            model=self.deployment,
        )
        return response.data[0].embedding

    def encode_batch(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for multiple texts in one API call."""
        client = self._get_client()
        response = client.embeddings.create(
            input=texts,
            model=self.deployment,
        )
        # Sort by index to ensure order matches input
        sorted_data = sorted(response.data, key=lambda x: x.index)
        return [item.embedding for item in sorted_data]

    @property
    def dimension(self) -> int:
        """Return embedding dimension (inferred or explicitly overridden)."""
        return self._dimension
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
class MockEmbedder(EmbeddingProvider):
    """
    Mock embedder for testing.

    Produces deterministic pseudo-embeddings derived from a SHA-256 hash of
    the input text, so identical texts always map to identical vectors.
    """

    def __init__(self, dimension: int = 384):
        """Create a mock embedder emitting vectors of the given size."""
        self._dimension = dimension

    def encode(self, text: str) -> List[float]:
        """Build a fake embedding from the text's SHA-256 digest.

        Digest bytes are cycled to fill the vector and each byte is
        rescaled from [0, 255] into the [-1, 1] range.
        """
        import hashlib

        digest = hashlib.sha256(text.encode()).digest()
        size = len(digest)
        return [
            (digest[i % size] / 127.5) - 1.0 for i in range(self._dimension)
        ]

    def encode_batch(self, texts: List[str]) -> List[List[float]]:
        """Encode each text independently with :meth:`encode`."""
        return [self.encode(item) for item in texts]

    @property
    def dimension(self) -> int:
        """Return embedding dimension."""
        return self._dimension
|