kite-agent 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kite/__init__.py +46 -0
- kite/ab_testing.py +384 -0
- kite/agent.py +556 -0
- kite/agents/__init__.py +3 -0
- kite/agents/plan_execute.py +191 -0
- kite/agents/react_agent.py +509 -0
- kite/agents/reflective_agent.py +90 -0
- kite/agents/rewoo.py +119 -0
- kite/agents/tot.py +151 -0
- kite/conversation.py +125 -0
- kite/core.py +974 -0
- kite/data_loaders.py +111 -0
- kite/embedding_providers.py +372 -0
- kite/llm_providers.py +1278 -0
- kite/memory/__init__.py +6 -0
- kite/memory/advanced_rag.py +333 -0
- kite/memory/graph_rag.py +719 -0
- kite/memory/session_memory.py +423 -0
- kite/memory/vector_memory.py +579 -0
- kite/monitoring.py +611 -0
- kite/observers.py +107 -0
- kite/optimization/__init__.py +9 -0
- kite/optimization/resource_router.py +80 -0
- kite/persistence.py +42 -0
- kite/pipeline/__init__.py +5 -0
- kite/pipeline/deterministic_pipeline.py +323 -0
- kite/pipeline/reactive_pipeline.py +171 -0
- kite/pipeline_manager.py +15 -0
- kite/routing/__init__.py +6 -0
- kite/routing/aggregator_router.py +325 -0
- kite/routing/llm_router.py +149 -0
- kite/routing/semantic_router.py +228 -0
- kite/safety/__init__.py +6 -0
- kite/safety/circuit_breaker.py +360 -0
- kite/safety/guardrails.py +82 -0
- kite/safety/idempotency_manager.py +304 -0
- kite/safety/kill_switch.py +75 -0
- kite/tool.py +183 -0
- kite/tool_registry.py +87 -0
- kite/tools/__init__.py +21 -0
- kite/tools/code_execution.py +53 -0
- kite/tools/contrib/__init__.py +19 -0
- kite/tools/contrib/calculator.py +26 -0
- kite/tools/contrib/datetime_utils.py +20 -0
- kite/tools/contrib/linkedin.py +428 -0
- kite/tools/contrib/web_search.py +30 -0
- kite/tools/mcp/__init__.py +31 -0
- kite/tools/mcp/database_mcp.py +267 -0
- kite/tools/mcp/gdrive_mcp_server.py +503 -0
- kite/tools/mcp/gmail_mcp_server.py +601 -0
- kite/tools/mcp/postgres_mcp_server.py +490 -0
- kite/tools/mcp/slack_mcp_server.py +538 -0
- kite/tools/mcp/stripe_mcp_server.py +219 -0
- kite/tools/search.py +90 -0
- kite/tools/system_tools.py +54 -0
- kite/tools_manager.py +27 -0
- kite_agent-0.1.0.dist-info/METADATA +621 -0
- kite_agent-0.1.0.dist-info/RECORD +61 -0
- kite_agent-0.1.0.dist-info/WHEEL +5 -0
- kite_agent-0.1.0.dist-info/licenses/LICENSE +21 -0
- kite_agent-0.1.0.dist-info/top_level.txt +1 -0
kite/data_loaders.py
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Document Loaders Module
|
|
3
|
+
Provides loaders for various document types: PDF, DOCX, CSV, HTML.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
from typing import List, Dict, Optional
|
|
8
|
+
import logging
|
|
9
|
+
|
|
10
|
+
class DocumentLoader:
    """Loaders for common document formats: PDF, DOCX, CSV, HTML, JSON, text.

    Design contract (kept from the original): loader failures are reported as
    an ``"Error: ..."`` string rather than raised, so callers can treat every
    result uniformly.  ``load_any`` returns ``None`` for unsupported formats.
    """

    @staticmethod
    def load_pdf(file_path: str) -> str:
        """Extract text from a PDF file using PyPDF2, one page per line."""
        try:
            import PyPDF2
            text = ""
            with open(file_path, 'rb') as f:
                reader = PyPDF2.PdfReader(f)
                for page in reader.pages:
                    # extract_text() may return None (e.g. image-only pages);
                    # guard it so we don't raise TypeError on concatenation.
                    text += (page.extract_text() or "") + "\n"
            return text
        except ImportError:
            return "Error: PyPDF2 not installed. Run 'pip install PyPDF2'"
        except Exception as e:
            return f"Error loading PDF: {str(e)}"

    @staticmethod
    def load_docx(file_path: str) -> str:
        """Extract text from a DOCX file (paragraphs joined by newlines)."""
        try:
            import docx
            doc = docx.Document(file_path)
            return "\n".join(para.text for para in doc.paragraphs)
        except ImportError:
            return "Error: python-docx not installed. Run 'pip install python-docx'"
        except Exception as e:
            return f"Error loading DOCX: {str(e)}"

    @staticmethod
    def load_csv(file_path: str) -> str:
        """Render a CSV file as text (pandas if available, stdlib csv otherwise)."""
        try:
            import pandas as pd
            df = pd.read_csv(file_path)
            return df.to_string()
        except ImportError:
            # Stdlib fallback: one comma-joined line per row.  Wrapped in its
            # own try so read errors are reported like every other loader
            # (previously they propagated out of this branch unhandled).
            import csv
            try:
                text = ""
                with open(file_path, mode='r', encoding='utf-8', newline='') as f:
                    for row in csv.reader(f):
                        text += ", ".join(row) + "\n"
                return text
            except Exception as e:
                return f"Error loading CSV: {str(e)}"
        except Exception as e:
            return f"Error loading CSV: {str(e)}"

    @staticmethod
    def load_html(file_path: str) -> str:
        """Extract visible text from an HTML file (scripts/styles stripped)."""
        try:
            from bs4 import BeautifulSoup
            with open(file_path, 'r', encoding='utf-8') as f:
                soup = BeautifulSoup(f, 'html.parser')
            # Drop non-visible content before extracting text.
            for tag in soup(["script", "style"]):
                tag.decompose()
            return soup.get_text(separator=' ')
        except ImportError:
            return "Error: beautifulsoup4 not installed. Run 'pip install beautifulsoup4'"
        except Exception as e:
            return f"Error loading HTML: {str(e)}"

    @classmethod
    def load_any(cls, file_path: str) -> Optional[str]:
        """Auto-detect by extension and load any supported file type.

        Returns:
            Extracted text, an ``"Error: ..."`` string on failure, or ``None``
            for unsupported extensions (annotation fixed from ``str`` to
            ``Optional[str]`` to match the actual behavior).
        """
        ext = os.path.splitext(file_path)[1].lower()
        if ext == '.pdf':
            return cls.load_pdf(file_path)
        elif ext == '.docx':
            return cls.load_docx(file_path)
        elif ext == '.csv':
            return cls.load_csv(file_path)
        elif ext == '.json':
            try:
                import json
                with open(file_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                return json.dumps(data, indent=2)
            except Exception as e:
                return f"Error loading JSON: {str(e)}"
        elif ext in ['.html', '.htm']:
            return cls.load_html(file_path)
        elif ext in ['.txt', '.md']:
            # Guarded now, for consistency: every other branch reports I/O
            # errors as a string instead of raising.
            try:
                with open(file_path, 'r', encoding='utf-8') as f:
                    return f.read()
            except Exception as e:
                return f"Error loading text file: {str(e)}"
        else:
            # Unsupported format: signal with None instead of an error message.
            return None

    @classmethod
    def load_directory(cls, directory_path: str) -> Dict[str, Optional[str]]:
        """Load every regular file in *directory_path*.

        Returns:
            Mapping of filename -> extracted text; unsupported files map to
            ``None`` (see ``load_any``).  Subdirectories are skipped.
        """
        results: Dict[str, Optional[str]] = {}
        for filename in os.listdir(directory_path):
            file_path = os.path.join(directory_path, filename)
            if os.path.isfile(file_path):
                results[filename] = cls.load_any(file_path)
        return results
|
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Embedding Provider Abstraction Layer
|
|
3
|
+
PRIORITY: Opensource, local-first solutions.
|
|
4
|
+
|
|
5
|
+
Providers:
|
|
6
|
+
- sentence-transformers (Local, Free) - PRIORITY
|
|
7
|
+
- FastEmbed (Local, Free, Fast)
|
|
8
|
+
- Ollama (Local, Free)
|
|
9
|
+
- OpenAI (Commercial)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from typing import List, Optional
|
|
13
|
+
from abc import ABC, abstractmethod
|
|
14
|
+
import logging
|
|
15
|
+
import os
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class BaseEmbeddingProvider(ABC):
    """Abstract interface that every embedding backend must implement."""

    @abstractmethod
    def embed(self, text: str) -> List[float]:
        """Return the embedding vector for a single piece of text."""
        ...

    @abstractmethod
    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Return one embedding vector per input text."""
        ...

    @property
    @abstractmethod
    def dimension(self) -> int:
        """Size of the vectors this provider produces."""
        ...

    @property
    @abstractmethod
    def name(self) -> str:
        """Human-readable identifier for this provider."""
        ...
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# ============================================================================
|
|
45
|
+
# LOCAL / OPENSOURCE (PRIORITY)
|
|
46
|
+
# ============================================================================
|
|
47
|
+
|
|
48
|
+
class SentenceTransformersProvider(BaseEmbeddingProvider):
    """
    sentence-transformers backend - local, free, opensource (priority choice).

    Recommended checkpoints:
    - all-MiniLM-L6-v2: fastest, 384d (default)
    - all-mpnet-base-v2: higher quality, 768d
    - multi-qa-MiniLM-L6-cos-v1: tuned for Q&A retrieval

    Installation: pip install sentence-transformers
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model_name = model_name
        self.logger = logging.getLogger("SentenceTransformers")

        try:
            from sentence_transformers import SentenceTransformer
            self.model = SentenceTransformer(model_name)
            self._dimension = self.model.get_sentence_embedding_dimension()
            self.logger.info(f"[OK] Loaded {model_name} ({self._dimension}d)")
        except ImportError:
            raise ImportError("pip install sentence-transformers")

    def embed(self, text: str) -> List[float]:
        """Embed a single text; returns a plain Python list of floats."""
        vector = self.model.encode(text, convert_to_numpy=True)
        return vector.tolist()

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Embed many texts at once; shows a progress bar for large batches."""
        show_bar = len(texts) > 100
        vectors = self.model.encode(
            texts,
            convert_to_numpy=True,
            show_progress_bar=show_bar,
        )
        return vectors.tolist()

    @property
    def dimension(self) -> int:
        # Reported by the loaded model itself at construction time.
        return self._dimension

    @property
    def name(self) -> str:
        return f"SentenceTransformers/{self.model_name}"
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class FastEmbedProvider(BaseEmbeddingProvider):
    """
    FastEmbed - faster than sentence-transformers.

    Installation: pip install fastembed
    Speed: 2-3x faster, same quality
    """

    def __init__(self, model_name: str = "BAAI/bge-small-en-v1.5"):
        self.model_name = model_name
        self.logger = logging.getLogger("FastEmbed")

        try:
            from fastembed import TextEmbedding
            self.model = TextEmbedding(model_name)
            # Dimension depends on the model (bge-small is 384d, but
            # bge-base - also recommended by this package - is 768d).
            # Previously hard-coded to 384; now probed lazily on first use.
            self._dimension: Optional[int] = None
            self.logger.info(f"[OK] FastEmbed loaded: {model_name}")
        except ImportError:
            raise ImportError("pip install fastembed")

    def embed(self, text: str) -> List[float]:
        """Generate embedding for a single text."""
        vector = list(self.model.embed([text]))[0].tolist()
        if self._dimension is None:
            self._dimension = len(vector)
        return vector

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Batch embeddings."""
        vectors = [emb.tolist() for emb in self.model.embed(texts)]
        if vectors and self._dimension is None:
            self._dimension = len(vectors[0])
        return vectors

    @property
    def dimension(self) -> int:
        """Embedding dimension, probed from the model on first access."""
        if self._dimension is None:
            self._dimension = len(self.embed(""))
        return self._dimension

    @property
    def name(self) -> str:
        return f"FastEmbed/{self.model_name}"
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class OllamaEmbeddingProvider(BaseEmbeddingProvider):
    """
    Ollama Embeddings - local, FREE.

    Models: nomic-embed-text, mxbai-embed-large
    Uses the same HTTP API as Ollama LLMs.
    """

    def __init__(self,
                 model: str = "nomic-embed-text",
                 base_url: str = "http://localhost:11434",
                 timeout: float = 60.0):
        """
        Args:
            model: Ollama embedding model name.
            base_url: Base URL of the Ollama server.
            timeout: Per-request HTTP timeout in seconds.  New parameter with
                a default, so existing callers are unaffected; previously
                requests had no timeout and could hang forever.
        """
        self.model = model
        self.base_url = base_url
        self.timeout = timeout
        self.logger = logging.getLogger("OllamaEmbed")

        try:
            import requests
            self.requests = requests
            self._dimension = self._get_dimension()
            self.logger.info(f"[OK] Ollama embed: {model}")
        except ImportError:
            raise ImportError("pip install requests")

    def _get_dimension(self) -> int:
        """Probe the embedding dimension with a minimal request."""
        # Test with empty string
        test_embed = self.embed("")
        return len(test_embed)

    def embed(self, text: str) -> List[float]:
        """Generate an embedding via the Ollama HTTP API.

        Raises:
            requests.HTTPError: on non-2xx responses.  Previously a server
                error surfaced as a confusing KeyError('embedding').
        """
        response = self.requests.post(
            f"{self.base_url}/api/embeddings",
            json={
                "model": self.model,
                "prompt": text
            },
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()["embedding"]

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Batch embeddings (the Ollama endpoint takes one prompt per call)."""
        return [self.embed(text) for text in texts]

    @property
    def dimension(self) -> int:
        return self._dimension

    @property
    def name(self) -> str:
        return f"Ollama/{self.model}"
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# ============================================================================
|
|
184
|
+
# COMMERCIAL (Fallback)
|
|
185
|
+
# ============================================================================
|
|
186
|
+
|
|
187
|
+
class OpenAIEmbeddingProvider(BaseEmbeddingProvider):
    """
    OpenAI Embeddings (PAID).

    Models: text-embedding-3-small (1536d), text-embedding-3-large (3072d),
    text-embedding-ada-002 (1536d).
    """

    # Known model dimensions.  The old heuristic (1536 if "small" in model
    # else 3072) wrongly reported 3072 for text-embedding-ada-002.
    _DIMENSIONS = {
        "text-embedding-3-small": 1536,
        "text-embedding-3-large": 3072,
        "text-embedding-ada-002": 1536,
    }

    def __init__(self,
                 model: str = "text-embedding-3-small",
                 api_key: Optional[str] = None):
        """
        Args:
            model: OpenAI embedding model name.
            api_key: API key; falls back to the OPENAI_API_KEY env var.

        Raises:
            ValueError: if no API key is available.
            ImportError: if the openai package is not installed.
        """
        self.model = model
        self.api_key = api_key or os.getenv("OPENAI_API_KEY")
        self.logger = logging.getLogger("OpenAIEmbed")

        if not self.api_key:
            raise ValueError("OPENAI_API_KEY required")

        try:
            import openai
            self.client = openai.OpenAI(api_key=self.api_key)
            # Prefer the known table; keep the old heuristic only as a
            # fallback for models released after this code was written.
            self._dimension = self._DIMENSIONS.get(
                model, 1536 if "small" in model else 3072)
            self.logger.info(f"[OK] OpenAI embed: {model}")
        except ImportError:
            raise ImportError("pip install openai")

    def embed(self, text: str) -> List[float]:
        """Generate embedding for a single text."""
        response = self.client.embeddings.create(
            model=self.model,
            input=text
        )
        return response.data[0].embedding

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Batch embeddings in a single API call."""
        response = self.client.embeddings.create(
            model=self.model,
            input=texts
        )
        return [d.embedding for d in response.data]

    @property
    def dimension(self) -> int:
        return self._dimension

    @property
    def name(self) -> str:
        return f"OpenAI/{self.model}"
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# ============================================================================
|
|
238
|
+
# FACTORY
|
|
239
|
+
# ============================================================================
|
|
240
|
+
|
|
241
|
+
class EmbeddingFactory:
    """
    Factory for creating embedding providers.

    Priority:
    1. sentence-transformers (Local, Free)
    2. FastEmbed (Local, Faster)
    3. Ollama (Local, Free)
    4. OpenAI (Commercial)
    """

    PROVIDERS = {
        'sentence-transformers': SentenceTransformersProvider,
        'fastembed': FastEmbedProvider,
        'ollama': OllamaEmbeddingProvider,
        'openai': OpenAIEmbeddingProvider,
    }

    @classmethod
    def create(cls,
               provider: str = "sentence-transformers",
               model: Optional[str] = None,
               **kwargs) -> BaseEmbeddingProvider:
        """Create an embedding provider by registry key.

        Args:
            provider: key in PROVIDERS.
            model: optional model override; the keyword it maps to differs
                per provider (see below).
            **kwargs: forwarded to the provider constructor.

        Raises:
            ValueError: if *provider* is not a known key.
        """
        if provider not in cls.PROVIDERS:
            raise ValueError(f"Unknown provider: {provider}")

        provider_class = cls.PROVIDERS[provider]

        if not model:
            return provider_class(**kwargs)
        # sentence-transformers takes `model_name`; all others take `model`.
        if provider == "sentence-transformers":
            return provider_class(model_name=model, **kwargs)
        return provider_class(model=model, **kwargs)

    @classmethod
    def auto_detect(cls) -> BaseEmbeddingProvider:
        """
        Auto-detect best embedding provider.

        Priority:
        1. sentence-transformers (best for most use cases)
        2. FastEmbed (if speed is critical)
        3. Ollama (if already running)
        4. OpenAI (fallback)

        Raises:
            RuntimeError: if no provider could be constructed.
        """
        logger = logging.getLogger("EmbeddingFactory")

        # Try each local provider in priority order.  Catch Exception - not a
        # bare `except:`, which also swallowed KeyboardInterrupt/SystemExit -
        # and keep the failure reason in the debug log instead of discarding it.
        local_candidates = [
            ("sentence-transformers", "[OK] Using sentence-transformers (local, free)"),
            ("fastembed", "[OK] Using FastEmbed (local, free, fast)"),
            ("ollama", "[OK] Using Ollama embeddings (local, free)"),
        ]
        for key, message in local_candidates:
            try:
                provider = cls.create(key)
            except Exception as e:
                logger.debug("%s unavailable: %s", key, e)
                continue
            logger.info(message)
            return provider

        # Fallback to OpenAI only when an API key is configured.
        if os.getenv("OPENAI_API_KEY"):
            try:
                provider = cls.create("openai")
                logger.warning(" Using OpenAI embeddings (paid)")
                return provider
            except Exception as e:
                logger.debug("openai unavailable: %s", e)

        raise RuntimeError(
            "No embedding provider available. "
            "Install: pip install sentence-transformers"
        )
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
# ============================================================================
|
|
331
|
+
# RECOMMENDED MODELS
|
|
332
|
+
# ============================================================================
|
|
333
|
+
|
|
334
|
+
# Curated model choices per provider, keyed by use case.  Lets callers pick a
# sensible default without memorizing individual model names.  Keep insertion
# order stable: the __main__ demo iterates and prints this mapping.
RECOMMENDED_MODELS = {
    "sentence-transformers": {
        "fast": "all-MiniLM-L6-v2",  # 384d, fastest
        "balanced": "all-mpnet-base-v2",  # 768d, good quality
        "multilingual": "paraphrase-multilingual-MiniLM-L12-v2",
        "qa": "multi-qa-MiniLM-L6-cos-v1",  # Q&A optimized
    },
    "fastembed": {
        "fast": "BAAI/bge-small-en-v1.5",
        "balanced": "BAAI/bge-base-en-v1.5",
    },
    "ollama": {
        "default": "nomic-embed-text",
        "large": "mxbai-embed-large",
    }
}
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
if __name__ == "__main__":
    # Smoke-test demo: pick a provider, embed one sentence, list model picks.
    print("Embedding Provider Examples:\n")

    print("1. Auto-detect:")
    selected = EmbeddingFactory.auto_detect()
    print(f"   Using: {selected.name}")
    print(f"   Dimension: {selected.dimension}\n")

    sample = "This is a test sentence"
    vec = selected.embed(sample)
    print(f"   Test: '{sample}'")
    print(f"   Vector: [{vec[0]:.4f}, {vec[1]:.4f}, ..., {vec[-1]:.4f}]")
    print(f"   Length: {len(vec)}\n")

    print("2. Recommended models:")
    for backend, picks in RECOMMENDED_MODELS.items():
        print(f"\n   {backend}:")
        for purpose, model_id in picks.items():
            print(f"      - {purpose}: {model_id}")
|