mantisdk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of mantisdk might be problematic. Click here for more details.

Files changed (190) hide show
  1. mantisdk/__init__.py +22 -0
  2. mantisdk/adapter/__init__.py +15 -0
  3. mantisdk/adapter/base.py +94 -0
  4. mantisdk/adapter/messages.py +270 -0
  5. mantisdk/adapter/triplet.py +1028 -0
  6. mantisdk/algorithm/__init__.py +39 -0
  7. mantisdk/algorithm/apo/__init__.py +5 -0
  8. mantisdk/algorithm/apo/apo.py +889 -0
  9. mantisdk/algorithm/apo/prompts/apply_edit_variant01.poml +22 -0
  10. mantisdk/algorithm/apo/prompts/apply_edit_variant02.poml +18 -0
  11. mantisdk/algorithm/apo/prompts/text_gradient_variant01.poml +18 -0
  12. mantisdk/algorithm/apo/prompts/text_gradient_variant02.poml +16 -0
  13. mantisdk/algorithm/apo/prompts/text_gradient_variant03.poml +107 -0
  14. mantisdk/algorithm/base.py +162 -0
  15. mantisdk/algorithm/decorator.py +264 -0
  16. mantisdk/algorithm/fast.py +250 -0
  17. mantisdk/algorithm/gepa/__init__.py +59 -0
  18. mantisdk/algorithm/gepa/adapter.py +459 -0
  19. mantisdk/algorithm/gepa/gepa.py +364 -0
  20. mantisdk/algorithm/gepa/lib/__init__.py +18 -0
  21. mantisdk/algorithm/gepa/lib/adapters/README.md +12 -0
  22. mantisdk/algorithm/gepa/lib/adapters/__init__.py +0 -0
  23. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/README.md +341 -0
  24. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/__init__.py +1 -0
  25. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/anymaths_adapter.py +174 -0
  26. mantisdk/algorithm/gepa/lib/adapters/anymaths_adapter/requirements.txt +1 -0
  27. mantisdk/algorithm/gepa/lib/adapters/default_adapter/README.md +0 -0
  28. mantisdk/algorithm/gepa/lib/adapters/default_adapter/__init__.py +0 -0
  29. mantisdk/algorithm/gepa/lib/adapters/default_adapter/default_adapter.py +209 -0
  30. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/README.md +7 -0
  31. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/__init__.py +0 -0
  32. mantisdk/algorithm/gepa/lib/adapters/dspy_adapter/dspy_adapter.py +307 -0
  33. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/README.md +99 -0
  34. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/dspy_program_proposal_signature.py +137 -0
  35. mantisdk/algorithm/gepa/lib/adapters/dspy_full_program_adapter/full_program_adapter.py +266 -0
  36. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/GEPA_RAG.md +621 -0
  37. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/__init__.py +56 -0
  38. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/evaluation_metrics.py +226 -0
  39. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/generic_rag_adapter.py +496 -0
  40. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/rag_pipeline.py +238 -0
  41. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_store_interface.py +212 -0
  42. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/__init__.py +2 -0
  43. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/chroma_store.py +196 -0
  44. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/lancedb_store.py +422 -0
  45. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/milvus_store.py +409 -0
  46. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/qdrant_store.py +368 -0
  47. mantisdk/algorithm/gepa/lib/adapters/generic_rag_adapter/vector_stores/weaviate_store.py +418 -0
  48. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/README.md +552 -0
  49. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/__init__.py +37 -0
  50. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_adapter.py +705 -0
  51. mantisdk/algorithm/gepa/lib/adapters/mcp_adapter/mcp_client.py +364 -0
  52. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/README.md +9 -0
  53. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/__init__.py +0 -0
  54. mantisdk/algorithm/gepa/lib/adapters/terminal_bench_adapter/terminal_bench_adapter.py +217 -0
  55. mantisdk/algorithm/gepa/lib/api.py +375 -0
  56. mantisdk/algorithm/gepa/lib/core/__init__.py +0 -0
  57. mantisdk/algorithm/gepa/lib/core/adapter.py +180 -0
  58. mantisdk/algorithm/gepa/lib/core/data_loader.py +74 -0
  59. mantisdk/algorithm/gepa/lib/core/engine.py +356 -0
  60. mantisdk/algorithm/gepa/lib/core/result.py +233 -0
  61. mantisdk/algorithm/gepa/lib/core/state.py +636 -0
  62. mantisdk/algorithm/gepa/lib/examples/__init__.py +0 -0
  63. mantisdk/algorithm/gepa/lib/examples/aime.py +24 -0
  64. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/eval_default.py +111 -0
  65. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/instruction_prompt.txt +9 -0
  66. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/prompt-templates/optimal_prompt.txt +24 -0
  67. mantisdk/algorithm/gepa/lib/examples/anymaths-bench/train_anymaths.py +177 -0
  68. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/arc_agi.ipynb +25705 -0
  69. mantisdk/algorithm/gepa/lib/examples/dspy_full_program_evolution/example.ipynb +348 -0
  70. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/__init__.py +4 -0
  71. mantisdk/algorithm/gepa/lib/examples/mcp_adapter/mcp_optimization_example.py +455 -0
  72. mantisdk/algorithm/gepa/lib/examples/rag_adapter/RAG_GUIDE.md +613 -0
  73. mantisdk/algorithm/gepa/lib/examples/rag_adapter/__init__.py +9 -0
  74. mantisdk/algorithm/gepa/lib/examples/rag_adapter/rag_optimization.py +824 -0
  75. mantisdk/algorithm/gepa/lib/examples/rag_adapter/requirements-rag.txt +29 -0
  76. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/instruction_prompt.txt +16 -0
  77. mantisdk/algorithm/gepa/lib/examples/terminal-bench/prompt-templates/terminus.txt +9 -0
  78. mantisdk/algorithm/gepa/lib/examples/terminal-bench/train_terminus.py +161 -0
  79. mantisdk/algorithm/gepa/lib/gepa_utils.py +117 -0
  80. mantisdk/algorithm/gepa/lib/logging/__init__.py +0 -0
  81. mantisdk/algorithm/gepa/lib/logging/experiment_tracker.py +187 -0
  82. mantisdk/algorithm/gepa/lib/logging/logger.py +75 -0
  83. mantisdk/algorithm/gepa/lib/logging/utils.py +103 -0
  84. mantisdk/algorithm/gepa/lib/proposer/__init__.py +0 -0
  85. mantisdk/algorithm/gepa/lib/proposer/base.py +31 -0
  86. mantisdk/algorithm/gepa/lib/proposer/merge.py +357 -0
  87. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/__init__.py +0 -0
  88. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/base.py +49 -0
  89. mantisdk/algorithm/gepa/lib/proposer/reflective_mutation/reflective_mutation.py +176 -0
  90. mantisdk/algorithm/gepa/lib/py.typed +0 -0
  91. mantisdk/algorithm/gepa/lib/strategies/__init__.py +0 -0
  92. mantisdk/algorithm/gepa/lib/strategies/batch_sampler.py +77 -0
  93. mantisdk/algorithm/gepa/lib/strategies/candidate_selector.py +50 -0
  94. mantisdk/algorithm/gepa/lib/strategies/component_selector.py +36 -0
  95. mantisdk/algorithm/gepa/lib/strategies/eval_policy.py +64 -0
  96. mantisdk/algorithm/gepa/lib/strategies/instruction_proposal.py +127 -0
  97. mantisdk/algorithm/gepa/lib/utils/__init__.py +10 -0
  98. mantisdk/algorithm/gepa/lib/utils/stop_condition.py +196 -0
  99. mantisdk/algorithm/gepa/tracing.py +105 -0
  100. mantisdk/algorithm/utils.py +177 -0
  101. mantisdk/algorithm/verl/__init__.py +5 -0
  102. mantisdk/algorithm/verl/interface.py +202 -0
  103. mantisdk/cli/__init__.py +56 -0
  104. mantisdk/cli/prometheus.py +115 -0
  105. mantisdk/cli/store.py +131 -0
  106. mantisdk/cli/vllm.py +29 -0
  107. mantisdk/client.py +408 -0
  108. mantisdk/config.py +348 -0
  109. mantisdk/emitter/__init__.py +43 -0
  110. mantisdk/emitter/annotation.py +370 -0
  111. mantisdk/emitter/exception.py +54 -0
  112. mantisdk/emitter/message.py +61 -0
  113. mantisdk/emitter/object.py +117 -0
  114. mantisdk/emitter/reward.py +320 -0
  115. mantisdk/env_var.py +156 -0
  116. mantisdk/execution/__init__.py +15 -0
  117. mantisdk/execution/base.py +64 -0
  118. mantisdk/execution/client_server.py +443 -0
  119. mantisdk/execution/events.py +69 -0
  120. mantisdk/execution/inter_process.py +16 -0
  121. mantisdk/execution/shared_memory.py +282 -0
  122. mantisdk/instrumentation/__init__.py +119 -0
  123. mantisdk/instrumentation/agentops.py +314 -0
  124. mantisdk/instrumentation/agentops_langchain.py +45 -0
  125. mantisdk/instrumentation/litellm.py +83 -0
  126. mantisdk/instrumentation/vllm.py +81 -0
  127. mantisdk/instrumentation/weave.py +500 -0
  128. mantisdk/litagent/__init__.py +11 -0
  129. mantisdk/litagent/decorator.py +536 -0
  130. mantisdk/litagent/litagent.py +252 -0
  131. mantisdk/llm_proxy.py +1890 -0
  132. mantisdk/logging.py +370 -0
  133. mantisdk/reward.py +7 -0
  134. mantisdk/runner/__init__.py +11 -0
  135. mantisdk/runner/agent.py +845 -0
  136. mantisdk/runner/base.py +182 -0
  137. mantisdk/runner/legacy.py +309 -0
  138. mantisdk/semconv.py +170 -0
  139. mantisdk/server.py +401 -0
  140. mantisdk/store/__init__.py +23 -0
  141. mantisdk/store/base.py +897 -0
  142. mantisdk/store/client_server.py +2092 -0
  143. mantisdk/store/collection/__init__.py +30 -0
  144. mantisdk/store/collection/base.py +587 -0
  145. mantisdk/store/collection/memory.py +970 -0
  146. mantisdk/store/collection/mongo.py +1412 -0
  147. mantisdk/store/collection_based.py +1823 -0
  148. mantisdk/store/insight.py +648 -0
  149. mantisdk/store/listener.py +58 -0
  150. mantisdk/store/memory.py +396 -0
  151. mantisdk/store/mongo.py +165 -0
  152. mantisdk/store/sqlite.py +3 -0
  153. mantisdk/store/threading.py +357 -0
  154. mantisdk/store/utils.py +142 -0
  155. mantisdk/tracer/__init__.py +16 -0
  156. mantisdk/tracer/agentops.py +242 -0
  157. mantisdk/tracer/base.py +287 -0
  158. mantisdk/tracer/dummy.py +106 -0
  159. mantisdk/tracer/otel.py +555 -0
  160. mantisdk/tracer/weave.py +677 -0
  161. mantisdk/trainer/__init__.py +6 -0
  162. mantisdk/trainer/init_utils.py +263 -0
  163. mantisdk/trainer/legacy.py +367 -0
  164. mantisdk/trainer/registry.py +12 -0
  165. mantisdk/trainer/trainer.py +618 -0
  166. mantisdk/types/__init__.py +6 -0
  167. mantisdk/types/core.py +553 -0
  168. mantisdk/types/resources.py +204 -0
  169. mantisdk/types/tracer.py +515 -0
  170. mantisdk/types/tracing.py +218 -0
  171. mantisdk/utils/__init__.py +1 -0
  172. mantisdk/utils/id.py +18 -0
  173. mantisdk/utils/metrics.py +1025 -0
  174. mantisdk/utils/otel.py +578 -0
  175. mantisdk/utils/otlp.py +536 -0
  176. mantisdk/utils/server_launcher.py +1045 -0
  177. mantisdk/utils/system_snapshot.py +81 -0
  178. mantisdk/verl/__init__.py +8 -0
  179. mantisdk/verl/__main__.py +6 -0
  180. mantisdk/verl/async_server.py +46 -0
  181. mantisdk/verl/config.yaml +27 -0
  182. mantisdk/verl/daemon.py +1154 -0
  183. mantisdk/verl/dataset.py +44 -0
  184. mantisdk/verl/entrypoint.py +248 -0
  185. mantisdk/verl/trainer.py +549 -0
  186. mantisdk-0.1.0.dist-info/METADATA +119 -0
  187. mantisdk-0.1.0.dist-info/RECORD +190 -0
  188. mantisdk-0.1.0.dist-info/WHEEL +4 -0
  189. mantisdk-0.1.0.dist-info/entry_points.txt +2 -0
  190. mantisdk-0.1.0.dist-info/licenses/LICENSE +19 -0
@@ -0,0 +1,422 @@
1
+ # Copyright (c) 2025 Lakshya A Agrawal and the GEPA contributors
2
+ # https://github.com/gepa-ai/gepa
3
+
4
+ from typing import Any
5
+
6
+ from mantisdk.algorithm.gepa.lib.adapters.generic_rag_adapter.vector_store_interface import VectorStoreInterface
7
+
8
+
9
+ class LanceDBVectorStore(VectorStoreInterface):
10
+ """
11
+ LanceDB implementation of the VectorStoreInterface.
12
+
13
+ LanceDB is a developer-friendly, serverless vector database for AI applications.
14
+ It provides excellent performance for both local development and production
15
+ deployments with support for SQL-like filtering and modern PyArrow integration.
16
+ """
17
+
18
+ def __init__(self, db, table_name: str, embedding_function=None):
19
+ """
20
+ Initialize LanceDBVectorStore.
21
+
22
+ Args:
23
+ db: LanceDB database connection
24
+ table_name: Name of the table to use
25
+ embedding_function: Optional function to compute embeddings for queries
26
+ """
27
+ import importlib.util
28
+
29
+ if importlib.util.find_spec("lancedb") is None:
30
+ raise ImportError(
31
+ "LanceDB is required for LanceDBVectorStore. Install with: pip install litellm lancedb pyarrow"
32
+ )
33
+
34
+ self.db = db
35
+ self.table_name = table_name
36
+ self.embedding_function = embedding_function
37
+
38
+ # Try to open table if it exists, otherwise it will be created in add_documents
39
+ try:
40
+ self.table = self.db.open_table(table_name)
41
+ except Exception:
42
+ self.table = None # Will be created when first adding documents
43
+
44
+ def similarity_search(
45
+ self,
46
+ query: str,
47
+ k: int = 5,
48
+ filters: dict[str, Any] | None = None,
49
+ ) -> list[dict[str, Any]]:
50
+ """Search for documents similar to the query text using embeddings."""
51
+ if self.embedding_function is None:
52
+ raise ValueError("No embedding function provided for similarity search")
53
+
54
+ # Compute embeddings for the query
55
+ try:
56
+ query_vector = self.embedding_function(query)
57
+ if hasattr(query_vector, "tolist"):
58
+ query_vector = query_vector.tolist()
59
+ except Exception as e:
60
+ raise RuntimeError(f"Failed to compute embeddings for query: {e!s}") from e
61
+
62
+ # Use vector search with computed embeddings
63
+ return self.vector_search(query_vector, k, filters)
64
+
65
+ def vector_search(
66
+ self,
67
+ query_vector: list[float],
68
+ k: int = 5,
69
+ filters: dict[str, Any] | None = None,
70
+ ) -> list[dict[str, Any]]:
71
+ """Search using a pre-computed query vector."""
72
+ if self.table is None:
73
+ return [] # No documents added yet
74
+
75
+ try:
76
+ # Build query with vector search
77
+ query_builder = self.table.search(query_vector).limit(k)
78
+
79
+ # Add filters if provided
80
+ if filters:
81
+ filter_expr = self._convert_filters(filters)
82
+ if filter_expr:
83
+ query_builder = query_builder.where(filter_expr)
84
+
85
+ # Execute query and get results
86
+ results = query_builder.to_pandas()
87
+
88
+ return self._format_results(results)
89
+
90
+ except Exception as e:
91
+ raise RuntimeError(f"LanceDB vector search failed: {e!s}") from e
92
+
93
+ def add_documents(
94
+ self,
95
+ documents: list[dict[str, Any]],
96
+ embeddings: list[list[float]],
97
+ ids: list[str] | None = None,
98
+ ) -> list[str]:
99
+ """Add documents with their embeddings to the table."""
100
+ if len(documents) != len(embeddings):
101
+ raise ValueError("Number of documents must match number of embeddings")
102
+
103
+ # Generate IDs if not provided
104
+ if ids is None:
105
+ ids = [f"doc_{i}" for i in range(len(documents))]
106
+ elif len(ids) != len(documents):
107
+ raise ValueError("Number of IDs must match number of documents")
108
+
109
+ # Prepare data for insertion
110
+ data_to_insert = []
111
+ for doc_id, doc, embedding in zip(ids, documents, embeddings, strict=False):
112
+ # LanceDB requires consistent field structure
113
+ record = {
114
+ "id": doc_id,
115
+ "vector": embedding,
116
+ **doc, # Include all document fields
117
+ }
118
+ data_to_insert.append(record)
119
+
120
+ try:
121
+ # Create table if it doesn't exist yet
122
+ if self.table is None:
123
+ self.table = self.db.create_table(self.table_name, data=data_to_insert)
124
+ else:
125
+ # Add data to existing table
126
+ self.table.add(data_to_insert, mode="append")
127
+ return ids
128
+
129
+ except Exception as e:
130
+ raise RuntimeError(f"Failed to add documents to LanceDB: {e!s}") from e
131
+
132
+ def delete_documents(self, ids: list[str]) -> bool:
133
+ """Delete documents by their IDs."""
134
+ try:
135
+ # Use parameterized filter to avoid injection
136
+ if len(ids) == 1:
137
+ filter_expr = {"id": ids[0]}
138
+ else:
139
+ filter_expr = {"id": {"$in": ids}}
140
+
141
+ # Delete documents
142
+ self.table.delete(filter_expr)
143
+ return True
144
+
145
+ except Exception as e:
146
+ raise RuntimeError(f"Failed to delete documents from LanceDB: {e!s}") from e
147
+
148
+ def get_collection_info(self) -> dict[str, Any]:
149
+ """Get metadata about the LanceDB table."""
150
+ try:
151
+ # Get table schema
152
+ schema = self.table.schema
153
+
154
+ # Count rows (this might be approximate for large tables)
155
+ try:
156
+ count_result = self.table.count_rows()
157
+ row_count = count_result if isinstance(count_result, int) else 0
158
+ except Exception:
159
+ # Fallback: count by querying
160
+ try:
161
+ sample_df = self.table.to_pandas()
162
+ row_count = len(sample_df)
163
+ except Exception:
164
+ row_count = 0
165
+
166
+ # Extract vector field information
167
+ vector_field = None
168
+ dimension = 0
169
+ for field in schema:
170
+ # Check if field is a list type (vector field)
171
+ if hasattr(field.type, "value_type") and "float" in str(field.type).lower():
172
+ vector_field = field.name
173
+ # Try to get dimension from list type
174
+ if hasattr(field.type, "list_size"):
175
+ dimension = field.type.list_size
176
+ elif "vector" in field.name.lower():
177
+ # This is likely our vector field
178
+ vector_field = field.name
179
+
180
+ # Get table version and other metadata
181
+ version = getattr(self.table, "version", "unknown")
182
+
183
+ return {
184
+ "name": self.table_name,
185
+ "document_count": row_count,
186
+ "dimension": dimension,
187
+ "vector_store_type": "lancedb",
188
+ "vector_field": vector_field,
189
+ "version": version,
190
+ "schema": str(schema),
191
+ }
192
+
193
+ except Exception as e:
194
+ # Fallback info if detailed info fails
195
+ return {
196
+ "name": self.table_name,
197
+ "document_count": 0,
198
+ "dimension": 0,
199
+ "vector_store_type": "lancedb",
200
+ "error": str(e),
201
+ }
202
+
203
+ def supports_hybrid_search(self) -> bool:
204
+ """LanceDB supports hybrid search through full-text search + vector search."""
205
+ return True
206
+
207
+ def hybrid_search(
208
+ self,
209
+ query: str,
210
+ k: int = 5,
211
+ alpha: float = 0.5,
212
+ filters: dict[str, Any] | None = None,
213
+ ) -> list[dict[str, Any]]:
214
+ """
215
+ Hybrid search combining vector similarity and full-text search.
216
+ """
217
+ try:
218
+ # Import FTS query if available
219
+ try:
220
+ from lancedb.query import FtsQuery
221
+
222
+ # Build hybrid query
223
+ FtsQuery(query=query)
224
+
225
+ # Get embedding for vector search
226
+ if self.embedding_function:
227
+ query_vector = self.embedding_function(query)
228
+ if hasattr(query_vector, "tolist"):
229
+ query_vector = query_vector.tolist()
230
+
231
+ # Perform hybrid search (this is a simplified version)
232
+ # In practice, you might want to combine scores differently
233
+ query_builder = self.table.search(query_vector, query_type="hybrid").limit(k)
234
+ else:
235
+ # Fall back to FTS only
236
+ query_builder = self.table.search(query, query_type="fts").limit(k)
237
+
238
+ # Add filters if provided
239
+ if filters:
240
+ filter_expr = self._convert_filters(filters)
241
+ if filter_expr:
242
+ query_builder = query_builder.where(filter_expr)
243
+
244
+ # Execute query
245
+ results = query_builder.to_pandas()
246
+ return self._format_results(results)
247
+
248
+ except ImportError:
249
+ # Fall back to vector search only
250
+ return self.similarity_search(query, k, filters)
251
+
252
+ except Exception:
253
+ # Fall back to regular vector search
254
+ return self.similarity_search(query, k, filters)
255
+
256
+ def _format_results(self, results_df) -> list[dict[str, Any]]:
257
+ """Convert LanceDB results to standardized format."""
258
+ documents = []
259
+
260
+ if results_df is None or len(results_df) == 0:
261
+ return documents
262
+
263
+ for _, row in results_df.iterrows():
264
+ row_dict = row.to_dict()
265
+
266
+ # Extract content - try different field names
267
+ content = ""
268
+ content_fields = ["content", "text", "document", "body", "description"]
269
+ for field in content_fields:
270
+ if row_dict.get(field):
271
+ content = str(row_dict[field])
272
+ break
273
+
274
+ # If no content field found, combine text fields
275
+ if not content:
276
+ text_properties = []
277
+ system_fields = ["id", "vector", "_distance"]
278
+ for key, value in row_dict.items():
279
+ if (
280
+ isinstance(value, str)
281
+ and value.strip()
282
+ and key not in system_fields
283
+ and key not in content_fields
284
+ ):
285
+ text_properties.append(f"{key}: {value}")
286
+ content = " | ".join(text_properties)
287
+
288
+ # Create metadata (all fields except content and system fields)
289
+ metadata = {}
290
+ system_fields = ["id", "vector", "_distance"] + content_fields
291
+ for key, value in row_dict.items():
292
+ if key not in system_fields and value is not None:
293
+ # Convert numpy types to Python types for JSON serialization
294
+ if hasattr(value, "item"):
295
+ value = value.item()
296
+ elif hasattr(value, "tolist"):
297
+ value = value.tolist()
298
+ metadata[key] = value
299
+
300
+ metadata["doc_id"] = str(row_dict.get("id", ""))
301
+
302
+ # Extract similarity score (LanceDB includes _distance column)
303
+ distance = row_dict.get("_distance", 0.0)
304
+ if hasattr(distance, "item"):
305
+ distance = distance.item()
306
+
307
+ # Convert distance to similarity score (lower distance = higher similarity)
308
+ # For L2 distance: similarity = 1 / (1 + distance)
309
+ # For cosine distance: similarity = 1 - distance (if distance is in [0,2])
310
+ if distance <= 1.0:
311
+ score = max(0.0, 1.0 - distance) # Cosine-like
312
+ else:
313
+ score = 1.0 / (1.0 + distance) # L2-like
314
+
315
+ documents.append(
316
+ {
317
+ "content": content,
318
+ "metadata": metadata,
319
+ "score": float(score),
320
+ }
321
+ )
322
+
323
+ return documents
324
+
325
+ def _convert_filters(self, filters: dict[str, Any]) -> str:
326
+ """Convert generic filters to LanceDB SQL-like expressions."""
327
+ if not filters:
328
+ return None
329
+
330
+ expressions = []
331
+
332
+ for key, value in filters.items():
333
+ if isinstance(value, str):
334
+ # String exact match - properly escape quotes and backslashes
335
+ escaped_value = value.replace("\\", "\\\\").replace("'", "''")
336
+ expressions.append(f"{key} = '{escaped_value}'")
337
+ elif isinstance(value, int | float):
338
+ # Numeric exact match
339
+ expressions.append(f"{key} = {value}")
340
+ elif isinstance(value, bool):
341
+ # Boolean match
342
+ expressions.append(f"{key} = {value}")
343
+ elif isinstance(value, list):
344
+ # IN clause for multiple values
345
+ if all(isinstance(v, str) for v in value):
346
+ # String values - properly escape
347
+ escaped_values = []
348
+ for v in value:
349
+ escaped_v = v.replace("'", "''")
350
+ escaped_values.append(f"'{escaped_v}'")
351
+ values_str = ", ".join(escaped_values)
352
+ expressions.append(f"{key} IN ({values_str})")
353
+ else:
354
+ # Numeric values
355
+ values_str = ", ".join(str(v) for v in value)
356
+ expressions.append(f"{key} IN ({values_str})")
357
+ elif isinstance(value, dict):
358
+ # Range queries
359
+ range_conditions = []
360
+ if "gte" in value:
361
+ range_conditions.append(f"{key} >= {value['gte']}")
362
+ if "gt" in value:
363
+ range_conditions.append(f"{key} > {value['gt']}")
364
+ if "lte" in value:
365
+ range_conditions.append(f"{key} <= {value['lte']}")
366
+ if "lt" in value:
367
+ range_conditions.append(f"{key} < {value['lt']}")
368
+
369
+ if range_conditions:
370
+ expressions.append("(" + " AND ".join(range_conditions) + ")")
371
+
372
+ if expressions:
373
+ return " AND ".join(expressions)
374
+
375
+ return None
376
+
377
+ @classmethod
378
+ def create_local(cls, table_name: str, embedding_function=None, db_path: str = "./lancedb", vector_size: int = 384):
379
+ """Create a local LanceDB vector store."""
380
+ import importlib.util
381
+
382
+ if importlib.util.find_spec("lancedb") is None or importlib.util.find_spec("pyarrow") is None:
383
+ raise ImportError("LanceDB and PyArrow are required. Install with: pip install litellm lancedb pyarrow")
384
+
385
+ import lancedb
386
+
387
+ # Connect to local database
388
+ db = lancedb.connect(db_path)
389
+
390
+ # For LanceDB, we'll create the table when first adding documents
391
+ # This allows LanceDB to infer the schema from actual data, avoiding conflicts
392
+
393
+ return cls(db, table_name, embedding_function)
394
+
395
+ @classmethod
396
+ def create_remote(
397
+ cls,
398
+ table_name: str,
399
+ embedding_function=None,
400
+ uri: str | None = None,
401
+ api_key: str | None = None,
402
+ region: str = "us-east-1",
403
+ vector_size: int = 384,
404
+ ):
405
+ """Create a remote LanceDB vector store (LanceDB Cloud)."""
406
+ import importlib.util
407
+
408
+ if importlib.util.find_spec("lancedb") is None or importlib.util.find_spec("pyarrow") is None:
409
+ raise ImportError("LanceDB and PyArrow are required. Install with: pip install litellm lancedb pyarrow")
410
+
411
+ import lancedb
412
+
413
+ if not uri or not api_key:
414
+ raise ValueError("URI and API key are required for remote LanceDB connection")
415
+
416
+ # Connect to remote database
417
+ db = lancedb.connect(uri, api_key=api_key, region=region)
418
+
419
+ # For LanceDB, we'll create the table when first adding documents
420
+ # This allows LanceDB to infer the schema from actual data, avoiding conflicts
421
+
422
+ return cls(db, table_name, embedding_function)