remdb 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +565 -0
- rem/cli/commands/configure.py +423 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1124 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +88 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +657 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +229 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.2.6.dist-info/METADATA +1191 -0
- remdb-0.2.6.dist-info/RECORD +187 -0
- remdb-0.2.6.dist-info/WHEEL +4 -0
- remdb-0.2.6.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,527 @@
|
|
|
1
|
+
"""
|
|
2
|
+
RemService - REM query execution service (wrapper around PostgresService).
|
|
3
|
+
|
|
4
|
+
Delegates to PostgreSQL functions for performance:
|
|
5
|
+
- LOOKUP → rem_lookup() function (O(1) KV_STORE)
|
|
6
|
+
- FUZZY → rem_fuzzy() function (pg_trgm similarity)
|
|
7
|
+
- SEARCH → rem_search() function (vector similarity with embeddings)
|
|
8
|
+
- SQL → Direct PostgresService.execute() (pushed down to Postgres)
|
|
9
|
+
- TRAVERSE → rem_traverse() function (recursive graph traversal)
|
|
10
|
+
|
|
11
|
+
Design:
|
|
12
|
+
- RemService wraps PostgresService, does NOT duplicate logic
|
|
13
|
+
- All queries pushed down to Postgres for performance
|
|
14
|
+
- Model schema inspection for validation only
|
|
15
|
+
- Exceptions for missing fields/embeddings
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
from loguru import logger
|
|
21
|
+
|
|
22
|
+
from .parser import RemQueryParser
|
|
23
|
+
from ...models.core import (
|
|
24
|
+
FuzzyParameters,
|
|
25
|
+
LookupParameters,
|
|
26
|
+
QueryType,
|
|
27
|
+
RemQuery,
|
|
28
|
+
SearchParameters,
|
|
29
|
+
SQLParameters,
|
|
30
|
+
TraverseParameters,
|
|
31
|
+
)
|
|
32
|
+
from .exceptions import (
|
|
33
|
+
ContentFieldNotFoundError,
|
|
34
|
+
EmbeddingFieldNotFoundError,
|
|
35
|
+
FieldNotFoundError,
|
|
36
|
+
InvalidParametersError,
|
|
37
|
+
QueryExecutionError,
|
|
38
|
+
)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class RemService:
|
|
42
|
+
"""
|
|
43
|
+
REM query execution service.
|
|
44
|
+
|
|
45
|
+
Wraps PostgresService and delegates all queries to PostgreSQL functions.
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
def __init__(self, postgres_service: Any, model_registry: dict[str, Any] | None = None):
|
|
49
|
+
"""
|
|
50
|
+
Initialize REM service.
|
|
51
|
+
|
|
52
|
+
Args:
|
|
53
|
+
postgres_service: PostgresService instance
|
|
54
|
+
model_registry: Optional dict mapping table names to Pydantic models
|
|
55
|
+
"""
|
|
56
|
+
self.db = postgres_service
|
|
57
|
+
self.model_registry = model_registry or {}
|
|
58
|
+
|
|
59
|
+
def register_model(self, table_name: str, model: Any):
    """
    Associate a Pydantic model class with a table for schema validation.

    The model is stored in ``self.model_registry`` under ``table_name``
    (replacing any previous entry) and the registration is logged at
    debug level.

    Args:
        table_name: Table name (e.g., "resources")
        model: Pydantic model class; its ``__name__`` is used in the log line
    """
    self.model_registry[table_name] = model
    model_name = model.__name__
    logger.debug(f"Registered model {model_name} for table {table_name}")
|
|
69
|
+
|
|
70
|
+
def _get_model_fields(self, table_name: str) -> list[str]:
|
|
71
|
+
"""Get list of field names from registered model."""
|
|
72
|
+
if table_name not in self.model_registry:
|
|
73
|
+
return []
|
|
74
|
+
model = self.model_registry[table_name]
|
|
75
|
+
return list(model.model_fields.keys())
|
|
76
|
+
|
|
77
|
+
def _get_embeddable_fields(self, table_name: str) -> list[str]:
|
|
78
|
+
"""
|
|
79
|
+
Get list of fields that have embeddings.
|
|
80
|
+
|
|
81
|
+
Uses register_type conventions:
|
|
82
|
+
- Fields with json_schema_extra={"embed": True}
|
|
83
|
+
- Default embeddable fields: content, description, summary, text, body, message, notes
|
|
84
|
+
"""
|
|
85
|
+
if table_name not in self.model_registry:
|
|
86
|
+
return []
|
|
87
|
+
|
|
88
|
+
model = self.model_registry[table_name]
|
|
89
|
+
embeddable = []
|
|
90
|
+
|
|
91
|
+
DEFAULT_EMBED_FIELDS = {
|
|
92
|
+
"content",
|
|
93
|
+
"description",
|
|
94
|
+
"summary",
|
|
95
|
+
"text",
|
|
96
|
+
"body",
|
|
97
|
+
"message",
|
|
98
|
+
"notes",
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
for field_name, field_info in model.model_fields.items():
|
|
102
|
+
# Check json_schema_extra for explicit embed configuration
|
|
103
|
+
json_extra = getattr(field_info, "json_schema_extra", None)
|
|
104
|
+
if json_extra and isinstance(json_extra, dict):
|
|
105
|
+
embed = json_extra.get("embed")
|
|
106
|
+
if embed is True:
|
|
107
|
+
embeddable.append(field_name)
|
|
108
|
+
continue
|
|
109
|
+
elif embed is False:
|
|
110
|
+
continue
|
|
111
|
+
|
|
112
|
+
# Default: embed if field name matches common content fields
|
|
113
|
+
if field_name.lower() in DEFAULT_EMBED_FIELDS:
|
|
114
|
+
embeddable.append(field_name)
|
|
115
|
+
|
|
116
|
+
return embeddable
|
|
117
|
+
|
|
118
|
+
async def execute_query(self, query: RemQuery) -> dict[str, Any]:
    """
    Route a REM query to the handler matching its query type.

    ``query.user_id`` is used as the tenant identifier passed to the handler.
    The parameters object must be an instance of the parameter class that
    belongs to the query type.

    Args:
        query: RemQuery with type and parameters

    Returns:
        Query results with metadata, as produced by the selected handler

    Raises:
        InvalidParametersError: If the parameters do not match the query type,
            or the query type is not one of LOOKUP/FUZZY/SEARCH/SQL/TRAVERSE
        FieldNotFoundError: If field does not exist
        EmbeddingFieldNotFoundError: If field has no embeddings
        QueryExecutionError: For any other failure (logged, then wrapped)
    """
    try:
        # RemQuery uses user_id for isolation (mapped to tenant_id in execution)
        tenant_id = query.user_id

        # (query type, expected parameter class, handler, label used in errors)
        routes = (
            (QueryType.LOOKUP, LookupParameters, self._execute_lookup, "LOOKUP"),
            (QueryType.FUZZY, FuzzyParameters, self._execute_fuzzy, "FUZZY"),
            (QueryType.SEARCH, SearchParameters, self._execute_search, "SEARCH"),
            (QueryType.SQL, SQLParameters, self._execute_sql, "SQL"),
            (QueryType.TRAVERSE, TraverseParameters, self._execute_traverse, "TRAVERSE"),
        )

        for kind, params_cls, handler, label in routes:
            if query.query_type == kind:
                if isinstance(query.parameters, params_cls):
                    return await handler(query.parameters, tenant_id)
                raise InvalidParametersError(label, "Invalid parameters type")

        raise InvalidParametersError("UNKNOWN", f"Unknown query type: {query.query_type}")
    except (FieldNotFoundError, EmbeddingFieldNotFoundError, InvalidParametersError):
        # Our own validation errors pass through untouched
        raise
    except Exception as e:
        logger.exception(f"REM query execution failed: {e}")
        raise QueryExecutionError(query.query_type.value, str(e), e)
|
|
165
|
+
|
|
166
|
+
async def _execute_lookup(
    self, params: LookupParameters, tenant_id: str
) -> dict[str, Any]:
    """
    Run a LOOKUP for one key or several keys and merge the rows.

    ``params.key`` may be a single key or a list of keys. One LOOKUP_QUERY is
    executed per key, in order, and all returned rows are concatenated.

    Delegates to: rem_lookup(entity_key, tenant_id, user_id)

    Args:
        params: LookupParameters with entity key (str or list[str])
        tenant_id: Tenant identifier; also used as the user id when
            ``params.user_id`` is not set

    Returns:
        Dict with "query_type", "keys" (always a list), "results" and "count"
    """
    from .queries import LOOKUP_QUERY, get_lookup_params

    requested = params.key
    keys = requested if isinstance(requested, list) else [requested]

    # Same for every key: explicit params.user_id wins, otherwise the tenant id
    effective_user = params.user_id or tenant_id

    rows = []
    for entity_key in keys:
        bound = get_lookup_params(entity_key, tenant_id, effective_user)
        rows.extend(await self.db.execute(LOOKUP_QUERY, bound))

    return {
        "query_type": "LOOKUP",
        "keys": keys,  # Return list for consistency
        "results": rows,
        "count": len(rows),
    }
|
|
203
|
+
|
|
204
|
+
async def _execute_fuzzy(
    self, params: FuzzyParameters, tenant_id: str
) -> dict[str, Any]:
    """
    Run a FUZZY query through FUZZY_QUERY.

    Delegates to: rem_fuzzy(query, tenant_id, threshold, limit, user_id)

    Args:
        params: FuzzyParameters with query text, threshold and limit
        tenant_id: Tenant identifier; passed as both tenant id and user id

    Returns:
        Dict with "query_type", "query_text", "threshold", "results" and "count"
    """
    from .queries import FUZZY_QUERY, get_fuzzy_params

    text = params.query_text
    cutoff = params.threshold

    # tenant_id (query.user_id) doubles as the user_id argument
    bound = get_fuzzy_params(text, tenant_id, cutoff, params.limit, tenant_id)
    matches = await self.db.execute(FUZZY_QUERY, bound)

    return {
        "query_type": "FUZZY",
        "query_text": text,
        "threshold": cutoff,
        "results": matches,
        "count": len(matches),
    }
|
|
237
|
+
|
|
238
|
+
async def _execute_search(
    self, params: SearchParameters, tenant_id: str
) -> dict[str, Any]:
    """
    Execute SEARCH query via rem_search() PostgreSQL function.

    The search always targets the 'content' field, because SearchParameters
    carries no field name. When a model is registered for the table, this
    method checks that:
    - 'content' is one of the model's fields
    - 'content' is one of the model's embeddable fields
    Both checks are skipped when the corresponding field list is empty
    (e.g. no model registered for the table).

    The query text is embedded with the embedding model/provider configured
    in settings, then the vector search is pushed down to Postgres.

    Delegates to: rem_search(query_embedding, table_name, field_name, ...)

    Args:
        params: SearchParameters with query text, table, and optional
            min_similarity / limit
        tenant_id: Tenant identifier; passed as both tenant id and user id

    Returns:
        Dict with "query_type", "query_text", "table_name", "field_name",
        "results" and "count"

    Raises:
        FieldNotFoundError: If the registered model has no 'content' field
        EmbeddingFieldNotFoundError: If 'content' is not embeddable
    """
    table_name = params.table_name
    # SearchParameters doesn't have field_name, so 'content' is always used.
    # (The former "field_name is None" fallback could never run and was removed.)
    field_name = "content"

    # Get model fields for validation
    available_fields = self._get_model_fields(table_name)
    embeddable_fields = self._get_embeddable_fields(table_name)

    # Validate field exists
    if available_fields and field_name not in available_fields:
        raise FieldNotFoundError(
            table_name or "UNKNOWN",
            field_name,
            available_fields,
        )

    # Validate field has embeddings
    if embeddable_fields and field_name not in embeddable_fields:
        raise EmbeddingFieldNotFoundError(
            table_name or "UNKNOWN",
            field_name,
            embeddable_fields,
        )

    # Generate embedding for query text
    from ...settings import settings
    from ..embeddings.api import generate_embedding_async
    from .queries import SEARCH_QUERY, get_search_params

    # SearchParameters doesn't have provider, use default
    provider = settings.llm.embedding_provider

    query_embedding = await generate_embedding_async(
        text=params.query_text,
        model=settings.llm.embedding_model,
        provider=provider,
    )

    # Only fall back to 0.7 when no threshold was given; an explicit 0.0
    # ("accept any similarity") must not be overridden.
    min_similarity = 0.7 if params.min_similarity is None else params.min_similarity

    # Execute vector search via rem_search() PostgreSQL function
    query_params = get_search_params(
        query_embedding,
        table_name,
        field_name,
        tenant_id,
        provider,
        min_similarity,
        params.limit or 10,
        tenant_id,  # Use tenant_id (query.user_id) as user_id
    )
    results = await self.db.execute(SEARCH_QUERY, query_params)

    return {
        "query_type": "SEARCH",
        "query_text": params.query_text,
        "table_name": table_name,
        "field_name": field_name,
        "results": results,
        "count": len(results),
    }
|
|
332
|
+
|
|
333
|
+
async def _execute_sql(
|
|
334
|
+
self, params: SQLParameters, tenant_id: str
|
|
335
|
+
) -> dict[str, Any]:
|
|
336
|
+
"""
|
|
337
|
+
Execute SQL query via direct PostgresService.execute().
|
|
338
|
+
|
|
339
|
+
Pushes SELECT queries down to Postgres for performance.
|
|
340
|
+
|
|
341
|
+
Supports two modes:
|
|
342
|
+
1. Raw SQL: params.raw_query contains full SQL statement
|
|
343
|
+
2. Structured: params.table_name + where_clause (with tenant isolation)
|
|
344
|
+
|
|
345
|
+
Args:
|
|
346
|
+
params: SQLParameters with raw_query OR table_name + where_clause
|
|
347
|
+
tenant_id: Tenant identifier
|
|
348
|
+
|
|
349
|
+
Returns:
|
|
350
|
+
Query results
|
|
351
|
+
"""
|
|
352
|
+
# Mode 1: Raw SQL query (no tenant isolation added automatically)
|
|
353
|
+
if params.raw_query:
|
|
354
|
+
# Security: Block destructive operations
|
|
355
|
+
# Allow: SELECT, INSERT, UPDATE, WITH (read + data modifications)
|
|
356
|
+
# Block: DROP, DELETE, TRUNCATE, ALTER (destructive operations)
|
|
357
|
+
query_upper = params.raw_query.strip().upper()
|
|
358
|
+
forbidden_keywords = ["DROP", "DELETE", "TRUNCATE", "ALTER"]
|
|
359
|
+
|
|
360
|
+
for keyword in forbidden_keywords:
|
|
361
|
+
if query_upper.startswith(keyword):
|
|
362
|
+
raise ValueError(
|
|
363
|
+
f"Destructive SQL operation '{keyword}' is not allowed. "
|
|
364
|
+
f"Forbidden operations: {', '.join(forbidden_keywords)}"
|
|
365
|
+
)
|
|
366
|
+
|
|
367
|
+
results = await self.db.execute(params.raw_query)
|
|
368
|
+
return {
|
|
369
|
+
"query_type": "SQL",
|
|
370
|
+
"raw_query": params.raw_query,
|
|
371
|
+
"results": results,
|
|
372
|
+
"count": len(results),
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
# Mode 2: Structured query with tenant isolation
|
|
376
|
+
from .queries import build_sql_query
|
|
377
|
+
|
|
378
|
+
if not params.table_name:
|
|
379
|
+
raise ValueError("SQL query requires either raw_query or table_name")
|
|
380
|
+
|
|
381
|
+
# Build SQL query with tenant isolation
|
|
382
|
+
query = build_sql_query(
|
|
383
|
+
table_name=params.table_name,
|
|
384
|
+
where_clause=params.where_clause or "1=1",
|
|
385
|
+
tenant_id=tenant_id,
|
|
386
|
+
limit=params.limit,
|
|
387
|
+
)
|
|
388
|
+
|
|
389
|
+
results = await self.db.execute(query, (tenant_id,))
|
|
390
|
+
|
|
391
|
+
return {
|
|
392
|
+
"query_type": "SQL",
|
|
393
|
+
"table_name": params.table_name,
|
|
394
|
+
"results": results,
|
|
395
|
+
"count": len(results),
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
async def _execute_traverse(
    self, params: TraverseParameters, tenant_id: str
) -> dict[str, Any]:
    """
    Run a TRAVERSE query through TRAVERSE_QUERY.

    Only one relationship type is forwarded: the first entry of
    ``params.edge_types``. When the list is empty/unset or contains "*",
    no type filter is sent (None). A missing/zero ``max_depth`` is sent as 1.

    Delegates to: rem_traverse(entity_key, tenant_id, max_depth, rel_types, user_id)

    Args:
        params: TraverseParameters with start key (initial_query), depth and
            edge types
        tenant_id: Tenant identifier; passed as both tenant id and user id

    Returns:
        Dict with "query_type", "start_key", "max_depth", "edge_types",
        "results" and "count"
    """
    from .queries import TRAVERSE_QUERY, get_traverse_params

    # The PostgreSQL function takes a single type, not an array:
    # forward the first requested type, or None to mean "all types".
    edge_types = params.edge_types
    filter_by_type = bool(edge_types) and "*" not in edge_types
    rel_type: str | None = edge_types[0] if filter_by_type else None

    bound = get_traverse_params(
        start_key=params.initial_query,
        tenant_id=tenant_id,
        user_id=tenant_id,  # Use tenant_id (query.user_id) as user_id
        max_depth=params.max_depth or 1,
        rel_type=rel_type,
        keys_only=False,
    )
    rows = await self.db.execute(TRAVERSE_QUERY, bound)

    return {
        "query_type": "TRAVERSE",
        "start_key": params.initial_query,
        "max_depth": params.max_depth,
        "edge_types": params.edge_types,
        "results": rows,
        "count": len(rows),
    }
|
|
439
|
+
|
|
440
|
+
def _parse_query_string(self, query_string: str) -> tuple[QueryType, dict[str, Any]]:
    """
    Parse a REM query string with a fresh RemQueryParser.

    Args:
        query_string: REM query text (e.g., "LOOKUP sarah-chen")

    Returns:
        Whatever ``RemQueryParser.parse`` returns for the string: the query
        type together with a dict of parameters.
    """
    return RemQueryParser().parse(query_string)
|
|
446
|
+
|
|
447
|
+
async def ask_rem(
    self, natural_query: str, tenant_id: str, llm_model: str | None = None, plan_mode: bool = False
) -> dict[str, Any]:
    """
    Natural language to REM query conversion with optional execution.

    Asks the REM Query Agent to turn the question into a REM query string.
    The generated query is executed automatically when the agent's
    confidence is >= 0.7 and plan_mode is off; otherwise it is only returned
    for review. Parse/execution failures never propagate: they are logged
    and reported through the "warning" key.

    Args:
        natural_query: Natural language question
        tenant_id: Tenant identifier (used as the RemQuery user_id)
        llm_model: Optional LLM model override
        plan_mode: If True, only shows generated query without executing

    Returns:
        Dict with:
        - query: Generated REM query string (e.g., "LOOKUP sarah-chen")
        - confidence: Confidence score reported by the agent
        - reasoning: Agent explanation, or "" when none was given
        - natural_query: The original question
        - results: Executed query results (only if confidence >= 0.7,
          not plan_mode, and execution succeeded)
        - plan_mode: True (only when plan_mode was requested)
        - warning: Set on low confidence, or when parsing/execution failed

    Example:
        >>> result = await rem_service.ask_rem("Who is Sarah Chen?", tenant_id="acme")
        >>> print(result["query"])
        "LOOKUP sarah-chen"
        >>> print(result["results"]["count"])
        1

        >>> # Plan mode - show query without executing
        >>> result = await rem_service.ask_rem("Find Sarah", tenant_id="acme", plan_mode=True)
        >>> print(result["query"])
        "LOOKUP sarah"
        >>> print("results" in result)
        False
    """
    from ...agentic.agents import ask_rem as agent_ask_rem

    # Get query string from REM Query Agent
    query_output = await agent_ask_rem(
        natural_query=natural_query,
        llm_model=llm_model,
    )

    result = {
        "query": query_output.query,
        "confidence": query_output.confidence,
        "reasoning": query_output.reasoning or "",
        "natural_query": natural_query,
    }

    # Execute query if confidence is high enough and not in plan mode
    if query_output.confidence >= 0.7 and not plan_mode:
        try:
            # Parse query string
            query_type, parameters = self._parse_query_string(query_output.query)

            # Create RemQuery and execute
            # RemQuery takes user_id, which we treat as tenant_id
            # Pydantic will validate and convert the dict to the correct parameter type
            rem_query = RemQuery.model_validate({
                "query_type": query_type,
                "parameters": parameters,
                "user_id": tenant_id,
            })

            result["results"] = await self.execute_query(rem_query)

        except Exception as e:
            result["warning"] = f"Failed to parse or execute query: {str(e)}"
            # loguru has no exc_info= option; logger.exception attaches the
            # traceback. The error is passed as a format argument so braces
            # in its text are never interpreted as format fields.
            logger.exception("Query execution failed: {}", e)

    elif plan_mode:
        result["plan_mode"] = True
    else:
        # Low confidence - don't auto-execute
        result["warning"] = "Low confidence score. Review reasoning before executing."

    return result
|