remdb 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +801 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.7.dist-info/METADATA +1473 -0
- remdb-0.3.7.dist-info/RECORD +187 -0
- remdb-0.3.7.dist-info/WHEEL +4 -0
- remdb-0.3.7.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM service exceptions.
|
|
3
|
+
|
|
4
|
+
Custom exceptions for REM query execution errors.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class REMException(Exception):
|
|
9
|
+
"""Base exception for REM service errors."""
|
|
10
|
+
|
|
11
|
+
pass
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FieldNotFoundError(REMException):
|
|
15
|
+
"""Raised when a field does not exist in the model."""
|
|
16
|
+
|
|
17
|
+
def __init__(self, model_name: str, field_name: str, available_fields: list[str]):
|
|
18
|
+
self.model_name = model_name
|
|
19
|
+
self.field_name = field_name
|
|
20
|
+
self.available_fields = available_fields
|
|
21
|
+
super().__init__(
|
|
22
|
+
f"Field '{field_name}' not found in model '{model_name}'. "
|
|
23
|
+
f"Available fields: {', '.join(available_fields)}"
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
class EmbeddingFieldNotFoundError(REMException):
|
|
28
|
+
"""Raised when trying to search on a field that has no embeddings."""
|
|
29
|
+
|
|
30
|
+
def __init__(self, model_name: str, field_name: str, embeddable_fields: list[str]):
|
|
31
|
+
self.model_name = model_name
|
|
32
|
+
self.field_name = field_name
|
|
33
|
+
self.embeddable_fields = embeddable_fields
|
|
34
|
+
msg = (
|
|
35
|
+
f"Field '{field_name}' in model '{model_name}' does not have embeddings. "
|
|
36
|
+
)
|
|
37
|
+
if embeddable_fields:
|
|
38
|
+
msg += f"Embeddable fields: {', '.join(embeddable_fields)}"
|
|
39
|
+
else:
|
|
40
|
+
msg += "No embeddable fields configured for this model."
|
|
41
|
+
super().__init__(msg)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ContentFieldNotFoundError(REMException):
|
|
45
|
+
"""Raised when model has no 'content' field for default embedding search."""
|
|
46
|
+
|
|
47
|
+
def __init__(self, model_name: str, available_fields: list[str]):
|
|
48
|
+
self.model_name = model_name
|
|
49
|
+
self.available_fields = available_fields
|
|
50
|
+
super().__init__(
|
|
51
|
+
f"Model '{model_name}' has no 'content' field. "
|
|
52
|
+
f"Available fields: {', '.join(available_fields)}. "
|
|
53
|
+
f"Specify field_name explicitly in SearchParameters."
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class QueryExecutionError(REMException):
|
|
58
|
+
"""Raised when REM query execution fails."""
|
|
59
|
+
|
|
60
|
+
def __init__(self, query_type: str, message: str, original_error: Exception | None = None):
|
|
61
|
+
self.query_type = query_type
|
|
62
|
+
self.original_error = original_error
|
|
63
|
+
super().__init__(f"{query_type} query failed: {message}")
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class InvalidParametersError(REMException):
|
|
67
|
+
"""Raised when query parameters are invalid."""
|
|
68
|
+
|
|
69
|
+
def __init__(self, query_type: str, message: str):
|
|
70
|
+
self.query_type = query_type
|
|
71
|
+
super().__init__(f"Invalid {query_type} parameters: {message}")
|
|
@@ -0,0 +1,293 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM Query Executor - Shared PostgreSQL function calling layer.
|
|
3
|
+
|
|
4
|
+
This module provides the single source of truth for executing REM queries
|
|
5
|
+
against PostgreSQL functions (rem_lookup, rem_fetch, rem_fuzzy, rem_search, rem_traverse).
|
|
6
|
+
|
|
7
|
+
Both REMQueryService (string-based) and RemService (Pydantic-based) delegate
|
|
8
|
+
to these functions to avoid code duplication.
|
|
9
|
+
|
|
10
|
+
Design:
|
|
11
|
+
- One function per query type
|
|
12
|
+
- Query embeddings are produced by the caller and passed in (see execute_search)
|
|
13
|
+
- Direct PostgreSQL function calls
|
|
14
|
+
- Type-safe parameters via Pydantic models or dicts
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
import asyncio
|
|
18
|
+
from collections import defaultdict
|
|
19
|
+
from typing import Any, Optional, cast
|
|
20
|
+
from loguru import logger
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class REMQueryExecutor:
|
|
24
|
+
"""
|
|
25
|
+
Executor for REM PostgreSQL functions.
|
|
26
|
+
|
|
27
|
+
Provides unified backend for both string-based and Pydantic-based query services.
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
def __init__(self, postgres_service: Any):
|
|
31
|
+
"""
|
|
32
|
+
Initialize query executor.
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
postgres_service: PostgresService instance
|
|
36
|
+
"""
|
|
37
|
+
self.db = postgres_service
|
|
38
|
+
logger.debug("Initialized REMQueryExecutor")
|
|
39
|
+
|
|
40
|
+
async def execute_lookup(
|
|
41
|
+
self,
|
|
42
|
+
entity_key: str,
|
|
43
|
+
user_id: str | None = None,
|
|
44
|
+
) -> list[dict[str, Any]]:
|
|
45
|
+
"""
|
|
46
|
+
Execute rem_lookup() PostgreSQL function.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
entity_key: Entity key to lookup
|
|
50
|
+
user_id: Optional user filter
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
List of entity dicts from KV_STORE
|
|
54
|
+
"""
|
|
55
|
+
sql = """
|
|
56
|
+
SELECT entity_type, data
|
|
57
|
+
FROM rem_lookup($1, $2, $3)
|
|
58
|
+
"""
|
|
59
|
+
|
|
60
|
+
results = await self.db.execute(sql, (entity_key, user_id, user_id))
|
|
61
|
+
# Extract JSONB records from the data column and add aliases
|
|
62
|
+
entities = []
|
|
63
|
+
for row in results:
|
|
64
|
+
entity = dict(row["data"])
|
|
65
|
+
# Add entity_key as alias for name (for backward compat with tests)
|
|
66
|
+
if "name" in entity:
|
|
67
|
+
entity["entity_key"] = entity["name"]
|
|
68
|
+
# Add entity_id as alias for id (for backward compat with tests)
|
|
69
|
+
if "id" in entity:
|
|
70
|
+
entity["entity_id"] = entity["id"]
|
|
71
|
+
entities.append(entity)
|
|
72
|
+
logger.debug(f"LOOKUP '{entity_key}': {len(entities)} results")
|
|
73
|
+
return entities
|
|
74
|
+
|
|
75
|
+
async def execute_fetch(
|
|
76
|
+
self,
|
|
77
|
+
entity_keys: list[str],
|
|
78
|
+
entity_types: list[str],
|
|
79
|
+
user_id: str | None = None,
|
|
80
|
+
) -> list[dict[str, Any]]:
|
|
81
|
+
"""
|
|
82
|
+
Execute rem_fetch() PostgreSQL function.
|
|
83
|
+
|
|
84
|
+
Fetches full entity records (all columns) from multiple tables by entity keys.
|
|
85
|
+
Groups by table internally, fetches all records, returns unified JSONB result set.
|
|
86
|
+
Returns complete entities, not just KV store metadata.
|
|
87
|
+
|
|
88
|
+
Args:
|
|
89
|
+
entity_keys: List of entity keys to fetch
|
|
90
|
+
entity_types: Parallel list of entity types (table names)
|
|
91
|
+
user_id: Optional user filter
|
|
92
|
+
|
|
93
|
+
Returns:
|
|
94
|
+
List of full entity records as dicts with entity_key, entity_type, and entity_record
|
|
95
|
+
"""
|
|
96
|
+
if not entity_keys:
|
|
97
|
+
return []
|
|
98
|
+
|
|
99
|
+
# Build JSONB structure: {"resources": ["key1", "key2"], "moments": ["key3"]}
|
|
100
|
+
import json
|
|
101
|
+
entities_by_table: dict[str, list[str]] = {}
|
|
102
|
+
for key, table in zip(entity_keys, entity_types):
|
|
103
|
+
if table not in entities_by_table:
|
|
104
|
+
entities_by_table[table] = []
|
|
105
|
+
entities_by_table[table].append(key)
|
|
106
|
+
|
|
107
|
+
entities_json = json.dumps(entities_by_table)
|
|
108
|
+
|
|
109
|
+
sql = """
|
|
110
|
+
SELECT entity_key, entity_type, entity_record
|
|
111
|
+
FROM rem_fetch($1::jsonb, $2)
|
|
112
|
+
"""
|
|
113
|
+
|
|
114
|
+
results = await self.db.execute(sql, (entities_json, user_id))
|
|
115
|
+
|
|
116
|
+
logger.debug(
|
|
117
|
+
f"FETCH: {len(results)}/{len(entity_keys)} records fetched from {len(set(entity_types))} tables"
|
|
118
|
+
)
|
|
119
|
+
return cast(list[dict[str, Any]], results)
|
|
120
|
+
|
|
121
|
+
async def execute_fuzzy(
|
|
122
|
+
self,
|
|
123
|
+
query_text: str,
|
|
124
|
+
user_id: str | None = None,
|
|
125
|
+
threshold: float = 0.3,
|
|
126
|
+
limit: int = 10,
|
|
127
|
+
) -> list[dict[str, Any]]:
|
|
128
|
+
"""
|
|
129
|
+
Execute rem_fuzzy() PostgreSQL function.
|
|
130
|
+
|
|
131
|
+
Args:
|
|
132
|
+
query_text: Text to fuzzy match
|
|
133
|
+
user_id: Optional user filter
|
|
134
|
+
threshold: Similarity threshold (0.0-1.0)
|
|
135
|
+
limit: Max results
|
|
136
|
+
|
|
137
|
+
Returns:
|
|
138
|
+
List of fuzzy-matched entities with similarity_score
|
|
139
|
+
"""
|
|
140
|
+
sql = """
|
|
141
|
+
SELECT entity_type, data, similarity_score
|
|
142
|
+
FROM rem_fuzzy($1, $2, $3, $4, $5)
|
|
143
|
+
"""
|
|
144
|
+
|
|
145
|
+
results = await self.db.execute(
|
|
146
|
+
sql, (query_text, user_id, threshold, limit, user_id)
|
|
147
|
+
)
|
|
148
|
+
# Extract JSONB records and add similarity_score + entity_key alias
|
|
149
|
+
entities = []
|
|
150
|
+
for row in results:
|
|
151
|
+
entity = dict(row["data"])
|
|
152
|
+
entity["similarity_score"] = row["similarity_score"]
|
|
153
|
+
# Add entity_key as alias for name (for backward compat)
|
|
154
|
+
if "name" in entity:
|
|
155
|
+
entity["entity_key"] = entity["name"]
|
|
156
|
+
entities.append(entity)
|
|
157
|
+
logger.debug(f"FUZZY '{query_text}': {len(entities)} results (threshold={threshold})")
|
|
158
|
+
return entities
|
|
159
|
+
|
|
160
|
+
async def execute_search(
|
|
161
|
+
self,
|
|
162
|
+
query_embedding: list[float],
|
|
163
|
+
table_name: str,
|
|
164
|
+
field_name: str,
|
|
165
|
+
provider: str,
|
|
166
|
+
min_similarity: float = 0.7,
|
|
167
|
+
limit: int = 10,
|
|
168
|
+
user_id: str | None = None,
|
|
169
|
+
) -> list[dict[str, Any]]:
|
|
170
|
+
"""
|
|
171
|
+
Execute rem_search() PostgreSQL function.
|
|
172
|
+
|
|
173
|
+
Args:
|
|
174
|
+
query_embedding: Embedding vector for query
|
|
175
|
+
table_name: Table to search (resources, moments, users)
|
|
176
|
+
field_name: Field name to search
|
|
177
|
+
provider: Embedding provider (openai, anthropic)
|
|
178
|
+
min_similarity: Minimum cosine similarity
|
|
179
|
+
limit: Max results
|
|
180
|
+
user_id: Optional user filter
|
|
181
|
+
|
|
182
|
+
Returns:
|
|
183
|
+
List of similar entities with distance scores
|
|
184
|
+
"""
|
|
185
|
+
# Convert embedding to PostgreSQL vector format
|
|
186
|
+
embedding_str = "[" + ",".join(str(x) for x in query_embedding) + "]"
|
|
187
|
+
|
|
188
|
+
sql = """
|
|
189
|
+
SELECT entity_type, similarity_score, data
|
|
190
|
+
FROM rem_search($1::vector(1536), $2, $3, $4, $5, $6, $7, $8)
|
|
191
|
+
"""
|
|
192
|
+
|
|
193
|
+
results = await self.db.execute(
|
|
194
|
+
sql,
|
|
195
|
+
(
|
|
196
|
+
embedding_str,
|
|
197
|
+
table_name,
|
|
198
|
+
field_name,
|
|
199
|
+
user_id, # tenant_id (backward compat)
|
|
200
|
+
provider,
|
|
201
|
+
min_similarity,
|
|
202
|
+
limit,
|
|
203
|
+
user_id, # user_id
|
|
204
|
+
),
|
|
205
|
+
)
|
|
206
|
+
# Extract JSONB records and add similarity_score + entity_key alias
|
|
207
|
+
entities = []
|
|
208
|
+
for row in results:
|
|
209
|
+
entity = dict(row["data"])
|
|
210
|
+
entity["similarity_score"] = row["similarity_score"]
|
|
211
|
+
entity["entity_type"] = row["entity_type"]
|
|
212
|
+
# Add entity_key as alias for name (for backward compat)
|
|
213
|
+
if "name" in entity:
|
|
214
|
+
entity["entity_key"] = entity["name"]
|
|
215
|
+
# Add distance as alias for similarity_score (for backward compat)
|
|
216
|
+
# Note: similarity_score is cosine similarity (higher = more similar)
|
|
217
|
+
# distance is inverse (lower = more similar), so: distance = 1 - similarity_score
|
|
218
|
+
entity["distance"] = 1.0 - row["similarity_score"]
|
|
219
|
+
entities.append(entity)
|
|
220
|
+
logger.debug(
|
|
221
|
+
f"SEARCH in {table_name}.{field_name}: {len(entities)} results (similarity≥{min_similarity})"
|
|
222
|
+
)
|
|
223
|
+
return entities
|
|
224
|
+
|
|
225
|
+
async def execute_traverse(
|
|
226
|
+
self,
|
|
227
|
+
start_key: str,
|
|
228
|
+
direction: str,
|
|
229
|
+
max_depth: int,
|
|
230
|
+
edge_types: list[str] | None,
|
|
231
|
+
user_id: str | None = None,
|
|
232
|
+
) -> list[dict[str, Any]]:
|
|
233
|
+
"""
|
|
234
|
+
Execute rem_traverse() PostgreSQL function.
|
|
235
|
+
|
|
236
|
+
Args:
|
|
237
|
+
start_key: Starting entity key
|
|
238
|
+
direction: OUTBOUND, INBOUND, or BOTH (not used in current function)
|
|
239
|
+
max_depth: Maximum traversal depth
|
|
240
|
+
edge_types: Optional list of edge types to filter
|
|
241
|
+
user_id: Optional user filter
|
|
242
|
+
|
|
243
|
+
Returns:
|
|
244
|
+
List of traversed entities with path information
|
|
245
|
+
"""
|
|
246
|
+
# Convert edge_types to PostgreSQL array or NULL
|
|
247
|
+
edge_types_sql = None
|
|
248
|
+
if edge_types:
|
|
249
|
+
edge_types_sql = "{" + ",".join(edge_types) + "}"
|
|
250
|
+
|
|
251
|
+
# Note: rem_traverse signature is (entity_key, tenant_id, user_id, max_depth, rel_type, keys_only)
|
|
252
|
+
# tenant_id is for backward compat, set to user_id
|
|
253
|
+
# direction parameter is not used by the current PostgreSQL function
|
|
254
|
+
# edge_types is single value, not array
|
|
255
|
+
edge_type_filter = edge_types[0] if edge_types else None
|
|
256
|
+
|
|
257
|
+
sql = """
|
|
258
|
+
SELECT depth, entity_key, entity_type, entity_id, rel_type, rel_weight, path, entity_record
|
|
259
|
+
FROM rem_traverse($1, $2, $3, $4, $5, $6)
|
|
260
|
+
"""
|
|
261
|
+
|
|
262
|
+
results = await self.db.execute(
|
|
263
|
+
sql, (start_key, user_id, user_id, max_depth, edge_type_filter, False)
|
|
264
|
+
)
|
|
265
|
+
# Add edge_type alias for rel_type (backward compat)
|
|
266
|
+
processed_results = []
|
|
267
|
+
for row in results:
|
|
268
|
+
result = dict(row)
|
|
269
|
+
if "rel_type" in result:
|
|
270
|
+
result["edge_type"] = result["rel_type"]
|
|
271
|
+
processed_results.append(result)
|
|
272
|
+
|
|
273
|
+
logger.debug(
|
|
274
|
+
f"TRAVERSE from '{start_key}' (depth={max_depth}): {len(processed_results)} results"
|
|
275
|
+
)
|
|
276
|
+
return processed_results
|
|
277
|
+
|
|
278
|
+
async def execute_sql(
|
|
279
|
+
self,
|
|
280
|
+
query: str,
|
|
281
|
+
) -> list[dict[str, Any]]:
|
|
282
|
+
"""
|
|
283
|
+
Execute raw SQL query.
|
|
284
|
+
|
|
285
|
+
Args:
|
|
286
|
+
query: SQL query string
|
|
287
|
+
|
|
288
|
+
Returns:
|
|
289
|
+
Query results as list of dicts
|
|
290
|
+
"""
|
|
291
|
+
results = await self.db.execute(query)
|
|
292
|
+
logger.debug(f"SQL query: {len(results)} results")
|
|
293
|
+
return cast(list[dict[str, Any]], results)
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
import shlex
|
|
2
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
3
|
+
|
|
4
|
+
from ...models.core import QueryType
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class RemQueryParser:
    """
    Robust parser for REM query language using shlex for proper quoting support.

    The first token selects the query type; remaining tokens are either
    ``key=value`` keyword arguments or positional words that are joined into
    the query type's primary field.
    """

    def parse(self, query_string: str) -> Tuple[QueryType, Dict[str, Any]]:
        """
        Parse a REM query string into a QueryType and a dictionary of parameters.

        Args:
            query_string: The raw query string (e.g., 'LOOKUP "Sarah Chen"').

        Returns:
            Tuple of (QueryType, parameters_dict). When the first token is not
            a known query type, the whole string is treated as raw SQL and
            returned as ``(QueryType.SQL, {"raw_query": ...})``.

        Raises:
            ValueError: If the query string is empty or cannot be tokenized
                (e.g. unbalanced quotes).
        """
        if not query_string or not query_string.strip():
            raise ValueError("Empty query string")

        try:
            # Use shlex to handle quoted strings correctly
            tokens = shlex.split(query_string)
        except ValueError as e:
            raise ValueError(f"Failed to parse query string: {e}") from e

        if not tokens:
            raise ValueError("Empty query string")

        query_type_str = tokens[0].upper()

        try:
            query_type = QueryType(query_type_str)
        except ValueError:
            # Not a known REM query type: treat as raw SQL
            # (SELECT, INSERT, UPDATE, DELETE, WITH, DROP, CREATE, ALTER, etc.)
            return QueryType.SQL, {"raw_query": query_string.strip()}

        params: Dict[str, Any] = {}
        positional_args: List[str] = []

        for token in tokens[1:]:
            key, sep, value = token.partition("=")
            # shlex has already removed quotes, so a quoted phrase such as
            # "what is x=1" arrives as one token containing "=". Only treat a
            # token as key=value when the key looks like a parameter name.
            if sep and key.isidentifier():
                mapped_key = self._map_parameter_alias(key)
                params[mapped_key] = self._convert_value(mapped_key, value)
            else:
                positional_args.append(token)

        # Map positional arguments to specific fields based on QueryType
        self._map_positional_args(query_type, positional_args, params)

        return query_type, params

    def _map_parameter_alias(self, key: str) -> str:
        """
        Map common aliases to internal model field names.

        Unknown keys are returned unchanged.
        """
        aliases = {
            "table": "table_name",
            "field": "field_name",
            "where": "where_clause",
            "depth": "max_depth",
            "rel_type": "edge_types",
            "rel_types": "edge_types",
        }
        return aliases.get(key, key)

    def _convert_value(self, key: str, value: str) -> Union[str, int, float, List[str]]:
        """
        Convert string values to appropriate types based on the key name.

        Values that fail numeric conversion are returned as the original
        string so that downstream validation can report them.
        """
        # Integer fields
        if key in ("limit", "max_depth", "depth"):
            try:
                return int(value)
            except ValueError:
                return value  # Return as string if conversion fails (validation will catch it)

        # Float fields
        if key in ("threshold", "min_similarity"):
            try:
                return float(value)
            except ValueError:
                return value

        # List fields (comma-separated)
        if key in ("edge_types", "tags"):
            return [v.strip() for v in value.split(",")]

        # Default to string
        return value

    def _map_positional_args(
        self, query_type: QueryType, positional_args: List[str], params: Dict[str, Any]
    ) -> None:
        """
        Map accumulated positional arguments to the primary field for the query type.

        Mutates ``params`` in place; does nothing when there are no
        positional arguments or the query type has no primary field here.
        """
        if not positional_args:
            return

        # Join positional args with space to reconstruct the text.
        # This handles cases where the user didn't quote a multi-word string,
        # e.g. FUZZY Sarah Chen -> "Sarah Chen"
        combined_value = " ".join(positional_args)

        if query_type == QueryType.LOOKUP:
            # A positional LOOKUP argument is a single key unless it contains
            # commas, in which case it is split into a list of keys.
            if "," in combined_value:
                params["key"] = [k.strip() for k in combined_value.split(",")]
            else:
                params["key"] = combined_value

        elif query_type == QueryType.FUZZY:
            params["query_text"] = combined_value

        elif query_type == QueryType.SEARCH:
            params["query_text"] = combined_value

        elif query_type == QueryType.TRAVERSE:
            params["initial_query"] = combined_value

        # SQL typically requires named arguments (table=...); positional
        # arguments for SQL are ignored because the current service doesn't
        # use them.
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM Query SQL Templates.
|
|
3
|
+
|
|
4
|
+
All SQL queries for REM operations are defined here with proper parameterization.
|
|
5
|
+
This separates query logic from business logic and makes queries easier to maintain.
|
|
6
|
+
|
|
7
|
+
Design Pattern:
|
|
8
|
+
- Each query is a named constant with $1, $2, etc. placeholders
|
|
9
|
+
- Query parameters are documented in docstrings
|
|
10
|
+
- Queries delegate to PostgreSQL functions for performance
|
|
11
|
+
- All queries include tenant isolation
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
# LOOKUP query template.
# Delegates to the rem_lookup() PostgreSQL function.
# Returns raw JSONB data for LLM consumption.
LOOKUP_QUERY = """
SELECT
entity_type,
data
FROM rem_lookup($1, $2, $3)
"""
# Parameters (same order as the tuple built by get_lookup_params):
#   $1: entity_key (str)
#   $2: tenant_id (str)
#   $3: user_id (str | None)
# Returns:
#   - entity_type: Table name (e.g., "resources", "users")
#   - data: Complete entity record as JSONB
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# FUZZY query template.
# Delegates to the rem_fuzzy() PostgreSQL function.
# Returns raw JSONB data with similarity scores.
FUZZY_QUERY = """
SELECT
entity_type,
similarity_score,
data
FROM rem_fuzzy($1, $2, $3, $4, $5)
"""
# Parameters (same order as the tuple built by get_fuzzy_params):
#   $1: query_text (str)
#   $2: tenant_id (str)
#   $3: threshold (float)
#   $4: limit (int)
#   $5: user_id (str | None)
# Returns:
#   - entity_type: Table name (e.g., "resources", "files")
#   - similarity_score: Fuzzy match score (0.0-1.0)
#   - data: Complete entity record as JSONB
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
# SEARCH query template.
# Delegates to the rem_search() PostgreSQL function.
# Returns raw JSONB data with similarity scores.
SEARCH_QUERY = """
SELECT
entity_type,
similarity_score,
data
FROM rem_search($1, $2, $3, $4, $5, $6, $7, $8)
"""
# Parameters (same order as the tuple built by get_search_params):
#   $1: query_embedding (list[float]; get_search_params sends it as str(list))
#   $2: table_name (str)
#   $3: field_name (str)
#   $4: tenant_id (str)
#   $5: provider (str)
#   $6: min_similarity (float)
#   $7: limit (int)
#   $8: user_id (str | None)
# Returns:
#   - entity_type: Table name (e.g., "resources", "moments")
#   - similarity_score: Vector similarity (0.0-1.0)
#   - data: Complete entity record as JSONB
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# TRAVERSE query template.
# Delegates to the rem_traverse() PostgreSQL function.
TRAVERSE_QUERY = """
SELECT
depth,
entity_key,
entity_type,
entity_id,
rel_type,
rel_weight,
path
FROM rem_traverse($1, $2, $3, $4, $5, $6)
"""
# Parameters (same order as the tuple built by get_traverse_params):
#   $1: start_key (str)
#   $2: tenant_id (str)
#   $3: user_id (str | None)
#   $4: max_depth (int)
#   $5: rel_type (str | None) - single type, not array
#   $6: keys_only (bool)
# Returns one row per traversed entity with the columns selected above:
#   depth, entity_key, entity_type, entity_id, rel_type, rel_weight, path
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# SQL Query Builder
|
|
102
|
+
# Direct SQL queries with tenant isolation
|
|
103
|
+
def build_sql_query(table_name: str, where_clause: str, tenant_id: str, limit: int | None = None) -> str:
|
|
104
|
+
"""
|
|
105
|
+
Build SQL query with tenant isolation.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
table_name: Table name (e.g., "resources", "moments")
|
|
109
|
+
where_clause: WHERE clause (e.g., "moment_type='meeting'")
|
|
110
|
+
tenant_id: Tenant identifier for isolation
|
|
111
|
+
limit: Optional result limit
|
|
112
|
+
|
|
113
|
+
Returns:
|
|
114
|
+
Parameterized SQL query string
|
|
115
|
+
|
|
116
|
+
Note:
|
|
117
|
+
This builds a dynamic query. Consider using prepared statements
|
|
118
|
+
or query builders like SQLAlchemy for production.
|
|
119
|
+
"""
|
|
120
|
+
# Sanitize table name (basic validation)
|
|
121
|
+
allowed_tables = ["resources", "moments", "messages", "users", "files"]
|
|
122
|
+
if table_name not in allowed_tables:
|
|
123
|
+
raise ValueError(f"Invalid table name: {table_name}")
|
|
124
|
+
|
|
125
|
+
# Build query with tenant isolation
|
|
126
|
+
where_clause = where_clause or "1=1"
|
|
127
|
+
query = f"SELECT * FROM {table_name} WHERE tenant_id = $1 AND ({where_clause})"
|
|
128
|
+
|
|
129
|
+
if limit:
|
|
130
|
+
query += f" LIMIT {int(limit)}"
|
|
131
|
+
|
|
132
|
+
return query
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
# Helper: Get query parameters for LOOKUP
|
|
136
|
+
def get_lookup_params(entity_key: str, tenant_id: str, user_id: str | None = None) -> tuple:
|
|
137
|
+
"""Get parameters for LOOKUP query."""
|
|
138
|
+
return (entity_key, tenant_id, user_id)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# Helper: Get query parameters for FUZZY
|
|
142
|
+
def get_fuzzy_params(
|
|
143
|
+
query_text: str,
|
|
144
|
+
tenant_id: str,
|
|
145
|
+
threshold: float = 0.7,
|
|
146
|
+
limit: int = 10,
|
|
147
|
+
user_id: str | None = None,
|
|
148
|
+
) -> tuple:
|
|
149
|
+
"""Get parameters for FUZZY query."""
|
|
150
|
+
return (query_text, tenant_id, threshold, limit, user_id)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
# Helper: Get query parameters for SEARCH
|
|
154
|
+
def get_search_params(
|
|
155
|
+
query_embedding: list[float],
|
|
156
|
+
table_name: str,
|
|
157
|
+
field_name: str,
|
|
158
|
+
tenant_id: str,
|
|
159
|
+
provider: str,
|
|
160
|
+
min_similarity: float = 0.7,
|
|
161
|
+
limit: int = 10,
|
|
162
|
+
user_id: str | None = None,
|
|
163
|
+
) -> tuple:
|
|
164
|
+
"""
|
|
165
|
+
Get parameters for SEARCH query.
|
|
166
|
+
|
|
167
|
+
Note: provider parameter is required (no default) - should come from settings.
|
|
168
|
+
"""
|
|
169
|
+
return (
|
|
170
|
+
str(query_embedding),
|
|
171
|
+
table_name,
|
|
172
|
+
field_name,
|
|
173
|
+
tenant_id,
|
|
174
|
+
provider,
|
|
175
|
+
min_similarity,
|
|
176
|
+
limit,
|
|
177
|
+
user_id,
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
# Helper: Get query parameters for TRAVERSE
|
|
182
|
+
def get_traverse_params(
|
|
183
|
+
start_key: str,
|
|
184
|
+
tenant_id: str,
|
|
185
|
+
user_id: str | None,
|
|
186
|
+
max_depth: int = 1,
|
|
187
|
+
rel_type: str | None = None,
|
|
188
|
+
keys_only: bool = False,
|
|
189
|
+
) -> tuple:
|
|
190
|
+
"""
|
|
191
|
+
Get parameters for TRAVERSE query.
|
|
192
|
+
|
|
193
|
+
Note: rel_type is singular (not array) - PostgreSQL function filters by single type.
|
|
194
|
+
If you need multiple types, call traverse multiple times or update the function.
|
|
195
|
+
"""
|
|
196
|
+
return (start_key, tenant_id, user_id, max_depth, rel_type, keys_only)
|