remdb-0.2.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +565 -0
- rem/cli/commands/configure.py +423 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1124 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +88 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +657 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +229 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.2.6.dist-info/METADATA +1191 -0
- remdb-0.2.6.dist-info/RECORD +187 -0
- remdb-0.2.6.dist-info/WHEEL +4 -0
- remdb-0.2.6.dist-info/entry_points.txt +2 -0
rem/services/postgres/register_type.py
@@ -0,0 +1,352 @@
"""
Dynamic table and embeddings schema generator from Pydantic models.

Generates:
1. Primary table for entity storage
2. embeddings_<table> for vector embeddings (one row per field per provider)
3. Registers entity in KV_STORE cache
4. Background index creation for performance

Design Patterns:
- Fields marked with json_schema_extra={"embed": True} get embeddings
- Content fields (TextField, description, etc.) embed by default
- Multiple embedding providers supported (OpenAI, Cohere, etc.)
- UNLOGGED KV_STORE for O(1) lookups
- Background index creation to avoid blocking writes
"""

from typing import Any, Type

from loguru import logger
from pydantic import BaseModel

from ...utils.sql_types import get_column_definition

# Embedding configuration
DEFAULT_EMBEDDING_PROVIDER = "openai"
DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
DEFAULT_EMBEDDING_DIMENSIONS = 1536

# Fields that embed by default (if not explicitly disabled)
DEFAULT_EMBED_FIELD_NAMES = {
    "content",
    "description",
    "summary",
    "text",
    "body",
    "message",
    "notes",
}


def should_embed_field(field_name: str, field_info: Any) -> bool:
    """
    Determine if a field should have embeddings generated.

    Rules:
    1. If json_schema_extra.embed = True, always embed
    2. If json_schema_extra.embed = False, never embed
    3. If field name in DEFAULT_EMBED_FIELD_NAMES, embed by default
    4. Otherwise, don't embed

    Args:
        field_name: Field name from Pydantic model
        field_info: Field metadata from model.model_fields

    Returns:
        True if field should have embeddings
    """
    # Check json_schema_extra for explicit embed configuration
    json_extra = getattr(field_info, "json_schema_extra", None)
    if json_extra:
        if isinstance(json_extra, dict):
            embed = json_extra.get("embed")
            if embed is not None:
                return bool(embed)

    # Default: embed if field name matches common content fields
    return field_name.lower() in DEFAULT_EMBED_FIELD_NAMES


def generate_table_schema(
    model: Type[BaseModel], table_name: str, tenant_scoped: bool = True
) -> str:
    """
    Generate CREATE TABLE SQL for Pydantic model.

    Args:
        model: Pydantic model class
        table_name: Table name (e.g., "resources", "moments")
        tenant_scoped: If True, add tenant_id column and indexes

    Returns:
        SQL CREATE TABLE statement
    """
    columns = []
    indexes = []

    # System fields that we add separately (skip if in model)
    SYSTEM_FIELDS = {
        "id", "created_at", "updated_at", "deleted_at",
        "tenant_id", "user_id", "graph_edges", "metadata", "tags", "column"
    }

    # Always add id as primary key
    columns.append("id UUID PRIMARY KEY DEFAULT uuid_generate_v4()")

    # Add tenant_id if tenant scoped
    if tenant_scoped:
        columns.append("tenant_id VARCHAR(100) NOT NULL")
        indexes.append(f"CREATE INDEX idx_{table_name}_tenant ON {table_name} (tenant_id);")

    # Add user_id (owner field)
    columns.append("user_id VARCHAR(256)")
    indexes.append(f"CREATE INDEX idx_{table_name}_user ON {table_name} (user_id);")

    # Process Pydantic fields (skip system fields)
    for field_name, field_info in model.model_fields.items():
        if field_name in SYSTEM_FIELDS:
            continue  # Skip system fields - we add them separately

        # Use sql_types utility for consistent type mapping
        column_def = get_column_definition(
            field_info,
            field_name,
            nullable=not field_info.is_required(),
            primary_key=False
        )
        columns.append(column_def)

    # Add system fields (timestamps)
    columns.append("created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
    columns.append("updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
    columns.append("deleted_at TIMESTAMP")

    # Add graph_edges JSONB field
    columns.append("graph_edges JSONB DEFAULT '[]'::jsonb")
    indexes.append(
        f"CREATE INDEX idx_{table_name}_graph_edges ON {table_name} USING GIN (graph_edges);"
    )

    # Add metadata JSONB field
    columns.append("metadata JSONB DEFAULT '{}'::jsonb")
    indexes.append(
        f"CREATE INDEX idx_{table_name}_metadata ON {table_name} USING GIN (metadata);"
    )

    # Add tags field (TEXT[] for list[str])
    columns.append("tags TEXT[] DEFAULT ARRAY[]::TEXT[]")
    indexes.append(
        f"CREATE INDEX idx_{table_name}_tags ON {table_name} USING GIN (tags);"
    )

    # Generate CREATE TABLE statement
    create_table = f"""
CREATE TABLE IF NOT EXISTS {table_name} (
    {',\n    '.join(columns)}
);
""".strip()

    # Generate indexes
    index_sql = "\n".join(indexes)

    return f"{create_table}\n\n{index_sql}"


def generate_embeddings_schema(
    model: Type[BaseModel], table_name: str, embedding_provider: str = DEFAULT_EMBEDDING_PROVIDER
) -> tuple[str, list[str]]:
    """
    Generate embeddings table schema for a model.

    Creates embeddings_<table_name> with:
    - One row per entity per field per provider
    - Unique constraint on (entity_id, field_name, provider)
    - Vector column with pgvector
    - HNSW index for fast similarity search

    Args:
        model: Pydantic model class
        table_name: Base table name
        embedding_provider: Default provider (e.g., "openai", "cohere")

    Returns:
        Tuple of (CREATE TABLE sql, list of embeddable field names)
    """
    embeddings_table = f"embeddings_{table_name}"
    embeddable_fields = []

    # Find fields that should have embeddings
    for field_name, field_info in model.model_fields.items():
        if should_embed_field(field_name, field_info):
            embeddable_fields.append(field_name)

    if not embeddable_fields:
        logger.warning(f"No embeddable fields found for {table_name}")
        return "", []

    # Generate embeddings table
    create_sql = f"""
CREATE TABLE IF NOT EXISTS {embeddings_table} (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    entity_id UUID NOT NULL REFERENCES {table_name}(id) ON DELETE CASCADE,
    field_name VARCHAR(100) NOT NULL,
    provider VARCHAR(50) NOT NULL DEFAULT '{embedding_provider}',
    model VARCHAR(100) NOT NULL DEFAULT '{DEFAULT_EMBEDDING_MODEL}',
    embedding vector({DEFAULT_EMBEDDING_DIMENSIONS}) NOT NULL,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

    -- Unique: one embedding per entity per field per provider
    UNIQUE (entity_id, field_name, provider)
);

-- Index for entity lookup (get all embeddings for entity)
CREATE INDEX idx_{embeddings_table}_entity ON {embeddings_table} (entity_id);

-- Index for field + provider lookup
CREATE INDEX idx_{embeddings_table}_field_provider ON {embeddings_table} (field_name, provider);

-- HNSW index for vector similarity search (created in background)
-- Note: This will be created by background thread after data load
-- CREATE INDEX idx_{embeddings_table}_vector_hnsw ON {embeddings_table}
-- USING hnsw (embedding vector_cosine_ops);
""".strip()

    logger.info(
        f"Generated embeddings schema for {table_name} with fields: {embeddable_fields}"
    )

    return create_sql, embeddable_fields


# NOTE: _map_pydantic_to_postgres_type is now replaced by utils.sql_types.get_sql_type
# Removed to use the centralized utility instead


def generate_kv_store_upsert(
    table_name: str,
    entity_key_field: str = "name",
) -> str:
    """
    Generate trigger to maintain KV_STORE cache on entity changes.

    Creates a trigger that:
    1. Extracts entity_key from entity (e.g., name, key, label)
    2. Updates KV_STORE on INSERT/UPDATE for O(1) lookups
    3. Removes from KV_STORE on DELETE

    Args:
        table_name: Base table name
        entity_key_field: Field to use as entity_key in KV_STORE

    Returns:
        SQL for trigger creation
    """
    trigger_name = f"trg_{table_name}_kv_store"
    function_name = f"fn_{table_name}_kv_store_upsert"

    return f"""
-- Trigger function to maintain KV_STORE for {table_name}
CREATE OR REPLACE FUNCTION {function_name}()
RETURNS TRIGGER AS $$
BEGIN
    IF (TG_OP = 'DELETE') THEN
        -- Remove from KV_STORE on delete
        DELETE FROM kv_store
        WHERE entity_id = OLD.id;
        RETURN OLD;
    ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
        -- Upsert to KV_STORE (O(1) lookup by entity_key)
        INSERT INTO kv_store (
            entity_key,
            entity_type,
            entity_id,
            tenant_id,
            user_id,
            metadata,
            graph_edges,
            updated_at
        ) VALUES (
            NEW.{entity_key_field}::VARCHAR,
            '{table_name}',
            NEW.id,
            NEW.tenant_id,
            NEW.user_id,
            NEW.metadata,
            COALESCE(NEW.graph_edges, '[]'::jsonb),
            CURRENT_TIMESTAMP
        )
        ON CONFLICT (tenant_id, entity_key)
        DO UPDATE SET
            entity_id = EXCLUDED.entity_id,
            user_id = EXCLUDED.user_id,
            metadata = EXCLUDED.metadata,
            graph_edges = EXCLUDED.graph_edges,
            updated_at = CURRENT_TIMESTAMP;

        RETURN NEW;
    END IF;
END;
$$ LANGUAGE plpgsql;

-- Create trigger
DROP TRIGGER IF EXISTS {trigger_name} ON {table_name};
CREATE TRIGGER {trigger_name}
AFTER INSERT OR UPDATE OR DELETE ON {table_name}
FOR EACH ROW EXECUTE FUNCTION {function_name}();
""".strip()


async def register_type(
    model: Type[BaseModel],
    table_name: str,
    entity_key_field: str = "name",
    tenant_scoped: bool = True,
    create_embeddings: bool = True,
    create_kv_trigger: bool = True,
) -> dict[str, Any]:
    """
    Register a Pydantic model as a database schema.

    Creates:
    1. Primary table for entity storage
    2. Embeddings table (if create_embeddings=True)
    3. KV_STORE trigger (if create_kv_trigger=True)

    Args:
        model: Pydantic model class
        table_name: Table name
        entity_key_field: Field to use as natural key in KV_STORE
        tenant_scoped: Add tenant_id column and indexes
        create_embeddings: Create embeddings table
        create_kv_trigger: Create KV_STORE trigger

    Returns:
        Dict with SQL statements and metadata
    """
    result = {
        "table_name": table_name,
        "model": model.__name__,
        "sql": {},
        "embeddable_fields": [],
    }

    # Generate primary table schema
    table_sql = generate_table_schema(model, table_name, tenant_scoped)
    result["sql"]["table"] = table_sql

    # Generate embeddings schema
    if create_embeddings:
        embeddings_sql, embeddable_fields = generate_embeddings_schema(model, table_name)
        result["sql"]["embeddings"] = embeddings_sql
        result["embeddable_fields"] = embeddable_fields

    # Generate KV_STORE trigger
    if create_kv_trigger:
        kv_trigger_sql = generate_kv_store_upsert(table_name, entity_key_field)
        result["sql"]["kv_trigger"] = kv_trigger_sql

    logger.info(f"Registered type {model.__name__} as table {table_name}")

    return result
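Taken together, should_embed_field and register_type mean the generated schema is driven entirely by Pydantic field metadata. Below is a minimal sketch of the intended call pattern; the Document model is hypothetical, while register_type, the json_schema_extra={"embed": True} convention, and the import path are taken from the module above. Note that register_type only builds DDL strings; executing them against the database is left to the caller.

import asyncio

from pydantic import BaseModel, Field

from rem.services.postgres.register_type import register_type


class Document(BaseModel):
    # Hypothetical entity model, for illustration only.
    name: str
    # Embeds because of the explicit flag, despite the non-default field name.
    title: str = Field(json_schema_extra={"embed": True})
    # Embeds by default: "content" is in DEFAULT_EMBED_FIELD_NAMES.
    content: str
    page_count: int = 0


async def main() -> None:
    result = await register_type(Document, table_name="documents", entity_key_field="name")
    print(result["embeddable_fields"])  # ['title', 'content']
    # result["sql"] maps "table", "embeddings", and "kv_trigger" to DDL strings.
    for label, ddl in result["sql"].items():
        print(f"-- {label}\n{ddl}\n")


asyncio.run(main())
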
rem/services/postgres/repository.py
@@ -0,0 +1,337 @@
"""Generic repository for entity persistence.

Single repository class that works with any Pydantic model type.
No need for model-specific repository classes.

Usage:
    from rem.models.entities import Message
    from rem.services.postgres.repository import Repository

    repo = Repository(Message, table_name="messages")
    message = await repo.upsert(message_instance)
    messages = await repo.find({"session_id": "abc", "tenant_id": "xyz"})
"""

import json
from typing import Any, Generic, Type, TypeVar, TYPE_CHECKING

from loguru import logger
from pydantic import BaseModel

from .sql_builder import (
    build_count,
    build_delete,
    build_insert,
    build_select,
    build_upsert,
)
from ...settings import settings

if TYPE_CHECKING:
    from .service import PostgresService


def get_postgres_service() -> "PostgresService | None":
    """
    Get PostgresService instance with connection string from settings.

    Returns None if Postgres is disabled.
    """
    if not settings.postgres.enabled:
        return None

    from .service import PostgresService
    return PostgresService()


T = TypeVar("T", bound=BaseModel)

# Known JSONB fields from CoreModel that need deserialization
JSONB_FIELDS = {"graph_edges", "metadata"}


class Repository(Generic[T]):
    """Generic repository for any Pydantic model type."""

    def __init__(
        self,
        model_class: Type[T],
        table_name: str | None = None,
        db: "PostgresService | None" = None,
    ):
        """
        Initialize repository.

        Args:
            model_class: Pydantic model class (e.g., Message, Resource)
            table_name: Optional table name (defaults to lowercase model name + 's')
            db: Optional PostgresService instance (creates from settings if None)
        """
        self.db = db or get_postgres_service()
        self.model_class = model_class
        self.table_name = table_name or f"{model_class.__name__.lower()}s"

    async def upsert(
        self,
        records: T | list[T],
        embeddable_fields: list[str] | None = None,
        generate_embeddings: bool = False,
    ) -> T | list[T]:
        """
        Upsert single record or list of records (create or update on ID conflict).

        Accepts both single items and lists - no need to distinguish batch vs non-batch.
        Single items are coerced to lists internally for processing.

        Args:
            records: Single model instance or list of model instances
            embeddable_fields: Optional list of fields to generate embeddings for
            generate_embeddings: Whether to queue embedding generation tasks

        Returns:
            Single record or list of records with generated IDs (matches input type)
        """
        # Coerce single item to list for uniform processing
        is_single = not isinstance(records, list)
        records_list: list[T]
        if is_single:
            records_list = [records]  # type: ignore[list-item]
        else:
            records_list = records  # Type narrowed by isinstance check

        if not settings.postgres.enabled or not self.db:
            logger.debug(f"Postgres disabled, skipping {self.model_class.__name__} upsert")
            return records

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        for record in records_list:
            sql, params = build_upsert(record, self.table_name, conflict_field="id", return_id=True)
            async with self.db.pool.acquire() as conn:
                row = await conn.fetchrow(sql, *params)
                if row and "id" in row:
                    record.id = row["id"]  # type: ignore[attr-defined]

        # Queue embedding generation if requested and worker is available
        if generate_embeddings and embeddable_fields and self.db.embedding_worker:
            from rem.services.embeddings import EmbeddingTask

            for record in records_list:
                for field_name in embeddable_fields:
                    content = getattr(record, field_name, None)
                    if content and isinstance(content, str):
                        task = EmbeddingTask(
                            task_id=f"{record.id}-{field_name}",  # type: ignore[attr-defined]
                            entity_id=str(record.id),  # type: ignore[attr-defined]
                            table_name=self.table_name,
                            field_name=field_name,
                            content=content,
                            provider="openai",  # Default provider
                            model="text-embedding-3-small",  # Default model
                        )
                        await self.db.embedding_worker.queue_task(task)

            logger.debug(f"Queued {len(records_list) * len(embeddable_fields)} embedding tasks")

        # Return single item or list to match input type
        return records_list[0] if is_single else records_list

    async def get_by_id(self, record_id: str, tenant_id: str) -> T | None:
        """
        Get a single record by ID.

        Args:
            record_id: Record identifier
            tenant_id: Tenant identifier for multi-tenancy isolation

        Returns:
            Model instance or None if not found
        """
        if not settings.postgres.enabled or not self.db:
            logger.debug(f"Postgres disabled, returning None for {self.model_class.__name__} get")
            return None

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        query = f"""
            SELECT * FROM {self.table_name}
            WHERE id = $1 AND tenant_id = $2 AND deleted_at IS NULL
        """

        async with self.db.pool.acquire() as conn:
            row = await conn.fetchrow(query, record_id, tenant_id)

        if not row:
            return None

        # PostgreSQL JSONB columns come back as strings, need to parse them
        row_dict = dict(row)
        return self.model_class.model_validate(row_dict)

    async def find(
        self,
        filters: dict[str, Any],
        order_by: str = "created_at ASC",
        limit: int | None = None,
        offset: int = 0,
    ) -> list[T]:
        """
        Find records matching filters.

        Args:
            filters: Dict of field -> value filters (AND-ed together)
            order_by: ORDER BY clause (default: "created_at ASC")
            limit: Optional limit on number of records
            offset: Offset for pagination

        Returns:
            List of model instances

        Example:
            messages = await repo.find({
                "session_id": "abc-123",
                "tenant_id": "acme-corp",
                "user_id": "alice"
            })
        """
        if not settings.postgres.enabled or not self.db:
            logger.debug(f"Postgres disabled, returning empty {self.model_class.__name__} list")
            return []

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        sql, params = build_select(
            self.model_class,
            self.table_name,
            filters,
            order_by=order_by,
            limit=limit,
            offset=offset,
        )

        async with self.db.pool.acquire() as conn:
            rows = await conn.fetch(sql, *params)

        return [self.model_class.model_validate(dict(row)) for row in rows]

    async def find_one(self, filters: dict[str, Any]) -> T | None:
        """
        Find single record matching filters.

        Args:
            filters: Dict of field -> value filters

        Returns:
            Model instance or None if not found
        """
        results = await self.find(filters, limit=1)
        return results[0] if results else None

    async def get_by_session(
        self, session_id: str, tenant_id: str, user_id: str | None = None
    ) -> list[T]:
        """
        Get all records for a session (convenience method for Message model).

        Args:
            session_id: Session identifier
            tenant_id: Tenant identifier
            user_id: Optional user identifier

        Returns:
            List of model instances ordered by created_at
        """
        filters = {"session_id": session_id, "tenant_id": tenant_id}
        if user_id:
            filters["user_id"] = user_id

        return await self.find(filters, order_by="created_at ASC")

    async def update(self, record: T) -> T:
        """
        Update a record (upsert).

        Args:
            record: Model instance to update

        Returns:
            Updated record
        """
        result = await self.upsert(record)
        # upsert with single record returns single record
        return result  # type: ignore[return-value]

    async def delete(self, record_id: str, tenant_id: str) -> bool:
        """
        Soft delete a record (sets deleted_at).

        Args:
            record_id: Record identifier
            tenant_id: Tenant identifier for multi-tenancy isolation

        Returns:
            True if deleted, False if not found
        """
        if not settings.postgres.enabled or not self.db:
            logger.debug(f"Postgres disabled, skipping {self.model_class.__name__} deletion")
            return False

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        sql, params = build_delete(self.table_name, record_id, tenant_id)

        async with self.db.pool.acquire() as conn:
            row = await conn.fetchrow(sql, *params)

        return row is not None

    async def count(self, filters: dict[str, Any]) -> int:
        """
        Count records matching filters.

        Args:
            filters: Dict of field -> value filters

        Returns:
            Count of matching records
        """
        if not settings.postgres.enabled or not self.db:
            return 0

        # Ensure connection
        if not self.db.pool:
            await self.db.connect()

        # Type guard: ensure pool is not None after connect
        if not self.db.pool:
            raise RuntimeError("Failed to establish database connection")

        sql, params = build_count(self.table_name, filters)

        async with self.db.pool.acquire() as conn:
            row = await conn.fetchrow(sql, *params)

        return row[0] if row else 0
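As a companion to the schema generator, here is a minimal usage sketch of the repository. The Note model and its field values are hypothetical, and the import path is assumed from the file layout; Repository, upsert, find, and count are the methods defined above. The underlying "notes" table would come from something like register_type(Note, table_name="notes") in the previous file.

import asyncio
from uuid import UUID

from pydantic import BaseModel

from rem.services.postgres.repository import Repository


class Note(BaseModel):
    # Hypothetical entity; id is filled in from the upsert's returned row.
    id: UUID | None = None
    session_id: str
    tenant_id: str
    content: str


async def main() -> None:
    # Table name defaults to lowercase model name + "s", i.e. "notes";
    # the PostgresService is created lazily from settings when db is None.
    repo: Repository[Note] = Repository(Note)

    # Single instance in, single instance out (lists behave the same way).
    note = await repo.upsert(
        Note(session_id="abc-123", tenant_id="acme-corp", content="hello")
    )

    # Filters are AND-ed together; pagination via limit/offset.
    recent = await repo.find(
        {"session_id": "abc-123", "tenant_id": "acme-corp"},
        order_by="created_at DESC",
        limit=20,
    )
    total = await repo.count({"tenant_id": "acme-corp"})
    print(note, len(recent), total)


asyncio.run(main())

Because every method short-circuits when settings.postgres.enabled is false, the sketch degrades to no-ops without a database: upsert returns its input, find returns an empty list, and count returns zero.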