remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,353 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dynamic table and embeddings schema generator from Pydantic models.
|
|
3
|
+
|
|
4
|
+
Generates:
|
|
5
|
+
1. Primary table for entity storage
|
|
6
|
+
2. embeddings_<table> for vector embeddings (one row per field per provider)
|
|
7
|
+
3. Registers entity in KV_STORE cache
|
|
8
|
+
4. Background index creation for performance
|
|
9
|
+
|
|
10
|
+
Design Patterns:
|
|
11
|
+
- Fields marked with json_schema_extra={"embed": True} get embeddings
|
|
12
|
+
- Content fields (TextField, description, etc.) embed by default
|
|
13
|
+
- Multiple embedding providers supported (OpenAI, Cohere, etc.)
|
|
14
|
+
- UNLOGGED KV_STORE for O(1) lookups
|
|
15
|
+
- Background index creation to avoid blocking writes
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from typing import Any, Type
|
|
19
|
+
|
|
20
|
+
from loguru import logger
|
|
21
|
+
from pydantic import BaseModel
|
|
22
|
+
|
|
23
|
+
from ...utils.sql_types import get_column_definition
|
|
24
|
+
|
|
25
|
+
# Default embedding configuration used by the generated embeddings schema.
DEFAULT_EMBEDDING_PROVIDER = "openai"
DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small"
DEFAULT_EMBEDDING_DIMENSIONS = 1536

# Field names that are embedded by default, i.e. whenever the field does not
# carry an explicit "embed" setting in its json_schema_extra.
DEFAULT_EMBED_FIELD_NAMES = {
    "body", "content", "description", "message",
    "notes", "summary", "text",
}
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def should_embed_field(field_name: str, field_info: Any) -> bool:
    """
    Decide whether embeddings should be generated for a model field.

    Precedence:
    1. An explicit ``embed`` entry in the field's ``json_schema_extra`` dict
       wins and is returned as a bool (truthy -> embed, falsy -> skip).
    2. Without an explicit entry, the field is embedded only when its
       lower-cased name is listed in DEFAULT_EMBED_FIELD_NAMES.

    Args:
        field_name: Field name from Pydantic model
        field_info: Field metadata from model.model_fields

    Returns:
        True if field should have embeddings
    """
    extra = getattr(field_info, "json_schema_extra", None)

    # Only a non-empty dict can carry an explicit setting; any other value
    # (None, a callable, ...) falls through to the name-based default.
    explicit = extra.get("embed") if extra and isinstance(extra, dict) else None

    if explicit is None:
        return field_name.lower() in DEFAULT_EMBED_FIELD_NAMES
    return bool(explicit)
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def generate_table_schema(
    model: Type[BaseModel], table_name: str, tenant_scoped: bool = True
) -> str:
    """
    Generate CREATE TABLE SQL (plus supporting indexes) for a Pydantic model.

    The table always gets the system columns ``id``, ``user_id``,
    ``created_at``, ``updated_at``, ``deleted_at``, ``graph_edges``,
    ``metadata`` and ``tags``; ``tenant_id`` is added only when
    ``tenant_scoped`` is True. Model fields whose names collide with a system
    column are skipped so the system definition wins.

    ``table_name`` is interpolated into the SQL as-is, so it must be a
    trusted identifier.

    Args:
        model: Pydantic model class
        table_name: Table name (e.g., "resources", "moments")
        tenant_scoped: If True, add tenant_id column and indexes

    Returns:
        SQL CREATE TABLE statement followed by the CREATE INDEX statements
    """
    # Names handled explicitly below (skip if present in the model)
    system_fields = {
        "id", "created_at", "updated_at", "deleted_at",
        "tenant_id", "user_id", "graph_edges", "metadata", "tags", "column",
    }

    def index_on(suffix: str, target: str) -> str:
        # One idempotent CREATE INDEX statement for this table
        return (
            f"CREATE INDEX IF NOT EXISTS idx_{table_name}_{suffix} "
            f"ON {table_name} {target};"
        )

    # Always add id as primary key
    columns = ["id UUID PRIMARY KEY DEFAULT uuid_generate_v4()"]
    indexes = []

    # tenant_id is nullable - NULL means public/shared
    if tenant_scoped:
        columns.append("tenant_id VARCHAR(100)")
        indexes.append(index_on("tenant", "(tenant_id)"))

    # Owner column
    columns.append("user_id VARCHAR(256)")
    indexes.append(index_on("user", "(user_id)"))

    # Model-defined columns, typed via the shared sql_types utility
    columns.extend(
        get_column_definition(
            field_info,
            field_name,
            nullable=not field_info.is_required(),
            primary_key=False,
        )
        for field_name, field_info in model.model_fields.items()
        if field_name not in system_fields
    )

    # Timestamps
    columns.append("created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
    columns.append("updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP")
    columns.append("deleted_at TIMESTAMP")

    # JSONB / array system columns, each with a GIN index
    columns.append("graph_edges JSONB DEFAULT '[]'::jsonb")
    indexes.append(index_on("graph_edges", "USING GIN (graph_edges)"))

    columns.append("metadata JSONB DEFAULT '{}'::jsonb")
    indexes.append(index_on("metadata", "USING GIN (metadata)"))

    columns.append("tags TEXT[] DEFAULT ARRAY[]::TEXT[]")
    indexes.append(index_on("tags", "USING GIN (tags)"))

    # Join before interpolating: a backslash inside an f-string replacement
    # field is a SyntaxError on Python versions older than 3.12.
    column_sql = ",\n    ".join(columns)
    create_table = f"CREATE TABLE IF NOT EXISTS {table_name} (\n    {column_sql}\n);"

    index_sql = "\n".join(indexes)

    return f"{create_table}\n\n{index_sql}"
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def generate_embeddings_schema(
    model: Type[BaseModel],
    table_name: str,
    embedding_provider: str = DEFAULT_EMBEDDING_PROVIDER,
    embedding_model: str = DEFAULT_EMBEDDING_MODEL,
    embedding_dimensions: int = DEFAULT_EMBEDDING_DIMENSIONS,
) -> tuple[str, list[str]]:
    """
    Generate embeddings table schema for a model.

    Creates embeddings_<table_name> with:
    - One row per entity per field per provider
    - Unique constraint on (entity_id, field_name, provider)
    - Vector column with pgvector
    - Lookup indexes on entity_id and (field_name, provider)

    The HNSW similarity index is only emitted as a commented-out statement;
    per the generated SQL's own note it is meant to be created later in the
    background.

    Args:
        model: Pydantic model class
        table_name: Base table name
        embedding_provider: Default provider (e.g., "openai", "cohere")
        embedding_model: Default value for the ``model`` column. Override it
            together with ``embedding_provider`` so the column defaults
            describe the same provider.
        embedding_dimensions: Width of the pgvector ``embedding`` column;
            must match the size of the vectors that will be stored.

    Returns:
        Tuple of (CREATE TABLE sql, list of embeddable field names).
        ("", []) when the model has no embeddable fields.

    Raises:
        ValueError: If embedding_dimensions is not a positive number.
    """
    if embedding_dimensions <= 0:
        raise ValueError(
            f"embedding_dimensions must be positive, got {embedding_dimensions}"
        )

    embeddings_table = f"embeddings_{table_name}"

    # Find fields that should have embeddings
    embeddable_fields = [
        field_name
        for field_name, field_info in model.model_fields.items()
        if should_embed_field(field_name, field_info)
    ]

    if not embeddable_fields:
        logger.warning(f"No embeddable fields found for {table_name}")
        return "", []

    # Generate embeddings table
    create_sql = f"""
    CREATE TABLE IF NOT EXISTS {embeddings_table} (
        id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
        entity_id UUID NOT NULL REFERENCES {table_name}(id) ON DELETE CASCADE,
        field_name VARCHAR(100) NOT NULL,
        provider VARCHAR(50) NOT NULL DEFAULT '{embedding_provider}',
        model VARCHAR(100) NOT NULL DEFAULT '{embedding_model}',
        embedding vector({embedding_dimensions}) NOT NULL,
        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,

        -- Unique: one embedding per entity per field per provider
        UNIQUE (entity_id, field_name, provider)
    );

    -- Index for entity lookup (get all embeddings for entity)
    CREATE INDEX IF NOT EXISTS idx_{embeddings_table}_entity ON {embeddings_table} (entity_id);

    -- Index for field + provider lookup
    CREATE INDEX IF NOT EXISTS idx_{embeddings_table}_field_provider ON {embeddings_table} (field_name, provider);

    -- HNSW index for vector similarity search (created in background)
    -- Note: This will be created by background thread after data load
    -- CREATE INDEX idx_{embeddings_table}_vector_hnsw ON {embeddings_table}
    -- USING hnsw (embedding vector_cosine_ops);
    """.strip()

    logger.info(
        f"Generated embeddings schema for {table_name} with fields: {embeddable_fields}"
    )

    return create_sql, embeddable_fields
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# NOTE: _map_pydantic_to_postgres_type is now replaced by utils.sql_types.get_sql_type
|
|
224
|
+
# Removed to use the centralized utility instead
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def generate_kv_store_upsert(
    table_name: str,
    entity_key_field: str = "name",
    *,
    tenant_scoped: bool = True,
) -> str:
    """
    Generate trigger to maintain KV_STORE cache on entity changes.

    Creates a trigger that:
    1. Extracts entity_key from entity (e.g., name, key, label)
    2. Updates KV_STORE on INSERT/UPDATE for O(1) lookups
    3. Removes from KV_STORE on DELETE

    Args:
        table_name: Base table name
        entity_key_field: Field to use as entity_key in KV_STORE
        tenant_scoped: Whether the base table has a tenant_id column. When
            False the trigger stores NULL as tenant_id instead of reading
            NEW.tenant_id, which would fail at runtime on a table that has
            no such column.

    Returns:
        SQL for trigger creation
    """
    trigger_name = f"trg_{table_name}_kv_store"
    function_name = f"fn_{table_name}_kv_store_upsert"

    # Tables created without tenant scoping have no tenant_id column, so the
    # trigger must not reference it on NEW.
    tenant_value = "NEW.tenant_id" if tenant_scoped else "NULL"

    return f"""
    -- Trigger function to maintain KV_STORE for {table_name}
    CREATE OR REPLACE FUNCTION {function_name}()
    RETURNS TRIGGER AS $$
    BEGIN
        IF (TG_OP = 'DELETE') THEN
            -- Remove from KV_STORE on delete
            DELETE FROM kv_store
            WHERE entity_id = OLD.id;
            RETURN OLD;
        ELSIF (TG_OP = 'INSERT' OR TG_OP = 'UPDATE') THEN
            -- Upsert to KV_STORE (O(1) lookup by entity_key)
            -- tenant_id can be NULL (meaning public/shared data)
            INSERT INTO kv_store (
                entity_key,
                entity_type,
                entity_id,
                tenant_id,
                user_id,
                metadata,
                graph_edges,
                updated_at
            ) VALUES (
                normalize_key(NEW.{entity_key_field}::VARCHAR),
                '{table_name}',
                NEW.id,
                {tenant_value},
                NEW.user_id,
                NEW.metadata,
                COALESCE(NEW.graph_edges, '[]'::jsonb),
                CURRENT_TIMESTAMP
            )
            ON CONFLICT (COALESCE(tenant_id, ''), entity_key)
            DO UPDATE SET
                entity_id = EXCLUDED.entity_id,
                user_id = EXCLUDED.user_id,
                metadata = EXCLUDED.metadata,
                graph_edges = EXCLUDED.graph_edges,
                updated_at = CURRENT_TIMESTAMP;

            RETURN NEW;
        END IF;
    END;
    $$ LANGUAGE plpgsql;

    -- Create trigger
    DROP TRIGGER IF EXISTS {trigger_name} ON {table_name};
    CREATE TRIGGER {trigger_name}
    AFTER INSERT OR UPDATE OR DELETE ON {table_name}
    FOR EACH ROW EXECUTE FUNCTION {function_name}();
    """.strip()


async def register_type(
    model: Type[BaseModel],
    table_name: str,
    entity_key_field: str = "name",
    tenant_scoped: bool = True,
    create_embeddings: bool = True,
    create_kv_trigger: bool = True,
) -> dict[str, Any]:
    """
    Register a Pydantic model as a database schema.

    This function only generates SQL text; it does not execute anything
    against a database.

    Generates:
    1. Primary table for entity storage
    2. Embeddings table (if create_embeddings=True)
    3. KV_STORE trigger (if create_kv_trigger=True)

    Args:
        model: Pydantic model class
        table_name: Table name
        entity_key_field: Field to use as natural key in KV_STORE
        tenant_scoped: Add tenant_id column and indexes
        create_embeddings: Create embeddings table
        create_kv_trigger: Create KV_STORE trigger

    Returns:
        Dict with keys "table_name", "model" (the model class name), "sql"
        (statements keyed by "table", "embeddings", "kv_trigger") and
        "embeddable_fields".
    """
    result: dict[str, Any] = {
        "table_name": table_name,
        "model": model.__name__,
        "sql": {},
        "embeddable_fields": [],
    }

    # Generate primary table schema
    result["sql"]["table"] = generate_table_schema(model, table_name, tenant_scoped)

    # Generate embeddings schema
    if create_embeddings:
        embeddings_sql, embeddable_fields = generate_embeddings_schema(model, table_name)
        result["sql"]["embeddings"] = embeddings_sql
        result["embeddable_fields"] = embeddable_fields

    # Generate KV_STORE trigger; it must agree with the table on whether a
    # tenant_id column exists.
    if create_kv_trigger:
        result["sql"]["kv_trigger"] = generate_kv_store_upsert(
            table_name, entity_key_field, tenant_scoped=tenant_scoped
        )

    logger.info(f"Registered type {model.__name__} as table {table_name}")

    return result
|