remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of remdb has been flagged as possibly problematic. See the package registry's advisory page for details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,661 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Schema generation utility from Pydantic models.
|
|
3
|
+
|
|
4
|
+
Generates complete database schemas from:
|
|
5
|
+
1. REM's core models (Resource, Moment, User, etc.)
|
|
6
|
+
2. Models registered via rem.register_model() or rem.register_models()
|
|
7
|
+
3. Models discovered from a directory scan
|
|
8
|
+
|
|
9
|
+
Output includes:
|
|
10
|
+
- Primary tables
|
|
11
|
+
- Embeddings tables
|
|
12
|
+
- KV_STORE triggers
|
|
13
|
+
- Indexes (foreground and background)
|
|
14
|
+
- Migrations
|
|
15
|
+
- Schema table entries (for agent-like table access)
|
|
16
|
+
|
|
17
|
+
Usage:
|
|
18
|
+
from rem.services.postgres.schema_generator import SchemaGenerator
|
|
19
|
+
|
|
20
|
+
# Generate from registry (includes core + registered models)
|
|
21
|
+
generator = SchemaGenerator()
|
|
22
|
+
schema = await generator.generate_from_registry()
|
|
23
|
+
|
|
24
|
+
# Or generate from directory (legacy)
|
|
25
|
+
schema = await generator.generate_from_directory("src/rem/models/entities")
|
|
26
|
+
|
|
27
|
+
# Write to file
|
|
28
|
+
with open("src/rem/sql/schema.sql", "w") as f:
|
|
29
|
+
f.write(schema)
|
|
30
|
+
"""
|
|
31
|
+
|
|
32
|
+
import importlib.util
|
|
33
|
+
import inspect
|
|
34
|
+
import json
|
|
35
|
+
import uuid
|
|
36
|
+
from pathlib import Path
|
|
37
|
+
from typing import Any, Type
|
|
38
|
+
|
|
39
|
+
from loguru import logger
|
|
40
|
+
from pydantic import BaseModel
|
|
41
|
+
|
|
42
|
+
from ...settings import settings
|
|
43
|
+
from ...utils.sql_paths import get_package_sql_dir
|
|
44
|
+
from .register_type import register_type, should_embed_field
|
|
45
|
+
|
|
46
|
+
# Namespace UUID for generating deterministic UUIDs from model names
# Using UUID5 with this namespace ensures same model always gets same UUID
REM_SCHEMA_NAMESPACE = uuid.UUID("6ba7b810-9dad-11d1-80b4-00c04fd430c8")  # DNS namespace


def generate_model_uuid(fully_qualified_name: str) -> uuid.UUID:
    """
    Derive a stable UUID for a model from its fully qualified name.

    UUID5 hashes (SHA-1) the name under REM_SCHEMA_NAMESPACE, so the mapping
    is deterministic: the same module path always yields the same UUID, which
    keeps schema-table upserts idempotent across runs.

    Args:
        fully_qualified_name: Full module path, e.g. "rem.models.entities.Resource"

    Returns:
        Deterministic UUID for this model
    """
    return uuid.uuid5(REM_SCHEMA_NAMESPACE, fully_qualified_name)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def extract_model_schema_metadata(
    model: Type[BaseModel],
    table_name: str,
    entity_key_field: str,
    include_search_tool: bool = True,
) -> dict[str, Any]:
    """
    Extract schema metadata from a Pydantic model for schemas table.

    Builds three artifacts from the model: a deterministic id (UUID5 of the
    fully qualified name), a JSON-schema-shaped `spec` dict carrying table
    metadata and tool hints under `json_schema_extra`, and a markdown
    `content` document describing the table and its fields.

    Args:
        model: Pydantic model class
        table_name: Database table name
        entity_key_field: Field used as entity key in kv_store
        include_search_tool: If True, add search_rem tool for querying this table

    Returns:
        Dict with schema metadata ready for schemas table insert
    """
    # Get fully qualified name
    fqn = f"{model.__module__}.{model.__name__}"

    # Generate deterministic UUID (same model name -> same schemas-table row)
    schema_id = generate_model_uuid(fqn)

    # Get JSON schema from Pydantic
    json_schema = model.model_json_schema()

    # Find embedding fields (delegated to should_embed_field heuristics)
    embedding_fields = []
    for field_name, field_info in model.model_fields.items():
        if should_embed_field(field_name, field_info):
            embedding_fields.append(field_name)

    # Build description with search capability note
    base_description = model.__doc__ or f"Schema for {model.__name__}"
    search_note = (
        f"\n\nThis agent can search the `{table_name}` table using the `search_rem` tool. "
        f"Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, "
        f"SEARCH for semantic similarity, or SQL for complex queries."
    ) if include_search_tool else ""

    # Build spec with table metadata and tools
    # Note: default_search_table is used by create_agent to append a description
    # suffix to the search_rem tool when loading it dynamically
    has_embeddings = bool(embedding_fields)

    spec = {
        "type": "object",
        "description": base_description + search_note,
        "properties": json_schema.get("properties", {}),
        "required": json_schema.get("required", []),
        "json_schema_extra": {
            "table_name": table_name,
            "entity_key_field": entity_key_field,
            "embedding_fields": embedding_fields,
            "fully_qualified_name": fqn,
            "tools": ["search_rem"] if include_search_tool else [],
            "default_search_table": table_name,
            "has_embeddings": has_embeddings,
        },
    }

    # Build content (documentation) — markdown rendered as one f-string
    # template, then a per-field section appended below.
    content = f"""# {model.__name__}

{base_description}

## Overview

The `{model.__name__}` entity is stored in the `{table_name}` table. Each record is uniquely
identified by its `{entity_key_field}` field for lookups and graph traversal.

## Search Capabilities

This schema includes the `search_rem` tool which supports:
- **LOOKUP**: O(1) exact match by {entity_key_field} (e.g., `LOOKUP "entity-name"`)
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
- **SEARCH**: Semantic vector search on {', '.join(embedding_fields) if embedding_fields else 'content'} (e.g., `SEARCH "concept" FROM {table_name} LIMIT 10`)
- **SQL**: Complex queries (e.g., `SELECT * FROM {table_name} WHERE ...`)

## Table Info

| Property | Value |
|----------|-------|
| Table | `{table_name}` |
| Entity Key | `{entity_key_field}` |
| Embedding Fields | {', '.join(f'`{f}`' for f in embedding_fields) if embedding_fields else 'None'} |
| Tools | {', '.join(['`search_rem`'] if include_search_tool else ['None'])} |

## Fields

"""
    # One "### `field`" section per model field: type, required-ness, and
    # description when the field declares one.
    for field_name, field_info in model.model_fields.items():
        field_type = str(field_info.annotation) if field_info.annotation else "Any"
        field_desc = field_info.description or ""
        required = "Required" if field_info.is_required() else "Optional"
        content += f"### `{field_name}`\n"
        content += f"- **Type**: `{field_type}`\n"
        content += f"- **{required}**\n"
        if field_desc:
            content += f"- {field_desc}\n"
        content += "\n"

    # Flat record shape expected by generate_schema_upsert_sql().
    return {
        "id": str(schema_id),
        "name": model.__name__,
        "table_name": table_name,
        "entity_key_field": entity_key_field,
        "embedding_fields": embedding_fields,
        "fqn": fqn,
        "spec": spec,
        "content": content,
        "category": "entity",
    }
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def generate_schema_upsert_sql(schema_metadata: dict[str, Any]) -> str:
    """
    Generate SQL UPSERT statement for schemas table.

    Uses ON CONFLICT DO UPDATE for idempotency.

    Fixes over the previous version:
    - ALL interpolated string values are single-quote escaped, not just
      content/spec. A model docstring-derived name or an fqn containing a
      quote previously produced broken SQL.
    - The metadata JSON is built with json.dumps instead of hand-assembled
      string concatenation, so embedded quotes/backslashes in values cannot
      corrupt the JSON literal.

    Args:
        schema_metadata: Dict from extract_model_schema_metadata()

    Returns:
        SQL INSERT ... ON CONFLICT statement
    """

    def esc(value: str) -> str:
        # Standard SQL escaping: double every embedded single quote.
        return value.replace("'", "''")

    content_escaped = esc(schema_metadata["content"])
    spec_json = esc(json.dumps(schema_metadata["spec"]))
    metadata_json = esc(
        json.dumps(
            {
                "table_name": schema_metadata["table_name"],
                "entity_key_field": schema_metadata["entity_key_field"],
                "embedding_fields": schema_metadata["embedding_fields"],
                "fqn": schema_metadata["fqn"],
            }
        )
    )

    sql = f"""
-- Schema entry for {schema_metadata['name']} ({schema_metadata['table_name']})
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
VALUES (
    '{esc(str(schema_metadata['id']))}'::uuid,
    'system',
    '{esc(schema_metadata['name'])}',
    '{content_escaped}',
    '{spec_json}'::jsonb,
    'entity',
    '{metadata_json}'::jsonb
)
ON CONFLICT (id) DO UPDATE SET
    name = EXCLUDED.name,
    content = EXCLUDED.content,
    spec = EXCLUDED.spec,
    category = EXCLUDED.category,
    metadata = EXCLUDED.metadata,
    updated_at = CURRENT_TIMESTAMP;
"""
    return sql.strip()
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
class SchemaGenerator:
|
|
223
|
+
"""
|
|
224
|
+
Generate database schema from Pydantic models in a directory.
|
|
225
|
+
|
|
226
|
+
Discovers all Pydantic models in Python files and generates:
|
|
227
|
+
- CREATE TABLE statements
|
|
228
|
+
- Embeddings tables
|
|
229
|
+
- KV_STORE triggers
|
|
230
|
+
- Indexes
|
|
231
|
+
"""
|
|
232
|
+
|
|
233
|
+
def __init__(self, output_dir: Path | None = None):
|
|
234
|
+
"""
|
|
235
|
+
Initialize schema generator.
|
|
236
|
+
|
|
237
|
+
Args:
|
|
238
|
+
output_dir: Optional directory for output files (defaults to package sql dir)
|
|
239
|
+
"""
|
|
240
|
+
self.output_dir = output_dir or get_package_sql_dir()
|
|
241
|
+
self.schemas: dict[str, dict] = {}
|
|
242
|
+
|
|
243
|
+
def discover_models(self, directory: str | Path) -> dict[str, Type[BaseModel]]:
|
|
244
|
+
"""
|
|
245
|
+
Discover all Pydantic models in a directory.
|
|
246
|
+
|
|
247
|
+
Args:
|
|
248
|
+
directory: Path to directory containing Python files with models
|
|
249
|
+
|
|
250
|
+
Returns:
|
|
251
|
+
Dict mapping model name to model class
|
|
252
|
+
"""
|
|
253
|
+
import sys
|
|
254
|
+
import importlib
|
|
255
|
+
|
|
256
|
+
directory = Path(directory).resolve()
|
|
257
|
+
models = {}
|
|
258
|
+
|
|
259
|
+
logger.info(f"Discovering models in {directory}")
|
|
260
|
+
|
|
261
|
+
# Add src directory to Python path to handle relative imports
|
|
262
|
+
src_dir = directory
|
|
263
|
+
while src_dir.name != "src" and src_dir.parent != src_dir:
|
|
264
|
+
src_dir = src_dir.parent
|
|
265
|
+
|
|
266
|
+
if src_dir.name == "src" and str(src_dir) not in sys.path:
|
|
267
|
+
sys.path.insert(0, str(src_dir))
|
|
268
|
+
logger.debug(f"Added {src_dir} to sys.path for relative imports")
|
|
269
|
+
|
|
270
|
+
# Convert directory path to module path
|
|
271
|
+
# e.g., /path/to/src/rem/models/entities -> rem.models.entities
|
|
272
|
+
try:
|
|
273
|
+
rel_path = directory.relative_to(src_dir)
|
|
274
|
+
module_path = str(rel_path).replace("/", ".")
|
|
275
|
+
|
|
276
|
+
# Import the package to get all submodules
|
|
277
|
+
package = importlib.import_module(module_path)
|
|
278
|
+
|
|
279
|
+
# Find all Python files in the directory
|
|
280
|
+
for py_file in directory.rglob("*.py"):
|
|
281
|
+
if py_file.name.startswith("_"):
|
|
282
|
+
continue
|
|
283
|
+
|
|
284
|
+
try:
|
|
285
|
+
# Build module name from file path
|
|
286
|
+
rel_file = py_file.relative_to(src_dir)
|
|
287
|
+
module_name = str(rel_file.with_suffix("")).replace("/", ".")
|
|
288
|
+
|
|
289
|
+
# Import the module
|
|
290
|
+
module = importlib.import_module(module_name)
|
|
291
|
+
|
|
292
|
+
# Find Pydantic models
|
|
293
|
+
for name, obj in inspect.getmembers(module):
|
|
294
|
+
if (
|
|
295
|
+
inspect.isclass(obj)
|
|
296
|
+
and issubclass(obj, BaseModel)
|
|
297
|
+
and obj is not BaseModel
|
|
298
|
+
and not name.startswith("_")
|
|
299
|
+
# Only include models defined in this module
|
|
300
|
+
and obj.__module__ == module_name
|
|
301
|
+
):
|
|
302
|
+
models[name] = obj
|
|
303
|
+
logger.debug(f"Found model: {name} in {module_name}")
|
|
304
|
+
|
|
305
|
+
except Exception as e:
|
|
306
|
+
logger.warning(f"Failed to load {py_file}: {e}")
|
|
307
|
+
|
|
308
|
+
except Exception as e:
|
|
309
|
+
logger.error(f"Failed to discover models in {directory}: {e}")
|
|
310
|
+
|
|
311
|
+
logger.info(f"Discovered {len(models)} models")
|
|
312
|
+
return models
|
|
313
|
+
|
|
314
|
+
def infer_table_name(self, model: Type[BaseModel]) -> str:
|
|
315
|
+
"""
|
|
316
|
+
Infer table name from model class name.
|
|
317
|
+
|
|
318
|
+
Converts CamelCase to snake_case and pluralizes.
|
|
319
|
+
|
|
320
|
+
Examples:
|
|
321
|
+
Resource -> resources
|
|
322
|
+
UserProfile -> user_profiles
|
|
323
|
+
Message -> messages
|
|
324
|
+
|
|
325
|
+
Args:
|
|
326
|
+
model: Pydantic model class
|
|
327
|
+
|
|
328
|
+
Returns:
|
|
329
|
+
Table name
|
|
330
|
+
"""
|
|
331
|
+
import re
|
|
332
|
+
|
|
333
|
+
name = model.__name__
|
|
334
|
+
|
|
335
|
+
# Convert CamelCase to snake_case
|
|
336
|
+
name = re.sub("(.)([A-Z][a-z]+)", r"\1_\2", name)
|
|
337
|
+
name = re.sub("([a-z0-9])([A-Z])", r"\1_\2", name).lower()
|
|
338
|
+
|
|
339
|
+
# Simple pluralization (add 's' if doesn't end in 's')
|
|
340
|
+
if not name.endswith("s"):
|
|
341
|
+
if name.endswith("y"):
|
|
342
|
+
name = name[:-1] + "ies" # category -> categories
|
|
343
|
+
else:
|
|
344
|
+
name = name + "s" # resource -> resources
|
|
345
|
+
|
|
346
|
+
return name
|
|
347
|
+
|
|
348
|
+
def infer_entity_key_field(self, model: Type[BaseModel]) -> str:
|
|
349
|
+
"""
|
|
350
|
+
Infer which field to use as entity_key in KV_STORE.
|
|
351
|
+
|
|
352
|
+
Priority:
|
|
353
|
+
1. Field with json_schema_extra={\"entity_key\": True}
|
|
354
|
+
2. Field named \"name\" (human-readable identifier)
|
|
355
|
+
3. Field named \"key\"
|
|
356
|
+
4. Field named \"uri\"
|
|
357
|
+
5. Field named \"id\" (fallback)
|
|
358
|
+
|
|
359
|
+
Args:
|
|
360
|
+
model: Pydantic model class
|
|
361
|
+
|
|
362
|
+
Returns:
|
|
363
|
+
Field name to use as entity_key
|
|
364
|
+
"""
|
|
365
|
+
# Check for explicit entity_key marker
|
|
366
|
+
for field_name, field_info in model.model_fields.items():
|
|
367
|
+
json_extra = getattr(field_info, "json_schema_extra", None)
|
|
368
|
+
if json_extra and isinstance(json_extra, dict):
|
|
369
|
+
if json_extra.get("entity_key"):
|
|
370
|
+
return field_name
|
|
371
|
+
|
|
372
|
+
# Check for key fields in priority order: name -> key -> uri -> id
|
|
373
|
+
# (matching sql_builder.get_entity_key convention)
|
|
374
|
+
for candidate in ["name", "key", "uri", "id"]:
|
|
375
|
+
if candidate in model.model_fields:
|
|
376
|
+
return candidate
|
|
377
|
+
|
|
378
|
+
# Should never reach here for CoreModel subclasses (they all have id)
|
|
379
|
+
logger.error(f"No suitable entity_key field found for {model.__name__}, using 'id'")
|
|
380
|
+
return "id"
|
|
381
|
+
|
|
382
|
+
async def generate_schema_for_model(
|
|
383
|
+
self,
|
|
384
|
+
model: Type[BaseModel],
|
|
385
|
+
table_name: str | None = None,
|
|
386
|
+
entity_key_field: str | None = None,
|
|
387
|
+
) -> dict:
|
|
388
|
+
"""
|
|
389
|
+
Generate schema for a single model.
|
|
390
|
+
|
|
391
|
+
Args:
|
|
392
|
+
model: Pydantic model class
|
|
393
|
+
table_name: Optional table name (inferred if not provided)
|
|
394
|
+
entity_key_field: Optional entity key field (inferred if not provided)
|
|
395
|
+
|
|
396
|
+
Returns:
|
|
397
|
+
Dict with SQL statements and metadata
|
|
398
|
+
"""
|
|
399
|
+
if table_name is None:
|
|
400
|
+
table_name = self.infer_table_name(model)
|
|
401
|
+
|
|
402
|
+
if entity_key_field is None:
|
|
403
|
+
entity_key_field = self.infer_entity_key_field(model)
|
|
404
|
+
|
|
405
|
+
logger.info(f"Generating schema for {model.__name__} -> {table_name}")
|
|
406
|
+
|
|
407
|
+
schema = await register_type(
|
|
408
|
+
model=model,
|
|
409
|
+
table_name=table_name,
|
|
410
|
+
entity_key_field=entity_key_field,
|
|
411
|
+
tenant_scoped=True,
|
|
412
|
+
create_embeddings=True,
|
|
413
|
+
create_kv_trigger=True,
|
|
414
|
+
)
|
|
415
|
+
|
|
416
|
+
# Extract schema metadata for schemas table entry
|
|
417
|
+
schema_metadata = extract_model_schema_metadata(
|
|
418
|
+
model=model,
|
|
419
|
+
table_name=table_name,
|
|
420
|
+
entity_key_field=entity_key_field,
|
|
421
|
+
)
|
|
422
|
+
schema["schema_metadata"] = schema_metadata
|
|
423
|
+
|
|
424
|
+
self.schemas[table_name] = schema
|
|
425
|
+
return schema
|
|
426
|
+
|
|
427
|
+
async def generate_from_registry(
|
|
428
|
+
self, output_file: str | None = None, include_core: bool = True
|
|
429
|
+
) -> str:
|
|
430
|
+
"""
|
|
431
|
+
Generate complete schema from the model registry.
|
|
432
|
+
|
|
433
|
+
Includes:
|
|
434
|
+
1. REM's core models (if include_core=True)
|
|
435
|
+
2. Models registered via rem.register_model() or rem.register_models()
|
|
436
|
+
|
|
437
|
+
Args:
|
|
438
|
+
output_file: Optional output file path (relative to output_dir)
|
|
439
|
+
include_core: If True, include REM's core models (default: True)
|
|
440
|
+
|
|
441
|
+
Returns:
|
|
442
|
+
Complete SQL schema as string
|
|
443
|
+
|
|
444
|
+
Example:
|
|
445
|
+
import rem
|
|
446
|
+
from rem.models.core import CoreModel
|
|
447
|
+
|
|
448
|
+
# Register custom model
|
|
449
|
+
@rem.register_model
|
|
450
|
+
class CustomEntity(CoreModel):
|
|
451
|
+
name: str
|
|
452
|
+
|
|
453
|
+
# Generate schema (includes core + custom)
|
|
454
|
+
generator = SchemaGenerator()
|
|
455
|
+
schema = await generator.generate_from_registry()
|
|
456
|
+
"""
|
|
457
|
+
from ...registry import get_model_registry
|
|
458
|
+
|
|
459
|
+
registry = get_model_registry()
|
|
460
|
+
models = registry.get_models(include_core=include_core)
|
|
461
|
+
|
|
462
|
+
logger.info(f"Generating schema from registry: {len(models)} models")
|
|
463
|
+
|
|
464
|
+
# Generate schemas for each model
|
|
465
|
+
for model_name, ext in models.items():
|
|
466
|
+
await self.generate_schema_for_model(
|
|
467
|
+
ext.model,
|
|
468
|
+
table_name=ext.table_name,
|
|
469
|
+
entity_key_field=ext.entity_key_field,
|
|
470
|
+
)
|
|
471
|
+
|
|
472
|
+
return self._generate_sql_output(
|
|
473
|
+
source="model registry",
|
|
474
|
+
output_file=output_file,
|
|
475
|
+
)
|
|
476
|
+
|
|
477
|
+
async def generate_from_directory(
|
|
478
|
+
self, directory: str | Path, output_file: str | None = None
|
|
479
|
+
) -> str:
|
|
480
|
+
"""
|
|
481
|
+
Generate complete schema from all models in a directory.
|
|
482
|
+
|
|
483
|
+
Note: For most use cases, prefer generate_from_registry() which uses
|
|
484
|
+
the model registry pattern.
|
|
485
|
+
|
|
486
|
+
Args:
|
|
487
|
+
directory: Path to directory with Pydantic models
|
|
488
|
+
output_file: Optional output file path (relative to output_dir)
|
|
489
|
+
|
|
490
|
+
Returns:
|
|
491
|
+
Complete SQL schema as string
|
|
492
|
+
"""
|
|
493
|
+
# Discover models
|
|
494
|
+
models = self.discover_models(directory)
|
|
495
|
+
|
|
496
|
+
# Generate schemas for each model
|
|
497
|
+
for model_name, model in models.items():
|
|
498
|
+
await self.generate_schema_for_model(model)
|
|
499
|
+
|
|
500
|
+
return self._generate_sql_output(
|
|
501
|
+
source=f"directory: {directory}",
|
|
502
|
+
output_file=output_file,
|
|
503
|
+
)
|
|
504
|
+
|
|
505
|
+
def _generate_sql_output(
|
|
506
|
+
self, source: str, output_file: str | None = None
|
|
507
|
+
) -> str:
|
|
508
|
+
"""
|
|
509
|
+
Generate SQL output from accumulated schemas.
|
|
510
|
+
|
|
511
|
+
Args:
|
|
512
|
+
source: Description of schema source (for header comment)
|
|
513
|
+
output_file: Optional output file path (relative to output_dir)
|
|
514
|
+
|
|
515
|
+
Returns:
|
|
516
|
+
Complete SQL schema as string
|
|
517
|
+
"""
|
|
518
|
+
import datetime
|
|
519
|
+
|
|
520
|
+
sql_parts = [
|
|
521
|
+
"-- REM Model Schema (install_models.sql)",
|
|
522
|
+
"-- Generated from Pydantic models",
|
|
523
|
+
f"-- Source: {source}",
|
|
524
|
+
f"-- Generated at: {datetime.datetime.now().isoformat()}",
|
|
525
|
+
"--",
|
|
526
|
+
"-- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate",
|
|
527
|
+
"--",
|
|
528
|
+
"-- This script creates:",
|
|
529
|
+
"-- 1. Primary entity tables",
|
|
530
|
+
"-- 2. Embeddings tables (embeddings_<table>)",
|
|
531
|
+
"-- 3. KV_STORE triggers for cache maintenance",
|
|
532
|
+
"-- 4. Indexes (foreground only, background indexes separate)",
|
|
533
|
+
"-- 5. Schema table entries (for agent-like table access)",
|
|
534
|
+
"",
|
|
535
|
+
"-- ============================================================================",
|
|
536
|
+
"-- PREREQUISITES CHECK",
|
|
537
|
+
"-- ============================================================================",
|
|
538
|
+
"",
|
|
539
|
+
"DO $$",
|
|
540
|
+
"BEGIN",
|
|
541
|
+
" -- Check that install.sql has been run",
|
|
542
|
+
" IF NOT EXISTS (SELECT 1 FROM pg_tables WHERE tablename = 'kv_store') THEN",
|
|
543
|
+
" RAISE EXCEPTION 'KV_STORE table not found. Run migrations/001_install.sql first.';",
|
|
544
|
+
" END IF;",
|
|
545
|
+
"",
|
|
546
|
+
" IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN",
|
|
547
|
+
" RAISE EXCEPTION 'pgvector extension not found. Run migrations/001_install.sql first.';",
|
|
548
|
+
" END IF;",
|
|
549
|
+
"",
|
|
550
|
+
" RAISE NOTICE 'Prerequisites check passed';",
|
|
551
|
+
"END $$;",
|
|
552
|
+
"",
|
|
553
|
+
]
|
|
554
|
+
|
|
555
|
+
# Add each table schema
|
|
556
|
+
for table_name, schema in self.schemas.items():
|
|
557
|
+
sql_parts.append("-- " + "=" * 70)
|
|
558
|
+
sql_parts.append(f"-- {table_name.upper()} (Model: {schema['model']})")
|
|
559
|
+
sql_parts.append("-- " + "=" * 70)
|
|
560
|
+
sql_parts.append("")
|
|
561
|
+
|
|
562
|
+
# Primary table
|
|
563
|
+
if "table" in schema["sql"]:
|
|
564
|
+
sql_parts.append(schema["sql"]["table"])
|
|
565
|
+
sql_parts.append("")
|
|
566
|
+
|
|
567
|
+
# Embeddings table
|
|
568
|
+
if "embeddings" in schema["sql"] and schema["sql"]["embeddings"]:
|
|
569
|
+
sql_parts.append(f"-- Embeddings for {table_name}")
|
|
570
|
+
sql_parts.append(schema["sql"]["embeddings"])
|
|
571
|
+
sql_parts.append("")
|
|
572
|
+
|
|
573
|
+
# KV_STORE trigger
|
|
574
|
+
if "kv_trigger" in schema["sql"]:
|
|
575
|
+
sql_parts.append(f"-- KV_STORE trigger for {table_name}")
|
|
576
|
+
sql_parts.append(schema["sql"]["kv_trigger"])
|
|
577
|
+
sql_parts.append("")
|
|
578
|
+
|
|
579
|
+
# Add schema table entries (every entity table is also an "agent")
|
|
580
|
+
sql_parts.append("-- ============================================================================")
|
|
581
|
+
sql_parts.append("-- SCHEMA TABLE ENTRIES")
|
|
582
|
+
sql_parts.append("-- Every entity table gets a schemas entry for agent-like access")
|
|
583
|
+
sql_parts.append("-- ============================================================================")
|
|
584
|
+
sql_parts.append("")
|
|
585
|
+
|
|
586
|
+
for table_name, schema in self.schemas.items():
|
|
587
|
+
if "schema_metadata" in schema:
|
|
588
|
+
schema_upsert = generate_schema_upsert_sql(schema["schema_metadata"])
|
|
589
|
+
sql_parts.append(schema_upsert)
|
|
590
|
+
sql_parts.append("")
|
|
591
|
+
|
|
592
|
+
# Add migration record
|
|
593
|
+
sql_parts.append("-- ============================================================================")
|
|
594
|
+
sql_parts.append("-- RECORD MIGRATION")
|
|
595
|
+
sql_parts.append("-- ============================================================================")
|
|
596
|
+
sql_parts.append("")
|
|
597
|
+
sql_parts.append("INSERT INTO rem_migrations (name, type, version)")
|
|
598
|
+
sql_parts.append("VALUES ('install_models.sql', 'models', '1.0.0')")
|
|
599
|
+
sql_parts.append("ON CONFLICT (name) DO UPDATE")
|
|
600
|
+
sql_parts.append("SET applied_at = CURRENT_TIMESTAMP,")
|
|
601
|
+
sql_parts.append(" applied_by = CURRENT_USER;")
|
|
602
|
+
sql_parts.append("")
|
|
603
|
+
|
|
604
|
+
# Completion message
|
|
605
|
+
sql_parts.append("DO $$")
|
|
606
|
+
sql_parts.append("BEGIN")
|
|
607
|
+
sql_parts.append(" RAISE NOTICE '============================================================';")
|
|
608
|
+
sql_parts.append(f" RAISE NOTICE 'REM Model Schema Applied: {len(self.schemas)} tables';")
|
|
609
|
+
sql_parts.append(" RAISE NOTICE '============================================================';")
|
|
610
|
+
for table_name in sorted(self.schemas.keys()):
|
|
611
|
+
embeddable = len(self.schemas[table_name].get("embeddable_fields", []))
|
|
612
|
+
embed_info = f" ({embeddable} embeddable fields)" if embeddable else ""
|
|
613
|
+
sql_parts.append(f" RAISE NOTICE ' ✓ {table_name}{embed_info}';")
|
|
614
|
+
sql_parts.append(" RAISE NOTICE '';")
|
|
615
|
+
sql_parts.append(" RAISE NOTICE 'Next: Run background indexes if needed';")
|
|
616
|
+
sql_parts.append(" RAISE NOTICE ' rem db migrate --background-indexes';")
|
|
617
|
+
sql_parts.append(" RAISE NOTICE '============================================================';")
|
|
618
|
+
sql_parts.append("END $$;")
|
|
619
|
+
|
|
620
|
+
complete_sql = "\n".join(sql_parts)
|
|
621
|
+
|
|
622
|
+
# Write to file if specified
|
|
623
|
+
if output_file:
|
|
624
|
+
output_path = self.output_dir / output_file
|
|
625
|
+
output_path.parent.mkdir(parents=True, exist_ok=True)
|
|
626
|
+
output_path.write_text(complete_sql)
|
|
627
|
+
logger.info(f"Schema written to {output_path}")
|
|
628
|
+
|
|
629
|
+
return complete_sql
|
|
630
|
+
|
|
631
|
+
def generate_background_indexes(self) -> str:
    """
    Generate SQL for background index creation.

    These indexes are created CONCURRENTLY to avoid blocking writes.
    Should be run after initial data load.

    Returns:
        SQL for background index creation
    """
    lines: list[str] = [
        "-- Background index creation",
        "-- Run AFTER initial data load to avoid blocking writes",
        "",
    ]

    # One HNSW vector index per table that carries embeddable fields;
    # tables without any are left out entirely.
    for name, spec in self.schemas.items():
        if spec.get("embeddable_fields"):
            emb_table = f"embeddings_{name}"
            lines.extend(
                (
                    f"-- HNSW vector index for {emb_table}",
                    f"CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_{emb_table}_vector_hnsw",
                    f"ON {emb_table}",
                    "USING hnsw (embedding vector_cosine_ops);",
                    "",
                )
            )

    return "\n".join(lines)
|