remdb 0.3.103__py3-none-any.whl → 0.3.118__py3-none-any.whl
This diff shows the published contents of two package versions as they appear in their public registry, and is provided for informational purposes only.
Potentially problematic release: this version of remdb has been flagged as possibly problematic.
- rem/agentic/context.py +28 -24
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +92 -4
- rem/agentic/providers/pydantic_ai.py +88 -18
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/main.py +85 -16
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +18 -4
- rem/api/mcp_router/tools.py +383 -16
- rem/api/routers/admin.py +218 -1
- rem/api/routers/chat/completions.py +30 -3
- rem/api/routers/chat/streaming.py +143 -3
- rem/api/routers/feedback.py +12 -319
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +13 -13
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1300 -0
- rem/cli/commands/configure.py +1 -3
- rem/cli/commands/db.py +354 -143
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +92 -45
- rem/cli/main.py +27 -6
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/shared_session.py +2 -28
- rem/registry.py +10 -4
- rem/services/content/service.py +30 -8
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/postgres/README.md +151 -26
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/reload.py +1 -1
- rem/settings.py +56 -7
- rem/sql/background_indexes.sql +19 -24
- rem/sql/migrations/001_install.sql +252 -69
- rem/sql/migrations/002_install_models.sql +2171 -593
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/schema_loader.py +17 -13
- rem/utils/sql_paths.py +146 -0
- rem/workers/__init__.py +2 -1
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/METADATA +149 -76
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/RECORD +54 -48
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/WHEEL +0 -0
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
rem/cli/commands/schema.py
CHANGED
@@ -8,6 +8,7 @@ Usage:
 """
 
 import asyncio
+import importlib
 from pathlib import Path
 
 import click
@@ -15,16 +16,35 @@ from loguru import logger
 
 from ...settings import settings
 from ...services.postgres.schema_generator import SchemaGenerator
+from ...utils.sql_paths import get_package_sql_dir, get_package_migrations_dir
+
+
+def _import_model_modules() -> list[str]:
+    """
+    Import modules specified in MODELS__IMPORT_MODULES setting.
+
+    This ensures downstream models decorated with @rem.register_model
+    are registered before schema generation.
+
+    Returns:
+        List of successfully imported module names
+    """
+    imported = []
+    for module_name in settings.models.module_list:
+        try:
+            importlib.import_module(module_name)
+            imported.append(module_name)
+            logger.debug(f"Imported model module: {module_name}")
+        except ImportError as e:
+            logger.warning(f"Failed to import model module '{module_name}': {e}")
+            click.echo(
+                click.style(f"  ⚠ Could not import '{module_name}': {e}", fg="yellow"),
+                err=True,
+            )
+    return imported
 
 
 @click.command()
-@click.option(
-    "--models",
-    "-m",
-    required=True,
-    type=click.Path(exists=True, path_type=Path),
-    help="Directory containing Pydantic models",
-)
 @click.option(
     "--output",
     "-o",
@@ -36,13 +56,13 @@ from ...services.postgres.schema_generator import SchemaGenerator
     "--output-dir",
     type=click.Path(path_type=Path),
    default=None,
-    help=
+    help="Base output directory (default: package sql/migrations)",
 )
-def generate(models: Path, output: Path, output_dir: Path | None):
+def generate(output: Path, output_dir: Path | None):
     """
-    Generate database schema from Pydantic models.
+    Generate database schema from registered Pydantic models.
 
-
+    Uses the model registry (core models + user-registered models) to generate:
     - CREATE TABLE statements
     - Embeddings tables (embeddings_<table>)
     - KV_STORE triggers for cache maintenance
@@ -51,24 +71,53 @@ def generate(models: Path, output: Path, output_dir: Path | None):
     Output is written to src/rem/sql/migrations/002_install_models.sql by default.
 
     Example:
-        rem db schema generate
+        rem db schema generate
+
+    To register custom models in downstream apps:
+
+    1. Create models with @rem.register_model decorator:
+
+        # models/__init__.py
+        import rem
+        from rem.models.core import CoreModel
+
+        @rem.register_model
+        class MyEntity(CoreModel):
+            name: str
+
+    2. Set MODELS__IMPORT_MODULES in your .env:
+
+        MODELS__IMPORT_MODULES=models
+
+    3. Run schema generation:
+
+        rem db schema generate
 
     This creates:
     - src/rem/sql/migrations/002_install_models.sql - Entity tables and triggers
     - src/rem/sql/background_indexes.sql - HNSW indexes (apply after data load)
 
-    After generation,
-    rem db
+    After generation, verify with:
+        rem db diff
     """
-
+    from ...registry import get_model_registry
+
+    # Import downstream model modules to trigger @rem.register_model decorators
+    imported_modules = _import_model_modules()
+    if imported_modules:
+        click.echo(f"Imported model modules: {', '.join(imported_modules)}")
+
+    registry = get_model_registry()
+    models = registry.get_models(include_core=True)
+    click.echo(f"Generating schema from {len(models)} registered models")
 
-    # Default to migrations directory
-    actual_output_dir = output_dir or
+    # Default to package migrations directory
+    actual_output_dir = output_dir or get_package_migrations_dir()
     generator = SchemaGenerator(output_dir=actual_output_dir)
 
-    # Generate schema
+    # Generate schema from registry
     try:
-        schema_sql = asyncio.run(generator.
+        schema_sql = asyncio.run(generator.generate_from_registry(output_file=output.name))
 
         click.echo(f"✓ Schema generated: {len(generator.schemas)} tables")
         click.echo(f"✓ Written to: {actual_output_dir / output.name}")
@@ -76,7 +125,7 @@ def generate(models: Path, output: Path, output_dir: Path | None):
         # Generate background indexes in parent sql dir
         background_indexes = generator.generate_background_indexes()
         if background_indexes:
-            bg_file =
+            bg_file = get_package_sql_dir() / "background_indexes.sql"
             bg_file.write_text(background_indexes)
             click.echo(f"✓ Background indexes: {bg_file}")
 
@@ -94,48 +143,46 @@ def generate(models: Path, output: Path, output_dir: Path | None):
 
 
 @click.command()
-@click.option(
-    "--models",
-    "-m",
-    required=True,
-    type=click.Path(exists=True, path_type=Path),
-    help="Directory containing Pydantic models",
-)
-def validate(models: Path):
+def validate():
     """
-    Validate Pydantic models for schema generation.
+    Validate registered Pydantic models for schema generation.
 
     Checks:
-    - Models can be loaded
+    - Models can be loaded from registry
     - Models have suitable entity_key fields
     - Fields with embeddings are properly configured
+
+    Set MODELS__IMPORT_MODULES to include custom models from downstream apps.
     """
-
+    from ...registry import get_model_registry
 
-
-
+    # Import downstream model modules to trigger @rem.register_model decorators
+    imported_modules = _import_model_modules()
+    if imported_modules:
+        click.echo(f"Imported model modules: {', '.join(imported_modules)}")
 
-
-
-
+    registry = get_model_registry()
+    models = registry.get_models(include_core=True)
+
+    click.echo(f"Validating {len(models)} registered models")
 
-
+    if not models:
+        click.echo("✗ No models found in registry", err=True)
+        raise click.Abort()
 
+    generator = SchemaGenerator()
     errors: list[str] = []
     warnings: list[str] = []
 
-    for model_name,
-
-
+    for model_name, ext in models.items():
+        model = ext.model
+        table_name = ext.table_name or generator.infer_table_name(model)
+        entity_key = ext.entity_key_field or generator.infer_entity_key_field(model)
 
         # Check for entity_key
         if entity_key == "id":
             warnings.append(f"{model_name}: No natural key field, using 'id'")
 
-        # Check for embeddable fields
-        # TODO: Implement should_embed_field check
-        embeddable: list[str] = []  # Placeholder - needs implementation
-
         click.echo(f"  {model_name} -> {table_name} (key: {entity_key})")
 
     if warnings:
@@ -158,7 +205,7 @@ def validate(models: Path):
     "-o",
     type=click.Path(path_type=Path),
     default=None,
-    help=
+    help="Output file for background indexes (default: package sql/background_indexes.sql)",
 )
 def indexes(output: Path):
     """
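Note: the new _import_model_modules helper assumes settings.models.module_list yields the module names configured via MODELS__IMPORT_MODULES. The diff does not show how that property is implemented; a minimal sketch, assuming a comma-separated environment value (the property name and format are inferred from usage here, not confirmed):

    # Hypothetical sketch of the module_list property the CLI relies on.
    # Assumes MODELS__IMPORT_MODULES holds a comma-separated list of dotted paths.
    import os

    class ModelsSettings:
        @property
        def module_list(self) -> list[str]:
            raw = os.environ.get("MODELS__IMPORT_MODULES", "")
            # "models,plugins.entities" -> ["models", "plugins.entities"]
            return [m.strip() for m in raw.split(",") if m.strip()]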
rem/cli/main.py
CHANGED
@@ -22,17 +22,30 @@ except Exception:
     __version__ = "unknown"
 
 
+def _configure_logger(level: str):
+    """Configure loguru with custom level icons."""
+    logger.remove()
+
+    # Configure level icons - only warnings and errors get visual indicators
+    logger.level("DEBUG", icon=" ")
+    logger.level("INFO", icon=" ")
+    logger.level("WARNING", icon="🟠")
+    logger.level("ERROR", icon="🔴")
+    logger.level("CRITICAL", icon="🔴")
+
+    logger.add(
+        sys.stderr,
+        level=level,
+        format="<green>{time:HH:mm:ss}</green> | {level.icon} <level>{level: <8}</level> | <level>{message}</level>",
+    )
+
+
 @click.group()
 @click.option("--verbose", "-v", is_flag=True, help="Enable verbose logging")
 @click.version_option(version=__version__, prog_name="rem")
 def cli(verbose: bool):
     """REM - Resources Entities Moments system CLI."""
-    if verbose:
-        logger.remove()
-        logger.add(sys.stderr, level="DEBUG")
-    else:
-        logger.remove()
-        logger.add(sys.stderr, level="INFO")
+    _configure_logger("DEBUG" if verbose else "INFO")
 
 
 @cli.group()
@@ -65,6 +78,12 @@ def dreaming():
     pass
 
 
+@cli.group()
+def cluster():
+    """Kubernetes cluster deployment and management."""
+    pass
+
+
 # Register commands
 from .commands.schema import register_commands as register_schema_commands
 from .commands.db import register_commands as register_db_commands
@@ -76,11 +95,13 @@ from .commands.configure import register_command as register_configure_command
 from .commands.serve import register_command as register_serve_command
 from .commands.mcp import register_command as register_mcp_command
 from .commands.scaffold import scaffold as scaffold_command
+from .commands.cluster import register_commands as register_cluster_commands
 
 register_schema_commands(schema)
 register_db_commands(db)
 register_process_commands(process)
 register_dreaming_commands(dreaming)
+register_cluster_commands(cluster)
 register_ask_command(cli)
 register_configure_command(cli)
 register_serve_command(cli)
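Note: loguru supports updating the icon of an existing level in place and rendering it via the {level.icon} token in a sink format, which is the pattern _configure_logger uses. A standalone sketch of that behavior (generic loguru usage, not REM code):

    # Standalone loguru sketch: override a level icon, then render it in the sink.
    import sys
    from loguru import logger

    logger.remove()
    logger.level("WARNING", icon="🟠")  # update the built-in level's icon
    logger.add(sys.stderr, level="INFO", format="{level.icon} {level: <8} | {message}")

    logger.info("starting up")      # INFO keeps its default icon
    logger.warning("check config")  # rendered as: 🟠 WARNING  | check config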
rem/models/core/rem_query.py
CHANGED
@@ -112,7 +112,7 @@ class SearchParameters(BaseModel):
     table_name: str = Field(..., description="Table to search (resources, moments, etc.)")
     limit: int = Field(default=10, gt=0, description="Maximum results")
     min_similarity: float = Field(
-        default=0.
+        default=0.3, ge=0.0, le=1.0, description="Minimum similarity score (0.3 recommended for general queries)"
     )
 
 
@@ -198,7 +198,10 @@ class RemQuery(BaseModel):
         | SQLParameters
         | TraverseParameters
     ) = Field(..., description="Query parameters")
-    user_id: str = Field(
+    user_id: Optional[str] = Field(
+        default=None,
+        description="User identifier (UUID5 hash of email). None = anonymous (shared/public data only)"
+    )
 
 
 class TraverseStage(BaseModel):
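Note: the ge/le bounds added to min_similarity mean an out-of-range score now fails at model construction rather than surfacing later during query execution. A minimal pydantic illustration with a stand-in model (not the real SearchParameters):

    # Minimal illustration of the ge/le bounds added above.
    from pydantic import BaseModel, Field, ValidationError

    class Params(BaseModel):  # stand-in for SearchParameters
        min_similarity: float = Field(default=0.3, ge=0.0, le=1.0)

    print(Params().min_similarity)  # 0.3, the new default
    try:
        Params(min_similarity=1.5)
    except ValidationError:
        print("rejected: min_similarity must be within [0.0, 1.0]")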
rem/models/entities/shared_session.py
CHANGED
@@ -111,28 +111,20 @@ To permanently delete, an admin can run:
 
 from datetime import datetime
 from typing import Optional
-from uuid import UUID
 
 from pydantic import BaseModel, Field
 
-from
+from ..core import CoreModel
 
 
-class SharedSession(BaseModel):
+class SharedSession(CoreModel):
     """
     Session sharing record between users.
 
     Links a session (identified by session_id from Message records) to a
     recipient user, enabling collaborative access to conversation history.
-
-    This is NOT a CoreModel - it's a lightweight linking table without
-    graph edges, metadata, or embeddings.
     """
 
-    id: Optional[UUID] = Field(
-        default=None,
-        description="Unique identifier (auto-generated)",
-    )
     session_id: str = Field(
         ...,
         description="The session being shared (matches Message.session_id)",
@@ -145,24 +137,6 @@ class SharedSession(BaseModel):
         ...,
         description="User ID of the recipient (who can now view the session)",
     )
-    tenant_id: str = Field(
-        default="default",
-        description="Tenant identifier for multi-tenancy isolation",
-    )
-    created_at: datetime = Field(
-        default_factory=utc_now,
-        description="When the share was created",
-    )
-    updated_at: datetime = Field(
-        default_factory=utc_now,
-        description="Last modification timestamp",
-    )
-    deleted_at: Optional[datetime] = Field(
-        default=None,
-        description="Soft delete timestamp (null = active share)",
-    )
-
-    model_config = {"from_attributes": True}
 
 
 class SharedSessionCreate(BaseModel):
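Note: the fields removed here (id, tenant_id, created_at, updated_at, deleted_at, and model_config) are presumably supplied by CoreModel now that SharedSession inherits from it; CoreModel's definition is not shown in this diff. Under that assumption, callers that read those attributes keep working:

    # Assumption: CoreModel defines the id/tenant_id/timestamp/soft-delete fields
    # that were deleted from SharedSession above. Field names for required
    # arguments besides session_id are hypothetical.
    share = SharedSession(
        session_id="sess-123",
        shared_with_user_id="user-b",  # hypothetical name for the recipient field
    )
    print(share.tenant_id, share.deleted_at)  # inherited from CoreModel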
rem/registry.py
CHANGED
@@ -123,6 +123,7 @@ class ModelRegistry:
             return
 
         from .models.entities import (
+            Feedback,
             File,
             ImageResource,
             Message,
@@ -131,19 +132,24 @@ class ModelRegistry:
             OntologyConfig,
             Resource,
             Schema,
+            Session,
+            SharedSession,
             User,
         )
 
         core_models = [
-
+            Feedback,
+            File,
             ImageResource,
             Message,
-            User,
-            File,
             Moment,
-            Schema,
             Ontology,
             OntologyConfig,
+            Resource,
+            Schema,
+            Session,
+            SharedSession,
+            User,
         ]
 
         for model in core_models:
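Note: judging by how the schema.py commands consume it, get_models(include_core=True) returns a mapping from model name to a registration wrapper exposing model, table_name, and entity_key_field. A usage sketch under that inference:

    # Iterate the registry the way the validate command does. The attribute
    # names on ext are inferred from the schema.py diff above, not confirmed
    # independently.
    from rem.registry import get_model_registry

    registry = get_model_registry()
    for model_name, ext in registry.get_models(include_core=True).items():
        print(model_name, "->", ext.table_name, "| key:", ext.entity_key_field)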
rem/services/content/service.py
CHANGED
@@ -370,11 +370,32 @@ class ContentService:
         file_size = len(file_content)
         logger.info(f"Read {file_size} bytes from {file_uri} (source: {source_type})")
 
-        # Step
+        # Step 1.5: Early schema detection for YAML/JSON files
+        # Skip File entity creation for schemas (agents/evaluators)
+        file_suffix = Path(file_name).suffix.lower()
+        if file_suffix in ['.yaml', '.yml', '.json']:
+            import yaml
+            import json
+            try:
+                content_text = file_content.decode('utf-8') if isinstance(file_content, bytes) else file_content
+                data = yaml.safe_load(content_text) if file_suffix in ['.yaml', '.yml'] else json.loads(content_text)
+                if isinstance(data, dict):
+                    json_schema_extra = data.get('json_schema_extra', {})
+                    kind = json_schema_extra.get('kind', '')
+                    if kind in ['agent', 'evaluator']:
+                        # Route directly to schema processing, skip File entity
+                        logger.info(f"Detected {kind} schema: {file_name}, routing to _process_schema")
+                        result = self.process_uri(file_uri)
+                        return await self._process_schema(result, file_uri, user_id)
+            except Exception as e:
+                logger.debug(f"Early schema detection failed for {file_name}: {e}")
+                # Fall through to standard file processing
+
+        # Step 2: Write to internal storage (public or user-scoped)
         file_id = str(uuid4())
         storage_uri, internal_key, content_type, _ = await fs_service.write_to_internal_storage(
             content=file_content,
-            tenant_id=user_id,  #
+            tenant_id=user_id or "public",  # Storage path: public/ or user_id/
             file_name=file_name,
             file_id=file_id,
         )
@@ -383,7 +404,7 @@ class ContentService:
         # Step 3: Create File entity
         file_entity = File(
             id=file_id,
-            tenant_id=user_id,  #
+            tenant_id=user_id,  # None = public/shared
             user_id=user_id,
             name=file_name,
             uri=storage_uri,
@@ -538,7 +559,7 @@ class ContentService:
             size_bytes=result["metadata"].get("size"),
             mime_type=result["metadata"].get("content_type"),
             processing_status="completed",
-            tenant_id=user_id
+            tenant_id=user_id,  # None = public/shared
             user_id=user_id,
         )
 
@@ -571,7 +592,7 @@ class ContentService:
                 ordinal=i,
                 content=chunk,
                 category="document",
-                tenant_id=user_id
+                tenant_id=user_id,  # None = public/shared
                 user_id=user_id,
             )
             for i, chunk in enumerate(chunks)
@@ -645,9 +666,10 @@ class ContentService:
         # IMPORTANT: category field distinguishes agents from evaluators
         # - kind=agent → category="agent" (AI agents with tools/resources)
         # - kind=evaluator → category="evaluator" (LLM-as-a-Judge evaluators)
+        # Schemas (agents/evaluators) default to system tenant for shared access
         schema_entity = Schema(
-            tenant_id=
-            user_id=
+            tenant_id="system",
+            user_id=None,
             name=name,
             spec=schema_data,
             category=kind,  # Maps kind → category for database filtering
@@ -717,7 +739,7 @@ class ContentService:
         processor = EngramProcessor(postgres)
         result = await processor.process_engram(
             data=data,
-            tenant_id=user_id
+            tenant_id=user_id,  # None = public/shared
             user_id=user_id,
         )
         logger.info(f"✅ Engram processed: {result.get('resource_id')} with {len(result.get('moment_ids', []))} moments")
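Note: the early-detection branch keys solely off a top-level json_schema_extra.kind of agent or evaluator; any other document falls through to standard file processing. A standalone check mirroring that logic on a hypothetical document:

    # Mirrors the detection added above: only kind in {agent, evaluator}
    # short-circuits to _process_schema.
    import yaml

    doc = """
    name: my-reviewer            # hypothetical example schema
    json_schema_extra:
      kind: evaluator
    """
    data = yaml.safe_load(doc)
    kind = data.get("json_schema_extra", {}).get("kind", "") if isinstance(data, dict) else ""
    print(kind in ["agent", "evaluator"])  # True -> routed to _process_schema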
rem/services/embeddings/api.py
CHANGED
@@ -45,7 +45,7 @@ def generate_embedding(
         return [0.0] * DEFAULT_EMBEDDING_DIMS
 
     try:
-        logger.
+        logger.debug(f"Generating OpenAI embedding for text using {model}")
 
         response = requests.post(
             "https://api.openai.com/v1/embeddings",
@@ -60,7 +60,7 @@ def generate_embedding(
 
         data = response.json()
         embedding = data["data"][0]["embedding"]
-        logger.
+        logger.debug(f"Successfully generated embedding (dimension: {len(embedding)})")
         return cast(list[float], embedding)
 
     except Exception as e:
@@ -97,7 +97,7 @@ async def generate_embedding_async(
         return [0.0] * DEFAULT_EMBEDDING_DIMS
 
     try:
-        logger.
+        logger.debug(f"Generating OpenAI embedding for text using {model}")
 
         async with httpx.AsyncClient() as client:
             response = await client.post(
@@ -113,7 +113,7 @@ async def generate_embedding_async(
 
         data = response.json()
         embedding = data["data"][0]["embedding"]
-        logger.
+        logger.debug(
             f"Successfully generated embedding (dimension: {len(embedding)})"
         )
         return cast(list[float], embedding)
rem/services/embeddings/worker.py
CHANGED
@@ -69,7 +69,7 @@ def get_global_embedding_worker(postgres_service: Any = None) -> "EmbeddingWorker":
     if postgres_service is None:
         raise RuntimeError("Must provide postgres_service on first call to get_global_embedding_worker")
     _global_worker = EmbeddingWorker(postgres_service=postgres_service)
-    logger.
+    logger.debug("Created global EmbeddingWorker singleton")
 
     return _global_worker
 
@@ -117,7 +117,7 @@ class EmbeddingWorker:
                 "No OpenAI API key provided - embeddings will use zero vectors"
             )
 
-        logger.
+        logger.debug(
             f"Initialized EmbeddingWorker: {num_workers} workers, "
             f"batch_size={batch_size}, timeout={batch_timeout}s"
         )
@@ -125,17 +125,17 @@ class EmbeddingWorker:
     async def start(self) -> None:
         """Start worker pool."""
         if self.running:
-            logger.
+            logger.debug("EmbeddingWorker already running")
             return
 
         self.running = True
-        logger.
+        logger.debug(f"Starting {self.num_workers} embedding workers")
 
         for i in range(self.num_workers):
            worker = asyncio.create_task(self._worker_loop(i))
            self.workers.append(worker)
 
-        logger.
+        logger.debug("EmbeddingWorker started")
 
     async def stop(self) -> None:
         """Stop worker pool gracefully - processes remaining queue before stopping."""
@@ -143,7 +143,7 @@ class EmbeddingWorker:
             return
 
         queue_size = self.task_queue.qsize()
-        logger.
+        logger.debug(f"Stopping EmbeddingWorker (processing {queue_size} queued tasks first)")
 
         # Wait for queue to drain (with timeout)
         max_wait = 30  # 30 seconds max
@@ -171,7 +171,7 @@ class EmbeddingWorker:
         await asyncio.gather(*self.workers, return_exceptions=True)
 
         self.workers.clear()
-        logger.
+        logger.debug("EmbeddingWorker stopped")
 
     async def queue_task(self, task: EmbeddingTask) -> None:
         """
@@ -195,7 +195,7 @@ class EmbeddingWorker:
         Args:
             worker_id: Unique worker identifier
         """
-        logger.
+        logger.debug(f"Worker {worker_id} started")
 
         while self.running:
             try:
@@ -205,7 +205,7 @@ class EmbeddingWorker:
                 if not batch:
                     continue
 
-                logger.
+                logger.debug(f"Worker {worker_id} processing batch of {len(batch)} tasks")
 
                 # Generate embeddings for batch
                 await self._process_batch(batch)
@@ -213,14 +213,14 @@ class EmbeddingWorker:
                 logger.debug(f"Worker {worker_id} completed batch")
 
             except asyncio.CancelledError:
-                logger.
+                logger.debug(f"Worker {worker_id} cancelled")
                 break
             except Exception as e:
                 logger.error(f"Worker {worker_id} error: {e}", exc_info=True)
                 # Continue processing (don't crash worker on error)
                 await asyncio.sleep(1)
 
-        logger.
+        logger.debug(f"Worker {worker_id} stopped")
 
     async def _collect_batch(self) -> list[EmbeddingTask]:
         """
@@ -284,10 +284,10 @@ class EmbeddingWorker:
             )
 
             # Upsert to database
-            logger.
+            logger.debug(f"Upserting {len(embeddings)} embeddings to database...")
             await self._upsert_embeddings(batch, embeddings)
 
-            logger.
+            logger.debug(
                 f"Successfully generated and stored {len(embeddings)} embeddings "
                 f"(provider={provider}, model={model})"
             )
@@ -315,7 +315,7 @@ class EmbeddingWorker:
         """
         if provider == "openai" and self.openai_api_key:
             try:
-                logger.
+                logger.debug(
                     f"Generating OpenAI embeddings for {len(texts)} texts using {model}"
                 )
 
@@ -336,7 +336,7 @@ class EmbeddingWorker:
                 data = response.json()
                 embeddings = [item["embedding"] for item in data["data"]]
 
-                logger.
+                logger.debug(
                     f"Successfully generated {len(embeddings)} embeddings from OpenAI"
                 )
                 return embeddings
@@ -409,7 +409,7 @@ class EmbeddingWorker:
             ),
         )
 
-        logger.
+        logger.debug(
             f"Upserted embedding: {task.table_name}.{task.entity_id}.{task.field_name}"
         )
 