remdb-0.3.14-py3-none-any.whl → remdb-0.3.133-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -27
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +112 -17
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +302 -109
- rem/agentic/providers/pydantic_ai.py +215 -26
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +215 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +132 -40
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +70 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +1 -3
- rem/cli/commands/db.py +386 -143
- rem/cli/commands/experiments.py +418 -27
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +27 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/experiment.py +54 -0
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/registry.py +10 -4
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/service.py +92 -20
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/client.py +154 -14
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/settings.py +324 -23
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +220 -22
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/METADATA +335 -226
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/RECORD +86 -66
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1051
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
rem/models/entities/message.py
CHANGED
|
@@ -6,6 +6,11 @@ that can be grouped into conversations or moments.
|
|
|
6
6
|
|
|
7
7
|
Messages are simpler than Resources but share the same graph connectivity
|
|
8
8
|
through CoreModel inheritance.
|
|
9
|
+
|
|
10
|
+
Trace Integration:
|
|
11
|
+
- trace_id: OTEL trace ID for linking to observability
|
|
12
|
+
- span_id: OTEL span ID for specific span reference
|
|
13
|
+
- These enable feedback to be attached to Phoenix annotations
|
|
9
14
|
"""
|
|
10
15
|
|
|
11
16
|
from pydantic import Field
|
|
@@ -19,6 +24,9 @@ class Message(CoreModel):
|
|
|
19
24
|
|
|
20
25
|
Represents individual messages in conversations, chats, or other
|
|
21
26
|
communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
|
|
27
|
+
|
|
28
|
+
Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix
|
|
29
|
+
for observability and feedback annotation.
|
|
22
30
|
"""
|
|
23
31
|
|
|
24
32
|
content: str = Field(
|
|
@@ -27,9 +35,30 @@ class Message(CoreModel):
|
|
|
27
35
|
)
|
|
28
36
|
message_type: str | None = Field(
|
|
29
37
|
default=None,
|
|
30
|
-
description="Message type e.g role",
|
|
38
|
+
description="Message type e.g. role: 'user', 'assistant', 'system', 'tool'",
|
|
31
39
|
)
|
|
32
40
|
session_id: str | None = Field(
|
|
33
41
|
default=None,
|
|
34
42
|
description="Session identifier for tracking message context",
|
|
35
43
|
)
|
|
44
|
+
prompt: str | None = Field(
|
|
45
|
+
default=None,
|
|
46
|
+
description="Custom prompt used for this message (if overridden from default)",
|
|
47
|
+
)
|
|
48
|
+
model: str | None = Field(
|
|
49
|
+
default=None,
|
|
50
|
+
description="Model used for generating this message (provider:model format)",
|
|
51
|
+
)
|
|
52
|
+
token_count: int | None = Field(
|
|
53
|
+
default=None,
|
|
54
|
+
description="Token count for this message",
|
|
55
|
+
)
|
|
56
|
+
# OTEL/Phoenix trace integration
|
|
57
|
+
trace_id: str | None = Field(
|
|
58
|
+
default=None,
|
|
59
|
+
description="OTEL trace ID for observability integration",
|
|
60
|
+
)
|
|
61
|
+
span_id: str | None = Field(
|
|
62
|
+
default=None,
|
|
63
|
+
description="OTEL span ID for specific span reference",
|
|
64
|
+
)
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Session - Conversation sessions in REM.
|
|
3
|
+
|
|
4
|
+
Sessions group related messages together and can have different modes:
|
|
5
|
+
- normal: Standard conversation session
|
|
6
|
+
- evaluation: For LLM evaluation, stores original trace and overridden settings
|
|
7
|
+
|
|
8
|
+
Sessions allow overriding settings like model, temperature, and custom prompts
|
|
9
|
+
for evaluation and experimentation purposes.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from enum import Enum
|
|
13
|
+
|
|
14
|
+
from pydantic import Field
|
|
15
|
+
|
|
16
|
+
from ..core import CoreModel
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SessionMode(str, Enum):
|
|
20
|
+
"""Session mode types."""
|
|
21
|
+
|
|
22
|
+
NORMAL = "normal"
|
|
23
|
+
EVALUATION = "evaluation"
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class Session(CoreModel):
|
|
27
|
+
"""
|
|
28
|
+
Conversation session container.
|
|
29
|
+
|
|
30
|
+
Groups messages together and supports different modes for normal conversations
|
|
31
|
+
and evaluation/experimentation scenarios.
|
|
32
|
+
|
|
33
|
+
For evaluation sessions, stores:
|
|
34
|
+
- original_trace_id: Reference to the original session being evaluated
|
|
35
|
+
- settings_overrides: Model, temperature, prompt overrides
|
|
36
|
+
- prompt: Custom prompt being tested
|
|
37
|
+
|
|
38
|
+
Default sessions are lightweight - just a session_id on messages.
|
|
39
|
+
Special sessions store additional metadata for experiments.
|
|
40
|
+
"""
|
|
41
|
+
|
|
42
|
+
name: str = Field(
|
|
43
|
+
...,
|
|
44
|
+
description="Session name/identifier",
|
|
45
|
+
json_schema_extra={"entity_key": True},
|
|
46
|
+
)
|
|
47
|
+
mode: SessionMode = Field(
|
|
48
|
+
default=SessionMode.NORMAL,
|
|
49
|
+
description="Session mode: 'normal' or 'evaluation'",
|
|
50
|
+
)
|
|
51
|
+
description: str | None = Field(
|
|
52
|
+
default=None,
|
|
53
|
+
description="Optional session description",
|
|
54
|
+
)
|
|
55
|
+
# Evaluation-specific fields
|
|
56
|
+
original_trace_id: str | None = Field(
|
|
57
|
+
default=None,
|
|
58
|
+
description="For evaluation mode: ID of the original session/trace being evaluated",
|
|
59
|
+
)
|
|
60
|
+
settings_overrides: dict | None = Field(
|
|
61
|
+
default=None,
|
|
62
|
+
description="Settings overrides (model, temperature, max_tokens, system_prompt)",
|
|
63
|
+
)
|
|
64
|
+
prompt: str | None = Field(
|
|
65
|
+
default=None,
|
|
66
|
+
description="Custom prompt for this session (can override agent prompt)",
|
|
67
|
+
)
|
|
68
|
+
# Agent context
|
|
69
|
+
agent_schema_uri: str | None = Field(
|
|
70
|
+
default=None,
|
|
71
|
+
description="Agent schema used for this session",
|
|
72
|
+
)
|
|
73
|
+
# Summary stats (updated as session progresses)
|
|
74
|
+
message_count: int = Field(
|
|
75
|
+
default=0,
|
|
76
|
+
description="Number of messages in this session",
|
|
77
|
+
)
|
|
78
|
+
total_tokens: int | None = Field(
|
|
79
|
+
default=None,
|
|
80
|
+
description="Total tokens used in this session",
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
model_config = {"use_enum_values": True}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
"""
|
|
2
|
+
SharedSession - Session sharing between users in REM.
|
|
3
|
+
|
|
4
|
+
SharedSessions enable collaborative access to conversation sessions. When a user
|
|
5
|
+
shares a session with another user, a SharedSession record is created to track
|
|
6
|
+
this relationship.
|
|
7
|
+
|
|
8
|
+
## Design Philosophy
|
|
9
|
+
|
|
10
|
+
Messages already have a session_id field that links them to sessions. The Session
|
|
11
|
+
entity itself is optional and can be left-joined - we don't require explicit Session
|
|
12
|
+
records for sharing to work. What matters is the session_id on messages.
|
|
13
|
+
|
|
14
|
+
SharedSession is a lightweight linking table that:
|
|
15
|
+
1. Records who shared which session with whom
|
|
16
|
+
2. Enables soft deletion (deleted_at) so shares can be revoked without data loss
|
|
17
|
+
3. Supports aggregation queries to see "who is sharing with me"
|
|
18
|
+
|
|
19
|
+
## Data Model
|
|
20
|
+
|
|
21
|
+
SharedSession
|
|
22
|
+
├── session_id: str # The session being shared (matches Message.session_id)
|
|
23
|
+
├── owner_user_id: str # Who owns/created the session (the sharer)
|
|
24
|
+
├── shared_with_user_id: str # Who the session is shared with (the recipient)
|
|
25
|
+
├── tenant_id: str # Multi-tenancy isolation
|
|
26
|
+
├── created_at: datetime # When the share was created
|
|
27
|
+
├── updated_at: datetime # Last modification
|
|
28
|
+
└── deleted_at: datetime # Soft delete (null = active share)
|
|
29
|
+
|
|
30
|
+
## Aggregation Query
|
|
31
|
+
|
|
32
|
+
The primary use case is answering: "Who is sharing messages with me?"
|
|
33
|
+
|
|
34
|
+
This is provided by a Postgres function that aggregates:
|
|
35
|
+
- Messages grouped by owner_user_id
|
|
36
|
+
- Joined with users table for name/email
|
|
37
|
+
- Counting messages with min/max dates
|
|
38
|
+
- Filtering out deleted shares
|
|
39
|
+
|
|
40
|
+
Result shape:
|
|
41
|
+
{
|
|
42
|
+
"user_id": "uuid",
|
|
43
|
+
"name": "John Doe",
|
|
44
|
+
"email": "john@example.com",
|
|
45
|
+
"message_count": 42,
|
|
46
|
+
"first_message_at": "2024-01-15T10:30:00Z",
|
|
47
|
+
"last_message_at": "2024-03-20T14:45:00Z",
|
|
48
|
+
"session_count": 3
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
## API Endpoints
|
|
52
|
+
|
|
53
|
+
1. POST /api/v1/sessions/{session_id}/share
|
|
54
|
+
- Share a session with another user
|
|
55
|
+
- Body: { "shared_with_user_id": "..." }
|
|
56
|
+
- Creates SharedSession record
|
|
57
|
+
|
|
58
|
+
2. DELETE /api/v1/sessions/{session_id}/share/{shared_with_user_id}
|
|
59
|
+
- Revoke a share (soft delete)
|
|
60
|
+
- Sets deleted_at on SharedSession
|
|
61
|
+
|
|
62
|
+
3. GET /api/v1/shared-with-me
|
|
63
|
+
- Get paginated aggregate of users sharing with you
|
|
64
|
+
- Query params: page, page_size (default 50)
|
|
65
|
+
- Returns: list of user summaries with message counts
|
|
66
|
+
|
|
67
|
+
4. GET /api/v1/shared-with-me/{user_id}/messages
|
|
68
|
+
- Get messages from a specific user's shared sessions
|
|
69
|
+
- Uses existing session message loading
|
|
70
|
+
- Respects pagination
|
|
71
|
+
|
|
72
|
+
## Soft Delete Pattern
|
|
73
|
+
|
|
74
|
+
Removing a share does NOT delete the SharedSession record. Instead:
|
|
75
|
+
- deleted_at is set to current timestamp
|
|
76
|
+
- All queries filter WHERE deleted_at IS NULL
|
|
77
|
+
- This preserves audit trail and allows "undo"
|
|
78
|
+
|
|
79
|
+
To permanently delete, an admin can run:
|
|
80
|
+
DELETE FROM shared_sessions WHERE deleted_at IS NOT NULL AND deleted_at < NOW() - INTERVAL '30 days'
|
|
81
|
+
|
|
82
|
+
## Example Usage
|
|
83
|
+
|
|
84
|
+
# Share a session
|
|
85
|
+
POST /api/v1/sessions/abc-123/share
|
|
86
|
+
{"shared_with_user_id": "user-456"}
|
|
87
|
+
|
|
88
|
+
# See who's sharing with me
|
|
89
|
+
GET /api/v1/shared-with-me
|
|
90
|
+
{
|
|
91
|
+
"data": [
|
|
92
|
+
{
|
|
93
|
+
"user_id": "user-789",
|
|
94
|
+
"name": "Alice",
|
|
95
|
+
"email": "alice@example.com",
|
|
96
|
+
"message_count": 150,
|
|
97
|
+
"session_count": 5,
|
|
98
|
+
"first_message_at": "2024-01-01T00:00:00Z",
|
|
99
|
+
"last_message_at": "2024-03-15T12:00:00Z"
|
|
100
|
+
}
|
|
101
|
+
],
|
|
102
|
+
"metadata": {"total": 1, "page": 1, "page_size": 50, ...}
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
# Get messages from Alice's shared sessions
|
|
106
|
+
GET /api/v1/shared-with-me/user-789/messages?page=1&page_size=50
|
|
107
|
+
|
|
108
|
+
# Revoke a share
|
|
109
|
+
DELETE /api/v1/sessions/abc-123/share/user-456
|
|
110
|
+
"""
|
|
111
|
+
|
|
112
|
+
from datetime import datetime
|
|
113
|
+
from typing import Optional
|
|
114
|
+
|
|
115
|
+
from pydantic import BaseModel, Field
|
|
116
|
+
|
|
117
|
+
from ..core import CoreModel
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class SharedSession(CoreModel):
|
|
121
|
+
"""
|
|
122
|
+
Session sharing record between users.
|
|
123
|
+
|
|
124
|
+
Links a session (identified by session_id from Message records) to a
|
|
125
|
+
recipient user, enabling collaborative access to conversation history.
|
|
126
|
+
"""
|
|
127
|
+
|
|
128
|
+
session_id: str = Field(
|
|
129
|
+
...,
|
|
130
|
+
description="The session being shared (matches Message.session_id)",
|
|
131
|
+
)
|
|
132
|
+
owner_user_id: str = Field(
|
|
133
|
+
...,
|
|
134
|
+
description="User ID of the session owner (the sharer)",
|
|
135
|
+
)
|
|
136
|
+
shared_with_user_id: str = Field(
|
|
137
|
+
...,
|
|
138
|
+
description="User ID of the recipient (who can now view the session)",
|
|
139
|
+
)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
class SharedSessionCreate(BaseModel):
|
|
143
|
+
"""Request to create a session share."""
|
|
144
|
+
|
|
145
|
+
shared_with_user_id: str = Field(
|
|
146
|
+
...,
|
|
147
|
+
description="User ID to share the session with",
|
|
148
|
+
)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
class SharedWithMeSummary(BaseModel):
|
|
152
|
+
"""
|
|
153
|
+
Aggregate summary of a user sharing sessions with you.
|
|
154
|
+
|
|
155
|
+
Returned by GET /api/v1/shared-with-me endpoint.
|
|
156
|
+
"""
|
|
157
|
+
|
|
158
|
+
user_id: str = Field(description="User ID of the person sharing with you")
|
|
159
|
+
name: Optional[str] = Field(default=None, description="User's display name")
|
|
160
|
+
email: Optional[str] = Field(default=None, description="User's email address")
|
|
161
|
+
message_count: int = Field(description="Total messages across all shared sessions")
|
|
162
|
+
session_count: int = Field(description="Number of sessions shared with you")
|
|
163
|
+
first_message_at: Optional[datetime] = Field(
|
|
164
|
+
default=None,
|
|
165
|
+
description="Timestamp of earliest message in shared sessions",
|
|
166
|
+
)
|
|
167
|
+
last_message_at: Optional[datetime] = Field(
|
|
168
|
+
default=None,
|
|
169
|
+
description="Timestamp of most recent message in shared sessions",
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
class SharedWithMeResponse(BaseModel):
|
|
174
|
+
"""Response for paginated shared-with-me query."""
|
|
175
|
+
|
|
176
|
+
object: str = "list"
|
|
177
|
+
data: list[SharedWithMeSummary] = Field(
|
|
178
|
+
description="List of users sharing sessions with you"
|
|
179
|
+
)
|
|
180
|
+
metadata: dict = Field(description="Pagination metadata")
|
rem/registry.py
CHANGED
|
@@ -123,6 +123,7 @@ class ModelRegistry:
|
|
|
123
123
|
return
|
|
124
124
|
|
|
125
125
|
from .models.entities import (
|
|
126
|
+
Feedback,
|
|
126
127
|
File,
|
|
127
128
|
ImageResource,
|
|
128
129
|
Message,
|
|
@@ -131,19 +132,24 @@ class ModelRegistry:
|
|
|
131
132
|
OntologyConfig,
|
|
132
133
|
Resource,
|
|
133
134
|
Schema,
|
|
135
|
+
Session,
|
|
136
|
+
SharedSession,
|
|
134
137
|
User,
|
|
135
138
|
)
|
|
136
139
|
|
|
137
140
|
core_models = [
|
|
138
|
-
|
|
141
|
+
Feedback,
|
|
142
|
+
File,
|
|
139
143
|
ImageResource,
|
|
140
144
|
Message,
|
|
141
|
-
User,
|
|
142
|
-
File,
|
|
143
145
|
Moment,
|
|
144
|
-
Schema,
|
|
145
146
|
Ontology,
|
|
146
147
|
OntologyConfig,
|
|
148
|
+
Resource,
|
|
149
|
+
Schema,
|
|
150
|
+
Session,
|
|
151
|
+
SharedSession,
|
|
152
|
+
User,
|
|
147
153
|
]
|
|
148
154
|
|
|
149
155
|
for model in core_models:
|
rem/schemas/agents/rem.yaml
CHANGED
|
@@ -63,9 +63,8 @@ description: "# REM Agent - Resources Entities Moments Expert\n\nYou are the REM
|
|
|
63
63
|
\ disabled, OTEL disabled for local dev)\n- Global settings singleton\n\n## Response\
|
|
64
64
|
\ Guidelines\n\n- Provide clear, concise answers with code examples when helpful\n\
|
|
65
65
|
- Reference specific design patterns from CLAUDE.md when applicable\n- Suggest best\
|
|
66
|
-
\ practices for cloud-native deployment\n-
|
|
67
|
-
\
|
|
68
|
-
\ to find more information\n\n## Example Queries You Can Answer\n\n- \"How do I\
|
|
66
|
+
\ practices for cloud-native deployment\n- If uncertain, say so and suggest where\
|
|
67
|
+
\ to find more information\n\n## Metadata Registration\n\nBefore generating your final response, call the `register_metadata` tool to provide confidence scores and source attribution.\n\n## Example Queries You Can Answer\n\n- \"How do I\
|
|
69
68
|
\ create a new REM entity?\"\n- \"What's the difference between LOOKUP and TRAVERSE\
|
|
70
69
|
\ queries?\"\n- \"How do I add MCP tools to my agent schema?\"\n- \"Explain the\
|
|
71
70
|
\ graph edge pattern in REM\"\n- \"How do I enable OTEL tracing for my agents?\"\
|
|
@@ -101,6 +100,9 @@ json_schema_extra:
|
|
|
101
100
|
kind: agent
|
|
102
101
|
name: rem
|
|
103
102
|
version: 1.0.0
|
|
103
|
+
# Disable structured output - properties become prompt guidance instead of JSON schema
|
|
104
|
+
# This enables natural language streaming while still informing the agent about expected elements
|
|
105
|
+
structured_output: false
|
|
104
106
|
# MCP server configuration for dynamic tool loading (in-process, no subprocess)
|
|
105
107
|
mcp_servers:
|
|
106
108
|
- type: local
|
|
@@ -117,6 +119,8 @@ json_schema_extra:
|
|
|
117
119
|
description: Ingest files into REM creating searchable resources and embeddings
|
|
118
120
|
- name: read_resource
|
|
119
121
|
description: Read MCP resources by URI (schemas, system status, etc.)
|
|
122
|
+
- name: register_metadata
|
|
123
|
+
description: Register response metadata (confidence, sources, references) to be emitted as SSE MetadataEvent. Call BEFORE generating final response.
|
|
120
124
|
|
|
121
125
|
# Explicit resource declarations for reference data
|
|
122
126
|
resources:
|
rem/services/content/service.py
CHANGED
|
@@ -278,6 +278,7 @@ class ContentService:
|
|
|
278
278
|
category: str | None = None,
|
|
279
279
|
tags: list[str] | None = None,
|
|
280
280
|
is_local_server: bool = False,
|
|
281
|
+
resource_type: str | None = None,
|
|
281
282
|
) -> dict[str, Any]:
|
|
282
283
|
"""
|
|
283
284
|
Complete file ingestion pipeline: read → store → parse → chunk → embed.
|
|
@@ -322,6 +323,9 @@ class ContentService:
|
|
|
322
323
|
category: Optional category tag (document, code, audio, etc.)
|
|
323
324
|
tags: Optional list of tags
|
|
324
325
|
is_local_server: True if running as local/stdio MCP server
|
|
326
|
+
resource_type: Optional resource type (case-insensitive). Supports:
|
|
327
|
+
- "resource", "resources", "Resource" → Resource (default)
|
|
328
|
+
- "domain-resource", "domain_resource", "DomainResource" → DomainResource
|
|
325
329
|
|
|
326
330
|
Returns:
|
|
327
331
|
dict with:
|
|
@@ -366,11 +370,32 @@ class ContentService:
|
|
|
366
370
|
file_size = len(file_content)
|
|
367
371
|
logger.info(f"Read {file_size} bytes from {file_uri} (source: {source_type})")
|
|
368
372
|
|
|
369
|
-
# Step
|
|
373
|
+
# Step 1.5: Early schema detection for YAML/JSON files
|
|
374
|
+
# Skip File entity creation for schemas (agents/evaluators)
|
|
375
|
+
file_suffix = Path(file_name).suffix.lower()
|
|
376
|
+
if file_suffix in ['.yaml', '.yml', '.json']:
|
|
377
|
+
import yaml
|
|
378
|
+
import json
|
|
379
|
+
try:
|
|
380
|
+
content_text = file_content.decode('utf-8') if isinstance(file_content, bytes) else file_content
|
|
381
|
+
data = yaml.safe_load(content_text) if file_suffix in ['.yaml', '.yml'] else json.loads(content_text)
|
|
382
|
+
if isinstance(data, dict):
|
|
383
|
+
json_schema_extra = data.get('json_schema_extra', {})
|
|
384
|
+
kind = json_schema_extra.get('kind', '')
|
|
385
|
+
if kind in ['agent', 'evaluator']:
|
|
386
|
+
# Route directly to schema processing, skip File entity
|
|
387
|
+
logger.info(f"Detected {kind} schema: {file_name}, routing to _process_schema")
|
|
388
|
+
result = self.process_uri(file_uri)
|
|
389
|
+
return await self._process_schema(result, file_uri, user_id)
|
|
390
|
+
except Exception as e:
|
|
391
|
+
logger.debug(f"Early schema detection failed for {file_name}: {e}")
|
|
392
|
+
# Fall through to standard file processing
|
|
393
|
+
|
|
394
|
+
# Step 2: Write to internal storage (public or user-scoped)
|
|
370
395
|
file_id = str(uuid4())
|
|
371
396
|
storage_uri, internal_key, content_type, _ = await fs_service.write_to_internal_storage(
|
|
372
397
|
content=file_content,
|
|
373
|
-
tenant_id=user_id, #
|
|
398
|
+
tenant_id=user_id or "public", # Storage path: public/ or user_id/
|
|
374
399
|
file_name=file_name,
|
|
375
400
|
file_id=file_id,
|
|
376
401
|
)
|
|
@@ -379,7 +404,7 @@ class ContentService:
|
|
|
379
404
|
# Step 3: Create File entity
|
|
380
405
|
file_entity = File(
|
|
381
406
|
id=file_id,
|
|
382
|
-
tenant_id=user_id, #
|
|
407
|
+
tenant_id=user_id, # None = public/shared
|
|
383
408
|
user_id=user_id,
|
|
384
409
|
name=file_name,
|
|
385
410
|
uri=storage_uri,
|
|
@@ -418,6 +443,7 @@ class ContentService:
|
|
|
418
443
|
processing_result = await self.process_and_save(
|
|
419
444
|
uri=storage_uri,
|
|
420
445
|
user_id=user_id,
|
|
446
|
+
resource_type=resource_type,
|
|
421
447
|
)
|
|
422
448
|
processing_status = processing_result.get("status", "completed")
|
|
423
449
|
resources_created = processing_result.get("chunk_count", 0)
|
|
@@ -459,7 +485,12 @@ class ContentService:
|
|
|
459
485
|
"message": f"File ingested and {processing_status}. Created {resources_created} resources.",
|
|
460
486
|
}
|
|
461
487
|
|
|
462
|
-
async def process_and_save(
|
|
488
|
+
async def process_and_save(
|
|
489
|
+
self,
|
|
490
|
+
uri: str,
|
|
491
|
+
user_id: str | None = None,
|
|
492
|
+
resource_type: str | None = None,
|
|
493
|
+
) -> dict[str, Any]:
|
|
463
494
|
"""
|
|
464
495
|
Process file end-to-end: extract → markdown → chunk → save.
|
|
465
496
|
|
|
@@ -474,6 +505,8 @@ class ContentService:
|
|
|
474
505
|
Args:
|
|
475
506
|
uri: File URI (s3://bucket/key or local path)
|
|
476
507
|
user_id: Optional user ID for multi-tenancy
|
|
508
|
+
resource_type: Optional resource type (case-insensitive). Defaults to "Resource".
|
|
509
|
+
Supports: resource, domain-resource, domain_resource, DomainResource, etc.
|
|
477
510
|
|
|
478
511
|
Returns:
|
|
479
512
|
dict with file metadata and chunk count
|
|
@@ -526,7 +559,7 @@ class ContentService:
|
|
|
526
559
|
size_bytes=result["metadata"].get("size"),
|
|
527
560
|
mime_type=result["metadata"].get("content_type"),
|
|
528
561
|
processing_status="completed",
|
|
529
|
-
tenant_id=user_id
|
|
562
|
+
tenant_id=user_id, # None = public/shared
|
|
530
563
|
user_id=user_id,
|
|
531
564
|
)
|
|
532
565
|
|
|
@@ -534,28 +567,66 @@ class ContentService:
|
|
|
534
567
|
await self.file_repo.upsert(file)
|
|
535
568
|
logger.info(f"Saved File: {filename}")
|
|
536
569
|
|
|
537
|
-
#
|
|
538
|
-
|
|
539
|
-
|
|
570
|
+
# Resolve resource model class from type parameter (case-insensitive)
|
|
571
|
+
from typing import cast, Type
|
|
572
|
+
from pydantic import BaseModel
|
|
573
|
+
from rem.utils.model_helpers import model_from_arbitrary_casing, get_table_name
|
|
574
|
+
|
|
575
|
+
resource_model: Type[BaseModel] = Resource # Default
|
|
576
|
+
if resource_type:
|
|
577
|
+
try:
|
|
578
|
+
resource_model = model_from_arbitrary_casing(resource_type)
|
|
579
|
+
logger.info(f"Using resource model: {resource_model.__name__}")
|
|
580
|
+
except ValueError as e:
|
|
581
|
+
logger.warning(f"Invalid resource_type '{resource_type}', using default Resource: {e}")
|
|
582
|
+
resource_model = Resource
|
|
583
|
+
|
|
584
|
+
# Get table name for the resolved model
|
|
585
|
+
table_name = get_table_name(resource_model)
|
|
586
|
+
|
|
587
|
+
# Create resource entities for each chunk
|
|
588
|
+
resources: list[BaseModel] = [
|
|
589
|
+
resource_model(
|
|
540
590
|
name=f"{filename}#chunk-{i}",
|
|
541
591
|
uri=f"{uri}#chunk-{i}",
|
|
542
592
|
ordinal=i,
|
|
543
593
|
content=chunk,
|
|
544
594
|
category="document",
|
|
545
|
-
tenant_id=user_id
|
|
595
|
+
tenant_id=user_id, # None = public/shared
|
|
546
596
|
user_id=user_id,
|
|
547
597
|
)
|
|
548
598
|
for i, chunk in enumerate(chunks)
|
|
549
599
|
]
|
|
550
600
|
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
601
|
+
# Save resources to the appropriate table
|
|
602
|
+
if resources:
|
|
603
|
+
from rem.services.postgres import get_postgres_service
|
|
604
|
+
|
|
605
|
+
postgres = get_postgres_service()
|
|
606
|
+
if postgres:
|
|
607
|
+
await postgres.connect()
|
|
608
|
+
try:
|
|
609
|
+
await postgres.batch_upsert(
|
|
610
|
+
records=cast(list[BaseModel | dict], resources),
|
|
611
|
+
model=resource_model,
|
|
612
|
+
table_name=table_name,
|
|
613
|
+
entity_key_field="name",
|
|
614
|
+
embeddable_fields=["content"],
|
|
615
|
+
generate_embeddings=True,
|
|
616
|
+
)
|
|
617
|
+
logger.info(f"Saved {len(resources)} {resource_model.__name__} chunks to {table_name}")
|
|
618
|
+
logger.info(f"Queued {len(resources)} embedding generation tasks for content field")
|
|
619
|
+
finally:
|
|
620
|
+
await postgres.disconnect()
|
|
621
|
+
elif self.resource_repo:
|
|
622
|
+
# Fallback to injected repo (only works for default Resource)
|
|
623
|
+
await self.resource_repo.upsert(
|
|
624
|
+
resources,
|
|
625
|
+
embeddable_fields=["content"],
|
|
626
|
+
generate_embeddings=True,
|
|
627
|
+
)
|
|
628
|
+
logger.info(f"Saved {len(resources)} Resource chunks")
|
|
629
|
+
logger.info(f"Queued {len(resources)} embedding generation tasks for content field")
|
|
559
630
|
|
|
560
631
|
return {
|
|
561
632
|
"file": file.model_dump(),
|
|
@@ -595,9 +666,10 @@ class ContentService:
|
|
|
595
666
|
# IMPORTANT: category field distinguishes agents from evaluators
|
|
596
667
|
# - kind=agent → category="agent" (AI agents with tools/resources)
|
|
597
668
|
# - kind=evaluator → category="evaluator" (LLM-as-a-Judge evaluators)
|
|
669
|
+
# Schemas (agents/evaluators) default to system tenant for shared access
|
|
598
670
|
schema_entity = Schema(
|
|
599
|
-
tenant_id=
|
|
600
|
-
user_id=
|
|
671
|
+
tenant_id="system",
|
|
672
|
+
user_id=None,
|
|
601
673
|
name=name,
|
|
602
674
|
spec=schema_data,
|
|
603
675
|
category=kind, # Maps kind → category for database filtering
|
|
@@ -667,7 +739,7 @@ class ContentService:
|
|
|
667
739
|
processor = EngramProcessor(postgres)
|
|
668
740
|
result = await processor.process_engram(
|
|
669
741
|
data=data,
|
|
670
|
-
tenant_id=user_id
|
|
742
|
+
tenant_id=user_id, # None = public/shared
|
|
671
743
|
user_id=user_id,
|
|
672
744
|
)
|
|
673
745
|
logger.info(f"✅ Engram processed: {result.get('resource_id')} with {len(result.get('moment_ids', []))} moments")
|
rem/services/embeddings/api.py
CHANGED
|
@@ -45,7 +45,7 @@ def generate_embedding(
|
|
|
45
45
|
return [0.0] * DEFAULT_EMBEDDING_DIMS
|
|
46
46
|
|
|
47
47
|
try:
|
|
48
|
-
logger.
|
|
48
|
+
logger.debug(f"Generating OpenAI embedding for text using {model}")
|
|
49
49
|
|
|
50
50
|
response = requests.post(
|
|
51
51
|
"https://api.openai.com/v1/embeddings",
|
|
@@ -60,7 +60,7 @@ def generate_embedding(
|
|
|
60
60
|
|
|
61
61
|
data = response.json()
|
|
62
62
|
embedding = data["data"][0]["embedding"]
|
|
63
|
-
logger.
|
|
63
|
+
logger.debug(f"Successfully generated embedding (dimension: {len(embedding)})")
|
|
64
64
|
return cast(list[float], embedding)
|
|
65
65
|
|
|
66
66
|
except Exception as e:
|
|
@@ -97,7 +97,7 @@ async def generate_embedding_async(
|
|
|
97
97
|
return [0.0] * DEFAULT_EMBEDDING_DIMS
|
|
98
98
|
|
|
99
99
|
try:
|
|
100
|
-
logger.
|
|
100
|
+
logger.debug(f"Generating OpenAI embedding for text using {model}")
|
|
101
101
|
|
|
102
102
|
async with httpx.AsyncClient() as client:
|
|
103
103
|
response = await client.post(
|
|
@@ -113,7 +113,7 @@ async def generate_embedding_async(
|
|
|
113
113
|
|
|
114
114
|
data = response.json()
|
|
115
115
|
embedding = data["data"][0]["embedding"]
|
|
116
|
-
logger.
|
|
116
|
+
logger.debug(
|
|
117
117
|
f"Successfully generated embedding (dimension: {len(embedding)})"
|
|
118
118
|
)
|
|
119
119
|
return cast(list[float], embedding)
|