remdb 0.3.14__py3-none-any.whl → 0.3.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +16 -2
  4. rem/agentic/agents/sse_simulator.py +502 -0
  5. rem/agentic/context.py +51 -27
  6. rem/agentic/llm_provider_models.py +301 -0
  7. rem/agentic/mcp/tool_wrapper.py +112 -17
  8. rem/agentic/otel/setup.py +93 -4
  9. rem/agentic/providers/phoenix.py +302 -109
  10. rem/agentic/providers/pydantic_ai.py +215 -26
  11. rem/agentic/schema.py +361 -21
  12. rem/agentic/tools/rem_tools.py +3 -3
  13. rem/api/README.md +215 -1
  14. rem/api/deps.py +255 -0
  15. rem/api/main.py +132 -40
  16. rem/api/mcp_router/resources.py +1 -1
  17. rem/api/mcp_router/server.py +26 -5
  18. rem/api/mcp_router/tools.py +465 -7
  19. rem/api/routers/admin.py +494 -0
  20. rem/api/routers/auth.py +70 -0
  21. rem/api/routers/chat/completions.py +402 -20
  22. rem/api/routers/chat/models.py +88 -10
  23. rem/api/routers/chat/otel_utils.py +33 -0
  24. rem/api/routers/chat/sse_events.py +542 -0
  25. rem/api/routers/chat/streaming.py +642 -45
  26. rem/api/routers/dev.py +81 -0
  27. rem/api/routers/feedback.py +268 -0
  28. rem/api/routers/messages.py +473 -0
  29. rem/api/routers/models.py +78 -0
  30. rem/api/routers/query.py +360 -0
  31. rem/api/routers/shared_sessions.py +406 -0
  32. rem/auth/middleware.py +126 -27
  33. rem/cli/commands/README.md +237 -64
  34. rem/cli/commands/cluster.py +1808 -0
  35. rem/cli/commands/configure.py +1 -3
  36. rem/cli/commands/db.py +386 -143
  37. rem/cli/commands/experiments.py +418 -27
  38. rem/cli/commands/process.py +14 -8
  39. rem/cli/commands/schema.py +97 -50
  40. rem/cli/main.py +27 -6
  41. rem/config.py +10 -3
  42. rem/models/core/core_model.py +7 -1
  43. rem/models/core/experiment.py +54 -0
  44. rem/models/core/rem_query.py +5 -2
  45. rem/models/entities/__init__.py +21 -0
  46. rem/models/entities/domain_resource.py +38 -0
  47. rem/models/entities/feedback.py +123 -0
  48. rem/models/entities/message.py +30 -1
  49. rem/models/entities/session.py +83 -0
  50. rem/models/entities/shared_session.py +180 -0
  51. rem/registry.py +10 -4
  52. rem/schemas/agents/rem.yaml +7 -3
  53. rem/services/content/service.py +92 -20
  54. rem/services/embeddings/api.py +4 -4
  55. rem/services/embeddings/worker.py +16 -16
  56. rem/services/phoenix/client.py +154 -14
  57. rem/services/postgres/README.md +159 -15
  58. rem/services/postgres/__init__.py +2 -1
  59. rem/services/postgres/diff_service.py +531 -0
  60. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  61. rem/services/postgres/repository.py +132 -0
  62. rem/services/postgres/schema_generator.py +205 -4
  63. rem/services/postgres/service.py +6 -6
  64. rem/services/rem/parser.py +44 -9
  65. rem/services/rem/service.py +36 -2
  66. rem/services/session/compression.py +24 -1
  67. rem/services/session/reload.py +1 -1
  68. rem/settings.py +324 -23
  69. rem/sql/background_indexes.sql +21 -16
  70. rem/sql/migrations/001_install.sql +387 -54
  71. rem/sql/migrations/002_install_models.sql +2320 -393
  72. rem/sql/migrations/003_optional_extensions.sql +326 -0
  73. rem/sql/migrations/004_cache_system.sql +548 -0
  74. rem/utils/__init__.py +18 -0
  75. rem/utils/date_utils.py +2 -2
  76. rem/utils/model_helpers.py +156 -1
  77. rem/utils/schema_loader.py +220 -22
  78. rem/utils/sql_paths.py +146 -0
  79. rem/utils/sql_types.py +3 -1
  80. rem/workers/__init__.py +3 -1
  81. rem/workers/db_listener.py +579 -0
  82. rem/workers/unlogged_maintainer.py +463 -0
  83. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/METADATA +335 -226
  84. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/RECORD +86 -66
  85. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
  86. rem/sql/002_install_models.sql +0 -1068
  87. rem/sql/install_models.sql +0 -1051
  88. rem/sql/migrations/003_seed_default_user.sql +0 -48
  89. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
@@ -6,6 +6,11 @@ that can be grouped into conversations or moments.
6
6
 
7
7
  Messages are simpler than Resources but share the same graph connectivity
8
8
  through CoreModel inheritance.
9
+
10
+ Trace Integration:
11
+ - trace_id: OTEL trace ID for linking to observability
12
+ - span_id: OTEL span ID for specific span reference
13
+ - These enable feedback to be attached to Phoenix annotations
9
14
  """
10
15
 
11
16
  from pydantic import Field
@@ -19,6 +24,9 @@ class Message(CoreModel):
19
24
 
20
25
  Represents individual messages in conversations, chats, or other
21
26
  communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
27
+
28
+ Trace fields (trace_id, span_id) enable integration with OTEL/Phoenix
29
+ for observability and feedback annotation.
22
30
  """
23
31
 
24
32
  content: str = Field(
@@ -27,9 +35,30 @@ class Message(CoreModel):
27
35
  )
28
36
  message_type: str | None = Field(
29
37
  default=None,
30
- description="Message type e.g role",
38
+ description="Message type e.g. role: 'user', 'assistant', 'system', 'tool'",
31
39
  )
32
40
  session_id: str | None = Field(
33
41
  default=None,
34
42
  description="Session identifier for tracking message context",
35
43
  )
44
+ prompt: str | None = Field(
45
+ default=None,
46
+ description="Custom prompt used for this message (if overridden from default)",
47
+ )
48
+ model: str | None = Field(
49
+ default=None,
50
+ description="Model used for generating this message (provider:model format)",
51
+ )
52
+ token_count: int | None = Field(
53
+ default=None,
54
+ description="Token count for this message",
55
+ )
56
+ # OTEL/Phoenix trace integration
57
+ trace_id: str | None = Field(
58
+ default=None,
59
+ description="OTEL trace ID for observability integration",
60
+ )
61
+ span_id: str | None = Field(
62
+ default=None,
63
+ description="OTEL span ID for specific span reference",
64
+ )
@@ -0,0 +1,83 @@
1
+ """
2
+ Session - Conversation sessions in REM.
3
+
4
+ Sessions group related messages together and can have different modes:
5
+ - normal: Standard conversation session
6
+ - evaluation: For LLM evaluation, stores original trace and overridden settings
7
+
8
+ Sessions allow overriding settings like model, temperature, and custom prompts
9
+ for evaluation and experimentation purposes.
10
+ """
11
+
12
+ from enum import Enum
13
+
14
+ from pydantic import Field
15
+
16
+ from ..core import CoreModel
17
+
18
+
19
+ class SessionMode(str, Enum):
20
+ """Session mode types."""
21
+
22
+ NORMAL = "normal"
23
+ EVALUATION = "evaluation"
24
+
25
+
26
+ class Session(CoreModel):
27
+ """
28
+ Conversation session container.
29
+
30
+ Groups messages together and supports different modes for normal conversations
31
+ and evaluation/experimentation scenarios.
32
+
33
+ For evaluation sessions, stores:
34
+ - original_trace_id: Reference to the original session being evaluated
35
+ - settings_overrides: Model, temperature, prompt overrides
36
+ - prompt: Custom prompt being tested
37
+
38
+ Default sessions are lightweight - just a session_id on messages.
39
+ Special sessions store additional metadata for experiments.
40
+ """
41
+
42
+ name: str = Field(
43
+ ...,
44
+ description="Session name/identifier",
45
+ json_schema_extra={"entity_key": True},
46
+ )
47
+ mode: SessionMode = Field(
48
+ default=SessionMode.NORMAL,
49
+ description="Session mode: 'normal' or 'evaluation'",
50
+ )
51
+ description: str | None = Field(
52
+ default=None,
53
+ description="Optional session description",
54
+ )
55
+ # Evaluation-specific fields
56
+ original_trace_id: str | None = Field(
57
+ default=None,
58
+ description="For evaluation mode: ID of the original session/trace being evaluated",
59
+ )
60
+ settings_overrides: dict | None = Field(
61
+ default=None,
62
+ description="Settings overrides (model, temperature, max_tokens, system_prompt)",
63
+ )
64
+ prompt: str | None = Field(
65
+ default=None,
66
+ description="Custom prompt for this session (can override agent prompt)",
67
+ )
68
+ # Agent context
69
+ agent_schema_uri: str | None = Field(
70
+ default=None,
71
+ description="Agent schema used for this session",
72
+ )
73
+ # Summary stats (updated as session progresses)
74
+ message_count: int = Field(
75
+ default=0,
76
+ description="Number of messages in this session",
77
+ )
78
+ total_tokens: int | None = Field(
79
+ default=None,
80
+ description="Total tokens used in this session",
81
+ )
82
+
83
+ model_config = {"use_enum_values": True}
@@ -0,0 +1,180 @@
1
+ """
2
+ SharedSession - Session sharing between users in REM.
3
+
4
+ SharedSessions enable collaborative access to conversation sessions. When a user
5
+ shares a session with another user, a SharedSession record is created to track
6
+ this relationship.
7
+
8
+ ## Design Philosophy
9
+
10
+ Messages already have a session_id field that links them to sessions. The Session
11
+ entity itself is optional and can be left-joined - we don't require explicit Session
12
+ records for sharing to work. What matters is the session_id on messages.
13
+
14
+ SharedSession is a lightweight linking table that:
15
+ 1. Records who shared which session with whom
16
+ 2. Enables soft deletion (deleted_at) so shares can be revoked without data loss
17
+ 3. Supports aggregation queries to see "who is sharing with me"
18
+
19
+ ## Data Model
20
+
21
+ SharedSession
22
+ ├── session_id: str # The session being shared (matches Message.session_id)
23
+ ├── owner_user_id: str # Who owns/created the session (the sharer)
24
+ ├── shared_with_user_id: str # Who the session is shared with (the recipient)
25
+ ├── tenant_id: str # Multi-tenancy isolation
26
+ ├── created_at: datetime # When the share was created
27
+ ├── updated_at: datetime # Last modification
28
+ └── deleted_at: datetime # Soft delete (null = active share)
29
+
30
+ ## Aggregation Query
31
+
32
+ The primary use case is answering: "Who is sharing messages with me?"
33
+
34
+ This is provided by a Postgres function that aggregates:
35
+ - Messages grouped by owner_user_id
36
+ - Joined with users table for name/email
37
+ - Counting messages with min/max dates
38
+ - Filtering out deleted shares
39
+
40
+ Result shape:
41
+ {
42
+ "user_id": "uuid",
43
+ "name": "John Doe",
44
+ "email": "john@example.com",
45
+ "message_count": 42,
46
+ "first_message_at": "2024-01-15T10:30:00Z",
47
+ "last_message_at": "2024-03-20T14:45:00Z",
48
+ "session_count": 3
49
+ }
50
+
51
+ ## API Endpoints
52
+
53
+ 1. POST /api/v1/sessions/{session_id}/share
54
+ - Share a session with another user
55
+ - Body: { "shared_with_user_id": "..." }
56
+ - Creates SharedSession record
57
+
58
+ 2. DELETE /api/v1/sessions/{session_id}/share/{shared_with_user_id}
59
+ - Revoke a share (soft delete)
60
+ - Sets deleted_at on SharedSession
61
+
62
+ 3. GET /api/v1/shared-with-me
63
+ - Get paginated aggregate of users sharing with you
64
+ - Query params: page, page_size (default 50)
65
+ - Returns: list of user summaries with message counts
66
+
67
+ 4. GET /api/v1/shared-with-me/{user_id}/messages
68
+ - Get messages from a specific user's shared sessions
69
+ - Uses existing session message loading
70
+ - Respects pagination
71
+
72
+ ## Soft Delete Pattern
73
+
74
+ Removing a share does NOT delete the SharedSession record. Instead:
75
+ - deleted_at is set to current timestamp
76
+ - All queries filter WHERE deleted_at IS NULL
77
+ - This preserves audit trail and allows "undo"
78
+
79
+ To permanently delete, an admin can run:
80
+ DELETE FROM shared_sessions WHERE deleted_at IS NOT NULL AND deleted_at < NOW() - INTERVAL '30 days'
81
+
82
+ ## Example Usage
83
+
84
+ # Share a session
85
+ POST /api/v1/sessions/abc-123/share
86
+ {"shared_with_user_id": "user-456"}
87
+
88
+ # See who's sharing with me
89
+ GET /api/v1/shared-with-me
90
+ {
91
+ "data": [
92
+ {
93
+ "user_id": "user-789",
94
+ "name": "Alice",
95
+ "email": "alice@example.com",
96
+ "message_count": 150,
97
+ "session_count": 5,
98
+ "first_message_at": "2024-01-01T00:00:00Z",
99
+ "last_message_at": "2024-03-15T12:00:00Z"
100
+ }
101
+ ],
102
+ "metadata": {"total": 1, "page": 1, "page_size": 50, ...}
103
+ }
104
+
105
+ # Get messages from Alice's shared sessions
106
+ GET /api/v1/shared-with-me/user-789/messages?page=1&page_size=50
107
+
108
+ # Revoke a share
109
+ DELETE /api/v1/sessions/abc-123/share/user-456
110
+ """
111
+
112
+ from datetime import datetime
113
+ from typing import Optional
114
+
115
+ from pydantic import BaseModel, Field
116
+
117
+ from ..core import CoreModel
118
+
119
+
120
+ class SharedSession(CoreModel):
121
+ """
122
+ Session sharing record between users.
123
+
124
+ Links a session (identified by session_id from Message records) to a
125
+ recipient user, enabling collaborative access to conversation history.
126
+ """
127
+
128
+ session_id: str = Field(
129
+ ...,
130
+ description="The session being shared (matches Message.session_id)",
131
+ )
132
+ owner_user_id: str = Field(
133
+ ...,
134
+ description="User ID of the session owner (the sharer)",
135
+ )
136
+ shared_with_user_id: str = Field(
137
+ ...,
138
+ description="User ID of the recipient (who can now view the session)",
139
+ )
140
+
141
+
142
+ class SharedSessionCreate(BaseModel):
143
+ """Request to create a session share."""
144
+
145
+ shared_with_user_id: str = Field(
146
+ ...,
147
+ description="User ID to share the session with",
148
+ )
149
+
150
+
151
+ class SharedWithMeSummary(BaseModel):
152
+ """
153
+ Aggregate summary of a user sharing sessions with you.
154
+
155
+ Returned by GET /api/v1/shared-with-me endpoint.
156
+ """
157
+
158
+ user_id: str = Field(description="User ID of the person sharing with you")
159
+ name: Optional[str] = Field(default=None, description="User's display name")
160
+ email: Optional[str] = Field(default=None, description="User's email address")
161
+ message_count: int = Field(description="Total messages across all shared sessions")
162
+ session_count: int = Field(description="Number of sessions shared with you")
163
+ first_message_at: Optional[datetime] = Field(
164
+ default=None,
165
+ description="Timestamp of earliest message in shared sessions",
166
+ )
167
+ last_message_at: Optional[datetime] = Field(
168
+ default=None,
169
+ description="Timestamp of most recent message in shared sessions",
170
+ )
171
+
172
+
173
+ class SharedWithMeResponse(BaseModel):
174
+ """Response for paginated shared-with-me query."""
175
+
176
+ object: str = "list"
177
+ data: list[SharedWithMeSummary] = Field(
178
+ description="List of users sharing sessions with you"
179
+ )
180
+ metadata: dict = Field(description="Pagination metadata")
rem/registry.py CHANGED
@@ -123,6 +123,7 @@ class ModelRegistry:
123
123
  return
124
124
 
125
125
  from .models.entities import (
126
+ Feedback,
126
127
  File,
127
128
  ImageResource,
128
129
  Message,
@@ -131,19 +132,24 @@ class ModelRegistry:
131
132
  OntologyConfig,
132
133
  Resource,
133
134
  Schema,
135
+ Session,
136
+ SharedSession,
134
137
  User,
135
138
  )
136
139
 
137
140
  core_models = [
138
- Resource,
141
+ Feedback,
142
+ File,
139
143
  ImageResource,
140
144
  Message,
141
- User,
142
- File,
143
145
  Moment,
144
- Schema,
145
146
  Ontology,
146
147
  OntologyConfig,
148
+ Resource,
149
+ Schema,
150
+ Session,
151
+ SharedSession,
152
+ User,
147
153
  ]
148
154
 
149
155
  for model in core_models:
@@ -63,9 +63,8 @@ description: "# REM Agent - Resources Entities Moments Expert\n\nYou are the REM
63
63
  \ disabled, OTEL disabled for local dev)\n- Global settings singleton\n\n## Response\
64
64
  \ Guidelines\n\n- Provide clear, concise answers with code examples when helpful\n\
65
65
  - Reference specific design patterns from CLAUDE.md when applicable\n- Suggest best\
66
- \ practices for cloud-native deployment\n- Include confidence scores based on query\
67
- \ clarity and information completeness\n- If uncertain, say so and suggest where\
68
- \ to find more information\n\n## Example Queries You Can Answer\n\n- \"How do I\
66
+ \ practices for cloud-native deployment\n- If uncertain, say so and suggest where\
67
+ \ to find more information\n\n## Metadata Registration\n\nBefore generating your final response, call the `register_metadata` tool to provide confidence scores and source attribution.\n\n## Example Queries You Can Answer\n\n- \"How do I\
69
68
  \ create a new REM entity?\"\n- \"What's the difference between LOOKUP and TRAVERSE\
70
69
  \ queries?\"\n- \"How do I add MCP tools to my agent schema?\"\n- \"Explain the\
71
70
  \ graph edge pattern in REM\"\n- \"How do I enable OTEL tracing for my agents?\"\
@@ -101,6 +100,9 @@ json_schema_extra:
101
100
  kind: agent
102
101
  name: rem
103
102
  version: 1.0.0
103
+ # Disable structured output - properties become prompt guidance instead of JSON schema
104
+ # This enables natural language streaming while still informing the agent about expected elements
105
+ structured_output: false
104
106
  # MCP server configuration for dynamic tool loading (in-process, no subprocess)
105
107
  mcp_servers:
106
108
  - type: local
@@ -117,6 +119,8 @@ json_schema_extra:
117
119
  description: Ingest files into REM creating searchable resources and embeddings
118
120
  - name: read_resource
119
121
  description: Read MCP resources by URI (schemas, system status, etc.)
122
+ - name: register_metadata
123
+ description: Register response metadata (confidence, sources, references) to be emitted as SSE MetadataEvent. Call BEFORE generating final response.
120
124
 
121
125
  # Explicit resource declarations for reference data
122
126
  resources:
@@ -278,6 +278,7 @@ class ContentService:
278
278
  category: str | None = None,
279
279
  tags: list[str] | None = None,
280
280
  is_local_server: bool = False,
281
+ resource_type: str | None = None,
281
282
  ) -> dict[str, Any]:
282
283
  """
283
284
  Complete file ingestion pipeline: read → store → parse → chunk → embed.
@@ -322,6 +323,9 @@ class ContentService:
322
323
  category: Optional category tag (document, code, audio, etc.)
323
324
  tags: Optional list of tags
324
325
  is_local_server: True if running as local/stdio MCP server
326
+ resource_type: Optional resource type (case-insensitive). Supports:
327
+ - "resource", "resources", "Resource" → Resource (default)
328
+ - "domain-resource", "domain_resource", "DomainResource" → DomainResource
325
329
 
326
330
  Returns:
327
331
  dict with:
@@ -366,11 +370,32 @@ class ContentService:
366
370
  file_size = len(file_content)
367
371
  logger.info(f"Read {file_size} bytes from {file_uri} (source: {source_type})")
368
372
 
369
- # Step 2: Write to internal storage (user-scoped)
373
+ # Step 1.5: Early schema detection for YAML/JSON files
374
+ # Skip File entity creation for schemas (agents/evaluators)
375
+ file_suffix = Path(file_name).suffix.lower()
376
+ if file_suffix in ['.yaml', '.yml', '.json']:
377
+ import yaml
378
+ import json
379
+ try:
380
+ content_text = file_content.decode('utf-8') if isinstance(file_content, bytes) else file_content
381
+ data = yaml.safe_load(content_text) if file_suffix in ['.yaml', '.yml'] else json.loads(content_text)
382
+ if isinstance(data, dict):
383
+ json_schema_extra = data.get('json_schema_extra', {})
384
+ kind = json_schema_extra.get('kind', '')
385
+ if kind in ['agent', 'evaluator']:
386
+ # Route directly to schema processing, skip File entity
387
+ logger.info(f"Detected {kind} schema: {file_name}, routing to _process_schema")
388
+ result = self.process_uri(file_uri)
389
+ return await self._process_schema(result, file_uri, user_id)
390
+ except Exception as e:
391
+ logger.debug(f"Early schema detection failed for {file_name}: {e}")
392
+ # Fall through to standard file processing
393
+
394
+ # Step 2: Write to internal storage (public or user-scoped)
370
395
  file_id = str(uuid4())
371
396
  storage_uri, internal_key, content_type, _ = await fs_service.write_to_internal_storage(
372
397
  content=file_content,
373
- tenant_id=user_id, # Using user_id for storage scoping
398
+ tenant_id=user_id or "public", # Storage path: public/ or user_id/
374
399
  file_name=file_name,
375
400
  file_id=file_id,
376
401
  )
@@ -379,7 +404,7 @@ class ContentService:
379
404
  # Step 3: Create File entity
380
405
  file_entity = File(
381
406
  id=file_id,
382
- tenant_id=user_id, # Set tenant_id to user_id (application scoped to user)
407
+ tenant_id=user_id, # None = public/shared
383
408
  user_id=user_id,
384
409
  name=file_name,
385
410
  uri=storage_uri,
@@ -418,6 +443,7 @@ class ContentService:
418
443
  processing_result = await self.process_and_save(
419
444
  uri=storage_uri,
420
445
  user_id=user_id,
446
+ resource_type=resource_type,
421
447
  )
422
448
  processing_status = processing_result.get("status", "completed")
423
449
  resources_created = processing_result.get("chunk_count", 0)
@@ -459,7 +485,12 @@ class ContentService:
459
485
  "message": f"File ingested and {processing_status}. Created {resources_created} resources.",
460
486
  }
461
487
 
462
- async def process_and_save(self, uri: str, user_id: str | None = None) -> dict[str, Any]:
488
+ async def process_and_save(
489
+ self,
490
+ uri: str,
491
+ user_id: str | None = None,
492
+ resource_type: str | None = None,
493
+ ) -> dict[str, Any]:
463
494
  """
464
495
  Process file end-to-end: extract → markdown → chunk → save.
465
496
 
@@ -474,6 +505,8 @@ class ContentService:
474
505
  Args:
475
506
  uri: File URI (s3://bucket/key or local path)
476
507
  user_id: Optional user ID for multi-tenancy
508
+ resource_type: Optional resource type (case-insensitive). Defaults to "Resource".
509
+ Supports: resource, domain-resource, domain_resource, DomainResource, etc.
477
510
 
478
511
  Returns:
479
512
  dict with file metadata and chunk count
@@ -526,7 +559,7 @@ class ContentService:
526
559
  size_bytes=result["metadata"].get("size"),
527
560
  mime_type=result["metadata"].get("content_type"),
528
561
  processing_status="completed",
529
- tenant_id=user_id or "default", # Required field
562
+ tenant_id=user_id, # None = public/shared
530
563
  user_id=user_id,
531
564
  )
532
565
 
@@ -534,28 +567,66 @@ class ContentService:
534
567
  await self.file_repo.upsert(file)
535
568
  logger.info(f"Saved File: {filename}")
536
569
 
537
- # Create Resource entities for each chunk
538
- resources = [
539
- Resource(
570
+ # Resolve resource model class from type parameter (case-insensitive)
571
+ from typing import cast, Type
572
+ from pydantic import BaseModel
573
+ from rem.utils.model_helpers import model_from_arbitrary_casing, get_table_name
574
+
575
+ resource_model: Type[BaseModel] = Resource # Default
576
+ if resource_type:
577
+ try:
578
+ resource_model = model_from_arbitrary_casing(resource_type)
579
+ logger.info(f"Using resource model: {resource_model.__name__}")
580
+ except ValueError as e:
581
+ logger.warning(f"Invalid resource_type '{resource_type}', using default Resource: {e}")
582
+ resource_model = Resource
583
+
584
+ # Get table name for the resolved model
585
+ table_name = get_table_name(resource_model)
586
+
587
+ # Create resource entities for each chunk
588
+ resources: list[BaseModel] = [
589
+ resource_model(
540
590
  name=f"{filename}#chunk-{i}",
541
591
  uri=f"{uri}#chunk-{i}",
542
592
  ordinal=i,
543
593
  content=chunk,
544
594
  category="document",
545
- tenant_id=user_id or "default", # Required field
595
+ tenant_id=user_id, # None = public/shared
546
596
  user_id=user_id,
547
597
  )
548
598
  for i, chunk in enumerate(chunks)
549
599
  ]
550
600
 
551
- if self.resource_repo:
552
- await self.resource_repo.upsert(
553
- resources,
554
- embeddable_fields=["content"],
555
- generate_embeddings=True,
556
- )
557
- logger.info(f"Saved {len(resources)} Resource chunks")
558
- logger.info(f"Queued {len(resources)} embedding generation tasks for content field")
601
+ # Save resources to the appropriate table
602
+ if resources:
603
+ from rem.services.postgres import get_postgres_service
604
+
605
+ postgres = get_postgres_service()
606
+ if postgres:
607
+ await postgres.connect()
608
+ try:
609
+ await postgres.batch_upsert(
610
+ records=cast(list[BaseModel | dict], resources),
611
+ model=resource_model,
612
+ table_name=table_name,
613
+ entity_key_field="name",
614
+ embeddable_fields=["content"],
615
+ generate_embeddings=True,
616
+ )
617
+ logger.info(f"Saved {len(resources)} {resource_model.__name__} chunks to {table_name}")
618
+ logger.info(f"Queued {len(resources)} embedding generation tasks for content field")
619
+ finally:
620
+ await postgres.disconnect()
621
+ elif self.resource_repo:
622
+ # Fallback to injected repo (only works for default Resource)
623
+ await self.resource_repo.upsert(
624
+ resources,
625
+ embeddable_fields=["content"],
626
+ generate_embeddings=True,
627
+ )
628
+ logger.info(f"Saved {len(resources)} Resource chunks")
629
+ logger.info(f"Queued {len(resources)} embedding generation tasks for content field")
559
630
 
560
631
  return {
561
632
  "file": file.model_dump(),
@@ -595,9 +666,10 @@ class ContentService:
595
666
  # IMPORTANT: category field distinguishes agents from evaluators
596
667
  # - kind=agent → category="agent" (AI agents with tools/resources)
597
668
  # - kind=evaluator → category="evaluator" (LLM-as-a-Judge evaluators)
669
+ # Schemas (agents/evaluators) default to system tenant for shared access
598
670
  schema_entity = Schema(
599
- tenant_id=user_id or "default",
600
- user_id=user_id,
671
+ tenant_id="system",
672
+ user_id=None,
601
673
  name=name,
602
674
  spec=schema_data,
603
675
  category=kind, # Maps kind → category for database filtering
@@ -667,7 +739,7 @@ class ContentService:
667
739
  processor = EngramProcessor(postgres)
668
740
  result = await processor.process_engram(
669
741
  data=data,
670
- tenant_id=user_id or "default",
742
+ tenant_id=user_id, # None = public/shared
671
743
  user_id=user_id,
672
744
  )
673
745
  logger.info(f"✅ Engram processed: {result.get('resource_id')} with {len(result.get('moment_ids', []))} moments")
@@ -45,7 +45,7 @@ def generate_embedding(
45
45
  return [0.0] * DEFAULT_EMBEDDING_DIMS
46
46
 
47
47
  try:
48
- logger.info(f"Generating OpenAI embedding for text using {model}")
48
+ logger.debug(f"Generating OpenAI embedding for text using {model}")
49
49
 
50
50
  response = requests.post(
51
51
  "https://api.openai.com/v1/embeddings",
@@ -60,7 +60,7 @@ def generate_embedding(
60
60
 
61
61
  data = response.json()
62
62
  embedding = data["data"][0]["embedding"]
63
- logger.info(f"Successfully generated embedding (dimension: {len(embedding)})")
63
+ logger.debug(f"Successfully generated embedding (dimension: {len(embedding)})")
64
64
  return cast(list[float], embedding)
65
65
 
66
66
  except Exception as e:
@@ -97,7 +97,7 @@ async def generate_embedding_async(
97
97
  return [0.0] * DEFAULT_EMBEDDING_DIMS
98
98
 
99
99
  try:
100
- logger.info(f"Generating OpenAI embedding for text using {model}")
100
+ logger.debug(f"Generating OpenAI embedding for text using {model}")
101
101
 
102
102
  async with httpx.AsyncClient() as client:
103
103
  response = await client.post(
@@ -113,7 +113,7 @@ async def generate_embedding_async(
113
113
 
114
114
  data = response.json()
115
115
  embedding = data["data"][0]["embedding"]
116
- logger.info(
116
+ logger.debug(
117
117
  f"Successfully generated embedding (dimension: {len(embedding)})"
118
118
  )
119
119
  return cast(list[float], embedding)