remdb 0.3.163__py3-none-any.whl → 0.3.200__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/agents/agent_manager.py +2 -1
- rem/agentic/context.py +101 -0
- rem/agentic/context_builder.py +30 -8
- rem/agentic/mcp/tool_wrapper.py +43 -14
- rem/agentic/providers/pydantic_ai.py +76 -34
- rem/agentic/schema.py +4 -3
- rem/agentic/tools/rem_tools.py +11 -0
- rem/api/main.py +1 -1
- rem/api/mcp_router/resources.py +75 -14
- rem/api/mcp_router/server.py +31 -24
- rem/api/mcp_router/tools.py +476 -155
- rem/api/routers/auth.py +11 -6
- rem/api/routers/chat/completions.py +52 -10
- rem/api/routers/chat/sse_events.py +2 -2
- rem/api/routers/chat/streaming.py +162 -19
- rem/api/routers/messages.py +96 -23
- rem/auth/middleware.py +59 -42
- rem/cli/README.md +62 -0
- rem/cli/commands/ask.py +1 -1
- rem/cli/commands/db.py +148 -70
- rem/cli/commands/process.py +171 -43
- rem/models/entities/ontology.py +93 -101
- rem/schemas/agents/core/agent-builder.yaml +143 -42
- rem/services/content/service.py +18 -5
- rem/services/email/service.py +17 -6
- rem/services/embeddings/worker.py +26 -12
- rem/services/postgres/__init__.py +28 -3
- rem/services/postgres/diff_service.py +57 -5
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +2 -2
- rem/services/postgres/register_type.py +12 -11
- rem/services/postgres/repository.py +32 -21
- rem/services/postgres/schema_generator.py +5 -5
- rem/services/postgres/sql_builder.py +6 -5
- rem/services/session/__init__.py +7 -1
- rem/services/session/pydantic_messages.py +210 -0
- rem/services/user_service.py +12 -9
- rem/settings.py +7 -1
- rem/sql/background_indexes.sql +5 -0
- rem/sql/migrations/001_install.sql +148 -11
- rem/sql/migrations/002_install_models.sql +162 -132
- rem/sql/migrations/004_cache_system.sql +7 -275
- rem/utils/model_helpers.py +101 -0
- rem/utils/schema_loader.py +51 -13
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/METADATA +1 -1
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/RECORD +48 -46
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/WHEEL +0 -0
- {remdb-0.3.163.dist-info → remdb-0.3.200.dist-info}/entry_points.txt +0 -0
|
@@ -2,65 +2,148 @@ type: object
|
|
|
2
2
|
description: |
|
|
3
3
|
# Agent Builder - Create Custom AI Agents Through Conversation
|
|
4
4
|
|
|
5
|
-
You help users create custom AI agents
|
|
6
|
-
|
|
5
|
+
You help users create custom AI agents for the REM platform through natural conversation.
|
|
6
|
+
Guide them step-by-step, gather requirements, show previews, and save when ready.
|
|
7
7
|
|
|
8
8
|
## Your Workflow
|
|
9
9
|
|
|
10
10
|
1. **Understand the need**: Ask what they want the agent to do
|
|
11
|
-
2. **Define personality**: Help them choose tone and style
|
|
12
|
-
3. **
|
|
13
|
-
4. **
|
|
14
|
-
5. **
|
|
11
|
+
2. **Define personality**: Help them choose tone and communication style
|
|
12
|
+
3. **Set guardrails**: What should the agent NOT do?
|
|
13
|
+
4. **Structure outputs**: Define what data the agent captures (optional)
|
|
14
|
+
5. **Preview**: Show them what the agent will look like
|
|
15
|
+
6. **Save**: Use `save_agent` tool to persist it
|
|
15
16
|
|
|
16
17
|
## Conversation Style
|
|
17
18
|
|
|
18
19
|
Be friendly and helpful. Ask one or two questions at a time.
|
|
19
20
|
Don't overwhelm with options - guide them step by step.
|
|
20
21
|
|
|
21
|
-
##
|
|
22
|
+
## IMPORTANT: Tool Usage
|
|
23
|
+
|
|
24
|
+
- `save_agent` - Use ONLY in Step 6 when user approves the preview
|
|
25
|
+
- `get_agents_list` - Use if user asks to see existing agents as examples
|
|
26
|
+
- `get_agent_schema` - Use to load a specific agent (like "rem") as reference
|
|
27
|
+
|
|
28
|
+
DO NOT loop on tools. If a user asks for examples, call get_agents_list ONCE,
|
|
29
|
+
then discuss what you found. This is a conversational workflow.
|
|
30
|
+
|
|
31
|
+
## Step 1: Identity & Purpose
|
|
22
32
|
|
|
23
33
|
Ask about:
|
|
24
|
-
- What should this agent help with?
|
|
25
|
-
- What
|
|
26
|
-
-
|
|
27
|
-
|
|
34
|
+
- What should this agent help with? (primary purpose)
|
|
35
|
+
- What would you like to call it? (suggest kebab-case like "sales-assistant")
|
|
36
|
+
- What role/persona should it embody?
|
|
37
|
+
|
|
38
|
+
## Step 2: Tone & Communication Style
|
|
39
|
+
|
|
40
|
+
Help define tone using this framework:
|
|
41
|
+
|
|
42
|
+
| Dimension | Options |
|
|
43
|
+
|-----------|---------|
|
|
44
|
+
| Formality | casual, conversational, professional, formal |
|
|
45
|
+
| Warmth | empathetic, friendly, neutral, businesslike |
|
|
46
|
+
| Pace | patient, balanced, efficient, direct |
|
|
47
|
+
| Expertise | peer, guide, expert, authority |
|
|
48
|
+
|
|
49
|
+
Ask: "What tone feels right? For example, should it be friendly and casual, or more professional?"
|
|
50
|
+
|
|
51
|
+
## Step 3: Guardrails
|
|
52
|
+
|
|
53
|
+
Ask what the agent should NOT do:
|
|
54
|
+
- Topics to avoid?
|
|
55
|
+
- Actions it shouldn't take?
|
|
56
|
+
- Boundaries to respect?
|
|
28
57
|
|
|
29
|
-
|
|
58
|
+
Example guardrails:
|
|
59
|
+
- "Never provide medical/legal/financial advice"
|
|
60
|
+
- "Don't make promises about timelines"
|
|
61
|
+
- "Always recommend consulting a professional for serious issues"
|
|
30
62
|
|
|
31
|
-
|
|
63
|
+
## Step 4: Structured Outputs (Optional)
|
|
64
|
+
|
|
65
|
+
Most agents just need an `answer` field. But some use cases benefit from structured data:
|
|
66
|
+
|
|
67
|
+
| Field | Type | Description |
|
|
68
|
+
|-------|------|-------------|
|
|
69
|
+
| answer | string | Natural language response (always required) |
|
|
70
|
+
| confidence | number | 0.0-1.0 confidence score |
|
|
71
|
+
| category | string | Classification of the request |
|
|
72
|
+
| follow_up_needed | boolean | Whether follow-up is required |
|
|
73
|
+
|
|
74
|
+
Field types available:
|
|
75
|
+
- `string` - text values
|
|
76
|
+
- `number` - numeric values (can add minimum/maximum)
|
|
77
|
+
- `boolean` - true/false
|
|
78
|
+
- `array` - list of items
|
|
79
|
+
- `string` with `enum` - fixed set of choices
|
|
80
|
+
|
|
81
|
+
Only suggest structured outputs if the use case clearly benefits from them.
|
|
82
|
+
|
|
83
|
+
## Step 5: Preview
|
|
84
|
+
|
|
85
|
+
Before saving, show a preview:
|
|
32
86
|
|
|
33
87
|
```
|
|
34
88
|
## Agent Preview: {name}
|
|
35
89
|
|
|
36
|
-
**
|
|
37
|
-
|
|
90
|
+
**Purpose:** {brief description}
|
|
91
|
+
|
|
92
|
+
**Personality:** {tone and approach}
|
|
38
93
|
|
|
39
94
|
**System Prompt:**
|
|
40
95
|
{the actual prompt that will guide the agent}
|
|
41
96
|
|
|
42
|
-
**
|
|
97
|
+
**Guardrails:**
|
|
98
|
+
- {guardrail 1}
|
|
99
|
+
- {guardrail 2}
|
|
100
|
+
|
|
101
|
+
**Structured Fields:** (if any beyond answer)
|
|
43
102
|
| Field | Type | Description |
|
|
44
103
|
|-------|------|-------------|
|
|
45
104
|
| answer | string | Response to user |
|
|
46
|
-
| ... | ... | ... |
|
|
47
105
|
```
|
|
48
106
|
|
|
49
|
-
Ask: "Does this look good? I can save it now or
|
|
107
|
+
Ask: "Does this look good? I can save it now or adjust anything."
|
|
50
108
|
|
|
51
|
-
##
|
|
109
|
+
## Step 6: Save the Agent
|
|
52
110
|
|
|
53
111
|
When the user approves, call `save_agent` with:
|
|
54
112
|
- `name`: kebab-case name (e.g., "customer-support-bot")
|
|
55
|
-
- `description`: The full system prompt
|
|
113
|
+
- `description`: The full system prompt (this is the most important part!)
|
|
56
114
|
- `properties`: Structured output fields (optional, defaults to just "answer")
|
|
57
115
|
- `required`: Which fields are required (optional, defaults to ["answer"])
|
|
58
116
|
- `tags`: Relevant tags for organization
|
|
59
117
|
|
|
60
|
-
##
|
|
118
|
+
## Writing Great System Prompts
|
|
119
|
+
|
|
120
|
+
The `description` field IS the system prompt. Make it:
|
|
121
|
+
|
|
122
|
+
1. **Clear about role**: "You are a [role] that helps users [do what]"
|
|
123
|
+
2. **Specific about behavior**: How should it respond? What's the communication style?
|
|
124
|
+
3. **Explicit about guardrails**: What should it NOT do?
|
|
125
|
+
4. **Structured with sections**: Use markdown headers for organization
|
|
126
|
+
|
|
127
|
+
Example structure:
|
|
128
|
+
```
|
|
129
|
+
# [Agent Name]
|
|
61
130
|
|
|
62
|
-
|
|
63
|
-
|
|
131
|
+
You are a [role description]. Your purpose is to [main goal].
|
|
132
|
+
|
|
133
|
+
## Communication Style
|
|
134
|
+
[Tone, approach, language preferences]
|
|
135
|
+
|
|
136
|
+
## Core Responsibilities
|
|
137
|
+
- [Responsibility 1]
|
|
138
|
+
- [Responsibility 2]
|
|
139
|
+
|
|
140
|
+
## Guardrails
|
|
141
|
+
- Never [thing to avoid]
|
|
142
|
+
- Always [thing to ensure]
|
|
143
|
+
|
|
144
|
+
## How to Respond
|
|
145
|
+
[Guidelines for responses]
|
|
146
|
+
```
|
|
64
147
|
|
|
65
148
|
## Example Conversation
|
|
66
149
|
|
|
@@ -68,47 +151,62 @@ description: |
|
|
|
68
151
|
|
|
69
152
|
You: "Great idea! Let me help you create an email writing assistant.
|
|
70
153
|
|
|
71
|
-
|
|
154
|
+
First, what tone should it have? For example:
|
|
72
155
|
- Professional and formal
|
|
73
156
|
- Friendly but polished
|
|
74
157
|
- Direct and concise"
|
|
75
158
|
|
|
76
159
|
User: "Friendly but polished"
|
|
77
160
|
|
|
78
|
-
You: "Perfect! Should it just
|
|
161
|
+
You: "Perfect! Should it focus just on writing emails, or also help with:
|
|
79
162
|
- Suggesting subject lines
|
|
80
163
|
- Adjusting tone for different recipients
|
|
81
164
|
- Summarizing long email threads"
|
|
82
165
|
|
|
83
166
|
User: "All of those would be helpful"
|
|
84
167
|
|
|
85
|
-
You: "
|
|
168
|
+
You: "Great! Any things it should NOT do? For example, some email assistants avoid:
|
|
169
|
+
- Writing emails that could be seen as legally binding
|
|
170
|
+
- Making commitments on behalf of the user"
|
|
171
|
+
|
|
172
|
+
User: "Yeah, it shouldn't make commitments or promises"
|
|
173
|
+
|
|
174
|
+
You: "Perfect! Here's a preview:
|
|
86
175
|
|
|
87
176
|
## Agent Preview: email-assistant
|
|
88
177
|
|
|
89
|
-
**
|
|
178
|
+
**Purpose:** Help users write polished, professional emails
|
|
179
|
+
|
|
180
|
+
**Personality:** Friendly, helpful, professional writing partner
|
|
90
181
|
|
|
91
182
|
**System Prompt:**
|
|
183
|
+
# Email Writing Assistant
|
|
184
|
+
|
|
92
185
|
You are a friendly email writing assistant. Help users craft polished,
|
|
93
|
-
professional emails while maintaining a warm tone.
|
|
94
|
-
|
|
186
|
+
professional emails while maintaining a warm, approachable tone.
|
|
187
|
+
|
|
188
|
+
## What You Can Do
|
|
189
|
+
- Write new emails from scratch based on user's intent
|
|
95
190
|
- Suggest compelling subject lines
|
|
96
|
-
- Adjust tone for different audiences
|
|
191
|
+
- Adjust tone for different audiences (colleagues, executives, clients)
|
|
97
192
|
- Summarize long email threads
|
|
193
|
+
- Proofread and improve existing drafts
|
|
98
194
|
|
|
99
|
-
|
|
195
|
+
## Communication Style
|
|
196
|
+
Be helpful and collaborative. Suggest improvements but respect the user's voice.
|
|
197
|
+
Ask clarifying questions when the request is ambiguous.
|
|
100
198
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
199
|
+
## Guardrails
|
|
200
|
+
- Never write emails that make commitments or promises on behalf of the user
|
|
201
|
+
- Don't write anything that could be legally binding
|
|
202
|
+
- Always let the user review before sending
|
|
105
203
|
|
|
106
204
|
Does this look good? I can save it now or adjust anything."
|
|
107
205
|
|
|
108
206
|
User: "Looks great, save it!"
|
|
109
207
|
|
|
110
208
|
You: *calls save_agent tool*
|
|
111
|
-
"Done! Your email-assistant is ready
|
|
209
|
+
"Done! Your email-assistant is ready to use."
|
|
112
210
|
|
|
113
211
|
properties:
|
|
114
212
|
answer:
|
|
@@ -121,14 +219,17 @@ required:
|
|
|
121
219
|
json_schema_extra:
|
|
122
220
|
kind: agent
|
|
123
221
|
name: agent-builder
|
|
124
|
-
version: "1.
|
|
222
|
+
version: "1.2.0"
|
|
125
223
|
tags:
|
|
126
224
|
- meta
|
|
127
225
|
- builder
|
|
226
|
+
structured_output: false # Stream text responses, don't return JSON
|
|
227
|
+
mcp_servers: [] # Disable default MCP tools to prevent search_rem looping
|
|
228
|
+
resources:
|
|
229
|
+
- uri: rem://agents
|
|
230
|
+
description: "List all available agent schemas with descriptions"
|
|
231
|
+
- uri: rem://agents/{agent_name}
|
|
232
|
+
description: "Load a specific agent schema by name (e.g., 'rem', 'siggy')"
|
|
128
233
|
tools:
|
|
129
234
|
- name: save_agent
|
|
130
|
-
description: "Save the agent schema
|
|
131
|
-
- name: search_rem
|
|
132
|
-
description: "Search for existing agents as examples"
|
|
133
|
-
- name: register_metadata
|
|
134
|
-
description: "Record session metadata"
|
|
235
|
+
description: "Save the agent schema. Only call when user approves the preview in Step 6."
|
rem/services/content/service.py
CHANGED
|
@@ -274,7 +274,7 @@ class ContentService:
|
|
|
274
274
|
async def ingest_file(
|
|
275
275
|
self,
|
|
276
276
|
file_uri: str,
|
|
277
|
-
user_id: str,
|
|
277
|
+
user_id: str | None = None,
|
|
278
278
|
category: str | None = None,
|
|
279
279
|
tags: list[str] | None = None,
|
|
280
280
|
is_local_server: bool = False,
|
|
@@ -283,6 +283,10 @@ class ContentService:
|
|
|
283
283
|
"""
|
|
284
284
|
Complete file ingestion pipeline: read → store → parse → chunk → embed.
|
|
285
285
|
|
|
286
|
+
**IMPORTANT: Data is PUBLIC by default (user_id=None).**
|
|
287
|
+
This is correct for shared knowledge bases (ontologies, procedures, reference data).
|
|
288
|
+
Private user-scoped data is rarely needed - only set user_id for truly personal content.
|
|
289
|
+
|
|
286
290
|
**CENTRALIZED INGESTION**: This is the single entry point for all file ingestion
|
|
287
291
|
in REM. It handles:
|
|
288
292
|
|
|
@@ -319,7 +323,9 @@ class ContentService:
|
|
|
319
323
|
|
|
320
324
|
Args:
|
|
321
325
|
file_uri: Source file location (local path, s3://, or https://)
|
|
322
|
-
user_id: User identifier for data
|
|
326
|
+
user_id: User identifier for PRIVATE data only. Default None = PUBLIC/shared.
|
|
327
|
+
Leave as None for shared knowledge bases, ontologies, reference data.
|
|
328
|
+
Only set for truly private user-specific content.
|
|
323
329
|
category: Optional category tag (document, code, audio, etc.)
|
|
324
330
|
tags: Optional list of tags
|
|
325
331
|
is_local_server: True if running as local/stdio MCP server
|
|
@@ -347,12 +353,19 @@ class ContentService:
|
|
|
347
353
|
|
|
348
354
|
Example:
|
|
349
355
|
>>> service = ContentService()
|
|
356
|
+
>>> # PUBLIC data (default) - visible to all users
|
|
350
357
|
>>> result = await service.ingest_file(
|
|
351
|
-
... file_uri="s3://bucket/
|
|
352
|
-
...
|
|
353
|
-
... category="legal"
|
|
358
|
+
... file_uri="s3://bucket/procedure.pdf",
|
|
359
|
+
... category="medical"
|
|
354
360
|
... )
|
|
355
361
|
>>> print(f"Created {result['resources_created']} searchable chunks")
|
|
362
|
+
>>>
|
|
363
|
+
>>> # PRIVATE data (rare) - only for user-specific content
|
|
364
|
+
>>> result = await service.ingest_file(
|
|
365
|
+
... file_uri="s3://bucket/personal-notes.pdf",
|
|
366
|
+
... user_id="user-123", # Only this user can access
|
|
367
|
+
... category="personal"
|
|
368
|
+
... )
|
|
356
369
|
"""
|
|
357
370
|
from pathlib import Path
|
|
358
371
|
from uuid import uuid4
|
rem/services/email/service.py
CHANGED
|
@@ -200,8 +200,8 @@ class EmailService:
|
|
|
200
200
|
"""
|
|
201
201
|
Generate a deterministic UUID from email address.
|
|
202
202
|
|
|
203
|
-
Uses
|
|
204
|
-
Same email always produces same UUID.
|
|
203
|
+
Uses the centralized email_to_user_id() for consistency.
|
|
204
|
+
Same email always produces the same UUID (deterministic mapping).
|
|
205
205
|
|
|
206
206
|
Args:
|
|
207
207
|
email: Email address
|
|
@@ -209,7 +209,8 @@ class EmailService:
|
|
|
209
209
|
Returns:
|
|
210
210
|
UUID string
|
|
211
211
|
"""
|
|
212
|
-
|
|
212
|
+
from rem.utils.user_id import email_to_user_id
|
|
213
|
+
return email_to_user_id(email)
|
|
213
214
|
|
|
214
215
|
async def send_login_code(
|
|
215
216
|
self,
|
|
@@ -375,8 +376,17 @@ class EmailService:
|
|
|
375
376
|
await user_repo.upsert(existing_user)
|
|
376
377
|
return {"allowed": True, "error": None}
|
|
377
378
|
else:
|
|
378
|
-
# New user - check if
|
|
379
|
-
|
|
379
|
+
# New user - first check if they're a subscriber (by email lookup)
|
|
380
|
+
from ...models.entities import Subscriber
|
|
381
|
+
subscriber_repo = Repository(Subscriber, db=db)
|
|
382
|
+
existing_subscriber = await subscriber_repo.find_one({"email": email})
|
|
383
|
+
|
|
384
|
+
if existing_subscriber:
|
|
385
|
+
# Subscriber exists - allow them to create account
|
|
386
|
+
# (approved field may not exist in older schemas, so just check existence)
|
|
387
|
+
logger.info(f"Subscriber {email} creating user account")
|
|
388
|
+
elif settings and hasattr(settings, 'email') and settings.email.trusted_domain_list:
|
|
389
|
+
# Not an existing subscriber - check if domain is trusted
|
|
380
390
|
if not settings.email.is_domain_trusted(email):
|
|
381
391
|
email_domain = email.split("@")[-1]
|
|
382
392
|
logger.warning(f"Untrusted domain attempted signup: {email_domain}")
|
|
@@ -393,7 +403,8 @@ class EmailService:
|
|
|
393
403
|
new_user = User(
|
|
394
404
|
id=uuid.UUID(user_id),
|
|
395
405
|
tenant_id=tenant_id,
|
|
396
|
-
|
|
406
|
+
user_id=user_id, # UUID5 hash of email (same as id)
|
|
407
|
+
name=email, # Full email as entity_key for LOOKUP
|
|
397
408
|
email=email,
|
|
398
409
|
role=user_role,
|
|
399
410
|
metadata=login_metadata,
|
|
@@ -23,6 +23,8 @@ Future:
|
|
|
23
23
|
import asyncio
|
|
24
24
|
import os
|
|
25
25
|
from typing import Any, Optional
|
|
26
|
+
import hashlib
|
|
27
|
+
import uuid
|
|
26
28
|
from uuid import uuid4
|
|
27
29
|
|
|
28
30
|
import httpx
|
|
@@ -108,6 +110,7 @@ class EmbeddingWorker:
|
|
|
108
110
|
self.task_queue: asyncio.Queue = asyncio.Queue()
|
|
109
111
|
self.workers: list[asyncio.Task] = []
|
|
110
112
|
self.running = False
|
|
113
|
+
self._in_flight_count = 0 # Track tasks being processed (not just in queue)
|
|
111
114
|
|
|
112
115
|
# Store API key for direct HTTP requests
|
|
113
116
|
from ...settings import settings
|
|
@@ -143,17 +146,18 @@ class EmbeddingWorker:
|
|
|
143
146
|
return
|
|
144
147
|
|
|
145
148
|
queue_size = self.task_queue.qsize()
|
|
146
|
-
|
|
149
|
+
in_flight = self._in_flight_count
|
|
150
|
+
logger.debug(f"Stopping EmbeddingWorker (queue={queue_size}, in_flight={in_flight})")
|
|
147
151
|
|
|
148
|
-
# Wait for queue to drain
|
|
152
|
+
# Wait for the queue to drain AND for in-flight tasks to complete
|
|
149
153
|
max_wait = 30 # 30 seconds max
|
|
150
154
|
waited = 0.0
|
|
151
|
-
while not self.task_queue.empty() and waited < max_wait:
|
|
155
|
+
while (not self.task_queue.empty() or self._in_flight_count > 0) and waited < max_wait:
|
|
152
156
|
await asyncio.sleep(0.5)
|
|
153
157
|
waited += 0.5
|
|
154
158
|
|
|
155
|
-
if not self.task_queue.empty():
|
|
156
|
-
remaining = self.task_queue.qsize()
|
|
159
|
+
if not self.task_queue.empty() or self._in_flight_count > 0:
|
|
160
|
+
remaining = self.task_queue.qsize() + self._in_flight_count
|
|
157
161
|
logger.warning(
|
|
158
162
|
f"EmbeddingWorker timeout: {remaining} tasks remaining after {max_wait}s"
|
|
159
163
|
)
|
|
@@ -205,12 +209,18 @@ class EmbeddingWorker:
|
|
|
205
209
|
if not batch:
|
|
206
210
|
continue
|
|
207
211
|
|
|
208
|
-
|
|
212
|
+
# Track in-flight tasks
|
|
213
|
+
self._in_flight_count += len(batch)
|
|
209
214
|
|
|
210
|
-
|
|
211
|
-
await self._process_batch(batch)
|
|
215
|
+
logger.debug(f"Worker {worker_id} processing batch of {len(batch)} tasks")
|
|
212
216
|
|
|
213
|
-
|
|
217
|
+
try:
|
|
218
|
+
# Generate embeddings for batch
|
|
219
|
+
await self._process_batch(batch)
|
|
220
|
+
logger.debug(f"Worker {worker_id} completed batch")
|
|
221
|
+
finally:
|
|
222
|
+
# Always decrement in-flight count, even on error
|
|
223
|
+
self._in_flight_count -= len(batch)
|
|
214
224
|
|
|
215
225
|
except asyncio.CancelledError:
|
|
216
226
|
logger.debug(f"Worker {worker_id} cancelled")
|
|
@@ -373,7 +383,11 @@ class EmbeddingWorker:
|
|
|
373
383
|
for task, embedding in zip(tasks, embeddings):
|
|
374
384
|
table_name = f"embeddings_{task.table_name}"
|
|
375
385
|
|
|
376
|
-
#
|
|
386
|
+
# Generate deterministic ID from key fields (entity_id, field_name, provider)
|
|
387
|
+
key_string = f"{task.entity_id}:{task.field_name}:{task.provider}"
|
|
388
|
+
embedding_id = str(uuid.UUID(hashlib.md5(key_string.encode()).hexdigest()))
|
|
389
|
+
|
|
390
|
+
# Build upsert SQL - conflict on deterministic ID
|
|
377
391
|
sql = f"""
|
|
378
392
|
INSERT INTO {table_name} (
|
|
379
393
|
id,
|
|
@@ -386,7 +400,7 @@ class EmbeddingWorker:
|
|
|
386
400
|
updated_at
|
|
387
401
|
)
|
|
388
402
|
VALUES ($1, $2, $3, $4, $5, $6, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP)
|
|
389
|
-
ON CONFLICT (
|
|
403
|
+
ON CONFLICT (id)
|
|
390
404
|
DO UPDATE SET
|
|
391
405
|
model = EXCLUDED.model,
|
|
392
406
|
embedding = EXCLUDED.embedding,
|
|
@@ -400,7 +414,7 @@ class EmbeddingWorker:
|
|
|
400
414
|
await self.postgres_service.execute(
|
|
401
415
|
sql,
|
|
402
416
|
(
|
|
403
|
-
|
|
417
|
+
embedding_id,
|
|
404
418
|
task.entity_id,
|
|
405
419
|
task.field_name,
|
|
406
420
|
task.provider,
|
|
@@ -3,22 +3,47 @@ PostgreSQL service for CloudNativePG database operations.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
from .diff_service import DiffService, SchemaDiff
|
|
6
|
+
from .programmable_diff_service import (
|
|
7
|
+
DiffResult,
|
|
8
|
+
ObjectDiff,
|
|
9
|
+
ObjectType,
|
|
10
|
+
ProgrammableDiffService,
|
|
11
|
+
)
|
|
6
12
|
from .repository import Repository
|
|
7
13
|
from .service import PostgresService
|
|
8
14
|
|
|
9
15
|
|
|
16
|
+
_postgres_instance: PostgresService | None = None
|
|
17
|
+
|
|
18
|
+
|
|
10
19
|
def get_postgres_service() -> PostgresService | None:
|
|
11
20
|
"""
|
|
12
|
-
Get PostgresService instance.
|
|
21
|
+
Get PostgresService singleton instance.
|
|
13
22
|
|
|
14
23
|
Returns None if Postgres is disabled.
|
|
24
|
+
Uses singleton pattern to prevent connection pool exhaustion.
|
|
15
25
|
"""
|
|
26
|
+
global _postgres_instance
|
|
27
|
+
|
|
16
28
|
from ...settings import settings
|
|
17
29
|
|
|
18
30
|
if not settings.postgres.enabled:
|
|
19
31
|
return None
|
|
20
32
|
|
|
21
|
-
|
|
33
|
+
if _postgres_instance is None:
|
|
34
|
+
_postgres_instance = PostgresService()
|
|
35
|
+
|
|
36
|
+
return _postgres_instance
|
|
22
37
|
|
|
23
38
|
|
|
24
|
-
__all__ = [
|
|
39
|
+
__all__ = [
|
|
40
|
+
"DiffResult",
|
|
41
|
+
"DiffService",
|
|
42
|
+
"ObjectDiff",
|
|
43
|
+
"ObjectType",
|
|
44
|
+
"PostgresService",
|
|
45
|
+
"ProgrammableDiffService",
|
|
46
|
+
"Repository",
|
|
47
|
+
"SchemaDiff",
|
|
48
|
+
"get_postgres_service",
|
|
49
|
+
]
|
|
@@ -5,12 +5,17 @@ Uses Alembic autogenerate to detect differences between:
|
|
|
5
5
|
- Target schema (derived from Pydantic models)
|
|
6
6
|
- Current database schema
|
|
7
7
|
|
|
8
|
+
Also compares programmable objects (functions, triggers, views) which
|
|
9
|
+
Alembic does not track.
|
|
10
|
+
|
|
8
11
|
This enables:
|
|
9
12
|
1. Local development: See what would change before applying migrations
|
|
10
13
|
2. CI validation: Detect drift between code and database (--check mode)
|
|
11
14
|
3. Migration generation: Create incremental migration files
|
|
12
15
|
"""
|
|
13
16
|
|
|
17
|
+
import asyncio
|
|
18
|
+
import re
|
|
14
19
|
from dataclasses import dataclass, field
|
|
15
20
|
from pathlib import Path
|
|
16
21
|
from typing import Optional
|
|
@@ -51,11 +56,14 @@ class SchemaDiff:
|
|
|
51
56
|
sql: str = ""
|
|
52
57
|
upgrade_ops: Optional[ops.UpgradeOps] = None
|
|
53
58
|
filtered_count: int = 0 # Number of operations filtered out by strategy
|
|
59
|
+
# Programmable objects (functions, triggers, views)
|
|
60
|
+
programmable_summary: list[str] = field(default_factory=list)
|
|
61
|
+
programmable_sql: str = ""
|
|
54
62
|
|
|
55
63
|
@property
|
|
56
64
|
def change_count(self) -> int:
|
|
57
65
|
"""Total number of detected changes."""
|
|
58
|
-
return len(self.summary)
|
|
66
|
+
return len(self.summary) + len(self.programmable_summary)
|
|
59
67
|
|
|
60
68
|
|
|
61
69
|
class DiffService:
|
|
@@ -127,10 +135,13 @@ class DiffService:
|
|
|
127
135
|
# These are now generated in pydantic_to_sqlalchemy
|
|
128
136
|
return True
|
|
129
137
|
|
|
130
|
-
def compute_diff(self) -> SchemaDiff:
|
|
138
|
+
def compute_diff(self, include_programmable: bool = True) -> SchemaDiff:
|
|
131
139
|
"""
|
|
132
140
|
Compare Pydantic models against database and return differences.
|
|
133
141
|
|
|
142
|
+
Args:
|
|
143
|
+
include_programmable: If True, also diff functions/triggers/views
|
|
144
|
+
|
|
134
145
|
Returns:
|
|
135
146
|
SchemaDiff with detected changes
|
|
136
147
|
"""
|
|
@@ -167,21 +178,62 @@ class DiffService:
|
|
|
167
178
|
for op in filtered_ops:
|
|
168
179
|
summary.extend(self._describe_operation(op))
|
|
169
180
|
|
|
170
|
-
has_changes = len(summary) > 0
|
|
171
|
-
|
|
172
181
|
# Generate SQL if there are changes
|
|
173
182
|
sql = ""
|
|
174
|
-
if
|
|
183
|
+
if summary and upgrade_ops:
|
|
175
184
|
sql = self._render_sql(upgrade_ops, engine)
|
|
176
185
|
|
|
186
|
+
# Programmable objects diff (functions, triggers, views)
|
|
187
|
+
programmable_summary = []
|
|
188
|
+
programmable_sql = ""
|
|
189
|
+
if include_programmable:
|
|
190
|
+
prog_summary, prog_sql = self._compute_programmable_diff()
|
|
191
|
+
programmable_summary = prog_summary
|
|
192
|
+
programmable_sql = prog_sql
|
|
193
|
+
|
|
194
|
+
has_changes = len(summary) > 0 or len(programmable_summary) > 0
|
|
195
|
+
|
|
177
196
|
return SchemaDiff(
|
|
178
197
|
has_changes=has_changes,
|
|
179
198
|
summary=summary,
|
|
180
199
|
sql=sql,
|
|
181
200
|
upgrade_ops=upgrade_ops,
|
|
182
201
|
filtered_count=filtered_count,
|
|
202
|
+
programmable_summary=programmable_summary,
|
|
203
|
+
programmable_sql=programmable_sql,
|
|
183
204
|
)
|
|
184
205
|
|
|
206
|
+
def _compute_programmable_diff(self) -> tuple[list[str], str]:
|
|
207
|
+
"""
|
|
208
|
+
Compute diff for programmable objects (functions, triggers, views).
|
|
209
|
+
|
|
210
|
+
Returns:
|
|
211
|
+
Tuple of (summary_lines, sync_sql)
|
|
212
|
+
"""
|
|
213
|
+
from .programmable_diff_service import ProgrammableDiffService
|
|
214
|
+
|
|
215
|
+
service = ProgrammableDiffService()
|
|
216
|
+
|
|
217
|
+
# Run async diff in sync context
|
|
218
|
+
try:
|
|
219
|
+
loop = asyncio.get_event_loop()
|
|
220
|
+
except RuntimeError:
|
|
221
|
+
loop = asyncio.new_event_loop()
|
|
222
|
+
asyncio.set_event_loop(loop)
|
|
223
|
+
|
|
224
|
+
result = loop.run_until_complete(service.compute_diff())
|
|
225
|
+
|
|
226
|
+
summary = []
|
|
227
|
+
for diff in result.diffs:
|
|
228
|
+
if diff.status == "missing":
|
|
229
|
+
summary.append(f"+ {diff.object_type.value.upper()} {diff.name} (missing)")
|
|
230
|
+
elif diff.status == "different":
|
|
231
|
+
summary.append(f"~ {diff.object_type.value.upper()} {diff.name} (different)")
|
|
232
|
+
elif diff.status == "extra":
|
|
233
|
+
summary.append(f"- {diff.object_type.value.upper()} {diff.name} (extra in db)")
|
|
234
|
+
|
|
235
|
+
return summary, result.sync_sql
|
|
236
|
+
|
|
185
237
|
def _filter_operations(self, operations: list) -> tuple[list, int]:
|
|
186
238
|
"""
|
|
187
239
|
Filter operations based on migration strategy.
|