remdb-0.2.6-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of remdb might be problematic.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +565 -0
- rem/cli/commands/configure.py +423 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1124 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +88 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +657 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +229 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.2.6.dist-info/METADATA +1191 -0
- remdb-0.2.6.dist-info/RECORD +187 -0
- remdb-0.2.6.dist-info/WHEEL +4 -0
- remdb-0.2.6.dist-info/entry_points.txt +2 -0

rem/services/dreaming/README.md
@@ -0,0 +1,230 @@
# Dreaming Services

REM memory indexing and insight extraction services.

## Overview

The dreaming services module provides modular, composable services for building the REM knowledge graph through:

- **User Model Updates** (`user_model_service.py`): Extract and update user profiles from activity
- **Moment Construction** (`moment_service.py`): Identify temporal narratives from resources
- **Resource Affinity** (`affinity_service.py`): Build semantic relationships between resources
- **Ontology Extraction** (`ontology_service.py`): Extract domain-specific structured knowledge from files (stub)

## Architecture

```
┌─────────────────────────────────────────────────────────────┐
│                        DreamingWorker                        │
│                        (Orchestrator)                        │
├─────────────────────────────────────────────────────────────┤
│                                                              │
│  ┌───────────────┐  ┌───────────────┐  ┌───────────────┐    │
│  │  User Model   │  │    Moment     │  │   Resource    │    │
│  │   Service     │  │   Service     │  │   Affinity    │    │
│  └───────┬───────┘  └───────┬───────┘  └───────┬───────┘    │
│          │                  │                  │            │
│          └──────────────────┼──────────────────┘            │
│                             │                                │
│                     ┌───────▼───────┐                        │
│                     │  PostgreSQL   │                        │
│                     │  Repository   │                        │
│                     │  REM Service  │                        │
│                     └───────────────┘                        │
└─────────────────────────────────────────────────────────────┘
```

## Services

### User Model Service

**File**: `user_model_service.py`

**Function**: `update_user_model(user_id, db, default_model, time_window_days, max_sessions, max_moments, max_resources)`

**Purpose**: Analyzes recent sessions, moments, and resources to generate comprehensive user profile summaries using LLM analysis.

**Process**:
1. Query PostgreSQL for recent sessions, moments, and resources for this user
2. Load UserProfileBuilder agent schema
3. Generate user profile using LLM
4. Update User entity with profile data and metadata
5. Add graph edges to key resources and moments

**Returns**: Statistics about the user model update (sessions analyzed, moments included, resources included, graph edges added, etc.)

### Moment Service

**File**: `moment_service.py`

**Function**: `construct_moments(user_id, db, default_model, lookback_hours, limit)`

**Purpose**: Analyzes recent resources and sessions to identify temporal narratives (meetings, coding sessions, conversations) and creates Moment entities.

**Process**:
1. Query PostgreSQL for recent resources and sessions for this user
2. Load MomentBuilder agent schema from filesystem
3. Run agent to extract moments from data
4. Create Moment entities via Repository
5. Link moments to source resources via graph edges
6. Embeddings auto-generated by embedding worker

**Returns**: Statistics about moment construction (resources queried, sessions queried, moments created, graph edges added, analysis summary)

### Affinity Service

**File**: `affinity_service.py`

**Function**: `build_affinity(user_id, db, mode, default_model, lookback_hours, limit, similarity_threshold, top_k)`

**Purpose**: Creates semantic relationships between resources using either vector similarity (fast) or LLM analysis (intelligent).

**Modes**:
- **SEMANTIC**: Fast vector similarity search via REM SEARCH query (cheap, fast)
- **LLM**: Intelligent LLM-based relationship assessment (expensive, slow)

**Process**:
1. Query PostgreSQL for recent resources for this user
2. For each resource:
   - Semantic: Query similar resources by vector using REM SEARCH
   - LLM: Assess relationships using ResourceAffinityAssessor agent
3. Create graph edges with deduplication (keep highest weight; see the example edge below)
4. Update resource entities with affinity edges
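
Each relationship is stored as an inline edge on the source resource. The dict below mirrors the edge constructed in `affinity_service.py`; the `dst` id and property values are illustrative placeholders:

```python
# Shape of one affinity edge as built by the service (illustrative values).
edge = {
    "dst": "<related-resource-uuid>",   # target resource id (stringified UUID)
    "rel_type": "semantic_similar",     # or the LLM-assessed relationship_type
    "weight": 0.82,                     # similarity score (semantic) or mapped strength (llm)
    "properties": {
        "entity_type": "resource",
        "dst_name": "<related resource name>",
        "dst_category": "<related resource category>",
        "match_type": "semantic",       # AffinityMode value used for the match
        "similarity_score": 0.82,
        "relationship_strength": "moderate",
        "edge_labels": [],
        "reasoning": "",                # populated in LLM mode
    },
    "created_at": "2025-01-01T00:00:00",
}
```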

**Returns**: Statistics about affinity construction (resources processed, edges created, LLM calls made)

### Ontology Service

**File**: `ontology_service.py`

**Function**: `extract_ontologies(user_id, lookback_hours, limit)`

**Purpose**: Extract domain-specific knowledge from files using custom agents (stub - not yet implemented).

**Returns**: Statistics about ontology extraction (files queried, configs matched, ontologies created, embeddings generated, agent calls made)

## Utilities

**File**: `utils.py`

**Function**: `merge_graph_edges(existing_edges, new_edges)`

**Purpose**: Merge graph edges with deduplication. Keeps highest weight edge for each (dst, rel_type) pair.
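
`utils.py` itself is not shown in this diff, so the following is only a minimal sketch of the documented behaviour (highest-weight edge wins per `(dst, rel_type)` pair), not the shipped implementation:

```python
# Sketch of the documented dedup rule; the packaged utils.py may differ in detail.
def merge_graph_edges(existing_edges: list[dict], new_edges: list[dict]) -> list[dict]:
    """Merge edge lists, keeping the highest-weight edge per (dst, rel_type)."""
    best: dict[tuple[str, str], dict] = {}
    for edge in [*existing_edges, *new_edges]:
        key = (edge["dst"], edge["rel_type"])
        if key not in best or edge.get("weight", 0.0) > best[key].get("weight", 0.0):
            best[key] = edge
    return list(best.values())
```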

## Usage

### Standalone Service Usage

```python
from rem.services.dreaming import (
    update_user_model,
    construct_moments,
    build_affinity,
    extract_ontologies,
    AffinityMode,
)
from rem.services.postgres import get_postgres_service

# Initialize database connection
db = get_postgres_service()
await db.connect()

try:
    # Update user model from recent activity
    result = await update_user_model(
        user_id="user-123",
        db=db,
        default_model="gpt-4o",
        time_window_days=30,
    )
    print(f"User model updated: {result}")

    # Extract moments from resources
    result = await construct_moments(
        user_id="user-123",
        db=db,
        default_model="gpt-4o",
        lookback_hours=24,
    )
    print(f"Moments created: {result['moments_created']}")

    # Build resource affinity (semantic mode)
    result = await build_affinity(
        user_id="user-123",
        db=db,
        mode=AffinityMode.SEMANTIC,
        default_model="gpt-4o",
        lookback_hours=168,
    )
    print(f"Edges created: {result['edges_created']}")

finally:
    await db.disconnect()
```
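
LLM mode is not shown above. Per the `build_affinity` docstring it should always be run with a small `limit`, since it assesses resource pairs with an agent. A sketch using the same signature, placed inside the same `try`/`finally` block as the calls above:

```python
    # Build resource affinity (LLM mode) - pairwise agent assessment, so keep limit small
    result = await build_affinity(
        user_id="user-123",
        db=db,
        mode=AffinityMode.LLM,
        default_model="gpt-4o",
        lookback_hours=24,
        limit=10,   # bound the number of resources considered
        top_k=3,    # stop after 3 related resources per source
    )
    print(f"LLM calls made: {result['llm_calls_made']}")
```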

### Orchestrated Usage via DreamingWorker

```python
from rem.workers.dreaming import DreamingWorker

worker = DreamingWorker(
    rem_api_url="http://rem-api:8000",
    default_model="gpt-4o",
    lookback_hours=24,
)

try:
    # Run complete dreaming workflow
    results = await worker.process_full(
        user_id="user-123",
        use_llm_affinity=False,
        lookback_hours=24,
    )
    print(results)
finally:
    await worker.close()
```

## Design Principles

1. **Modularity**: Each service is independent and can be used standalone
2. **Composability**: Services can be composed together in custom workflows
3. **DRY**: Shared utilities extracted to `utils.py`
4. **Delegation**: DreamingWorker delegates to services, acting as a thin orchestrator
5. **Database Connection Management**: Caller manages the database connection lifecycle
6. **Error Handling**: Services return statistics dicts with a `status` field (see the sketch after this list)
7. **User-ID First**: All operations scoped by `user_id` (primary identifier)
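
Principle 6 in practice: `build_affinity` reports `status` values `"success"` or `"no_data"` (see `affinity_service.py`), so callers can branch on the result. A minimal sketch:

```python
stats = await build_affinity(
    user_id="user-123",
    db=db,
    mode=AffinityMode.SEMANTIC,
    lookback_hours=24,
)
if stats["status"] == "no_data":
    print("No recent resources to relate")
else:
    print(f"{stats['edges_created']} edges across {stats['resources_processed']} resources")
```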

## File Structure

```
rem/src/rem/services/dreaming/
├── __init__.py              # Public API facade
├── README.md                # This file
├── user_model_service.py    # User profile updates (260 lines)
├── moment_service.py        # Temporal narrative extraction (260 lines)
├── affinity_service.py      # Resource relationship building (320 lines)
├── ontology_service.py      # Domain knowledge extraction (45 lines, stub)
└── utils.py                 # Shared utilities (graph edge merging)
```

## Refactoring Benefits

**Before**: Single 1,297-line monolithic `workers/dreaming.py` file

**After**:
- 5 focused service modules (~900 lines total)
- 1 thin orchestrator (~400 lines)
- Improved testability (each service can be tested independently)
- Better separation of concerns
- Easier to extend (add new services without modifying orchestrator)
- Reusable services (can be used in other workflows)

## Future Enhancements

1. **Implement Ontology Service**: Complete the stub implementation
2. **Add Service-Level Caching**: Cache agent schemas and LLM responses
3. **Batch Operations**: Optimize database operations with batching
4. **Parallelization**: Run independent services concurrently
5. **Metrics and Tracing**: Add OpenTelemetry instrumentation
6. **Service Configuration**: Extract hardcoded values to configuration

rem/services/dreaming/__init__.py
@@ -0,0 +1,53 @@
"""
Dreaming Services - REM memory indexing and insight extraction.

This module provides services for building the REM knowledge graph through:
- User model updates: Extract and update user profiles from activity
- Moment construction: Identify temporal narratives from resources
- Resource affinity: Build semantic relationships between resources
- Ontology extraction: Extract domain-specific structured knowledge from files

Each service is designed to be used independently or composed together
in the DreamingWorker orchestrator for complete memory indexing workflows.

Usage:
    from rem.services.dreaming import (
        update_user_model,
        construct_moments,
        build_affinity,
        extract_ontologies,
        AffinityMode,
    )

    # Update user model from recent activity
    result = await update_user_model(user_id="user-123", db=db)

    # Extract moments from resources
    result = await construct_moments(user_id="user-123", db=db, lookback_hours=24)

    # Build resource affinity (semantic mode)
    result = await build_affinity(
        user_id="user-123",
        db=db,
        mode=AffinityMode.SEMANTIC,
        lookback_hours=168,
    )

    # Extract ontologies (stub - not yet implemented)
    result = await extract_ontologies(user_id="user-123", lookback_hours=24)
"""

from .affinity_service import AffinityMode, build_affinity
from .moment_service import construct_moments
from .ontology_service import extract_ontologies
from .user_model_service import update_user_model
from .utils import merge_graph_edges

__all__ = [
    "update_user_model",
    "construct_moments",
    "build_affinity",
    "extract_ontologies",
    "AffinityMode",
    "merge_graph_edges",
]

rem/services/dreaming/affinity_service.py
@@ -0,0 +1,336 @@
"""
Affinity Service - Builds resource relationship graph.

Creates semantic relationships between resources using either
vector similarity (fast) or LLM analysis (intelligent).
"""

import json
from datetime import datetime, timedelta
from enum import Enum
from pathlib import Path
from typing import Any, Optional

import yaml
from loguru import logger

from ...agentic.providers.pydantic_ai import create_agent
from ...agentic.serialization import serialize_agent_result
from ...models.core import QueryType, RemQuery, SearchParameters
from ...models.entities.resource import Resource
from ...services.postgres.repository import Repository
from ...services.postgres.service import PostgresService
from ...services.rem.service import RemService
from .utils import merge_graph_edges


class AffinityMode(str, Enum):
    """Resource affinity modes."""

    SEMANTIC = "semantic"  # Fast vector similarity
    LLM = "llm"  # Intelligent LLM-based assessment


async def build_affinity(
    user_id: str,
    db: PostgresService,
    mode: AffinityMode = AffinityMode.SEMANTIC,
    default_model: str = "gpt-4o",
    lookback_hours: int = 24,
    limit: Optional[int] = None,
    similarity_threshold: float = 0.7,
    top_k: int = 3,
) -> dict[str, Any]:
    """
    Build resource affinity graph.

    Creates semantic relationships between resources using either
    vector similarity (fast) or LLM analysis (intelligent).

    Semantic Mode:
    - Use vector similarity search via REM SEARCH query
    - Create edges for similar resources (threshold: 0.7)
    - Fast and cheap (no LLM calls)

    LLM Mode:
    - Use LLM to assess relationship context
    - Create edges with rich metadata
    - Slow and expensive (many LLM calls)
    - ALWAYS use --limit to control costs

    Process:
    1. Query PostgreSQL for recent resources for this user
    2. For each resource:
       - Semantic: Query similar resources by vector using REM SEARCH
       - LLM: Assess relationships using ResourceAffinityAssessor agent
    3. Create graph edges with deduplication (keep highest weight)
    4. Update resource entities with affinity edges

    Args:
        user_id: User to process
        db: Database service (already connected)
        mode: Affinity mode (semantic or llm)
        default_model: LLM model for analysis (default: gpt-4o)
        lookback_hours: Hours to look back (default: 24)
        limit: Max resources to process (REQUIRED for LLM mode)
        similarity_threshold: Minimum similarity score for semantic mode (default: 0.7)
        top_k: Number of similar resources to find per resource (default: 3)

    Returns:
        Statistics about affinity construction
    """
    cutoff = datetime.utcnow() - timedelta(hours=lookback_hours)

    # Create repositories and REM service
    resource_repo = Repository(Resource, "resources", db=db)
    rem_service = RemService(postgres_service=db)

    # Register Resource model for REM queries
    rem_service.register_model("resources", Resource)

    # Query recent resources
    resources = await resource_repo.find(
        filters={
            "user_id": user_id,
        },
        order_by="created_at DESC",
        limit=limit,
    )

    # Filter by timestamp
    resources = [
        r for r in resources if r.created_at and r.created_at >= cutoff
    ]

    if not resources:
        return {
            "user_id": user_id,
            "mode": mode.value,
            "lookback_hours": lookback_hours,
            "resources_processed": 0,
            "edges_created": 0,
            "llm_calls_made": 0 if mode == AffinityMode.LLM else None,
            "status": "no_data",
        }

    logger.info(
        f"Building affinity for {len(resources)} resources in {mode.value} mode"
    )

    # Statistics tracking
    resources_processed = 0
    total_edges_created = 0
    llm_calls_made = 0

    # Load LLM agent for relationship assessment if needed
    affinity_agent = None
    if mode == AffinityMode.LLM:
        schema_path = (
            Path(__file__).parent.parent.parent
            / "schemas"
            / "agents"
            / "resource-affinity-assessor.yaml"
        )

        if not schema_path.exists():
            raise FileNotFoundError(
                f"ResourceAffinityAssessor schema not found: {schema_path}"
            )

        with open(schema_path) as f:
            agent_schema = yaml.safe_load(f)

        affinity_agent_runtime = await create_agent(
            agent_schema_override=agent_schema,
            model_override=default_model,  # type: ignore[arg-type]
        )
        affinity_agent = affinity_agent_runtime.agent

    # Process each resource
    for resource in resources:
        if not resource.content:
            logger.debug(f"Skipping resource {resource.id} - no content for embedding")
            continue

        # Find similar resources
        similar_resources = []

        if mode == AffinityMode.SEMANTIC:
            # Use REM SEARCH for vector similarity
            try:
                search_query = RemQuery(
                    query_type=QueryType.SEARCH,
                    user_id=user_id,
                    parameters=SearchParameters(
                        table_name="resources",
                        query_text=resource.content[:1000],  # Use first 1000 chars
                        limit=top_k + 1,  # +1 to exclude self
                        min_similarity=similarity_threshold,
                    ),
                )

                search_result = await rem_service.execute_query(search_query)
                candidates = search_result.get("results", [])

                # Filter out self and collect similar resources
                # Note: SEARCH query returns {entity_type, similarity_score, data (JSONB)}
                for candidate in candidates:
                    candidate_data = candidate.get("data", {})
                    candidate_id = candidate_data.get("id")

                    if candidate_id and candidate_id != str(resource.id):
                        similar_resources.append(
                            {
                                "resource": next(
                                    (r for r in resources if str(r.id) == candidate_id),
                                    None,
                                ),
                                "similarity_score": candidate.get("similarity_score", 0.0),
                                "relationship_type": "semantic_similar",
                                "relationship_strength": "moderate",
                                "edge_labels": [],
                            }
                        )

            except Exception as e:
                logger.warning(
                    f"Vector search failed for resource {resource.id}: {e}"
                )
                continue

        elif mode == AffinityMode.LLM:
            # Use LLM to assess relationships with all other resources
            assert affinity_agent is not None, "Agent must be initialized in LLM mode"
            for other_resource in resources:
                if other_resource.id == resource.id:
                    continue

                # Prepare input for agent
                input_data = {
                    "resource_a": {
                        "id": str(resource.id),
                        "name": resource.name,
                        "category": resource.category,
                        "content": resource.content[:2000],  # Limit for token efficiency
                        "created_at": (
                            resource.created_at.isoformat()
                            if resource.created_at
                            else None
                        ),
                    },
                    "resource_b": {
                        "id": str(other_resource.id),
                        "name": other_resource.name,
                        "category": other_resource.category,
                        "content": other_resource.content[:2000],
                        "created_at": (
                            other_resource.created_at.isoformat()
                            if other_resource.created_at
                            else None
                        ),
                    },
                }

                # Run agent
                result = await affinity_agent.run(json.dumps(input_data, indent=2))
                llm_calls_made += 1

                # Serialize result
                assessment = serialize_agent_result(result.output)

                # Type guard: ensure we have a dict
                if not isinstance(assessment, dict):
                    logger.warning(f"Expected dict from affinity agent, got {type(assessment)}")
                    continue

                # If relationship exists, add to similar resources
                if assessment.get("relationship_exists"):
                    # Map strength to weight
                    strength_to_weight = {
                        "strong": 0.9,
                        "moderate": 0.7,
                        "weak": 0.4,
                    }
                    weight = strength_to_weight.get(
                        assessment.get("relationship_strength", "moderate"), 0.7
                    )

                    similar_resources.append(
                        {
                            "resource": other_resource,
                            "similarity_score": weight,
                            "relationship_type": assessment.get(
                                "relationship_type", "related"
                            ),
                            "relationship_strength": assessment.get(
                                "relationship_strength", "moderate"
                            ),
                            "edge_labels": assessment.get("edge_labels", []),
                            "reasoning": assessment.get("reasoning", ""),
                        }
                    )

                # Limit LLM comparisons to top_k
                if len(similar_resources) >= top_k:
                    break

        # Create graph edges for similar resources
        new_edges = []
        for similar in similar_resources[:top_k]:
            if not similar["resource"]:
                continue

            # Map similarity score to weight
            if mode == AffinityMode.SEMANTIC:
                # Semantic mode: map similarity score directly
                weight = min(similar["similarity_score"], 1.0)
            else:
                # LLM mode: use assessed weight
                weight = similar["similarity_score"]

            # Create InlineEdge
            edge = {
                "dst": str(similar["resource"].id),  # Convert UUID to string
                "rel_type": similar["relationship_type"],
                "weight": weight,
                "properties": {
                    "entity_type": "resource",
                    "dst_name": similar["resource"].name,
                    "dst_category": similar["resource"].category,
                    "match_type": mode.value,
                    "similarity_score": similar["similarity_score"],
                    "relationship_strength": similar.get("relationship_strength"),
                    "edge_labels": similar.get("edge_labels", []),
                    "reasoning": similar.get("reasoning", ""),
                },
                "created_at": datetime.utcnow().isoformat(),
            }
            new_edges.append(edge)

        # Merge with existing edges (deduplication: keep highest weight)
        existing_edges = resource.graph_edges or []
        merged_edges = merge_graph_edges(existing_edges, new_edges)

        # Update resource with merged edges
        resource.graph_edges = merged_edges
        await resource_repo.upsert(resource)

        resources_processed += 1
        edges_added = len(new_edges)
        total_edges_created += edges_added

        logger.debug(
            f"Processed resource {resource.id} ({resource.name}): "
            f"found {len(similar_resources)} similar resources, "
            f"added {edges_added} edges"
        )

    return {
        "user_id": user_id,
        "mode": mode.value,
        "lookback_hours": lookback_hours,
        "resources_processed": resources_processed,
        "edges_created": total_edges_created,
        "llm_calls_made": llm_calls_made if mode == AffinityMode.LLM else None,
        "status": "success",
    }