remdb 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +801 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.7.dist-info/METADATA +1473 -0
  185. remdb-0.3.7.dist-info/RECORD +187 -0
  186. remdb-0.3.7.dist-info/WHEEL +4 -0
  187. remdb-0.3.7.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,230 @@
1
+ # Dreaming Services
2
+
3
+ REM memory indexing and insight extraction services.
4
+
5
+ ## Overview
6
+
7
+ The dreaming services module provides modular, composable services for building the REM knowledge graph through:
8
+
9
+ - **User Model Updates** (`user_model_service.py`): Extract and update user profiles from activity
10
+ - **Moment Construction** (`moment_service.py`): Identify temporal narratives from resources
11
+ - **Resource Affinity** (`affinity_service.py`): Build semantic relationships between resources
12
+ - **Ontology Extraction** (`ontology_service.py`): Extract domain-specific structured knowledge from files (stub)
13
+
14
+ ## Architecture
15
+
16
+ ```
17
+ ┌─────────────────────────────────────────────────────────────┐
18
+ │ DreamingWorker │
19
+ │ (Orchestrator) │
20
+ ├─────────────────────────────────────────────────────────────┤
21
+ │ │
22
+ │ ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ │
23
+ │ │ User Model │ │ Moment │ │ Resource │ │
24
+ │ │ Service │ │ Service │ │ Affinity │ │
25
+ │ └───────┬───────┘ └───────┬───────┘ └───────┬───────┘ │
26
+ │ │ │ │ │
27
+ │ └──────────────────┼──────────────────┘ │
28
+ │ │ │
29
+ │ ┌───────▼───────┐ │
30
+ │ │ PostgreSQL │ │
31
+ │ │ Repository │ │
32
+ │ │ REM Service │ │
33
+ │ └───────────────┘ │
34
+ └─────────────────────────────────────────────────────────────┘
35
+ ```
36
+
37
+ ## Services
38
+
39
+ ### User Model Service
40
+
41
+ **File**: `user_model_service.py`
42
+
43
+ **Function**: `update_user_model(user_id, db, default_model, time_window_days, max_sessions, max_moments, max_resources)`
44
+
45
+ **Purpose**: Analyzes recent sessions, moments, and resources to generate comprehensive user profile summaries using LLM analysis.
46
+
47
+ **Process**:
48
+ 1. Query PostgreSQL for recent sessions, moments, resources for this user
49
+ 2. Load UserProfileBuilder agent schema
50
+ 3. Generate user profile using LLM
51
+ 4. Update User entity with profile data and metadata
52
+ 5. Add graph edges to key resources and moments
53
+
54
+ **Returns**: Statistics about user model update (sessions analyzed, moments included, resources included, graph edges added, etc.)
55
+
56
+ ### Moment Service
57
+
58
+ **File**: `moment_service.py`
59
+
60
+ **Function**: `construct_moments(user_id, db, default_model, lookback_hours, limit)`
61
+
62
+ **Purpose**: Analyzes recent resources and sessions to identify temporal narratives (meetings, coding sessions, conversations) and creates Moment entities.
63
+
64
+ **Process**:
65
+ 1. Query PostgreSQL for recent resources and sessions for this user
66
+ 2. Load MomentBuilder agent schema from filesystem
67
+ 3. Run agent to extract moments from data
68
+ 4. Create Moment entities via Repository
69
+ 5. Link moments to source resources via graph edges
70
+ 6. Embeddings auto-generated by embedding worker
71
+
72
+ **Returns**: Statistics about moment construction (resources queried, sessions queried, moments created, graph edges added, analysis summary)
73
+
74
+ ### Affinity Service
75
+
76
+ **File**: `affinity_service.py`
77
+
78
+ **Function**: `build_affinity(user_id, db, mode, default_model, lookback_hours, limit, similarity_threshold, top_k)`
79
+
80
+ **Purpose**: Creates semantic relationships between resources using either vector similarity (fast) or LLM analysis (intelligent).
81
+
82
+ **Modes**:
83
+ - **SEMANTIC**: Fast vector similarity search via REM SEARCH query (cheap, fast)
84
+ - **LLM**: Intelligent LLM-based relationship assessment (expensive, slow)
85
+
86
+ **Process**:
87
+ 1. Query PostgreSQL for recent resources for this user
88
+ 2. For each resource:
89
+ - Semantic: Query similar resources by vector using REM SEARCH
90
+ - LLM: Assess relationships using ResourceAffinityAssessor agent
91
+ 3. Create graph edges with deduplication (keep highest weight)
92
+ 4. Update resource entities with affinity edges
93
+
94
+ **Returns**: Statistics about affinity construction (resources processed, edges created, LLM calls made)
95
+
96
+ ### Ontology Service
97
+
98
+ **File**: `ontology_service.py`
99
+
100
+ **Function**: `extract_ontologies(user_id, lookback_hours, limit)`
101
+
102
+ **Purpose**: Extract domain-specific knowledge from files using custom agents (stub - not yet implemented).
103
+
104
+ **Returns**: Statistics about ontology extraction (files queried, configs matched, ontologies created, embeddings generated, agent calls made)
105
+
106
+ ## Utilities
107
+
108
+ **File**: `utils.py`
109
+
110
+ **Function**: `merge_graph_edges(existing_edges, new_edges)`
111
+
112
+ **Purpose**: Merge graph edges with deduplication. Keeps highest weight edge for each (dst, rel_type) pair.
113
+
114
+ ## Usage
115
+
116
+ ### Standalone Service Usage
117
+
118
+ ```python
119
+ from rem.services.dreaming import (
120
+ update_user_model,
121
+ construct_moments,
122
+ build_affinity,
123
+ extract_ontologies,
124
+ AffinityMode,
125
+ )
126
+ from rem.services.postgres import get_postgres_service
127
+
128
+ # Initialize database connection
129
+ db = get_postgres_service()
130
+ await db.connect()
131
+
132
+ try:
133
+ # Update user model from recent activity
134
+ result = await update_user_model(
135
+ user_id="user-123",
136
+ db=db,
137
+ default_model="gpt-4o",
138
+ time_window_days=30,
139
+ )
140
+ print(f"User model updated: {result}")
141
+
142
+ # Extract moments from resources
143
+ result = await construct_moments(
144
+ user_id="user-123",
145
+ db=db,
146
+ default_model="gpt-4o",
147
+ lookback_hours=24,
148
+ )
149
+ print(f"Moments created: {result['moments_created']}")
150
+
151
+ # Build resource affinity (semantic mode)
152
+ result = await build_affinity(
153
+ user_id="user-123",
154
+ db=db,
155
+ mode=AffinityMode.SEMANTIC,
156
+ default_model="gpt-4o",
157
+ lookback_hours=168,
158
+ )
159
+ print(f"Edges created: {result['edges_created']}")
160
+
161
+ finally:
162
+ await db.disconnect()
163
+ ```
164
+
165
+ ### Orchestrated Usage via DreamingWorker
166
+
167
+ ```python
168
+ from rem.workers.dreaming import DreamingWorker
169
+
170
+ worker = DreamingWorker(
171
+ rem_api_url="http://rem-api:8000",
172
+ default_model="gpt-4o",
173
+ lookback_hours=24,
174
+ )
175
+
176
+ try:
177
+ # Run complete dreaming workflow
178
+ results = await worker.process_full(
179
+ user_id="user-123",
180
+ use_llm_affinity=False,
181
+ lookback_hours=24,
182
+ )
183
+ print(results)
184
+ finally:
185
+ await worker.close()
186
+ ```
187
+
188
+ ## Design Principles
189
+
190
+ 1. **Modularity**: Each service is independent and can be used standalone
191
+ 2. **Composability**: Services can be composed together in custom workflows
192
+ 3. **DRY**: Shared utilities extracted to utils.py
193
+ 4. **Delegation**: DreamingWorker delegates to services, acting as thin orchestrator
194
+ 5. **Database Connection Management**: Caller manages database connection lifecycle
195
+ 6. **Error Handling**: Services return statistics dicts with status field
196
+ 7. **User-ID First**: All operations scoped by user_id (primary identifier)
197
+
198
+ ## File Structure
199
+
200
+ ```
201
+ rem/src/rem/services/dreaming/
202
+ ├── __init__.py # Public API facade
203
+ ├── README.md # This file
204
+ ├── user_model_service.py     # User profile updates (297 lines)
205
+ ├── moment_service.py         # Temporal narrative extraction (264 lines)
206
+ ├── affinity_service.py       # Resource relationship building (336 lines)
207
+ ├── ontology_service.py       # Domain knowledge extraction (54 lines, stub)
208
+ └── utils.py # Shared utilities (graph edge merging)
209
+ ```
210
+
211
+ ## Refactoring Benefits
212
+
213
+ **Before**: Single 1,297-line monolithic `workers/dreaming.py` file
214
+
215
+ **After**:
216
+ - 5 focused service modules (~900 lines total)
217
+ - 1 thin orchestrator (~400 lines)
218
+ - Improved testability (each service can be tested independently)
219
+ - Better separation of concerns
220
+ - Easier to extend (add new services without modifying orchestrator)
221
+ - Reusable services (can be used in other workflows)
222
+
223
+ ## Future Enhancements
224
+
225
+ 1. **Implement Ontology Service**: Complete the stub implementation
226
+ 2. **Add Service-Level Caching**: Cache agent schemas and LLM responses
227
+ 3. **Batch Operations**: Optimize database operations with batching
228
+ 4. **Parallelization**: Run independent services concurrently
229
+ 5. **Metrics and Tracing**: Add OpenTelemetry instrumentation
230
+ 6. **Service Configuration**: Extract hardcoded values to configuration
@@ -0,0 +1,53 @@
1
+ """
2
+ Dreaming Services - REM memory indexing and insight extraction.
3
+
4
+ This module provides services for building the REM knowledge graph through:
5
+ - User model updates: Extract and update user profiles from activity
6
+ - Moment construction: Identify temporal narratives from resources
7
+ - Resource affinity: Build semantic relationships between resources
8
+ - Ontology extraction: Extract domain-specific structured knowledge from files
9
+
10
+ Each service is designed to be used independently or composed together
11
+ in the DreamingWorker orchestrator for complete memory indexing workflows.
12
+
13
+ Usage:
14
+ from rem.services.dreaming import (
15
+ update_user_model,
16
+ construct_moments,
17
+ build_affinity,
18
+ extract_ontologies,
19
+ AffinityMode,
20
+ )
21
+
22
+ # Update user model from recent activity
23
+ result = await update_user_model(user_id="user-123", db=db)
24
+
25
+ # Extract moments from resources
26
+ result = await construct_moments(user_id="user-123", db=db, lookback_hours=24)
27
+
28
+ # Build resource affinity (semantic mode)
29
+ result = await build_affinity(
30
+ user_id="user-123",
31
+ db=db,
32
+ mode=AffinityMode.SEMANTIC,
33
+ lookback_hours=168,
34
+ )
35
+
36
+ # Extract ontologies (stub - not yet implemented)
37
+ result = await extract_ontologies(user_id="user-123", lookback_hours=24)
38
+ """
39
+
40
+ from .affinity_service import AffinityMode, build_affinity
41
+ from .moment_service import construct_moments
42
+ from .ontology_service import extract_ontologies
43
+ from .user_model_service import update_user_model
44
+ from .utils import merge_graph_edges
45
+
46
+ __all__ = [
47
+ "update_user_model",
48
+ "construct_moments",
49
+ "build_affinity",
50
+ "extract_ontologies",
51
+ "AffinityMode",
52
+ "merge_graph_edges",
53
+ ]
@@ -0,0 +1,336 @@
1
+ """
2
+ Affinity Service - Builds resource relationship graph.
3
+
4
+ Creates semantic relationships between resources using either
5
+ vector similarity (fast) or LLM analysis (intelligent).
6
+ """
7
+
8
+ import json
9
+ from datetime import datetime, timedelta
10
+ from enum import Enum
11
+ from pathlib import Path
12
+ from typing import Any, Optional
13
+
14
+ import yaml
15
+ from loguru import logger
16
+
17
+ from ...agentic.providers.pydantic_ai import create_agent
18
+ from ...agentic.serialization import serialize_agent_result
19
+ from ...models.core import QueryType, RemQuery, SearchParameters
20
+ from ...models.entities.resource import Resource
21
+ from ...services.postgres.repository import Repository
22
+ from ...services.postgres.service import PostgresService
23
+ from ...services.rem.service import RemService
24
+ from .utils import merge_graph_edges
25
+
26
+
27
+ class AffinityMode(str, Enum):
28
+ """Resource affinity modes."""
29
+
30
+ SEMANTIC = "semantic" # Fast vector similarity
31
+ LLM = "llm" # Intelligent LLM-based assessment
32
+
33
+
34
+ async def build_affinity(
35
+ user_id: str,
36
+ db: PostgresService,
37
+ mode: AffinityMode = AffinityMode.SEMANTIC,
38
+ default_model: str = "gpt-4o",
39
+ lookback_hours: int = 24,
40
+ limit: Optional[int] = None,
41
+ similarity_threshold: float = 0.7,
42
+ top_k: int = 3,
43
+ ) -> dict[str, Any]:
44
+ """
45
+ Build resource affinity graph.
46
+
47
+ Creates semantic relationships between resources using either
48
+ vector similarity (fast) or LLM analysis (intelligent).
49
+
50
+ Semantic Mode:
51
+ - Use vector similarity search via REM SEARCH query
52
+ - Create edges for similar resources (threshold: 0.7)
53
+ - Fast and cheap (no LLM calls)
54
+
55
+ LLM Mode:
56
+ - Use LLM to assess relationship context
57
+ - Create edges with rich metadata
58
+ - Slow and expensive (many LLM calls)
59
+ - ALWAYS use --limit to control costs
60
+
61
+ Process:
62
+ 1. Query PostgreSQL for recent resources for this user
63
+ 2. For each resource:
64
+ - Semantic: Query similar resources by vector using REM SEARCH
65
+ - LLM: Assess relationships using ResourceAffinityAssessor agent
66
+ 3. Create graph edges with deduplication (keep highest weight)
67
+ 4. Update resource entities with affinity edges
68
+
69
+ Args:
70
+ user_id: User to process
71
+ db: Database service (already connected)
72
+ mode: Affinity mode (semantic or llm)
73
+ default_model: LLM model for analysis (default: gpt-4o)
74
+ lookback_hours: Hours to look back (default: 24)
75
+ limit: Max resources to process (REQUIRED for LLM mode)
76
+ similarity_threshold: Minimum similarity score for semantic mode (default: 0.7)
77
+ top_k: Number of similar resources to find per resource (default: 3)
78
+
79
+ Returns:
80
+ Statistics about affinity construction
81
+ """
82
+ cutoff = datetime.utcnow() - timedelta(hours=lookback_hours)
83
+
84
+ # Create repositories and REM service
85
+ resource_repo = Repository(Resource, "resources", db=db)
86
+ rem_service = RemService(postgres_service=db)
87
+
88
+ # Register Resource model for REM queries
89
+ rem_service.register_model("resources", Resource)
90
+
91
+ # Query recent resources
92
+ resources = await resource_repo.find(
93
+ filters={
94
+ "user_id": user_id,
95
+ },
96
+ order_by="created_at DESC",
97
+ limit=limit,
98
+ )
99
+
100
+ # Filter by timestamp
101
+ resources = [
102
+ r for r in resources if r.created_at and r.created_at >= cutoff
103
+ ]
104
+
105
+ if not resources:
106
+ return {
107
+ "user_id": user_id,
108
+ "mode": mode.value,
109
+ "lookback_hours": lookback_hours,
110
+ "resources_processed": 0,
111
+ "edges_created": 0,
112
+ "llm_calls_made": 0 if mode == AffinityMode.LLM else None,
113
+ "status": "no_data",
114
+ }
115
+
116
+ logger.info(
117
+ f"Building affinity for {len(resources)} resources in {mode.value} mode"
118
+ )
119
+
120
+ # Statistics tracking
121
+ resources_processed = 0
122
+ total_edges_created = 0
123
+ llm_calls_made = 0
124
+
125
+ # Load LLM agent for relationship assessment if needed
126
+ affinity_agent = None
127
+ if mode == AffinityMode.LLM:
128
+ schema_path = (
129
+ Path(__file__).parent.parent.parent
130
+ / "schemas"
131
+ / "agents"
132
+ / "resource-affinity-assessor.yaml"
133
+ )
134
+
135
+ if not schema_path.exists():
136
+ raise FileNotFoundError(
137
+ f"ResourceAffinityAssessor schema not found: {schema_path}"
138
+ )
139
+
140
+ with open(schema_path) as f:
141
+ agent_schema = yaml.safe_load(f)
142
+
143
+ affinity_agent_runtime = await create_agent(
144
+ agent_schema_override=agent_schema,
145
+ model_override=default_model, # type: ignore[arg-type]
146
+ )
147
+ affinity_agent = affinity_agent_runtime.agent
148
+
149
+ # Process each resource
150
+ for resource in resources:
151
+ if not resource.content:
152
+ logger.debug(f"Skipping resource {resource.id} - no content for embedding")
153
+ continue
154
+
155
+ # Find similar resources
156
+ similar_resources = []
157
+
158
+ if mode == AffinityMode.SEMANTIC:
159
+ # Use REM SEARCH for vector similarity
160
+ try:
161
+ search_query = RemQuery(
162
+ query_type=QueryType.SEARCH,
163
+ user_id=user_id,
164
+ parameters=SearchParameters(
165
+ table_name="resources",
166
+ query_text=resource.content[:1000], # Use first 1000 chars
167
+ limit=top_k + 1, # +1 to exclude self
168
+ min_similarity=similarity_threshold,
169
+ ),
170
+ )
171
+
172
+ search_result = await rem_service.execute_query(search_query)
173
+ candidates = search_result.get("results", [])
174
+
175
+ # Filter out self and collect similar resources
176
+ # Note: SEARCH query returns {entity_type, similarity_score, data (JSONB)}
177
+ for candidate in candidates:
178
+ candidate_data = candidate.get("data", {})
179
+ candidate_id = candidate_data.get("id")
180
+
181
+ if candidate_id and candidate_id != str(resource.id):
182
+ similar_resources.append(
183
+ {
184
+ "resource": next(
185
+ (r for r in resources if str(r.id) == candidate_id),
186
+ None,
187
+ ),
188
+ "similarity_score": candidate.get("similarity_score", 0.0),
189
+ "relationship_type": "semantic_similar",
190
+ "relationship_strength": "moderate",
191
+ "edge_labels": [],
192
+ }
193
+ )
194
+
195
+ except Exception as e:
196
+ logger.warning(
197
+ f"Vector search failed for resource {resource.id}: {e}"
198
+ )
199
+ continue
200
+
201
+ elif mode == AffinityMode.LLM:
202
+ # Use LLM to assess relationships with all other resources
203
+ assert affinity_agent is not None, "Agent must be initialized in LLM mode"
204
+ for other_resource in resources:
205
+ if other_resource.id == resource.id:
206
+ continue
207
+
208
+ # Prepare input for agent
209
+ input_data = {
210
+ "resource_a": {
211
+ "id": str(resource.id),
212
+ "name": resource.name,
213
+ "category": resource.category,
214
+ "content": resource.content[:2000], # Limit for token efficiency
215
+ "created_at": (
216
+ resource.created_at.isoformat()
217
+ if resource.created_at
218
+ else None
219
+ ),
220
+ },
221
+ "resource_b": {
222
+ "id": str(other_resource.id),
223
+ "name": other_resource.name,
224
+ "category": other_resource.category,
225
+ "content": other_resource.content[:2000],
226
+ "created_at": (
227
+ other_resource.created_at.isoformat()
228
+ if other_resource.created_at
229
+ else None
230
+ ),
231
+ },
232
+ }
233
+
234
+ # Run agent
235
+ result = await affinity_agent.run(json.dumps(input_data, indent=2))
236
+ llm_calls_made += 1
237
+
238
+ # Serialize result
239
+ assessment = serialize_agent_result(result.output)
240
+
241
+ # Type guard: ensure we have a dict
242
+ if not isinstance(assessment, dict):
243
+ logger.warning(f"Expected dict from affinity agent, got {type(assessment)}")
244
+ continue
245
+
246
+ # If relationship exists, add to similar resources
247
+ if assessment.get("relationship_exists"):
248
+ # Map strength to weight
249
+ strength_to_weight = {
250
+ "strong": 0.9,
251
+ "moderate": 0.7,
252
+ "weak": 0.4,
253
+ }
254
+ weight = strength_to_weight.get(
255
+ assessment.get("relationship_strength", "moderate"), 0.7
256
+ )
257
+
258
+ similar_resources.append(
259
+ {
260
+ "resource": other_resource,
261
+ "similarity_score": weight,
262
+ "relationship_type": assessment.get(
263
+ "relationship_type", "related"
264
+ ),
265
+ "relationship_strength": assessment.get(
266
+ "relationship_strength", "moderate"
267
+ ),
268
+ "edge_labels": assessment.get("edge_labels", []),
269
+ "reasoning": assessment.get("reasoning", ""),
270
+ }
271
+ )
272
+
273
+ # Limit LLM comparisons to top_k
274
+ if len(similar_resources) >= top_k:
275
+ break
276
+
277
+ # Create graph edges for similar resources
278
+ new_edges = []
279
+ for similar in similar_resources[:top_k]:
280
+ if not similar["resource"]:
281
+ continue
282
+
283
+ # Map similarity score to weight
284
+ if mode == AffinityMode.SEMANTIC:
285
+ # Semantic mode: map similarity score directly
286
+ weight = min(similar["similarity_score"], 1.0)
287
+ else:
288
+ # LLM mode: use assessed weight
289
+ weight = similar["similarity_score"]
290
+
291
+ # Create InlineEdge
292
+ edge = {
293
+ "dst": str(similar["resource"].id), # Convert UUID to string
294
+ "rel_type": similar["relationship_type"],
295
+ "weight": weight,
296
+ "properties": {
297
+ "entity_type": "resource",
298
+ "dst_name": similar["resource"].name,
299
+ "dst_category": similar["resource"].category,
300
+ "match_type": mode.value,
301
+ "similarity_score": similar["similarity_score"],
302
+ "relationship_strength": similar.get("relationship_strength"),
303
+ "edge_labels": similar.get("edge_labels", []),
304
+ "reasoning": similar.get("reasoning", ""),
305
+ },
306
+ "created_at": datetime.utcnow().isoformat(),
307
+ }
308
+ new_edges.append(edge)
309
+
310
+ # Merge with existing edges (deduplication: keep highest weight)
311
+ existing_edges = resource.graph_edges or []
312
+ merged_edges = merge_graph_edges(existing_edges, new_edges)
313
+
314
+ # Update resource with merged edges
315
+ resource.graph_edges = merged_edges
316
+ await resource_repo.upsert(resource)
317
+
318
+ resources_processed += 1
319
+ edges_added = len(new_edges)
320
+ total_edges_created += edges_added
321
+
322
+ logger.debug(
323
+ f"Processed resource {resource.id} ({resource.name}): "
324
+ f"found {len(similar_resources)} similar resources, "
325
+ f"added {edges_added} edges"
326
+ )
327
+
328
+ return {
329
+ "user_id": user_id,
330
+ "mode": mode.value,
331
+ "lookback_hours": lookback_hours,
332
+ "resources_processed": resources_processed,
333
+ "edges_created": total_edges_created,
334
+ "llm_calls_made": llm_calls_made if mode == AffinityMode.LLM else None,
335
+ "status": "success",
336
+ }