remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,230 @@
1
+ # Dreaming Services
2
+
3
+ REM memory indexing and insight extraction services.
4
+
5
+ ## Overview
6
+
7
+ The dreaming services module provides modular, composable services for building the REM knowledge graph through:
8
+
9
+ - **User Model Updates** (`user_model_service.py`): Extract and update user profiles from activity
10
+ - **Moment Construction** (`moment_service.py`): Identify temporal narratives from resources
11
+ - **Resource Affinity** (`affinity_service.py`): Build semantic relationships between resources
12
+ - **Ontology Extraction** (`ontology_service.py`): Extract domain-specific structured knowledge from files (stub)
13
+
14
+ ## Architecture
15
+
16
+ ```
17
+ ┌─────────────────────────────────────────────────────────────┐
18
+ │ DreamingWorker │
19
+ │ (Orchestrator) │
20
+ ├─────────────────────────────────────────────────────────────┤
21
+ │ │
22
+ │ ┌───────────────┐ ┌───────────────┐ ┌───────────────┐ │
23
+ │ │ User Model │ │ Moment │ │ Resource │ │
24
+ │ │ Service │ │ Service │ │ Affinity │ │
25
+ │ └───────┬───────┘ └───────┬───────┘ └───────┬───────┘ │
26
+ │ │ │ │ │
27
+ │ └──────────────────┼──────────────────┘ │
28
+ │ │ │
29
+ │ ┌───────▼───────┐ │
30
+ │ │ PostgreSQL │ │
31
+ │ │ Repository │ │
32
+ │ │ REM Service │ │
33
+ │ └───────────────┘ │
34
+ └─────────────────────────────────────────────────────────────┘
35
+ ```
36
+
37
+ ## Services
38
+
39
+ ### User Model Service
40
+
41
+ **File**: `user_model_service.py`
42
+
43
+ **Function**: `update_user_model(user_id, db, default_model, time_window_days, max_sessions, max_moments, max_resources)`
44
+
45
+ **Purpose**: Analyzes recent sessions, moments, and resources to generate comprehensive user profile summaries using LLM analysis.
46
+
47
+ **Process**:
48
+ 1. Query PostgreSQL for recent sessions, moments, resources for this user
49
+ 2. Load UserProfileBuilder agent schema
50
+ 3. Generate user profile using LLM
51
+ 4. Update User entity with profile data and metadata
52
+ 5. Add graph edges to key resources and moments
53
+
54
+ **Returns**: Statistics about user model update (sessions analyzed, moments included, resources included, graph edges added, etc.)
55
+
56
+ ### Moment Service
57
+
58
+ **File**: `moment_service.py`
59
+
60
+ **Function**: `construct_moments(user_id, db, default_model, lookback_hours, limit)`
61
+
62
+ **Purpose**: Analyzes recent resources and sessions to identify temporal narratives (meetings, coding sessions, conversations) and creates Moment entities.
63
+
64
+ **Process**:
65
+ 1. Query PostgreSQL for recent resources and sessions for this user
66
+ 2. Load MomentBuilder agent schema from filesystem
67
+ 3. Run agent to extract moments from data
68
+ 4. Create Moment entities via Repository
69
+ 5. Link moments to source resources via graph edges
70
+ 6. Embeddings are generated automatically by the embedding worker
71
+
72
+ **Returns**: Statistics about moment construction (resources queried, sessions queried, moments created, graph edges added, analysis summary)
73
+
74
+ ### Affinity Service
75
+
76
+ **File**: `affinity_service.py`
77
+
78
+ **Function**: `build_affinity(user_id, db, mode, default_model, lookback_hours, limit, similarity_threshold, top_k)`
79
+
80
+ **Purpose**: Creates semantic relationships between resources using either vector similarity (fast) or LLM analysis (intelligent).
81
+
82
+ **Modes**:
83
+ - **SEMANTIC**: Vector similarity search via REM SEARCH query (cheap, fast, no LLM calls)
84
+ - **LLM**: Intelligent LLM-based relationship assessment (expensive, slow)
85
+
86
+ **Process**:
87
+ 1. Query PostgreSQL for recent resources for this user
88
+ 2. For each resource:
89
+ - Semantic: Query similar resources by vector using REM SEARCH
90
+ - LLM: Assess relationships using ResourceAffinityAssessor agent
91
+ 3. Create graph edges with deduplication (keep highest weight)
92
+ 4. Update resource entities with affinity edges
93
+
94
+ **Returns**: Statistics about affinity construction (resources processed, edges created, LLM calls made)
95
+
96
+ ### Ontology Service
97
+
98
+ **File**: `ontology_service.py`
99
+
100
+ **Function**: `extract_ontologies(user_id, lookback_hours, limit)`
101
+
102
+ **Purpose**: Extract domain-specific knowledge from files using custom agents (stub - not yet implemented).
103
+
104
+ **Returns**: Statistics about ontology extraction (files queried, configs matched, ontologies created, embeddings generated, agent calls made)
105
+
106
+ ## Utilities
107
+
108
+ **File**: `utils.py`
109
+
110
+ **Function**: `merge_graph_edges(existing_edges, new_edges)`
111
+
112
+ **Purpose**: Merges graph edges with deduplication, keeping the highest-weight edge for each `(dst, rel_type)` pair.
113
+
114
+ ## Usage
115
+
116
+ ### Standalone Service Usage
117
+
118
+ ```python
119
+ from rem.services.dreaming import (
120
+ update_user_model,
121
+ construct_moments,
122
+ build_affinity,
123
+ extract_ontologies,
124
+ AffinityMode,
125
+ )
126
+ from rem.services.postgres import get_postgres_service
127
+
128
+ # Initialize database connection
129
+ db = get_postgres_service()
130
+ await db.connect()
131
+
132
+ try:
133
+ # Update user model from recent activity
134
+ result = await update_user_model(
135
+ user_id="user-123",
136
+ db=db,
137
+ default_model="gpt-4o",
138
+ time_window_days=30,
139
+ )
140
+ print(f"User model updated: {result}")
141
+
142
+ # Extract moments from resources
143
+ result = await construct_moments(
144
+ user_id="user-123",
145
+ db=db,
146
+ default_model="gpt-4o",
147
+ lookback_hours=24,
148
+ )
149
+ print(f"Moments created: {result['moments_created']}")
150
+
151
+ # Build resource affinity (semantic mode)
152
+ result = await build_affinity(
153
+ user_id="user-123",
154
+ db=db,
155
+ mode=AffinityMode.SEMANTIC,
156
+ default_model="gpt-4o",
157
+ lookback_hours=168,
158
+ )
159
+ print(f"Edges created: {result['edges_created']}")
160
+
161
+ finally:
162
+ await db.disconnect()
163
+ ```
164
+
165
+ ### Orchestrated Usage via DreamingWorker
166
+
167
+ ```python
168
+ from rem.workers.dreaming import DreamingWorker
169
+
170
+ worker = DreamingWorker(
171
+ rem_api_url="http://rem-api:8000",
172
+ default_model="gpt-4o",
173
+ lookback_hours=24,
174
+ )
175
+
176
+ try:
177
+ # Run complete dreaming workflow
178
+ results = await worker.process_full(
179
+ user_id="user-123",
180
+ use_llm_affinity=False,
181
+ lookback_hours=24,
182
+ )
183
+ print(results)
184
+ finally:
185
+ await worker.close()
186
+ ```
187
+
188
+ ## Design Principles
189
+
190
+ 1. **Modularity**: Each service is independent and can be used standalone
191
+ 2. **Composability**: Services can be composed together in custom workflows
192
+ 3. **DRY**: Shared utilities extracted to utils.py
193
+ 4. **Delegation**: DreamingWorker delegates to services, acting as thin orchestrator
194
+ 5. **Database Connection Management**: Caller manages database connection lifecycle
195
+ 6. **Error Handling**: Services return statistics dicts that include a `status` field
196
+ 7. **User-ID First**: All operations scoped by user_id (primary identifier)
197
+
198
+ ## File Structure
199
+
200
+ ```
201
+ rem/src/rem/services/dreaming/
202
+ ├── __init__.py # Public API facade
203
+ ├── README.md # This file
204
+ ├── user_model_service.py # User profile updates (297 lines)
205
+ ├── moment_service.py # Temporal narrative extraction (251 lines)
206
+ ├── affinity_service.py # Resource relationship building (322 lines)
207
+ ├── ontology_service.py # Domain knowledge extraction (54 lines, stub)
208
+ └── utils.py # Shared utilities (graph edge merging)
209
+ ```
210
+
211
+ ## Refactoring Benefits
212
+
213
+ **Before**: Single 1,297-line monolithic `workers/dreaming.py` file
214
+
215
+ **After**:
216
+ - 5 focused service modules (~960 lines total)
217
+ - 1 thin orchestrator (~500 lines)
218
+ - Improved testability (each service can be tested independently)
219
+ - Better separation of concerns
220
+ - Easier to extend (add new services without modifying orchestrator)
221
+ - Reusable services (can be used in other workflows)
222
+
223
+ ## Future Enhancements
224
+
225
+ 1. **Implement Ontology Service**: Complete the stub implementation
226
+ 2. **Add Service-Level Caching**: Cache agent schemas and LLM responses
227
+ 3. **Batch Operations**: Optimize database operations with batching
228
+ 4. **Parallelization**: Run independent services concurrently
229
+ 5. **Metrics and Tracing**: Add OpenTelemetry instrumentation
230
+ 6. **Service Configuration**: Extract hardcoded values to configuration
@@ -0,0 +1,53 @@
1
+ """
2
+ Dreaming Services - REM memory indexing and insight extraction.
3
+
4
+ This module provides services for building the REM knowledge graph through:
5
+ - User model updates: Extract and update user profiles from activity
6
+ - Moment construction: Identify temporal narratives from resources
7
+ - Resource affinity: Build semantic relationships between resources
8
+ - Ontology extraction: Extract domain-specific structured knowledge from files
9
+
10
+ Each service is designed to be used independently or composed together
11
+ in the DreamingWorker orchestrator for complete memory indexing workflows.
12
+
13
+ Usage:
14
+ from rem.services.dreaming import (
15
+ update_user_model,
16
+ construct_moments,
17
+ build_affinity,
18
+ extract_ontologies,
19
+ AffinityMode,
20
+ )
21
+
22
+ # Update user model from recent activity
23
+ result = await update_user_model(user_id="user-123", db=db)
24
+
25
+ # Extract moments from resources
26
+ result = await construct_moments(user_id="user-123", db=db, lookback_hours=24)
27
+
28
+ # Build resource affinity (semantic mode)
29
+ result = await build_affinity(
30
+ user_id="user-123",
31
+ db=db,
32
+ mode=AffinityMode.SEMANTIC,
33
+ lookback_hours=168,
34
+ )
35
+
36
+ # Extract ontologies (stub - not yet implemented)
37
+ result = await extract_ontologies(user_id="user-123", lookback_hours=24)
38
+ """
39
+
40
+ from .affinity_service import AffinityMode, build_affinity
41
+ from .moment_service import construct_moments
42
+ from .ontology_service import extract_ontologies
43
+ from .user_model_service import update_user_model
44
+ from .utils import merge_graph_edges
45
+
46
+ __all__ = [
47
+ "update_user_model",
48
+ "construct_moments",
49
+ "build_affinity",
50
+ "extract_ontologies",
51
+ "AffinityMode",
52
+ "merge_graph_edges",
53
+ ]
@@ -0,0 +1,322 @@
1
+ """
2
+ Affinity Service - Builds resource relationship graph.
3
+
4
+ Creates semantic relationships between resources using either
5
+ vector similarity (fast) or LLM analysis (intelligent).
6
+ """
7
+
8
+ import json
9
+ from datetime import datetime, timedelta
10
+ from enum import Enum
11
+ from typing import Any, Optional
12
+
13
+ from loguru import logger
14
+
15
+ from ...utils.schema_loader import load_agent_schema
16
+ from ...agentic.providers.pydantic_ai import create_agent
17
+ from ...agentic.serialization import serialize_agent_result
18
+ from ...models.core import QueryType, RemQuery, SearchParameters
19
+ from ...models.entities.resource import Resource
20
+ from ...services.postgres.repository import Repository
21
+ from ...services.postgres.service import PostgresService
22
+ from ...services.rem.service import RemService
23
+ from .utils import merge_graph_edges
24
+
25
+
26
class AffinityMode(str, Enum):
    """Strategy used to discover relationships between resources.

    Members are plain strings (``str`` mixin), so they compare equal to
    their lowercase names and serialize without extra handling.
    """

    # Vector-similarity lookup: fast and cheap, no LLM calls.
    SEMANTIC = "semantic"
    # Assessment by an LLM agent: richer edge metadata, slower and costlier.
    LLM = "llm"
31
+
32
+
33
# Edge weight assigned to each relationship strength reported by the LLM assessor.
_STRENGTH_TO_WEIGHT = {
    "strong": 0.9,
    "moderate": 0.7,
    "weak": 0.4,
}


def _resource_payload(resource: Any, max_chars: int = 2000) -> dict[str, Any]:
    """Return the JSON-serializable view of a Resource sent to the LLM assessor."""
    return {
        "id": str(resource.id),
        "name": resource.name,
        "category": resource.category,
        # Truncated for token efficiency; a missing body becomes "" instead of
        # raising TypeError on ``None[:max_chars]``.
        "content": (resource.content or "")[:max_chars],
        "created_at": (
            resource.created_at.isoformat() if resource.created_at else None
        ),
    }


def _make_affinity_edge(similar: dict[str, Any], mode: Any) -> dict[str, Any]:
    """Build one inline graph-edge dict from a match record produced by build_affinity."""
    target = similar["resource"]
    score = similar["similarity_score"]
    # Semantic scores are capped at 1.0; LLM scores already come from the
    # fixed strength-to-weight table.
    weight = min(score, 1.0) if mode == AffinityMode.SEMANTIC else score
    return {
        "dst": str(target.id),  # Convert UUID to string
        "rel_type": similar["relationship_type"],
        "weight": weight,
        "properties": {
            "entity_type": "resource",
            "dst_name": target.name,
            "dst_category": target.category,
            "match_type": mode.value,
            "similarity_score": score,
            "relationship_strength": similar.get("relationship_strength"),
            "edge_labels": similar.get("edge_labels", []),
            "reasoning": similar.get("reasoning", ""),
        },
        "created_at": datetime.utcnow().isoformat(),
    }


async def build_affinity(
    user_id: str,
    db: PostgresService,
    mode: AffinityMode = AffinityMode.SEMANTIC,
    default_model: str = "gpt-4o",
    lookback_hours: int = 24,
    limit: Optional[int] = None,
    similarity_threshold: float = 0.7,
    top_k: int = 3,
) -> dict[str, Any]:
    """
    Build resource affinity graph.

    Creates semantic relationships between a user's recent resources using
    either vector similarity (fast) or LLM analysis (intelligent).

    Semantic Mode:
    - Use vector similarity search via REM SEARCH query
    - Create edges for resources scoring at least ``similarity_threshold``
    - Fast and cheap (no LLM calls)

    LLM Mode:
    - Use the ResourceAffinityAssessor agent to assess each resource pair
    - Create edges with rich metadata (labels, reasoning)
    - Slow and expensive (many LLM calls)
    - Always pass ``limit`` to control costs; a warning is logged otherwise

    Process:
    1. Query PostgreSQL for this user's resources (newest first, capped by
       ``limit``) and keep those created within ``lookback_hours``
    2. For each resource that has content:
       - Semantic: Query similar resources by vector using REM SEARCH
       - LLM: Assess relationships against the other resources in the batch
    3. Create graph edges with deduplication (keep highest weight)
    4. Upsert the resource with the merged edges

    Edges are only created towards resources that are part of the fetched
    batch; search hits outside it are ignored.

    Args:
        user_id: User to process
        db: Database service (already connected)
        mode: Affinity mode (semantic or llm)
        default_model: LLM model for analysis (default: gpt-4o)
        lookback_hours: Hours to look back (default: 24)
        limit: Max resources to fetch (strongly recommended for LLM mode)
        similarity_threshold: Minimum similarity score for semantic mode (default: 0.7)
        top_k: Max edges to add per resource (default: 3)

    Returns:
        Statistics dict with keys ``user_id``, ``mode``, ``lookback_hours``,
        ``resources_processed``, ``edges_created``, ``llm_calls_made``
        (``None`` in semantic mode) and ``status`` (``"no_data"`` when no
        recent resources were found, otherwise ``"success"``).
    """
    # NOTE(review): naive UTC timestamp; this assumes Resource.created_at is
    # also naive UTC. Confirm before switching to timezone-aware datetimes.
    cutoff = datetime.utcnow() - timedelta(hours=lookback_hours)

    # Create repository and REM service
    resource_repo = Repository(Resource, "resources", db=db)
    rem_service = RemService(postgres_service=db)

    # Register Resource model for REM queries
    rem_service.register_model("resources", Resource)

    # Query recent resources, then filter by timestamp
    resources = await resource_repo.find(
        filters={"user_id": user_id},
        order_by="created_at DESC",
        limit=limit,
    )
    resources = [r for r in resources if r.created_at and r.created_at >= cutoff]

    if not resources:
        return {
            "user_id": user_id,
            "mode": mode.value,
            "lookback_hours": lookback_hours,
            "resources_processed": 0,
            "edges_created": 0,
            "llm_calls_made": 0 if mode == AffinityMode.LLM else None,
            "status": "no_data",
        }

    logger.info(
        f"Building affinity for {len(resources)} resources in {mode.value} mode"
    )

    # Statistics tracking
    resources_processed = 0
    total_edges_created = 0
    llm_calls_made = 0

    # O(1) resolution of search hits back to the Resource objects in this batch
    resources_by_id = {str(r.id): r for r in resources}

    # Load LLM agent for relationship assessment if needed
    affinity_agent = None
    if mode == AffinityMode.LLM:
        if limit is None:
            logger.warning(
                "LLM affinity mode called without a limit; "
                "every recent resource will be compared pairwise"
            )
        agent_schema = load_agent_schema("resource-affinity-assessor")
        affinity_agent_runtime = await create_agent(
            agent_schema_override=agent_schema,
            model_override=default_model,  # type: ignore[arg-type]
        )
        affinity_agent = affinity_agent_runtime.agent

    # Process each resource
    for resource in resources:
        if not resource.content:
            logger.debug(f"Skipping resource {resource.id} - no content for embedding")
            continue

        own_id = str(resource.id)
        similar_resources: list[dict[str, Any]] = []

        if mode == AffinityMode.SEMANTIC:
            # Use REM SEARCH for vector similarity
            try:
                search_query = RemQuery(
                    query_type=QueryType.SEARCH,
                    user_id=user_id,
                    parameters=SearchParameters(
                        table_name="resources",
                        query_text=resource.content[:1000],  # Use first 1000 chars
                        limit=top_k + 1,  # +1 to exclude self
                        min_similarity=similarity_threshold,
                    ),
                )
                search_result = await rem_service.execute_query(search_query)

                # Note: SEARCH query returns {entity_type, similarity_score, data (JSONB)}
                for candidate in search_result.get("results", []):
                    candidate_data = candidate.get("data") or {}
                    raw_id = candidate_data.get("id")
                    if not raw_id:
                        continue

                    # Normalize to str so the self-check and lookup also work
                    # when the id is not already a string.
                    candidate_id = str(raw_id)
                    if candidate_id == own_id:
                        continue

                    match = resources_by_id.get(candidate_id)
                    if match is None:
                        # Hit is outside the fetched batch; it must not
                        # consume one of the top_k slots.
                        continue

                    similar_resources.append(
                        {
                            "resource": match,
                            "similarity_score": candidate.get("similarity_score")
                            or 0.0,
                            "relationship_type": "semantic_similar",
                            "relationship_strength": "moderate",
                            "edge_labels": [],
                        }
                    )

            except Exception as e:
                # Best effort: a failed search skips this resource only.
                logger.warning(
                    f"Vector search failed for resource {resource.id}: {e}"
                )
                continue

        elif mode == AffinityMode.LLM:
            # Use LLM to assess relationships with the other resources
            assert affinity_agent is not None, "Agent must be initialized in LLM mode"
            resource_a = _resource_payload(resource)

            for other_resource in resources:
                if other_resource.id == resource.id:
                    continue
                if not other_resource.content:
                    # Nothing to assess; also avoids paying for an LLM call.
                    logger.debug(
                        f"Skipping comparison with resource {other_resource.id} - no content"
                    )
                    continue

                input_data = {
                    "resource_a": resource_a,
                    "resource_b": _resource_payload(other_resource),
                }

                # Run agent
                result = await affinity_agent.run(json.dumps(input_data, indent=2))
                llm_calls_made += 1

                # Serialize result
                assessment = serialize_agent_result(result.output)

                # Type guard: ensure we have a dict
                if not isinstance(assessment, dict):
                    logger.warning(f"Expected dict from affinity agent, got {type(assessment)}")
                    continue

                # If relationship exists, add to similar resources
                if assessment.get("relationship_exists"):
                    strength = assessment.get("relationship_strength", "moderate")
                    similar_resources.append(
                        {
                            "resource": other_resource,
                            "similarity_score": _STRENGTH_TO_WEIGHT.get(strength, 0.7),
                            "relationship_type": assessment.get(
                                "relationship_type", "related"
                            ),
                            "relationship_strength": strength,
                            "edge_labels": assessment.get("edge_labels", []),
                            "reasoning": assessment.get("reasoning", ""),
                        }
                    )

                # Limit LLM comparisons to top_k
                if len(similar_resources) >= top_k:
                    break

        # Create graph edges for similar resources
        new_edges = [
            _make_affinity_edge(similar, mode)
            for similar in similar_resources[:top_k]
        ]

        # Merge with existing edges (deduplication: keep highest weight)
        existing_edges = resource.graph_edges or []
        resource.graph_edges = merge_graph_edges(existing_edges, new_edges)
        await resource_repo.upsert(resource)

        resources_processed += 1
        edges_added = len(new_edges)
        total_edges_created += edges_added

        logger.debug(
            f"Processed resource {resource.id} ({resource.name}): "
            f"found {len(similar_resources)} similar resources, "
            f"added {edges_added} edges"
        )

    return {
        "user_id": user_id,
        "mode": mode.value,
        "lookback_hours": lookback_hours,
        "resources_processed": resources_processed,
        "edges_created": total_edges_created,
        "llm_calls_made": llm_calls_made if mode == AffinityMode.LLM else None,
        "status": "success",
    }