remdb 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187)
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +801 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.7.dist-info/METADATA +1473 -0
  185. remdb-0.3.7.dist-info/RECORD +187 -0
  186. remdb-0.3.7.dist-info/WHEEL +4 -0
  187. remdb-0.3.7.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,264 @@
1
+ """
2
+ Moment Service - Extracts temporal narratives from resources.
3
+
4
+ Analyzes recent resources and sessions to identify temporal narratives
5
+ (meetings, coding sessions, conversations) and creates Moment entities
6
+ with temporal boundaries and metadata.
7
+ """
8
+
9
+ import json
10
+ from datetime import datetime, timedelta
11
+ from pathlib import Path
12
+ from typing import Any, Optional
13
+ from uuid import uuid4
14
+
15
+ import yaml
16
+ from loguru import logger
17
+
18
+ from ...agentic.providers.pydantic_ai import create_agent
19
+ from ...agentic.serialization import serialize_agent_result
20
+ from ...models.entities.moment import Moment, Person
21
+ from ...models.entities.resource import Resource
22
+ from ...models.entities.message import Message
23
+ from ...services.postgres.repository import Repository
24
+ from ...services.postgres.service import PostgresService
25
+
26
+
27
def _extraction_edge(dst: str, entity_type: str, weight: float) -> dict[str, Any]:
    """Build a single ``extracted_from`` graph edge for a moment.

    Args:
        dst: Target entity id (resource or session id).
        entity_type: Stored in edge properties ("resource" or "session").
        weight: Edge weight (resources are weighted 1.0, sessions 0.8).

    Returns:
        Edge dict in the shape expected by ``Moment.graph_edges``.
    """
    return {
        "dst": dst,
        "rel_type": "extracted_from",
        "weight": weight,
        "properties": {
            "entity_type": entity_type,
            "extraction_method": "moment_builder_agent",
        },
        "created_at": datetime.utcnow().isoformat(),
    }


async def construct_moments(
    user_id: str,
    db: PostgresService,
    default_model: str = "gpt-4o",
    lookback_hours: int = 24,
    limit: Optional[int] = None,
) -> dict[str, Any]:
    """
    Extract moments from resources.

    Analyzes recent resources and messages to identify temporal narratives
    (meetings, coding sessions, conversations) and creates Moment entities
    with temporal boundaries and metadata.

    Process:
        1. Query PostgreSQL for recent resources and messages for this user
        2. Load MomentBuilder agent schema from the packaged filesystem
        3. Run agent to extract moments from the data
        4. Create Moment entities via Repository
        5. Link moments to source resources/sessions via graph edges
        6. Embeddings auto-generated by the embedding worker

    Args:
        user_id: User to process.
        db: Database service (already connected).
        default_model: LLM model for analysis (default: gpt-4o).
        lookback_hours: Hours to look back (default: 24).
        limit: Max resources/messages to fetch per query.

    Returns:
        Statistics about moment construction.

    Raises:
        FileNotFoundError: If the MomentBuilder schema file is missing.
        ValueError: If the agent returns a non-dict payload.
    """
    # NOTE: utcnow() is naive; entity timestamps are assumed UTC-naive too.
    cutoff = datetime.utcnow() - timedelta(hours=lookback_hours)

    # Repositories for what we read (resources, messages) and write (moments).
    resource_repo = Repository(Resource, "resources", db=db)
    message_repo = Repository(Message, "messages", db=db)
    moment_repo = Repository(Moment, "moments", db=db)

    # Query recent resources, newest first. Timestamp filtering happens in
    # Python because Repository.find does not support comparisons yet.
    resources = await resource_repo.find(
        filters={
            "user_id": user_id,
        },
        order_by="created_at DESC",
        limit=limit,
    )
    resources = [
        r for r in resources if r.created_at and r.created_at >= cutoff
    ]

    # Query recent messages (grouped by session_id for context).
    messages = await message_repo.find(
        filters={
            "user_id": user_id,
        },
        order_by="created_at DESC",
        limit=limit,
    )
    # FIX: guard against NULL created_at, matching the resources filter above.
    # Previously a message row with no timestamp raised TypeError here.
    messages = [m for m in messages if m.created_at and m.created_at >= cutoff]

    if not resources and not messages:
        return {
            "user_id": user_id,
            "lookback_hours": lookback_hours,
            "resources_queried": 0,
            "messages_queried": 0,
            "moments_created": 0,
            "graph_edges_added": 0,
            "status": "no_data",
        }

    # Load the MomentBuilder agent schema shipped with the package.
    schema_path = (
        Path(__file__).parent.parent.parent
        / "schemas"
        / "agents"
        / "core"
        / "moment-builder.yaml"
    )

    if not schema_path.exists():
        raise FileNotFoundError(f"MomentBuilder schema not found: {schema_path}")

    with open(schema_path) as f:
        agent_schema = yaml.safe_load(f)

    # Prepare input data for the agent.
    input_data = {
        "resources": [
            {
                "id": str(r.id),
                "name": r.name,
                "category": r.category,
                "content": r.content,
                "created_at": (
                    r.created_at.isoformat() if r.created_at else None
                ),
            }
            for r in resources
        ],
        "messages": [
            {
                "id": str(m.id),
                "session_id": m.session_id,
                "message_type": m.message_type,
                "content": m.content,
                "created_at": m.created_at.isoformat(),
            }
            for m in messages
        ],
    }

    # Create and run the MomentBuilder agent.
    agent_runtime = await create_agent(
        agent_schema_override=agent_schema,
        model_override=default_model,  # type: ignore[arg-type]
    )

    result = await agent_runtime.run(json.dumps(input_data, indent=2))

    # Serialize result (critical for Pydantic models!)
    output_data = serialize_agent_result(result.output)

    # Type guard: ensure we have a dict.
    if not isinstance(output_data, dict):
        raise ValueError(f"Expected dict from MomentBuilder agent, got {type(output_data)}")

    moments_data = output_data.get("moments", [])
    analysis_summary = output_data.get("analysis_summary", "")

    logger.info(
        f"MomentBuilder extracted {len(moments_data)} moments. Summary: {analysis_summary}"
    )

    created_moments = []
    total_edges = 0

    for moment_data in moments_data:
        # The agent reports the moment's span as created_at /
        # resource_ends_timestamp; map them to starts/ends timestamps.
        starts_ts_str = moment_data.get("created_at")
        ends_ts_str = moment_data.get("resource_ends_timestamp")

        if not starts_ts_str:
            logger.warning(f"Skipping moment without start timestamp: {moment_data.get('name')}")
            continue

        # fromisoformat on older Pythons rejects a trailing "Z"; normalize it.
        starts_ts = datetime.fromisoformat(starts_ts_str.replace("Z", "+00:00"))
        ends_ts = (
            datetime.fromisoformat(ends_ts_str.replace("Z", "+00:00"))
            if ends_ts_str
            else None
        )

        source_resource_ids = moment_data.get("source_resource_ids", [])
        source_session_ids = moment_data.get("source_session_ids", [])

        # Link the moment back to its sources; resources are weighted higher
        # than sessions.
        graph_edges = [
            _extraction_edge(resource_id, "resource", 1.0)
            for resource_id in source_resource_ids
        ] + [
            _extraction_edge(session_id, "session", 0.8)
            for session_id in source_session_ids
        ]

        moment = Moment(
            id=str(uuid4()),
            tenant_id=user_id,  # Set tenant_id = user_id
            user_id=user_id,
            name=moment_data.get("name"),
            moment_type=moment_data.get("moment_type"),
            category=moment_data.get("moment_type"),  # Use moment_type as category
            starts_timestamp=starts_ts,
            ends_timestamp=ends_ts,
            present_persons=[
                Person(id=p["id"], name=p["name"], role=p.get("comment"))
                for p in moment_data.get("present_persons", [])
            ],
            emotion_tags=moment_data.get("emotion_tags", []),
            topic_tags=moment_data.get("topic_tags", []),
            summary=moment_data.get("content"),  # Use content as summary
            source_resource_ids=source_resource_ids,
            graph_edges=graph_edges,
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
        )

        # Save to database (embeddings auto-generated by embedding worker).
        await moment_repo.upsert(moment)
        created_moments.append(moment)
        total_edges += len(graph_edges)

        logger.debug(
            f"Created moment: {moment.name} ({moment.moment_type}) with {len(graph_edges)} edges"
        )

    return {
        "user_id": user_id,
        "lookback_hours": lookback_hours,
        "resources_queried": len(resources),
        "messages_queried": len(messages),
        "moments_created": len(created_moments),
        "graph_edges_added": total_edges,
        "analysis_summary": analysis_summary,
        "status": "success",
    }
@@ -0,0 +1,54 @@
1
+ """
2
+ Ontology Service - Extracts domain-specific knowledge from files.
3
+
4
+ Finds files processed within lookback window and applies matching
5
+ OntologyConfig rules to extract structured knowledge using custom agents.
6
+ """
7
+
8
+ from typing import Any, Optional
9
+
10
+
11
async def extract_ontologies(
    user_id: str,
    lookback_hours: int = 24,
    limit: Optional[int] = None,
) -> dict[str, Any]:
    """
    Extract domain-specific knowledge from files using custom agents.

    Finds files processed within the lookback window and applies matching
    OntologyConfig rules to extract structured knowledge.

    Process:
        1. Query REM for files processed by this user (lookback window)
        2. For each file, find matching OntologyConfig rules
        3. Load agent schemas from database
        4. Execute agents on file content
        5. Generate embeddings for extracted data
        6. Store Ontology entities

    Args:
        user_id: User to process.
        lookback_hours: Hours to look back (default: 24).
        limit: Max files to process.

    Returns:
        Statistics about ontology extraction.
    """
    # TODO: Implement using REM query API + OntologyExtractorService
    # Query files with timestamp filter and processing_status='completed'
    # Load matching OntologyConfigs from database
    # Use OntologyExtractorService to extract ontologies
    # Generate embeddings for embedding_text field

    # Stub implementation: every counter is zero until the pipeline exists.
    stats: dict[str, Any] = {
        "user_id": user_id,
        "lookback_hours": lookback_hours,
    }
    stats.update(
        files_queried=0,
        configs_matched=0,
        ontologies_created=0,
        embeddings_generated=0,
        agent_calls_made=0,
        status="stub_not_implemented",
    )
    return stats
@@ -0,0 +1,297 @@
1
+ """
2
+ User Model Service - Updates user profiles from activity.
3
+
4
+ Analyzes recent sessions, moments, and resources to generate
5
+ comprehensive user profile summaries using LLM analysis.
6
+ """
7
+
8
+ import json
9
+ from datetime import datetime, timedelta
10
+ from pathlib import Path
11
+ from typing import Any, Optional
12
+
13
+ import yaml
14
+ from loguru import logger
15
+
16
+ from ...agentic.providers.pydantic_ai import create_agent
17
+ from ...agentic.serialization import serialize_agent_result
18
+ from ...models.entities.moment import Moment
19
+ from ...models.entities.resource import Resource
20
+ from ...models.entities.message import Message
21
+ from ...models.entities.user import User
22
+ from ...services.postgres.repository import Repository
23
+ from ...services.postgres.service import PostgresService
24
+
25
+
26
async def update_user_model(
    user_id: str,
    db: PostgresService,
    default_model: str = "gpt-4o",
    time_window_days: int = 30,
    max_messages: int = 100,
    max_moments: int = 20,
    max_resources: int = 20,
) -> dict[str, Any]:
    """
    Update user model from recent activity.

    Reads recent messages, moments, and resources to generate a
    comprehensive user profile summary using LLM analysis, then writes
    the profile (summary, interests, activity level, graph edges) back
    to the User entity via upsert.

    Args:
        user_id: User to process.
        db: Database service (already connected).
        default_model: LLM model for analysis (default: gpt-4o).
        time_window_days: Days to look back for activity (default: 30).
        max_messages: Max messages to analyze.
        max_moments: Max moments to include.
        max_resources: Max resources to include.

    Returns:
        Statistics about the user model update.

    Raises:
        FileNotFoundError: If the UserProfileBuilder schema file is missing.
        ValueError: If the agent returns a non-dict payload.
    """
    # NOTE: utcnow() is naive; entity timestamps are assumed UTC-naive too.
    cutoff = datetime.utcnow() - timedelta(days=time_window_days)

    # Repositories for everything we read, plus the users table we write.
    message_repo = Repository(Message, "messages", db=db)
    moment_repo = Repository(Moment, "moments", db=db)
    resource_repo = Repository(Resource, "resources", db=db)
    user_repo = Repository(User, "users", db=db)

    filters = {"user_id": user_id}

    # Timestamp filtering happens in Python because Repository.find does not
    # support comparison operators. FIX: guard against NULL timestamps for
    # messages and moments too (previously only resources were guarded, so a
    # row with no timestamp raised TypeError on the comparison).
    messages = await message_repo.find(
        filters=filters,
        order_by="created_at DESC",
        limit=max_messages,
    )
    messages = [m for m in messages if m.created_at and m.created_at >= cutoff]

    moments = await moment_repo.find(
        filters=filters,
        order_by="starts_timestamp DESC",
        limit=max_moments,
    )
    moments = [m for m in moments if m.starts_timestamp and m.starts_timestamp >= cutoff]

    resources = await resource_repo.find(
        filters=filters,
        order_by="created_at DESC",
        limit=max_resources,
    )
    resources = [r for r in resources if r.created_at and r.created_at >= cutoff]

    if not messages and not moments and not resources:
        return {
            "user_id": user_id,
            "time_window_days": time_window_days,
            "messages_analyzed": 0,
            "moments_included": 0,
            "resources_included": 0,
            "user_updated": False,
            "status": "no_data",
        }

    logger.info(
        f"Building user profile for {user_id}: "
        f"{len(messages)} messages, {len(moments)} moments, {len(resources)} resources"
    )

    # Load the UserProfileBuilder agent schema shipped with the package.
    schema_path = (
        Path(__file__).parent.parent.parent
        / "schemas"
        / "agents"
        / "core"
        / "user-profile-builder.yaml"
    )

    if not schema_path.exists():
        raise FileNotFoundError(f"UserProfileBuilder schema not found: {schema_path}")

    with open(schema_path) as f:
        agent_schema = yaml.safe_load(f)

    # Prepare input data for the agent. Content fields are truncated to keep
    # the prompt within token budget.
    input_data = {
        "user_id": user_id,
        "time_window_days": time_window_days,
        "messages": [
            {
                "id": str(m.id),
                "session_id": m.session_id,
                "message_type": m.message_type,
                "content": m.content[:500] if m.content else "",  # Limit for token efficiency
                "created_at": m.created_at.isoformat(),
            }
            for m in messages
        ],
        "moments": [
            {
                "id": str(m.id),
                "name": m.name,
                "moment_type": m.moment_type,
                "emotion_tags": m.emotion_tags,
                "topic_tags": m.topic_tags,
                "present_persons": [
                    {"id": str(p.id), "name": p.name, "role": p.role}
                    for p in m.present_persons
                ],
                "starts_timestamp": m.starts_timestamp.isoformat(),
                "summary": m.summary[:300] if m.summary else "",
            }
            for m in moments
        ],
        "resources": [
            {
                "id": str(r.id),
                "name": r.name,
                "category": r.category,
                "content": r.content[:1000] if r.content else "",  # First 1000 chars
                "created_at": (
                    r.created_at.isoformat() if r.created_at else None
                ),
            }
            for r in resources
        ],
    }

    # Create and run the UserProfileBuilder agent.
    agent_runtime = await create_agent(
        agent_schema_override=agent_schema,
        model_override=default_model,  # type: ignore[arg-type]
    )

    result = await agent_runtime.run(json.dumps(input_data, indent=2))

    # Serialize result (critical for Pydantic models!)
    raw_result = serialize_agent_result(result.output)

    # Ensure result is a dict (agent should always return structured data).
    if not isinstance(raw_result, dict):
        raise ValueError(f"Expected dict from user profile agent, got {type(raw_result)}")

    profile_data: dict[str, Any] = raw_result

    logger.info(
        f"Generated user profile. Summary: {profile_data.get('summary', '')[:100]}..."
    )

    # Get or create the User entity. find() with a user_id filter is used
    # instead of get_by_id() because user_id is a string, not a UUID.
    existing_users = await user_repo.find(filters={"user_id": user_id}, limit=1)
    user = existing_users[0] if existing_users else None

    if not user:
        # Create new user (id will be auto-generated as UUID).
        user = User(
            tenant_id=user_id,  # Set tenant_id = user_id
            user_id=user_id,
            name=user_id,  # Default to user_id, can be updated later
            metadata={},
            graph_edges=[],
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
        )

    # Store the full profile in metadata alongside generation provenance.
    user.metadata = user.metadata or {}
    user.metadata.update(
        {
            "profile": profile_data,
            "profile_generated_at": datetime.utcnow().isoformat(),
            "profile_time_window_days": time_window_days,
        }
    )

    # Promote key profile fields onto the User model itself.
    # (Simplified from two no-op list comprehensions to plain unpacking.)
    user.summary = profile_data.get("summary", "")
    user.interests = [
        *profile_data.get("expertise_areas", []),
        *profile_data.get("learning_interests", []),
    ]
    user.preferred_topics = profile_data.get("recommended_tags", [])

    # Coarse activity level derived from raw volume of recent activity.
    total_activity = len(messages) + len(moments) + len(resources)
    if total_activity >= 50:
        user.activity_level = "active"
    elif total_activity >= 10:
        user.activity_level = "moderate"
    else:
        user.activity_level = "inactive"

    user.last_active_at = datetime.utcnow()

    # Function-local import kept as in the original (presumably to avoid an
    # import cycle between dreaming modules -- TODO confirm).
    from .utils import merge_graph_edges

    # Link the user to their most recent resources and moments (top 5 each).
    graph_edges = []

    for resource in resources[:5]:
        graph_edges.append(
            {
                "dst": str(resource.id),  # Convert UUID to string
                "rel_type": "recently_worked_on",
                "weight": 0.8,
                "properties": {
                    "entity_type": "resource",
                    "dst_name": resource.name,
                    "dst_category": resource.category,
                },
                "created_at": datetime.utcnow().isoformat(),
            }
        )

    for moment in moments[:5]:
        graph_edges.append(
            {
                "dst": str(moment.id),  # Convert UUID to string
                "rel_type": "participated_in",
                "weight": 0.9,
                "properties": {
                    "entity_type": "moment",
                    "dst_name": moment.name,
                    "dst_moment_type": moment.moment_type,
                },
                "created_at": datetime.utcnow().isoformat(),
            }
        )

    # Merge with existing edges (dedup keeps the highest-weight edge).
    user.graph_edges = merge_graph_edges(user.graph_edges or [], graph_edges)
    user.updated_at = datetime.utcnow()

    await user_repo.upsert(user)

    return {
        "user_id": user_id,
        "time_window_days": time_window_days,
        "messages_analyzed": len(messages),
        "moments_included": len(moments),
        "resources_included": len(resources),
        "current_projects": len(profile_data.get("current_projects", [])),
        "technical_stack_size": len(profile_data.get("technical_stack", [])),
        "key_collaborators": len(profile_data.get("key_collaborators", [])),
        "graph_edges_added": len(graph_edges),
        "user_updated": True,
        "status": "success",
    }
@@ -0,0 +1,39 @@
1
+ """
2
+ Dreaming utilities - Common functions for dreaming services.
3
+ """
4
+
5
+ from typing import Any
6
+
7
+
8
def merge_graph_edges(
    existing_edges: list[dict[str, Any]], new_edges: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """
    Merge graph edges, deduplicating on the (dst, rel_type) pair.

    When both lists contain an edge for the same pair, the one with the
    higher weight wins, so the strongest relationship is preserved.

    Args:
        existing_edges: Current edges on the entity.
        new_edges: Candidate edges to fold in.

    Returns:
        Merged list of edges with duplicates removed.
    """
    # Seed the map with the existing edges, keyed by (dst, rel_type).
    merged: dict[tuple[str, str], dict[str, Any]] = {
        (edge.get("dst", ""), edge.get("rel_type", "")): edge
        for edge in existing_edges
    }

    # A new edge replaces the current holder only if it is strictly heavier.
    for candidate in new_edges:
        key = (candidate.get("dst", ""), candidate.get("rel_type", ""))
        incumbent = merged.get(key)
        if incumbent is None or candidate.get("weight", 0) > incumbent.get("weight", 0):
            merged[key] = candidate

    return list(merged.values())
@@ -0,0 +1,11 @@
1
+ """Embeddings service for background embedding generation."""
2
+
3
+ from .api import generate_embedding, generate_embedding_async
4
+ from .worker import EmbeddingTask, EmbeddingWorker
5
+
6
+ __all__ = [
7
+ "EmbeddingTask",
8
+ "EmbeddingWorker",
9
+ "generate_embedding",
10
+ "generate_embedding_async",
11
+ ]