remdb 0.2.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +565 -0
- rem/cli/commands/configure.py +423 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1124 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +88 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +657 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +229 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.2.6.dist-info/METADATA +1191 -0
- remdb-0.2.6.dist-info/RECORD +187 -0
- remdb-0.2.6.dist-info/WHEEL +4 -0
- remdb-0.2.6.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Moment Service - Extracts temporal narratives from resources.
|
|
3
|
+
|
|
4
|
+
Analyzes recent resources and sessions to identify temporal narratives
|
|
5
|
+
(meetings, coding sessions, conversations) and creates Moment entities
|
|
6
|
+
with temporal boundaries and metadata.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from datetime import datetime, timedelta
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import Any, Optional
|
|
13
|
+
from uuid import uuid4
|
|
14
|
+
|
|
15
|
+
import yaml
|
|
16
|
+
from loguru import logger
|
|
17
|
+
|
|
18
|
+
from ...agentic.providers.pydantic_ai import create_agent
|
|
19
|
+
from ...agentic.serialization import serialize_agent_result
|
|
20
|
+
from ...models.entities.moment import Moment, Person
|
|
21
|
+
from ...models.entities.resource import Resource
|
|
22
|
+
from ...models.entities.message import Message
|
|
23
|
+
from ...services.postgres.repository import Repository
|
|
24
|
+
from ...services.postgres.service import PostgresService
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _extraction_edge(dst: str, weight: float, entity_type: str) -> dict[str, Any]:
    """Build a single ``extracted_from`` graph edge pointing at *dst*.

    Args:
        dst: Target entity id (a resource or session id string).
        weight: Edge weight (1.0 for resources, 0.8 for sessions).
        entity_type: Stored in edge properties ("resource" or "session").

    Returns:
        Edge dict in the inline-edge shape used by Moment.graph_edges.
    """
    return {
        "dst": dst,
        "rel_type": "extracted_from",
        "weight": weight,
        "properties": {
            "entity_type": entity_type,
            "extraction_method": "moment_builder_agent",
        },
        "created_at": datetime.utcnow().isoformat(),
    }


async def construct_moments(
    user_id: str,
    db: PostgresService,
    default_model: str = "gpt-4o",
    lookback_hours: int = 24,
    limit: Optional[int] = None,
) -> dict[str, Any]:
    """
    Extract moments from resources.

    Analyzes recent resources to identify temporal narratives
    (meetings, coding sessions, conversations) and creates
    Moment entities with temporal boundaries and metadata.

    Process:
        1. Query PostgreSQL for recent resources and sessions for this user
        2. Load MomentBuilder agent schema from filesystem
        3. Run agent to extract moments from data
        4. Create Moment entities via Repository
        5. Link moments to source resources via graph edges
        6. Embeddings auto-generated by embedding worker

    Args:
        user_id: User to process
        db: Database service (already connected)
        default_model: LLM model for analysis (default: gpt-4o)
        lookback_hours: Hours to look back (default: 24)
        limit: Max resources to process

    Returns:
        Statistics about moment construction

    Raises:
        FileNotFoundError: If the MomentBuilder agent schema file is missing.
        ValueError: If the agent returns something other than a dict.
    """
    cutoff = datetime.utcnow() - timedelta(hours=lookback_hours)

    # Create repositories
    resource_repo = Repository(Resource, "resources", db=db)
    message_repo = Repository(Message, "messages", db=db)
    moment_repo = Repository(Moment, "moments", db=db)

    # Query recent resources
    resources = await resource_repo.find(
        filters={
            "user_id": user_id,
        },
        order_by="created_at DESC",
        limit=limit,
    )

    # Filter by timestamp in Python (SQL doesn't support comparisons in find yet)
    resources = [
        r for r in resources if r.created_at and r.created_at >= cutoff
    ]

    # Query recent messages (grouped by session_id for context)
    messages = await message_repo.find(
        filters={
            "user_id": user_id,
        },
        order_by="created_at DESC",
        limit=limit,
    )

    # Filter by timestamp. FIX: guard against a missing created_at — the
    # original compared None >= cutoff (TypeError); resources already guarded.
    messages = [m for m in messages if m.created_at and m.created_at >= cutoff]

    if not resources and not messages:
        return {
            "user_id": user_id,
            "lookback_hours": lookback_hours,
            "resources_queried": 0,
            "messages_queried": 0,
            "moments_created": 0,
            "graph_edges_added": 0,
            "status": "no_data",
        }

    # Load MomentBuilder agent schema from the packaged schemas directory
    schema_path = (
        Path(__file__).parent.parent.parent
        / "schemas"
        / "agents"
        / "core"
        / "moment-builder.yaml"
    )

    if not schema_path.exists():
        raise FileNotFoundError(f"MomentBuilder schema not found: {schema_path}")

    with open(schema_path) as f:
        agent_schema = yaml.safe_load(f)

    # Prepare input data for agent (JSON-serializable snapshot of activity)
    input_data = {
        "resources": [
            {
                "id": str(r.id),
                "name": r.name,
                "category": r.category,
                "content": r.content,
                "created_at": (
                    r.created_at.isoformat() if r.created_at else None
                ),
            }
            for r in resources
        ],
        "messages": [
            {
                "id": str(m.id),
                "session_id": m.session_id,
                "message_type": m.message_type,
                "content": m.content,
                "created_at": m.created_at.isoformat(),
            }
            for m in messages
        ],
    }

    # Create and run MomentBuilder agent
    agent_runtime = await create_agent(
        agent_schema_override=agent_schema,
        model_override=default_model,  # type: ignore[arg-type]
    )

    result = await agent_runtime.run(json.dumps(input_data, indent=2))

    # Serialize result (critical for Pydantic models!)
    output_data = serialize_agent_result(result.output)

    # Type guard: ensure we have a dict
    if not isinstance(output_data, dict):
        raise ValueError(f"Expected dict from MomentBuilder agent, got {type(output_data)}")

    # Extract moments
    moments_data = output_data.get("moments", [])
    analysis_summary = output_data.get("analysis_summary", "")

    logger.info(
        f"MomentBuilder extracted {len(moments_data)} moments. Summary: {analysis_summary}"
    )

    # Create Moment entities
    created_moments = []
    total_edges = 0

    for moment_data in moments_data:
        # Map created_at/resource_ends_timestamp to starts_timestamp/ends_timestamp
        starts_ts_str = moment_data.get("created_at")
        ends_ts_str = moment_data.get("resource_ends_timestamp")

        if not starts_ts_str:
            logger.warning(f"Skipping moment without start timestamp: {moment_data.get('name')}")
            continue

        # Agent emits ISO-8601; normalize trailing "Z" for fromisoformat
        starts_ts = datetime.fromisoformat(starts_ts_str.replace("Z", "+00:00"))
        ends_ts = (
            datetime.fromisoformat(ends_ts_str.replace("Z", "+00:00"))
            if ends_ts_str
            else None
        )

        # Build graph edges to source resources and sessions
        source_resource_ids = moment_data.get("source_resource_ids", [])
        source_session_ids = moment_data.get("source_session_ids", [])

        graph_edges = [
            _extraction_edge(resource_id, 1.0, "resource")
            for resource_id in source_resource_ids
        ] + [
            _extraction_edge(session_id, 0.8, "session")
            for session_id in source_session_ids
        ]

        # Create Moment entity
        moment = Moment(
            id=str(uuid4()),
            tenant_id=user_id,  # Set tenant_id = user_id
            user_id=user_id,
            name=moment_data.get("name"),
            moment_type=moment_data.get("moment_type"),
            category=moment_data.get("moment_type"),  # Use moment_type as category
            starts_timestamp=starts_ts,
            ends_timestamp=ends_ts,
            present_persons=[
                Person(id=p["id"], name=p["name"], role=p.get("comment"))
                for p in moment_data.get("present_persons", [])
            ],
            emotion_tags=moment_data.get("emotion_tags", []),
            topic_tags=moment_data.get("topic_tags", []),
            summary=moment_data.get("content"),  # Use content as summary
            source_resource_ids=source_resource_ids,
            graph_edges=graph_edges,
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
        )

        # Save to database (embeddings auto-generated by embedding worker)
        await moment_repo.upsert(moment)
        created_moments.append(moment)
        total_edges += len(graph_edges)

        logger.debug(
            f"Created moment: {moment.name} ({moment.moment_type}) with {len(graph_edges)} edges"
        )

    return {
        "user_id": user_id,
        "lookback_hours": lookback_hours,
        "resources_queried": len(resources),
        "messages_queried": len(messages),
        "moments_created": len(created_moments),
        "graph_edges_added": total_edges,
        "analysis_summary": analysis_summary,
        "status": "success",
    }
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Ontology Service - Extracts domain-specific knowledge from files.
|
|
3
|
+
|
|
4
|
+
Finds files processed within lookback window and applies matching
|
|
5
|
+
OntologyConfig rules to extract structured knowledge using custom agents.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Any, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
async def extract_ontologies(
    user_id: str,
    lookback_hours: int = 24,
    limit: Optional[int] = None,
) -> dict[str, Any]:
    """
    Extract domain-specific knowledge from files using custom agents.

    Finds files processed within the lookback window and applies matching
    OntologyConfig rules to extract structured knowledge.

    Process:
        1. Query REM for files processed by this user (lookback window)
        2. For each file, find matching OntologyConfig rules
        3. Load agent schemas from database
        4. Execute agents on file content
        5. Generate embeddings for extracted data
        6. Store Ontology entities

    Args:
        user_id: User to process
        lookback_hours: Hours to look back (default: 24)
        limit: Max files to process

    Returns:
        Statistics about ontology extraction
    """
    # TODO: Implement using REM query API + OntologyExtractorService:
    #   - query files with timestamp filter and processing_status='completed'
    #   - load matching OntologyConfigs from database
    #   - use OntologyExtractorService to extract ontologies
    #   - generate embeddings for the embedding_text field

    # Stub implementation: every counter is zero until the pipeline lands.
    stats: dict[str, Any] = {
        "user_id": user_id,
        "lookback_hours": lookback_hours,
    }
    for counter in (
        "files_queried",
        "configs_matched",
        "ontologies_created",
        "embeddings_generated",
        "agent_calls_made",
    ):
        stats[counter] = 0
    stats["status"] = "stub_not_implemented"
    return stats
|
|
@@ -0,0 +1,297 @@
|
|
|
1
|
+
"""
|
|
2
|
+
User Model Service - Updates user profiles from activity.
|
|
3
|
+
|
|
4
|
+
Analyzes recent sessions, moments, and resources to generate
|
|
5
|
+
comprehensive user profile summaries using LLM analysis.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
import json
|
|
9
|
+
from datetime import datetime, timedelta
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import Any, Optional
|
|
12
|
+
|
|
13
|
+
import yaml
|
|
14
|
+
from loguru import logger
|
|
15
|
+
|
|
16
|
+
from ...agentic.providers.pydantic_ai import create_agent
|
|
17
|
+
from ...agentic.serialization import serialize_agent_result
|
|
18
|
+
from ...models.entities.moment import Moment
|
|
19
|
+
from ...models.entities.resource import Resource
|
|
20
|
+
from ...models.entities.message import Message
|
|
21
|
+
from ...models.entities.user import User
|
|
22
|
+
from ...services.postgres.repository import Repository
|
|
23
|
+
from ...services.postgres.service import PostgresService
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _activity_edge(
    dst: str, rel_type: str, weight: float, properties: dict[str, Any]
) -> dict[str, Any]:
    """Build a graph edge from the user to a recent activity entity.

    Args:
        dst: Target entity id (already stringified).
        rel_type: Relationship type, e.g. "recently_worked_on".
        weight: Edge weight.
        properties: Edge properties; must include "entity_type".

    Returns:
        Edge dict in the inline-edge shape used by User.graph_edges.
    """
    return {
        "dst": dst,
        "rel_type": rel_type,
        "weight": weight,
        "properties": properties,
        "created_at": datetime.utcnow().isoformat(),
    }


async def update_user_model(
    user_id: str,
    db: PostgresService,
    default_model: str = "gpt-4o",
    time_window_days: int = 30,
    max_messages: int = 100,
    max_moments: int = 20,
    max_resources: int = 20,
) -> dict[str, Any]:
    """
    Update user model from recent activity.

    Reads recent messages, moments, and resources to generate
    a comprehensive user profile summary using LLM analysis.

    Process:
        1. Query PostgreSQL for recent messages, moments, resources for this user
        2. Load UserProfileBuilder agent schema
        3. Generate user profile using LLM
        4. Update User entity with profile data and metadata
        5. Add graph edges to key resources and moments

    Args:
        user_id: User to process
        db: Database service (already connected)
        default_model: LLM model for analysis (default: gpt-4o)
        time_window_days: Days to look back for activity (default: 30)
        max_messages: Max messages to analyze
        max_moments: Max moments to include
        max_resources: Max resources to include

    Returns:
        Statistics about user model update

    Raises:
        FileNotFoundError: If the UserProfileBuilder agent schema is missing.
        ValueError: If the agent returns something other than a dict.
    """
    cutoff = datetime.utcnow() - timedelta(days=time_window_days)

    # Create repositories
    message_repo = Repository(Message, "messages", db=db)
    moment_repo = Repository(Moment, "moments", db=db)
    resource_repo = Repository(Resource, "resources", db=db)
    user_repo = Repository(User, "users", db=db)

    # Build filters using user_id
    filters = {"user_id": user_id}

    # Query recent messages
    messages = await message_repo.find(
        filters=filters,
        order_by="created_at DESC",
        limit=max_messages,
    )
    # Filter by cutoff. FIX: guard against a missing created_at — the original
    # compared None >= cutoff (TypeError); resources already guarded this way.
    messages = [m for m in messages if m.created_at and m.created_at >= cutoff]

    # Query recent moments
    moments = await moment_repo.find(
        filters=filters,
        order_by="starts_timestamp DESC",
        limit=max_moments,
    )
    # Filter by cutoff. FIX: same None guard on starts_timestamp.
    moments = [m for m in moments if m.starts_timestamp and m.starts_timestamp >= cutoff]

    # Query recent resources
    resources = await resource_repo.find(
        filters=filters,
        order_by="created_at DESC",
        limit=max_resources,
    )
    # Filter by cutoff (both are UTC-naive)
    resources = [r for r in resources if r.created_at and r.created_at >= cutoff]

    if not messages and not moments and not resources:
        return {
            "user_id": user_id,
            "time_window_days": time_window_days,
            "messages_analyzed": 0,
            "moments_included": 0,
            "resources_included": 0,
            "user_updated": False,
            "status": "no_data",
        }

    logger.info(
        f"Building user profile for {user_id}: "
        f"{len(messages)} messages, {len(moments)} moments, {len(resources)} resources"
    )

    # Load UserProfileBuilder agent schema from the packaged schemas directory
    schema_path = (
        Path(__file__).parent.parent.parent
        / "schemas"
        / "agents"
        / "core"
        / "user-profile-builder.yaml"
    )

    if not schema_path.exists():
        raise FileNotFoundError(f"UserProfileBuilder schema not found: {schema_path}")

    with open(schema_path) as f:
        agent_schema = yaml.safe_load(f)

    # Prepare input data for agent; content is truncated for token efficiency.
    input_data = {
        "user_id": user_id,
        "time_window_days": time_window_days,
        "messages": [
            {
                "id": str(m.id),
                "session_id": m.session_id,
                "message_type": m.message_type,
                "content": m.content[:500] if m.content else "",  # Limit for token efficiency
                "created_at": m.created_at.isoformat(),
            }
            for m in messages
        ],
        "moments": [
            {
                "id": str(m.id),
                "name": m.name,
                "moment_type": m.moment_type,
                "emotion_tags": m.emotion_tags,
                "topic_tags": m.topic_tags,
                "present_persons": [
                    {"id": str(p.id), "name": p.name, "role": p.role}
                    for p in m.present_persons
                ],
                "starts_timestamp": m.starts_timestamp.isoformat(),
                "summary": m.summary[:300] if m.summary else "",
            }
            for m in moments
        ],
        "resources": [
            {
                "id": str(r.id),
                "name": r.name,
                "category": r.category,
                "content": r.content[:1000] if r.content else "",  # First 1000 chars
                "created_at": (
                    r.created_at.isoformat() if r.created_at else None
                ),
            }
            for r in resources
        ],
    }

    # Create and run UserProfileBuilder agent
    agent_runtime = await create_agent(
        agent_schema_override=agent_schema,
        model_override=default_model,  # type: ignore[arg-type]
    )

    result = await agent_runtime.run(json.dumps(input_data, indent=2))

    # Serialize result (critical for Pydantic models!)
    raw_result = serialize_agent_result(result.output)

    # Ensure result is a dict (agent should always return structured data)
    if not isinstance(raw_result, dict):
        raise ValueError(f"Expected dict from user profile agent, got {type(raw_result)}")

    profile_data: dict[str, Any] = raw_result

    logger.info(
        f"Generated user profile. Summary: {profile_data.get('summary', '')[:100]}..."
    )

    # Get or create User entity.
    # Use find() with user_id filter instead of get_by_id() since user_id is a string, not UUID
    existing_users = await user_repo.find(filters={"user_id": user_id}, limit=1)
    user = existing_users[0] if existing_users else None

    if not user:
        # Create new user (id will be auto-generated as UUID)
        user = User(
            tenant_id=user_id,  # Set tenant_id = user_id
            user_id=user_id,
            name=user_id,  # Default to user_id, can be updated later
            metadata={},
            graph_edges=[],
            created_at=datetime.utcnow(),
            updated_at=datetime.utcnow(),
        )

    # Update user metadata with full profile
    user.metadata = user.metadata or {}
    user.metadata.update(
        {
            "profile": profile_data,
            "profile_generated_at": datetime.utcnow().isoformat(),
            "profile_time_window_days": time_window_days,
        }
    )

    # Update user model fields from profile. Interests combine expertise areas
    # with learning interests (plain concatenation, duplicates preserved).
    user.summary = profile_data.get("summary", "")
    user.interests = profile_data.get("expertise_areas", []) + profile_data.get(
        "learning_interests", []
    )
    user.preferred_topics = profile_data.get("recommended_tags", [])

    # Determine activity level based on data volume
    total_activity = len(messages) + len(moments) + len(resources)
    if total_activity >= 50:
        user.activity_level = "active"
    elif total_activity >= 10:
        user.activity_level = "moderate"
    else:
        user.activity_level = "inactive"

    user.last_active_at = datetime.utcnow()

    # Build graph edges to key resources and moments.
    # Local import keeps the module import graph shallow — TODO confirm whether
    # this was a circular-import workaround before hoisting to the top of file.
    from .utils import merge_graph_edges

    graph_edges = []

    # Add edges to recent resources (top 5)
    for resource in resources[:5]:
        graph_edges.append(
            _activity_edge(
                str(resource.id),  # Convert UUID to string
                "recently_worked_on",
                0.8,
                {
                    "entity_type": "resource",
                    "dst_name": resource.name,
                    "dst_category": resource.category,
                },
            )
        )

    # Add edges to recent moments (top 5)
    for moment in moments[:5]:
        graph_edges.append(
            _activity_edge(
                str(moment.id),  # Convert UUID to string
                "participated_in",
                0.9,
                {
                    "entity_type": "moment",
                    "dst_name": moment.name,
                    "dst_moment_type": moment.moment_type,
                },
            )
        )

    # Merge edges with existing
    user.graph_edges = merge_graph_edges(user.graph_edges or [], graph_edges)
    user.updated_at = datetime.utcnow()

    # Save user
    await user_repo.upsert(user)

    return {
        "user_id": user_id,
        "time_window_days": time_window_days,
        "messages_analyzed": len(messages),
        "moments_included": len(moments),
        "resources_included": len(resources),
        "current_projects": len(profile_data.get("current_projects", [])),
        "technical_stack_size": len(profile_data.get("technical_stack", [])),
        "key_collaborators": len(profile_data.get("key_collaborators", [])),
        "graph_edges_added": len(graph_edges),
        "user_updated": True,
        "status": "success",
    }
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Dreaming utilities - Common functions for dreaming services.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def merge_graph_edges(
    existing_edges: list[dict[str, Any]], new_edges: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """
    Merge graph edges with deduplication.

    An edge is identified by its ``(dst, rel_type)`` pair. When both lists
    carry an edge for the same pair, the heavier one is kept; on a tie the
    existing edge wins. This prevents duplicates while preserving the
    strongest relationships.

    Args:
        existing_edges: Current edges on the resource
        new_edges: New edges to add

    Returns:
        Merged list of edges with duplicates removed
    """

    def edge_key(edge: dict[str, Any]) -> tuple[str, str]:
        # Missing fields collapse to "" so malformed edges still dedupe.
        return (edge.get("dst", ""), edge.get("rel_type", ""))

    # Seed with existing edges (later duplicates within the list overwrite
    # earlier ones, matching plain dict-assignment semantics).
    merged: dict[tuple[str, str], dict[str, Any]] = {
        edge_key(edge): edge for edge in existing_edges
    }

    # New edges only displace an incumbent when strictly heavier.
    for candidate in new_edges:
        key = edge_key(candidate)
        incumbent = merged.get(key)
        if incumbent is None or candidate.get("weight", 0) > incumbent.get("weight", 0):
            merged[key] = candidate

    return list(merged.values())
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Embeddings service for background embedding generation."""
|
|
2
|
+
|
|
3
|
+
from .api import generate_embedding, generate_embedding_async
|
|
4
|
+
from .worker import EmbeddingTask, EmbeddingWorker
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"EmbeddingTask",
|
|
8
|
+
"EmbeddingWorker",
|
|
9
|
+
"generate_embedding",
|
|
10
|
+
"generate_embedding_async",
|
|
11
|
+
]
|