remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
"""
|
|
2
|
+
InlineEdge - Knowledge graph edge representation.
|
|
3
|
+
|
|
4
|
+
REM uses human-readable entity labels instead of UUIDs for graph edges,
|
|
5
|
+
enabling natural language queries without schema knowledge.
|
|
6
|
+
|
|
7
|
+
Key Design Decision:
|
|
8
|
+
- dst field contains LABELS (e.g., "sarah-chen", "tidb-migration-spec")
|
|
9
|
+
- NOT UUIDs (e.g., "550e8400-e29b-41d4-a716-446655440000")
|
|
10
|
+
- This enables LOOKUP operations on labels directly
|
|
11
|
+
- LLMs can query "LOOKUP sarah-chen" without knowing internal IDs
|
|
12
|
+
|
|
13
|
+
Edge Weight Guidelines:
|
|
14
|
+
- 1.0: Primary/strong relationships (authored_by, owns, part_of)
|
|
15
|
+
- 0.8-0.9: Important relationships (depends_on, reviewed_by, implements)
|
|
16
|
+
- 0.5-0.7: Secondary relationships (references, related_to, inspired_by)
|
|
17
|
+
- 0.3-0.4: Weak relationships (mentions, cites)
|
|
18
|
+
|
|
19
|
+
Destination Entity Type Convention (CRITICAL - properties.dst_entity_type):
|
|
20
|
+
|
|
21
|
+
Format: <table_schema>:<category>/<key>
|
|
22
|
+
|
|
23
|
+
Where:
|
|
24
|
+
- table_schema: Database table (resources, moments, users, etc.)
|
|
25
|
+
- category: Optional entity category within that table (when there is none, write "<table_schema>/<key>", e.g. "resources/api-design-v2")
|
|
26
|
+
- key: The actual entity key (must match dst field)
|
|
27
|
+
|
|
28
|
+
Examples:
|
|
29
|
+
- "resources:managers/bob" → Look up bob in resources table with category="managers"
|
|
30
|
+
- "users:engineers/sarah-chen" → Look up sarah-chen in users table with category="engineers"
|
|
31
|
+
- "moments:meetings/standup-2024-01" → Look up in moments table with category="meetings"
|
|
32
|
+
- "resources/api-design-v2" → Look up api-design-v2 in resources table (no category)
|
|
33
|
+
- "bob" → Defaults to resources table, no category (use sparingly)
|
|
34
|
+
|
|
35
|
+
IMPORTANT - Upsert Rules:
|
|
36
|
+
1. When upserting referenced entities, parse dst_entity_type to determine:
|
|
37
|
+
- table_schema → which table to upsert into
|
|
38
|
+
- category → set the 'category' field in that table
|
|
39
|
+
- key → match against entity_key_field (usually 'name' or 'id')
|
|
40
|
+
|
|
41
|
+
2. If dst_entity_type is missing or just a type like "managers":
|
|
42
|
+
- Default table_schema to "resources"
|
|
43
|
+
- Set category to the type (e.g., "managers")
|
|
44
|
+
- Use dst as the key
|
|
45
|
+
|
|
46
|
+
3. Agents should NEVER guess entity types
|
|
47
|
+
- If type is unknown, omit dst_entity_type or set to null
|
|
48
|
+
- Better to have no category than wrong category
|
|
49
|
+
- System will handle entities without categories
|
|
50
|
+
|
|
51
|
+
4. Category is optional and can be null - this is perfectly fine
|
|
52
|
+
- Categories enable filtering but are not required for graph traversal
|
|
53
|
+
- Use categories when they add semantic value (roles, types, domains)
|
|
54
|
+
|
|
55
|
+
Edge Type Format Guidelines (rel_type):
|
|
56
|
+
- Use snake_case: "authored_by", "depends_on", "references"
|
|
57
|
+
- Be specific but consistent: "reviewed_by" not "reviewed"
|
|
58
|
+
- Use passive voice for bidirectional clarity: "authored_by" (reverse: "authors")
|
|
59
|
+
"""
|
|
60
|
+
|
|
61
|
+
from datetime import datetime, timezone
|
|
62
|
+
from typing import Optional
|
|
63
|
+
|
|
64
|
+
from pydantic import BaseModel, ConfigDict, Field
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _naive_utc_now() -> datetime:
    """Return the current UTC time as a naive datetime (tzinfo removed)."""
    aware_now = datetime.now(timezone.utc)
    return aware_now.replace(tzinfo=None)


class InlineEdge(BaseModel):
    """
    One outgoing knowledge-graph edge, addressed by a readable label.

    The destination is stored as a natural-language key rather than a UUID,
    so an edge can be followed by looking the label up directly.

    Fields:
        dst: Label of the entity the edge points at.
        rel_type: Name of the relationship, written in snake_case.
        weight: Relationship strength, validated to lie in [0.0, 1.0].
        properties: Free-form metadata; ``dst_entity_type`` is the key the
            field description singles out.
        created_at: Naive UTC timestamp, generated when not supplied.
    """

    # Required: the human-readable key of the target entity.
    dst: str = Field(
        default=...,
        description="Human-readable destination key matching the entity's name/id field (e.g., 'tidb-migration-spec', 'sarah-chen', 'bob')",
    )

    # Required: relationship name.
    rel_type: str = Field(
        default=...,
        description="Relationship type in snake_case (e.g., 'authored_by', 'depends_on', 'references')",
    )

    # Optional strength; pydantic rejects values outside 0.0..1.0.
    weight: float = Field(
        0.5,
        ge=0.0,
        le=1.0,
        description="Relationship strength: 1.0=primary, 0.8-0.9=important, 0.5-0.7=secondary, 0.3-0.4=weak",
    )

    # Each instance gets its own fresh dict via default_factory.
    properties: dict = Field(
        default_factory=dict,
        description=(
            "Rich metadata. CRITICAL field: dst_entity_type with format 'table_schema:category/key' "
            "(e.g., 'resources:managers/bob', 'users:engineers/sarah-chen'). "
            "Used to determine upsert target table and category. Can be null/omitted if unknown."
        ),
    )

    # UTC "now" with the timezone stripped, evaluated per instance.
    created_at: datetime = Field(
        default_factory=_naive_utc_now,
        description="Edge creation timestamp",
    )
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
class InlineEdges(BaseModel):
    """
    Collection of InlineEdge objects.

    Wraps a list of edges and offers small helpers for appending an edge
    and for selecting edges by relationship type or by weight.
    """

    edges: list[InlineEdge] = Field(
        default_factory=list, description="List of graph edges"
    )

    def add_edge(
        self,
        dst: str,
        rel_type: str,
        weight: float = 0.5,
        properties: Optional[dict] = None,
    ) -> None:
        """
        Build an InlineEdge from the arguments and append it to ``edges``.

        Args:
            dst: Destination label of the new edge.
            rel_type: Relationship type of the new edge.
            weight: Relationship strength; validated by InlineEdge (0.0-1.0).
            properties: Optional metadata; ``None`` (or an empty dict)
                results in an empty dict on the edge.

        Raises:
            pydantic.ValidationError: If InlineEdge rejects the values.
        """
        edge = InlineEdge(
            dst=dst, rel_type=rel_type, weight=weight, properties=properties or {}
        )
        self.edges.append(edge)

    def filter_by_rel_type(self, rel_types: list[str]) -> list[InlineEdge]:
        """
        Return the edges whose ``rel_type`` is one of ``rel_types``.

        Args:
            rel_types: Relationship types to keep. A single string is
                tolerated and treated as a one-element list.

        Returns:
            Matching edges, in their original order.
        """
        # A bare string would turn ``in`` into a substring test (e.g. "by"
        # matching "authored_by"), so wrap it instead of iterating over it.
        if isinstance(rel_types, str):
            wanted = {rel_types}
        else:
            # Set membership is exact and avoids rescanning the list per edge.
            wanted = set(rel_types)
        return [edge for edge in self.edges if edge.rel_type in wanted]

    def filter_by_weight(self, min_weight: float = 0.0) -> list[InlineEdge]:
        """
        Return the edges whose weight is at least ``min_weight``.

        Args:
            min_weight: Inclusive lower bound on ``edge.weight``.

        Returns:
            Matching edges, in their original order.
        """
        return [edge for edge in self.edges if edge.weight >= min_weight]
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM Query Models
|
|
3
|
+
|
|
4
|
+
REM provides schema-agnostic query operations optimized for LLM-augmented
|
|
5
|
+
iterated retrieval. Unlike traditional SQL, REM queries work with natural
|
|
6
|
+
language labels instead of UUIDs and support multi-turn exploration.
|
|
7
|
+
|
|
8
|
+
Query Types (Performance Contract):
|
|
9
|
+
- LOOKUP: O(1) schema-agnostic entity resolution
|
|
10
|
+
- FUZZY: Indexed fuzzy text matching across all entities
|
|
11
|
+
- SEARCH: Indexed semantic vector search
|
|
12
|
+
- SQL: Direct table queries (provider dialect)
|
|
13
|
+
- TRAVERSE: Iterative O(1) lookups on graph edges
|
|
14
|
+
|
|
15
|
+
Key Design Principles:
|
|
16
|
+
1. Natural language surface area (labels, not UUIDs)
|
|
17
|
+
2. Schema-agnostic operations (no table name required for LOOKUP/FUZZY/TRAVERSE)
|
|
18
|
+
3. Multi-turn iteration with stage tracking and memos
|
|
19
|
+
4. O(1) performance guarantees for entity resolution
|
|
20
|
+
|
|
21
|
+
Iterated Retrieval Pattern:
|
|
22
|
+
- Stage 1: Find entry point (LOOKUP/SEARCH)
|
|
23
|
+
- Stage 2: Analyze neighborhood (TRAVERSE DEPTH 0 = PLAN mode)
|
|
24
|
+
- Stage 3: Selective traversal (TRAVERSE with edge filters)
|
|
25
|
+
- Stage 4: Refinement based on results
|
|
26
|
+
|
|
27
|
+
Example Multi-Turn Query:
|
|
28
|
+
```python
|
|
29
|
+
# Turn 1: PLAN mode to analyze edges
|
|
30
|
+
TRAVERSE WITH LOOKUP "sarah chen" DEPTH 0
|
|
31
|
+
|
|
32
|
+
# Turn 2: Follow specific edge types
|
|
33
|
+
TRAVERSE manages,mentors WITH LOOKUP "sarah chen" DEPTH 2
|
|
34
|
+
|
|
35
|
+
# Turn 3: Refine based on results
|
|
36
|
+
TRAVERSE authored_by WITH LOOKUP "api-design-v2" DEPTH 1
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
REM Query Contract (MANDATORY for all providers):
|
|
40
|
+
| Query Type | Performance | Schema | Multi-Match | Required |
|
|
41
|
+
|------------|-------------|--------|-------------|----------|
|
|
42
|
+
| LOOKUP | O(1) | Agnostic | Yes | ✅ |
|
|
43
|
+
| FUZZY | Indexed | Agnostic | Yes | ✅ |
|
|
44
|
+
| SEARCH | Indexed | Specific | Yes | ✅ |
|
|
45
|
+
| SQL | O(n) | Specific | No | ✅ |
|
|
46
|
+
| TRAVERSE | O(k) | Agnostic | Yes | ✅ |
|
|
47
|
+
"""
|
|
48
|
+
|
|
49
|
+
from enum import Enum
|
|
50
|
+
from typing import Any, Optional, Union
|
|
51
|
+
|
|
52
|
+
from pydantic import BaseModel, Field
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class QueryType(str, Enum):
    """
    The five kinds of REM query.

    Members are also ``str`` instances whose value equals the member name,
    so they compare equal to the plain upper-case keyword. The performance
    and schema expectations of each kind are described in the module
    docstring's REM contract.
    """

    LOOKUP = "LOOKUP"      # entity resolution by key
    FUZZY = "FUZZY"        # fuzzy text matching
    SEARCH = "SEARCH"      # semantic vector search
    SQL = "SQL"            # direct table query
    TRAVERSE = "TRAVERSE"  # graph edge traversal
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class LookupParameters(BaseModel):
    """
    Parameters for a LOOKUP query.

    Documented contract:
        Performance: O(1) per key.
        Schema: agnostic - no table name is needed.
        Multi-match: entities from ALL tables with a matching key are returned.
    """

    # Required: one label, or several labels.
    key: str | list[str] = Field(
        default=...,
        description="Entity identifier(s) - single key or list of keys (natural language labels)",
    )

    # Optional per-user filter; None when not supplied.
    user_id: str | None = Field(
        None,
        description="Optional user ID filter for multi-user tenants",
    )
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
class FuzzyParameters(BaseModel):
    """
    Parameters for a FUZZY query.

    Documented contract:
        Performance: indexed - an FTS or trigram index is required.
        Schema: agnostic - searches across all entity names.
        Multi-match: entities from ALL tables matching the pattern are returned.
    """

    # Required: the text to match approximately.
    query_text: str = Field(default=..., description="Fuzzy search text")

    # Similarity cut-off, validated to lie in [0.0, 1.0].
    threshold: float = Field(
        0.5,
        ge=0.0,
        le=1.0,
        description="Similarity threshold",
    )

    # Result cap; must be strictly positive.
    limit: int = Field(5, gt=0, description="Maximum results")
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class SearchParameters(BaseModel):
    """
    Parameters for a SEARCH (semantic) query.

    Documented contract:
        Performance: indexed - a vector index (IVF, HNSW) is required.
        Schema: table-specific - a table name must be given.
    """

    # Required: what to search for.
    query_text: str = Field(default=..., description="Semantic search query")

    # Required: the table to search.
    table_name: str = Field(
        default=...,
        description="Table to search (resources, moments, etc.)",
    )

    # Result cap; must be strictly positive.
    limit: int = Field(10, gt=0, description="Maximum results")

    # Lower bound on the similarity score, validated to lie in [0.0, 1.0].
    min_similarity: float = Field(
        0.3,
        ge=0.0,
        le=1.0,
        description="Minimum similarity score (0.3 recommended for general queries)",
    )
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
class SQLParameters(BaseModel):
    """
    Parameters for a SQL query.

    Documented contract:
        Performance: O(n) - table scan with optional indexes.
        Schema: table-specific - needs table name and column knowledge.
        Provider-specific: uses the native SQL dialect.

    Two usage modes are described:
        1. Structured: table_name + where_clause + order_by + limit
        2. Raw: raw_query (a full SQL statement such as SELECT...)

    Every field is optional at the model level; this class does not itself
    check that one of the two modes has been filled in.
    """

    # --- raw mode ---
    raw_query: str | None = Field(
        None,
        description="Raw SQL query (e.g., SELECT * FROM resources WHERE...)",
    )

    # --- structured mode ---
    table_name: str | None = Field(
        None, description="Table to query (structured mode)"
    )
    where_clause: str | None = Field(
        None,
        description="SQL WHERE clause (structured mode)",
    )
    order_by: str | None = Field(
        None, description="SQL ORDER BY clause (structured mode)"
    )
    limit: int | None = Field(None, description="SQL LIMIT (structured mode)")
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
class TraverseParameters(BaseModel):
    """
    Parameters for a TRAVERSE query.

    Documented contract:
        Performance: O(k), k being the number of keys traversed.
        Schema: agnostic - graph edges are followed across tables.
        Implementation: iterative LOOKUP calls on edge destinations.

    Syntax: TRAVERSE {edge_filter} WITH [REM_QUERY] DEPTH [0-N]

    Depth modes:
        0  PLAN mode (edges are analysed, nothing is traversed)
        1  single-hop traversal (the default)
        N  multi-hop traversal (N hops from the source)

    Plan memo:
        A scratchpad the agent maintains to track multi-turn progress,
        kept terse for fast token generation,
        e.g. "Goal: org chart. Step 1: find CEO".
    """

    # Required: how the entry nodes are found.
    initial_query: str = Field(
        default=...,
        description="Initial query to find entry nodes (LOOKUP key, SEARCH text, etc.)",
    )

    # Defaults to the wildcard list; the factory yields a new list each time.
    edge_types: list[str] = Field(
        default_factory=lambda: ["*"],
        description="Edge types to follow (e.g., ['manages', 'reports-to']). Default: ['*'] (all)",
    )

    # Non-negative hop count; zero selects PLAN mode.
    max_depth: int = Field(
        1,
        ge=0,
        description="Maximum traversal depth. 0 = PLAN mode (no traversal)",
    )

    order_by: str = Field(
        "edge.created_at DESC",
        description="Result ordering (edge.created_at, node.name, edge.weight)",
    )

    # Node cap; must be strictly positive.
    limit: int = Field(9, gt=0, description="Maximum nodes to return")

    plan_memo: str | None = Field(
        None,
        description="Agent's terse scratchpad for tracking multi-turn progress",
    )
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
class RemQuery(BaseModel):
    """
    A complete REM query plan.

    Pairs a query type with the parameter model for that type. The module
    notes it is used both by direct REM queries and by the ask_rem()
    natural-language interface.

    Note: the model itself does not cross-check that ``parameters`` is the
    variant matching ``query_type``.
    """

    query_type: QueryType = Field(default=..., description="REM query type")

    # Exactly one of the five parameter models.
    parameters: Union[
        LookupParameters,
        FuzzyParameters,
        SearchParameters,
        SQLParameters,
        TraverseParameters,
    ] = Field(default=..., description="Query parameters")

    user_id: str | None = Field(
        None,
        description="User identifier (UUID5 hash of email). None = anonymous (shared/public data only)",
    )
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
class TraverseStage(BaseModel):
    """
    Record of one TRAVERSE execution stage.

    Holds what was run at a given depth and what it found, so the details
    can be used for LLM interaction and multi-turn planning.
    """

    # Required: depth this stage ran at.
    depth: int = Field(default=..., description="Traversal depth for this stage")

    # Required: the query that was executed.
    executed: str = Field(default=..., description="Query executed at this stage")

    # Required: name -> count mapping of what was discovered.
    found: dict[str, int] = Field(
        default=...,
        description="Discovery stats (nodes, edges counts)",
    )

    plan_memo: str | None = Field(
        None,
        description="Agent's memo echoed from request",
    )
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class TraverseResponse(BaseModel):
    """
    Result of a TRAVERSE query.

    Bundles the discovered nodes with per-stage execution records and
    metadata, intended for LLM-driven iteration. Every field defaults to an
    empty container, so an empty response can be built with no arguments.
    """

    nodes: list[dict[str, Any]] = Field(
        default_factory=list,
        description="Discovered nodes",
    )

    stages: list[TraverseStage] = Field(
        default_factory=list,
        description="Execution stage information",
    )

    source_nodes: list[str] = Field(
        default_factory=list,
        description="Initial entry node labels",
    )

    # Each entry is a 3-tuple of strings.
    edge_summary: list[tuple[str, str, str]] = Field(
        default_factory=list,
        description="Edge shorthand tuples (src, rel_type, dst) for analysis",
    )

    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Query metadata (total_nodes, max_depth_reached, etc.)",
    )
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM Entity Models
|
|
3
|
+
|
|
4
|
+
Core entity types for the REM system (this package also exports DomainResource and Subscriber, which are not itemized below):
|
|
5
|
+
- Resources: Base content units (documents, conversations, artifacts)
|
|
6
|
+
- ImageResources: Image-specific resources with CLIP embeddings
|
|
7
|
+
- Messages: Communication content
|
|
8
|
+
- Sessions: Conversation sessions (normal or evaluation mode)
|
|
9
|
+
- SharedSessions: Session sharing between users for collaboration
|
|
10
|
+
- Feedback: User feedback on messages/sessions with trace integration
|
|
11
|
+
- Users: User entities
|
|
12
|
+
- Files: File metadata and tracking
|
|
13
|
+
- Moments: Temporal narratives (meetings, coding sessions, conversations)
|
|
14
|
+
- Schemas: Agent schema definitions (JsonSchema specifications for Pydantic AI)
|
|
15
|
+
- Ontologies: Domain-specific extracted knowledge from files
|
|
16
|
+
- OntologyConfigs: User-defined rules for automatic ontology extraction
|
|
17
|
+
|
|
18
|
+
All entities inherit from CoreModel and support:
|
|
19
|
+
- Graph connectivity via InlineEdge
|
|
20
|
+
- Temporal tracking
|
|
21
|
+
- Flexible metadata
|
|
22
|
+
- Natural language labels for conversational queries
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
from .domain_resource import DomainResource
|
|
26
|
+
from .feedback import Feedback, FeedbackCategory
|
|
27
|
+
from .file import File
|
|
28
|
+
from .image_resource import ImageResource
|
|
29
|
+
from .message import Message
|
|
30
|
+
from .moment import Moment
|
|
31
|
+
from .ontology import Ontology
|
|
32
|
+
from .ontology_config import OntologyConfig
|
|
33
|
+
from .resource import Resource
|
|
34
|
+
from .schema import Schema
|
|
35
|
+
from .session import Session, SessionMode
|
|
36
|
+
from .shared_session import (
|
|
37
|
+
SharedSession,
|
|
38
|
+
SharedSessionCreate,
|
|
39
|
+
SharedWithMeResponse,
|
|
40
|
+
SharedWithMeSummary,
|
|
41
|
+
)
|
|
42
|
+
from .subscriber import Subscriber, SubscriberOrigin, SubscriberStatus
|
|
43
|
+
from .user import User, UserTier
|
|
44
|
+
|
|
45
|
+
# Public API of the entities package; every name here is imported above.
__all__ = [
    # Resources
    "Resource",
    "DomainResource",
    "ImageResource",
    # Messages and sessions
    "Message",
    "Session",
    "SessionMode",
    # Session sharing
    "SharedSession",
    "SharedSessionCreate",
    "SharedWithMeResponse",
    "SharedWithMeSummary",
    # Feedback
    "Feedback",
    "FeedbackCategory",
    # Users and subscribers
    "User",
    "UserTier",
    "Subscriber",
    "SubscriberStatus",
    "SubscriberOrigin",
    # Files, moments, schemas, ontologies
    "File",
    "Moment",
    "Schema",
    "Ontology",
    "OntologyConfig",
]
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""
|
|
2
|
+
DomainResource - Curated internal knowledge in REM.
|
|
3
|
+
|
|
4
|
+
DomainResources are a specialized subclass of Resource for storing curated,
|
|
5
|
+
domain-specific internal knowledge that is not part of general knowledge.
|
|
6
|
+
This includes proprietary information, internal documentation, institutional
|
|
7
|
+
knowledge, and other content that requires more careful curation.
|
|
8
|
+
|
|
9
|
+
Key Differences from Resource:
|
|
10
|
+
- Intended for curated, internal knowledge (not raw ingested content)
|
|
11
|
+
- Higher quality bar - content is reviewed/vetted before ingestion
|
|
12
|
+
- May contain proprietary or sensitive information
|
|
13
|
+
- Subject to different retention/governance policies
|
|
14
|
+
|
|
15
|
+
Use Cases:
|
|
16
|
+
- Internal documentation and procedures
|
|
17
|
+
- Proprietary research and analysis
|
|
18
|
+
- Institutional knowledge bases
|
|
19
|
+
- Domain-specific ontologies and taxonomies
|
|
20
|
+
- Curated best practices and guidelines
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from .resource import Resource
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class DomainResource(Resource):
    """
    Resource variant for curated, domain-specific knowledge.

    Adds no fields or methods of its own: everything comes from Resource.
    Per the module notes, it is kept in its own table (domain_resources) so
    curated internal knowledge stays distinct from general ingested
    content, and the identical schema lets content move between the two
    tables as its curation status changes.
    """
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Feedback - User feedback on chat messages and sessions.
|
|
3
|
+
|
|
4
|
+
Feedback allows users to rate and categorize responses, providing
|
|
5
|
+
data for evaluation and model improvement. Feedback can be attached
|
|
6
|
+
to specific messages or entire sessions.
|
|
7
|
+
|
|
8
|
+
Trace Integration:
|
|
9
|
+
- Feedback references trace_id/span_id for OTEL/Phoenix integration
|
|
10
|
+
- Can attach annotations to Phoenix spans for unified observability
|
|
11
|
+
|
|
12
|
+
Predefined Categories (system-defined, extensible; partial list - FeedbackCategory also defines CONFUSING, UNSAFE, ACCURATE, WELL_WRITTEN, and OTHER):
|
|
13
|
+
- INCOMPLETE: Response lacks expected information
|
|
14
|
+
- INACCURATE: Response contains factual errors
|
|
15
|
+
- POOR_TONE: Inappropriate or unprofessional tone
|
|
16
|
+
- OFF_TOPIC: Response doesn't address the question
|
|
17
|
+
- TOO_VERBOSE: Unnecessarily long response
|
|
18
|
+
- TOO_BRIEF: Insufficiently detailed response
|
|
19
|
+
- HELPFUL: Positive feedback marker
|
|
20
|
+
- EXCELLENT: Exceptionally good response
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from enum import Enum
|
|
24
|
+
from typing import Any
|
|
25
|
+
|
|
26
|
+
from pydantic import Field
|
|
27
|
+
|
|
28
|
+
from ..core import CoreModel
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class FeedbackCategory(str, Enum):
    """
    Predefined (system-defined) feedback categories.

    Members are ``str`` subclasses, so each one compares equal to its
    lowercase value (``FeedbackCategory.HELPFUL == "helpful"``) and can be
    used wherever a plain category string is expected.

    ``__str__`` returns the raw value, so ``str()``, ``format()`` and
    f-strings all render e.g. ``"helpful"``. Without the override, ``str()``
    renders ``"FeedbackCategory.HELPFUL"`` and the ``format()``/f-string
    output of a ``(str, Enum)`` mix-in depends on the Python version.
    """

    # Negative categories
    INCOMPLETE = "incomplete"  # response lacks expected information
    INACCURATE = "inaccurate"  # response contains factual errors
    POOR_TONE = "poor_tone"  # inappropriate or unprofessional tone
    OFF_TOPIC = "off_topic"  # response doesn't address the question
    TOO_VERBOSE = "too_verbose"  # unnecessarily long response
    TOO_BRIEF = "too_brief"  # insufficiently detailed response
    CONFUSING = "confusing"
    UNSAFE = "unsafe"

    # Positive categories
    HELPFUL = "helpful"  # positive feedback marker
    EXCELLENT = "excellent"  # exceptionally good response
    ACCURATE = "accurate"
    WELL_WRITTEN = "well_written"

    # Neutral/Other
    OTHER = "other"

    def __str__(self) -> str:
        """Return the category's string value (e.g. ``"helpful"``)."""
        return str(self.value)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Feedback(CoreModel):
    """
    Structured feedback recorded against a chat session or one of its messages.

    A record can carry:
    - a numeric rating (thumbs up/down or a 1-5 scale)
    - any number of category labels (predefined or custom strings)
    - an optional free-text comment
    - OTEL trace/span identifiers for OTEL/Phoenix integration
    - Phoenix sync bookkeeping and the kind of annotator

    Targeting:
    - message-level feedback sets both session_id and message_id
    - session-level feedback sets session_id and leaves message_id as None
    """

    # Target reference: session_id is always required; message_id is the
    # optional refinement that pins the feedback to a single message.
    session_id: str = Field(..., description="Session ID this feedback relates to")
    message_id: str | None = Field(
        default=None,
        description="Specific message ID (null for session-level feedback)",
    )

    # Rating, shared by both schemes (thumbs: -1/1, scale: 1-5).
    # NOTE(review): the ge/le bounds also admit 0, which the description does
    # not assign a meaning to - confirm whether 0 is intended to be valid.
    rating: int | None = Field(
        default=None,
        ge=-1,
        le=5,
        description="Rating: -1 (thumbs down), 1 (thumbs up), or 1-5 scale",
    )

    # Category labels; several may be selected. Stored as plain strings, so
    # values outside FeedbackCategory are accepted as custom categories.
    categories: list[str] = Field(
        default_factory=list,
        description="Selected feedback categories (from FeedbackCategory or custom)",
    )

    # Optional free-form text from the annotator.
    comment: str | None = Field(
        default=None,
        description="Optional free-text feedback comment",
    )

    # OTEL identifiers used for OTEL/Phoenix integration.
    trace_id: str | None = Field(
        default=None,
        description="OTEL trace ID for linking to observability",
    )
    span_id: str | None = Field(
        default=None,
        description="OTEL span ID for specific span feedback",
    )

    # Phoenix annotation bookkeeping: unsynced by default, with no annotation ID.
    phoenix_synced: bool = Field(
        default=False,
        description="Whether feedback has been synced to Phoenix as annotation",
    )
    phoenix_annotation_id: str | None = Field(
        default=None,
        description="Phoenix annotation ID after sync",
    )

    # Who produced the feedback; a free-form string that defaults to "HUMAN".
    annotator_kind: str = Field(
        default="HUMAN",
        description="Annotator type: HUMAN, LLM, CODE",
    )
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
"""
|
|
2
|
+
File - File metadata and tracking in REM.
|
|
3
|
+
|
|
4
|
+
Files represent uploaded or referenced files (PDFs, images, audio, etc.)
|
|
5
|
+
that are parsed into Resources or used as input to dreaming workflows.
|
|
6
|
+
|
|
7
|
+
File entities track:
|
|
8
|
+
- File metadata (name, size, mime type)
|
|
9
|
+
- Storage location (URI)
|
|
10
|
+
- Processing status
|
|
11
|
+
- Relationships to derived Resources
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from typing import Optional
|
|
15
|
+
|
|
16
|
+
from pydantic import Field
|
|
17
|
+
|
|
18
|
+
from ..core import CoreModel
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class File(CoreModel):
    """
    Metadata record for a file uploaded to, or referenced by, the REM system.

    Only ``name`` and ``uri`` are required. The remaining fields (extracted
    text, timestamp, size, MIME type, processing status) are optional.

    Tenant isolation is not modelled here; it is provided via the
    CoreModel.tenant_id field.
    """

    # Required identity and location.
    name: str = Field(..., description="File name")
    uri: str = Field(..., description="File storage URI (S3, local path, etc.)")

    # Text extracted from the file, when there is any.
    content: Optional[str] = Field(
        default=None,
        description="Extracted text content (if applicable)",
    )

    # Kept as a string rather than a datetime; the expected format is not
    # fixed by this model.
    timestamp: Optional[str] = Field(
        default=None,
        description="File creation/modification timestamp",
    )

    size_bytes: Optional[int] = Field(default=None, description="File size in bytes")
    mime_type: Optional[str] = Field(default=None, description="File MIME type")

    # Starts as "pending"; the annotation also allows an explicit None.
    processing_status: Optional[str] = Field(
        default="pending",
        description="File processing status (pending, processing, completed, failed)",
    )
|