remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,132 @@
1
+ """
2
+ InlineEdge - Knowledge graph edge representation.
3
+
4
+ REM uses human-readable entity labels instead of UUIDs for graph edges,
5
+ enabling natural language queries without schema knowledge.
6
+
7
+ Key Design Decision:
8
+ - dst field contains LABELS (e.g., "sarah-chen", "tidb-migration-spec")
9
+ - NOT UUIDs (e.g., "550e8400-e29b-41d4-a716-446655440000")
10
+ - This enables LOOKUP operations on labels directly
11
+ - LLMs can query "LOOKUP sarah-chen" without knowing internal IDs
12
+
13
+ Edge Weight Guidelines:
14
+ - 1.0: Primary/strong relationships (authored_by, owns, part_of)
15
+ - 0.8-0.9: Important relationships (depends_on, reviewed_by, implements)
16
+ - 0.5-0.7: Secondary relationships (references, related_to, inspired_by)
17
+ - 0.3-0.4: Weak relationships (mentions, cites)
18
+
19
+ Destination Entity Type Convention (CRITICAL - properties.dst_entity_type):
20
+
21
+ Format: <table_schema>:<category>/<key>
22
+
23
+ Where:
24
+ - table_schema: Database table (resources, moments, users, etc.)
25
+ - category: Optional entity category within that table
26
+ - key: The actual entity key (must match dst field)
27
+
28
+ Examples:
29
+ - "resources:managers/bob" → Look up bob in resources table with category="managers"
30
+ - "users:engineers/sarah-chen" → Look up sarah-chen in users table with category="engineers"
31
+ - "moments:meetings/standup-2024-01" → Look up in moments table with category="meetings"
32
+ - "resources/api-design-v2" → Look up api-design-v2 in resources table (no category)
33
+ - "bob" → Defaults to resources table, no category (use sparingly)
34
+
35
+ IMPORTANT - Upsert Rules:
36
+ 1. When upserting referenced entities, parse dst_entity_type to determine:
37
+ - table_schema → which table to upsert into
38
+ - category → set the 'category' field in that table
39
+ - key → match against entity_key_field (usually 'name' or 'id')
40
+
41
+ 2. If dst_entity_type is missing or just a type like "managers":
42
+ - Default table_schema to "resources"
43
+ - Set category to the type when one was given (e.g., "managers"); leave it null when dst_entity_type is missing
44
+ - Use dst as the key
45
+
46
+ 3. Agents should NEVER guess entity types
47
+ - If type is unknown, omit dst_entity_type or set to null
48
+ - Better to have no category than wrong category
49
+ - System will handle entities without categories
50
+
51
+ 4. Category is optional and can be null - this is perfectly fine
52
+ - Categories enable filtering but are not required for graph traversal
53
+ - Use categories when they add semantic value (roles, types, domains)
54
+
55
+ Edge Type Format Guidelines (rel_type):
56
+ - Use snake_case: "authored_by", "depends_on", "references"
57
+ - Be specific but consistent: "reviewed_by" not "reviewed"
58
+ - Use passive voice for bidirectional clarity: "authored_by" (reverse: "authors")
59
+ """
60
+
61
+ from datetime import datetime, timezone
62
+ from typing import Optional
63
+
64
+ from pydantic import BaseModel, ConfigDict, Field
65
+
66
+
67
class InlineEdge(BaseModel):
    """
    One knowledge-graph edge addressed by a human-readable destination label.

    Relationships between entities are recorded against natural language
    labels rather than UUIDs, which is what enables conversational queries.
    """

    # Required: label of the entity this edge points to.
    dst: str = Field(
        ...,
        description="Human-readable destination key matching the entity's name/id field (e.g., 'tidb-migration-spec', 'sarah-chen', 'bob')",
    )

    # Required: name of the relationship.
    rel_type: str = Field(
        ...,
        description="Relationship type in snake_case (e.g., 'authored_by', 'depends_on', 'references')",
    )

    # Strength of the relationship; validation keeps it inside [0.0, 1.0].
    weight: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Relationship strength: 1.0=primary, 0.8-0.9=important, 0.5-0.7=secondary, 0.3-0.4=weak",
    )

    # Free-form metadata; each instance gets its own fresh dict.
    properties: dict = Field(
        default_factory=dict,
        description=(
            "Rich metadata. CRITICAL field: dst_entity_type with format 'table_schema:category/key' "
            "(e.g., 'resources:managers/bob', 'users:engineers/sarah-chen'). "
            "Used to determine upsert target table and category. Can be null/omitted if unknown."
        ),
    )

    # The default is the current UTC time with tzinfo stripped, i.e. a naive
    # datetime expressed in UTC.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
        description="Edge creation timestamp",
    )
100
+
101
+
102
class InlineEdges(BaseModel):
    """
    Container holding a list of InlineEdge objects.

    Intended for structured edge operations and batch processing.
    """

    edges: list[InlineEdge] = Field(
        default_factory=list,
        description="List of graph edges",
    )

    def add_edge(
        self,
        dst: str,
        rel_type: str,
        weight: float = 0.5,
        properties: Optional[dict] = None,
    ) -> None:
        """Build an InlineEdge from the arguments and append it to ``edges``."""
        # A missing (or empty) properties argument becomes a new empty dict.
        edge_properties = properties or {}
        self.edges.append(
            InlineEdge(
                dst=dst,
                rel_type=rel_type,
                weight=weight,
                properties=edge_properties,
            )
        )

    def filter_by_rel_type(self, rel_types: list[str]) -> list[InlineEdge]:
        """Return the edges whose rel_type is one of ``rel_types``."""
        matching: list[InlineEdge] = []
        for candidate in self.edges:
            if candidate.rel_type in rel_types:
                matching.append(candidate)
        return matching

    def filter_by_weight(self, min_weight: float = 0.0) -> list[InlineEdge]:
        """Return the edges whose weight is at least ``min_weight``."""
        heavy_enough: list[InlineEdge] = []
        for candidate in self.edges:
            if candidate.weight >= min_weight:
                heavy_enough.append(candidate)
        return heavy_enough
@@ -0,0 +1,243 @@
1
+ """
2
+ REM Query Models
3
+
4
+ REM provides schema-agnostic query operations optimized for LLM-augmented
5
+ iterated retrieval. Unlike traditional SQL, REM queries work with natural
6
+ language labels instead of UUIDs and support multi-turn exploration.
7
+
8
+ Query Types (Performance Contract):
9
+ - LOOKUP: O(1) schema-agnostic entity resolution
10
+ - FUZZY: Indexed fuzzy text matching across all entities
11
+ - SEARCH: Indexed semantic vector search
12
+ - SQL: Direct table queries (provider dialect)
13
+ - TRAVERSE: Iterative O(1) lookups on graph edges
14
+
15
+ Key Design Principles:
16
+ 1. Natural language surface area (labels, not UUIDs)
17
+ 2. Schema-agnostic operations (no table name required for LOOKUP/FUZZY/TRAVERSE)
18
+ 3. Multi-turn iteration with stage tracking and memos
19
+ 4. O(1) performance guarantees for entity resolution
20
+
21
+ Iterated Retrieval Pattern:
22
+ - Stage 1: Find entry point (LOOKUP/SEARCH)
23
+ - Stage 2: Analyze neighborhood (TRAVERSE DEPTH 0 = PLAN mode)
24
+ - Stage 3: Selective traversal (TRAVERSE with edge filters)
25
+ - Stage 4: Refinement based on results
26
+
27
+ Example Multi-Turn Query:
28
+ ```python
29
+ # Turn 1: PLAN mode to analyze edges
30
+ TRAVERSE WITH LOOKUP "sarah chen" DEPTH 0
31
+
32
+ # Turn 2: Follow specific edge types
33
+ TRAVERSE manages,mentors WITH LOOKUP "sarah chen" DEPTH 2
34
+
35
+ # Turn 3: Refine based on results
36
+ TRAVERSE authored_by WITH LOOKUP "api-design-v2" DEPTH 1
37
+ ```
38
+
39
+ REM Query Contract (MANDATORY for all providers):
40
+ | Query Type | Performance | Schema | Multi-Match | Required |
41
+ |------------|-------------|--------|-------------|----------|
42
+ | LOOKUP | O(1) | Agnostic | Yes | ✅ |
43
+ | FUZZY | Indexed | Agnostic | Yes | ✅ |
44
+ | SEARCH | Indexed | Specific | Yes | ✅ |
45
+ | SQL | O(n) | Specific | No | ✅ |
46
+ | TRAVERSE | O(k) | Agnostic | Yes | ✅ |
47
+ """
48
+
49
+ from enum import Enum
50
+ from typing import Any, Optional, Union
51
+
52
+ from pydantic import BaseModel, Field
53
+
54
+
55
class QueryType(str, Enum):
    """
    The kinds of query REM understands.

    The REM contract defines specific performance and schema requirements
    for each of these types.
    """

    # Each member's value is its own name, and members are also plain ``str``.
    LOOKUP = "LOOKUP"
    FUZZY = "FUZZY"
    SEARCH = "SEARCH"
    SQL = "SQL"
    TRAVERSE = "TRAVERSE"
68
+
69
+
70
class LookupParameters(BaseModel):
    """
    Parameters for a LOOKUP query.

    Performance: O(1) per key
    Schema: Agnostic - no table name is needed
    Multi-match: entities from ALL tables with matching keys are returned
    """

    # Required: either one label or a list of labels.
    key: Union[str, list[str]] = Field(
        ...,
        description="Entity identifier(s) - single key or list of keys (natural language labels)",
    )

    # Optional filter; None when not supplied.
    user_id: Optional[str] = Field(
        default=None,
        description="Optional user ID filter for multi-user tenants",
    )
85
+
86
+
87
class FuzzyParameters(BaseModel):
    """
    Parameters for a FUZZY query.

    Performance: Indexed - requires an FTS or trigram index
    Schema: Agnostic - all entity names are searched
    Multi-match: entities from ALL tables matching the fuzzy pattern are returned
    """

    # Required: the text to match approximately.
    query_text: str = Field(
        ...,
        description="Fuzzy search text",
    )

    # Validation keeps the threshold inside [0.0, 1.0].
    threshold: float = Field(
        default=0.5,
        ge=0.0,
        le=1.0,
        description="Similarity threshold",
    )

    # Must be strictly positive.
    limit: int = Field(
        default=5,
        gt=0,
        description="Maximum results",
    )
101
+
102
+
103
class SearchParameters(BaseModel):
    """
    Parameters for a SEARCH query.

    Performance: Indexed - requires a vector index (IVF, HNSW)
    Schema: Table-specific - a table name must be given
    """

    # Required: the semantic query text.
    query_text: str = Field(
        ...,
        description="Semantic search query",
    )

    # Required: unlike LOOKUP/FUZZY, SEARCH targets one named table.
    table_name: str = Field(
        ...,
        description="Table to search (resources, moments, etc.)",
    )

    # Must be strictly positive.
    limit: int = Field(
        default=10,
        gt=0,
        description="Maximum results",
    )

    # Validation keeps the score floor inside [0.0, 1.0].
    min_similarity: float = Field(
        default=0.7,
        ge=0.0,
        le=1.0,
        description="Minimum similarity score",
    )
117
+
118
+
119
class SQLParameters(BaseModel):
    """
    Parameters for a SQL query.

    Performance: O(n) - table scan with optional indexes
    Schema: Table-specific - table name and column knowledge are required
    Provider-specific: the native SQL dialect is used

    Two modes are supported:
    1. Structured: table_name + where_clause + order_by + limit
    2. Raw: raw_query (a full SQL statement such as SELECT...)
    """

    # Every field defaults to None; this model does not itself enforce that
    # exactly one of the two modes is populated.

    # Raw mode.
    raw_query: Optional[str] = Field(
        default=None,
        description="Raw SQL query (e.g., SELECT * FROM resources WHERE...)",
    )

    # Structured mode.
    table_name: Optional[str] = Field(
        default=None,
        description="Table to query (structured mode)",
    )
    where_clause: Optional[str] = Field(
        default=None,
        description="SQL WHERE clause (structured mode)",
    )
    order_by: Optional[str] = Field(
        default=None,
        description="SQL ORDER BY clause (structured mode)",
    )
    limit: Optional[int] = Field(
        default=None,
        description="SQL LIMIT (structured mode)",
    )
141
+
142
+
143
class TraverseParameters(BaseModel):
    """
    Parameters for a TRAVERSE query.

    Performance: O(k), with k the number of keys traversed
    Schema: Agnostic - graph edges are followed across tables
    Implementation: iterative LOOKUP calls on edge destinations

    Syntax: TRAVERSE {edge_filter} WITH [REM_QUERY] DEPTH [0-N]

    Depth Modes:
    - 0: PLAN mode (edges are analyzed, nothing is traversed)
    - 1: Single-hop traversal (default)
    - N: Multi-hop traversal (N hops from source)

    Plan Memo:
    A scratchpad the agent maintains to track multi-turn progress.
    It is kept terse for fast token generation.
    Example: "Goal: org chart. Step 1: find CEO"
    """

    # Required: how the entry nodes are located.
    initial_query: str = Field(
        ...,
        description="Initial query to find entry nodes (LOOKUP key, SEARCH text, etc.)",
    )

    # A fresh ["*"] list is created per instance when nothing is supplied.
    edge_types: list[str] = Field(
        default_factory=lambda: ["*"],
        description="Edge types to follow (e.g., ['manages', 'reports-to']). Default: ['*'] (all)",
    )

    # Non-negative; zero selects PLAN mode.
    max_depth: int = Field(
        default=1,
        ge=0,
        description="Maximum traversal depth. 0 = PLAN mode (no traversal)",
    )

    order_by: str = Field(
        default="edge.created_at DESC",
        description="Result ordering (edge.created_at, node.name, edge.weight)",
    )

    # Must be strictly positive.
    limit: int = Field(
        default=9,
        gt=0,
        description="Maximum nodes to return",
    )

    plan_memo: Optional[str] = Field(
        default=None,
        description="Agent's terse scratchpad for tracking multi-turn progress",
    )
183
+
184
+
185
class RemQuery(BaseModel):
    """
    A REM query plan.

    Pairs a query type with the parameters specific to that type.
    Both direct REM queries and the ask_rem() natural language interface use it.
    """

    # Required: which kind of query to run.
    query_type: QueryType = Field(
        ...,
        description="REM query type",
    )

    # Required: one of the per-type parameter models. Nothing in this model
    # cross-checks the chosen parameter model against query_type.
    parameters: Union[
        LookupParameters,
        FuzzyParameters,
        SearchParameters,
        SQLParameters,
        TraverseParameters,
    ] = Field(
        ...,
        description="Query parameters",
    )

    # Required: the user the query is scoped to.
    user_id: str = Field(
        ...,
        description="User identifier for isolation",
    )
202
+
203
+
204
class TraverseStage(BaseModel):
    """
    Information about one TRAVERSE execution stage.

    Records query execution details for LLM interaction and multi-turn planning.
    """

    # Required fields.
    depth: int = Field(
        ...,
        description="Traversal depth for this stage",
    )
    executed: str = Field(
        ...,
        description="Query executed at this stage",
    )
    found: dict[str, int] = Field(
        ...,
        description="Discovery stats (nodes, edges counts)",
    )

    # Optional; None when no memo is present.
    plan_memo: Optional[str] = Field(
        default=None,
        description="Agent's memo echoed from request",
    )
219
+
220
+
221
class TraverseResponse(BaseModel):
    """
    Response to a TRAVERSE query.

    Carries nodes, execution stages, and metadata for LLM-driven iteration.
    """

    # Every field defaults to a fresh empty container per instance.
    nodes: list[dict[str, Any]] = Field(
        default_factory=list,
        description="Discovered nodes",
    )
    stages: list[TraverseStage] = Field(
        default_factory=list,
        description="Execution stage information",
    )
    source_nodes: list[str] = Field(
        default_factory=list,
        description="Initial entry node labels",
    )
    edge_summary: list[tuple[str, str, str]] = Field(
        default_factory=list,
        description="Edge shorthand tuples (src, rel_type, dst) for analysis",
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Query metadata (total_nodes, max_depth_reached, etc.)",
    )
@@ -0,0 +1,43 @@
1
+ """
2
+ REM Entity Models
3
+
4
+ Core entity types for the REM system:
5
+ - Resources: Base content units (documents, conversations, artifacts)
6
+ - ImageResources: Image-specific resources with CLIP embeddings
7
+ - Messages: Communication content
8
+ - Users: User entities
9
+ - Files: File metadata and tracking
10
+ - Moments: Temporal narratives (meetings, coding sessions, conversations)
11
+ - Schemas: Agent schema definitions (JsonSchema specifications for Pydantic AI)
12
+ - Ontologies: Domain-specific extracted knowledge from files
13
+ - OntologyConfigs: User-defined rules for automatic ontology extraction
14
+
15
+ All entities inherit from CoreModel and support:
16
+ - Graph connectivity via InlineEdge
17
+ - Temporal tracking
18
+ - Flexible metadata
19
+ - Natural language labels for conversational queries
20
+ """
21
+
22
+ from .file import File
23
+ from .image_resource import ImageResource
24
+ from .message import Message
25
+ from .moment import Moment
26
+ from .ontology import Ontology
27
+ from .ontology_config import OntologyConfig
28
+ from .resource import Resource
29
+ from .schema import Schema
30
+ from .user import User, UserTier
31
+
32
# Names exported by the entities package; each one is imported above.
__all__ = [
    # Content
    "Resource",
    "ImageResource",
    "Message",
    # Users
    "User",
    "UserTier",
    # Files and narratives
    "File",
    "Moment",
    # Schemas and ontologies
    "Schema",
    "Ontology",
    "OntologyConfig",
]
@@ -0,0 +1,57 @@
1
+ """
2
+ File - File metadata and tracking in REM.
3
+
4
+ Files represent uploaded or referenced files (PDFs, images, audio, etc.)
5
+ that are parsed into Resources or used as input to dreaming workflows.
6
+
7
+ File entities track:
8
+ - File metadata (name, size, mime type)
9
+ - Storage location (URI)
10
+ - Processing status
11
+ - Relationships to derived Resources
12
+ """
13
+
14
+ from typing import Optional
15
+
16
+ from pydantic import Field
17
+
18
+ from ..core import CoreModel
19
+
20
+
21
class File(CoreModel):
    """
    Metadata and tracking record for a file.

    Describes a file uploaded to, or referenced by, the REM system and
    tracks its metadata and processing status. Tenant isolation is
    provided via CoreModel.tenant_id field.
    """

    # Required fields.
    name: str = Field(..., description="File name")
    uri: str = Field(..., description="File storage URI (S3, local path, etc.)")

    # Optional fields; each defaults to None.
    content: Optional[str] = Field(
        default=None, description="Extracted text content (if applicable)"
    )
    timestamp: Optional[str] = Field(
        default=None, description="File creation/modification timestamp"
    )
    size_bytes: Optional[int] = Field(default=None, description="File size in bytes")
    mime_type: Optional[str] = Field(default=None, description="File MIME type")

    # Starts as "pending". The type is a free-form optional string, so the
    # values named in the description are not enforced by this model.
    processing_status: Optional[str] = Field(
        default="pending",
        description="File processing status (pending, processing, completed, failed)",
    )
@@ -0,0 +1,88 @@
1
+ """
2
+ ImageResource - Image-specific resource with CLIP embeddings.
3
+
4
+ ImageResources are a specialized subclass of Resource for images,
5
+ with support for CLIP embeddings and vision LLM descriptions.
6
+
7
+ Key differences from base Resource:
8
+ - **Separate table**: Stored in `image_resources` table, not `resources`
9
+ - **Different embeddings**: Uses CLIP embeddings (multimodal) instead of text embeddings
10
+ - **Embedding provider override**: Must use CLIP-compatible provider (Jina AI, self-hosted)
11
+ - **Vision descriptions**: Optional vision LLM descriptions (tier/sampling gated)
12
+ - **Image metadata**: Dimensions, format, and other image-specific fields
13
+
14
+ Why separate table?
15
+ 1. Different embedding dimensionality (512/768 vs 1536)
16
+ 2. Different embedding model (CLIP vs text-embedding-3-small)
17
+ 3. Multimodal search capabilities (text-to-image, image-to-image)
18
+ 4. Image-specific indexes and queries
19
+ 5. Cost tracking (CLIP tokens vs text tokens)
20
+
21
+ Usage:
22
+ - ImageProvider saves to ImageResource table with CLIP embeddings
23
+ - Regular text Resources use standard text embeddings
24
+ - Cross-modal search: text queries can search ImageResources via CLIP
25
+ """
26
+
27
+ from typing import Optional
28
+
29
+ from pydantic import Field
30
+
31
+ from .resource import Resource
32
+
33
+
34
class ImageResource(Resource):
    """
    Resource specialised for images, carrying CLIP embeddings.

    Lives in its own `image_resources` table and uses CLIP embeddings
    rather than text embeddings. This enables:
    - Multimodal search (text-to-image, image-to-image)
    - Proper dimensionality (512/768 for CLIP vs 1536 for text)
    - Cost tracking (CLIP tokens separate from text tokens)

    Embedding Strategy:
    - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)
    - Future: Self-hosted OpenCLIP models via KEDA-scaled pods
    - Fallback: No embeddings (images searchable by metadata only)

    Vision LLM Strategy (tier/sampling gated):
    - Gold tier: Always get vision descriptions
    - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)
    - Fallback: Basic metadata only

    Tenant isolation provided via CoreModel.tenant_id field.
    """

    # Every field below is optional and defaults to None.

    # Basic image metadata.
    image_width: Optional[int] = Field(default=None, description="Image width in pixels")
    image_height: Optional[int] = Field(default=None, description="Image height in pixels")
    image_format: Optional[str] = Field(
        default=None, description="Image format (PNG, JPEG, GIF, WebP)"
    )

    # Vision LLM output and provenance.
    vision_description: Optional[str] = Field(
        default=None,
        description="Vision LLM generated description (markdown, only for gold tier or sampled images)",
    )
    vision_provider: Optional[str] = Field(
        default=None, description="Vision provider used (anthropic, gemini, openai)"
    )
    vision_model: Optional[str] = Field(
        default=None, description="Vision model used for description"
    )

    # CLIP embedding and its size.
    # NOTE(review): confirm the model-to-dimension mapping quoted in the
    # descriptions below against the embedding provider's documentation.
    clip_embedding: Optional[list[float]] = Field(
        default=None,
        description="CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)",
    )
    clip_dimensions: Optional[int] = Field(
        default=None,
        description="CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)",
    )
@@ -0,0 +1,35 @@
1
+ """
2
+ Message - Communication content in REM.
3
+
4
+ Messages represent individual communication units (chat messages, emails, etc.)
5
+ that can be grouped into conversations or moments.
6
+
7
+ Messages are simpler than Resources but share the same graph connectivity
8
+ through CoreModel inheritance.
9
+ """
10
+
11
+ from pydantic import Field
12
+
13
+ from ..core import CoreModel
14
+
15
+
16
class Message(CoreModel):
    """
    A single unit of communication content.

    Models one message within a conversation, chat, or other communication
    context. Tenant isolation is provided via CoreModel.tenant_id field.
    """

    # Required: the message text.
    content: str = Field(..., description="Message content text")

    # Optional fields; each defaults to None.
    message_type: str | None = Field(default=None, description="Message type e.g role")
    session_id: str | None = Field(
        default=None, description="Session identifier for tracking message context"
    )