remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187)
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,123 @@
1
+ """
2
+ Moment - Temporal narrative in REM.
3
+
4
+ Moments are extracted from Resources through first-order dreaming workflows.
5
+ They represent temporal narratives like meetings, coding sessions, conversations,
6
+ or any classified time period when users were focused on specific activities.
7
+
8
+ Moments provide temporal structure to the REM graph:
9
+ - Temporal boundaries (starts_timestamp, ends_timestamp)
10
+ - Present persons (who was involved)
11
+ - Emotion tags (team sentiment)
12
+ - Topic tags (what was discussed)
13
+ - Natural language summaries
14
+
15
+ Moments enable temporal queries:
16
+ - "What happened between milestone A and B?"
17
+ - "When did Sarah and Mike meet?"
18
+ - "What was discussed in Q4 retrospective?"
19
+
20
+ Data Model:
21
+ - Inherits from CoreModel (id, tenant_id, timestamps, graph_edges, etc.)
22
+ - name: Human-readable moment name
23
+ - moment_type: Classification (meeting, coding-session, conversation, etc.)
24
+ - starts_timestamp: Start time
25
+ - ends_timestamp: End time
26
+ - present_persons: List of Person objects with id, name, role
27
+ - emotion_tags: Sentiment tags (happy, frustrated, focused)
28
+ - topic_tags: Topic/concept tags (project names, technologies)
29
+ - summary: Natural language description
30
+ - source_resource_ids: Resources used to construct this moment
31
+ """
32
+
33
+ from datetime import datetime
34
+ from typing import Optional
35
+
36
+ from pydantic import BaseModel, Field, model_validator
37
+
38
+ from ..core import CoreModel
39
+
40
+
41
class Person(BaseModel):
    """Person reference in a moment."""

    # Required: label of the person entity this reference points at.
    id: str = Field(..., description="Person entity label")
    # Required: the person's name.
    name: str = Field(..., description="Person name")
    # Optional: the role the person had in the moment; None when not supplied.
    role: Optional[str] = Field(default=None, description="Person role in moment")
47
+
48
+
49
+
50
class Moment(CoreModel):
    """
    Temporal narrative extracted from resources.

    Moments provide temporal structure and context for the REM graph,
    enabling time-based queries and understanding of when events occurred.
    Tenant isolation is provided via CoreModel.tenant_id field.
    """

    # --- Identity and classification -------------------------------------
    name: Optional[str] = Field(
        default=None,
        description="Human-readable moment name (used as graph label). Auto-generated from starts_timestamp+moment_type if not provided.",
        # Marks this field as the primary business key for KV lookups.
        json_schema_extra={"entity_key": True},
    )
    moment_type: Optional[str] = Field(
        default=None,
        description="Moment classification (meeting, coding-session, conversation, etc.)",
    )
    category: Optional[str] = Field(
        default=None,
        description="Moment category for grouping and filtering",
    )

    # --- Temporal boundaries (only the start is mandatory) ----------------
    starts_timestamp: datetime = Field(..., description="Moment start time")
    ends_timestamp: Optional[datetime] = Field(default=None, description="Moment end time")

    # --- Participants and tagging -----------------------------------------
    present_persons: list[Person] = Field(
        default_factory=list,
        description="People present in the moment",
    )
    emotion_tags: list[str] = Field(
        default_factory=list,
        description="Emotion/sentiment tags (happy, frustrated, focused, etc.)",
    )
    topic_tags: list[str] = Field(
        default_factory=list,
        description="Topic/concept tags (project names, technologies, etc.)",
    )

    # --- Narrative and provenance -----------------------------------------
    summary: Optional[str] = Field(
        default=None,
        description="Natural language summary of the moment",
    )
    source_resource_ids: list[str] = Field(
        default_factory=list,
        description="Resource IDs used to construct this moment",
    )

    @model_validator(mode='after')
    def generate_name_if_missing(self) -> 'Moment':
        """Populate ``name`` after validation when none was supplied.

        Naming rules, in order:
          * typed moment:   "<Title Cased Type> on YYYY-MM-DD"
                            (hyphens/underscores in the type become spaces)
          * untyped moment: "Moment on YYYY-MM-DD HH:MM"
          * no start time:  "moment-<first 8 chars of id>", or
                            "unnamed-moment" when there is no id either

        Returns:
            The same instance, with ``name`` filled in if it was empty.
        """
        # A name supplied by the caller always wins.
        if self.name:
            return self

        start = self.starts_timestamp
        if not start:
            # Fallback: derive a label from the ID, or use a generic name.
            self.name = f"moment-{str(self.id)[:8]}" if self.id else "unnamed-moment"
            return self

        day = start.strftime("%Y-%m-%d")
        if self.moment_type:
            # e.g. "coding-session" -> "Coding Session on 2024-12-20"
            label = self.moment_type.replace('-', ' ').replace('_', ' ').title()
            self.name = f"{label} on {day}"
        else:
            # Only the untyped form carries the time of day.
            clock = start.strftime("%H:%M")
            self.name = f"Moment on {day} {clock}"

        return self
@@ -0,0 +1,191 @@
1
+ """Ontology entity for tenant-specific knowledge extensions.
2
+
3
+ **What is Ontology Extraction?**
4
+
5
+ Ontologies are **domain-specific structured knowledge** extracted from files using custom
6
+ agent schemas. They extend REM's normal file processing pipeline with tenant-specific
7
+ parsers that extract structured data the standard chunking pipeline would miss.
8
+
9
+ **Normal File Processing:**
10
+ File → extract text → chunk → embed → resources (semantic search ready)
11
+
12
+ **Ontology Processing (Tenant Knowledge Extensions):**
13
+ File → custom agent → structured JSON → ontology (domain knowledge)
14
+
15
+ **Why Ontologies?**
16
+ - Standard chunking gives you semantic search over raw content
17
+ - Ontologies give you **structured queryable fields** from domain logic
18
+ - Example: A contract PDF becomes both searchable chunks AND a structured record with
19
+ parties, dates, payment terms, obligations as queryable fields
20
+
21
+ **Architecture:**
22
+ - Runs as part of dreaming worker (background knowledge extraction)
23
+ - OntologyConfig defines which files trigger which extractors (MIME type, URI pattern, tags)
24
+ - Multiple ontologies per file (apply different domain lenses)
25
+ - Tenant-scoped: Each tenant can define their own extractors
26
+
27
+ **Use Cases:**
28
+
29
+ 1. **Recruitment (CV Parsing)**
30
+ - Standard pipeline: Chunks for "find me candidates with Python experience"
31
+ - Ontology: Structured fields for filtering/sorting (years_experience, seniority_level, skills[])
32
+
33
+ 2. **Legal (Contract Analysis)**
34
+ - Standard pipeline: Semantic search over contract text
35
+ - Ontology: Queryable fields (parties, effective_date, payment_amount, key_obligations[])
36
+
37
+ 3. **Medical (Health Records)**
38
+ - Standard pipeline: Find mentions of conditions
39
+ - Ontology: Structured diagnoses, medications, dosages, treatment plans
40
+
41
+ 4. **Finance (Report Analysis)**
42
+ - Standard pipeline: Search for financial terms
43
+ - Ontology: Extracted metrics, risk_flags, trends, forecasts
44
+
45
+ **Example Flow:**
46
+ 1. Tenant creates OntologyConfig: "Run cv-parser-v1 on files with mime_type='application/pdf' and tags=['resume']"
47
+ 2. File uploaded with tags=["resume"]
48
+ 3. Normal processing: File → chunks → resources
49
+ 4. Dreaming worker detects matching OntologyConfig
50
+ 5. Loads cv-parser-v1 agent schema from database
51
+ 6. Runs agent on file content → extracts structured data
52
+ 7. Stores Ontology with extracted_data = {candidate_name, skills, experience, education, ...}
53
+ 8. Ontology is now queryable via LOOKUP, SEARCH, or direct SQL
54
+
55
+ **Design:**
56
+ - Each ontology links to a File via file_id
57
+ - Agent schema tracked via agent_schema_id (human-readable label, not UUID)
58
+ - Structured data in `extracted_data` (arbitrary JSON, schema defined by agent)
59
+ - Embeddings generated for semantic search (configurable fields via agent schema)
60
+ - Multiple ontologies per file using different schemas
61
+ - Tenant-isolated: OntologyConfigs are tenant-scoped
62
+ """
63
+
64
+ from typing import Any, Optional
65
+ from uuid import UUID
66
+
67
+ from pydantic import ConfigDict
68
+
69
+ from ..core.core_model import CoreModel
70
+
71
+
72
class Ontology(CoreModel):
    """Domain-specific knowledge extracted from files using custom agents.

    Attributes:
        name: Human-readable label for this ontology instance
        file_id: Foreign key to File entity that was processed
        agent_schema_id: Foreign key to Schema entity that performed extraction
        provider_name: LLM provider used for extraction (e.g., "anthropic", "openai")
        model_name: Specific model used (e.g., "claude-sonnet-4-5")
        extracted_data: Structured data extracted by agent (arbitrary JSON)
        confidence_score: Optional confidence score from extraction (0.0-1.0)
        extraction_timestamp: When extraction was performed
        embedding_text: Text used for generating embedding (derived from extracted_data)

    Inherited from CoreModel:
        id: UUID or string identifier
        created_at: Entity creation timestamp
        updated_at: Last update timestamp
        deleted_at: Soft deletion timestamp
        tenant_id: Multi-tenancy isolation
        user_id: Ownership
        graph_edges: Relationships to other entities
        metadata: Flexible metadata storage
        tags: Classification tags
        column: Database schema metadata

    Example Usage:
        # CV extraction
        cv_ontology = Ontology(
            name="john-doe-cv-2024",
            file_id="file-uuid-123",
            agent_schema_id="cv-parser-v1",
            provider_name="anthropic",
            model_name="claude-sonnet-4-5-20250929",
            extracted_data={
                "candidate_name": "John Doe",
                "email": "john@example.com",
                "skills": ["Python", "PostgreSQL", "Kubernetes"],
                "experience": [
                    {
                        "company": "TechCorp",
                        "role": "Senior Engineer",
                        "years": 3,
                        "achievements": ["Led migration to k8s", "Reduced costs 40%"]
                    }
                ],
                "education": [
                    {"degree": "BS Computer Science", "institution": "MIT", "year": 2018}
                ]
            },
            confidence_score=0.95,
            tags=["cv", "engineering", "senior-level"]
        )

        # Contract extraction
        contract_ontology = Ontology(
            name="acme-supplier-agreement-2024",
            file_id="file-uuid-456",
            agent_schema_id="contract-parser-v2",
            provider_name="openai",
            model_name="gpt-4o",
            extracted_data={
                "contract_type": "supplier_agreement",
                "parties": [
                    {"name": "ACME Corp", "role": "buyer"},
                    {"name": "SupplyChain Inc", "role": "supplier"}
                ],
                "effective_date": "2024-01-01",
                "termination_date": "2026-12-31",
                "payment_terms": {
                    "amount": 500000,
                    "currency": "USD",
                    "frequency": "quarterly"
                },
                "key_obligations": [
                    "Supplier must deliver within 30 days",
                    "Buyer must pay within 60 days of invoice"
                ]
            },
            confidence_score=0.92,
            tags=["contract", "supplier", "procurement"]
        )
    """

    # Core fields (all required: no defaults are declared)
    name: str
    # Accepts either a UUID object or a plain string identifier.
    file_id: UUID | str
    agent_schema_id: str  # Natural language label of Schema entity

    # Extraction metadata
    provider_name: str  # LLM provider (anthropic, openai, etc.)
    model_name: str  # Specific model used
    extracted_data: dict[str, Any]  # Arbitrary structured data from agent
    # Documented as 0.0-1.0, but the annotation is a bare float: no range
    # constraint is declared on this field.
    confidence_score: Optional[float] = None  # 0.0-1.0 if provided by agent
    # Typed as a string, not datetime; the ISO8601 format is a convention and
    # is not validated by this annotation.
    extraction_timestamp: Optional[str] = None  # ISO8601 timestamp

    # Semantic search support
    embedding_text: Optional[str] = None  # Text for embedding generation

    # Schema-level description plus one example payload for the generated
    # JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "description": "Domain-specific knowledge extracted from files using custom agents",
            "examples": [
                {
                    "name": "john-doe-cv-2024",
                    "file_id": "550e8400-e29b-41d4-a716-446655440000",
                    "agent_schema_id": "cv-parser-v1",
                    "provider_name": "anthropic",
                    "model_name": "claude-sonnet-4-5-20250929",
                    "extracted_data": {
                        "candidate_name": "John Doe",
                        "skills": ["Python", "PostgreSQL"],
                        "experience": []
                    },
                    "confidence_score": 0.95,
                    "tags": ["cv", "engineering"]
                }
            ]
        }
    )
@@ -0,0 +1,131 @@
1
+ """OntologyConfig entity for user-defined ontology extraction rules.
2
+
3
+ OntologyConfig allows users to define which agent schemas should be applied to
4
+ which files during the dreaming/processing workflow. This enables domain-specific
5
+ knowledge extraction tailored to user needs.
6
+
7
+ Examples:
8
+ - "Apply cv-parser-v1 to all PDF files in /resumes/"
9
+ - "Apply contract-analyzer-v2 to files tagged with 'legal'"
10
+ - "Apply medical-records-extractor to files with mime_type application/pdf AND tags ['medical']"
11
+
12
+ Design:
13
+ - Each config is tenant-scoped for isolation
14
+ - File matching via mime_type patterns, tag filters, and URI patterns
15
+ - Multiple configs can match a single file (all will be applied)
16
+ - Priority field for execution order when multiple configs match
17
+ - Enabled/disabled toggle for temporary deactivation
18
+ """
19
+
20
+ from typing import Optional
21
+
22
+ from pydantic import ConfigDict
23
+
24
+ from ..core.core_model import CoreModel
25
+
26
+
27
class OntologyConfig(CoreModel):
    """User configuration for automatic ontology extraction.

    Attributes:
        name: Human-readable config name
        agent_schema_id: Foreign key to Schema entity to use for extraction
        description: Purpose and scope of this config

        # File matching rules (ANY matching rule triggers extraction)
        mime_type_pattern: Regex pattern for file MIME types (e.g., "application/pdf")
        uri_pattern: Regex pattern for file URIs (e.g., "s3://bucket/resumes/.*")
        tag_filter: List of tags (file must have ALL tags to match)

        # Execution control
        priority: Execution order (higher = earlier, default 100)
        enabled: Whether this config is active (default True)

        # LLM provider configuration
        provider_name: Optional LLM provider override (defaults to settings)
        model_name: Optional model override (defaults to settings)

    Inherited from CoreModel:
        id, created_at, updated_at, deleted_at, tenant_id, user_id,
        graph_edges, metadata, tags, column

    Example Usage:
        # CV extraction for recruitment
        cv_config = OntologyConfig(
            name="recruitment-cv-parser",
            agent_schema_id="cv-parser-v1",
            description="Extract candidate information from resumes",
            mime_type_pattern="application/pdf",
            uri_pattern=".*/resumes/.*",
            tag_filter=["cv", "candidate"],
            priority=100,
            enabled=True,
            tenant_id="acme-corp",
            tags=["recruitment", "hr"]
        )

        # Contract analysis for legal team
        contract_config = OntologyConfig(
            name="legal-contract-analyzer",
            agent_schema_id="contract-parser-v2",
            description="Extract key terms from supplier contracts",
            mime_type_pattern="application/(pdf|msword|vnd.openxmlformats.*)",
            tag_filter=["legal", "contract"],
            priority=200,  # Higher priority = runs first
            enabled=True,
            provider_name="openai",  # Override default provider
            model_name="gpt-4o",
            tenant_id="acme-corp",
            tags=["legal", "procurement"]
        )

        # Medical records for healthcare
        medical_config = OntologyConfig(
            name="medical-records-extractor",
            agent_schema_id="medical-parser-v1",
            description="Extract diagnoses and treatments from medical records",
            mime_type_pattern="application/pdf",
            tag_filter=["medical", "patient-record"],
            priority=50,
            enabled=True,
            tenant_id="healthsystem",
            tags=["medical", "hipaa-compliant"]
        )
    """

    # Core fields (name and agent_schema_id are required)
    name: str
    agent_schema_id: str  # Foreign key to Schema entity
    description: Optional[str] = None

    # File matching rules (ANY rule can trigger match)
    # The patterns are stored as plain strings; nothing in this class compiles
    # or validates them as regular expressions.
    mime_type_pattern: Optional[str] = None  # Regex for MIME type
    uri_pattern: Optional[str] = None  # Regex for file URI
    # NOTE(review): literal [] default, whereas the list fields on Moment,
    # Resource and Schema use Field(default_factory=list). Presumably safe
    # because pydantic copies field defaults per instance -- confirm before
    # changing, since a default_factory may alter the generated schema default.
    tag_filter: list[str] = []  # File must have ALL tags

    # Execution control
    priority: int = 100  # Higher = runs first
    enabled: bool = True  # Toggle to disable without deleting

    # Optional provider overrides (None means no override is configured here)
    provider_name: Optional[str] = None  # Override default provider
    model_name: Optional[str] = None  # Override default model

    # Schema-level description plus one example payload for the generated
    # JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "description": "Configuration for automatic ontology extraction from files",
            "examples": [
                {
                    "name": "recruitment-cv-parser",
                    "agent_schema_id": "cv-parser-v1",
                    "description": "Extract candidate information from resumes",
                    "mime_type_pattern": "application/pdf",
                    "uri_pattern": ".*/resumes/.*",
                    "tag_filter": ["cv", "candidate"],
                    "priority": 100,
                    "enabled": True,
                    "tenant_id": "acme-corp"
                }
            ]
        }
    )
@@ -0,0 +1,95 @@
1
+ """
2
+ Resource - Base content unit in REM.
3
+
4
+ Resources represent documents, conversations, artifacts, and any other
5
+ content units that form the foundation of the REM memory system.
6
+
7
+ Resources are the primary input to dreaming workflows:
8
+ - First-order dreaming extracts Moments from Resources
9
+ - Second-order dreaming creates affinity edges between Resources
10
+ - Entity extraction populates related_entities field
11
+ - Graph edges stored in graph_edges (inherited from CoreModel)
12
+
13
+ Key Fields:
14
+ - name: Human-readable resource identifier (used in graph labels)
15
+ - uri: Content location or identifier
16
+ - content: Actual content text
17
+ - timestamp: Content creation/publication time
18
+ - category: Resource classification (document, conversation, artifact, etc.)
19
+ - related_entities: Extracted entities (people, projects, concepts)
20
+ """
21
+
22
+ from datetime import datetime
23
+ from typing import Optional
24
+
25
+ from pydantic import Field, model_validator
26
+
27
+ from ..core import CoreModel
28
+
29
+
30
class Resource(CoreModel):
    """
    Base content unit in REM.

    Resources are content units that feed into dreaming workflows for moment
    extraction and affinity graph construction. Tenant isolation is provided
    via CoreModel.tenant_id field.
    """

    name: Optional[str] = Field(
        default=None,
        description="Human-readable resource name (used as graph label). Auto-generated from uri+ordinal if not provided.",
        json_schema_extra={"entity_key": True},  # Primary business key for KV lookups
    )
    uri: Optional[str] = Field(
        default=None,
        description="Content URI or identifier (file path, URL, etc.)",
    )
    ordinal: int = Field(
        default=0,
        description="Chunk ordinal for splitting large documents (0 for single-chunk resources)",
        json_schema_extra={"composite_key": True},  # Part of composite unique constraint
    )
    content: str = Field(
        default="",
        description="Resource content text",
    )
    # NOTE(review): datetime.utcnow returns a naive datetime and is deprecated
    # since Python 3.12. Left unchanged here because switching to an aware
    # datetime would change the value type callers receive.
    timestamp: datetime = Field(
        default_factory=datetime.utcnow,
        description="Resource timestamp (content creation/publication time)",
    )
    category: Optional[str] = Field(
        default=None,
        description="Resource category (document, conversation, artifact, etc.)",
    )
    related_entities: list[dict] = Field(
        default_factory=list,
        description="Extracted entities (people, projects, concepts) with metadata",
    )

    @model_validator(mode='after')
    def generate_name_if_missing(self) -> 'Resource':
        """Auto-generate name from uri+ordinal if not provided.

        The name is the last path segment of ``uri`` with its final extension
        removed, suffixed with ``-chunk-<ordinal>`` when ``ordinal > 0``.

        If no usable filename can be derived (no uri, or a uri consisting only
        of slashes), the name falls back to ``resource-<first 8 chars of id>``
        or, without an id, ``unnamed-resource``. This guarantees the generated
        name is never an empty string.

        Returns:
            The same instance, with ``name`` filled in if it was empty.
        """
        if self.name:
            return self

        filename = ""
        if self.uri:
            # Last path segment, ignoring any trailing slashes.
            filename = self.uri.rstrip('/').split('/')[-1]

            # Drop the final extension for cleaner names. A dotfile such as
            # ".env" has nothing before the dot, so keep it whole rather than
            # reducing the name to "".
            stem = filename.rsplit('.', 1)[0]
            if stem:
                filename = stem

        if filename:
            if self.ordinal > 0:
                self.name = f"{filename}-chunk-{self.ordinal}"
            else:
                self.name = filename
        elif self.id:
            # Fallback: no filename could be derived, so label by ID.
            self.name = f"resource-{str(self.id)[:8]}"
        else:
            self.name = "unnamed-resource"

        return self
@@ -0,0 +1,87 @@
1
+ """
2
+ Schema - Agent schema definitions in REM.
3
+
4
+ Schemas represent agent definitions that can be loaded into Pydantic AI.
5
+ They store JsonSchema specifications that define agent capabilities, tools,
6
+ and output structures.
7
+
8
+ Schemas are used for:
9
+ - Agent definition storage and versioning
10
+ - Dynamic agent loading via X-Agent-Schema header
11
+ - Agent registry and discovery
12
+ - Schema validation and documentation
13
+ - Ontology extraction configuration
14
+
15
+ Key Fields:
16
+ - name: Human-readable schema identifier
17
+ - content: Markdown documentation and instructions
18
+ - spec: JsonSchema specification (Pydantic model definition)
19
+ - category: Schema classification ('agent' or 'evaluator'; mapped from json_schema_extra.kind during ingestion)
20
+ - provider_configs: Optional LLM provider configurations (for multi-provider support)
21
+ - embedding_fields: Fields in extracted_data that should be embedded for semantic search
22
+ """
23
+
24
+ from typing import Optional
25
+
26
+ from pydantic import Field
27
+
28
+ from ..core import CoreModel
29
+
30
+
31
class Schema(CoreModel):
    """
    Agent schema definition.

    Schemas define agents that can be dynamically loaded into Pydantic AI.
    They store JsonSchema specifications with embedded metadata for tools,
    resources, and system prompts.

    For ontology extraction agents:
    - `provider_configs` enables multi-provider support (test across Anthropic, OpenAI, etc.)
    - `embedding_fields` specifies which output fields should be embedded for semantic search

    Tenant isolation is provided via CoreModel.tenant_id field.
    """

    # Required: the schema's name, which doubles as its identifier.
    name: str = Field(
        ...,
        description="Human-readable schema name (used as identifier)",
    )

    # Free-form markdown; defaults to an empty string rather than None.
    content: str = Field(
        default="",
        description="Markdown documentation and instructions for the schema",
    )

    # Required: typed as a plain dict, so its internal structure is not
    # validated by this annotation.
    spec: dict = Field(
        ...,
        description="JsonSchema specification defining the agent structure and capabilities",
    )

    # Optional free-form string; the values named in the description are not
    # enforced by the type.
    category: Optional[str] = Field(
        default=None,
        description=(
            "Schema category distinguishing schema types. "
            "Values: 'agent' (AI agents), 'evaluator' (LLM-as-a-Judge evaluators). "
            "Maps directly from json_schema_extra.kind field during ingestion."
        ),
    )

    # Ontology extraction support
    # Each entry is an untyped dict; the 'provider_name'/'model_name' keys in
    # the description are a convention, not a validated shape.
    provider_configs: list[dict] = Field(
        default_factory=list,
        description=(
            "Optional provider configurations for multi-provider testing. "
            "Each dict has 'provider_name' and 'model_name'. "
            "Example: [{'provider_name': 'anthropic', 'model_name': 'claude-sonnet-4-5'}]"
        ),
    )

    # Defaults to an empty list, i.e. no fields are nominated for embedding.
    embedding_fields: list[str] = Field(
        default_factory=list,
        description=(
            "JSON paths in extracted_data to embed for semantic search. "
            "Example: ['summary', 'candidate_name', 'skills'] for CV extraction. "
            "Values will be concatenated and embedded using configured embedding provider."
        ),
    )