remdb 0.2.6__py3-none-any.whl → 0.3.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (104)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +28 -22
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/mcp/tool_wrapper.py +29 -3
  9. rem/agentic/otel/setup.py +92 -4
  10. rem/agentic/providers/phoenix.py +32 -43
  11. rem/agentic/providers/pydantic_ai.py +168 -24
  12. rem/agentic/schema.py +358 -21
  13. rem/agentic/tools/rem_tools.py +3 -3
  14. rem/api/README.md +238 -1
  15. rem/api/deps.py +255 -0
  16. rem/api/main.py +154 -37
  17. rem/api/mcp_router/resources.py +1 -1
  18. rem/api/mcp_router/server.py +26 -5
  19. rem/api/mcp_router/tools.py +454 -7
  20. rem/api/middleware/tracking.py +172 -0
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +124 -0
  23. rem/api/routers/chat/completions.py +152 -16
  24. rem/api/routers/chat/models.py +7 -3
  25. rem/api/routers/chat/sse_events.py +526 -0
  26. rem/api/routers/chat/streaming.py +608 -45
  27. rem/api/routers/dev.py +81 -0
  28. rem/api/routers/feedback.py +148 -0
  29. rem/api/routers/messages.py +473 -0
  30. rem/api/routers/models.py +78 -0
  31. rem/api/routers/query.py +360 -0
  32. rem/api/routers/shared_sessions.py +406 -0
  33. rem/auth/middleware.py +126 -27
  34. rem/cli/commands/README.md +237 -64
  35. rem/cli/commands/ask.py +15 -11
  36. rem/cli/commands/cluster.py +1300 -0
  37. rem/cli/commands/configure.py +170 -97
  38. rem/cli/commands/db.py +396 -139
  39. rem/cli/commands/experiments.py +278 -96
  40. rem/cli/commands/process.py +22 -15
  41. rem/cli/commands/scaffold.py +47 -0
  42. rem/cli/commands/schema.py +97 -50
  43. rem/cli/main.py +37 -6
  44. rem/config.py +2 -2
  45. rem/models/core/core_model.py +7 -1
  46. rem/models/core/rem_query.py +5 -2
  47. rem/models/entities/__init__.py +21 -0
  48. rem/models/entities/domain_resource.py +38 -0
  49. rem/models/entities/feedback.py +123 -0
  50. rem/models/entities/message.py +30 -1
  51. rem/models/entities/session.py +83 -0
  52. rem/models/entities/shared_session.py +180 -0
  53. rem/models/entities/user.py +10 -3
  54. rem/registry.py +373 -0
  55. rem/schemas/agents/rem.yaml +7 -3
  56. rem/services/content/providers.py +94 -140
  57. rem/services/content/service.py +115 -24
  58. rem/services/dreaming/affinity_service.py +2 -16
  59. rem/services/dreaming/moment_service.py +2 -15
  60. rem/services/embeddings/api.py +24 -17
  61. rem/services/embeddings/worker.py +16 -16
  62. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  63. rem/services/phoenix/client.py +252 -19
  64. rem/services/postgres/README.md +159 -15
  65. rem/services/postgres/__init__.py +2 -1
  66. rem/services/postgres/diff_service.py +531 -0
  67. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  68. rem/services/postgres/repository.py +132 -0
  69. rem/services/postgres/schema_generator.py +291 -9
  70. rem/services/postgres/service.py +6 -6
  71. rem/services/rate_limit.py +113 -0
  72. rem/services/rem/README.md +14 -0
  73. rem/services/rem/parser.py +44 -9
  74. rem/services/rem/service.py +36 -2
  75. rem/services/session/compression.py +17 -1
  76. rem/services/session/reload.py +1 -1
  77. rem/services/user_service.py +98 -0
  78. rem/settings.py +169 -22
  79. rem/sql/background_indexes.sql +21 -16
  80. rem/sql/migrations/001_install.sql +387 -54
  81. rem/sql/migrations/002_install_models.sql +2320 -393
  82. rem/sql/migrations/003_optional_extensions.sql +326 -0
  83. rem/sql/migrations/004_cache_system.sql +548 -0
  84. rem/utils/__init__.py +18 -0
  85. rem/utils/constants.py +97 -0
  86. rem/utils/date_utils.py +228 -0
  87. rem/utils/embeddings.py +17 -4
  88. rem/utils/files.py +167 -0
  89. rem/utils/mime_types.py +158 -0
  90. rem/utils/model_helpers.py +156 -1
  91. rem/utils/schema_loader.py +284 -21
  92. rem/utils/sql_paths.py +146 -0
  93. rem/utils/sql_types.py +3 -1
  94. rem/utils/vision.py +9 -14
  95. rem/workers/README.md +14 -14
  96. rem/workers/__init__.py +2 -1
  97. rem/workers/db_maintainer.py +74 -0
  98. rem/workers/unlogged_maintainer.py +463 -0
  99. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/METADATA +598 -171
  100. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/RECORD +102 -73
  101. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/WHEEL +1 -1
  102. rem/sql/002_install_models.sql +0 -1068
  103. rem/sql/install_models.sql +0 -1038
  104. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
rem/agentic/schema.py CHANGED
@@ -13,7 +13,7 @@ The schema protocol serves as:
13
13
  """
14
14
 
15
15
  from typing import Any, Literal
16
- from pydantic import BaseModel, Field
16
+ from pydantic import BaseModel, Field, field_validator
17
17
 
18
18
 
19
19
  class MCPToolReference(BaseModel):
@@ -23,11 +23,21 @@ class MCPToolReference(BaseModel):
23
23
  Tools are functions that agents can call during execution to
24
24
  interact with external systems, retrieve data, or perform actions.
25
25
 
26
- Example:
26
+ Two usage patterns:
27
+ 1. With mcp_servers config: Just declare name + description, tools loaded from MCP servers
28
+ 2. Explicit MCP server: Specify mcp_server to load tool from specific server
29
+
30
+ Example (declarative with mcp_servers):
31
+ {
32
+ "name": "search_rem",
33
+ "description": "Execute REM queries for entity lookup and search"
34
+ }
35
+
36
+ Example (explicit server):
27
37
  {
28
38
  "name": "lookup_entity",
29
39
  "mcp_server": "rem",
30
- "description": "Lookup entities by exact key with O(1) performance"
40
+ "description": "Lookup entities by exact key"
31
41
  }
32
42
  """
33
43
 
@@ -38,20 +48,20 @@ class MCPToolReference(BaseModel):
38
48
  )
39
49
  )
40
50
 
41
- mcp_server: str = Field(
51
+ mcp_server: str | None = Field(
52
+ default=None,
42
53
  description=(
43
- "MCP server identifier. Resolved via environment variable: "
44
- "MCP_SERVER_{NAME} or MCP__{NAME}__URL. "
45
- "Common values: 'rem' (REM knowledge graph), 'filesystem', 'web'."
54
+ "MCP server identifier (optional when using mcp_servers config). "
55
+ "If not specified, tool is expected from configured mcp_servers. "
56
+ "Resolved via environment variable: MCP_SERVER_{NAME} or MCP__{NAME}__URL."
46
57
  )
47
58
  )
48
59
 
49
60
  description: str | None = Field(
50
61
  default=None,
51
62
  description=(
52
- "Optional description override. If provided, replaces the tool's "
53
- "description from the MCP server in the agent's context. "
54
- "Use this to provide agent-specific guidance on tool usage."
63
+ "Tool description for the agent. Explains what the tool does "
64
+ "and when to use it. This is visible to the LLM."
55
65
  ),
56
66
  )
57
67
 
@@ -63,29 +73,90 @@ class MCPResourceReference(BaseModel):
63
73
  Resources are data sources that can be read by agents, such as
64
74
  knowledge graph entities, files, or API endpoints.
65
75
 
66
- Example:
76
+ Two formats supported:
77
+ 1. uri: Exact URI or URI with query params
78
+ 2. uri_pattern: Regex pattern for flexible matching
79
+
80
+ Example (exact URI):
81
+ {
82
+ "uri": "rem://schemas",
83
+ "name": "Agent Schemas",
84
+ "description": "List all available agent schemas"
85
+ }
86
+
87
+ Example (pattern):
67
88
  {
68
89
  "uri_pattern": "rem://resources/.*",
69
90
  "mcp_server": "rem"
70
91
  }
71
92
  """
72
93
 
73
- uri_pattern: str = Field(
94
+ # Support both exact URI and pattern
95
+ uri: str | None = Field(
96
+ default=None,
97
+ description=(
98
+ "Exact resource URI or URI with query parameters. "
99
+ "Examples: 'rem://schemas', 'rem://resources?category=drug.*'"
100
+ )
101
+ )
102
+
103
+ uri_pattern: str | None = Field(
104
+ default=None,
74
105
  description=(
75
106
  "Regex pattern matching resource URIs. "
76
- "Examples: "
77
- "'rem://resources/.*' (all resources), "
78
- "'rem://moments/.*' (all moments), "
79
- "'file:///data/.*' (local files). "
80
- "Supports full regex syntax for flexible matching."
107
+ "Examples: 'rem://resources/.*' (all resources). "
108
+ "Use uri for exact URIs, uri_pattern for regex matching."
109
+ )
110
+ )
111
+
112
+ name: str | None = Field(
113
+ default=None,
114
+ description="Human-readable name for the resource."
115
+ )
116
+
117
+ description: str | None = Field(
118
+ default=None,
119
+ description="Description of what the resource provides."
120
+ )
121
+
122
+ mcp_server: str | None = Field(
123
+ default=None,
124
+ description=(
125
+ "MCP server identifier (optional when using mcp_servers config). "
126
+ "Resolved via environment variable MCP_SERVER_{NAME}."
127
+ )
128
+ )
129
+
130
+
131
+ class MCPServerConfig(BaseModel):
132
+ """
133
+ MCP server configuration for in-process tool loading.
134
+
135
+ Example:
136
+ {
137
+ "type": "local",
138
+ "module": "rem.mcp_server",
139
+ "id": "rem-local"
140
+ }
141
+ """
142
+
143
+ type: Literal["local"] = Field(
144
+ default="local",
145
+ description="Server type. Currently only 'local' (in-process) is supported.",
146
+ )
147
+
148
+ module: str = Field(
149
+ description=(
150
+ "Python module path containing the MCP server. "
151
+ "The module must export an 'mcp' object that supports get_tools(). "
152
+ "Example: 'rem.mcp_server'"
81
153
  )
82
154
  )
83
155
 
84
- mcp_server: str = Field(
156
+ id: str = Field(
85
157
  description=(
86
- "MCP server identifier that provides these resources. "
87
- "Resolved via environment variable MCP_SERVER_{NAME}. "
88
- "The server must expose resources matching the uri_pattern."
158
+ "Server identifier for logging and debugging. "
159
+ "Example: 'rem-local'"
89
160
  )
90
161
  )
91
162
 
@@ -130,6 +201,37 @@ class AgentSchemaMetadata(BaseModel):
130
201
  ),
131
202
  )
132
203
 
204
+ # System prompt extension (combined with the schema description when present)
205
+ system_prompt: str | None = Field(
206
+ default=None,
207
+ description=(
208
+ "Custom system prompt that overrides or extends the schema description. "
209
+ "When present, this is combined with the main schema.description field "
210
+ "to form the complete system prompt. Use this for detailed instructions "
211
+ "that you don't want in the public schema description."
212
+ ),
213
+ )
214
+
215
+ # Structured output toggle
216
+ structured_output: bool = Field(
217
+ default=True,
218
+ description=(
219
+ "Whether to enforce structured JSON output. "
220
+ "When False, the agent produces free-form text and schema properties "
221
+ "are converted to prompt guidance instead. Default: True (JSON output)."
222
+ ),
223
+ )
224
+
225
+ # MCP server configurations (for dynamic tool loading)
226
+ mcp_servers: list[MCPServerConfig] = Field(
227
+ default_factory=list,
228
+ description=(
229
+ "MCP server configurations for dynamic tool loading. "
230
+ "Servers are loaded in-process at agent creation time. "
231
+ "All tools from configured servers become available to the agent."
232
+ ),
233
+ )
234
+
133
235
  tools: list[MCPToolReference] = Field(
134
236
  default_factory=list,
135
237
  description=(
@@ -394,3 +496,238 @@ def create_agent_schema(
394
496
  json_schema_extra=metadata.model_dump(),
395
497
  **kwargs,
396
498
  )
499
+
500
+
501
+ # =============================================================================
502
+ # YAML and Database Serialization
503
+ # =============================================================================
504
+
505
+
506
+ def schema_to_dict(schema: AgentSchema, exclude_none: bool = True) -> dict[str, Any]:
507
+ """
508
+ Serialize AgentSchema to a dictionary suitable for YAML or database storage.
509
+
510
+ This produces the canonical format used in:
511
+ - YAML files (schemas/agents/*.yaml)
512
+ - Database spec column (schemas table)
513
+ - API responses
514
+
515
+ Args:
516
+ schema: AgentSchema instance to serialize
517
+ exclude_none: If True, omit None values from output
518
+
519
+ Returns:
520
+ Dictionary representation of the schema
521
+
522
+ Example:
523
+ >>> schema = AgentSchema(
524
+ ... description="System prompt...",
525
+ ... properties={"answer": {"type": "string"}},
526
+ ... json_schema_extra={"name": "my-agent", "structured_output": False}
527
+ ... )
528
+ >>> d = schema_to_dict(schema)
529
+ >>> d["json_schema_extra"]["name"]
530
+ "my-agent"
531
+ """
532
+ return schema.model_dump(exclude_none=exclude_none)
533
+
534
+
535
+ def schema_from_dict(data: dict[str, Any]) -> AgentSchema:
536
+ """
537
+ Deserialize a dictionary to AgentSchema.
538
+
539
+ This handles:
540
+ - YAML files loaded with yaml.safe_load()
541
+ - Database spec column (JSON)
542
+ - API request bodies
543
+
544
+ Args:
545
+ data: Dictionary containing schema data
546
+
547
+ Returns:
548
+ Validated AgentSchema instance
549
+
550
+ Raises:
551
+ ValidationError: If data doesn't match schema structure
552
+
553
+ Example:
554
+ >>> data = {"type": "object", "description": "...", "properties": {}, "json_schema_extra": {"name": "test"}}
555
+ >>> schema = schema_from_dict(data)
556
+ >>> schema.json_schema_extra["name"]
557
+ "test"
558
+ """
559
+ return AgentSchema.model_validate(data)
560
+
561
+
562
+ def schema_to_yaml(schema: AgentSchema) -> str:
563
+ """
564
+ Serialize AgentSchema to YAML string.
565
+
566
+ The output format matches the canonical schema file format:
567
+ ```yaml
568
+ type: object
569
+ description: |
570
+ System prompt here...
571
+ properties:
572
+ answer:
573
+ type: string
574
+ json_schema_extra:
575
+ name: my-agent
576
+ system_prompt: |
577
+ Extended prompt here...
578
+ ```
579
+
580
+ Args:
581
+ schema: AgentSchema instance to serialize
582
+
583
+ Returns:
584
+ YAML string representation
585
+
586
+ Example:
587
+ >>> schema = create_agent_schema(
588
+ ... description="You are a test agent",
589
+ ... properties={"answer": {"type": "string"}},
590
+ ... required=["answer"],
591
+ ... name="test-agent"
592
+ ... )
593
+ >>> yaml_str = schema_to_yaml(schema)
594
+ >>> "test-agent" in yaml_str
595
+ True
596
+ """
597
+ import yaml
598
+
599
+ return yaml.dump(
600
+ schema_to_dict(schema),
601
+ default_flow_style=False,
602
+ allow_unicode=True,
603
+ sort_keys=False,
604
+ )
605
+
606
+
607
+ def schema_from_yaml(yaml_content: str) -> AgentSchema:
608
+ """
609
+ Deserialize YAML string to AgentSchema.
610
+
611
+ Args:
612
+ yaml_content: YAML string containing schema definition
613
+
614
+ Returns:
615
+ Validated AgentSchema instance
616
+
617
+ Raises:
618
+ yaml.YAMLError: If YAML parsing fails
619
+ ValidationError: If schema structure is invalid
620
+
621
+ Example:
622
+ >>> yaml_str = '''
623
+ ... type: object
624
+ ... description: Test agent
625
+ ... properties:
626
+ ... answer:
627
+ ... type: string
628
+ ... json_schema_extra:
629
+ ... name: test
630
+ ... '''
631
+ >>> schema = schema_from_yaml(yaml_str)
632
+ >>> schema.json_schema_extra["name"]
633
+ "test"
634
+ """
635
+ import yaml
636
+
637
+ data = yaml.safe_load(yaml_content)
638
+ return schema_from_dict(data)
639
+
640
+
641
+ def schema_from_yaml_file(file_path: str) -> AgentSchema:
642
+ """
643
+ Load AgentSchema from a YAML file.
644
+
645
+ Args:
646
+ file_path: Path to YAML file
647
+
648
+ Returns:
649
+ Validated AgentSchema instance
650
+
651
+ Raises:
652
+ FileNotFoundError: If file doesn't exist
653
+ yaml.YAMLError: If YAML parsing fails
654
+ ValidationError: If schema structure is invalid
655
+
656
+ Example:
657
+ >>> schema = schema_from_yaml_file("schemas/agents/rem.yaml")
658
+ >>> schema.json_schema_extra["name"]
659
+ "rem"
660
+ """
661
+ with open(file_path, "r") as f:
662
+ return schema_from_yaml(f.read())
663
+
664
+
665
+ def get_system_prompt(schema: AgentSchema | dict[str, Any]) -> str:
666
+ """
667
+ Extract the complete system prompt from a schema.
668
+
669
+ Combines:
670
+ 1. schema.description (base system prompt / public description)
671
+ 2. json_schema_extra.system_prompt (extended instructions if present)
672
+
673
+ Args:
674
+ schema: AgentSchema instance or raw dict
675
+
676
+ Returns:
677
+ Complete system prompt string
678
+
679
+ Example:
680
+ >>> schema = AgentSchema(
681
+ ... description="Base description",
682
+ ... properties={},
683
+ ... json_schema_extra={"name": "test", "system_prompt": "Extended instructions"}
684
+ ... )
685
+ >>> prompt = get_system_prompt(schema)
686
+ >>> "Base description" in prompt and "Extended instructions" in prompt
687
+ True
688
+ """
689
+ if isinstance(schema, dict):
690
+ base = schema.get("description", "")
691
+ extra = schema.get("json_schema_extra", {})
692
+ custom = extra.get("system_prompt") if isinstance(extra, dict) else None
693
+ else:
694
+ base = schema.description
695
+ extra = schema.json_schema_extra
696
+ if isinstance(extra, dict):
697
+ custom = extra.get("system_prompt")
698
+ elif isinstance(extra, AgentSchemaMetadata):
699
+ custom = extra.system_prompt
700
+ else:
701
+ custom = None
702
+
703
+ if custom:
704
+ return f"{base}\n\n{custom}" if base else custom
705
+ return base
706
+
707
+
708
+ def get_metadata(schema: AgentSchema | dict[str, Any]) -> AgentSchemaMetadata:
709
+ """
710
+ Extract and validate metadata from a schema.
711
+
712
+ Args:
713
+ schema: AgentSchema instance or raw dict
714
+
715
+ Returns:
716
+ Validated AgentSchemaMetadata instance
717
+
718
+ Example:
719
+ >>> schema = {"json_schema_extra": {"name": "test", "system_prompt": "hello"}}
720
+ >>> meta = get_metadata(schema)
721
+ >>> meta.name
722
+ "test"
723
+ >>> meta.system_prompt
724
+ "hello"
725
+ """
726
+ if isinstance(schema, dict):
727
+ extra = schema.get("json_schema_extra", {})
728
+ else:
729
+ extra = schema.json_schema_extra
730
+
731
+ if isinstance(extra, AgentSchemaMetadata):
732
+ return extra
733
+ return AgentSchemaMetadata.model_validate(extra)
rem/agentic/tools/rem_tools.py CHANGED
@@ -162,10 +162,10 @@ async def search_rem_tool(
162
162
  return {"status": "error", "error": f"Unknown query_type: {query_type}"}
163
163
 
164
164
  # Execute query
165
- logger.info(f"Executing REM query: {query_type} for user {user_id}")
165
+ logger.debug(f"Executing REM query: {query_type} for user {user_id}")
166
166
  result = await rem_service.execute_query(query)
167
167
 
168
- logger.info(f"Query completed: {query_type}")
168
+ logger.debug(f"Query completed: {query_type}")
169
169
  return {
170
170
  "status": "success",
171
171
  "query_type": query_type,
@@ -212,7 +212,7 @@ async def ingest_file_tool(
212
212
  is_local_server=is_local_server,
213
213
  )
214
214
 
215
- logger.info(
215
+ logger.debug(
216
216
  f"File ingestion complete: {result['file_name']} "
217
217
  f"(status: {result['processing_status']}, "
218
218
  f"resources: {result['resources_created']})"