remdb 0.3.103__py3-none-any.whl → 0.3.118__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/context.py +28 -24
- rem/agentic/mcp/tool_wrapper.py +29 -3
- rem/agentic/otel/setup.py +92 -4
- rem/agentic/providers/pydantic_ai.py +88 -18
- rem/agentic/schema.py +358 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/main.py +85 -16
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +18 -4
- rem/api/mcp_router/tools.py +383 -16
- rem/api/routers/admin.py +218 -1
- rem/api/routers/chat/completions.py +30 -3
- rem/api/routers/chat/streaming.py +143 -3
- rem/api/routers/feedback.py +12 -319
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +13 -13
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1300 -0
- rem/cli/commands/configure.py +1 -3
- rem/cli/commands/db.py +354 -143
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +92 -45
- rem/cli/main.py +27 -6
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/shared_session.py +2 -28
- rem/registry.py +10 -4
- rem/services/content/service.py +30 -8
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/postgres/README.md +151 -26
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/reload.py +1 -1
- rem/settings.py +56 -7
- rem/sql/background_indexes.sql +19 -24
- rem/sql/migrations/001_install.sql +252 -69
- rem/sql/migrations/002_install_models.sql +2171 -593
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/schema_loader.py +17 -13
- rem/utils/sql_paths.py +146 -0
- rem/workers/__init__.py +2 -1
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/METADATA +149 -76
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/RECORD +54 -48
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/WHEEL +0 -0
- {remdb-0.3.103.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
rem/agentic/schema.py
CHANGED
|
@@ -13,7 +13,7 @@ The schema protocol serves as:
|
|
|
13
13
|
"""
|
|
14
14
|
|
|
15
15
|
from typing import Any, Literal
|
|
16
|
-
from pydantic import BaseModel, Field
|
|
16
|
+
from pydantic import BaseModel, Field, field_validator
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class MCPToolReference(BaseModel):
|
|
@@ -23,11 +23,21 @@ class MCPToolReference(BaseModel):
|
|
|
23
23
|
Tools are functions that agents can call during execution to
|
|
24
24
|
interact with external systems, retrieve data, or perform actions.
|
|
25
25
|
|
|
26
|
-
|
|
26
|
+
Two usage patterns:
|
|
27
|
+
1. With mcp_servers config: Just declare name + description, tools loaded from MCP servers
|
|
28
|
+
2. Explicit MCP server: Specify mcp_server to load tool from specific server
|
|
29
|
+
|
|
30
|
+
Example (declarative with mcp_servers):
|
|
31
|
+
{
|
|
32
|
+
"name": "search_rem",
|
|
33
|
+
"description": "Execute REM queries for entity lookup and search"
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
Example (explicit server):
|
|
27
37
|
{
|
|
28
38
|
"name": "lookup_entity",
|
|
29
39
|
"mcp_server": "rem",
|
|
30
|
-
"description": "Lookup entities by exact key
|
|
40
|
+
"description": "Lookup entities by exact key"
|
|
31
41
|
}
|
|
32
42
|
"""
|
|
33
43
|
|
|
@@ -38,20 +48,20 @@ class MCPToolReference(BaseModel):
|
|
|
38
48
|
)
|
|
39
49
|
)
|
|
40
50
|
|
|
41
|
-
mcp_server: str = Field(
|
|
51
|
+
mcp_server: str | None = Field(
|
|
52
|
+
default=None,
|
|
42
53
|
description=(
|
|
43
|
-
"MCP server identifier
|
|
44
|
-
"
|
|
45
|
-
"
|
|
54
|
+
"MCP server identifier (optional when using mcp_servers config). "
|
|
55
|
+
"If not specified, tool is expected from configured mcp_servers. "
|
|
56
|
+
"Resolved via environment variable: MCP_SERVER_{NAME} or MCP__{NAME}__URL."
|
|
46
57
|
)
|
|
47
58
|
)
|
|
48
59
|
|
|
49
60
|
description: str | None = Field(
|
|
50
61
|
default=None,
|
|
51
62
|
description=(
|
|
52
|
-
"
|
|
53
|
-
"
|
|
54
|
-
"Use this to provide agent-specific guidance on tool usage."
|
|
63
|
+
"Tool description for the agent. Explains what the tool does "
|
|
64
|
+
"and when to use it. This is visible to the LLM."
|
|
55
65
|
),
|
|
56
66
|
)
|
|
57
67
|
|
|
@@ -63,29 +73,90 @@ class MCPResourceReference(BaseModel):
|
|
|
63
73
|
Resources are data sources that can be read by agents, such as
|
|
64
74
|
knowledge graph entities, files, or API endpoints.
|
|
65
75
|
|
|
66
|
-
|
|
76
|
+
Two formats supported:
|
|
77
|
+
1. uri: Exact URI or URI with query params
|
|
78
|
+
2. uri_pattern: Regex pattern for flexible matching
|
|
79
|
+
|
|
80
|
+
Example (exact URI):
|
|
81
|
+
{
|
|
82
|
+
"uri": "rem://schemas",
|
|
83
|
+
"name": "Agent Schemas",
|
|
84
|
+
"description": "List all available agent schemas"
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
Example (pattern):
|
|
67
88
|
{
|
|
68
89
|
"uri_pattern": "rem://resources/.*",
|
|
69
90
|
"mcp_server": "rem"
|
|
70
91
|
}
|
|
71
92
|
"""
|
|
72
93
|
|
|
73
|
-
|
|
94
|
+
# Support both exact URI and pattern
|
|
95
|
+
uri: str | None = Field(
|
|
96
|
+
default=None,
|
|
97
|
+
description=(
|
|
98
|
+
"Exact resource URI or URI with query parameters. "
|
|
99
|
+
"Examples: 'rem://schemas', 'rem://resources?category=drug.*'"
|
|
100
|
+
)
|
|
101
|
+
)
|
|
102
|
+
|
|
103
|
+
uri_pattern: str | None = Field(
|
|
104
|
+
default=None,
|
|
74
105
|
description=(
|
|
75
106
|
"Regex pattern matching resource URIs. "
|
|
76
|
-
"Examples: "
|
|
77
|
-
"
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
107
|
+
"Examples: 'rem://resources/.*' (all resources). "
|
|
108
|
+
"Use uri for exact URIs, uri_pattern for regex matching."
|
|
109
|
+
)
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
name: str | None = Field(
|
|
113
|
+
default=None,
|
|
114
|
+
description="Human-readable name for the resource."
|
|
115
|
+
)
|
|
116
|
+
|
|
117
|
+
description: str | None = Field(
|
|
118
|
+
default=None,
|
|
119
|
+
description="Description of what the resource provides."
|
|
120
|
+
)
|
|
121
|
+
|
|
122
|
+
mcp_server: str | None = Field(
|
|
123
|
+
default=None,
|
|
124
|
+
description=(
|
|
125
|
+
"MCP server identifier (optional when using mcp_servers config). "
|
|
126
|
+
"Resolved via environment variable MCP_SERVER_{NAME}."
|
|
127
|
+
)
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
class MCPServerConfig(BaseModel):
|
|
132
|
+
"""
|
|
133
|
+
MCP server configuration for in-process tool loading.
|
|
134
|
+
|
|
135
|
+
Example:
|
|
136
|
+
{
|
|
137
|
+
"type": "local",
|
|
138
|
+
"module": "rem.mcp_server",
|
|
139
|
+
"id": "rem-local"
|
|
140
|
+
}
|
|
141
|
+
"""
|
|
142
|
+
|
|
143
|
+
type: Literal["local"] = Field(
|
|
144
|
+
default="local",
|
|
145
|
+
description="Server type. Currently only 'local' (in-process) is supported.",
|
|
146
|
+
)
|
|
147
|
+
|
|
148
|
+
module: str = Field(
|
|
149
|
+
description=(
|
|
150
|
+
"Python module path containing the MCP server. "
|
|
151
|
+
"The module must export an 'mcp' object that supports get_tools(). "
|
|
152
|
+
"Example: 'rem.mcp_server'"
|
|
81
153
|
)
|
|
82
154
|
)
|
|
83
155
|
|
|
84
|
-
|
|
156
|
+
id: str = Field(
|
|
85
157
|
description=(
|
|
86
|
-
"
|
|
87
|
-
"
|
|
88
|
-
"The server must expose resources matching the uri_pattern."
|
|
158
|
+
"Server identifier for logging and debugging. "
|
|
159
|
+
"Example: 'rem-local'"
|
|
89
160
|
)
|
|
90
161
|
)
|
|
91
162
|
|
|
@@ -130,6 +201,37 @@ class AgentSchemaMetadata(BaseModel):
|
|
|
130
201
|
),
|
|
131
202
|
)
|
|
132
203
|
|
|
204
|
+
# System prompt extension (combined with description when present)
|
|
205
|
+
system_prompt: str | None = Field(
|
|
206
|
+
default=None,
|
|
207
|
+
description=(
|
|
208
|
+
"Custom system prompt that overrides or extends the schema description. "
|
|
209
|
+
"When present, this is combined with the main schema.description field "
|
|
210
|
+
"to form the complete system prompt. Use this for detailed instructions "
|
|
211
|
+
"that you don't want in the public schema description."
|
|
212
|
+
),
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
# Structured output toggle
|
|
216
|
+
structured_output: bool = Field(
|
|
217
|
+
default=True,
|
|
218
|
+
description=(
|
|
219
|
+
"Whether to enforce structured JSON output. "
|
|
220
|
+
"When False, the agent produces free-form text and schema properties "
|
|
221
|
+
"are converted to prompt guidance instead. Default: True (JSON output)."
|
|
222
|
+
),
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
# MCP server configurations (for dynamic tool loading)
|
|
226
|
+
mcp_servers: list[MCPServerConfig] = Field(
|
|
227
|
+
default_factory=list,
|
|
228
|
+
description=(
|
|
229
|
+
"MCP server configurations for dynamic tool loading. "
|
|
230
|
+
"Servers are loaded in-process at agent creation time. "
|
|
231
|
+
"All tools from configured servers become available to the agent."
|
|
232
|
+
),
|
|
233
|
+
)
|
|
234
|
+
|
|
133
235
|
tools: list[MCPToolReference] = Field(
|
|
134
236
|
default_factory=list,
|
|
135
237
|
description=(
|
|
@@ -394,3 +496,238 @@ def create_agent_schema(
|
|
|
394
496
|
json_schema_extra=metadata.model_dump(),
|
|
395
497
|
**kwargs,
|
|
396
498
|
)
|
|
499
|
+
|
|
500
|
+
|
|
501
|
+
# =============================================================================
|
|
502
|
+
# YAML and Database Serialization
|
|
503
|
+
# =============================================================================
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def schema_to_dict(schema: AgentSchema, exclude_none: bool = True) -> dict[str, Any]:
|
|
507
|
+
"""
|
|
508
|
+
Serialize AgentSchema to a dictionary suitable for YAML or database storage.
|
|
509
|
+
|
|
510
|
+
This produces the canonical format used in:
|
|
511
|
+
- YAML files (schemas/agents/*.yaml)
|
|
512
|
+
- Database spec column (schemas table)
|
|
513
|
+
- API responses
|
|
514
|
+
|
|
515
|
+
Args:
|
|
516
|
+
schema: AgentSchema instance to serialize
|
|
517
|
+
exclude_none: If True, omit None values from output
|
|
518
|
+
|
|
519
|
+
Returns:
|
|
520
|
+
Dictionary representation of the schema
|
|
521
|
+
|
|
522
|
+
Example:
|
|
523
|
+
>>> schema = AgentSchema(
|
|
524
|
+
... description="System prompt...",
|
|
525
|
+
... properties={"answer": {"type": "string"}},
|
|
526
|
+
... json_schema_extra={"name": "my-agent", "structured_output": False}
|
|
527
|
+
... )
|
|
528
|
+
>>> d = schema_to_dict(schema)
|
|
529
|
+
>>> d["json_schema_extra"]["name"]
|
|
530
|
+
'my-agent'
|
|
531
|
+
"""
|
|
532
|
+
return schema.model_dump(exclude_none=exclude_none)
|
|
533
|
+
|
|
534
|
+
|
|
535
|
+
def schema_from_dict(data: dict[str, Any]) -> AgentSchema:
|
|
536
|
+
"""
|
|
537
|
+
Deserialize a dictionary to AgentSchema.
|
|
538
|
+
|
|
539
|
+
This handles:
|
|
540
|
+
- YAML files loaded with yaml.safe_load()
|
|
541
|
+
- Database spec column (JSON)
|
|
542
|
+
- API request bodies
|
|
543
|
+
|
|
544
|
+
Args:
|
|
545
|
+
data: Dictionary containing schema data
|
|
546
|
+
|
|
547
|
+
Returns:
|
|
548
|
+
Validated AgentSchema instance
|
|
549
|
+
|
|
550
|
+
Raises:
|
|
551
|
+
ValidationError: If data doesn't match schema structure
|
|
552
|
+
|
|
553
|
+
Example:
|
|
554
|
+
>>> data = {"type": "object", "description": "...", "properties": {}, "json_schema_extra": {"name": "test"}}
|
|
555
|
+
>>> schema = schema_from_dict(data)
|
|
556
|
+
>>> schema.json_schema_extra["name"]
|
|
557
|
+
'test'
|
|
558
|
+
"""
|
|
559
|
+
return AgentSchema.model_validate(data)
|
|
560
|
+
|
|
561
|
+
|
|
562
|
+
def schema_to_yaml(schema: AgentSchema) -> str:
|
|
563
|
+
"""
|
|
564
|
+
Serialize AgentSchema to YAML string.
|
|
565
|
+
|
|
566
|
+
The output format matches the canonical schema file format:
|
|
567
|
+
```yaml
|
|
568
|
+
type: object
|
|
569
|
+
description: |
|
|
570
|
+
System prompt here...
|
|
571
|
+
properties:
|
|
572
|
+
answer:
|
|
573
|
+
type: string
|
|
574
|
+
json_schema_extra:
|
|
575
|
+
name: my-agent
|
|
576
|
+
system_prompt: |
|
|
577
|
+
Extended prompt here...
|
|
578
|
+
```
|
|
579
|
+
|
|
580
|
+
Args:
|
|
581
|
+
schema: AgentSchema instance to serialize
|
|
582
|
+
|
|
583
|
+
Returns:
|
|
584
|
+
YAML string representation
|
|
585
|
+
|
|
586
|
+
Example:
|
|
587
|
+
>>> schema = create_agent_schema(
|
|
588
|
+
... description="You are a test agent",
|
|
589
|
+
... properties={"answer": {"type": "string"}},
|
|
590
|
+
... required=["answer"],
|
|
591
|
+
... name="test-agent"
|
|
592
|
+
... )
|
|
593
|
+
>>> yaml_str = schema_to_yaml(schema)
|
|
594
|
+
>>> "test-agent" in yaml_str
|
|
595
|
+
True
|
|
596
|
+
"""
|
|
597
|
+
import yaml
|
|
598
|
+
|
|
599
|
+
return yaml.dump(
|
|
600
|
+
schema_to_dict(schema),
|
|
601
|
+
default_flow_style=False,
|
|
602
|
+
allow_unicode=True,
|
|
603
|
+
sort_keys=False,
|
|
604
|
+
)
|
|
605
|
+
|
|
606
|
+
|
|
607
|
+
def schema_from_yaml(yaml_content: str) -> AgentSchema:
|
|
608
|
+
"""
|
|
609
|
+
Deserialize YAML string to AgentSchema.
|
|
610
|
+
|
|
611
|
+
Args:
|
|
612
|
+
yaml_content: YAML string containing schema definition
|
|
613
|
+
|
|
614
|
+
Returns:
|
|
615
|
+
Validated AgentSchema instance
|
|
616
|
+
|
|
617
|
+
Raises:
|
|
618
|
+
yaml.YAMLError: If YAML parsing fails
|
|
619
|
+
ValidationError: If schema structure is invalid
|
|
620
|
+
|
|
621
|
+
Example:
|
|
622
|
+
>>> yaml_str = '''
|
|
623
|
+
... type: object
|
|
624
|
+
... description: Test agent
|
|
625
|
+
... properties:
|
|
626
|
+
... answer:
|
|
627
|
+
... type: string
|
|
628
|
+
... json_schema_extra:
|
|
629
|
+
... name: test
|
|
630
|
+
... '''
|
|
631
|
+
>>> schema = schema_from_yaml(yaml_str)
|
|
632
|
+
>>> schema.json_schema_extra["name"]
|
|
633
|
+
'test'
|
|
634
|
+
"""
|
|
635
|
+
import yaml
|
|
636
|
+
|
|
637
|
+
data = yaml.safe_load(yaml_content)
|
|
638
|
+
return schema_from_dict(data)
|
|
639
|
+
|
|
640
|
+
|
|
641
|
+
def schema_from_yaml_file(file_path: str) -> AgentSchema:
|
|
642
|
+
"""
|
|
643
|
+
Load AgentSchema from a YAML file.
|
|
644
|
+
|
|
645
|
+
Args:
|
|
646
|
+
file_path: Path to YAML file
|
|
647
|
+
|
|
648
|
+
Returns:
|
|
649
|
+
Validated AgentSchema instance
|
|
650
|
+
|
|
651
|
+
Raises:
|
|
652
|
+
FileNotFoundError: If file doesn't exist
|
|
653
|
+
yaml.YAMLError: If YAML parsing fails
|
|
654
|
+
ValidationError: If schema structure is invalid
|
|
655
|
+
|
|
656
|
+
Example:
|
|
657
|
+
>>> schema = schema_from_yaml_file("schemas/agents/rem.yaml")
|
|
658
|
+
>>> schema.json_schema_extra["name"]
|
|
659
|
+
'rem'
|
|
660
|
+
"""
|
|
661
|
+
with open(file_path, "r") as f:
|
|
662
|
+
return schema_from_yaml(f.read())
|
|
663
|
+
|
|
664
|
+
|
|
665
|
+
def get_system_prompt(schema: AgentSchema | dict[str, Any]) -> str:
|
|
666
|
+
"""
|
|
667
|
+
Extract the complete system prompt from a schema.
|
|
668
|
+
|
|
669
|
+
Combines:
|
|
670
|
+
1. schema.description (base system prompt / public description)
|
|
671
|
+
2. json_schema_extra.system_prompt (extended instructions if present)
|
|
672
|
+
|
|
673
|
+
Args:
|
|
674
|
+
schema: AgentSchema instance or raw dict
|
|
675
|
+
|
|
676
|
+
Returns:
|
|
677
|
+
Complete system prompt string
|
|
678
|
+
|
|
679
|
+
Example:
|
|
680
|
+
>>> schema = AgentSchema(
|
|
681
|
+
... description="Base description",
|
|
682
|
+
... properties={},
|
|
683
|
+
... json_schema_extra={"name": "test", "system_prompt": "Extended instructions"}
|
|
684
|
+
... )
|
|
685
|
+
>>> prompt = get_system_prompt(schema)
|
|
686
|
+
>>> "Base description" in prompt and "Extended instructions" in prompt
|
|
687
|
+
True
|
|
688
|
+
"""
|
|
689
|
+
if isinstance(schema, dict):
|
|
690
|
+
base = schema.get("description", "")
|
|
691
|
+
extra = schema.get("json_schema_extra", {})
|
|
692
|
+
custom = extra.get("system_prompt") if isinstance(extra, dict) else None
|
|
693
|
+
else:
|
|
694
|
+
base = schema.description
|
|
695
|
+
extra = schema.json_schema_extra
|
|
696
|
+
if isinstance(extra, dict):
|
|
697
|
+
custom = extra.get("system_prompt")
|
|
698
|
+
elif isinstance(extra, AgentSchemaMetadata):
|
|
699
|
+
custom = extra.system_prompt
|
|
700
|
+
else:
|
|
701
|
+
custom = None
|
|
702
|
+
|
|
703
|
+
if custom:
|
|
704
|
+
return f"{base}\n\n{custom}" if base else custom
|
|
705
|
+
return base
|
|
706
|
+
|
|
707
|
+
|
|
708
|
+
def get_metadata(schema: AgentSchema | dict[str, Any]) -> AgentSchemaMetadata:
|
|
709
|
+
"""
|
|
710
|
+
Extract and validate metadata from a schema.
|
|
711
|
+
|
|
712
|
+
Args:
|
|
713
|
+
schema: AgentSchema instance or raw dict
|
|
714
|
+
|
|
715
|
+
Returns:
|
|
716
|
+
Validated AgentSchemaMetadata instance
|
|
717
|
+
|
|
718
|
+
Example:
|
|
719
|
+
>>> schema = {"json_schema_extra": {"name": "test", "system_prompt": "hello"}}
|
|
720
|
+
>>> meta = get_metadata(schema)
|
|
721
|
+
>>> meta.name
|
|
722
|
+
'test'
|
|
723
|
+
>>> meta.system_prompt
|
|
724
|
+
'hello'
|
|
725
|
+
"""
|
|
726
|
+
if isinstance(schema, dict):
|
|
727
|
+
extra = schema.get("json_schema_extra", {})
|
|
728
|
+
else:
|
|
729
|
+
extra = schema.json_schema_extra
|
|
730
|
+
|
|
731
|
+
if isinstance(extra, AgentSchemaMetadata):
|
|
732
|
+
return extra
|
|
733
|
+
return AgentSchemaMetadata.model_validate(extra)
|
rem/agentic/tools/rem_tools.py
CHANGED
|
@@ -162,10 +162,10 @@ async def search_rem_tool(
|
|
|
162
162
|
return {"status": "error", "error": f"Unknown query_type: {query_type}"}
|
|
163
163
|
|
|
164
164
|
# Execute query
|
|
165
|
-
logger.
|
|
165
|
+
logger.debug(f"Executing REM query: {query_type} for user {user_id}")
|
|
166
166
|
result = await rem_service.execute_query(query)
|
|
167
167
|
|
|
168
|
-
logger.
|
|
168
|
+
logger.debug(f"Query completed: {query_type}")
|
|
169
169
|
return {
|
|
170
170
|
"status": "success",
|
|
171
171
|
"query_type": query_type,
|
|
@@ -212,7 +212,7 @@ async def ingest_file_tool(
|
|
|
212
212
|
is_local_server=is_local_server,
|
|
213
213
|
)
|
|
214
214
|
|
|
215
|
-
logger.
|
|
215
|
+
logger.debug(
|
|
216
216
|
f"File ingestion complete: {result['file_name']} "
|
|
217
217
|
f"(status: {result['processing_status']}, "
|
|
218
218
|
f"resources: {result['resources_created']})"
|
rem/api/main.py
CHANGED
|
@@ -26,10 +26,10 @@ Endpoints:
|
|
|
26
26
|
- /health : Health check
|
|
27
27
|
- /api/v1/mcp : MCP endpoint (HTTP transport)
|
|
28
28
|
- /api/v1/chat/completions : OpenAI-compatible chat completions (streaming & non-streaming)
|
|
29
|
-
- /api/v1/query : REM query execution (
|
|
29
|
+
- /api/v1/query : REM query execution (rem-dialect or natural-language)
|
|
30
30
|
- /api/v1/resources : Resource CRUD (TODO)
|
|
31
31
|
- /api/v1/moments : Moment CRUD (TODO)
|
|
32
|
-
- /api/auth/* : OAuth/OIDC authentication
|
|
32
|
+
- /api/auth/* : OAuth/OIDC authentication
|
|
33
33
|
- /docs : OpenAPI documentation
|
|
34
34
|
|
|
35
35
|
Headers → AgentContext Mapping:
|
|
@@ -59,8 +59,16 @@ Running:
|
|
|
59
59
|
hypercorn rem.api.main:app --bind 0.0.0.0:8000
|
|
60
60
|
"""
|
|
61
61
|
|
|
62
|
+
import importlib.metadata
|
|
62
63
|
import secrets
|
|
64
|
+
import sys
|
|
63
65
|
import time
|
|
66
|
+
|
|
67
|
+
# Get package version for API responses
|
|
68
|
+
try:
|
|
69
|
+
__version__ = importlib.metadata.version("remdb")
|
|
70
|
+
except importlib.metadata.PackageNotFoundError:
|
|
71
|
+
__version__ = "0.0.0-dev"
|
|
64
72
|
from contextlib import asynccontextmanager
|
|
65
73
|
|
|
66
74
|
from fastapi import FastAPI, Request
|
|
@@ -73,6 +81,23 @@ from starlette.middleware.sessions import SessionMiddleware
|
|
|
73
81
|
from .mcp_router.server import create_mcp_server
|
|
74
82
|
from ..settings import settings
|
|
75
83
|
|
|
84
|
+
# Configure loguru based on settings
|
|
85
|
+
# Remove default handler and add one with configured level
|
|
86
|
+
logger.remove()
|
|
87
|
+
|
|
88
|
+
# Configure level icons - only warnings and errors get visual indicators
|
|
89
|
+
logger.level("DEBUG", icon=" ")
|
|
90
|
+
logger.level("INFO", icon=" ")
|
|
91
|
+
logger.level("WARNING", icon="🟠")
|
|
92
|
+
logger.level("ERROR", icon="🔴")
|
|
93
|
+
logger.level("CRITICAL", icon="🔴")
|
|
94
|
+
|
|
95
|
+
logger.add(
|
|
96
|
+
sys.stderr,
|
|
97
|
+
level=settings.api.log_level.upper(),
|
|
98
|
+
format="<green>{time:YYYY-MM-DD HH:mm:ss.SSS}</green> | {level.icon} <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
|
|
99
|
+
)
|
|
100
|
+
|
|
76
101
|
|
|
77
102
|
class RequestLoggingMiddleware(BaseHTTPMiddleware):
|
|
78
103
|
"""
|
|
@@ -82,26 +107,64 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware):
|
|
|
82
107
|
- Logs request method, path, client, user-agent
|
|
83
108
|
- Logs response status, content-type, duration
|
|
84
109
|
- Essential for debugging OAuth flow and MCP sessions
|
|
110
|
+
- Health checks and 404s logged at DEBUG level to reduce noise
|
|
111
|
+
- Scanner/exploit attempts (common vulnerability probes) logged at DEBUG
|
|
85
112
|
"""
|
|
86
113
|
|
|
114
|
+
# Paths to log at DEBUG level (health checks, probes)
|
|
115
|
+
DEBUG_PATHS = {"/health", "/healthz", "/ready", "/readyz", "/livez"}
|
|
116
|
+
|
|
117
|
+
# Path patterns that indicate vulnerability scanners (log at DEBUG)
|
|
118
|
+
SCANNER_PATTERNS = (
|
|
119
|
+
"/vendor/", # PHP composer exploits
|
|
120
|
+
"/.git/", # Git config exposure
|
|
121
|
+
"/.env", # Environment file exposure
|
|
122
|
+
"/wp-", # WordPress exploits
|
|
123
|
+
"/phpunit/", # PHPUnit RCE
|
|
124
|
+
"/eval-stdin", # PHP eval exploits
|
|
125
|
+
"/console/", # Console exposure
|
|
126
|
+
"/actuator/", # Spring Boot actuator
|
|
127
|
+
"/debug/", # Debug endpoints
|
|
128
|
+
"/admin/", # Admin panel probes (when we don't have one)
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
def _should_log_at_debug(self, path: str, status_code: int) -> bool:
|
|
132
|
+
"""Determine if request should be logged at DEBUG level."""
|
|
133
|
+
# Health checks
|
|
134
|
+
if path in self.DEBUG_PATHS:
|
|
135
|
+
return True
|
|
136
|
+
# 404 responses (not found - includes scanner probes)
|
|
137
|
+
if status_code == 404:
|
|
138
|
+
return True
|
|
139
|
+
# Known scanner patterns
|
|
140
|
+
if any(pattern in path for pattern in self.SCANNER_PATTERNS):
|
|
141
|
+
return True
|
|
142
|
+
return False
|
|
143
|
+
|
|
87
144
|
async def dispatch(self, request: Request, call_next):
|
|
88
145
|
start_time = time.time()
|
|
146
|
+
path = request.url.path
|
|
89
147
|
|
|
90
|
-
# Log incoming request
|
|
148
|
+
# Capture request details up front; logging is deferred until the response status is known
|
|
91
149
|
client_host = request.client.host if request.client else "unknown"
|
|
92
|
-
|
|
93
|
-
f"→ REQUEST: {request.method} {request.url.path} | "
|
|
94
|
-
f"Client: {client_host} | "
|
|
95
|
-
f"User-Agent: {request.headers.get('user-agent', 'unknown')[:100]}"
|
|
96
|
-
)
|
|
150
|
+
user_agent = request.headers.get('user-agent', 'unknown')[:100]
|
|
97
151
|
|
|
98
152
|
# Process request
|
|
99
153
|
response = await call_next(request)
|
|
100
154
|
|
|
101
|
-
#
|
|
155
|
+
# Determine log level based on path AND response status
|
|
102
156
|
duration_ms = (time.time() - start_time) * 1000
|
|
103
|
-
|
|
104
|
-
|
|
157
|
+
use_debug = self._should_log_at_debug(path, response.status_code)
|
|
158
|
+
log_fn = logger.debug if use_debug else logger.info
|
|
159
|
+
|
|
160
|
+
# Log request and response together
|
|
161
|
+
log_fn(
|
|
162
|
+
f"→ REQUEST: {request.method} {path} | "
|
|
163
|
+
f"Client: {client_host} | "
|
|
164
|
+
f"User-Agent: {user_agent}"
|
|
165
|
+
)
|
|
166
|
+
log_fn(
|
|
167
|
+
f"← RESPONSE: {request.method} {path} | "
|
|
105
168
|
f"Status: {response.status_code} | "
|
|
106
169
|
f"Duration: {duration_ms:.2f}ms"
|
|
107
170
|
)
|
|
@@ -154,7 +217,8 @@ async def lifespan(app: FastAPI):
|
|
|
154
217
|
"and history lookups are unavailable. Enable database with POSTGRES__ENABLED=true"
|
|
155
218
|
)
|
|
156
219
|
else:
|
|
157
|
-
|
|
220
|
+
# Log database host only - never log credentials
|
|
221
|
+
logger.info(f"Database enabled: {settings.postgres.host}:{settings.postgres.port}/{settings.postgres.database}")
|
|
158
222
|
|
|
159
223
|
yield
|
|
160
224
|
|
|
@@ -216,7 +280,7 @@ def create_app() -> FastAPI:
|
|
|
216
280
|
app = FastAPI(
|
|
217
281
|
title=f"{settings.app_name} API",
|
|
218
282
|
description=f"{settings.app_name} - Resources Entities Moments system for agentic AI",
|
|
219
|
-
version=
|
|
283
|
+
version=__version__,
|
|
220
284
|
lifespan=combined_lifespan,
|
|
221
285
|
root_path=settings.root_path if settings.root_path else "",
|
|
222
286
|
redirect_slashes=False, # Don't redirect /mcp/ -> /mcp
|
|
@@ -290,7 +354,7 @@ def create_app() -> FastAPI:
|
|
|
290
354
|
# TODO: If auth enabled and no user, return 401 with WWW-Authenticate
|
|
291
355
|
return {
|
|
292
356
|
"name": f"{settings.app_name} API",
|
|
293
|
-
"version":
|
|
357
|
+
"version": __version__,
|
|
294
358
|
"mcp_endpoint": "/api/v1/mcp",
|
|
295
359
|
"docs": "/docs",
|
|
296
360
|
}
|
|
@@ -299,7 +363,7 @@ def create_app() -> FastAPI:
|
|
|
299
363
|
@app.get("/health")
|
|
300
364
|
async def health():
|
|
301
365
|
"""Health check endpoint."""
|
|
302
|
-
return {"status": "healthy", "version":
|
|
366
|
+
return {"status": "healthy", "version": __version__}
|
|
303
367
|
|
|
304
368
|
# Register API routers
|
|
305
369
|
from .routers.chat import router as chat_router
|
|
@@ -308,13 +372,18 @@ def create_app() -> FastAPI:
|
|
|
308
372
|
from .routers.feedback import router as feedback_router
|
|
309
373
|
from .routers.admin import router as admin_router
|
|
310
374
|
from .routers.shared_sessions import router as shared_sessions_router
|
|
375
|
+
from .routers.query import router as query_router
|
|
311
376
|
|
|
312
377
|
app.include_router(chat_router)
|
|
313
378
|
app.include_router(models_router)
|
|
379
|
+
# shared_sessions_router MUST be before messages_router
|
|
380
|
+
# because messages_router has /sessions/{session_id} which would match
|
|
381
|
+
# before the more specific /sessions/shared-with-me routes
|
|
382
|
+
app.include_router(shared_sessions_router)
|
|
314
383
|
app.include_router(messages_router)
|
|
315
384
|
app.include_router(feedback_router)
|
|
316
385
|
app.include_router(admin_router)
|
|
317
|
-
app.include_router(
|
|
386
|
+
app.include_router(query_router)
|
|
318
387
|
|
|
319
388
|
# Register auth router (if enabled)
|
|
320
389
|
if settings.auth.enabled:
|
rem/api/mcp_router/resources.py
CHANGED
|
@@ -181,7 +181,7 @@ Parameters:
|
|
|
181
181
|
- table_name (required): Table to search (resources, moments, etc.)
|
|
182
182
|
- field_name (optional): Field to search (defaults to "content")
|
|
183
183
|
- provider (optional): Embedding provider (default: from LLM__EMBEDDING_PROVIDER setting)
|
|
184
|
-
- min_similarity (optional): Minimum similarity 0.0-1.0 (default: 0.
|
|
184
|
+
- min_similarity (optional): Minimum similarity 0.0-1.0 (default: 0.3)
|
|
185
185
|
- limit (optional): Max results (default: 10)
|
|
186
186
|
- user_id (optional): User scoping
|
|
187
187
|
|