remdb 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.0.dist-info/METADATA +1455 -0
- remdb-0.3.0.dist-info/RECORD +187 -0
- remdb-0.3.0.dist-info/WHEEL +4 -0
- remdb-0.3.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenAI-compatible API models for chat completions.
|
|
3
|
+
|
|
4
|
+
Design Pattern:
|
|
5
|
+
- Full OpenAI compatibility for drop-in replacement
|
|
6
|
+
- Support for streaming (SSE) and non-streaming modes
|
|
7
|
+
- Response format control (text vs json_object)
|
|
8
|
+
- Headers map to AgentContext (X-User-Id, X-Tenant-Id, X-Agent-Schema, etc.)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from typing import Literal
|
|
12
|
+
|
|
13
|
+
from pydantic import BaseModel, Field
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
# Request models
|
|
17
|
+
class ChatMessage(BaseModel):
    """OpenAI chat message format.

    Used for the entries of ``ChatCompletionRequest.messages`` and for the
    ``message`` of a non-streaming ``ChatCompletionChoice``.
    """

    # Author of the message; only these four role strings validate.
    role: Literal["system", "user", "assistant", "tool"]
    # Message text; optional, defaults to None.
    content: str | None = None
    # Optional name attached to the message; defaults to None.
    name: str | None = None
    # Optional tool-call identifier; presumably ties a "tool" message to the
    # call it answers -- TODO confirm against the code that builds messages.
    tool_call_id: str | None = None
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ResponseFormat(BaseModel):
    """
    OpenAI-compatible selector for the shape of the response.

    Supported values of ``type``:
    - text: plain text response (the default)
    - json_object: best-effort JSON extraction from the agent output
    """

    # The field name mirrors the OpenAI wire format, so it intentionally
    # shares its name with the ``type`` builtin.
    type: Literal["text", "json_object"] = Field(
        "text",
        description="Response format type. Use 'json_object' to enable JSON mode.",
    )
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class ChatCompletionRequest(BaseModel):
    """
    Request body for an OpenAI-style chat completion.

    Mirrors the payload of OpenAI's /v1/chat/completions endpoint.

    Request headers (not body fields) populate the AgentContext:
    - X-User-Id → context.user_id
    - X-Tenant-Id → context.tenant_id
    - X-Session-Id → context.session_id
    - X-Agent-Schema → context.agent_schema_uri

    Note: the model is taken from body.model (standard OpenAI field), not
    from a header.
    """

    # Model identifier; falls back to the default below when omitted.
    model: str = Field(
        "anthropic:claude-sonnet-4-5-20250929",
        description="Model to use (standard OpenAI field)",
    )
    # Required: the conversation so far.
    messages: list[ChatMessage] = Field(..., description="Chat conversation history")
    # Optional sampling temperature, validated to lie in [0, 2].
    temperature: float | None = Field(None, ge=0, le=2)
    # Optional completion-length cap; must be at least 1 when given.
    max_tokens: int | None = Field(None, ge=1)
    stream: bool = Field(False, description="Enable SSE streaming")
    # ge=1 and le=1 together pin this to exactly one completion.
    n: int | None = Field(
        1,
        ge=1,
        le=1,
        description="Number of completions (must be 1)",
    )
    # One stop sequence or a list of them; optional.
    stop: str | list[str] | None = None
    # Both penalties are optional and validated to lie in [-2, 2].
    presence_penalty: float | None = Field(None, ge=-2, le=2)
    frequency_penalty: float | None = Field(None, ge=-2, le=2)
    user: str | None = Field(None, description="Unique user identifier")
    response_format: ResponseFormat | None = Field(
        None,
        description="Response format. Set type='json_object' to enable JSON mode.",
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# Response models
|
|
75
|
+
class ChatCompletionUsage(BaseModel):
    """Token usage statistics.

    All three counters are required integers. The model itself does not
    check that ``total_tokens`` equals the sum of the other two; the caller
    that builds the object is responsible for consistent values.
    """

    # Tokens counted for the prompt.
    prompt_tokens: int
    # Tokens counted for the generated completion.
    completion_tokens: int
    # Overall token count reported to the client.
    total_tokens: int
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class ChatCompletionMessageDelta(BaseModel):
    """Streaming delta for chat completion.

    Both fields are optional, so an instance built with no arguments
    carries ``role=None`` and ``content=None``.
    """

    # Role of the message being streamed; None when the chunk omits it.
    # Unlike ChatMessage.role, "tool" is not an accepted value here.
    role: Literal["system", "user", "assistant"] | None = None
    # Text fragment carried by this chunk; None when there is no text.
    content: str | None = None
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class ChatCompletionChoice(BaseModel):
    """Chat completion choice (non-streaming)."""

    # Position of this choice in the response's ``choices`` list.
    index: int
    # The complete message for this choice.
    message: ChatMessage
    # No default is declared, so the field must always be supplied,
    # although None is an accepted value.
    finish_reason: Literal["stop", "length", "content_filter", "tool_calls"] | None
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class ChatCompletionStreamChoice(BaseModel):
    """Chat completion choice (streaming).

    One entry of ``ChatCompletionStreamResponse.choices``: an incremental
    ``delta`` plus an optional ``finish_reason``.
    """

    # Position of this choice in the chunk's ``choices`` list.
    index: int
    # Incremental role/content carried by this chunk.
    delta: ChatCompletionMessageDelta
    # Accepts the same values as the non-streaming ChatCompletionChoice,
    # including "tool_calls", so both response shapes agree. Defaults to
    # None, which is what intermediate chunks carry.
    finish_reason: Literal["stop", "length", "content_filter", "tool_calls"] | None = None
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class ChatCompletionResponse(BaseModel):
    """OpenAI chat completion response (non-streaming).

    ``id`` and ``object`` mirror the OpenAI wire format, which is why they
    share their names with Python builtins.
    """

    # Identifier of this completion.
    id: str
    # Fixed discriminator string; never needs to be passed explicitly.
    object: Literal["chat.completion"] = "chat.completion"
    # Creation time as an integer; presumably Unix seconds, matching what
    # the streaming path produces with int(time.time()) -- TODO confirm.
    created: int
    # Model name reported back to the client.
    model: str
    # Completion choices returned to the client.
    choices: list[ChatCompletionChoice]
    # Token accounting for the request; required on this response type.
    usage: ChatCompletionUsage
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
class ChatCompletionStreamResponse(BaseModel):
    """OpenAI chat completion chunk (streaming).

    Same envelope as ``ChatCompletionResponse`` but with streaming choices
    and no ``usage`` field.
    """

    # Identifier of the completion this chunk belongs to.
    id: str
    # Fixed discriminator string; never needs to be passed explicitly.
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    # Creation time as an integer.
    created: int
    # Model name reported back to the client.
    model: str
    # Streaming choices, each carrying a delta.
    choices: list[ChatCompletionStreamChoice]
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenAI-compatible streaming relay for Pydantic AI agents.
|
|
3
|
+
|
|
4
|
+
Design Pattern:
|
|
5
|
+
- Uses Pydantic AI's agent.iter() to capture full execution including tool calls
|
|
6
|
+
- Streams tool call events with [Calling: tool_name] markers
|
|
7
|
+
- Streams text content deltas as they arrive
|
|
8
|
+
- Proper OpenAI SSE format with data: prefix and [DONE] terminator
|
|
9
|
+
- Error handling with graceful degradation
|
|
10
|
+
|
|
11
|
+
Key Insight:
|
|
12
|
+
- agent.run_stream() stops after first output, missing tool calls
|
|
13
|
+
- agent.iter() provides complete execution with tool call visibility
|
|
14
|
+
- Use PartStartEvent to detect tool calls
|
|
15
|
+
- Use PartDeltaEvent with TextPartDelta for content streaming
|
|
16
|
+
|
|
17
|
+
SSE Format:
|
|
18
|
+
data: {"id": "chatcmpl-...", "choices": [{"delta": {"content": "..."}}]}\\n\\n
|
|
19
|
+
data: [DONE]\\n\\n
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
import json
|
|
23
|
+
import time
|
|
24
|
+
import uuid
|
|
25
|
+
from typing import AsyncGenerator
|
|
26
|
+
|
|
27
|
+
from loguru import logger
|
|
28
|
+
from pydantic_ai.agent import Agent
|
|
29
|
+
from pydantic_ai.messages import (
|
|
30
|
+
PartDeltaEvent,
|
|
31
|
+
PartStartEvent,
|
|
32
|
+
TextPartDelta,
|
|
33
|
+
ToolCallPart,
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
from .models import (
|
|
37
|
+
ChatCompletionMessageDelta,
|
|
38
|
+
ChatCompletionStreamChoice,
|
|
39
|
+
ChatCompletionStreamResponse,
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def _sse_chunk(
    request_id: str,
    created_at: int,
    model: str,
    *,
    content: str | None = None,
    with_role: bool = False,
    finish_reason: str | None = None,
) -> str:
    """Build one ``chat.completion.chunk`` and frame it as an SSE ``data:`` line.

    Args:
        request_id: Completion ID shared by every chunk of the stream.
        created_at: Timestamp shared by every chunk of the stream.
        model: Model name echoed back in the chunk.
        content: Text for ``delta.content``; ``None`` for the closing chunk.
        with_role: When true, ``delta.role`` is set to ``"assistant"``.
        finish_reason: Value for the choice's ``finish_reason``.

    Returns:
        The chunk serialized with ``model_dump_json()`` and wrapped as
        ``"data: <json>\\n\\n"``.
    """
    chunk = ChatCompletionStreamResponse(
        id=request_id,
        created=created_at,
        model=model,
        choices=[
            ChatCompletionStreamChoice(
                index=0,
                delta=ChatCompletionMessageDelta(
                    role="assistant" if with_role else None,
                    content=content,
                ),
                finish_reason=finish_reason,
            )
        ],
    )
    return f"data: {chunk.model_dump_json()}\n\n"


async def stream_openai_response(
    agent: Agent,
    prompt: str,
    model: str,
    request_id: str | None = None,
) -> AsyncGenerator[str, None]:
    """
    Stream Pydantic AI agent responses in OpenAI SSE format with tool call events.

    Design Pattern:
    1. Use agent.iter() for complete execution (not run_stream())
    2. Iterate over nodes to capture model requests and tool executions
    3. Stream tool call start events as [Calling: tool_name]
    4. Stream text content (the text carried by a part-start event and every
       following text delta) as it arrives
    5. Send final chunk with finish_reason="stop"
    6. Send OpenAI termination marker [DONE]

    The first content chunk of the stream carries role="assistant"; later
    chunks carry role=None.

    Args:
        agent: Pydantic AI agent instance
        prompt: User prompt to run
        model: Model name for response metadata
        request_id: Optional request ID (generates "chatcmpl-" + 24 hex chars
            if not provided)

    Yields:
        SSE-formatted strings: "data: {json}\\n\\n". If any exception is raised
        while streaming, it is logged and the stream ends with an
        {"error": {...}} frame followed by "data: [DONE]" instead of the
        normal final chunk.

    Example Stream (JSON abridged):
        data: {"id": "chatcmpl-123", "choices": [{"delta": {"role": "assistant", "content": ""}}]}

        data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "[Calling: search]"}}]}

        data: {"id": "chatcmpl-123", "choices": [{"delta": {"content": "Found 3 results..."}}]}

        data: {"id": "chatcmpl-123", "choices": [{"delta": {}, "finish_reason": "stop"}]}

        data: [DONE]
    """
    # Local import: TextPart is only needed here and the module-level import
    # list does not include it.
    from pydantic_ai.messages import TextPart

    if request_id is None:
        request_id = f"chatcmpl-{uuid.uuid4().hex[:24]}"

    created_at = int(time.time())
    is_first_chunk = True

    try:
        # Use agent.iter() to get complete execution with tool calls;
        # run_stream() stops after first output, missing tool calls.
        async with agent.iter(prompt) as agent_run:
            async for node in agent_run:
                # Model request node: emits part-start and part-delta events.
                if Agent.is_model_request_node(node):
                    async with node.stream(agent_run.ctx) as request_stream:
                        async for event in request_stream:
                            if isinstance(event, PartStartEvent):
                                part = event.part
                                if isinstance(part, ToolCallPart):
                                    # Tool call start: surface it as a marker.
                                    logger.info(f"🔧 {part.tool_name}")
                                    content = f"[Calling: {part.tool_name}]"
                                elif isinstance(part, TextPart) and part.content:
                                    # A text part's start event already holds
                                    # its first fragment; skipping it would
                                    # truncate the beginning of the reply.
                                    content = part.content
                                else:
                                    continue
                            elif isinstance(event, PartDeltaEvent) and isinstance(
                                event.delta, TextPartDelta
                            ):
                                # Subsequent text fragments.
                                content = event.delta.content_delta
                            else:
                                continue

                            frame = _sse_chunk(
                                request_id,
                                created_at,
                                model,
                                content=content,
                                with_role=is_first_chunk,
                            )
                            is_first_chunk = False
                            yield frame

                # Tool execution node: drain the stream so the tools run;
                # the call start was already reported above.
                elif Agent.is_call_tools_node(node):
                    async with node.stream(agent_run.ctx) as tools_stream:
                        async for _ in tools_stream:
                            pass

        # Final chunk with finish_reason (empty delta).
        yield _sse_chunk(request_id, created_at, model, finish_reason="stop")

        # OpenAI termination marker
        yield "data: [DONE]\n\n"

    except Exception as e:
        # Top-level boundary of the stream: report the failure in-band so the
        # client still receives a well-formed, terminated SSE stream.
        error_msg = str(e)
        # logger.exception records the message together with the traceback.
        logger.exception(f"Streaming error: {error_msg}")

        # Send error as final chunk
        error_data = {
            "error": {
                "message": error_msg,
                "type": "internal_error",
                "code": "stream_error",
            }
        }
        yield f"data: {json.dumps(error_data)}\n\n"
        yield "data: [DONE]\n\n"
|
rem/auth/README.md
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
1
|
+
# OAuth 2.1 Authentication
|
|
2
|
+
|
|
3
|
+
OAuth 2.1 compliant authentication with Google and Microsoft Entra ID.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- **OAuth 2.1 Security Best Practices**
|
|
8
|
+
- PKCE (Proof Key for Code Exchange) - mandatory for all flows
|
|
9
|
+
- State parameter for CSRF protection
|
|
10
|
+
- Nonce for ID token replay protection
|
|
11
|
+
- Token validation with JWKS
|
|
12
|
+
|
|
13
|
+
- **Supported Providers**
|
|
14
|
+
- Google OAuth 2.0 / OIDC
|
|
15
|
+
- Microsoft Entra ID (Azure AD) OIDC
|
|
16
|
+
|
|
17
|
+
- **Minimal Code**
|
|
18
|
+
- Leverages Authlib for standards compliance
|
|
19
|
+
- Authlib handles PKCE, token exchange, JWKS validation
|
|
20
|
+
- Clean integration with FastAPI
|
|
21
|
+
|
|
22
|
+
## Installation
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
pip install authlib httpx
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Configuration
|
|
29
|
+
|
|
30
|
+
### Google OAuth Setup
|
|
31
|
+
|
|
32
|
+
1. Go to [Google Cloud Console](https://console.cloud.google.com/apis/credentials)
|
|
33
|
+
2. Create OAuth 2.0 credentials
|
|
34
|
+
3. Add authorized redirect URI: `http://localhost:8000/api/auth/google/callback`
|
|
35
|
+
4. Set environment variables:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
AUTH__ENABLED=true
|
|
39
|
+
AUTH__SESSION_SECRET=$(python -c "import secrets; print(secrets.token_hex(32))")
|
|
40
|
+
|
|
41
|
+
AUTH__GOOGLE__CLIENT_ID=your-client-id.apps.googleusercontent.com
|
|
42
|
+
AUTH__GOOGLE__CLIENT_SECRET=your-client-secret
|
|
43
|
+
AUTH__GOOGLE__REDIRECT_URI=http://localhost:8000/api/auth/google/callback
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Microsoft Entra ID Setup
|
|
47
|
+
|
|
48
|
+
1. Go to [Azure Portal](https://portal.azure.com/#view/Microsoft_AAD_RegisteredApps)
|
|
49
|
+
2. Register new application
|
|
50
|
+
3. Create client secret under "Certificates & secrets"
|
|
51
|
+
4. Add redirect URI: `http://localhost:8000/api/auth/microsoft/callback`
|
|
52
|
+
5. Add API permissions: Microsoft Graph > User.Read (delegated)
|
|
53
|
+
6. Set environment variables:
|
|
54
|
+
|
|
55
|
+
```bash
|
|
56
|
+
AUTH__ENABLED=true
|
|
57
|
+
AUTH__SESSION_SECRET=$(python -c "import secrets; print(secrets.token_hex(32))")
|
|
58
|
+
|
|
59
|
+
AUTH__MICROSOFT__CLIENT_ID=your-application-id
|
|
60
|
+
AUTH__MICROSOFT__CLIENT_SECRET=your-client-secret
|
|
61
|
+
AUTH__MICROSOFT__REDIRECT_URI=http://localhost:8000/api/auth/microsoft/callback
|
|
62
|
+
AUTH__MICROSOFT__TENANT=common # or your tenant ID
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
**Tenant options:**
|
|
66
|
+
- `common` - Multi-tenant + personal Microsoft accounts
|
|
67
|
+
- `organizations` - Work/school accounts only
|
|
68
|
+
- `consumers` - Personal Microsoft accounts only
|
|
69
|
+
- `{tenant-id}` - Single tenant (specific organization)
|
|
70
|
+
|
|
71
|
+
## Usage
|
|
72
|
+
|
|
73
|
+
### 1. Start the API server
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
cd rem
|
|
77
|
+
uv run python -m rem.api.main
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 2. Initiate login
|
|
81
|
+
|
|
82
|
+
Navigate to:
|
|
83
|
+
- Google: `http://localhost:8000/api/auth/google/login`
|
|
84
|
+
- Microsoft: `http://localhost:8000/api/auth/microsoft/login`
|
|
85
|
+
|
|
86
|
+
### 3. OAuth Flow
|
|
87
|
+
|
|
88
|
+
```
|
|
89
|
+
User Browser API Server OAuth Provider
|
|
90
|
+
| | | |
|
|
91
|
+
|-- Click Login ---->| | |
|
|
92
|
+
| |-- GET /auth/google/login --> |
|
|
93
|
+
| | |-- Generate PKCE ------->|
|
|
94
|
+
| | | (code_verifier) |
|
|
95
|
+
| |<-- Redirect to Google --| |
|
|
96
|
+
|<-- Show Google login --| | |
|
|
97
|
+
| | | |
|
|
98
|
+
|-- Enter credentials --> | |
|
|
99
|
+
| |-- Authorize ----------------------->| |
|
|
100
|
+
| |<-- Redirect with code ----------------| |
|
|
101
|
+
| | | |
|
|
102
|
+
| |-- GET /auth/google/callback?code=xyz ---------->|
|
|
103
|
+
| | |-- Exchange code ------->|
|
|
104
|
+
| | | + code_verifier |
|
|
105
|
+
| | |<-- Tokens --------------|
|
|
106
|
+
| | |-- Validate ID token --->|
|
|
107
|
+
| | | (JWKS) |
|
|
108
|
+
| |<-- Set session cookie --| |
|
|
109
|
+
|<-- Redirect to app ---| | |
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### 4. Access protected endpoints
|
|
113
|
+
|
|
114
|
+
After login, the session cookie is set automatically; send it with subsequent requests:
|
|
115
|
+
|
|
116
|
+
```bash
|
|
117
|
+
# Get current user
|
|
118
|
+
curl http://localhost:8000/api/auth/me \
|
|
119
|
+
-H "Cookie: rem_session=..."
|
|
120
|
+
|
|
121
|
+
# Protected API endpoint
|
|
122
|
+
curl http://localhost:8000/api/v1/chat/completions \
|
|
123
|
+
-H "Cookie: rem_session=..." \
|
|
124
|
+
-H "Content-Type: application/json" \
|
|
125
|
+
-d '{
|
|
126
|
+
"model": "anthropic:claude-sonnet-4-5-20250929",
|
|
127
|
+
"messages": [{"role": "user", "content": "Hello"}]
|
|
128
|
+
}'
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### 5. Logout
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
curl -X POST http://localhost:8000/api/auth/logout \
|
|
135
|
+
-H "Cookie: rem_session=..."
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## API Endpoints
|
|
139
|
+
|
|
140
|
+
| Method | Path | Description |
|
|
141
|
+
|--------|------|-------------|
|
|
142
|
+
| GET | `/api/auth/google/login` | Initiate Google OAuth flow |
|
|
143
|
+
| GET | `/api/auth/google/callback` | Google OAuth callback |
|
|
144
|
+
| GET | `/api/auth/microsoft/login` | Initiate Microsoft OAuth flow |
|
|
145
|
+
| GET | `/api/auth/microsoft/callback` | Microsoft OAuth callback |
|
|
146
|
+
| POST | `/api/auth/logout` | Clear session |
|
|
147
|
+
| GET | `/api/auth/me` | Get current user info |
|
|
148
|
+
|
|
149
|
+
## Security Features
|
|
150
|
+
|
|
151
|
+
### OAuth 2.1 Compliance
|
|
152
|
+
|
|
153
|
+
- **PKCE**: All flows use code_challenge (S256 method)
|
|
154
|
+
- **State**: CSRF protection on all authorization requests
|
|
155
|
+
- **Nonce**: ID token replay protection
|
|
156
|
+
- **No implicit flow**: Only authorization code flow supported
|
|
157
|
+
- **JWKS validation**: ID tokens validated with provider's public keys
|
|
158
|
+
|
|
159
|
+
### Session Security
|
|
160
|
+
|
|
161
|
+
- **HTTPOnly cookies**: Session cookies not accessible to JavaScript
|
|
162
|
+
- **SameSite=Lax**: CSRF protection for cookie-based auth
|
|
163
|
+
- **Secure flag**: HTTPS-only cookies in production
|
|
164
|
+
- **Short expiration**: 1 hour session lifetime (configurable)
|
|
165
|
+
|
|
166
|
+
### Middleware Protection
|
|
167
|
+
|
|
168
|
+
- Protects `/api/v1/*` endpoints
|
|
169
|
+
- Excludes `/api/auth/*` and public endpoints
|
|
170
|
+
- Returns 401 for API requests (JSON)
|
|
171
|
+
- Redirects to login for browser requests on protected paths outside `/api/` (requests under `/api/` always receive the 401 JSON response)
|
|
172
|
+
|
|
173
|
+
## Provider-Specific Features
|
|
174
|
+
|
|
175
|
+
### Google
|
|
176
|
+
|
|
177
|
+
- **Hosted domain restriction**: Limit to Google Workspace domain
|
|
178
|
+
- **Offline access**: Request refresh tokens
|
|
179
|
+
- **Incremental authorization**: Add scopes incrementally
|
|
180
|
+
|
|
181
|
+
```bash
|
|
182
|
+
AUTH__GOOGLE__HOSTED_DOMAIN=example.com # Google Workspace only
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
### Microsoft
|
|
186
|
+
|
|
187
|
+
- **Multi-tenant support**: common/organizations/consumers
|
|
188
|
+
- **Conditional access**: Honors Entra ID policies
|
|
189
|
+
- **Microsoft Graph**: Access user profile via Graph API
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
AUTH__MICROSOFT__TENANT=common # Multi-tenant
|
|
193
|
+
AUTH__MICROSOFT__TENANT=organizations # Work/school only
|
|
194
|
+
AUTH__MICROSOFT__TENANT=consumers # Personal accounts
|
|
195
|
+
AUTH__MICROSOFT__TENANT=contoso.com # Single tenant
|
|
196
|
+
```
|
|
197
|
+
|
|
198
|
+
## Architecture
|
|
199
|
+
|
|
200
|
+
```
|
|
201
|
+
rem/src/rem/auth/
|
|
202
|
+
├── __init__.py # Module exports
|
|
203
|
+
├── README.md # This file
|
|
204
|
+
├── middleware.py # FastAPI auth middleware
|
|
205
|
+
├── providers/ # OAuth provider implementations
|
|
206
|
+
│ ├── __init__.py
|
|
207
|
+
│ ├── base.py # Base OAuth provider (kept for reference)
|
|
208
|
+
│ ├── google.py # Google provider (kept for reference)
|
|
209
|
+
│ └── microsoft.py # Microsoft provider (kept for reference)
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
**Note**: Provider classes in `providers/` are kept for reference but not used.
|
|
213
|
+
The implementation uses Authlib's built-in provider support via `server_metadata_url`.
|
|
214
|
+
|
|
215
|
+
## Testing
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
# Test Google login flow
|
|
219
|
+
open http://localhost:8000/api/auth/google/login
|
|
220
|
+
|
|
221
|
+
# Test Microsoft login flow
|
|
222
|
+
open http://localhost:8000/api/auth/microsoft/login
|
|
223
|
+
|
|
224
|
+
# Check current user
|
|
225
|
+
curl http://localhost:8000/api/auth/me -H "Cookie: rem_session=..."
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## Troubleshooting
|
|
229
|
+
|
|
230
|
+
### "Authentication is disabled"
|
|
231
|
+
|
|
232
|
+
Set `AUTH__ENABLED=true` in environment or `.env` file.
|
|
233
|
+
|
|
234
|
+
### "Unknown provider: google"
|
|
235
|
+
|
|
236
|
+
Check that `AUTH__GOOGLE__CLIENT_ID` is set. The router only registers providers with valid credentials.
|
|
237
|
+
|
|
238
|
+
### Redirect URI mismatch
|
|
239
|
+
|
|
240
|
+
Ensure the redirect URI in your environment exactly matches the one registered with the provider:
|
|
241
|
+
- Google: Check Google Cloud Console > Credentials
|
|
242
|
+
- Microsoft: Check Azure Portal > App registrations > Authentication
|
|
243
|
+
|
|
244
|
+
### PKCE errors
|
|
245
|
+
|
|
246
|
+
Authlib handles PKCE automatically. If you see PKCE errors:
|
|
247
|
+
1. Clear browser cookies and sessions
|
|
248
|
+
2. Ensure session middleware is registered before auth router
|
|
249
|
+
3. Check that `AUTH__SESSION_SECRET` is set
|
|
250
|
+
|
|
251
|
+
## References
|
|
252
|
+
|
|
253
|
+
- [OAuth 2.1 Draft](https://datatracker.ietf.org/doc/html/draft-ietf-oauth-v2-1-11)
|
|
254
|
+
- [OIDC Core](https://openid.net/specs/openid-connect-core-1_0.html)
|
|
255
|
+
- [PKCE RFC](https://datatracker.ietf.org/doc/html/rfc7636)
|
|
256
|
+
- [Authlib Documentation](https://docs.authlib.org/en/latest/)
|
|
257
|
+
- [Google OAuth](https://developers.google.com/identity/protocols/oauth2)
|
|
258
|
+
- [Microsoft identity platform](https://learn.microsoft.com/en-us/entra/identity-platform/)
|
rem/auth/__init__.py
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM Authentication Module.
|
|
3
|
+
|
|
4
|
+
OAuth 2.1 compliant authentication with support for:
|
|
5
|
+
- Google OAuth
|
|
6
|
+
- Microsoft Entra ID (Azure AD) OIDC
|
|
7
|
+
- Custom OIDC providers
|
|
8
|
+
|
|
9
|
+
Design Pattern:
|
|
10
|
+
- Provider-agnostic base classes
|
|
11
|
+
- PKCE (Proof Key for Code Exchange) for all flows
|
|
12
|
+
- State parameter for CSRF protection
|
|
13
|
+
- Nonce for ID token replay protection
|
|
14
|
+
- Token validation with JWKS
|
|
15
|
+
- Clean separation: providers/ for OAuth logic, middleware.py for FastAPI integration
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from .providers.base import OAuthProvider
|
|
19
|
+
from .providers.google import GoogleOAuthProvider
|
|
20
|
+
from .providers.microsoft import MicrosoftOAuthProvider
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"OAuthProvider",
|
|
24
|
+
"GoogleOAuthProvider",
|
|
25
|
+
"MicrosoftOAuthProvider",
|
|
26
|
+
]
|
rem/auth/middleware.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OAuth Authentication Middleware for FastAPI.
|
|
3
|
+
|
|
4
|
+
Protects API endpoints by requiring valid session.
|
|
5
|
+
Redirects unauthenticated requests to login page.
|
|
6
|
+
|
|
7
|
+
Design Pattern:
|
|
8
|
+
- Check session for user on protected paths
|
|
9
|
+
- Return 401 for API calls (JSON)
|
|
10
|
+
- Redirect to login for browser requests (HTML)
|
|
11
|
+
- Exclude auth endpoints and public paths
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
from rem.auth.middleware import AuthMiddleware
|
|
15
|
+
|
|
16
|
+
app.add_middleware(
|
|
17
|
+
AuthMiddleware,
|
|
18
|
+
protected_paths=["/api/v1"],
|
|
19
|
+
excluded_paths=["/api/auth", "/health"],
|
|
20
|
+
)
|
|
21
|
+
"""
|
|
22
|
+
|
|
23
|
+
from starlette.middleware.base import BaseHTTPMiddleware
|
|
24
|
+
from starlette.requests import Request
|
|
25
|
+
from starlette.responses import JSONResponse, RedirectResponse
|
|
26
|
+
from loguru import logger
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AuthMiddleware(BaseHTTPMiddleware):
    """
    Authentication middleware using session-based auth.

    For every request whose path falls under a protected prefix and not
    under an excluded one, requires a truthy ``"user"`` entry in
    ``request.session``. Compatible with OAuth flows from auth router.

    NOTE(review): ``request.session`` is presumably provided by a session
    middleware registered on the app -- confirm it is installed.
    """

    def __init__(
        self,
        app,
        protected_paths: list[str] | None = None,
        excluded_paths: list[str] | None = None,
    ):
        """
        Initialize auth middleware.

        Args:
            app: ASGI application
            protected_paths: Path prefixes that require authentication.
                ``None`` selects the default ``["/api/v1"]``; an explicit
                empty list protects nothing.
            excluded_paths: Paths exempt from the auth check. ``None``
                selects the default ``["/api/auth", "/health", "/docs",
                "/openapi.json"]``; an explicit empty list exempts nothing.
        """
        super().__init__(app)
        # Compare against None rather than truthiness so that an explicit
        # empty list is honoured instead of being replaced by the defaults.
        self.protected_paths = ["/api/v1"] if protected_paths is None else protected_paths
        self.excluded_paths = (
            ["/api/auth", "/health", "/docs", "/openapi.json"]
            if excluded_paths is None
            else excluded_paths
        )

    @staticmethod
    def _is_within(path: str, prefix: str) -> bool:
        """Return True if *path* equals *prefix* or lies beneath it.

        Matching stops at a path-segment boundary, so ``/api/auth`` covers
        ``/api/auth`` and ``/api/auth/google/login`` but not
        ``/api/authority``. A prefix that already ends in ``/`` is matched
        as a plain prefix.
        """
        if prefix.endswith("/"):
            return path.startswith(prefix)
        return path == prefix or path.startswith(prefix + "/")

    async def dispatch(self, request: Request, call_next):
        """
        Check authentication for protected paths.

        Args:
            request: HTTP request
            call_next: Next middleware in chain

        Returns:
            Response (401/redirect if unauthorized, normal response if authorized)
        """
        path = request.url.path

        # Protected prefixes stay plain prefixes (erring towards protecting
        # more), while exclusions must match on a segment boundary (erring
        # towards exempting less), so look-alike paths cannot slip past auth.
        is_protected = any(path.startswith(p) for p in self.protected_paths)
        is_excluded = any(self._is_within(path, p) for p in self.excluded_paths)

        # Skip auth check for unprotected or excluded paths
        if not is_protected or is_excluded:
            return await call_next(request)

        # Check for valid session
        user = request.session.get("user")
        if not user:
            logger.warning(f"Unauthorized access attempt: {path}")

            # Return 401 for API requests (JSON): either the client asked for
            # JSON or the path is under /api/. With the default protected
            # prefix "/api/v1" this branch is always taken.
            accept = request.headers.get("accept", "")
            if "application/json" in accept or path.startswith("/api/"):
                return JSONResponse(
                    status_code=401,
                    content={"detail": "Authentication required"},
                    headers={
                        "WWW-Authenticate": 'Bearer realm="REM API"',
                    },
                )

            # Redirect to login for browser requests (the Google login route
            # is hard-coded here).
            # TODO: Store original URL for post-login redirect
            return RedirectResponse(url="/api/auth/google/login", status_code=302)

        # Add user to request state for downstream handlers
        request.state.user = user

        return await call_next(request)
|