remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""
|
|
2
|
+
JSON extraction utilities for response_format='json_object' mode.
|
|
3
|
+
|
|
4
|
+
Design Pattern:
|
|
5
|
+
- Best-effort JSON extraction from agent output
|
|
6
|
+
- Handles fenced code blocks (```json ... ```)
|
|
7
|
+
- Handles raw JSON objects
|
|
8
|
+
- Graceful fallback to string if extraction fails
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import json
|
|
12
|
+
import re
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def extract_json_resilient(output: str | dict | list) -> str:
|
|
16
|
+
"""
|
|
17
|
+
Extract JSON from agent output with multiple fallback strategies.
|
|
18
|
+
|
|
19
|
+
Strategies (in order):
|
|
20
|
+
1. If already dict/list, serialize directly
|
|
21
|
+
2. Extract from fenced JSON code blocks (```json ... ```)
|
|
22
|
+
3. Find JSON object/array in text ({...} or [...])
|
|
23
|
+
4. Return as-is if all strategies fail
|
|
24
|
+
|
|
25
|
+
Args:
|
|
26
|
+
output: Agent output (str, dict, or list)
|
|
27
|
+
|
|
28
|
+
Returns:
|
|
29
|
+
JSON string (best-effort)
|
|
30
|
+
|
|
31
|
+
Examples:
|
|
32
|
+
>>> extract_json_resilient({"answer": "test"})
|
|
33
|
+
'{"answer": "test"}'
|
|
34
|
+
|
|
35
|
+
>>> extract_json_resilient('Here is the result:\\n```json\\n{"answer": "test"}\\n```')
|
|
36
|
+
'{"answer": "test"}'
|
|
37
|
+
|
|
38
|
+
>>> extract_json_resilient('The answer is {"answer": "test"} as shown above.')
|
|
39
|
+
'{"answer": "test"}'
|
|
40
|
+
"""
|
|
41
|
+
# Strategy 1: Already structured
|
|
42
|
+
if isinstance(output, (dict, list)):
|
|
43
|
+
return json.dumps(output)
|
|
44
|
+
|
|
45
|
+
text = str(output)
|
|
46
|
+
|
|
47
|
+
# Strategy 2: Extract from fenced code blocks
|
|
48
|
+
fenced_match = re.search(r"```json\s*\n(.*?)\n```", text, re.DOTALL)
|
|
49
|
+
if fenced_match:
|
|
50
|
+
try:
|
|
51
|
+
json_str = fenced_match.group(1).strip()
|
|
52
|
+
# Validate it's valid JSON
|
|
53
|
+
json.loads(json_str)
|
|
54
|
+
return json_str
|
|
55
|
+
except json.JSONDecodeError:
|
|
56
|
+
pass
|
|
57
|
+
|
|
58
|
+
# Strategy 3: Find JSON object or array
|
|
59
|
+
# Look for {...} or [...]
|
|
60
|
+
for pattern in [
|
|
61
|
+
r"\{[^{}]*\}", # Simple object
|
|
62
|
+
r"\{.*\}", # Nested object
|
|
63
|
+
r"\[.*\]", # Array
|
|
64
|
+
]:
|
|
65
|
+
match = re.search(pattern, text, re.DOTALL)
|
|
66
|
+
if match:
|
|
67
|
+
try:
|
|
68
|
+
json_str = match.group(0)
|
|
69
|
+
# Validate it's valid JSON
|
|
70
|
+
json.loads(json_str)
|
|
71
|
+
return json_str
|
|
72
|
+
except json.JSONDecodeError:
|
|
73
|
+
continue
|
|
74
|
+
|
|
75
|
+
# Strategy 4: Fallback to string
|
|
76
|
+
return text
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
"""
|
|
2
|
+
OpenAI-compatible API models for chat completions.
|
|
3
|
+
|
|
4
|
+
Design Pattern:
|
|
5
|
+
- Full OpenAI compatibility for drop-in replacement
|
|
6
|
+
- Support for streaming (SSE) and non-streaming modes
|
|
7
|
+
- Response format control (text vs json_object)
|
|
8
|
+
- Headers map to AgentContext for session/context control
|
|
9
|
+
- Body fields for OpenAI-compatible parameters + metadata
|
|
10
|
+
|
|
11
|
+
Headers (context control):
|
|
12
|
+
X-User-Id → context.user_id (user identifier)
|
|
13
|
+
X-Tenant-Id → context.tenant_id (multi-tenancy, default: "default")
|
|
14
|
+
X-Session-Id → context.session_id (conversation continuity)
|
|
15
|
+
X-Agent-Schema → context.agent_schema_uri (which agent to use, default: "rem")
|
|
16
|
+
X-Model-Name → context.default_model (model override)
|
|
17
|
+
X-Chat-Is-Audio → triggers audio transcription ("true"/"false")
|
|
18
|
+
X-Is-Eval → context.is_eval (marks session as evaluation, sets mode=EVALUATION)
|
|
19
|
+
|
|
20
|
+
Body Fields (OpenAI-compatible + extensions):
|
|
21
|
+
model → LLM model (e.g., "openai:gpt-4.1", "anthropic:claude-sonnet-4-5-20250929")
|
|
22
|
+
messages → Chat conversation history
|
|
23
|
+
temperature → Sampling temperature (0-2)
|
|
24
|
+
max_tokens → Max tokens (deprecated, use max_completion_tokens)
|
|
25
|
+
max_completion_tokens → Max tokens to generate
|
|
26
|
+
stream → Enable SSE streaming
|
|
27
|
+
metadata → Key-value pairs merged with session metadata (for evals/experiments)
|
|
28
|
+
store → Whether to store for distillation/evaluation
|
|
29
|
+
seed → Deterministic sampling seed
|
|
30
|
+
top_p → Nucleus sampling probability
|
|
31
|
+
reasoning_effort → low/medium/high for o-series models
|
|
32
|
+
service_tier → auto/flex/priority/default
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from typing import Any, Literal
|
|
36
|
+
|
|
37
|
+
from pydantic import BaseModel, Field
|
|
38
|
+
|
|
39
|
+
from rem.settings import settings
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Request models
|
|
43
|
+
class ChatMessage(BaseModel):
    """OpenAI chat message format.

    One entry of the ``messages`` array in a chat completion request or
    response. Only ``role`` is required; the optional fields mirror the
    OpenAI wire format.
    """

    # Message author; "tool" carries a tool/function execution result.
    role: Literal["system", "user", "assistant", "tool"]
    # Message text. Optional — OpenAI allows content-less messages
    # (e.g. assistant turns that only carry tool calls).
    content: str | None = None
    # Optional participant name distinguishing authors with the same role.
    name: str | None = None
    # For role="tool": id of the tool call this message responds to.
    tool_call_id: str | None = None
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class ResponseFormat(BaseModel):
    """
    Response format specification (OpenAI-compatible).

    - text: Plain text response
    - json_object: Best-effort JSON extraction from agent output
    """

    # Defaults to plain text; "json_object" switches the completion
    # endpoint into JSON mode.
    type: Literal["text", "json_object"] = Field(
        default="text",
        description="Response format type. Use 'json_object' to enable JSON mode.",
    )
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
class ChatCompletionRequest(BaseModel):
    """
    OpenAI chat completion request format.

    Compatible with OpenAI's /v1/chat/completions endpoint.

    Headers Map to AgentContext:
        X-User-Id → context.user_id
        X-Tenant-Id → context.tenant_id (default: "default")
        X-Session-Id → context.session_id
        X-Agent-Schema → context.agent_schema_uri (default: "rem")
        X-Model-Name → context.default_model
        X-Chat-Is-Audio → triggers audio transcription
        X-Is-Eval → context.is_eval (sets session mode=EVALUATION)

    Body Fields for Metadata/Evals:
        metadata → Key-value pairs merged with session metadata
        store → Whether to store for distillation/evaluation

    Note: Model is specified in body.model (standard OpenAI field), not headers.
    """

    # TODO: default should come from settings.llm.default_model at request time
    # Using None and resolving in endpoint to avoid import-time settings evaluation
    model: str | None = Field(
        default=None,
        description="Model to use. Defaults to LLM__DEFAULT_MODEL from settings.",
    )
    # Full conversation history; the only strictly required field.
    messages: list[ChatMessage] = Field(description="Chat conversation history")
    temperature: float | None = Field(default=None, ge=0, le=2)
    # Deprecated upstream in favor of max_completion_tokens (kept for
    # OpenAI compatibility).
    max_tokens: int | None = Field(default=None, ge=1)
    stream: bool = Field(default=False, description="Enable SSE streaming")
    # Constrained to exactly 1: this API does not fan out multiple completions.
    n: int | None = Field(default=1, ge=1, le=1, description="Number of completions (must be 1)")
    stop: str | list[str] | None = None
    presence_penalty: float | None = Field(default=None, ge=-2, le=2)
    frequency_penalty: float | None = Field(default=None, ge=-2, le=2)
    user: str | None = Field(default=None, description="Unique user identifier")
    response_format: ResponseFormat | None = Field(
        default=None,
        description="Response format. Set type='json_object' to enable JSON mode.",
    )
    # Additional OpenAI-compatible fields
    # NOTE(review): the 16-key / 64-char-key / 512-char-value limits in the
    # description are not enforced by a validator here — presumably enforced
    # downstream or left best-effort; confirm before relying on them.
    metadata: dict[str, str] | None = Field(
        default=None,
        description="Key-value pairs attached to the request (max 16 keys, 64/512 char limits). "
        "Merged with session metadata for persistence.",
    )
    store: bool | None = Field(
        default=None,
        description="Whether to store for distillation/evaluation purposes.",
    )
    max_completion_tokens: int | None = Field(
        default=None,
        ge=1,
        description="Max tokens to generate (replaces deprecated max_tokens).",
    )
    seed: int | None = Field(
        default=None,
        description="Seed for deterministic sampling (best effort).",
    )
    # Mutually exclusive with temperature by OpenAI convention (not
    # validated here — see description).
    top_p: float | None = Field(
        default=None,
        ge=0,
        le=1,
        description="Nucleus sampling probability. Use temperature OR top_p, not both.",
    )
    logprobs: bool | None = Field(
        default=None,
        description="Whether to return log probabilities for output tokens.",
    )
    top_logprobs: int | None = Field(
        default=None,
        ge=0,
        le=20,
        description="Number of most likely tokens to return at each position (requires logprobs=true).",
    )
    reasoning_effort: Literal["low", "medium", "high"] | None = Field(
        default=None,
        description="Reasoning effort for o-series models (low/medium/high).",
    )
    service_tier: Literal["auto", "flex", "priority", "default"] | None = Field(
        default=None,
        description="Service tier for processing (flex is 50% cheaper but slower).",
    )
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
# Response models
|
|
153
|
+
class ChatCompletionUsage(BaseModel):
    """Token usage statistics (OpenAI ``usage`` object)."""

    # Tokens consumed by the input messages.
    prompt_tokens: int
    # Tokens generated in the completion.
    completion_tokens: int
    # By OpenAI convention, prompt_tokens + completion_tokens.
    total_tokens: int
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
class ChatCompletionMessageDelta(BaseModel):
    """Streaming delta for chat completion.

    Incremental fragment carried by an SSE chunk; a field is None when
    the chunk does not update it.
    """

    # Note: unlike ChatMessage, "tool" is not part of the delta role set.
    role: Literal["system", "user", "assistant"] | None = None
    content: str | None = None
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class ChatCompletionChoice(BaseModel):
    """Chat completion choice (non-streaming)."""

    # Position of this choice in the response's choices list.
    index: int
    # The complete assistant (or other role) message for this choice.
    message: ChatMessage
    # Why generation stopped; required field, but None is allowed.
    finish_reason: Literal["stop", "length", "content_filter", "tool_calls"] | None
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class ChatCompletionStreamChoice(BaseModel):
    """Chat completion choice (streaming)."""

    # Position of this choice in the chunk's choices list.
    index: int
    # Incremental content/role fragment for this chunk.
    delta: ChatCompletionMessageDelta
    # None until the final chunk. Note: the streaming set omits
    # "tool_calls", unlike the non-streaming ChatCompletionChoice.
    finish_reason: Literal["stop", "length", "content_filter"] | None = None
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
class ChatCompletionResponse(BaseModel):
    """OpenAI chat completion response (non-streaming)."""

    # Unique completion identifier.
    id: str
    object: Literal["chat.completion"] = "chat.completion"
    # Creation time as a Unix timestamp in seconds (OpenAI convention).
    created: int
    # Model that produced the completion.
    model: str
    choices: list[ChatCompletionChoice]
    usage: ChatCompletionUsage
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class ChatCompletionStreamResponse(BaseModel):
    """OpenAI chat completion chunk (streaming)."""

    # Completion identifier; by OpenAI convention the same id is repeated
    # across all chunks of one completion.
    id: str
    object: Literal["chat.completion.chunk"] = "chat.completion.chunk"
    # Creation time as a Unix timestamp in seconds (OpenAI convention).
    created: int
    model: str
    choices: list[ChatCompletionStreamChoice]
    # Note: no usage field — token usage is not reported per chunk.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""OTEL utilities for chat routers."""
|
|
2
|
+
|
|
3
|
+
from loguru import logger
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def get_tracer():
    """Return the OpenTelemetry tracer for chat completions.

    Yields ``None`` when the OpenTelemetry API is unavailable or tracer
    acquisition fails, so callers can guard with a simple truthiness check.
    """
    try:
        from opentelemetry import trace

        tracer = trace.get_tracer("rem.chat.completions")
    except Exception:
        return None
    return tracer
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_current_trace_context() -> tuple[str | None, str | None]:
    """Get trace_id and span_id from current OTEL context.

    Returns:
        Tuple of (trace_id, span_id) as hex strings, or (None, None) if not available.
    """
    try:
        from opentelemetry import trace

        span_ctx = trace.get_current_span().get_span_context()
        if span_ctx.is_valid:
            # 32 hex chars for the 128-bit trace id, 16 for the 64-bit span id.
            return format(span_ctx.trace_id, '032x'), format(span_ctx.span_id, '016x')
    except Exception as exc:
        logger.debug(f"Could not get trace context: {exc}")

    return None, None
|