crewplus 0.2.89__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- crewplus/__init__.py +10 -0
- crewplus/callbacks/__init__.py +1 -0
- crewplus/callbacks/async_langfuse_handler.py +166 -0
- crewplus/services/__init__.py +21 -0
- crewplus/services/azure_chat_model.py +145 -0
- crewplus/services/feedback.md +55 -0
- crewplus/services/feedback_manager.py +267 -0
- crewplus/services/gemini_chat_model.py +884 -0
- crewplus/services/init_services.py +57 -0
- crewplus/services/model_load_balancer.py +264 -0
- crewplus/services/schemas/feedback.py +61 -0
- crewplus/services/tracing_manager.py +182 -0
- crewplus/utils/__init__.py +4 -0
- crewplus/utils/schema_action.py +7 -0
- crewplus/utils/schema_document_updater.py +173 -0
- crewplus/utils/tracing_util.py +55 -0
- crewplus/vectorstores/milvus/__init__.py +5 -0
- crewplus/vectorstores/milvus/milvus_schema_manager.py +270 -0
- crewplus/vectorstores/milvus/schema_milvus.py +586 -0
- crewplus/vectorstores/milvus/vdb_service.py +917 -0
- crewplus-0.2.89.dist-info/METADATA +144 -0
- crewplus-0.2.89.dist-info/RECORD +29 -0
- crewplus-0.2.89.dist-info/WHEEL +4 -0
- crewplus-0.2.89.dist-info/entry_points.txt +4 -0
- crewplus-0.2.89.dist-info/licenses/LICENSE +21 -0
- docs/GeminiChatModel.md +247 -0
- docs/ModelLoadBalancer.md +134 -0
- docs/VDBService.md +238 -0
- docs/index.md +23 -0
crewplus/__init__.py
ADDED
@@ -0,0 +1,10 @@
from .services.gemini_chat_model import GeminiChatModel
from .services.model_load_balancer import ModelLoadBalancer
from .vectorstores.milvus import SchemaMilvus, VDBService

__all__ = [
    "GeminiChatModel",
    "ModelLoadBalancer",
    "SchemaMilvus",
    "VDBService"
]
crewplus/callbacks/__init__.py
ADDED
@@ -0,0 +1 @@
# This file makes the 'callbacks' directory a Python package.
crewplus/callbacks/async_langfuse_handler.py
ADDED
@@ -0,0 +1,166 @@
# File: crewplus/callbacks/async_langfuse_handler.py
import asyncio
import contextvars
from contextlib import contextmanager
from typing import Any, Dict, List, Union, Optional, Sequence
from uuid import UUID

try:
    from langfuse.langchain import CallbackHandler as LangfuseCallbackHandler
    from langchain_core.callbacks import AsyncCallbackHandler
    from langchain_core.outputs import LLMResult, ChatGeneration
    from langchain_core.messages import BaseMessage
    from langchain.schema.agent import AgentAction, AgentFinish
    from langchain.schema.document import Document
    LANGFUSE_AVAILABLE = True
except ImportError:
    LANGFUSE_AVAILABLE = False
    LangfuseCallbackHandler = None
    AsyncCallbackHandler = object
    # Define dummy types if langchain is not available
    LLMResult = object
    BaseMessage = object
    AgentAction = object
    AgentFinish = object
    Document = object


_ASYNC_CONTEXT_TOKEN = "in_async_context"
in_async_context = contextvars.ContextVar(_ASYNC_CONTEXT_TOKEN, default=False)

@contextmanager
def async_context():
    """A context manager to signal that we are in an async execution context."""
    token = in_async_context.set(True)
    try:
        yield
    finally:
        in_async_context.reset(token)

class AsyncLangfuseCallbackHandler(AsyncCallbackHandler):
    """
    Wraps the synchronous LangfuseCallbackHandler to make it fully compatible with
    LangChain's async methods by handling all relevant events.
    """
    def __init__(self, sync_handler: Optional[LangfuseCallbackHandler] = None, *args: Any, **kwargs: Any):
        if not LANGFUSE_AVAILABLE:
            raise ImportError("Langfuse is not available. Please install it with 'pip install langfuse'")

        if sync_handler:
            self.sync_handler = sync_handler
        else:
            self.sync_handler = LangfuseCallbackHandler(*args, **kwargs)

    def __getattr__(self, name: str) -> Any:
        return getattr(self.sync_handler, name)

    # LLM Events
    async def on_llm_start(
        self, serialized: Dict[str, Any], prompts: List[str], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> None:
        corrected_prompts = prompts if isinstance(prompts, list) else [prompts]
        await asyncio.to_thread(
            self.sync_handler.on_llm_start, serialized, corrected_prompts, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_chat_model_start(
        self, serialized: Dict[str, Any], messages: List[List[BaseMessage]], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_chat_model_start, serialized, messages, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_llm_end(
        self, response: LLMResult, *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> None:
        await asyncio.to_thread(
            self.sync_handler.on_llm_end, response, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_llm_error(
        self, error: Union[Exception, KeyboardInterrupt], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> None:
        await asyncio.to_thread(
            self.sync_handler.on_llm_error, error, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    # Chain Events
    async def on_chain_start(
        self, serialized: Dict[str, Any], inputs: Dict[str, Any], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_chain_start, serialized, inputs, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_chain_end(
        self, outputs: Dict[str, Any], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_chain_end, outputs, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_chain_error(
        self, error: Union[Exception, KeyboardInterrupt], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_chain_error, error, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    # Tool Events
    async def on_tool_start(
        self, serialized: Dict[str, Any], input_str: str, *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_tool_start, serialized, input_str, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_tool_end(
        self, output: str, *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_tool_end, output, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_tool_error(
        self, error: Union[Exception, KeyboardInterrupt], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_tool_error, error, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    # Retriever Events
    async def on_retriever_start(
        self, serialized: Dict[str, Any], query: str, *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_retriever_start, serialized, query, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_retriever_end(
        self, documents: Sequence[Document], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_retriever_end, documents, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_retriever_error(
        self, error: Union[Exception, KeyboardInterrupt], *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_retriever_error, error, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    # Agent Events
    async def on_agent_action(
        self, action: AgentAction, *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_agent_action, action, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )

    async def on_agent_finish(
        self, finish: AgentFinish, *, run_id: UUID, parent_run_id: Optional[UUID] = None, **kwargs: Any
    ) -> Any:
        await asyncio.to_thread(
            self.sync_handler.on_agent_finish, finish, run_id=run_id, parent_run_id=parent_run_id, **kwargs
        )
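The handler above offloads each synchronous Langfuse callback to a worker thread via asyncio.to_thread, keeping the event loop unblocked. A minimal usage sketch follows; the ChatOpenAI model name and environment setup are illustrative assumptions, not part of this package's diff.

# Hypothetical usage sketch. Assumes LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY
# are set in the environment and langchain-openai is installed.
import asyncio
from langchain_openai import ChatOpenAI  # illustrative model; any chat model works
from crewplus.callbacks.async_langfuse_handler import (
    AsyncLangfuseCallbackHandler,
    async_context,
)

async def main():
    handler = AsyncLangfuseCallbackHandler()  # wraps a sync Langfuse handler
    with async_context():  # signal that we are inside an async execution context
        llm = ChatOpenAI(model="gpt-4o-mini")  # hypothetical model name
        # Each callback event is forwarded to the wrapped sync handler via
        # asyncio.to_thread, so Langfuse network I/O never blocks the event loop.
        await llm.ainvoke("Hello!", config={"callbacks": [handler]})

asyncio.run(main())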
crewplus/services/__init__.py
ADDED
@@ -0,0 +1,21 @@
from .gemini_chat_model import GeminiChatModel
from .init_services import init_load_balancer, get_model_balancer
from .model_load_balancer import ModelLoadBalancer
from .azure_chat_model import TracedAzureChatOpenAI
from .feedback_manager import LangfuseFeedbackManager
from .schemas.feedback import FeedbackIn, FeedbackUpdate, FeedbackOut
from .tracing_manager import TracingManager, TracingContext

__all__ = [
    "GeminiChatModel",
    "init_load_balancer",
    "get_model_balancer",
    "ModelLoadBalancer",
    "TracedAzureChatOpenAI",
    "LangfuseFeedbackManager",
    "FeedbackIn",
    "FeedbackUpdate",
    "FeedbackOut",
    "TracingManager",
    "TracingContext"
]
crewplus/services/azure_chat_model.py
ADDED
@@ -0,0 +1,145 @@
import os
import logging
from typing import Any, Optional

from langchain_openai.chat_models.azure import AzureChatOpenAI
from pydantic import Field
from .tracing_manager import TracingManager, TracingContext

class TracedAzureChatOpenAI(AzureChatOpenAI):
    """
    Wrapper for AzureChatOpenAI that integrates with tracing services like Langfuse.

    This class automatically handles callback integration, making it easier
    to trace and debug your interactions with the Azure OpenAI service.

    **Tracing Integration (e.g., Langfuse):**
    Tracing is automatically enabled when the respective environment variables are set.
    For Langfuse:
    - LANGFUSE_PUBLIC_KEY: Your Langfuse public key
    - LANGFUSE_SECRET_KEY: Your Langfuse secret key
    - LANGFUSE_HOST: Langfuse host URL (optional, defaults to https://cloud.langfuse.com)

    You can explicitly control this with the `enable_tracing` parameter or disable
    it for specific calls by adding `{"metadata": {"tracing_disabled": True}}`
    to the `config` argument.

    Attributes:
        logger (Optional[logging.Logger]): An optional logger instance.
        enable_tracing (Optional[bool]): Enable/disable tracing (auto-detect if None).

    Example:
        .. code-block:: python

            # Set Langfuse environment variables (optional)
            import os
            os.environ["LANGFUSE_PUBLIC_KEY"] = "pk-lf-..."
            os.environ["LANGFUSE_SECRET_KEY"] = "sk-lf-..."

            from crewplus.services.azure_chat_model import TracedAzureChatOpenAI
            from langchain_core.messages import HumanMessage

            # Initialize the model
            model = TracedAzureChatOpenAI(
                azure_deployment="your-deployment",
                api_version="2024-05-01-preview",
            )

            # --- Text-only usage (automatically traced if env vars set) ---
            response = model.invoke("Hello, how are you?")
            print("Text response:", response.content)

            # --- Tracing with session/user tracking (for Langfuse) ---
            response = model.invoke(
                "What is AI?",
                config={
                    "metadata": {
                        "langfuse_session_id": "chat-session-123",
                        "user_id": "user-456"
                    }
                }
            )

            # --- Disable tracing for a specific call ---
            response = model.invoke(
                "Hello without tracing",
                config={"metadata": {"tracing_disabled": True}}
            )

            # --- Asynchronous Streaming Usage ---
            import asyncio
            from langchain_core.messages import HumanMessage

            async def main():
                messages = [HumanMessage(content="Tell me a short story about a brave robot.")]
                print("\nAsync Streaming response:")
                async for chunk in model.astream(messages):
                    print(chunk.content, end="", flush=True)
                print()

            # In a real application, you would run this with:
            # asyncio.run(main())
    """
    logger: Optional[logging.Logger] = Field(default=None, description="Optional logger instance", exclude=True)
    enable_tracing: Optional[bool] = Field(default=None, description="Enable tracing (auto-detect if None)")

    _tracing_manager: Optional[TracingManager] = None

    def __init__(self, **kwargs: Any):
        super().__init__(**kwargs)

        # Initialize logger
        if self.logger is None:
            self.logger = logging.getLogger(f"{self.__class__.__module__}.{self.__class__.__name__}")
            if not self.logger.handlers:
                self.logger.addHandler(logging.StreamHandler())
                self.logger.setLevel(logging.INFO)

        self._tracing_manager = TracingManager(self)

    def get_model_identifier(self) -> str:
        """Return a string identifying this model for tracing and logging."""
        return f"{self.__class__.__name__} (deployment='{self.deployment_name}')"

    def _prepare_stream_kwargs(self, kwargs: Optional[dict], *, async_mode: bool) -> dict:
        """
        Inject stream_options for Langfuse usage tracking only when streaming is supported.
        Avoids passing illegal stream_options to non-streaming requests (which causes 400s).
        """
        final_kwargs = dict(kwargs or {})

        probe_kwargs = {**final_kwargs, "stream": final_kwargs.get("stream", True)}

        # Older or mocked BaseChatModel variants might not expose _should_stream;
        # if AttributeError is raised, default to previous behavior (assume streaming)
        # so Langfuse usage tracking remains enabled instead of silently disabling it.
        try:
            will_stream = self._should_stream(async_api=async_mode, **probe_kwargs)
        except AttributeError:
            will_stream = True

        if will_stream:
            stream_options = dict(final_kwargs.get("stream_options") or {})
            stream_options["include_usage"] = True
            final_kwargs["stream_options"] = stream_options

        return final_kwargs

    def invoke(self, input, config=None, **kwargs):
        config = self._tracing_manager.add_sync_callbacks_to_config(config)
        return super().invoke(input, config=config, **kwargs)

    async def ainvoke(self, input, config=None, **kwargs):
        config = self._tracing_manager.add_async_callbacks_to_config(config)
        return await super().ainvoke(input, config=config, **kwargs)

    def stream(self, input, config=None, **kwargs):
        kwargs = self._prepare_stream_kwargs(kwargs, async_mode=False)
        config = self._tracing_manager.add_sync_callbacks_to_config(config)
        yield from super().stream(input, config=config, **kwargs)

    async def astream(self, input, config=None, **kwargs):
        kwargs = self._prepare_stream_kwargs(kwargs, async_mode=True)
        config = self._tracing_manager.add_async_callbacks_to_config(config)
        async for chunk in super().astream(input, config=config, **kwargs):
            yield chunk
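The streaming overrides above exist so that stream_options={"include_usage": True} is injected only when a request will actually stream. A minimal sketch of what that means at the call site; the deployment name is a placeholder and standard Azure OpenAI environment variables are assumed.

# Hypothetical sketch. Assumes AZURE_OPENAI_API_KEY / AZURE_OPENAI_ENDPOINT are set.
import asyncio
from crewplus.services.azure_chat_model import TracedAzureChatOpenAI

model = TracedAzureChatOpenAI(
    azure_deployment="my-gpt-4o",  # placeholder deployment name
    api_version="2024-05-01-preview",
)

async def demo():
    # stream_options={"include_usage": True} is added automatically here, so the
    # stream reports token usage to Langfuse; a plain invoke() is left untouched
    # and never receives illegal stream_options.
    async for chunk in model.astream("One sentence on tracing."):
        print(chunk.content, end="", flush=True)

asyncio.run(demo())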
crewplus/services/feedback.md
ADDED
@@ -0,0 +1,55 @@
# Feedback Management with Langfuse

This document outlines the design and implementation of the Langsmith-style feedback system using Langfuse as the backend.

## Design Approach: Parent Observation for Feedback Groups

To replicate the grouped feedback functionality seen in Langsmith, we use Langfuse's hierarchical tracing structure. This approach is called the "Parent Observation" model.

- **Feedback Group**: A batch of feedback items submitted together (e.g., a user rating multiple aspects of a response) is represented as a single **`Span`** (also called an Observation) in Langfuse. This `Span` acts as a container for the group.
- **Group-Level Data**: All metadata that applies to the entire group, such as `correction` data or a unique `feedback_group_id`, is stored in the `metadata` of this parent `Span`.
- **Individual Feedback**: Each individual feedback item (e.g., "helpfulness: 0.9", "safety: true") is represented as a **`Score`** object.
- **Linking**: Crucially, each `Score` is linked to the parent `Span` via its `observation_id`. This creates the hierarchy that is visible in the Langfuse UI.

This design provides a clean UI representation and a semantically correct data model.

### Example

A user submits feedback for a chatbot response (`run_id: "trace-123"`):
- **Helpfulness**: 9/10
- **Safety**: Safe (True)
- **Correction**: The answer should have included details about X.

This is modeled in Langfuse as:

1. A **`Span`** is created in trace `trace-123`.
   - `name`: "user-feedback-group"
   - `metadata`: `{ "feedback_group_id": "fg-abc", "correction": { "expected": "..." } }`
   - This span gets a unique `observation_id`, e.g., `"obs-xyz"`.
2. Two **`Score`** objects are created:
   - Score 1:
     - `name`: "helpfulness"
     - `value`: `0.9`
     - `trace_id`: "trace-123"
     - `observation_id`: `"obs-xyz"` (links to the parent span)
   - Score 2:
     - `name`: "safety"
     - `value`: `1.0` (boolean `True` is converted)
     - `trace_id`: "trace-123"
     - `observation_id`: `"obs-xyz"` (links to the parent span)

## Data Mapping: Langfuse API to `FeedbackOut` Schema

The `read_feedback` method in `LangfuseFeedbackManager` fetches data from multiple Langfuse objects and combines them into a single, consistent `FeedbackOut` Pydantic model.

| `FeedbackOut` Field | Source of Data | Example Value |
| ------------------- | -------------- | ------------- |
| `feedback_id` | `score.id` | `'sc-12345'` |
| `run_id` | `score.trace_id` | `'tr-abcde'` |
| `key` | `score.name` | `'helpfulness'` |
| `score` | `score.value` | `0.9` |
| `value` | `score.metadata.get('value_dict')` or `score.value` | `{'direction': 'up'}` or `'up'` |
| `comment` | `score.comment` | `'Very helpful.'` |
| `created_at` | `score.timestamp` | `datetime.datetime(...)` |
| `correction` | `parent_observation.metadata.get('correction')` | `{'expected': 'A perfect answer.'}` |
| `feedback_group_id` | `parent_observation.metadata.get('feedback_group_id')` | `'fg-xyz-789'` |
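For reference, here is a minimal sketch of the example above expressed directly in Langfuse SDK calls (the same v3-style API that feedback_manager.py below relies on); all IDs and values are illustrative, not real data.

from langfuse import Langfuse
from langfuse.types import TraceContext

client = Langfuse()  # reads LANGFUSE_* environment variables

# Deterministic trace_id derived from the application run_id, as in the manager.
trace_id = Langfuse.create_trace_id(seed="trace-123")

# Parent span: the feedback-group container holding group-level metadata.
group = client.start_span(
    trace_context=TraceContext(trace_id=trace_id),
    name="user-feedback-group",
    metadata={"feedback_group_id": "fg-abc", "correction": {"expected": "..."}},
)

# Individual feedback items: Scores linked to the parent via observation_id.
client.create_score(name="helpfulness", value=0.9, trace_id=trace_id,
                    observation_id=group.id)
client.create_score(name="safety", value=1.0, trace_id=trace_id,
                    observation_id=group.id)  # boolean True converted to 1.0

group.end()
client.flush()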
crewplus/services/feedback_manager.py
ADDED
@@ -0,0 +1,267 @@
import logging
from typing import Optional, List, Union, Dict, Any
from uuid import UUID, uuid4
import datetime

from langfuse import Langfuse
from langfuse.types import TraceContext
from .schemas.feedback import FeedbackIn, FeedbackUpdate, FeedbackOut

logger = logging.getLogger(__name__)

class LangfuseFeedbackManager:
    """
    Manages CRUD operations for Langsmith-style feedback using Langfuse as the backend.

    This manager uses the "Parent Observation" approach, where a batch of feedback
    is represented by a parent Span, and each individual feedback item is a Score
    attached to that Span. It is designed to work with an existing trace (run).
    """

    def __init__(self, langfuse_client: Langfuse):
        """
        Initializes the feedback manager with a Langfuse client instance.

        Args:
            langfuse_client: An initialized Langfuse client.
        """
        if not langfuse_client:
            raise ValueError("A valid Langfuse client instance is required.")
        self.client = langfuse_client

    def read_feedback(self, feedback_id: str) -> Optional[FeedbackOut]:
        """
        Reads a single feedback item (a Score) and enriches it with data
        from its parent observation (the feedback group).

        Args:
            feedback_id: The unique ID of the feedback item (score).

        Returns:
            A FeedbackOut object containing the full context, or None if not found.
        """
        try:
            # Step 1: Fetch the score itself using the correct v2 client and method.
            # The get method expects a list of IDs and returns a response object.
            scores_response = self.client.api.score_v_2.get(score_ids=[feedback_id])
            if not scores_response or not scores_response.data:
                logger.warning(f"No score found for feedback_id: {feedback_id}")
                return None

            score = scores_response.data[0]

            correction = None
            feedback_group_id = None
            value = score.value
            run_id_to_return = score.trace_id

            # Step 2: Fetch the parent observation to get group context
            if score.observation_id:
                parent_obs = self.client.api.observations.get(observation_id=score.observation_id)
                if parent_obs.metadata:
                    correction = parent_obs.metadata.get("correction")
                    feedback_group_id = parent_obs.metadata.get("feedback_group_id")
                    original_run_id = parent_obs.metadata.get("original_run_id")
                    if original_run_id:
                        run_id_to_return = original_run_id

            # Check if the original value was a dict stored in the score's metadata
            if score.metadata and "value_dict" in score.metadata:
                value = score.metadata["value_dict"]

            # Step 3: Construct the rich FeedbackOut object from the collected data.
            # This is where the raw data is parsed into your desired schema.
            return FeedbackOut(
                feedback_id=score.id,
                run_id=run_id_to_return,
                key=score.name,
                score=score.value,
                value=value,
                comment=score.comment,
                created_at=score.timestamp,
                correction=correction,
                feedback_group_id=feedback_group_id,
            )
        except Exception:
            logger.exception("Failed to read feedback with id=%s", feedback_id)
            return None

    def create_feedback_batch(self, feedbacks: List[FeedbackIn]) -> List[FeedbackOut]:
        """
        Creates a group of feedback items under a single parent observation in an existing trace.

        Args:
            feedbacks: A list of FeedbackIn objects. The `run_id`, `correction`, and
                `feedback_group_id` from the first item are used for the group.

        Returns:
            A list of FeedbackOut objects for the created feedback items.
        """
        if not feedbacks:
            return []

        first_item = feedbacks[0]
        run_id = str(first_item.run_id)
        # Convert the client's run_id to a deterministic Langfuse trace_id.
        trace_id = Langfuse.create_trace_id(seed=run_id)
        feedback_group_id = str(first_item.feedback_group_id or uuid4())

        group_metadata = {
            "feedback_group_id": feedback_group_id,
            "original_run_id": run_id,  # Store original run_id for retrieval
        }
        if first_item.correction:
            group_metadata["correction"] = first_item.correction

        try:
            # Create the parent span within the existing trace to act as the group container.
            feedback_group_span = self.client.start_span(
                trace_context=TraceContext(trace_id=trace_id),
                name="user-feedback-group",
                metadata=group_metadata,
                input=[f.dict(exclude_unset=True) for f in feedbacks]
            )
            parent_observation_id = feedback_group_span.id

            created_feedbacks = []
            for item in feedbacks:
                score_value: Any = item.score if item.score is not None else item.value

                if isinstance(score_value, bool):
                    score_value = 1.0 if score_value else 0.0

                score_metadata = None
                value_to_return = score_value
                if isinstance(score_value, dict):
                    score_metadata = {"value_dict": score_value}
                    value_to_return = score_value
                    # Langfuse score `value` must be string or float. Set to neutral.
                    score_value = 0

                if score_value is not None:
                    # Manually create a score ID to include it in the returned object.
                    score_id = self.client._create_observation_id()
                    self.client.create_score(
                        score_id=score_id,
                        name=item.key,
                        value=score_value,
                        trace_id=trace_id,
                        observation_id=parent_observation_id,
                        comment=item.comment,
                        metadata=score_metadata,
                    )

                    feedback_out = FeedbackOut(
                        feedback_id=score_id,
                        run_id=run_id,
                        key=item.key,
                        score=score_value,
                        value=value_to_return,
                        comment=item.comment,
                        created_at=datetime.datetime.now(datetime.timezone.utc),
                        correction=group_metadata.get("correction"),
                        feedback_group_id=feedback_group_id,
                    )
                    created_feedbacks.append(feedback_out)

            feedback_group_span.end()
            self.client.flush()
            return created_feedbacks
        except Exception:
            logger.exception("Failed to create feedback batch for run_id=%s", run_id)
            return []

    def get_feedback_by_group_id(self, run_id: str, feedback_group_id: str) -> List[Dict[str, Any]]:
        """
        Retrieves all scores for a given feedback group ID within a trace.

        Args:
            run_id: The application-specific run ID, which will be converted to a trace ID.
            feedback_group_id: The ID of the feedback group.

        Returns:
            A list of score dictionaries.
        """
        try:
            trace_id = Langfuse.create_trace_id(seed=run_id)
            full_trace = self.client.api.trace.get(trace_id=trace_id)

            parent_obs = next(
                (obs for obs in full_trace.observations
                 if obs.metadata and obs.metadata.get("feedback_group_id") == feedback_group_id),
                None
            )

            if not parent_obs:
                return []

            return [
                score.dict() for score in full_trace.scores
                if score.observation_id == parent_obs.id
            ]
        except Exception:
            logger.exception(f"Error getting feedback for run_id {run_id} and group {feedback_group_id}")
            return []

    def delete_feedback(self, feedback_id: str) -> bool:
        """Deletes a single feedback item (a Score)."""
        try:
            self.client.api.score.delete(score_id=feedback_id)
            # Flush immediately to minimize the non-atomic window.
            self.client.flush()
            return True
        except Exception:
            logger.exception("Failed to delete feedback with id=%s", feedback_id)
            return False

    def update_feedback(self, feedback_id: str, update_data: FeedbackUpdate) -> bool:
        """
        Updates a feedback item (Score).

        ATTENTION: Langfuse does not support native updates for scores. This method
        simulates an update by DELETING the old score and CREATING a new one with the
        same ID to preserve client-side consistency.

        WARNING: This operation is not atomic. There is a small time window between
        deletion and creation where a read for this feedback ID will fail.

        Note: Updates to `correction` are ignored by this method, as `correction` is a
        group-level attribute on the parent observation, which cannot be modified
        after creation via the SDK.
        """
        try:
            scores_response = self.client.api.score_v_2.get(score_ids=[feedback_id])
            if not scores_response or not scores_response.data:
                logger.warning(f"Cannot update. No score found for feedback_id: {feedback_id}")
                return False
            score_to_update = scores_response.data[0]

            if update_data.correction is not None:
                logger.warning(
                    "Updating 'correction' on an individual feedback item is not supported "
                    "as it's a group-level attribute. This field will be ignored."
                )

            # Preserve the ID by deleting and then recreating the score.
            self.delete_feedback(feedback_id)

            new_val = update_data.score if update_data.score is not None else update_data.value
            if new_val is None:
                new_val = score_to_update.value
            if isinstance(new_val, bool):
                new_val = 1.0 if new_val else 0.0

            self.client.create_score(
                score_id=feedback_id,  # Re-use the original score ID
                name=score_to_update.name,
                value=new_val,
                trace_id=score_to_update.trace_id,
                observation_id=score_to_update.observation_id,
                comment=update_data.comment or score_to_update.comment,
                metadata=score_to_update.metadata
            )
            # Flush immediately to minimize the non-atomic window.
            self.client.flush()

            return True
        except Exception:
            logger.exception("Failed to update feedback for id=%s", feedback_id)
            return False
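A hedged end-to-end sketch of the manager above. The FeedbackIn/FeedbackUpdate field names follow their usage in the code (run_id, key, score, value, comment, correction, feedback_group_id); it assumes the schema's remaining fields are optional, and the run ID is a placeholder.

# Hypothetical sketch. Assumes LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY are set.
from langfuse import Langfuse
from crewplus.services.feedback_manager import LangfuseFeedbackManager
from crewplus.services.schemas.feedback import FeedbackIn, FeedbackUpdate

manager = LangfuseFeedbackManager(Langfuse())

# One parent span ("user-feedback-group") plus one Score per item is created.
created = manager.create_feedback_batch([
    FeedbackIn(run_id="trace-123", key="helpfulness", score=0.9,
               comment="Very helpful.",
               correction={"expected": "A perfect answer."}),
    FeedbackIn(run_id="trace-123", key="safety", score=True),  # bool -> 1.0
])

if created:
    fb = manager.read_feedback(created[0].feedback_id)   # enriched with group context
    manager.update_feedback(created[0].feedback_id,      # delete-and-recreate, same ID
                            FeedbackUpdate(comment="Edited comment."))
    manager.delete_feedback(created[1].feedback_id)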