langwatch 0.10.0__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langwatch/__version__.py +1 -1
- langwatch/client.py +16 -0
- langwatch/domain/__init__.py +3 -0
- langwatch/dspy/__init__.py +67 -34
- langwatch/experiment/experiment.py +122 -14
- langwatch/litellm.py +7 -0
- langwatch/openai.py +61 -34
- langwatch/prompts/local_loader.py +12 -0
- langwatch/prompts/prompt_facade.py +10 -3
- langwatch/types.py +5 -0
- langwatch/utils/initialization.py +12 -2
- langwatch/utils/utils.py +3 -1
- {langwatch-0.10.0.dist-info → langwatch-0.10.2.dist-info}/METADATA +1 -1
- {langwatch-0.10.0.dist-info → langwatch-0.10.2.dist-info}/RECORD +15 -15
- {langwatch-0.10.0.dist-info → langwatch-0.10.2.dist-info}/WHEEL +0 -0
langwatch/__version__.py
CHANGED
langwatch/client.py
CHANGED
@@ -50,6 +50,7 @@ class Client(LangWatchClientProtocol):
     _registered_instrumentors: ClassVar[
         dict[opentelemetry.trace.TracerProvider, set[BaseInstrumentor]]
     ] = {}
+    _prompts_path: ClassVar[Optional[str]] = None
 
     # Regular attributes for protocol compatibility
     base_attributes: BaseAttributes

@@ -69,6 +70,7 @@ class Client(LangWatchClientProtocol):
         span_exclude_rules: Optional[List[SpanProcessingExcludeRule]] = None,
         ignore_global_tracer_provider_override_warning: Optional[bool] = None,
         skip_open_telemetry_setup: Optional[bool] = None,
+        prompts_path: Optional[str] = None,
     ) -> "Client":
         """Ensure only one instance of Client exists (singleton pattern)."""
         if cls._instance is None:

@@ -88,6 +90,7 @@ class Client(LangWatchClientProtocol):
         span_exclude_rules: Optional[List[SpanProcessingExcludeRule]] = None,
         ignore_global_tracer_provider_override_warning: Optional[bool] = None,
         skip_open_telemetry_setup: Optional[bool] = None,
+        prompts_path: Optional[str] = None,
     ):
         """
         Initialize the LangWatch tracing client.

@@ -140,6 +143,8 @@ class Client(LangWatchClientProtocol):
         )
         if skip_open_telemetry_setup is not None:
             Client._skip_open_telemetry_setup = skip_open_telemetry_setup
+        if prompts_path is not None:
+            Client._prompts_path = prompts_path
         if base_attributes is not None:
             Client._base_attributes = base_attributes
         # Ensure required SDK attributes remain present after reconfiguration

@@ -215,6 +220,9 @@ class Client(LangWatchClientProtocol):
         if skip_open_telemetry_setup is not None:
             Client._skip_open_telemetry_setup = skip_open_telemetry_setup
 
+        if prompts_path is not None:
+            Client._prompts_path = prompts_path
+
         if base_attributes is not None:
             Client._base_attributes = base_attributes
         elif not Client._base_attributes:

@@ -284,6 +292,7 @@ class Client(LangWatchClientProtocol):
         span_exclude_rules: Optional[List[SpanProcessingExcludeRule]] = None,
         ignore_global_tracer_provider_override_warning: Optional[bool] = None,
         skip_open_telemetry_setup: Optional[bool] = None,
+        prompts_path: Optional[str] = None,
     ) -> "Client":
         """Create or get the singleton instance of the LangWatch client. Internal use only."""
         if cls._instance is None:

@@ -299,6 +308,7 @@ class Client(LangWatchClientProtocol):
                 span_exclude_rules=span_exclude_rules,
                 ignore_global_tracer_provider_override_warning=ignore_global_tracer_provider_override_warning,
                 skip_open_telemetry_setup=skip_open_telemetry_setup,
+                prompts_path=prompts_path,
             )
         return cls._instance
 

@@ -327,6 +337,7 @@ class Client(LangWatchClientProtocol):
         cls._skip_open_telemetry_setup = False
         cls._tracer_provider = None
         cls._rest_api_client = None
+        cls._prompts_path = None
         cls._registered_instrumentors.clear()
 
     @classmethod

@@ -416,6 +427,11 @@ class Client(LangWatchClientProtocol):
         """Get whether OpenTelemetry setup is skipped."""
         return Client._skip_open_telemetry_setup
 
+    @property
+    def prompts_path(self) -> Optional[str]:
+        """Get the base path for local prompt files."""
+        return Client._prompts_path
+
     @disable_sending.setter
     def disable_sending(self, value: bool) -> None:
         """Set whether sending is disabled. Spans are still created; the exporter conditionally drops them."""
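
Taken together, the `client.py` changes thread one new setting through the singleton: `prompts_path` is accepted by `__new__`/`__init__`, stored as a `ClassVar`, cleared on reset, and exposed read-only through a property. A minimal usage sketch, assuming `LANGWATCH_API_KEY` is set in the environment and that `setup()` returns the client (the path value is a placeholder):

```python
import langwatch

# prompts_path is persisted on the singleton Client as a ClassVar,
# so any later reader (e.g. the prompts facade) sees the same value.
client = langwatch.setup(prompts_path="./prompts")
print(client.prompts_path)  # the configured base path for local prompt files
```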
langwatch/domain/__init__.py
CHANGED
@@ -43,6 +43,7 @@ class ChatMessage(TypedDict, total=False):
     tool_calls: Optional[List[ToolCall]]
     tool_call_id: Optional[str]
     name: Optional[str]
+    reasoning_content: Optional[str]
 
 
 class TypedValueChatMessages(TypedDict):

@@ -156,6 +157,7 @@ SpanTypes = Literal[
 class SpanMetrics(TypedDict, total=False):
     prompt_tokens: Optional[int]
     completion_tokens: Optional[int]
+    reasoning_tokens: Optional[int]
     cost: Optional[float]
     first_token_ms: Optional[int]
 

@@ -179,6 +181,7 @@ class SpanParams(TypedDict, total=False):
     functions: Optional[List[Dict[str, Any]]]
     user: Optional[str]
     response_format: Optional[Union[Dict[str, Any], BaseModel]]
+    reasoning_effort: Optional[str]
 
 
 class BaseSpan(TypedDict):
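
Because `ChatMessage`, `SpanMetrics`, and `SpanParams` are `total=False` TypedDicts, the three new reasoning fields are optional keys rather than breaking changes. A sketch of a reasoning-model turn expressed with them (the values, and the assumption that `ChatMessage` also carries `role`/`content` keys, are illustrative):

```python
from langwatch.domain import ChatMessage, SpanMetrics, SpanParams

message: ChatMessage = {
    "role": "assistant",
    "content": "The answer is 42.",
    "reasoning_content": "First, restate the question...",  # new in 0.10.2
}
metrics: SpanMetrics = {
    "prompt_tokens": 120,
    "completion_tokens": 48,
    "reasoning_tokens": 512,  # new in 0.10.2
}
params: SpanParams = {
    "temperature": 0.2,
    "reasoning_effort": "high",  # new in 0.10.2
}
```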
langwatch/dspy/__init__.py
CHANGED
@@ -6,7 +6,8 @@ import warnings
 import dspy
 from typing import Callable, List, Optional, Any, Type, Union
 from langwatch.utils.exceptions import better_raise_for_status
-from langwatch.utils.transformation import truncate_object_recursively
+from langwatch.utils.transformation import SerializableWithStringFallback, truncate_object_recursively
+from langwatch.utils.utils import safe_get
 from langwatch.telemetry.tracing import LangWatchTrace
 from typing_extensions import TypedDict
 import langwatch

@@ -824,6 +825,7 @@ class DSPyTracer:
             "functions",
             "user",
             "response_format",
+            "reasoning_effort",
         ]
         for param in params:
             if all_kwargs.get(param):

@@ -842,23 +844,44 @@ class DSPyTracer:
 
         result = self.__class__.__original_call__(self, prompt, messages, **kwargs)  # type: ignore
 
+        history = self.history[-1] if len(self.history) > 0 else None
+
         if span:
-
+            # Capture full message from history (includes reasoning_content) instead of just result
+            choices = safe_get(history, "response", "choices")
+
+            if choices and len(choices) > 0:
+                messages_output = []
+                for choice in choices:
+                    msg = safe_get(choice, "message")
+                    if msg is not None:
+                        # Convert Pydantic model to dict if needed
+                        if hasattr(msg, "model_dump"):
+                            msg = msg.model_dump(exclude_unset=True)
+                        elif hasattr(msg, "dict"):
+                            msg = msg.dict(exclude_unset=True)
+                        messages_output.append(msg)
+                if messages_output:
+                    span.update(output=messages_output)
+                else:
+                    span.update(output=result)
+            else:
+                span.update(output=result)
 
-            …
-                "completion_tokens": history["usage"]["completion_tokens"],
-                "prompt_tokens": history["usage"]["prompt_tokens"],
-            }
+        completion_tokens = safe_get(history, "usage", "completion_tokens")
+        prompt_tokens = safe_get(history, "usage", "prompt_tokens")
+        if span and completion_tokens is not None and prompt_tokens is not None:
+            metrics = {
+                "completion_tokens": completion_tokens,
+                "prompt_tokens": prompt_tokens,
+            }
+            # Capture reasoning_tokens if available
+            reasoning_tokens = safe_get(
+                history, "usage", "completion_tokens_details", "reasoning_tokens"
             )
+            if reasoning_tokens is not None:
+                metrics["reasoning_tokens"] = reasoning_tokens
+            span.update(metrics=metrics)
 
         return result
 

@@ -884,26 +907,36 @@ class DSPyTracer:
 
         result = self.__class__.__original_basic_request__(self, prompt, **kwargs)  # type: ignore
 
-        …
+        # Capture full messages from choices (includes reasoning_content)
+        choices = safe_get(result, "choices")
+        if span and choices and len(choices) > 0:
+            messages_output = []
+            for choice in choices:
+                msg = safe_get(choice, "message")
+                if msg is not None:
+                    # Convert Pydantic model to dict if needed
+                    if hasattr(msg, "model_dump"):
+                        msg = msg.model_dump(exclude_unset=True)
+                    elif hasattr(msg, "dict"):
+                        msg = msg.dict(exclude_unset=True)
+                    messages_output.append(msg)
+            if messages_output:
+                span.update(output=messages_output)
+
+        completion_tokens = safe_get(result, "usage", "completion_tokens")
+        prompt_tokens = safe_get(result, "usage", "prompt_tokens")
+        if span and completion_tokens is not None and prompt_tokens is not None:
+            metrics = {
+                "completion_tokens": completion_tokens,
+                "prompt_tokens": prompt_tokens,
+            }
+            # Capture reasoning_tokens if available
+            reasoning_tokens = safe_get(
+                result, "usage", "completion_tokens_details", "reasoning_tokens"
            )
+            if reasoning_tokens is not None:
+                metrics["reasoning_tokens"] = reasoning_tokens
+            span.update(metrics=metrics)
 
         return result
 
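
Both patched dspy paths now prefer the full message objects from `choices` (which carry `reasoning_content`) over the plain string result, duck-typing the Pydantic-to-dict conversion. The same normalization in isolation; `MockMessage` is a hypothetical stand-in for an OpenAI/LiteLLM message model:

```python
from typing import Optional
from pydantic import BaseModel

class MockMessage(BaseModel):
    role: str
    content: str
    reasoning_content: Optional[str] = None

msg = MockMessage(role="assistant", content="42", reasoning_content="thinking...")

# Same duck-typed conversion the tracer uses: prefer model_dump (Pydantic v2),
# fall back to .dict (Pydantic v1), otherwise keep the object as-is.
if hasattr(msg, "model_dump"):
    as_dict = msg.model_dump(exclude_unset=True)
elif hasattr(msg, "dict"):
    as_dict = msg.dict(exclude_unset=True)
else:
    as_dict = msg

print(as_dict)
# {'role': 'assistant', 'content': '42', 'reasoning_content': 'thinking...'}
```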
langwatch/experiment/experiment.py
CHANGED

@@ -302,6 +302,10 @@ class Experiment:
         iter_ctx = IterationContext(index=index, item=item)
         iter_token = _iteration_context.set(iter_ctx)
 
+        # Reset target context at the start of each iteration to prevent pollution
+        # from previous iterations (especially important for implicit Output targets)
+        _target_context.set(None)
+
         # Determine if we should create an iteration trace:
         # - Don't create if evaluation uses targets (each target creates its own trace)
         # - Don't create if we're collecting submit() calls (not in_thread yet)

@@ -340,6 +344,8 @@ class Experiment:
         finally:
             # Reset iteration context
             _iteration_context.reset(iter_token)
+            # Reset target context to prevent pollution to next iteration
+            _target_context.set(None)
 
             # Handle iteration trace cleanup
             # Note: If target() was used, it may have already closed the trace

@@ -715,9 +721,10 @@ class Experiment:
         """
         Log the model's response/output for the current target.
 
-        …
+        Can be called inside a `target()` context, or outside of one. When called
+        outside a target context, an implicit "Output" target is created automatically.
+        The response will be stored in the dataset entry's `predicted` field, which
+        is displayed in the results table.
 
         Args:
             response: The model's output. Can be a string (will be wrapped as

@@ -725,30 +732,131 @@ class Experiment:
 
         Example:
             ```python
+            # With explicit target
             with evaluation.target("gpt-4", {"model": "openai/gpt-4"}):
                 response = call_gpt4(row["question"])
                 evaluation.log_response(response)  # Store the output
                 evaluation.log("quality", index=index, score=0.95)  # Log metrics
-            ```
 
-            …
+            # Without explicit target (creates implicit "Output" target)
+            for index, row in evaluation.loop(df.iterrows()):
+                response = my_model(row["question"])
+                evaluation.log_response(response)  # Creates "Output" target
+                evaluation.log("quality", index=index, score=0.95)
+            ```
         """
         ctx = _target_context.get()
-        if ctx is None:
-            raise RuntimeError(
-                "log_response() must be called inside a target() context. "
-                "Example: with evaluation.target('my-target'): evaluation.log_response(response)"
-            )
 
         # Normalize response to dict format
         if isinstance(response, str):
-            …
+            predicted = {"output": response}
         elif isinstance(response, dict):
-            …
+            predicted = response
         else:
             # Try to convert to string for other types
-            …
+            predicted = {"output": str(response)}
+
+        if ctx is None:
+            # Create implicit "Output" target and dataset entry immediately
+            self._create_implicit_output_target(predicted)
+        else:
+            # Inside explicit target context - just set predicted
+            ctx.predicted = predicted
+
+    def _create_implicit_output_target(self, predicted: Dict[str, Any]) -> None:
+        """
+        Create an implicit "Output" target when log_response() is called outside
+        a target() context. This enables a simpler API for single-target evaluations.
+
+        Creates the dataset entry immediately with the predicted response.
+        """
+        target_name = "Output"
+
+        # Mark that targets are being used
+        if not self._evaluation_uses_targets:
+            self._evaluation_uses_targets = True
+            # Close the active iteration trace if any
+            if self._active_iteration_trace is not None:
+                self._active_iteration_trace.__exit__(None, None, None)
+                self._active_iteration_trace = None
+
+        self._current_iteration_used_with_target = True
+
+        # Register the target
+        self._register_target(target_name, None)
+
+        # Get index and item from iteration context
+        iter_ctx = _iteration_context.get()
+        if iter_ctx is not None:
+            index = iter_ctx.index
+            current_item = iter_ctx.item
+        else:
+            index = self._current_index
+            current_item = self._current_item
+
+        # Create a trace for this implicit target
+        tracer = trace.get_tracer("langwatch-evaluation")
+        root_context = otel_context.Context()
+
+        # Start span and get trace_id
+        with tracer.start_span(
+            f"evaluation.target.{target_name}",
+            context=root_context,
+            attributes={
+                "evaluation.run_id": self.run_id,
+                "evaluation.index": index,
+                "evaluation.target": target_name,
+            },
+        ) as span:
+            span_context = span.get_span_context()
+            trace_id = format(span_context.trace_id, "032x")
+
+            # Create and set target context (for subsequent log() calls)
+            ctx = TargetContext(
+                target_id=target_name,
+                index=index,
+                trace_id=trace_id,
+                predicted=predicted,
+            )
+            _target_context.set(ctx)
+
+            # Create dataset entry immediately
+            entry_data: Any = (
+                current_item.to_dict()
+                if hasattr(current_item, "to_dict")
+                else (
+                    current_item.__dict__
+                    if hasattr(current_item, "__dict__")
+                    else (
+                        current_item[1].to_dict()
+                        if type(current_item) == tuple
+                        and hasattr(current_item[1], "to_dict")
+                        else (
+                            current_item[1].__dict__
+                            if type(current_item) == tuple
+                            and hasattr(current_item[1], "__dict__")
+                            else {
+                                "entry": json.dumps(
+                                    current_item, cls=SerializableWithStringFallback
+                                )
+                            }
+                        )
+                    )
+                )
+            )
+
+            batch_entry = BatchEntry(
+                index=index,
+                entry=entry_data,
+                duration=0,  # Duration not tracked for implicit targets
+                error=None,
+                trace_id=trace_id,
+                target_id=target_name,
+                predicted=predicted,
+            )
+
+            with self.lock:
+                self.batch["dataset"].append(batch_entry)
 
     def log(
         self,
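
The two `_target_context.set(None)` calls guard against `contextvars` state leaking between loop iterations that run in the same context, which is what previously let an implicit Output target from one row bleed into the next. A standalone illustration of the leak pattern being prevented (variable names are hypothetical):

```python
from contextvars import ContextVar
from typing import Optional

_target_context: ContextVar[Optional[str]] = ContextVar("target_context", default=None)

for item in ["a", "b"]:
    _target_context.set(None)  # reset; without this, "b" would still see "target-for-a"
    if item == "a":
        _target_context.set(f"target-for-{item}")  # only some iterations set a target
    print(item, _target_context.get())
# a target-for-a
# b None
```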
langwatch/litellm.py
CHANGED
@@ -246,6 +246,9 @@ class LiteLLMPatch:
             SpanMetrics(
                 prompt_tokens=safe_get(usage, "prompt_tokens"),
                 completion_tokens=safe_get(usage, "completion_tokens"),
+                reasoning_tokens=safe_get(
+                    usage, "completion_tokens_details", "reasoning_tokens"
+                ),
             )
             if usage
             else SpanMetrics()

@@ -281,6 +284,9 @@ class LiteLLMPatch:
             metrics=SpanMetrics(
                 prompt_tokens=safe_get(response, "usage", "prompt_tokens"),
                 completion_tokens=safe_get(response, "usage", "completion_tokens"),
+                reasoning_tokens=safe_get(
+                    response, "usage", "completion_tokens_details", "reasoning_tokens"
+                ),
             ),
             timestamps=timestamps,
             **kwargs,

@@ -338,6 +344,7 @@ class LiteLLMPatch:
             "functions",
             "user",
             "response_format",
+            "reasoning_effort",
         ]
         for param in params:
             if kwargs.get(param):
langwatch/openai.py
CHANGED
@@ -296,6 +296,9 @@ class OpenAICompletionTracer:
             metrics=SpanMetrics(
                 prompt_tokens=safe_get(response, "usage", "prompt_tokens"),
                 completion_tokens=safe_get(response, "usage", "completion_tokens"),
+                reasoning_tokens=safe_get(
+                    response, "usage", "completion_tokens_details", "reasoning_tokens"
+                ),
             ),
             timestamps=timestamps,
             **kwargs,

@@ -336,22 +339,31 @@ class OpenAICompletionTracer:
             if len(outputs) == 0
             else outputs[0] if len(outputs) == 1 else {"type": "list", "value": outputs}
         )
-        …
+        span_params = SpanParams()
+        param_names = [
+            "frequency_penalty",
+            "logit_bias",
+            "logprobs",
+            "top_logprobs",
+            "max_tokens",
+            "n",
+            "presence_penalty",
+            "seed",
+            "stop",
+            "stream",
+            "temperature",
+            "top_p",
+            "tools",
+            "tool_choice",
+            "parallel_tool_calls",
+            "functions",
+            "user",
+            "response_format",
+            "reasoning_effort",
+        ]
+        for param in param_names:
+            if kwargs.get(param) is not None:
+                span_params[param] = kwargs.get(param)
 
         vendor = (
             "azure"

@@ -367,7 +379,7 @@ class OpenAICompletionTracer:
             ),
             output=output,
             error=error,
-            params=
+            params=span_params,
             metrics=metrics,
             timestamps=timestamps,
         )

@@ -611,6 +623,9 @@ class OpenAIChatCompletionTracer:
             SpanMetrics(
                 prompt_tokens=usage.prompt_tokens if usage else None,
                 completion_tokens=usage.completion_tokens if usage else None,
+                reasoning_tokens=safe_get(
+                    usage, "completion_tokens_details", "reasoning_tokens"
+                ),
             )
             if usage
             else SpanMetrics()

@@ -643,6 +658,9 @@ class OpenAIChatCompletionTracer:
             metrics=SpanMetrics(
                 prompt_tokens=safe_get(response, "usage", "prompt_tokens"),
                 completion_tokens=safe_get(response, "usage", "completion_tokens"),
+                reasoning_tokens=safe_get(
+                    response, "usage", "completion_tokens_details", "reasoning_tokens"
+                ),
             ),
             timestamps=timestamps,
             **kwargs,

@@ -683,22 +701,31 @@ class OpenAIChatCompletionTracer:
             if len(outputs) == 0
             else outputs[0] if len(outputs) == 1 else {"type": "list", "value": outputs}
         )
-        …
+        span_params = SpanParams()
+        param_names = [
+            "frequency_penalty",
+            "logit_bias",
+            "logprobs",
+            "top_logprobs",
+            "max_tokens",
+            "n",
+            "presence_penalty",
+            "seed",
+            "stop",
+            "stream",
+            "temperature",
+            "top_p",
+            "tools",
+            "tool_choice",
+            "parallel_tool_calls",
+            "functions",
+            "user",
+            "response_format",
+            "reasoning_effort",
+        ]
+        for param in param_names:
+            if kwargs.get(param) is not None:
+                span_params[param] = kwargs.get(param)
 
         vendor = (
             "azure"

@@ -714,7 +741,7 @@ class OpenAIChatCompletionTracer:
             ),
             output=output,
             error=error,
-            params=
+            params=span_params,
             metrics=metrics,
             timestamps=timestamps,
         )
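
The rewritten params capture is an explicit allow-list: only known request parameters that the caller actually passed (and that are not None) land in `SpanParams`, so `reasoning_effort` is recorded exactly like `temperature`. The filtering pattern in isolation, as a hypothetical helper (not part of the SDK):

```python
from typing import Any, Dict, List

def collect_span_params(kwargs: Dict[str, Any], param_names: List[str]) -> Dict[str, Any]:
    # Keep only allow-listed parameters the caller actually provided.
    return {name: kwargs[name] for name in param_names if kwargs.get(name) is not None}

call_kwargs = {"model": "some-model", "temperature": 0.1, "reasoning_effort": "high", "seed": None}
print(collect_span_params(call_kwargs, ["temperature", "reasoning_effort", "seed"]))
# {'temperature': 0.1, 'reasoning_effort': 'high'}  # seed dropped: it was None
```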
langwatch/prompts/local_loader.py
CHANGED

@@ -24,6 +24,8 @@ logger = logging.getLogger(__name__)
 class LocalPromptLoader:
     """Loads prompts from local files in CLI format."""
 
+    _warned_no_prompts_path: bool = False
+
     def __init__(self, base_path: Optional[Path] = None):
         """Initialize with base path (defaults to current working directory at load time)."""
         self._base_path = base_path

@@ -43,6 +45,16 @@ class LocalPromptLoader:
         # Check if prompts.json exists
         prompts_json_path = self.base_path / "prompts.json"
         if not prompts_json_path.exists():
+            # Warn once if no prompts_path was configured and prompts.json doesn't exist
+            if self._base_path is None and not LocalPromptLoader._warned_no_prompts_path:
+                LocalPromptLoader._warned_no_prompts_path = True
+                warnings.warn(
+                    f"No prompts.json found at {prompts_json_path}. "
+                    f"If you have local prompt files, configure the path with "
+                    f"langwatch.setup(prompts_path='/path/to/prompts') or ensure "
+                    f"prompts.json is in the current working directory.",
+                    UserWarning,
+                )
             logger.debug(
                 f"No prompts.json found at {prompts_json_path}, falling back to API"
             )
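
`_warned_no_prompts_path` is a warn-once guard held on the class, so the missing-`prompts.json` warning fires at most once per process while later misses only log at debug level. The guard in isolation (class and message are hypothetical):

```python
import warnings

class WarnOnce:
    _warned: bool = False  # class-level, shared by all instances

    def check(self) -> None:
        if not WarnOnce._warned:
            WarnOnce._warned = True
            warnings.warn("prompts.json not found; falling back to API", UserWarning)

WarnOnce().check()  # emits the UserWarning
WarnOnce().check()  # silent on every subsequent call
```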
langwatch/prompts/prompt_facade.py
CHANGED

@@ -8,6 +8,7 @@ or when API is unavailable.
 
 Follows the facade pattern to coordinate between LocalPromptLoader and PromptApiService.
 """
+from pathlib import Path
 from typing import Any, Dict, List, Literal, Optional
 import time
 from langwatch.generated.langwatch_rest_api_client.client import (

@@ -35,10 +36,16 @@ class PromptsFacade:
     work even when offline or when API is unavailable.
     """
 
-    def __init__(
+    def __init__(
+        self,
+        rest_api_client: LangWatchRestApiClient,
+        prompts_path: Optional[str] = None,
+    ):
         """Initialize the prompt service facade with dependencies."""
         self._api_service = PromptApiService(rest_api_client)
-        self._local_loader = LocalPromptLoader(
+        self._local_loader = LocalPromptLoader(
+            base_path=Path(prompts_path) if prompts_path else None
+        )
         self._cache: Dict[str, Dict[str, Any]] = {}
 
     @classmethod

@@ -50,7 +57,7 @@ class PromptsFacade:
             raise RuntimeError(
                 "LangWatch client has not been initialized. Call setup() first."
             )
-        return cls(instance.rest_api_client)
+        return cls(instance.rest_api_client, prompts_path=instance.prompts_path)
 
     def get(
         self,
langwatch/types.py
CHANGED
@@ -89,6 +89,11 @@ class LangWatchClientProtocol(Protocol):
         """Get whether OpenTelemetry setup is skipped."""
         ...
 
+    @property
+    def prompts_path(self) -> Optional[str]:
+        """Get the base path for local prompt files."""
+        ...
+
     # Regular attributes (not properties)
     base_attributes: BaseAttributes
     tracer_provider: Optional[TracerProvider]
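
Adding `prompts_path` to `LangWatchClientProtocol` makes the new property part of the structural contract: anything passed where the protocol is expected must now expose it. A toy protocol (not the SDK's) showing how such a member is satisfied structurally:

```python
from typing import Optional, Protocol, runtime_checkable

@runtime_checkable
class HasPromptsPath(Protocol):
    @property
    def prompts_path(self) -> Optional[str]: ...

class FakeClient:
    @property
    def prompts_path(self) -> Optional[str]:
        return "/tmp/prompts"

print(isinstance(FakeClient(), HasPromptsPath))  # True: matched by structure, not inheritance
```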
langwatch/utils/initialization.py
CHANGED

@@ -3,14 +3,16 @@
 import logging
 import os
 import sys
+from pathlib import Path
 from typing import List, Optional, Sequence
-
+
 from opentelemetry import trace
 from opentelemetry.instrumentation.instrumentor import BaseInstrumentor
+from opentelemetry.sdk.trace import TracerProvider
 
-from langwatch.state import get_instance, set_instance
 from langwatch.client import Client
 from langwatch.domain import BaseAttributes, SpanProcessingExcludeRule
+from langwatch.state import get_instance, set_instance
 
 logger: logging.Logger = logging.getLogger(__name__)
 

@@ -41,6 +43,7 @@ def setup(
     span_exclude_rules: Optional[List[SpanProcessingExcludeRule]] = None,
     debug: Optional[bool] = None,
     skip_open_telemetry_setup: Optional[bool] = None,
+    prompts_path: Optional[str] = None,
 ) -> Client:
     """
     Initialize the LangWatch client.

@@ -54,6 +57,7 @@ def setup(
         span_exclude_rules: Optional. A list of rules that will be applied to spans processed by the exporter.
         debug: Whether to enable debug logging for the LangWatch client.
         skip_open_telemetry_setup: Whether to skip setting up the OpenTelemetry tracer provider. If this is skipped, instrumentors will be added to the global tracer provider.
+        prompts_path: The base path for local prompt files. If not set, defaults to the current working directory.
     Returns:
         The LangWatch client.
     """

@@ -62,6 +66,11 @@ def setup(
     if debug:
         logger.info("Setting up LangWatch client...")
 
+    if prompts_path is not None:
+        prompts_path = str(
+            Path(prompts_path).resolve()
+        )  # Convert to absolute path asap
+
     # Get existing client to check if we're changing the API key
     existing_client = get_instance()
     changed_api_key = False

@@ -87,6 +96,7 @@ def setup(
         span_exclude_rules=span_exclude_rules,
         ignore_global_tracer_provider_override_warning=changed_api_key,
         skip_open_telemetry_setup=skip_open_telemetry_setup,
+        prompts_path=prompts_path,
     )
 
     if debug:
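
`setup()` resolves `prompts_path` to an absolute path immediately, so a relative path is anchored at the directory where `setup()` runs rather than wherever a prompt is later loaded. A usage sketch (the key argument is a placeholder and is normally read from the environment):

```python
import langwatch

langwatch.setup(
    api_key="sk-...",             # placeholder; typically taken from LANGWATCH_API_KEY
    prompts_path="./my_prompts",  # converted to an absolute path right away
)
```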
langwatch/utils/utils.py
CHANGED
@@ -5,9 +5,11 @@ from pydantic import BaseModel
 
 def safe_get(d: Union[Dict[str, Any], BaseModel], *keys: str) -> Optional[Any]:
     for key in keys:
+        if d is None:
+            return None
         if isinstance(d, dict):
             d = d.get(key, None)
-        …
+        elif hasattr(d, key):
             d = getattr(d, key)
         else:
             return None
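
The `safe_get` fix does two things: the attribute branch becomes an `elif`, so a successful dict lookup is not immediately re-probed, and a `None` value at any step short-circuits to `None`. A mirror of the patched function for illustration (the trailing `return d` is inferred, since the diff only shows the changed lines):

```python
from typing import Any, Dict, Optional, Union
from pydantic import BaseModel

def safe_get(d: Union[Dict[str, Any], BaseModel], *keys: str) -> Optional[Any]:
    for key in keys:
        if d is None:
            return None
        if isinstance(d, dict):
            d = d.get(key, None)
        elif hasattr(d, key):
            d = getattr(d, key)
        else:
            return None
    return d

print(safe_get({"a": {"b": 1}}, "a", "b"))  # 1
print(safe_get({"a": None}, "a", "b"))      # None: a None intermediate is safe now
print(safe_get(None, "a"))                  # None: a None input is safe too
```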
{langwatch-0.10.0.dist-info → langwatch-0.10.2.dist-info}/RECORD
CHANGED

@@ -1,24 +1,24 @@
 langwatch/__init__.py,sha256=VGkLDw3h8hOtzyaTMObWupGTQosn4E17Dk5zcfDmy7g,4462
-langwatch/__version__.py,sha256=
+langwatch/__version__.py,sha256=adHQtrIBxR2urwtvyvknbyN66CZvRupt68CR0Ym8N58,92
 langwatch/attributes.py,sha256=nXdI_G85wQQCAdAcwjCiLYdEYj3wATmfgCmhlf6dVIk,3910
 langwatch/batch_evaluation.py,sha256=Y_S3teXpHV07U-vvJYyV1PB6d0CgyFM_rTzPp6GnEBo,16165
-langwatch/client.py,sha256=
+langwatch/client.py,sha256=xwqvTnbAZ-Qr8OnI8-D8cV3J7YPsJ6l0trHbd2PSi6Q,26148
 langwatch/evaluations.py,sha256=-rvlpw8J3-0lMn9tdnte1Z3qHpuE60DGB3gmI8VMexQ,8983
 langwatch/guardrails.py,sha256=4d320HyklXPUVszF34aWsDKGzuvPggcDM_f45_eJTnc,1352
 langwatch/langchain.py,sha256=HjbBBIDwwt688g0W4K0EsZGuGBbo1Mv5LQ-7Mkn56iQ,18726
-langwatch/litellm.py,sha256=
+langwatch/litellm.py,sha256=v2fXvsdvqRdSLVwTKNBbiJJy9YJvV9M9OSnugIMvNtg,12328
 langwatch/login.py,sha256=o0DxYVMhMCRHeopyF0qlj_U4o6yD1rL8QjfKvKnHv0s,965
-langwatch/openai.py,sha256=
+langwatch/openai.py,sha256=UnfZtskUSvpnZUA-2CHpP3iRjrG3DmDQ1Xftuos6JMk,25724
 langwatch/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 langwatch/state.py,sha256=qXvPAjO90jdokCU6tPSwjHIac4QU_5N0pSd9dfmc9kY,1204
 langwatch/tracer.py,sha256=t5FOdP1es9H_pPGqGUBLXCyEln0tTi4m4M9b6WxCrPU,975
-langwatch/types.py,sha256=
+langwatch/types.py,sha256=HtnfmGc3p6k7rLitnJsf3MOXZfvjyZ8soB74kPyoCGY,3242
 langwatch/dataset/__init__.py,sha256=hZBcbjXuBO2qE5osJtd9wIE9f45F6-jpNTrne5nk4eE,2606
-langwatch/domain/__init__.py,sha256=
-langwatch/dspy/__init__.py,sha256=
+langwatch/domain/__init__.py,sha256=BVpWugFqiohIA2MiTZy3x1BQCbIgN3okIXzmT6BHUkQ,6183
+langwatch/dspy/__init__.py,sha256=ahOMnNefVD9xsf7Z0P6iE0SbKylANixrNZkbKoK2FTs,35208
 langwatch/evaluation/__init__.py,sha256=8SOSZZbSzXa1jL-9Zlyt0f9u5sOA_TrO1J61ueASBLI,16980
 langwatch/experiment/__init__.py,sha256=nv2OfoNMMZwUA9KfozW2ZNaR1-J1LCmU4NykjGfe9is,3001
-langwatch/experiment/experiment.py,sha256=
+langwatch/experiment/experiment.py,sha256=BoTia3NPi_OcMSVGWN4xdNHrj5DC46_ZzcPhNhzNECc,37144
 langwatch/experiment/platform_run.py,sha256=qiy_bwp786TbkH4HIlZVlJPmCtQlStAq9vUdG4-3VdU,13850
 langwatch/exporters/filterable_batch_span_exporter.py,sha256=MlhZjui56XD6p2sa8kEGyr-Hb3wqudknngmemnB4Twg,2142
 langwatch/generated/langwatch_rest_api_client/__init__.py,sha256=8r-9pAj7fK7vnVX3mT0y_zS4B9ZRqD6RZiBo5fPra60,156

@@ -394,10 +394,10 @@ langwatch/generated/langwatch_rest_api_client/models/timestamps.py,sha256=-nRKUP
 langwatch/generated/langwatch_rest_api_client/models/trace.py,sha256=K9Lc_EQOrJ2dqMXx9EpiUXReT1_uYF7WRfYyhlfbi3I,7537
 langwatch/prompts/__init__.py,sha256=OGf3BrzVsTZnSPYSqSqe_eWmGIBRiOUlovOduYzKCx4,1259
 langwatch/prompts/errors.py,sha256=kmaGeA1QPot9Ii5lgooxmAFlvUPOGjAnzzPBuw4h6Bw,5124
-langwatch/prompts/local_loader.py,sha256=
+langwatch/prompts/local_loader.py,sha256=DifYVaAIdEdsApChPIEx0zsYinNixJoPgi8s0zy1MNY,7490
 langwatch/prompts/prompt.py,sha256=SgLDo9hO-CuRE-AZ8zx9v7-KqjiabiW8GzD9jdx1IoA,6914
 langwatch/prompts/prompt_api_service.py,sha256=tHhwIRjUBSM43_jwDAoGCHJjvvqVeSCrUPwcwMvUHho,9823
-langwatch/prompts/prompt_facade.py,sha256=
+langwatch/prompts/prompt_facade.py,sha256=yUq9-5JHcHrgiz9EgijzNXp_KkK6ZUouoGtEtF4_xdE,9154
 langwatch/prompts/decorators/prompt_service_tracing.py,sha256=uSYw0vExo7AuxbcCRnxbYl6UOfOQSC0IsisSqYy153Y,2395
 langwatch/prompts/decorators/prompt_tracing.py,sha256=x_PQvJlGbGF1h2HtGNiqaZ8K1qNd1jRf5pTOBTQx-7M,3963
 langwatch/prompts/types/__init__.py,sha256=jwaFV4VJHrOE6dm6yyLtWk6_7dqZpR5uZjN1cswtga4,579

@@ -413,10 +413,10 @@ langwatch/telemetry/__tests__/test_tracing.py,sha256=Px2vcpbRWBgwwaXzw3MgRfkcL-I
 langwatch/utils/__init__.py,sha256=3rqQTgzEtmICJW_KSPuLa5q8p5udxt5SRi28Z2vZB10,138
 langwatch/utils/capture.py,sha256=uVKPqHCm-o8CpabsUfhqbNFr5sgUHzcKnBadvL2oIwI,1172
 langwatch/utils/exceptions.py,sha256=b-dorrnQ9XBJcijLLNJP9LRQzdOZGEiyQ3f8GcA1kgk,1046
-langwatch/utils/initialization.py,sha256=
+langwatch/utils/initialization.py,sha256=2egw2aXGYdbgLsyOfkQ3Oz0JFbfnQnpiFg_Q-gcoEgo,4915
 langwatch/utils/module.py,sha256=KLBNOK3mA9gCSifCcQX_lOtU48BJQDWvFKtF6NMvwVA,688
 langwatch/utils/transformation.py,sha256=76MGXyrYTxM0Yri36NJqLK-XxL4BBYdmKWAXXlw3D4Q,7690
-langwatch/utils/utils.py,sha256=
-langwatch-0.10.
-langwatch-0.10.
-langwatch-0.10.
+langwatch/utils/utils.py,sha256=RW01NPA_cpWsTlUvLd0FGuoVECtMVO9Bj4gdIVx8fUg,644
+langwatch-0.10.2.dist-info/METADATA,sha256=rs-hw9hPy3Ap1y3PqwtA7oKvTCwj7i4eDchq7xmpMC0,13193
+langwatch-0.10.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+langwatch-0.10.2.dist-info/RECORD,,
{langwatch-0.10.0.dist-info → langwatch-0.10.2.dist-info}/WHEEL
File without changes