langwatch 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
langwatch/__version__.py
CHANGED
|
@@ -302,6 +302,10 @@ class Experiment:
|
|
|
302
302
|
iter_ctx = IterationContext(index=index, item=item)
|
|
303
303
|
iter_token = _iteration_context.set(iter_ctx)
|
|
304
304
|
|
|
305
|
+
# Reset target context at the start of each iteration to prevent pollution
|
|
306
|
+
# from previous iterations (especially important for implicit Output targets)
|
|
307
|
+
_target_context.set(None)
|
|
308
|
+
|
|
305
309
|
# Determine if we should create an iteration trace:
|
|
306
310
|
# - Don't create if evaluation uses targets (each target creates its own trace)
|
|
307
311
|
# - Don't create if we're collecting submit() calls (not in_thread yet)
|
|
@@ -340,6 +344,8 @@ class Experiment:
|
|
|
340
344
|
finally:
|
|
341
345
|
# Reset iteration context
|
|
342
346
|
_iteration_context.reset(iter_token)
|
|
347
|
+
# Reset target context to prevent pollution to next iteration
|
|
348
|
+
_target_context.set(None)
|
|
343
349
|
|
|
344
350
|
# Handle iteration trace cleanup
|
|
345
351
|
# Note: If target() was used, it may have already closed the trace
|
|
@@ -715,9 +721,10 @@ class Experiment:
|
|
|
715
721
|
"""
|
|
716
722
|
Log the model's response/output for the current target.
|
|
717
723
|
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
724
|
+
Can be called inside a `target()` context, or outside of one. When called
|
|
725
|
+
outside a target context, an implicit "Output" target is created automatically.
|
|
726
|
+
The response will be stored in the dataset entry's `predicted` field, which
|
|
727
|
+
is displayed in the results table.
|
|
721
728
|
|
|
722
729
|
Args:
|
|
723
730
|
response: The model's output. Can be a string (will be wrapped as
|
|
@@ -725,30 +732,131 @@ class Experiment:
|
|
|
725
732
|
|
|
726
733
|
Example:
|
|
727
734
|
```python
|
|
735
|
+
# With explicit target
|
|
728
736
|
with evaluation.target("gpt-4", {"model": "openai/gpt-4"}):
|
|
729
737
|
response = call_gpt4(row["question"])
|
|
730
738
|
evaluation.log_response(response) # Store the output
|
|
731
739
|
evaluation.log("quality", index=index, score=0.95) # Log metrics
|
|
732
|
-
```
|
|
733
740
|
|
|
734
|
-
|
|
735
|
-
|
|
741
|
+
# Without explicit target (creates implicit "Output" target)
|
|
742
|
+
for index, row in evaluation.loop(df.iterrows()):
|
|
743
|
+
response = my_model(row["question"])
|
|
744
|
+
evaluation.log_response(response) # Creates "Output" target
|
|
745
|
+
evaluation.log("quality", index=index, score=0.95)
|
|
746
|
+
```
|
|
736
747
|
"""
|
|
737
748
|
ctx = _target_context.get()
|
|
738
|
-
if ctx is None:
|
|
739
|
-
raise RuntimeError(
|
|
740
|
-
"log_response() must be called inside a target() context. "
|
|
741
|
-
"Example: with evaluation.target('my-target'): evaluation.log_response(response)"
|
|
742
|
-
)
|
|
743
749
|
|
|
744
750
|
# Normalize response to dict format
|
|
745
751
|
if isinstance(response, str):
|
|
746
|
-
|
|
752
|
+
predicted = {"output": response}
|
|
747
753
|
elif isinstance(response, dict):
|
|
748
|
-
|
|
754
|
+
predicted = response
|
|
749
755
|
else:
|
|
750
756
|
# Try to convert to string for other types
|
|
751
|
-
|
|
757
|
+
predicted = {"output": str(response)}
|
|
758
|
+
|
|
759
|
+
if ctx is None:
|
|
760
|
+
# Create implicit "Output" target and dataset entry immediately
|
|
761
|
+
self._create_implicit_output_target(predicted)
|
|
762
|
+
else:
|
|
763
|
+
# Inside explicit target context - just set predicted
|
|
764
|
+
ctx.predicted = predicted
|
|
765
|
+
|
|
766
|
+
def _create_implicit_output_target(self, predicted: Dict[str, Any]) -> None:
|
|
767
|
+
"""
|
|
768
|
+
Create an implicit "Output" target when log_response() is called outside
|
|
769
|
+
a target() context. This enables a simpler API for single-target evaluations.
|
|
770
|
+
|
|
771
|
+
Creates the dataset entry immediately with the predicted response.
|
|
772
|
+
"""
|
|
773
|
+
target_name = "Output"
|
|
774
|
+
|
|
775
|
+
# Mark that targets are being used
|
|
776
|
+
if not self._evaluation_uses_targets:
|
|
777
|
+
self._evaluation_uses_targets = True
|
|
778
|
+
# Close the active iteration trace if any
|
|
779
|
+
if self._active_iteration_trace is not None:
|
|
780
|
+
self._active_iteration_trace.__exit__(None, None, None)
|
|
781
|
+
self._active_iteration_trace = None
|
|
782
|
+
|
|
783
|
+
self._current_iteration_used_with_target = True
|
|
784
|
+
|
|
785
|
+
# Register the target
|
|
786
|
+
self._register_target(target_name, None)
|
|
787
|
+
|
|
788
|
+
# Get index and item from iteration context
|
|
789
|
+
iter_ctx = _iteration_context.get()
|
|
790
|
+
if iter_ctx is not None:
|
|
791
|
+
index = iter_ctx.index
|
|
792
|
+
current_item = iter_ctx.item
|
|
793
|
+
else:
|
|
794
|
+
index = self._current_index
|
|
795
|
+
current_item = self._current_item
|
|
796
|
+
|
|
797
|
+
# Create a trace for this implicit target
|
|
798
|
+
tracer = trace.get_tracer("langwatch-evaluation")
|
|
799
|
+
root_context = otel_context.Context()
|
|
800
|
+
|
|
801
|
+
# Start span and get trace_id
|
|
802
|
+
with tracer.start_span(
|
|
803
|
+
f"evaluation.target.{target_name}",
|
|
804
|
+
context=root_context,
|
|
805
|
+
attributes={
|
|
806
|
+
"evaluation.run_id": self.run_id,
|
|
807
|
+
"evaluation.index": index,
|
|
808
|
+
"evaluation.target": target_name,
|
|
809
|
+
},
|
|
810
|
+
) as span:
|
|
811
|
+
span_context = span.get_span_context()
|
|
812
|
+
trace_id = format(span_context.trace_id, "032x")
|
|
813
|
+
|
|
814
|
+
# Create and set target context (for subsequent log() calls)
|
|
815
|
+
ctx = TargetContext(
|
|
816
|
+
target_id=target_name,
|
|
817
|
+
index=index,
|
|
818
|
+
trace_id=trace_id,
|
|
819
|
+
predicted=predicted,
|
|
820
|
+
)
|
|
821
|
+
_target_context.set(ctx)
|
|
822
|
+
|
|
823
|
+
# Create dataset entry immediately
|
|
824
|
+
entry_data: Any = (
|
|
825
|
+
current_item.to_dict()
|
|
826
|
+
if hasattr(current_item, "to_dict")
|
|
827
|
+
else (
|
|
828
|
+
current_item.__dict__
|
|
829
|
+
if hasattr(current_item, "__dict__")
|
|
830
|
+
else (
|
|
831
|
+
current_item[1].to_dict()
|
|
832
|
+
if type(current_item) == tuple
|
|
833
|
+
and hasattr(current_item[1], "to_dict")
|
|
834
|
+
else (
|
|
835
|
+
current_item[1].__dict__
|
|
836
|
+
if type(current_item) == tuple
|
|
837
|
+
and hasattr(current_item[1], "__dict__")
|
|
838
|
+
else {
|
|
839
|
+
"entry": json.dumps(
|
|
840
|
+
current_item, cls=SerializableWithStringFallback
|
|
841
|
+
)
|
|
842
|
+
}
|
|
843
|
+
)
|
|
844
|
+
)
|
|
845
|
+
)
|
|
846
|
+
)
|
|
847
|
+
|
|
848
|
+
batch_entry = BatchEntry(
|
|
849
|
+
index=index,
|
|
850
|
+
entry=entry_data,
|
|
851
|
+
duration=0, # Duration not tracked for implicit targets
|
|
852
|
+
error=None,
|
|
853
|
+
trace_id=trace_id,
|
|
854
|
+
target_id=target_name,
|
|
855
|
+
predicted=predicted,
|
|
856
|
+
)
|
|
857
|
+
|
|
858
|
+
with self.lock:
|
|
859
|
+
self.batch["dataset"].append(batch_entry)
|
|
752
860
|
|
|
753
861
|
def log(
|
|
754
862
|
self,
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
langwatch/__init__.py,sha256=VGkLDw3h8hOtzyaTMObWupGTQosn4E17Dk5zcfDmy7g,4462
|
|
2
|
-
langwatch/__version__.py,sha256=
|
|
2
|
+
langwatch/__version__.py,sha256=adHQtrIBxR2urwtvyvknbyN66CZvRupt68CR0Ym8N58,92
|
|
3
3
|
langwatch/attributes.py,sha256=nXdI_G85wQQCAdAcwjCiLYdEYj3wATmfgCmhlf6dVIk,3910
|
|
4
4
|
langwatch/batch_evaluation.py,sha256=Y_S3teXpHV07U-vvJYyV1PB6d0CgyFM_rTzPp6GnEBo,16165
|
|
5
5
|
langwatch/client.py,sha256=xwqvTnbAZ-Qr8OnI8-D8cV3J7YPsJ6l0trHbd2PSi6Q,26148
|
|
@@ -18,7 +18,7 @@ langwatch/domain/__init__.py,sha256=BVpWugFqiohIA2MiTZy3x1BQCbIgN3okIXzmT6BHUkQ,
|
|
|
18
18
|
langwatch/dspy/__init__.py,sha256=ahOMnNefVD9xsf7Z0P6iE0SbKylANixrNZkbKoK2FTs,35208
|
|
19
19
|
langwatch/evaluation/__init__.py,sha256=8SOSZZbSzXa1jL-9Zlyt0f9u5sOA_TrO1J61ueASBLI,16980
|
|
20
20
|
langwatch/experiment/__init__.py,sha256=nv2OfoNMMZwUA9KfozW2ZNaR1-J1LCmU4NykjGfe9is,3001
|
|
21
|
-
langwatch/experiment/experiment.py,sha256=
|
|
21
|
+
langwatch/experiment/experiment.py,sha256=BoTia3NPi_OcMSVGWN4xdNHrj5DC46_ZzcPhNhzNECc,37144
|
|
22
22
|
langwatch/experiment/platform_run.py,sha256=qiy_bwp786TbkH4HIlZVlJPmCtQlStAq9vUdG4-3VdU,13850
|
|
23
23
|
langwatch/exporters/filterable_batch_span_exporter.py,sha256=MlhZjui56XD6p2sa8kEGyr-Hb3wqudknngmemnB4Twg,2142
|
|
24
24
|
langwatch/generated/langwatch_rest_api_client/__init__.py,sha256=8r-9pAj7fK7vnVX3mT0y_zS4B9ZRqD6RZiBo5fPra60,156
|
|
@@ -417,6 +417,6 @@ langwatch/utils/initialization.py,sha256=2egw2aXGYdbgLsyOfkQ3Oz0JFbfnQnpiFg_Q-gc
|
|
|
417
417
|
langwatch/utils/module.py,sha256=KLBNOK3mA9gCSifCcQX_lOtU48BJQDWvFKtF6NMvwVA,688
|
|
418
418
|
langwatch/utils/transformation.py,sha256=76MGXyrYTxM0Yri36NJqLK-XxL4BBYdmKWAXXlw3D4Q,7690
|
|
419
419
|
langwatch/utils/utils.py,sha256=RW01NPA_cpWsTlUvLd0FGuoVECtMVO9Bj4gdIVx8fUg,644
|
|
420
|
-
langwatch-0.10.
|
|
421
|
-
langwatch-0.10.
|
|
422
|
-
langwatch-0.10.
|
|
420
|
+
langwatch-0.10.2.dist-info/METADATA,sha256=rs-hw9hPy3Ap1y3PqwtA7oKvTCwj7i4eDchq7xmpMC0,13193
|
|
421
|
+
langwatch-0.10.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
422
|
+
langwatch-0.10.2.dist-info/RECORD,,
|
|
File without changes
|