langwatch 0.10.1__py3-none-any.whl → 0.10.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
langwatch/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  """Version information for LangWatch."""
2
2
 
3
- __version__ = "0.10.1" # x-release-please-version
3
+ __version__ = "0.10.2" # x-release-please-version
@@ -302,6 +302,10 @@ class Experiment:
302
302
  iter_ctx = IterationContext(index=index, item=item)
303
303
  iter_token = _iteration_context.set(iter_ctx)
304
304
 
305
+ # Reset target context at the start of each iteration to prevent pollution
306
+ # from previous iterations (especially important for implicit Output targets)
307
+ _target_context.set(None)
308
+
305
309
  # Determine if we should create an iteration trace:
306
310
  # - Don't create if evaluation uses targets (each target creates its own trace)
307
311
  # - Don't create if we're collecting submit() calls (not in_thread yet)
@@ -340,6 +344,8 @@ class Experiment:
340
344
  finally:
341
345
  # Reset iteration context
342
346
  _iteration_context.reset(iter_token)
347
+ # Reset target context to prevent pollution to next iteration
348
+ _target_context.set(None)
343
349
 
344
350
  # Handle iteration trace cleanup
345
351
  # Note: If target() was used, it may have already closed the trace
@@ -715,9 +721,10 @@ class Experiment:
715
721
  """
716
722
  Log the model's response/output for the current target.
717
723
 
718
- Must be called inside a `target()` context. The response will be stored
719
- in the dataset entry's `predicted` field, which is displayed in the
720
- results table.
724
+ Can be called inside a `target()` context, or outside of one. When called
725
+ outside a target context, an implicit "Output" target is created automatically.
726
+ The response will be stored in the dataset entry's `predicted` field, which
727
+ is displayed in the results table.
721
728
 
722
729
  Args:
723
730
  response: The model's output. Can be a string (will be wrapped as
@@ -725,30 +732,131 @@ class Experiment:
725
732
 
726
733
  Example:
727
734
  ```python
735
+ # With explicit target
728
736
  with evaluation.target("gpt-4", {"model": "openai/gpt-4"}):
729
737
  response = call_gpt4(row["question"])
730
738
  evaluation.log_response(response) # Store the output
731
739
  evaluation.log("quality", index=index, score=0.95) # Log metrics
732
- ```
733
740
 
734
- Raises:
735
- RuntimeError: If called outside of a target() context.
741
+ # Without explicit target (creates implicit "Output" target)
742
+ for index, row in evaluation.loop(df.iterrows()):
743
+ response = my_model(row["question"])
744
+ evaluation.log_response(response) # Creates "Output" target
745
+ evaluation.log("quality", index=index, score=0.95)
746
+ ```
736
747
  """
737
748
  ctx = _target_context.get()
738
- if ctx is None:
739
- raise RuntimeError(
740
- "log_response() must be called inside a target() context. "
741
- "Example: with evaluation.target('my-target'): evaluation.log_response(response)"
742
- )
743
749
 
744
750
  # Normalize response to dict format
745
751
  if isinstance(response, str):
746
- ctx.predicted = {"output": response}
752
+ predicted = {"output": response}
747
753
  elif isinstance(response, dict):
748
- ctx.predicted = response
754
+ predicted = response
749
755
  else:
750
756
  # Try to convert to string for other types
751
- ctx.predicted = {"output": str(response)}
757
+ predicted = {"output": str(response)}
758
+
759
+ if ctx is None:
760
+ # Create implicit "Output" target and dataset entry immediately
761
+ self._create_implicit_output_target(predicted)
762
+ else:
763
+ # Inside explicit target context - just set predicted
764
+ ctx.predicted = predicted
765
+
766
def _create_implicit_output_target(self, predicted: Dict[str, Any]) -> None:
    """
    Register an implicit "Output" target for the current iteration.

    Used when log_response() is called without an active target() context,
    which allows a simpler API for single-target evaluations. The dataset
    entry carrying the predicted response is appended to the batch right
    away.

    Args:
        predicted: The already-normalized response dict to store on the
            target context and on the dataset entry.
    """
    implicit_target = "Output"

    # First target seen by this evaluation: flag target usage and close the
    # iteration-level trace if one is still open.
    if not self._evaluation_uses_targets:
        self._evaluation_uses_targets = True
        if self._active_iteration_trace is not None:
            self._active_iteration_trace.__exit__(None, None, None)
            self._active_iteration_trace = None

    self._current_iteration_used_with_target = True
    self._register_target(implicit_target, None)

    # Prefer the per-iteration context; fall back to instance-level state
    # when no iteration context is set.
    iteration = _iteration_context.get()
    if iteration is None:
        index, current_item = self._current_index, self._current_item
    else:
        index, current_item = iteration.index, iteration.item

    # The span is opened only to obtain a trace id for this target; it is
    # started against an empty Context and closed as soon as the id is read.
    tracer = trace.get_tracer("langwatch-evaluation")
    empty_context = otel_context.Context()
    with tracer.start_span(
        f"evaluation.target.{implicit_target}",
        context=empty_context,
        attributes={
            "evaluation.run_id": self.run_id,
            "evaluation.index": index,
            "evaluation.target": implicit_target,
        },
    ) as span:
        trace_id = f"{span.get_span_context().trace_id:032x}"

    # Leave the target context set so later log() calls in this iteration
    # can pick it up.
    _target_context.set(
        TargetContext(
            target_id=implicit_target,
            index=index,
            trace_id=trace_id,
            predicted=predicted,
        )
    )

    # Convert the current item into entry data. Precedence: the item's own
    # to_dict(), then its __dict__, then the same two lookups on element 1
    # of a plain tuple, and finally a JSON string fallback.
    # NOTE: the tuple test is an exact-type check, so tuple subclasses are
    # not unpacked and fall through to the JSON fallback.
    entry_data: Any
    if hasattr(current_item, "to_dict"):
        entry_data = current_item.to_dict()
    elif hasattr(current_item, "__dict__"):
        entry_data = current_item.__dict__
    elif type(current_item) is tuple and hasattr(current_item[1], "to_dict"):
        entry_data = current_item[1].to_dict()
    elif type(current_item) is tuple and hasattr(current_item[1], "__dict__"):
        entry_data = current_item[1].__dict__
    else:
        entry_data = {
            "entry": json.dumps(current_item, cls=SerializableWithStringFallback)
        }

    record = BatchEntry(
        index=index,
        entry=entry_data,
        duration=0,  # Duration not tracked for implicit targets
        error=None,
        trace_id=trace_id,
        target_id=implicit_target,
        predicted=predicted,
    )

    # The batch is mutated under the instance lock.
    with self.lock:
        self.batch["dataset"].append(record)
752
860
 
753
861
  def log(
754
862
  self,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: langwatch
3
- Version: 0.10.1
3
+ Version: 0.10.2
4
4
  Summary: LangWatch Python SDK, for monitoring your LLMs
5
5
  Author-email: Langwatch Engineers <engineering@langwatch.ai>
6
6
  License: MIT
@@ -1,5 +1,5 @@
1
1
  langwatch/__init__.py,sha256=VGkLDw3h8hOtzyaTMObWupGTQosn4E17Dk5zcfDmy7g,4462
2
- langwatch/__version__.py,sha256=96vdgjIvZZ7XbIP2U10ynaU8nnbGmXfLBXugfC_8r3Q,92
2
+ langwatch/__version__.py,sha256=adHQtrIBxR2urwtvyvknbyN66CZvRupt68CR0Ym8N58,92
3
3
  langwatch/attributes.py,sha256=nXdI_G85wQQCAdAcwjCiLYdEYj3wATmfgCmhlf6dVIk,3910
4
4
  langwatch/batch_evaluation.py,sha256=Y_S3teXpHV07U-vvJYyV1PB6d0CgyFM_rTzPp6GnEBo,16165
5
5
  langwatch/client.py,sha256=xwqvTnbAZ-Qr8OnI8-D8cV3J7YPsJ6l0trHbd2PSi6Q,26148
@@ -18,7 +18,7 @@ langwatch/domain/__init__.py,sha256=BVpWugFqiohIA2MiTZy3x1BQCbIgN3okIXzmT6BHUkQ,
18
18
  langwatch/dspy/__init__.py,sha256=ahOMnNefVD9xsf7Z0P6iE0SbKylANixrNZkbKoK2FTs,35208
19
19
  langwatch/evaluation/__init__.py,sha256=8SOSZZbSzXa1jL-9Zlyt0f9u5sOA_TrO1J61ueASBLI,16980
20
20
  langwatch/experiment/__init__.py,sha256=nv2OfoNMMZwUA9KfozW2ZNaR1-J1LCmU4NykjGfe9is,3001
21
- langwatch/experiment/experiment.py,sha256=5xj58FKVC0y_LxgfwjJZP9lDp7tZ9FUUbERBtui_nC8,33026
21
+ langwatch/experiment/experiment.py,sha256=BoTia3NPi_OcMSVGWN4xdNHrj5DC46_ZzcPhNhzNECc,37144
22
22
  langwatch/experiment/platform_run.py,sha256=qiy_bwp786TbkH4HIlZVlJPmCtQlStAq9vUdG4-3VdU,13850
23
23
  langwatch/exporters/filterable_batch_span_exporter.py,sha256=MlhZjui56XD6p2sa8kEGyr-Hb3wqudknngmemnB4Twg,2142
24
24
  langwatch/generated/langwatch_rest_api_client/__init__.py,sha256=8r-9pAj7fK7vnVX3mT0y_zS4B9ZRqD6RZiBo5fPra60,156
@@ -417,6 +417,6 @@ langwatch/utils/initialization.py,sha256=2egw2aXGYdbgLsyOfkQ3Oz0JFbfnQnpiFg_Q-gc
417
417
  langwatch/utils/module.py,sha256=KLBNOK3mA9gCSifCcQX_lOtU48BJQDWvFKtF6NMvwVA,688
418
418
  langwatch/utils/transformation.py,sha256=76MGXyrYTxM0Yri36NJqLK-XxL4BBYdmKWAXXlw3D4Q,7690
419
419
  langwatch/utils/utils.py,sha256=RW01NPA_cpWsTlUvLd0FGuoVECtMVO9Bj4gdIVx8fUg,644
420
- langwatch-0.10.1.dist-info/METADATA,sha256=KjaQMj1Rk2spt7K6_UFEb2BNY6KraJVzvcCX7Qvwqfw,13193
421
- langwatch-0.10.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
422
- langwatch-0.10.1.dist-info/RECORD,,
420
+ langwatch-0.10.2.dist-info/METADATA,sha256=rs-hw9hPy3Ap1y3PqwtA7oKvTCwj7i4eDchq7xmpMC0,13193
421
+ langwatch-0.10.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
422
+ langwatch-0.10.2.dist-info/RECORD,,