langchain 0.3.23__py3-none-any.whl → 0.3.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (170)
  1. langchain/_api/module_import.py +3 -3
  2. langchain/agents/agent.py +104 -109
  3. langchain/agents/agent_iterator.py +11 -15
  4. langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +2 -2
  5. langchain/agents/agent_toolkits/vectorstore/base.py +3 -3
  6. langchain/agents/agent_toolkits/vectorstore/toolkit.py +4 -6
  7. langchain/agents/chat/base.py +7 -6
  8. langchain/agents/chat/output_parser.py +2 -1
  9. langchain/agents/conversational/base.py +5 -4
  10. langchain/agents/conversational_chat/base.py +9 -8
  11. langchain/agents/format_scratchpad/log.py +1 -3
  12. langchain/agents/format_scratchpad/log_to_messages.py +3 -5
  13. langchain/agents/format_scratchpad/openai_functions.py +4 -4
  14. langchain/agents/format_scratchpad/tools.py +3 -3
  15. langchain/agents/format_scratchpad/xml.py +1 -3
  16. langchain/agents/initialize.py +2 -1
  17. langchain/agents/json_chat/base.py +3 -2
  18. langchain/agents/loading.py +5 -5
  19. langchain/agents/mrkl/base.py +6 -5
  20. langchain/agents/openai_assistant/base.py +13 -17
  21. langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +6 -6
  22. langchain/agents/openai_functions_agent/base.py +13 -12
  23. langchain/agents/openai_functions_multi_agent/base.py +15 -14
  24. langchain/agents/openai_tools/base.py +2 -1
  25. langchain/agents/output_parsers/openai_functions.py +2 -2
  26. langchain/agents/output_parsers/openai_tools.py +6 -6
  27. langchain/agents/output_parsers/react_json_single_input.py +2 -1
  28. langchain/agents/output_parsers/self_ask.py +2 -1
  29. langchain/agents/output_parsers/tools.py +7 -7
  30. langchain/agents/react/agent.py +3 -2
  31. langchain/agents/react/base.py +4 -3
  32. langchain/agents/schema.py +3 -3
  33. langchain/agents/self_ask_with_search/base.py +2 -1
  34. langchain/agents/structured_chat/base.py +9 -8
  35. langchain/agents/structured_chat/output_parser.py +2 -1
  36. langchain/agents/tool_calling_agent/base.py +3 -2
  37. langchain/agents/tools.py +4 -4
  38. langchain/agents/types.py +3 -3
  39. langchain/agents/utils.py +1 -1
  40. langchain/agents/xml/base.py +7 -6
  41. langchain/callbacks/streaming_aiter.py +3 -2
  42. langchain/callbacks/streaming_aiter_final_only.py +3 -3
  43. langchain/callbacks/streaming_stdout_final_only.py +3 -3
  44. langchain/chains/api/base.py +11 -12
  45. langchain/chains/base.py +47 -50
  46. langchain/chains/combine_documents/base.py +23 -23
  47. langchain/chains/combine_documents/map_reduce.py +12 -12
  48. langchain/chains/combine_documents/map_rerank.py +16 -15
  49. langchain/chains/combine_documents/reduce.py +17 -17
  50. langchain/chains/combine_documents/refine.py +12 -12
  51. langchain/chains/combine_documents/stuff.py +10 -10
  52. langchain/chains/constitutional_ai/base.py +9 -9
  53. langchain/chains/conversation/base.py +2 -4
  54. langchain/chains/conversational_retrieval/base.py +30 -30
  55. langchain/chains/elasticsearch_database/base.py +13 -13
  56. langchain/chains/example_generator.py +1 -3
  57. langchain/chains/flare/base.py +13 -12
  58. langchain/chains/flare/prompts.py +2 -4
  59. langchain/chains/hyde/base.py +8 -8
  60. langchain/chains/llm.py +31 -30
  61. langchain/chains/llm_checker/base.py +6 -6
  62. langchain/chains/llm_math/base.py +10 -10
  63. langchain/chains/llm_summarization_checker/base.py +6 -6
  64. langchain/chains/loading.py +12 -14
  65. langchain/chains/mapreduce.py +7 -6
  66. langchain/chains/moderation.py +8 -8
  67. langchain/chains/natbot/base.py +6 -6
  68. langchain/chains/openai_functions/base.py +8 -10
  69. langchain/chains/openai_functions/citation_fuzzy_match.py +4 -4
  70. langchain/chains/openai_functions/extraction.py +3 -3
  71. langchain/chains/openai_functions/openapi.py +12 -12
  72. langchain/chains/openai_functions/qa_with_structure.py +4 -4
  73. langchain/chains/openai_functions/utils.py +2 -2
  74. langchain/chains/openai_tools/extraction.py +2 -2
  75. langchain/chains/prompt_selector.py +3 -3
  76. langchain/chains/qa_generation/base.py +5 -5
  77. langchain/chains/qa_with_sources/base.py +21 -21
  78. langchain/chains/qa_with_sources/loading.py +2 -1
  79. langchain/chains/qa_with_sources/retrieval.py +6 -6
  80. langchain/chains/qa_with_sources/vector_db.py +8 -8
  81. langchain/chains/query_constructor/base.py +4 -3
  82. langchain/chains/query_constructor/parser.py +5 -4
  83. langchain/chains/question_answering/chain.py +3 -2
  84. langchain/chains/retrieval.py +2 -2
  85. langchain/chains/retrieval_qa/base.py +16 -16
  86. langchain/chains/router/base.py +12 -11
  87. langchain/chains/router/embedding_router.py +12 -11
  88. langchain/chains/router/llm_router.py +12 -12
  89. langchain/chains/router/multi_prompt.py +3 -3
  90. langchain/chains/router/multi_retrieval_qa.py +5 -4
  91. langchain/chains/sequential.py +18 -18
  92. langchain/chains/sql_database/query.py +4 -4
  93. langchain/chains/structured_output/base.py +14 -13
  94. langchain/chains/summarize/chain.py +4 -3
  95. langchain/chains/transform.py +12 -11
  96. langchain/chat_models/base.py +27 -31
  97. langchain/embeddings/__init__.py +1 -1
  98. langchain/embeddings/base.py +4 -4
  99. langchain/embeddings/cache.py +19 -18
  100. langchain/evaluation/agents/trajectory_eval_chain.py +16 -19
  101. langchain/evaluation/comparison/eval_chain.py +10 -10
  102. langchain/evaluation/criteria/eval_chain.py +11 -10
  103. langchain/evaluation/embedding_distance/base.py +21 -21
  104. langchain/evaluation/exact_match/base.py +3 -3
  105. langchain/evaluation/loading.py +7 -8
  106. langchain/evaluation/qa/eval_chain.py +7 -6
  107. langchain/evaluation/regex_match/base.py +3 -3
  108. langchain/evaluation/schema.py +6 -5
  109. langchain/evaluation/scoring/eval_chain.py +9 -9
  110. langchain/evaluation/string_distance/base.py +23 -23
  111. langchain/hub.py +2 -1
  112. langchain/indexes/_sql_record_manager.py +8 -7
  113. langchain/indexes/vectorstore.py +11 -11
  114. langchain/llms/__init__.py +3 -3
  115. langchain/memory/buffer.py +13 -13
  116. langchain/memory/buffer_window.py +5 -5
  117. langchain/memory/chat_memory.py +5 -5
  118. langchain/memory/combined.py +10 -10
  119. langchain/memory/entity.py +8 -7
  120. langchain/memory/readonly.py +4 -4
  121. langchain/memory/simple.py +5 -5
  122. langchain/memory/summary.py +8 -8
  123. langchain/memory/summary_buffer.py +11 -11
  124. langchain/memory/token_buffer.py +5 -5
  125. langchain/memory/utils.py +2 -2
  126. langchain/memory/vectorstore.py +15 -14
  127. langchain/memory/vectorstore_token_buffer_memory.py +7 -7
  128. langchain/model_laboratory.py +4 -3
  129. langchain/output_parsers/combining.py +5 -5
  130. langchain/output_parsers/datetime.py +1 -2
  131. langchain/output_parsers/enum.py +4 -5
  132. langchain/output_parsers/pandas_dataframe.py +5 -5
  133. langchain/output_parsers/regex.py +4 -4
  134. langchain/output_parsers/regex_dict.py +4 -4
  135. langchain/output_parsers/retry.py +2 -2
  136. langchain/output_parsers/structured.py +5 -5
  137. langchain/output_parsers/yaml.py +3 -3
  138. langchain/pydantic_v1/__init__.py +1 -6
  139. langchain/pydantic_v1/dataclasses.py +1 -5
  140. langchain/pydantic_v1/main.py +1 -5
  141. langchain/retrievers/contextual_compression.py +3 -3
  142. langchain/retrievers/document_compressors/base.py +3 -2
  143. langchain/retrievers/document_compressors/chain_extract.py +4 -3
  144. langchain/retrievers/document_compressors/chain_filter.py +3 -2
  145. langchain/retrievers/document_compressors/cohere_rerank.py +4 -3
  146. langchain/retrievers/document_compressors/cross_encoder.py +1 -2
  147. langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -1
  148. langchain/retrievers/document_compressors/embeddings_filter.py +3 -2
  149. langchain/retrievers/document_compressors/listwise_rerank.py +6 -5
  150. langchain/retrievers/ensemble.py +15 -19
  151. langchain/retrievers/merger_retriever.py +7 -12
  152. langchain/retrievers/multi_query.py +14 -13
  153. langchain/retrievers/multi_vector.py +4 -4
  154. langchain/retrievers/parent_document_retriever.py +9 -8
  155. langchain/retrievers/re_phraser.py +2 -3
  156. langchain/retrievers/self_query/base.py +13 -12
  157. langchain/retrievers/time_weighted_retriever.py +14 -14
  158. langchain/runnables/openai_functions.py +4 -3
  159. langchain/smith/evaluation/config.py +7 -6
  160. langchain/smith/evaluation/progress.py +3 -2
  161. langchain/smith/evaluation/runner_utils.py +58 -61
  162. langchain/smith/evaluation/string_run_evaluator.py +29 -29
  163. langchain/storage/encoder_backed.py +7 -11
  164. langchain/storage/file_system.py +5 -4
  165. {langchain-0.3.23.dist-info → langchain-0.3.24.dist-info}/METADATA +2 -2
  166. {langchain-0.3.23.dist-info → langchain-0.3.24.dist-info}/RECORD +169 -169
  167. {langchain-0.3.23.dist-info → langchain-0.3.24.dist-info}/WHEEL +1 -1
  168. langchain-0.3.24.dist-info/entry_points.txt +4 -0
  169. langchain-0.3.23.dist-info/entry_points.txt +0 -5
  170. {langchain-0.3.23.dist-info → langchain-0.3.24.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,7 @@
1
1
  """Configuration for run evaluators."""
2
2
 
3
- from typing import Any, Callable, Dict, List, Optional, Sequence, Union
3
+ from collections.abc import Sequence
4
+ from typing import Any, Callable, Optional, Union
4
5
 
5
6
  from langchain_core.embeddings import Embeddings
6
7
  from langchain_core.language_models import BaseLanguageModel
@@ -45,7 +46,7 @@ class EvalConfig(BaseModel):
45
46
 
46
47
  evaluator_type: EvaluatorType
47
48
 
48
- def get_kwargs(self) -> Dict[str, Any]:
49
+ def get_kwargs(self) -> dict[str, Any]:
49
50
  """Get the keyword arguments for the load_evaluator call.
50
51
 
51
52
  Returns
@@ -78,7 +79,7 @@ class SingleKeyEvalConfig(EvalConfig):
78
79
  """The key from the traced run's inputs dictionary to use to represent the
79
80
  input. If not provided, it will be inferred automatically."""
80
81
 
81
- def get_kwargs(self) -> Dict[str, Any]:
82
+ def get_kwargs(self) -> dict[str, Any]:
82
83
  kwargs = super().get_kwargs()
83
84
  # Filer out the keys that are not needed for the evaluator.
84
85
  for key in ["reference_key", "prediction_key", "input_key"]:
@@ -121,7 +122,7 @@ class RunEvalConfig(BaseModel):
121
122
  The language model to pass to any evaluators that use a language model.
122
123
  """ # noqa: E501
123
124
 
124
- evaluators: List[
125
+ evaluators: list[
125
126
  Union[
126
127
  SINGLE_EVAL_CONFIG_TYPE,
127
128
  CUSTOM_EVALUATOR_TYPE,
@@ -134,9 +135,9 @@ class RunEvalConfig(BaseModel):
134
135
  given evaluator
135
136
  (e.g.,
136
137
  :class:`RunEvalConfig.QA <langchain.smith.evaluation.config.RunEvalConfig.QA>`)."""
137
- custom_evaluators: Optional[List[CUSTOM_EVALUATOR_TYPE]] = None
138
+ custom_evaluators: Optional[list[CUSTOM_EVALUATOR_TYPE]] = None
138
139
  """Custom evaluators to apply to the dataset run."""
139
- batch_evaluators: Optional[List[BATCH_EVALUATOR_LIKE]] = None
140
+ batch_evaluators: Optional[list[BATCH_EVALUATOR_LIKE]] = None
140
141
  """Evaluators that run on an aggregate/batch level.
141
142
 
142
143
  These generate 1 or more metrics that are assigned to the full test run.
@@ -1,7 +1,8 @@
1
1
  """A simple progress bar for the console."""
2
2
 
3
3
  import threading
4
- from typing import Any, Dict, Optional, Sequence
4
+ from collections.abc import Sequence
5
+ from typing import Any, Optional
5
6
  from uuid import UUID
6
7
 
7
8
  from langchain_core.callbacks import base as base_callbacks
@@ -51,7 +52,7 @@ class ProgressBarCallback(base_callbacks.BaseCallbackHandler):
51
52
 
52
53
  def on_chain_end(
53
54
  self,
54
- outputs: Dict[str, Any],
55
+ outputs: dict[str, Any],
55
56
  *,
56
57
  run_id: UUID,
57
58
  parent_run_id: Optional[UUID] = None,
@@ -13,10 +13,7 @@ from typing import (
13
13
  TYPE_CHECKING,
14
14
  Any,
15
15
  Callable,
16
- Dict,
17
- List,
18
16
  Optional,
19
- Tuple,
20
17
  Union,
21
18
  cast,
22
19
  )
@@ -229,7 +226,7 @@ def _wrap_in_chain_factory(
229
226
  return llm_or_chain_factory
230
227
 
231
228
 
232
- def _get_prompt(inputs: Dict[str, Any]) -> str:
229
+ def _get_prompt(inputs: dict[str, Any]) -> str:
233
230
  """Get prompt from inputs.
234
231
 
235
232
  Args:
@@ -286,10 +283,10 @@ class ChatModelInput(TypedDict):
286
283
  messages: List of chat messages.
287
284
  """
288
285
 
289
- messages: List[BaseMessage]
286
+ messages: list[BaseMessage]
290
287
 
291
288
 
292
- def _get_messages(inputs: Dict[str, Any]) -> dict:
289
+ def _get_messages(inputs: dict[str, Any]) -> dict:
293
290
  """Get Chat Messages from inputs.
294
291
 
295
292
  Args:
@@ -331,7 +328,7 @@ def _get_messages(inputs: Dict[str, Any]) -> dict:
331
328
  ## Shared data validation utilities
332
329
  def _validate_example_inputs_for_language_model(
333
330
  first_example: Example,
334
- input_mapper: Optional[Callable[[Dict], Any]],
331
+ input_mapper: Optional[Callable[[dict], Any]],
335
332
  ) -> None:
336
333
  if input_mapper:
337
334
  prompt_input = input_mapper(first_example.inputs)
@@ -365,7 +362,7 @@ def _validate_example_inputs_for_language_model(
365
362
  def _validate_example_inputs_for_chain(
366
363
  first_example: Example,
367
364
  chain: Chain,
368
- input_mapper: Optional[Callable[[Dict], Any]],
365
+ input_mapper: Optional[Callable[[dict], Any]],
369
366
  ) -> None:
370
367
  """Validate that the example inputs match the chain input keys."""
371
368
  if input_mapper:
@@ -402,7 +399,7 @@ def _validate_example_inputs_for_chain(
402
399
  def _validate_example_inputs(
403
400
  example: Example,
404
401
  llm_or_chain_factory: MCF,
405
- input_mapper: Optional[Callable[[Dict], Any]],
402
+ input_mapper: Optional[Callable[[dict], Any]],
406
403
  ) -> None:
407
404
  """Validate that the example inputs are valid for the model."""
408
405
  if isinstance(llm_or_chain_factory, BaseLanguageModel):
@@ -421,10 +418,10 @@ def _validate_example_inputs(
421
418
 
422
419
  def _setup_evaluation(
423
420
  llm_or_chain_factory: MCF,
424
- examples: List[Example],
421
+ examples: list[Example],
425
422
  evaluation: Optional[smith_eval.RunEvalConfig],
426
423
  data_type: DataType,
427
- ) -> Optional[List[RunEvaluator]]:
424
+ ) -> Optional[list[RunEvaluator]]:
428
425
  """Configure the evaluators to run on the results of the chain."""
429
426
  if evaluation:
430
427
  if isinstance(llm_or_chain_factory, BaseLanguageModel):
@@ -451,7 +448,7 @@ def _setup_evaluation(
451
448
 
452
449
  def _determine_input_key(
453
450
  config: smith_eval.RunEvalConfig,
454
- run_inputs: Optional[List[str]],
451
+ run_inputs: Optional[list[str]],
455
452
  ) -> Optional[str]:
456
453
  input_key = None
457
454
  if config.input_key:
@@ -475,7 +472,7 @@ def _determine_input_key(
475
472
 
476
473
  def _determine_prediction_key(
477
474
  config: smith_eval.RunEvalConfig,
478
- run_outputs: Optional[List[str]],
475
+ run_outputs: Optional[list[str]],
479
476
  ) -> Optional[str]:
480
477
  prediction_key = None
481
478
  if config.prediction_key:
@@ -498,7 +495,7 @@ def _determine_prediction_key(
498
495
 
499
496
  def _determine_reference_key(
500
497
  config: smith_eval.RunEvalConfig,
501
- example_outputs: Optional[List[str]],
498
+ example_outputs: Optional[list[str]],
502
499
  ) -> Optional[str]:
503
500
  if config.reference_key:
504
501
  reference_key = config.reference_key
@@ -522,7 +519,7 @@ def _construct_run_evaluator(
522
519
  eval_llm: Optional[BaseLanguageModel],
523
520
  run_type: str,
524
521
  data_type: DataType,
525
- example_outputs: Optional[List[str]],
522
+ example_outputs: Optional[list[str]],
526
523
  reference_key: Optional[str],
527
524
  input_key: Optional[str],
528
525
  prediction_key: Optional[str],
@@ -583,10 +580,10 @@ def _construct_run_evaluator(
583
580
 
584
581
  def _get_keys(
585
582
  config: smith_eval.RunEvalConfig,
586
- run_inputs: Optional[List[str]],
587
- run_outputs: Optional[List[str]],
588
- example_outputs: Optional[List[str]],
589
- ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
583
+ run_inputs: Optional[list[str]],
584
+ run_outputs: Optional[list[str]],
585
+ example_outputs: Optional[list[str]],
586
+ ) -> tuple[Optional[str], Optional[str], Optional[str]]:
590
587
  input_key = _determine_input_key(config, run_inputs)
591
588
  prediction_key = _determine_prediction_key(config, run_outputs)
592
589
  reference_key = _determine_reference_key(config, example_outputs)
@@ -597,10 +594,10 @@ def _load_run_evaluators(
597
594
  config: smith_eval.RunEvalConfig,
598
595
  run_type: str,
599
596
  data_type: DataType,
600
- example_outputs: Optional[List[str]],
601
- run_inputs: Optional[List[str]],
602
- run_outputs: Optional[List[str]],
603
- ) -> List[RunEvaluator]:
597
+ example_outputs: Optional[list[str]],
598
+ run_inputs: Optional[list[str]],
599
+ run_outputs: Optional[list[str]],
600
+ ) -> list[RunEvaluator]:
604
601
  """
605
602
  Load run evaluators from a configuration.
606
603
 
@@ -662,12 +659,12 @@ def _load_run_evaluators(
662
659
 
663
660
  async def _arun_llm(
664
661
  llm: BaseLanguageModel,
665
- inputs: Dict[str, Any],
662
+ inputs: dict[str, Any],
666
663
  *,
667
- tags: Optional[List[str]] = None,
664
+ tags: Optional[list[str]] = None,
668
665
  callbacks: Callbacks = None,
669
- input_mapper: Optional[Callable[[Dict], Any]] = None,
670
- metadata: Optional[Dict[str, Any]] = None,
666
+ input_mapper: Optional[Callable[[dict], Any]] = None,
667
+ metadata: Optional[dict[str, Any]] = None,
671
668
  ) -> Union[str, BaseMessage]:
672
669
  """Asynchronously run the language model.
673
670
 
@@ -726,12 +723,12 @@ async def _arun_llm(
726
723
 
727
724
  async def _arun_chain(
728
725
  chain: Union[Chain, Runnable],
729
- inputs: Dict[str, Any],
726
+ inputs: dict[str, Any],
730
727
  callbacks: Callbacks,
731
728
  *,
732
- tags: Optional[List[str]] = None,
733
- input_mapper: Optional[Callable[[Dict], Any]] = None,
734
- metadata: Optional[Dict[str, Any]] = None,
729
+ tags: Optional[list[str]] = None,
730
+ input_mapper: Optional[Callable[[dict], Any]] = None,
731
+ metadata: Optional[dict[str, Any]] = None,
735
732
  ) -> Union[dict, str]:
736
733
  """Run a chain asynchronously on inputs."""
737
734
  inputs_ = inputs if input_mapper is None else input_mapper(inputs)
@@ -761,7 +758,7 @@ async def _arun_llm_or_chain(
761
758
  config: RunnableConfig,
762
759
  *,
763
760
  llm_or_chain_factory: MCF,
764
- input_mapper: Optional[Callable[[Dict], Any]] = None,
761
+ input_mapper: Optional[Callable[[dict], Any]] = None,
765
762
  ) -> Union[dict, str, LLMResult, ChatResult]:
766
763
  """Asynchronously run the Chain or language model.
767
764
 
@@ -815,12 +812,12 @@ async def _arun_llm_or_chain(
815
812
 
816
813
  def _run_llm(
817
814
  llm: BaseLanguageModel,
818
- inputs: Dict[str, Any],
815
+ inputs: dict[str, Any],
819
816
  callbacks: Callbacks,
820
817
  *,
821
- tags: Optional[List[str]] = None,
822
- input_mapper: Optional[Callable[[Dict], Any]] = None,
823
- metadata: Optional[Dict[str, Any]] = None,
818
+ tags: Optional[list[str]] = None,
819
+ input_mapper: Optional[Callable[[dict], Any]] = None,
820
+ metadata: Optional[dict[str, Any]] = None,
824
821
  ) -> Union[str, BaseMessage]:
825
822
  """
826
823
  Run the language model on the example.
@@ -877,13 +874,13 @@ def _run_llm(
877
874
 
878
875
  def _run_chain(
879
876
  chain: Union[Chain, Runnable],
880
- inputs: Dict[str, Any],
877
+ inputs: dict[str, Any],
881
878
  callbacks: Callbacks,
882
879
  *,
883
- tags: Optional[List[str]] = None,
884
- input_mapper: Optional[Callable[[Dict], Any]] = None,
885
- metadata: Optional[Dict[str, Any]] = None,
886
- ) -> Union[Dict, str]:
880
+ tags: Optional[list[str]] = None,
881
+ input_mapper: Optional[Callable[[dict], Any]] = None,
882
+ metadata: Optional[dict[str, Any]] = None,
883
+ ) -> Union[dict, str]:
887
884
  """Run a chain on inputs."""
888
885
  inputs_ = inputs if input_mapper is None else input_mapper(inputs)
889
886
  if (
@@ -912,7 +909,7 @@ def _run_llm_or_chain(
912
909
  config: RunnableConfig,
913
910
  *,
914
911
  llm_or_chain_factory: MCF,
915
- input_mapper: Optional[Callable[[Dict], Any]] = None,
912
+ input_mapper: Optional[Callable[[dict], Any]] = None,
916
913
  ) -> Union[dict, str, LLMResult, ChatResult]:
917
914
  """
918
915
  Run the Chain or language model synchronously.
@@ -968,10 +965,10 @@ def _prepare_eval_run(
968
965
  dataset_name: str,
969
966
  llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
970
967
  project_name: str,
971
- project_metadata: Optional[Dict[str, Any]] = None,
972
- tags: Optional[List[str]] = None,
968
+ project_metadata: Optional[dict[str, Any]] = None,
969
+ tags: Optional[list[str]] = None,
973
970
  dataset_version: Optional[Union[str, datetime]] = None,
974
- ) -> Tuple[MCF, TracerSession, Dataset, List[Example]]:
971
+ ) -> tuple[MCF, TracerSession, Dataset, list[Example]]:
975
972
  wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory, dataset_name)
976
973
  dataset = client.read_dataset(dataset_name=dataset_name)
977
974
 
@@ -1027,7 +1024,7 @@ run_on_dataset(
1027
1024
  class _RowResult(TypedDict, total=False):
1028
1025
  """A dictionary of the results for a single example row."""
1029
1026
 
1030
- feedback: Optional[List[EvaluationResult]]
1027
+ feedback: Optional[list[EvaluationResult]]
1031
1028
  execution_time: Optional[float]
1032
1029
  run_id: Optional[str]
1033
1030
 
@@ -1039,14 +1036,14 @@ class _DatasetRunContainer:
1039
1036
  client: Client
1040
1037
  project: TracerSession
1041
1038
  wrapped_model: MCF
1042
- examples: List[Example]
1043
- configs: List[RunnableConfig]
1044
- batch_evaluators: Optional[List[smith_eval_config.BATCH_EVALUATOR_LIKE]] = None
1039
+ examples: list[Example]
1040
+ configs: list[RunnableConfig]
1041
+ batch_evaluators: Optional[list[smith_eval_config.BATCH_EVALUATOR_LIKE]] = None
1045
1042
 
1046
1043
  def _merge_test_outputs(
1047
1044
  self,
1048
1045
  batch_results: list,
1049
- all_eval_results: Dict[str, _RowResult],
1046
+ all_eval_results: dict[str, _RowResult],
1050
1047
  ) -> dict:
1051
1048
  results: dict = {}
1052
1049
  for example, output in zip(self.examples, batch_results):
@@ -1065,7 +1062,7 @@ class _DatasetRunContainer:
1065
1062
  results[str(example.id)]["reference"] = example.outputs
1066
1063
  return results
1067
1064
 
1068
- def _run_batch_evaluators(self, runs: Dict[str, Run]) -> List[dict]:
1065
+ def _run_batch_evaluators(self, runs: dict[str, Run]) -> list[dict]:
1069
1066
  evaluators = self.batch_evaluators
1070
1067
  if not evaluators:
1071
1068
  return []
@@ -1090,7 +1087,7 @@ class _DatasetRunContainer:
1090
1087
  )
1091
1088
  return aggregate_feedback
1092
1089
 
1093
- def _collect_metrics(self) -> Tuple[Dict[str, _RowResult], Dict[str, Run]]:
1090
+ def _collect_metrics(self) -> tuple[dict[str, _RowResult], dict[str, Run]]:
1094
1091
  all_eval_results: dict = {}
1095
1092
  all_runs: dict = {}
1096
1093
  for c in self.configs:
@@ -1117,11 +1114,11 @@ class _DatasetRunContainer:
1117
1114
  }
1118
1115
  )
1119
1116
  all_runs[str(callback.example_id)] = run
1120
- return cast(Dict[str, _RowResult], all_eval_results), all_runs
1117
+ return cast(dict[str, _RowResult], all_eval_results), all_runs
1121
1118
 
1122
1119
  def _collect_test_results(
1123
1120
  self,
1124
- batch_results: List[Union[dict, str, LLMResult, ChatResult]],
1121
+ batch_results: list[Union[dict, str, LLMResult, ChatResult]],
1125
1122
  ) -> TestResult:
1126
1123
  logger.info("Waiting for evaluators to complete.")
1127
1124
  wait_for_all_evaluators()
@@ -1162,10 +1159,10 @@ class _DatasetRunContainer:
1162
1159
  llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
1163
1160
  project_name: Optional[str],
1164
1161
  evaluation: Optional[smith_eval.RunEvalConfig] = None,
1165
- tags: Optional[List[str]] = None,
1166
- input_mapper: Optional[Callable[[Dict], Any]] = None,
1162
+ tags: Optional[list[str]] = None,
1163
+ input_mapper: Optional[Callable[[dict], Any]] = None,
1167
1164
  concurrency_level: int = 5,
1168
- project_metadata: Optional[Dict[str, Any]] = None,
1165
+ project_metadata: Optional[dict[str, Any]] = None,
1169
1166
  revision_id: Optional[str] = None,
1170
1167
  dataset_version: Optional[Union[datetime, str]] = None,
1171
1168
  ) -> _DatasetRunContainer:
@@ -1277,11 +1274,11 @@ async def arun_on_dataset(
1277
1274
  dataset_version: Optional[Union[datetime, str]] = None,
1278
1275
  concurrency_level: int = 5,
1279
1276
  project_name: Optional[str] = None,
1280
- project_metadata: Optional[Dict[str, Any]] = None,
1277
+ project_metadata: Optional[dict[str, Any]] = None,
1281
1278
  verbose: bool = False,
1282
1279
  revision_id: Optional[str] = None,
1283
1280
  **kwargs: Any,
1284
- ) -> Dict[str, Any]:
1281
+ ) -> dict[str, Any]:
1285
1282
  input_mapper = kwargs.pop("input_mapper", None)
1286
1283
  if input_mapper:
1287
1284
  warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)
@@ -1342,11 +1339,11 @@ def run_on_dataset(
1342
1339
  dataset_version: Optional[Union[datetime, str]] = None,
1343
1340
  concurrency_level: int = 5,
1344
1341
  project_name: Optional[str] = None,
1345
- project_metadata: Optional[Dict[str, Any]] = None,
1342
+ project_metadata: Optional[dict[str, Any]] = None,
1346
1343
  verbose: bool = False,
1347
1344
  revision_id: Optional[str] = None,
1348
1345
  **kwargs: Any,
1349
- ) -> Dict[str, Any]:
1346
+ ) -> dict[str, Any]:
1350
1347
  input_mapper = kwargs.pop("input_mapper", None)
1351
1348
  if input_mapper:
1352
1349
  warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)
@@ -3,7 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from abc import abstractmethod
6
- from typing import Any, Dict, List, Optional
6
+ from typing import Any, Optional
7
7
 
8
8
  from langchain_core.callbacks.manager import (
9
9
  AsyncCallbackManagerForChainRun,
@@ -21,7 +21,7 @@ from langchain.evaluation.schema import StringEvaluator
21
21
  from langchain.schema import RUN_KEY
22
22
 
23
23
 
24
- def _get_messages_from_run_dict(messages: List[dict]) -> List[BaseMessage]:
24
+ def _get_messages_from_run_dict(messages: list[dict]) -> list[BaseMessage]:
25
25
  if not messages:
26
26
  return []
27
27
  first_message = messages[0]
@@ -35,15 +35,15 @@ class StringRunMapper(Serializable):
35
35
  """Extract items to evaluate from the run object."""
36
36
 
37
37
  @property
38
- def output_keys(self) -> List[str]:
38
+ def output_keys(self) -> list[str]:
39
39
  """The keys to extract from the run."""
40
40
  return ["prediction", "input"]
41
41
 
42
42
  @abstractmethod
43
- def map(self, run: Run) -> Dict[str, str]:
43
+ def map(self, run: Run) -> dict[str, str]:
44
44
  """Maps the Run to a dictionary."""
45
45
 
46
- def __call__(self, run: Run) -> Dict[str, str]:
46
+ def __call__(self, run: Run) -> dict[str, str]:
47
47
  """Maps the Run to a dictionary."""
48
48
  if not run.outputs:
49
49
  raise ValueError(f"Run {run.id} has no outputs to evaluate.")
@@ -53,7 +53,7 @@ class StringRunMapper(Serializable):
53
53
  class LLMStringRunMapper(StringRunMapper):
54
54
  """Extract items to evaluate from the run object."""
55
55
 
56
- def serialize_chat_messages(self, messages: List[Dict]) -> str:
56
+ def serialize_chat_messages(self, messages: list[dict]) -> str:
57
57
  """Extract the input messages from the run."""
58
58
  if isinstance(messages, list) and messages:
59
59
  if isinstance(messages[0], dict):
@@ -66,7 +66,7 @@ class LLMStringRunMapper(StringRunMapper):
66
66
  return get_buffer_string(chat_messages)
67
67
  raise ValueError(f"Could not extract messages to evaluate {messages}")
68
68
 
69
- def serialize_inputs(self, inputs: Dict) -> str:
69
+ def serialize_inputs(self, inputs: dict) -> str:
70
70
  if "prompts" in inputs: # Should we even accept this?
71
71
  input_ = "\n\n".join(inputs["prompts"])
72
72
  elif "prompt" in inputs:
@@ -77,13 +77,13 @@ class LLMStringRunMapper(StringRunMapper):
77
77
  raise ValueError("LLM Run must have either messages or prompts as inputs.")
78
78
  return input_
79
79
 
80
- def serialize_outputs(self, outputs: Dict) -> str:
80
+ def serialize_outputs(self, outputs: dict) -> str:
81
81
  if not outputs.get("generations"):
82
82
  raise ValueError("Cannot evaluate LLM Run without generations.")
83
- generations: List[Dict] = outputs["generations"]
83
+ generations: list[dict] = outputs["generations"]
84
84
  if not generations:
85
85
  raise ValueError("Cannot evaluate LLM run with empty generations.")
86
- first_generation: Dict = generations[0]
86
+ first_generation: dict = generations[0]
87
87
  if isinstance(first_generation, list):
88
88
  # Runs from Tracer have generations as a list of lists of dicts
89
89
  # Whereas Runs from the API have a list of dicts
@@ -94,7 +94,7 @@ class LLMStringRunMapper(StringRunMapper):
94
94
  output_ = first_generation["text"]
95
95
  return output_
96
96
 
97
- def map(self, run: Run) -> Dict[str, str]:
97
+ def map(self, run: Run) -> dict[str, str]:
98
98
  """Maps the Run to a dictionary."""
99
99
  if run.run_type != "llm":
100
100
  raise ValueError("LLM RunMapper only supports LLM runs.")
@@ -135,7 +135,7 @@ class ChainStringRunMapper(StringRunMapper):
135
135
  If not provided, will use the only output key or raise an error
136
136
  if there are multiple."""
137
137
 
138
- def _get_key(self, source: Dict, key: Optional[str], which: str) -> str:
138
+ def _get_key(self, source: dict, key: Optional[str], which: str) -> str:
139
139
  if key is not None:
140
140
  return source[key]
141
141
  elif len(source) == 1:
@@ -146,7 +146,7 @@ class ChainStringRunMapper(StringRunMapper):
146
146
  f"{source}\nPlease manually specify a {which}_key"
147
147
  )
148
148
 
149
- def map(self, run: Run) -> Dict[str, str]:
149
+ def map(self, run: Run) -> dict[str, str]:
150
150
  """Maps the Run to a dictionary."""
151
151
  if not run.outputs:
152
152
  raise ValueError(
@@ -182,7 +182,7 @@ class ChainStringRunMapper(StringRunMapper):
182
182
  class ToolStringRunMapper(StringRunMapper):
183
183
  """Map an input to the tool."""
184
184
 
185
- def map(self, run: Run) -> Dict[str, str]:
185
+ def map(self, run: Run) -> dict[str, str]:
186
186
  if not run.outputs:
187
187
  raise ValueError(f"Run {run.id} has no outputs to evaluate.")
188
188
  return {"input": run.inputs["input"], "prediction": run.outputs["output"]}
@@ -194,16 +194,16 @@ class StringExampleMapper(Serializable):
194
194
  reference_key: Optional[str] = None
195
195
 
196
196
  @property
197
- def output_keys(self) -> List[str]:
197
+ def output_keys(self) -> list[str]:
198
198
  """The keys to extract from the run."""
199
199
  return ["reference"]
200
200
 
201
- def serialize_chat_messages(self, messages: List[Dict]) -> str:
201
+ def serialize_chat_messages(self, messages: list[dict]) -> str:
202
202
  """Extract the input messages from the run."""
203
203
  chat_messages = _get_messages_from_run_dict(messages)
204
204
  return get_buffer_string(chat_messages)
205
205
 
206
- def map(self, example: Example) -> Dict[str, str]:
206
+ def map(self, example: Example) -> dict[str, str]:
207
207
  """Maps the Example, or dataset row to a dictionary."""
208
208
  if not example.outputs:
209
209
  raise ValueError(
@@ -230,7 +230,7 @@ class StringExampleMapper(Serializable):
230
230
  else output
231
231
  }
232
232
 
233
- def __call__(self, example: Example) -> Dict[str, str]:
233
+ def __call__(self, example: Example) -> dict[str, str]:
234
234
  """Maps the Run and Example to a dictionary."""
235
235
  if not example.outputs:
236
236
  raise ValueError(
@@ -239,7 +239,7 @@ class StringExampleMapper(Serializable):
239
239
  return self.map(example)
240
240
 
241
241
 
242
- class StringRunEvaluatorChain(Chain, RunEvaluator): # type: ignore[override, override]
242
+ class StringRunEvaluatorChain(Chain, RunEvaluator):
243
243
  """Evaluate Run and optional examples."""
244
244
 
245
245
  run_mapper: StringRunMapper
@@ -253,14 +253,14 @@ class StringRunEvaluatorChain(Chain, RunEvaluator): # type: ignore[override, ov
253
253
  """The evaluation chain."""
254
254
 
255
255
  @property
256
- def input_keys(self) -> List[str]:
256
+ def input_keys(self) -> list[str]:
257
257
  return ["run", "example"]
258
258
 
259
259
  @property
260
- def output_keys(self) -> List[str]:
260
+ def output_keys(self) -> list[str]:
261
261
  return ["feedback"]
262
262
 
263
- def _prepare_input(self, inputs: Dict[str, Any]) -> Dict[str, str]:
263
+ def _prepare_input(self, inputs: dict[str, Any]) -> dict[str, str]:
264
264
  run: Run = inputs["run"]
265
265
  example: Optional[Example] = inputs.get("example")
266
266
  evaluate_strings_inputs = self.run_mapper(run)
@@ -277,7 +277,7 @@ class StringRunEvaluatorChain(Chain, RunEvaluator): # type: ignore[override, ov
277
277
  )
278
278
  return evaluate_strings_inputs
279
279
 
280
- def _prepare_output(self, output: Dict[str, Any]) -> Dict[str, Any]:
280
+ def _prepare_output(self, output: dict[str, Any]) -> dict[str, Any]:
281
281
  evaluation_result = EvaluationResult(
282
282
  key=self.name, comment=output.get("reasoning"), **output
283
283
  )
@@ -288,9 +288,9 @@ class StringRunEvaluatorChain(Chain, RunEvaluator): # type: ignore[override, ov
288
288
 
289
289
  def _call(
290
290
  self,
291
- inputs: Dict[str, str],
291
+ inputs: dict[str, str],
292
292
  run_manager: Optional[CallbackManagerForChainRun] = None,
293
- ) -> Dict[str, Any]:
293
+ ) -> dict[str, Any]:
294
294
  """Call the evaluation chain."""
295
295
  evaluate_strings_inputs = self._prepare_input(inputs)
296
296
  _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager()
@@ -304,9 +304,9 @@ class StringRunEvaluatorChain(Chain, RunEvaluator): # type: ignore[override, ov
304
304
 
305
305
  async def _acall(
306
306
  self,
307
- inputs: Dict[str, str],
307
+ inputs: dict[str, str],
308
308
  run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
309
- ) -> Dict[str, Any]:
309
+ ) -> dict[str, Any]:
310
310
  """Call the evaluation chain."""
311
311
  evaluate_strings_inputs = self._prepare_input(inputs)
312
312
  _run_manager = run_manager or AsyncCallbackManagerForChainRun.get_noop_manager()
@@ -318,7 +318,7 @@ class StringRunEvaluatorChain(Chain, RunEvaluator): # type: ignore[override, ov
318
318
  )
319
319
  return self._prepare_output(chain_output)
320
320
 
321
- def _prepare_evaluator_output(self, output: Dict[str, Any]) -> EvaluationResult:
321
+ def _prepare_evaluator_output(self, output: dict[str, Any]) -> EvaluationResult:
322
322
  feedback: EvaluationResult = output["feedback"]
323
323
  if RUN_KEY not in feedback.evaluator_info:
324
324
  feedback.evaluator_info[RUN_KEY] = output[RUN_KEY]
@@ -362,7 +362,7 @@ class StringRunEvaluatorChain(Chain, RunEvaluator): # type: ignore[override, ov
362
362
  input_key: Optional[str] = None,
363
363
  prediction_key: Optional[str] = None,
364
364
  reference_key: Optional[str] = None,
365
- tags: Optional[List[str]] = None,
365
+ tags: Optional[list[str]] = None,
366
366
  ) -> StringRunEvaluatorChain:
367
367
  """
368
368
  Create a StringRunEvaluatorChain from an evaluator and the run and dataset types.
@@ -1,12 +1,8 @@
1
+ from collections.abc import AsyncIterator, Iterator, Sequence
1
2
  from typing import (
2
3
  Any,
3
- AsyncIterator,
4
4
  Callable,
5
- Iterator,
6
- List,
7
5
  Optional,
8
- Sequence,
9
- Tuple,
10
6
  TypeVar,
11
7
  Union,
12
8
  )
@@ -65,25 +61,25 @@ class EncoderBackedStore(BaseStore[K, V]):
65
61
  self.value_serializer = value_serializer
66
62
  self.value_deserializer = value_deserializer
67
63
 
68
- def mget(self, keys: Sequence[K]) -> List[Optional[V]]:
64
+ def mget(self, keys: Sequence[K]) -> list[Optional[V]]:
69
65
  """Get the values associated with the given keys."""
70
- encoded_keys: List[str] = [self.key_encoder(key) for key in keys]
66
+ encoded_keys: list[str] = [self.key_encoder(key) for key in keys]
71
67
  values = self.store.mget(encoded_keys)
72
68
  return [
73
69
  self.value_deserializer(value) if value is not None else value
74
70
  for value in values
75
71
  ]
76
72
 
77
- async def amget(self, keys: Sequence[K]) -> List[Optional[V]]:
73
+ async def amget(self, keys: Sequence[K]) -> list[Optional[V]]:
78
74
  """Get the values associated with the given keys."""
79
- encoded_keys: List[str] = [self.key_encoder(key) for key in keys]
75
+ encoded_keys: list[str] = [self.key_encoder(key) for key in keys]
80
76
  values = await self.store.amget(encoded_keys)
81
77
  return [
82
78
  self.value_deserializer(value) if value is not None else value
83
79
  for value in values
84
80
  ]
85
81
 
86
- def mset(self, key_value_pairs: Sequence[Tuple[K, V]]) -> None:
82
+ def mset(self, key_value_pairs: Sequence[tuple[K, V]]) -> None:
87
83
  """Set the values for the given keys."""
88
84
  encoded_pairs = [
89
85
  (self.key_encoder(key), self.value_serializer(value))
@@ -91,7 +87,7 @@ class EncoderBackedStore(BaseStore[K, V]):
91
87
  ]
92
88
  self.store.mset(encoded_pairs)
93
89
 
94
- async def amset(self, key_value_pairs: Sequence[Tuple[K, V]]) -> None:
90
+ async def amset(self, key_value_pairs: Sequence[tuple[K, V]]) -> None:
95
91
  """Set the values for the given keys."""
96
92
  encoded_pairs = [
97
93
  (self.key_encoder(key), self.value_serializer(value))