langchain 0.3.23__py3-none-any.whl → 0.3.25__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of langchain might be problematic. Click here for more details.

Files changed (170)
  1. langchain/_api/module_import.py +3 -3
  2. langchain/agents/agent.py +104 -109
  3. langchain/agents/agent_iterator.py +11 -15
  4. langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +2 -2
  5. langchain/agents/agent_toolkits/vectorstore/base.py +3 -3
  6. langchain/agents/agent_toolkits/vectorstore/toolkit.py +4 -6
  7. langchain/agents/chat/base.py +7 -6
  8. langchain/agents/chat/output_parser.py +2 -1
  9. langchain/agents/conversational/base.py +5 -4
  10. langchain/agents/conversational_chat/base.py +9 -8
  11. langchain/agents/format_scratchpad/log.py +1 -3
  12. langchain/agents/format_scratchpad/log_to_messages.py +3 -5
  13. langchain/agents/format_scratchpad/openai_functions.py +4 -4
  14. langchain/agents/format_scratchpad/tools.py +3 -3
  15. langchain/agents/format_scratchpad/xml.py +1 -3
  16. langchain/agents/initialize.py +2 -1
  17. langchain/agents/json_chat/base.py +3 -2
  18. langchain/agents/loading.py +5 -5
  19. langchain/agents/mrkl/base.py +6 -5
  20. langchain/agents/openai_assistant/base.py +17 -17
  21. langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +6 -6
  22. langchain/agents/openai_functions_agent/base.py +13 -12
  23. langchain/agents/openai_functions_multi_agent/base.py +15 -14
  24. langchain/agents/openai_tools/base.py +2 -1
  25. langchain/agents/output_parsers/openai_functions.py +2 -2
  26. langchain/agents/output_parsers/openai_tools.py +6 -6
  27. langchain/agents/output_parsers/react_json_single_input.py +2 -1
  28. langchain/agents/output_parsers/self_ask.py +2 -1
  29. langchain/agents/output_parsers/tools.py +7 -7
  30. langchain/agents/react/agent.py +3 -2
  31. langchain/agents/react/base.py +4 -3
  32. langchain/agents/schema.py +3 -3
  33. langchain/agents/self_ask_with_search/base.py +2 -1
  34. langchain/agents/structured_chat/base.py +9 -8
  35. langchain/agents/structured_chat/output_parser.py +2 -1
  36. langchain/agents/tool_calling_agent/base.py +3 -2
  37. langchain/agents/tools.py +4 -4
  38. langchain/agents/types.py +3 -3
  39. langchain/agents/utils.py +1 -1
  40. langchain/agents/xml/base.py +7 -6
  41. langchain/callbacks/streaming_aiter.py +3 -2
  42. langchain/callbacks/streaming_aiter_final_only.py +3 -3
  43. langchain/callbacks/streaming_stdout_final_only.py +3 -3
  44. langchain/chains/api/base.py +11 -12
  45. langchain/chains/base.py +47 -50
  46. langchain/chains/combine_documents/base.py +23 -23
  47. langchain/chains/combine_documents/map_reduce.py +12 -12
  48. langchain/chains/combine_documents/map_rerank.py +16 -15
  49. langchain/chains/combine_documents/reduce.py +17 -17
  50. langchain/chains/combine_documents/refine.py +12 -12
  51. langchain/chains/combine_documents/stuff.py +10 -10
  52. langchain/chains/constitutional_ai/base.py +9 -9
  53. langchain/chains/conversation/base.py +2 -4
  54. langchain/chains/conversational_retrieval/base.py +30 -30
  55. langchain/chains/elasticsearch_database/base.py +13 -13
  56. langchain/chains/example_generator.py +1 -3
  57. langchain/chains/flare/base.py +13 -12
  58. langchain/chains/flare/prompts.py +2 -4
  59. langchain/chains/hyde/base.py +8 -8
  60. langchain/chains/llm.py +31 -30
  61. langchain/chains/llm_checker/base.py +6 -6
  62. langchain/chains/llm_math/base.py +10 -10
  63. langchain/chains/llm_summarization_checker/base.py +6 -6
  64. langchain/chains/loading.py +12 -14
  65. langchain/chains/mapreduce.py +7 -6
  66. langchain/chains/moderation.py +8 -8
  67. langchain/chains/natbot/base.py +6 -6
  68. langchain/chains/openai_functions/base.py +8 -10
  69. langchain/chains/openai_functions/citation_fuzzy_match.py +4 -4
  70. langchain/chains/openai_functions/extraction.py +3 -3
  71. langchain/chains/openai_functions/openapi.py +12 -12
  72. langchain/chains/openai_functions/qa_with_structure.py +4 -4
  73. langchain/chains/openai_functions/utils.py +2 -2
  74. langchain/chains/openai_tools/extraction.py +2 -2
  75. langchain/chains/prompt_selector.py +3 -3
  76. langchain/chains/qa_generation/base.py +5 -5
  77. langchain/chains/qa_with_sources/base.py +21 -21
  78. langchain/chains/qa_with_sources/loading.py +2 -1
  79. langchain/chains/qa_with_sources/retrieval.py +6 -6
  80. langchain/chains/qa_with_sources/vector_db.py +8 -8
  81. langchain/chains/query_constructor/base.py +4 -3
  82. langchain/chains/query_constructor/parser.py +5 -4
  83. langchain/chains/question_answering/chain.py +3 -2
  84. langchain/chains/retrieval.py +2 -2
  85. langchain/chains/retrieval_qa/base.py +16 -16
  86. langchain/chains/router/base.py +12 -11
  87. langchain/chains/router/embedding_router.py +12 -11
  88. langchain/chains/router/llm_router.py +12 -12
  89. langchain/chains/router/multi_prompt.py +3 -3
  90. langchain/chains/router/multi_retrieval_qa.py +5 -4
  91. langchain/chains/sequential.py +18 -18
  92. langchain/chains/sql_database/query.py +21 -5
  93. langchain/chains/structured_output/base.py +14 -13
  94. langchain/chains/summarize/chain.py +4 -3
  95. langchain/chains/transform.py +12 -11
  96. langchain/chat_models/base.py +27 -31
  97. langchain/embeddings/__init__.py +1 -1
  98. langchain/embeddings/base.py +4 -6
  99. langchain/embeddings/cache.py +19 -18
  100. langchain/evaluation/agents/trajectory_eval_chain.py +16 -19
  101. langchain/evaluation/comparison/eval_chain.py +10 -10
  102. langchain/evaluation/criteria/eval_chain.py +11 -10
  103. langchain/evaluation/embedding_distance/base.py +21 -21
  104. langchain/evaluation/exact_match/base.py +3 -3
  105. langchain/evaluation/loading.py +7 -8
  106. langchain/evaluation/qa/eval_chain.py +7 -6
  107. langchain/evaluation/regex_match/base.py +3 -3
  108. langchain/evaluation/schema.py +6 -5
  109. langchain/evaluation/scoring/eval_chain.py +9 -9
  110. langchain/evaluation/string_distance/base.py +23 -23
  111. langchain/hub.py +2 -1
  112. langchain/indexes/_sql_record_manager.py +8 -7
  113. langchain/indexes/vectorstore.py +11 -11
  114. langchain/llms/__init__.py +3 -3
  115. langchain/memory/buffer.py +13 -13
  116. langchain/memory/buffer_window.py +5 -5
  117. langchain/memory/chat_memory.py +5 -5
  118. langchain/memory/combined.py +10 -10
  119. langchain/memory/entity.py +8 -7
  120. langchain/memory/readonly.py +4 -4
  121. langchain/memory/simple.py +5 -5
  122. langchain/memory/summary.py +8 -8
  123. langchain/memory/summary_buffer.py +11 -11
  124. langchain/memory/token_buffer.py +5 -5
  125. langchain/memory/utils.py +2 -2
  126. langchain/memory/vectorstore.py +15 -14
  127. langchain/memory/vectorstore_token_buffer_memory.py +7 -7
  128. langchain/model_laboratory.py +4 -3
  129. langchain/output_parsers/combining.py +5 -5
  130. langchain/output_parsers/datetime.py +1 -2
  131. langchain/output_parsers/enum.py +4 -5
  132. langchain/output_parsers/pandas_dataframe.py +5 -5
  133. langchain/output_parsers/regex.py +4 -4
  134. langchain/output_parsers/regex_dict.py +4 -4
  135. langchain/output_parsers/retry.py +2 -2
  136. langchain/output_parsers/structured.py +5 -5
  137. langchain/output_parsers/yaml.py +3 -3
  138. langchain/pydantic_v1/__init__.py +1 -6
  139. langchain/pydantic_v1/dataclasses.py +1 -5
  140. langchain/pydantic_v1/main.py +1 -5
  141. langchain/retrievers/contextual_compression.py +3 -3
  142. langchain/retrievers/document_compressors/base.py +3 -2
  143. langchain/retrievers/document_compressors/chain_extract.py +4 -3
  144. langchain/retrievers/document_compressors/chain_filter.py +3 -2
  145. langchain/retrievers/document_compressors/cohere_rerank.py +4 -3
  146. langchain/retrievers/document_compressors/cross_encoder.py +1 -2
  147. langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -1
  148. langchain/retrievers/document_compressors/embeddings_filter.py +3 -2
  149. langchain/retrievers/document_compressors/listwise_rerank.py +6 -5
  150. langchain/retrievers/ensemble.py +15 -19
  151. langchain/retrievers/merger_retriever.py +7 -12
  152. langchain/retrievers/multi_query.py +14 -13
  153. langchain/retrievers/multi_vector.py +4 -4
  154. langchain/retrievers/parent_document_retriever.py +9 -8
  155. langchain/retrievers/re_phraser.py +2 -3
  156. langchain/retrievers/self_query/base.py +13 -12
  157. langchain/retrievers/time_weighted_retriever.py +14 -14
  158. langchain/runnables/openai_functions.py +4 -3
  159. langchain/smith/evaluation/config.py +7 -6
  160. langchain/smith/evaluation/progress.py +3 -2
  161. langchain/smith/evaluation/runner_utils.py +66 -69
  162. langchain/smith/evaluation/string_run_evaluator.py +38 -31
  163. langchain/storage/encoder_backed.py +7 -11
  164. langchain/storage/file_system.py +5 -4
  165. {langchain-0.3.23.dist-info → langchain-0.3.25.dist-info}/METADATA +3 -3
  166. {langchain-0.3.23.dist-info → langchain-0.3.25.dist-info}/RECORD +169 -169
  167. {langchain-0.3.23.dist-info → langchain-0.3.25.dist-info}/WHEEL +1 -1
  168. langchain-0.3.25.dist-info/entry_points.txt +4 -0
  169. langchain-0.3.23.dist-info/entry_points.txt +0 -5
  170. {langchain-0.3.23.dist-info → langchain-0.3.25.dist-info}/licenses/LICENSE +0 -0
@@ -1,6 +1,7 @@
1
1
  """Configuration for run evaluators."""
2
2
 
3
- from typing import Any, Callable, Dict, List, Optional, Sequence, Union
3
+ from collections.abc import Sequence
4
+ from typing import Any, Callable, Optional, Union
4
5
 
5
6
  from langchain_core.embeddings import Embeddings
6
7
  from langchain_core.language_models import BaseLanguageModel
@@ -45,7 +46,7 @@ class EvalConfig(BaseModel):
45
46
 
46
47
  evaluator_type: EvaluatorType
47
48
 
48
- def get_kwargs(self) -> Dict[str, Any]:
49
+ def get_kwargs(self) -> dict[str, Any]:
49
50
  """Get the keyword arguments for the load_evaluator call.
50
51
 
51
52
  Returns
@@ -78,7 +79,7 @@ class SingleKeyEvalConfig(EvalConfig):
78
79
  """The key from the traced run's inputs dictionary to use to represent the
79
80
  input. If not provided, it will be inferred automatically."""
80
81
 
81
- def get_kwargs(self) -> Dict[str, Any]:
82
+ def get_kwargs(self) -> dict[str, Any]:
82
83
  kwargs = super().get_kwargs()
83
84
  # Filer out the keys that are not needed for the evaluator.
84
85
  for key in ["reference_key", "prediction_key", "input_key"]:
@@ -121,7 +122,7 @@ class RunEvalConfig(BaseModel):
121
122
  The language model to pass to any evaluators that use a language model.
122
123
  """ # noqa: E501
123
124
 
124
- evaluators: List[
125
+ evaluators: list[
125
126
  Union[
126
127
  SINGLE_EVAL_CONFIG_TYPE,
127
128
  CUSTOM_EVALUATOR_TYPE,
@@ -134,9 +135,9 @@ class RunEvalConfig(BaseModel):
134
135
  given evaluator
135
136
  (e.g.,
136
137
  :class:`RunEvalConfig.QA <langchain.smith.evaluation.config.RunEvalConfig.QA>`)."""
137
- custom_evaluators: Optional[List[CUSTOM_EVALUATOR_TYPE]] = None
138
+ custom_evaluators: Optional[list[CUSTOM_EVALUATOR_TYPE]] = None
138
139
  """Custom evaluators to apply to the dataset run."""
139
- batch_evaluators: Optional[List[BATCH_EVALUATOR_LIKE]] = None
140
+ batch_evaluators: Optional[list[BATCH_EVALUATOR_LIKE]] = None
140
141
  """Evaluators that run on an aggregate/batch level.
141
142
 
142
143
  These generate 1 or more metrics that are assigned to the full test run.
@@ -1,7 +1,8 @@
1
1
  """A simple progress bar for the console."""
2
2
 
3
3
  import threading
4
- from typing import Any, Dict, Optional, Sequence
4
+ from collections.abc import Sequence
5
+ from typing import Any, Optional
5
6
  from uuid import UUID
6
7
 
7
8
  from langchain_core.callbacks import base as base_callbacks
@@ -51,7 +52,7 @@ class ProgressBarCallback(base_callbacks.BaseCallbackHandler):
51
52
 
52
53
  def on_chain_end(
53
54
  self,
54
- outputs: Dict[str, Any],
55
+ outputs: dict[str, Any],
55
56
  *,
56
57
  run_id: UUID,
57
58
  parent_run_id: Optional[UUID] = None,
@@ -13,10 +13,7 @@ from typing import (
13
13
  TYPE_CHECKING,
14
14
  Any,
15
15
  Callable,
16
- Dict,
17
- List,
18
16
  Optional,
19
- Tuple,
20
17
  Union,
21
18
  cast,
22
19
  )
@@ -229,7 +226,7 @@ def _wrap_in_chain_factory(
229
226
  return llm_or_chain_factory
230
227
 
231
228
 
232
- def _get_prompt(inputs: Dict[str, Any]) -> str:
229
+ def _get_prompt(inputs: dict[str, Any]) -> str:
233
230
  """Get prompt from inputs.
234
231
 
235
232
  Args:
@@ -286,10 +283,10 @@ class ChatModelInput(TypedDict):
286
283
  messages: List of chat messages.
287
284
  """
288
285
 
289
- messages: List[BaseMessage]
286
+ messages: list[BaseMessage]
290
287
 
291
288
 
292
- def _get_messages(inputs: Dict[str, Any]) -> dict:
289
+ def _get_messages(inputs: dict[str, Any]) -> dict:
293
290
  """Get Chat Messages from inputs.
294
291
 
295
292
  Args:
@@ -331,10 +328,10 @@ def _get_messages(inputs: Dict[str, Any]) -> dict:
331
328
  ## Shared data validation utilities
332
329
  def _validate_example_inputs_for_language_model(
333
330
  first_example: Example,
334
- input_mapper: Optional[Callable[[Dict], Any]],
331
+ input_mapper: Optional[Callable[[dict], Any]],
335
332
  ) -> None:
336
333
  if input_mapper:
337
- prompt_input = input_mapper(first_example.inputs)
334
+ prompt_input = input_mapper(first_example.inputs or {})
338
335
  if not isinstance(prompt_input, str) and not (
339
336
  isinstance(prompt_input, list)
340
337
  and all(isinstance(msg, BaseMessage) for msg in prompt_input)
@@ -347,10 +344,10 @@ def _validate_example_inputs_for_language_model(
347
344
  )
348
345
  else:
349
346
  try:
350
- _get_prompt(first_example.inputs)
347
+ _get_prompt(first_example.inputs or {})
351
348
  except InputFormatError:
352
349
  try:
353
- _get_messages(first_example.inputs)
350
+ _get_messages(first_example.inputs or {})
354
351
  except InputFormatError:
355
352
  raise InputFormatError(
356
353
  "Example inputs do not match language model input format. "
@@ -365,11 +362,11 @@ def _validate_example_inputs_for_language_model(
365
362
  def _validate_example_inputs_for_chain(
366
363
  first_example: Example,
367
364
  chain: Chain,
368
- input_mapper: Optional[Callable[[Dict], Any]],
365
+ input_mapper: Optional[Callable[[dict], Any]],
369
366
  ) -> None:
370
367
  """Validate that the example inputs match the chain input keys."""
371
368
  if input_mapper:
372
- first_inputs = input_mapper(first_example.inputs)
369
+ first_inputs = input_mapper(first_example.inputs or {})
373
370
  missing_keys = set(chain.input_keys).difference(first_inputs)
374
371
  if not isinstance(first_inputs, dict):
375
372
  raise InputFormatError(
@@ -402,7 +399,7 @@ def _validate_example_inputs_for_chain(
402
399
  def _validate_example_inputs(
403
400
  example: Example,
404
401
  llm_or_chain_factory: MCF,
405
- input_mapper: Optional[Callable[[Dict], Any]],
402
+ input_mapper: Optional[Callable[[dict], Any]],
406
403
  ) -> None:
407
404
  """Validate that the example inputs are valid for the model."""
408
405
  if isinstance(llm_or_chain_factory, BaseLanguageModel):
@@ -421,10 +418,10 @@ def _validate_example_inputs(
421
418
 
422
419
  def _setup_evaluation(
423
420
  llm_or_chain_factory: MCF,
424
- examples: List[Example],
421
+ examples: list[Example],
425
422
  evaluation: Optional[smith_eval.RunEvalConfig],
426
423
  data_type: DataType,
427
- ) -> Optional[List[RunEvaluator]]:
424
+ ) -> Optional[list[RunEvaluator]]:
428
425
  """Configure the evaluators to run on the results of the chain."""
429
426
  if evaluation:
430
427
  if isinstance(llm_or_chain_factory, BaseLanguageModel):
@@ -451,7 +448,7 @@ def _setup_evaluation(
451
448
 
452
449
  def _determine_input_key(
453
450
  config: smith_eval.RunEvalConfig,
454
- run_inputs: Optional[List[str]],
451
+ run_inputs: Optional[list[str]],
455
452
  ) -> Optional[str]:
456
453
  input_key = None
457
454
  if config.input_key:
@@ -475,7 +472,7 @@ def _determine_input_key(
475
472
 
476
473
  def _determine_prediction_key(
477
474
  config: smith_eval.RunEvalConfig,
478
- run_outputs: Optional[List[str]],
475
+ run_outputs: Optional[list[str]],
479
476
  ) -> Optional[str]:
480
477
  prediction_key = None
481
478
  if config.prediction_key:
@@ -498,7 +495,7 @@ def _determine_prediction_key(
498
495
 
499
496
  def _determine_reference_key(
500
497
  config: smith_eval.RunEvalConfig,
501
- example_outputs: Optional[List[str]],
498
+ example_outputs: Optional[list[str]],
502
499
  ) -> Optional[str]:
503
500
  if config.reference_key:
504
501
  reference_key = config.reference_key
@@ -522,7 +519,7 @@ def _construct_run_evaluator(
522
519
  eval_llm: Optional[BaseLanguageModel],
523
520
  run_type: str,
524
521
  data_type: DataType,
525
- example_outputs: Optional[List[str]],
522
+ example_outputs: Optional[list[str]],
526
523
  reference_key: Optional[str],
527
524
  input_key: Optional[str],
528
525
  prediction_key: Optional[str],
@@ -583,10 +580,10 @@ def _construct_run_evaluator(
583
580
 
584
581
  def _get_keys(
585
582
  config: smith_eval.RunEvalConfig,
586
- run_inputs: Optional[List[str]],
587
- run_outputs: Optional[List[str]],
588
- example_outputs: Optional[List[str]],
589
- ) -> Tuple[Optional[str], Optional[str], Optional[str]]:
583
+ run_inputs: Optional[list[str]],
584
+ run_outputs: Optional[list[str]],
585
+ example_outputs: Optional[list[str]],
586
+ ) -> tuple[Optional[str], Optional[str], Optional[str]]:
590
587
  input_key = _determine_input_key(config, run_inputs)
591
588
  prediction_key = _determine_prediction_key(config, run_outputs)
592
589
  reference_key = _determine_reference_key(config, example_outputs)
@@ -597,10 +594,10 @@ def _load_run_evaluators(
597
594
  config: smith_eval.RunEvalConfig,
598
595
  run_type: str,
599
596
  data_type: DataType,
600
- example_outputs: Optional[List[str]],
601
- run_inputs: Optional[List[str]],
602
- run_outputs: Optional[List[str]],
603
- ) -> List[RunEvaluator]:
597
+ example_outputs: Optional[list[str]],
598
+ run_inputs: Optional[list[str]],
599
+ run_outputs: Optional[list[str]],
600
+ ) -> list[RunEvaluator]:
604
601
  """
605
602
  Load run evaluators from a configuration.
606
603
 
@@ -662,12 +659,12 @@ def _load_run_evaluators(
662
659
 
663
660
  async def _arun_llm(
664
661
  llm: BaseLanguageModel,
665
- inputs: Dict[str, Any],
662
+ inputs: dict[str, Any],
666
663
  *,
667
- tags: Optional[List[str]] = None,
664
+ tags: Optional[list[str]] = None,
668
665
  callbacks: Callbacks = None,
669
- input_mapper: Optional[Callable[[Dict], Any]] = None,
670
- metadata: Optional[Dict[str, Any]] = None,
666
+ input_mapper: Optional[Callable[[dict], Any]] = None,
667
+ metadata: Optional[dict[str, Any]] = None,
671
668
  ) -> Union[str, BaseMessage]:
672
669
  """Asynchronously run the language model.
673
670
 
@@ -726,12 +723,12 @@ async def _arun_llm(
726
723
 
727
724
  async def _arun_chain(
728
725
  chain: Union[Chain, Runnable],
729
- inputs: Dict[str, Any],
726
+ inputs: dict[str, Any],
730
727
  callbacks: Callbacks,
731
728
  *,
732
- tags: Optional[List[str]] = None,
733
- input_mapper: Optional[Callable[[Dict], Any]] = None,
734
- metadata: Optional[Dict[str, Any]] = None,
729
+ tags: Optional[list[str]] = None,
730
+ input_mapper: Optional[Callable[[dict], Any]] = None,
731
+ metadata: Optional[dict[str, Any]] = None,
735
732
  ) -> Union[dict, str]:
736
733
  """Run a chain asynchronously on inputs."""
737
734
  inputs_ = inputs if input_mapper is None else input_mapper(inputs)
@@ -761,7 +758,7 @@ async def _arun_llm_or_chain(
761
758
  config: RunnableConfig,
762
759
  *,
763
760
  llm_or_chain_factory: MCF,
764
- input_mapper: Optional[Callable[[Dict], Any]] = None,
761
+ input_mapper: Optional[Callable[[dict], Any]] = None,
765
762
  ) -> Union[dict, str, LLMResult, ChatResult]:
766
763
  """Asynchronously run the Chain or language model.
767
764
 
@@ -783,7 +780,7 @@ async def _arun_llm_or_chain(
783
780
  if isinstance(llm_or_chain_factory, BaseLanguageModel):
784
781
  output: Any = await _arun_llm(
785
782
  llm_or_chain_factory,
786
- example.inputs,
783
+ example.inputs or {},
787
784
  tags=config["tags"],
788
785
  callbacks=config["callbacks"],
789
786
  input_mapper=input_mapper,
@@ -793,7 +790,7 @@ async def _arun_llm_or_chain(
793
790
  chain = llm_or_chain_factory()
794
791
  output = await _arun_chain(
795
792
  chain,
796
- example.inputs,
793
+ example.inputs or {},
797
794
  tags=config["tags"],
798
795
  callbacks=config["callbacks"],
799
796
  input_mapper=input_mapper,
@@ -815,12 +812,12 @@ async def _arun_llm_or_chain(
815
812
 
816
813
  def _run_llm(
817
814
  llm: BaseLanguageModel,
818
- inputs: Dict[str, Any],
815
+ inputs: dict[str, Any],
819
816
  callbacks: Callbacks,
820
817
  *,
821
- tags: Optional[List[str]] = None,
822
- input_mapper: Optional[Callable[[Dict], Any]] = None,
823
- metadata: Optional[Dict[str, Any]] = None,
818
+ tags: Optional[list[str]] = None,
819
+ input_mapper: Optional[Callable[[dict], Any]] = None,
820
+ metadata: Optional[dict[str, Any]] = None,
824
821
  ) -> Union[str, BaseMessage]:
825
822
  """
826
823
  Run the language model on the example.
@@ -877,13 +874,13 @@ def _run_llm(
877
874
 
878
875
  def _run_chain(
879
876
  chain: Union[Chain, Runnable],
880
- inputs: Dict[str, Any],
877
+ inputs: dict[str, Any],
881
878
  callbacks: Callbacks,
882
879
  *,
883
- tags: Optional[List[str]] = None,
884
- input_mapper: Optional[Callable[[Dict], Any]] = None,
885
- metadata: Optional[Dict[str, Any]] = None,
886
- ) -> Union[Dict, str]:
880
+ tags: Optional[list[str]] = None,
881
+ input_mapper: Optional[Callable[[dict], Any]] = None,
882
+ metadata: Optional[dict[str, Any]] = None,
883
+ ) -> Union[dict, str]:
887
884
  """Run a chain on inputs."""
888
885
  inputs_ = inputs if input_mapper is None else input_mapper(inputs)
889
886
  if (
@@ -912,7 +909,7 @@ def _run_llm_or_chain(
912
909
  config: RunnableConfig,
913
910
  *,
914
911
  llm_or_chain_factory: MCF,
915
- input_mapper: Optional[Callable[[Dict], Any]] = None,
912
+ input_mapper: Optional[Callable[[dict], Any]] = None,
916
913
  ) -> Union[dict, str, LLMResult, ChatResult]:
917
914
  """
918
915
  Run the Chain or language model synchronously.
@@ -935,7 +932,7 @@ def _run_llm_or_chain(
935
932
  if isinstance(llm_or_chain_factory, BaseLanguageModel):
936
933
  output: Any = _run_llm(
937
934
  llm_or_chain_factory,
938
- example.inputs,
935
+ example.inputs or {},
939
936
  config["callbacks"],
940
937
  tags=config["tags"],
941
938
  input_mapper=input_mapper,
@@ -945,7 +942,7 @@ def _run_llm_or_chain(
945
942
  chain = llm_or_chain_factory()
946
943
  output = _run_chain(
947
944
  chain,
948
- example.inputs,
945
+ example.inputs or {},
949
946
  config["callbacks"],
950
947
  tags=config["tags"],
951
948
  input_mapper=input_mapper,
@@ -968,10 +965,10 @@ def _prepare_eval_run(
968
965
  dataset_name: str,
969
966
  llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
970
967
  project_name: str,
971
- project_metadata: Optional[Dict[str, Any]] = None,
972
- tags: Optional[List[str]] = None,
968
+ project_metadata: Optional[dict[str, Any]] = None,
969
+ tags: Optional[list[str]] = None,
973
970
  dataset_version: Optional[Union[str, datetime]] = None,
974
- ) -> Tuple[MCF, TracerSession, Dataset, List[Example]]:
971
+ ) -> tuple[MCF, TracerSession, Dataset, list[Example]]:
975
972
  wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory, dataset_name)
976
973
  dataset = client.read_dataset(dataset_name=dataset_name)
977
974
 
@@ -1027,7 +1024,7 @@ run_on_dataset(
1027
1024
  class _RowResult(TypedDict, total=False):
1028
1025
  """A dictionary of the results for a single example row."""
1029
1026
 
1030
- feedback: Optional[List[EvaluationResult]]
1027
+ feedback: Optional[list[EvaluationResult]]
1031
1028
  execution_time: Optional[float]
1032
1029
  run_id: Optional[str]
1033
1030
 
@@ -1039,14 +1036,14 @@ class _DatasetRunContainer:
1039
1036
  client: Client
1040
1037
  project: TracerSession
1041
1038
  wrapped_model: MCF
1042
- examples: List[Example]
1043
- configs: List[RunnableConfig]
1044
- batch_evaluators: Optional[List[smith_eval_config.BATCH_EVALUATOR_LIKE]] = None
1039
+ examples: list[Example]
1040
+ configs: list[RunnableConfig]
1041
+ batch_evaluators: Optional[list[smith_eval_config.BATCH_EVALUATOR_LIKE]] = None
1045
1042
 
1046
1043
  def _merge_test_outputs(
1047
1044
  self,
1048
1045
  batch_results: list,
1049
- all_eval_results: Dict[str, _RowResult],
1046
+ all_eval_results: dict[str, _RowResult],
1050
1047
  ) -> dict:
1051
1048
  results: dict = {}
1052
1049
  for example, output in zip(self.examples, batch_results):
@@ -1065,7 +1062,7 @@ class _DatasetRunContainer:
1065
1062
  results[str(example.id)]["reference"] = example.outputs
1066
1063
  return results
1067
1064
 
1068
- def _run_batch_evaluators(self, runs: Dict[str, Run]) -> List[dict]:
1065
+ def _run_batch_evaluators(self, runs: dict[str, Run]) -> list[dict]:
1069
1066
  evaluators = self.batch_evaluators
1070
1067
  if not evaluators:
1071
1068
  return []
@@ -1090,7 +1087,7 @@ class _DatasetRunContainer:
1090
1087
  )
1091
1088
  return aggregate_feedback
1092
1089
 
1093
- def _collect_metrics(self) -> Tuple[Dict[str, _RowResult], Dict[str, Run]]:
1090
+ def _collect_metrics(self) -> tuple[dict[str, _RowResult], dict[str, Run]]:
1094
1091
  all_eval_results: dict = {}
1095
1092
  all_runs: dict = {}
1096
1093
  for c in self.configs:
@@ -1117,11 +1114,11 @@ class _DatasetRunContainer:
1117
1114
  }
1118
1115
  )
1119
1116
  all_runs[str(callback.example_id)] = run
1120
- return cast(Dict[str, _RowResult], all_eval_results), all_runs
1117
+ return cast(dict[str, _RowResult], all_eval_results), all_runs
1121
1118
 
1122
1119
  def _collect_test_results(
1123
1120
  self,
1124
- batch_results: List[Union[dict, str, LLMResult, ChatResult]],
1121
+ batch_results: list[Union[dict, str, LLMResult, ChatResult]],
1125
1122
  ) -> TestResult:
1126
1123
  logger.info("Waiting for evaluators to complete.")
1127
1124
  wait_for_all_evaluators()
@@ -1162,10 +1159,10 @@ class _DatasetRunContainer:
1162
1159
  llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
1163
1160
  project_name: Optional[str],
1164
1161
  evaluation: Optional[smith_eval.RunEvalConfig] = None,
1165
- tags: Optional[List[str]] = None,
1166
- input_mapper: Optional[Callable[[Dict], Any]] = None,
1162
+ tags: Optional[list[str]] = None,
1163
+ input_mapper: Optional[Callable[[dict], Any]] = None,
1167
1164
  concurrency_level: int = 5,
1168
- project_metadata: Optional[Dict[str, Any]] = None,
1165
+ project_metadata: Optional[dict[str, Any]] = None,
1169
1166
  revision_id: Optional[str] = None,
1170
1167
  dataset_version: Optional[Union[datetime, str]] = None,
1171
1168
  ) -> _DatasetRunContainer:
@@ -1277,11 +1274,11 @@ async def arun_on_dataset(
1277
1274
  dataset_version: Optional[Union[datetime, str]] = None,
1278
1275
  concurrency_level: int = 5,
1279
1276
  project_name: Optional[str] = None,
1280
- project_metadata: Optional[Dict[str, Any]] = None,
1277
+ project_metadata: Optional[dict[str, Any]] = None,
1281
1278
  verbose: bool = False,
1282
1279
  revision_id: Optional[str] = None,
1283
1280
  **kwargs: Any,
1284
- ) -> Dict[str, Any]:
1281
+ ) -> dict[str, Any]:
1285
1282
  input_mapper = kwargs.pop("input_mapper", None)
1286
1283
  if input_mapper:
1287
1284
  warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)
@@ -1342,11 +1339,11 @@ def run_on_dataset(
1342
1339
  dataset_version: Optional[Union[datetime, str]] = None,
1343
1340
  concurrency_level: int = 5,
1344
1341
  project_name: Optional[str] = None,
1345
- project_metadata: Optional[Dict[str, Any]] = None,
1342
+ project_metadata: Optional[dict[str, Any]] = None,
1346
1343
  verbose: bool = False,
1347
1344
  revision_id: Optional[str] = None,
1348
1345
  **kwargs: Any,
1349
- ) -> Dict[str, Any]:
1346
+ ) -> dict[str, Any]:
1350
1347
  input_mapper = kwargs.pop("input_mapper", None)
1351
1348
  if input_mapper:
1352
1349
  warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)