langchain 0.3.23__py3-none-any.whl → 0.3.25__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of langchain might be problematic. Click here for more details.
- langchain/_api/module_import.py +3 -3
- langchain/agents/agent.py +104 -109
- langchain/agents/agent_iterator.py +11 -15
- langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +2 -2
- langchain/agents/agent_toolkits/vectorstore/base.py +3 -3
- langchain/agents/agent_toolkits/vectorstore/toolkit.py +4 -6
- langchain/agents/chat/base.py +7 -6
- langchain/agents/chat/output_parser.py +2 -1
- langchain/agents/conversational/base.py +5 -4
- langchain/agents/conversational_chat/base.py +9 -8
- langchain/agents/format_scratchpad/log.py +1 -3
- langchain/agents/format_scratchpad/log_to_messages.py +3 -5
- langchain/agents/format_scratchpad/openai_functions.py +4 -4
- langchain/agents/format_scratchpad/tools.py +3 -3
- langchain/agents/format_scratchpad/xml.py +1 -3
- langchain/agents/initialize.py +2 -1
- langchain/agents/json_chat/base.py +3 -2
- langchain/agents/loading.py +5 -5
- langchain/agents/mrkl/base.py +6 -5
- langchain/agents/openai_assistant/base.py +17 -17
- langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +6 -6
- langchain/agents/openai_functions_agent/base.py +13 -12
- langchain/agents/openai_functions_multi_agent/base.py +15 -14
- langchain/agents/openai_tools/base.py +2 -1
- langchain/agents/output_parsers/openai_functions.py +2 -2
- langchain/agents/output_parsers/openai_tools.py +6 -6
- langchain/agents/output_parsers/react_json_single_input.py +2 -1
- langchain/agents/output_parsers/self_ask.py +2 -1
- langchain/agents/output_parsers/tools.py +7 -7
- langchain/agents/react/agent.py +3 -2
- langchain/agents/react/base.py +4 -3
- langchain/agents/schema.py +3 -3
- langchain/agents/self_ask_with_search/base.py +2 -1
- langchain/agents/structured_chat/base.py +9 -8
- langchain/agents/structured_chat/output_parser.py +2 -1
- langchain/agents/tool_calling_agent/base.py +3 -2
- langchain/agents/tools.py +4 -4
- langchain/agents/types.py +3 -3
- langchain/agents/utils.py +1 -1
- langchain/agents/xml/base.py +7 -6
- langchain/callbacks/streaming_aiter.py +3 -2
- langchain/callbacks/streaming_aiter_final_only.py +3 -3
- langchain/callbacks/streaming_stdout_final_only.py +3 -3
- langchain/chains/api/base.py +11 -12
- langchain/chains/base.py +47 -50
- langchain/chains/combine_documents/base.py +23 -23
- langchain/chains/combine_documents/map_reduce.py +12 -12
- langchain/chains/combine_documents/map_rerank.py +16 -15
- langchain/chains/combine_documents/reduce.py +17 -17
- langchain/chains/combine_documents/refine.py +12 -12
- langchain/chains/combine_documents/stuff.py +10 -10
- langchain/chains/constitutional_ai/base.py +9 -9
- langchain/chains/conversation/base.py +2 -4
- langchain/chains/conversational_retrieval/base.py +30 -30
- langchain/chains/elasticsearch_database/base.py +13 -13
- langchain/chains/example_generator.py +1 -3
- langchain/chains/flare/base.py +13 -12
- langchain/chains/flare/prompts.py +2 -4
- langchain/chains/hyde/base.py +8 -8
- langchain/chains/llm.py +31 -30
- langchain/chains/llm_checker/base.py +6 -6
- langchain/chains/llm_math/base.py +10 -10
- langchain/chains/llm_summarization_checker/base.py +6 -6
- langchain/chains/loading.py +12 -14
- langchain/chains/mapreduce.py +7 -6
- langchain/chains/moderation.py +8 -8
- langchain/chains/natbot/base.py +6 -6
- langchain/chains/openai_functions/base.py +8 -10
- langchain/chains/openai_functions/citation_fuzzy_match.py +4 -4
- langchain/chains/openai_functions/extraction.py +3 -3
- langchain/chains/openai_functions/openapi.py +12 -12
- langchain/chains/openai_functions/qa_with_structure.py +4 -4
- langchain/chains/openai_functions/utils.py +2 -2
- langchain/chains/openai_tools/extraction.py +2 -2
- langchain/chains/prompt_selector.py +3 -3
- langchain/chains/qa_generation/base.py +5 -5
- langchain/chains/qa_with_sources/base.py +21 -21
- langchain/chains/qa_with_sources/loading.py +2 -1
- langchain/chains/qa_with_sources/retrieval.py +6 -6
- langchain/chains/qa_with_sources/vector_db.py +8 -8
- langchain/chains/query_constructor/base.py +4 -3
- langchain/chains/query_constructor/parser.py +5 -4
- langchain/chains/question_answering/chain.py +3 -2
- langchain/chains/retrieval.py +2 -2
- langchain/chains/retrieval_qa/base.py +16 -16
- langchain/chains/router/base.py +12 -11
- langchain/chains/router/embedding_router.py +12 -11
- langchain/chains/router/llm_router.py +12 -12
- langchain/chains/router/multi_prompt.py +3 -3
- langchain/chains/router/multi_retrieval_qa.py +5 -4
- langchain/chains/sequential.py +18 -18
- langchain/chains/sql_database/query.py +21 -5
- langchain/chains/structured_output/base.py +14 -13
- langchain/chains/summarize/chain.py +4 -3
- langchain/chains/transform.py +12 -11
- langchain/chat_models/base.py +27 -31
- langchain/embeddings/__init__.py +1 -1
- langchain/embeddings/base.py +4 -6
- langchain/embeddings/cache.py +19 -18
- langchain/evaluation/agents/trajectory_eval_chain.py +16 -19
- langchain/evaluation/comparison/eval_chain.py +10 -10
- langchain/evaluation/criteria/eval_chain.py +11 -10
- langchain/evaluation/embedding_distance/base.py +21 -21
- langchain/evaluation/exact_match/base.py +3 -3
- langchain/evaluation/loading.py +7 -8
- langchain/evaluation/qa/eval_chain.py +7 -6
- langchain/evaluation/regex_match/base.py +3 -3
- langchain/evaluation/schema.py +6 -5
- langchain/evaluation/scoring/eval_chain.py +9 -9
- langchain/evaluation/string_distance/base.py +23 -23
- langchain/hub.py +2 -1
- langchain/indexes/_sql_record_manager.py +8 -7
- langchain/indexes/vectorstore.py +11 -11
- langchain/llms/__init__.py +3 -3
- langchain/memory/buffer.py +13 -13
- langchain/memory/buffer_window.py +5 -5
- langchain/memory/chat_memory.py +5 -5
- langchain/memory/combined.py +10 -10
- langchain/memory/entity.py +8 -7
- langchain/memory/readonly.py +4 -4
- langchain/memory/simple.py +5 -5
- langchain/memory/summary.py +8 -8
- langchain/memory/summary_buffer.py +11 -11
- langchain/memory/token_buffer.py +5 -5
- langchain/memory/utils.py +2 -2
- langchain/memory/vectorstore.py +15 -14
- langchain/memory/vectorstore_token_buffer_memory.py +7 -7
- langchain/model_laboratory.py +4 -3
- langchain/output_parsers/combining.py +5 -5
- langchain/output_parsers/datetime.py +1 -2
- langchain/output_parsers/enum.py +4 -5
- langchain/output_parsers/pandas_dataframe.py +5 -5
- langchain/output_parsers/regex.py +4 -4
- langchain/output_parsers/regex_dict.py +4 -4
- langchain/output_parsers/retry.py +2 -2
- langchain/output_parsers/structured.py +5 -5
- langchain/output_parsers/yaml.py +3 -3
- langchain/pydantic_v1/__init__.py +1 -6
- langchain/pydantic_v1/dataclasses.py +1 -5
- langchain/pydantic_v1/main.py +1 -5
- langchain/retrievers/contextual_compression.py +3 -3
- langchain/retrievers/document_compressors/base.py +3 -2
- langchain/retrievers/document_compressors/chain_extract.py +4 -3
- langchain/retrievers/document_compressors/chain_filter.py +3 -2
- langchain/retrievers/document_compressors/cohere_rerank.py +4 -3
- langchain/retrievers/document_compressors/cross_encoder.py +1 -2
- langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -1
- langchain/retrievers/document_compressors/embeddings_filter.py +3 -2
- langchain/retrievers/document_compressors/listwise_rerank.py +6 -5
- langchain/retrievers/ensemble.py +15 -19
- langchain/retrievers/merger_retriever.py +7 -12
- langchain/retrievers/multi_query.py +14 -13
- langchain/retrievers/multi_vector.py +4 -4
- langchain/retrievers/parent_document_retriever.py +9 -8
- langchain/retrievers/re_phraser.py +2 -3
- langchain/retrievers/self_query/base.py +13 -12
- langchain/retrievers/time_weighted_retriever.py +14 -14
- langchain/runnables/openai_functions.py +4 -3
- langchain/smith/evaluation/config.py +7 -6
- langchain/smith/evaluation/progress.py +3 -2
- langchain/smith/evaluation/runner_utils.py +66 -69
- langchain/smith/evaluation/string_run_evaluator.py +38 -31
- langchain/storage/encoder_backed.py +7 -11
- langchain/storage/file_system.py +5 -4
- {langchain-0.3.23.dist-info → langchain-0.3.25.dist-info}/METADATA +3 -3
- {langchain-0.3.23.dist-info → langchain-0.3.25.dist-info}/RECORD +169 -169
- {langchain-0.3.23.dist-info → langchain-0.3.25.dist-info}/WHEEL +1 -1
- langchain-0.3.25.dist-info/entry_points.txt +4 -0
- langchain-0.3.23.dist-info/entry_points.txt +0 -5
- {langchain-0.3.23.dist-info → langchain-0.3.25.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
"""Configuration for run evaluators."""
|
|
2
2
|
|
|
3
|
-
from
|
|
3
|
+
from collections.abc import Sequence
|
|
4
|
+
from typing import Any, Callable, Optional, Union
|
|
4
5
|
|
|
5
6
|
from langchain_core.embeddings import Embeddings
|
|
6
7
|
from langchain_core.language_models import BaseLanguageModel
|
|
@@ -45,7 +46,7 @@ class EvalConfig(BaseModel):
|
|
|
45
46
|
|
|
46
47
|
evaluator_type: EvaluatorType
|
|
47
48
|
|
|
48
|
-
def get_kwargs(self) ->
|
|
49
|
+
def get_kwargs(self) -> dict[str, Any]:
|
|
49
50
|
"""Get the keyword arguments for the load_evaluator call.
|
|
50
51
|
|
|
51
52
|
Returns
|
|
@@ -78,7 +79,7 @@ class SingleKeyEvalConfig(EvalConfig):
|
|
|
78
79
|
"""The key from the traced run's inputs dictionary to use to represent the
|
|
79
80
|
input. If not provided, it will be inferred automatically."""
|
|
80
81
|
|
|
81
|
-
def get_kwargs(self) ->
|
|
82
|
+
def get_kwargs(self) -> dict[str, Any]:
|
|
82
83
|
kwargs = super().get_kwargs()
|
|
83
84
|
# Filter out the keys that are not needed for the evaluator.
|
|
84
85
|
for key in ["reference_key", "prediction_key", "input_key"]:
|
|
@@ -121,7 +122,7 @@ class RunEvalConfig(BaseModel):
|
|
|
121
122
|
The language model to pass to any evaluators that use a language model.
|
|
122
123
|
""" # noqa: E501
|
|
123
124
|
|
|
124
|
-
evaluators:
|
|
125
|
+
evaluators: list[
|
|
125
126
|
Union[
|
|
126
127
|
SINGLE_EVAL_CONFIG_TYPE,
|
|
127
128
|
CUSTOM_EVALUATOR_TYPE,
|
|
@@ -134,9 +135,9 @@ class RunEvalConfig(BaseModel):
|
|
|
134
135
|
given evaluator
|
|
135
136
|
(e.g.,
|
|
136
137
|
:class:`RunEvalConfig.QA <langchain.smith.evaluation.config.RunEvalConfig.QA>`)."""
|
|
137
|
-
custom_evaluators: Optional[
|
|
138
|
+
custom_evaluators: Optional[list[CUSTOM_EVALUATOR_TYPE]] = None
|
|
138
139
|
"""Custom evaluators to apply to the dataset run."""
|
|
139
|
-
batch_evaluators: Optional[
|
|
140
|
+
batch_evaluators: Optional[list[BATCH_EVALUATOR_LIKE]] = None
|
|
140
141
|
"""Evaluators that run on an aggregate/batch level.
|
|
141
142
|
|
|
142
143
|
These generate 1 or more metrics that are assigned to the full test run.
|
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
"""A simple progress bar for the console."""
|
|
2
2
|
|
|
3
3
|
import threading
|
|
4
|
-
from
|
|
4
|
+
from collections.abc import Sequence
|
|
5
|
+
from typing import Any, Optional
|
|
5
6
|
from uuid import UUID
|
|
6
7
|
|
|
7
8
|
from langchain_core.callbacks import base as base_callbacks
|
|
@@ -51,7 +52,7 @@ class ProgressBarCallback(base_callbacks.BaseCallbackHandler):
|
|
|
51
52
|
|
|
52
53
|
def on_chain_end(
|
|
53
54
|
self,
|
|
54
|
-
outputs:
|
|
55
|
+
outputs: dict[str, Any],
|
|
55
56
|
*,
|
|
56
57
|
run_id: UUID,
|
|
57
58
|
parent_run_id: Optional[UUID] = None,
|
|
@@ -13,10 +13,7 @@ from typing import (
|
|
|
13
13
|
TYPE_CHECKING,
|
|
14
14
|
Any,
|
|
15
15
|
Callable,
|
|
16
|
-
Dict,
|
|
17
|
-
List,
|
|
18
16
|
Optional,
|
|
19
|
-
Tuple,
|
|
20
17
|
Union,
|
|
21
18
|
cast,
|
|
22
19
|
)
|
|
@@ -229,7 +226,7 @@ def _wrap_in_chain_factory(
|
|
|
229
226
|
return llm_or_chain_factory
|
|
230
227
|
|
|
231
228
|
|
|
232
|
-
def _get_prompt(inputs:
|
|
229
|
+
def _get_prompt(inputs: dict[str, Any]) -> str:
|
|
233
230
|
"""Get prompt from inputs.
|
|
234
231
|
|
|
235
232
|
Args:
|
|
@@ -286,10 +283,10 @@ class ChatModelInput(TypedDict):
|
|
|
286
283
|
messages: List of chat messages.
|
|
287
284
|
"""
|
|
288
285
|
|
|
289
|
-
messages:
|
|
286
|
+
messages: list[BaseMessage]
|
|
290
287
|
|
|
291
288
|
|
|
292
|
-
def _get_messages(inputs:
|
|
289
|
+
def _get_messages(inputs: dict[str, Any]) -> dict:
|
|
293
290
|
"""Get Chat Messages from inputs.
|
|
294
291
|
|
|
295
292
|
Args:
|
|
@@ -331,10 +328,10 @@ def _get_messages(inputs: Dict[str, Any]) -> dict:
|
|
|
331
328
|
## Shared data validation utilities
|
|
332
329
|
def _validate_example_inputs_for_language_model(
|
|
333
330
|
first_example: Example,
|
|
334
|
-
input_mapper: Optional[Callable[[
|
|
331
|
+
input_mapper: Optional[Callable[[dict], Any]],
|
|
335
332
|
) -> None:
|
|
336
333
|
if input_mapper:
|
|
337
|
-
prompt_input = input_mapper(first_example.inputs)
|
|
334
|
+
prompt_input = input_mapper(first_example.inputs or {})
|
|
338
335
|
if not isinstance(prompt_input, str) and not (
|
|
339
336
|
isinstance(prompt_input, list)
|
|
340
337
|
and all(isinstance(msg, BaseMessage) for msg in prompt_input)
|
|
@@ -347,10 +344,10 @@ def _validate_example_inputs_for_language_model(
|
|
|
347
344
|
)
|
|
348
345
|
else:
|
|
349
346
|
try:
|
|
350
|
-
_get_prompt(first_example.inputs)
|
|
347
|
+
_get_prompt(first_example.inputs or {})
|
|
351
348
|
except InputFormatError:
|
|
352
349
|
try:
|
|
353
|
-
_get_messages(first_example.inputs)
|
|
350
|
+
_get_messages(first_example.inputs or {})
|
|
354
351
|
except InputFormatError:
|
|
355
352
|
raise InputFormatError(
|
|
356
353
|
"Example inputs do not match language model input format. "
|
|
@@ -365,11 +362,11 @@ def _validate_example_inputs_for_language_model(
|
|
|
365
362
|
def _validate_example_inputs_for_chain(
|
|
366
363
|
first_example: Example,
|
|
367
364
|
chain: Chain,
|
|
368
|
-
input_mapper: Optional[Callable[[
|
|
365
|
+
input_mapper: Optional[Callable[[dict], Any]],
|
|
369
366
|
) -> None:
|
|
370
367
|
"""Validate that the example inputs match the chain input keys."""
|
|
371
368
|
if input_mapper:
|
|
372
|
-
first_inputs = input_mapper(first_example.inputs)
|
|
369
|
+
first_inputs = input_mapper(first_example.inputs or {})
|
|
373
370
|
missing_keys = set(chain.input_keys).difference(first_inputs)
|
|
374
371
|
if not isinstance(first_inputs, dict):
|
|
375
372
|
raise InputFormatError(
|
|
@@ -402,7 +399,7 @@ def _validate_example_inputs_for_chain(
|
|
|
402
399
|
def _validate_example_inputs(
|
|
403
400
|
example: Example,
|
|
404
401
|
llm_or_chain_factory: MCF,
|
|
405
|
-
input_mapper: Optional[Callable[[
|
|
402
|
+
input_mapper: Optional[Callable[[dict], Any]],
|
|
406
403
|
) -> None:
|
|
407
404
|
"""Validate that the example inputs are valid for the model."""
|
|
408
405
|
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
|
@@ -421,10 +418,10 @@ def _validate_example_inputs(
|
|
|
421
418
|
|
|
422
419
|
def _setup_evaluation(
|
|
423
420
|
llm_or_chain_factory: MCF,
|
|
424
|
-
examples:
|
|
421
|
+
examples: list[Example],
|
|
425
422
|
evaluation: Optional[smith_eval.RunEvalConfig],
|
|
426
423
|
data_type: DataType,
|
|
427
|
-
) -> Optional[
|
|
424
|
+
) -> Optional[list[RunEvaluator]]:
|
|
428
425
|
"""Configure the evaluators to run on the results of the chain."""
|
|
429
426
|
if evaluation:
|
|
430
427
|
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
|
@@ -451,7 +448,7 @@ def _setup_evaluation(
|
|
|
451
448
|
|
|
452
449
|
def _determine_input_key(
|
|
453
450
|
config: smith_eval.RunEvalConfig,
|
|
454
|
-
run_inputs: Optional[
|
|
451
|
+
run_inputs: Optional[list[str]],
|
|
455
452
|
) -> Optional[str]:
|
|
456
453
|
input_key = None
|
|
457
454
|
if config.input_key:
|
|
@@ -475,7 +472,7 @@ def _determine_input_key(
|
|
|
475
472
|
|
|
476
473
|
def _determine_prediction_key(
|
|
477
474
|
config: smith_eval.RunEvalConfig,
|
|
478
|
-
run_outputs: Optional[
|
|
475
|
+
run_outputs: Optional[list[str]],
|
|
479
476
|
) -> Optional[str]:
|
|
480
477
|
prediction_key = None
|
|
481
478
|
if config.prediction_key:
|
|
@@ -498,7 +495,7 @@ def _determine_prediction_key(
|
|
|
498
495
|
|
|
499
496
|
def _determine_reference_key(
|
|
500
497
|
config: smith_eval.RunEvalConfig,
|
|
501
|
-
example_outputs: Optional[
|
|
498
|
+
example_outputs: Optional[list[str]],
|
|
502
499
|
) -> Optional[str]:
|
|
503
500
|
if config.reference_key:
|
|
504
501
|
reference_key = config.reference_key
|
|
@@ -522,7 +519,7 @@ def _construct_run_evaluator(
|
|
|
522
519
|
eval_llm: Optional[BaseLanguageModel],
|
|
523
520
|
run_type: str,
|
|
524
521
|
data_type: DataType,
|
|
525
|
-
example_outputs: Optional[
|
|
522
|
+
example_outputs: Optional[list[str]],
|
|
526
523
|
reference_key: Optional[str],
|
|
527
524
|
input_key: Optional[str],
|
|
528
525
|
prediction_key: Optional[str],
|
|
@@ -583,10 +580,10 @@ def _construct_run_evaluator(
|
|
|
583
580
|
|
|
584
581
|
def _get_keys(
|
|
585
582
|
config: smith_eval.RunEvalConfig,
|
|
586
|
-
run_inputs: Optional[
|
|
587
|
-
run_outputs: Optional[
|
|
588
|
-
example_outputs: Optional[
|
|
589
|
-
) ->
|
|
583
|
+
run_inputs: Optional[list[str]],
|
|
584
|
+
run_outputs: Optional[list[str]],
|
|
585
|
+
example_outputs: Optional[list[str]],
|
|
586
|
+
) -> tuple[Optional[str], Optional[str], Optional[str]]:
|
|
590
587
|
input_key = _determine_input_key(config, run_inputs)
|
|
591
588
|
prediction_key = _determine_prediction_key(config, run_outputs)
|
|
592
589
|
reference_key = _determine_reference_key(config, example_outputs)
|
|
@@ -597,10 +594,10 @@ def _load_run_evaluators(
|
|
|
597
594
|
config: smith_eval.RunEvalConfig,
|
|
598
595
|
run_type: str,
|
|
599
596
|
data_type: DataType,
|
|
600
|
-
example_outputs: Optional[
|
|
601
|
-
run_inputs: Optional[
|
|
602
|
-
run_outputs: Optional[
|
|
603
|
-
) ->
|
|
597
|
+
example_outputs: Optional[list[str]],
|
|
598
|
+
run_inputs: Optional[list[str]],
|
|
599
|
+
run_outputs: Optional[list[str]],
|
|
600
|
+
) -> list[RunEvaluator]:
|
|
604
601
|
"""
|
|
605
602
|
Load run evaluators from a configuration.
|
|
606
603
|
|
|
@@ -662,12 +659,12 @@ def _load_run_evaluators(
|
|
|
662
659
|
|
|
663
660
|
async def _arun_llm(
|
|
664
661
|
llm: BaseLanguageModel,
|
|
665
|
-
inputs:
|
|
662
|
+
inputs: dict[str, Any],
|
|
666
663
|
*,
|
|
667
|
-
tags: Optional[
|
|
664
|
+
tags: Optional[list[str]] = None,
|
|
668
665
|
callbacks: Callbacks = None,
|
|
669
|
-
input_mapper: Optional[Callable[[
|
|
670
|
-
metadata: Optional[
|
|
666
|
+
input_mapper: Optional[Callable[[dict], Any]] = None,
|
|
667
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
671
668
|
) -> Union[str, BaseMessage]:
|
|
672
669
|
"""Asynchronously run the language model.
|
|
673
670
|
|
|
@@ -726,12 +723,12 @@ async def _arun_llm(
|
|
|
726
723
|
|
|
727
724
|
async def _arun_chain(
|
|
728
725
|
chain: Union[Chain, Runnable],
|
|
729
|
-
inputs:
|
|
726
|
+
inputs: dict[str, Any],
|
|
730
727
|
callbacks: Callbacks,
|
|
731
728
|
*,
|
|
732
|
-
tags: Optional[
|
|
733
|
-
input_mapper: Optional[Callable[[
|
|
734
|
-
metadata: Optional[
|
|
729
|
+
tags: Optional[list[str]] = None,
|
|
730
|
+
input_mapper: Optional[Callable[[dict], Any]] = None,
|
|
731
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
735
732
|
) -> Union[dict, str]:
|
|
736
733
|
"""Run a chain asynchronously on inputs."""
|
|
737
734
|
inputs_ = inputs if input_mapper is None else input_mapper(inputs)
|
|
@@ -761,7 +758,7 @@ async def _arun_llm_or_chain(
|
|
|
761
758
|
config: RunnableConfig,
|
|
762
759
|
*,
|
|
763
760
|
llm_or_chain_factory: MCF,
|
|
764
|
-
input_mapper: Optional[Callable[[
|
|
761
|
+
input_mapper: Optional[Callable[[dict], Any]] = None,
|
|
765
762
|
) -> Union[dict, str, LLMResult, ChatResult]:
|
|
766
763
|
"""Asynchronously run the Chain or language model.
|
|
767
764
|
|
|
@@ -783,7 +780,7 @@ async def _arun_llm_or_chain(
|
|
|
783
780
|
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
|
784
781
|
output: Any = await _arun_llm(
|
|
785
782
|
llm_or_chain_factory,
|
|
786
|
-
example.inputs,
|
|
783
|
+
example.inputs or {},
|
|
787
784
|
tags=config["tags"],
|
|
788
785
|
callbacks=config["callbacks"],
|
|
789
786
|
input_mapper=input_mapper,
|
|
@@ -793,7 +790,7 @@ async def _arun_llm_or_chain(
|
|
|
793
790
|
chain = llm_or_chain_factory()
|
|
794
791
|
output = await _arun_chain(
|
|
795
792
|
chain,
|
|
796
|
-
example.inputs,
|
|
793
|
+
example.inputs or {},
|
|
797
794
|
tags=config["tags"],
|
|
798
795
|
callbacks=config["callbacks"],
|
|
799
796
|
input_mapper=input_mapper,
|
|
@@ -815,12 +812,12 @@ async def _arun_llm_or_chain(
|
|
|
815
812
|
|
|
816
813
|
def _run_llm(
|
|
817
814
|
llm: BaseLanguageModel,
|
|
818
|
-
inputs:
|
|
815
|
+
inputs: dict[str, Any],
|
|
819
816
|
callbacks: Callbacks,
|
|
820
817
|
*,
|
|
821
|
-
tags: Optional[
|
|
822
|
-
input_mapper: Optional[Callable[[
|
|
823
|
-
metadata: Optional[
|
|
818
|
+
tags: Optional[list[str]] = None,
|
|
819
|
+
input_mapper: Optional[Callable[[dict], Any]] = None,
|
|
820
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
824
821
|
) -> Union[str, BaseMessage]:
|
|
825
822
|
"""
|
|
826
823
|
Run the language model on the example.
|
|
@@ -877,13 +874,13 @@ def _run_llm(
|
|
|
877
874
|
|
|
878
875
|
def _run_chain(
|
|
879
876
|
chain: Union[Chain, Runnable],
|
|
880
|
-
inputs:
|
|
877
|
+
inputs: dict[str, Any],
|
|
881
878
|
callbacks: Callbacks,
|
|
882
879
|
*,
|
|
883
|
-
tags: Optional[
|
|
884
|
-
input_mapper: Optional[Callable[[
|
|
885
|
-
metadata: Optional[
|
|
886
|
-
) -> Union[
|
|
880
|
+
tags: Optional[list[str]] = None,
|
|
881
|
+
input_mapper: Optional[Callable[[dict], Any]] = None,
|
|
882
|
+
metadata: Optional[dict[str, Any]] = None,
|
|
883
|
+
) -> Union[dict, str]:
|
|
887
884
|
"""Run a chain on inputs."""
|
|
888
885
|
inputs_ = inputs if input_mapper is None else input_mapper(inputs)
|
|
889
886
|
if (
|
|
@@ -912,7 +909,7 @@ def _run_llm_or_chain(
|
|
|
912
909
|
config: RunnableConfig,
|
|
913
910
|
*,
|
|
914
911
|
llm_or_chain_factory: MCF,
|
|
915
|
-
input_mapper: Optional[Callable[[
|
|
912
|
+
input_mapper: Optional[Callable[[dict], Any]] = None,
|
|
916
913
|
) -> Union[dict, str, LLMResult, ChatResult]:
|
|
917
914
|
"""
|
|
918
915
|
Run the Chain or language model synchronously.
|
|
@@ -935,7 +932,7 @@ def _run_llm_or_chain(
|
|
|
935
932
|
if isinstance(llm_or_chain_factory, BaseLanguageModel):
|
|
936
933
|
output: Any = _run_llm(
|
|
937
934
|
llm_or_chain_factory,
|
|
938
|
-
example.inputs,
|
|
935
|
+
example.inputs or {},
|
|
939
936
|
config["callbacks"],
|
|
940
937
|
tags=config["tags"],
|
|
941
938
|
input_mapper=input_mapper,
|
|
@@ -945,7 +942,7 @@ def _run_llm_or_chain(
|
|
|
945
942
|
chain = llm_or_chain_factory()
|
|
946
943
|
output = _run_chain(
|
|
947
944
|
chain,
|
|
948
|
-
example.inputs,
|
|
945
|
+
example.inputs or {},
|
|
949
946
|
config["callbacks"],
|
|
950
947
|
tags=config["tags"],
|
|
951
948
|
input_mapper=input_mapper,
|
|
@@ -968,10 +965,10 @@ def _prepare_eval_run(
|
|
|
968
965
|
dataset_name: str,
|
|
969
966
|
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
|
970
967
|
project_name: str,
|
|
971
|
-
project_metadata: Optional[
|
|
972
|
-
tags: Optional[
|
|
968
|
+
project_metadata: Optional[dict[str, Any]] = None,
|
|
969
|
+
tags: Optional[list[str]] = None,
|
|
973
970
|
dataset_version: Optional[Union[str, datetime]] = None,
|
|
974
|
-
) ->
|
|
971
|
+
) -> tuple[MCF, TracerSession, Dataset, list[Example]]:
|
|
975
972
|
wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory, dataset_name)
|
|
976
973
|
dataset = client.read_dataset(dataset_name=dataset_name)
|
|
977
974
|
|
|
@@ -1027,7 +1024,7 @@ run_on_dataset(
|
|
|
1027
1024
|
class _RowResult(TypedDict, total=False):
|
|
1028
1025
|
"""A dictionary of the results for a single example row."""
|
|
1029
1026
|
|
|
1030
|
-
feedback: Optional[
|
|
1027
|
+
feedback: Optional[list[EvaluationResult]]
|
|
1031
1028
|
execution_time: Optional[float]
|
|
1032
1029
|
run_id: Optional[str]
|
|
1033
1030
|
|
|
@@ -1039,14 +1036,14 @@ class _DatasetRunContainer:
|
|
|
1039
1036
|
client: Client
|
|
1040
1037
|
project: TracerSession
|
|
1041
1038
|
wrapped_model: MCF
|
|
1042
|
-
examples:
|
|
1043
|
-
configs:
|
|
1044
|
-
batch_evaluators: Optional[
|
|
1039
|
+
examples: list[Example]
|
|
1040
|
+
configs: list[RunnableConfig]
|
|
1041
|
+
batch_evaluators: Optional[list[smith_eval_config.BATCH_EVALUATOR_LIKE]] = None
|
|
1045
1042
|
|
|
1046
1043
|
def _merge_test_outputs(
|
|
1047
1044
|
self,
|
|
1048
1045
|
batch_results: list,
|
|
1049
|
-
all_eval_results:
|
|
1046
|
+
all_eval_results: dict[str, _RowResult],
|
|
1050
1047
|
) -> dict:
|
|
1051
1048
|
results: dict = {}
|
|
1052
1049
|
for example, output in zip(self.examples, batch_results):
|
|
@@ -1065,7 +1062,7 @@ class _DatasetRunContainer:
|
|
|
1065
1062
|
results[str(example.id)]["reference"] = example.outputs
|
|
1066
1063
|
return results
|
|
1067
1064
|
|
|
1068
|
-
def _run_batch_evaluators(self, runs:
|
|
1065
|
+
def _run_batch_evaluators(self, runs: dict[str, Run]) -> list[dict]:
|
|
1069
1066
|
evaluators = self.batch_evaluators
|
|
1070
1067
|
if not evaluators:
|
|
1071
1068
|
return []
|
|
@@ -1090,7 +1087,7 @@ class _DatasetRunContainer:
|
|
|
1090
1087
|
)
|
|
1091
1088
|
return aggregate_feedback
|
|
1092
1089
|
|
|
1093
|
-
def _collect_metrics(self) ->
|
|
1090
|
+
def _collect_metrics(self) -> tuple[dict[str, _RowResult], dict[str, Run]]:
|
|
1094
1091
|
all_eval_results: dict = {}
|
|
1095
1092
|
all_runs: dict = {}
|
|
1096
1093
|
for c in self.configs:
|
|
@@ -1117,11 +1114,11 @@ class _DatasetRunContainer:
|
|
|
1117
1114
|
}
|
|
1118
1115
|
)
|
|
1119
1116
|
all_runs[str(callback.example_id)] = run
|
|
1120
|
-
return cast(
|
|
1117
|
+
return cast(dict[str, _RowResult], all_eval_results), all_runs
|
|
1121
1118
|
|
|
1122
1119
|
def _collect_test_results(
|
|
1123
1120
|
self,
|
|
1124
|
-
batch_results:
|
|
1121
|
+
batch_results: list[Union[dict, str, LLMResult, ChatResult]],
|
|
1125
1122
|
) -> TestResult:
|
|
1126
1123
|
logger.info("Waiting for evaluators to complete.")
|
|
1127
1124
|
wait_for_all_evaluators()
|
|
@@ -1162,10 +1159,10 @@ class _DatasetRunContainer:
|
|
|
1162
1159
|
llm_or_chain_factory: MODEL_OR_CHAIN_FACTORY,
|
|
1163
1160
|
project_name: Optional[str],
|
|
1164
1161
|
evaluation: Optional[smith_eval.RunEvalConfig] = None,
|
|
1165
|
-
tags: Optional[
|
|
1166
|
-
input_mapper: Optional[Callable[[
|
|
1162
|
+
tags: Optional[list[str]] = None,
|
|
1163
|
+
input_mapper: Optional[Callable[[dict], Any]] = None,
|
|
1167
1164
|
concurrency_level: int = 5,
|
|
1168
|
-
project_metadata: Optional[
|
|
1165
|
+
project_metadata: Optional[dict[str, Any]] = None,
|
|
1169
1166
|
revision_id: Optional[str] = None,
|
|
1170
1167
|
dataset_version: Optional[Union[datetime, str]] = None,
|
|
1171
1168
|
) -> _DatasetRunContainer:
|
|
@@ -1277,11 +1274,11 @@ async def arun_on_dataset(
|
|
|
1277
1274
|
dataset_version: Optional[Union[datetime, str]] = None,
|
|
1278
1275
|
concurrency_level: int = 5,
|
|
1279
1276
|
project_name: Optional[str] = None,
|
|
1280
|
-
project_metadata: Optional[
|
|
1277
|
+
project_metadata: Optional[dict[str, Any]] = None,
|
|
1281
1278
|
verbose: bool = False,
|
|
1282
1279
|
revision_id: Optional[str] = None,
|
|
1283
1280
|
**kwargs: Any,
|
|
1284
|
-
) ->
|
|
1281
|
+
) -> dict[str, Any]:
|
|
1285
1282
|
input_mapper = kwargs.pop("input_mapper", None)
|
|
1286
1283
|
if input_mapper:
|
|
1287
1284
|
warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)
|
|
@@ -1342,11 +1339,11 @@ def run_on_dataset(
|
|
|
1342
1339
|
dataset_version: Optional[Union[datetime, str]] = None,
|
|
1343
1340
|
concurrency_level: int = 5,
|
|
1344
1341
|
project_name: Optional[str] = None,
|
|
1345
|
-
project_metadata: Optional[
|
|
1342
|
+
project_metadata: Optional[dict[str, Any]] = None,
|
|
1346
1343
|
verbose: bool = False,
|
|
1347
1344
|
revision_id: Optional[str] = None,
|
|
1348
1345
|
**kwargs: Any,
|
|
1349
|
-
) ->
|
|
1346
|
+
) -> dict[str, Any]:
|
|
1350
1347
|
input_mapper = kwargs.pop("input_mapper", None)
|
|
1351
1348
|
if input_mapper:
|
|
1352
1349
|
warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)
|