langchain 0.3.27__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141)
  1. langchain/agents/agent.py +16 -20
  2. langchain/agents/agent_iterator.py +19 -12
  3. langchain/agents/agent_toolkits/vectorstore/base.py +2 -0
  4. langchain/agents/chat/base.py +2 -0
  5. langchain/agents/conversational/base.py +2 -0
  6. langchain/agents/conversational_chat/base.py +2 -0
  7. langchain/agents/initialize.py +1 -1
  8. langchain/agents/json_chat/base.py +1 -0
  9. langchain/agents/mrkl/base.py +2 -0
  10. langchain/agents/openai_assistant/base.py +1 -1
  11. langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +2 -0
  12. langchain/agents/openai_functions_agent/base.py +3 -2
  13. langchain/agents/openai_functions_multi_agent/base.py +1 -1
  14. langchain/agents/openai_tools/base.py +1 -0
  15. langchain/agents/output_parsers/json.py +2 -0
  16. langchain/agents/output_parsers/openai_functions.py +10 -3
  17. langchain/agents/output_parsers/openai_tools.py +8 -1
  18. langchain/agents/output_parsers/react_json_single_input.py +3 -0
  19. langchain/agents/output_parsers/react_single_input.py +3 -0
  20. langchain/agents/output_parsers/self_ask.py +2 -0
  21. langchain/agents/output_parsers/tools.py +16 -2
  22. langchain/agents/output_parsers/xml.py +3 -0
  23. langchain/agents/react/agent.py +1 -0
  24. langchain/agents/react/base.py +4 -0
  25. langchain/agents/react/output_parser.py +2 -0
  26. langchain/agents/schema.py +2 -0
  27. langchain/agents/self_ask_with_search/base.py +4 -0
  28. langchain/agents/structured_chat/base.py +5 -0
  29. langchain/agents/structured_chat/output_parser.py +13 -0
  30. langchain/agents/tool_calling_agent/base.py +1 -0
  31. langchain/agents/tools.py +3 -0
  32. langchain/agents/xml/base.py +7 -1
  33. langchain/callbacks/streaming_aiter.py +13 -2
  34. langchain/callbacks/streaming_aiter_final_only.py +11 -2
  35. langchain/callbacks/streaming_stdout_final_only.py +5 -0
  36. langchain/callbacks/tracers/logging.py +11 -0
  37. langchain/chains/api/base.py +5 -1
  38. langchain/chains/base.py +8 -2
  39. langchain/chains/combine_documents/base.py +7 -1
  40. langchain/chains/combine_documents/map_reduce.py +3 -0
  41. langchain/chains/combine_documents/map_rerank.py +6 -4
  42. langchain/chains/combine_documents/reduce.py +1 -0
  43. langchain/chains/combine_documents/refine.py +1 -0
  44. langchain/chains/combine_documents/stuff.py +5 -1
  45. langchain/chains/constitutional_ai/base.py +7 -0
  46. langchain/chains/conversation/base.py +4 -1
  47. langchain/chains/conversational_retrieval/base.py +67 -59
  48. langchain/chains/elasticsearch_database/base.py +2 -1
  49. langchain/chains/flare/base.py +2 -0
  50. langchain/chains/flare/prompts.py +2 -0
  51. langchain/chains/llm.py +7 -2
  52. langchain/chains/llm_bash/__init__.py +1 -1
  53. langchain/chains/llm_checker/base.py +12 -1
  54. langchain/chains/llm_math/base.py +9 -1
  55. langchain/chains/llm_summarization_checker/base.py +13 -1
  56. langchain/chains/llm_symbolic_math/__init__.py +1 -1
  57. langchain/chains/loading.py +4 -2
  58. langchain/chains/moderation.py +3 -0
  59. langchain/chains/natbot/base.py +3 -1
  60. langchain/chains/natbot/crawler.py +29 -0
  61. langchain/chains/openai_functions/base.py +2 -0
  62. langchain/chains/openai_functions/citation_fuzzy_match.py +9 -0
  63. langchain/chains/openai_functions/openapi.py +4 -0
  64. langchain/chains/openai_functions/qa_with_structure.py +3 -3
  65. langchain/chains/openai_functions/tagging.py +2 -0
  66. langchain/chains/qa_generation/base.py +4 -0
  67. langchain/chains/qa_with_sources/base.py +3 -0
  68. langchain/chains/qa_with_sources/retrieval.py +1 -1
  69. langchain/chains/qa_with_sources/vector_db.py +4 -2
  70. langchain/chains/query_constructor/base.py +4 -2
  71. langchain/chains/query_constructor/parser.py +64 -2
  72. langchain/chains/retrieval_qa/base.py +4 -0
  73. langchain/chains/router/base.py +14 -2
  74. langchain/chains/router/embedding_router.py +3 -0
  75. langchain/chains/router/llm_router.py +6 -4
  76. langchain/chains/router/multi_prompt.py +3 -0
  77. langchain/chains/router/multi_retrieval_qa.py +18 -0
  78. langchain/chains/sql_database/query.py +1 -0
  79. langchain/chains/structured_output/base.py +2 -0
  80. langchain/chains/transform.py +4 -0
  81. langchain/chat_models/base.py +55 -18
  82. langchain/document_loaders/blob_loaders/schema.py +1 -4
  83. langchain/embeddings/base.py +2 -0
  84. langchain/embeddings/cache.py +3 -3
  85. langchain/evaluation/agents/trajectory_eval_chain.py +3 -2
  86. langchain/evaluation/comparison/eval_chain.py +1 -0
  87. langchain/evaluation/criteria/eval_chain.py +3 -0
  88. langchain/evaluation/embedding_distance/base.py +11 -0
  89. langchain/evaluation/exact_match/base.py +14 -1
  90. langchain/evaluation/loading.py +1 -0
  91. langchain/evaluation/parsing/base.py +16 -3
  92. langchain/evaluation/parsing/json_distance.py +19 -8
  93. langchain/evaluation/parsing/json_schema.py +1 -4
  94. langchain/evaluation/qa/eval_chain.py +8 -0
  95. langchain/evaluation/qa/generate_chain.py +2 -0
  96. langchain/evaluation/regex_match/base.py +9 -1
  97. langchain/evaluation/scoring/eval_chain.py +1 -0
  98. langchain/evaluation/string_distance/base.py +6 -0
  99. langchain/memory/buffer.py +5 -0
  100. langchain/memory/buffer_window.py +2 -0
  101. langchain/memory/combined.py +1 -1
  102. langchain/memory/entity.py +47 -0
  103. langchain/memory/simple.py +3 -0
  104. langchain/memory/summary.py +30 -0
  105. langchain/memory/summary_buffer.py +3 -0
  106. langchain/memory/token_buffer.py +2 -0
  107. langchain/output_parsers/combining.py +4 -2
  108. langchain/output_parsers/enum.py +5 -1
  109. langchain/output_parsers/fix.py +8 -1
  110. langchain/output_parsers/pandas_dataframe.py +16 -1
  111. langchain/output_parsers/regex.py +2 -0
  112. langchain/output_parsers/retry.py +21 -1
  113. langchain/output_parsers/structured.py +10 -0
  114. langchain/output_parsers/yaml.py +4 -0
  115. langchain/pydantic_v1/__init__.py +1 -1
  116. langchain/retrievers/document_compressors/chain_extract.py +4 -2
  117. langchain/retrievers/document_compressors/cohere_rerank.py +2 -0
  118. langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -0
  119. langchain/retrievers/document_compressors/embeddings_filter.py +3 -0
  120. langchain/retrievers/document_compressors/listwise_rerank.py +1 -0
  121. langchain/retrievers/ensemble.py +2 -2
  122. langchain/retrievers/multi_query.py +3 -1
  123. langchain/retrievers/multi_vector.py +4 -1
  124. langchain/retrievers/parent_document_retriever.py +15 -0
  125. langchain/retrievers/self_query/base.py +19 -0
  126. langchain/retrievers/time_weighted_retriever.py +3 -0
  127. langchain/runnables/hub.py +12 -0
  128. langchain/runnables/openai_functions.py +6 -0
  129. langchain/smith/__init__.py +1 -0
  130. langchain/smith/evaluation/config.py +5 -22
  131. langchain/smith/evaluation/progress.py +12 -3
  132. langchain/smith/evaluation/runner_utils.py +240 -123
  133. langchain/smith/evaluation/string_run_evaluator.py +27 -0
  134. langchain/storage/encoder_backed.py +1 -0
  135. langchain/tools/python/__init__.py +1 -1
  136. {langchain-0.3.27.dist-info → langchain-0.4.0.dev0.dist-info}/METADATA +2 -12
  137. {langchain-0.3.27.dist-info → langchain-0.4.0.dev0.dist-info}/RECORD +140 -141
  138. langchain/smith/evaluation/utils.py +0 -0
  139. {langchain-0.3.27.dist-info → langchain-0.4.0.dev0.dist-info}/WHEEL +0 -0
  140. {langchain-0.3.27.dist-info → langchain-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
  141. {langchain-0.3.27.dist-info → langchain-0.4.0.dev0.dist-info}/licenses/LICENSE +0 -0

langchain/smith/evaluation/runner_utils.py
@@ -155,9 +155,24 @@ class EvalError(dict):
     """Your architecture raised an error."""
 
     def __init__(self, Error: BaseException, **kwargs: Any) -> None:
+        """Initialize the EvalError with an error and additional attributes.
+
+        Args:
+            Error: The error that occurred.
+            **kwargs: Additional attributes to include in the error.
+        """
         super().__init__(Error=Error, **kwargs)
 
     def __getattr__(self, name: str) -> Any:
+        """Get an attribute from the EvalError.
+
+        Args:
+            name: The name of the attribute to get.
+        Returns:
+            The value of the attribute.
+        Raises:
+            AttributeError: If the attribute does not exist.
+        """
        try:
             return self[name]
         except KeyError as e:
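
For context, `EvalError` is a plain `dict` subclass: the constructor stores `Error` (and any extra keyword arguments) as dictionary entries, and `__getattr__` resolves attribute access through `self[name]`, raising `AttributeError` when the key is missing. A minimal usage sketch (the extra keyword argument and values are illustrative, not from the package):

    from langchain.smith.evaluation.runner_utils import EvalError  # module path per the file list above

    err = EvalError(Error=ValueError("model raised"), run_id="1234")
    err.Error        # ValueError("model raised"), resolved via __getattr__ -> self["Error"]
    err["run_id"]    # "1234" -- ordinary dict access still works
    err.missing      # raises AttributeError, per the docstring added above
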
@@ -199,24 +214,24 @@ def _wrap_in_chain_factory(
         return lambda: lcf
     if callable(llm_or_chain_factory):
         if is_traceable_function(llm_or_chain_factory):
-            runnable_ = as_runnable(cast(Callable, llm_or_chain_factory))
+            runnable_ = as_runnable(cast("Callable", llm_or_chain_factory))
             return lambda: runnable_
         try:
             _model = llm_or_chain_factory()  # type: ignore[call-arg]
         except TypeError:
             # It's an arbitrary function, wrap it in a RunnableLambda
-            user_func = cast(Callable, llm_or_chain_factory)
+            user_func = cast("Callable", llm_or_chain_factory)
             sig = inspect.signature(user_func)
             logger.info("Wrapping function %s as RunnableLambda.", sig)
             wrapped = RunnableLambda(user_func)
             return lambda: wrapped
-        constructor = cast(Callable, llm_or_chain_factory)
+        constructor = cast("Callable", llm_or_chain_factory)
         if isinstance(_model, BaseLanguageModel):
             # It's not uncommon to do an LLM constructor instead of raw LLM,
             # so we'll unpack it for the user.
             return _model
-        if is_traceable_function(cast(Callable, _model)):
-            runnable_ = as_runnable(cast(Callable, _model))
+        if is_traceable_function(cast("Callable", _model)):
+            runnable_ = as_runnable(cast("Callable", _model))
             return lambda: runnable_
         if not isinstance(_model, Runnable):
             # This is unlikely to happen - a constructor for a model function
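
The only change in this hunk is that the first argument to `typing.cast` is now a string. `cast` does nothing with that argument at runtime (it simply returns the value), and type checkers accept a quoted forward reference, so behaviour is unchanged; the quoted form merely allows the name to live in a `TYPE_CHECKING`-only import. A self-contained illustration of the pattern, separate from the langchain code:

    from typing import TYPE_CHECKING, cast

    if TYPE_CHECKING:
        from collections.abc import Callable  # imported only for the type checker

    def as_callable(fn: object):
        # Both cast(Callable, fn) and cast("Callable", fn) return fn unchanged at
        # runtime; only the quoted form works when Callable is not bound at runtime.
        return cast("Callable", fn)
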
@@ -1089,7 +1104,7 @@ class _DatasetRunContainer:
     ) -> dict:
         results: dict = {}
         for example, output in zip(self.examples, batch_results):
-            row_result = cast(_RowResult, all_eval_results.get(str(example.id), {}))
+            row_result = cast("_RowResult", all_eval_results.get(str(example.id), {}))
             results[str(example.id)] = {
                 "input": example.inputs,
                 "feedback": row_result.get("feedback", []),
@@ -1116,7 +1131,7 @@ class _DatasetRunContainer:
                 result = evaluator(runs_list, self.examples)
                 if isinstance(result, EvaluationResult):
                     result = result.dict()
-                aggregate_feedback.append(cast(dict, result))
+                aggregate_feedback.append(cast("dict", result))
                 executor.submit(
                     self.client.create_feedback,
                     **result,
@@ -1133,7 +1148,7 @@ class _DatasetRunContainer:
         all_eval_results: dict = {}
         all_runs: dict = {}
         for c in self.configs:
-            for callback in cast(list, c["callbacks"]):
+            for callback in cast("list", c["callbacks"]):
                 if isinstance(callback, EvaluatorCallbackHandler):
                     eval_results = callback.logged_eval_results
                     for (_, example_id), v in eval_results.items():
@@ -1156,7 +1171,7 @@ class _DatasetRunContainer:
                         },
                     )
                     all_runs[str(callback.example_id)] = run
-        return cast(dict[str, _RowResult], all_eval_results), all_runs
+        return cast("dict[str, _RowResult]", all_eval_results), all_runs
 
     def _collect_test_results(
         self,
@@ -1330,6 +1345,114 @@ async def arun_on_dataset(
     revision_id: Optional[str] = None,
     **kwargs: Any,
 ) -> dict[str, Any]:
+    """Run on dataset.
+
+    Run the Chain or language model on a dataset and store traces
+    to the specified project name.
+
+    For the (usually faster) async version of this function,
+    see :func:`arun_on_dataset`.
+
+    Args:
+        dataset_name: Name of the dataset to run the chain on.
+        llm_or_chain_factory: Language model or Chain constructor to run
+            over the dataset. The Chain constructor is used to permit
+            independent calls on each example without carrying over state.
+        evaluation: Configuration for evaluators to run on the
+            results of the chain
+        concurrency_level: The number of async tasks to run concurrently.
+        project_name: Name of the project to store the traces in.
+            Defaults to {dataset_name}-{chain class name}-{datetime}.
+        project_metadata: Optional metadata to add to the project.
+            Useful for storing information the test variant.
+            (prompt version, model version, etc.)
+        client: LangSmith client to use to access the dataset and to
+            log feedback and run traces.
+        verbose: Whether to print progress.
+        tags: Tags to add to each run in the project.
+        revision_id: Optional revision identifier to assign this test run to
+            track the performance of different versions of your system.
+    Returns:
+        A dictionary containing the run's project name and the resulting model outputs.
+
+    Examples:
+
+    .. code-block:: python
+
+        from langsmith import Client
+        from langchain_openai import ChatOpenAI
+        from langchain.chains import LLMChain
+        from langchain.smith import smith_eval.RunEvalConfig, run_on_dataset
+
+        # Chains may have memory. Passing in a constructor function lets the
+        # evaluation framework avoid cross-contamination between runs.
+        def construct_chain():
+            llm = ChatOpenAI(temperature=0)
+            chain = LLMChain.from_string(
+                llm,
+                "What's the answer to {your_input_key}"
+            )
+            return chain
+
+        # Load off-the-shelf evaluators via config or the EvaluatorType (string or enum)
+        evaluation_config = smith_eval.RunEvalConfig(
+            evaluators=[
+                "qa",  # "Correctness" against a reference answer
+                "embedding_distance",
+                smith_eval.RunEvalConfig.Criteria("helpfulness"),
+                smith_eval.RunEvalConfig.Criteria({
+                    "fifth-grader-score": "Do you have to be smarter than a fifth grader to answer this question?"
+                }),
+            ]
+        )
+
+        client = Client()
+        await arun_on_dataset(
+            client,
+            dataset_name="<my_dataset_name>",
+            llm_or_chain_factory=construct_chain,
+            evaluation=evaluation_config,
+        )
+
+    You can also create custom evaluators by subclassing the
+    :class:`StringEvaluator <langchain.evaluation.schema.StringEvaluator>`
+    or LangSmith's `RunEvaluator` classes.
+
+    .. code-block:: python
+
+        from typing import Optional
+        from langchain.evaluation import StringEvaluator
+
+        class MyStringEvaluator(StringEvaluator):
+
+            @property
+            def requires_input(self) -> bool:
+                return False
+
+            @property
+            def requires_reference(self) -> bool:
+                return True
+
+            @property
+            def evaluation_name(self) -> str:
+                return "exact_match"
+
+            def _evaluate_strings(self, prediction, reference=None, input=None, **kwargs) -> dict:
+                return {"score": prediction == reference}
+
+
+        evaluation_config = smith_eval.RunEvalConfig(
+            custom_evaluators = [MyStringEvaluator()],
+        )
+
+        await arun_on_dataset(
+            client,
+            dataset_name="<my_dataset_name>",
+            llm_or_chain_factory=construct_chain,
+            evaluation=evaluation_config,
+        )
+
+    """  # noqa: E501
     input_mapper = kwargs.pop("input_mapper", None)
     if input_mapper:
         warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)
@@ -1395,6 +1518,114 @@ def run_on_dataset(
     revision_id: Optional[str] = None,
     **kwargs: Any,
 ) -> dict[str, Any]:
+    """Run on dataset.
+
+    Run the Chain or language model on a dataset and store traces
+    to the specified project name.
+
+    For the (usually faster) async version of this function,
+    see :func:`arun_on_dataset`.
+
+    Args:
+        dataset_name: Name of the dataset to run the chain on.
+        llm_or_chain_factory: Language model or Chain constructor to run
+            over the dataset. The Chain constructor is used to permit
+            independent calls on each example without carrying over state.
+        evaluation: Configuration for evaluators to run on the
+            results of the chain
+        concurrency_level: The number of async tasks to run concurrently.
+        project_name: Name of the project to store the traces in.
+            Defaults to {dataset_name}-{chain class name}-{datetime}.
+        project_metadata: Optional metadata to add to the project.
+            Useful for storing information the test variant.
+            (prompt version, model version, etc.)
+        client: LangSmith client to use to access the dataset and to
+            log feedback and run traces.
+        verbose: Whether to print progress.
+        tags: Tags to add to each run in the project.
+        revision_id: Optional revision identifier to assign this test run to
+            track the performance of different versions of your system.
+    Returns:
+        A dictionary containing the run's project name and the resulting model outputs.
+
+    Examples:
+
+    .. code-block:: python
+
+        from langsmith import Client
+        from langchain_openai import ChatOpenAI
+        from langchain.chains import LLMChain
+        from langchain.smith import smith_eval.RunEvalConfig, run_on_dataset
+
+        # Chains may have memory. Passing in a constructor function lets the
+        # evaluation framework avoid cross-contamination between runs.
+        def construct_chain():
+            llm = ChatOpenAI(temperature=0)
+            chain = LLMChain.from_string(
+                llm,
+                "What's the answer to {your_input_key}"
+            )
+            return chain
+
+        # Load off-the-shelf evaluators via config or the EvaluatorType (string or enum)
+        evaluation_config = smith_eval.RunEvalConfig(
+            evaluators=[
+                "qa",  # "Correctness" against a reference answer
+                "embedding_distance",
+                smith_eval.RunEvalConfig.Criteria("helpfulness"),
+                smith_eval.RunEvalConfig.Criteria({
+                    "fifth-grader-score": "Do you have to be smarter than a fifth grader to answer this question?"
+                }),
+            ]
+        )
+
+        client = Client()
+        run_on_dataset(
+            client,
+            dataset_name="<my_dataset_name>",
+            llm_or_chain_factory=construct_chain,
+            evaluation=evaluation_config,
+        )
+
+    You can also create custom evaluators by subclassing the
+    :class:`StringEvaluator <langchain.evaluation.schema.StringEvaluator>`
+    or LangSmith's `RunEvaluator` classes.
+
+    .. code-block:: python
+
+        from typing import Optional
+        from langchain.evaluation import StringEvaluator
+
+        class MyStringEvaluator(StringEvaluator):
+
+            @property
+            def requires_input(self) -> bool:
+                return False
+
+            @property
+            def requires_reference(self) -> bool:
+                return True
+
+            @property
+            def evaluation_name(self) -> str:
+                return "exact_match"
+
+            def _evaluate_strings(self, prediction, reference=None, input=None, **kwargs) -> dict:
+                return {"score": prediction == reference}
+
+
+        evaluation_config = smith_eval.RunEvalConfig(
+            custom_evaluators = [MyStringEvaluator()],
+        )
+
+        run_on_dataset(
+            client,
+            dataset_name="<my_dataset_name>",
+            llm_or_chain_factory=construct_chain,
+            evaluation=evaluation_config,
+        )
+
+    """  # noqa: E501
     input_mapper = kwargs.pop("input_mapper", None)
     if input_mapper:
         warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)
@@ -1456,117 +1687,3 @@ def run_on_dataset(
     )
 
     return container.finish(batch_results, verbose=verbose)
-
-
-_RUN_ON_DATASET_DOCSTRING = """
-Run the Chain or language model on a dataset and store traces
-to the specified project name.
-
-Args:
-    dataset_name: Name of the dataset to run the chain on.
-    llm_or_chain_factory: Language model or Chain constructor to run
-        over the dataset. The Chain constructor is used to permit
-        independent calls on each example without carrying over state.
-    evaluation: Configuration for evaluators to run on the
-        results of the chain
-    concurrency_level: The number of async tasks to run concurrently.
-    project_name: Name of the project to store the traces in.
-        Defaults to {dataset_name}-{chain class name}-{datetime}.
-    project_metadata: Optional metadata to add to the project.
-        Useful for storing information the test variant.
-        (prompt version, model version, etc.)
-    client: LangSmith client to use to access the dataset and to
-        log feedback and run traces.
-    verbose: Whether to print progress.
-    tags: Tags to add to each run in the project.
-    revision_id: Optional revision identifier to assign this test run to
-        track the performance of different versions of your system.
-Returns:
-    A dictionary containing the run's project name and the resulting model outputs.
-
-
-For the (usually faster) async version of this function, see :func:`arun_on_dataset`.
-
-Examples
---------
-
-.. code-block:: python
-
-    from langsmith import Client
-    from langchain_openai import ChatOpenAI
-    from langchain.chains import LLMChain
-    from langchain.smith import smith_eval.RunEvalConfig, run_on_dataset
-
-    # Chains may have memory. Passing in a constructor function lets the
-    # evaluation framework avoid cross-contamination between runs.
-    def construct_chain():
-        llm = ChatOpenAI(temperature=0)
-        chain = LLMChain.from_string(
-            llm,
-            "What's the answer to {your_input_key}"
-        )
-        return chain
-
-    # Load off-the-shelf evaluators via config or the EvaluatorType (string or enum)
-    evaluation_config = smith_eval.RunEvalConfig(
-        evaluators=[
-            "qa",  # "Correctness" against a reference answer
-            "embedding_distance",
-            smith_eval.RunEvalConfig.Criteria("helpfulness"),
-            smith_eval.RunEvalConfig.Criteria({
-                "fifth-grader-score": "Do you have to be smarter than a fifth grader to answer this question?"
-            }),
-        ]
-    )
-
-    client = Client()
-    run_on_dataset(
-        client,
-        dataset_name="<my_dataset_name>",
-        llm_or_chain_factory=construct_chain,
-        evaluation=evaluation_config,
-    )
-
-You can also create custom evaluators by subclassing the
-:class:`StringEvaluator <langchain.evaluation.schema.StringEvaluator>`
-or LangSmith's `RunEvaluator` classes.
-
-.. code-block:: python
-
-    from typing import Optional
-    from langchain.evaluation import StringEvaluator
-
-    class MyStringEvaluator(StringEvaluator):
-
-        @property
-        def requires_input(self) -> bool:
-            return False
-
-        @property
-        def requires_reference(self) -> bool:
-            return True
-
-        @property
-        def evaluation_name(self) -> str:
-            return "exact_match"
-
-        def _evaluate_strings(self, prediction, reference=None, input=None, **kwargs) -> dict:
-            return {"score": prediction == reference}
-
-
-    evaluation_config = smith_eval.RunEvalConfig(
-        custom_evaluators = [MyStringEvaluator()],
-    )
-
-    run_on_dataset(
-        client,
-        dataset_name="<my_dataset_name>",
-        llm_or_chain_factory=construct_chain,
-        evaluation=evaluation_config,
-    )
-"""  # noqa: E501
-run_on_dataset.__doc__ = _RUN_ON_DATASET_DOCSTRING
-arun_on_dataset.__doc__ = _RUN_ON_DATASET_DOCSTRING.replace(
-    "run_on_dataset(",
-    "await arun_on_dataset(",
-)

langchain/smith/evaluation/string_run_evaluator.py
@@ -16,6 +16,7 @@ from langchain_core.load.serializable import Serializable
 from langchain_core.messages import BaseMessage, get_buffer_string, messages_from_dict
 from langsmith import EvaluationResult, RunEvaluator
 from langsmith.schemas import DataType, Example, Run
+from typing_extensions import override
 
 from langchain.chains.base import Chain
 from langchain.evaluation.schema import StringEvaluator
@@ -70,6 +71,15 @@ class LLMStringRunMapper(StringRunMapper):
         raise ValueError(msg)
 
     def serialize_inputs(self, inputs: dict) -> str:
+        """Serialize inputs.
+
+        Args:
+            inputs: The inputs from the run, expected to contain prompts or messages.
+        Returns:
+            The serialized input text from the prompts or messages.
+        Raises:
+            ValueError: If neither prompts nor messages are found in the inputs.
+        """
         if "prompts" in inputs:  # Should we even accept this?
             input_ = "\n\n".join(inputs["prompts"])
         elif "prompt" in inputs:
@@ -82,6 +92,18 @@ class LLMStringRunMapper(StringRunMapper):
         return input_
 
     def serialize_outputs(self, outputs: dict) -> str:
+        """Serialize outputs.
+
+        Args:
+            outputs: The outputs from the run, expected to contain generations.
+
+        Returns:
+            The serialized output text from the first generation.
+
+        Raises:
+            ValueError: If no generations are found in the outputs,
+                or if the generations are empty.
+        """
         if not outputs.get("generations"):
             msg = "Cannot evaluate LLM Run without generations."
             raise ValueError(msg)
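
The new docstrings above document behaviour that is visible in the surrounding context lines: `serialize_inputs` joins `inputs["prompts"]` with blank lines or falls back to `inputs["prompt"]`, and `serialize_outputs` refuses to run without a non-empty `generations` list. A rough sketch of that contract (only the branches shown in this diff; the real methods also handle chat messages):

    def serialize_inputs_sketch(inputs: dict) -> str:
        # Mirrors just the prompt-handling branches visible in the hunks above.
        if "prompts" in inputs:
            return "\n\n".join(inputs["prompts"])
        if "prompt" in inputs:
            return inputs["prompt"]
        raise ValueError("Cannot evaluate an LLM run without prompts or messages.")
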
@@ -185,6 +207,7 @@ class ChainStringRunMapper(StringRunMapper):
 class ToolStringRunMapper(StringRunMapper):
     """Map an input to the tool."""
 
+    @override
     def map(self, run: Run) -> dict[str, str]:
         if not run.outputs:
             msg = f"Run {run.id} has no outputs to evaluate."
@@ -256,10 +279,12 @@ class StringRunEvaluatorChain(Chain, RunEvaluator):
     """The evaluation chain."""
 
     @property
+    @override
     def input_keys(self) -> list[str]:
         return ["run", "example"]
 
     @property
+    @override
     def output_keys(self) -> list[str]:
         return ["feedback"]
 
@@ -330,6 +355,7 @@ class StringRunEvaluatorChain(Chain, RunEvaluator):
         feedback.evaluator_info[RUN_KEY] = output[RUN_KEY]
         return feedback
 
+    @override
     def evaluate_run(
         self,
         run: Run,
@@ -347,6 +373,7 @@ class StringRunEvaluatorChain(Chain, RunEvaluator):
                 # TODO: Add run ID once we can declare it via callbacks
             )
 
+    @override
     async def aevaluate_run(
         self,
         run: Run,
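
The `@override` markers added throughout this file (together with the `typing_extensions` import above) are purely static annotations: at runtime the decorator only tags the function, while type checkers use it to flag an override whose base-class method has been renamed or removed. A self-contained sketch of the pattern, with illustrative class names rather than the package's own:

    from typing_extensions import override

    class BaseMapper:
        def map(self, run: dict) -> dict[str, str]:
            return {"input": str(run)}

    class ToolMapper(BaseMapper):
        @override
        def map(self, run: dict) -> dict[str, str]:
            # If BaseMapper.map were renamed, a checker such as mypy or pyright
            # would now report this method instead of silently keeping it.
            return {"input": run.get("tool_input", "")}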

langchain/storage/encoder_backed.py
@@ -46,6 +46,7 @@ class EncoderBackedStore(BaseStore[K, V]):
         store.mset([(1, 3.14), (2, 2.718)])
         values = store.mget([1, 2])  # Retrieves [3.14, 2.718]
         store.mdelete([1, 2])  # Deletes the keys 1 and 2
+
     """
 
     def __init__(

langchain/tools/python/__init__.py
@@ -1,7 +1,7 @@
 from typing import Any
 
 
-def __getattr__(name: str = "") -> Any:
+def __getattr__(_: str = "") -> Any:
     msg = (
         "This tool has been moved to langchain experiment. "
         "This tool has access to a python REPL. "

{langchain-0.3.27.dist-info → langchain-0.4.0.dev0.dist-info}/METADATA
@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: langchain
-Version: 0.3.27
+Version: 0.4.0.dev0
 Summary: Building applications with LLMs through composability
 License: MIT
 Project-URL: Source Code, https://github.com/langchain-ai/langchain/tree/master/libs/langchain
 Project-URL: Release Notes, https://github.com/langchain-ai/langchain/releases?q=tag%3A%22langchain%3D%3D0%22&expanded=true
 Project-URL: repository, https://github.com/langchain-ai/langchain
 Requires-Python: <4.0,>=3.9
-Requires-Dist: langchain-core<1.0.0,>=0.3.72
+Requires-Dist: langchain-core<1.0.0,>=0.4.0.dev0
 Requires-Dist: langchain-text-splitters<1.0.0,>=0.3.9
 Requires-Dist: langsmith>=0.1.17
 Requires-Dist: pydantic<3.0.0,>=2.7.4
@@ -21,12 +21,6 @@ Provides-Extra: anthropic
 Requires-Dist: langchain-anthropic; extra == "anthropic"
 Provides-Extra: openai
 Requires-Dist: langchain-openai; extra == "openai"
-Provides-Extra: azure-ai
-Requires-Dist: langchain-azure-ai; extra == "azure-ai"
-Provides-Extra: cohere
-Requires-Dist: langchain-cohere; extra == "cohere"
-Provides-Extra: google-vertexai
-Requires-Dist: langchain-google-vertexai; extra == "google-vertexai"
 Provides-Extra: google-genai
 Requires-Dist: langchain-google-genai; extra == "google-genai"
 Provides-Extra: fireworks
@@ -41,12 +35,8 @@ Provides-Extra: huggingface
 Requires-Dist: langchain-huggingface; extra == "huggingface"
 Provides-Extra: groq
 Requires-Dist: langchain-groq; extra == "groq"
-Provides-Extra: aws
-Requires-Dist: langchain-aws; extra == "aws"
 Provides-Extra: deepseek
 Requires-Dist: langchain-deepseek; extra == "deepseek"
-Provides-Extra: xai
-Requires-Dist: langchain-xai; extra == "xai"
 Provides-Extra: perplexity
 Requires-Dist: langchain-perplexity; extra == "perplexity"
 Description-Content-Type: text/markdown