langchain 0.3.26__py3-none-any.whl → 0.4.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (591)
  1. langchain/__init__.py +110 -96
  2. langchain/_api/__init__.py +2 -2
  3. langchain/_api/deprecation.py +3 -3
  4. langchain/_api/module_import.py +51 -46
  5. langchain/_api/path.py +1 -1
  6. langchain/adapters/openai.py +8 -8
  7. langchain/agents/__init__.py +15 -12
  8. langchain/agents/agent.py +174 -151
  9. langchain/agents/agent_iterator.py +50 -26
  10. langchain/agents/agent_toolkits/__init__.py +7 -6
  11. langchain/agents/agent_toolkits/ainetwork/toolkit.py +1 -1
  12. langchain/agents/agent_toolkits/amadeus/toolkit.py +1 -1
  13. langchain/agents/agent_toolkits/azure_cognitive_services.py +1 -1
  14. langchain/agents/agent_toolkits/clickup/toolkit.py +1 -1
  15. langchain/agents/agent_toolkits/conversational_retrieval/openai_functions.py +6 -4
  16. langchain/agents/agent_toolkits/csv/__init__.py +4 -2
  17. langchain/agents/agent_toolkits/file_management/__init__.py +1 -1
  18. langchain/agents/agent_toolkits/file_management/toolkit.py +1 -1
  19. langchain/agents/agent_toolkits/github/toolkit.py +9 -9
  20. langchain/agents/agent_toolkits/gitlab/toolkit.py +1 -1
  21. langchain/agents/agent_toolkits/json/base.py +1 -1
  22. langchain/agents/agent_toolkits/multion/toolkit.py +1 -1
  23. langchain/agents/agent_toolkits/office365/toolkit.py +1 -1
  24. langchain/agents/agent_toolkits/openapi/base.py +1 -1
  25. langchain/agents/agent_toolkits/openapi/planner.py +2 -2
  26. langchain/agents/agent_toolkits/openapi/planner_prompt.py +10 -10
  27. langchain/agents/agent_toolkits/openapi/prompt.py +1 -1
  28. langchain/agents/agent_toolkits/openapi/toolkit.py +1 -1
  29. langchain/agents/agent_toolkits/pandas/__init__.py +4 -2
  30. langchain/agents/agent_toolkits/playwright/__init__.py +1 -1
  31. langchain/agents/agent_toolkits/playwright/toolkit.py +1 -1
  32. langchain/agents/agent_toolkits/powerbi/base.py +1 -1
  33. langchain/agents/agent_toolkits/powerbi/chat_base.py +1 -1
  34. langchain/agents/agent_toolkits/powerbi/prompt.py +2 -2
  35. langchain/agents/agent_toolkits/powerbi/toolkit.py +1 -1
  36. langchain/agents/agent_toolkits/python/__init__.py +4 -2
  37. langchain/agents/agent_toolkits/spark/__init__.py +4 -2
  38. langchain/agents/agent_toolkits/spark_sql/base.py +1 -1
  39. langchain/agents/agent_toolkits/spark_sql/toolkit.py +1 -1
  40. langchain/agents/agent_toolkits/sql/prompt.py +1 -1
  41. langchain/agents/agent_toolkits/sql/toolkit.py +1 -1
  42. langchain/agents/agent_toolkits/vectorstore/base.py +4 -2
  43. langchain/agents/agent_toolkits/vectorstore/prompt.py +2 -4
  44. langchain/agents/agent_toolkits/vectorstore/toolkit.py +12 -11
  45. langchain/agents/agent_toolkits/xorbits/__init__.py +4 -2
  46. langchain/agents/agent_toolkits/zapier/toolkit.py +1 -1
  47. langchain/agents/agent_types.py +6 -6
  48. langchain/agents/chat/base.py +8 -12
  49. langchain/agents/chat/output_parser.py +9 -6
  50. langchain/agents/chat/prompt.py +3 -4
  51. langchain/agents/conversational/base.py +11 -5
  52. langchain/agents/conversational/output_parser.py +4 -2
  53. langchain/agents/conversational/prompt.py +2 -3
  54. langchain/agents/conversational_chat/base.py +9 -5
  55. langchain/agents/conversational_chat/output_parser.py +9 -11
  56. langchain/agents/conversational_chat/prompt.py +5 -6
  57. langchain/agents/format_scratchpad/__init__.py +3 -3
  58. langchain/agents/format_scratchpad/log_to_messages.py +1 -1
  59. langchain/agents/format_scratchpad/openai_functions.py +8 -6
  60. langchain/agents/format_scratchpad/tools.py +5 -3
  61. langchain/agents/format_scratchpad/xml.py +33 -2
  62. langchain/agents/initialize.py +17 -9
  63. langchain/agents/json_chat/base.py +19 -18
  64. langchain/agents/json_chat/prompt.py +2 -3
  65. langchain/agents/load_tools.py +2 -1
  66. langchain/agents/loading.py +28 -18
  67. langchain/agents/mrkl/base.py +11 -4
  68. langchain/agents/mrkl/output_parser.py +17 -13
  69. langchain/agents/mrkl/prompt.py +1 -2
  70. langchain/agents/openai_assistant/base.py +81 -71
  71. langchain/agents/openai_functions_agent/agent_token_buffer_memory.py +2 -0
  72. langchain/agents/openai_functions_agent/base.py +47 -37
  73. langchain/agents/openai_functions_multi_agent/base.py +40 -27
  74. langchain/agents/openai_tools/base.py +9 -8
  75. langchain/agents/output_parsers/__init__.py +3 -3
  76. langchain/agents/output_parsers/json.py +8 -6
  77. langchain/agents/output_parsers/openai_functions.py +24 -9
  78. langchain/agents/output_parsers/openai_tools.py +16 -4
  79. langchain/agents/output_parsers/react_json_single_input.py +13 -5
  80. langchain/agents/output_parsers/react_single_input.py +18 -11
  81. langchain/agents/output_parsers/self_ask.py +5 -2
  82. langchain/agents/output_parsers/tools.py +32 -13
  83. langchain/agents/output_parsers/xml.py +102 -28
  84. langchain/agents/react/agent.py +5 -4
  85. langchain/agents/react/base.py +26 -17
  86. langchain/agents/react/output_parser.py +7 -6
  87. langchain/agents/react/textworld_prompt.py +0 -1
  88. langchain/agents/react/wiki_prompt.py +14 -15
  89. langchain/agents/schema.py +5 -2
  90. langchain/agents/self_ask_with_search/base.py +23 -15
  91. langchain/agents/self_ask_with_search/prompt.py +0 -1
  92. langchain/agents/structured_chat/base.py +19 -11
  93. langchain/agents/structured_chat/output_parser.py +29 -18
  94. langchain/agents/structured_chat/prompt.py +3 -4
  95. langchain/agents/tool_calling_agent/base.py +8 -6
  96. langchain/agents/tools.py +5 -2
  97. langchain/agents/utils.py +2 -3
  98. langchain/agents/xml/base.py +12 -6
  99. langchain/agents/xml/prompt.py +1 -2
  100. langchain/cache.py +12 -12
  101. langchain/callbacks/__init__.py +11 -11
  102. langchain/callbacks/aim_callback.py +2 -2
  103. langchain/callbacks/argilla_callback.py +1 -1
  104. langchain/callbacks/arize_callback.py +1 -1
  105. langchain/callbacks/arthur_callback.py +1 -1
  106. langchain/callbacks/base.py +7 -7
  107. langchain/callbacks/clearml_callback.py +1 -1
  108. langchain/callbacks/comet_ml_callback.py +1 -1
  109. langchain/callbacks/confident_callback.py +1 -1
  110. langchain/callbacks/context_callback.py +1 -1
  111. langchain/callbacks/flyte_callback.py +1 -1
  112. langchain/callbacks/human.py +2 -2
  113. langchain/callbacks/infino_callback.py +1 -1
  114. langchain/callbacks/labelstudio_callback.py +1 -1
  115. langchain/callbacks/llmonitor_callback.py +1 -1
  116. langchain/callbacks/manager.py +5 -5
  117. langchain/callbacks/mlflow_callback.py +2 -2
  118. langchain/callbacks/openai_info.py +1 -1
  119. langchain/callbacks/promptlayer_callback.py +1 -1
  120. langchain/callbacks/sagemaker_callback.py +1 -1
  121. langchain/callbacks/streaming_aiter.py +17 -3
  122. langchain/callbacks/streaming_aiter_final_only.py +16 -5
  123. langchain/callbacks/streaming_stdout_final_only.py +10 -3
  124. langchain/callbacks/streamlit/__init__.py +3 -2
  125. langchain/callbacks/streamlit/mutable_expander.py +1 -1
  126. langchain/callbacks/streamlit/streamlit_callback_handler.py +3 -3
  127. langchain/callbacks/tracers/__init__.py +1 -1
  128. langchain/callbacks/tracers/comet.py +1 -1
  129. langchain/callbacks/tracers/evaluation.py +1 -1
  130. langchain/callbacks/tracers/log_stream.py +1 -1
  131. langchain/callbacks/tracers/logging.py +12 -1
  132. langchain/callbacks/tracers/stdout.py +1 -1
  133. langchain/callbacks/trubrics_callback.py +1 -1
  134. langchain/callbacks/utils.py +4 -4
  135. langchain/callbacks/wandb_callback.py +1 -1
  136. langchain/callbacks/whylabs_callback.py +1 -1
  137. langchain/chains/api/base.py +41 -23
  138. langchain/chains/api/news_docs.py +1 -2
  139. langchain/chains/api/open_meteo_docs.py +1 -2
  140. langchain/chains/api/openapi/requests_chain.py +1 -1
  141. langchain/chains/api/openapi/response_chain.py +1 -1
  142. langchain/chains/api/podcast_docs.py +1 -2
  143. langchain/chains/api/prompt.py +1 -2
  144. langchain/chains/api/tmdb_docs.py +1 -2
  145. langchain/chains/base.py +96 -56
  146. langchain/chains/chat_vector_db/prompts.py +2 -3
  147. langchain/chains/combine_documents/__init__.py +1 -1
  148. langchain/chains/combine_documents/base.py +30 -11
  149. langchain/chains/combine_documents/map_reduce.py +41 -30
  150. langchain/chains/combine_documents/map_rerank.py +39 -24
  151. langchain/chains/combine_documents/reduce.py +48 -26
  152. langchain/chains/combine_documents/refine.py +27 -17
  153. langchain/chains/combine_documents/stuff.py +24 -13
  154. langchain/chains/constitutional_ai/base.py +11 -4
  155. langchain/chains/constitutional_ai/principles.py +22 -25
  156. langchain/chains/constitutional_ai/prompts.py +25 -28
  157. langchain/chains/conversation/base.py +9 -4
  158. langchain/chains/conversation/memory.py +5 -5
  159. langchain/chains/conversation/prompt.py +5 -5
  160. langchain/chains/conversational_retrieval/base.py +108 -79
  161. langchain/chains/conversational_retrieval/prompts.py +2 -3
  162. langchain/chains/elasticsearch_database/base.py +10 -10
  163. langchain/chains/elasticsearch_database/prompts.py +2 -3
  164. langchain/chains/ernie_functions/__init__.py +2 -2
  165. langchain/chains/example_generator.py +3 -1
  166. langchain/chains/flare/base.py +28 -12
  167. langchain/chains/flare/prompts.py +2 -0
  168. langchain/chains/graph_qa/cypher.py +2 -2
  169. langchain/chains/graph_qa/falkordb.py +1 -1
  170. langchain/chains/graph_qa/gremlin.py +1 -1
  171. langchain/chains/graph_qa/neptune_sparql.py +1 -1
  172. langchain/chains/graph_qa/prompts.py +2 -2
  173. langchain/chains/history_aware_retriever.py +2 -1
  174. langchain/chains/hyde/base.py +6 -5
  175. langchain/chains/hyde/prompts.py +5 -6
  176. langchain/chains/llm.py +82 -61
  177. langchain/chains/llm_bash/__init__.py +3 -2
  178. langchain/chains/llm_checker/base.py +19 -6
  179. langchain/chains/llm_checker/prompt.py +3 -4
  180. langchain/chains/llm_math/base.py +25 -10
  181. langchain/chains/llm_math/prompt.py +1 -2
  182. langchain/chains/llm_summarization_checker/base.py +22 -7
  183. langchain/chains/llm_symbolic_math/__init__.py +3 -2
  184. langchain/chains/loading.py +155 -97
  185. langchain/chains/mapreduce.py +4 -3
  186. langchain/chains/moderation.py +11 -9
  187. langchain/chains/natbot/base.py +11 -9
  188. langchain/chains/natbot/crawler.py +102 -76
  189. langchain/chains/natbot/prompt.py +2 -3
  190. langchain/chains/openai_functions/__init__.py +7 -7
  191. langchain/chains/openai_functions/base.py +15 -10
  192. langchain/chains/openai_functions/citation_fuzzy_match.py +21 -11
  193. langchain/chains/openai_functions/extraction.py +19 -19
  194. langchain/chains/openai_functions/openapi.py +39 -35
  195. langchain/chains/openai_functions/qa_with_structure.py +22 -15
  196. langchain/chains/openai_functions/tagging.py +4 -4
  197. langchain/chains/openai_tools/extraction.py +7 -8
  198. langchain/chains/qa_generation/base.py +8 -3
  199. langchain/chains/qa_generation/prompt.py +5 -5
  200. langchain/chains/qa_with_sources/base.py +17 -6
  201. langchain/chains/qa_with_sources/loading.py +16 -8
  202. langchain/chains/qa_with_sources/map_reduce_prompt.py +8 -9
  203. langchain/chains/qa_with_sources/refine_prompts.py +0 -1
  204. langchain/chains/qa_with_sources/retrieval.py +15 -6
  205. langchain/chains/qa_with_sources/stuff_prompt.py +6 -7
  206. langchain/chains/qa_with_sources/vector_db.py +21 -8
  207. langchain/chains/query_constructor/base.py +37 -34
  208. langchain/chains/query_constructor/ir.py +4 -4
  209. langchain/chains/query_constructor/parser.py +101 -34
  210. langchain/chains/query_constructor/prompt.py +5 -6
  211. langchain/chains/question_answering/chain.py +21 -10
  212. langchain/chains/question_answering/map_reduce_prompt.py +14 -14
  213. langchain/chains/question_answering/map_rerank_prompt.py +3 -3
  214. langchain/chains/question_answering/refine_prompts.py +2 -5
  215. langchain/chains/question_answering/stuff_prompt.py +5 -5
  216. langchain/chains/retrieval.py +1 -3
  217. langchain/chains/retrieval_qa/base.py +38 -27
  218. langchain/chains/retrieval_qa/prompt.py +1 -2
  219. langchain/chains/router/__init__.py +3 -3
  220. langchain/chains/router/base.py +38 -22
  221. langchain/chains/router/embedding_router.py +15 -8
  222. langchain/chains/router/llm_router.py +23 -20
  223. langchain/chains/router/multi_prompt.py +5 -2
  224. langchain/chains/router/multi_retrieval_qa.py +28 -5
  225. langchain/chains/sequential.py +30 -18
  226. langchain/chains/sql_database/prompt.py +14 -16
  227. langchain/chains/sql_database/query.py +7 -5
  228. langchain/chains/structured_output/__init__.py +1 -1
  229. langchain/chains/structured_output/base.py +77 -67
  230. langchain/chains/summarize/chain.py +11 -5
  231. langchain/chains/summarize/map_reduce_prompt.py +0 -1
  232. langchain/chains/summarize/stuff_prompt.py +0 -1
  233. langchain/chains/transform.py +9 -6
  234. langchain/chat_loaders/facebook_messenger.py +1 -1
  235. langchain/chat_loaders/langsmith.py +1 -1
  236. langchain/chat_loaders/utils.py +3 -3
  237. langchain/chat_models/__init__.py +20 -19
  238. langchain/chat_models/anthropic.py +1 -1
  239. langchain/chat_models/azureml_endpoint.py +1 -1
  240. langchain/chat_models/baidu_qianfan_endpoint.py +1 -1
  241. langchain/chat_models/base.py +213 -139
  242. langchain/chat_models/bedrock.py +1 -1
  243. langchain/chat_models/fake.py +1 -1
  244. langchain/chat_models/meta.py +1 -1
  245. langchain/chat_models/pai_eas_endpoint.py +1 -1
  246. langchain/chat_models/promptlayer_openai.py +1 -1
  247. langchain/chat_models/volcengine_maas.py +1 -1
  248. langchain/docstore/base.py +1 -1
  249. langchain/document_loaders/__init__.py +9 -9
  250. langchain/document_loaders/airbyte.py +3 -3
  251. langchain/document_loaders/assemblyai.py +1 -1
  252. langchain/document_loaders/azure_blob_storage_container.py +1 -1
  253. langchain/document_loaders/azure_blob_storage_file.py +1 -1
  254. langchain/document_loaders/baiducloud_bos_file.py +1 -1
  255. langchain/document_loaders/base.py +1 -1
  256. langchain/document_loaders/blob_loaders/__init__.py +1 -1
  257. langchain/document_loaders/blob_loaders/schema.py +1 -4
  258. langchain/document_loaders/blockchain.py +1 -1
  259. langchain/document_loaders/chatgpt.py +1 -1
  260. langchain/document_loaders/college_confidential.py +1 -1
  261. langchain/document_loaders/confluence.py +1 -1
  262. langchain/document_loaders/email.py +1 -1
  263. langchain/document_loaders/facebook_chat.py +1 -1
  264. langchain/document_loaders/markdown.py +1 -1
  265. langchain/document_loaders/notebook.py +1 -1
  266. langchain/document_loaders/org_mode.py +1 -1
  267. langchain/document_loaders/parsers/__init__.py +1 -1
  268. langchain/document_loaders/parsers/docai.py +1 -1
  269. langchain/document_loaders/parsers/generic.py +1 -1
  270. langchain/document_loaders/parsers/html/__init__.py +1 -1
  271. langchain/document_loaders/parsers/html/bs4.py +1 -1
  272. langchain/document_loaders/parsers/language/cobol.py +1 -1
  273. langchain/document_loaders/parsers/language/python.py +1 -1
  274. langchain/document_loaders/parsers/msword.py +1 -1
  275. langchain/document_loaders/parsers/pdf.py +5 -5
  276. langchain/document_loaders/parsers/registry.py +1 -1
  277. langchain/document_loaders/pdf.py +8 -8
  278. langchain/document_loaders/powerpoint.py +1 -1
  279. langchain/document_loaders/pyspark_dataframe.py +1 -1
  280. langchain/document_loaders/telegram.py +2 -2
  281. langchain/document_loaders/tencent_cos_directory.py +1 -1
  282. langchain/document_loaders/unstructured.py +5 -5
  283. langchain/document_loaders/url_playwright.py +1 -1
  284. langchain/document_loaders/whatsapp_chat.py +1 -1
  285. langchain/document_loaders/youtube.py +2 -2
  286. langchain/document_transformers/__init__.py +3 -3
  287. langchain/document_transformers/beautiful_soup_transformer.py +1 -1
  288. langchain/document_transformers/doctran_text_extract.py +1 -1
  289. langchain/document_transformers/doctran_text_qa.py +1 -1
  290. langchain/document_transformers/doctran_text_translate.py +1 -1
  291. langchain/document_transformers/embeddings_redundant_filter.py +3 -3
  292. langchain/document_transformers/google_translate.py +1 -1
  293. langchain/document_transformers/html2text.py +1 -1
  294. langchain/document_transformers/nuclia_text_transform.py +1 -1
  295. langchain/embeddings/__init__.py +5 -5
  296. langchain/embeddings/base.py +35 -24
  297. langchain/embeddings/cache.py +37 -32
  298. langchain/embeddings/fake.py +1 -1
  299. langchain/embeddings/huggingface.py +2 -2
  300. langchain/evaluation/__init__.py +22 -22
  301. langchain/evaluation/agents/trajectory_eval_chain.py +26 -25
  302. langchain/evaluation/agents/trajectory_eval_prompt.py +6 -9
  303. langchain/evaluation/comparison/__init__.py +1 -1
  304. langchain/evaluation/comparison/eval_chain.py +21 -13
  305. langchain/evaluation/comparison/prompt.py +1 -2
  306. langchain/evaluation/criteria/__init__.py +1 -1
  307. langchain/evaluation/criteria/eval_chain.py +23 -11
  308. langchain/evaluation/criteria/prompt.py +2 -3
  309. langchain/evaluation/embedding_distance/base.py +34 -20
  310. langchain/evaluation/exact_match/base.py +14 -1
  311. langchain/evaluation/loading.py +16 -11
  312. langchain/evaluation/parsing/base.py +20 -4
  313. langchain/evaluation/parsing/json_distance.py +24 -10
  314. langchain/evaluation/parsing/json_schema.py +13 -12
  315. langchain/evaluation/qa/__init__.py +1 -1
  316. langchain/evaluation/qa/eval_chain.py +20 -5
  317. langchain/evaluation/qa/eval_prompt.py +7 -8
  318. langchain/evaluation/qa/generate_chain.py +4 -1
  319. langchain/evaluation/qa/generate_prompt.py +2 -4
  320. langchain/evaluation/regex_match/base.py +9 -1
  321. langchain/evaluation/schema.py +38 -30
  322. langchain/evaluation/scoring/__init__.py +1 -1
  323. langchain/evaluation/scoring/eval_chain.py +23 -15
  324. langchain/evaluation/scoring/prompt.py +0 -1
  325. langchain/evaluation/string_distance/base.py +20 -9
  326. langchain/globals.py +12 -11
  327. langchain/graphs/__init__.py +6 -6
  328. langchain/graphs/graph_document.py +1 -1
  329. langchain/graphs/networkx_graph.py +2 -2
  330. langchain/hub.py +9 -11
  331. langchain/indexes/__init__.py +3 -3
  332. langchain/indexes/_sql_record_manager.py +63 -46
  333. langchain/indexes/prompts/entity_extraction.py +1 -2
  334. langchain/indexes/prompts/entity_summarization.py +1 -2
  335. langchain/indexes/prompts/knowledge_triplet_extraction.py +1 -3
  336. langchain/indexes/vectorstore.py +35 -19
  337. langchain/llms/__init__.py +13 -13
  338. langchain/llms/ai21.py +1 -1
  339. langchain/llms/azureml_endpoint.py +4 -4
  340. langchain/llms/base.py +15 -7
  341. langchain/llms/bedrock.py +1 -1
  342. langchain/llms/cloudflare_workersai.py +1 -1
  343. langchain/llms/gradient_ai.py +1 -1
  344. langchain/llms/loading.py +1 -1
  345. langchain/llms/openai.py +1 -1
  346. langchain/llms/sagemaker_endpoint.py +1 -1
  347. langchain/load/dump.py +1 -1
  348. langchain/load/load.py +1 -1
  349. langchain/load/serializable.py +3 -3
  350. langchain/memory/__init__.py +3 -3
  351. langchain/memory/buffer.py +14 -7
  352. langchain/memory/buffer_window.py +2 -0
  353. langchain/memory/chat_memory.py +14 -8
  354. langchain/memory/chat_message_histories/__init__.py +1 -1
  355. langchain/memory/chat_message_histories/astradb.py +1 -1
  356. langchain/memory/chat_message_histories/cassandra.py +1 -1
  357. langchain/memory/chat_message_histories/cosmos_db.py +1 -1
  358. langchain/memory/chat_message_histories/dynamodb.py +1 -1
  359. langchain/memory/chat_message_histories/elasticsearch.py +1 -1
  360. langchain/memory/chat_message_histories/file.py +1 -1
  361. langchain/memory/chat_message_histories/firestore.py +1 -1
  362. langchain/memory/chat_message_histories/momento.py +1 -1
  363. langchain/memory/chat_message_histories/mongodb.py +1 -1
  364. langchain/memory/chat_message_histories/neo4j.py +1 -1
  365. langchain/memory/chat_message_histories/postgres.py +1 -1
  366. langchain/memory/chat_message_histories/redis.py +1 -1
  367. langchain/memory/chat_message_histories/rocksetdb.py +1 -1
  368. langchain/memory/chat_message_histories/singlestoredb.py +1 -1
  369. langchain/memory/chat_message_histories/streamlit.py +1 -1
  370. langchain/memory/chat_message_histories/upstash_redis.py +1 -1
  371. langchain/memory/chat_message_histories/xata.py +1 -1
  372. langchain/memory/chat_message_histories/zep.py +1 -1
  373. langchain/memory/combined.py +14 -13
  374. langchain/memory/entity.py +131 -61
  375. langchain/memory/prompt.py +10 -11
  376. langchain/memory/readonly.py +0 -2
  377. langchain/memory/simple.py +4 -3
  378. langchain/memory/summary.py +43 -11
  379. langchain/memory/summary_buffer.py +20 -8
  380. langchain/memory/token_buffer.py +2 -0
  381. langchain/memory/utils.py +3 -2
  382. langchain/memory/vectorstore.py +12 -5
  383. langchain/memory/vectorstore_token_buffer_memory.py +5 -5
  384. langchain/model_laboratory.py +12 -11
  385. langchain/output_parsers/__init__.py +4 -4
  386. langchain/output_parsers/boolean.py +7 -4
  387. langchain/output_parsers/combining.py +14 -7
  388. langchain/output_parsers/datetime.py +32 -31
  389. langchain/output_parsers/enum.py +10 -4
  390. langchain/output_parsers/fix.py +60 -53
  391. langchain/output_parsers/format_instructions.py +6 -8
  392. langchain/output_parsers/json.py +2 -2
  393. langchain/output_parsers/list.py +2 -2
  394. langchain/output_parsers/loading.py +9 -9
  395. langchain/output_parsers/openai_functions.py +3 -3
  396. langchain/output_parsers/openai_tools.py +1 -1
  397. langchain/output_parsers/pandas_dataframe.py +59 -48
  398. langchain/output_parsers/prompts.py +1 -2
  399. langchain/output_parsers/rail_parser.py +1 -1
  400. langchain/output_parsers/regex.py +9 -8
  401. langchain/output_parsers/regex_dict.py +7 -10
  402. langchain/output_parsers/retry.py +99 -80
  403. langchain/output_parsers/structured.py +21 -6
  404. langchain/output_parsers/yaml.py +19 -11
  405. langchain/prompts/__init__.py +5 -3
  406. langchain/prompts/base.py +5 -5
  407. langchain/prompts/chat.py +8 -8
  408. langchain/prompts/example_selector/__init__.py +3 -1
  409. langchain/prompts/example_selector/semantic_similarity.py +2 -2
  410. langchain/prompts/few_shot.py +1 -1
  411. langchain/prompts/loading.py +3 -3
  412. langchain/prompts/prompt.py +1 -1
  413. langchain/pydantic_v1/__init__.py +1 -1
  414. langchain/retrievers/__init__.py +5 -5
  415. langchain/retrievers/bedrock.py +2 -2
  416. langchain/retrievers/bm25.py +1 -1
  417. langchain/retrievers/contextual_compression.py +14 -8
  418. langchain/retrievers/docarray.py +1 -1
  419. langchain/retrievers/document_compressors/__init__.py +5 -4
  420. langchain/retrievers/document_compressors/base.py +12 -6
  421. langchain/retrievers/document_compressors/chain_extract.py +5 -3
  422. langchain/retrievers/document_compressors/chain_extract_prompt.py +2 -3
  423. langchain/retrievers/document_compressors/chain_filter.py +9 -9
  424. langchain/retrievers/document_compressors/chain_filter_prompt.py +1 -2
  425. langchain/retrievers/document_compressors/cohere_rerank.py +17 -15
  426. langchain/retrievers/document_compressors/cross_encoder_rerank.py +2 -0
  427. langchain/retrievers/document_compressors/embeddings_filter.py +24 -17
  428. langchain/retrievers/document_compressors/flashrank_rerank.py +1 -1
  429. langchain/retrievers/document_compressors/listwise_rerank.py +8 -5
  430. langchain/retrievers/ensemble.py +30 -27
  431. langchain/retrievers/google_cloud_documentai_warehouse.py +1 -1
  432. langchain/retrievers/google_vertex_ai_search.py +2 -2
  433. langchain/retrievers/kendra.py +10 -10
  434. langchain/retrievers/llama_index.py +1 -1
  435. langchain/retrievers/merger_retriever.py +11 -11
  436. langchain/retrievers/milvus.py +1 -1
  437. langchain/retrievers/multi_query.py +35 -27
  438. langchain/retrievers/multi_vector.py +24 -9
  439. langchain/retrievers/parent_document_retriever.py +33 -9
  440. langchain/retrievers/re_phraser.py +6 -5
  441. langchain/retrievers/self_query/base.py +157 -127
  442. langchain/retrievers/time_weighted_retriever.py +21 -7
  443. langchain/retrievers/zilliz.py +1 -1
  444. langchain/runnables/hub.py +12 -0
  445. langchain/runnables/openai_functions.py +12 -2
  446. langchain/schema/__init__.py +23 -23
  447. langchain/schema/cache.py +1 -1
  448. langchain/schema/callbacks/base.py +7 -7
  449. langchain/schema/callbacks/manager.py +19 -19
  450. langchain/schema/callbacks/tracers/base.py +1 -1
  451. langchain/schema/callbacks/tracers/evaluation.py +1 -1
  452. langchain/schema/callbacks/tracers/langchain.py +1 -1
  453. langchain/schema/callbacks/tracers/langchain_v1.py +1 -1
  454. langchain/schema/callbacks/tracers/log_stream.py +1 -1
  455. langchain/schema/callbacks/tracers/schemas.py +8 -8
  456. langchain/schema/callbacks/tracers/stdout.py +3 -3
  457. langchain/schema/document.py +1 -1
  458. langchain/schema/language_model.py +2 -2
  459. langchain/schema/messages.py +12 -12
  460. langchain/schema/output.py +3 -3
  461. langchain/schema/output_parser.py +3 -3
  462. langchain/schema/runnable/__init__.py +3 -3
  463. langchain/schema/runnable/base.py +9 -9
  464. langchain/schema/runnable/config.py +5 -5
  465. langchain/schema/runnable/configurable.py +1 -1
  466. langchain/schema/runnable/history.py +1 -1
  467. langchain/schema/runnable/passthrough.py +1 -1
  468. langchain/schema/runnable/utils.py +16 -16
  469. langchain/schema/vectorstore.py +1 -1
  470. langchain/smith/__init__.py +2 -1
  471. langchain/smith/evaluation/__init__.py +2 -2
  472. langchain/smith/evaluation/config.py +9 -23
  473. langchain/smith/evaluation/name_generation.py +3 -3
  474. langchain/smith/evaluation/progress.py +22 -4
  475. langchain/smith/evaluation/runner_utils.py +416 -247
  476. langchain/smith/evaluation/string_run_evaluator.py +102 -68
  477. langchain/storage/__init__.py +2 -2
  478. langchain/storage/_lc_store.py +4 -2
  479. langchain/storage/encoder_backed.py +7 -2
  480. langchain/storage/file_system.py +19 -16
  481. langchain/storage/in_memory.py +1 -1
  482. langchain/storage/upstash_redis.py +1 -1
  483. langchain/text_splitter.py +15 -15
  484. langchain/tools/__init__.py +28 -26
  485. langchain/tools/ainetwork/app.py +1 -1
  486. langchain/tools/ainetwork/base.py +1 -1
  487. langchain/tools/ainetwork/owner.py +1 -1
  488. langchain/tools/ainetwork/rule.py +1 -1
  489. langchain/tools/ainetwork/transfer.py +1 -1
  490. langchain/tools/ainetwork/value.py +1 -1
  491. langchain/tools/amadeus/closest_airport.py +1 -1
  492. langchain/tools/amadeus/flight_search.py +1 -1
  493. langchain/tools/azure_cognitive_services/__init__.py +1 -1
  494. langchain/tools/base.py +4 -4
  495. langchain/tools/bearly/tool.py +1 -1
  496. langchain/tools/bing_search/__init__.py +1 -1
  497. langchain/tools/bing_search/tool.py +1 -1
  498. langchain/tools/dataforseo_api_search/__init__.py +1 -1
  499. langchain/tools/dataforseo_api_search/tool.py +1 -1
  500. langchain/tools/ddg_search/tool.py +1 -1
  501. langchain/tools/e2b_data_analysis/tool.py +2 -2
  502. langchain/tools/edenai/__init__.py +1 -1
  503. langchain/tools/file_management/__init__.py +1 -1
  504. langchain/tools/file_management/copy.py +1 -1
  505. langchain/tools/file_management/delete.py +1 -1
  506. langchain/tools/gmail/__init__.py +2 -2
  507. langchain/tools/gmail/get_message.py +1 -1
  508. langchain/tools/gmail/search.py +1 -1
  509. langchain/tools/gmail/send_message.py +1 -1
  510. langchain/tools/google_finance/__init__.py +1 -1
  511. langchain/tools/google_finance/tool.py +1 -1
  512. langchain/tools/google_scholar/__init__.py +1 -1
  513. langchain/tools/google_scholar/tool.py +1 -1
  514. langchain/tools/google_search/__init__.py +1 -1
  515. langchain/tools/google_search/tool.py +1 -1
  516. langchain/tools/google_serper/__init__.py +1 -1
  517. langchain/tools/google_serper/tool.py +1 -1
  518. langchain/tools/google_trends/__init__.py +1 -1
  519. langchain/tools/google_trends/tool.py +1 -1
  520. langchain/tools/jira/tool.py +20 -1
  521. langchain/tools/json/tool.py +25 -3
  522. langchain/tools/memorize/tool.py +1 -1
  523. langchain/tools/multion/__init__.py +1 -1
  524. langchain/tools/multion/update_session.py +1 -1
  525. langchain/tools/office365/__init__.py +2 -2
  526. langchain/tools/office365/events_search.py +1 -1
  527. langchain/tools/office365/messages_search.py +1 -1
  528. langchain/tools/office365/send_event.py +1 -1
  529. langchain/tools/office365/send_message.py +1 -1
  530. langchain/tools/openapi/utils/api_models.py +6 -6
  531. langchain/tools/playwright/__init__.py +5 -5
  532. langchain/tools/playwright/click.py +1 -1
  533. langchain/tools/playwright/extract_hyperlinks.py +1 -1
  534. langchain/tools/playwright/get_elements.py +1 -1
  535. langchain/tools/playwright/navigate.py +1 -1
  536. langchain/tools/plugin.py +2 -2
  537. langchain/tools/powerbi/tool.py +1 -1
  538. langchain/tools/python/__init__.py +3 -2
  539. langchain/tools/reddit_search/tool.py +1 -1
  540. langchain/tools/render.py +2 -2
  541. langchain/tools/requests/tool.py +2 -2
  542. langchain/tools/searchapi/tool.py +1 -1
  543. langchain/tools/searx_search/tool.py +1 -1
  544. langchain/tools/slack/get_message.py +1 -1
  545. langchain/tools/spark_sql/tool.py +1 -1
  546. langchain/tools/sql_database/tool.py +1 -1
  547. langchain/tools/tavily_search/__init__.py +1 -1
  548. langchain/tools/tavily_search/tool.py +1 -1
  549. langchain/tools/zapier/__init__.py +1 -1
  550. langchain/tools/zapier/tool.py +24 -2
  551. langchain/utilities/__init__.py +4 -4
  552. langchain/utilities/arcee.py +4 -4
  553. langchain/utilities/clickup.py +4 -4
  554. langchain/utilities/dalle_image_generator.py +1 -1
  555. langchain/utilities/dataforseo_api_search.py +1 -1
  556. langchain/utilities/opaqueprompts.py +1 -1
  557. langchain/utilities/reddit_search.py +1 -1
  558. langchain/utilities/sql_database.py +1 -1
  559. langchain/utilities/tavily_search.py +1 -1
  560. langchain/utilities/vertexai.py +2 -2
  561. langchain/utils/__init__.py +1 -1
  562. langchain/utils/aiter.py +1 -1
  563. langchain/utils/html.py +3 -3
  564. langchain/utils/input.py +1 -1
  565. langchain/utils/iter.py +1 -1
  566. langchain/utils/json_schema.py +1 -3
  567. langchain/utils/strings.py +1 -1
  568. langchain/utils/utils.py +6 -6
  569. langchain/vectorstores/__init__.py +5 -5
  570. langchain/vectorstores/alibabacloud_opensearch.py +1 -1
  571. langchain/vectorstores/azure_cosmos_db.py +1 -1
  572. langchain/vectorstores/clickhouse.py +1 -1
  573. langchain/vectorstores/elastic_vector_search.py +1 -1
  574. langchain/vectorstores/elasticsearch.py +2 -2
  575. langchain/vectorstores/myscale.py +1 -1
  576. langchain/vectorstores/neo4j_vector.py +1 -1
  577. langchain/vectorstores/pgembedding.py +1 -1
  578. langchain/vectorstores/qdrant.py +1 -1
  579. langchain/vectorstores/redis/__init__.py +1 -1
  580. langchain/vectorstores/redis/base.py +1 -1
  581. langchain/vectorstores/redis/filters.py +4 -4
  582. langchain/vectorstores/redis/schema.py +6 -6
  583. langchain/vectorstores/sklearn.py +2 -2
  584. langchain/vectorstores/starrocks.py +1 -1
  585. langchain/vectorstores/utils.py +1 -1
  586. {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/METADATA +4 -14
  587. {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/RECORD +590 -591
  588. {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/WHEEL +1 -1
  589. langchain/smith/evaluation/utils.py +0 -0
  590. {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/entry_points.txt +0 -0
  591. {langchain-0.3.26.dist-info → langchain-0.4.0.dev0.dist-info}/licenses/LICENSE +0 -0
@@ -98,10 +98,8 @@ class TestResult(dict):
98
98
  to_drop = [
99
99
  col
100
100
  for col in df.columns
101
- if col.startswith("inputs.")
102
- or col.startswith("outputs.")
101
+ if col.startswith(("inputs.", "outputs.", "reference"))
103
102
  or col in {"input", "output"}
104
- or col.startswith("reference")
105
103
  ]
106
104
  return df.describe(include="all").drop(to_drop, axis=1)
107
105
 
@@ -110,10 +108,11 @@ class TestResult(dict):
110
108
  try:
111
109
  import pandas as pd
112
110
  except ImportError as e:
113
- raise ImportError(
111
+ msg = (
114
112
  "Pandas is required to convert the results to a dataframe."
115
113
  " to install pandas, run `pip install pandas`."
116
- ) from e
114
+ )
115
+ raise ImportError(msg) from e
117
116
 
118
117
  indices = []
119
118
  records = []
@@ -134,7 +133,7 @@ class TestResult(dict):
134
133
  if "reference" in result:
135
134
  if isinstance(result["reference"], dict):
136
135
  r.update(
137
- {f"reference.{k}": v for k, v in result["reference"].items()}
136
+ {f"reference.{k}": v for k, v in result["reference"].items()},
138
137
  )
139
138
  else:
140
139
  r["reference"] = result["reference"]
@@ -144,7 +143,7 @@ class TestResult(dict):
144
143
  "error": result.get("Error"),
145
144
  "execution_time": result["execution_time"],
146
145
  "run_id": result.get("run_id"),
147
- }
146
+ },
148
147
  )
149
148
  records.append(r)
150
149
  indices.append(example_id)
@@ -156,13 +155,29 @@ class EvalError(dict):
156
155
  """Your architecture raised an error."""
157
156
 
158
157
  def __init__(self, Error: BaseException, **kwargs: Any) -> None:
158
+ """Initialize the EvalError with an error and additional attributes.
159
+
160
+ Args:
161
+ Error: The error that occurred.
162
+ **kwargs: Additional attributes to include in the error.
163
+ """
159
164
  super().__init__(Error=Error, **kwargs)
160
165
 
161
166
  def __getattr__(self, name: str) -> Any:
167
+ """Get an attribute from the EvalError.
168
+
169
+ Args:
170
+ name: The name of the attribute to get.
171
+ Returns:
172
+ The value of the attribute.
173
+ Raises:
174
+ AttributeError: If the attribute does not exist.
175
+ """
162
176
  try:
163
177
  return self[name]
164
- except KeyError:
165
- raise AttributeError(f"'EvalError' object has no attribute '{name}'")
178
+ except KeyError as e:
179
+ msg = f"'EvalError' object has no attribute '{name}'"
180
+ raise AttributeError(msg) from e
166
181
 
167
182
 
168
183
  def _wrap_in_chain_factory(
@@ -176,7 +191,7 @@ def _wrap_in_chain_factory(
176
191
  chain_class = chain.__class__.__name__
177
192
  if llm_or_chain_factory.memory is not None:
178
193
  memory_class = chain.memory.__class__.__name__
179
- raise ValueError(
194
+ msg = (
180
195
  "Cannot directly evaluate a chain with stateful memory."
181
196
  " To evaluate this chain, pass in a chain constructor"
182
197
  " that initializes fresh memory each time it is called."
@@ -189,40 +204,40 @@ def _wrap_in_chain_factory(
189
204
  "(memory=new_memory, ...)\n\n"
190
205
  f'run_on_dataset("{dataset_name}", chain_constructor, ...)'
191
206
  )
207
+ raise ValueError(msg)
192
208
  return lambda: chain
193
- elif isinstance(llm_or_chain_factory, BaseLanguageModel):
209
+ if isinstance(llm_or_chain_factory, BaseLanguageModel):
194
210
  return llm_or_chain_factory
195
- elif isinstance(llm_or_chain_factory, Runnable):
211
+ if isinstance(llm_or_chain_factory, Runnable):
196
212
  # Memory may exist here, but it's not elegant to check all those cases.
197
213
  lcf = llm_or_chain_factory
198
214
  return lambda: lcf
199
- elif callable(llm_or_chain_factory):
215
+ if callable(llm_or_chain_factory):
200
216
  if is_traceable_function(llm_or_chain_factory):
201
- runnable_ = as_runnable(cast(Callable, llm_or_chain_factory))
217
+ runnable_ = as_runnable(cast("Callable", llm_or_chain_factory))
202
218
  return lambda: runnable_
203
219
  try:
204
220
  _model = llm_or_chain_factory() # type: ignore[call-arg]
205
221
  except TypeError:
206
222
  # It's an arbitrary function, wrap it in a RunnableLambda
207
- user_func = cast(Callable, llm_or_chain_factory)
223
+ user_func = cast("Callable", llm_or_chain_factory)
208
224
  sig = inspect.signature(user_func)
209
- logger.info(f"Wrapping function {sig} as RunnableLambda.")
225
+ logger.info("Wrapping function %s as RunnableLambda.", sig)
210
226
  wrapped = RunnableLambda(user_func)
211
227
  return lambda: wrapped
212
- constructor = cast(Callable, llm_or_chain_factory)
228
+ constructor = cast("Callable", llm_or_chain_factory)
213
229
  if isinstance(_model, BaseLanguageModel):
214
230
  # It's not uncommon to do an LLM constructor instead of raw LLM,
215
231
  # so we'll unpack it for the user.
216
232
  return _model
217
- elif is_traceable_function(cast(Callable, _model)):
218
- runnable_ = as_runnable(cast(Callable, _model))
233
+ if is_traceable_function(cast("Callable", _model)):
234
+ runnable_ = as_runnable(cast("Callable", _model))
219
235
  return lambda: runnable_
220
- elif not isinstance(_model, Runnable):
236
+ if not isinstance(_model, Runnable):
221
237
  # This is unlikely to happen - a constructor for a model function
222
238
  return lambda: RunnableLambda(constructor)
223
- else:
224
- # Typical correct case
225
- return constructor
239
+ # Typical correct case
240
+ return constructor
226
241
  return llm_or_chain_factory
227
242
 
228
243
 
@@ -238,23 +253,24 @@ def _get_prompt(inputs: dict[str, Any]) -> str:
238
253
  InputFormatError: If the input format is invalid.
239
254
  """
240
255
  if not inputs:
241
- raise InputFormatError("Inputs should not be empty.")
256
+ msg = "Inputs should not be empty."
257
+ raise InputFormatError(msg)
242
258
 
243
259
  prompts = []
244
260
  if "prompt" in inputs:
245
261
  if not isinstance(inputs["prompt"], str):
246
- raise InputFormatError(
247
- f"Expected string for 'prompt', got {type(inputs['prompt']).__name__}"
248
- )
262
+ msg = f"Expected string for 'prompt', got {type(inputs['prompt']).__name__}"
263
+ raise InputFormatError(msg)
249
264
  prompts = [inputs["prompt"]]
250
265
  elif "prompts" in inputs:
251
266
  if not isinstance(inputs["prompts"], list) or not all(
252
267
  isinstance(i, str) for i in inputs["prompts"]
253
268
  ):
254
- raise InputFormatError(
269
+ msg = (
255
270
  "Expected list of strings for 'prompts',"
256
271
  f" got {type(inputs['prompts']).__name__}"
257
272
  )
273
+ raise InputFormatError(msg)
258
274
  prompts = inputs["prompts"]
259
275
  elif len(inputs) == 1:
260
276
  prompt_ = next(iter(inputs.values()))
@@ -263,17 +279,15 @@ def _get_prompt(inputs: dict[str, Any]) -> str:
263
279
  elif isinstance(prompt_, list) and all(isinstance(i, str) for i in prompt_):
264
280
  prompts = prompt_
265
281
  else:
266
- raise InputFormatError(f"LLM Run expects string prompt input. Got {inputs}")
282
+ msg = f"LLM Run expects string prompt input. Got {inputs}"
283
+ raise InputFormatError(msg)
267
284
  else:
268
- raise InputFormatError(
269
- f"LLM Run expects 'prompt' or 'prompts' in inputs. Got {inputs}"
270
- )
285
+ msg = f"LLM Run expects 'prompt' or 'prompts' in inputs. Got {inputs}"
286
+ raise InputFormatError(msg)
271
287
  if len(prompts) == 1:
272
288
  return prompts[0]
273
- else:
274
- raise InputFormatError(
275
- f"LLM Run expects single prompt input. Got {len(prompts)} prompts."
276
- )
289
+ msg = f"LLM Run expects single prompt input. Got {len(prompts)} prompts."
290
+ raise InputFormatError(msg)
277
291
 
278
292
 
279
293
  class ChatModelInput(TypedDict):
@@ -298,7 +312,8 @@ def _get_messages(inputs: dict[str, Any]) -> dict:
298
312
  InputFormatError: If the input format is invalid.
299
313
  """
300
314
  if not inputs:
301
- raise InputFormatError("Inputs should not be empty.")
315
+ msg = "Inputs should not be empty."
316
+ raise InputFormatError(msg)
302
317
  input_copy = inputs.copy()
303
318
  if "messages" in inputs:
304
319
  input_copy["input"] = input_copy.pop("messages")
@@ -313,16 +328,17 @@ def _get_messages(inputs: dict[str, Any]) -> dict:
313
328
  if len(raw_messages) == 1:
314
329
  input_copy["input"] = messages_from_dict(raw_messages[0])
315
330
  else:
316
- raise InputFormatError(
331
+ msg = (
317
332
  "Batch messages not supported. Please provide a"
318
333
  " single list of messages."
319
334
  )
335
+ raise InputFormatError(msg)
320
336
  return input_copy
321
- else:
322
- raise InputFormatError(
323
- f"Chat Run expects single List[dict] or List[List[dict]] 'messages'"
324
- f" input. Got {inputs}"
325
- )
337
+ msg = (
338
+ f"Chat Run expects single List[dict] or List[List[dict]] 'messages'"
339
+ f" input. Got {inputs}"
340
+ )
341
+ raise InputFormatError(msg)
326
342
 
327
343
 
328
344
  ## Shared data validation utilities
@@ -336,20 +352,21 @@ def _validate_example_inputs_for_language_model(
336
352
  isinstance(prompt_input, list)
337
353
  and all(isinstance(msg, BaseMessage) for msg in prompt_input)
338
354
  ):
339
- raise InputFormatError(
355
+ msg = (
340
356
  "When using an input_mapper to prepare dataset example inputs"
341
357
  " for an LLM or chat model, the output must a single string or"
342
358
  " a list of chat messages."
343
359
  f"\nGot: {prompt_input} of type {type(prompt_input)}."
344
360
  )
361
+ raise InputFormatError(msg)
345
362
  else:
346
363
  try:
347
364
  _get_prompt(first_example.inputs or {})
348
365
  except InputFormatError:
349
366
  try:
350
367
  _get_messages(first_example.inputs or {})
351
- except InputFormatError:
352
- raise InputFormatError(
368
+ except InputFormatError as err2:
369
+ msg = (
353
370
  "Example inputs do not match language model input format. "
354
371
  "Expected a dictionary with messages or a single prompt."
355
372
  f" Got: {first_example.inputs}"
@@ -357,6 +374,7 @@ def _validate_example_inputs_for_language_model(
357
374
  " to convert the example.inputs to a compatible format"
358
375
  " for the llm or chat model you wish to evaluate."
359
376
  )
377
+ raise InputFormatError(msg) from err2
360
378
 
361
379
 
362
380
  def _validate_example_inputs_for_chain(
@@ -369,16 +387,18 @@ def _validate_example_inputs_for_chain(
369
387
  first_inputs = input_mapper(first_example.inputs or {})
370
388
  missing_keys = set(chain.input_keys).difference(first_inputs)
371
389
  if not isinstance(first_inputs, dict):
372
- raise InputFormatError(
390
+ msg = (
373
391
  "When using an input_mapper to prepare dataset example"
374
392
  " inputs for a chain, the mapped value must be a dictionary."
375
393
  f"\nGot: {first_inputs} of type {type(first_inputs)}."
376
394
  )
395
+ raise InputFormatError(msg)
377
396
  if missing_keys:
378
- raise InputFormatError(
397
+ msg = (
379
398
  "Missing keys after loading example using input_mapper."
380
399
  f"\nExpected: {chain.input_keys}. Got: {first_inputs.keys()}"
381
400
  )
401
+ raise InputFormatError(msg)
382
402
  else:
383
403
  first_inputs = first_example.inputs
384
404
  missing_keys = set(chain.input_keys).difference(first_inputs)
@@ -387,13 +407,14 @@ def _validate_example_inputs_for_chain(
387
407
  # Refrain from calling to validate.
388
408
  pass
389
409
  elif missing_keys:
390
- raise InputFormatError(
410
+ msg = (
391
411
  "Example inputs missing expected chain input keys."
392
412
  " Please provide an input_mapper to convert the example.inputs"
393
413
  " to a compatible format for the chain you wish to evaluate."
394
414
  f"Expected: {chain.input_keys}. "
395
415
  f"Got: {first_inputs.keys()}"
396
416
  )
417
+ raise InputFormatError(msg)
397
418
 
398
419
 
399
420
  def _validate_example_inputs(
@@ -410,7 +431,7 @@ def _validate_example_inputs(
410
431
  # Otherwise it's a runnable
411
432
  _validate_example_inputs_for_chain(example, chain, input_mapper)
412
433
  elif isinstance(chain, Runnable):
413
- logger.debug(f"Skipping input validation for {chain}")
434
+ logger.debug("Skipping input validation for %s", chain)
414
435
 
415
436
 
416
437
  ## Shared Evaluator Setup Utilities
@@ -455,16 +476,19 @@ def _determine_input_key(
455
476
  input_key = config.input_key
456
477
  if run_inputs and input_key not in run_inputs:
457
478
  logger.warning(
458
- f"Input key {input_key} not in chain's specified"
459
- f" input keys {run_inputs}. Evaluation behavior may be undefined."
479
+ "Input key %s not in chain's specified input keys %s. "
480
+ "Evaluation behavior may be undefined.",
481
+ input_key,
482
+ run_inputs,
460
483
  )
461
484
  elif run_inputs and len(run_inputs) == 1:
462
485
  input_key = run_inputs[0]
463
486
  elif run_inputs is not None and len(run_inputs) > 1:
464
487
  logger.warning(
465
- f"Chain expects multiple input keys: {run_inputs},"
466
- f" Evaluator is likely to fail. Evaluation behavior may be undefined."
467
- " Specify an input_key in the RunEvalConfig to avoid this warning."
488
+ "Chain expects multiple input keys: %s,"
489
+ " Evaluator is likely to fail. Evaluation behavior may be undefined."
490
+ " Specify an input_key in the RunEvalConfig to avoid this warning.",
491
+ run_inputs,
468
492
  )
469
493
 
470
494
  return input_key
@@ -479,16 +503,19 @@ def _determine_prediction_key(
479
503
  prediction_key = config.prediction_key
480
504
  if run_outputs and prediction_key not in run_outputs:
481
505
  logger.warning(
482
- f"Prediction key {prediction_key} not in chain's specified"
483
- f" output keys {run_outputs}. Evaluation behavior may be undefined."
506
+ "Prediction key %s not in chain's specified output keys %s. "
507
+ "Evaluation behavior may be undefined.",
508
+ prediction_key,
509
+ run_outputs,
484
510
  )
485
511
  elif run_outputs and len(run_outputs) == 1:
486
512
  prediction_key = run_outputs[0]
487
513
  elif run_outputs is not None and len(run_outputs) > 1:
488
514
  logger.warning(
489
- f"Chain expects multiple output keys: {run_outputs},"
490
- f" Evaluation behavior may be undefined. Specify a prediction_key"
491
- " in the RunEvalConfig to avoid this warning."
515
+ "Chain expects multiple output keys: %s,"
516
+ " Evaluation behavior may be undefined. Specify a prediction_key"
517
+ " in the RunEvalConfig to avoid this warning.",
518
+ run_outputs,
492
519
  )
493
520
  return prediction_key
494
521
 
@@ -500,12 +527,13 @@ def _determine_reference_key(
500
527
  if config.reference_key:
501
528
  reference_key = config.reference_key
502
529
  if example_outputs and reference_key not in example_outputs:
503
- raise ValueError(
530
+ msg = (
504
531
  f"Reference key {reference_key} not in Dataset"
505
532
  f" example outputs: {example_outputs}"
506
533
  )
534
+ raise ValueError(msg)
507
535
  elif example_outputs and len(example_outputs) == 1:
508
- reference_key = list(example_outputs)[0]
536
+ reference_key = next(iter(example_outputs))
509
537
  else:
510
538
  reference_key = None
511
539
  return reference_key
@@ -544,15 +572,17 @@ def _construct_run_evaluator(
544
572
  # Assume we can decorate
545
573
  return run_evaluator_dec(eval_config)
546
574
  else:
547
- raise ValueError(f"Unknown evaluator type: {type(eval_config)}")
575
+ msg = f"Unknown evaluator type: {type(eval_config)}"
576
+ raise ValueError(msg) # noqa: TRY004
548
577
 
549
578
  if isinstance(evaluator_, StringEvaluator):
550
579
  if evaluator_.requires_reference and reference_key is None:
551
- raise ValueError(
580
+ msg = (
552
581
  f"Must specify reference_key in smith_eval.RunEvalConfig to use"
553
582
  f" evaluator of type {eval_type_tag} with"
554
583
  f" dataset with multiple output keys: {example_outputs}."
555
584
  )
585
+ raise ValueError(msg)
556
586
  run_evaluator = smith_eval.StringRunEvaluatorChain.from_run_and_data_type(
557
587
  evaluator_,
558
588
  run_type,
@@ -563,18 +593,18 @@ def _construct_run_evaluator(
563
593
  tags=[eval_type_tag],
564
594
  )
565
595
  elif isinstance(evaluator_, PairwiseStringEvaluator):
566
- raise NotImplementedError(
596
+ msg = (
567
597
  f"Run evaluator for {eval_type_tag} is not implemented."
568
598
  " PairwiseStringEvaluators compare the outputs of two different models"
569
599
  " rather than the output of a single model."
570
600
  " Did you mean to use a StringEvaluator instead?"
571
601
  "\nSee: https://python.langchain.com/docs/guides/evaluation/string/"
572
602
  )
603
+ raise NotImplementedError(msg)
573
604
 
574
605
  else:
575
- raise NotImplementedError(
576
- f"Run evaluator for {eval_type_tag} is not implemented"
577
- )
606
+ msg = f"Run evaluator for {eval_type_tag} is not implemented"
607
+ raise NotImplementedError(msg)
578
608
  return run_evaluator
579
609
 
580
610
 
@@ -611,10 +641,13 @@ def _load_run_evaluators(
611
641
  input_key, prediction_key, reference_key = None, None, None
612
642
  if config.evaluators or (
613
643
  config.custom_evaluators
614
- and any([isinstance(e, StringEvaluator) for e in config.custom_evaluators])
644
+ and any(isinstance(e, StringEvaluator) for e in config.custom_evaluators)
615
645
  ):
616
646
  input_key, prediction_key, reference_key = _get_keys(
617
- config, run_inputs, run_outputs, example_outputs
647
+ config,
648
+ run_inputs,
649
+ run_outputs,
650
+ example_outputs,
618
651
  )
619
652
  for eval_config in config.evaluators:
620
653
  run_evaluator = _construct_run_evaluator(
@@ -641,15 +674,16 @@ def _load_run_evaluators(
641
674
  input_key=input_key,
642
675
  prediction_key=prediction_key,
643
676
  reference_key=reference_key,
644
- )
677
+ ),
645
678
  )
646
679
  elif callable(custom_evaluator):
647
680
  run_evaluators.append(run_evaluator_dec(custom_evaluator))
648
681
  else:
649
- raise ValueError(
682
+ msg = (
650
683
  f"Unsupported custom evaluator: {custom_evaluator}."
651
684
  f" Expected RunEvaluator or StringEvaluator."
652
685
  )
686
+ raise ValueError(msg) # noqa: TRY004
653
687
 
654
688
  return run_evaluators
655
689
 
@@ -683,41 +717,45 @@ async def _arun_llm(
683
717
  """
684
718
  if input_mapper is not None:
685
719
  prompt_or_messages = input_mapper(inputs)
686
- if (
687
- isinstance(prompt_or_messages, str)
688
- or isinstance(prompt_or_messages, list)
720
+ if isinstance(prompt_or_messages, str) or (
721
+ isinstance(prompt_or_messages, list)
689
722
  and all(isinstance(msg, BaseMessage) for msg in prompt_or_messages)
690
723
  ):
691
724
  return await llm.ainvoke(
692
725
  prompt_or_messages,
693
726
  config=RunnableConfig(
694
- callbacks=callbacks, tags=tags or [], metadata=metadata or {}
727
+ callbacks=callbacks,
728
+ tags=tags or [],
729
+ metadata=metadata or {},
695
730
  ),
696
731
  )
697
- else:
698
- raise InputFormatError(
699
- "Input mapper returned invalid format"
700
- f" {prompt_or_messages}"
701
- "\nExpected a single string or list of chat messages."
702
- )
732
+ msg = (
733
+ "Input mapper returned invalid format"
734
+ f" {prompt_or_messages}"
735
+ "\nExpected a single string or list of chat messages."
736
+ )
737
+ raise InputFormatError(msg)
703
738
 
704
- else:
705
- try:
706
- prompt = _get_prompt(inputs)
707
- llm_output: Union[str, BaseMessage] = await llm.ainvoke(
708
- prompt,
709
- config=RunnableConfig(
710
- callbacks=callbacks, tags=tags or [], metadata=metadata or {}
711
- ),
712
- )
713
- except InputFormatError:
714
- llm_inputs = _get_messages(inputs)
715
- llm_output = await llm.ainvoke(
716
- **llm_inputs,
717
- config=RunnableConfig(
718
- callbacks=callbacks, tags=tags or [], metadata=metadata or {}
719
- ),
720
- )
739
+ try:
740
+ prompt = _get_prompt(inputs)
741
+ llm_output: Union[str, BaseMessage] = await llm.ainvoke(
742
+ prompt,
743
+ config=RunnableConfig(
744
+ callbacks=callbacks,
745
+ tags=tags or [],
746
+ metadata=metadata or {},
747
+ ),
748
+ )
749
+ except InputFormatError:
750
+ llm_inputs = _get_messages(inputs)
751
+ llm_output = await llm.ainvoke(
752
+ **llm_inputs,
753
+ config=RunnableConfig(
754
+ callbacks=callbacks,
755
+ tags=tags or [],
756
+ metadata=metadata or {},
757
+ ),
758
+ )
721
759
  return llm_output
722
760
 
723
761
 
@@ -742,12 +780,16 @@ async def _arun_chain(
742
780
  output = await chain.ainvoke(
743
781
  val,
744
782
  config=RunnableConfig(
745
- callbacks=callbacks, tags=tags or [], metadata=metadata or {}
783
+ callbacks=callbacks,
784
+ tags=tags or [],
785
+ metadata=metadata or {},
746
786
  ),
747
787
  )
748
788
  else:
749
789
  runnable_config = RunnableConfig(
750
- tags=tags or [], callbacks=callbacks, metadata=metadata or {}
790
+ tags=tags or [],
791
+ callbacks=callbacks,
792
+ metadata=metadata or {},
751
793
  )
752
794
  output = await chain.ainvoke(inputs_, config=runnable_config)
753
795
  return output
@@ -799,9 +841,11 @@ async def _arun_llm_or_chain(
799
841
  result = output
800
842
  except Exception as e:
801
843
  logger.warning(
802
- f"{chain_or_llm} failed for example {example.id} "
803
- f"with inputs {example.inputs}"
804
- f"\n{repr(e)}"
844
+ "%s failed for example %s with inputs %s\n%s",
845
+ chain_or_llm,
846
+ example.id,
847
+ example.inputs,
848
+ e,
805
849
  )
806
850
  result = EvalError(Error=e)
807
851
  return result
@@ -837,30 +881,34 @@ def _run_llm(
837
881
  # Most of this is legacy code; we could probably remove a lot of it.
838
882
  if input_mapper is not None:
839
883
  prompt_or_messages = input_mapper(inputs)
840
- if (
841
- isinstance(prompt_or_messages, str)
842
- or isinstance(prompt_or_messages, list)
884
+ if isinstance(prompt_or_messages, str) or (
885
+ isinstance(prompt_or_messages, list)
843
886
  and all(isinstance(msg, BaseMessage) for msg in prompt_or_messages)
844
887
  ):
845
888
  llm_output: Union[str, BaseMessage] = llm.invoke(
846
889
  prompt_or_messages,
847
890
  config=RunnableConfig(
848
- callbacks=callbacks, tags=tags or [], metadata=metadata or {}
891
+ callbacks=callbacks,
892
+ tags=tags or [],
893
+ metadata=metadata or {},
849
894
  ),
850
895
  )
851
896
  else:
852
- raise InputFormatError(
897
+ msg = (
853
898
  "Input mapper returned invalid format: "
854
899
  f" {prompt_or_messages}"
855
900
  "\nExpected a single string or list of chat messages."
856
901
  )
902
+ raise InputFormatError(msg)
857
903
  else:
858
904
  try:
859
905
  llm_prompts = _get_prompt(inputs)
860
906
  llm_output = llm.invoke(
861
907
  llm_prompts,
862
908
  config=RunnableConfig(
863
- callbacks=callbacks, tags=tags or [], metadata=metadata or {}
909
+ callbacks=callbacks,
910
+ tags=tags or [],
911
+ metadata=metadata or {},
864
912
  ),
865
913
  )
866
914
  except InputFormatError:
@@ -893,12 +941,16 @@ def _run_chain(
893
941
  output = chain.invoke(
894
942
  val,
895
943
  config=RunnableConfig(
896
- callbacks=callbacks, tags=tags or [], metadata=metadata or {}
944
+ callbacks=callbacks,
945
+ tags=tags or [],
946
+ metadata=metadata or {},
897
947
  ),
898
948
  )
899
949
  else:
900
950
  runnable_config = RunnableConfig(
901
- tags=tags or [], callbacks=callbacks, metadata=metadata or {}
951
+ tags=tags or [],
952
+ callbacks=callbacks,
953
+ metadata=metadata or {},
902
954
  )
903
955
  output = chain.invoke(inputs_, config=runnable_config)
904
956
  return output
@@ -952,9 +1004,12 @@ def _run_llm_or_chain(
952
1004
  except Exception as e:
953
1005
  error_type = type(e).__name__
954
1006
  logger.warning(
955
- f"{chain_or_llm} failed for example {example.id} "
956
- f"with inputs {example.inputs}"
957
- f"\nError Type: {error_type}, Message: {e}"
1007
+ "%s failed for example %s with inputs %s\nError Type: %s, Message: %s",
1008
+ chain_or_llm,
1009
+ example.id,
1010
+ example.inputs,
1011
+ error_type,
1012
+ e,
958
1013
  )
959
1014
  result = EvalError(Error=e)
960
1015
  return result
@@ -974,7 +1029,8 @@ def _prepare_eval_run(
974
1029
 
975
1030
  examples = list(client.list_examples(dataset_id=dataset.id, as_of=dataset_version))
976
1031
  if not examples:
977
- raise ValueError(f"Dataset {dataset_name} has no example rows.")
1032
+ msg = f"Dataset {dataset_name} has no example rows."
1033
+ raise ValueError(msg)
978
1034
  modified_at = [ex.modified_at for ex in examples if ex.modified_at]
979
1035
  # Should always be defined in practice when fetched,
980
1036
  # but the typing permits None
@@ -999,7 +1055,7 @@ def _prepare_eval_run(
999
1055
  )
1000
1056
  except (HTTPError, ValueError, LangSmithError) as e:
1001
1057
  if "already exists " not in str(e):
1002
- raise e
1058
+ raise
1003
1059
  uid = uuid.uuid4()
1004
1060
  example_msg = f"""
1005
1061
  run_on_dataset(
@@ -1007,10 +1063,11 @@ run_on_dataset(
1007
1063
  project_name="{project_name} - {uid}", # Update since {project_name} already exists
1008
1064
  )
1009
1065
  """
1010
- raise ValueError(
1066
+ msg = (
1011
1067
  f"Test project {project_name} already exists. Please use a different name:"
1012
1068
  f"\n\n{example_msg}"
1013
1069
  )
1070
+ raise ValueError(msg) from e
1014
1071
  comparison_url = dataset.url + f"/compare?selectedSessions={project.id}"
1015
1072
  print( # noqa: T201
1016
1073
  f"View the evaluation results for project '{project_name}'"
@@ -1047,7 +1104,7 @@ class _DatasetRunContainer:
1047
1104
  ) -> dict:
1048
1105
  results: dict = {}
1049
1106
  for example, output in zip(self.examples, batch_results):
1050
- row_result = cast(_RowResult, all_eval_results.get(str(example.id), {}))
1107
+ row_result = cast("_RowResult", all_eval_results.get(str(example.id), {}))
1051
1108
  results[str(example.id)] = {
1052
1109
  "input": example.inputs,
1053
1110
  "feedback": row_result.get("feedback", []),
@@ -1074,16 +1131,16 @@ class _DatasetRunContainer:
1074
1131
  result = evaluator(runs_list, self.examples)
1075
1132
  if isinstance(result, EvaluationResult):
1076
1133
  result = result.dict()
1077
- aggregate_feedback.append(cast(dict, result))
1134
+ aggregate_feedback.append(cast("dict", result))
1078
1135
  executor.submit(
1079
1136
  self.client.create_feedback,
1080
1137
  **result,
1081
1138
  run_id=None,
1082
1139
  project_id=self.project.id,
1083
1140
  )
1084
- except Exception as e:
1085
- logger.error(
1086
- f"Error running batch evaluator {repr(evaluator)}: {e}"
1141
+ except Exception:
1142
+ logger.exception(
1143
+ "Error running batch evaluator %s", repr(evaluator)
1087
1144
  )
1088
1145
  return aggregate_feedback
1089
1146
 
@@ -1091,12 +1148,12 @@ class _DatasetRunContainer:
1091
1148
  all_eval_results: dict = {}
1092
1149
  all_runs: dict = {}
1093
1150
  for c in self.configs:
1094
- for callback in cast(list, c["callbacks"]):
1151
+ for callback in cast("list", c["callbacks"]):
1095
1152
  if isinstance(callback, EvaluatorCallbackHandler):
1096
1153
  eval_results = callback.logged_eval_results
1097
1154
  for (_, example_id), v in eval_results.items():
1098
1155
  all_eval_results.setdefault(str(example_id), {}).update(
1099
- {"feedback": v}
1156
+ {"feedback": v},
1100
1157
  )
1101
1158
  elif isinstance(callback, LangChainTracer):
1102
1159
  run = callback.latest_run
@@ -1111,10 +1168,10 @@ class _DatasetRunContainer:
1111
1168
  "execution_time": execution_time,
1112
1169
  "run_id": run_id,
1113
1170
  "run": run,
1114
- }
1171
+ },
1115
1172
  )
1116
1173
  all_runs[str(callback.example_id)] = run
1117
- return cast(dict[str, _RowResult], all_eval_results), all_runs
1174
+ return cast("dict[str, _RowResult]", all_eval_results), all_runs
1118
1175
 
1119
1176
  def _collect_test_results(
1120
1177
  self,
@@ -1134,21 +1191,26 @@ class _DatasetRunContainer:
1134
1191
  aggregate_metrics=aggregate_feedback,
1135
1192
  )
1136
1193
 
1137
- def finish(self, batch_results: list, verbose: bool = False) -> TestResult:
1194
+ def finish(
1195
+ self,
1196
+ batch_results: list,
1197
+ verbose: bool = False, # noqa: FBT001,FBT002
1198
+ ) -> TestResult:
1138
1199
  results = self._collect_test_results(batch_results)
1139
1200
  if verbose:
1140
1201
  try:
1141
1202
  agg_feedback = results.get_aggregate_feedback()
1142
1203
  _display_aggregate_results(agg_feedback)
1143
1204
  except Exception as e:
1144
- logger.debug(f"Failed to print aggregate feedback: {repr(e)}")
1205
+ logger.debug("Failed to print aggregate feedback: %s", e, exc_info=True)
1145
1206
  try:
1146
1207
  # Closing the project permits name changing and metric optimizations
1147
1208
  self.client.update_project(
1148
- self.project.id, end_time=datetime.now(timezone.utc)
1209
+ self.project.id,
1210
+ end_time=datetime.now(timezone.utc),
1149
1211
  )
1150
1212
  except Exception as e:
1151
- logger.debug(f"Failed to close project: {repr(e)}")
1213
+ logger.debug("Failed to close project: %s", e, exc_info=True)
1152
1214
  return results
1153
1215
 
1154
1216
  @classmethod
@@ -1188,7 +1250,10 @@ class _DatasetRunContainer:
1188
1250
  run_metadata["revision_id"] = revision_id
1189
1251
  wrapped_model = _wrap_in_chain_factory(llm_or_chain_factory)
1190
1252
  run_evaluators = _setup_evaluation(
1191
- wrapped_model, examples, evaluation, dataset.data_type or DataType.kv
1253
+ wrapped_model,
1254
+ examples,
1255
+ evaluation,
1256
+ dataset.data_type or DataType.kv,
1192
1257
  )
1193
1258
  _validate_example_inputs(examples[0], wrapped_model, input_mapper)
1194
1259
  progress_bar = progress.ProgressBarCallback(len(examples))
@@ -1242,7 +1307,8 @@ def _display_aggregate_results(aggregate_results: pd.DataFrame) -> None:
1242
1307
  display(aggregate_results)
1243
1308
  else:
1244
1309
  formatted_string = aggregate_results.to_string(
1245
- float_format=lambda x: f"{x:.2f}", justify="right"
1310
+ float_format=lambda x: f"{x:.2f}",
1311
+ justify="right",
1246
1312
  )
1247
1313
  print("\n Experiment Results:") # noqa: T201
1248
1314
  print(formatted_string) # noqa: T201
@@ -1279,6 +1345,114 @@ async def arun_on_dataset(
1279
1345
  revision_id: Optional[str] = None,
1280
1346
  **kwargs: Any,
1281
1347
  ) -> dict[str, Any]:
1348
+ """Run on dataset.
1349
+
1350
+ Run the Chain or language model on a dataset and store traces
1351
+ to the specified project name.
1352
+
1353
+ For the (usually faster) async version of this function,
1354
+ see :func:`arun_on_dataset`.
1355
+
1356
+ Args:
1357
+ dataset_name: Name of the dataset to run the chain on.
1358
+ llm_or_chain_factory: Language model or Chain constructor to run
1359
+ over the dataset. The Chain constructor is used to permit
1360
+ independent calls on each example without carrying over state.
1361
+ evaluation: Configuration for evaluators to run on the
1362
+ results of the chain
1363
+ concurrency_level: The number of async tasks to run concurrently.
1364
+ project_name: Name of the project to store the traces in.
1365
+ Defaults to {dataset_name}-{chain class name}-{datetime}.
1366
+ project_metadata: Optional metadata to add to the project.
1367
+ Useful for storing information the test variant.
1368
+ (prompt version, model version, etc.)
1369
+ client: LangSmith client to use to access the dataset and to
1370
+ log feedback and run traces.
1371
+ verbose: Whether to print progress.
1372
+ tags: Tags to add to each run in the project.
1373
+ revision_id: Optional revision identifier to assign this test run to
1374
+ track the performance of different versions of your system.
1375
+ Returns:
1376
+ A dictionary containing the run's project name and the resulting model outputs.
1377
+
1378
+ Examples:
1379
+
1380
+ .. code-block:: python
1381
+
1382
+ from langsmith import Client
1383
+ from langchain_openai import ChatOpenAI
1384
+ from langchain.chains import LLMChain
1385
+ from langchain.smith import smith_eval.RunEvalConfig, run_on_dataset
1386
+
1387
+ # Chains may have memory. Passing in a constructor function lets the
1388
+ # evaluation framework avoid cross-contamination between runs.
1389
+ def construct_chain():
1390
+ llm = ChatOpenAI(temperature=0)
1391
+ chain = LLMChain.from_string(
1392
+ llm,
1393
+ "What's the answer to {your_input_key}"
1394
+ )
1395
+ return chain
1396
+
1397
+ # Load off-the-shelf evaluators via config or the EvaluatorType (string or enum)
1398
+ evaluation_config = smith_eval.RunEvalConfig(
1399
+ evaluators=[
1400
+ "qa", # "Correctness" against a reference answer
1401
+ "embedding_distance",
1402
+ smith_eval.RunEvalConfig.Criteria("helpfulness"),
1403
+ smith_eval.RunEvalConfig.Criteria({
1404
+ "fifth-grader-score": "Do you have to be smarter than a fifth grader to answer this question?"
1405
+ }),
1406
+ ]
1407
+ )
1408
+
1409
+ client = Client()
1410
+ await arun_on_dataset(
1411
+ client,
1412
+ dataset_name="<my_dataset_name>",
1413
+ llm_or_chain_factory=construct_chain,
1414
+ evaluation=evaluation_config,
1415
+ )
1416
+
1417
+ You can also create custom evaluators by subclassing the
1418
+ :class:`StringEvaluator <langchain.evaluation.schema.StringEvaluator>`
1419
+ or LangSmith's `RunEvaluator` classes.
1420
+
1421
+ .. code-block:: python
1422
+
1423
+ from typing import Optional
1424
+ from langchain.evaluation import StringEvaluator
1425
+
1426
+ class MyStringEvaluator(StringEvaluator):
1427
+
1428
+ @property
1429
+ def requires_input(self) -> bool:
1430
+ return False
1431
+
1432
+ @property
1433
+ def requires_reference(self) -> bool:
1434
+ return True
1435
+
1436
+ @property
1437
+ def evaluation_name(self) -> str:
1438
+ return "exact_match"
1439
+
1440
+ def _evaluate_strings(self, prediction, reference=None, input=None, **kwargs) -> dict:
1441
+ return {"score": prediction == reference}
1442
+
1443
+
1444
+ evaluation_config = smith_eval.RunEvalConfig(
1445
+ custom_evaluators = [MyStringEvaluator()],
1446
+ )
1447
+
1448
+ await arun_on_dataset(
1449
+ client,
1450
+ dataset_name="<my_dataset_name>",
1451
+ llm_or_chain_factory=construct_chain,
1452
+ evaluation=evaluation_config,
1453
+ )
1454
+
1455
+ """ # noqa: E501
1282
1456
  input_mapper = kwargs.pop("input_mapper", None)
1283
1457
  if input_mapper:
1284
1458
  warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)
@@ -1344,6 +1518,114 @@ def run_on_dataset(
1344
1518
  revision_id: Optional[str] = None,
1345
1519
  **kwargs: Any,
1346
1520
  ) -> dict[str, Any]:
1521
+ """Run on dataset.
1522
+
1523
+ Run the Chain or language model on a dataset and store traces
1524
+ to the specified project name.
1525
+
1526
+ For the (usually faster) async version of this function,
1527
+ see :func:`arun_on_dataset`.
1528
+
1529
+ Args:
1530
+ dataset_name: Name of the dataset to run the chain on.
1531
+ llm_or_chain_factory: Language model or Chain constructor to run
1532
+ over the dataset. The Chain constructor is used to permit
1533
+ independent calls on each example without carrying over state.
1534
+ evaluation: Configuration for evaluators to run on the
1535
+ results of the chain
1536
+ concurrency_level: The number of async tasks to run concurrently.
1537
+ project_name: Name of the project to store the traces in.
1538
+ Defaults to {dataset_name}-{chain class name}-{datetime}.
1539
+ project_metadata: Optional metadata to add to the project.
1540
+ Useful for storing information the test variant.
1541
+ (prompt version, model version, etc.)
1542
+ client: LangSmith client to use to access the dataset and to
1543
+ log feedback and run traces.
1544
+ verbose: Whether to print progress.
1545
+ tags: Tags to add to each run in the project.
1546
+ revision_id: Optional revision identifier to assign this test run to
1547
+ track the performance of different versions of your system.
1548
+ Returns:
1549
+ A dictionary containing the run's project name and the resulting model outputs.
1550
+
1551
+ Examples:
1552
+
1553
+ .. code-block:: python
1554
+
1555
+ from langsmith import Client
1556
+ from langchain_openai import ChatOpenAI
1557
+ from langchain.chains import LLMChain
1558
+ from langchain.smith import smith_eval.RunEvalConfig, run_on_dataset
1559
+
1560
+ # Chains may have memory. Passing in a constructor function lets the
1561
+ # evaluation framework avoid cross-contamination between runs.
1562
+ def construct_chain():
1563
+ llm = ChatOpenAI(temperature=0)
1564
+ chain = LLMChain.from_string(
1565
+ llm,
1566
+ "What's the answer to {your_input_key}"
1567
+ )
1568
+ return chain
1569
+
1570
+ # Load off-the-shelf evaluators via config or the EvaluatorType (string or enum)
1571
+ evaluation_config = smith_eval.RunEvalConfig(
1572
+ evaluators=[
1573
+ "qa", # "Correctness" against a reference answer
1574
+ "embedding_distance",
1575
+ smith_eval.RunEvalConfig.Criteria("helpfulness"),
1576
+ smith_eval.RunEvalConfig.Criteria({
1577
+ "fifth-grader-score": "Do you have to be smarter than a fifth grader to answer this question?"
1578
+ }),
1579
+ ]
1580
+ )
1581
+
1582
+ client = Client()
1583
+ run_on_dataset(
1584
+ client,
1585
+ dataset_name="<my_dataset_name>",
1586
+ llm_or_chain_factory=construct_chain,
1587
+ evaluation=evaluation_config,
1588
+ )
1589
+
1590
+ You can also create custom evaluators by subclassing the
1591
+ :class:`StringEvaluator <langchain.evaluation.schema.StringEvaluator>`
1592
+ or LangSmith's `RunEvaluator` classes.
1593
+
1594
+ .. code-block:: python
1595
+
1596
+ from typing import Optional
1597
+ from langchain.evaluation import StringEvaluator
1598
+
1599
+ class MyStringEvaluator(StringEvaluator):
1600
+
1601
+ @property
1602
+ def requires_input(self) -> bool:
1603
+ return False
1604
+
1605
+ @property
1606
+ def requires_reference(self) -> bool:
1607
+ return True
1608
+
1609
+ @property
1610
+ def evaluation_name(self) -> str:
1611
+ return "exact_match"
1612
+
1613
+ def _evaluate_strings(self, prediction, reference=None, input=None, **kwargs) -> dict:
1614
+ return {"score": prediction == reference}
1615
+
1616
+
1617
+ evaluation_config = smith_eval.RunEvalConfig(
1618
+ custom_evaluators = [MyStringEvaluator()],
1619
+ )
1620
+
1621
+ run_on_dataset(
1622
+ client,
1623
+ dataset_name="<my_dataset_name>",
1624
+ llm_or_chain_factory=construct_chain,
1625
+ evaluation=evaluation_config,
1626
+ )
1627
+
1628
+ """ # noqa: E501
1347
1629
  input_mapper = kwargs.pop("input_mapper", None)
1348
1630
  if input_mapper:
1349
1631
  warn_deprecated("0.0.305", message=_INPUT_MAPPER_DEP_WARNING, pending=True)
@@ -1401,120 +1683,7 @@ def run_on_dataset(
1401
1683
  ),
1402
1684
  container.examples,
1403
1685
  container.configs,
1404
- )
1686
+ ),
1405
1687
  )
1406
1688
 
1407
1689
  return container.finish(batch_results, verbose=verbose)
1408
-
1409
-
1410
- _RUN_ON_DATASET_DOCSTRING = """
1411
- Run the Chain or language model on a dataset and store traces
1412
- to the specified project name.
1413
-
1414
- Args:
1415
- dataset_name: Name of the dataset to run the chain on.
1416
- llm_or_chain_factory: Language model or Chain constructor to run
1417
- over the dataset. The Chain constructor is used to permit
1418
- independent calls on each example without carrying over state.
1419
- evaluation: Configuration for evaluators to run on the
1420
- results of the chain
1421
- concurrency_level: The number of async tasks to run concurrently.
1422
- project_name: Name of the project to store the traces in.
1423
- Defaults to {dataset_name}-{chain class name}-{datetime}.
1424
- project_metadata: Optional metadata to add to the project.
1425
- Useful for storing information the test variant.
1426
- (prompt version, model version, etc.)
1427
- client: LangSmith client to use to access the dataset and to
1428
- log feedback and run traces.
1429
- verbose: Whether to print progress.
1430
- tags: Tags to add to each run in the project.
1431
- revision_id: Optional revision identifier to assign this test run to
1432
- track the performance of different versions of your system.
1433
- Returns:
1434
- A dictionary containing the run's project name and the resulting model outputs.
1435
-
1436
-
1437
- For the (usually faster) async version of this function, see :func:`arun_on_dataset`.
1438
-
1439
- Examples
1440
- --------
1441
-
1442
- .. code-block:: python
1443
-
1444
- from langsmith import Client
1445
- from langchain_openai import ChatOpenAI
1446
- from langchain.chains import LLMChain
1447
- from langchain.smith import smith_eval.RunEvalConfig, run_on_dataset
1448
-
1449
- # Chains may have memory. Passing in a constructor function lets the
1450
- # evaluation framework avoid cross-contamination between runs.
1451
- def construct_chain():
1452
- llm = ChatOpenAI(temperature=0)
1453
- chain = LLMChain.from_string(
1454
- llm,
1455
- "What's the answer to {your_input_key}"
1456
- )
1457
- return chain
1458
-
1459
- # Load off-the-shelf evaluators via config or the EvaluatorType (string or enum)
1460
- evaluation_config = smith_eval.RunEvalConfig(
1461
- evaluators=[
1462
- "qa", # "Correctness" against a reference answer
1463
- "embedding_distance",
1464
- smith_eval.RunEvalConfig.Criteria("helpfulness"),
1465
- smith_eval.RunEvalConfig.Criteria({
1466
- "fifth-grader-score": "Do you have to be smarter than a fifth grader to answer this question?"
1467
- }),
1468
- ]
1469
- )
1470
-
1471
- client = Client()
1472
- run_on_dataset(
1473
- client,
1474
- dataset_name="<my_dataset_name>",
1475
- llm_or_chain_factory=construct_chain,
1476
- evaluation=evaluation_config,
1477
- )
1478
-
1479
- You can also create custom evaluators by subclassing the
1480
- :class:`StringEvaluator <langchain.evaluation.schema.StringEvaluator>`
1481
- or LangSmith's `RunEvaluator` classes.
1482
-
1483
- .. code-block:: python
1484
-
1485
- from typing import Optional
1486
- from langchain.evaluation import StringEvaluator
1487
-
1488
- class MyStringEvaluator(StringEvaluator):
1489
-
1490
- @property
1491
- def requires_input(self) -> bool:
1492
- return False
1493
-
1494
- @property
1495
- def requires_reference(self) -> bool:
1496
- return True
1497
-
1498
- @property
1499
- def evaluation_name(self) -> str:
1500
- return "exact_match"
1501
-
1502
- def _evaluate_strings(self, prediction, reference=None, input=None, **kwargs) -> dict:
1503
- return {"score": prediction == reference}
1504
-
1505
-
1506
- evaluation_config = smith_eval.RunEvalConfig(
1507
- custom_evaluators = [MyStringEvaluator()],
1508
- )
1509
-
1510
- run_on_dataset(
1511
- client,
1512
- dataset_name="<my_dataset_name>",
1513
- llm_or_chain_factory=construct_chain,
1514
- evaluation=evaluation_config,
1515
- )
1516
- """ # noqa: E501
1517
- run_on_dataset.__doc__ = _RUN_ON_DATASET_DOCSTRING
1518
- arun_on_dataset.__doc__ = _RUN_ON_DATASET_DOCSTRING.replace(
1519
- "run_on_dataset(", "await arun_on_dataset("
1520
- )