langchain 0.1.26 → 0.1.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/agent.cjs +137 -10
- package/dist/agents/agent.d.ts +37 -5
- package/dist/agents/agent.js +133 -9
- package/dist/agents/executor.cjs +4 -1
- package/dist/agents/executor.js +5 -2
- package/dist/agents/openai_functions/index.cjs +6 -2
- package/dist/agents/openai_functions/index.d.ts +7 -5
- package/dist/agents/openai_functions/index.js +7 -3
- package/dist/agents/openai_tools/index.cjs +7 -2
- package/dist/agents/openai_tools/index.d.ts +7 -4
- package/dist/agents/openai_tools/index.js +7 -2
- package/dist/agents/react/index.cjs +7 -2
- package/dist/agents/react/index.d.ts +7 -5
- package/dist/agents/react/index.js +7 -2
- package/dist/agents/structured_chat/index.cjs +6 -2
- package/dist/agents/structured_chat/index.d.ts +7 -5
- package/dist/agents/structured_chat/index.js +7 -3
- package/dist/agents/types.d.ts +21 -3
- package/dist/agents/xml/index.cjs +6 -2
- package/dist/agents/xml/index.d.ts +7 -5
- package/dist/agents/xml/index.js +7 -3
- package/dist/document_loaders/fs/unstructured.cjs +40 -0
- package/dist/document_loaders/fs/unstructured.d.ts +8 -0
- package/dist/document_loaders/fs/unstructured.js +40 -0
- package/dist/document_loaders/web/gitbook.cjs +11 -3
- package/dist/document_loaders/web/gitbook.d.ts +1 -0
- package/dist/document_loaders/web/gitbook.js +11 -3
- package/dist/output_parsers/fix.cjs +16 -4
- package/dist/output_parsers/fix.d.ts +10 -3
- package/dist/output_parsers/fix.js +16 -4
- package/dist/retrievers/parent_document.cjs +3 -2
- package/dist/retrievers/parent_document.d.ts +3 -1
- package/dist/retrievers/parent_document.js +3 -2
- package/package.json +1 -1
|
@@ -5,6 +5,7 @@ const runnables_1 = require("@langchain/core/runnables");
|
|
|
5
5
|
const render_js_1 = require("../../tools/render.cjs");
|
|
6
6
|
const log_js_1 = require("../format_scratchpad/log.cjs");
|
|
7
7
|
const output_parser_js_1 = require("./output_parser.cjs");
|
|
8
|
+
const agent_js_1 = require("../agent.cjs");
|
|
8
9
|
/**
|
|
9
10
|
* Create an agent that uses ReAct prompting.
|
|
10
11
|
* @param params Params required to create the agent. Includes an LLM, tools, and prompt.
|
|
@@ -48,7 +49,7 @@ const output_parser_js_1 = require("./output_parser.cjs");
|
|
|
48
49
|
* });
|
|
49
50
|
* ```
|
|
50
51
|
*/
|
|
51
|
-
async function createReactAgent({ llm, tools, prompt, }) {
|
|
52
|
+
async function createReactAgent({ llm, tools, prompt, streamRunnable, }) {
|
|
52
53
|
const missingVariables = ["tools", "tool_names", "agent_scratchpad"].filter((v) => !prompt.inputVariables.includes(v));
|
|
53
54
|
if (missingVariables.length > 0) {
|
|
54
55
|
throw new Error(`Provided prompt is missing required input variables: ${JSON.stringify(missingVariables)}`);
|
|
@@ -72,6 +73,10 @@ async function createReactAgent({ llm, tools, prompt, }) {
|
|
|
72
73
|
toolNames,
|
|
73
74
|
}),
|
|
74
75
|
]);
|
|
75
|
-
return agent;
|
|
76
|
+
return new agent_js_1.RunnableSingleActionAgent({
|
|
77
|
+
runnable: agent,
|
|
78
|
+
defaultRunName: "ReactAgent",
|
|
79
|
+
streamRunnable,
|
|
80
|
+
});
|
|
76
81
|
}
|
|
77
82
|
exports.createReactAgent = createReactAgent;
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
import type { ToolInterface } from "@langchain/core/tools";
|
|
2
2
|
import { BasePromptTemplate } from "@langchain/core/prompts";
|
|
3
3
|
import type { BaseLanguageModelInterface } from "@langchain/core/language_models/base";
|
|
4
|
-
import { RunnableSequence } from "@langchain/core/runnables";
|
|
5
|
-
import { AgentStep } from "@langchain/core/agents";
|
|
4
|
+
import { RunnableSingleActionAgent } from "../agent.js";
|
|
6
5
|
/**
|
|
7
6
|
* Params used by the createXmlAgent function.
|
|
8
7
|
*/
|
|
@@ -16,6 +15,11 @@ export type CreateReactAgentParams = {
|
|
|
16
15
|
* `tools`, `tool_names`, and `agent_scratchpad`.
|
|
17
16
|
*/
|
|
18
17
|
prompt: BasePromptTemplate;
|
|
18
|
+
/**
|
|
19
|
+
* Whether to invoke the underlying model in streaming mode,
|
|
20
|
+
* allowing streaming of intermediate steps. Defaults to true.
|
|
21
|
+
*/
|
|
22
|
+
streamRunnable?: boolean;
|
|
19
23
|
};
|
|
20
24
|
/**
|
|
21
25
|
* Create an agent that uses ReAct prompting.
|
|
@@ -60,6 +64,4 @@ export type CreateReactAgentParams = {
|
|
|
60
64
|
* });
|
|
61
65
|
* ```
|
|
62
66
|
*/
|
|
63
|
-
export declare function createReactAgent({ llm, tools, prompt, }: CreateReactAgentParams): Promise<RunnableSequence<{
|
|
64
|
-
steps: AgentStep[];
|
|
65
|
-
}, import("@langchain/core/agents").AgentAction | import("@langchain/core/agents").AgentFinish>>;
|
|
67
|
+
export declare function createReactAgent({ llm, tools, prompt, streamRunnable, }: CreateReactAgentParams): Promise<RunnableSingleActionAgent>;
|
|
@@ -2,6 +2,7 @@ import { RunnablePassthrough, RunnableSequence, } from "@langchain/core/runnable
|
|
|
2
2
|
import { renderTextDescription } from "../../tools/render.js";
|
|
3
3
|
import { formatLogToString } from "../format_scratchpad/log.js";
|
|
4
4
|
import { ReActSingleInputOutputParser } from "./output_parser.js";
|
|
5
|
+
import { RunnableSingleActionAgent } from "../agent.js";
|
|
5
6
|
/**
|
|
6
7
|
* Create an agent that uses ReAct prompting.
|
|
7
8
|
* @param params Params required to create the agent. Includes an LLM, tools, and prompt.
|
|
@@ -45,7 +46,7 @@ import { ReActSingleInputOutputParser } from "./output_parser.js";
|
|
|
45
46
|
* });
|
|
46
47
|
* ```
|
|
47
48
|
*/
|
|
48
|
-
export async function createReactAgent({ llm, tools, prompt, }) {
|
|
49
|
+
export async function createReactAgent({ llm, tools, prompt, streamRunnable, }) {
|
|
49
50
|
const missingVariables = ["tools", "tool_names", "agent_scratchpad"].filter((v) => !prompt.inputVariables.includes(v));
|
|
50
51
|
if (missingVariables.length > 0) {
|
|
51
52
|
throw new Error(`Provided prompt is missing required input variables: ${JSON.stringify(missingVariables)}`);
|
|
@@ -69,5 +70,9 @@ export async function createReactAgent({ llm, tools, prompt, }) {
|
|
|
69
70
|
toolNames,
|
|
70
71
|
}),
|
|
71
72
|
]);
|
|
72
|
-
return agent;
|
|
73
|
+
return new RunnableSingleActionAgent({
|
|
74
|
+
runnable: agent,
|
|
75
|
+
defaultRunName: "ReactAgent",
|
|
76
|
+
streamRunnable,
|
|
77
|
+
});
|
|
73
78
|
}
|
|
@@ -210,7 +210,7 @@ exports.StructuredChatAgent = StructuredChatAgent;
|
|
|
210
210
|
* });
|
|
211
211
|
* ```
|
|
212
212
|
*/
|
|
213
|
-
async function createStructuredChatAgent({ llm, tools, prompt, }) {
|
|
213
|
+
async function createStructuredChatAgent({ llm, tools, prompt, streamRunnable, }) {
|
|
214
214
|
const missingVariables = ["tools", "tool_names", "agent_scratchpad"].filter((v) => !prompt.inputVariables.includes(v));
|
|
215
215
|
if (missingVariables.length > 0) {
|
|
216
216
|
throw new Error(`Provided prompt is missing required input variables: ${JSON.stringify(missingVariables)}`);
|
|
@@ -234,6 +234,10 @@ async function createStructuredChatAgent({ llm, tools, prompt, }) {
|
|
|
234
234
|
toolNames,
|
|
235
235
|
}),
|
|
236
236
|
]);
|
|
237
|
-
return agent;
|
|
237
|
+
return new agent_js_1.RunnableSingleActionAgent({
|
|
238
|
+
runnable: agent,
|
|
239
|
+
defaultRunName: "StructuredChatAgent",
|
|
240
|
+
streamRunnable,
|
|
241
|
+
});
|
|
238
242
|
}
|
|
239
243
|
exports.createStructuredChatAgent = createStructuredChatAgent;
|
|
@@ -1,11 +1,10 @@
|
|
|
1
1
|
import type { StructuredToolInterface } from "@langchain/core/tools";
|
|
2
2
|
import type { BaseLanguageModelInterface } from "@langchain/core/language_models/base";
|
|
3
|
-
import { RunnableSequence } from "@langchain/core/runnables";
|
|
4
3
|
import type { BasePromptTemplate } from "@langchain/core/prompts";
|
|
5
4
|
import { BaseMessagePromptTemplate, ChatPromptTemplate } from "@langchain/core/prompts";
|
|
6
5
|
import { AgentStep } from "@langchain/core/agents";
|
|
7
6
|
import { Optional } from "../../types/type-utils.js";
|
|
8
|
-
import { Agent, AgentArgs, OutputParserArgs } from "../agent.js";
|
|
7
|
+
import { Agent, AgentArgs, OutputParserArgs, RunnableSingleActionAgent } from "../agent.js";
|
|
9
8
|
import { AgentInput } from "../types.js";
|
|
10
9
|
import { StructuredChatOutputParserWithRetries } from "./outputParser.js";
|
|
11
10
|
/**
|
|
@@ -106,6 +105,11 @@ export type CreateStructuredChatAgentParams = {
|
|
|
106
105
|
* `tools`, `tool_names`, and `agent_scratchpad`.
|
|
107
106
|
*/
|
|
108
107
|
prompt: BasePromptTemplate;
|
|
108
|
+
/**
|
|
109
|
+
* Whether to invoke the underlying model in streaming mode,
|
|
110
|
+
* allowing streaming of intermediate steps. Defaults to true.
|
|
111
|
+
*/
|
|
112
|
+
streamRunnable?: boolean;
|
|
109
113
|
};
|
|
110
114
|
/**
|
|
111
115
|
* Create an agent aimed at supporting tools with multiple inputs.
|
|
@@ -163,6 +167,4 @@ export type CreateStructuredChatAgentParams = {
|
|
|
163
167
|
* });
|
|
164
168
|
* ```
|
|
165
169
|
*/
|
|
166
|
-
export declare function createStructuredChatAgent({ llm, tools, prompt, }: CreateStructuredChatAgentParams): Promise<RunnableSequence<{
|
|
167
|
-
steps: AgentStep[];
|
|
168
|
-
}, import("@langchain/core/agents").AgentAction | import("@langchain/core/agents").AgentFinish>>;
|
|
170
|
+
export declare function createStructuredChatAgent({ llm, tools, prompt, streamRunnable, }: CreateStructuredChatAgentParams): Promise<RunnableSingleActionAgent>;
|
|
@@ -2,7 +2,7 @@ import { zodToJsonSchema } from "zod-to-json-schema";
|
|
|
2
2
|
import { RunnablePassthrough, RunnableSequence, } from "@langchain/core/runnables";
|
|
3
3
|
import { ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate, PromptTemplate, } from "@langchain/core/prompts";
|
|
4
4
|
import { LLMChain } from "../../chains/llm_chain.js";
|
|
5
|
-
import { Agent } from "../agent.js";
|
|
5
|
+
import { Agent, RunnableSingleActionAgent, } from "../agent.js";
|
|
6
6
|
import { StructuredChatOutputParserWithRetries } from "./outputParser.js";
|
|
7
7
|
import { FORMAT_INSTRUCTIONS, PREFIX, SUFFIX } from "./prompt.js";
|
|
8
8
|
import { renderTextDescriptionAndArgs } from "../../tools/render.js";
|
|
@@ -206,7 +206,7 @@ export class StructuredChatAgent extends Agent {
|
|
|
206
206
|
* });
|
|
207
207
|
* ```
|
|
208
208
|
*/
|
|
209
|
-
export async function createStructuredChatAgent({ llm, tools, prompt, }) {
|
|
209
|
+
export async function createStructuredChatAgent({ llm, tools, prompt, streamRunnable, }) {
|
|
210
210
|
const missingVariables = ["tools", "tool_names", "agent_scratchpad"].filter((v) => !prompt.inputVariables.includes(v));
|
|
211
211
|
if (missingVariables.length > 0) {
|
|
212
212
|
throw new Error(`Provided prompt is missing required input variables: ${JSON.stringify(missingVariables)}`);
|
|
@@ -230,5 +230,9 @@ export async function createStructuredChatAgent({ llm, tools, prompt, }) {
|
|
|
230
230
|
toolNames,
|
|
231
231
|
}),
|
|
232
232
|
]);
|
|
233
|
-
return agent;
|
|
233
|
+
return new RunnableSingleActionAgent({
|
|
234
|
+
runnable: agent,
|
|
235
|
+
defaultRunName: "StructuredChatAgent",
|
|
236
|
+
streamRunnable,
|
|
237
|
+
});
|
|
234
238
|
}
|
package/dist/agents/types.d.ts
CHANGED
|
@@ -16,16 +16,34 @@ export interface AgentInput {
|
|
|
16
16
|
allowedTools?: string[];
|
|
17
17
|
}
|
|
18
18
|
/**
|
|
19
|
-
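* Interface defining the input for creating an agent that uses runnables.
|
|
20
|
-
* It includes the Runnable instance, and an optional list of stop strings.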
|
|
19
|
+
* Interface defining the input for creating a single action agent
|
|
20
|
+
* that uses runnables.
|
|
21
21
|
*/
|
|
22
|
-
export interface RunnableAgentInput {
|
|
22
|
+
export interface RunnableSingleActionAgentInput {
|
|
23
|
+
runnable: Runnable<ChainValues & {
|
|
24
|
+
agent_scratchpad?: string | BaseMessage[];
|
|
25
|
+
stop?: string[];
|
|
26
|
+
}, AgentAction | AgentFinish>;
|
|
27
|
+
streamRunnable?: boolean;
|
|
28
|
+
defaultRunName?: string;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Interface defining the input for creating a multi-action agent that uses
|
|
32
|
+
* runnables. It includes the Runnable instance, and an optional list of
|
|
33
|
+
* stop strings.
|
|
34
|
+
*/
|
|
35
|
+
export interface RunnableMultiActionAgentInput {
|
|
23
36
|
runnable: Runnable<ChainValues & {
|
|
24
37
|
agent_scratchpad?: string | BaseMessage[];
|
|
25
38
|
stop?: string[];
|
|
26
39
|
}, AgentAction[] | AgentAction | AgentFinish>;
|
|
40
|
+
streamRunnable?: boolean;
|
|
41
|
+
defaultRunName?: string;
|
|
27
42
|
stop?: string[];
|
|
28
43
|
}
|
|
44
|
+
/** @deprecated Renamed to RunnableMultiActionAgentInput. */
|
|
45
|
+
export interface RunnableAgentInput extends RunnableMultiActionAgentInput {
|
|
46
|
+
}
|
|
29
47
|
/**
|
|
30
48
|
* Abstract class representing an output parser specifically for agent
|
|
31
49
|
* actions and finishes in LangChain. It extends the `BaseOutputParser`
|
|
@@ -156,7 +156,7 @@ exports.XMLAgent = XMLAgent;
|
|
|
156
156
|
* });
|
|
157
157
|
* ```
|
|
158
158
|
*/
|
|
159
|
-
async function createXmlAgent({ llm, tools, prompt, }) {
|
|
159
|
+
async function createXmlAgent({ llm, tools, prompt, streamRunnable, }) {
|
|
160
160
|
const missingVariables = ["tools", "agent_scratchpad"].filter((v) => !prompt.inputVariables.includes(v));
|
|
161
161
|
if (missingVariables.length > 0) {
|
|
162
162
|
throw new Error(`Provided prompt is missing required input variables: ${JSON.stringify(missingVariables)}`);
|
|
@@ -176,6 +176,10 @@ async function createXmlAgent({ llm, tools, prompt, }) {
|
|
|
176
176
|
llmWithStop,
|
|
177
177
|
new output_parser_js_1.XMLAgentOutputParser(),
|
|
178
178
|
]);
|
|
179
|
-
return agent;
|
|
179
|
+
return new agent_js_1.RunnableSingleActionAgent({
|
|
180
|
+
runnable: agent,
|
|
181
|
+
defaultRunName: "XMLAgent",
|
|
182
|
+
streamRunnable,
|
|
183
|
+
});
|
|
180
184
|
}
|
|
181
185
|
exports.createXmlAgent = createXmlAgent;
|
|
@@ -1,13 +1,12 @@
|
|
|
1
1
|
import type { BaseLanguageModelInterface } from "@langchain/core/language_models/base";
|
|
2
2
|
import type { ToolInterface } from "@langchain/core/tools";
|
|
3
|
-
import { RunnableSequence } from "@langchain/core/runnables";
|
|
4
3
|
import type { BasePromptTemplate } from "@langchain/core/prompts";
|
|
5
4
|
import { AgentStep, AgentAction, AgentFinish } from "@langchain/core/agents";
|
|
6
5
|
import { ChainValues } from "@langchain/core/utils/types";
|
|
7
6
|
import { ChatPromptTemplate } from "@langchain/core/prompts";
|
|
8
7
|
import { CallbackManager } from "@langchain/core/callbacks/manager";
|
|
9
8
|
import { LLMChain } from "../../chains/llm_chain.js";
|
|
10
|
-
import { AgentArgs, BaseSingleActionAgent } from "../agent.js";
|
|
9
|
+
import { AgentArgs, BaseSingleActionAgent, RunnableSingleActionAgent } from "../agent.js";
|
|
11
10
|
import { XMLAgentOutputParser } from "./output_parser.js";
|
|
12
11
|
/**
|
|
13
12
|
* Interface for the input to the XMLAgent class.
|
|
@@ -62,6 +61,11 @@ export type CreateXmlAgentParams = {
|
|
|
62
61
|
* `tools` and `agent_scratchpad`.
|
|
63
62
|
*/
|
|
64
63
|
prompt: BasePromptTemplate;
|
|
64
|
+
/**
|
|
65
|
+
* Whether to invoke the underlying model in streaming mode,
|
|
66
|
+
* allowing streaming of intermediate steps. Defaults to true.
|
|
67
|
+
*/
|
|
68
|
+
streamRunnable?: boolean;
|
|
65
69
|
};
|
|
66
70
|
/**
|
|
67
71
|
* Create an agent that uses XML to format its logic.
|
|
@@ -113,6 +117,4 @@ export type CreateXmlAgentParams = {
|
|
|
113
117
|
* });
|
|
114
118
|
* ```
|
|
115
119
|
*/
|
|
116
|
-
export declare function createXmlAgent({ llm, tools, prompt, }: CreateXmlAgentParams): Promise<RunnableSequence<{
|
|
117
|
-
steps: AgentStep[];
|
|
118
|
-
}, AgentAction | AgentFinish>>;
|
|
120
|
+
export declare function createXmlAgent({ llm, tools, prompt, streamRunnable, }: CreateXmlAgentParams): Promise<RunnableSingleActionAgent>;
|
package/dist/agents/xml/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { RunnablePassthrough, RunnableSequence, } from "@langchain/core/runnables";
|
|
2
2
|
import { AIMessagePromptTemplate, ChatPromptTemplate, HumanMessagePromptTemplate, } from "@langchain/core/prompts";
|
|
3
3
|
import { LLMChain } from "../../chains/llm_chain.js";
|
|
4
|
-
import { BaseSingleActionAgent } from "../agent.js";
|
|
4
|
+
import { BaseSingleActionAgent, RunnableSingleActionAgent, } from "../agent.js";
|
|
5
5
|
import { AGENT_INSTRUCTIONS } from "./prompt.js";
|
|
6
6
|
import { XMLAgentOutputParser } from "./output_parser.js";
|
|
7
7
|
import { renderTextDescription } from "../../tools/render.js";
|
|
@@ -152,7 +152,7 @@ export class XMLAgent extends BaseSingleActionAgent {
|
|
|
152
152
|
* });
|
|
153
153
|
* ```
|
|
154
154
|
*/
|
|
155
|
-
export async function createXmlAgent({ llm, tools, prompt, }) {
|
|
155
|
+
export async function createXmlAgent({ llm, tools, prompt, streamRunnable, }) {
|
|
156
156
|
const missingVariables = ["tools", "agent_scratchpad"].filter((v) => !prompt.inputVariables.includes(v));
|
|
157
157
|
if (missingVariables.length > 0) {
|
|
158
158
|
throw new Error(`Provided prompt is missing required input variables: ${JSON.stringify(missingVariables)}`);
|
|
@@ -172,5 +172,9 @@ export async function createXmlAgent({ llm, tools, prompt, }) {
|
|
|
172
172
|
llmWithStop,
|
|
173
173
|
new XMLAgentOutputParser(),
|
|
174
174
|
]);
|
|
175
|
-
return agent;
|
|
175
|
+
return new RunnableSingleActionAgent({
|
|
176
|
+
runnable: agent,
|
|
177
|
+
defaultRunName: "XMLAgent",
|
|
178
|
+
streamRunnable,
|
|
179
|
+
});
|
|
176
180
|
}
|
|
@@ -116,6 +116,30 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
|
116
116
|
writable: true,
|
|
117
117
|
value: void 0
|
|
118
118
|
});
|
|
119
|
+
Object.defineProperty(this, "multiPageSections", {
|
|
120
|
+
enumerable: true,
|
|
121
|
+
configurable: true,
|
|
122
|
+
writable: true,
|
|
123
|
+
value: void 0
|
|
124
|
+
});
|
|
125
|
+
Object.defineProperty(this, "combineUnderNChars", {
|
|
126
|
+
enumerable: true,
|
|
127
|
+
configurable: true,
|
|
128
|
+
writable: true,
|
|
129
|
+
value: void 0
|
|
130
|
+
});
|
|
131
|
+
Object.defineProperty(this, "newAfterNChars", {
|
|
132
|
+
enumerable: true,
|
|
133
|
+
configurable: true,
|
|
134
|
+
writable: true,
|
|
135
|
+
value: void 0
|
|
136
|
+
});
|
|
137
|
+
Object.defineProperty(this, "maxCharacters", {
|
|
138
|
+
enumerable: true,
|
|
139
|
+
configurable: true,
|
|
140
|
+
writable: true,
|
|
141
|
+
value: void 0
|
|
142
|
+
});
|
|
119
143
|
// Temporary shim to avoid breaking existing users
|
|
120
144
|
// Remove when API keys are enforced by Unstructured and existing code will break anyway
|
|
121
145
|
const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
|
|
@@ -138,6 +162,10 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
|
138
162
|
this.hiResModelName = options.hiResModelName;
|
|
139
163
|
this.includePageBreaks = options.includePageBreaks;
|
|
140
164
|
this.chunkingStrategy = options.chunkingStrategy;
|
|
165
|
+
this.multiPageSections = options.multiPageSections;
|
|
166
|
+
this.combineUnderNChars = options.combineUnderNChars;
|
|
167
|
+
this.newAfterNChars = options.newAfterNChars;
|
|
168
|
+
this.maxCharacters = options.maxCharacters;
|
|
141
169
|
}
|
|
142
170
|
}
|
|
143
171
|
async _partition() {
|
|
@@ -177,6 +205,18 @@ class UnstructuredLoader extends base_js_1.BaseDocumentLoader {
|
|
|
177
205
|
if (this.chunkingStrategy) {
|
|
178
206
|
formData.append("chunking_strategy", this.chunkingStrategy);
|
|
179
207
|
}
|
|
208
|
+
if (this.multiPageSections !== undefined) {
|
|
209
|
+
formData.append("multipage_sections", this.multiPageSections ? "true" : "false");
|
|
210
|
+
}
|
|
211
|
+
if (this.combineUnderNChars !== undefined) {
|
|
212
|
+
formData.append("combine_under_n_chars", String(this.combineUnderNChars));
|
|
213
|
+
}
|
|
214
|
+
if (this.newAfterNChars !== undefined) {
|
|
215
|
+
formData.append("new_after_n_chars", String(this.newAfterNChars));
|
|
216
|
+
}
|
|
217
|
+
if (this.maxCharacters !== undefined) {
|
|
218
|
+
formData.append("max_characters", String(this.maxCharacters));
|
|
219
|
+
}
|
|
180
220
|
const headers = {
|
|
181
221
|
"UNSTRUCTURED-API-KEY": this.apiKey ?? "",
|
|
182
222
|
};
|
|
@@ -54,6 +54,10 @@ export type UnstructuredLoaderOptions = {
|
|
|
54
54
|
hiResModelName?: StringWithAutocomplete<HiResModelName>;
|
|
55
55
|
includePageBreaks?: boolean;
|
|
56
56
|
chunkingStrategy?: StringWithAutocomplete<ChunkingStrategy>;
|
|
57
|
+
multiPageSections?: boolean;
|
|
58
|
+
combineUnderNChars?: number;
|
|
59
|
+
newAfterNChars?: number;
|
|
60
|
+
maxCharacters?: number;
|
|
57
61
|
};
|
|
58
62
|
type UnstructuredDirectoryLoaderOptions = UnstructuredLoaderOptions & {
|
|
59
63
|
recursive?: boolean;
|
|
@@ -81,6 +85,10 @@ export declare class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
81
85
|
private hiResModelName?;
|
|
82
86
|
private includePageBreaks?;
|
|
83
87
|
private chunkingStrategy?;
|
|
88
|
+
private multiPageSections?;
|
|
89
|
+
private combineUnderNChars?;
|
|
90
|
+
private newAfterNChars?;
|
|
91
|
+
private maxCharacters?;
|
|
84
92
|
constructor(filePathOrLegacyApiUrl: string, optionsOrLegacyFilePath?: UnstructuredLoaderOptions | string);
|
|
85
93
|
_partition(): Promise<Element[]>;
|
|
86
94
|
load(): Promise<Document[]>;
|
|
@@ -112,6 +112,30 @@ export class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
112
112
|
writable: true,
|
|
113
113
|
value: void 0
|
|
114
114
|
});
|
|
115
|
+
Object.defineProperty(this, "multiPageSections", {
|
|
116
|
+
enumerable: true,
|
|
117
|
+
configurable: true,
|
|
118
|
+
writable: true,
|
|
119
|
+
value: void 0
|
|
120
|
+
});
|
|
121
|
+
Object.defineProperty(this, "combineUnderNChars", {
|
|
122
|
+
enumerable: true,
|
|
123
|
+
configurable: true,
|
|
124
|
+
writable: true,
|
|
125
|
+
value: void 0
|
|
126
|
+
});
|
|
127
|
+
Object.defineProperty(this, "newAfterNChars", {
|
|
128
|
+
enumerable: true,
|
|
129
|
+
configurable: true,
|
|
130
|
+
writable: true,
|
|
131
|
+
value: void 0
|
|
132
|
+
});
|
|
133
|
+
Object.defineProperty(this, "maxCharacters", {
|
|
134
|
+
enumerable: true,
|
|
135
|
+
configurable: true,
|
|
136
|
+
writable: true,
|
|
137
|
+
value: void 0
|
|
138
|
+
});
|
|
115
139
|
// Temporary shim to avoid breaking existing users
|
|
116
140
|
// Remove when API keys are enforced by Unstructured and existing code will break anyway
|
|
117
141
|
const isLegacySyntax = typeof optionsOrLegacyFilePath === "string";
|
|
@@ -134,6 +158,10 @@ export class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
134
158
|
this.hiResModelName = options.hiResModelName;
|
|
135
159
|
this.includePageBreaks = options.includePageBreaks;
|
|
136
160
|
this.chunkingStrategy = options.chunkingStrategy;
|
|
161
|
+
this.multiPageSections = options.multiPageSections;
|
|
162
|
+
this.combineUnderNChars = options.combineUnderNChars;
|
|
163
|
+
this.newAfterNChars = options.newAfterNChars;
|
|
164
|
+
this.maxCharacters = options.maxCharacters;
|
|
137
165
|
}
|
|
138
166
|
}
|
|
139
167
|
async _partition() {
|
|
@@ -173,6 +201,18 @@ export class UnstructuredLoader extends BaseDocumentLoader {
|
|
|
173
201
|
if (this.chunkingStrategy) {
|
|
174
202
|
formData.append("chunking_strategy", this.chunkingStrategy);
|
|
175
203
|
}
|
|
204
|
+
if (this.multiPageSections !== undefined) {
|
|
205
|
+
formData.append("multipage_sections", this.multiPageSections ? "true" : "false");
|
|
206
|
+
}
|
|
207
|
+
if (this.combineUnderNChars !== undefined) {
|
|
208
|
+
formData.append("combine_under_n_chars", String(this.combineUnderNChars));
|
|
209
|
+
}
|
|
210
|
+
if (this.newAfterNChars !== undefined) {
|
|
211
|
+
formData.append("new_after_n_chars", String(this.newAfterNChars));
|
|
212
|
+
}
|
|
213
|
+
if (this.maxCharacters !== undefined) {
|
|
214
|
+
formData.append("max_characters", String(this.maxCharacters));
|
|
215
|
+
}
|
|
176
216
|
const headers = {
|
|
177
217
|
"UNSTRUCTURED-API-KEY": this.apiKey ?? "",
|
|
178
218
|
};
|
|
@@ -23,6 +23,13 @@ class GitbookLoader extends cheerio_js_1.CheerioWebBaseLoader {
|
|
|
23
23
|
writable: true,
|
|
24
24
|
value: false
|
|
25
25
|
});
|
|
26
|
+
Object.defineProperty(this, "baseUrl", {
|
|
27
|
+
enumerable: true,
|
|
28
|
+
configurable: true,
|
|
29
|
+
writable: true,
|
|
30
|
+
value: void 0
|
|
31
|
+
});
|
|
32
|
+
this.baseUrl = webPath;
|
|
26
33
|
this.webPath = path;
|
|
27
34
|
this.shouldLoadAllPaths =
|
|
28
35
|
params.shouldLoadAllPaths ?? this.shouldLoadAllPaths;
|
|
@@ -84,9 +91,10 @@ class GitbookLoader extends cheerio_js_1.CheerioWebBaseLoader {
|
|
|
84
91
|
.map((element) => $(element).text());
|
|
85
92
|
const documents = [];
|
|
86
93
|
for (const url of urls) {
|
|
87
|
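-
console.log(`Fetching text from ${url}`);
|
|
88
|
-
const html = await GitbookLoader._scrape(url, this.caller, this.timeout);
|
|
89
|
-
documents.push(...this.loadPath(html, url));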
|
|
94
|
+
const buildUrl = url.includes(this.baseUrl) ? url : this.baseUrl + url;
|
|
95
|
+
console.log(`Fetching text from ${buildUrl}`);
|
|
96
|
+
const html = await GitbookLoader._scrape(buildUrl, this.caller, this.timeout);
|
|
97
|
+
documents.push(...this.loadPath(html, buildUrl));
|
|
90
98
|
}
|
|
91
99
|
console.log(`Fetched ${documents.length} documents.`);
|
|
92
100
|
return documents;
|
|
@@ -15,6 +15,7 @@ interface GitbookLoaderParams {
|
|
|
15
15
|
export declare class GitbookLoader extends CheerioWebBaseLoader {
|
|
16
16
|
webPath: string;
|
|
17
17
|
shouldLoadAllPaths: boolean;
|
|
18
|
+
private readonly baseUrl;
|
|
18
19
|
constructor(webPath: string, params?: GitbookLoaderParams);
|
|
19
20
|
/**
|
|
20
21
|
* Method that scrapes the web document using Cheerio and loads the
|
|
@@ -20,6 +20,13 @@ export class GitbookLoader extends CheerioWebBaseLoader {
|
|
|
20
20
|
writable: true,
|
|
21
21
|
value: false
|
|
22
22
|
});
|
|
23
|
+
Object.defineProperty(this, "baseUrl", {
|
|
24
|
+
enumerable: true,
|
|
25
|
+
configurable: true,
|
|
26
|
+
writable: true,
|
|
27
|
+
value: void 0
|
|
28
|
+
});
|
|
29
|
+
this.baseUrl = webPath;
|
|
23
30
|
this.webPath = path;
|
|
24
31
|
this.shouldLoadAllPaths =
|
|
25
32
|
params.shouldLoadAllPaths ?? this.shouldLoadAllPaths;
|
|
@@ -81,9 +88,10 @@ export class GitbookLoader extends CheerioWebBaseLoader {
|
|
|
81
88
|
.map((element) => $(element).text());
|
|
82
89
|
const documents = [];
|
|
83
90
|
for (const url of urls) {
|
|
84
|
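-
console.log(`Fetching text from ${url}`);
|
|
85
|
-
const html = await GitbookLoader._scrape(url, this.caller, this.timeout);
|
|
86
|
-
documents.push(...this.loadPath(html, url));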
|
|
91
|
+
const buildUrl = url.includes(this.baseUrl) ? url : this.baseUrl + url;
|
|
92
|
+
console.log(`Fetching text from ${buildUrl}`);
|
|
93
|
+
const html = await GitbookLoader._scrape(buildUrl, this.caller, this.timeout);
|
|
94
|
+
documents.push(...this.loadPath(html, buildUrl));
|
|
87
95
|
}
|
|
88
96
|
console.log(`Fetched ${documents.length} documents.`);
|
|
89
97
|
return documents;
|
|
@@ -4,6 +4,9 @@ exports.OutputFixingParser = void 0;
|
|
|
4
4
|
const output_parsers_1 = require("@langchain/core/output_parsers");
|
|
5
5
|
const llm_chain_js_1 = require("../chains/llm_chain.cjs");
|
|
6
6
|
const prompts_js_1 = require("./prompts.cjs");
|
|
7
|
+
function isLLMChain(x) {
|
|
8
|
+
return (x.prompt !== undefined && x.llm !== undefined);
|
|
9
|
+
}
|
|
7
10
|
/**
|
|
8
11
|
* Class that extends the BaseOutputParser to handle situations where the
|
|
9
12
|
* initial parsing attempt fails. It contains a retryChain for retrying
|
|
@@ -70,13 +73,22 @@ class OutputFixingParser extends output_parsers_1.BaseOutputParser {
|
|
|
70
73
|
catch (e) {
|
|
71
74
|
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
72
75
|
if (e instanceof output_parsers_1.OutputParserException) {
|
|
73
|
-
const result = await this.retryChain.call({
|
|
76
|
+
const retryInput = {
|
|
74
77
|
instructions: this.parser.getFormatInstructions(),
|
|
75
78
|
completion,
|
|
76
79
|
error: e,
|
|
77
|
-
}, callbacks);
|
|
78
|
-
|
|
79
|
-
|
|
80
|
+
};
|
|
81
|
+
if (isLLMChain(this.retryChain)) {
|
|
82
|
+
const result = await this.retryChain.call(retryInput, callbacks);
|
|
83
|
+
const newCompletion = result[this.retryChain.outputKey];
|
|
84
|
+
return this.parser.parse(newCompletion, callbacks);
|
|
85
|
+
}
|
|
86
|
+
else {
|
|
87
|
+
const result = await this.retryChain.invoke(retryInput, {
|
|
88
|
+
callbacks,
|
|
89
|
+
});
|
|
90
|
+
return result;
|
|
91
|
+
}
|
|
80
92
|
}
|
|
81
93
|
throw e;
|
|
82
94
|
}
|
|
@@ -1,8 +1,14 @@
|
|
|
1
1
|
import type { BaseLanguageModelInterface } from "@langchain/core/language_models/base";
|
|
2
2
|
import { Callbacks } from "@langchain/core/callbacks/manager";
|
|
3
|
-
import { BaseOutputParser } from "@langchain/core/output_parsers";
|
|
3
|
+
import { BaseOutputParser, OutputParserException } from "@langchain/core/output_parsers";
|
|
4
4
|
import { BasePromptTemplate } from "@langchain/core/prompts";
|
|
5
|
+
import { Runnable } from "@langchain/core/runnables";
|
|
5
6
|
import { LLMChain } from "../chains/llm_chain.js";
|
|
7
|
+
interface OutputFixingParserRetryInput {
|
|
8
|
+
instructions: string;
|
|
9
|
+
completion: string;
|
|
10
|
+
error: OutputParserException;
|
|
11
|
+
}
|
|
6
12
|
/**
|
|
7
13
|
* Class that extends the BaseOutputParser to handle situations where the
|
|
8
14
|
* initial parsing attempt fails. It contains a retryChain for retrying
|
|
@@ -13,7 +19,7 @@ export declare class OutputFixingParser<T> extends BaseOutputParser<T> {
|
|
|
13
19
|
lc_namespace: string[];
|
|
14
20
|
lc_serializable: boolean;
|
|
15
21
|
parser: BaseOutputParser<T>;
|
|
16
|
-
retryChain: LLMChain;
|
|
22
|
+
retryChain: LLMChain | Runnable<OutputFixingParserRetryInput, T>;
|
|
17
23
|
/**
|
|
18
24
|
* Static method to create a new instance of OutputFixingParser using a
|
|
19
25
|
* given language model, parser, and optional fields.
|
|
@@ -27,7 +33,7 @@ export declare class OutputFixingParser<T> extends BaseOutputParser<T> {
|
|
|
27
33
|
}): OutputFixingParser<T>;
|
|
28
34
|
constructor({ parser, retryChain, }: {
|
|
29
35
|
parser: BaseOutputParser<T>;
|
|
30
|
-
retryChain: LLMChain;
|
|
36
|
+
retryChain: LLMChain | Runnable<OutputFixingParserRetryInput, T>;
|
|
31
37
|
});
|
|
32
38
|
/**
|
|
33
39
|
* Method to parse the completion using the parser. If the initial parsing
|
|
@@ -44,3 +50,4 @@ export declare class OutputFixingParser<T> extends BaseOutputParser<T> {
|
|
|
44
50
|
*/
|
|
45
51
|
getFormatInstructions(): string;
|
|
46
52
|
}
|
|
53
|
+
export {};
|
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
import { BaseOutputParser, OutputParserException, } from "@langchain/core/output_parsers";
|
|
2
2
|
import { LLMChain } from "../chains/llm_chain.js";
|
|
3
3
|
import { NAIVE_FIX_PROMPT } from "./prompts.js";
|
|
4
|
+
function isLLMChain(x) {
|
|
5
|
+
return (x.prompt !== undefined && x.llm !== undefined);
|
|
6
|
+
}
|
|
4
7
|
/**
|
|
5
8
|
* Class that extends the BaseOutputParser to handle situations where the
|
|
6
9
|
* initial parsing attempt fails. It contains a retryChain for retrying
|
|
@@ -67,13 +70,22 @@ export class OutputFixingParser extends BaseOutputParser {
|
|
|
67
70
|
catch (e) {
|
|
68
71
|
// eslint-disable-next-line no-instanceof/no-instanceof
|
|
69
72
|
if (e instanceof OutputParserException) {
|
|
70
|
-
const result = await this.retryChain.call({
|
|
73
|
+
const retryInput = {
|
|
71
74
|
instructions: this.parser.getFormatInstructions(),
|
|
72
75
|
completion,
|
|
73
76
|
error: e,
|
|
74
|
-
}, callbacks);
|
|
75
|
-
|
|
76
|
-
|
|
77
|
+
};
|
|
78
|
+
if (isLLMChain(this.retryChain)) {
|
|
79
|
+
const result = await this.retryChain.call(retryInput, callbacks);
|
|
80
|
+
const newCompletion = result[this.retryChain.outputKey];
|
|
81
|
+
return this.parser.parse(newCompletion, callbacks);
|
|
82
|
+
}
|
|
83
|
+
else {
|
|
84
|
+
const result = await this.retryChain.invoke(retryInput, {
|
|
85
|
+
callbacks,
|
|
86
|
+
});
|
|
87
|
+
return result;
|
|
88
|
+
}
|
|
77
89
|
}
|
|
78
90
|
throw e;
|
|
79
91
|
}
|
|
@@ -153,9 +153,10 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
|
|
|
153
153
|
* This can be false if and only if `ids` are provided. You may want
|
|
154
154
|
* to set this to False if the documents are already in the docstore
|
|
155
155
|
* and you don't want to re-add them.
|
|
156
|
+
* @param config.chunkHeaderOptions Object with options for adding Contextual chunk headers
|
|
156
157
|
*/
|
|
157
158
|
async addDocuments(docs, config) {
|
|
158
|
-
const { ids, addToDocstore = true } = config ?? {};
|
|
159
|
+
const { ids, addToDocstore = true, childDocChunkHeaderOptions = {}, } = config ?? {};
|
|
159
160
|
const parentDocs = this.parentSplitter
|
|
160
161
|
? await this.parentSplitter.splitDocuments(docs)
|
|
161
162
|
: docs;
|
|
@@ -177,7 +178,7 @@ class ParentDocumentRetriever extends multi_vector_js_1.MultiVectorRetriever {
|
|
|
177
178
|
for (let i = 0; i < parentDocs.length; i += 1) {
|
|
178
179
|
const parentDoc = parentDocs[i];
|
|
179
180
|
const parentDocId = parentDocIds[i];
|
|
180
|
-
const subDocs = await this.childSplitter.splitDocuments([parentDoc]);
|
|
181
|
+
const subDocs = await this.childSplitter.splitDocuments([parentDoc], childDocChunkHeaderOptions);
|
|
181
182
|
const taggedSubDocs = subDocs.map((subDoc) => new documents_1.Document({
|
|
182
183
|
pageContent: subDoc.pageContent,
|
|
183
184
|
metadata: { ...subDoc.metadata, [this.idKey]: parentDocId },
|