langchain 0.0.197 → 0.0.199
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/agents/index.d.ts +1 -1
- package/dist/chains/conversational_retrieval_chain.cjs +16 -2
- package/dist/chains/conversational_retrieval_chain.d.ts +2 -0
- package/dist/chains/conversational_retrieval_chain.js +16 -2
- package/dist/chat_models/llama_cpp.cjs +45 -0
- package/dist/chat_models/llama_cpp.d.ts +4 -1
- package/dist/chat_models/llama_cpp.js +45 -0
- package/dist/document_loaders/fs/chatgpt.cjs +85 -0
- package/dist/document_loaders/fs/chatgpt.d.ts +8 -0
- package/dist/document_loaders/fs/chatgpt.js +81 -0
- package/dist/document_loaders/fs/pptx.cjs +39 -0
- package/dist/document_loaders/fs/pptx.d.ts +23 -0
- package/dist/document_loaders/fs/pptx.js +35 -0
- package/dist/document_loaders/web/confluence.cjs +31 -7
- package/dist/document_loaders/web/confluence.d.ts +12 -5
- package/dist/document_loaders/web/confluence.js +31 -7
- package/dist/experimental/openai_assistant/index.cjs +32 -0
- package/dist/experimental/openai_assistant/index.d.ts +26 -0
- package/dist/experimental/openai_assistant/index.js +32 -0
- package/dist/experimental/tools/pyinterpreter.cjs +248 -0
- package/dist/experimental/tools/pyinterpreter.d.ts +18 -0
- package/dist/experimental/tools/pyinterpreter.js +244 -0
- package/dist/graphs/neo4j_graph.cjs +49 -14
- package/dist/graphs/neo4j_graph.d.ts +30 -0
- package/dist/graphs/neo4j_graph.js +49 -14
- package/dist/llms/gradient_ai.cjs +98 -0
- package/dist/llms/gradient_ai.d.ts +50 -0
- package/dist/llms/gradient_ai.js +94 -0
- package/dist/llms/hf.cjs +13 -2
- package/dist/llms/hf.d.ts +5 -0
- package/dist/llms/hf.js +13 -2
- package/dist/llms/llama_cpp.cjs +17 -3
- package/dist/llms/llama_cpp.d.ts +4 -1
- package/dist/llms/llama_cpp.js +17 -3
- package/dist/llms/watsonx_ai.cjs +154 -0
- package/dist/llms/watsonx_ai.d.ts +72 -0
- package/dist/llms/watsonx_ai.js +150 -0
- package/dist/load/import_constants.cjs +6 -0
- package/dist/load/import_constants.js +6 -0
- package/dist/load/import_map.cjs +4 -3
- package/dist/load/import_map.d.ts +1 -0
- package/dist/load/import_map.js +1 -0
- package/dist/output_parsers/json.cjs +4 -0
- package/dist/output_parsers/json.js +4 -0
- package/dist/tools/google_places.cjs +81 -0
- package/dist/tools/google_places.d.ts +21 -0
- package/dist/tools/google_places.js +77 -0
- package/dist/vectorstores/clickhouse.cjs +286 -0
- package/dist/vectorstores/clickhouse.d.ts +126 -0
- package/dist/vectorstores/clickhouse.js +259 -0
- package/dist/vectorstores/elasticsearch.cjs +16 -3
- package/dist/vectorstores/elasticsearch.d.ts +6 -2
- package/dist/vectorstores/elasticsearch.js +16 -3
- package/dist/vectorstores/pgvector.cjs +142 -18
- package/dist/vectorstores/pgvector.d.ts +21 -0
- package/dist/vectorstores/pgvector.js +142 -18
- package/dist/vectorstores/prisma.cjs +1 -1
- package/dist/vectorstores/prisma.js +1 -1
- package/dist/vectorstores/weaviate.cjs +45 -2
- package/dist/vectorstores/weaviate.d.ts +27 -1
- package/dist/vectorstores/weaviate.js +45 -2
- package/dist/vectorstores/xata.cjs +3 -2
- package/dist/vectorstores/xata.js +3 -2
- package/document_loaders/fs/chatgpt.cjs +1 -0
- package/document_loaders/fs/chatgpt.d.ts +1 -0
- package/document_loaders/fs/chatgpt.js +1 -0
- package/document_loaders/fs/pptx.cjs +1 -0
- package/document_loaders/fs/pptx.d.ts +1 -0
- package/document_loaders/fs/pptx.js +1 -0
- package/experimental/tools/pyinterpreter.cjs +1 -0
- package/experimental/tools/pyinterpreter.d.ts +1 -0
- package/experimental/tools/pyinterpreter.js +1 -0
- package/llms/gradient_ai.cjs +1 -0
- package/llms/gradient_ai.d.ts +1 -0
- package/llms/gradient_ai.js +1 -0
- package/llms/watsonx_ai.cjs +1 -0
- package/llms/watsonx_ai.d.ts +1 -0
- package/llms/watsonx_ai.js +1 -0
- package/package.json +87 -13
- package/tools/google_places.cjs +1 -0
- package/tools/google_places.d.ts +1 -0
- package/tools/google_places.js +1 -0
- package/vectorstores/clickhouse.cjs +1 -0
- package/vectorstores/clickhouse.d.ts +1 -0
- package/vectorstores/clickhouse.js +1 -0
package/dist/agents/index.d.ts
CHANGED
@@ -6,7 +6,7 @@ export { ChatAgentOutputParser } from "./chat/outputParser.js";
 export { ChatConversationalAgent, type ChatConversationalAgentInput, type ChatConversationalCreatePromptArgs, } from "./chat_convo/index.js";
 export { ChatConversationalAgentOutputParser, type ChatConversationalAgentOutputParserArgs, ChatConversationalAgentOutputParserWithRetries, type ChatConversationalAgentOutputParserFormatInstructionsOptions, } from "./chat_convo/outputParser.js";
 export { AgentExecutor, type AgentExecutorInput } from "./executor.js";
-export { initializeAgentExecutor, initializeAgentExecutorWithOptions, type InitializeAgentExecutorOptions, } from "./initialize.js";
+export { initializeAgentExecutor, initializeAgentExecutorWithOptions, type InitializeAgentExecutorOptions, type InitializeAgentExecutorOptionsStructured, } from "./initialize.js";
 export { ZeroShotAgent, type ZeroShotAgentInput, type ZeroShotCreatePromptArgs, } from "./mrkl/index.js";
 export { ZeroShotAgentOutputParser } from "./mrkl/outputParser.js";
 export { AgentActionOutputParser, type AgentInput, type SerializedAgent, type SerializedAgentT, type SerializedZeroShotAgent, type StoppingMethod, } from "./types.js";
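
The only change to the agents entrypoint is that the `InitializeAgentExecutorOptionsStructured` type is now re-exported from `langchain/agents`. A minimal, hedged sketch of consuming it (the agent type string, tool, and model below are illustrative and not taken from this diff):

    import {
      initializeAgentExecutorWithOptions,
      type InitializeAgentExecutorOptionsStructured,
    } from "langchain/agents";
    import { ChatOpenAI } from "langchain/chat_models/openai";
    import { Calculator } from "langchain/tools/calculator";

    // Options for structured (multi-input tool) agents can now be typed explicitly.
    const options: InitializeAgentExecutorOptionsStructured = {
      agentType: "structured-chat-zero-shot-react-description",
    };
    const executor = await initializeAgentExecutorWithOptions(
      [new Calculator()],
      new ChatOpenAI({ temperature: 0 }),
      options
    );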
package/dist/chains/conversational_retrieval_chain.cjs
CHANGED
@@ -94,12 +94,20 @@ class ConversationalRetrievalQAChain extends base_js_1.BaseChain {
             writable: true,
             value: false
         });
+        Object.defineProperty(this, "returnGeneratedQuestion", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: false
+        });
         this.retriever = fields.retriever;
         this.combineDocumentsChain = fields.combineDocumentsChain;
         this.questionGeneratorChain = fields.questionGeneratorChain;
         this.inputKey = fields.inputKey ?? this.inputKey;
         this.returnSourceDocuments =
             fields.returnSourceDocuments ?? this.returnSourceDocuments;
+        this.returnGeneratedQuestion =
+            fields.returnGeneratedQuestion ?? this.returnGeneratedQuestion;
     }
     /**
      * Static method to convert the chat history input into a formatted
@@ -172,13 +180,19 @@ class ConversationalRetrievalQAChain extends base_js_1.BaseChain {
             input_documents: docs,
             chat_history: chatHistory,
         };
-
+        let result = await this.combineDocumentsChain.call(inputs, runManager?.getChild("combine_documents"));
         if (this.returnSourceDocuments) {
-
+            result = {
                 ...result,
                 sourceDocuments: docs,
             };
         }
+        if (this.returnGeneratedQuestion) {
+            result = {
+                ...result,
+                generatedQuestion: newQuestion,
+            };
+        }
         return result;
     }
     _chainType() {
package/dist/chains/conversational_retrieval_chain.d.ts
CHANGED
@@ -16,6 +16,7 @@ export interface ConversationalRetrievalQAChainInput extends ChainInputs {
     combineDocumentsChain: BaseChain;
     questionGeneratorChain: LLMChain;
     returnSourceDocuments?: boolean;
+    returnGeneratedQuestion?: boolean;
     inputKey?: string;
 }
 /**
@@ -62,6 +63,7 @@ export declare class ConversationalRetrievalQAChain extends BaseChain implements
     combineDocumentsChain: BaseChain;
     questionGeneratorChain: LLMChain;
     returnSourceDocuments: boolean;
+    returnGeneratedQuestion: boolean;
     constructor(fields: ConversationalRetrievalQAChainInput);
     /**
      * Static method to convert the chat history input into a formatted
package/dist/chains/conversational_retrieval_chain.js
CHANGED
@@ -91,12 +91,20 @@ export class ConversationalRetrievalQAChain extends BaseChain {
             writable: true,
             value: false
         });
+        Object.defineProperty(this, "returnGeneratedQuestion", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: false
+        });
         this.retriever = fields.retriever;
         this.combineDocumentsChain = fields.combineDocumentsChain;
         this.questionGeneratorChain = fields.questionGeneratorChain;
         this.inputKey = fields.inputKey ?? this.inputKey;
         this.returnSourceDocuments =
             fields.returnSourceDocuments ?? this.returnSourceDocuments;
+        this.returnGeneratedQuestion =
+            fields.returnGeneratedQuestion ?? this.returnGeneratedQuestion;
     }
     /**
      * Static method to convert the chat history input into a formatted
@@ -169,13 +177,19 @@ export class ConversationalRetrievalQAChain extends BaseChain {
             input_documents: docs,
             chat_history: chatHistory,
         };
-
+        let result = await this.combineDocumentsChain.call(inputs, runManager?.getChild("combine_documents"));
         if (this.returnSourceDocuments) {
-
+            result = {
                 ...result,
                 sourceDocuments: docs,
             };
         }
+        if (this.returnGeneratedQuestion) {
+            result = {
+                ...result,
+                generatedQuestion: newQuestion,
+            };
+        }
         return result;
     }
     _chainType() {
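
ConversationalRetrievalQAChain gains a `returnGeneratedQuestion` flag; when set, the rephrased standalone question is added to the chain output under the `generatedQuestion` key. A hedged usage sketch (it assumes `fromLLM` forwards the flag to the constructor the same way it forwards `returnSourceDocuments`; the model, embeddings, and texts are placeholders):

    import { ConversationalRetrievalQAChain } from "langchain/chains";
    import { ChatOpenAI } from "langchain/chat_models/openai";
    import { OpenAIEmbeddings } from "langchain/embeddings/openai";
    import { MemoryVectorStore } from "langchain/vectorstores/memory";

    const vectorStore = await MemoryVectorStore.fromTexts(
      ["langchain 0.0.199 added a ChatGPT export loader."],
      [{ id: 1 }],
      new OpenAIEmbeddings()
    );

    const chain = ConversationalRetrievalQAChain.fromLLM(
      new ChatOpenAI({ temperature: 0 }),
      vectorStore.asRetriever(),
      { returnSourceDocuments: true, returnGeneratedQuestion: true }
    );

    const res = await chain.call({
      question: "What did that release add?",
      chat_history: "Human: Tell me about recent langchain releases.\nAssistant: Sure.",
    });
    // res.generatedQuestion holds the standalone question produced by the question generator chain.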
package/dist/chat_models/llama_cpp.cjs
CHANGED
@@ -4,6 +4,7 @@ exports.ChatLlamaCpp = void 0;
 const node_llama_cpp_1 = require("node-llama-cpp");
 const base_js_1 = require("./base.cjs");
 const llama_cpp_js_1 = require("../util/llama_cpp.cjs");
+const index_js_1 = require("../schema/index.cjs");
 /**
  * To use this model you need to have the `node-llama-cpp` module installed.
  * This can be installed using `npm install -S node-llama-cpp` and the minimum
@@ -139,6 +140,25 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
             throw new Error("Error getting prompt completion.");
         }
     }
+    async *_streamResponseChunks(input, _options, runManager) {
+        const promptOptions = {
+            temperature: this?.temperature,
+            topK: this?.topK,
+            topP: this?.topP,
+        };
+        const prompt = this._buildPrompt(input);
+        const stream = await this.caller.call(async () => this._context.evaluate(this._context.encode(prompt), promptOptions));
+        for await (const chunk of stream) {
+            yield new index_js_1.ChatGenerationChunk({
+                text: this._context.decode([chunk]),
+                message: new index_js_1.AIMessageChunk({
+                    content: this._context.decode([chunk]),
+                }),
+                generationInfo: {},
+            });
+            await runManager?.handleLLMNewToken(this._context.decode([chunk]) ?? "");
+        }
+    }
     // This constructs a new session if we need to adding in any sys messages or previous chats
     _buildSession(messages) {
         let prompt = "";
@@ -227,5 +247,30 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
         }
         return result;
     }
+    _buildPrompt(input) {
+        const prompt = input
+            .map((message) => {
+            let messageText;
+            if (message._getType() === "human") {
+                messageText = `[INST] ${message.content} [/INST]`;
+            }
+            else if (message._getType() === "ai") {
+                messageText = message.content;
+            }
+            else if (message._getType() === "system") {
+                messageText = `<<SYS>> ${message.content} <</SYS>>`;
+            }
+            else if (index_js_1.ChatMessage.isInstance(message)) {
+                messageText = `\n\n${message.role[0].toUpperCase()}${message.role.slice(1)}: ${message.content}`;
+            }
+            else {
+                console.warn(`Unsupported message type passed to llama_cpp: "${message._getType()}"`);
+                messageText = "";
+            }
+            return messageText;
+        })
+            .join("\n");
+        return prompt;
+    }
 }
 exports.ChatLlamaCpp = ChatLlamaCpp;
package/dist/chat_models/llama_cpp.d.ts
CHANGED
@@ -2,7 +2,8 @@ import { LlamaModel, LlamaContext, LlamaChatSession, type ConversationInteractio
 import { SimpleChatModel, BaseChatModelParams } from "./base.js";
 import { LlamaBaseCppInputs } from "../util/llama_cpp.js";
 import { BaseLanguageModelCallOptions } from "../base_language/index.js";
-import
+import { CallbackManagerForLLMRun } from "../callbacks/manager.js";
+import { BaseMessage, ChatGenerationChunk } from "../schema/index.js";
 /**
  * Note that the modelPath is the only required parameter. For testing you
  * can set this in the environment variable `LLAMA_PATH`.
@@ -63,6 +64,8 @@ export declare class ChatLlamaCpp extends SimpleChatModel<LlamaCppCallOptions> {
     };
     /** @ignore */
     _call(messages: BaseMessage[], _options: this["ParsedCallOptions"]): Promise<string>;
+    _streamResponseChunks(input: BaseMessage[], _options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): AsyncGenerator<ChatGenerationChunk>;
     protected _buildSession(messages: BaseMessage[]): string;
     protected _convertMessagesToInteractions(messages: BaseMessage[]): ConversationInteraction[];
+    protected _buildPrompt(input: BaseMessage[]): string;
 }
package/dist/chat_models/llama_cpp.js
CHANGED
@@ -1,6 +1,7 @@
 import { LlamaChatSession, } from "node-llama-cpp";
 import { SimpleChatModel } from "./base.js";
 import { createLlamaModel, createLlamaContext, } from "../util/llama_cpp.js";
+import { ChatGenerationChunk, AIMessageChunk, ChatMessage, } from "../schema/index.js";
 /**
  * To use this model you need to have the `node-llama-cpp` module installed.
  * This can be installed using `npm install -S node-llama-cpp` and the minimum
@@ -136,6 +137,25 @@ export class ChatLlamaCpp extends SimpleChatModel {
             throw new Error("Error getting prompt completion.");
         }
     }
+    async *_streamResponseChunks(input, _options, runManager) {
+        const promptOptions = {
+            temperature: this?.temperature,
+            topK: this?.topK,
+            topP: this?.topP,
+        };
+        const prompt = this._buildPrompt(input);
+        const stream = await this.caller.call(async () => this._context.evaluate(this._context.encode(prompt), promptOptions));
+        for await (const chunk of stream) {
+            yield new ChatGenerationChunk({
+                text: this._context.decode([chunk]),
+                message: new AIMessageChunk({
+                    content: this._context.decode([chunk]),
+                }),
+                generationInfo: {},
+            });
+            await runManager?.handleLLMNewToken(this._context.decode([chunk]) ?? "");
+        }
+    }
     // This constructs a new session if we need to adding in any sys messages or previous chats
     _buildSession(messages) {
         let prompt = "";
@@ -224,4 +244,29 @@ export class ChatLlamaCpp extends SimpleChatModel {
         }
         return result;
     }
+    _buildPrompt(input) {
+        const prompt = input
+            .map((message) => {
+            let messageText;
+            if (message._getType() === "human") {
+                messageText = `[INST] ${message.content} [/INST]`;
+            }
+            else if (message._getType() === "ai") {
+                messageText = message.content;
+            }
+            else if (message._getType() === "system") {
+                messageText = `<<SYS>> ${message.content} <</SYS>>`;
+            }
+            else if (ChatMessage.isInstance(message)) {
+                messageText = `\n\n${message.role[0].toUpperCase()}${message.role.slice(1)}: ${message.content}`;
+            }
+            else {
+                console.warn(`Unsupported message type passed to llama_cpp: "${message._getType()}"`);
+                messageText = "";
+            }
+            return messageText;
+        })
+            .join("\n");
+        return prompt;
+    }
 }
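
ChatLlamaCpp now implements `_streamResponseChunks` (built on the new protected `_buildPrompt` helper), so token-level streaming works through the model's standard streaming entry point. A rough sketch, assuming the generic `.stream()` method available on chat models in this release; the model path and prompt are placeholders:

    import { ChatLlamaCpp } from "langchain/chat_models/llama_cpp";
    import { HumanMessage } from "langchain/schema";

    const model = new ChatLlamaCpp({
      modelPath: process.env.LLAMA_PATH!, // path to a local llama.cpp-compatible model file
      temperature: 0.7,
    });

    // Each yielded chunk wraps a single decoded token.
    const stream = await model.stream([new HumanMessage("Write a haiku about rivers.")]);
    for await (const chunk of stream) {
      process.stdout.write(chunk.content);
    }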
package/dist/document_loaders/fs/chatgpt.cjs
ADDED
@@ -0,0 +1,85 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ChatGPTLoader = void 0;
+const text_js_1 = require("./text.cjs");
+const document_js_1 = require("../../document.cjs");
+function concatenateRows(message, title) {
+    /**
+     * Combine message information in a readable format ready to be used.
+     * @param {ChatGPTMessage} message - Message to be concatenated
+     * @param {string} title - Title of the conversation
+     *
+     * @returns {string} Concatenated message
+     */
+    if (!message) {
+        return "";
+    }
+    const sender = message.author ? message.author.role : "unknown";
+    const text = message.content.parts[0];
+    const date = new Date(message.create_time * 1000)
+        .toISOString()
+        .slice(0, 19)
+        .replace("T", " ");
+    return `${title} - ${sender} on ${date}: ${text}\n\n`;
+}
+class ChatGPTLoader extends text_js_1.TextLoader {
+    constructor(filePathOrBlob, numLogs = 0) {
+        super(filePathOrBlob);
+        Object.defineProperty(this, "numLogs", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.numLogs = numLogs;
+    }
+    async parse(raw) {
+        let data;
+        try {
+            data = JSON.parse(raw);
+        }
+        catch (e) {
+            console.error(e);
+            throw new Error("Failed to parse JSON");
+        }
+        const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data;
+        return truncatedData.map((d) => Object.values(d.mapping)
+            .filter((msg, idx) => !(idx === 0 && msg.message.author.role === "system"))
+            .map((msg) => concatenateRows(msg.message, d.title))
+            .join(""));
+    }
+    async load() {
+        let text;
+        let metadata;
+        if (typeof this.filePathOrBlob === "string") {
+            const { readFile } = await text_js_1.TextLoader.imports();
+            try {
+                text = await readFile(this.filePathOrBlob, "utf8");
+            }
+            catch (e) {
+                console.error(e);
+                throw new Error("Failed to read file");
+            }
+            metadata = { source: this.filePathOrBlob };
+        }
+        else {
+            try {
+                text = await this.filePathOrBlob.text();
+            }
+            catch (e) {
+                console.error(e);
+                throw new Error("Failed to read blob");
+            }
+            metadata = { source: "blob", blobType: this.filePathOrBlob.type };
+        }
+        const parsed = await this.parse(text);
+        return parsed.map((pageContent, i) => new document_js_1.Document({
+            pageContent,
+            metadata: {
+                ...metadata,
+                logIndex: i + 1,
+            },
+        }));
+    }
+}
+exports.ChatGPTLoader = ChatGPTLoader;
package/dist/document_loaders/fs/chatgpt.d.ts
ADDED
@@ -0,0 +1,8 @@
+import { TextLoader } from "./text.js";
+import { Document } from "../../document.js";
+export declare class ChatGPTLoader extends TextLoader {
+    numLogs: number;
+    constructor(filePathOrBlob: string | Blob, numLogs?: number);
+    protected parse(raw: string): Promise<string[]>;
+    load(): Promise<Document[]>;
+}
package/dist/document_loaders/fs/chatgpt.js
ADDED
@@ -0,0 +1,81 @@
+import { TextLoader } from "./text.js";
+import { Document } from "../../document.js";
+function concatenateRows(message, title) {
+    /**
+     * Combine message information in a readable format ready to be used.
+     * @param {ChatGPTMessage} message - Message to be concatenated
+     * @param {string} title - Title of the conversation
+     *
+     * @returns {string} Concatenated message
+     */
+    if (!message) {
+        return "";
+    }
+    const sender = message.author ? message.author.role : "unknown";
+    const text = message.content.parts[0];
+    const date = new Date(message.create_time * 1000)
+        .toISOString()
+        .slice(0, 19)
+        .replace("T", " ");
+    return `${title} - ${sender} on ${date}: ${text}\n\n`;
+}
+export class ChatGPTLoader extends TextLoader {
+    constructor(filePathOrBlob, numLogs = 0) {
+        super(filePathOrBlob);
+        Object.defineProperty(this, "numLogs", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.numLogs = numLogs;
+    }
+    async parse(raw) {
+        let data;
+        try {
+            data = JSON.parse(raw);
+        }
+        catch (e) {
+            console.error(e);
+            throw new Error("Failed to parse JSON");
+        }
+        const truncatedData = this.numLogs > 0 ? data.slice(0, this.numLogs) : data;
+        return truncatedData.map((d) => Object.values(d.mapping)
+            .filter((msg, idx) => !(idx === 0 && msg.message.author.role === "system"))
+            .map((msg) => concatenateRows(msg.message, d.title))
+            .join(""));
+    }
+    async load() {
+        let text;
+        let metadata;
+        if (typeof this.filePathOrBlob === "string") {
+            const { readFile } = await TextLoader.imports();
+            try {
+                text = await readFile(this.filePathOrBlob, "utf8");
+            }
+            catch (e) {
+                console.error(e);
+                throw new Error("Failed to read file");
+            }
+            metadata = { source: this.filePathOrBlob };
+        }
+        else {
+            try {
+                text = await this.filePathOrBlob.text();
+            }
+            catch (e) {
+                console.error(e);
+                throw new Error("Failed to read blob");
+            }
+            metadata = { source: "blob", blobType: this.filePathOrBlob.type };
+        }
+        const parsed = await this.parse(text);
+        return parsed.map((pageContent, i) => new Document({
+            pageContent,
+            metadata: {
+                ...metadata,
+                logIndex: i + 1,
+            },
+        }));
+    }
+}
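
The new ChatGPTLoader turns a ChatGPT `conversations.json` export into one Document per conversation, optionally limited to the first `numLogs` conversations. A small sketch with a placeholder file path:

    import { ChatGPTLoader } from "langchain/document_loaders/fs/chatgpt";

    // Load only the first two conversations from an exported conversations.json file.
    const loader = new ChatGPTLoader("./conversations.json", 2);
    const docs = await loader.load();
    console.log(docs[0].metadata); // { source: "./conversations.json", logIndex: 1 }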
package/dist/document_loaders/fs/pptx.cjs
ADDED
@@ -0,0 +1,39 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.PPTXLoader = void 0;
+const officeparser_1 = require("officeparser");
+const document_js_1 = require("../../document.cjs");
+const buffer_js_1 = require("./buffer.cjs");
+/**
+ * A class that extends the `BufferLoader` class. It represents a document
+ * loader that loads documents from PDF files.
+ */
+class PPTXLoader extends buffer_js_1.BufferLoader {
+    constructor(filePathOrBlob) {
+        super(filePathOrBlob);
+    }
+    /**
+     * A method that takes a `raw` buffer and `metadata` as parameters and
+     * returns a promise that resolves to an array of `Document` instances. It
+     * uses the `parseOfficeAsync` function from the `officeparser` module to extract
+     * the raw text content from the buffer. If the extracted powerpoint content is
+     * empty, it returns an empty array. Otherwise, it creates a new
+     * `Document` instance with the extracted powerpoint content and the provided
+     * metadata, and returns it as an array.
+     * @param raw The buffer to be parsed.
+     * @param metadata The metadata of the document.
+     * @returns A promise that resolves to an array of `Document` instances.
+     */
+    async parse(raw, metadata) {
+        const pptx = await (0, officeparser_1.parseOfficeAsync)(raw, { outputErrorToConsole: true });
+        if (!pptx)
+            return [];
+        return [
+            new document_js_1.Document({
+                pageContent: pptx,
+                metadata,
+            }),
+        ];
+    }
+}
+exports.PPTXLoader = PPTXLoader;
package/dist/document_loaders/fs/pptx.d.ts
ADDED
@@ -0,0 +1,23 @@
+/// <reference types="node" resolution-mode="require"/>
+import { Document } from "../../document.js";
+import { BufferLoader } from "./buffer.js";
+/**
+ * A class that extends the `BufferLoader` class. It represents a document
+ * loader that loads documents from PDF files.
+ */
+export declare class PPTXLoader extends BufferLoader {
+    constructor(filePathOrBlob: string | Blob);
+    /**
+     * A method that takes a `raw` buffer and `metadata` as parameters and
+     * returns a promise that resolves to an array of `Document` instances. It
+     * uses the `parseOfficeAsync` function from the `officeparser` module to extract
+     * the raw text content from the buffer. If the extracted powerpoint content is
+     * empty, it returns an empty array. Otherwise, it creates a new
+     * `Document` instance with the extracted powerpoint content and the provided
+     * metadata, and returns it as an array.
+     * @param raw The buffer to be parsed.
+     * @param metadata The metadata of the document.
+     * @returns A promise that resolves to an array of `Document` instances.
+     */
+    parse(raw: Buffer, metadata: Document["metadata"]): Promise<Document[]>;
+}
package/dist/document_loaders/fs/pptx.js
ADDED
@@ -0,0 +1,35 @@
+import { parseOfficeAsync } from "officeparser";
+import { Document } from "../../document.js";
+import { BufferLoader } from "./buffer.js";
+/**
+ * A class that extends the `BufferLoader` class. It represents a document
+ * loader that loads documents from PDF files.
+ */
+export class PPTXLoader extends BufferLoader {
+    constructor(filePathOrBlob) {
+        super(filePathOrBlob);
+    }
+    /**
+     * A method that takes a `raw` buffer and `metadata` as parameters and
+     * returns a promise that resolves to an array of `Document` instances. It
+     * uses the `parseOfficeAsync` function from the `officeparser` module to extract
+     * the raw text content from the buffer. If the extracted powerpoint content is
+     * empty, it returns an empty array. Otherwise, it creates a new
+     * `Document` instance with the extracted powerpoint content and the provided
+     * metadata, and returns it as an array.
+     * @param raw The buffer to be parsed.
+     * @param metadata The metadata of the document.
+     * @returns A promise that resolves to an array of `Document` instances.
+     */
+    async parse(raw, metadata) {
+        const pptx = await parseOfficeAsync(raw, { outputErrorToConsole: true });
+        if (!pptx)
+            return [];
+        return [
+            new Document({
+                pageContent: pptx,
+                metadata,
+            }),
+        ];
+    }
+}
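
The new PPTXLoader reads a PowerPoint file (or Blob) with `officeparser` and returns its extracted text as a single Document, so the `officeparser` package must be installed alongside langchain. A sketch with a placeholder path:

    import { PPTXLoader } from "langchain/document_loaders/fs/pptx";

    const loader = new PPTXLoader("./quarterly-review.pptx");
    const docs = await loader.load();
    // docs is empty if no text could be extracted; otherwise docs[0].pageContent holds the slide text.
    console.log(docs[0]?.pageContent.slice(0, 200));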
package/dist/document_loaders/web/confluence.cjs
CHANGED
@@ -19,7 +19,7 @@ const base_js_1 = require("../base.cjs");
  * ```
  */
 class ConfluencePagesLoader extends base_js_1.BaseDocumentLoader {
-    constructor({ baseUrl, spaceKey, username, accessToken, limit = 25, }) {
+    constructor({ baseUrl, spaceKey, username, accessToken, limit = 25, personalAccessToken, }) {
         super();
         Object.defineProperty(this, "baseUrl", {
             enumerable: true,
@@ -51,11 +51,32 @@ class ConfluencePagesLoader extends base_js_1.BaseDocumentLoader {
             writable: true,
             value: void 0
         });
+        Object.defineProperty(this, "personalAccessToken", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         this.baseUrl = baseUrl;
         this.spaceKey = spaceKey;
         this.username = username;
         this.accessToken = accessToken;
         this.limit = limit;
+        this.personalAccessToken = personalAccessToken;
+    }
+    /**
+     * Returns the authorization header for the request.
+     * @returns The authorization header as a string, or undefined if no credentials were provided.
+     */
+    get authorizationHeader() {
+        if (this.personalAccessToken) {
+            return `Bearer ${this.personalAccessToken}`;
+        }
+        else if (this.username && this.accessToken) {
+            const authToken = Buffer.from(`${this.username}:${this.accessToken}`).toString("base64");
+            return `Basic ${authToken}`;
+        }
+        return undefined;
     }
     /**
      * Fetches all the pages in the specified space and converts each page to
@@ -79,13 +100,16 @@ class ConfluencePagesLoader extends base_js_1.BaseDocumentLoader {
      */
     async fetchConfluenceData(url) {
         try {
-            const
+            const initialHeaders = {
+                "Content-Type": "application/json",
+                Accept: "application/json",
+            };
+            const authHeader = this.authorizationHeader;
+            if (authHeader) {
+                initialHeaders.Authorization = authHeader;
+            }
             const response = await fetch(url, {
-                headers:
-                    Authorization: `Basic ${authToken}`,
-                    "Content-Type": "application/json",
-                    Accept: "application/json",
-                },
+                headers: initialHeaders,
             });
             if (!response.ok) {
                 throw new Error(`Failed to fetch ${url} from Confluence: ${response.status}`);
package/dist/document_loaders/web/confluence.d.ts
CHANGED
@@ -7,8 +7,9 @@ import { BaseDocumentLoader } from "../base.js";
 export interface ConfluencePagesLoaderParams {
     baseUrl: string;
     spaceKey: string;
-    username
-    accessToken
+    username?: string;
+    accessToken?: string;
+    personalAccessToken?: string;
     limit?: number;
 }
 /**
@@ -47,10 +48,16 @@ export interface ConfluenceAPIResponse {
 export declare class ConfluencePagesLoader extends BaseDocumentLoader {
     readonly baseUrl: string;
     readonly spaceKey: string;
-    readonly username
-    readonly accessToken
+    readonly username?: string;
+    readonly accessToken?: string;
     readonly limit: number;
-
+    readonly personalAccessToken?: string;
+    constructor({ baseUrl, spaceKey, username, accessToken, limit, personalAccessToken, }: ConfluencePagesLoaderParams);
+    /**
+     * Returns the authorization header for the request.
+     * @returns The authorization header as a string, or undefined if no credentials were provided.
+     */
+    private get authorizationHeader();
     /**
      * Fetches all the pages in the specified space and converts each page to
      * a Document instance.
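
ConfluencePagesLoader now accepts a `personalAccessToken` as an alternative to the `username` + `accessToken` pair; when present it is sent as an `Authorization: Bearer` header instead of Basic auth. A sketch with placeholder connection details:

    import { ConfluencePagesLoader } from "langchain/document_loaders/web/confluence";

    const loader = new ConfluencePagesLoader({
      baseUrl: "https://confluence.example.com",
      spaceKey: "ENG",
      // Either pass username + accessToken (Basic auth) or, as of this release,
      // a personal access token sent as a Bearer token.
      personalAccessToken: process.env.CONFLUENCE_PAT,
      limit: 50,
    });
    const docs = await loader.load();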