langchain 0.0.105 → 0.0.107
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/base_language/count_tokens.cjs +2 -2
- package/dist/base_language/count_tokens.js +2 -2
- package/dist/chat_models/anthropic.cjs +38 -43
- package/dist/chat_models/anthropic.d.ts +12 -13
- package/dist/chat_models/anthropic.js +39 -44
- package/dist/document_loaders/web/notionapi.cjs +59 -19
- package/dist/document_loaders/web/notionapi.d.ts +2 -0
- package/dist/document_loaders/web/notionapi.js +60 -20
- package/dist/document_loaders/web/sonix_audio.cjs +40 -0
- package/dist/document_loaders/web/sonix_audio.d.ts +12 -0
- package/dist/document_loaders/web/sonix_audio.js +36 -0
- package/dist/document_loaders/web/sort_xyz_blockchain.cjs +118 -0
- package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +37 -0
- package/dist/document_loaders/web/sort_xyz_blockchain.js +114 -0
- package/dist/embeddings/openai.cjs +1 -1
- package/dist/embeddings/openai.js +1 -1
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/load/import_map.cjs +2 -1
- package/dist/load/import_map.d.ts +1 -0
- package/dist/load/import_map.js +1 -0
- package/dist/tools/index.cjs +3 -1
- package/dist/tools/index.d.ts +1 -0
- package/dist/tools/index.js +1 -0
- package/dist/tools/wikipedia_query_run.cjs +108 -0
- package/dist/tools/wikipedia_query_run.d.ts +21 -0
- package/dist/tools/wikipedia_query_run.js +104 -0
- package/dist/util/async_caller.cjs +7 -0
- package/dist/util/async_caller.js +7 -0
- package/dist/vectorstores/supabase.cjs +12 -7
- package/dist/vectorstores/supabase.d.ts +6 -2
- package/dist/vectorstores/supabase.js +12 -7
- package/document_loaders/web/sonix_audio.cjs +1 -0
- package/document_loaders/web/sonix_audio.d.ts +1 -0
- package/document_loaders/web/sonix_audio.js +1 -0
- package/document_loaders/web/sort_xyz_blockchain.cjs +1 -0
- package/document_loaders/web/sort_xyz_blockchain.d.ts +1 -0
- package/document_loaders/web/sort_xyz_blockchain.js +1 -0
- package/package.json +23 -2
|
@@ -4,13 +4,13 @@ exports.calculateMaxTokens = exports.getModelContextSize = exports.getEmbeddingC
|
|
|
4
4
|
const tiktoken_js_1 = require("../util/tiktoken.cjs");
|
|
5
5
|
// https://www.npmjs.com/package/js-tiktoken
|
|
6
6
|
const getModelNameForTiktoken = (modelName) => {
|
|
7
|
-
if (modelName.startsWith("gpt-3.5-turbo-16k
|
|
7
|
+
if (modelName.startsWith("gpt-3.5-turbo-16k")) {
|
|
8
8
|
return "gpt-3.5-turbo-16k";
|
|
9
9
|
}
|
|
10
10
|
if (modelName.startsWith("gpt-3.5-turbo-")) {
|
|
11
11
|
return "gpt-3.5-turbo";
|
|
12
12
|
}
|
|
13
|
-
if (modelName.startsWith("gpt-4-32k
|
|
13
|
+
if (modelName.startsWith("gpt-4-32k")) {
|
|
14
14
|
return "gpt-4-32k";
|
|
15
15
|
}
|
|
16
16
|
if (modelName.startsWith("gpt-4-")) {
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import { encodingForModel } from "../util/tiktoken.js";
|
|
2
2
|
// https://www.npmjs.com/package/js-tiktoken
|
|
3
3
|
export const getModelNameForTiktoken = (modelName) => {
|
|
4
|
-
if (modelName.startsWith("gpt-3.5-turbo-16k
|
|
4
|
+
if (modelName.startsWith("gpt-3.5-turbo-16k")) {
|
|
5
5
|
return "gpt-3.5-turbo-16k";
|
|
6
6
|
}
|
|
7
7
|
if (modelName.startsWith("gpt-3.5-turbo-")) {
|
|
8
8
|
return "gpt-3.5-turbo";
|
|
9
9
|
}
|
|
10
|
-
if (modelName.startsWith("gpt-4-32k
|
|
10
|
+
if (modelName.startsWith("gpt-4-32k")) {
|
|
11
11
|
return "gpt-4-32k";
|
|
12
12
|
}
|
|
13
13
|
if (modelName.startsWith("gpt-4-")) {
|
|
@@ -2,9 +2,9 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.ChatAnthropic = void 0;
|
|
4
4
|
const sdk_1 = require("@anthropic-ai/sdk");
|
|
5
|
-
const base_js_1 = require("./base.cjs");
|
|
6
5
|
const index_js_1 = require("../schema/index.cjs");
|
|
7
6
|
const env_js_1 = require("../util/env.cjs");
|
|
7
|
+
const base_js_1 = require("./base.cjs");
|
|
8
8
|
function getAnthropicPromptFromMessage(type) {
|
|
9
9
|
switch (type) {
|
|
10
10
|
case "ai":
|
|
@@ -208,61 +208,56 @@ class ChatAnthropic extends base_js_1.BaseChatModel {
|
|
|
208
208
|
throw new Error("Missing Anthropic API key.");
|
|
209
209
|
}
|
|
210
210
|
let makeCompletionRequest;
|
|
211
|
+
let asyncCallerOptions = {};
|
|
211
212
|
if (request.stream) {
|
|
212
213
|
if (!this.streamingClient) {
|
|
213
214
|
const options = this.apiUrl ? { apiUrl: this.apiUrl } : undefined;
|
|
214
|
-
this.streamingClient = new sdk_1.
|
|
215
|
+
this.streamingClient = new sdk_1.Anthropic({
|
|
216
|
+
...options,
|
|
217
|
+
apiKey: this.anthropicApiKey,
|
|
218
|
+
});
|
|
215
219
|
}
|
|
216
220
|
makeCompletionRequest = async () => {
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
}
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
235
|
-
.catch((e) => {
|
|
236
|
-
// Anthropic doesn't actually throw JavaScript error objects at the moment.
|
|
237
|
-
// We convert the error so the async caller can recognize it correctly.
|
|
238
|
-
if (e?.name === "AbortError") {
|
|
239
|
-
throw new Error(`${e.name}: ${e.message}`);
|
|
221
|
+
const stream = await this.streamingClient.completions.create({
|
|
222
|
+
...request,
|
|
223
|
+
});
|
|
224
|
+
const completion = {
|
|
225
|
+
completion: "",
|
|
226
|
+
model: "",
|
|
227
|
+
stop_reason: "",
|
|
228
|
+
};
|
|
229
|
+
for await (const data of stream) {
|
|
230
|
+
completion.stop_reason = data.stop_reason;
|
|
231
|
+
completion.model = data.model;
|
|
232
|
+
if (options.signal?.aborted) {
|
|
233
|
+
stream.controller.abort();
|
|
234
|
+
throw new Error("AbortError: User aborted the request.");
|
|
235
|
+
}
|
|
236
|
+
if (data.stop_reason) {
|
|
237
|
+
break;
|
|
240
238
|
}
|
|
241
|
-
|
|
242
|
-
|
|
239
|
+
const part = data.completion;
|
|
240
|
+
if (part) {
|
|
241
|
+
completion.completion += part;
|
|
242
|
+
// eslint-disable-next-line no-void
|
|
243
|
+
void runManager?.handleLLMNewToken(part ?? "");
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
return completion;
|
|
243
247
|
};
|
|
244
248
|
}
|
|
245
249
|
else {
|
|
246
250
|
if (!this.batchClient) {
|
|
247
251
|
const options = this.apiUrl ? { apiUrl: this.apiUrl } : undefined;
|
|
248
|
-
this.batchClient = new sdk_1.
|
|
252
|
+
this.batchClient = new sdk_1.Anthropic({
|
|
253
|
+
...options,
|
|
254
|
+
apiKey: this.anthropicApiKey,
|
|
255
|
+
});
|
|
249
256
|
}
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
signal: options.signal,
|
|
253
|
-
})
|
|
254
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
255
|
-
.catch((e) => {
|
|
256
|
-
console.log(e);
|
|
257
|
-
// Anthropic doesn't actually throw JavaScript error objects at the moment.
|
|
258
|
-
// We convert the error so the async caller can recognize it correctly.
|
|
259
|
-
if (e?.type === "aborted") {
|
|
260
|
-
throw new Error(`${e.name}: ${e.message}`);
|
|
261
|
-
}
|
|
262
|
-
throw e;
|
|
263
|
-
});
|
|
257
|
+
asyncCallerOptions = { signal: options.signal };
|
|
258
|
+
makeCompletionRequest = async () => this.batchClient.completions.create({ ...request });
|
|
264
259
|
}
|
|
265
|
-
return this.caller.
|
|
260
|
+
return this.caller.callWithOptions(asyncCallerOptions, makeCompletionRequest);
|
|
266
261
|
}
|
|
267
262
|
_llmType() {
|
|
268
263
|
return "anthropic";
|
|
@@ -1,8 +1,9 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import {
|
|
3
|
-
import { BaseMessage, ChatResult } from "../schema/index.js";
|
|
4
|
-
import { CallbackManagerForLLMRun } from "../callbacks/manager.js";
|
|
1
|
+
import { Anthropic as AnthropicApi } from "@anthropic-ai/sdk";
|
|
2
|
+
import type { CompletionCreateParams } from "@anthropic-ai/sdk/resources/completions";
|
|
5
3
|
import { BaseLanguageModelCallOptions } from "../base_language/index.js";
|
|
4
|
+
import { CallbackManagerForLLMRun } from "../callbacks/manager.js";
|
|
5
|
+
import { BaseMessage, ChatResult } from "../schema/index.js";
|
|
6
|
+
import { BaseChatModel, BaseChatModelParams } from "./base.js";
|
|
6
7
|
/**
|
|
7
8
|
* Input to AnthropicChat class.
|
|
8
9
|
*/
|
|
@@ -85,33 +86,31 @@ export declare class ChatAnthropic extends BaseChatModel implements AnthropicInp
|
|
|
85
86
|
/**
|
|
86
87
|
* Get the parameters used to invoke the model
|
|
87
88
|
*/
|
|
88
|
-
invocationParams(options?: this["ParsedCallOptions"]): Omit<
|
|
89
|
+
invocationParams(options?: this["ParsedCallOptions"]): Omit<CompletionCreateParams, "prompt"> & Kwargs;
|
|
89
90
|
/** @ignore */
|
|
90
91
|
_identifyingParams(): {
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
} | undefined;
|
|
92
|
+
metadata?: AnthropicApi.Completions.CompletionCreateParams.CompletionRequestNonStreaming.Metadata | AnthropicApi.Completions.CompletionCreateParams.CompletionRequestStreaming.Metadata | undefined;
|
|
93
|
+
stream?: boolean | undefined;
|
|
94
94
|
model: string;
|
|
95
95
|
temperature?: number | undefined;
|
|
96
96
|
top_p?: number | undefined;
|
|
97
97
|
top_k?: number | undefined;
|
|
98
98
|
max_tokens_to_sample: number;
|
|
99
|
-
stop_sequences
|
|
99
|
+
stop_sequences?: string[] | undefined;
|
|
100
100
|
model_name: string;
|
|
101
101
|
};
|
|
102
102
|
/**
|
|
103
103
|
* Get the identifying parameters for the model
|
|
104
104
|
*/
|
|
105
105
|
identifyingParams(): {
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
} | undefined;
|
|
106
|
+
metadata?: AnthropicApi.Completions.CompletionCreateParams.CompletionRequestNonStreaming.Metadata | AnthropicApi.Completions.CompletionCreateParams.CompletionRequestStreaming.Metadata | undefined;
|
|
107
|
+
stream?: boolean | undefined;
|
|
109
108
|
model: string;
|
|
110
109
|
temperature?: number | undefined;
|
|
111
110
|
top_p?: number | undefined;
|
|
112
111
|
top_k?: number | undefined;
|
|
113
112
|
max_tokens_to_sample: number;
|
|
114
|
-
stop_sequences
|
|
113
|
+
stop_sequences?: string[] | undefined;
|
|
115
114
|
model_name: string;
|
|
116
115
|
};
|
|
117
116
|
private formatMessagesAsPrompt;
|
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import { AI_PROMPT,
|
|
2
|
-
import { BaseChatModel } from "./base.js";
|
|
1
|
+
import { AI_PROMPT, Anthropic as AnthropicApi, HUMAN_PROMPT, } from "@anthropic-ai/sdk";
|
|
3
2
|
import { AIMessage, } from "../schema/index.js";
|
|
4
3
|
import { getEnvironmentVariable } from "../util/env.js";
|
|
4
|
+
import { BaseChatModel } from "./base.js";
|
|
5
5
|
function getAnthropicPromptFromMessage(type) {
|
|
6
6
|
switch (type) {
|
|
7
7
|
case "ai":
|
|
@@ -205,61 +205,56 @@ export class ChatAnthropic extends BaseChatModel {
|
|
|
205
205
|
throw new Error("Missing Anthropic API key.");
|
|
206
206
|
}
|
|
207
207
|
let makeCompletionRequest;
|
|
208
|
+
let asyncCallerOptions = {};
|
|
208
209
|
if (request.stream) {
|
|
209
210
|
if (!this.streamingClient) {
|
|
210
211
|
const options = this.apiUrl ? { apiUrl: this.apiUrl } : undefined;
|
|
211
|
-
this.streamingClient = new AnthropicApi(
|
|
212
|
+
this.streamingClient = new AnthropicApi({
|
|
213
|
+
...options,
|
|
214
|
+
apiKey: this.anthropicApiKey,
|
|
215
|
+
});
|
|
212
216
|
}
|
|
213
217
|
makeCompletionRequest = async () => {
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
}
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
232
|
-
.catch((e) => {
|
|
233
|
-
// Anthropic doesn't actually throw JavaScript error objects at the moment.
|
|
234
|
-
// We convert the error so the async caller can recognize it correctly.
|
|
235
|
-
if (e?.name === "AbortError") {
|
|
236
|
-
throw new Error(`${e.name}: ${e.message}`);
|
|
218
|
+
const stream = await this.streamingClient.completions.create({
|
|
219
|
+
...request,
|
|
220
|
+
});
|
|
221
|
+
const completion = {
|
|
222
|
+
completion: "",
|
|
223
|
+
model: "",
|
|
224
|
+
stop_reason: "",
|
|
225
|
+
};
|
|
226
|
+
for await (const data of stream) {
|
|
227
|
+
completion.stop_reason = data.stop_reason;
|
|
228
|
+
completion.model = data.model;
|
|
229
|
+
if (options.signal?.aborted) {
|
|
230
|
+
stream.controller.abort();
|
|
231
|
+
throw new Error("AbortError: User aborted the request.");
|
|
232
|
+
}
|
|
233
|
+
if (data.stop_reason) {
|
|
234
|
+
break;
|
|
237
235
|
}
|
|
238
|
-
|
|
239
|
-
|
|
236
|
+
const part = data.completion;
|
|
237
|
+
if (part) {
|
|
238
|
+
completion.completion += part;
|
|
239
|
+
// eslint-disable-next-line no-void
|
|
240
|
+
void runManager?.handleLLMNewToken(part ?? "");
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
return completion;
|
|
240
244
|
};
|
|
241
245
|
}
|
|
242
246
|
else {
|
|
243
247
|
if (!this.batchClient) {
|
|
244
248
|
const options = this.apiUrl ? { apiUrl: this.apiUrl } : undefined;
|
|
245
|
-
this.batchClient = new AnthropicApi(
|
|
249
|
+
this.batchClient = new AnthropicApi({
|
|
250
|
+
...options,
|
|
251
|
+
apiKey: this.anthropicApiKey,
|
|
252
|
+
});
|
|
246
253
|
}
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
signal: options.signal,
|
|
250
|
-
})
|
|
251
|
-
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
252
|
-
.catch((e) => {
|
|
253
|
-
console.log(e);
|
|
254
|
-
// Anthropic doesn't actually throw JavaScript error objects at the moment.
|
|
255
|
-
// We convert the error so the async caller can recognize it correctly.
|
|
256
|
-
if (e?.type === "aborted") {
|
|
257
|
-
throw new Error(`${e.name}: ${e.message}`);
|
|
258
|
-
}
|
|
259
|
-
throw e;
|
|
260
|
-
});
|
|
254
|
+
asyncCallerOptions = { signal: options.signal };
|
|
255
|
+
makeCompletionRequest = async () => this.batchClient.completions.create({ ...request });
|
|
261
256
|
}
|
|
262
|
-
return this.caller.
|
|
257
|
+
return this.caller.callWithOptions(asyncCallerOptions, makeCompletionRequest);
|
|
263
258
|
}
|
|
264
259
|
_llmType() {
|
|
265
260
|
return "anthropic";
|
|
@@ -98,8 +98,6 @@ class NotionAPILoader extends base_js_1.BaseDocumentLoader {
|
|
|
98
98
|
}));
|
|
99
99
|
}
|
|
100
100
|
parsePageDetails(page) {
|
|
101
|
-
if (!(0, client_1.isFullPage)(page))
|
|
102
|
-
return;
|
|
103
101
|
const metadata = Object.fromEntries(Object.entries(page).filter(([key, _]) => key !== "id"));
|
|
104
102
|
return {
|
|
105
103
|
...metadata,
|
|
@@ -107,6 +105,58 @@ class NotionAPILoader extends base_js_1.BaseDocumentLoader {
|
|
|
107
105
|
properties: this.parsePageProperties(page),
|
|
108
106
|
};
|
|
109
107
|
}
|
|
108
|
+
async loadBlock(block) {
|
|
109
|
+
return {
|
|
110
|
+
type: block.type,
|
|
111
|
+
blockId: block.id,
|
|
112
|
+
parent: await this.n2mClient.blockToMarkdown(block),
|
|
113
|
+
children: [],
|
|
114
|
+
};
|
|
115
|
+
}
|
|
116
|
+
async loadBlocksAndDocs(blocksResponse) {
|
|
117
|
+
const blocks = blocksResponse.filter(client_1.isFullBlock);
|
|
118
|
+
const [childPageDocuments, childDatabaseDocuments, blocksDocsArray] = await Promise.all([
|
|
119
|
+
Promise.all(blocks
|
|
120
|
+
.filter((block) => block.type.includes("child_page"))
|
|
121
|
+
.map((block) => this.loadPage(block.id))),
|
|
122
|
+
Promise.all(blocks
|
|
123
|
+
.filter((block) => block.type.includes("child_database"))
|
|
124
|
+
.map((block) => this.loadDatabase(block.id))),
|
|
125
|
+
Promise.all(blocks
|
|
126
|
+
.filter((block) => !["child_page", "child_database"].includes(block.type))
|
|
127
|
+
.map(async (block) => {
|
|
128
|
+
const mdBlock = await this.loadBlock(block);
|
|
129
|
+
let childDocuments = [];
|
|
130
|
+
if (block.has_children) {
|
|
131
|
+
const block_id = block.type === "synced_block" &&
|
|
132
|
+
block.synced_block?.synced_from?.block_id
|
|
133
|
+
? block.synced_block.synced_from.block_id
|
|
134
|
+
: block.id;
|
|
135
|
+
const childBlocksDocs = await this.loadBlocksAndDocs(await (0, notion_js_1.getBlockChildren)(this.notionClient, block_id, null));
|
|
136
|
+
mdBlock.children = childBlocksDocs.mdBlocks;
|
|
137
|
+
childDocuments = childBlocksDocs.childDocuments;
|
|
138
|
+
}
|
|
139
|
+
return {
|
|
140
|
+
mdBlocks: [mdBlock],
|
|
141
|
+
childDocuments,
|
|
142
|
+
};
|
|
143
|
+
})),
|
|
144
|
+
]);
|
|
145
|
+
const allMdBlocks = blocksDocsArray
|
|
146
|
+
.flat()
|
|
147
|
+
.map((blockDoc) => blockDoc.mdBlocks);
|
|
148
|
+
const childDocuments = blocksDocsArray
|
|
149
|
+
.flat()
|
|
150
|
+
.map((blockDoc) => blockDoc.childDocuments);
|
|
151
|
+
return {
|
|
152
|
+
mdBlocks: [...allMdBlocks.flat()],
|
|
153
|
+
childDocuments: [
|
|
154
|
+
...childPageDocuments.flat(),
|
|
155
|
+
...childDatabaseDocuments.flat(),
|
|
156
|
+
...childDocuments.flat(),
|
|
157
|
+
],
|
|
158
|
+
};
|
|
159
|
+
}
|
|
110
160
|
async loadPage(page) {
|
|
111
161
|
// Check page is a page ID or a GetPageResponse
|
|
112
162
|
const [pageData, pageId] = typeof page === "string"
|
|
@@ -116,25 +166,15 @@ class NotionAPILoader extends base_js_1.BaseDocumentLoader {
|
|
|
116
166
|
pageData,
|
|
117
167
|
(0, notion_js_1.getBlockChildren)(this.notionClient, pageId, null),
|
|
118
168
|
]);
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
.map((block) => this.loadPage(block.id))),
|
|
123
|
-
Promise.all(pageBlocks
|
|
124
|
-
.filter((block) => "type" in block && block.type.includes("child_database"))
|
|
125
|
-
.map((block) => this.loadDatabase(block.id))),
|
|
126
|
-
this.n2mClient.blocksToMarkdown(pageBlocks),
|
|
127
|
-
]);
|
|
169
|
+
if (!(0, client_1.isFullPage)(pageDetails))
|
|
170
|
+
return [];
|
|
171
|
+
const { mdBlocks, childDocuments } = await this.loadBlocksAndDocs(pageBlocks);
|
|
128
172
|
const mdStringObject = this.n2mClient.toMarkdownString(mdBlocks);
|
|
129
|
-
const
|
|
130
|
-
pageContent,
|
|
173
|
+
const pageDocument = new document_js_1.Document({
|
|
174
|
+
pageContent: mdStringObject.parent,
|
|
131
175
|
metadata: this.parsePageDetails(pageDetails),
|
|
132
|
-
})
|
|
133
|
-
return [
|
|
134
|
-
...pageDocuments,
|
|
135
|
-
...childPageDocuments.flat(),
|
|
136
|
-
...childDatabaseDocuments.flat(),
|
|
137
|
-
];
|
|
176
|
+
});
|
|
177
|
+
return [pageDocument, ...childDocuments];
|
|
138
178
|
}
|
|
139
179
|
async loadDatabase(id) {
|
|
140
180
|
const documents = [];
|
|
@@ -15,6 +15,8 @@ export declare class NotionAPILoader extends BaseDocumentLoader {
|
|
|
15
15
|
constructor(options: NotionAPILoaderOptions);
|
|
16
16
|
private parsePageProperties;
|
|
17
17
|
private parsePageDetails;
|
|
18
|
+
private loadBlock;
|
|
19
|
+
private loadBlocksAndDocs;
|
|
18
20
|
private loadPage;
|
|
19
21
|
private loadDatabase;
|
|
20
22
|
load(): Promise<Document[]>;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Client, isFullPage, iteratePaginatedAPI } from "@notionhq/client";
|
|
1
|
+
import { Client, isFullBlock, isFullPage, iteratePaginatedAPI, } from "@notionhq/client";
|
|
2
2
|
import { NotionToMarkdown } from "notion-to-md";
|
|
3
3
|
import { getBlockChildren } from "notion-to-md/build/utils/notion.js";
|
|
4
4
|
import { BaseDocumentLoader } from "../base.js";
|
|
@@ -95,8 +95,6 @@ export class NotionAPILoader extends BaseDocumentLoader {
|
|
|
95
95
|
}));
|
|
96
96
|
}
|
|
97
97
|
parsePageDetails(page) {
|
|
98
|
-
if (!isFullPage(page))
|
|
99
|
-
return;
|
|
100
98
|
const metadata = Object.fromEntries(Object.entries(page).filter(([key, _]) => key !== "id"));
|
|
101
99
|
return {
|
|
102
100
|
...metadata,
|
|
@@ -104,6 +102,58 @@ export class NotionAPILoader extends BaseDocumentLoader {
|
|
|
104
102
|
properties: this.parsePageProperties(page),
|
|
105
103
|
};
|
|
106
104
|
}
|
|
105
|
+
async loadBlock(block) {
|
|
106
|
+
return {
|
|
107
|
+
type: block.type,
|
|
108
|
+
blockId: block.id,
|
|
109
|
+
parent: await this.n2mClient.blockToMarkdown(block),
|
|
110
|
+
children: [],
|
|
111
|
+
};
|
|
112
|
+
}
|
|
113
|
+
async loadBlocksAndDocs(blocksResponse) {
|
|
114
|
+
const blocks = blocksResponse.filter(isFullBlock);
|
|
115
|
+
const [childPageDocuments, childDatabaseDocuments, blocksDocsArray] = await Promise.all([
|
|
116
|
+
Promise.all(blocks
|
|
117
|
+
.filter((block) => block.type.includes("child_page"))
|
|
118
|
+
.map((block) => this.loadPage(block.id))),
|
|
119
|
+
Promise.all(blocks
|
|
120
|
+
.filter((block) => block.type.includes("child_database"))
|
|
121
|
+
.map((block) => this.loadDatabase(block.id))),
|
|
122
|
+
Promise.all(blocks
|
|
123
|
+
.filter((block) => !["child_page", "child_database"].includes(block.type))
|
|
124
|
+
.map(async (block) => {
|
|
125
|
+
const mdBlock = await this.loadBlock(block);
|
|
126
|
+
let childDocuments = [];
|
|
127
|
+
if (block.has_children) {
|
|
128
|
+
const block_id = block.type === "synced_block" &&
|
|
129
|
+
block.synced_block?.synced_from?.block_id
|
|
130
|
+
? block.synced_block.synced_from.block_id
|
|
131
|
+
: block.id;
|
|
132
|
+
const childBlocksDocs = await this.loadBlocksAndDocs(await getBlockChildren(this.notionClient, block_id, null));
|
|
133
|
+
mdBlock.children = childBlocksDocs.mdBlocks;
|
|
134
|
+
childDocuments = childBlocksDocs.childDocuments;
|
|
135
|
+
}
|
|
136
|
+
return {
|
|
137
|
+
mdBlocks: [mdBlock],
|
|
138
|
+
childDocuments,
|
|
139
|
+
};
|
|
140
|
+
})),
|
|
141
|
+
]);
|
|
142
|
+
const allMdBlocks = blocksDocsArray
|
|
143
|
+
.flat()
|
|
144
|
+
.map((blockDoc) => blockDoc.mdBlocks);
|
|
145
|
+
const childDocuments = blocksDocsArray
|
|
146
|
+
.flat()
|
|
147
|
+
.map((blockDoc) => blockDoc.childDocuments);
|
|
148
|
+
return {
|
|
149
|
+
mdBlocks: [...allMdBlocks.flat()],
|
|
150
|
+
childDocuments: [
|
|
151
|
+
...childPageDocuments.flat(),
|
|
152
|
+
...childDatabaseDocuments.flat(),
|
|
153
|
+
...childDocuments.flat(),
|
|
154
|
+
],
|
|
155
|
+
};
|
|
156
|
+
}
|
|
107
157
|
async loadPage(page) {
|
|
108
158
|
// Check page is a page ID or a GetPageResponse
|
|
109
159
|
const [pageData, pageId] = typeof page === "string"
|
|
@@ -113,25 +163,15 @@ export class NotionAPILoader extends BaseDocumentLoader {
|
|
|
113
163
|
pageData,
|
|
114
164
|
getBlockChildren(this.notionClient, pageId, null),
|
|
115
165
|
]);
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
.map((block) => this.loadPage(block.id))),
|
|
120
|
-
Promise.all(pageBlocks
|
|
121
|
-
.filter((block) => "type" in block && block.type.includes("child_database"))
|
|
122
|
-
.map((block) => this.loadDatabase(block.id))),
|
|
123
|
-
this.n2mClient.blocksToMarkdown(pageBlocks),
|
|
124
|
-
]);
|
|
166
|
+
if (!isFullPage(pageDetails))
|
|
167
|
+
return [];
|
|
168
|
+
const { mdBlocks, childDocuments } = await this.loadBlocksAndDocs(pageBlocks);
|
|
125
169
|
const mdStringObject = this.n2mClient.toMarkdownString(mdBlocks);
|
|
126
|
-
const
|
|
127
|
-
pageContent,
|
|
170
|
+
const pageDocument = new Document({
|
|
171
|
+
pageContent: mdStringObject.parent,
|
|
128
172
|
metadata: this.parsePageDetails(pageDetails),
|
|
129
|
-
})
|
|
130
|
-
return [
|
|
131
|
-
...pageDocuments,
|
|
132
|
-
...childPageDocuments.flat(),
|
|
133
|
-
...childDatabaseDocuments.flat(),
|
|
134
|
-
];
|
|
173
|
+
});
|
|
174
|
+
return [pageDocument, ...childDocuments];
|
|
135
175
|
}
|
|
136
176
|
async loadDatabase(id) {
|
|
137
177
|
const documents = [];
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.SonixAudioTranscriptionLoader = void 0;
|
|
4
|
+
const sonix_speech_recognition_1 = require("sonix-speech-recognition");
|
|
5
|
+
const document_js_1 = require("../../document.cjs");
|
|
6
|
+
const base_js_1 = require("../base.cjs");
|
|
7
|
+
class SonixAudioTranscriptionLoader extends base_js_1.BaseDocumentLoader {
|
|
8
|
+
constructor({ sonixAuthKey, request: speechToTextRequest, }) {
|
|
9
|
+
super();
|
|
10
|
+
Object.defineProperty(this, "sonixSpeechRecognitionService", {
|
|
11
|
+
enumerable: true,
|
|
12
|
+
configurable: true,
|
|
13
|
+
writable: true,
|
|
14
|
+
value: void 0
|
|
15
|
+
});
|
|
16
|
+
Object.defineProperty(this, "speechToTextRequest", {
|
|
17
|
+
enumerable: true,
|
|
18
|
+
configurable: true,
|
|
19
|
+
writable: true,
|
|
20
|
+
value: void 0
|
|
21
|
+
});
|
|
22
|
+
this.sonixSpeechRecognitionService = new sonix_speech_recognition_1.SonixSpeechRecognitionService(sonixAuthKey);
|
|
23
|
+
this.speechToTextRequest = speechToTextRequest;
|
|
24
|
+
}
|
|
25
|
+
async load() {
|
|
26
|
+
const { text, status, error } = await this.sonixSpeechRecognitionService.speechToText(this.speechToTextRequest);
|
|
27
|
+
if (status === "failed") {
|
|
28
|
+
console.error("Error:", error);
|
|
29
|
+
return [];
|
|
30
|
+
}
|
|
31
|
+
const document = new document_js_1.Document({
|
|
32
|
+
pageContent: text,
|
|
33
|
+
metadata: {
|
|
34
|
+
fileName: this.speechToTextRequest.fileName,
|
|
35
|
+
},
|
|
36
|
+
});
|
|
37
|
+
return [document];
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
exports.SonixAudioTranscriptionLoader = SonixAudioTranscriptionLoader;
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { SpeechToTextRequest } from "sonix-speech-recognition/lib/types.js";
|
|
2
|
+
import { Document } from "../../document.js";
|
|
3
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
4
|
+
export declare class SonixAudioTranscriptionLoader extends BaseDocumentLoader {
|
|
5
|
+
private readonly sonixSpeechRecognitionService;
|
|
6
|
+
private readonly speechToTextRequest;
|
|
7
|
+
constructor({ sonixAuthKey, request: speechToTextRequest, }: {
|
|
8
|
+
sonixAuthKey: string;
|
|
9
|
+
request: SpeechToTextRequest;
|
|
10
|
+
});
|
|
11
|
+
load(): Promise<Document[]>;
|
|
12
|
+
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { SonixSpeechRecognitionService } from "sonix-speech-recognition";
|
|
2
|
+
import { Document } from "../../document.js";
|
|
3
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
4
|
+
export class SonixAudioTranscriptionLoader extends BaseDocumentLoader {
|
|
5
|
+
constructor({ sonixAuthKey, request: speechToTextRequest, }) {
|
|
6
|
+
super();
|
|
7
|
+
Object.defineProperty(this, "sonixSpeechRecognitionService", {
|
|
8
|
+
enumerable: true,
|
|
9
|
+
configurable: true,
|
|
10
|
+
writable: true,
|
|
11
|
+
value: void 0
|
|
12
|
+
});
|
|
13
|
+
Object.defineProperty(this, "speechToTextRequest", {
|
|
14
|
+
enumerable: true,
|
|
15
|
+
configurable: true,
|
|
16
|
+
writable: true,
|
|
17
|
+
value: void 0
|
|
18
|
+
});
|
|
19
|
+
this.sonixSpeechRecognitionService = new SonixSpeechRecognitionService(sonixAuthKey);
|
|
20
|
+
this.speechToTextRequest = speechToTextRequest;
|
|
21
|
+
}
|
|
22
|
+
async load() {
|
|
23
|
+
const { text, status, error } = await this.sonixSpeechRecognitionService.speechToText(this.speechToTextRequest);
|
|
24
|
+
if (status === "failed") {
|
|
25
|
+
console.error("Error:", error);
|
|
26
|
+
return [];
|
|
27
|
+
}
|
|
28
|
+
const document = new Document({
|
|
29
|
+
pageContent: text,
|
|
30
|
+
metadata: {
|
|
31
|
+
fileName: this.speechToTextRequest.fileName,
|
|
32
|
+
},
|
|
33
|
+
});
|
|
34
|
+
return [document];
|
|
35
|
+
}
|
|
36
|
+
}
|