langchain 0.0.105 → 0.0.107

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/dist/base_language/count_tokens.cjs +2 -2
  2. package/dist/base_language/count_tokens.js +2 -2
  3. package/dist/chat_models/anthropic.cjs +38 -43
  4. package/dist/chat_models/anthropic.d.ts +12 -13
  5. package/dist/chat_models/anthropic.js +39 -44
  6. package/dist/document_loaders/web/notionapi.cjs +59 -19
  7. package/dist/document_loaders/web/notionapi.d.ts +2 -0
  8. package/dist/document_loaders/web/notionapi.js +60 -20
  9. package/dist/document_loaders/web/sonix_audio.cjs +40 -0
  10. package/dist/document_loaders/web/sonix_audio.d.ts +12 -0
  11. package/dist/document_loaders/web/sonix_audio.js +36 -0
  12. package/dist/document_loaders/web/sort_xyz_blockchain.cjs +118 -0
  13. package/dist/document_loaders/web/sort_xyz_blockchain.d.ts +37 -0
  14. package/dist/document_loaders/web/sort_xyz_blockchain.js +114 -0
  15. package/dist/embeddings/openai.cjs +1 -1
  16. package/dist/embeddings/openai.js +1 -1
  17. package/dist/load/import_constants.cjs +1 -0
  18. package/dist/load/import_constants.js +1 -0
  19. package/dist/load/import_map.cjs +2 -1
  20. package/dist/load/import_map.d.ts +1 -0
  21. package/dist/load/import_map.js +1 -0
  22. package/dist/tools/index.cjs +3 -1
  23. package/dist/tools/index.d.ts +1 -0
  24. package/dist/tools/index.js +1 -0
  25. package/dist/tools/wikipedia_query_run.cjs +108 -0
  26. package/dist/tools/wikipedia_query_run.d.ts +21 -0
  27. package/dist/tools/wikipedia_query_run.js +104 -0
  28. package/dist/util/async_caller.cjs +7 -0
  29. package/dist/util/async_caller.js +7 -0
  30. package/dist/vectorstores/supabase.cjs +12 -7
  31. package/dist/vectorstores/supabase.d.ts +6 -2
  32. package/dist/vectorstores/supabase.js +12 -7
  33. package/document_loaders/web/sonix_audio.cjs +1 -0
  34. package/document_loaders/web/sonix_audio.d.ts +1 -0
  35. package/document_loaders/web/sonix_audio.js +1 -0
  36. package/document_loaders/web/sort_xyz_blockchain.cjs +1 -0
  37. package/document_loaders/web/sort_xyz_blockchain.d.ts +1 -0
  38. package/document_loaders/web/sort_xyz_blockchain.js +1 -0
  39. package/package.json +23 -2
@@ -4,13 +4,13 @@ exports.calculateMaxTokens = exports.getModelContextSize = exports.getEmbeddingC
4
4
  const tiktoken_js_1 = require("../util/tiktoken.cjs");
5
5
  // https://www.npmjs.com/package/js-tiktoken
6
6
  const getModelNameForTiktoken = (modelName) => {
7
- if (modelName.startsWith("gpt-3.5-turbo-16k-")) {
7
+ if (modelName.startsWith("gpt-3.5-turbo-16k")) {
8
8
  return "gpt-3.5-turbo-16k";
9
9
  }
10
10
  if (modelName.startsWith("gpt-3.5-turbo-")) {
11
11
  return "gpt-3.5-turbo";
12
12
  }
13
- if (modelName.startsWith("gpt-4-32k-")) {
13
+ if (modelName.startsWith("gpt-4-32k")) {
14
14
  return "gpt-4-32k";
15
15
  }
16
16
  if (modelName.startsWith("gpt-4-")) {
@@ -1,13 +1,13 @@
1
1
  import { encodingForModel } from "../util/tiktoken.js";
2
2
  // https://www.npmjs.com/package/js-tiktoken
3
3
  export const getModelNameForTiktoken = (modelName) => {
4
- if (modelName.startsWith("gpt-3.5-turbo-16k-")) {
4
+ if (modelName.startsWith("gpt-3.5-turbo-16k")) {
5
5
  return "gpt-3.5-turbo-16k";
6
6
  }
7
7
  if (modelName.startsWith("gpt-3.5-turbo-")) {
8
8
  return "gpt-3.5-turbo";
9
9
  }
10
- if (modelName.startsWith("gpt-4-32k-")) {
10
+ if (modelName.startsWith("gpt-4-32k")) {
11
11
  return "gpt-4-32k";
12
12
  }
13
13
  if (modelName.startsWith("gpt-4-")) {
@@ -2,9 +2,9 @@
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
3
  exports.ChatAnthropic = void 0;
4
4
  const sdk_1 = require("@anthropic-ai/sdk");
5
- const base_js_1 = require("./base.cjs");
6
5
  const index_js_1 = require("../schema/index.cjs");
7
6
  const env_js_1 = require("../util/env.cjs");
7
+ const base_js_1 = require("./base.cjs");
8
8
  function getAnthropicPromptFromMessage(type) {
9
9
  switch (type) {
10
10
  case "ai":
@@ -208,61 +208,56 @@ class ChatAnthropic extends base_js_1.BaseChatModel {
208
208
  throw new Error("Missing Anthropic API key.");
209
209
  }
210
210
  let makeCompletionRequest;
211
+ let asyncCallerOptions = {};
211
212
  if (request.stream) {
212
213
  if (!this.streamingClient) {
213
214
  const options = this.apiUrl ? { apiUrl: this.apiUrl } : undefined;
214
- this.streamingClient = new sdk_1.Client(this.anthropicApiKey, options);
215
+ this.streamingClient = new sdk_1.Anthropic({
216
+ ...options,
217
+ apiKey: this.anthropicApiKey,
218
+ });
215
219
  }
216
220
  makeCompletionRequest = async () => {
217
- let currentCompletion = "";
218
- return (this.streamingClient
219
- .completeStream(request, {
220
- onUpdate: (data) => {
221
- if (data.stop_reason) {
222
- return;
223
- }
224
- const part = data.completion;
225
- if (part) {
226
- const delta = part.slice(currentCompletion.length);
227
- currentCompletion += delta ?? "";
228
- // eslint-disable-next-line no-void
229
- void runManager?.handleLLMNewToken(delta ?? "");
230
- }
231
- },
232
- signal: options.signal,
233
- })
234
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
235
- .catch((e) => {
236
- // Anthropic doesn't actually throw JavaScript error objects at the moment.
237
- // We convert the error so the async caller can recognize it correctly.
238
- if (e?.name === "AbortError") {
239
- throw new Error(`${e.name}: ${e.message}`);
221
+ const stream = await this.streamingClient.completions.create({
222
+ ...request,
223
+ });
224
+ const completion = {
225
+ completion: "",
226
+ model: "",
227
+ stop_reason: "",
228
+ };
229
+ for await (const data of stream) {
230
+ completion.stop_reason = data.stop_reason;
231
+ completion.model = data.model;
232
+ if (options.signal?.aborted) {
233
+ stream.controller.abort();
234
+ throw new Error("AbortError: User aborted the request.");
235
+ }
236
+ if (data.stop_reason) {
237
+ break;
240
238
  }
241
- throw e;
242
- }));
239
+ const part = data.completion;
240
+ if (part) {
241
+ completion.completion += part;
242
+ // eslint-disable-next-line no-void
243
+ void runManager?.handleLLMNewToken(part ?? "");
244
+ }
245
+ }
246
+ return completion;
243
247
  };
244
248
  }
245
249
  else {
246
250
  if (!this.batchClient) {
247
251
  const options = this.apiUrl ? { apiUrl: this.apiUrl } : undefined;
248
- this.batchClient = new sdk_1.Client(this.anthropicApiKey, options);
252
+ this.batchClient = new sdk_1.Anthropic({
253
+ ...options,
254
+ apiKey: this.anthropicApiKey,
255
+ });
249
256
  }
250
- makeCompletionRequest = async () => this.batchClient
251
- .complete(request, {
252
- signal: options.signal,
253
- })
254
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
255
- .catch((e) => {
256
- console.log(e);
257
- // Anthropic doesn't actually throw JavaScript error objects at the moment.
258
- // We convert the error so the async caller can recognize it correctly.
259
- if (e?.type === "aborted") {
260
- throw new Error(`${e.name}: ${e.message}`);
261
- }
262
- throw e;
263
- });
257
+ asyncCallerOptions = { signal: options.signal };
258
+ makeCompletionRequest = async () => this.batchClient.completions.create({ ...request });
264
259
  }
265
- return this.caller.call(makeCompletionRequest);
260
+ return this.caller.callWithOptions(asyncCallerOptions, makeCompletionRequest);
266
261
  }
267
262
  _llmType() {
268
263
  return "anthropic";
@@ -1,8 +1,9 @@
1
- import { SamplingParameters } from "@anthropic-ai/sdk";
2
- import { BaseChatModel, BaseChatModelParams } from "./base.js";
3
- import { BaseMessage, ChatResult } from "../schema/index.js";
4
- import { CallbackManagerForLLMRun } from "../callbacks/manager.js";
1
+ import { Anthropic as AnthropicApi } from "@anthropic-ai/sdk";
2
+ import type { CompletionCreateParams } from "@anthropic-ai/sdk/resources/completions";
5
3
  import { BaseLanguageModelCallOptions } from "../base_language/index.js";
4
+ import { CallbackManagerForLLMRun } from "../callbacks/manager.js";
5
+ import { BaseMessage, ChatResult } from "../schema/index.js";
6
+ import { BaseChatModel, BaseChatModelParams } from "./base.js";
6
7
  /**
7
8
  * Input to AnthropicChat class.
8
9
  */
@@ -85,33 +86,31 @@ export declare class ChatAnthropic extends BaseChatModel implements AnthropicInp
85
86
  /**
86
87
  * Get the parameters used to invoke the model
87
88
  */
88
- invocationParams(options?: this["ParsedCallOptions"]): Omit<SamplingParameters, "prompt"> & Kwargs;
89
+ invocationParams(options?: this["ParsedCallOptions"]): Omit<CompletionCreateParams, "prompt"> & Kwargs;
89
90
  /** @ignore */
90
91
  _identifyingParams(): {
91
- tags?: {
92
- [key: string]: string;
93
- } | undefined;
92
+ metadata?: AnthropicApi.Completions.CompletionCreateParams.CompletionRequestNonStreaming.Metadata | AnthropicApi.Completions.CompletionCreateParams.CompletionRequestStreaming.Metadata | undefined;
93
+ stream?: boolean | undefined;
94
94
  model: string;
95
95
  temperature?: number | undefined;
96
96
  top_p?: number | undefined;
97
97
  top_k?: number | undefined;
98
98
  max_tokens_to_sample: number;
99
- stop_sequences: string[];
99
+ stop_sequences?: string[] | undefined;
100
100
  model_name: string;
101
101
  };
102
102
  /**
103
103
  * Get the identifying parameters for the model
104
104
  */
105
105
  identifyingParams(): {
106
- tags?: {
107
- [key: string]: string;
108
- } | undefined;
106
+ metadata?: AnthropicApi.Completions.CompletionCreateParams.CompletionRequestNonStreaming.Metadata | AnthropicApi.Completions.CompletionCreateParams.CompletionRequestStreaming.Metadata | undefined;
107
+ stream?: boolean | undefined;
109
108
  model: string;
110
109
  temperature?: number | undefined;
111
110
  top_p?: number | undefined;
112
111
  top_k?: number | undefined;
113
112
  max_tokens_to_sample: number;
114
- stop_sequences: string[];
113
+ stop_sequences?: string[] | undefined;
115
114
  model_name: string;
116
115
  };
117
116
  private formatMessagesAsPrompt;
@@ -1,7 +1,7 @@
1
- import { AI_PROMPT, HUMAN_PROMPT, Client as AnthropicApi, } from "@anthropic-ai/sdk";
2
- import { BaseChatModel } from "./base.js";
1
+ import { AI_PROMPT, Anthropic as AnthropicApi, HUMAN_PROMPT, } from "@anthropic-ai/sdk";
3
2
  import { AIMessage, } from "../schema/index.js";
4
3
  import { getEnvironmentVariable } from "../util/env.js";
4
+ import { BaseChatModel } from "./base.js";
5
5
  function getAnthropicPromptFromMessage(type) {
6
6
  switch (type) {
7
7
  case "ai":
@@ -205,61 +205,56 @@ export class ChatAnthropic extends BaseChatModel {
205
205
  throw new Error("Missing Anthropic API key.");
206
206
  }
207
207
  let makeCompletionRequest;
208
+ let asyncCallerOptions = {};
208
209
  if (request.stream) {
209
210
  if (!this.streamingClient) {
210
211
  const options = this.apiUrl ? { apiUrl: this.apiUrl } : undefined;
211
- this.streamingClient = new AnthropicApi(this.anthropicApiKey, options);
212
+ this.streamingClient = new AnthropicApi({
213
+ ...options,
214
+ apiKey: this.anthropicApiKey,
215
+ });
212
216
  }
213
217
  makeCompletionRequest = async () => {
214
- let currentCompletion = "";
215
- return (this.streamingClient
216
- .completeStream(request, {
217
- onUpdate: (data) => {
218
- if (data.stop_reason) {
219
- return;
220
- }
221
- const part = data.completion;
222
- if (part) {
223
- const delta = part.slice(currentCompletion.length);
224
- currentCompletion += delta ?? "";
225
- // eslint-disable-next-line no-void
226
- void runManager?.handleLLMNewToken(delta ?? "");
227
- }
228
- },
229
- signal: options.signal,
230
- })
231
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
232
- .catch((e) => {
233
- // Anthropic doesn't actually throw JavaScript error objects at the moment.
234
- // We convert the error so the async caller can recognize it correctly.
235
- if (e?.name === "AbortError") {
236
- throw new Error(`${e.name}: ${e.message}`);
218
+ const stream = await this.streamingClient.completions.create({
219
+ ...request,
220
+ });
221
+ const completion = {
222
+ completion: "",
223
+ model: "",
224
+ stop_reason: "",
225
+ };
226
+ for await (const data of stream) {
227
+ completion.stop_reason = data.stop_reason;
228
+ completion.model = data.model;
229
+ if (options.signal?.aborted) {
230
+ stream.controller.abort();
231
+ throw new Error("AbortError: User aborted the request.");
232
+ }
233
+ if (data.stop_reason) {
234
+ break;
237
235
  }
238
- throw e;
239
- }));
236
+ const part = data.completion;
237
+ if (part) {
238
+ completion.completion += part;
239
+ // eslint-disable-next-line no-void
240
+ void runManager?.handleLLMNewToken(part ?? "");
241
+ }
242
+ }
243
+ return completion;
240
244
  };
241
245
  }
242
246
  else {
243
247
  if (!this.batchClient) {
244
248
  const options = this.apiUrl ? { apiUrl: this.apiUrl } : undefined;
245
- this.batchClient = new AnthropicApi(this.anthropicApiKey, options);
249
+ this.batchClient = new AnthropicApi({
250
+ ...options,
251
+ apiKey: this.anthropicApiKey,
252
+ });
246
253
  }
247
- makeCompletionRequest = async () => this.batchClient
248
- .complete(request, {
249
- signal: options.signal,
250
- })
251
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
252
- .catch((e) => {
253
- console.log(e);
254
- // Anthropic doesn't actually throw JavaScript error objects at the moment.
255
- // We convert the error so the async caller can recognize it correctly.
256
- if (e?.type === "aborted") {
257
- throw new Error(`${e.name}: ${e.message}`);
258
- }
259
- throw e;
260
- });
254
+ asyncCallerOptions = { signal: options.signal };
255
+ makeCompletionRequest = async () => this.batchClient.completions.create({ ...request });
261
256
  }
262
- return this.caller.call(makeCompletionRequest);
257
+ return this.caller.callWithOptions(asyncCallerOptions, makeCompletionRequest);
263
258
  }
264
259
  _llmType() {
265
260
  return "anthropic";
@@ -98,8 +98,6 @@ class NotionAPILoader extends base_js_1.BaseDocumentLoader {
98
98
  }));
99
99
  }
100
100
  parsePageDetails(page) {
101
- if (!(0, client_1.isFullPage)(page))
102
- return;
103
101
  const metadata = Object.fromEntries(Object.entries(page).filter(([key, _]) => key !== "id"));
104
102
  return {
105
103
  ...metadata,
@@ -107,6 +105,58 @@ class NotionAPILoader extends base_js_1.BaseDocumentLoader {
107
105
  properties: this.parsePageProperties(page),
108
106
  };
109
107
  }
108
+ async loadBlock(block) {
109
+ return {
110
+ type: block.type,
111
+ blockId: block.id,
112
+ parent: await this.n2mClient.blockToMarkdown(block),
113
+ children: [],
114
+ };
115
+ }
116
+ async loadBlocksAndDocs(blocksResponse) {
117
+ const blocks = blocksResponse.filter(client_1.isFullBlock);
118
+ const [childPageDocuments, childDatabaseDocuments, blocksDocsArray] = await Promise.all([
119
+ Promise.all(blocks
120
+ .filter((block) => block.type.includes("child_page"))
121
+ .map((block) => this.loadPage(block.id))),
122
+ Promise.all(blocks
123
+ .filter((block) => block.type.includes("child_database"))
124
+ .map((block) => this.loadDatabase(block.id))),
125
+ Promise.all(blocks
126
+ .filter((block) => !["child_page", "child_database"].includes(block.type))
127
+ .map(async (block) => {
128
+ const mdBlock = await this.loadBlock(block);
129
+ let childDocuments = [];
130
+ if (block.has_children) {
131
+ const block_id = block.type === "synced_block" &&
132
+ block.synced_block?.synced_from?.block_id
133
+ ? block.synced_block.synced_from.block_id
134
+ : block.id;
135
+ const childBlocksDocs = await this.loadBlocksAndDocs(await (0, notion_js_1.getBlockChildren)(this.notionClient, block_id, null));
136
+ mdBlock.children = childBlocksDocs.mdBlocks;
137
+ childDocuments = childBlocksDocs.childDocuments;
138
+ }
139
+ return {
140
+ mdBlocks: [mdBlock],
141
+ childDocuments,
142
+ };
143
+ })),
144
+ ]);
145
+ const allMdBlocks = blocksDocsArray
146
+ .flat()
147
+ .map((blockDoc) => blockDoc.mdBlocks);
148
+ const childDocuments = blocksDocsArray
149
+ .flat()
150
+ .map((blockDoc) => blockDoc.childDocuments);
151
+ return {
152
+ mdBlocks: [...allMdBlocks.flat()],
153
+ childDocuments: [
154
+ ...childPageDocuments.flat(),
155
+ ...childDatabaseDocuments.flat(),
156
+ ...childDocuments.flat(),
157
+ ],
158
+ };
159
+ }
110
160
  async loadPage(page) {
111
161
  // Check page is a page ID or a GetPageResponse
112
162
  const [pageData, pageId] = typeof page === "string"
@@ -116,25 +166,15 @@ class NotionAPILoader extends base_js_1.BaseDocumentLoader {
116
166
  pageData,
117
167
  (0, notion_js_1.getBlockChildren)(this.notionClient, pageId, null),
118
168
  ]);
119
- const [childPageDocuments, childDatabaseDocuments, mdBlocks] = await Promise.all([
120
- Promise.all(pageBlocks
121
- .filter((block) => "type" in block && block.type.includes("child_page"))
122
- .map((block) => this.loadPage(block.id))),
123
- Promise.all(pageBlocks
124
- .filter((block) => "type" in block && block.type.includes("child_database"))
125
- .map((block) => this.loadDatabase(block.id))),
126
- this.n2mClient.blocksToMarkdown(pageBlocks),
127
- ]);
169
+ if (!(0, client_1.isFullPage)(pageDetails))
170
+ return [];
171
+ const { mdBlocks, childDocuments } = await this.loadBlocksAndDocs(pageBlocks);
128
172
  const mdStringObject = this.n2mClient.toMarkdownString(mdBlocks);
129
- const pageDocuments = Object.entries(mdStringObject).map(([_, pageContent]) => new document_js_1.Document({
130
- pageContent,
173
+ const pageDocument = new document_js_1.Document({
174
+ pageContent: mdStringObject.parent,
131
175
  metadata: this.parsePageDetails(pageDetails),
132
- }));
133
- return [
134
- ...pageDocuments,
135
- ...childPageDocuments.flat(),
136
- ...childDatabaseDocuments.flat(),
137
- ];
176
+ });
177
+ return [pageDocument, ...childDocuments];
138
178
  }
139
179
  async loadDatabase(id) {
140
180
  const documents = [];
@@ -15,6 +15,8 @@ export declare class NotionAPILoader extends BaseDocumentLoader {
15
15
  constructor(options: NotionAPILoaderOptions);
16
16
  private parsePageProperties;
17
17
  private parsePageDetails;
18
+ private loadBlock;
19
+ private loadBlocksAndDocs;
18
20
  private loadPage;
19
21
  private loadDatabase;
20
22
  load(): Promise<Document[]>;
@@ -1,4 +1,4 @@
1
- import { Client, isFullPage, iteratePaginatedAPI } from "@notionhq/client";
1
+ import { Client, isFullBlock, isFullPage, iteratePaginatedAPI, } from "@notionhq/client";
2
2
  import { NotionToMarkdown } from "notion-to-md";
3
3
  import { getBlockChildren } from "notion-to-md/build/utils/notion.js";
4
4
  import { BaseDocumentLoader } from "../base.js";
@@ -95,8 +95,6 @@ export class NotionAPILoader extends BaseDocumentLoader {
95
95
  }));
96
96
  }
97
97
  parsePageDetails(page) {
98
- if (!isFullPage(page))
99
- return;
100
98
  const metadata = Object.fromEntries(Object.entries(page).filter(([key, _]) => key !== "id"));
101
99
  return {
102
100
  ...metadata,
@@ -104,6 +102,58 @@ export class NotionAPILoader extends BaseDocumentLoader {
104
102
  properties: this.parsePageProperties(page),
105
103
  };
106
104
  }
105
+ async loadBlock(block) {
106
+ return {
107
+ type: block.type,
108
+ blockId: block.id,
109
+ parent: await this.n2mClient.blockToMarkdown(block),
110
+ children: [],
111
+ };
112
+ }
113
+ async loadBlocksAndDocs(blocksResponse) {
114
+ const blocks = blocksResponse.filter(isFullBlock);
115
+ const [childPageDocuments, childDatabaseDocuments, blocksDocsArray] = await Promise.all([
116
+ Promise.all(blocks
117
+ .filter((block) => block.type.includes("child_page"))
118
+ .map((block) => this.loadPage(block.id))),
119
+ Promise.all(blocks
120
+ .filter((block) => block.type.includes("child_database"))
121
+ .map((block) => this.loadDatabase(block.id))),
122
+ Promise.all(blocks
123
+ .filter((block) => !["child_page", "child_database"].includes(block.type))
124
+ .map(async (block) => {
125
+ const mdBlock = await this.loadBlock(block);
126
+ let childDocuments = [];
127
+ if (block.has_children) {
128
+ const block_id = block.type === "synced_block" &&
129
+ block.synced_block?.synced_from?.block_id
130
+ ? block.synced_block.synced_from.block_id
131
+ : block.id;
132
+ const childBlocksDocs = await this.loadBlocksAndDocs(await getBlockChildren(this.notionClient, block_id, null));
133
+ mdBlock.children = childBlocksDocs.mdBlocks;
134
+ childDocuments = childBlocksDocs.childDocuments;
135
+ }
136
+ return {
137
+ mdBlocks: [mdBlock],
138
+ childDocuments,
139
+ };
140
+ })),
141
+ ]);
142
+ const allMdBlocks = blocksDocsArray
143
+ .flat()
144
+ .map((blockDoc) => blockDoc.mdBlocks);
145
+ const childDocuments = blocksDocsArray
146
+ .flat()
147
+ .map((blockDoc) => blockDoc.childDocuments);
148
+ return {
149
+ mdBlocks: [...allMdBlocks.flat()],
150
+ childDocuments: [
151
+ ...childPageDocuments.flat(),
152
+ ...childDatabaseDocuments.flat(),
153
+ ...childDocuments.flat(),
154
+ ],
155
+ };
156
+ }
107
157
  async loadPage(page) {
108
158
  // Check page is a page ID or a GetPageResponse
109
159
  const [pageData, pageId] = typeof page === "string"
@@ -113,25 +163,15 @@ export class NotionAPILoader extends BaseDocumentLoader {
113
163
  pageData,
114
164
  getBlockChildren(this.notionClient, pageId, null),
115
165
  ]);
116
- const [childPageDocuments, childDatabaseDocuments, mdBlocks] = await Promise.all([
117
- Promise.all(pageBlocks
118
- .filter((block) => "type" in block && block.type.includes("child_page"))
119
- .map((block) => this.loadPage(block.id))),
120
- Promise.all(pageBlocks
121
- .filter((block) => "type" in block && block.type.includes("child_database"))
122
- .map((block) => this.loadDatabase(block.id))),
123
- this.n2mClient.blocksToMarkdown(pageBlocks),
124
- ]);
166
+ if (!isFullPage(pageDetails))
167
+ return [];
168
+ const { mdBlocks, childDocuments } = await this.loadBlocksAndDocs(pageBlocks);
125
169
  const mdStringObject = this.n2mClient.toMarkdownString(mdBlocks);
126
- const pageDocuments = Object.entries(mdStringObject).map(([_, pageContent]) => new Document({
127
- pageContent,
170
+ const pageDocument = new Document({
171
+ pageContent: mdStringObject.parent,
128
172
  metadata: this.parsePageDetails(pageDetails),
129
- }));
130
- return [
131
- ...pageDocuments,
132
- ...childPageDocuments.flat(),
133
- ...childDatabaseDocuments.flat(),
134
- ];
173
+ });
174
+ return [pageDocument, ...childDocuments];
135
175
  }
136
176
  async loadDatabase(id) {
137
177
  const documents = [];
@@ -0,0 +1,40 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.SonixAudioTranscriptionLoader = void 0;
4
+ const sonix_speech_recognition_1 = require("sonix-speech-recognition");
5
+ const document_js_1 = require("../../document.cjs");
6
+ const base_js_1 = require("../base.cjs");
7
+ class SonixAudioTranscriptionLoader extends base_js_1.BaseDocumentLoader {
8
+ constructor({ sonixAuthKey, request: speechToTextRequest, }) {
9
+ super();
10
+ Object.defineProperty(this, "sonixSpeechRecognitionService", {
11
+ enumerable: true,
12
+ configurable: true,
13
+ writable: true,
14
+ value: void 0
15
+ });
16
+ Object.defineProperty(this, "speechToTextRequest", {
17
+ enumerable: true,
18
+ configurable: true,
19
+ writable: true,
20
+ value: void 0
21
+ });
22
+ this.sonixSpeechRecognitionService = new sonix_speech_recognition_1.SonixSpeechRecognitionService(sonixAuthKey);
23
+ this.speechToTextRequest = speechToTextRequest;
24
+ }
25
+ async load() {
26
+ const { text, status, error } = await this.sonixSpeechRecognitionService.speechToText(this.speechToTextRequest);
27
+ if (status === "failed") {
28
+ console.error("Error:", error);
29
+ return [];
30
+ }
31
+ const document = new document_js_1.Document({
32
+ pageContent: text,
33
+ metadata: {
34
+ fileName: this.speechToTextRequest.fileName,
35
+ },
36
+ });
37
+ return [document];
38
+ }
39
+ }
40
+ exports.SonixAudioTranscriptionLoader = SonixAudioTranscriptionLoader;
@@ -0,0 +1,12 @@
1
+ import { SpeechToTextRequest } from "sonix-speech-recognition/lib/types.js";
2
+ import { Document } from "../../document.js";
3
+ import { BaseDocumentLoader } from "../base.js";
4
+ export declare class SonixAudioTranscriptionLoader extends BaseDocumentLoader {
5
+ private readonly sonixSpeechRecognitionService;
6
+ private readonly speechToTextRequest;
7
+ constructor({ sonixAuthKey, request: speechToTextRequest, }: {
8
+ sonixAuthKey: string;
9
+ request: SpeechToTextRequest;
10
+ });
11
+ load(): Promise<Document[]>;
12
+ }
@@ -0,0 +1,36 @@
1
+ import { SonixSpeechRecognitionService } from "sonix-speech-recognition";
2
+ import { Document } from "../../document.js";
3
+ import { BaseDocumentLoader } from "../base.js";
4
+ export class SonixAudioTranscriptionLoader extends BaseDocumentLoader {
5
+ constructor({ sonixAuthKey, request: speechToTextRequest, }) {
6
+ super();
7
+ Object.defineProperty(this, "sonixSpeechRecognitionService", {
8
+ enumerable: true,
9
+ configurable: true,
10
+ writable: true,
11
+ value: void 0
12
+ });
13
+ Object.defineProperty(this, "speechToTextRequest", {
14
+ enumerable: true,
15
+ configurable: true,
16
+ writable: true,
17
+ value: void 0
18
+ });
19
+ this.sonixSpeechRecognitionService = new SonixSpeechRecognitionService(sonixAuthKey);
20
+ this.speechToTextRequest = speechToTextRequest;
21
+ }
22
+ async load() {
23
+ const { text, status, error } = await this.sonixSpeechRecognitionService.speechToText(this.speechToTextRequest);
24
+ if (status === "failed") {
25
+ console.error("Error:", error);
26
+ return [];
27
+ }
28
+ const document = new Document({
29
+ pageContent: text,
30
+ metadata: {
31
+ fileName: this.speechToTextRequest.fileName,
32
+ },
33
+ });
34
+ return [document];
35
+ }
36
+ }