langchain 0.0.199 → 0.0.201
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents/toolkits/connery.cjs +1 -0
- package/agents/toolkits/connery.d.ts +1 -0
- package/agents/toolkits/connery.js +1 -0
- package/dist/agents/index.cjs +3 -1
- package/dist/agents/index.d.ts +1 -1
- package/dist/agents/index.js +1 -1
- package/dist/agents/toolkits/connery/index.cjs +39 -0
- package/dist/agents/toolkits/connery/index.d.ts +23 -0
- package/dist/agents/toolkits/connery/index.js +35 -0
- package/dist/agents/toolkits/conversational_retrieval/tool.cjs +1 -1
- package/dist/agents/toolkits/conversational_retrieval/tool.js +1 -1
- package/dist/chat_models/fake.cjs +2 -114
- package/dist/chat_models/fake.d.ts +1 -52
- package/dist/chat_models/fake.js +1 -113
- package/dist/chat_models/llama_cpp.cjs +2 -1
- package/dist/chat_models/llama_cpp.d.ts +1 -1
- package/dist/chat_models/llama_cpp.js +2 -1
- package/dist/chat_models/minimax.d.ts +1 -1
- package/dist/document_loaders/fs/obsidian.cjs +240 -0
- package/dist/document_loaders/fs/obsidian.d.ts +26 -0
- package/dist/document_loaders/fs/obsidian.js +233 -0
- package/dist/embeddings/gradient_ai.cjs +103 -0
- package/dist/embeddings/gradient_ai.d.ts +48 -0
- package/dist/embeddings/gradient_ai.js +99 -0
- package/dist/llms/gradient_ai.cjs +22 -8
- package/dist/llms/gradient_ai.d.ts +7 -2
- package/dist/llms/gradient_ai.js +22 -8
- package/dist/llms/llama_cpp.cjs +2 -1
- package/dist/llms/llama_cpp.d.ts +1 -1
- package/dist/llms/llama_cpp.js +2 -1
- package/dist/load/import_constants.cjs +3 -0
- package/dist/load/import_constants.js +3 -0
- package/dist/load/import_map.cjs +5 -3
- package/dist/load/import_map.d.ts +2 -0
- package/dist/load/import_map.js +2 -0
- package/dist/memory/vector_store.cjs +1 -1
- package/dist/memory/vector_store.js +1 -1
- package/dist/tools/connery.cjs +279 -0
- package/dist/tools/connery.d.ts +145 -0
- package/dist/tools/connery.js +274 -0
- package/dist/tools/gmail/base.cjs +69 -0
- package/dist/tools/gmail/base.d.ts +19 -0
- package/dist/tools/gmail/base.js +65 -0
- package/dist/tools/gmail/create_draft.cjs +62 -0
- package/dist/tools/gmail/create_draft.d.ts +35 -0
- package/dist/tools/gmail/create_draft.js +58 -0
- package/dist/tools/gmail/descriptions.cjs +118 -0
- package/dist/tools/gmail/descriptions.d.ts +5 -0
- package/dist/tools/gmail/descriptions.js +115 -0
- package/dist/tools/gmail/get_message.cjs +83 -0
- package/dist/tools/gmail/get_message.d.ts +18 -0
- package/dist/tools/gmail/get_message.js +79 -0
- package/dist/tools/gmail/get_thread.cjs +89 -0
- package/dist/tools/gmail/get_thread.d.ts +18 -0
- package/dist/tools/gmail/get_thread.js +85 -0
- package/dist/tools/gmail/index.cjs +13 -0
- package/dist/tools/gmail/index.d.ts +11 -0
- package/dist/tools/gmail/index.js +5 -0
- package/dist/tools/gmail/search.cjs +118 -0
- package/dist/tools/gmail/search.d.ts +29 -0
- package/dist/tools/gmail/search.js +114 -0
- package/dist/tools/gmail/send_message.cjs +74 -0
- package/dist/tools/gmail/send_message.d.ts +35 -0
- package/dist/tools/gmail/send_message.js +70 -0
- package/dist/tools/webbrowser.cjs +1 -1
- package/dist/tools/webbrowser.js +1 -1
- package/dist/tools/wolframalpha.cjs +1 -1
- package/dist/tools/wolframalpha.js +1 -1
- package/dist/util/document.cjs +1 -1
- package/dist/util/document.d.ts +1 -1
- package/dist/util/document.js +1 -1
- package/dist/util/tiktoken.cjs +15 -24
- package/dist/util/tiktoken.d.ts +1 -9
- package/dist/util/tiktoken.js +1 -21
- package/document_loaders/fs/obsidian.cjs +1 -0
- package/document_loaders/fs/obsidian.d.ts +1 -0
- package/document_loaders/fs/obsidian.js +1 -0
- package/embeddings/gradient_ai.cjs +1 -0
- package/embeddings/gradient_ai.d.ts +1 -0
- package/embeddings/gradient_ai.js +1 -0
- package/package.json +43 -3
- package/tools/connery.cjs +1 -0
- package/tools/connery.d.ts +1 -0
- package/tools/connery.js +1 -0
- package/tools/gmail.cjs +1 -0
- package/tools/gmail.d.ts +1 -0
- package/tools/gmail.js +1 -0
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
// Interop helper emitted by the TypeScript compiler for default imports.
// Reuses a previously installed helper when one exists on `this`; otherwise
// returns ES modules (flagged with `__esModule`) untouched and wraps any other
// value so that it is reachable as `.default`.
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
5
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
6
|
+
exports.ObsidianLoader = void 0;
|
|
7
|
+
const js_yaml_1 = __importDefault(require("js-yaml"));
|
|
8
|
+
const env_js_1 = require("../../util/env.cjs");
|
|
9
|
+
const directory_js_1 = require("./directory.cjs");
|
|
10
|
+
const base_js_1 = require("../base.cjs");
|
|
11
|
+
const document_js_1 = require("../../document.cjs");
|
|
12
|
+
/**
 * Loader for a single Obsidian markdown file. Extends BaseDocumentLoader and
 * extracts YAML front matter, `#tags` and Dataview fields (`key:: value`) from
 * the file, exposing them as metadata on the resulting document.
 */
class ObsidianFileLoader extends base_js_1.BaseDocumentLoader {
    /**
     * Initializes a new instance of the ObsidianFileLoader class.
     * @param filePath The path to the Obsidian markdown file.
     * @param options Optional settings object.
     * @param options.encoding The character encoding to use when reading the file. Defaults to 'utf-8'.
     * @param options.collectMetadata Determines whether metadata should be collected from the file. Defaults to true.
     */
    constructor(filePath, { encoding = "utf-8", collectMetadata = true, } = {}) {
        super();
        Object.defineProperty(this, "filePath", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "encoding", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "collectMetadata", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.filePath = filePath;
        this.encoding = encoding;
        this.collectMetadata = collectMetadata;
    }
    /**
     * Parses the YAML front matter from the given content string.
     * @param content The string content of the markdown file.
     * @returns The parsed front matter mapping, or an empty object when
     * metadata collection is disabled, no front matter is present, the block
     * is empty, the YAML is invalid, or the YAML is not a key/value mapping.
     */
    parseFrontMatter(content) {
        if (!this.collectMetadata) {
            return {};
        }
        const match = content.match(ObsidianFileLoader.FRONT_MATTER_REGEX);
        if (!match) {
            return {};
        }
        let frontMatter;
        try {
            frontMatter = js_yaml_1.default.load(match[1]);
        }
        catch (e) {
            console.warn("Encountered non-yaml frontmatter");
            return {};
        }
        // An empty front matter block parses to undefined/null: nothing to collect.
        if (frontMatter === undefined || frontMatter === null) {
            return {};
        }
        // Scalars and sequences are valid YAML but not key/value metadata.
        // Returning them would make Object.entries() emit per-character or
        // per-index keys downstream, so they are rejected here.
        if (typeof frontMatter !== "object" || Array.isArray(frontMatter)) {
            console.warn("Encountered non-yaml frontmatter");
            return {};
        }
        // A non-empty string of tags is split on ", " into an array.
        if (frontMatter.tags && typeof frontMatter.tags === "string") {
            frontMatter.tags = frontMatter.tags.split(", ");
        }
        return frontMatter;
    }
    /**
     * Removes YAML front matter from the given content string.
     * @param content The string content of the markdown file.
     * @returns The content string with the front matter removed. The content
     * is returned unchanged when metadata collection is disabled.
     */
    removeFrontMatter(content) {
        if (!this.collectMetadata) {
            return content;
        }
        return content.replace(ObsidianFileLoader.FRONT_MATTER_REGEX, "");
    }
    /**
     * Parses Obsidian-style tags from the given content string.
     * @param content The string content of the markdown file.
     * @returns A set of parsed tags, without the leading `#`. Empty when
     * metadata collection is disabled.
     */
    parseObsidianTags(content) {
        if (!this.collectMetadata) {
            return new Set();
        }
        const matches = content.matchAll(ObsidianFileLoader.TAG_REGEX);
        const tags = new Set();
        for (const match of matches) {
            tags.add(match[1]);
        }
        return tags;
    }
    /**
     * Parses dataview fields from the given content string. Three syntaxes are
     * scanned in order: whole-line `key:: value`, inline `[key:: value]` and
     * inline `(key:: value)`. A later match overwrites an earlier one that
     * used the same key.
     * @param content The string content of the markdown file.
     * @returns A record object containing key-value pairs of dataview fields.
     */
    parseObsidianDataviewFields(content) {
        if (!this.collectMetadata) {
            return {};
        }
        const fields = {};
        const lineMatches = content.matchAll(ObsidianFileLoader.DATAVIEW_LINE_REGEX);
        for (const [, key, value] of lineMatches) {
            fields[key] = value;
        }
        const bracketMatches = content.matchAll(ObsidianFileLoader.DATAVIEW_INLINE_BRACKET_REGEX);
        for (const [, key, value] of bracketMatches) {
            fields[key] = value;
        }
        const parenMatches = content.matchAll(ObsidianFileLoader.DATAVIEW_INLINE_PAREN_REGEX);
        for (const [, key, value] of parenMatches) {
            fields[key] = value;
        }
        return fields;
    }
    /**
     * Converts metadata to a format compatible with Langchain: strings and
     * numbers are kept as they are, every other value is JSON-stringified.
     * @param metadata The metadata object to convert.
     * @returns A record object containing key-value pairs of Langchain-compatible metadata.
     */
    toLangchainCompatibleMetadata(metadata) {
        const result = {};
        for (const [key, value] of Object.entries(metadata)) {
            if (typeof value === "string" || typeof value === "number") {
                result[key] = value;
            }
            else {
                result[key] = JSON.stringify(value);
            }
        }
        return result;
    }
    /**
     * It loads the Obsidian file, parses it, and returns a `Document` instance.
     * @returns An array holding a single `Document`, to comply with the BaseDocumentLoader interface.
     */
    async load() {
        const { basename, readFile, stat } = await ObsidianFileLoader.imports();
        const fileName = basename(this.filePath);
        const stats = await stat(this.filePath);
        let content = await readFile(this.filePath, this.encoding);
        // Metadata is extracted from the full text (front matter included)
        // before the front matter is stripped from the page content.
        const frontMatter = this.parseFrontMatter(content);
        const tags = this.parseObsidianTags(content);
        const dataviewFields = this.parseObsidianDataviewFields(content);
        content = this.removeFrontMatter(content);
        // Later spreads win: front matter keys may override the file
        // attributes, and dataview fields may override front matter keys.
        const metadata = {
            source: fileName,
            path: this.filePath,
            created: stats.birthtimeMs,
            lastModified: stats.mtimeMs,
            lastAccessed: stats.atimeMs,
            ...this.toLangchainCompatibleMetadata(frontMatter),
            ...dataviewFields,
        };
        // Front matter tags are only merged when they can be listed safely.
        // Spreading an arbitrary YAML value (e.g. `tags: 2023` or a mapping)
        // would throw "not iterable" and fail the whole load.
        const rawTags = frontMatter.tags;
        let frontMatterTags;
        if (Array.isArray(rawTags)) {
            frontMatterTags = rawTags;
        }
        else if (typeof rawTags === "number") {
            frontMatterTags = [String(rawTags)];
        }
        if (tags.size || frontMatterTags) {
            metadata.tags = Array.from(new Set([...tags, ...(frontMatterTags ?? [])])).join(",");
        }
        return [
            new document_js_1.Document({
                pageContent: content,
                metadata,
            }),
        ];
    }
    /**
     * Imports the necessary functions from the `node:path` and
     * `node:fs/promises` modules. It is used to dynamically import the
     * functions when needed. If the import fails, it logs the original error
     * and throws an error indicating that the modules failed to load.
     * @returns A promise that resolves to an object containing the imported functions.
     * @throws Error when either module cannot be imported.
     */
    static async imports() {
        try {
            const { basename } = await import("node:path");
            const { readFile, stat } = await import("node:fs/promises");
            return { basename, readFile, stat };
        }
        catch (e) {
            console.error(e);
            throw new Error(`Failed to load fs/promises. ObsidianFileLoader available only on environment 'node'. It appears you are running environment '${(0, env_js_1.getEnv)()}'. See https://<link to docs> for alternatives.`);
        }
    }
}
// Front matter block at the very start of the file, delimited by `---` lines.
// Both LF and CRLF line endings are accepted, because readFile() does not
// normalize newlines.
Object.defineProperty(ObsidianFileLoader, "FRONT_MATTER_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /^---\r?\n(.*?)\r?\n---\r?\n/s
});
// `#tag` preceded by whitespace or the start of the input. A tag starts with
// a letter or underscore and continues with word characters, `/` or `-`.
Object.defineProperty(ObsidianFileLoader, "TAG_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /(?:\s|^)#([a-zA-Z_][\w/-]*)/g
});
// Whole-line dataview field: `key:: value`.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_LINE_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /^\s*(\w+)::\s*(.*)$/gm
});
// Inline dataview field in brackets: `[key:: value]`. The value capture is
// greedy, so it extends to the last `]` on the line.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_INLINE_BRACKET_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /\[(\w+)::\s*(.*)\]/gm
});
// Inline dataview field in parentheses: `(key:: value)`. The value capture is
// greedy, so it extends to the last `)` on the line.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_INLINE_PAREN_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /\((\w+)::\s*(.*)\)/gm
});
|
|
222
|
+
/**
 * Directory-level loader for Obsidian notes. Built on DirectoryLoader, it
 * hands every '.md' file to an ObsidianFileLoader, which takes care of YAML
 * front matter, Obsidian tags and Dataview fields.
 */
class ObsidianLoader extends directory_js_1.DirectoryLoader {
    /**
     * Creates a loader for the given directory.
     * @param directoryPath The path to the directory containing Obsidian markdown files.
     * @param options Optional per-file settings (`encoding`, `collectMetadata`),
     * forwarded unchanged to every ObsidianFileLoader.
     */
    constructor(directoryPath, options) {
        // Only markdown files get a loader; other extensions are ignored.
        const loadersByExtension = {
            ".md": (filePath) => new ObsidianFileLoader(filePath, options),
        };
        // NOTE(review): the `true` argument is presumably DirectoryLoader's
        // recursive flag - confirm against its signature.
        super(directoryPath, loadersByExtension, true, directory_js_1.UnknownHandling.Ignore);
    }
}
|
|
240
|
+
exports.ObsidianLoader = ObsidianLoader;
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
|
2
|
+
import { DirectoryLoader } from "./directory.js";
|
|
3
|
+
/**
 * Shape of the YAML front matter read from an Obsidian note. A few common
 * keys are typed explicitly; any other key is allowed with an unknown value.
 */
export type FrontMatter = {
    /** Optional note title. */
    title?: string;
    /** Optional note description. */
    description?: string;
    /** Tags, either as a single string or as a list of strings. */
    tags?: string | string[];
    /** Any additional front matter entry. */
    [key: string]: unknown;
};
|
|
9
|
+
/**
 * Options accepted by the Obsidian loaders. Both fields are optional.
 */
export interface ObsidianFileLoaderOptions {
    /** Character encoding used when reading files. */
    encoding?: BufferEncoding;
    /** Whether metadata should be collected from the files. */
    collectMetadata?: boolean;
}
|
|
13
|
+
/**
 * Directory-level loader for Obsidian notes. Built on DirectoryLoader, it
 * loads and parses '.md' files, handling YAML frontmatter, Obsidian tags and
 * Dataview fields.
 */
export declare class ObsidianLoader extends DirectoryLoader {
    /**
     * Creates a loader for the given directory.
     * @param directoryPath The path to the directory containing Obsidian markdown files.
     * @param options Optional settings: `encoding` (character encoding used when
     * reading files, defaults to 'utf-8') and `collectMetadata` (whether metadata
     * should be collected from the files, defaults to true).
     */
    constructor(directoryPath: string, options?: ObsidianFileLoaderOptions);
}
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import yaml from "js-yaml";
|
|
2
|
+
import { getEnv } from "../../util/env.js";
|
|
3
|
+
import { DirectoryLoader, UnknownHandling } from "./directory.js";
|
|
4
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
5
|
+
import { Document } from "../../document.js";
|
|
6
|
+
/**
 * Loader for a single Obsidian markdown file. Extends BaseDocumentLoader and
 * extracts YAML front matter, `#tags` and Dataview fields (`key:: value`) from
 * the file, exposing them as metadata on the resulting document.
 */
class ObsidianFileLoader extends BaseDocumentLoader {
    /**
     * Initializes a new instance of the ObsidianFileLoader class.
     * @param filePath The path to the Obsidian markdown file.
     * @param options Optional settings object.
     * @param options.encoding The character encoding to use when reading the file. Defaults to 'utf-8'.
     * @param options.collectMetadata Determines whether metadata should be collected from the file. Defaults to true.
     */
    constructor(filePath, { encoding = "utf-8", collectMetadata = true, } = {}) {
        super();
        Object.defineProperty(this, "filePath", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "encoding", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "collectMetadata", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.filePath = filePath;
        this.encoding = encoding;
        this.collectMetadata = collectMetadata;
    }
    /**
     * Parses the YAML front matter from the given content string.
     * @param content The string content of the markdown file.
     * @returns The parsed front matter mapping, or an empty object when
     * metadata collection is disabled, no front matter is present, the block
     * is empty, the YAML is invalid, or the YAML is not a key/value mapping.
     */
    parseFrontMatter(content) {
        if (!this.collectMetadata) {
            return {};
        }
        const match = content.match(ObsidianFileLoader.FRONT_MATTER_REGEX);
        if (!match) {
            return {};
        }
        let frontMatter;
        try {
            frontMatter = yaml.load(match[1]);
        }
        catch (e) {
            console.warn("Encountered non-yaml frontmatter");
            return {};
        }
        // An empty front matter block parses to undefined/null: nothing to collect.
        if (frontMatter === undefined || frontMatter === null) {
            return {};
        }
        // Scalars and sequences are valid YAML but not key/value metadata.
        // Returning them would make Object.entries() emit per-character or
        // per-index keys downstream, so they are rejected here.
        if (typeof frontMatter !== "object" || Array.isArray(frontMatter)) {
            console.warn("Encountered non-yaml frontmatter");
            return {};
        }
        // A non-empty string of tags is split on ", " into an array.
        if (frontMatter.tags && typeof frontMatter.tags === "string") {
            frontMatter.tags = frontMatter.tags.split(", ");
        }
        return frontMatter;
    }
    /**
     * Removes YAML front matter from the given content string.
     * @param content The string content of the markdown file.
     * @returns The content string with the front matter removed. The content
     * is returned unchanged when metadata collection is disabled.
     */
    removeFrontMatter(content) {
        if (!this.collectMetadata) {
            return content;
        }
        return content.replace(ObsidianFileLoader.FRONT_MATTER_REGEX, "");
    }
    /**
     * Parses Obsidian-style tags from the given content string.
     * @param content The string content of the markdown file.
     * @returns A set of parsed tags, without the leading `#`. Empty when
     * metadata collection is disabled.
     */
    parseObsidianTags(content) {
        if (!this.collectMetadata) {
            return new Set();
        }
        const matches = content.matchAll(ObsidianFileLoader.TAG_REGEX);
        const tags = new Set();
        for (const match of matches) {
            tags.add(match[1]);
        }
        return tags;
    }
    /**
     * Parses dataview fields from the given content string. Three syntaxes are
     * scanned in order: whole-line `key:: value`, inline `[key:: value]` and
     * inline `(key:: value)`. A later match overwrites an earlier one that
     * used the same key.
     * @param content The string content of the markdown file.
     * @returns A record object containing key-value pairs of dataview fields.
     */
    parseObsidianDataviewFields(content) {
        if (!this.collectMetadata) {
            return {};
        }
        const fields = {};
        const lineMatches = content.matchAll(ObsidianFileLoader.DATAVIEW_LINE_REGEX);
        for (const [, key, value] of lineMatches) {
            fields[key] = value;
        }
        const bracketMatches = content.matchAll(ObsidianFileLoader.DATAVIEW_INLINE_BRACKET_REGEX);
        for (const [, key, value] of bracketMatches) {
            fields[key] = value;
        }
        const parenMatches = content.matchAll(ObsidianFileLoader.DATAVIEW_INLINE_PAREN_REGEX);
        for (const [, key, value] of parenMatches) {
            fields[key] = value;
        }
        return fields;
    }
    /**
     * Converts metadata to a format compatible with Langchain: strings and
     * numbers are kept as they are, every other value is JSON-stringified.
     * @param metadata The metadata object to convert.
     * @returns A record object containing key-value pairs of Langchain-compatible metadata.
     */
    toLangchainCompatibleMetadata(metadata) {
        const result = {};
        for (const [key, value] of Object.entries(metadata)) {
            if (typeof value === "string" || typeof value === "number") {
                result[key] = value;
            }
            else {
                result[key] = JSON.stringify(value);
            }
        }
        return result;
    }
    /**
     * It loads the Obsidian file, parses it, and returns a `Document` instance.
     * @returns An array holding a single `Document`, to comply with the BaseDocumentLoader interface.
     */
    async load() {
        const { basename, readFile, stat } = await ObsidianFileLoader.imports();
        const fileName = basename(this.filePath);
        const stats = await stat(this.filePath);
        let content = await readFile(this.filePath, this.encoding);
        // Metadata is extracted from the full text (front matter included)
        // before the front matter is stripped from the page content.
        const frontMatter = this.parseFrontMatter(content);
        const tags = this.parseObsidianTags(content);
        const dataviewFields = this.parseObsidianDataviewFields(content);
        content = this.removeFrontMatter(content);
        // Later spreads win: front matter keys may override the file
        // attributes, and dataview fields may override front matter keys.
        const metadata = {
            source: fileName,
            path: this.filePath,
            created: stats.birthtimeMs,
            lastModified: stats.mtimeMs,
            lastAccessed: stats.atimeMs,
            ...this.toLangchainCompatibleMetadata(frontMatter),
            ...dataviewFields,
        };
        // Front matter tags are only merged when they can be listed safely.
        // Spreading an arbitrary YAML value (e.g. `tags: 2023` or a mapping)
        // would throw "not iterable" and fail the whole load.
        const rawTags = frontMatter.tags;
        let frontMatterTags;
        if (Array.isArray(rawTags)) {
            frontMatterTags = rawTags;
        }
        else if (typeof rawTags === "number") {
            frontMatterTags = [String(rawTags)];
        }
        if (tags.size || frontMatterTags) {
            metadata.tags = Array.from(new Set([...tags, ...(frontMatterTags ?? [])])).join(",");
        }
        return [
            new Document({
                pageContent: content,
                metadata,
            }),
        ];
    }
    /**
     * Imports the necessary functions from the `node:path` and
     * `node:fs/promises` modules. It is used to dynamically import the
     * functions when needed. If the import fails, it logs the original error
     * and throws an error indicating that the modules failed to load.
     * @returns A promise that resolves to an object containing the imported functions.
     * @throws Error when either module cannot be imported.
     */
    static async imports() {
        try {
            const { basename } = await import("node:path");
            const { readFile, stat } = await import("node:fs/promises");
            return { basename, readFile, stat };
        }
        catch (e) {
            console.error(e);
            throw new Error(`Failed to load fs/promises. ObsidianFileLoader available only on environment 'node'. It appears you are running environment '${getEnv()}'. See https://<link to docs> for alternatives.`);
        }
    }
}
// Front matter block at the very start of the file, delimited by `---` lines.
// Both LF and CRLF line endings are accepted, because readFile() does not
// normalize newlines.
Object.defineProperty(ObsidianFileLoader, "FRONT_MATTER_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /^---\r?\n(.*?)\r?\n---\r?\n/s
});
// `#tag` preceded by whitespace or the start of the input. A tag starts with
// a letter or underscore and continues with word characters, `/` or `-`.
Object.defineProperty(ObsidianFileLoader, "TAG_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /(?:\s|^)#([a-zA-Z_][\w/-]*)/g
});
// Whole-line dataview field: `key:: value`.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_LINE_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /^\s*(\w+)::\s*(.*)$/gm
});
// Inline dataview field in brackets: `[key:: value]`. The value capture is
// greedy, so it extends to the last `]` on the line.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_INLINE_BRACKET_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /\[(\w+)::\s*(.*)\]/gm
});
// Inline dataview field in parentheses: `(key:: value)`. The value capture is
// greedy, so it extends to the last `)` on the line.
Object.defineProperty(ObsidianFileLoader, "DATAVIEW_INLINE_PAREN_REGEX", {
    enumerable: true,
    configurable: true,
    writable: true,
    value: /\((\w+)::\s*(.*)\)/gm
});
|
|
216
|
+
/**
 * Directory-level loader for Obsidian notes. Built on DirectoryLoader, it
 * hands every '.md' file to an ObsidianFileLoader, which takes care of YAML
 * front matter, Obsidian tags and Dataview fields.
 */
export class ObsidianLoader extends DirectoryLoader {
    /**
     * Creates a loader for the given directory.
     * @param directoryPath The path to the directory containing Obsidian markdown files.
     * @param options Optional per-file settings (`encoding`, `collectMetadata`),
     * forwarded unchanged to every ObsidianFileLoader.
     */
    constructor(directoryPath, options) {
        // Only markdown files get a loader; other extensions are ignored.
        const loadersByExtension = {
            ".md": (filePath) => new ObsidianFileLoader(filePath, options),
        };
        // NOTE(review): the `true` argument is presumably DirectoryLoader's
        // recursive flag - confirm against its signature.
        super(directoryPath, loadersByExtension, true, UnknownHandling.Ignore);
    }
}
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GradientEmbeddings = void 0;
|
|
4
|
+
const nodejs_sdk_1 = require("@gradientai/nodejs-sdk");
|
|
5
|
+
const env_js_1 = require("../util/env.cjs");
|
|
6
|
+
const chunk_js_1 = require("../util/chunk.cjs");
|
|
7
|
+
const base_js_1 = require("./base.cjs");
|
|
8
|
+
/**
 * Embeddings implementation backed by the Gradient AI API. Extends the
 * Embeddings base class and carries the fields described by
 * GradientEmbeddingsParams.
 */
class GradientEmbeddings extends base_js_1.Embeddings {
    /**
     * @param fields Access token and workspace id. Each one falls back to an
     * environment variable (`GRADIENT_ACCESS_TOKEN`, `GRADIENT_WORKSPACE_ID`)
     * when it is not provided.
     * @throws Error when the access token or the workspace id cannot be resolved.
     */
    constructor(fields) {
        super(fields);
        Object.defineProperty(this, "gradientAccessKey", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "workspaceId", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        // Number of texts sent to the API per request.
        Object.defineProperty(this, "batchSize", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 128
        });
        // Embeddings model handle, populated lazily by setModel().
        // eslint-disable-next-line @typescript-eslint/no-explicit-any
        Object.defineProperty(this, "model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        const accessKey = fields?.gradientAccessKey ??
            (0, env_js_1.getEnvironmentVariable)("GRADIENT_ACCESS_TOKEN");
        this.gradientAccessKey = accessKey;
        const workspace = fields?.workspaceId ??
            (0, env_js_1.getEnvironmentVariable)("GRADIENT_WORKSPACE_ID");
        this.workspaceId = workspace;
        if (!this.gradientAccessKey) {
            throw new Error("Missing Gradient AI Access Token");
        }
        if (!this.workspaceId) {
            throw new Error("Missing Gradient AI Workspace ID");
        }
    }
    /**
     * Generates embeddings for an array of documents. The texts are split into
     * batches of `batchSize`, one request is issued per batch, and the results
     * are flattened back in input order.
     * @param texts Array of documents to generate embeddings for.
     * @returns Promise that resolves to a 2D array of embeddings for each document.
     */
    async embedDocuments(texts) {
        await this.setModel();
        const inputs = texts.map((text) => ({ input: text }));
        const batches = (0, chunk_js_1.chunkArray)(inputs, this.batchSize);
        const responses = await Promise.all(batches.map((batch) => this.caller.call(async () => this.model.generateEmbeddings({
            inputs: batch,
        }))));
        const embeddings = [];
        for (const [batchIndex, response] of responses.entries()) {
            const { embeddings: vectors } = response;
            // One embedding is read per input of the matching batch.
            const expected = batches[batchIndex].length;
            for (let j = 0; j < expected; j += 1) {
                embeddings.push(vectors[j].embedding);
            }
        }
        return embeddings;
    }
    /**
     * Generates an embedding for a single document by delegating to
     * embedDocuments with a one-element array.
     * @param text Document to generate an embedding for.
     * @returns Promise that resolves to an embedding for the document.
     */
    async embedQuery(text) {
        const results = await this.embedDocuments([text]);
        return results[0];
    }
    /**
     * Resolves the embeddings model on first use and stores it on `model`.
     * Does nothing when a model is already set.
     */
    async setModel() {
        if (!this.model) {
            const client = new nodejs_sdk_1.Gradient({
                accessToken: this.gradientAccessKey,
                workspaceId: this.workspaceId,
            });
            this.model = await client.getEmbeddingsModel({
                slug: "bge-large",
            });
        }
    }
}
|
|
103
|
+
exports.GradientEmbeddings = GradientEmbeddings;
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import { Embeddings, EmbeddingsParams } from "./base.js";
|
|
2
|
+
/**
 * Parameters accepted by GradientEmbeddings. Adds the Gradient-specific
 * credentials on top of the generic EmbeddingsParams.
 */
export interface GradientEmbeddingsParams extends EmbeddingsParams {
    /**
     * Gradient AI Access Token.
     * Set it explicitly to avoid pulling the token from the environment.
     */
    gradientAccessKey?: string;
    /**
     * Gradient Workspace Id.
     * Set it explicitly to avoid pulling the workspace id from the environment.
     */
    workspaceId?: string;
}
|
|
18
|
+
/**
 * Embeddings implementation backed by the Gradient AI API. Extends the
 * Embeddings base class and implements GradientEmbeddingsParams.
 */
export declare class GradientEmbeddings extends Embeddings implements GradientEmbeddingsParams {
    /** Gradient AI Access Token. */
    gradientAccessKey?: string;
    /** Gradient Workspace Id. */
    workspaceId?: string;
    /** Number of documents sent per request when embedding in batches. */
    batchSize: number;
    /** Embeddings model handle; assigned by `setModel`. */
    model: any;
    constructor(fields: GradientEmbeddingsParams);
    /**
     * Generates embeddings for an array of documents. The documents are split
     * into batches and one request per batch is made to the Gradient API.
     * @param texts Array of documents to generate embeddings for.
     * @returns Promise that resolves to a 2D array of embeddings for each document.
     */
    embedDocuments(texts: string[]): Promise<number[][]>;
    /**
     * Generates an embedding for a single document by calling
     * `embedDocuments` with the document as the only input.
     * @param text Document to generate an embedding for.
     * @returns Promise that resolves to an embedding for the document.
     */
    embedQuery(text: string): Promise<number[]>;
    /**
     * Sets the class' `model` value to the retrieved Embeddings Model used for
     * generating embeddings.
     * @returns Promise that resolves once the model has been set.
     */
    setModel(): Promise<void>;
}
|