toolpack-sdk 1.0.0 → 1.1.0-SNAPSHOT
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +115 -4
- package/dist/client/index.d.ts +1 -0
- package/dist/client/index.d.ts.map +1 -1
- package/dist/client/index.js +82 -79
- package/dist/client/index.js.map +1 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/knowledge/embedders/gemini-embedder.d.ts +21 -0
- package/dist/knowledge/embedders/gemini-embedder.d.ts.map +1 -0
- package/dist/knowledge/embedders/gemini-embedder.js +93 -0
- package/dist/knowledge/embedders/gemini-embedder.js.map +1 -0
- package/dist/knowledge/embedders/ollama-embedder.d.ts +12 -0
- package/dist/knowledge/embedders/ollama-embedder.d.ts.map +1 -0
- package/dist/knowledge/embedders/ollama-embedder.js +68 -0
- package/dist/knowledge/embedders/ollama-embedder.js.map +1 -0
- package/dist/knowledge/embedders/openai-embedder.d.ts +14 -0
- package/dist/knowledge/embedders/openai-embedder.d.ts.map +1 -0
- package/dist/knowledge/embedders/openai-embedder.js +94 -0
- package/dist/knowledge/embedders/openai-embedder.js.map +1 -0
- package/dist/knowledge/errors.d.ts +22 -0
- package/dist/knowledge/errors.d.ts.map +1 -0
- package/dist/knowledge/errors.js +51 -0
- package/dist/knowledge/errors.js.map +1 -0
- package/dist/knowledge/index.d.ts +12 -0
- package/dist/knowledge/index.d.ts.map +1 -0
- package/dist/knowledge/index.js +26 -0
- package/dist/knowledge/index.js.map +1 -0
- package/dist/knowledge/knowledge.d.ts +38 -0
- package/dist/knowledge/knowledge.d.ts.map +1 -0
- package/dist/knowledge/knowledge.js +287 -0
- package/dist/knowledge/knowledge.js.map +1 -0
- package/dist/knowledge/providers/memory-provider.d.ts +15 -0
- package/dist/knowledge/providers/memory-provider.d.ts.map +1 -0
- package/dist/knowledge/providers/memory-provider.js +113 -0
- package/dist/knowledge/providers/memory-provider.js.map +1 -0
- package/dist/knowledge/sources/json-source.d.ts +18 -0
- package/dist/knowledge/sources/json-source.d.ts.map +1 -0
- package/dist/knowledge/sources/json-source.js +224 -0
- package/dist/knowledge/sources/json-source.js.map +1 -0
- package/dist/knowledge/sources/markdown-source.d.ts +27 -0
- package/dist/knowledge/sources/markdown-source.d.ts.map +1 -0
- package/dist/knowledge/sources/markdown-source.js +410 -0
- package/dist/knowledge/sources/markdown-source.js.map +1 -0
- package/dist/knowledge/sources/sqlite-text-source.d.ts +18 -0
- package/dist/knowledge/sources/sqlite-text-source.d.ts.map +1 -0
- package/dist/knowledge/sources/sqlite-text-source.js +201 -0
- package/dist/knowledge/sources/sqlite-text-source.js.map +1 -0
- package/dist/knowledge/types.d.ts +130 -0
- package/dist/knowledge/types.d.ts.map +1 -0
- package/dist/knowledge/types.js +3 -0
- package/dist/knowledge/types.js.map +1 -0
- package/dist/mcp/client.js +1 -1
- package/dist/mcp/client.js.map +1 -1
- package/dist/providers/anthropic/index.js +13 -13
- package/dist/providers/anthropic/index.js.map +1 -1
- package/dist/providers/config.d.ts +0 -2
- package/dist/providers/config.d.ts.map +1 -1
- package/dist/providers/config.js.map +1 -1
- package/dist/providers/gemini/index.js +10 -10
- package/dist/providers/gemini/index.js.map +1 -1
- package/dist/providers/ollama/adapter.d.ts.map +1 -1
- package/dist/providers/ollama/adapter.js +14 -18
- package/dist/providers/ollama/adapter.js.map +1 -1
- package/dist/providers/ollama/slm-healer.js +7 -7
- package/dist/providers/ollama/slm-healer.js.map +1 -1
- package/dist/providers/openai/index.d.ts.map +1 -1
- package/dist/providers/openai/index.js +15 -21
- package/dist/providers/openai/index.js.map +1 -1
- package/dist/providers/provider-logger.d.ts +17 -9
- package/dist/providers/provider-logger.d.ts.map +1 -1
- package/dist/providers/provider-logger.js +68 -26
- package/dist/providers/provider-logger.js.map +1 -1
- package/dist/toolpack.d.ts +12 -0
- package/dist/toolpack.d.ts.map +1 -1
- package/dist/toolpack.js +36 -34
- package/dist/toolpack.js.map +1 -1
- package/dist/tools/cloud-tools/tools/deploy/index.d.ts.map +1 -1
- package/dist/tools/cloud-tools/tools/deploy/index.js +2 -0
- package/dist/tools/cloud-tools/tools/deploy/index.js.map +1 -1
- package/dist/tools/cloud-tools/tools/list/index.d.ts.map +1 -1
- package/dist/tools/cloud-tools/tools/list/index.js +2 -0
- package/dist/tools/cloud-tools/tools/list/index.js.map +1 -1
- package/dist/tools/coding-tools/tools/find-references/index.d.ts.map +1 -1
- package/dist/tools/coding-tools/tools/find-references/index.js +2 -0
- package/dist/tools/coding-tools/tools/find-references/index.js.map +1 -1
- package/dist/tools/coding-tools/tools/find-symbol/index.d.ts.map +1 -1
- package/dist/tools/coding-tools/tools/find-symbol/index.js +2 -0
- package/dist/tools/coding-tools/tools/find-symbol/index.js.map +1 -1
- package/dist/tools/coding-tools/tools/get-exports/index.d.ts.map +1 -1
- package/dist/tools/coding-tools/tools/get-exports/index.js +2 -0
- package/dist/tools/coding-tools/tools/get-exports/index.js.map +1 -1
- package/dist/tools/coding-tools/tools/get-imports/index.d.ts.map +1 -1
- package/dist/tools/coding-tools/tools/get-imports/index.js +2 -0
- package/dist/tools/coding-tools/tools/get-imports/index.js.map +1 -1
- package/dist/tools/coding-tools/tools/get-outline/index.d.ts.map +1 -1
- package/dist/tools/coding-tools/tools/get-outline/index.js +2 -0
- package/dist/tools/coding-tools/tools/get-outline/index.js.map +1 -1
- package/dist/tools/coding-tools/tools/get-symbols/index.d.ts.map +1 -1
- package/dist/tools/coding-tools/tools/get-symbols/index.js +2 -0
- package/dist/tools/coding-tools/tools/get-symbols/index.js.map +1 -1
- package/dist/tools/config-loader.d.ts +13 -0
- package/dist/tools/config-loader.d.ts.map +1 -1
- package/dist/tools/config-loader.js +20 -8
- package/dist/tools/config-loader.js.map +1 -1
- package/dist/tools/create-tool-project.d.ts.map +1 -1
- package/dist/tools/create-tool-project.js +2 -1
- package/dist/tools/create-tool-project.js.map +1 -1
- package/dist/tools/db-tools/tools/query/index.d.ts.map +1 -1
- package/dist/tools/db-tools/tools/query/index.js +2 -0
- package/dist/tools/db-tools/tools/query/index.js.map +1 -1
- package/dist/tools/diff-tools/tools/apply/index.d.ts.map +1 -1
- package/dist/tools/diff-tools/tools/apply/index.js +2 -0
- package/dist/tools/diff-tools/tools/apply/index.js.map +1 -1
- package/dist/tools/diff-tools/tools/create/index.d.ts.map +1 -1
- package/dist/tools/diff-tools/tools/create/index.js +2 -0
- package/dist/tools/diff-tools/tools/create/index.js.map +1 -1
- package/dist/tools/exec-tools/tools/run/index.d.ts.map +1 -1
- package/dist/tools/exec-tools/tools/run/index.js +2 -0
- package/dist/tools/exec-tools/tools/run/index.js.map +1 -1
- package/dist/tools/exec-tools/tools/run-background/index.d.ts.map +1 -1
- package/dist/tools/exec-tools/tools/run-background/index.js +5 -0
- package/dist/tools/exec-tools/tools/run-background/index.js.map +1 -1
- package/dist/tools/exec-tools/tools/run-shell/index.d.ts.map +1 -1
- package/dist/tools/exec-tools/tools/run-shell/index.js +2 -0
- package/dist/tools/exec-tools/tools/run-shell/index.js.map +1 -1
- package/dist/tools/fs-tools/tools/delete-file/index.d.ts.map +1 -1
- package/dist/tools/fs-tools/tools/delete-file/index.js +2 -0
- package/dist/tools/fs-tools/tools/delete-file/index.js.map +1 -1
- package/dist/tools/fs-tools/tools/read-file/index.d.ts.map +1 -1
- package/dist/tools/fs-tools/tools/read-file/index.js +2 -0
- package/dist/tools/fs-tools/tools/read-file/index.js.map +1 -1
- package/dist/tools/fs-tools/tools/search/index.d.ts.map +1 -1
- package/dist/tools/fs-tools/tools/search/index.js +2 -0
- package/dist/tools/fs-tools/tools/search/index.js.map +1 -1
- package/dist/tools/fs-tools/tools/write-file/index.d.ts.map +1 -1
- package/dist/tools/fs-tools/tools/write-file/index.js +2 -0
- package/dist/tools/fs-tools/tools/write-file/index.js.map +1 -1
- package/dist/tools/http-tools/tools/delete/index.d.ts.map +1 -1
- package/dist/tools/http-tools/tools/delete/index.js +2 -0
- package/dist/tools/http-tools/tools/delete/index.js.map +1 -1
- package/dist/tools/http-tools/tools/download/index.d.ts.map +1 -1
- package/dist/tools/http-tools/tools/download/index.js +2 -0
- package/dist/tools/http-tools/tools/download/index.js.map +1 -1
- package/dist/tools/http-tools/tools/get/index.d.ts.map +1 -1
- package/dist/tools/http-tools/tools/get/index.js +2 -0
- package/dist/tools/http-tools/tools/get/index.js.map +1 -1
- package/dist/tools/http-tools/tools/post/index.d.ts.map +1 -1
- package/dist/tools/http-tools/tools/post/index.js +2 -0
- package/dist/tools/http-tools/tools/post/index.js.map +1 -1
- package/dist/tools/http-tools/tools/put/index.d.ts.map +1 -1
- package/dist/tools/http-tools/tools/put/index.js +2 -0
- package/dist/tools/http-tools/tools/put/index.js.map +1 -1
- package/dist/tools/index.d.ts +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +3 -2
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/system-tools/tools/cwd/index.d.ts.map +1 -1
- package/dist/tools/system-tools/tools/cwd/index.js +2 -0
- package/dist/tools/system-tools/tools/cwd/index.js.map +1 -1
- package/dist/tools/system-tools/tools/env/index.d.ts.map +1 -1
- package/dist/tools/system-tools/tools/env/index.js +2 -0
- package/dist/tools/system-tools/tools/env/index.js.map +1 -1
- package/dist/tools/system-tools/tools/info/index.d.ts.map +1 -1
- package/dist/tools/system-tools/tools/info/index.js +2 -0
- package/dist/tools/system-tools/tools/info/index.js.map +1 -1
- package/dist/tools/types.d.ts +1 -0
- package/dist/tools/types.d.ts.map +1 -1
- package/dist/tools/types.js +1 -0
- package/dist/tools/types.js.map +1 -1
- package/dist/tools/web-tools/tools/fetch/index.d.ts.map +1 -1
- package/dist/tools/web-tools/tools/fetch/index.js +2 -0
- package/dist/tools/web-tools/tools/fetch/index.js.map +1 -1
- package/dist/tools/web-tools/tools/scrape/index.d.ts.map +1 -1
- package/dist/tools/web-tools/tools/scrape/index.js +2 -0
- package/dist/tools/web-tools/tools/scrape/index.js.map +1 -1
- package/dist/tools/web-tools/tools/search/index.d.ts.map +1 -1
- package/dist/tools/web-tools/tools/search/index.js +44 -7
- package/dist/tools/web-tools/tools/search/index.js.map +1 -1
- package/dist/tools/web-tools/tools/search/schema.js +1 -1
- package/dist/tools/web-tools/tools/search/schema.js.map +1 -1
- package/dist/workflows/planning/planner.d.ts.map +1 -1
- package/dist/workflows/planning/planner.js +16 -2
- package/dist/workflows/planning/planner.js.map +1 -1
- package/dist/workflows/steps/step-executor.d.ts.map +1 -1
- package/dist/workflows/steps/step-executor.js +17 -5
- package/dist/workflows/steps/step-executor.js.map +1 -1
- package/dist/workflows/workflow-executor.d.ts.map +1 -1
- package/dist/workflows/workflow-executor.js +32 -2
- package/dist/workflows/workflow-executor.js.map +1 -1
- package/package.json +4 -4
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript-emitted interop helper: exposes property `k` of module `m` on
 * object `o` under the name `k2` (which defaults to `k`).
 *
 * When property descriptors are available, the source descriptor is reused
 * unless it is missing, is an accessor on a non-ES module, or is a
 * writable/configurable data property; in those cases an enumerable getter
 * that reads `m[k]` on every access is installed instead. Without
 * `Object.create` the value is simply copied.
 *
 * An implementation already present on `this` takes precedence.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy engines: plain assignment, no live binding.
        return function (target, source, key, alias) {
            target[alias === undefined ? key : alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    };
})();
|
|
13
|
+
/**
 * TypeScript-emitted interop helper: attaches `v` to `o` as its `default`
 * export. With `Object.create` available the property is defined as an
 * enumerable data property; otherwise it is assigned directly.
 *
 * An implementation already present on `this` takes precedence.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (namespace, value) {
            Object.defineProperty(namespace, "default", { enumerable: true, value: value });
        };
    }
    return function (namespace, value) {
        namespace["default"] = value;
    };
})();
|
|
18
|
+
/**
 * TypeScript-emitted interop helper backing `import * as ns from "..."`.
 *
 * A module flagged `__esModule` is returned untouched. Any other value is
 * wrapped in a fresh namespace object: every own property except "default"
 * is bound through `__createBinding`, and the module itself becomes the
 * namespace's `default` via `__setModuleDefault`.
 *
 * An implementation already present on `this` takes precedence.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first call with the best available key lister.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            var names = [];
            for (var name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
|
+
exports.JSONSource = void 0;
|
|
37
|
+
const fs = __importStar(require("fs"));
|
|
38
|
+
const path = __importStar(require("path"));
|
|
39
|
+
const crypto = __importStar(require("crypto"));
|
|
40
|
+
const errors_js_1 = require("../errors.js");
|
|
41
|
+
/**
 * Knowledge source that turns one JSON file into chunks.
 *
 * Options read by this class:
 *  - `chunkBy`: `'item'` (default) makes one chunk per top-level array
 *    element (or one chunk for a non-array document); a string starting
 *    with `$.` is evaluated as a small JSONPath subset (dotted keys,
 *    `[n]` / `[key]` and the `[*]` wildcard).
 *  - `contentFields`: dotted paths whose values are joined with blank lines
 *    to form the chunk content; when empty the whole item is flattened.
 *  - `metadataFields`: dotted paths copied into the chunk metadata.
 *  - `metadata`: static metadata merged into every chunk.
 *  - `namespace`: chunk id prefix (default `'json'`).
 *  - `watch`: enables `watch()`.
 */
class JSONSource {
    filePath;
    options;
    watcher = null;
    lastHash = null;

    /**
     * @param {string} filePath Path to the JSON file; resolved to an absolute path.
     * @param {object} [options] See the class description.
     */
    constructor(filePath, options = {}) {
        this.filePath = path.resolve(filePath);
        this.options = options;
    }

    /**
     * Reads and parses the file, then yields every chunk extracted from it.
     *
     * @returns {AsyncGenerator<{id: string, content: string, metadata: object}>}
     * @throws {IngestionError} When the file cannot be read, parsed or chunked.
     */
    async *load() {
        let chunks;
        try {
            const content = await fs.promises.readFile(this.filePath, 'utf-8');
            chunks = this.extractChunks(JSON.parse(content));
            // Remember only content that was chunked successfully.
            this.lastHash = this.hashContent(content);
        }
        catch (error) {
            throw new errors_js_1.IngestionError(`Failed to parse JSON file: ${error.message}`, this.filePath, error);
        }
        // Yield outside the try block so an error injected by the consumer
        // is not mislabelled as a parse failure.
        for (const chunk of chunks) {
            yield chunk;
        }
    }

    /**
     * Watches the file and yields an `{ type: 'update', chunk }` entry for
     * every chunk each time the file content changes.
     *
     * Finishes immediately unless `options.watch` is truthy. Otherwise it runs
     * until `stop()` is called or the consumer leaves the loop; either way the
     * underlying watcher is closed.
     *
     * @returns {AsyncGenerator<{type: string, chunk: object}>}
     * @throws {IngestionError} When the file system watcher reports an error.
     */
    async *watch() {
        if (!this.options.watch) {
            return;
        }
        const updateQueue = [];
        let wake = null;
        let stopped = false;
        let failure = null;
        // Resumes the loop below if it is currently waiting for work.
        const notify = () => {
            if (wake) {
                const resume = wake;
                wake = null;
                resume();
            }
        };
        const processChange = async () => {
            try {
                const content = await fs.promises.readFile(this.filePath, 'utf-8');
                const newHash = this.hashContent(content);
                if (newHash === this.lastHash) {
                    return;
                }
                const chunks = this.extractChunks(JSON.parse(content));
                // Record the hash only once the content parsed, so a
                // half-written file never counts as "seen".
                this.lastHash = newHash;
                for (const chunk of chunks) {
                    updateQueue.push({ type: 'update', chunk });
                }
                notify();
            }
            catch {
                // File might be in the middle of being written; the next
                // change event will retry.
            }
        };
        const watcher = fs.watch(this.filePath, (eventType) => {
            if (eventType === 'change') {
                // Fire and forget: processChange handles its own errors.
                void processChange();
            }
        });
        // 'close' is emitted after stop() closes the watcher; use it to end
        // the loop instead of leaving the consumer awaiting forever.
        watcher.once('close', () => {
            stopped = true;
            notify();
        });
        // Without a listener an 'error' event would crash the process.
        watcher.once('error', (error) => {
            failure = error;
            stopped = true;
            notify();
        });
        this.watcher = watcher;
        try {
            while (!stopped) {
                if (updateQueue.length > 0) {
                    yield updateQueue.shift();
                }
                else {
                    await new Promise((resolve) => {
                        wake = resolve;
                    });
                }
            }
        }
        finally {
            // Also reached when the consumer breaks out of `for await`.
            watcher.close();
            if (this.watcher === watcher) {
                this.watcher = null;
            }
        }
        if (failure) {
            throw new errors_js_1.IngestionError(`Failed to watch JSON file: ${failure.message}`, this.filePath, failure);
        }
    }

    /** Closes the active file watcher, if any; this also ends a running `watch()`. */
    stop() {
        if (this.watcher) {
            this.watcher.close();
            this.watcher = null;
        }
    }

    /**
     * Selects the items to chunk according to `options.chunkBy` and converts
     * each one into a chunk.
     *
     * @param {*} data Parsed JSON document.
     * @returns {Array<{id: string, content: string, metadata: object}>}
     */
    extractChunks(data) {
        const chunkBy = this.options.chunkBy ?? 'item';
        const contentFields = this.options.contentFields ?? [];
        const metadataFields = this.options.metadataFields ?? [];
        const usesJSONPath = typeof chunkBy === 'string' && chunkBy.startsWith('$.');
        let items;
        if (usesJSONPath) {
            items = this.evaluateJSONPath(data, chunkBy);
        }
        else {
            // 'item' and any unrecognised value: one chunk per array element.
            items = Array.isArray(data) ? data : [data];
        }
        return items.map((item, index) => this.itemToChunk(item, index, contentFields, metadataFields));
    }

    /**
     * Evaluates a minimal JSONPath expression of the form `$.a.b[*].c`.
     *
     * @param {*} data Document to query.
     * @param {string} pathExpr Expression starting with `$.`.
     * @returns {Array<*>} All matched values; empty when nothing matches.
     */
    evaluateJSONPath(data, pathExpr) {
        const expression = pathExpr.slice(2);
        // Split on dots and in front of every "[" so "a[0]" becomes "a", "[0]".
        const segments = expression.split(/(?=\[)|\./).filter(Boolean);
        let current = [data];
        for (const segment of segments) {
            const next = [];
            for (const item of current) {
                if (segment === '[*]') {
                    if (Array.isArray(item)) {
                        next.push(...item);
                    }
                    else if (typeof item === 'object' && item !== null) {
                        next.push(...Object.values(item));
                    }
                }
                else {
                    const key = segment.replace(/^\[|\]$/g, '');
                    // Own properties only: parsed JSON never carries inherited
                    // members such as "constructor" or "toString".
                    if (item && typeof item === 'object' && Object.prototype.hasOwnProperty.call(item, key)) {
                        next.push(item[key]);
                    }
                }
            }
            current = next;
        }
        return current;
    }

    /**
     * Builds a chunk from one item.
     *
     * The id has the form `namespace:basename:hash8:index`, where `hash8` is
     * the first 8 hex characters of the MD5 digest of the chunk content.
     *
     * @param {*} item Value selected from the document.
     * @param {number} index Position of the item among the selected items.
     * @param {string[]} contentFields Dotted paths used to build the content.
     * @param {string[]} metadataFields Dotted paths copied into the metadata.
     * @returns {{id: string, content: string, metadata: object}}
     */
    itemToChunk(item, index, contentFields, metadataFields) {
        let content;
        if (contentFields.length > 0) {
            content = contentFields
                .map((field) => this.getNestedValue(item, field))
                .filter((value) => value !== undefined && value !== null)
                .map((value) => String(value))
                .join('\n\n');
        }
        else {
            content = this.flattenToContent(item);
        }
        const metadata = {
            ...(this.options.metadata ?? {}),
            source: this.filePath,
            index,
        };
        for (const field of metadataFields) {
            const value = this.getNestedValue(item, field);
            if (value !== undefined) {
                metadata[field] = value;
            }
        }
        const namespace = this.options.namespace ?? 'json';
        // MD5 is used as a short content fingerprint here, not for security.
        const hash = crypto.createHash('md5').update(content).digest('hex').slice(0, 8);
        const id = `${namespace}:${path.basename(this.filePath)}:${hash}:${index}`;
        return { id, content, metadata };
    }

    /**
     * Resolves a dotted path such as `author.name` against `obj`.
     *
     * @param {*} obj Root value.
     * @param {string} fieldPath Dot-separated property path.
     * @returns {*} The value found, or `undefined` when any step is missing.
     */
    getNestedValue(obj, fieldPath) {
        let current = obj;
        for (const part of fieldPath.split('.')) {
            if (current === null || current === undefined) {
                return undefined;
            }
            // Ignore inherited members so "constructor"/"toString" never
            // resolve to built-in functions.
            if (!Object.prototype.hasOwnProperty.call(current, part)) {
                return undefined;
            }
            current = current[part];
        }
        return current;
    }

    /**
     * Renders a value as `path: value` lines, one per leaf; `null` and
     * `undefined` leaves are omitted.
     *
     * @param {*} obj Value to flatten.
     * @param {string} [prefix] Path accumulated so far.
     * @returns {string}
     */
    flattenToContent(obj, prefix = '') {
        if (obj === null || obj === undefined) {
            return '';
        }
        if (typeof obj !== 'object') {
            return prefix ? `${prefix}: ${obj}` : String(obj);
        }
        if (Array.isArray(obj)) {
            return obj
                .map((item, i) => this.flattenToContent(item, prefix ? `${prefix}[${i}]` : `[${i}]`))
                .filter(Boolean)
                .join('\n');
        }
        return Object.entries(obj)
            .map(([key, value]) => this.flattenToContent(value, prefix ? `${prefix}.${key}` : key))
            .filter(Boolean)
            .join('\n');
    }

    /**
     * @param {string} content File content.
     * @returns {string} Hex MD5 digest used to detect content changes.
     */
    hashContent(content) {
        return crypto.createHash('md5').update(content).digest('hex');
    }
}
|
|
223
|
+
exports.JSONSource = JSONSource;
|
|
224
|
+
//# sourceMappingURL=json-source.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"json-source.js","sourceRoot":"","sources":["../../../src/knowledge/sources/json-source.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,uCAAyB;AACzB,2CAA6B;AAC7B,+CAAiC;AAEjC,4CAA8C;AAE9C,MAAa,UAAU;IACb,QAAQ,CAAS;IACjB,OAAO,CAAoB;IAC3B,OAAO,GAAwB,IAAI,CAAC;IACpC,QAAQ,GAAkB,IAAI,CAAC;IAEvC,YAAY,QAAgB,EAAE,UAA6B,EAAE;QAC3D,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QACvC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;IACzB,CAAC;IAED,KAAK,CAAC,CAAC,IAAI;QACT,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;YACnE,IAAI,CAAC,QAAQ,GAAG,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YAC1C,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAEjC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;YACxC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,MAAM,KAAK,CAAC;YACd,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,0BAAc,CACtB,8BAA+B,KAAe,CAAC,OAAO,EAAE,EACxD,IAAI,CAAC,QAAQ,EACb,KAAc,CACf,CAAC;QACJ,CAAC;IACH,CAAC;IAED,KAAK,CAAC,CAAC,KAAK;QACV,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACxB,OAAO;QACT,CAAC;QAED,MAAM,WAAW,GAAkB,EAAE,CAAC;QACtC,IAAI,WAAW,GAA0D,IAAI,CAAC;QAE9E,MAAM,aAAa,GAAG,KAAK,IAAI,EAAE;YAC/B,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,EAAE,OAAO,CAAC,CAAC;gBACnE,MAAM,OAAO,GAAG,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;gBAE1C,IAAI,OAAO,KAAK,IAAI,CAAC,QAAQ,EAAE,CAAC;oBAC9B,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC;oBACxB,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;oBACjC,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;oBAExC,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;wBAC3B,WAAW,CAAC,IAAI,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,KAAK,EAAE,CAAC,CAAC;oBAC9C,CAAC;oBAED,IAAI,WAAW,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;wBAC1C,MAAM,MAAM,GAAG,WAAW,CAAC,KAAK,EAAG,CAAC;wBACpC,MAAM,OAAO,GAAG,WAAW,CAAC;wBAC5B,WAAW,GAAG,IAAI,CAAC;wBACnB,OAAO,CAAC,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,CAAC;oBAC1C,CAAC;gBACH,CAAC;YACH,CAAC;YAAC,MAAM,CAAC;gBACP,+CAA+C;YACjD,CAAC;QACH,CAAC,CAAC;QAEF
,IAAI,CAAC,OAAO,GAAG,EAAE,CAAC,KAAK,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,SAAS,EAAE,EAAE;YACnD,IAAI,SAAS,KAAK,QAAQ,EAAE,CAAC;gBAC3B,aAAa,EAAE,CAAC;YAClB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO,IAAI,EAAE,CAAC;YACZ,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC3B,MAAM,WAAW,CAAC,KAAK,EAAG,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACN,MAAM,MAAM,IAAI,OAAO,CAAc,CAAC,OAAO,EAAE,EAAE;oBAC/C,WAAW,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;gBAClD,CAAC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;IACH,CAAC;IAED,IAAI;QACF,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;YACjB,IAAI,CAAC,OAAO,CAAC,KAAK,EAAE,CAAC;YACrB,IAAI,CAAC,OAAO,GAAG,IAAI,CAAC;QACtB,CAAC;IACH,CAAC;IAEO,aAAa,CAAC,IAAS;QAC7B,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,OAAO,IAAI,MAAM,CAAC;QAC/C,MAAM,aAAa,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,IAAI,EAAE,CAAC;QACvD,MAAM,cAAc,GAAG,IAAI,CAAC,OAAO,CAAC,cAAc,IAAI,EAAE,CAAC;QAEzD,IAAI,KAAY,CAAC;QAEjB,IAAI,OAAO,KAAK,MAAM,EAAE,CAAC;YACvB,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC9C,CAAC;aAAM,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;YACpC,KAAK,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAC/C,CAAC;aAAM,CAAC;YACN,KAAK,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC9C,CAAC;QAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,KAAK,EAAE,aAAa,EAAE,cAAc,CAAC,CAAC,CAAC;IAClG,CAAC;IAEO,gBAAgB,CAAC,IAAS,EAAE,QAAgB;QAClD,MAAM,IAAI,GAAG,QAAQ,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;QAC/B,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;QAEzD,IAAI,OAAO,GAAU,CAAC,IAAI,CAAC,CAAC;QAE5B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,IAAI,GAAU,EAAE,CAAC;YAEvB,KAAK,MAAM,IAAI,IAAI,OAAO,EAAE,CAAC;gBAC3B,IAAI,OAAO,KAAK,KAAK,EAAE,CAAC;oBACtB,IAAI,KAAK,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;wBACxB,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;oBACrB,CAAC;yBAAM,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,IAAI,KAAK,IAAI,EAAE,CAAC;wBACrD,IAAI,CAAC,IAAI,CAAC,GAAG,MAAM,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC,CAAC;oBACpC,CAA
C;gBACH,CAAC;qBAAM,CAAC;oBACN,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC;oBAC5C,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ,IAAI,GAAG,IAAI,IAAI,EAAE,CAAC;wBACpD,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;oBACvB,CAAC;gBACH,CAAC;YACH,CAAC;YAED,OAAO,GAAG,IAAI,CAAC;QACjB,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,WAAW,CACjB,IAAS,EACT,KAAa,EACb,aAAuB,EACvB,cAAwB;QAExB,IAAI,OAAe,CAAC;QAEpB,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,YAAY,GAAG,aAAa;iBAC/B,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;iBAChD,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,KAAK,SAAS,IAAI,CAAC,KAAK,IAAI,CAAC;iBAC5C,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;YACzB,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACtC,CAAC;aAAM,CAAC;YACN,OAAO,GAAG,IAAI,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC;QACxC,CAAC;QAED,MAAM,QAAQ,GAAwB;YACpC,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;YAChC,MAAM,EAAE,IAAI,CAAC,QAAQ;YACrB,KAAK;SACN,CAAC;QAEF,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,KAAK,MAAM,KAAK,IAAI,cAAc,EAAE,CAAC;gBACnC,MAAM,KAAK,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;gBAC/C,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;oBACxB,QAAQ,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC;gBAC1B,CAAC;YACH,CAAC;QACH,CAAC;QAED,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,IAAI,MAAM,CAAC;QACnD,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QAChF,MAAM,EAAE,GAAG,GAAG,SAAS,IAAI,IAAI,CAAC,QAAQ,CAAC,IAAI,CAAC,QAAQ,CAAC,IAAI,IAAI,IAAI,KAAK,EAAE,CAAC;QAE3E,OAAO,EAAE,EAAE,EAAE,OAAO,EAAE,QAAQ,EAAE,CAAC;IACnC,CAAC;IAEO,cAAc,CAAC,GAAQ,EAAE,IAAY;QAC3C,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;QAC9B,IAAI,OAAO,GAAG,GAAG,CAAC;QAElB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,OAAO,KAAK,IAAI,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;gBAC9C,OAAO,SAAS,CAAC;YACnB,CAAC;YACD,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAC1B,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAEO,gBAAgB,CAAC,GAAQ,EAAE,SAAiB,EAAE;QACpD,IAAI,GAAG,KAAK,IAAI,IAA
I,GAAG,KAAK,SAAS,EAAE,CAAC;YACtC,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;YAC5B,OAAO,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,KAAK,GAAG,EAAE,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;QACpD,CAAC;QAED,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;YACvB,OAAO,GAAG;iBACP,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;iBACpF,MAAM,CAAC,OAAO,CAAC;iBACf,IAAI,CAAC,IAAI,CAAC,CAAC;QAChB,CAAC;QAED,OAAO,MAAM,CAAC,OAAO,CAAC,GAAG,CAAC;aACvB,GAAG,CAAC,CAAC,CAAC,GAAG,EAAE,KAAK,CAAC,EAAE,EAAE;YACpB,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,CAAC,GAAG,MAAM,IAAI,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC;YACpD,OAAO,IAAI,CAAC,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;QACjD,CAAC,CAAC;aACD,MAAM,CAAC,OAAO,CAAC;aACf,IAAI,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAEO,WAAW,CAAC,OAAe;QACjC,OAAO,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAChE,CAAC;CACF;AA1ND,gCA0NC"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import type { KnowledgeSource, Chunk, ChunkUpdate, MarkdownSourceOptions } from '../types.js';
|
|
2
|
+
/**
 * Knowledge source that produces chunks from markdown files selected by a
 * glob pattern. Private members are implementation details whose behaviour
 * is not described by this declaration.
 */
export declare class MarkdownSource implements KnowledgeSource {
|
|
3
|
+
private pattern;
|
|
4
|
+
private options;
|
|
5
|
+
private watcher;
|
|
6
|
+
private fileHashes;
|
|
7
|
+
/**
 * @param pattern Glob pattern or directory. In the compiled implementation a
 *   pattern without `*` or `?` that does not end in `.md` is treated as a
 *   directory and gets a recursive markdown glob appended.
 * @param options Optional settings; the implementation fills in defaults for
 *   `maxChunkSize`, `chunkOverlap` and `minChunkSize`.
 */
constructor(pattern: string, options?: MarkdownSourceOptions);
|
|
8
|
+
/**
 * Yields the chunks parsed from every file matching the pattern. The
 * implementation throws an `IngestionError` when a file cannot be processed.
 */
load(): AsyncIterable<Chunk>;
|
|
9
|
+
/**
 * Yields chunk updates as watched files change. The implementation returns
 * immediately when `options.watch` is not set.
 */
watch(): AsyncIterable<ChunkUpdate>;
|
|
10
|
+
/** NOTE(review): presumably closes the file watcher; the implementation is not visible here, confirm. */
stop(): void;
|
|
11
|
+
private parseMarkdown;
|
|
12
|
+
private extractFrontmatter;
|
|
13
|
+
private splitByHeadings;
|
|
14
|
+
private splitLargeSection;
|
|
15
|
+
private getOverlapText;
|
|
16
|
+
private splitIntoSentences;
|
|
17
|
+
private estimateTokens;
|
|
18
|
+
private extractTags;
|
|
19
|
+
private extractWikilinks;
|
|
20
|
+
private generateChunkId;
|
|
21
|
+
private getRelativePath;
|
|
22
|
+
private getBaseDir;
|
|
23
|
+
private matchesPattern;
|
|
24
|
+
private hashContent;
|
|
25
|
+
private parseFrontmatterValue;
|
|
26
|
+
}
|
|
27
|
+
//# sourceMappingURL=markdown-source.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-source.d.ts","sourceRoot":"","sources":["../../../src/knowledge/sources/markdown-source.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,KAAK,EAAE,WAAW,EAAE,qBAAqB,EAAE,MAAM,aAAa,CAAC;AAiB9F,qBAAa,cAAe,YAAW,eAAe;IACpD,OAAO,CAAC,OAAO,CAAS;IACxB,OAAO,CAAC,OAAO,CAAwB;IACvC,OAAO,CAAC,OAAO,CAA6B;IAC5C,OAAO,CAAC,UAAU,CAAkC;gBAExC,OAAO,EAAE,MAAM,EAAE,OAAO,GAAE,qBAA0B;IAezD,IAAI,IAAI,aAAa,CAAC,KAAK,CAAC;IAuB5B,KAAK,IAAI,aAAa,CAAC,WAAW,CAAC;IA8E1C,IAAI,IAAI,IAAI;IAOZ,OAAO,CAAC,aAAa;IAqCrB,OAAO,CAAC,kBAAkB;IAwB1B,OAAO,CAAC,eAAe;IAmEvB,OAAO,CAAC,iBAAiB;IAqEzB,OAAO,CAAC,cAAc;IAiBtB,OAAO,CAAC,kBAAkB;IAI1B,OAAO,CAAC,cAAc;IAItB,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,gBAAgB;IAYxB,OAAO,CAAC,eAAe;IAOvB,OAAO,CAAC,eAAe;IAKvB,OAAO,CAAC,UAAU;IAKlB,OAAO,CAAC,cAAc;IAItB,OAAO,CAAC,WAAW;IAInB,OAAO,CAAC,qBAAqB;CAsB9B"}
|
|
@@ -0,0 +1,410 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
 * TypeScript-emitted interop helper: exposes property `k` of module `m` on
 * object `o` under the name `k2` (which defaults to `k`).
 *
 * When property descriptors are available, the source descriptor is reused
 * unless it is missing, is an accessor on a non-ES module, or is a
 * writable/configurable data property; in those cases an enumerable getter
 * that reads `m[k]` on every access is installed instead. Without
 * `Object.create` the value is simply copied.
 *
 * An implementation already present on `this` takes precedence.
 */
var __createBinding = (this && this.__createBinding) || (function () {
    if (!Object.create) {
        // Legacy engines: plain assignment, no live binding.
        return function (target, source, key, alias) {
            target[alias === undefined ? key : alias] = source[key];
        };
    }
    return function (target, source, key, alias) {
        var exportName = alias === undefined ? key : alias;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter;
        if (!descriptor) {
            needsGetter = true;
        }
        else if ("get" in descriptor) {
            needsGetter = !source.__esModule;
        }
        else {
            needsGetter = descriptor.writable || descriptor.configurable;
        }
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, exportName, descriptor);
    };
})();
|
|
13
|
+
/**
 * TypeScript-emitted interop helper: attaches `v` to `o` as its `default`
 * export. With `Object.create` available the property is defined as an
 * enumerable data property; otherwise it is assigned directly.
 *
 * An implementation already present on `this` takes precedence.
 */
var __setModuleDefault = (this && this.__setModuleDefault) || (function () {
    if (Object.create) {
        return function (namespace, value) {
            Object.defineProperty(namespace, "default", { enumerable: true, value: value });
        };
    }
    return function (namespace, value) {
        namespace["default"] = value;
    };
})();
|
|
18
|
+
/**
 * TypeScript-emitted interop helper backing `import * as ns from "..."`.
 *
 * A module flagged `__esModule` is returned untouched. Any other value is
 * wrapped in a fresh namespace object: every own property except "default"
 * is bound through `__createBinding`, and the module itself becomes the
 * namespace's `default` via `__setModuleDefault`.
 *
 * An implementation already present on `this` takes precedence.
 */
var __importStar = (this && this.__importStar) || (function () {
    // Replaces itself on first call with the best available key lister.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (target) {
            var names = [];
            for (var name in target) {
                if (Object.prototype.hasOwnProperty.call(target, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var names = listOwnKeys(mod);
            for (var idx = 0; idx < names.length; idx++) {
                if (names[idx] === "default") {
                    continue;
                }
                __createBinding(namespace, mod, names[idx]);
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
35
|
+
/**
 * TypeScript-emitted interop helper backing `import x from "..."`.
 *
 * Modules flagged `__esModule` are returned as-is; any other value is wrapped
 * so that it is reachable as the `default` property.
 *
 * An implementation already present on `this` takes precedence.
 */
var __importDefault = (this && this.__importDefault) || function (mod) {
    if (mod && mod.__esModule) {
        return mod;
    }
    return { "default": mod };
};
|
|
38
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
39
|
+
exports.MarkdownSource = void 0;
|
|
40
|
+
const fs = __importStar(require("fs"));
|
|
41
|
+
const path = __importStar(require("path"));
|
|
42
|
+
const crypto = __importStar(require("crypto"));
|
|
43
|
+
const fast_glob_1 = __importDefault(require("fast-glob"));
|
|
44
|
+
const errors_js_1 = require("../errors.js");
|
|
45
|
+
// Fallback for options.maxChunkSize when the caller does not provide one.
// NOTE(review): unit (characters vs. estimated tokens) is not visible here — confirm.
const DEFAULT_MAX_CHUNK_SIZE = 2000;
|
|
46
|
+
// Fallback for options.chunkOverlap when the caller does not provide one.
const DEFAULT_CHUNK_OVERLAP = 200;
|
|
47
|
+
// Fallback for options.minChunkSize when the caller does not provide one.
const DEFAULT_MIN_CHUNK_SIZE = 100;
|
|
48
|
+
class MarkdownSource {
|
|
49
|
+
pattern;
|
|
50
|
+
options;
|
|
51
|
+
watcher = null;
|
|
52
|
+
fileHashes = new Map();
|
|
53
|
+
constructor(pattern, options = {}) {
|
|
54
|
+
// Auto-append **/*.md if pattern looks like a directory (no glob chars and no .md extension)
|
|
55
|
+
if (!pattern.includes('*') && !pattern.includes('?') && !pattern.endsWith('.md')) {
|
|
56
|
+
this.pattern = pattern.endsWith('/') ? `${pattern}**/*.md` : `${pattern}/**/*.md`;
|
|
57
|
+
}
|
|
58
|
+
else {
|
|
59
|
+
this.pattern = pattern;
|
|
60
|
+
}
|
|
61
|
+
this.options = {
|
|
62
|
+
maxChunkSize: DEFAULT_MAX_CHUNK_SIZE,
|
|
63
|
+
chunkOverlap: DEFAULT_CHUNK_OVERLAP,
|
|
64
|
+
minChunkSize: DEFAULT_MIN_CHUNK_SIZE,
|
|
65
|
+
...options,
|
|
66
|
+
};
|
|
67
|
+
}
|
|
68
|
+
async *load() {
|
|
69
|
+
const files = await (0, fast_glob_1.default)(this.pattern, { absolute: true });
|
|
70
|
+
for (const filePath of files) {
|
|
71
|
+
try {
|
|
72
|
+
const content = await fs.promises.readFile(filePath, 'utf-8');
|
|
73
|
+
const hash = this.hashContent(content);
|
|
74
|
+
this.fileHashes.set(filePath, hash);
|
|
75
|
+
const chunks = this.parseMarkdown(filePath, content);
|
|
76
|
+
for (const chunk of chunks) {
|
|
77
|
+
yield chunk;
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
catch (error) {
|
|
81
|
+
throw new errors_js_1.IngestionError(`Failed to parse markdown file: ${error.message}`, filePath, error);
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
async *watch() {
|
|
86
|
+
if (!this.options.watch) {
|
|
87
|
+
return;
|
|
88
|
+
}
|
|
89
|
+
const baseDir = this.getBaseDir();
|
|
90
|
+
const updateQueue = [];
|
|
91
|
+
let resolveNext = null;
|
|
92
|
+
const processFile = async (filePath, eventType) => {
|
|
93
|
+
const absolutePath = path.isAbsolute(filePath) ? filePath : path.join(baseDir, filePath);
|
|
94
|
+
if (!this.matchesPattern(absolutePath)) {
|
|
95
|
+
return;
|
|
96
|
+
}
|
|
97
|
+
if (eventType === 'unlink') {
|
|
98
|
+
const oldHash = this.fileHashes.get(absolutePath);
|
|
99
|
+
if (oldHash) {
|
|
100
|
+
this.fileHashes.delete(absolutePath);
|
|
101
|
+
const chunkId = this.generateChunkId(absolutePath, '', 0);
|
|
102
|
+
updateQueue.push({
|
|
103
|
+
type: 'delete',
|
|
104
|
+
chunk: { id: chunkId, content: '', metadata: { source: absolutePath } },
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
else {
|
|
109
|
+
try {
|
|
110
|
+
const content = await fs.promises.readFile(absolutePath, 'utf-8');
|
|
111
|
+
const newHash = this.hashContent(content);
|
|
112
|
+
const oldHash = this.fileHashes.get(absolutePath);
|
|
113
|
+
if (oldHash !== newHash) {
|
|
114
|
+
this.fileHashes.set(absolutePath, newHash);
|
|
115
|
+
const chunks = this.parseMarkdown(absolutePath, content);
|
|
116
|
+
const updateType = oldHash ? 'update' : 'add';
|
|
117
|
+
for (const chunk of chunks) {
|
|
118
|
+
updateQueue.push({ type: updateType, chunk });
|
|
119
|
+
}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
catch {
|
|
123
|
+
// File might have been deleted between event and read
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
if (resolveNext && updateQueue.length > 0) {
|
|
127
|
+
const update = updateQueue.shift();
|
|
128
|
+
const resolve = resolveNext;
|
|
129
|
+
resolveNext = null;
|
|
130
|
+
resolve({ value: update, done: false });
|
|
131
|
+
}
|
|
132
|
+
};
|
|
133
|
+
this.watcher = fs.watch(baseDir, { recursive: true }, (eventType, filename) => {
|
|
134
|
+
if (filename && filename.endsWith('.md')) {
|
|
135
|
+
const fullPath = path.join(baseDir, filename);
|
|
136
|
+
if (eventType === 'rename') {
|
|
137
|
+
fs.access(fullPath, fs.constants.F_OK, (err) => {
|
|
138
|
+
processFile(fullPath, err ? 'unlink' : 'add');
|
|
139
|
+
});
|
|
140
|
+
}
|
|
141
|
+
else {
|
|
142
|
+
processFile(fullPath, 'change');
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
});
|
|
146
|
+
while (true) {
|
|
147
|
+
if (updateQueue.length > 0) {
|
|
148
|
+
yield updateQueue.shift();
|
|
149
|
+
}
|
|
150
|
+
else {
|
|
151
|
+
yield await new Promise((resolve) => {
|
|
152
|
+
resolveNext = (result) => resolve(result.value);
|
|
153
|
+
});
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
stop() {
|
|
158
|
+
if (this.watcher) {
|
|
159
|
+
this.watcher.close();
|
|
160
|
+
this.watcher = null;
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
parseMarkdown(filePath, content) {
|
|
164
|
+
const { frontmatter, body } = this.extractFrontmatter(content);
|
|
165
|
+
const sections = this.splitByHeadings(body);
|
|
166
|
+
const chunks = [];
|
|
167
|
+
let chunkIndex = 0;
|
|
168
|
+
const relativePath = this.getRelativePath(filePath);
|
|
169
|
+
for (const section of sections) {
|
|
170
|
+
const sectionChunks = this.splitLargeSection(section);
|
|
171
|
+
const totalChunks = sectionChunks.length;
|
|
172
|
+
for (const sectionChunk of sectionChunks) {
|
|
173
|
+
const id = this.generateChunkId(filePath, sectionChunk.content, chunkIndex);
|
|
174
|
+
chunks.push({
|
|
175
|
+
id,
|
|
176
|
+
content: sectionChunk.content,
|
|
177
|
+
metadata: {
|
|
178
|
+
...frontmatter,
|
|
179
|
+
...(this.options.metadata ?? {}),
|
|
180
|
+
heading: sectionChunk.headings,
|
|
181
|
+
hasCode: sectionChunk.hasCode,
|
|
182
|
+
source: relativePath,
|
|
183
|
+
chunkIndex,
|
|
184
|
+
totalChunks,
|
|
185
|
+
...(this.extractTags(sectionChunk.content)),
|
|
186
|
+
...(this.extractWikilinks(sectionChunk.content)),
|
|
187
|
+
},
|
|
188
|
+
});
|
|
189
|
+
chunkIndex++;
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
return chunks;
|
|
193
|
+
}
|
|
194
|
+
extractFrontmatter(content) {
|
|
195
|
+
const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---\n/);
|
|
196
|
+
if (!frontmatterMatch) {
|
|
197
|
+
return { frontmatter: {}, body: content };
|
|
198
|
+
}
|
|
199
|
+
const frontmatterStr = frontmatterMatch[1];
|
|
200
|
+
const body = content.slice(frontmatterMatch[0].length);
|
|
201
|
+
const frontmatter = {};
|
|
202
|
+
for (const line of frontmatterStr.split('\n')) {
|
|
203
|
+
const colonIndex = line.indexOf(':');
|
|
204
|
+
if (colonIndex > 0) {
|
|
205
|
+
const key = line.slice(0, colonIndex).trim();
|
|
206
|
+
const rawValue = line.slice(colonIndex + 1).trim();
|
|
207
|
+
const parsedValue = this.parseFrontmatterValue(rawValue);
|
|
208
|
+
frontmatter[key] = parsedValue;
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
return { frontmatter, body };
|
|
212
|
+
}
|
|
213
|
+
splitByHeadings(content) {
|
|
214
|
+
const lines = content.split('\n');
|
|
215
|
+
const sections = [];
|
|
216
|
+
let currentHeadings = [];
|
|
217
|
+
let currentContent = [];
|
|
218
|
+
let inCodeBlock = false;
|
|
219
|
+
for (const line of lines) {
|
|
220
|
+
if (line.startsWith('```')) {
|
|
221
|
+
inCodeBlock = !inCodeBlock;
|
|
222
|
+
currentContent.push(line);
|
|
223
|
+
continue;
|
|
224
|
+
}
|
|
225
|
+
if (!inCodeBlock && line.match(/^#{1,6}\s/)) {
|
|
226
|
+
if (currentContent.length > 0) {
|
|
227
|
+
const contentStr = currentContent.join('\n').trim();
|
|
228
|
+
if (contentStr) {
|
|
229
|
+
sections.push({
|
|
230
|
+
headings: [...currentHeadings],
|
|
231
|
+
content: contentStr,
|
|
232
|
+
hasCode: currentContent.some((l) => l.startsWith('```')),
|
|
233
|
+
frontmatter: {},
|
|
234
|
+
});
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
|
|
238
|
+
if (headingMatch) {
|
|
239
|
+
const level = headingMatch[1].length;
|
|
240
|
+
const headingText = headingMatch[2].trim();
|
|
241
|
+
currentHeadings = currentHeadings.slice(0, level - 1);
|
|
242
|
+
currentHeadings[level - 1] = headingText;
|
|
243
|
+
currentHeadings = currentHeadings.filter(Boolean);
|
|
244
|
+
}
|
|
245
|
+
currentContent = [];
|
|
246
|
+
}
|
|
247
|
+
else {
|
|
248
|
+
currentContent.push(line);
|
|
249
|
+
}
|
|
250
|
+
}
|
|
251
|
+
if (currentContent.length > 0) {
|
|
252
|
+
const contentStr = currentContent.join('\n').trim();
|
|
253
|
+
if (contentStr) {
|
|
254
|
+
sections.push({
|
|
255
|
+
headings: [...currentHeadings],
|
|
256
|
+
content: contentStr,
|
|
257
|
+
hasCode: currentContent.some((l) => l.startsWith('```')),
|
|
258
|
+
frontmatter: {},
|
|
259
|
+
});
|
|
260
|
+
}
|
|
261
|
+
}
|
|
262
|
+
if (sections.length === 0 && content.trim()) {
|
|
263
|
+
sections.push({
|
|
264
|
+
headings: [],
|
|
265
|
+
content: content.trim(),
|
|
266
|
+
hasCode: content.includes('```'),
|
|
267
|
+
frontmatter: {},
|
|
268
|
+
});
|
|
269
|
+
}
|
|
270
|
+
return sections;
|
|
271
|
+
}
|
|
272
|
+
splitLargeSection(section) {
|
|
273
|
+
const maxSize = this.options.maxChunkSize;
|
|
274
|
+
const minSize = this.options.minChunkSize;
|
|
275
|
+
const overlap = this.options.chunkOverlap;
|
|
276
|
+
const tokens = this.estimateTokens(section.content);
|
|
277
|
+
if (tokens <= maxSize) {
|
|
278
|
+
return [section];
|
|
279
|
+
}
|
|
280
|
+
const paragraphs = section.content.split(/\n\n+/);
|
|
281
|
+
const chunks = [];
|
|
282
|
+
let currentChunk = [];
|
|
283
|
+
let currentTokens = 0;
|
|
284
|
+
for (const paragraph of paragraphs) {
|
|
285
|
+
const paragraphTokens = this.estimateTokens(paragraph);
|
|
286
|
+
if (currentTokens + paragraphTokens > maxSize && currentChunk.length > 0) {
|
|
287
|
+
chunks.push({
|
|
288
|
+
...section,
|
|
289
|
+
content: currentChunk.join('\n\n'),
|
|
290
|
+
});
|
|
291
|
+
const overlapText = this.getOverlapText(currentChunk, overlap);
|
|
292
|
+
currentChunk = overlapText ? [overlapText] : [];
|
|
293
|
+
currentTokens = this.estimateTokens(currentChunk.join('\n\n'));
|
|
294
|
+
}
|
|
295
|
+
if (paragraphTokens > maxSize) {
|
|
296
|
+
const sentences = this.splitIntoSentences(paragraph);
|
|
297
|
+
for (const sentence of sentences) {
|
|
298
|
+
const sentenceTokens = this.estimateTokens(sentence);
|
|
299
|
+
if (currentTokens + sentenceTokens > maxSize && currentChunk.length > 0) {
|
|
300
|
+
chunks.push({
|
|
301
|
+
...section,
|
|
302
|
+
content: currentChunk.join(' '),
|
|
303
|
+
});
|
|
304
|
+
currentChunk = [];
|
|
305
|
+
currentTokens = 0;
|
|
306
|
+
}
|
|
307
|
+
currentChunk.push(sentence);
|
|
308
|
+
currentTokens += sentenceTokens;
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
else {
|
|
312
|
+
currentChunk.push(paragraph);
|
|
313
|
+
currentTokens += paragraphTokens;
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
if (currentChunk.length > 0) {
|
|
317
|
+
const content = currentChunk.join('\n\n');
|
|
318
|
+
if (this.estimateTokens(content) >= minSize || chunks.length === 0) {
|
|
319
|
+
chunks.push({
|
|
320
|
+
...section,
|
|
321
|
+
content,
|
|
322
|
+
});
|
|
323
|
+
}
|
|
324
|
+
else if (chunks.length > 0) {
|
|
325
|
+
const lastChunk = chunks[chunks.length - 1];
|
|
326
|
+
lastChunk.content += '\n\n' + content;
|
|
327
|
+
}
|
|
328
|
+
}
|
|
329
|
+
return chunks;
|
|
330
|
+
}
|
|
331
|
+
getOverlapText(chunks, targetTokens) {
|
|
332
|
+
const result = [];
|
|
333
|
+
let tokens = 0;
|
|
334
|
+
for (let i = chunks.length - 1; i >= 0 && tokens < targetTokens; i--) {
|
|
335
|
+
const chunkTokens = this.estimateTokens(chunks[i]);
|
|
336
|
+
if (tokens + chunkTokens <= targetTokens) {
|
|
337
|
+
result.unshift(chunks[i]);
|
|
338
|
+
tokens += chunkTokens;
|
|
339
|
+
}
|
|
340
|
+
else {
|
|
341
|
+
break;
|
|
342
|
+
}
|
|
343
|
+
}
|
|
344
|
+
return result.join('\n\n');
|
|
345
|
+
}
|
|
346
|
+
splitIntoSentences(text) {
|
|
347
|
+
return text.split(/(?<=[.!?])\s+/).filter(Boolean);
|
|
348
|
+
}
|
|
349
|
+
estimateTokens(text) {
|
|
350
|
+
return Math.ceil(text.length / 4);
|
|
351
|
+
}
|
|
352
|
+
extractTags(content) {
|
|
353
|
+
const tagMatches = content.match(/#[\w-]+/g);
|
|
354
|
+
if (tagMatches) {
|
|
355
|
+
return { tags: [...new Set(tagMatches.map((t) => t.slice(1)))] };
|
|
356
|
+
}
|
|
357
|
+
return {};
|
|
358
|
+
}
|
|
359
|
+
extractWikilinks(content) {
|
|
360
|
+
const linkMatches = content.match(/\[\[([^\]|]+)(?:\|[^\]]+)?\]\]/g);
|
|
361
|
+
if (linkMatches) {
|
|
362
|
+
const links = linkMatches.map((l) => {
|
|
363
|
+
const match = l.match(/\[\[([^\]|]+)/);
|
|
364
|
+
return match ? match[1] : '';
|
|
365
|
+
}).filter(Boolean);
|
|
366
|
+
return { links: [...new Set(links)] };
|
|
367
|
+
}
|
|
368
|
+
return {};
|
|
369
|
+
}
|
|
370
|
+
generateChunkId(filePath, content, index) {
|
|
371
|
+
const namespace = this.options.namespace ?? 'default';
|
|
372
|
+
const relativePath = this.getRelativePath(filePath);
|
|
373
|
+
const hash = crypto.createHash('md5').update(content).digest('hex').slice(0, 8);
|
|
374
|
+
return `${namespace}:${relativePath}:${hash}:${index}`;
|
|
375
|
+
}
|
|
376
|
+
getRelativePath(filePath) {
|
|
377
|
+
const baseDir = this.getBaseDir();
|
|
378
|
+
return path.relative(baseDir, filePath);
|
|
379
|
+
}
|
|
380
|
+
getBaseDir() {
|
|
381
|
+
const patternParts = this.pattern.split(/[*?]/);
|
|
382
|
+
return patternParts[0].replace(/\/$/, '') || '.';
|
|
383
|
+
}
|
|
384
|
+
matchesPattern(filePath) {
|
|
385
|
+
return filePath.endsWith('.md');
|
|
386
|
+
}
|
|
387
|
+
hashContent(content) {
|
|
388
|
+
return crypto.createHash('md5').update(content).digest('hex');
|
|
389
|
+
}
|
|
390
|
+
parseFrontmatterValue(value) {
|
|
391
|
+
if (value.startsWith('[') && value.endsWith(']')) {
|
|
392
|
+
return value
|
|
393
|
+
.slice(1, -1)
|
|
394
|
+
.split(',')
|
|
395
|
+
.map((segment) => segment.trim().replace(/^["']|["']$/g, ''));
|
|
396
|
+
}
|
|
397
|
+
if (value === 'true') {
|
|
398
|
+
return true;
|
|
399
|
+
}
|
|
400
|
+
if (value === 'false') {
|
|
401
|
+
return false;
|
|
402
|
+
}
|
|
403
|
+
if (!Number.isNaN(Number(value)) && value !== '') {
|
|
404
|
+
return Number(value);
|
|
405
|
+
}
|
|
406
|
+
return value.replace(/^["']|["']$/g, '');
|
|
407
|
+
}
|
|
408
|
+
}
|
|
409
|
+
exports.MarkdownSource = MarkdownSource;
|
|
410
|
+
//# sourceMappingURL=markdown-source.js.map
|