@toolpack-sdk/knowledge 1.2.0-SNAPSHOT.04032026-2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/README.md +4 -4
  2. package/dist/index.cjs +25 -0
  3. package/dist/index.d.cts +239 -0
  4. package/dist/index.d.ts +239 -14
  5. package/dist/index.js +25 -31
  6. package/package.json +30 -8
  7. package/dist/embedders/ollama.d.ts +0 -15
  8. package/dist/embedders/ollama.d.ts.map +0 -1
  9. package/dist/embedders/ollama.js +0 -51
  10. package/dist/embedders/ollama.js.map +0 -1
  11. package/dist/embedders/openai.d.ts +0 -18
  12. package/dist/embedders/openai.d.ts.map +0 -1
  13. package/dist/embedders/openai.js +0 -63
  14. package/dist/embedders/openai.js.map +0 -1
  15. package/dist/errors.d.ts +0 -25
  16. package/dist/errors.d.ts.map +0 -1
  17. package/dist/errors.js +0 -58
  18. package/dist/errors.js.map +0 -1
  19. package/dist/index.d.ts.map +0 -1
  20. package/dist/index.js.map +0 -1
  21. package/dist/interfaces.d.ts +0 -48
  22. package/dist/interfaces.d.ts.map +0 -1
  23. package/dist/interfaces.js +0 -3
  24. package/dist/interfaces.js.map +0 -1
  25. package/dist/knowledge.d.ts +0 -74
  26. package/dist/knowledge.d.ts.map +0 -1
  27. package/dist/knowledge.js +0 -120
  28. package/dist/knowledge.js.map +0 -1
  29. package/dist/providers/memory.d.ts +0 -16
  30. package/dist/providers/memory.d.ts.map +0 -1
  31. package/dist/providers/memory.js +0 -72
  32. package/dist/providers/memory.js.map +0 -1
  33. package/dist/providers/persistent.d.ts +0 -23
  34. package/dist/providers/persistent.d.ts.map +0 -1
  35. package/dist/providers/persistent.js +0 -162
  36. package/dist/providers/persistent.js.map +0 -1
  37. package/dist/sources/markdown.d.ts +0 -20
  38. package/dist/sources/markdown.d.ts.map +0 -1
  39. package/dist/sources/markdown.js +0 -196
  40. package/dist/sources/markdown.js.map +0 -1
  41. package/dist/utils/chunking.d.ts +0 -6
  42. package/dist/utils/chunking.d.ts.map +0 -1
  43. package/dist/utils/chunking.js +0 -86
  44. package/dist/utils/chunking.js.map +0 -1
  45. package/dist/utils/cosine.d.ts +0 -4
  46. package/dist/utils/cosine.d.ts.map +0 -1
  47. package/dist/utils/cosine.js +0 -52
  48. package/dist/utils/cosine.js.map +0 -1
@@ -1,72 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.MemoryProvider = void 0;
4
- const errors_js_1 = require("../errors.js");
5
- const cosine_js_1 = require("../utils/cosine.js");
6
- class MemoryProvider {
7
- options;
8
- chunks = new Map();
9
- dimensions;
10
- constructor(options = {}) {
11
- this.options = options;
12
- }
13
- async validateDimensions(dimensions) {
14
- if (this.dimensions && this.dimensions !== dimensions) {
15
- throw new errors_js_1.DimensionMismatchError(this.dimensions, dimensions);
16
- }
17
- this.dimensions = dimensions;
18
- }
19
- async add(chunks) {
20
- for (const chunk of chunks) {
21
- if (!chunk.vector) {
22
- throw new errors_js_1.KnowledgeProviderError('Chunk missing vector');
23
- }
24
- if (this.options.maxChunks && this.chunks.size >= this.options.maxChunks) {
25
- throw new errors_js_1.KnowledgeProviderError(`Max chunks limit reached: ${this.options.maxChunks}`);
26
- }
27
- this.chunks.set(chunk.id, {
28
- chunk: {
29
- id: chunk.id,
30
- content: chunk.content,
31
- metadata: chunk.metadata,
32
- },
33
- vector: chunk.vector
34
- });
35
- }
36
- }
37
- async query(queryVector, options = {}) {
38
- const { limit = 10, threshold = 0.7, filter, includeMetadata = true, includeVectors = false, } = options;
39
- const results = [];
40
- for (const { chunk, vector } of this.chunks.values()) {
41
- if (filter && !(0, cosine_js_1.matchesFilter)(chunk.metadata, filter)) {
42
- continue;
43
- }
44
- const score = (0, cosine_js_1.cosineSimilarity)(queryVector, vector);
45
- if (score >= threshold) {
46
- results.push({
47
- chunk: {
48
- id: chunk.id,
49
- content: chunk.content,
50
- metadata: includeMetadata ? chunk.metadata : {},
51
- vector: includeVectors ? vector : undefined,
52
- },
53
- score,
54
- distance: 1 - score,
55
- });
56
- }
57
- }
58
- results.sort((a, b) => b.score - a.score);
59
- return results.slice(0, limit);
60
- }
61
- async delete(ids) {
62
- for (const id of ids) {
63
- this.chunks.delete(id);
64
- }
65
- }
66
- async clear() {
67
- this.chunks.clear();
68
- this.dimensions = undefined;
69
- }
70
- }
71
- exports.MemoryProvider = MemoryProvider;
72
- //# sourceMappingURL=memory.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"memory.js","sourceRoot":"","sources":["../../src/providers/memory.ts"],"names":[],"mappings":";;;AACA,4CAA8E;AAC9E,kDAAqE;AAMrE,MAAa,cAAc;IAIL;IAHZ,MAAM,GAAG,IAAI,GAAG,EAA8C,CAAC;IAC/D,UAAU,CAAU;IAE5B,YAAoB,UAAiC,EAAE;QAAnC,YAAO,GAAP,OAAO,CAA4B;IAAG,CAAC;IAE3D,KAAK,CAAC,kBAAkB,CAAC,UAAkB;QACzC,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,UAAU,KAAK,UAAU,EAAE,CAAC;YACtD,MAAM,IAAI,kCAAsB,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;QAChE,CAAC;QACD,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,MAAe;QACvB,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;YAC3B,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;gBAClB,MAAM,IAAI,kCAAsB,CAAC,sBAAsB,CAAC,CAAC;YAC3D,CAAC;YAED,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,CAAC,IAAI,IAAI,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC;gBACzE,MAAM,IAAI,kCAAsB,CAAC,6BAA6B,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,CAAC,CAAC;YAC1F,CAAC;YAED,IAAI,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,EAAE;gBACxB,KAAK,EAAE;oBACL,EAAE,EAAE,KAAK,CAAC,EAAE;oBACZ,OAAO,EAAE,KAAK,CAAC,OAAO;oBACtB,QAAQ,EAAE,KAAK,CAAC,QAAQ;iBACzB;gBACD,MAAM,EAAE,KAAK,CAAC,MAAM;aACrB,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,WAAqB,EAAE,UAAwB,EAAE;QAC3D,MAAM,EACJ,KAAK,GAAG,EAAE,EACV,SAAS,GAAG,GAAG,EACf,MAAM,EACN,eAAe,GAAG,IAAI,EACtB,cAAc,GAAG,KAAK,GACvB,GAAG,OAAO,CAAC;QAEZ,MAAM,OAAO,GAAkB,EAAE,CAAC;QAElC,KAAK,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE,EAAE,CAAC;YACrD,IAAI,MAAM,IAAI,CAAC,IAAA,yBAAa,EAAC,KAAK,CAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,CAAC;gBACrD,SAAS;YACX,CAAC;YAED,MAAM,KAAK,GAAG,IAAA,4BAAgB,EAAC,WAAW,EAAE,MAAM,CAAC,CAAC;YAEpD,IAAI,KAAK,IAAI,SAAS,EAAE,CAAC;gBACvB,OAAO,CAAC,IAAI,CAAC;oBACX,KAAK,EAAE;wBACL,EAAE,EAAE,KAAK,CAAC,EAAE;wBACZ,OAAO,EAAE,KAAK,CAAC,OAAO;wBACtB,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE;wBAC/C,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,SAAS;qBAC5C;oBACD,KAAK;oBACL,QAAQ,EAAE,CAAC,GAAG,KAAK;iBACpB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAE1C,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAa;QACxB,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;YACrB,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,EAAE,CAAC,CAAC;QACzB,CAAC;IACH,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,MAAM,CAAC,KAAK,EAAE,CAAC;QACpB,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;IAC9B,CAAC;CACF;AAjFD,wCAiFC"}
@@ -1,23 +0,0 @@
1
- import { KnowledgeProvider, Chunk, QueryOptions, QueryResult } from '../interfaces.js';
2
- export interface PersistentKnowledgeProviderOptions {
3
- namespace: string;
4
- storagePath?: string;
5
- reSync?: boolean;
6
- }
7
- export declare class PersistentKnowledgeProvider implements KnowledgeProvider {
8
- private options;
9
- private db;
10
- private dimensions?;
11
- private dbPath;
12
- constructor(options: PersistentKnowledgeProviderOptions);
13
- private initSchema;
14
- private loadDimensions;
15
- validateDimensions(dimensions: number): Promise<void>;
16
- add(chunks: Chunk[]): Promise<void>;
17
- query(queryVector: number[], options?: QueryOptions): Promise<QueryResult[]>;
18
- delete(ids: string[]): Promise<void>;
19
- clear(): Promise<void>;
20
- shouldReSync(): boolean;
21
- close(): void;
22
- }
23
- //# sourceMappingURL=persistent.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"persistent.d.ts","sourceRoot":"","sources":["../../src/providers/persistent.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,iBAAiB,EAAE,KAAK,EAAE,YAAY,EAAE,WAAW,EAAE,MAAM,kBAAkB,CAAC;AAIvF,MAAM,WAAW,kCAAkC;IACjD,SAAS,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,OAAO,CAAC;CAClB;AAED,qBAAa,2BAA4B,YAAW,iBAAiB;IAKvD,OAAO,CAAC,OAAO;IAJ3B,OAAO,CAAC,EAAE,CAAoB;IAC9B,OAAO,CAAC,UAAU,CAAC,CAAS;IAC5B,OAAO,CAAC,MAAM,CAAS;gBAEH,OAAO,EAAE,kCAAkC;IAa/D,OAAO,CAAC,UAAU;IAiBlB,OAAO,CAAC,cAAc;IAOhB,kBAAkB,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAWrD,GAAG,CAAC,MAAM,EAAE,KAAK,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IA0BnC,KAAK,CAAC,WAAW,EAAE,MAAM,EAAE,EAAE,OAAO,GAAE,YAAiB,GAAG,OAAO,CAAC,WAAW,EAAE,CAAC;IAgDhF,MAAM,CAAC,GAAG,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAUpC,KAAK,IAAI,OAAO,CAAC,IAAI,CAAC;IAM5B,YAAY,IAAI,OAAO;IAQvB,KAAK,IAAI,IAAI;CAGd"}
@@ -1,162 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- var __importDefault = (this && this.__importDefault) || function (mod) {
36
- return (mod && mod.__esModule) ? mod : { "default": mod };
37
- };
38
- Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.PersistentKnowledgeProvider = void 0;
40
- const better_sqlite3_1 = __importDefault(require("better-sqlite3"));
41
- const fs = __importStar(require("fs"));
42
- const path = __importStar(require("path"));
43
- const os = __importStar(require("os"));
44
- const errors_js_1 = require("../errors.js");
45
- const cosine_js_1 = require("../utils/cosine.js");
46
- class PersistentKnowledgeProvider {
47
- options;
48
- db;
49
- dimensions;
50
- dbPath;
51
- constructor(options) {
52
- this.options = options;
53
- const basePath = options.storagePath || path.join(os.homedir(), '.toolpack', 'knowledge');
54
- this.dbPath = path.join(basePath, `${options.namespace}.db`);
55
- fs.mkdirSync(basePath, { recursive: true });
56
- this.db = new better_sqlite3_1.default(this.dbPath);
57
- this.db.pragma('journal_mode = WAL');
58
- this.initSchema();
59
- this.loadDimensions();
60
- }
61
- initSchema() {
62
- this.db.exec(`
63
- CREATE TABLE IF NOT EXISTS chunks (
64
- id TEXT PRIMARY KEY,
65
- content TEXT NOT NULL,
66
- metadata TEXT NOT NULL,
67
- vector BLOB NOT NULL,
68
- synced_at INTEGER NOT NULL
69
- );
70
-
71
- CREATE TABLE IF NOT EXISTS provider_meta (
72
- key TEXT PRIMARY KEY,
73
- value TEXT NOT NULL
74
- );
75
- `);
76
- }
77
- loadDimensions() {
78
- const row = this.db.prepare('SELECT value FROM provider_meta WHERE key = ?').get('dimensions');
79
- if (row) {
80
- this.dimensions = parseInt(row.value, 10);
81
- }
82
- }
83
- async validateDimensions(dimensions) {
84
- if (this.dimensions && this.dimensions !== dimensions) {
85
- throw new errors_js_1.DimensionMismatchError(this.dimensions, dimensions);
86
- }
87
- if (!this.dimensions) {
88
- this.db.prepare('INSERT OR REPLACE INTO provider_meta (key, value) VALUES (?, ?)').run('dimensions', dimensions.toString());
89
- this.dimensions = dimensions;
90
- }
91
- }
92
- async add(chunks) {
93
- const insert = this.db.prepare(`
94
- INSERT OR REPLACE INTO chunks (id, content, metadata, vector, synced_at)
95
- VALUES (?, ?, ?, ?, ?)
96
- `);
97
- const transaction = this.db.transaction((chunks) => {
98
- for (const chunk of chunks) {
99
- if (!chunk.vector) {
100
- throw new errors_js_1.KnowledgeProviderError('Chunk missing vector');
101
- }
102
- const vectorBlob = Buffer.from(new Float32Array(chunk.vector).buffer);
103
- insert.run(chunk.id, chunk.content, JSON.stringify(chunk.metadata), vectorBlob, Date.now());
104
- }
105
- });
106
- transaction(chunks);
107
- }
108
- async query(queryVector, options = {}) {
109
- const { limit = 10, threshold = 0.7, filter, includeMetadata = true, includeVectors = false, } = options;
110
- const rows = this.db.prepare('SELECT id, content, metadata, vector FROM chunks').all();
111
- const results = [];
112
- for (const row of rows) {
113
- const metadata = JSON.parse(row.metadata);
114
- if (filter && !(0, cosine_js_1.matchesFilter)(metadata, filter)) {
115
- continue;
116
- }
117
- const vector = new Float32Array(row.vector.buffer, row.vector.byteOffset, row.vector.byteLength / 4);
118
- const vectorArray = Array.from(vector);
119
- const score = (0, cosine_js_1.cosineSimilarity)(queryVector, vectorArray);
120
- if (score >= threshold) {
121
- results.push({
122
- chunk: {
123
- id: row.id,
124
- content: row.content,
125
- metadata: includeMetadata ? metadata : {},
126
- vector: includeVectors ? vectorArray : undefined,
127
- },
128
- score,
129
- distance: 1 - score,
130
- });
131
- }
132
- }
133
- results.sort((a, b) => b.score - a.score);
134
- return results.slice(0, limit);
135
- }
136
- async delete(ids) {
137
- const del = this.db.prepare('DELETE FROM chunks WHERE id = ?');
138
- const transaction = this.db.transaction((ids) => {
139
- for (const id of ids) {
140
- del.run(id);
141
- }
142
- });
143
- transaction(ids);
144
- }
145
- async clear() {
146
- this.db.prepare('DELETE FROM chunks').run();
147
- this.db.prepare('DELETE FROM provider_meta').run();
148
- this.dimensions = undefined;
149
- }
150
- shouldReSync() {
151
- if (this.options.reSync === false) {
152
- const count = this.db.prepare('SELECT COUNT(*) as count FROM chunks').get();
153
- return count.count === 0;
154
- }
155
- return true;
156
- }
157
- close() {
158
- this.db.close();
159
- }
160
- }
161
- exports.PersistentKnowledgeProvider = PersistentKnowledgeProvider;
162
- //# sourceMappingURL=persistent.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"persistent.js","sourceRoot":"","sources":["../../src/providers/persistent.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,oEAAsC;AACtC,uCAAyB;AACzB,2CAA6B;AAC7B,uCAAyB;AAEzB,4CAA8E;AAC9E,kDAAqE;AAQrE,MAAa,2BAA2B;IAKlB;IAJZ,EAAE,CAAoB;IACtB,UAAU,CAAU;IACpB,MAAM,CAAS;IAEvB,YAAoB,OAA2C;QAA3C,YAAO,GAAP,OAAO,CAAoC;QAC7D,MAAM,QAAQ,GAAG,OAAO,CAAC,WAAW,IAAI,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,OAAO,EAAE,EAAE,WAAW,EAAE,WAAW,CAAC,CAAC;QAC1F,IAAI,CAAC,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,GAAG,OAAO,CAAC,SAAS,KAAK,CAAC,CAAC;QAE7D,EAAE,CAAC,SAAS,CAAC,QAAQ,EAAE,EAAE,SAAS,EAAE,IAAI,EAAE,CAAC,CAAC;QAE5C,IAAI,CAAC,EAAE,GAAG,IAAI,wBAAQ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;QACpC,IAAI,CAAC,EAAE,CAAC,MAAM,CAAC,oBAAoB,CAAC,CAAC;QAErC,IAAI,CAAC,UAAU,EAAE,CAAC;QAClB,IAAI,CAAC,cAAc,EAAE,CAAC;IACxB,CAAC;IAEO,UAAU;QAChB,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC;;;;;;;;;;;;;KAaZ,CAAC,CAAC;IACL,CAAC;IAEO,cAAc;QACpB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,+CAA+C,CAAC,CAAC,GAAG,CAAC,YAAY,CAAkC,CAAC;QAChI,IAAI,GAAG,EAAE,CAAC;YACR,IAAI,CAAC,UAAU,GAAG,QAAQ,CAAC,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC5C,CAAC;IACH,CAAC;IAED,KAAK,CAAC,kBAAkB,CAAC,UAAkB;QACzC,IAAI,IAAI,CAAC,UAAU,IAAI,IAAI,CAAC,UAAU,KAAK,UAAU,EAAE,CAAC;YACtD,MAAM,IAAI,kCAAsB,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,CAAC;QAChE,CAAC;QAED,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,CAAC;YACrB,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,iEAAiE,CAAC,CAAC,GAAG,CAAC,YAAY,EAAE,UAAU,CAAC,QAAQ,EAAE,CAAC,CAAC;YAC5H,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;QAC/B,CAAC;IACH,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,MAAe;QACvB,MAAM,MAAM,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC;;;KAG9B,CAAC,CAAC;QAEH,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC,MAAe,EAAE,EAAE;YAC1D,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;gBAC3B,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC;oBAClB,MAAM,IAAI,kCAAsB,CAAC,sBAAsB,CAAC,CAAC;gBAC3D,CAAC;gBAED,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC;gBACtE,MAAM,CAAC,GAAG,CACR,KAAK,CAAC,EAAE,EACR,KAAK,CAAC,OAAO,EACb,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,QAAQ,CAAC,EAC9B,UAAU,EACV,IAAI,CAAC,GAAG,EAAE,CACX,CAAC;YACJ,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,WAAW,CAAC,MAAM,CAAC,CAAC;IACtB,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,WAAqB,EAAE,UAAwB,EAAE;QAC3D,MAAM,EACJ,KAAK,GAAG,EAAE,EACV,SAAS,GAAG,GAAG,EACf,MAAM,EACN,eAAe,GAAG,IAAI,EACtB,cAAc,GAAG,KAAK,GACvB,GAAG,OAAO,CAAC;QAEZ,MAAM,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,kDAAkD,CAAC,CAAC,GAAG,EAKlF,CAAC;QAEH,MAAM,OAAO,GAAkB,EAAE,CAAC;QAElC,KAAK,MAAM,GAAG,IAAI,IAAI,EAAE,CAAC;YACvB,MAAM,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;YAE1C,IAAI,MAAM,IAAI,CAAC,IAAA,yBAAa,EAAC,QAAQ,EAAE,MAAM,CAAC,EAAE,CAAC;gBAC/C,SAAS;YACX,CAAC;YAED,MAAM,MAAM,GAAG,IAAI,YAAY,CAAC,GAAG,CAAC,MAAM,CAAC,MAAM,EAAE,GAAG,CAAC,MAAM,CAAC,UAAU,EAAE,GAAG,CAAC,MAAM,CAAC,UAAU,GAAG,CAAC,CAAC,CAAC;YACrG,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACvC,MAAM,KAAK,GAAG,IAAA,4BAAgB,EAAC,WAAW,EAAE,WAAW,CAAC,CAAC;YAEzD,IAAI,KAAK,IAAI,SAAS,EAAE,CAAC;gBACvB,OAAO,CAAC,IAAI,CAAC;oBACX,KAAK,EAAE;wBACL,EAAE,EAAE,GAAG,CAAC,EAAE;wBACV,OAAO,EAAE,GAAG,CAAC,OAAO;wBACpB,QAAQ,EAAE,eAAe,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE;wBACzC,MAAM,EAAE,cAAc,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC,CAAC,SAAS;qBACjD;oBACD,KAAK;oBACL,QAAQ,EAAE,CAAC,GAAG,KAAK;iBACpB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAAC,CAAC;QAE1C,OAAO,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC;IACjC,CAAC;IAED,KAAK,CAAC,MAAM,CAAC,GAAa;QACxB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,iCAAiC,CAAC,CAAC;QAC/D,MAAM,WAAW,GAAG,IAAI,CAAC,EAAE,CAAC,WAAW,CAAC,CAAC,GAAa,EAAE,EAAE;YACxD,KAAK,MAAM,EAAE,IAAI,GAAG,EAAE,CAAC;gBACrB,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC,CAAC;YACd,CAAC;QACH,CAAC,CAAC,CAAC;QACH,WAAW,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC;IAED,KAAK,CAAC,KAAK;QACT,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,oBAAoB,CAAC,CAAC,GAAG,EAAE,CAAC;QAC5C,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,2BAA2B,CAAC,CAAC,GAAG,EAAE,CAAC;QACnD,IAAI,CAAC,UAAU,GAAG,SAAS,CAAC;IAC9B,CAAC;IAED,YAAY;QACV,IAAI,IAAI,CAAC,OAAO,CAAC,MAAM,KAAK,KAAK,EAAE,CAAC;YAClC,MAAM,KAAK,GAAG,IAAI,CAAC,EAAE,CAAC,OAAO,CAAC,sCAAsC,CAAC,CAAC,GAAG,EAAuB,CAAC;YACjG,OAAO,KAAK,CAAC,KAAK,KAAK,CAAC,CAAC;QAC3B,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC;IAED,KAAK;QACH,IAAI,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;IAClB,CAAC;CACF;AA1JD,kEA0JC"}
@@ -1,20 +0,0 @@
1
- import { KnowledgeSource, Chunk } from '../interfaces.js';
2
- export interface MarkdownSourceOptions {
3
- maxChunkSize?: number;
4
- chunkOverlap?: number;
5
- minChunkSize?: number;
6
- namespace?: string;
7
- metadata?: Record<string, unknown>;
8
- }
9
- export declare class MarkdownSource implements KnowledgeSource {
10
- private pattern;
11
- private options;
12
- constructor(pattern: string, options?: MarkdownSourceOptions);
13
- load(): AsyncIterable<Chunk>;
14
- private chunkMarkdown;
15
- private parseHeadings;
16
- private extractFrontmatter;
17
- private removeFrontmatter;
18
- private generateChunkId;
19
- }
20
- //# sourceMappingURL=markdown.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"markdown.d.ts","sourceRoot":"","sources":["../../src/sources/markdown.ts"],"names":[],"mappings":"AAIA,OAAO,EAAE,eAAe,EAAE,KAAK,EAAE,MAAM,kBAAkB,CAAC;AAI1D,MAAM,WAAW,qBAAqB;IACpC,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;CACpC;AAQD,qBAAa,cAAe,YAAW,eAAe;IAIlD,OAAO,CAAC,OAAO;IAHjB,OAAO,CAAC,OAAO,CAAkC;gBAGvC,OAAO,EAAE,MAAM,EACvB,OAAO,GAAE,qBAA0B;IAW9B,IAAI,IAAI,aAAa,CAAC,KAAK,CAAC;IAiBnC,OAAO,CAAC,aAAa;IA0DrB,OAAO,CAAC,aAAa;IA8CrB,OAAO,CAAC,kBAAkB;IA8B1B,OAAO,CAAC,iBAAiB;IAIzB,OAAO,CAAC,eAAe;CAKxB"}
@@ -1,196 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- Object.defineProperty(exports, "__esModule", { value: true });
36
- exports.MarkdownSource = void 0;
37
- const fs = __importStar(require("fs/promises"));
38
- const path = __importStar(require("path"));
39
- const crypto = __importStar(require("crypto"));
40
- const fast_glob_1 = require("fast-glob");
41
- const errors_js_1 = require("../errors.js");
42
- const chunking_js_1 = require("../utils/chunking.js");
43
- class MarkdownSource {
44
- pattern;
45
- options;
46
- constructor(pattern, options = {}) {
47
- this.pattern = pattern;
48
- this.options = {
49
- maxChunkSize: options.maxChunkSize ?? 2000,
50
- chunkOverlap: options.chunkOverlap ?? 200,
51
- minChunkSize: options.minChunkSize ?? 100,
52
- namespace: options.namespace ?? 'markdown',
53
- metadata: options.metadata ?? {},
54
- };
55
- }
56
- async *load() {
57
- const files = await (0, fast_glob_1.glob)(this.pattern, { absolute: true });
58
- for (const file of files) {
59
- try {
60
- const content = await fs.readFile(file, 'utf-8');
61
- const chunks = this.chunkMarkdown(content, file);
62
- for (const chunk of chunks) {
63
- yield chunk;
64
- }
65
- }
66
- catch (error) {
67
- throw new errors_js_1.IngestionError(`Failed to process file: ${error.message}`, file);
68
- }
69
- }
70
- }
71
- chunkMarkdown(content, filePath) {
72
- const frontmatter = this.extractFrontmatter(content);
73
- const contentWithoutFrontmatter = this.removeFrontmatter(content);
74
- const sections = this.parseHeadings(contentWithoutFrontmatter);
75
- const chunks = [];
76
- let chunkIndex = 0;
77
- for (const section of sections) {
78
- const hasCode = /```[\s\S]*?```/.test(section.content);
79
- const tokens = (0, chunking_js_1.estimateTokens)(section.content);
80
- if (tokens < this.options.minChunkSize && chunks.length > 0) {
81
- const lastChunk = chunks[chunks.length - 1];
82
- lastChunk.content += '\n\n' + section.content;
83
- if (hasCode) {
84
- lastChunk.metadata.hasCode = true;
85
- }
86
- continue;
87
- }
88
- let sectionChunks;
89
- if (tokens > this.options.maxChunkSize) {
90
- sectionChunks = (0, chunking_js_1.splitLargeChunk)(section.content, this.options.maxChunkSize);
91
- }
92
- else {
93
- sectionChunks = [section.content];
94
- }
95
- if (this.options.chunkOverlap > 0 && sectionChunks.length > 1) {
96
- sectionChunks = (0, chunking_js_1.applyOverlap)(sectionChunks, this.options.chunkOverlap);
97
- }
98
- for (let i = 0; i < sectionChunks.length; i++) {
99
- const chunkContent = sectionChunks[i];
100
- const chunkId = this.generateChunkId(filePath, chunkContent, chunkIndex);
101
- chunks.push({
102
- id: chunkId,
103
- content: chunkContent,
104
- metadata: {
105
- ...this.options.metadata,
106
- ...frontmatter,
107
- heading: section.heading,
108
- hasCode,
109
- source: path.basename(filePath),
110
- sourcePath: filePath,
111
- chunkIndex,
112
- totalChunks: sectionChunks.length,
113
- },
114
- });
115
- chunkIndex++;
116
- }
117
- }
118
- return chunks;
119
- }
120
- parseHeadings(content) {
121
- const lines = content.split('\n');
122
- const sections = [];
123
- const headingStack = [];
124
- let currentContent = [];
125
- for (const line of lines) {
126
- const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
127
- if (headingMatch) {
128
- if (currentContent.length > 0) {
129
- const headingPath = headingStack.map(h => h.text);
130
- sections.push({
131
- heading: headingPath.length > 0 ? [...headingPath] : [''],
132
- content: currentContent.join('\n').trim(),
133
- level: headingStack.length > 0 ? headingStack[headingStack.length - 1].level : 0,
134
- });
135
- currentContent = [];
136
- }
137
- const level = headingMatch[1].length;
138
- const text = headingMatch[2].trim();
139
- while (headingStack.length > 0 && headingStack[headingStack.length - 1].level >= level) {
140
- headingStack.pop();
141
- }
142
- headingStack.push({ level, text });
143
- currentContent.push(line);
144
- }
145
- else {
146
- currentContent.push(line);
147
- }
148
- }
149
- if (currentContent.length > 0) {
150
- const headingPath = headingStack.map(h => h.text);
151
- sections.push({
152
- heading: headingPath.length > 0 ? [...headingPath] : [''],
153
- content: currentContent.join('\n').trim(),
154
- level: headingStack.length > 0 ? headingStack[headingStack.length - 1].level : 0,
155
- });
156
- }
157
- return sections.filter(s => s.content.length > 0);
158
- }
159
- extractFrontmatter(content) {
160
- const frontmatterMatch = content.match(/^---\n([\s\S]*?)\n---/);
161
- if (!frontmatterMatch) {
162
- return {};
163
- }
164
- const frontmatterText = frontmatterMatch[1];
165
- const frontmatter = {};
166
- const lines = frontmatterText.split('\n');
167
- for (const line of lines) {
168
- const match = line.match(/^(\w+):\s*(.+)$/);
169
- if (match) {
170
- const key = match[1];
171
- let value = match[2].trim();
172
- if (value === 'true')
173
- value = true;
174
- else if (value === 'false')
175
- value = false;
176
- else if (!isNaN(Number(value)))
177
- value = Number(value);
178
- else if (typeof value === 'string' && value.startsWith('[') && value.endsWith(']')) {
179
- value = value.slice(1, -1).split(',').map((v) => v.trim());
180
- }
181
- frontmatter[key] = value;
182
- }
183
- }
184
- return frontmatter;
185
- }
186
- removeFrontmatter(content) {
187
- return content.replace(/^---\n[\s\S]*?\n---\n/, '');
188
- }
189
- generateChunkId(filePath, content, index) {
190
- const hash = crypto.createHash('md5').update(content).digest('hex').substring(0, 8);
191
- const filename = path.basename(filePath, path.extname(filePath));
192
- return `${this.options.namespace}:${filename}:${index}:${hash}`;
193
- }
194
- }
195
- exports.MarkdownSource = MarkdownSource;
196
- //# sourceMappingURL=markdown.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"markdown.js","sourceRoot":"","sources":["../../src/sources/markdown.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA,gDAAkC;AAClC,2CAA6B;AAC7B,+CAAiC;AACjC,yCAAiC;AAEjC,4CAA8C;AAC9C,sDAAqF;AAgBrF,MAAa,cAAc;IAIf;IAHF,OAAO,CAAkC;IAEjD,YACU,OAAe,EACvB,UAAiC,EAAE;QAD3B,YAAO,GAAP,OAAO,CAAQ;QAGvB,IAAI,CAAC,OAAO,GAAG;YACb,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,IAAI;YAC1C,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,GAAG;YACzC,YAAY,EAAE,OAAO,CAAC,YAAY,IAAI,GAAG;YACzC,SAAS,EAAE,OAAO,CAAC,SAAS,IAAI,UAAU;YAC1C,QAAQ,EAAE,OAAO,CAAC,QAAQ,IAAI,EAAE;SACjC,CAAC;IACJ,CAAC;IAED,KAAK,CAAC,CAAC,IAAI;QACT,MAAM,KAAK,GAAG,MAAM,IAAA,gBAAI,EAAC,IAAI,CAAC,OAAO,EAAE,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAE3D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,CAAC;gBACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;gBACjD,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;gBAEjD,KAAK,MAAM,KAAK,IAAI,MAAM,EAAE,CAAC;oBAC3B,MAAM,KAAK,CAAC;gBACd,CAAC;YACH,CAAC;YAAC,OAAO,KAAK,EAAE,CAAC;gBACf,MAAM,IAAI,0BAAc,CAAC,2BAA4B,KAAe,CAAC,OAAO,EAAE,EAAE,IAAI,CAAC,CAAC;YACxF,CAAC;QACH,CAAC;IACH,CAAC;IAEO,aAAa,CAAC,OAAe,EAAE,QAAgB;QACrD,MAAM,WAAW,GAAG,IAAI,CAAC,kBAAkB,CAAC,OAAO,CAAC,CAAC;QACrD,MAAM,yBAAyB,GAAG,IAAI,CAAC,iBAAiB,CAAC,OAAO,CAAC,CAAC;QAClE,MAAM,QAAQ,GAAG,IAAI,CAAC,aAAa,CAAC,yBAAyB,CAAC,CAAC;QAC/D,MAAM,MAAM,GAAY,EAAE,CAAC;QAE3B,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,MAAM,OAAO,GAAG,gBAAgB,CAAC,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACvD,MAAM,MAAM,GAAG,IAAA,4BAAc,EAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YAE/C,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC5D,MAAM,SAAS,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;gBAC5C,SAAS,CAAC,OAAO,IAAI,MAAM,GAAG,OAAO,CAAC,OAAO,CAAC;gBAC9C,IAAI,OAAO,EAAE,CAAC;oBACZ,SAAS,CAAC,QAAQ,CAAC,OAAO,GAAG,IAAI,CAAC;gBACpC,CAAC;gBACD,SAAS;YACX,CAAC;YAED,IAAI,aAAuB,CAAC;YAC5B,IAAI,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,YAAY,EAAE,CAAC;gBACvC,aAAa,GAAG,IAAA,6BAAe,EAAC,OAAO,CAAC,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YAC9E,CAAC;iBAAM,CAAC;gBACN,aAAa,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;YACpC,CAAC;YAED,IAAI,IAAI,CAAC,OAAO,CAAC,YAAY,GAAG,CAAC,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAC9D,aAAa,GAAG,IAAA,0BAAY,EAAC,aAAa,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;YACzE,CAAC;YAED,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,aAAa,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC9C,MAAM,YAAY,GAAG,aAAa,CAAC,CAAC,CAAC,CAAC;gBACtC,MAAM,OAAO,GAAG,IAAI,CAAC,eAAe,CAAC,QAAQ,EAAE,YAAY,EAAE,UAAU,CAAC,CAAC;gBAEzE,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,OAAO;oBACX,OAAO,EAAE,YAAY;oBACrB,QAAQ,EAAE;wBACR,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ;wBACxB,GAAG,WAAW;wBACd,OAAO,EAAE,OAAO,CAAC,OAAO;wBACxB,OAAO;wBACP,MAAM,EAAE,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC;wBAC/B,UAAU,EAAE,QAAQ;wBACpB,UAAU;wBACV,WAAW,EAAE,aAAa,CAAC,MAAM;qBAClC;iBACF,CAAC,CAAC;gBAEH,UAAU,EAAE,CAAC;YACf,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAEO,aAAa,CAAC,OAAe;QACnC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,MAAM,QAAQ,GAAc,EAAE,CAAC;QAC/B,MAAM,YAAY,GAAsC,EAAE,CAAC;QAC3D,IAAI,cAAc,GAAa,EAAE,CAAC;QAElC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;YAErD,IAAI,YAAY,EAAE,CAAC;gBACjB,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC9B,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;oBAClD,QAAQ,CAAC,IAAI,CAAC;wBACZ,OAAO,EAAE,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;wBACzD,OAAO,EAAE,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;wBACzC,KAAK,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;qBACjF,CAAC,CAAC;oBACH,cAAc,GAAG,EAAE,CAAC;gBACtB,CAAC;gBAED,MAAM,KAAK,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;gBACrC,MAAM,IAAI,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAEpC,OAAO,YAAY,CAAC,MAAM,GAAG,CAAC,IAAI,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,IAAI,KAAK,EAAE,CAAC;oBACvF,YAAY,CAAC,GAAG,EAAE,CAAC;gBACrB,CAAC;gBAED,YAAY,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,IAAI,EAAE,CAAC,CAAC;gBACnC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC5B,CAAC;iBAAM,CAAC;gBACN,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,IAAI,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC9B,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAClD,QAAQ,CAAC,IAAI,CAAC;gBACZ,OAAO,EAAE,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,GAAG,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;gBACzD,OAAO,EAAE,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE;gBACzC,KAAK,EAAE,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;aACjF,CAAC,CAAC;QACL,CAAC;QAED,OAAO,QAAQ,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACpD,CAAC;IAEO,kBAAkB,CAAC,OAAe;QACxC,MAAM,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;QAChE,IAAI,CAAC,gBAAgB,EAAE,CAAC;YACtB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,MAAM,eAAe,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;QAC5C,MAAM,WAAW,GAA4B,EAAE,CAAC;QAEhD,MAAM,KAAK,GAAG,eAAe,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC1C,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,iBAAiB,CAAC,CAAC;YAC5C,IAAI,KAAK,EAAE,CAAC;gBACV,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;gBACrB,IAAI,KAAK,GAAY,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;gBAErC,IAAI,KAAK,KAAK,MAAM;oBAAE,KAAK,GAAG,IAAI,CAAC;qBAC9B,IAAI,KAAK,KAAK,OAAO;oBAAE,KAAK,GAAG,KAAK,CAAC;qBACrC,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;oBAAE,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC;qBACjD,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,KAAK,CAAC,QAAQ,CAAC,GAAG,CAAC,EAAE,CAAC;oBACnF,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAS,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;gBACrE,CAAC;gBAED,WAAW,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,OAAO,WAAW,CAAC;IACrB,CAAC;IAEO,iBAAiB,CAAC,OAAe;QACvC,OAAO,OAAO,CAAC,OAAO,CAAC,uBAAuB,EAAE,EAAE,CAAC,CAAC;IACtD,CAAC;IAEO,eAAe,CAAC,QAAgB,EAAE,OAAe,EAAE,KAAa;QACtE,MAAM,IAAI,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,SAAS,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC;QACpF,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,CAAC;QACjE,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,SAAS,IAAI,QAAQ,IAAI,KAAK,IAAI,IAAI,EAAE,CAAC;IAClE,CAAC;CACF;AAhLD,wCAgLC"}
@@ -1,6 +0,0 @@
1
- export declare function estimateTokens(text: string): number;
2
- export declare function splitByParagraphs(text: string, maxTokens: number): string[];
3
- export declare function splitBySentences(text: string, maxTokens: number): string[];
4
- export declare function applyOverlap(chunks: string[], overlapTokens: number): string[];
5
- export declare function splitLargeChunk(text: string, maxTokens: number): string[];
6
- //# sourceMappingURL=chunking.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"chunking.d.ts","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":"AAAA,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAEnD;AAED,wBAAgB,iBAAiB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,CAsB3E;AAED,wBAAgB,gBAAgB,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,CAsB1E;AAED,wBAAgB,YAAY,CAAC,MAAM,EAAE,MAAM,EAAE,EAAE,aAAa,EAAE,MAAM,GAAG,MAAM,EAAE,CAsB9E;AAED,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM,EAAE,CAmBzE"}
@@ -1,86 +0,0 @@
1
- "use strict";
2
- Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.estimateTokens = estimateTokens;
4
- exports.splitByParagraphs = splitByParagraphs;
5
- exports.splitBySentences = splitBySentences;
6
- exports.applyOverlap = applyOverlap;
7
- exports.splitLargeChunk = splitLargeChunk;
8
- function estimateTokens(text) {
9
- return Math.ceil(text.length / 4);
10
- }
11
- function splitByParagraphs(text, maxTokens) {
12
- const paragraphs = text.split(/\n\n+/);
13
- const chunks = [];
14
- let currentChunk = '';
15
- for (const paragraph of paragraphs) {
16
- const paragraphTokens = estimateTokens(paragraph);
17
- const currentTokens = estimateTokens(currentChunk);
18
- if (currentTokens + paragraphTokens > maxTokens && currentChunk) {
19
- chunks.push(currentChunk.trim());
20
- currentChunk = paragraph;
21
- }
22
- else {
23
- currentChunk += (currentChunk ? '\n\n' : '') + paragraph;
24
- }
25
- }
26
- if (currentChunk) {
27
- chunks.push(currentChunk.trim());
28
- }
29
- return chunks;
30
- }
31
- function splitBySentences(text, maxTokens) {
32
- const sentences = text.match(/[^.!?]+[.!?]+/g) || [text];
33
- const chunks = [];
34
- let currentChunk = '';
35
- for (const sentence of sentences) {
36
- const sentenceTokens = estimateTokens(sentence);
37
- const currentTokens = estimateTokens(currentChunk);
38
- if (currentTokens + sentenceTokens > maxTokens && currentChunk) {
39
- chunks.push(currentChunk.trim());
40
- currentChunk = sentence;
41
- }
42
- else {
43
- currentChunk += (currentChunk ? ' ' : '') + sentence;
44
- }
45
- }
46
- if (currentChunk) {
47
- chunks.push(currentChunk.trim());
48
- }
49
- return chunks;
50
- }
51
- function applyOverlap(chunks, overlapTokens) {
52
- if (chunks.length <= 1 || overlapTokens === 0) {
53
- return chunks;
54
- }
55
- const overlappedChunks = [];
56
- for (let i = 0; i < chunks.length; i++) {
57
- let chunk = chunks[i];
58
- if (i > 0) {
59
- const prevChunk = chunks[i - 1];
60
- const words = prevChunk.split(/\s+/);
61
- const overlapWords = Math.ceil(overlapTokens / 4);
62
- const overlap = words.slice(-overlapWords).join(' ');
63
- chunk = overlap + ' ' + chunk;
64
- }
65
- overlappedChunks.push(chunk);
66
- }
67
- return overlappedChunks;
68
- }
69
- function splitLargeChunk(text, maxTokens) {
70
- const tokens = estimateTokens(text);
71
- if (tokens <= maxTokens) {
72
- return [text];
73
- }
74
- const paragraphChunks = splitByParagraphs(text, maxTokens);
75
- const finalChunks = [];
76
- for (const chunk of paragraphChunks) {
77
- if (estimateTokens(chunk) > maxTokens) {
78
- finalChunks.push(...splitBySentences(chunk, maxTokens));
79
- }
80
- else {
81
- finalChunks.push(chunk);
82
- }
83
- }
84
- return finalChunks;
85
- }
86
- //# sourceMappingURL=chunking.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"chunking.js","sourceRoot":"","sources":["../../src/utils/chunking.ts"],"names":[],"mappings":";;AAAA,wCAEC;AAED,8CAsBC;AAED,4CAsBC;AAED,oCAsBC;AAED,0CAmBC;AA/FD,SAAgB,cAAc,CAAC,IAAY;IACzC,OAAO,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;AACpC,CAAC;AAED,SAAgB,iBAAiB,CAAC,IAAY,EAAE,SAAiB;IAC/D,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IACvC,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;QACnC,MAAM,eAAe,GAAG,cAAc,CAAC,SAAS,CAAC,CAAC;QAClD,MAAM,aAAa,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;QAEnD,IAAI,aAAa,GAAG,eAAe,GAAG,SAAS,IAAI,YAAY,EAAE,CAAC;YAChE,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;YACjC,YAAY,GAAG,SAAS,CAAC;QAC3B,CAAC;aAAM,CAAC;YACN,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC;QAC3D,CAAC;IACH,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAgB,gBAAgB,CAAC,IAAY,EAAE,SAAiB;IAC9D,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACzD,MAAM,MAAM,GAAa,EAAE,CAAC;IAC5B,IAAI,YAAY,GAAG,EAAE,CAAC;IAEtB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;QACjC,MAAM,cAAc,GAAG,cAAc,CAAC,QAAQ,CAAC,CAAC;QAChD,MAAM,aAAa,GAAG,cAAc,CAAC,YAAY,CAAC,CAAC;QAEnD,IAAI,aAAa,GAAG,cAAc,GAAG,SAAS,IAAI,YAAY,EAAE,CAAC;YAC/D,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;YACjC,YAAY,GAAG,QAAQ,CAAC;QAC1B,CAAC;aAAM,CAAC;YACN,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC;QACvD,CAAC;IACH,CAAC;IAED,IAAI,YAAY,EAAE,CAAC;QACjB,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;IACnC,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC;AAED,SAAgB,YAAY,CAAC,MAAgB,EAAE,aAAqB;IAClE,IAAI,MAAM,CAAC,MAAM,IAAI,CAAC,IAAI,aAAa,KAAK,CAAC,EAAE,CAAC;QAC9C,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,MAAM,gBAAgB,GAAa,EAAE,CAAC;IAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QACvC,IAAI,KAAK,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;QAEtB,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YACV,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAChC,MAAM,KAAK,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YACrC,MAAM,YAAY,GAAG,IAAI,CAAC,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC;YAClD,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACrD,KAAK,GAAG,OAAO,GAAG,GAAG,GAAG,KAAK,CAAC;QAChC,CAAC;QAED,gBAAgB,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC/B,CAAC;IAED,OAAO,gBAAgB,CAAC;AAC1B,CAAC;AAED,SAAgB,eAAe,CAAC,IAAY,EAAE,SAAiB;IAC7D,MAAM,MAAM,GAAG,cAAc,CAAC,IAAI,CAAC,CAAC;IAEpC,IAAI,MAAM,IAAI,SAAS,EAAE,CAAC;QACxB,OAAO,CAAC,IAAI,CAAC,CAAC;IAChB,CAAC;IAED,MAAM,eAAe,GAAG,iBAAiB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IAE3D,MAAM,WAAW,GAAa,EAAE,CAAC;IACjC,KAAK,MAAM,KAAK,IAAI,eAAe,EAAE,CAAC;QACpC,IAAI,cAAc,CAAC,KAAK,CAAC,GAAG,SAAS,EAAE,CAAC;YACtC,WAAW,CAAC,IAAI,CAAC,GAAG,gBAAgB,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC,CAAC;QAC1D,CAAC;aAAM,CAAC;YACN,WAAW,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,OAAO,WAAW,CAAC;AACrB,CAAC"}
@@ -1,4 +0,0 @@
1
- import { MetadataFilter } from '../interfaces.js';
2
- export declare function cosineSimilarity(a: number[], b: number[]): number;
3
- export declare function matchesFilter(metadata: Record<string, unknown>, filter?: MetadataFilter): boolean;
4
- //# sourceMappingURL=cosine.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"cosine.d.ts","sourceRoot":"","sources":["../../src/utils/cosine.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAElD,wBAAgB,gBAAgB,CAAC,CAAC,EAAE,MAAM,EAAE,EAAE,CAAC,EAAE,MAAM,EAAE,GAAG,MAAM,CAsBjE;AAED,wBAAgB,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,EAAE,MAAM,CAAC,EAAE,cAAc,GAAG,OAAO,CAuBjG"}