@dcyfr/ai-rag 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +588 -0
  3. package/dist/index.d.ts +8 -0
  4. package/dist/index.d.ts.map +1 -0
  5. package/dist/index.js +10 -0
  6. package/dist/index.js.map +1 -0
  7. package/dist/loaders/html/index.d.ts +26 -0
  8. package/dist/loaders/html/index.d.ts.map +1 -0
  9. package/dist/loaders/html/index.js +106 -0
  10. package/dist/loaders/html/index.js.map +1 -0
  11. package/dist/loaders/index.d.ts +8 -0
  12. package/dist/loaders/index.d.ts.map +1 -0
  13. package/dist/loaders/index.js +7 -0
  14. package/dist/loaders/index.js.map +1 -0
  15. package/dist/loaders/markdown/index.d.ts +33 -0
  16. package/dist/loaders/markdown/index.d.ts.map +1 -0
  17. package/dist/loaders/markdown/index.js +150 -0
  18. package/dist/loaders/markdown/index.js.map +1 -0
  19. package/dist/loaders/text/index.d.ts +21 -0
  20. package/dist/loaders/text/index.d.ts.map +1 -0
  21. package/dist/loaders/text/index.js +78 -0
  22. package/dist/loaders/text/index.js.map +1 -0
  23. package/dist/pipeline/embedding/generator.d.ts +24 -0
  24. package/dist/pipeline/embedding/generator.d.ts.map +1 -0
  25. package/dist/pipeline/embedding/generator.js +42 -0
  26. package/dist/pipeline/embedding/generator.js.map +1 -0
  27. package/dist/pipeline/embedding/index.d.ts +8 -0
  28. package/dist/pipeline/embedding/index.d.ts.map +1 -0
  29. package/dist/pipeline/embedding/index.js +6 -0
  30. package/dist/pipeline/embedding/index.js.map +1 -0
  31. package/dist/pipeline/embedding/pipeline.d.ts +26 -0
  32. package/dist/pipeline/embedding/pipeline.d.ts.map +1 -0
  33. package/dist/pipeline/embedding/pipeline.js +59 -0
  34. package/dist/pipeline/embedding/pipeline.js.map +1 -0
  35. package/dist/pipeline/index.d.ts +7 -0
  36. package/dist/pipeline/index.d.ts.map +1 -0
  37. package/dist/pipeline/index.js +7 -0
  38. package/dist/pipeline/index.js.map +1 -0
  39. package/dist/pipeline/ingestion/index.d.ts +5 -0
  40. package/dist/pipeline/ingestion/index.d.ts.map +1 -0
  41. package/dist/pipeline/ingestion/index.js +5 -0
  42. package/dist/pipeline/ingestion/index.js.map +1 -0
  43. package/dist/pipeline/ingestion/pipeline.d.ts +27 -0
  44. package/dist/pipeline/ingestion/pipeline.d.ts.map +1 -0
  45. package/dist/pipeline/ingestion/pipeline.js +118 -0
  46. package/dist/pipeline/ingestion/pipeline.js.map +1 -0
  47. package/dist/pipeline/retrieval/index.d.ts +5 -0
  48. package/dist/pipeline/retrieval/index.d.ts.map +1 -0
  49. package/dist/pipeline/retrieval/index.js +5 -0
  50. package/dist/pipeline/retrieval/index.js.map +1 -0
  51. package/dist/pipeline/retrieval/pipeline.d.ts +29 -0
  52. package/dist/pipeline/retrieval/pipeline.d.ts.map +1 -0
  53. package/dist/pipeline/retrieval/pipeline.js +109 -0
  54. package/dist/pipeline/retrieval/pipeline.js.map +1 -0
  55. package/dist/stores/index.d.ts +5 -0
  56. package/dist/stores/index.d.ts.map +1 -0
  57. package/dist/stores/index.js +5 -0
  58. package/dist/stores/index.js.map +1 -0
  59. package/dist/stores/vector/in-memory.d.ts +52 -0
  60. package/dist/stores/vector/in-memory.d.ts.map +1 -0
  61. package/dist/stores/vector/in-memory.js +172 -0
  62. package/dist/stores/vector/in-memory.js.map +1 -0
  63. package/dist/stores/vector/index.d.ts +6 -0
  64. package/dist/stores/vector/index.d.ts.map +1 -0
  65. package/dist/stores/vector/index.js +5 -0
  66. package/dist/stores/vector/index.js.map +1 -0
  67. package/dist/types/index.d.ts +259 -0
  68. package/dist/types/index.d.ts.map +1 -0
  69. package/dist/types/index.js +5 -0
  70. package/dist/types/index.js.map +1 -0
  71. package/docs/DOCUMENT_LOADERS.md +621 -0
  72. package/docs/EMBEDDINGS.md +733 -0
  73. package/docs/PIPELINES.md +771 -0
  74. package/docs/VECTOR_STORES.md +754 -0
  75. package/package.json +100 -0
@@ -0,0 +1,106 @@
1
+ /**
2
+ * HTML document loader
3
+ * Handles HTML files (.html, .htm)
4
+ */
5
+ import { promises as fs } from 'node:fs';
6
+ import { basename } from 'node:path';
7
/**
 * Loader for HTML documents (`.html`, `.htm`).
 *
 * Reads a file from disk, derives a title, optionally reduces the markup to
 * plain text, and optionally splits the result into overlapping fixed-size
 * chunks.
 */
export class HTMLLoader {
  /** File extensions this loader declares support for. */
  supportedExtensions = ['.html', '.htm'];

  /**
   * Load one HTML file.
   *
   * @param {string} source - Path of the file to read.
   * @param {object} [config] - Loader options read here: `preserveFormatting`
   *   (keep the raw markup as content), `metadata` (spread last, so it
   *   overrides the generated fields) and `chunkSize` / `chunkOverlap`.
   * @returns {Promise<object[]>} One document, or its chunks when
   *   `config.chunkSize` is truthy.
   * @throws {Error} `Failed to load HTML file …`; the underlying error is
   *   attached as `cause`.
   */
  async load(source, config) {
    try {
      const content = await fs.readFile(source, 'utf-8');
      const stats = await fs.stat(source);

      const document = {
        id: this.generateId(source),
        content: config?.preserveFormatting ? content : this.extractText(content),
        metadata: {
          source,
          type: 'html',
          createdAt: stats.birthtime,
          updatedAt: stats.mtime,
          title: this.extractTitle(content) ?? basename(source),
          ...config?.metadata,
        },
      };

      return config?.chunkSize ? this.chunkDocument(document, config) : [document];
    } catch (error) {
      // Same message as before; `cause` keeps the original stack and error code.
      throw new Error(`Failed to load HTML file ${source}: ${error}`, { cause: error });
    }
  }

  /**
   * Read the text of the first `<title>` element.
   *
   * Tolerates attributes on the tag and line breaks inside it; internal
   * whitespace is collapsed.
   *
   * @param {string} html - Raw HTML.
   * @returns {string | undefined} The title, or `undefined` when there is no
   *   `<title>` or it is blank.
   */
  extractTitle(html) {
    const match = html.match(/<title\b[^>]*>([\s\S]*?)<\/title>/i);
    const title = match?.[1].replace(/\s+/g, ' ').trim();
    return title ? title : undefined;
  }

  /**
   * Extract text content from HTML.
   * This is a simple regex-based implementation - for production use a proper
   * HTML parser.
   *
   * @param {string} html - Raw HTML.
   * @returns {string} Text with tags removed, a basic set of entities decoded
   *   and all whitespace runs collapsed to single spaces.
   */
  extractText(html) {
    return html
      // Remove script and style elements together with their contents
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, '')
      .replace(/<style\b[^<]*(?:(?!<\/style>)<[^<]*)*<\/style>/gi, '')
      // Remove HTML comments
      .replace(/<!--[\s\S]*?-->/g, '')
      // Replace remaining tags with a space so adjacent words stay separated
      .replace(/<[^>]+>/g, ' ')
      // Decode HTML entities (basic)
      .replace(/&nbsp;/g, ' ')
      .replace(/&lt;/g, '<')
      .replace(/&gt;/g, '>')
      .replace(/&quot;/g, '"')
      .replace(/&#39;/g, "'")
      // `&amp;` must be decoded last: otherwise `&amp;quot;` would first become
      // `&quot;` and then be decoded a second time into `"`.
      .replace(/&amp;/g, '&')
      // Collapse every whitespace run (including newlines) to one space
      .replace(/\s+/g, ' ')
      .trim();
  }

  /**
   * Split a document into fixed-size character chunks with overlap.
   *
   * @param {object} document - Object with `id`, `content` and `metadata`.
   * @param {object} config - Reads `chunkSize` (default 1000) and
   *   `chunkOverlap` (default 200).
   * @returns {object[]} Chunks with ids `<document.id>-chunk-<n>` whose
   *   metadata adds `chunkIndex`, `startChar`, `endChar`, `parentDocumentId`.
   * @throws {RangeError} When `chunkSize` is not a positive number.
   */
  chunkDocument(document, config) {
    const chunkSize = config.chunkSize ?? 1000;
    if (!(chunkSize > 0)) {
      throw new RangeError(`chunkSize must be a positive number, received ${chunkSize}`);
    }
    // Clamp the overlap into [0, chunkSize - 1]. Without this, an overlap that
    // is >= chunkSize makes the step zero or negative and the loop never ends.
    const chunkOverlap = Math.max(0, Math.min(config.chunkOverlap ?? 200, chunkSize - 1));
    const step = chunkSize - chunkOverlap;
    const { content } = document;
    const chunks = [];

    for (let start = 0, chunkIndex = 0; start < content.length; start += step, chunkIndex++) {
      const end = Math.min(start + chunkSize, content.length);
      chunks.push({
        id: `${document.id}-chunk-${chunkIndex}`,
        content: content.slice(start, end),
        metadata: {
          ...document.metadata,
          chunkIndex,
          startChar: start,
          endChar: end,
          parentDocumentId: document.id,
        },
      });
    }
    return chunks;
  }

  /**
   * Generate a document ID from the source path.
   *
   * The whole path is base64-encoded. The encoding is not truncated: keeping
   * only a prefix made every file under the same directory prefix share one
   * id. Sources of up to 12 bytes produce the same id as the truncated form.
   *
   * @param {string} source - Source path.
   * @returns {string} `html-` followed by the base64 encoding of `source`.
   */
  generateId(source) {
    return `html-${Buffer.from(source).toString('base64')}`;
  }
}
106
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/loaders/html/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC;;GAEG;AACH,MAAM,OAAO,UAAU;IACrB,mBAAmB,GAAG,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;IAExC,KAAK,CAAC,IAAI,CAAC,MAAc,EAAE,MAAqB;QAC9C,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACnD,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEpC,4CAA4C;YAC5C,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,wBAAwB,CAAC,CAAC;YAC3D,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAE5D,uBAAuB;YACvB,MAAM,WAAW,GAAG,MAAM,EAAE,kBAAkB;gBAC5C,CAAC,CAAC,OAAO;gBACT,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;YAE9B,MAAM,QAAQ,GAAa;gBACzB,EAAE,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;gBAC3B,OAAO,EAAE,WAAW;gBACpB,QAAQ,EAAE;oBACR,MAAM;oBACN,IAAI,EAAE,MAAM;oBACZ,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,SAAS,EAAE,KAAK,CAAC,KAAK;oBACtB,KAAK;oBACL,GAAG,MAAM,EAAE,QAAQ;iBACpB;aACF,CAAC;YAEF,+BAA+B;YAC/B,IAAI,MAAM,EAAE,SAAS,EAAE,CAAC;gBACtB,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YAC9C,CAAC;YAED,OAAO,CAAC,QAAQ,CAAC,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,KAAK,KAAK,EAAE,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED;;;OAGG;IACK,WAAW,CAAC,IAAY;QAC9B,OAAO,IAAI;YACT,+BAA+B;aAC9B,OAAO,CAAC,qDAAqD,EAAE,EAAE,CAAC;aAClE,OAAO,CAAC,kDAAkD,EAAE,EAAE,CAAC;YAChE,uBAAuB;aACtB,OAAO,CAAC,kBAAkB,EAAE,EAAE,CAAC;YAChC,mBAAmB;aAClB,OAAO,CAAC,UAAU,EAAE,GAAG,CAAC;YACzB,+BAA+B;aAC9B,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;aACtB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;YACvB,sBAAsB;aACrB,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC;aACpB,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;aAC1B,IAAI,EAAE,CAAC;IACZ,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,QAAkB,EAAE,MAAoB;QAC5D,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC;QAC3C,MAAM,YAA
Y,GAAG,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC;QAChD,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC;QACjC,MAAM,MAAM,GAAe,EAAE,CAAC;QAE9B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,OAAO,KAAK,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YACxD,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAE/C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,GAAG,QAAQ,CAAC,EAAE,UAAU,UAAU,EAAE;gBACxC,OAAO,EAAE,YAAY;gBACrB,QAAQ,EAAE;oBACR,GAAG,QAAQ,CAAC,QAAQ;oBACpB,UAAU;oBACV,SAAS,EAAE,KAAK;oBAChB,OAAO,EAAE,GAAG;oBACZ,gBAAgB,EAAE,QAAQ,CAAC,EAAE;iBAC9B;aACF,CAAC,CAAC;YAEH,KAAK,IAAI,SAAS,GAAG,YAAY,CAAC;YAClC,UAAU,EAAE,CAAC;QACf,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,MAAc;QAC/B,OAAO,QAAQ,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;IACvE,CAAC;CACF"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Document loaders
3
+ */
4
+ export { TextLoader } from './text/index.js';
5
+ export { MarkdownLoader } from './markdown/index.js';
6
+ export { HTMLLoader } from './html/index.js';
7
+ export type { Document, DocumentLoader, LoaderConfig } from '../types/index.js';
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/loaders/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAG7C,YAAY,EAAE,QAAQ,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,mBAAmB,CAAC"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Document loaders
3
+ */
4
+ export { TextLoader } from './text/index.js';
5
+ export { MarkdownLoader } from './markdown/index.js';
6
+ export { HTMLLoader } from './html/index.js';
7
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/loaders/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAC7C,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC"}
@@ -0,0 +1,33 @@
1
+ /**
2
+ * Markdown document loader
3
+ * Handles Markdown files (.md, .markdown)
4
+ */
5
+ import type { Document, DocumentLoader, LoaderConfig } from '../../types/index.js';
6
+ /**
7
+ * Load Markdown documents (type declaration for the compiled MarkdownLoader class)
8
+ */
9
+ export declare class MarkdownLoader implements DocumentLoader {
10
+ supportedExtensions: string[]; // the compiled class initialises this to ['.md', '.markdown']
11
+ load(source: string, config?: LoaderConfig): Promise<Document[]>; // reads the file at `source`; resolves to one document, or to its chunks when config.chunkSize is set
12
+ /**
13
+ * Remove markdown formatting for pure text (skipped by load() when config.preserveFormatting is set)
14
+ */
15
+ private removeFormatting;
16
+ /**
17
+ * Chunk document by sections (headings); sections longer than config.chunkSize are split further
18
+ */
19
+ private chunkBySection;
20
+ /**
21
+ * Split content by headings into { title, content } sections; text before the first heading is titled 'Introduction'
22
+ */
23
+ private splitByHeadings;
24
+ /**
25
+ * Chunk text by size with overlap (defaults in the compiled class: chunkSize 1000, chunkOverlap 200)
26
+ */
27
+ private chunkText;
28
+ /**
29
+ * Generate document ID from source (the id carries an `md-` prefix)
30
+ */
31
+ private generateId;
32
+ }
33
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/loaders/markdown/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAInF;;GAEG;AACH,qBAAa,cAAe,YAAW,cAAc;IACnD,mBAAmB,WAAwB;IAErC,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAiCtE;;OAEG;IACH,OAAO,CAAC,gBAAgB;IAsBxB;;OAEG;IACH,OAAO,CAAC,cAAc;IAsCtB;;OAEG;IACH,OAAO,CAAC,eAAe;IAwBvB;;OAEG;IACH,OAAO,CAAC,SAAS;IAejB;;OAEG;IACH,OAAO,CAAC,UAAU;CAGnB"}
@@ -0,0 +1,150 @@
1
+ /**
2
+ * Markdown document loader
3
+ * Handles Markdown files (.md, .markdown)
4
+ */
5
+ import { promises as fs } from 'node:fs';
6
+ import { basename } from 'node:path';
7
/**
 * Loader for Markdown documents (`.md`, `.markdown`).
 *
 * Reads a file from disk, takes the first level-1 heading as title,
 * optionally strips Markdown syntax, and optionally chunks the document
 * section by section.
 */
export class MarkdownLoader {
  /** File extensions this loader declares support for. */
  supportedExtensions = ['.md', '.markdown'];

  /**
   * Load one Markdown file.
   *
   * @param {string} source - Path of the file to read.
   * @param {object} [config] - Loader options read here: `preserveFormatting`
   *   (keep Markdown syntax), `metadata` (spread last, so it overrides the
   *   generated fields) and `chunkSize` / `chunkOverlap`.
   * @returns {Promise<object[]>} One document, or its section chunks when
   *   `config.chunkSize` is truthy.
   * @throws {Error} `Failed to load markdown file …`; the underlying error is
   *   attached as `cause`.
   */
  async load(source, config) {
    try {
      const raw = await fs.readFile(source, 'utf-8');
      const stats = await fs.stat(source);

      // Title comes from the first "# heading" line, else the file name.
      const titleMatch = raw.match(/^#\s+(.+)$/m);
      const title = titleMatch ? titleMatch[1] : basename(source);

      const document = {
        id: this.generateId(source),
        content: config?.preserveFormatting ? raw : this.removeFormatting(raw),
        metadata: {
          source,
          type: 'markdown',
          createdAt: stats.birthtime,
          updatedAt: stats.mtime,
          title,
          ...config?.metadata,
        },
      };

      if (config?.chunkSize) {
        // Pass the raw text: heading markers are gone from `document.content`
        // once formatting has been removed, so sections must be found first.
        return this.chunkBySection(document, config, raw);
      }
      return [document];
    } catch (error) {
      // Same message as before; `cause` keeps the original stack and error code.
      throw new Error(`Failed to load markdown file ${source}: ${error}`, { cause: error });
    }
  }

  /**
   * Remove markdown formatting for pure text.
   *
   * NOTE: the emphasis patterns are deliberately simple and also consume
   * underscores/asterisks inside words (e.g. `snake_case_name`).
   *
   * @param {string} content - Markdown text.
   * @returns {string} Text with the handled Markdown constructs removed.
   */
  removeFormatting(content) {
    return content
      // Remove code blocks
      .replace(/```[\s\S]*?```/g, '')
      // Remove horizontal rules. Must run before the emphasis patterns, which
      // would otherwise eat part of a `***` / `___` rule and leave a stray char.
      .replace(/^[-*_]{3,}$/gm, '')
      // Remove inline code markers
      .replace(/`([^`]+)`/g, '$1')
      // Remove bold/italic markers
      .replace(/(\*\*|__)(.*?)\1/g, '$2')
      .replace(/(\*|_)(.*?)\1/g, '$2')
      // Remove images. Must run before links: the link pattern also matches
      // the `[alt](url)` part of an image and would leave `!alt` behind.
      .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')
      // Remove links but keep their text
      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
      // Remove heading markers
      .replace(/^#{1,6}\s+/gm, '')
      // Clean up extra blank lines
      .replace(/\n{3,}/g, '\n\n')
      .trim();
  }

  /**
   * Chunk a document by sections (headings).
   *
   * @param {object} document - Object with `id`, `content` and `metadata`.
   * @param {object} config - Reads `chunkSize`, `chunkOverlap` and
   *   `preserveFormatting`.
   * @param {string} [rawContent] - Unmodified Markdown. When given, sections
   *   are located in it and each section is stripped of formatting unless
   *   `config.preserveFormatting` is set. When omitted, `document.content` is
   *   split as-is.
   * @returns {object[]} Chunks with ids `<id>-<section>` or
   *   `<id>-<section>-<part>` (for sections larger than `chunkSize`); metadata
   *   adds `section`, a document-wide sequential `chunkIndex`, and
   *   `parentDocumentId`.
   */
  chunkBySection(document, config, rawContent) {
    const hasRaw = typeof rawContent === 'string';
    const sections = this.splitByHeadings(hasRaw ? rawContent : document.content);
    const stripFormatting = hasRaw && !config.preserveFormatting;
    const chunks = [];

    sections.forEach((section, sectionIndex) => {
      const text = stripFormatting ? this.removeFormatting(section.content) : section.content;
      if (!text.trim()) {
        return; // e.g. a section that only contained a code block
      }

      const isOversized = Boolean(config.chunkSize) && text.length > config.chunkSize;
      const parts = isOversized ? this.chunkText(text, config) : [text];

      parts.forEach((part, partIndex) => {
        chunks.push({
          id: isOversized
            ? `${document.id}-${sectionIndex}-${partIndex}`
            : `${document.id}-${sectionIndex}`,
          content: part,
          metadata: {
            ...document.metadata,
            section: section.title,
            // Position in the output array, so indices are unique and ordered
            // whether or not earlier sections were split into several parts.
            chunkIndex: chunks.length,
            parentDocumentId: document.id,
          },
        });
      });
    });
    return chunks;
  }

  /**
   * Split content by headings.
   *
   * Lines inside ``` fences are never treated as headings, and both LF and
   * CRLF line endings are accepted.
   *
   * @param {string} content - Markdown text.
   * @returns {{ title: string, content: string }[]} Non-empty sections; text
   *   before the first heading is titled `Introduction`.
   */
  splitByHeadings(content) {
    const sections = [];
    let currentSection = { title: 'Introduction', content: '' };
    let inFence = false;

    for (const line of content.split(/\r?\n/)) {
      // An odd number of ``` markers on a line opens or closes a fence.
      const fenceMarkers = line.match(/```/g)?.length ?? 0;
      const startedInFence = inFence;
      if (fenceMarkers % 2 === 1) {
        inFence = !inFence;
      }

      const headingMatch = startedInFence || fenceMarkers > 0
        ? null
        : line.match(/^(#{1,6})\s+(.+)$/);

      if (headingMatch) {
        if (currentSection.content.trim()) {
          sections.push(currentSection);
        }
        currentSection = { title: headingMatch[2], content: '' };
      } else {
        currentSection.content += `${line}\n`;
      }
    }

    if (currentSection.content.trim()) {
      sections.push(currentSection);
    }
    return sections;
  }

  /**
   * Chunk text by size with overlap.
   *
   * @param {string} text - Text to split.
   * @param {object} config - Reads `chunkSize` (default 1000) and
   *   `chunkOverlap` (default 200).
   * @returns {string[]} Consecutive slices of `text`.
   * @throws {RangeError} When `chunkSize` is not a positive number.
   */
  chunkText(text, config) {
    const chunkSize = config.chunkSize ?? 1000;
    if (!(chunkSize > 0)) {
      throw new RangeError(`chunkSize must be a positive number, received ${chunkSize}`);
    }
    // Clamp the overlap into [0, chunkSize - 1]. Without this, an overlap that
    // is >= chunkSize makes the step zero or negative and the loop never ends.
    const chunkOverlap = Math.max(0, Math.min(config.chunkOverlap ?? 200, chunkSize - 1));
    const step = chunkSize - chunkOverlap;
    const chunks = [];

    for (let start = 0; start < text.length; start += step) {
      chunks.push(text.slice(start, Math.min(start + chunkSize, text.length)));
    }
    return chunks;
  }

  /**
   * Generate a document ID from the source path.
   *
   * The whole path is base64-encoded. The encoding is not truncated: keeping
   * only a prefix made every file under the same directory prefix share one
   * id. Sources of up to 12 bytes produce the same id as the truncated form.
   *
   * @param {string} source - Source path.
   * @returns {string} `md-` followed by the base64 encoding of `source`.
   */
  generateId(source) {
    return `md-${Buffer.from(source).toString('base64')}`;
  }
}
150
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/loaders/markdown/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC;;GAEG;AACH,MAAM,OAAO,cAAc;IACzB,mBAAmB,GAAG,CAAC,KAAK,EAAE,WAAW,CAAC,CAAC;IAE3C,KAAK,CAAC,IAAI,CAAC,MAAc,EAAE,MAAqB;QAC9C,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACnD,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEpC,8CAA8C;YAC9C,MAAM,UAAU,GAAG,OAAO,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;YAChD,MAAM,KAAK,GAAG,UAAU,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;YAE5D,MAAM,QAAQ,GAAa;gBACzB,EAAE,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;gBAC3B,OAAO,EAAE,MAAM,EAAE,kBAAkB,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,gBAAgB,CAAC,OAAO,CAAC;gBAC9E,QAAQ,EAAE;oBACR,MAAM;oBACN,IAAI,EAAE,UAAU;oBAChB,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,SAAS,EAAE,KAAK,CAAC,KAAK;oBACtB,KAAK;oBACL,GAAG,MAAM,EAAE,QAAQ;iBACpB;aACF,CAAC;YAEF,+BAA+B;YAC/B,IAAI,MAAM,EAAE,SAAS,EAAE,CAAC;gBACtB,OAAO,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YAC/C,CAAC;YAED,OAAO,CAAC,QAAQ,CAAC,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,gCAAgC,MAAM,KAAK,KAAK,EAAE,CAAC,CAAC;QACtE,CAAC;IACH,CAAC;IAED;;OAEG;IACK,gBAAgB,CAAC,OAAe;QACtC,OAAO,OAAO;YACZ,qBAAqB;aACpB,OAAO,CAAC,iBAAiB,EAAE,EAAE,CAAC;YAC/B,qBAAqB;aACpB,OAAO,CAAC,YAAY,EAAE,IAAI,CAAC;YAC5B,qBAAqB;aACpB,OAAO,CAAC,mBAAmB,EAAE,IAAI,CAAC;aAClC,OAAO,CAAC,gBAAgB,EAAE,IAAI,CAAC;YAChC,6BAA6B;aAC5B,OAAO,CAAC,wBAAwB,EAAE,IAAI,CAAC;YACxC,gBAAgB;aACf,OAAO,CAAC,yBAAyB,EAAE,EAAE,CAAC;YACvC,0BAA0B;aACzB,OAAO,CAAC,cAAc,EAAE,EAAE,CAAC;YAC5B,0BAA0B;aACzB,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC;YAC7B,4BAA4B;aAC3B,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC;aAC1B,IAAI,EAAE,CAAC;IACZ,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,QAAkB,EAAE,MAAoB;QAC7D,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC;QACjC,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAe,CAAC,OAAO,CAAC,CAAC;QAC/C,MAAM,MAAM,GAAe,EAAE,CAAC;QAE9B,QAAQ,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,KAA
K,EAAE,EAAE;YAClC,wCAAwC;YACxC,IAAI,MAAM,CAAC,SAAS,IAAI,OAAO,CAAC,OAAO,CAAC,MAAM,GAAG,MAAM,CAAC,SAAS,EAAE,CAAC;gBAClE,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,OAAO,EAAE,MAAM,CAAC,CAAC;gBAC1D,SAAS,CAAC,OAAO,CAAC,CAAC,KAAK,EAAE,QAAQ,EAAE,EAAE;oBACpC,MAAM,CAAC,IAAI,CAAC;wBACV,EAAE,EAAE,GAAG,QAAQ,CAAC,EAAE,IAAI,KAAK,IAAI,QAAQ,EAAE;wBACzC,OAAO,EAAE,KAAK;wBACd,QAAQ,EAAE;4BACR,GAAG,QAAQ,CAAC,QAAQ;4BACpB,OAAO,EAAE,OAAO,CAAC,KAAK;4BACtB,UAAU,EAAE,MAAM,CAAC,MAAM;4BACzB,gBAAgB,EAAE,QAAQ,CAAC,EAAE;yBAC9B;qBACF,CAAC,CAAC;gBACL,CAAC,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC;oBACV,EAAE,EAAE,GAAG,QAAQ,CAAC,EAAE,IAAI,KAAK,EAAE;oBAC7B,OAAO,EAAE,OAAO,CAAC,OAAO;oBACxB,QAAQ,EAAE;wBACR,GAAG,QAAQ,CAAC,QAAQ;wBACpB,OAAO,EAAE,OAAO,CAAC,KAAK;wBACtB,UAAU,EAAE,KAAK;wBACjB,gBAAgB,EAAE,QAAQ,CAAC,EAAE;qBAC9B;iBACF,CAAC,CAAC;YACL,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,eAAe,CAAC,OAAe;QACrC,MAAM,QAAQ,GAA8C,EAAE,CAAC;QAC/D,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAClC,IAAI,cAAc,GAAG,EAAE,KAAK,EAAE,cAAc,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;QAE5D,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,YAAY,GAAG,IAAI,CAAC,KAAK,CAAC,mBAAmB,CAAC,CAAC;YACrD,IAAI,YAAY,EAAE,CAAC;gBACjB,IAAI,cAAc,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;oBAClC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAChC,CAAC;gBACD,cAAc,GAAG,EAAE,KAAK,EAAE,YAAY,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC;YAC3D,CAAC;iBAAM,CAAC;gBACN,cAAc,CAAC,OAAO,IAAI,IAAI,GAAG,IAAI,CAAC;YACxC,CAAC;QACH,CAAC;QAED,IAAI,cAAc,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;YAClC,QAAQ,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAChC,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,SAAS,CAAC,IAAY,EAAE,MAAoB;QAClD,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC;QAC3C,MAAM,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,GAAG,CAAC;QAChD,MAAM,MAAM,GAAa,EAAE,CAAC;QAC5B,IAAI,KAAK,GAAG,CAAC,CAAC;QAEd,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;YAC3B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,SAAS,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YACrD,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,EAAE
,GAAG,CAAC,CAAC,CAAC;YACpC,KAAK,IAAI,SAAS,GAAG,YAAY,CAAC;QACpC,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,MAAc;QAC/B,OAAO,MAAM,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;IACrE,CAAC;CACF"}
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Text document loader
3
+ * Handles plain text files (.txt)
4
+ */
5
+ import type { Document, DocumentLoader, LoaderConfig } from '../../types/index.js';
6
+ /**
7
+ * Load plain text documents (type declaration for the compiled TextLoader class)
8
+ */
9
+ export declare class TextLoader implements DocumentLoader {
10
+ supportedExtensions: string[]; // the compiled class initialises this to ['.txt', '.text']
11
+ load(source: string, config?: LoaderConfig): Promise<Document[]>; // resolves to [] for a whitespace-only file, to chunks when config.chunkSize is set, else to a single document
12
+ /**
13
+ * Split document into chunks (fixed-size character windows with overlap)
14
+ */
15
+ private chunkDocument;
16
+ /**
17
+ * Generate document ID from source (the id carries a `text-` prefix)
18
+ */
19
+ private generateId;
20
+ }
21
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/loaders/text/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,cAAc,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAInF;;GAEG;AACH,qBAAa,UAAW,YAAW,cAAc;IAC/C,mBAAmB,WAAqB;IAElC,IAAI,CAAC,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,EAAE,YAAY,GAAG,OAAO,CAAC,QAAQ,EAAE,CAAC;IAkCtE;;OAEG;IACH,OAAO,CAAC,aAAa;IAgCrB;;OAEG;IACH,OAAO,CAAC,UAAU;CAGnB"}
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Text document loader
3
+ * Handles plain text files (.txt)
4
+ */
5
+ import { promises as fs } from 'node:fs';
6
+ import { basename } from 'node:path';
7
/**
 * Loader for plain text documents (`.txt`, `.text`).
 *
 * Reads a file from disk and returns it as a single document, or as
 * overlapping fixed-size chunks when chunking is configured.
 */
export class TextLoader {
  /** File extensions this loader declares support for. */
  supportedExtensions = ['.txt', '.text'];

  /**
   * Load one text file.
   *
   * @param {string} source - Path of the file to read.
   * @param {object} [config] - Loader options read here: `metadata` (spread
   *   last, so it overrides the generated fields) and `chunkSize` /
   *   `chunkOverlap`.
   * @returns {Promise<object[]>} `[]` for a whitespace-only file, the chunks
   *   when `config.chunkSize` is truthy, otherwise a single document.
   * @throws {Error} `Failed to load text file …`; the underlying error is
   *   attached as `cause`.
   */
  async load(source, config) {
    try {
      const content = await fs.readFile(source, 'utf-8');
      const stats = await fs.stat(source);

      // Skip empty documents
      if (!content.trim()) {
        return [];
      }

      const document = {
        id: this.generateId(source),
        content,
        metadata: {
          source,
          type: 'text',
          createdAt: stats.birthtime,
          updatedAt: stats.mtime,
          title: basename(source),
          ...config?.metadata,
        },
      };

      return config?.chunkSize ? this.chunkDocument(document, config) : [document];
    } catch (error) {
      // Same message as before; `cause` keeps the original stack and error code.
      throw new Error(`Failed to load text file ${source}: ${error}`, { cause: error });
    }
  }

  /**
   * Split a document into fixed-size character chunks with overlap.
   *
   * @param {object} document - Object with `id`, `content` and `metadata`.
   * @param {object} config - Reads `chunkSize` (default 1000) and
   *   `chunkOverlap` (default 200).
   * @returns {object[]} Chunks with ids `<document.id>-chunk-<n>` whose
   *   metadata adds `chunkIndex`, `startChar`, `endChar`, `parentDocumentId`.
   * @throws {RangeError} When `chunkSize` is not a positive number.
   */
  chunkDocument(document, config) {
    const chunkSize = config.chunkSize ?? 1000;
    if (!(chunkSize > 0)) {
      throw new RangeError(`chunkSize must be a positive number, received ${chunkSize}`);
    }
    // Clamp the overlap into [0, chunkSize - 1]: an overlap >= chunkSize would
    // stall the loop, and a negative one would make the step larger than the
    // chunk and silently skip text between chunks.
    const chunkOverlap = Math.max(0, Math.min(config.chunkOverlap ?? 200, chunkSize - 1));
    const step = chunkSize - chunkOverlap;
    const { content } = document;
    const chunks = [];

    for (let start = 0, chunkIndex = 0; start < content.length; start += step, chunkIndex++) {
      const end = Math.min(start + chunkSize, content.length);
      chunks.push({
        id: `${document.id}-chunk-${chunkIndex}`,
        content: content.slice(start, end),
        metadata: {
          ...document.metadata,
          chunkIndex,
          startChar: start,
          endChar: end,
          parentDocumentId: document.id,
        },
      });
    }
    return chunks;
  }

  /**
   * Generate a document ID from the source path.
   *
   * The whole path is base64-encoded. The encoding is not truncated: keeping
   * only a prefix made every file under the same directory prefix share one
   * id. Sources of up to 12 bytes produce the same id as the truncated form.
   *
   * @param {string} source - Source path.
   * @returns {string} `text-` followed by the base64 encoding of `source`.
   */
  generateId(source) {
    return `text-${Buffer.from(source).toString('base64')}`;
  }
}
78
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/loaders/text/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAGH,OAAO,EAAE,QAAQ,IAAI,EAAE,EAAE,MAAM,SAAS,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,WAAW,CAAC;AAErC;;GAEG;AACH,MAAM,OAAO,UAAU;IACrB,mBAAmB,GAAG,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAExC,KAAK,CAAC,IAAI,CAAC,MAAc,EAAE,MAAqB;QAC9C,IAAI,CAAC;YACH,MAAM,OAAO,GAAG,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;YACnD,MAAM,KAAK,GAAG,MAAM,EAAE,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YAEpC,MAAM,QAAQ,GAAa;gBACzB,EAAE,EAAE,IAAI,CAAC,UAAU,CAAC,MAAM,CAAC;gBAC3B,OAAO;gBACP,QAAQ,EAAE;oBACR,MAAM;oBACN,IAAI,EAAE,MAAM;oBACZ,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,SAAS,EAAE,KAAK,CAAC,KAAK;oBACtB,KAAK,EAAE,QAAQ,CAAC,MAAM,CAAC;oBACvB,GAAG,MAAM,EAAE,QAAQ;iBACpB;aACF,CAAC;YAEF,uBAAuB;YACvB,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;gBACpB,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,+BAA+B;YAC/B,IAAI,MAAM,EAAE,SAAS,EAAE,CAAC;gBACtB,OAAO,IAAI,CAAC,aAAa,CAAC,QAAQ,EAAE,MAAM,CAAC,CAAC;YAC9C,CAAC;YAED,OAAO,CAAC,QAAQ,CAAC,CAAC;QACpB,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,IAAI,KAAK,CAAC,4BAA4B,MAAM,KAAK,KAAK,EAAE,CAAC,CAAC;QAClE,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,QAAkB,EAAE,MAAoB;QAC5D,MAAM,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,IAAI,CAAC;QAC3C,MAAM,YAAY,GAAG,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC,YAAY,IAAI,GAAG,EAAE,SAAS,GAAG,CAAC,CAAC,CAAC;QACzE,MAAM,OAAO,GAAG,QAAQ,CAAC,OAAO,CAAC;QACjC,MAAM,MAAM,GAAe,EAAE,CAAC;QAE9B,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,OAAO,KAAK,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;YAC9B,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,SAAS,EAAE,OAAO,CAAC,MAAM,CAAC,CAAC;YACxD,MAAM,YAAY,GAAG,OAAO,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC;YAE/C,MAAM,CAAC,IAAI,CAAC;gBACV,EAAE,EAAE,GAAG,QAAQ,CAAC,EAAE,UAAU,UAAU,EAAE;gBACxC,OAAO,EAAE,YAAY;gBACrB,QAAQ,EAAE;oBACR,GAAG,QAAQ,CAAC,QAAQ;oBACpB,UAAU;oBACV,SAAS,EAAE,KAAK;oBAChB,OAAO,EAAE,GAAG;oBACZ,gBAAgB,EAAE,QAAQ,CAAC,EAAE;iBAC9B;aACF,CAAC,CAAC;YAEH,KAAK,IAAI,SAAS,GAAG,YAAY,CAAC;YAClC,UAAU,EAAE,CAAC;QACf,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG
;IACK,UAAU,CAAC,MAAc;QAC/B,OAAO,QAAQ,MAAM,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;IACvE,CAAC;CACF"}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * Simple embedding generator (placeholder implementation)
3
+ * In production, integrate with OpenAI, Anthropic, or local models
4
+ */
5
+ import type { EmbeddingGenerator, EmbeddingConfig } from '../../types/index.js';
6
+ export interface SimpleEmbeddingOptions {
7
+ dimensions?: number; // length of each generated vector; the compiled constructor falls back to 384 when omitted
8
+ }
9
+ /**
10
+ * Simple embedding generator using text hashing (type declaration for the compiled class)
11
+ * For demonstration purposes - use real embeddings in production
12
+ */
13
+ export declare class SimpleEmbeddingGenerator implements EmbeddingGenerator {
14
+ private dimensions; // vector length chosen at construction time
15
+ constructor(options?: SimpleEmbeddingOptions);
16
+ embed(texts: string[], _config?: EmbeddingConfig): Promise<number[][]>; // one vector per input text, in input order; `_config` is not read by the compiled implementation
17
+ getDimensions(): number; // returns the configured vector length
18
+ /**
19
+ * Generate simple embedding using character-based hashing (character-code counts per bucket, then normalised to unit length)
20
+ * This is NOT a real embedding - use OpenAI/Anthropic/local models in production
21
+ */
22
+ private generateEmbedding;
23
+ }
24
+ //# sourceMappingURL=generator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.d.ts","sourceRoot":"","sources":["../../../src/pipeline/embedding/generator.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,kBAAkB,EAAE,eAAe,EAAE,MAAM,sBAAsB,CAAC;AAEhF,MAAM,WAAW,sBAAsB;IACrC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;GAGG;AACH,qBAAa,wBAAyB,YAAW,kBAAkB;IACjE,OAAO,CAAC,UAAU,CAAS;gBAEf,OAAO,GAAE,sBAA2B;IAI1C,KAAK,CAAC,KAAK,EAAE,MAAM,EAAE,EAAE,OAAO,CAAC,EAAE,eAAe,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAI5E,aAAa,IAAI,MAAM;IAIvB;;;OAGG;IACH,OAAO,CAAC,iBAAiB;CAoB1B"}
@@ -0,0 +1,42 @@
1
+ /**
2
+ * Simple embedding generator (placeholder implementation)
3
+ * In production, integrate with OpenAI, Anthropic, or local models
4
+ */
5
/**
 * Simple embedding generator using text hashing.
 * For demonstration purposes - use real embeddings in production.
 *
 * Each text becomes a bag-of-character-codes histogram with `dimensions`
 * buckets, normalised to unit length.
 */
export class SimpleEmbeddingGenerator {
  /** Length of every vector this generator produces. */
  dimensions;

  /**
   * @param {object} [options]
   * @param {number} [options.dimensions=384] - Vector length; must be a
   *   positive integer.
   * @throws {RangeError} When `dimensions` is not a positive integer. Checked
   *   here so a bad value fails at construction instead of producing an
   *   "Invalid array length" error, or an array polluted with a `NaN` key, on
   *   the first `embed()` call.
   */
  constructor(options = {}) {
    const dimensions = options.dimensions ?? 384; // Common embedding size
    if (!Number.isInteger(dimensions) || dimensions <= 0) {
      throw new RangeError(`dimensions must be a positive integer, received ${dimensions}`);
    }
    this.dimensions = dimensions;
  }

  /**
   * Embed a batch of texts.
   *
   * @param {string[]} texts - Texts to embed.
   * @param {object} [_config] - Accepted for interface compatibility; unused.
   * @returns {Promise<number[][]>} One vector per text, in input order.
   */
  async embed(texts, _config) {
    return texts.map((text) => this.generateEmbedding(text));
  }

  /**
   * @returns {number} The configured vector length.
   */
  getDimensions() {
    return this.dimensions;
  }

  /**
   * Generate simple embedding using character-based hashing.
   * This is NOT a real embedding - use OpenAI/Anthropic/local models in production.
   *
   * @param {string} text - Text to embed.
   * @returns {number[]} Unit-length vector, or all zeros for an empty string.
   */
  generateEmbedding(text) {
    const embedding = new Array(this.dimensions).fill(0);

    // Count UTF-16 code units per bucket (code unit modulo vector length).
    for (let i = 0; i < text.length; i++) {
      embedding[text.charCodeAt(i) % this.dimensions] += 1;
    }

    // Normalise to unit length; an all-zero vector is returned unchanged to
    // avoid dividing by zero.
    const magnitude = Math.sqrt(embedding.reduce((sum, value) => sum + value * value, 0));
    return magnitude > 0 ? embedding.map((value) => value / magnitude) : embedding;
  }
}
42
+ //# sourceMappingURL=generator.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"generator.js","sourceRoot":"","sources":["../../../src/pipeline/embedding/generator.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAQH;;;GAGG;AACH,MAAM,OAAO,wBAAwB;IAC3B,UAAU,CAAS;IAE3B,YAAY,UAAkC,EAAE;QAC9C,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,UAAU,IAAI,GAAG,CAAC,CAAC,wBAAwB;IACvE,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,KAAe,EAAE,OAAyB;QACpD,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC,CAAC;IAC3D,CAAC;IAED,aAAa;QACX,OAAO,IAAI,CAAC,UAAU,CAAC;IACzB,CAAC;IAED;;;OAGG;IACK,iBAAiB,CAAC,IAAY;QACpC,MAAM,SAAS,GAAG,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAErD,kCAAkC;QAClC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACrC,MAAM,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC,CAAC,CAAC,CAAC;YACpC,MAAM,KAAK,GAAG,QAAQ,GAAG,IAAI,CAAC,UAAU,CAAC;YACzC,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QACxB,CAAC;QAED,mBAAmB;QACnB,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,GAAG,EAAE,EAAE,CAAC,GAAG,GAAI,GAAG,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,CAAC;QACjF,IAAI,SAAS,GAAG,CAAC,EAAE,CAAC;YAClB,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBAC1C,SAAS,CAAC,CAAC,CAAC,IAAI,SAAS,CAAC;YAC5B,CAAC;QACH,CAAC;QAED,OAAO,SAAS,CAAC;IACnB,CAAC;CACF"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Embedding module exports
3
+ */
4
+ export { SimpleEmbeddingGenerator } from './generator.js';
5
+ export { EmbeddingPipeline } from './pipeline.js';
6
+ export type { SimpleEmbeddingOptions } from './generator.js';
7
+ export type { EmbeddingPipelineOptions } from './pipeline.js';
8
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/pipeline/embedding/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC;AAElD,YAAY,EAAE,sBAAsB,EAAE,MAAM,gBAAgB,CAAC;AAC7D,YAAY,EAAE,wBAAwB,EAAE,MAAM,eAAe,CAAC"}
@@ -0,0 +1,6 @@
1
+ /**
2
+ * Embedding module exports
3
+ */
4
+ export { SimpleEmbeddingGenerator } from './generator.js';
5
+ export { EmbeddingPipeline } from './pipeline.js';
6
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/pipeline/embedding/index.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,EAAE,wBAAwB,EAAE,MAAM,gBAAgB,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,eAAe,CAAC"}
@@ -0,0 +1,26 @@
1
+ /**
2
+ * Embedding pipeline for processing documents
3
+ */
4
+ import type { Document, DocumentChunk, EmbeddingGenerator } from '../../types/index.js';
5
+ export interface EmbeddingPipelineOptions {
6
+ /** Number of chunk texts passed to the embedder per call; the implementation falls back to 32 when omitted */
7
+ batchSize?: number;
8
+ /** Progress callback, invoked after each batch with the number of chunks embedded so far and the total chunk count */
9
+ onProgress?: (processed: number, total: number) => void;
10
+ }
11
+ /**
12
+ * Pipeline that turns documents into embedded chunks using the supplied embedder
13
+ */
14
+ export declare class EmbeddingPipeline {
15
+ private embedder;
16
+ constructor(embedder: EmbeddingGenerator);
17
+ /**
18
+ * Convert each document into a single chunk and attach an embedding to it, calling the embedder batch by batch
19
+ */
20
+ process(documents: Document[], options?: EmbeddingPipelineOptions): Promise<DocumentChunk[]>;
21
+ /**
22
+ * Generate the embedding for a single query string (embeds a one-element batch and returns its first result)
23
+ */
24
+ embedQuery(query: string): Promise<number[]>;
25
+ }
26
+ //# sourceMappingURL=pipeline.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../../../src/pipeline/embedding/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH,OAAO,KAAK,EAAE,QAAQ,EAAE,aAAa,EAAE,kBAAkB,EAAE,MAAM,sBAAsB,CAAC;AAExF,MAAM,WAAW,wBAAwB;IACvC,0CAA0C;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,wBAAwB;IACxB,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACzD;AAED;;GAEG;AACH,qBAAa,iBAAiB;IAChB,OAAO,CAAC,QAAQ;gBAAR,QAAQ,EAAE,kBAAkB;IAEhD;;OAEG;IACG,OAAO,CACX,SAAS,EAAE,QAAQ,EAAE,EACrB,OAAO,GAAE,wBAA6B,GACrC,OAAO,CAAC,aAAa,EAAE,CAAC;IA0C3B;;OAEG;IACG,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;CAInD"}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Embedding pipeline for processing documents
3
+ */
4
/**
 * Pipeline for generating embeddings from documents.
 *
 * Each input document becomes exactly one chunk; the chunk texts are sent to
 * the embedder in batches and the returned vectors are attached to the chunks.
 */
export class EmbeddingPipeline {
  embedder;

  /**
   * @param embedder - Object exposing an async `embed(texts)` method that
   *   resolves to one embedding per input text, in input order.
   */
  constructor(embedder) {
    this.embedder = embedder;
  }

  /**
   * Process documents and generate embeddings.
   *
   * @param documents - Documents with `id`, `content` and optional `metadata`.
   * @param options - Optional settings.
   * @param options.batchSize - Texts per `embed` call (default 32, must be >= 1).
   * @param options.onProgress - Called after each batch with
   *   `(processedSoFar, total)`.
   * @returns The chunks (one per document) with `embedding` populated.
   * @throws {RangeError} If `batchSize` is not a number >= 1.
   * @throws {Error} If the embedder returns fewer embeddings than texts.
   */
  async process(documents, options = {}) {
    const batchSize = options.batchSize ?? 32;
    // A zero or negative step would keep the batching loop from ever
    // terminating, and NaN would skip every chunk; reject those up front.
    if (!(batchSize >= 1)) {
      throw new RangeError(
        `batchSize must be a number >= 1, received ${String(batchSize)}`,
      );
    }

    // Convert documents to chunks (one chunk per document).
    const chunks = [];
    for (const [i, doc] of documents.entries()) {
      chunks.push({
        id: doc.id,
        documentId: doc.id,
        content: doc.content,
        index: i,
        metadata: {
          chunkIndex: i,
          chunkCount: documents.length,
          startChar: 0,
          endChar: doc.content.length,
          // Spread last so document metadata can override the defaults above.
          ...doc.metadata,
        },
      });
    }

    // Generate embeddings in batches.
    for (let i = 0; i < chunks.length; i += batchSize) {
      const batch = chunks.slice(i, i + batchSize);
      const texts = batch.map((chunk) => chunk.content);
      const embeddings = await this.embedder.embed(texts);

      // Without this check a short result would silently leave chunks with
      // an undefined embedding.
      if (embeddings.length < batch.length) {
        throw new Error(
          `Embedder returned ${embeddings.length} embeddings for a batch of ${batch.length} texts`,
        );
      }

      // Assign embeddings to chunks; `batch` shares objects with `chunks`.
      for (let j = 0; j < batch.length; j++) {
        batch[j].embedding = embeddings[j];
      }

      if (options.onProgress) {
        options.onProgress(Math.min(i + batchSize, chunks.length), chunks.length);
      }
    }

    return chunks;
  }

  /**
   * Generate embedding for a single query.
   *
   * @param query - Query text.
   * @returns The first embedding returned by the embedder for `[query]`.
   */
  async embedQuery(query) {
    const embeddings = await this.embedder.embed([query]);
    return embeddings[0];
  }
}
59
+ //# sourceMappingURL=pipeline.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"pipeline.js","sourceRoot":"","sources":["../../../src/pipeline/embedding/pipeline.ts"],"names":[],"mappings":"AAAA;;GAEG;AAWH;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACR;IAApB,YAAoB,QAA4B;QAA5B,aAAQ,GAAR,QAAQ,CAAoB;IAAG,CAAC;IAEpD;;OAEG;IACH,KAAK,CAAC,OAAO,CACX,SAAqB,EACrB,UAAoC,EAAE;QAEtC,MAAM,SAAS,GAAG,OAAO,CAAC,SAAS,IAAI,EAAE,CAAC;QAC1C,MAAM,MAAM,GAAoB,EAAE,CAAC;QAEnC,8BAA8B;QAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC1C,MAAM,GAAG,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;YACzB,MAAM,KAAK,GAAkB;gBAC3B,EAAE,EAAE,GAAG,CAAC,EAAE;gBACV,UAAU,EAAE,GAAG,CAAC,EAAE;gBAClB,OAAO,EAAE,GAAG,CAAC,OAAO;gBACpB,KAAK,EAAE,CAAC;gBACR,QAAQ,EAAE;oBACR,UAAU,EAAE,CAAC;oBACb,UAAU,EAAE,SAAS,CAAC,MAAM;oBAC5B,SAAS,EAAE,CAAC;oBACZ,OAAO,EAAE,GAAG,CAAC,OAAO,CAAC,MAAM;oBAC3B,GAAG,GAAG,CAAC,QAAQ;iBAChB;aACF,CAAC;YACF,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;QAED,iCAAiC;QACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;YAClD,MAAM,KAAK,GAAG,MAAM,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC;YAC7C,MAAM,KAAK,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAClD,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAEpD,8BAA8B;YAC9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;gBACtC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,GAAG,UAAU,CAAC,CAAC,CAAC,CAAC;YACrC,CAAC;YAED,IAAI,OAAO,CAAC,UAAU,EAAE,CAAC;gBACvB,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,GAAG,SAAS,EAAE,MAAM,CAAC,MAAM,CAAC,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;YAC5E,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,UAAU,CAAC,KAAa;QAC5B,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC;QACtD,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC;IACvB,CAAC;CACF"}
@@ -0,0 +1,7 @@
1
+ /**
2
+ * Pipeline exports
3
+ */
4
+ export * from './embedding/index.js';
5
+ export * from './ingestion/index.js';
6
+ export * from './retrieval/index.js';
7
+ //# sourceMappingURL=index.d.ts.map