@hazeljs/rag 0.2.0-beta.59 → 0.2.0-beta.61

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (161) hide show
  1. package/dist/__tests__/graph/community-detector.test.d.ts +2 -0
  2. package/dist/__tests__/graph/community-detector.test.d.ts.map +1 -0
  3. package/dist/__tests__/graph/community-detector.test.js +87 -0
  4. package/dist/__tests__/graph/community-detector.test.js.map +1 -0
  5. package/dist/__tests__/graph/community-summarizer.test.d.ts +2 -0
  6. package/dist/__tests__/graph/community-summarizer.test.d.ts.map +1 -0
  7. package/dist/__tests__/graph/community-summarizer.test.js +131 -0
  8. package/dist/__tests__/graph/community-summarizer.test.js.map +1 -0
  9. package/dist/__tests__/graph/entity-extractor.test.d.ts +2 -0
  10. package/dist/__tests__/graph/entity-extractor.test.d.ts.map +1 -0
  11. package/dist/__tests__/graph/entity-extractor.test.js +129 -0
  12. package/dist/__tests__/graph/entity-extractor.test.js.map +1 -0
  13. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts +2 -0
  14. package/dist/__tests__/graph/graph-rag-pipeline.test.d.ts.map +1 -0
  15. package/dist/__tests__/graph/graph-rag-pipeline.test.js +158 -0
  16. package/dist/__tests__/graph/graph-rag-pipeline.test.js.map +1 -0
  17. package/dist/__tests__/graph/knowledge-graph.test.d.ts +2 -0
  18. package/dist/__tests__/graph/knowledge-graph.test.d.ts.map +1 -0
  19. package/dist/__tests__/graph/knowledge-graph.test.js +208 -0
  20. package/dist/__tests__/graph/knowledge-graph.test.js.map +1 -0
  21. package/dist/__tests__/loaders/base.loader.test.d.ts +2 -0
  22. package/dist/__tests__/loaders/base.loader.test.d.ts.map +1 -0
  23. package/dist/__tests__/loaders/base.loader.test.js +114 -0
  24. package/dist/__tests__/loaders/base.loader.test.js.map +1 -0
  25. package/dist/__tests__/loaders/csv-file.loader.test.d.ts +2 -0
  26. package/dist/__tests__/loaders/csv-file.loader.test.d.ts.map +1 -0
  27. package/dist/__tests__/loaders/csv-file.loader.test.js +98 -0
  28. package/dist/__tests__/loaders/csv-file.loader.test.js.map +1 -0
  29. package/dist/__tests__/loaders/directory.loader.test.d.ts +2 -0
  30. package/dist/__tests__/loaders/directory.loader.test.d.ts.map +1 -0
  31. package/dist/__tests__/loaders/directory.loader.test.js +154 -0
  32. package/dist/__tests__/loaders/directory.loader.test.js.map +1 -0
  33. package/dist/__tests__/loaders/html-file.loader.test.d.ts +2 -0
  34. package/dist/__tests__/loaders/html-file.loader.test.d.ts.map +1 -0
  35. package/dist/__tests__/loaders/html-file.loader.test.js +93 -0
  36. package/dist/__tests__/loaders/html-file.loader.test.js.map +1 -0
  37. package/dist/__tests__/loaders/json-file.loader.test.d.ts +2 -0
  38. package/dist/__tests__/loaders/json-file.loader.test.d.ts.map +1 -0
  39. package/dist/__tests__/loaders/json-file.loader.test.js +84 -0
  40. package/dist/__tests__/loaders/json-file.loader.test.js.map +1 -0
  41. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts +2 -0
  42. package/dist/__tests__/loaders/markdown-file.loader.test.d.ts.map +1 -0
  43. package/dist/__tests__/loaders/markdown-file.loader.test.js +83 -0
  44. package/dist/__tests__/loaders/markdown-file.loader.test.js.map +1 -0
  45. package/dist/__tests__/loaders/text-file.loader.test.d.ts +2 -0
  46. package/dist/__tests__/loaders/text-file.loader.test.d.ts.map +1 -0
  47. package/dist/__tests__/loaders/text-file.loader.test.js +50 -0
  48. package/dist/__tests__/loaders/text-file.loader.test.js.map +1 -0
  49. package/dist/__tests__/rag-pipeline.test.d.ts +2 -0
  50. package/dist/__tests__/rag-pipeline.test.d.ts.map +1 -0
  51. package/dist/__tests__/rag-pipeline.test.js +210 -0
  52. package/dist/__tests__/rag-pipeline.test.js.map +1 -0
  53. package/dist/__tests__/retrieval/bm25.test.d.ts +2 -0
  54. package/dist/__tests__/retrieval/bm25.test.d.ts.map +1 -0
  55. package/dist/__tests__/retrieval/bm25.test.js +86 -0
  56. package/dist/__tests__/retrieval/bm25.test.js.map +1 -0
  57. package/dist/__tests__/retrieval/hybrid-search.test.d.ts +2 -0
  58. package/dist/__tests__/retrieval/hybrid-search.test.d.ts.map +1 -0
  59. package/dist/__tests__/retrieval/hybrid-search.test.js +85 -0
  60. package/dist/__tests__/retrieval/hybrid-search.test.js.map +1 -0
  61. package/dist/__tests__/retrieval/multi-query.test.d.ts +2 -0
  62. package/dist/__tests__/retrieval/multi-query.test.d.ts.map +1 -0
  63. package/dist/__tests__/retrieval/multi-query.test.js +90 -0
  64. package/dist/__tests__/retrieval/multi-query.test.js.map +1 -0
  65. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts +2 -0
  66. package/dist/__tests__/text-splitters/recursive-text-splitter.test.d.ts.map +1 -0
  67. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js +97 -0
  68. package/dist/__tests__/text-splitters/recursive-text-splitter.test.js.map +1 -0
  69. package/dist/__tests__/utils/similarity.test.d.ts +2 -0
  70. package/dist/__tests__/utils/similarity.test.d.ts.map +1 -0
  71. package/dist/__tests__/utils/similarity.test.js +47 -0
  72. package/dist/__tests__/utils/similarity.test.js.map +1 -0
  73. package/dist/agentic/index.d.ts +1 -1
  74. package/dist/agentic/index.d.ts.map +1 -1
  75. package/dist/agentic/types.d.ts +3 -2
  76. package/dist/agentic/types.d.ts.map +1 -1
  77. package/dist/graph/community-detector.d.ts +45 -0
  78. package/dist/graph/community-detector.d.ts.map +1 -0
  79. package/dist/graph/community-detector.js +153 -0
  80. package/dist/graph/community-detector.js.map +1 -0
  81. package/dist/graph/community-summarizer.d.ts +40 -0
  82. package/dist/graph/community-summarizer.d.ts.map +1 -0
  83. package/dist/graph/community-summarizer.js +127 -0
  84. package/dist/graph/community-summarizer.js.map +1 -0
  85. package/dist/graph/entity-extractor.d.ts +46 -0
  86. package/dist/graph/entity-extractor.d.ts.map +1 -0
  87. package/dist/graph/entity-extractor.js +243 -0
  88. package/dist/graph/entity-extractor.js.map +1 -0
  89. package/dist/graph/graph-rag-pipeline.d.ts +82 -0
  90. package/dist/graph/graph-rag-pipeline.d.ts.map +1 -0
  91. package/dist/graph/graph-rag-pipeline.js +397 -0
  92. package/dist/graph/graph-rag-pipeline.js.map +1 -0
  93. package/dist/graph/graph.types.d.ts +186 -0
  94. package/dist/graph/graph.types.d.ts.map +1 -0
  95. package/dist/graph/graph.types.js +20 -0
  96. package/dist/graph/graph.types.js.map +1 -0
  97. package/dist/graph/index.d.ts +15 -0
  98. package/dist/graph/index.d.ts.map +1 -0
  99. package/dist/graph/index.js +31 -0
  100. package/dist/graph/index.js.map +1 -0
  101. package/dist/graph/knowledge-graph.d.ts +57 -0
  102. package/dist/graph/knowledge-graph.d.ts.map +1 -0
  103. package/dist/graph/knowledge-graph.js +198 -0
  104. package/dist/graph/knowledge-graph.js.map +1 -0
  105. package/dist/index.d.ts +2 -0
  106. package/dist/index.d.ts.map +1 -1
  107. package/dist/index.js +4 -0
  108. package/dist/index.js.map +1 -1
  109. package/dist/loaders/base.loader.d.ts +108 -0
  110. package/dist/loaders/base.loader.d.ts.map +1 -0
  111. package/dist/loaders/base.loader.js +123 -0
  112. package/dist/loaders/base.loader.js.map +1 -0
  113. package/dist/loaders/csv-file.loader.d.ts +61 -0
  114. package/dist/loaders/csv-file.loader.d.ts.map +1 -0
  115. package/dist/loaders/csv-file.loader.js +162 -0
  116. package/dist/loaders/csv-file.loader.js.map +1 -0
  117. package/dist/loaders/directory.loader.d.ts +67 -0
  118. package/dist/loaders/directory.loader.d.ts.map +1 -0
  119. package/dist/loaders/directory.loader.js +163 -0
  120. package/dist/loaders/directory.loader.js.map +1 -0
  121. package/dist/loaders/docx.loader.d.ts +52 -0
  122. package/dist/loaders/docx.loader.d.ts.map +1 -0
  123. package/dist/loaders/docx.loader.js +110 -0
  124. package/dist/loaders/docx.loader.js.map +1 -0
  125. package/dist/loaders/github.loader.d.ts +114 -0
  126. package/dist/loaders/github.loader.d.ts.map +1 -0
  127. package/dist/loaders/github.loader.js +217 -0
  128. package/dist/loaders/github.loader.js.map +1 -0
  129. package/dist/loaders/html-file.loader.d.ts +55 -0
  130. package/dist/loaders/html-file.loader.d.ts.map +1 -0
  131. package/dist/loaders/html-file.loader.js +170 -0
  132. package/dist/loaders/html-file.loader.js.map +1 -0
  133. package/dist/loaders/index.d.ts +52 -0
  134. package/dist/loaders/index.d.ts.map +1 -0
  135. package/dist/loaders/index.js +61 -0
  136. package/dist/loaders/index.js.map +1 -0
  137. package/dist/loaders/json-file.loader.d.ts +51 -0
  138. package/dist/loaders/json-file.loader.d.ts.map +1 -0
  139. package/dist/loaders/json-file.loader.js +100 -0
  140. package/dist/loaders/json-file.loader.js.map +1 -0
  141. package/dist/loaders/markdown-file.loader.d.ts +61 -0
  142. package/dist/loaders/markdown-file.loader.d.ts.map +1 -0
  143. package/dist/loaders/markdown-file.loader.js +148 -0
  144. package/dist/loaders/markdown-file.loader.js.map +1 -0
  145. package/dist/loaders/pdf.loader.d.ts +64 -0
  146. package/dist/loaders/pdf.loader.d.ts.map +1 -0
  147. package/dist/loaders/pdf.loader.js +163 -0
  148. package/dist/loaders/pdf.loader.js.map +1 -0
  149. package/dist/loaders/text-file.loader.d.ts +39 -0
  150. package/dist/loaders/text-file.loader.d.ts.map +1 -0
  151. package/dist/loaders/text-file.loader.js +69 -0
  152. package/dist/loaders/text-file.loader.js.map +1 -0
  153. package/dist/loaders/web.loader.d.ts +87 -0
  154. package/dist/loaders/web.loader.d.ts.map +1 -0
  155. package/dist/loaders/web.loader.js +194 -0
  156. package/dist/loaders/web.loader.js.map +1 -0
  157. package/dist/loaders/youtube-transcript.loader.d.ts +92 -0
  158. package/dist/loaders/youtube-transcript.loader.d.ts.map +1 -0
  159. package/dist/loaders/youtube-transcript.loader.js +254 -0
  160. package/dist/loaders/youtube-transcript.loader.js.map +1 -0
  161. package/package.json +50 -2
@@ -0,0 +1,254 @@
1
+ "use strict";
2
+ /**
3
+ * YouTubeTranscriptLoader
4
+ *
5
+ * Downloads the caption/transcript for one or more YouTube videos and
6
+ * converts them to `Document` objects — no extra npm dependency required.
7
+ * Uses Node.js built-in `fetch` and parses the YouTube InnerTube API response.
8
+ *
9
+ * How it works:
10
+ * 1. Fetches the YouTube watch page for the video.
11
+ * 2. Extracts the `ytInitialPlayerResponse` JSON blob from the page HTML.
12
+ * 3. Locates the caption tracks list and selects the preferred language.
13
+ * 4. Fetches the caption XML file.
14
+ * 5. Parses `<text>` elements and decodes HTML entities.
15
+ * 6. Returns the transcript as one document (or split into segments).
16
+ *
17
+ * Accepts video IDs, full watch URLs, and youtu.be short URLs.
18
+ *
19
+ * @example
20
+ * ```typescript
21
+ * const loader = new YouTubeTranscriptLoader({
22
+ * videoId: 'dQw4w9WgXcQ',
23
+ * });
24
+ * const docs = await loader.load();
25
+ * // docs[0].content === "Never gonna give you up, never gonna let you..."
26
+ * // docs[0].metadata.title === "Rick Astley - Never Gonna Give You Up"
27
+ * // docs[0].metadata.videoId === "dQw4w9WgXcQ"
28
+ * ```
29
+ *
30
+ * Multiple videos:
31
+ * ```typescript
32
+ * const loader = new YouTubeTranscriptLoader({
33
+ * videoIds: ['VIDEO_ID_1', 'VIDEO_ID_2'],
34
+ * });
35
+ * ```
36
+ *
37
+ * Split into timed segments (useful for timestamped RAG retrieval):
38
+ * ```typescript
39
+ * const loader = new YouTubeTranscriptLoader({
40
+ * videoId: 'VIDEO_ID',
41
+ * segmentDuration: 120, // one document per 2-minute window
42
+ * });
43
+ * ```
44
+ */
45
/**
 * TypeScript decorator helper (reuses an existing `this.__decorate` when present).
 *
 * Applies `decorators` from last to first. With fewer than three arguments the
 * decorators receive the running result (class decoration); with more than
 * three they receive `(target, key, descriptor)` and the final descriptor is
 * written back with `Object.defineProperty`; with exactly three they receive
 * `(target, key)`. When `Reflect.decorate` exists it is used instead of the loop.
 *
 * @param {Function[]} decorators - Decorator functions to apply.
 * @param {*} target - Class constructor or prototype being decorated.
 * @param {string|symbol} [key] - Member name for member decorators.
 * @param {PropertyDescriptor|null} [desc] - Descriptor; `null` means "look it up".
 * @returns {*} The decorated class/descriptor (or the original when no decorator replaces it).
 */
var __decorate = (this && this.__decorate) || function (decorators, target, key, desc) {
    const argCount = arguments.length;
    let result;
    if (argCount < 3) {
        result = target;
    }
    else if (desc === null) {
        // A `null` descriptor asks us to read the current one from the target.
        desc = Object.getOwnPropertyDescriptor(target, key);
        result = desc;
    }
    else {
        result = desc;
    }
    if (typeof Reflect === "object" && typeof Reflect.decorate === "function") {
        result = Reflect.decorate(decorators, target, key, desc);
    }
    else {
        for (let index = decorators.length - 1; index >= 0; index--) {
            const decorator = decorators[index];
            if (!decorator) {
                continue;
            }
            let produced;
            if (argCount < 3) {
                produced = decorator(result);
            }
            else if (argCount > 3) {
                produced = decorator(target, key, result);
            }
            else {
                produced = decorator(target, key);
            }
            // A decorator that returns nothing keeps the previous result.
            result = produced || result;
        }
    }
    if (argCount > 3 && result) {
        Object.defineProperty(target, key, result);
    }
    return result;
};
51
/**
 * TypeScript metadata helper (reuses an existing `this.__metadata` when present).
 *
 * Delegates to `Reflect.metadata(k, v)` when that function exists and returns
 * `undefined` otherwise.
 *
 * @param {*} k - Metadata key.
 * @param {*} v - Metadata value.
 * @returns {*} Whatever `Reflect.metadata` returns, or `undefined`.
 */
var __metadata = (this && this.__metadata) || function (k, v) {
    const hasReflect = typeof Reflect === "object";
    if (!hasReflect || typeof Reflect.metadata !== "function") {
        return undefined;
    }
    return Reflect.metadata(k, v);
};
54
+ var YouTubeTranscriptLoader_1;
55
+ Object.defineProperty(exports, "__esModule", { value: true });
56
+ exports.YouTubeTranscriptLoader = void 0;
57
+ const base_loader_1 = require("./base.loader");
58
+ let YouTubeTranscriptLoader = YouTubeTranscriptLoader_1 = class YouTubeTranscriptLoader extends base_loader_1.BaseDocumentLoader {
59
+ constructor(options) {
60
+ super();
61
+ if (!options.videoId && (!options.videoIds || options.videoIds.length === 0)) {
62
+ throw new Error('YouTubeTranscriptLoader: provide at least one videoId.');
63
+ }
64
+ const raw = options.videoIds ?? (options.videoId ? [options.videoId] : []);
65
+ this.videoIds = raw.map((id) => this.normaliseId(id));
66
+ this.language = options.language ?? 'en';
67
+ this.segmentDuration = options.segmentDuration;
68
+ this.timeout = options.timeout ?? 15000;
69
+ this.extraMetadata = options.metadata ?? {};
70
+ }
71
+ async load() {
72
+ const allDocs = [];
73
+ for (const videoId of this.videoIds) {
74
+ try {
75
+ const docs = await this.loadVideo(videoId);
76
+ allDocs.push(...docs);
77
+ }
78
+ catch (err) {
79
+ const message = err instanceof Error ? err.message : String(err);
80
+ // eslint-disable-next-line no-console
81
+ console.warn(`[YouTubeTranscriptLoader] Skipping ${videoId}: ${message}`);
82
+ }
83
+ }
84
+ return allDocs;
85
+ }
86
+ // ── Private: per-video pipeline ──────────────────────────────────────────
87
+ async loadVideo(videoId) {
88
+ const watchUrl = `${YouTubeTranscriptLoader_1.YOUTUBE_BASE}/watch?v=${videoId}`;
89
+ const html = await this.fetch(watchUrl);
90
+ const { captionUrl, title, channelName } = this.extractCaptionInfo(html, videoId);
91
+ const captionXml = await this.fetch(captionUrl);
92
+ const lines = this.parseCaptionXml(captionXml);
93
+ if (lines.length === 0) {
94
+ throw new Error(`No transcript lines found for video ${videoId}`);
95
+ }
96
+ const baseMetadata = {
97
+ source: watchUrl,
98
+ videoId,
99
+ loaderType: 'youtube-transcript',
100
+ language: this.language,
101
+ ...(title && { title }),
102
+ ...(channelName && { channelName }),
103
+ ...this.extraMetadata,
104
+ };
105
+ if (!this.segmentDuration) {
106
+ const fullText = lines.map((l) => l.text).join(' ');
107
+ return [this.createDocument(fullText, { ...baseMetadata, totalLines: lines.length })];
108
+ }
109
+ return this.buildSegments(lines, videoId, baseMetadata);
110
+ }
111
+ extractCaptionInfo(html, videoId) {
112
+ // Extract ytInitialPlayerResponse
113
+ const match = YouTubeTranscriptLoader_1.RE_PLAYER_RESPONSE.exec(html);
114
+ if (!match) {
115
+ throw new Error(`Could not find ytInitialPlayerResponse for video ${videoId}. ` +
116
+ `The video may be unavailable, age-restricted, or have no captions.`);
117
+ }
118
+ // Minimal safe JSON parse — the JSON is often very large
119
+ let playerResponse;
120
+ try {
121
+ playerResponse = JSON.parse(match[1]);
122
+ }
123
+ catch {
124
+ throw new Error(`Failed to parse ytInitialPlayerResponse for video ${videoId}`);
125
+ }
126
+ const title = playerResponse.videoDetails?.title;
127
+ const channelName = playerResponse.videoDetails
128
+ ?.author;
129
+ // Navigate: captions → playerCaptionsTracklistRenderer → captionTracks[]
130
+ const captionTracks = playerResponse.captions?.playerCaptionsTracklistRenderer?.captionTracks;
131
+ if (!captionTracks || captionTracks.length === 0) {
132
+ throw new Error(`No caption tracks available for video ${videoId}. ` +
133
+ `The video may not have captions/subtitles.`);
134
+ }
135
+ // Prefer the requested language; fall back to the first available track
136
+ const track = captionTracks.find((t) => t.languageCode === this.language) ?? captionTracks[0];
137
+ return {
138
+ captionUrl: `${track.baseUrl}&fmt=xml`,
139
+ title,
140
+ channelName,
141
+ };
142
+ }
143
+ parseCaptionXml(xml) {
144
+ const lines = [];
145
+ const re = new RegExp(YouTubeTranscriptLoader_1.RE_CAPTION_TEXT.source, 'g');
146
+ let match;
147
+ while ((match = re.exec(xml)) !== null) {
148
+ const start = parseFloat(match[1]);
149
+ const duration = parseFloat(match[2]);
150
+ const raw = match[3];
151
+ const text = this.decodeEntities(raw.replace(/<[^>]+>/g, '')).trim();
152
+ if (text)
153
+ lines.push({ start, duration, text });
154
+ }
155
+ return lines;
156
+ }
157
+ buildSegments(lines, videoId, baseMetadata) {
158
+ const segments = [];
159
+ const duration = this.segmentDuration;
160
+ let segStart = 0;
161
+ let buffer = [];
162
+ let windowEnd = segStart + duration;
163
+ for (const line of lines) {
164
+ if (line.start >= windowEnd) {
165
+ if (buffer.length > 0) {
166
+ const text = buffer.join(' ');
167
+ const startTime = this.formatTime(segStart);
168
+ const endTime = this.formatTime(windowEnd);
169
+ segments.push(this.createDocument(text, {
170
+ ...baseMetadata,
171
+ startTime,
172
+ endTime,
173
+ startSeconds: segStart,
174
+ youtubeUrl: `https://youtu.be/${videoId}?t=${Math.floor(segStart)}`,
175
+ }));
176
+ buffer = [];
177
+ }
178
+ segStart = Math.floor(line.start / duration) * duration;
179
+ windowEnd = segStart + duration;
180
+ }
181
+ buffer.push(line.text);
182
+ }
183
+ // Final segment
184
+ if (buffer.length > 0) {
185
+ segments.push(this.createDocument(buffer.join(' '), {
186
+ ...baseMetadata,
187
+ startTime: this.formatTime(segStart),
188
+ endTime: this.formatTime(windowEnd),
189
+ startSeconds: segStart,
190
+ youtubeUrl: `https://youtu.be/${videoId}?t=${Math.floor(segStart)}`,
191
+ }));
192
+ }
193
+ return segments;
194
+ }
195
+ // ── Utilities ────────────────────────────────────────────────────────────
196
+ normaliseId(input) {
197
+ // youtu.be/ID
198
+ const shortMatch = input.match(/youtu\.be\/([a-zA-Z0-9_-]{11})/);
199
+ if (shortMatch)
200
+ return shortMatch[1];
201
+ // youtube.com/watch?v=ID
202
+ const watchMatch = input.match(/[?&]v=([a-zA-Z0-9_-]{11})/);
203
+ if (watchMatch)
204
+ return watchMatch[1];
205
+ // Already an 11-char ID
206
+ if (/^[a-zA-Z0-9_-]{11}$/.test(input))
207
+ return input;
208
+ throw new Error(`YouTubeTranscriptLoader: cannot parse video ID from "${input}"`);
209
+ }
210
+ async fetch(url) {
211
+ const controller = new AbortController();
212
+ const timer = setTimeout(() => controller.abort(), this.timeout);
213
+ const response = await fetch(url, {
214
+ headers: {
215
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 ' +
216
+ '(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
217
+ 'Accept-Language': 'en-US,en;q=0.9',
218
+ },
219
+ signal: controller.signal,
220
+ });
221
+ clearTimeout(timer);
222
+ if (!response.ok) {
223
+ throw new Error(`HTTP ${response.status} ${response.statusText} fetching ${url}`);
224
+ }
225
+ return response.text();
226
+ }
227
+ decodeEntities(text) {
228
+ return text
229
+ .replace(/&amp;/g, '&')
230
+ .replace(/&lt;/g, '<')
231
+ .replace(/&gt;/g, '>')
232
+ .replace(/&quot;/g, '"')
233
+ .replace(/&#39;/g, "'")
234
+ .replace(/&#(\d+);/g, (_, code) => String.fromCharCode(parseInt(code, 10)));
235
+ }
236
+ formatTime(seconds) {
237
+ const h = Math.floor(seconds / 3600);
238
+ const m = Math.floor((seconds % 3600) / 60);
239
+ const s = Math.floor(seconds % 60);
240
+ return [h, m, s].map((v) => String(v).padStart(2, '0')).join(':');
241
+ }
242
+ };
243
+ exports.YouTubeTranscriptLoader = YouTubeTranscriptLoader; // export the undecorated class first; reassigned below once decorated
244
+ YouTubeTranscriptLoader.YOUTUBE_BASE = 'https://www.youtube.com'; // origin used to build the watch-page URL
245
+ YouTubeTranscriptLoader.RE_PLAYER_RESPONSE = /ytInitialPlayerResponse\s*=\s*({.+?})\s*;/; // lazy capture: ends at the first "}" followed by optional whitespace and ";"; "." does not cross newlines
246
+ YouTubeTranscriptLoader.RE_CAPTION_TEXT = /<text[^>]*start="([^"]*)"[^>]*dur="([^"]*)"[^>]*>([\s\S]*?)<\/text>/g; // groups: start, dur, inner text; requires `start` to appear before `dur`
247
+ exports.YouTubeTranscriptLoader = YouTubeTranscriptLoader = YouTubeTranscriptLoader_1 = __decorate([ // apply the class decorators and rebind the export, the local and the self-alias to the result
248
+ (0, base_loader_1.Loader)({ // Loader decorator from ./base.loader, configured with the name/description below
249
+ name: 'YouTubeTranscriptLoader',
250
+ description: 'Downloads YouTube video transcripts/captions — no API key needed.',
251
+ }),
252
+ __metadata("design:paramtypes", [Object]) // records the constructor parameter types (a single Object) when Reflect.metadata exists
253
+ ], YouTubeTranscriptLoader);
254
+ //# sourceMappingURL=youtube-transcript.loader.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"youtube-transcript.loader.js","sourceRoot":"","sources":["../../src/loaders/youtube-transcript.loader.ts"],"names":[],"mappings":";AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;;;;;;;;;;;;;AAEH,+CAA2D;AAwCpD,IAAM,uBAAuB,+BAA7B,MAAM,uBAAwB,SAAQ,gCAAkB;IAY7D,YAAY,OAAuC;QACjD,KAAK,EAAE,CAAC;QACR,IAAI,CAAC,OAAO,CAAC,OAAO,IAAI,CAAC,CAAC,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC,QAAQ,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;YAC7E,MAAM,IAAI,KAAK,CAAC,wDAAwD,CAAC,CAAC;QAC5E,CAAC;QACD,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,IAAI,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC;QAC3E,IAAI,CAAC,QAAQ,GAAG,GAAG,CAAC,GAAG,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,IAAI,CAAC,WAAW,CAAC,EAAE,CAAC,CAAC,CAAC;QACtD,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;QACzC,IAAI,CAAC,eAAe,GAAG,OAAO,CAAC,eAAe,CAAC;QAC/C,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC,OAAO,IAAI,KAAM,CAAC;QACzC,IAAI,CAAC,aAAa,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IAC9C,CAAC;IAED,KAAK,CAAC,IAAI;QACR,MAAM,OAAO,GAAe,EAAE,CAAC;QAE/B,KAAK,MAAM,OAAO,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACpC,IAAI,CAAC;gBACH,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;gBAC3C,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;YACxB,CAAC;YAAC,OAAO,GAAY,EAAE,CAAC;gBACtB,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC;gBACjE,sCAAsC;gBACtC,OAAO,CAAC,IAAI,CAAC,sCAAsC,OAAO,KAAK,OAAO,EAAE,CAAC,CAAC;YAC5E,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;IAED,4EAA4E;IAEpE,KAAK,CAAC,SAAS,CAAC,OAAe;QACrC,MAAM,QAAQ,GAAG,GAAG,yBAAuB,CAAC,YAAY,YAAY,OAAO,EAAE,CAAC;QAE9E,MAAM,IAAI,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACxC,MAAM,EAAE,UAAU,EAAE,KAAK,EAAE,WAAW,EAAE,GAAG,IAAI,CAAC,kBAAkB,CAAC,IAAI,EAAE,OAAO,CAAC,CAAC;QAElF,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,CAAC;QAChD,MAAM,KAAK,GAAG,IAAI,CAAC,eAAe,CAAC,UAAU,CAAC,CAAC;QAE/C,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,MAAM,IAAI,KAAK,CAAC,uCAAuC,OAAO,EAAE,CAAC,CAAC;QACpE,CAAC;QAED,MAAM,YAAY,GAA4B;YAC5C,MAAM,EAAE,QAAQ;YAChB,OAAO;YACP
,UAAU,EAAE,oBAAoB;YAChC,QAAQ,EAAE,IAAI,CAAC,QAAQ;YACvB,GAAG,CAAC,KAAK,IAAI,EAAE,KAAK,EAAE,CAAC;YACvB,GAAG,CAAC,WAAW,IAAI,EAAE,WAAW,EAAE,CAAC;YACnC,GAAG,IAAI,CAAC,aAAa;SACtB,CAAC;QAEF,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC;YAC1B,MAAM,QAAQ,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;YACpD,OAAO,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,EAAE,EAAE,GAAG,YAAY,EAAE,UAAU,EAAE,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;QACxF,CAAC;QAED,OAAO,IAAI,CAAC,aAAa,CAAC,KAAK,EAAE,OAAO,EAAE,YAAY,CAAC,CAAC;IAC1D,CAAC;IAEO,kBAAkB,CACxB,IAAY,EACZ,OAAe;QAEf,kCAAkC;QAClC,MAAM,KAAK,GAAG,yBAAuB,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACpE,IAAI,CAAC,KAAK,EAAE,CAAC;YACX,MAAM,IAAI,KAAK,CACb,oDAAoD,OAAO,IAAI;gBAC7D,oEAAoE,CACvE,CAAC;QACJ,CAAC;QAED,yDAAyD;QACzD,IAAI,cAAuC,CAAC;QAC5C,IAAI,CAAC;YACH,cAAc,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAA4B,CAAC;QACnE,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,qDAAqD,OAAO,EAAE,CAAC,CAAC;QAClF,CAAC;QAED,MAAM,KAAK,GAAI,cAAwD,CAAC,YAAY,EAAE,KAAK,CAAC;QAC5F,MAAM,WAAW,GAAI,cAAyD,CAAC,YAAY;YACzF,EAAE,MAAM,CAAC;QAEX,yEAAyE;QACzE,MAAM,aAAa,GACjB,cAOD,CAAC,QAAQ,EAAE,+BAA+B,EAAE,aAAa,CAAC;QAE3D,IAAI,CAAC,aAAa,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACjD,MAAM,IAAI,KAAK,CACb,yCAAyC,OAAO,IAAI;gBAClD,4CAA4C,CAC/C,CAAC;QACJ,CAAC;QAED,wEAAwE;QACxE,MAAM,KAAK,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,YAAY,KAAK,IAAI,CAAC,QAAQ,CAAC,IAAI,aAAa,CAAC,CAAC,CAAC,CAAC;QAE9F,OAAO;YACL,UAAU,EAAE,GAAG,KAAK,CAAC,OAAO,UAAU;YACtC,KAAK;YACL,WAAW;SACZ,CAAC;IACJ,CAAC;IAEO,eAAe,CAAC,GAAW;QACjC,MAAM,KAAK,GAAqB,EAAE,CAAC;QACnC,MAAM,EAAE,GAAG,IAAI,MAAM,CAAC,yBAAuB,CAAC,eAAe,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;QAC3E,IAAI,KAA6B,CAAC;QAElC,OAAO,CAAC,KAAK,GAAG,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;YACvC,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACnC,MAAM,QAAQ,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC;YACtC,MAAM,GAAG,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACrB,MAAM,IAAI,GAAG,IAAI,CAAC,cAAc,CAAC,GAAG,CAAC,OAAO,CAAC,UAAU,EAAE
,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;YACrE,IAAI,IAAI;gBAAE,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAC,CAAC;QAClD,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAEO,aAAa,CACnB,KAAuB,EACvB,OAAe,EACf,YAAqC;QAErC,MAAM,QAAQ,GAAe,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,IAAI,CAAC,eAAgB,CAAC;QAEvC,IAAI,QAAQ,GAAG,CAAC,CAAC;QACjB,IAAI,MAAM,GAAa,EAAE,CAAC;QAC1B,IAAI,SAAS,GAAG,QAAQ,GAAG,QAAQ,CAAC;QAEpC,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,IAAI,IAAI,CAAC,KAAK,IAAI,SAAS,EAAE,CAAC;gBAC5B,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBACtB,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;oBAC9B,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC,CAAC;oBAC5C,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC,CAAC;oBAC3C,QAAQ,CAAC,IAAI,CACX,IAAI,CAAC,cAAc,CAAC,IAAI,EAAE;wBACxB,GAAG,YAAY;wBACf,SAAS;wBACT,OAAO;wBACP,YAAY,EAAE,QAAQ;wBACtB,UAAU,EAAE,oBAAoB,OAAO,MAAM,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE;qBACpE,CAAC,CACH,CAAC;oBACF,MAAM,GAAG,EAAE,CAAC;gBACd,CAAC;gBACD,QAAQ,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,KAAK,GAAG,QAAQ,CAAC,GAAG,QAAQ,CAAC;gBACxD,SAAS,GAAG,QAAQ,GAAG,QAAQ,CAAC;YAClC,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACzB,CAAC;QAED,gBAAgB;QAChB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtB,QAAQ,CAAC,IAAI,CACX,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE;gBACpC,GAAG,YAAY;gBACf,SAAS,EAAE,IAAI,CAAC,UAAU,CAAC,QAAQ,CAAC;gBACpC,OAAO,EAAE,IAAI,CAAC,UAAU,CAAC,SAAS,CAAC;gBACnC,YAAY,EAAE,QAAQ;gBACtB,UAAU,EAAE,oBAAoB,OAAO,MAAM,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,EAAE;aACpE,CAAC,CACH,CAAC;QACJ,CAAC;QAED,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,4EAA4E;IAEpE,WAAW,CAAC,KAAa;QAC/B,cAAc;QACd,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,gCAAgC,CAAC,CAAC;QACjE,IAAI,UAAU;YAAE,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC;QACrC,yBAAyB;QACzB,MAAM,UAAU,GAAG,KAAK,CAAC,KAAK,CAAC,2BAA2B,CAAC,CAAC;QAC5D,IAAI,UAAU;YAAE,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC;QACrC,wBAAwB;QACxB,IAAI,qBAAqB,CAAC,IAAI,CAAC,KAAK,CAAC;YAAE,OAAO,KAAK,CAAC;QACpD,MAAM,IAAI,KAAK,CAAC,wDAAwD,KAAK,GAAG,CAAC,CAAC;IACpF,CAAC;IAEO,KAAK,CAAC,KAAK,CAAC,GAAW;QAC7B,MAAM,UAAU,GAAG,IAAI,eAAe
,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,IAAI,CAAC,OAAO,CAAC,CAAC;QAEjE,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;YAChC,OAAO,EAAE;gBACP,YAAY,EACV,+DAA+D;oBAC/D,oDAAoD;gBACtD,iBAAiB,EAAE,gBAAgB;aACpC;YACD,MAAM,EAAE,UAAU,CAAC,MAAM;SAC1B,CAAC,CAAC;QACH,YAAY,CAAC,KAAK,CAAC,CAAC;QAEpB,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,IAAI,KAAK,CAAC,QAAQ,QAAQ,CAAC,MAAM,IAAI,QAAQ,CAAC,UAAU,aAAa,GAAG,EAAE,CAAC,CAAC;QACpF,CAAC;QAED,OAAO,QAAQ,CAAC,IAAI,EAAE,CAAC;IACzB,CAAC;IAEO,cAAc,CAAC,IAAY;QACjC,OAAO,IAAI;aACR,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;aACtB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,OAAO,EAAE,GAAG,CAAC;aACrB,OAAO,CAAC,SAAS,EAAE,GAAG,CAAC;aACvB,OAAO,CAAC,QAAQ,EAAE,GAAG,CAAC;aACtB,OAAO,CAAC,WAAW,EAAE,CAAC,CAAC,EAAE,IAAI,EAAE,EAAE,CAAC,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC;IAChF,CAAC;IAEO,UAAU,CAAC,OAAe;QAChC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,IAAI,CAAC,CAAC;QACrC,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC,CAAC;QAC5C,MAAM,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,EAAE,CAAC,CAAC;QACnC,OAAO,CAAC,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACpE,CAAC;;AAtPU,0DAAuB;AAOV,oCAAY,GAAG,yBAAyB,AAA5B,CAA6B;AACzC,0CAAkB,GAAG,2CAA2C,AAA9C,CAA+C;AACjE,uCAAe,GACrC,sEAAsE,AADjC,CACkC;kCAV9D,uBAAuB;IAJnC,IAAA,oBAAM,EAAC;QACN,IAAI,EAAE,yBAAyB;QAC/B,WAAW,EAAE,mEAAmE;KACjF,CAAC;;GACW,uBAAuB,CAuPnC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@hazeljs/rag",
3
- "version": "0.2.0-beta.59",
3
+ "version": "0.2.0-beta.61",
4
4
  "description": "Retrieval-Augmented Generation (RAG) and vector search for HazelJS framework",
5
5
  "main": "dist/index.js",
6
6
  "types": "dist/index.d.ts",
@@ -23,9 +23,12 @@
23
23
  "@huggingface/inference": "^2.0.0",
24
24
  "@pinecone-database/pinecone": "^3.0.0",
25
25
  "@qdrant/js-client-rest": "^1.9.0",
26
+ "cheerio": "^1.0.0",
26
27
  "chromadb": "^1.8.0",
27
28
  "cohere-ai": "^7.0.0",
29
+ "mammoth": "^1.11.0",
28
30
  "openai": "^4.0.0",
31
+ "pdf-parse": "^1.1.1",
29
32
  "weaviate-ts-client": "^2.0.0"
30
33
  },
31
34
  "peerDependenciesMeta": {
@@ -49,6 +52,51 @@
49
52
  },
50
53
  "@huggingface/inference": {
51
54
  "optional": true
55
+ },
56
+ "pdf-parse": {
57
+ "optional": true
58
+ },
59
+ "mammoth": {
60
+ "optional": true
61
+ },
62
+ "cheerio": {
63
+ "optional": true
64
+ }
65
+ },
66
+ "jest": {
67
+ "preset": "ts-jest",
68
+ "testEnvironment": "node",
69
+ "testMatch": [
70
+ "**/src/__tests__/**/*.test.ts"
71
+ ],
72
+ "collectCoverageFrom": [
73
+ "src/**/*.ts",
74
+ "!src/**/*.d.ts",
75
+ "!src/index.ts",
76
+ "!src/rag.module.ts",
77
+ "!src/rag.service.ts",
78
+ "!src/rag-pipeline-with-memory.ts",
79
+ "!src/vector-stores/**",
80
+ "!src/embeddings/**",
81
+ "!src/agentic/**",
82
+ "!src/decorators/**",
83
+ "!src/memory/**",
84
+ "!src/loaders/pdf.loader.ts",
85
+ "!src/loaders/docx.loader.ts",
86
+ "!src/loaders/web.loader.ts",
87
+ "!src/loaders/github.loader.ts",
88
+ "!src/loaders/youtube-transcript.loader.ts",
89
+ "!src/loaders/index.ts",
90
+ "!src/graph/index.ts",
91
+ "!src/types/**"
92
+ ],
93
+ "coverageThreshold": {
94
+ "global": {
95
+ "statements": 85,
96
+ "branches": 75,
97
+ "functions": 85,
98
+ "lines": 85
99
+ }
52
100
  }
53
101
  },
54
102
  "devDependencies": {
@@ -89,5 +137,5 @@
89
137
  "url": "https://github.com/hazeljs/hazel-js/issues"
90
138
  },
91
139
  "homepage": "https://hazeljs.com",
92
- "gitHead": "f6d8ee8162a40e2298ccce46d843269838bbe6ff"
140
+ "gitHead": "2205447dd2f88f83a7748b0ffdee0417be0f3970"
93
141
  }