unrag 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,346 @@
1
+ {
2
+ "version": 1,
3
+ "extractors": [
4
+ {
5
+ "id": "pdf-text-layer",
6
+ "extractorName": "pdf:text-layer",
7
+ "group": "PDF",
8
+ "label": "pdf-text-layer",
9
+ "description": "Fast/cheap extraction via PDF text layer",
10
+ "hint": "recommended",
11
+ "defaultSelected": true,
12
+ "workerOnly": false,
13
+ "configComplexity": "needs-dep",
14
+ "fileTypes": ["pdf"],
15
+ "inputModes": ["file", "url", "buffer"],
16
+ "output": "text",
17
+ "docsPath": "/docs/extractors/pdf/text-layer",
18
+ "deps": { "pdfjs-dist": "^5.4.149" },
19
+ "devDeps": {},
20
+ "factory": "createPdfTextLayerExtractor",
21
+ "assetProcessingFlagKeys": ["pdf_textLayer"]
22
+ },
23
+ {
24
+ "id": "pdf-llm",
25
+ "extractorName": "pdf:llm",
26
+ "group": "PDF",
27
+ "label": "pdf-llm",
28
+ "description": "LLM-based PDF extraction; higher cost",
29
+ "defaultSelected": false,
30
+ "workerOnly": false,
31
+ "configComplexity": "needs-api-key",
32
+ "fileTypes": ["pdf"],
33
+ "inputModes": ["file", "url", "buffer"],
34
+ "output": "text (markdown)",
35
+ "docsPath": "/docs/extractors/pdf/llm",
36
+ "deps": { "ai": "^5.0.113" },
37
+ "devDeps": {},
38
+ "factory": "createPdfLlmExtractor",
39
+ "assetProcessingFlagKeys": ["pdf_llmExtraction"]
40
+ },
41
+ {
42
+ "id": "pdf-ocr",
43
+ "extractorName": "pdf:ocr",
44
+ "group": "PDF",
45
+ "label": "pdf-ocr",
46
+ "description": "OCR scanned PDFs; requires native binaries",
47
+ "hint": "worker-only",
48
+ "defaultSelected": false,
49
+ "workerOnly": true,
50
+ "configComplexity": "advanced",
51
+ "fileTypes": ["pdf"],
52
+ "inputModes": ["file", "url", "buffer"],
53
+ "output": "text",
54
+ "docsPath": "/docs/extractors/pdf/ocr",
55
+ "deps": {},
56
+ "devDeps": {},
57
+ "factory": "createPdfOcrExtractor",
58
+ "assetProcessingFlagKeys": ["pdf_ocr"]
59
+ },
60
+ {
61
+ "id": "image-ocr",
62
+ "extractorName": "image:ocr",
63
+ "group": "Image",
64
+ "label": "image-ocr",
65
+ "description": "Extract text from images via vision LLM",
66
+ "defaultSelected": false,
67
+ "workerOnly": false,
68
+ "configComplexity": "needs-api-key",
69
+ "fileTypes": ["jpg", "png", "webp", "gif"],
70
+ "inputModes": ["file", "url", "buffer"],
71
+ "output": "text",
72
+ "docsPath": "/docs/extractors/image/ocr",
73
+ "deps": { "ai": "^5.0.113" },
74
+ "devDeps": {},
75
+ "factory": "createImageOcrExtractor",
76
+ "assetProcessingFlagKeys": ["image_ocr"]
77
+ },
78
+ {
79
+ "id": "image-caption-llm",
80
+ "extractorName": "image:caption-llm",
81
+ "group": "Image",
82
+ "label": "image-caption-llm",
83
+ "description": "Generate captions for images via vision LLM",
84
+ "defaultSelected": false,
85
+ "workerOnly": false,
86
+ "configComplexity": "needs-api-key",
87
+ "fileTypes": ["jpg", "png", "webp", "gif"],
88
+ "inputModes": ["file", "url", "buffer"],
89
+ "output": "caption",
90
+ "docsPath": "/docs/extractors/image/caption-llm",
91
+ "deps": { "ai": "^5.0.113" },
92
+ "devDeps": {},
93
+ "factory": "createImageCaptionLlmExtractor",
94
+ "assetProcessingFlagKeys": ["image_captionLlm"]
95
+ },
96
+ {
97
+ "id": "audio-transcribe",
98
+ "extractorName": "audio:transcribe",
99
+ "group": "Audio",
100
+ "label": "audio-transcribe",
101
+ "description": "Speech-to-text transcription",
102
+ "defaultSelected": false,
103
+ "workerOnly": false,
104
+ "configComplexity": "needs-api-key",
105
+ "fileTypes": ["mp3", "wav", "ogg", "m4a"],
106
+ "inputModes": ["file", "url", "buffer"],
107
+ "output": "transcript",
108
+ "docsPath": "/docs/extractors/audio/transcribe",
109
+ "deps": { "ai": "^5.0.113" },
110
+ "devDeps": {},
111
+ "factory": "createAudioTranscribeExtractor",
112
+ "assetProcessingFlagKeys": ["audio_transcription"]
113
+ },
114
+ {
115
+ "id": "video-transcribe",
116
+ "extractorName": "video:transcribe",
117
+ "group": "Video",
118
+ "label": "video-transcribe",
119
+ "description": "Transcribe video audio track",
120
+ "defaultSelected": false,
121
+ "workerOnly": false,
122
+ "configComplexity": "needs-api-key",
123
+ "fileTypes": ["mp4", "webm", "mov"],
124
+ "inputModes": ["file", "url", "buffer"],
125
+ "output": "transcript",
126
+ "docsPath": "/docs/extractors/video/transcribe",
127
+ "deps": { "ai": "^5.0.113" },
128
+ "devDeps": {},
129
+ "factory": "createVideoTranscribeExtractor",
130
+ "assetProcessingFlagKeys": ["video_transcription"]
131
+ },
132
+ {
133
+ "id": "video-frames",
134
+ "extractorName": "video:frames",
135
+ "group": "Video",
136
+ "label": "video-frames",
137
+ "description": "Sample frames + analyze via vision LLM; requires ffmpeg",
138
+ "hint": "worker-only",
139
+ "defaultSelected": false,
140
+ "workerOnly": true,
141
+ "configComplexity": "advanced",
142
+ "fileTypes": ["mp4", "webm", "mov"],
143
+ "inputModes": ["file", "url", "buffer"],
144
+ "output": "frame descriptions",
145
+ "docsPath": "/docs/extractors/video/frames",
146
+ "deps": { "ai": "^5.0.113" },
147
+ "devDeps": {},
148
+ "factory": "createVideoFramesExtractor",
149
+ "assetProcessingFlagKeys": ["video_frames"]
150
+ },
151
+ {
152
+ "id": "file-text",
153
+ "extractorName": "file:text",
154
+ "group": "Files",
155
+ "label": "file-text",
156
+ "description": "Extract text/markdown/json/html from common text files",
157
+ "hint": "recommended",
158
+ "defaultSelected": true,
159
+ "workerOnly": false,
160
+ "configComplexity": "zero-config",
161
+ "fileTypes": ["txt", "md", "json", "csv"],
162
+ "inputModes": ["file", "url", "buffer"],
163
+ "output": "text",
164
+ "docsPath": "/docs/extractors/file/text",
165
+ "deps": {},
166
+ "devDeps": {},
167
+ "factory": "createFileTextExtractor",
168
+ "assetProcessingFlagKeys": ["file_text"]
169
+ },
170
+ {
171
+ "id": "file-docx",
172
+ "extractorName": "file:docx",
173
+ "group": "Files",
174
+ "label": "file-docx",
175
+ "description": "Extract text from .docx files",
176
+ "defaultSelected": false,
177
+ "workerOnly": false,
178
+ "configComplexity": "needs-dep",
179
+ "fileTypes": ["docx"],
180
+ "inputModes": ["file", "url", "buffer"],
181
+ "output": "text",
182
+ "docsPath": "/docs/extractors/file/docx",
183
+ "deps": { "mammoth": "^1.10.0" },
184
+ "devDeps": {},
185
+ "factory": "createFileDocxExtractor",
186
+ "assetProcessingFlagKeys": ["file_docx"]
187
+ },
188
+ {
189
+ "id": "file-pptx",
190
+ "extractorName": "file:pptx",
191
+ "group": "Files",
192
+ "label": "file-pptx",
193
+ "description": "Extract text from .pptx slides",
194
+ "defaultSelected": false,
195
+ "workerOnly": false,
196
+ "configComplexity": "needs-dep",
197
+ "fileTypes": ["pptx"],
198
+ "inputModes": ["file", "url", "buffer"],
199
+ "output": "text",
200
+ "docsPath": "/docs/extractors/file/pptx",
201
+ "deps": { "jszip": "^3.10.1" },
202
+ "devDeps": {},
203
+ "factory": "createFilePptxExtractor",
204
+ "assetProcessingFlagKeys": ["file_pptx"]
205
+ },
206
+ {
207
+ "id": "file-xlsx",
208
+ "extractorName": "file:xlsx",
209
+ "group": "Files",
210
+ "label": "file-xlsx",
211
+ "description": "Extract tables from .xlsx spreadsheets",
212
+ "defaultSelected": false,
213
+ "workerOnly": false,
214
+ "configComplexity": "needs-dep",
215
+ "fileTypes": ["xlsx"],
216
+ "inputModes": ["file", "url", "buffer"],
217
+ "output": "text (csv)",
218
+ "docsPath": "/docs/extractors/file/xlsx",
219
+ "deps": { "xlsx": "^0.18.5" },
220
+ "devDeps": {},
221
+ "factory": "createFileXlsxExtractor",
222
+ "assetProcessingFlagKeys": ["file_xlsx"]
223
+ }
224
+ ],
225
+ "connectors": [
226
+ {
227
+ "id": "notion",
228
+ "displayName": "Notion",
229
+ "types": ["docs", "wiki", "db"],
230
+ "description": "Sync pages, databases, and blocks from Notion workspaces",
231
+ "status": "available",
232
+ "docsPath": "/docs/connectors/notion",
233
+ "deps": { "@notionhq/client": "^2.2.16" },
234
+ "devDeps": {},
235
+ "envVars": [
236
+ { "name": "NOTION_TOKEN", "required": true, "notes": "Server-only Notion integration token." }
237
+ ]
238
+ },
239
+ {
240
+ "id": "google-drive",
241
+ "displayName": "Google Drive",
242
+ "types": ["files", "docs"],
243
+ "description": "Ingest Docs/Sheets exports and shared folders",
244
+ "status": "available",
245
+ "docsPath": "/docs/connectors/google-drive",
246
+ "deps": { "googleapis": "^148.0.0", "google-auth-library": "^10.0.0" },
247
+ "devDeps": {},
248
+ "envVars": [
249
+ {
250
+ "name": "GOOGLE_SERVICE_ACCOUNT_JSON",
251
+ "required": false,
252
+ "notes": "Service account JSON credentials (server-only)."
253
+ },
254
+ { "name": "GOOGLE_CLIENT_ID", "required": false, "notes": "OAuth client id (server-only)." },
255
+ {
256
+ "name": "GOOGLE_CLIENT_SECRET",
257
+ "required": false,
258
+ "notes": "OAuth client secret (server-only)."
259
+ },
260
+ { "name": "GOOGLE_REDIRECT_URI", "required": false, "notes": "OAuth redirect URI." }
261
+ ]
262
+ },
263
+ {
264
+ "id": "github",
265
+ "displayName": "GitHub",
266
+ "types": ["code", "docs"],
267
+ "description": "Ingest repositories (Markdown, READMEs, docs folders) and issues/PRs",
268
+ "status": "coming-soon",
269
+ "docsPath": null,
270
+ "deps": {},
271
+ "devDeps": {}
272
+ },
273
+ {
274
+ "id": "gitlab",
275
+ "displayName": "GitLab",
276
+ "types": ["code", "docs"],
277
+ "description": "Ingest repos + wiki pages for self-hosted documentation",
278
+ "status": "coming-soon",
279
+ "docsPath": null,
280
+ "deps": {},
281
+ "devDeps": {}
282
+ },
283
+ {
284
+ "id": "slack",
285
+ "displayName": "Slack",
286
+ "types": ["chat"],
287
+ "description": "Ingest channels (messages + threads) as searchable knowledge",
288
+ "status": "coming-soon",
289
+ "docsPath": null,
290
+ "deps": {},
291
+ "devDeps": {}
292
+ },
293
+ {
294
+ "id": "discord",
295
+ "displayName": "Discord",
296
+ "types": ["chat"],
297
+ "description": "Ingest server channels and threads for community support knowledge",
298
+ "status": "coming-soon",
299
+ "docsPath": null,
300
+ "deps": {},
301
+ "devDeps": {}
302
+ },
303
+ {
304
+ "id": "linear",
305
+ "displayName": "Linear",
306
+ "types": ["issues", "project"],
307
+ "description": "Ingest issues and project updates for internal knowledge",
308
+ "status": "coming-soon",
309
+ "docsPath": null,
310
+ "deps": {},
311
+ "devDeps": {}
312
+ },
313
+ {
314
+ "id": "dropbox",
315
+ "displayName": "Dropbox",
316
+ "types": ["files"],
317
+ "description": "Ingest shared folders and exported docs/files",
318
+ "status": "coming-soon",
319
+ "docsPath": null,
320
+ "deps": {},
321
+ "devDeps": {}
322
+ },
323
+ {
324
+ "id": "onedrive",
325
+ "displayName": "OneDrive",
326
+ "types": ["files"],
327
+ "description": "Ingest files and Office exports from Microsoft 365",
328
+ "status": "coming-soon",
329
+ "docsPath": null,
330
+ "deps": {},
331
+ "devDeps": {}
332
+ },
333
+ {
334
+ "id": "teams",
335
+ "displayName": "Microsoft Teams",
336
+ "types": ["chat"],
337
+ "description": "Ingest channels and conversations for internal support knowledge",
338
+ "status": "coming-soon",
339
+ "docsPath": null,
340
+ "deps": {},
341
+ "devDeps": {}
342
+ }
343
+ ]
344
+ }
345
+
346
+
@@ -113,7 +113,7 @@ export const createDrizzleVectorStore = (db: DrizzleDb): VectorStore => ({
113
113
 
114
114
  const vectorLiteral = `[${embedding.join(",")}]`;
115
115
 
116
- const rows = await db.execute(
116
+ const result = await db.execute(
117
117
  sql`
118
118
  select
119
119
  c.id,
@@ -133,6 +133,10 @@ export const createDrizzleVectorStore = (db: DrizzleDb): VectorStore => ({
133
133
  `
134
134
  );
135
135
 
136
+ const rows = Array.isArray(result)
137
+ ? result
138
+ : ((result as { rows?: unknown[] }).rows ?? []);
139
+
136
140
  return (rows as Array<Record<string, unknown>>).map((row) => ({
137
141
  id: String(row.id),
138
142
  documentId: String(row.document_id),
@@ -157,4 +161,3 @@ export const createDrizzleVectorStore = (db: DrizzleDb): VectorStore => ({
157
161
  },
158
162
  });
159
163
 
160
-