unrag 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -6,18 +6,24 @@
6
6
  "bin": {
7
7
  "unrag": "./dist/cli/index.js"
8
8
  },
9
- "version": "0.2.4",
9
+ "version": "0.2.6",
10
10
  "private": false,
11
11
  "license": "Apache-2.0",
12
12
  "devDependencies": {
13
+ "@ai-sdk/amazon-bedrock": "^3.0.72",
14
+ "@ai-sdk/cohere": "^3.0.1",
15
+ "@ai-sdk/google": "^3.0.1",
16
+ "@ai-sdk/openai": "^3.0.1",
17
+ "@openrouter/sdk": "^0.3.10",
18
+ "@prisma/client": "^6.0.0",
13
19
  "@types/bun": "latest",
14
20
  "@types/pg": "^8.16.0",
15
- "@prisma/client": "^6.0.0",
16
- "prisma": "^6.0.0",
17
- "drizzle-orm": "^0.45.1",
21
+ "ai": "^6.0.3",
18
22
  "drizzle-kit": "^0.31.8",
19
- "ai": "^5.0.113",
20
- "pg": "^8.16.3"
23
+ "drizzle-orm": "^0.45.1",
24
+ "pg": "^8.16.3",
25
+ "prisma": "^6.0.0",
26
+ "voyage-ai-provider": "^3.0.0"
21
27
  },
22
28
  "dependencies": {
23
29
  "@clack/prompts": "^0.11.0",
@@ -18,19 +18,18 @@
18
18
  export const unrag = defineUnragConfig({
19
19
  defaults: {
20
20
  chunking: {
21
- chunkSize: 200,
22
- chunkOverlap: 40,
21
+ chunkSize: 200, // __UNRAG_DEFAULT_chunkSize__
22
+ chunkOverlap: 40, // __UNRAG_DEFAULT_chunkOverlap__
23
23
  },
24
24
  retrieval: {
25
- topK: 8,
25
+ topK: 8, // __UNRAG_DEFAULT_topK__
26
26
  },
27
27
  },
28
28
  embedding: {
29
29
  provider: "ai",
30
30
  config: {
31
- type: "text", // __UNRAG_EMBEDDING_TYPE__
32
31
  model: "openai/text-embedding-3-small", // __UNRAG_EMBEDDING_MODEL__
33
- timeoutMs: 15_000,
32
+ timeoutMs: 15_000, // __UNRAG_EMBEDDING_TIMEOUT__
34
33
  },
35
34
  },
36
35
  engine: {
@@ -41,8 +40,8 @@ export const unrag = defineUnragConfig({
41
40
  * - storeDocumentContent: whether the full original document text is stored in `documents.content`.
42
41
  */
43
42
  storage: {
44
- storeChunkContent: true,
45
- storeDocumentContent: true,
43
+ storeChunkContent: true, // __UNRAG_STORAGE_storeChunkContent__
44
+ storeDocumentContent: true, // __UNRAG_STORAGE_storeDocumentContent__
46
45
  },
47
46
  /**
48
47
  * Optional extractor modules that can process non-text assets into text outputs.
@@ -62,9 +61,10 @@ export const unrag = defineUnragConfig({
62
61
  *
63
62
  * Notes:
64
63
  * - This generated config is cost-safe by default (all extraction is off).
65
- * - `unrag init` can enable rich media + multimodal embeddings for you.
64
+ * - `unrag init --rich-media` can enable rich media ingestion for you (extractors + assetProcessing flags).
66
65
  * - Tighten fetch allowlists/limits in production if you ingest URL-based assets.
67
66
  */
67
+ // __UNRAG_ASSET_PROCESSING_BLOCK_START__
68
68
  assetProcessing: {
69
69
  onUnsupportedAsset: "skip",
70
70
  onError: "skip",
@@ -181,6 +181,7 @@ export const unrag = defineUnragConfig({
181
181
  },
182
182
  },
183
183
  },
184
+ // __UNRAG_ASSET_PROCESSING_BLOCK_END__
184
185
  },
185
186
  } as const);
186
187
 
@@ -0,0 +1,171 @@
1
+ import type { GoogleDriveAuth } from "./types";
2
+
3
/**
 * Scopes used by `createGoogleDriveClient` when the caller supplies none.
 */
export const DEFAULT_DRIVE_SCOPES = [
  "https://www.googleapis.com/auth/drive.readonly",
  "https://www.googleapis.com/auth/drive.metadata.readonly",
] as const;

/**
 * Validated, internal form of a `GoogleDriveAuth` input.
 *
 * - `oauth_client`: caller supplied a ready-made OAuth client (passed through untouched).
 * - `oauth_config`: caller supplied raw OAuth settings; all values are coerced to strings.
 * - `service_account`: parsed credentials object, plus an optional `subject`.
 * - `google_auth`: caller supplied a ready-made auth object (passed through untouched).
 */
type NormalizedAuth =
  | { kind: "oauth_client"; oauthClient: unknown }
  | {
      kind: "oauth_config";
      clientId: string;
      clientSecret: string;
      redirectUri: string;
      refreshToken: string;
      accessToken?: string;
    }
  | {
      kind: "service_account";
      credentials: Record<string, any>;
      subject?: string;
    }
  | { kind: "google_auth"; auth: unknown };

/**
 * Parses a service-account JSON string.
 *
 * The parser's own message is deliberately NOT forwarded: JavaScript engines may
 * quote a fragment of the input in it, and the input here is a private key file.
 * A `SyntaxError` is still thrown so the error type matches a bare `JSON.parse`.
 */
const parseServiceAccountJson = (raw: string): unknown => {
  try {
    return JSON.parse(raw);
  } catch {
    throw new SyntaxError(
      'Google Drive auth.kind="service_account" credentialsJson is not valid JSON'
    );
  }
};

/**
 * Validates a `GoogleDriveAuth` input and converts it to a `NormalizedAuth`.
 *
 * @param auth - Auth input whose `kind` is "oauth", "service_account" or "google_auth".
 * @returns The normalized variant. `kind: "oauth"` becomes "oauth_client" when an
 *   `oauthClient` is present, otherwise "oauth_config".
 * @throws Error when `auth` is missing, its `kind` is unknown, or required fields
 *   for the given kind are absent.
 * @throws SyntaxError when `credentialsJson` is a string that is not valid JSON.
 */
export function normalizeGoogleDriveAuth(auth: GoogleDriveAuth): NormalizedAuth {
  if (!auth || typeof auth !== "object") {
    throw new Error("Google Drive auth is required");
  }

  // Single untyped view of the input; every field is validated before use.
  const input: Record<string, unknown> = auth as any;

  const kind = input.kind;
  if (kind !== "oauth" && kind !== "service_account" && kind !== "google_auth") {
    throw new Error(`Unknown Google Drive auth kind: ${String(kind)}`);
  }

  if (kind === "google_auth") {
    const a = input.auth;
    if (!a) throw new Error('Google Drive auth.kind="google_auth" requires auth');
    return { kind: "google_auth", auth: a };
  }

  if (kind === "service_account") {
    const raw = input.credentialsJson;
    if (!raw) {
      throw new Error(
        'Google Drive auth.kind="service_account" requires credentialsJson'
      );
    }
    // credentialsJson may be the JSON text or an already-parsed object.
    const credentials = (
      typeof raw === "string" ? parseServiceAccountJson(raw) : raw
    ) as Record<string, any>;
    if (!credentials?.client_email || !credentials?.private_key) {
      throw new Error(
        'Google Drive service account credentials must include "client_email" and "private_key".'
      );
    }
    return {
      kind: "service_account",
      credentials,
      subject: input.subject ? String(input.subject) : undefined,
    };
  }

  // kind === "oauth": a ready-made client takes precedence over raw settings.
  if (input.oauthClient) {
    return { kind: "oauth_client", oauthClient: input.oauthClient };
  }

  const { clientId, clientSecret, redirectUri, refreshToken, accessToken } = input;
  if (!clientId || !clientSecret || !redirectUri || !refreshToken) {
    throw new Error(
      'Google Drive auth.kind="oauth" requires either oauthClient or { clientId, clientSecret, redirectUri, refreshToken }'
    );
  }
  return {
    kind: "oauth_config",
    clientId: String(clientId),
    clientSecret: String(clientSecret),
    redirectUri: String(redirectUri),
    refreshToken: String(refreshToken),
    // accessToken is optional; the key is omitted entirely when not provided.
    ...(accessToken ? { accessToken: String(accessToken) } : {}),
  };
}
82
+
83
/**
 * Best-effort conversion of any thrown value into a printable message.
 *
 * Always returns a string: `Error` instances yield their `message`, strings are
 * returned as-is, other values are JSON-encoded, and anything JSON cannot
 * represent falls back to `String(err)`.
 */
const asMessage = (err: unknown): string => {
  if (err instanceof Error) return err.message;
  if (typeof err === "string") return err;
  try {
    // JSON.stringify yields `undefined` (not a string) for undefined, functions
    // and symbols, so the result is checked before being returned.
    const encoded: unknown = JSON.stringify(err);
    if (typeof encoded === "string") return encoded;
  } catch {
    // Circular structures and BigInt values make JSON.stringify throw.
  }
  return String(err);
};
91
+
92
/**
 * Returns true when `err` looks like a failure to resolve one of the Google
 * connector packages.
 *
 * CommonJS resolution reports "Cannot find module" (code MODULE_NOT_FOUND);
 * a failed ESM `import()` reports "Cannot find package" (code
 * ERR_MODULE_NOT_FOUND). Both forms are recognised.
 */
const isMissingGoogleDependency = (err: unknown, msg: string): boolean => {
  const code =
    typeof err === "object" && err !== null ? (err as { code?: unknown }).code : undefined;
  const isResolutionFailure =
    code === "MODULE_NOT_FOUND" ||
    code === "ERR_MODULE_NOT_FOUND" ||
    msg.includes("Cannot find module") ||
    msg.includes("Cannot find package");
  return (
    isResolutionFailure &&
    (msg.includes("googleapis") || msg.includes("google-auth-library"))
  );
};

/**
 * Creates a Google Drive API client from a plug-and-play auth input.
 *
 * Note: This uses dynamic imports so the core Unrag package does not require
 * Google dependencies unless the connector is installed into a user project.
 *
 * @param args.auth - Auth input; validated via `normalizeGoogleDriveAuth`
 *   (validation errors are thrown as-is, before any import happens).
 * @param args.scopes - Scopes for the service-account JWT client. Falls back to
 *   `DEFAULT_DRIVE_SCOPES` when omitted or empty. Not applied to caller-supplied
 *   clients or to OAuth clients built from a refresh token.
 * @returns The Drive v3 client and the auth client it was created with.
 * @throws Error with an install hint when "googleapis" or "google-auth-library"
 *   cannot be resolved; any other error is rethrown unchanged.
 */
export async function createGoogleDriveClient(args: {
  auth: GoogleDriveAuth;
  scopes?: string[];
}): Promise<{ drive: any; authClient: any }> {
  const normalized = normalizeGoogleDriveAuth(args.auth);
  // Copy the readonly default tuple rather than casting it to a mutable array.
  const scopes: string[] = args.scopes?.length ? args.scopes : [...DEFAULT_DRIVE_SCOPES];

  let authClient: any;

  try {
    if (normalized.kind === "oauth_client") {
      authClient = normalized.oauthClient;
    } else if (normalized.kind === "google_auth") {
      authClient = normalized.auth;
    } else {
      // google-auth-library (dynamic) — only needed when we build the client ourselves.
      const gal: any = await import("google-auth-library");

      if (normalized.kind === "oauth_config") {
        const OAuth2Client = gal.OAuth2Client ?? gal.OAuth2;
        if (!OAuth2Client) {
          throw new Error("OAuth2Client not found in google-auth-library");
        }
        const client = new OAuth2Client(
          normalized.clientId,
          normalized.clientSecret,
          normalized.redirectUri
        );
        client.setCredentials({
          refresh_token: normalized.refreshToken,
          ...(normalized.accessToken ? { access_token: normalized.accessToken } : {}),
        });
        authClient = client;
      } else {
        const JWT = gal.JWT;
        if (!JWT) {
          throw new Error("JWT not found in google-auth-library");
        }
        const c = normalized.credentials;
        authClient = new JWT({
          email: c.client_email,
          key: c.private_key,
          scopes,
          ...(normalized.subject ? { subject: normalized.subject } : {}),
        });
      }
    }

    const { google }: any = await import("googleapis");
    if (!google?.drive) {
      throw new Error("googleapis.google.drive not found");
    }

    const drive = google.drive({
      version: "v3",
      auth: authClient,
    });

    return { drive, authClient };
  } catch (err) {
    // Coerce defensively so a non-string message can never break the checks below.
    const msg = String(asMessage(err));
    if (isMissingGoogleDependency(err, msg)) {
      throw new Error(
        `Missing Google Drive connector dependencies. Ensure you've installed the connector via \`unrag add google-drive\` (which adds "googleapis" and "google-auth-library"). Original error: ${msg}`
      );
    }
    throw err;
  }
}
170
+
171
+
@@ -0,0 +1,10 @@
1
+ export { createGoogleDriveClient } from "./client";
2
+ export {
3
+ loadGoogleDriveFileDocument,
4
+ syncGoogleDriveFiles,
5
+ buildGoogleDriveFileIngestInput,
6
+ } from "./sync";
7
+ export * from "./types";
8
+ export * from "./mime";
9
+
10
+
@@ -0,0 +1,76 @@
1
+ import type { AssetKind } from "../../core/types";
2
+
3
/** MIME types Google Drive uses for folders, shortcuts and Google-native editor files. */
export const DRIVE_MIME = {
  folder: "application/vnd.google-apps.folder",
  shortcut: "application/vnd.google-apps.shortcut",
  doc: "application/vnd.google-apps.document",
  sheet: "application/vnd.google-apps.spreadsheet",
  slides: "application/vnd.google-apps.presentation",
  drawing: "application/vnd.google-apps.drawing",
} as const;

/** Target MIME types for exporting Google-native files (see `getNativeExportPlan`). */
export const EXPORT_MIME = {
  text: "text/plain",
  csv: "text/csv",
  pptx:
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  png: "image/png",
} as const;

/** The Google-native editor file kinds this connector distinguishes. */
export type DriveGoogleNativeKind = "doc" | "sheet" | "slides" | "drawing";

/** Result of `classifyDriveMimeType`; anything unrecognised is "binary". */
export type DriveMimeClassification =
  | { kind: "folder" }
  | { kind: "shortcut" }
  | { kind: "google_native"; nativeKind: DriveGoogleNativeKind }
  | { kind: "binary" };

/** Lookup from Google-native MIME type to its native kind. */
const NATIVE_KIND_BY_MIME: ReadonlyMap<string, DriveGoogleNativeKind> = new Map<
  string,
  DriveGoogleNativeKind
>([
  [DRIVE_MIME.doc, "doc"],
  [DRIVE_MIME.sheet, "sheet"],
  [DRIVE_MIME.slides, "slides"],
  [DRIVE_MIME.drawing, "drawing"],
]);

/**
 * Classifies a Drive file by its MIME type.
 *
 * Matching ignores surrounding whitespace and letter case (MIME types are
 * case-insensitive), consistent with `assetKindFromMediaType`.
 *
 * @param mimeType - The file's MIME type; `undefined` or blank is treated as binary.
 * @returns folder / shortcut / google_native (with its native kind) / binary.
 */
export function classifyDriveMimeType(mimeType: string | undefined): DriveMimeClassification {
  const mt = String(mimeType ?? "").trim().toLowerCase();
  if (!mt) return { kind: "binary" };

  if (mt === DRIVE_MIME.folder) return { kind: "folder" };
  if (mt === DRIVE_MIME.shortcut) return { kind: "shortcut" };

  const nativeKind = NATIVE_KIND_BY_MIME.get(mt);
  if (nativeKind !== undefined) return { kind: "google_native", nativeKind };

  return { kind: "binary" };
}
42
+
43
/**
 * How a Google-native file should be brought into the pipeline:
 * as exported text content, as an exported asset, or not at all.
 */
export type DriveNativeExportPlan =
  | { kind: "content"; mimeType: string }
  | { kind: "asset"; assetKind: AssetKind; mimeType: string; filenameExt?: string }
  | { kind: "unsupported" };

/**
 * Picks the export plan for a Google-native file kind.
 *
 * Docs and Slides are exported as plain text and Sheets as CSV. Drawings have
 * no useful text form, so they are exported as a PNG image asset. Any other
 * value yields `{ kind: "unsupported" }`.
 */
export function getNativeExportPlan(nativeKind: DriveGoogleNativeKind): DriveNativeExportPlan {
  switch (nativeKind) {
    case "doc":
    case "slides":
      return { kind: "content", mimeType: EXPORT_MIME.text };
    case "sheet":
      return { kind: "content", mimeType: EXPORT_MIME.csv };
    case "drawing":
      return {
        kind: "asset",
        assetKind: "image",
        mimeType: EXPORT_MIME.png,
        filenameExt: "png",
      };
    default:
      // Reachable only if a caller bypasses the type system.
      return { kind: "unsupported" };
  }
}
66
+
67
/**
 * Maps a media (MIME) type to the asset kind used for ingestion.
 *
 * Only the `type/subtype` part is considered: parameters after `;`
 * (e.g. `application/pdf; charset=binary`), surrounding whitespace and letter
 * case are ignored.
 *
 * @param mediaType - Media type string; `undefined` or blank maps to "file".
 * @returns "pdf", "image", "audio" or "video" when recognised, otherwise "file".
 */
export function assetKindFromMediaType(mediaType: string | undefined): AssetKind {
  // Drop any parameters so an exact comparison against "application/pdf" still matches.
  const essence = (String(mediaType ?? "").split(";")[0] ?? "").trim().toLowerCase();
  if (essence === "application/pdf") return "pdf";
  if (essence.startsWith("image/")) return "image";
  if (essence.startsWith("audio/")) return "audio";
  if (essence.startsWith("video/")) return "video";
  return "file";
}
75
+
76
+