unrag 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +616 -174
- package/package.json +12 -6
- package/registry/config/unrag.config.ts +9 -8
- package/registry/connectors/google-drive/client.ts +171 -0
- package/registry/connectors/google-drive/index.ts +10 -0
- package/registry/connectors/google-drive/mime.ts +76 -0
- package/registry/connectors/google-drive/sync.ts +528 -0
- package/registry/connectors/google-drive/types.ts +127 -0
- package/registry/core/context-engine.ts +66 -2
- package/registry/core/types.ts +44 -2
- package/registry/docs/unrag.md +6 -1
- package/registry/embedding/_shared.ts +20 -0
- package/registry/embedding/ai.ts +6 -65
- package/registry/embedding/azure.ts +79 -0
- package/registry/embedding/bedrock.ts +79 -0
- package/registry/embedding/cohere.ts +79 -0
- package/registry/embedding/google.ts +93 -0
- package/registry/embedding/mistral.ts +62 -0
- package/registry/embedding/ollama.ts +75 -0
- package/registry/embedding/openai.ts +79 -0
- package/registry/embedding/openrouter.ts +85 -0
- package/registry/embedding/together.ts +67 -0
- package/registry/embedding/vertex.ts +102 -0
- package/registry/embedding/voyage.ts +159 -0
- package/registry/manifest.json +346 -0
- package/registry/store/drizzle-postgres-pgvector/store.ts +5 -2
package/package.json
CHANGED
|
@@ -6,18 +6,24 @@
|
|
|
6
6
|
"bin": {
|
|
7
7
|
"unrag": "./dist/cli/index.js"
|
|
8
8
|
},
|
|
9
|
-
"version": "0.2.4",
|
|
9
|
+
"version": "0.2.6",
|
|
10
10
|
"private": false,
|
|
11
11
|
"license": "Apache-2.0",
|
|
12
12
|
"devDependencies": {
|
|
13
|
+
"@ai-sdk/amazon-bedrock": "^3.0.72",
|
|
14
|
+
"@ai-sdk/cohere": "^3.0.1",
|
|
15
|
+
"@ai-sdk/google": "^3.0.1",
|
|
16
|
+
"@ai-sdk/openai": "^3.0.1",
|
|
17
|
+
"@openrouter/sdk": "^0.3.10",
|
|
18
|
+
"@prisma/client": "^6.0.0",
|
|
13
19
|
"@types/bun": "latest",
|
|
14
20
|
"@types/pg": "^8.16.0",
|
|
15
|
-
"
|
|
16
|
-
"prisma": "^6.0.0",
|
|
17
|
-
"drizzle-orm": "^0.45.1",
|
|
21
|
+
"ai": "^6.0.3",
|
|
18
22
|
"drizzle-kit": "^0.31.8",
|
|
19
|
-
"
|
|
20
|
-
"pg": "^8.16.3"
|
|
23
|
+
"drizzle-orm": "^0.45.1",
|
|
24
|
+
"pg": "^8.16.3",
|
|
25
|
+
"prisma": "^6.0.0",
|
|
26
|
+
"voyage-ai-provider": "^3.0.0"
|
|
21
27
|
},
|
|
22
28
|
"dependencies": {
|
|
23
29
|
"@clack/prompts": "^0.11.0",
|
|
package/registry/config/unrag.config.ts
CHANGED
|
@@ -18,19 +18,18 @@
|
|
|
18
18
|
export const unrag = defineUnragConfig({
|
|
19
19
|
defaults: {
|
|
20
20
|
chunking: {
|
|
21
|
-
chunkSize: 200,
|
|
22
|
-
chunkOverlap: 40,
|
|
21
|
+
chunkSize: 200, // __UNRAG_DEFAULT_chunkSize__
|
|
22
|
+
chunkOverlap: 40, // __UNRAG_DEFAULT_chunkOverlap__
|
|
23
23
|
},
|
|
24
24
|
retrieval: {
|
|
25
|
-
topK: 8,
|
|
25
|
+
topK: 8, // __UNRAG_DEFAULT_topK__
|
|
26
26
|
},
|
|
27
27
|
},
|
|
28
28
|
embedding: {
|
|
29
29
|
provider: "ai",
|
|
30
30
|
config: {
|
|
31
|
-
type: "text", // __UNRAG_EMBEDDING_TYPE__
|
|
32
31
|
model: "openai/text-embedding-3-small", // __UNRAG_EMBEDDING_MODEL__
|
|
33
|
-
timeoutMs: 15_000,
|
|
32
|
+
timeoutMs: 15_000, // __UNRAG_EMBEDDING_TIMEOUT__
|
|
34
33
|
},
|
|
35
34
|
},
|
|
36
35
|
engine: {
|
|
@@ -41,8 +40,8 @@ export const unrag = defineUnragConfig({
|
|
|
41
40
|
* - storeDocumentContent: whether the full original document text is stored in `documents.content`.
|
|
42
41
|
*/
|
|
43
42
|
storage: {
|
|
44
|
-
storeChunkContent: true,
|
|
45
|
-
storeDocumentContent: true,
|
|
43
|
+
storeChunkContent: true, // __UNRAG_STORAGE_storeChunkContent__
|
|
44
|
+
storeDocumentContent: true, // __UNRAG_STORAGE_storeDocumentContent__
|
|
46
45
|
},
|
|
47
46
|
/**
|
|
48
47
|
* Optional extractor modules that can process non-text assets into text outputs.
|
|
@@ -62,9 +61,10 @@ export const unrag = defineUnragConfig({
|
|
|
62
61
|
*
|
|
63
62
|
* Notes:
|
|
64
63
|
* - This generated config is cost-safe by default (all extraction is off).
|
|
65
|
-
* - `unrag init` can enable rich media
|
|
64
|
+
* - `unrag init --rich-media` can enable rich media ingestion for you (extractors + assetProcessing flags).
|
|
66
65
|
* - Tighten fetch allowlists/limits in production if you ingest URL-based assets.
|
|
67
66
|
*/
|
|
67
|
+
// __UNRAG_ASSET_PROCESSING_BLOCK_START__
|
|
68
68
|
assetProcessing: {
|
|
69
69
|
onUnsupportedAsset: "skip",
|
|
70
70
|
onError: "skip",
|
|
@@ -181,6 +181,7 @@ export const unrag = defineUnragConfig({
|
|
|
181
181
|
},
|
|
182
182
|
},
|
|
183
183
|
},
|
|
184
|
+
// __UNRAG_ASSET_PROCESSING_BLOCK_END__
|
|
184
185
|
},
|
|
185
186
|
} as const);
|
|
186
187
|
|
|
package/registry/connectors/google-drive/client.ts
ADDED
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import type { GoogleDriveAuth } from "./types";
|
|
2
|
+
|
|
3
|
+
/**
 * Scopes used when a caller does not supply its own list: the read-only
 * Drive scope plus the read-only Drive metadata scope.
 */
export const DEFAULT_DRIVE_SCOPES = [
  "https://www.googleapis.com/auth/drive.readonly",
  "https://www.googleapis.com/auth/drive.metadata.readonly",
] as const;
|
|
7
|
+
|
|
8
|
+
/** A ready-made OAuth client supplied by the caller; carried through opaquely. */
type OAuthClientAuth = { kind: "oauth_client"; oauthClient: unknown };

/** Raw OAuth credentials from which an OAuth client still has to be built. */
type OAuthConfigAuth = {
  kind: "oauth_config";
  clientId: string;
  clientSecret: string;
  redirectUri: string;
  refreshToken: string;
  accessToken?: string;
};

/** Service-account credentials, with an optional `subject`. */
type ServiceAccountAuth = {
  kind: "service_account";
  credentials: Record<string, any>;
  subject?: string;
};

/** A caller-provided auth object; carried through opaquely. */
type GoogleAuthPassthrough = { kind: "google_auth"; auth: unknown };

/**
 * Internal representation of a Google Drive auth input.
 * The `kind` tag discriminates the four supported variants.
 */
type NormalizedAuth =
  | OAuthClientAuth
  | OAuthConfigAuth
  | ServiceAccountAuth
  | GoogleAuthPassthrough;
|
|
24
|
+
|
|
25
|
+
/**
 * Validates a user-supplied Google Drive auth input at runtime and converts
 * it into the internal `NormalizedAuth` shape.
 *
 * Supported `auth.kind` values:
 * - `"google_auth"`: `auth.auth` is passed through untouched.
 * - `"service_account"`: `credentialsJson` may be a JSON string or an already
 *   parsed object; it must contain `client_email` and `private_key`.
 * - `"oauth"`: either a pre-built `oauthClient`, or the raw
 *   `{ clientId, clientSecret, redirectUri, refreshToken }` set (plus an
 *   optional `accessToken`).
 *
 * @param auth The auth input to validate.
 * @returns The normalized auth description.
 * @throws Error when `auth` is missing, its `kind` is unknown, or a required
 *   field for the selected kind is absent.
 * @throws SyntaxError when `credentialsJson` is a string that is not valid JSON.
 */
export function normalizeGoogleDriveAuth(auth: GoogleDriveAuth): NormalizedAuth {
  if (!auth || typeof auth !== "object") {
    throw new Error("Google Drive auth is required");
  }

  // Single untyped view of the input: every field below is validated at
  // runtime, so the declared type is deliberately not trusted here.
  const input = auth as unknown as Record<string, unknown>;

  const kind = input.kind;
  if (kind !== "oauth" && kind !== "service_account" && kind !== "google_auth") {
    throw new Error(`Unknown Google Drive auth kind: ${String(kind)}`);
  }

  if (kind === "google_auth") {
    const a = input.auth;
    if (!a) throw new Error('Google Drive auth.kind="google_auth" requires auth');
    return { kind: "google_auth", auth: a };
  }

  if (kind === "service_account") {
    const raw = input.credentialsJson;
    if (!raw) {
      throw new Error(
        'Google Drive auth.kind="service_account" requires credentialsJson'
      );
    }

    let parsed: unknown = raw;
    if (typeof raw === "string") {
      try {
        parsed = JSON.parse(raw);
      } catch {
        // Still a SyntaxError (as before), but now naming the offending input.
        // The parser's own message is intentionally dropped: it may echo
        // fragments of the credential text into logs.
        throw new SyntaxError(
          'Google Drive auth.kind="service_account" credentialsJson is not valid JSON'
        );
      }
    }

    // Anything that is not an object (null, number, ...) cannot carry the
    // required fields and is rejected by the check below.
    const credentials: Record<string, any> | undefined =
      parsed !== null && typeof parsed === "object"
        ? (parsed as Record<string, any>)
        : undefined;

    if (!credentials?.client_email || !credentials?.private_key) {
      throw new Error(
        'Google Drive service account credentials must include "client_email" and "private_key".'
      );
    }

    return {
      kind: "service_account",
      credentials,
      subject: input.subject ? String(input.subject) : undefined,
    };
  }

  // kind === "oauth": a ready-made client wins over raw credentials.
  if (input.oauthClient) {
    return { kind: "oauth_client", oauthClient: input.oauthClient };
  }

  const { clientId, clientSecret, redirectUri, refreshToken, accessToken } = input;
  if (!clientId || !clientSecret || !redirectUri || !refreshToken) {
    throw new Error(
      'Google Drive auth.kind="oauth" requires either oauthClient or { clientId, clientSecret, redirectUri, refreshToken }'
    );
  }

  return {
    kind: "oauth_config",
    clientId: String(clientId),
    clientSecret: String(clientSecret),
    redirectUri: String(redirectUri),
    refreshToken: String(refreshToken),
    ...(accessToken ? { accessToken: String(accessToken) } : {}),
  };
}
|
|
82
|
+
|
|
83
|
+
/**
 * Best-effort conversion of an arbitrary thrown value into a string.
 *
 * - `Error` instances yield their `message`.
 * - Strings are returned unchanged.
 * - Other values are JSON-encoded when possible, otherwise coerced with
 *   `String(...)`.
 *
 * Always returns a string, so callers can safely use string methods on the
 * result.
 */
const asMessage = (err: unknown): string => {
  if (err instanceof Error) return err.message;
  if (typeof err === "string") return err;

  try {
    // JSON.stringify yields `undefined` (not a string) for `undefined`,
    // functions and symbols, so the result has to be checked before use.
    const encoded: string | undefined = JSON.stringify(err);
    if (typeof encoded === "string") return encoded;
  } catch {
    // Not serializable (e.g. circular structure or BigInt): fall through.
  }

  return String(err);
};
|
|
91
|
+
|
|
92
|
+
/**
 * Creates a Google Drive API client from a plug-and-play auth input.
 *
 * Note: This uses dynamic imports so the core Unrag package does not require
 * Google dependencies unless the connector is installed into a user project.
 *
 * @param args.auth Auth input; validated by `normalizeGoogleDriveAuth`.
 * @param args.scopes Scopes for the service-account JWT client. Falls back to
 *   `DEFAULT_DRIVE_SCOPES` when omitted or empty. The OAuth and pass-through
 *   variants do not use this value.
 * @returns The Drive v3 client together with the auth client it was built with.
 * @throws Error with an installation hint when "googleapis" or
 *   "google-auth-library" cannot be resolved; any other failure is rethrown
 *   unchanged.
 */
export async function createGoogleDriveClient(args: {
  auth: GoogleDriveAuth;
  scopes?: string[];
}): Promise<{ drive: any; authClient: any }> {
  const normalized = normalizeGoogleDriveAuth(args.auth);
  // Copy the defaults so the shared readonly constant is never handed out.
  const scopes: string[] = args.scopes?.length ? args.scopes : [...DEFAULT_DRIVE_SCOPES];

  let authClient: any;

  try {
    if (normalized.kind === "oauth_client") {
      authClient = normalized.oauthClient;
    } else if (normalized.kind === "google_auth") {
      authClient = normalized.auth;
    } else {
      // google-auth-library (dynamic)
      const gal: any = await import("google-auth-library");

      if (normalized.kind === "oauth_config") {
        const OAuth2Client = gal.OAuth2Client ?? gal.OAuth2;
        if (!OAuth2Client) {
          throw new Error("OAuth2Client not found in google-auth-library");
        }
        const client = new OAuth2Client(
          normalized.clientId,
          normalized.clientSecret,
          normalized.redirectUri
        );
        client.setCredentials({
          refresh_token: normalized.refreshToken,
          ...(normalized.accessToken ? { access_token: normalized.accessToken } : {}),
        });
        authClient = client;
      } else {
        const JWT = gal.JWT;
        if (!JWT) {
          throw new Error("JWT not found in google-auth-library");
        }
        const c = normalized.credentials;
        authClient = new JWT({
          email: c.client_email,
          key: c.private_key,
          scopes,
          ...(normalized.subject ? { subject: normalized.subject } : {}),
        });
      }
    }

    const { google }: any = await import("googleapis");
    if (!google?.drive) {
      throw new Error("googleapis.google.drive not found");
    }

    const drive = google.drive({
      version: "v3",
      auth: authClient,
    });

    return { drive, authClient };
  } catch (err) {
    // Coerce defensively so the string checks below can never throw.
    const msg = String(asMessage(err));
    const code =
      typeof err === "object" && err !== null
        ? (err as { code?: unknown }).code
        : undefined;

    // CommonJS resolution reports "Cannot find module" / MODULE_NOT_FOUND,
    // while the ESM resolver reports "Cannot find package" /
    // ERR_MODULE_NOT_FOUND. Both mean the dependency is not installed.
    const looksLikeMissingModule =
      code === "MODULE_NOT_FOUND" ||
      code === "ERR_MODULE_NOT_FOUND" ||
      msg.includes("Cannot find module") ||
      msg.includes("Cannot find package");
    const mentionsGoogleDependency =
      msg.includes("googleapis") || msg.includes("google-auth-library");

    if (looksLikeMissingModule && mentionsGoogleDependency) {
      // Keep the original error reachable for debugging via `cause`.
      throw Object.assign(
        new Error(
          `Missing Google Drive connector dependencies. Ensure you've installed the connector via \`unrag add google-drive\` (which adds "googleapis" and "google-auth-library"). Original error: ${msg}`
        ),
        { cause: err }
      );
    }
    throw err;
  }
}
|
|
170
|
+
|
|
171
|
+
|
|
package/registry/connectors/google-drive/mime.ts
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
import type { AssetKind } from "../../core/types";
|
|
2
|
+
|
|
3
|
+
/**
 * MIME types of the `application/vnd.google-apps.*` family that this module
 * distinguishes: folders, shortcuts, and the four Google-native editor formats.
 */
export const DRIVE_MIME = {
  folder: "application/vnd.google-apps.folder",
  shortcut: "application/vnd.google-apps.shortcut",
  doc: "application/vnd.google-apps.document",
  sheet: "application/vnd.google-apps.spreadsheet",
  slides: "application/vnd.google-apps.presentation",
  drawing: "application/vnd.google-apps.drawing",
} as const;
|
|
11
|
+
|
|
12
|
+
/** Target MIME types, keyed by short format name, for exported files. */
export const EXPORT_MIME = {
  text: "text/plain",
  csv: "text/csv",
  pptx: "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  png: "image/png",
} as const;
|
|
19
|
+
|
|
20
|
+
/** Short names of the Google-native file formats this module distinguishes. */
export type DriveGoogleNativeKind = "doc" | "sheet" | "slides" | "drawing";
|
|
21
|
+
|
|
22
|
+
/**
 * Result of classifying a Drive MIME type. `kind` is the discriminant;
 * only the `"google_native"` variant carries extra data (`nativeKind`).
 */
export type DriveMimeClassification =
  | { kind: "folder" }
  | { kind: "shortcut" }
  | {
      kind: "google_native";
      nativeKind: DriveGoogleNativeKind;
    }
  | { kind: "binary" };
|
|
27
|
+
|
|
28
|
+
/**
 * Maps a Drive MIME type onto a `DriveMimeClassification`.
 *
 * The input is trimmed and compared exactly against the `DRIVE_MIME` entries.
 * A missing, empty, or unrecognized MIME type is classified as `"binary"`.
 *
 * @param mimeType MIME type of the file, if known.
 * @returns The classification for that MIME type.
 */
export function classifyDriveMimeType(mimeType: string | undefined): DriveMimeClassification {
  const trimmed = String(mimeType ?? "").trim();

  switch (trimmed) {
    case DRIVE_MIME.folder:
      return { kind: "folder" };
    case DRIVE_MIME.shortcut:
      return { kind: "shortcut" };
    case DRIVE_MIME.doc:
      return { kind: "google_native", nativeKind: "doc" };
    case DRIVE_MIME.sheet:
      return { kind: "google_native", nativeKind: "sheet" };
    case DRIVE_MIME.slides:
      return { kind: "google_native", nativeKind: "slides" };
    case DRIVE_MIME.drawing:
      return { kind: "google_native", nativeKind: "drawing" };
    default:
      // Covers the empty string as well as every other MIME type.
      return { kind: "binary" };
  }
}
|
|
42
|
+
|
|
43
|
+
/**
 * Describes how a Google-native file should be exported. `kind` is the
 * discriminant:
 * - `"content"`: export using the given `mimeType`.
 * - `"asset"`: export as an asset of `assetKind` using the given `mimeType`,
 *   with an optional filename extension.
 * - `"unsupported"`: no export plan is available.
 */
export type DriveNativeExportPlan =
  | {
      kind: "content";
      mimeType: string;
    }
  | {
      kind: "asset";
      assetKind: AssetKind;
      mimeType: string;
      filenameExt?: string;
    }
  | { kind: "unsupported" };
|
|
52
|
+
|
|
53
|
+
/**
 * Chooses the export plan for a Google-native file.
 *
 * Default behavior (Notion-like): documents and slides are exported as plain
 * text and sheets as CSV. Drawings are exported as PNG image assets because
 * they have no good text representation.
 *
 * @param nativeKind The Google-native format to export.
 * @returns The export plan, or `{ kind: "unsupported" }` for any value outside
 *   the known formats.
 */
export function getNativeExportPlan(nativeKind: DriveGoogleNativeKind): DriveNativeExportPlan {
  switch (nativeKind) {
    case "doc":
    case "slides":
      return { kind: "content", mimeType: EXPORT_MIME.text };
    case "sheet":
      return { kind: "content", mimeType: EXPORT_MIME.csv };
    case "drawing":
      return {
        kind: "asset",
        assetKind: "image",
        mimeType: EXPORT_MIME.png,
        filenameExt: "png",
      };
    default:
      // Runtime guard for values that bypass the static type.
      return { kind: "unsupported" };
  }
}
|
|
66
|
+
|
|
67
|
+
/**
 * Derives an asset kind from a media (MIME) type.
 *
 * Matching is case-insensitive and ignores surrounding whitespace and any
 * media-type parameters, so `"application/pdf; charset=binary"` is treated
 * like `"application/pdf"`.
 *
 * @param mediaType Media type of the asset, if known.
 * @returns `"pdf"`, `"image"`, `"audio"` or `"video"` for the matching types,
 *   and `"file"` for everything else (including a missing media type).
 */
export function assetKindFromMediaType(mediaType: string | undefined): AssetKind {
  // Keep only the "type/subtype" part; parameters follow the first ";".
  const essence = (String(mediaType ?? "").split(";")[0] ?? "").trim().toLowerCase();

  if (essence === "application/pdf") return "pdf";
  if (essence.startsWith("image/")) return "image";
  if (essence.startsWith("audio/")) return "audio";
  if (essence.startsWith("video/")) return "video";
  return "file";
}
|
|
75
|
+
|
|
76
|
+
|