unrag 0.2.12 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli/index.js +31 -5
- package/package.json +2 -1
- package/registry/connectors/dropbox/client.ts +110 -0
- package/registry/connectors/dropbox/index.ts +6 -0
- package/registry/connectors/dropbox/sync.ts +429 -0
- package/registry/connectors/dropbox/types.ts +58 -0
- package/registry/connectors/google-drive/_api-types.ts +52 -2
- package/registry/connectors/google-drive/index.ts +3 -1
- package/registry/connectors/google-drive/sync.ts +419 -94
- package/registry/connectors/google-drive/types.ts +78 -33
- package/registry/connectors/notion/index.ts +2 -1
- package/registry/connectors/notion/sync.ts +112 -60
- package/registry/connectors/notion/types.ts +22 -30
- package/registry/connectors/onedrive/client.ts +181 -0
- package/registry/connectors/onedrive/index.ts +6 -0
- package/registry/connectors/onedrive/sync.ts +556 -0
- package/registry/connectors/onedrive/types.ts +93 -0
- package/registry/core/connectors.ts +161 -0
- package/registry/core/context-engine.ts +20 -0
- package/registry/core/index.ts +1 -0
- package/registry/manifest.json +49 -20
package/dist/cli/index.js
CHANGED
|
@@ -720,6 +720,10 @@ async function copyRegistryFiles(selection) {
|
|
|
720
720
|
src: path5.join(selection.registryRoot, "core/types.ts"),
|
|
721
721
|
dest: path5.join(installBaseAbs, "core/types.ts")
|
|
722
722
|
},
|
|
723
|
+
{
|
|
724
|
+
src: path5.join(selection.registryRoot, "core/connectors.ts"),
|
|
725
|
+
dest: path5.join(installBaseAbs, "core/connectors.ts")
|
|
726
|
+
},
|
|
723
727
|
{
|
|
724
728
|
src: path5.join(selection.registryRoot, "core/chunking.ts"),
|
|
725
729
|
dest: path5.join(installBaseAbs, "core/chunking.ts")
|
|
@@ -870,7 +874,7 @@ async function copyRegistryFiles(selection) {
|
|
|
870
874
|
});
|
|
871
875
|
if (isCancel(answer)) {
|
|
872
876
|
cancel("Cancelled.");
|
|
873
|
-
return;
|
|
877
|
+
return [];
|
|
874
878
|
}
|
|
875
879
|
if (!answer) {
|
|
876
880
|
continue;
|
|
@@ -912,7 +916,7 @@ async function copyConnectorFiles(selection) {
|
|
|
912
916
|
});
|
|
913
917
|
if (isCancel(answer)) {
|
|
914
918
|
cancel("Cancelled.");
|
|
915
|
-
return;
|
|
919
|
+
return [];
|
|
916
920
|
}
|
|
917
921
|
if (!answer) {
|
|
918
922
|
continue;
|
|
@@ -3815,6 +3819,7 @@ async function runInstallChecks(state) {
|
|
|
3815
3819
|
const coreExists = await exists(path12.join(installDirFull, "core"));
|
|
3816
3820
|
const storeExists = await exists(path12.join(installDirFull, "store"));
|
|
3817
3821
|
const embeddingExists = await exists(path12.join(installDirFull, "embedding"));
|
|
3822
|
+
const coreConnectorsExists = coreExists && await exists(path12.join(installDirFull, "core", "connectors.ts"));
|
|
3818
3823
|
const unragMdExists = await exists(path12.join(installDirFull, "unrag.md"));
|
|
3819
3824
|
const missingDirs = [];
|
|
3820
3825
|
if (!coreExists) {
|
|
@@ -3826,7 +3831,11 @@ async function runInstallChecks(state) {
|
|
|
3826
3831
|
if (!embeddingExists) {
|
|
3827
3832
|
missingDirs.push("embedding/");
|
|
3828
3833
|
}
|
|
3829
|
-
|
|
3834
|
+
const missingCoreFiles = [];
|
|
3835
|
+
if (!coreConnectorsExists) {
|
|
3836
|
+
missingCoreFiles.push("core/connectors.ts");
|
|
3837
|
+
}
|
|
3838
|
+
if (state.installDirExists && missingDirs.length === 0 && missingCoreFiles.length === 0) {
|
|
3830
3839
|
results.push({
|
|
3831
3840
|
id: "install-dir",
|
|
3832
3841
|
title: "Install directory",
|
|
@@ -3834,18 +3843,23 @@ async function runInstallChecks(state) {
|
|
|
3834
3843
|
summary: `Install directory found at ${state.installDir}`,
|
|
3835
3844
|
details: [
|
|
3836
3845
|
`core/: ${coreExists ? "✓" : "✗"}`,
|
|
3846
|
+
`core/connectors.ts: ${coreConnectorsExists ? "✓" : "✗"}`,
|
|
3837
3847
|
`store/: ${storeExists ? "✓" : "✗"}`,
|
|
3838
3848
|
`embedding/: ${embeddingExists ? "✓" : "✗"}`,
|
|
3839
3849
|
`unrag.md: ${unragMdExists ? "✓" : "✗"}`
|
|
3840
3850
|
]
|
|
3841
3851
|
});
|
|
3842
3852
|
} else if (state.installDirExists) {
|
|
3853
|
+
const missing = [
|
|
3854
|
+
...missingDirs,
|
|
3855
|
+
...missingCoreFiles
|
|
3856
|
+
].filter(Boolean);
|
|
3843
3857
|
results.push({
|
|
3844
3858
|
id: "install-dir",
|
|
3845
3859
|
title: "Install directory",
|
|
3846
3860
|
status: "warn",
|
|
3847
3861
|
summary: "Install directory exists but is incomplete.",
|
|
3848
|
-
details: [`Missing: ${missingDirs.join(", ")}`],
|
|
3862
|
+
details: [`Missing: ${missing.join(", ")}`],
|
|
3849
3863
|
fixHints: ["Run `unrag init` to reinstall missing files"]
|
|
3850
3864
|
});
|
|
3851
3865
|
} else {
|
|
@@ -4052,6 +4066,18 @@ async function checkConnectorEnvVars(connector) {
|
|
|
4052
4066
|
"GOOGLE_CLIENT_ID",
|
|
4053
4067
|
"GOOGLE_CLIENT_SECRET"
|
|
4054
4068
|
]
|
|
4069
|
+
},
|
|
4070
|
+
onedrive: {
|
|
4071
|
+
required: [],
|
|
4072
|
+
optional: [
|
|
4073
|
+
"AZURE_TENANT_ID",
|
|
4074
|
+
"AZURE_CLIENT_ID",
|
|
4075
|
+
"AZURE_CLIENT_SECRET"
|
|
4076
|
+
]
|
|
4077
|
+
},
|
|
4078
|
+
dropbox: {
|
|
4079
|
+
required: [],
|
|
4080
|
+
optional: ["DROPBOX_CLIENT_ID", "DROPBOX_CLIENT_SECRET"]
|
|
4055
4081
|
}
|
|
4056
4082
|
};
|
|
4057
4083
|
const envVars = connectorEnvVars[connector];
|
|
@@ -6144,7 +6170,7 @@ var __dirname4 = path18.dirname(__filename4);
|
|
|
6144
6170
|
// Renders dependency changes as a comma-separated list of names; entries
// whose action is "update" get a " (update)" suffix.
var formatDepChanges3 = (changes) => {
  const labels = changes.map(({ name, action }) => {
    const suffix = action === "update" ? " (update)" : "";
    return `${name}${suffix}`;
  });
  return labels.join(", ");
};
|
|
6145
6171
|
// True when `v` is exactly one of the known embedding provider names.
var isEmbeddingProviderName = (v) => [
  "ai",
  "openai",
  "google",
  "openrouter",
  "azure",
  "vertex",
  "bedrock",
  "cohere",
  "mistral",
  "together",
  "ollama",
  "voyage"
].includes(v);
|
|
6146
6172
|
// True when `v` is exactly one of the known extractor names.
var isExtractorName = (v) => {
  switch (v) {
    case "pdf-llm":
    case "pdf-text-layer":
    case "pdf-ocr":
    case "image-ocr":
    case "image-caption-llm":
    case "audio-transcribe":
    case "video-transcribe":
    case "video-frames":
    case "file-text":
    case "file-docx":
    case "file-pptx":
    case "file-xlsx":
      return true;
    default:
      return false;
  }
};
|
|
6147
|
-
var isConnectorName = (v) => v === "notion" || v === "google-drive";
|
|
6173
|
+
// True when `v` is exactly one of the known connector names.
var isConnectorName = (v) => ["notion", "google-drive", "onedrive", "dropbox"].includes(v);
|
|
6148
6174
|
// True when `v` is exactly one of the known battery names.
var isBatteryName = (v) => {
  switch (v) {
    case "reranker":
    case "eval":
    case "debug":
      return true;
    default:
      return false;
  }
};
|
|
6149
6175
|
// Normalizes `xs` into a list of trimmed, non-empty strings.
// Anything that is not an array yields an empty list.
var toStringList = (xs) => {
  if (!Array.isArray(xs)) {
    return [];
  }
  const trimmed = xs.map((x) => String(x).trim());
  return trimmed.filter((s) => s !== "");
};
|
|
6150
6176
|
// Normalizes `xs` with toStringList and keeps only recognized extractor names.
var toExtractorNames = (xs) => {
  const names = toStringList(xs);
  return names.filter((name) => isExtractorName(name));
};
|
package/package.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
"bin": {
|
|
7
7
|
"unrag": "./dist/cli/index.js"
|
|
8
8
|
},
|
|
9
|
-
"version": "0.2.12",
|
|
9
|
+
"version": "0.3.1",
|
|
10
10
|
"private": false,
|
|
11
11
|
"license": "Apache-2.0",
|
|
12
12
|
"devDependencies": {
|
|
@@ -41,6 +41,7 @@
|
|
|
41
41
|
"scripts": {
|
|
42
42
|
"build": "bun build --target=node --format=esm --packages=external --outfile dist/cli/index.js cli/index.ts && bun build --target=node --format=esm --production --outfile dist/debug-tui/index.js registry/debug/tui/App.tsx",
|
|
43
43
|
"test": "bun test",
|
|
44
|
+
"types:check": "tsc -p tsconfig.json --noEmit",
|
|
44
45
|
"lint": "biome check .",
|
|
45
46
|
"unrag:doctor": "unrag doctor --config .unrag/doctor.json",
|
|
46
47
|
"unrag:doctor:db": "unrag doctor --config .unrag/doctor.json --db",
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
DropboxAccessTokenAuth,
|
|
3
|
+
DropboxAuth,
|
|
4
|
+
DropboxRefreshTokenAuth
|
|
5
|
+
} from '@registry/connectors/dropbox/types'
|
|
6
|
+
|
|
7
|
+
const API_BASE = 'https://api.dropboxapi.com/2'
|
|
8
|
+
const CONTENT_BASE = 'https://content.dropboxapi.com/2'
|
|
9
|
+
const TOKEN_URL = 'https://api.dropboxapi.com/oauth2/token'
|
|
10
|
+
|
|
11
|
+
/** Type guard: narrows `auth` to the variant whose `kind` is `'access_token'`. */
const isAccessToken = (auth: DropboxAuth): auth is DropboxAccessTokenAuth => {
	return auth.kind === 'access_token'
}
|
|
13
|
+
|
|
14
|
+
/** Type guard: narrows `auth` to the variant whose `kind` is `'oauth_refresh_token'`. */
const isRefreshToken = (auth: DropboxAuth): auth is DropboxRefreshTokenAuth => {
	return auth.kind === 'oauth_refresh_token'
}
|
|
16
|
+
|
|
17
|
+
/**
 * POSTs a form-encoded request to the Dropbox OAuth token endpoint and
 * returns the `access_token` field of the JSON response.
 *
 * @param params - Form fields sent as the URL-encoded request body.
 * @returns The access token, coerced to a string.
 * @throws Error when the response status is not OK, or when the response
 *   carries no (or an empty) `access_token`.
 */
const requestToken = async (params: Record<string, string>) => {
	const response = await fetch(TOKEN_URL, {
		method: 'POST',
		headers: {'Content-Type': 'application/x-www-form-urlencoded'},
		body: new URLSearchParams(params)
	})
	if (!response.ok) {
		const detail = await response.text()
		throw new Error(
			`Dropbox token request failed (${response.status}): ${detail}`
		)
	}

	const payload = (await response.json()) as Record<string, unknown>
	const token = String(payload.access_token ?? '')
	if (token) {
		return token
	}
	throw new Error('Dropbox token response missing access_token')
}
|
|
35
|
+
|
|
36
|
+
/**
 * Resolves a bearer token for the given Dropbox auth configuration.
 *
 * - `access_token` auth: returns the configured token as-is.
 * - `oauth_refresh_token` auth: exchanges the refresh token for an access
 *   token through `requestToken` (one token request per call).
 *
 * @param auth - Dropbox auth configuration.
 * @returns The access token to send as a bearer credential.
 * @throws Error when `auth` is missing or not an object, when required
 *   fields of the selected variant are empty, or when `kind` is unknown.
 */
export const getDropboxAccessToken = async (
	auth: DropboxAuth
): Promise<string> => {
	if (!auth || typeof auth !== 'object') {
		throw new Error('Dropbox auth is required')
	}

	if (isAccessToken(auth)) {
		const {accessToken} = auth
		if (accessToken) {
			return accessToken
		}
		throw new Error('Dropbox access token is required')
	}

	if (isRefreshToken(auth)) {
		const {clientId, clientSecret, refreshToken} = auth
		const hasAllFields = Boolean(clientId && clientSecret && refreshToken)
		if (!hasAllFields) {
			throw new Error(
				'Dropbox refresh token auth requires clientId, clientSecret, and refreshToken'
			)
		}
		return requestToken({
			grant_type: 'refresh_token',
			client_id: clientId,
			client_secret: clientSecret,
			refresh_token: refreshToken
		})
	}

	// `auth` is known to be a non-null object here (see the first guard), so
	// its `kind` can be read directly for the error message.
	const unknownKind = (auth as {kind?: unknown}).kind
	throw new Error(`Unknown Dropbox auth kind: ${String(unknownKind)}`)
}
|
|
67
|
+
|
|
68
|
+
/**
 * Calls a Dropbox RPC-style endpoint under `API_BASE` with a JSON body.
 *
 * @param args.auth - Auth configuration used to obtain the bearer token.
 * @param args.path - Endpoint path relative to `API_BASE`; a single leading
 *   slash is stripped.
 * @param args.body - JSON-serializable request body; defaults to `{}`.
 * @returns The parsed JSON response, cast to `T` without validation.
 * @throws Error carrying the HTTP status and response text when the
 *   response is not OK.
 */
export const dropboxApiFetch = async <T>(args: {
	auth: DropboxAuth
	path: string
	body?: unknown
}) => {
	const accessToken = await getDropboxAccessToken(args.auth)
	const relativePath = args.path.replace(/^\//, '')
	const endpoint = `${API_BASE}/${relativePath}`

	const response = await fetch(endpoint, {
		method: 'POST',
		headers: {
			Authorization: `Bearer ${accessToken}`,
			'Content-Type': 'application/json'
		},
		body: JSON.stringify(args.body ?? {})
	})

	if (response.ok) {
		return (await response.json()) as T
	}
	const detail = await response.text()
	throw new Error(`Dropbox API failed (${response.status}): ${detail}`)
}
|
|
88
|
+
|
|
89
|
+
/**
 * Serializes `value` as JSON that is safe to place in an HTTP header.
 *
 * Header values must be ASCII, so DEL (0x7F) and every non-ASCII UTF-16 code
 * unit are emitted as `\uXXXX` escapes. The result is still valid JSON that
 * decodes to the same value.
 */
const toHeaderSafeJson = (value: unknown): string =>
	JSON.stringify(value).replace(
		/[\u007f-\uffff]/g,
		(ch) => `\\u${ch.charCodeAt(0).toString(16).padStart(4, '0')}`
	)

/**
 * Downloads a file through the Dropbox content endpoint (`files/download`).
 *
 * @param args.auth - Auth configuration used to obtain the bearer token.
 * @param args.path - Dropbox path (or id) of the file; sent in the
 *   `Dropbox-API-Arg` header.
 * @returns The full response body as bytes plus the response `content-type`
 *   header (`undefined` when the header is absent).
 * @throws Error carrying the HTTP status and response text when the
 *   response is not OK.
 */
export const dropboxDownload = async (args: {
	auth: DropboxAuth
	path: string
}) => {
	const token = await getDropboxAccessToken(args.auth)
	const res = await fetch(`${CONTENT_BASE}/files/download`, {
		method: 'POST',
		headers: {
			Authorization: `Bearer ${token}`,
			// Plain JSON.stringify leaves non-ASCII characters unescaped, which
			// makes the header invalid or mis-encoded for paths such as
			// "/résumé.pdf" or "/文档.txt".
			'Dropbox-API-Arg': toHeaderSafeJson({path: args.path})
		}
	})
	if (!res.ok) {
		throw new Error(
			`Dropbox download failed (${res.status}): ${await res.text()}`
		)
	}
	return {
		bytes: new Uint8Array(await res.arrayBuffer()),
		contentType: res.headers.get('content-type') ?? undefined
	}
}
|
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
import {
|
|
2
|
+
dropboxApiFetch,
|
|
3
|
+
dropboxDownload
|
|
4
|
+
} from '@registry/connectors/dropbox/client'
|
|
5
|
+
import type {
|
|
6
|
+
DropboxAuth,
|
|
7
|
+
DropboxCheckpoint,
|
|
8
|
+
StreamDropboxFilesInput,
|
|
9
|
+
StreamDropboxFolderInput
|
|
10
|
+
} from '@registry/connectors/dropbox/types'
|
|
11
|
+
import type {ConnectorStream} from '@registry/core/connectors'
|
|
12
|
+
import type {AssetInput, Metadata} from '@registry/core/types'
|
|
13
|
+
|
|
14
|
+
const DEFAULT_MAX_BYTES = 15 * 1024 * 1024 // 15MB
|
|
15
|
+
|
|
16
|
+
/**
 * Prepends an optional prefix to `rest`, separated by exactly one `:`.
 *
 * The prefix is trimmed first; a missing or blank prefix returns `rest`
 * unchanged, and a prefix that already ends with `:` gets no extra separator.
 */
const joinPrefix = (prefix: string | undefined, rest: string) => {
	const trimmed = (prefix ?? '').trim()
	if (trimmed === '') {
		return rest
	}
	if (trimmed.endsWith(':')) {
		return trimmed + rest
	}
	return `${trimmed}:${rest}`
}
|
|
23
|
+
|
|
24
|
+
/**
 * Builds the source id for a Dropbox path: `dropbox:path:<pathLower>`,
 * with the optional prefix applied by `joinPrefix`.
 */
const buildSourceId = (prefix: string | undefined, pathLower: string) => {
	const unprefixed = `dropbox:path:${pathLower}`
	return joinPrefix(prefix, unprefixed)
}
|
|
26
|
+
|
|
27
|
+
/**
 * Converts an arbitrary thrown value into a printable message.
 *
 * - `Error` instances yield their `message`.
 * - Strings are returned unchanged.
 * - Everything else is JSON-serialized, falling back to `String(err)` when
 *   serialization throws (e.g. cyclic objects, BigInt) or produces no output.
 *
 * @param err - The caught value.
 * @returns A string in every case.
 */
const asMessage = (err: unknown): string => {
	if (err instanceof Error) {
		return err.message
	}
	if (typeof err === 'string') {
		return err
	}
	try {
		// JSON.stringify returns undefined (not a string) for undefined,
		// functions and symbols; fall back so the result is always a string.
		return JSON.stringify(err) ?? String(err)
	} catch {
		return String(err)
	}
}
|
|
37
|
+
|
|
38
|
+
/**
 * Reports whether a media type denotes textual content that can be decoded
 * as a string: any `text/*` type, `application/json`, or `application/xml`.
 *
 * Parameters after `;` (such as `charset=utf-8`) are ignored, so a raw
 * `content-type` header value can be passed directly.
 *
 * @param mediaType - Media type or `content-type` header value; may be absent.
 * @returns `false` for a missing or blank value.
 */
const isTextLike = (mediaType: string | undefined) => {
	// Keep only the type/subtype part, e.g. "application/json; charset=utf-8"
	// becomes "application/json".
	const [rawType = ''] = String(mediaType ?? '').split(';')
	const mt = rawType.trim().toLowerCase()
	if (!mt) {
		return false
	}
	return (
		mt.startsWith('text/') ||
		mt === 'application/json' ||
		mt === 'application/xml'
	)
}
|
|
52
|
+
|
|
53
|
+
/**
 * Maps a media type to the asset kind used for ingestion.
 *
 * `application/pdf` maps to `'pdf'`; `image/*`, `audio/*` and `video/*` map
 * to their respective kinds; anything else (including a missing value) maps
 * to `'file'`. Parameters after `;` are ignored, so a raw `content-type`
 * header value can be passed directly.
 *
 * @param mediaType - Media type or `content-type` header value; may be absent.
 */
const assetKindFromMediaType = (
	mediaType: string | undefined
): AssetInput['kind'] => {
	// Drop parameters so "application/pdf; charset=binary" still matches the
	// exact comparison below.
	const [rawType = ''] = String(mediaType ?? '').split(';')
	const mt = rawType.trim().toLowerCase()
	if (mt === 'application/pdf') {
		return 'pdf'
	}
	if (mt.startsWith('image/')) {
		return 'image'
	}
	if (mt.startsWith('audio/')) {
		return 'audio'
	}
	if (mt.startsWith('video/')) {
		return 'video'
	}
	return 'file'
}
|
|
73
|
+
|
|
74
|
+
/**
 * Subset of a Dropbox metadata entry as read by this module. Every field
 * except the `.tag` discriminator is optional and is defaulted or checked
 * where it is used.
 */
type DropboxEntry = {
	/** Discriminator: regular file, folder, or deletion marker. */
	'.tag': 'file' | 'folder' | 'deleted'
	id?: string
	name?: string
	/** Preferred over `path_display` when resolving an entry's path. */
	path_lower?: string
	path_display?: string
	/** Compared against the per-file byte limit before downloading. */
	size?: number
	client_modified?: string
	server_modified?: string
	content_hash?: string
}
|
|
85
|
+
|
|
86
|
+
/**
 * One page of a `files/list_folder` or `files/list_folder/continue`
 * response, as consumed by `streamFolder`.
 */
type DropboxListFolderResponse = {
	/** Entries in this page; treated as empty when absent. */
	entries?: DropboxEntry[]
	/** Cursor to continue from; also emitted as the checkpoint. */
	cursor?: string
	/** When falsy, the listing loop stops after this page. */
	has_more?: boolean
}
|
|
91
|
+
|
|
92
|
+
/** Response of `files/get_metadata` as requested by `streamFiles`; same shape as a listing entry. */
type DropboxMetadataResponse = DropboxEntry
|
|
93
|
+
|
|
94
|
+
/**
 * Returns the entry's path, preferring `path_lower` over `path_display`.
 * The result is trimmed, and is an empty string when neither is present.
 */
const resolvePath = (entry: DropboxEntry) => {
	const candidate = entry.path_lower ?? entry.path_display ?? ''
	return String(candidate).trim()
}
|
|
96
|
+
|
|
97
|
+
/**
 * Downloads one Dropbox file and shapes it for ingestion.
 *
 * The file is fetched in full before its size is checked, so
 * `maxBytesPerFile` limits what is ingested, not what is transferred.
 *
 * @param args.auth - Auth configuration passed to `dropboxDownload`.
 * @param args.path - Dropbox path of the file; also used as the asset id
 *   and, via its last segment, as the asset filename.
 * @param args.maxBytesPerFile - Upper bound on the downloaded byte length.
 * @returns `{skipped: true, reason: 'too_large'}` when the download exceeds
 *   the limit; otherwise `{skipped: false, content, assets}` with either the
 *   decoded text (text-like content types) or a single binary asset.
 */
const downloadFile = async (args: {
	auth: DropboxAuth
	path: string
	maxBytesPerFile: number
}) => {
	const downloaded = await dropboxDownload({
		auth: args.auth,
		path: args.path
	})
	const {bytes, contentType} = downloaded

	if (bytes.byteLength > args.maxBytesPerFile) {
		return {skipped: true as const, reason: 'too_large'}
	}

	if (isTextLike(contentType)) {
		// Non-fatal decoding: malformed sequences do not throw.
		const decoder = new TextDecoder('utf-8', {fatal: false})
		const content = decoder.decode(bytes)
		return {skipped: false as const, content, assets: []}
	}

	const filename = args.path.split('/').pop()
	const asset: AssetInput = {
		assetId: args.path,
		kind: assetKindFromMediaType(contentType),
		data: {
			kind: 'bytes',
			bytes,
			mediaType: contentType || 'application/octet-stream',
			filename
		}
	}
	return {skipped: false as const, content: '', assets: [asset]}
}
|
|
125
|
+
|
|
126
|
+
/**
 * Builds the metadata record attached to an upserted Dropbox file.
 *
 * `fileId`, `name` and `path` default to an empty string when the entry
 * lacks them; the remaining fields are copied through and may be undefined.
 */
const buildMetadata = (entry: DropboxEntry): Metadata => {
	const path = entry.path_lower ?? entry.path_display ?? ''
	return {
		connector: 'dropbox',
		kind: 'file',
		fileId: entry.id ?? '',
		name: entry.name ?? '',
		path,
		size: entry.size,
		clientModified: entry.client_modified,
		serverModified: entry.server_modified,
		contentHash: entry.content_hash
	}
}
|
|
137
|
+
|
|
138
|
+
/**
 * Heuristically decides whether an error means "file not found".
 *
 * Matches either a `not_found` marker anywhere in the message
 * (case-insensitive) or an HTTP 404 status in the `(<status>)` form used by
 * the Dropbox client errors, e.g. `Dropbox API failed (404): ...`.
 *
 * The status is matched in parentheses on purpose: a bare "404" substring
 * also appears in unrelated text (request ids, byte counts), and a false
 * positive here can lead to a `delete` event for a document that still exists.
 *
 * @param err - The caught value; anything other than a non-null object
 *   yields `false`.
 */
const isNotFound = (err: unknown): boolean => {
	const message =
		typeof err === 'object' && err !== null
			? String((err as {message?: unknown}).message ?? '')
			: ''
	return message.toLowerCase().includes('not_found') || /\(404\)/.test(message)
}
|
|
147
|
+
|
|
148
|
+
/**
 * Stream Dropbox folder changes (cursor-based).
 *
 * Lists `input.folderPath` through `files/list_folder`, or resumes from
 * `input.checkpoint.cursor` through `files/list_folder/continue`, and yields
 * connector events for each entry:
 * - `deleted` entries yield a `delete` event when `options.deleteOnRemoved`
 *   is set and the entry has a path;
 * - `folder` entries and entries without a path are ignored;
 * - files yield `progress` (`file:start`), then either a `file_skipped`
 *   warning (larger than `options.maxBytesPerFile`) or an `upsert` followed
 *   by `progress` (`file:success`);
 * - per-file download failures yield a `file_not_found` or `file_error`
 *   warning instead of aborting the stream.
 * After every page that carries a cursor, a `checkpoint` event is yielded.
 *
 * Pass `"/"` as `folderPath` to sync the account root.
 *
 * @throws Error when `input.folderPath` is empty. Errors from the listing
 *   requests themselves propagate to the consumer.
 */
export async function* streamFolder(
	input: StreamDropboxFolderInput
): ConnectorStream<DropboxCheckpoint> {
	const options = input.options ?? {}
	const maxBytesPerFile = options.maxBytesPerFile ?? DEFAULT_MAX_BYTES
	const deleteOnRemoved = options.deleteOnRemoved ?? false
	const recursive = options.recursive ?? true
	const folderPath = String(input.folderPath ?? '').trim()
	if (!folderPath) {
		throw new Error('Dropbox folderPath is required')
	}
	// Dropbox addresses the root folder as an empty string and rejects "/".
	// Translate only the value sent to the API; the checkpoint keeps the
	// caller's original folderPath.
	const listPath = folderPath === '/' ? '' : folderPath

	let cursor = input.checkpoint?.cursor
	let processed = 0

	while (true) {
		const data = cursor
			? await dropboxApiFetch<DropboxListFolderResponse>({
					auth: input.auth,
					path: 'files/list_folder/continue',
					body: {cursor}
				})
			: await dropboxApiFetch<DropboxListFolderResponse>({
					auth: input.auth,
					path: 'files/list_folder',
					body: {
						path: listPath,
						recursive,
						include_deleted: true
					}
				})

		const entries = data.entries ?? []
		for (const entry of entries) {
			// Counts every listed entry, including folders and deletions.
			processed += 1
			const entryPath = resolvePath(entry)
			const sourceId = entryPath
				? buildSourceId(input.sourceIdPrefix, entryPath)
				: ''

			if (entry['.tag'] === 'deleted') {
				if (deleteOnRemoved && sourceId) {
					yield {type: 'delete', input: {sourceId}}
				}
				continue
			}
			if (entry['.tag'] === 'folder') {
				continue
			}
			if (!entryPath) {
				continue
			}

			yield {
				type: 'progress',
				message: 'file:start',
				current: processed,
				sourceId,
				entityId: entry.id ?? entryPath
			}

			// Cheap pre-check on the listed size avoids downloading files
			// that are already known to be too large.
			if (
				Number.isFinite(entry.size) &&
				(entry.size as number) > maxBytesPerFile
			) {
				yield {
					type: 'warning',
					code: 'file_skipped',
					message: `Skipping file because it exceeds maxBytesPerFile (${maxBytesPerFile}).`,
					data: {path: entryPath, sourceId, reason: 'too_large'}
				}
				continue
			}

			try {
				const payload = await downloadFile({
					auth: input.auth,
					path: entryPath,
					maxBytesPerFile
				})
				if (payload.skipped) {
					yield {
						type: 'warning',
						code: 'file_skipped',
						message: `Skipping file because it exceeds maxBytesPerFile (${maxBytesPerFile}).`,
						data: {path: entryPath, sourceId, reason: 'too_large'}
					}
					continue
				}

				yield {
					type: 'upsert',
					input: {
						sourceId,
						content: payload.content,
						assets: payload.assets,
						metadata: buildMetadata(entry)
					}
				}

				yield {
					type: 'progress',
					message: 'file:success',
					current: processed,
					sourceId,
					entityId: entry.id ?? entryPath
				}
			} catch (err) {
				if (isNotFound(err)) {
					yield {
						type: 'warning',
						code: 'file_not_found',
						message: 'Dropbox file not found or inaccessible.',
						data: {path: entryPath, sourceId}
					}
					if (deleteOnRemoved) {
						yield {type: 'delete', input: {sourceId}}
					}
				} else {
					yield {
						type: 'warning',
						code: 'file_error',
						message: asMessage(err),
						data: {path: entryPath, sourceId}
					}
				}
			}
		}

		if (data.cursor) {
			yield {
				type: 'checkpoint',
				checkpoint: {
					cursor: data.cursor,
					path: folderPath
				}
			}
		}

		cursor = data.cursor ?? cursor
		if (!data.has_more) {
			break
		}
	}
}
|
|
296
|
+
|
|
297
|
+
/**
 * Streams the events for one explicit Dropbox file id: fetches its metadata,
 * then yields either a `file_skipped` warning (non-file entry, missing path,
 * or larger than `maxBytesPerFile`) or an `upsert` followed by a
 * `file:success` progress event.
 *
 * Errors are not caught here; the caller turns them into warnings.
 */
async function* streamDropboxFileById(args: {
	auth: DropboxAuth
	fileId: string
	sourceIdPrefix: string | undefined
	maxBytesPerFile: number
	current: number
	total: number
}): ConnectorStream<DropboxCheckpoint> {
	const {auth, fileId, maxBytesPerFile} = args

	const meta = await dropboxApiFetch<DropboxMetadataResponse>({
		auth,
		path: 'files/get_metadata',
		body: {path: fileId}
	})
	if (meta['.tag'] !== 'file') {
		yield {
			type: 'warning',
			code: 'file_skipped',
			message: 'Skipping non-file entry.',
			data: {fileId}
		}
		return
	}

	const entryPath = resolvePath(meta)
	if (!entryPath) {
		yield {
			type: 'warning',
			code: 'file_skipped',
			message: 'Missing path for file.',
			data: {fileId}
		}
		return
	}

	const sourceId = buildSourceId(args.sourceIdPrefix, entryPath)
	// Cheap pre-check on the reported size avoids downloading files that are
	// already known to be too large.
	if (Number.isFinite(meta.size) && (meta.size as number) > maxBytesPerFile) {
		yield {
			type: 'warning',
			code: 'file_skipped',
			message: `Skipping file because it exceeds maxBytesPerFile (${maxBytesPerFile}).`,
			data: {path: entryPath, sourceId, reason: 'too_large'}
		}
		return
	}

	const payload = await downloadFile({
		auth,
		path: entryPath,
		maxBytesPerFile
	})
	if (payload.skipped) {
		yield {
			type: 'warning',
			code: 'file_skipped',
			message: `Skipping file because it exceeds maxBytesPerFile (${maxBytesPerFile}).`,
			data: {path: entryPath, sourceId, reason: 'too_large'}
		}
		return
	}

	yield {
		type: 'upsert',
		input: {
			sourceId,
			content: payload.content,
			assets: payload.assets,
			metadata: buildMetadata(meta)
		}
	}

	yield {
		type: 'progress',
		message: 'file:success',
		current: args.current,
		total: args.total,
		sourceId,
		entityId: fileId
	}
}

/**
 * Stream explicit Dropbox file IDs.
 *
 * Processes `input.fileIds` in order, starting at `input.checkpoint.index`
 * (default 0). Blank ids are ignored. For every other id this yields a
 * `file:start` progress event, the per-file events, and then a `checkpoint`
 * pointing at the next index. The checkpoint is emitted whether the file was
 * upserted, skipped, or failed, so a resumed run never revisits an id that
 * was already handled.
 *
 * Per-file failures are reported as `file_not_found` or `file_error`
 * warnings and do not abort the stream.
 */
export async function* streamFiles(
	input: StreamDropboxFilesInput
): ConnectorStream<DropboxCheckpoint> {
	const options = input.options ?? {}
	const maxBytesPerFile = options.maxBytesPerFile ?? DEFAULT_MAX_BYTES
	const fileIds = Array.isArray(input.fileIds) ? input.fileIds : []
	const startIndex = Math.max(0, input.checkpoint?.index ?? 0)

	let processed = 0
	for (let i = startIndex; i < fileIds.length; i++) {
		const fileId = String(fileIds[i] ?? '').trim()
		if (!fileId) {
			continue
		}
		processed += 1

		yield {
			type: 'progress',
			message: 'file:start',
			current: processed,
			total: fileIds.length,
			entityId: fileId
		}

		try {
			yield* streamDropboxFileById({
				auth: input.auth,
				fileId,
				sourceIdPrefix: input.sourceIdPrefix,
				maxBytesPerFile,
				current: processed,
				total: fileIds.length
			})
		} catch (err) {
			if (isNotFound(err)) {
				yield {
					type: 'warning',
					code: 'file_not_found',
					message: 'Dropbox file not found or inaccessible.',
					data: {fileId}
				}
			} else {
				yield {
					type: 'warning',
					code: 'file_error',
					message: asMessage(err),
					data: {fileId}
				}
			}
		}

		// Reached for every non-blank id, including skipped and failed ones.
		yield {
			type: 'checkpoint',
			checkpoint: {
				index: i + 1,
				fileId
			}
		}
	}
}
|
|
422
|
+
|
|
423
|
+
/**
 * Public Dropbox connector surface: exposes the folder-based and the
 * explicit-file-id streaming generators under one object.
 */
export const dropboxConnector = {
	streamFolder: streamFolder,
	streamFiles: streamFiles
}
|