@byted-las/contextlake-openclaw 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +45 -23
- package/dist/src/commands/cli.d.ts +1 -1
- package/dist/src/commands/cli.js +10 -14
- package/dist/src/commands/index.js +11 -4
- package/dist/src/commands/slashcmd.js +4 -9
- package/dist/src/commands/tools.d.ts +5 -0
- package/dist/src/commands/tools.js +180 -10
- package/dist/src/lib/actions/lance-tools.d.ts +13 -0
- package/dist/src/lib/actions/lance-tools.js +73 -0
- package/dist/src/lib/actions/las-tools.js +58 -0
- package/dist/src/lib/actions/profiler.d.ts +4 -3
- package/dist/src/lib/actions/profiler.js +156 -141
- package/dist/src/lib/actions/s3-tools.d.ts +21 -0
- package/dist/src/lib/actions/s3-tools.js +221 -0
- package/dist/src/skills/SKILL.md +14 -151
- package/dist/src/skills/las-data-profiler/SKILL.md +14 -151
- package/dist/src/utils/config.js +5 -4
- package/dist/src/utils/credentials.d.ts +4 -0
- package/openclaw.plugin.json +1 -1
- package/package.json +3 -1
- package/src/commands/cli.ts +10 -14
- package/src/commands/index.ts +16 -4
- package/src/commands/slashcmd.ts +4 -10
- package/src/commands/tools.ts +177 -12
- package/src/lib/actions/lance-tools.ts +58 -0
- package/src/lib/actions/las-tools.ts +56 -0
- package/src/lib/actions/profiler.ts +148 -157
- package/src/lib/actions/s3-tools.ts +203 -0
- package/src/skills/las-data-profiler/SKILL.md +14 -151
- package/src/utils/config.ts +5 -4
- package/src/utils/credentials.ts +6 -0
- package/src/lib/scripts/s3_catalog.py +0 -617
|
@@ -1,21 +1,20 @@
|
|
|
1
1
|
import * as path from 'path';
|
|
2
2
|
import * as fs from 'fs';
|
|
3
3
|
import * as os from 'os';
|
|
4
|
-
import {
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
// Types
|
|
8
|
-
// ---------------------------------------------------------------------------
|
|
4
|
+
import { listS3Objects } from './s3-tools';
|
|
5
|
+
import { writeLanceCatalog } from './lance-tools';
|
|
6
|
+
import * as mime from 'mime-types';
|
|
9
7
|
|
|
10
8
|
/**
 * Input accepted by `connectDataSource`.
 *
 * Only `datasource_name` and `url` are mandatory. `vendor`, `bucket` and
 * `prefix` are overwritten by `connectDataSource` from the parsed `url`.
 */
export interface ConnectParams {
  /** Name of the data source; `connectDataSource` throws when it is missing. */
  datasource_name: string;
  /**
   * Location to catalog, e.g. `tos://bucket/prefix`. The schemes `tos`, `oss`,
   * `cos` and `s3` are recognised; a `file://` URL or a path starting with `/`
   * selects the local file system.
   */
  url: string;
  /** Storage vendor; set from the `url` scheme (`'local'` for file paths). */
  vendor?: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
  /** Service endpoint; required for every remote vendor except `aws`. */
  endpoint?: string;
  /** Access key; when absent the stored credentials are consulted. */
  access_key?: string;
  /** Secret key; when absent the stored credentials are consulted. */
  secret_key?: string;
  /** Region forwarded to the object-listing call. */
  region?: string;
  /** Bucket name (or the local root path for `vendor: 'local'`); set from `url`. */
  bucket?: string;
  /** Key prefix to list under (`'.'` for local paths); set from `url`. */
  prefix?: string;
  /** Exported to `env.sh` as `LAS_SAMPLE_ROWS` when set to a truthy value. */
  sample_rows?: number;
}
|
|
21
20
|
|
|
@@ -33,16 +32,7 @@ export interface ConnectResult {
|
|
|
33
32
|
error?: string;
|
|
34
33
|
}
|
|
35
34
|
|
|
36
|
-
// ---------------------------------------------------------------------------
|
|
37
|
-
// Constants
|
|
38
|
-
// ---------------------------------------------------------------------------
|
|
39
|
-
|
|
40
35
|
const BASE_DIR = path.join(os.homedir(), '.openclaw', 'contextlake', 'profiler');
|
|
41
|
-
const PYTHON_DEPS = ['boto3', 'lancedb', 'pyarrow', 'pandas', 'Pillow', 'mutagen', 'pymupdf'];
|
|
42
|
-
|
|
43
|
-
// ---------------------------------------------------------------------------
|
|
44
|
-
// Helpers
|
|
45
|
-
// ---------------------------------------------------------------------------
|
|
46
36
|
|
|
47
37
|
function getDataSourceDir(name: string): string {
|
|
48
38
|
return path.join(BASE_DIR, name);
|
|
@@ -52,10 +42,6 @@ function ensureDir(dir: string): void {
|
|
|
52
42
|
fs.mkdirSync(dir, { recursive: true });
|
|
53
43
|
}
|
|
54
44
|
|
|
55
|
-
/**
|
|
56
|
-
* Generate env.sh with all connection parameters for this datasource.
|
|
57
|
-
* This file can be sourced to re-run the profiler or for debugging.
|
|
58
|
-
*/
|
|
59
45
|
function writeEnvFile(dir: string, params: ConnectParams): string {
|
|
60
46
|
const envPath = path.join(dir, 'env.sh');
|
|
61
47
|
const lines: string[] = [
|
|
@@ -69,21 +55,11 @@ function writeEnvFile(dir: string, params: ConnectParams): string {
|
|
|
69
55
|
`export LAS_PREFIX="${params.prefix}"`,
|
|
70
56
|
];
|
|
71
57
|
|
|
72
|
-
if (params.endpoint) {
|
|
73
|
-
|
|
74
|
-
}
|
|
75
|
-
if (params.
|
|
76
|
-
|
|
77
|
-
}
|
|
78
|
-
if (params.secret_key) {
|
|
79
|
-
lines.push(`export LAS_SECRET_KEY="${params.secret_key}"`);
|
|
80
|
-
}
|
|
81
|
-
if (params.region) {
|
|
82
|
-
lines.push(`export LAS_REGION="${params.region}"`);
|
|
83
|
-
}
|
|
84
|
-
if (params.sample_rows) {
|
|
85
|
-
lines.push(`export LAS_SAMPLE_ROWS="${params.sample_rows}"`);
|
|
86
|
-
}
|
|
58
|
+
if (params.endpoint) lines.push(`export LAS_ENDPOINT="${params.endpoint}"`);
|
|
59
|
+
if (params.access_key) lines.push(`export LAS_ACCESS_KEY="${params.access_key}"`);
|
|
60
|
+
if (params.secret_key) lines.push(`export LAS_SECRET_KEY="${params.secret_key}"`);
|
|
61
|
+
if (params.region) lines.push(`export LAS_REGION="${params.region}"`);
|
|
62
|
+
if (params.sample_rows) lines.push(`export LAS_SAMPLE_ROWS="${params.sample_rows}"`);
|
|
87
63
|
|
|
88
64
|
lines.push(`export LAS_DB_PATH="${path.join(dir, 'catalog_db')}"`);
|
|
89
65
|
lines.push(`export LAS_DATASOURCE_NAME="${params.datasource_name}"`);
|
|
@@ -93,65 +69,78 @@ function writeEnvFile(dir: string, params: ConnectParams): string {
|
|
|
93
69
|
return envPath;
|
|
94
70
|
}
|
|
95
71
|
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
});
|
|
104
|
-
} catch {
|
|
105
|
-
console.log('[las-data-profiler] Installing Python dependencies...');
|
|
106
|
-
execSync(`pip3 install --user ${PYTHON_DEPS.join(' ')}`, {
|
|
107
|
-
stdio: 'inherit',
|
|
108
|
-
});
|
|
109
|
-
}
|
|
110
|
-
}
|
|
72
|
+
/** Extensions (with leading dot, lower-case) treated as structured data. */
const STRUCTURED_EXTENSIONS: ReadonlySet<string> = new Set([
  '.json', '.jsonl', '.ndjson', '.csv', '.tsv', '.parquet', '.pq',
]);

/** Media type for every recognised non-structured extension (with leading dot, lower-case). */
const MEDIA_TYPE_BY_EXTENSION: ReadonlyMap<string, string> = new Map<string, string>([
  ...['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.tiff', '.tif', '.svg', '.ico', '.heic', '.heif']
    .map((e): [string, string] => [e, 'image']),
  ...['.mp3', '.wav', '.flac', '.aac', '.ogg', '.m4a', '.wma', '.opus']
    .map((e): [string, string] => [e, 'audio']),
  ...['.mp4', '.avi', '.mov', '.mkv', '.webm', '.wmv', '.flv', '.m4v', '.3gp']
    .map((e): [string, string] => [e, 'video']),
  ...['.pdf'].map((e): [string, string] => [e, 'pdf']),
]);

/**
 * Classify a file by its extension.
 *
 * The lookup tables are built once at module load instead of on every call,
 * so classifying each object of a large listing is a constant-time lookup.
 *
 * @param ext File extension including the leading dot (any letter case).
 * @returns `category` is `'structured'` for tabular/JSON formats and
 *   `'non-structured'` otherwise; `mediaType` is `'image'`, `'audio'`,
 *   `'video'` or `'pdf'` for recognised media and `''` for everything else.
 *   A fresh object is returned on every call.
 */
function classifyFile(ext: string): { category: string; mediaType: string } {
  const normalized = ext.toLowerCase();

  if (STRUCTURED_EXTENSIONS.has(normalized)) {
    return { category: 'structured', mediaType: '' };
  }

  // Unknown extensions are still "non-structured", just without a media type.
  return { category: 'non-structured', mediaType: MEDIA_TYPE_BY_EXTENSION.get(normalized) || '' };
}
|
|
119
88
|
|
|
120
|
-
// ---------------------------------------------------------------------------
|
|
121
|
-
// Main Entry
|
|
122
|
-
// ---------------------------------------------------------------------------
|
|
123
|
-
|
|
124
89
|
export async function connectDataSource(
|
|
125
90
|
params: ConnectParams,
|
|
126
91
|
_ctx?: any
|
|
127
92
|
): Promise<ConnectResult> {
|
|
128
|
-
|
|
129
|
-
if (!params.
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
93
|
+
if (!params.datasource_name) throw new Error('datasource_name is required');
|
|
94
|
+
if (!params.url) throw new Error('url is required (e.g. tos://bucket/prefix)');
|
|
95
|
+
|
|
96
|
+
// Parse URL: tos://bucket/prefix
|
|
97
|
+
try {
|
|
98
|
+
if (params.url.startsWith('file://') || params.url.startsWith('/')) {
|
|
99
|
+
params.vendor = 'local';
|
|
100
|
+
const localPath = params.url.startsWith('file://') ? params.url.slice(7) : params.url;
|
|
101
|
+
params.bucket = localPath;
|
|
102
|
+
params.prefix = '.';
|
|
103
|
+
} else {
|
|
104
|
+
const parsedUrl = new URL(params.url);
|
|
105
|
+
const protocol = parsedUrl.protocol.replace(':', '');
|
|
106
|
+
|
|
107
|
+
if (['tos', 'oss', 'cos', 's3'].includes(protocol)) {
|
|
108
|
+
if (protocol === 'tos') params.vendor = 'volcengine';
|
|
109
|
+
else if (protocol === 'oss') params.vendor = 'alibaba';
|
|
110
|
+
else if (protocol === 'cos') params.vendor = 'tencent';
|
|
111
|
+
else if (protocol === 's3') params.vendor = 'aws';
|
|
112
|
+
|
|
113
|
+
params.bucket = parsedUrl.hostname;
|
|
114
|
+
params.prefix = parsedUrl.pathname.replace(/^\//, ''); // Remove leading slash
|
|
115
|
+
} else {
|
|
116
|
+
throw new Error(`Unsupported protocol: ${protocol}`);
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
} catch (e: any) {
|
|
120
|
+
if (!params.vendor || !params.bucket || params.prefix === undefined) {
|
|
121
|
+
throw new Error(`Invalid url format: ${e.message}`);
|
|
122
|
+
}
|
|
140
123
|
}
|
|
141
124
|
|
|
142
|
-
// For non-local vendors, validate credentials
|
|
143
125
|
if (params.vendor !== 'local') {
|
|
144
|
-
if (!params.endpoint && params.vendor !== 'aws') {
|
|
145
|
-
|
|
126
|
+
if (!params.endpoint && params.vendor !== 'aws') throw new Error(`endpoint is required for vendor "${params.vendor}"`);
|
|
127
|
+
let ak = params.access_key;
|
|
128
|
+
let sk = params.secret_key;
|
|
129
|
+
|
|
130
|
+
if (!ak || !sk) {
|
|
131
|
+
try {
|
|
132
|
+
const { loadCredentials } = require('../../utils/credentials');
|
|
133
|
+
const creds = loadCredentials();
|
|
134
|
+
ak = ak || creds.ACCESS_KEY || creds.VOLCENGINE_ACCESS_KEY;
|
|
135
|
+
sk = sk || creds.SECRET_KEY || creds.VOLCENGINE_SECRET_KEY;
|
|
136
|
+
} catch(e) {
|
|
137
|
+
// ignore
|
|
138
|
+
}
|
|
146
139
|
}
|
|
147
|
-
|
|
148
|
-
const sk = params.secret_key || process.env.TOS_SECRET_KEY || process.env.S3_SECRET_KEY || process.env.AWS_SECRET_ACCESS_KEY;
|
|
140
|
+
|
|
149
141
|
if (!ak || !sk) {
|
|
150
|
-
throw new Error(
|
|
151
|
-
'access_key and secret_key are required (via params or env vars TOS_ACCESS_KEY/TOS_SECRET_KEY, S3_ACCESS_KEY/S3_SECRET_KEY, AWS_ACCESS_KEY_ID/AWS_SECRET_ACCESS_KEY)'
|
|
152
|
-
);
|
|
142
|
+
throw new Error('access_key and secret_key are required');
|
|
153
143
|
}
|
|
154
|
-
// Normalise into params so env.sh picks them up
|
|
155
144
|
params.access_key = ak;
|
|
156
145
|
params.secret_key = sk;
|
|
157
146
|
}
|
|
@@ -160,97 +149,99 @@ export async function connectDataSource(
|
|
|
160
149
|
const dbPath = path.join(dsDir, 'catalog_db');
|
|
161
150
|
|
|
162
151
|
ensureDir(dsDir);
|
|
163
|
-
|
|
164
|
-
// 1. Write env.sh
|
|
165
152
|
const envPath = writeEnvFile(dsDir, params);
|
|
166
153
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
'--prefix', params.prefix,
|
|
177
|
-
'--db-path', dbPath,
|
|
178
|
-
];
|
|
179
|
-
|
|
180
|
-
if (params.endpoint) {
|
|
181
|
-
args.push('--endpoint', params.endpoint);
|
|
182
|
-
}
|
|
183
|
-
if (params.access_key) {
|
|
184
|
-
args.push('--ak', params.access_key);
|
|
185
|
-
}
|
|
186
|
-
if (params.secret_key) {
|
|
187
|
-
args.push('--sk', params.secret_key);
|
|
188
|
-
}
|
|
189
|
-
if (params.region) {
|
|
190
|
-
args.push('--region', params.region);
|
|
191
|
-
}
|
|
192
|
-
if (params.sample_rows) {
|
|
193
|
-
args.push('--sample-rows', String(params.sample_rows));
|
|
194
|
-
}
|
|
154
|
+
try {
|
|
155
|
+
let isTruncated = true;
|
|
156
|
+
let continuationToken: string | undefined = undefined;
|
|
157
|
+
let total_files = 0;
|
|
158
|
+
let structured_files = 0;
|
|
159
|
+
let media_files = 0;
|
|
160
|
+
|
|
161
|
+
const allRecords: any[] = [];
|
|
162
|
+
const scan_ts = new Date().toISOString() + 'Z';
|
|
195
163
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
164
|
+
while (isTruncated) {
|
|
165
|
+
const response = await listS3Objects({
|
|
166
|
+
vendor: params.vendor as any,
|
|
167
|
+
bucket: params.bucket as string,
|
|
168
|
+
endpoint: params.endpoint,
|
|
169
|
+
access_key: params.access_key,
|
|
170
|
+
secret_key: params.secret_key,
|
|
171
|
+
region: params.region
|
|
172
|
+
}, params.prefix || '', 1000, continuationToken);
|
|
173
|
+
|
|
174
|
+
for (const obj of response.Contents) {
|
|
175
|
+
const key = obj.Key || '';
|
|
176
|
+
if (key.endsWith('/')) continue;
|
|
200
177
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
178
|
+
const name = path.basename(key);
|
|
179
|
+
const ext = path.extname(name).toLowerCase();
|
|
180
|
+
const mimeType = mime.lookup(name) || '';
|
|
181
|
+
const { category, mediaType } = classifyFile(ext);
|
|
182
|
+
const depth = (key.match(/\//g) || []).length;
|
|
183
|
+
const parentDir = key.includes('/') ? path.basename(path.dirname(key)) : '';
|
|
184
|
+
|
|
185
|
+
total_files++;
|
|
186
|
+
if (category === 'structured') structured_files++;
|
|
187
|
+
if (mediaType) media_files++;
|
|
205
188
|
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
189
|
+
allRecords.push({
|
|
190
|
+
file_path: key,
|
|
191
|
+
file_name: name,
|
|
192
|
+
extension: ext,
|
|
193
|
+
mime_type: mimeType,
|
|
194
|
+
category: category,
|
|
195
|
+
media_type: mediaType,
|
|
196
|
+
size_bytes: obj.Size || 0,
|
|
197
|
+
last_modified: obj.LastModified ? String(obj.LastModified) : '',
|
|
198
|
+
created_time: obj._created_time ? String(obj._created_time) : '',
|
|
199
|
+
etag: (obj.ETag || '').replace(/"/g, ''),
|
|
200
|
+
storage_class: obj.StorageClass || '',
|
|
201
|
+
is_multipart: (obj.ETag || '').includes('-'),
|
|
202
|
+
depth: depth,
|
|
203
|
+
parent_dir: parentDir,
|
|
204
|
+
vendor: params.vendor,
|
|
205
|
+
bucket: params.bucket,
|
|
206
|
+
has_schema: false,
|
|
207
|
+
has_media_meta: false,
|
|
208
|
+
scan_timestamp: scan_ts
|
|
209
|
+
});
|
|
210
|
+
}
|
|
209
211
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
212
|
+
isTruncated = response.IsTruncated || false;
|
|
213
|
+
continuationToken = response.NextContinuationToken;
|
|
214
|
+
}
|
|
213
215
|
|
|
214
|
-
|
|
215
|
-
if (code !== 0) {
|
|
216
|
-
resolve({
|
|
217
|
-
status: 'error',
|
|
218
|
-
datasource_name: params.datasource_name,
|
|
216
|
+
await writeLanceCatalog({
|
|
219
217
|
db_path: dbPath,
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
});
|
|
224
|
-
return;
|
|
225
|
-
}
|
|
218
|
+
table_name: 'file_catalog',
|
|
219
|
+
records: allRecords
|
|
220
|
+
});
|
|
226
221
|
|
|
227
|
-
|
|
228
|
-
try {
|
|
229
|
-
const jsonMatch = stdout.match(/\{[\s\S]*"summary"[\s\S]*\}/);
|
|
230
|
-
const result = jsonMatch ? JSON.parse(jsonMatch[0]) : {};
|
|
231
|
-
resolve({
|
|
222
|
+
return {
|
|
232
223
|
status: 'success',
|
|
233
224
|
datasource_name: params.datasource_name,
|
|
234
225
|
db_path: dbPath,
|
|
235
226
|
env_path: envPath,
|
|
236
|
-
tables: ['file_catalog'
|
|
237
|
-
summary:
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
}
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
227
|
+
tables: ['file_catalog'],
|
|
228
|
+
summary: {
|
|
229
|
+
total_files,
|
|
230
|
+
structured_files,
|
|
231
|
+
media_files
|
|
232
|
+
}
|
|
233
|
+
};
|
|
234
|
+
|
|
235
|
+
} catch (error: any) {
|
|
236
|
+
return {
|
|
237
|
+
status: 'error',
|
|
246
238
|
datasource_name: params.datasource_name,
|
|
247
239
|
db_path: dbPath,
|
|
248
240
|
env_path: envPath,
|
|
249
|
-
tables: [
|
|
250
|
-
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
});
|
|
241
|
+
tables: [],
|
|
242
|
+
error: error.message
|
|
243
|
+
};
|
|
244
|
+
}
|
|
254
245
|
}
|
|
255
246
|
|
|
256
247
|
export async function listDataSources(
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
import { S3Client, ListObjectsV2Command, GetObjectCommand } from '@aws-sdk/client-s3';
|
|
2
|
+
import { getSignedUrl } from '@aws-sdk/s3-request-presigner';
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
|
|
6
|
+
/**
 * Connection settings shared by the object-storage helpers in this module.
 *
 * Either `url` or both `vendor` and `bucket` must be supplied; `parseS3Url`
 * throws otherwise.
 */
export interface S3Params {
  /**
   * Storage location such as `tos://bucket`, `oss://bucket`, `cos://bucket` or
   * `s3://bucket`; a `file://` URL or a path starting with `/` selects the
   * local file system. When present it overrides `vendor` and `bucket`.
   */
  url?: string;
  /** Storage vendor; `'local'` means the local file system. */
  vendor?: 'volcengine' | 'alibaba' | 'tencent' | 'aws' | 'local';
  /** Service endpoint; a per-vendor default is derived when omitted. */
  endpoint?: string;
  /** Access key; falls back to the stored credentials when omitted. */
  access_key?: string;
  /** Secret key; falls back to the stored credentials when omitted. */
  secret_key?: string;
  /** Region; falls back to the stored credentials when omitted. */
  region?: string;
  /** Bucket name, or the root directory path when `vendor` is `'local'`. */
  bucket?: string;
}
|
|
15
|
+
|
|
16
|
+
/**
 * Fill in `vendor` and `bucket` from `params.url`, when a URL is given.
 *
 * - `file://…` URLs and paths starting with `/` select the `'local'` vendor;
 *   the bucket becomes the path (the `file://` prefix is stripped verbatim,
 *   without percent-decoding).
 * - `tos://`, `oss://`, `cos://` and `s3://` map to `'volcengine'`,
 *   `'alibaba'`, `'tencent'` and `'aws'`; the bucket is the URL host.
 *
 * The key prefix in the URL path is not extracted here.
 *
 * NOTE: the object passed in is updated in place and also returned.
 *
 * @param params Connection settings, with either `url` or `vendor` + `bucket`.
 * @returns The same `params` object with `vendor` and `bucket` populated.
 * @throws Error when the URL scheme is not supported, or when neither the URL
 *   nor the explicit fields yield both a vendor and a bucket. `new URL` may
 *   also throw for a malformed URL.
 */
export function parseS3Url(params: S3Params): S3Params {
  const { url } = params;

  if (url) {
    const isFileUrl = url.startsWith('file://');

    if (isFileUrl || url.startsWith('/')) {
      params.vendor = 'local';
      params.bucket = isFileUrl ? url.slice(7) : url;
    } else {
      const parsed = new URL(url);
      const scheme = parsed.protocol.replace(':', '');

      switch (scheme) {
        case 'tos':
          params.vendor = 'volcengine';
          break;
        case 'oss':
          params.vendor = 'alibaba';
          break;
        case 'cos':
          params.vendor = 'tencent';
          break;
        case 's3':
          params.vendor = 'aws';
          break;
        default:
          throw new Error(`Unsupported protocol: ${scheme}`);
      }

      // Prefix is usually parsed separately or passed explicitly for listing
      params.bucket = parsed.hostname;
    }
  }

  const resolved = Boolean(params.vendor) && Boolean(params.bucket);
  if (!resolved) {
    throw new Error('Could not determine vendor or bucket. Please provide a valid url or vendor/bucket directly.');
  }

  return params;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Build an S3-compatible client for the given connection settings.
 *
 * Missing `access_key`, `secret_key` or `region` values are looked up via
 * `loadCredentials()` (keys `ACCESS_KEY`/`VOLCENGINE_ACCESS_KEY`,
 * `SECRET_KEY`/`VOLCENGINE_SECRET_KEY`, `REGION`/`VOLCENGINE_REGION`). When no
 * endpoint is supplied for volcengine, alibaba or tencent, one is derived from
 * the region, and the region used to build it is also the region the client
 * signs with.
 *
 * @param params Connection settings; resolved through `parseS3Url` first.
 * @returns A configured client, or `null` when the vendor is `'local'`.
 * @throws Error when no access key / secret key can be resolved, or when
 *   `parseS3Url` rejects the settings.
 */
function createS3Client(params: S3Params): S3Client | null {
  params = parseS3Url(params);
  if (params.vendor === 'local') return null;

  let accessKey = params.access_key;
  let secretKey = params.secret_key;
  let region = params.region;

  // Attempt to load credentials if not provided
  if (!accessKey || !secretKey || !region) {
    try {
      const { loadCredentials } = require('../../utils/credentials');
      const creds = loadCredentials();
      accessKey = accessKey || creds.ACCESS_KEY || creds.VOLCENGINE_ACCESS_KEY;
      secretKey = secretKey || creds.SECRET_KEY || creds.VOLCENGINE_SECRET_KEY;
      region = region || creds.REGION || creds.VOLCENGINE_REGION;
    } catch {
      // Best-effort lookup: a missing or unreadable credential store is
      // reported below if the keys are still unresolved.
    }
  }

  // Fail with a clear message instead of signing requests with empty keys.
  if (!accessKey || !secretKey) {
    throw new Error('access_key and secret_key are required');
  }

  let endpoint = params.endpoint;
  if (!endpoint) {
    // The endpoint host embeds a region, so the fallback region chosen here
    // must also be the signing region; otherwise the two disagree.
    if (params.vendor === 'volcengine') {
      region = region || 'cn-beijing';
      endpoint = `https://tos-s3-${region}.volces.com`;
    } else if (params.vendor === 'alibaba') {
      region = region || 'cn-hangzhou';
      endpoint = `https://s3.oss-${region}.aliyuncs.com`;
    } else if (params.vendor === 'tencent') {
      region = region || 'ap-beijing';
      endpoint = `https://cos.${region}.myqcloud.com`;
    }
  }

  return new S3Client({
    region: region || 'us-east-1',
    endpoint: endpoint,
    credentials: {
      accessKeyId: accessKey,
      secretAccessKey: secretKey
    },
    forcePathStyle: false // usually false for virtual hosted style
  });
}
|
|
91
|
+
|
|
92
|
+
/**
 * List objects under a prefix, either in a bucket or in a local directory.
 *
 * For `vendor: 'local'` the directory tree rooted at `bucket` (optionally
 * narrowed by `prefix`) is walked recursively and returned in one page:
 * `maxKeys` and `continuationToken` are ignored and `IsTruncated` is `false`.
 * Local keys are paths relative to the root, always using `/` as separator.
 * Entries that disappear during the walk (or dangling symlinks) are skipped.
 *
 * For remote vendors a single `ListObjectsV2` page is requested.
 *
 * @param params Connection settings; resolved through `parseS3Url`.
 * @param prefix Key prefix (remote) or sub-directory (local; `''` or `'.'` = root).
 * @param maxKeys Page size for remote listings.
 * @param continuationToken Token from a previous truncated remote page.
 * @returns `Contents`, `IsTruncated` and `NextContinuationToken` of the page.
 * @throws Error when no client can be created, plus any error raised by
 *   `parseS3Url`, the file system, or the storage service.
 */
export async function listS3Objects(params: S3Params, prefix: string, maxKeys: number = 1000, continuationToken?: string) {
  params = parseS3Url(params);
  if (params.vendor === 'local') {
    const root = params.bucket as string;
    const startDir = prefix && prefix !== '.' ? path.join(root, prefix) : root;
    const files: any[] = [];

    const walk = (dir: string): void => {
      if (!fs.existsSync(dir)) return;
      for (const dirent of fs.readdirSync(dir, { withFileTypes: true })) {
        const fullPath = path.resolve(dir, dirent.name);
        if (dirent.isDirectory()) {
          walk(fullPath);
          continue;
        }

        let stat: fs.Stats;
        try {
          stat = fs.statSync(fullPath);
        } catch (err: any) {
          // A dangling symlink or a file removed mid-walk must not abort the
          // whole listing; anything else is a real failure.
          if (err && err.code === 'ENOENT') continue;
          throw err;
        }

        files.push({
          // Object keys use '/', regardless of the platform separator.
          Key: path.relative(root, fullPath).split(path.sep).join('/'),
          Size: stat.size,
          LastModified: stat.mtime,
          ETag: '',
          StorageClass: 'LOCAL',
          // birthtime is the creation time; ctime is the last inode change.
          // Fall back to ctime when the file system reports no birthtime.
          _created_time: stat.birthtimeMs > 0 ? stat.birthtime : stat.ctime
        });
      }
    };

    walk(startDir);
    return {
      Contents: files,
      IsTruncated: false,
      NextContinuationToken: undefined
    };
  }

  const client = createS3Client(params);
  if (!client) throw new Error('Failed to create S3 client');

  const command = new ListObjectsV2Command({
    Bucket: params.bucket,
    Prefix: prefix,
    MaxKeys: maxKeys,
    ContinuationToken: continuationToken
  });

  const response = await client.send(command);
  return {
    Contents: response.Contents || [],
    IsTruncated: response.IsTruncated,
    NextContinuationToken: response.NextContinuationToken
  };
}
|
|
144
|
+
|
|
145
|
+
/**
 * Read an object (or local file) into memory, optionally only its first bytes.
 *
 * @param params Connection settings; resolved through `parseS3Url`.
 * @param key Object key, or path relative to the local root for `'local'`.
 * @param maxBytes When set to a positive number, at most this many bytes are
 *   read from the start; when omitted (or `0`) the whole object is read.
 * @returns The bytes read; an empty buffer when the remote response has no body.
 * @throws Error when no client can be created, plus any error raised by
 *   `parseS3Url`, the file system, or the storage service.
 */
export async function readS3Object(params: S3Params, key: string, maxBytes?: number): Promise<Buffer> {
  params = parseS3Url(params);
  if (params.vendor === 'local') {
    const fullPath = path.join(params.bucket as string, key);
    if (!maxBytes) {
      return fs.readFileSync(fullPath);
    }

    const fd = fs.openSync(fullPath, 'r');
    try {
      const buffer = Buffer.alloc(maxBytes);
      const bytesRead = fs.readSync(fd, buffer, 0, maxBytes, 0);
      // The file may be shorter than maxBytes; return only what was read.
      return buffer.subarray(0, bytesRead);
    } finally {
      // Always release the descriptor, even when allocation or the read fails.
      fs.closeSync(fd);
    }
  }

  const client = createS3Client(params);
  if (!client) throw new Error('Failed to create S3 client');

  const commandInput: any = {
    Bucket: params.bucket,
    Key: key
  };

  if (maxBytes) {
    // HTTP byte ranges are inclusive, hence the -1.
    commandInput.Range = `bytes=0-${maxBytes - 1}`;
  }

  const response = await client.send(new GetObjectCommand(commandInput));
  if (!response.Body) {
    return Buffer.alloc(0);
  }

  // The body is consumed as an async iterable of binary chunks.
  const chunks: Uint8Array[] = [];
  for await (const chunk of response.Body as any) {
    chunks.push(chunk);
  }
  return Buffer.concat(chunks);
}
|
|
185
|
+
|
|
186
|
+
/**
 * Produce a URL through which the object can be fetched.
 *
 * For `vendor: 'local'` this is `file://` followed by the joined root and key
 * (no percent-encoding is applied). For remote vendors a presigned GET URL is
 * generated.
 *
 * @param params Connection settings; resolved through `parseS3Url`.
 * @param key Object key, or path relative to the local root for `'local'`.
 * @param expiresIn Lifetime passed to the presigner (default 3600).
 * @returns The `file://` URL or the presigned URL.
 * @throws Error when no client can be created, plus any error raised by
 *   `parseS3Url` or the presigner.
 */
export async function getPresignedUrl(params: S3Params, key: string, expiresIn: number = 3600): Promise<string> {
  const resolved = parseS3Url(params);

  if (resolved.vendor === 'local') {
    return `file://${path.join(resolved.bucket as string, key)}`;
  }

  const client = createS3Client(resolved);
  if (!client) {
    throw new Error('Failed to create S3 client');
  }

  const getObject = new GetObjectCommand({ Bucket: resolved.bucket, Key: key });
  return await getSignedUrl(client, getObject, { expiresIn });
}
|