memorylake-openclaw 1.0.2-beta.4 → 1.0.2-beta.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.ts +125 -28
- package/package.json +5 -2
- package/skills/memorylake-upload/SKILL.md +24 -5
- package/skills/memorylake-upload/scripts/upload.mjs +251 -4
package/index.ts
CHANGED
|
@@ -5,9 +5,9 @@
|
|
|
5
5
|
*
|
|
6
6
|
* Features:
|
|
7
7
|
* - 9 tools: memory_search, memory_list, memory_store, memory_get, memory_forget, document_search, document_download, advanced_web_search, open_data_search
|
|
8
|
-
* - Auto-recall: injects
|
|
8
|
+
* - Auto-recall: injects relevant memories and document excerpts before each agent turn
|
|
9
9
|
* - Auto-capture: stores key facts scoped to the current session after each agent turn
|
|
10
|
-
* - CLI: openclaw memorylake search, openclaw memorylake stats
|
|
10
|
+
* - CLI: openclaw memorylake search, openclaw memorylake stats, openclaw memorylake upload
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
13
|
import fs from "node:fs";
|
|
@@ -15,6 +15,7 @@ import fsPromises from "node:fs/promises";
|
|
|
15
15
|
import os from "node:os";
|
|
16
16
|
import path from "node:path";
|
|
17
17
|
import got from "got";
|
|
18
|
+
import { pipeline } from "node:stream/promises";
|
|
18
19
|
import { Type } from "@sinclair/typebox";
|
|
19
20
|
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
|
20
21
|
import { loadCoreAgentDeps } from "./core-bridge";
|
|
@@ -780,6 +781,15 @@ const memoryLakeConfigSchema = {
|
|
|
780
781
|
// Plugin Definition
|
|
781
782
|
// ============================================================================
|
|
782
783
|
|
|
784
|
+
/** Options object passed to the `upload` / `uploadAuto` functions loaded from upload.mjs. */
interface UploadFnOptions {
  /** MemoryLake base URL. */
  host: string;
  /** API key used to authenticate the upload. */
  apiKey: string;
  /** Project the uploaded document is associated with. */
  projectId: string;
  /** Local path of the item to upload. */
  filePath: string;
  /** Name to store the upload under. */
  fileName: string;
}

/** Shared type for the upload / uploadAuto function signature */
type UploadFn = (opts: UploadFnOptions) => Promise<unknown>;
|
|
792
|
+
|
|
783
793
|
const memoryPlugin = {
|
|
784
794
|
id: "memorylake-openclaw",
|
|
785
795
|
name: "Memory (MemoryLake)",
|
|
@@ -994,23 +1004,6 @@ const memoryPlugin = {
|
|
|
994
1004
|
content: r.content,
|
|
995
1005
|
created_at: r.created_at,
|
|
996
1006
|
}));
|
|
997
|
-
|
|
998
|
-
// Check for unresolved conflicts among returned memories
|
|
999
|
-
const conflictMemoryIds = results
|
|
1000
|
-
.filter((r) => r.has_unresolved_conflict)
|
|
1001
|
-
.map((r) => r.id);
|
|
1002
|
-
if (conflictMemoryIds.length > 0) {
|
|
1003
|
-
try {
|
|
1004
|
-
const effectiveUserId = userId ?? effectiveCfg.userId;
|
|
1005
|
-
const conflicts = await effectiveProvider.listConflicts(conflictMemoryIds, effectiveUserId);
|
|
1006
|
-
if (conflicts.length > 0) {
|
|
1007
|
-
const conflictText = buildConflictContext(conflicts);
|
|
1008
|
-
sections.push(`## Memory Conflicts\nThe following memories have unresolved conflicts. Review and help the user resolve them if relevant:\n\n${conflictText}`);
|
|
1009
|
-
}
|
|
1010
|
-
} catch (err) {
|
|
1011
|
-
sections.push(`## Memory Conflicts\nFailed to fetch conflicts: ${String(err)}`);
|
|
1012
|
-
}
|
|
1013
|
-
}
|
|
1014
1007
|
} else if (memoryResult.status === "rejected") {
|
|
1015
1008
|
sections.push(`## Memories\nMemory search failed: ${String(memoryResult.reason)}`);
|
|
1016
1009
|
}
|
|
@@ -1028,7 +1021,7 @@ const memoryPlugin = {
|
|
|
1028
1021
|
content: [
|
|
1029
1022
|
{ type: "text", text: "No relevant memories or documents found." },
|
|
1030
1023
|
],
|
|
1031
|
-
details: {
|
|
1024
|
+
details: { count: 0 },
|
|
1032
1025
|
};
|
|
1033
1026
|
}
|
|
1034
1027
|
|
|
@@ -1337,35 +1330,76 @@ const memoryPlugin = {
|
|
|
1337
1330
|
{ name: "document_search" },
|
|
1338
1331
|
);
|
|
1339
1332
|
|
|
1333
|
+
/**
 * Try to extract a usable filename from a pre-signed download URL.
 *
 * The last path segment is percent-decoded and then reduced to its final
 * component, so an encoded separator (`%2F`, `%5C`) cannot carry a directory
 * part into the result. Callers join the returned name onto a download
 * directory, so the result must always be a bare file name.
 *
 * @param urlStr - Absolute download URL.
 * @returns The bare file name, or null if the URL is malformed or its last
 *   segment does not end in a recognizable extension (a dot followed by
 *   1-10 word characters).
 */
function fileNameFromUrl(urlStr: string): string | null {
  const extensionPattern = /\.\w{1,10}$/;
  try {
    const lastSegment = path.posix.basename(new URL(urlStr).pathname);
    // Same acceptance rule as before: the raw (still encoded) segment must
    // already look like "<something>.<ext>".
    if (!lastSegment || !extensionPattern.test(lastSegment)) return null;

    // Decoding may reveal separators; keep only what follows the last one.
    const decoded = decodeURIComponent(lastSegment);
    const bareName = decoded.split(/[\\/]/).pop() ?? "";
    if (extensionPattern.test(bareName) && !bareName.includes("\0")) {
      return bareName;
    }
  } catch {
    // Malformed URL or invalid percent-encoding: treat as "no filename".
  }
  return null;
}
|
|
1345
|
+
|
|
1340
1346
|
api.registerTool(
|
|
1341
1347
|
(ctx) => ({
|
|
1342
1348
|
name: "document_download",
|
|
1343
1349
|
label: "Document Download",
|
|
1344
1350
|
description:
|
|
1345
|
-
"Download a document (image, PDF, etc.) from MemoryLake
|
|
1351
|
+
"Download a document (image, PDF, etc.) from MemoryLake to local disk. After calling this tool, you MUST call the `message` tool with action='send' and media=<the returned local file path> to deliver the file to the user.",
|
|
1346
1352
|
parameters: Type.Object({
|
|
1347
1353
|
documentId: Type.String({
|
|
1348
1354
|
description:
|
|
1349
1355
|
"The document ID to download (from document_search results or document listing)",
|
|
1350
1356
|
}),
|
|
1357
|
+
fileName: Type.Optional(
|
|
1358
|
+
Type.String({
|
|
1359
|
+
description:
|
|
1360
|
+
"Original file name for saving locally (e.g. 'report.pdf'). Falls back to the name in the download URL or the document ID.",
|
|
1361
|
+
}),
|
|
1362
|
+
),
|
|
1351
1363
|
}),
|
|
1352
1364
|
async execute(_toolCallId, params) {
|
|
1353
1365
|
const effectiveCfg = resolveConfig(ctx);
|
|
1354
1366
|
const effectiveProvider = getProvider(effectiveCfg);
|
|
1355
|
-
const { documentId } = params as {
|
|
1367
|
+
const { documentId, fileName } = params as {
|
|
1368
|
+
documentId: string;
|
|
1369
|
+
fileName?: string;
|
|
1370
|
+
};
|
|
1356
1371
|
|
|
1357
1372
|
try {
|
|
1373
|
+
// 1. Get pre-signed download URL
const downloadUrl =
  await effectiveProvider.getDocumentDownloadUrl(documentId);

// 2. Determine local save directory (cross-platform)
const workspaceDir = (ctx as any)?.workspaceDir;
const downloadDir = workspaceDir
  ? path.join(workspaceDir, ".memorylake", "downloads")
  : path.join(os.tmpdir(), "memorylake-downloads");
fs.mkdirSync(downloadDir, { recursive: true });

// 3. Determine filename: explicit param > URL-derived > documentId.
//    `fileName` and `documentId` come from tool-call parameters, so every
//    candidate is reduced to a bare name first. Otherwise a value such as
//    "../../x" would make path.join() write outside downloadDir.
const toBareName = (candidate: string | null | undefined): string | null => {
  if (!candidate) return null;
  const bare = path.posix.basename(candidate.replace(/\\/g, "/"));
  return bare && bare !== "." && bare !== ".." ? bare : null;
};
const finalName =
  toBareName(fileName) ??
  toBareName(fileNameFromUrl(downloadUrl)) ??
  toBareName(documentId) ??
  "download";
const localPath = path.join(downloadDir, finalName);

// 4. Stream download to local file
await pipeline(
  got.stream(downloadUrl),
  fs.createWriteStream(localPath),
);

return {
  content: [
    {
      type: "text",
      text: `Document ${documentId} downloaded to:\n${localPath}\n\nYou MUST now call the message tool with action="send" and media set to this local path to deliver the file to the user.`,
    },
  ],
  details: { documentId, localPath },
};
|
|
1370
1404
|
} catch (err) {
|
|
1371
1405
|
return {
|
|
@@ -1698,6 +1732,69 @@ const memoryPlugin = {
|
|
|
1698
1732
|
}
|
|
1699
1733
|
});
|
|
1700
1734
|
|
|
1735
|
+
// `openclaw memorylake upload <path>`: uploads a file, archive or directory
// through the uploadAuto function exported by the memorylake-upload skill script.
// Config precedence: --project-id > agent workspace config > global config.
// Every failure path sets a non-zero process exit code so scripts can detect it.
memorylake
  .command("upload")
  .description("Upload files or directories to MemoryLake")
  .argument("<path>", "File or directory path to upload")
  .option("--agent <id>", "Agent ID (resolves workspace and per-agent projectId)")
  .option("--project-id <id>", "Override project ID (takes precedence over --agent)")
  .action(async (targetPath: string, opts: { agent?: string; projectId?: string }) => {
    // Resolve effective config: --project-id > agent workspace config > global config
    let effectiveCfg = cfg;
    if (opts.agent) {
      // Best effort: any problem reading the agent registry falls back to the
      // global config with a warning instead of aborting the upload.
      try {
        const openclawPath = path.join(os.homedir(), ".openclaw", "openclaw.json");
        const openclaw = JSON.parse(fs.readFileSync(openclawPath, "utf-8"));
        const agents = openclaw?.agents;
        const agentEntry = agents?.list?.find((a: any) => a.id === opts.agent);
        const workspace = agentEntry?.workspace || agents?.defaults?.workspace;
        if (workspace) {
          effectiveCfg = resolveConfig({ workspaceDir: workspace });
        } else {
          console.warn(`Warning: no workspace found for agent "${opts.agent}", using global config.`);
        }
      } catch (err) {
        console.warn(`Warning: failed to resolve agent config: ${String(err)}, using global config.`);
      }
    }

    const effectiveProjectId = opts.projectId || effectiveCfg.projectId;
    if (!effectiveProjectId) {
      console.error("No project ID configured. Use --project-id or set up agent/workspace config.");
      process.exitCode = 1;
      return;
    }
    if (!effectiveCfg.host || !effectiveCfg.apiKey) {
      console.error("Missing host or apiKey in config. Check your MemoryLake configuration.");
      process.exitCode = 1;
      return;
    }

    // Lazy import upload.mjs (use uploadAuto to support archives)
    let uploadFn: UploadFn;
    try {
      const uploadModule = await import(
        /* webpackIgnore: true */
        new URL("./skills/memorylake-upload/scripts/upload.mjs", import.meta.url).href
      );
      // Fail here with a clear message rather than later with "not a function".
      if (typeof uploadModule.uploadAuto !== "function") {
        throw new Error("upload.mjs does not export an uploadAuto function");
      }
      uploadFn = uploadModule.uploadAuto;
    } catch (err) {
      console.error(`Failed to load upload module: ${String(err)}`);
      process.exitCode = 1;
      return;
    }

    const absPath = path.resolve(targetPath);

    try {
      await uploadFn({
        host: effectiveCfg.host,
        apiKey: effectiveCfg.apiKey,
        projectId: effectiveProjectId,
        filePath: absPath,
        fileName: path.basename(absPath),
      });
    } catch (err) {
      console.error(`Upload failed: ${String(err)}`);
      process.exitCode = 1;
    }
  });
|
|
1797
|
+
|
|
1701
1798
|
memorylake
|
|
1702
1799
|
.command("stats")
|
|
1703
1800
|
.description("Show memory statistics from MemoryLake")
|
|
@@ -1774,7 +1871,7 @@ const memoryPlugin = {
|
|
|
1774
1871
|
// Auto-upload: upload inbound files to MemoryLake before prompt build
|
|
1775
1872
|
if (cfg.autoUpload) {
|
|
1776
1873
|
// Lazy-load upload function from upload.mjs
|
|
1777
|
-
let
|
|
1874
|
+
let uploadAutoFn: UploadFn | undefined;
|
|
1778
1875
|
|
|
1779
1876
|
api.on("before_prompt_build", (event, ctx) => {
|
|
1780
1877
|
if ((ctx as any)?.trigger !== "user") {
|
|
@@ -1799,17 +1896,17 @@ const memoryPlugin = {
|
|
|
1799
1896
|
// Fire-and-forget: upload asynchronously without blocking
|
|
1800
1897
|
(async () => {
|
|
1801
1898
|
// Lazy import upload.mjs
|
|
1802
|
-
if (!
|
|
1899
|
+
if (!uploadAutoFn) {
|
|
1803
1900
|
const uploadModule = await import(
|
|
1804
1901
|
/* webpackIgnore: true */
|
|
1805
1902
|
new URL("./skills/memorylake-upload/scripts/upload.mjs", import.meta.url).href
|
|
1806
1903
|
);
|
|
1807
|
-
|
|
1904
|
+
uploadAutoFn = uploadModule.uploadAuto;
|
|
1808
1905
|
}
|
|
1809
1906
|
|
|
1810
1907
|
for (const { filePath, stat } of filesToUpload) {
|
|
1811
1908
|
try {
|
|
1812
|
-
await
|
|
1909
|
+
await uploadAutoFn!({
|
|
1813
1910
|
host: effectiveCfg.host,
|
|
1814
1911
|
apiKey: effectiveCfg.apiKey,
|
|
1815
1912
|
projectId: effectiveCfg.projectId,
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "memorylake-openclaw",
|
|
3
|
-
"version": "1.0.2-beta.4",
|
|
3
|
+
"version": "1.0.2-beta.6",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "MemoryLake memory backend for OpenClaw",
|
|
6
6
|
"license": "MIT",
|
|
@@ -17,7 +17,10 @@
|
|
|
17
17
|
],
|
|
18
18
|
"dependencies": {
|
|
19
19
|
"@sinclair/typebox": "0.34.47",
|
|
20
|
-
"
|
|
20
|
+
"7zip-min": "^3.0.1",
|
|
21
|
+
"adm-zip": "^0.5.17",
|
|
22
|
+
"got": "^14.0.0",
|
|
23
|
+
"tar": "^7.5.13"
|
|
21
24
|
},
|
|
22
25
|
"openclaw": {
|
|
23
26
|
"extensions": [
|
|
@@ -1,18 +1,20 @@
|
|
|
1
1
|
---
|
|
2
2
|
name: memorylake-upload
|
|
3
|
-
description: Use when the user wants to upload files, documents, PDFs, or other data
|
|
3
|
+
description: Use when the user wants to upload files, documents, PDFs, archives, directories, or other data to MemoryLake. Supports single files, archives (zip, tar.gz, tgz, tar, gz, 7z, tar.bz2, bz2) which are extracted then uploaded, and directories which are recursively uploaded.
|
|
4
4
|
---
|
|
5
5
|
|
|
6
6
|
# MemoryLake File Upload
|
|
7
7
|
|
|
8
8
|
## Overview
|
|
9
9
|
|
|
10
|
-
Upload local files to MemoryLake using the multipart upload API, then associate them with a project.
|
|
10
|
+
Upload local files, archives, or directories to MemoryLake using the multipart upload API, then associate them with a project. Archives are automatically detected, extracted, and each file inside is uploaded individually. Directories are recursively traversed and all files inside are uploaded.
|
|
11
11
|
|
|
12
12
|
## When to Use
|
|
13
13
|
|
|
14
14
|
- User wants to upload a file (PDF, DOCX, image, etc.) to MemoryLake
|
|
15
15
|
- User wants to add a local document to a MemoryLake project
|
|
16
|
+
- User wants to upload an archive (zip, tar.gz, tgz, tar, gz, 7z, tar.bz2, bz2) — files inside will be extracted and uploaded one by one
|
|
17
|
+
- User wants to upload an entire directory/folder — all files will be recursively collected and uploaded
|
|
16
18
|
|
|
17
19
|
## Step 1 -- Read MemoryLake Config
|
|
18
20
|
|
|
@@ -30,22 +32,39 @@ If the script exits with an error, stop and inform the user.
|
|
|
30
32
|
|
|
31
33
|
The upload script is at `scripts/upload.mjs` relative to **this skill's SKILL.md**.
|
|
32
34
|
|
|
35
|
+
The script automatically detects the input type and handles it accordingly — plain files are uploaded directly, archives are extracted first, and directories are recursively traversed.
|
|
36
|
+
|
|
33
37
|
```bash
|
|
38
|
+
# Single file
|
|
34
39
|
node {path-to-this-skill}/scripts/upload.mjs \
|
|
35
40
|
--host {host} \
|
|
36
41
|
--api-key {apiKey} \
|
|
37
42
|
--project-id {projectId} \
|
|
38
43
|
--file-name {fileName} \
|
|
39
44
|
/path/to/file
|
|
45
|
+
|
|
46
|
+
# Archive (auto-detected, extracted, each file uploaded)
|
|
47
|
+
node {path-to-this-skill}/scripts/upload.mjs \
|
|
48
|
+
--host {host} \
|
|
49
|
+
--api-key {apiKey} \
|
|
50
|
+
--project-id {projectId} \
|
|
51
|
+
/path/to/archive.zip
|
|
52
|
+
|
|
53
|
+
# Directory (recursively uploads all files inside)
|
|
54
|
+
node {path-to-this-skill}/scripts/upload.mjs \
|
|
55
|
+
--host {host} \
|
|
56
|
+
--api-key {apiKey} \
|
|
57
|
+
--project-id {projectId} \
|
|
58
|
+
/path/to/my-folder/
|
|
40
59
|
```
|
|
41
60
|
|
|
42
|
-
`--file-name` is the original file name as provided by the user (e.g., `report-Q1.pdf`). This is required because the local file path may be a temp path or renamed file that doesn't reflect the real name.
|
|
61
|
+
`--file-name` is the original file name as provided by the user (e.g., `report-Q1.pdf`). This is required because the local file path may be a temp path or renamed file that doesn't reflect the real name. For archives and directories, `--file-name` is not needed since individual file names are taken from the contents.
|
|
43
62
|
|
|
44
63
|
## Step 3 -- Handle Output
|
|
45
64
|
|
|
46
|
-
The script prints progress for each step (create upload, upload parts, complete, add to project).
|
|
65
|
+
The script prints progress for each step (create upload, upload parts, complete, add to project). For archives and directories, it additionally prints a summary of succeeded/failed uploads.
|
|
47
66
|
|
|
48
|
-
- **Success**: Report the document ID and file name to the user
|
|
67
|
+
- **Success**: Report the document ID and file name to the user. For archives/directories, report the total count and list each uploaded file
|
|
49
68
|
- **Failure**: The script prints the specific error (file not found, auth failed, API error). Read the error message and relay it to the user — don't guess the cause
|
|
50
69
|
|
|
51
70
|
## Common Mistakes
|
|
@@ -20,6 +20,12 @@ import fs from 'fs';
|
|
|
20
20
|
import path from 'path';
|
|
21
21
|
import https from 'https';
|
|
22
22
|
import http from 'http';
|
|
23
|
+
import os from 'os';
|
|
24
|
+
import { createGunzip } from 'zlib';
|
|
25
|
+
import { pipeline } from 'stream/promises';
|
|
26
|
+
import AdmZip from 'adm-zip';
|
|
27
|
+
import { extract as tarExtract } from 'tar';
|
|
28
|
+
import sevenZip from '7zip-min';
|
|
23
29
|
|
|
24
30
|
// API base path
|
|
25
31
|
const API_BASE = '/openapi/memorylake';
|
|
@@ -228,6 +234,243 @@ export async function upload({ host, apiKey, projectId, filePath, fileName }) {
|
|
|
228
234
|
return doc;
|
|
229
235
|
}
|
|
230
236
|
|
|
237
|
+
// ============================================================================
|
|
238
|
+
// Archive support
|
|
239
|
+
// ============================================================================
|
|
240
|
+
|
|
241
|
+
/** Formats we can extract with npm packages (adm-zip, tar, zlib, 7zip-min) */
const SUPPORTED_ARCHIVE_EXTENSIONS = [
  '.tar.gz', '.tgz', '.tar.bz2', '.tbz2',
  '.tar', '.zip', '.gz', '.bz2', '.7z',
];

/** Recognized but unsupported — detected so we give a clear error instead of uploading raw */
const UNSUPPORTED_ARCHIVE_EXTENSIONS = [
  '.tar.xz', '.txz', '.xz', '.rar',
];

/** All recognized archive extensions */
const ALL_ARCHIVE_EXTENSIONS = [...SUPPORTED_ARCHIVE_EXTENSIONS, ...UNSUPPORTED_ARCHIVE_EXTENSIONS];

/**
 * Check if a file is an archive based on its extension.
 *
 * Matching is case-insensitive and looks only at the final path component.
 * The longest matching extension wins (".tar.gz" over ".gz"), so the result
 * does not depend on the order of the extension lists. A name that consists
 * of nothing but the extension (for example a dotfile called ".gz") is not
 * treated as an archive, because there would be no stem to extract to.
 *
 * @param {string} filePath - path or file name to inspect
 * @returns {string|null} the matched extension (lower-case) or null
 */
function getArchiveExtension(filePath) {
  const name = filePath.toLowerCase().split(/[\\/]/).pop() ?? '';
  let best = null;
  for (const ext of ALL_ARCHIVE_EXTENSIONS) {
    const hasStem = name.length > ext.length;
    if (hasStem && name.endsWith(ext) && (best === null || ext.length > best.length)) {
      best = ext;
    }
  }
  return best;
}
|
|
267
|
+
|
|
268
|
+
/**
 * Promise adapter for 7zip-min's callback-style `unpack`.
 *
 * @param {string} archivePath - archive to unpack
 * @param {string} destDir - directory to unpack into
 * @returns {Promise<void>} resolves when the callback reports no error,
 *   rejects with the error it reports otherwise
 */
function sevenZipExtract(archivePath, destDir) {
  return new Promise((resolve, reject) => {
    const onFinished = (err) => {
      if (!err) {
        resolve();
        return;
      }
      reject(err);
    };
    sevenZip.unpack(archivePath, destDir, onFinished);
  });
}
|
|
279
|
+
|
|
280
|
+
/**
 * Extract an archive to the given destination directory.
 * Uses npm packages (adm-zip, tar, zlib, 7zip-min) for cross-platform compatibility.
 *
 * @param {string} archivePath - path of the archive; its extension selects the extractor
 * @param {string} destDir - existing directory that receives the extracted files
 * @returns {Promise<void>}
 * @throws {Error} if the extension is unrecognized or recognized-but-unsupported,
 *   if a .tar.bz2/.tbz2 archive yields no tar payload, or if the underlying
 *   extractor fails
 */
async function extractArchive(archivePath, destDir) {
  const ext = getArchiveExtension(archivePath);

  // Not an archive we recognize at all: fail before logging a "null" format.
  if (ext === null) {
    throw new Error(`Unsupported archive format: ${path.basename(archivePath)}`);
  }

  // Reject recognized-but-unsupported formats early with a clear message
  if (UNSUPPORTED_ARCHIVE_EXTENSIONS.includes(ext)) {
    throw new Error(`Unsupported archive format: ${ext}. Please convert to .zip, .tar.gz or .7z`);
  }

  console.log(` Format: ${ext}`);

  switch (ext) {
    case '.zip': {
      const zip = new AdmZip(archivePath);
      zip.extractAllTo(destDir, true); // second argument: overwrite existing files
      break;
    }
    case '.tar.gz':
    case '.tgz':
    case '.tar':
      await tarExtract({ file: archivePath, cwd: destDir });
      break;
    case '.gz': {
      // A bare .gz wraps a single file: name the output after the archive
      // minus the extension, with a fallback so the name is never empty.
      const baseName = path.basename(archivePath).slice(0, -ext.length) || 'unnamed';
      const destPath = path.join(destDir, baseName);
      await pipeline(
        fs.createReadStream(archivePath),
        createGunzip(),
        fs.createWriteStream(destPath),
      );
      break;
    }
    case '.tar.bz2':
    case '.tbz2': {
      // 7za only strips the bz2 layer, producing a .tar — need a second pass
      const tmpBz2 = fs.mkdtempSync(path.join(os.tmpdir(), 'memorylake-bz2-'));
      try {
        await sevenZipExtract(archivePath, tmpBz2);
        // Find the resulting tar inside tmpBz2. The match is case-insensitive
        // because the inner name follows the archive's own casing; if there
        // is exactly one output file, use it whatever it is called.
        const produced = fs.readdirSync(tmpBz2);
        const inner =
          produced.find(f => f.toLowerCase().endsWith('.tar')) ??
          (produced.length === 1 ? produced[0] : undefined);
        if (!inner) {
          // Previously this case extracted nothing and reported success.
          throw new Error(`No tar payload found inside ${path.basename(archivePath)}`);
        }
        await tarExtract({ file: path.join(tmpBz2, inner), cwd: destDir });
      } finally {
        fs.rmSync(tmpBz2, { recursive: true, force: true });
      }
      break;
    }
    case '.bz2':
    case '.7z':
      await sevenZipExtract(archivePath, destDir);
      break;
    default:
      throw new Error(`Unsupported archive format: ${path.basename(archivePath)}`);
  }
}
|
|
339
|
+
|
|
340
|
+
/**
 * File names that are never collected, wherever they appear.
 */
const IGNORED_NAMES = new Set(['.DS_Store', 'Thumbs.db', 'desktop.ini']);

/**
 * Directory names that are never descended into.
 */
const IGNORED_DIRS = new Set(['__MACOSX', '.Spotlight-V100', '.Trashes']);

/** Sanity limit to prevent accidentally uploading huge trees (e.g. node_modules) */
const MAX_COLLECT_FILES = 500;

/**
 * Recursively list the regular files below a directory.
 *
 * Skipped: entries named in IGNORED_NAMES, directories named in IGNORED_DIRS,
 * directories whose name starts with a dot, and anything that is neither a
 * regular file nor a directory. Dot-prefixed *files* are still collected.
 *
 * @param {string} dir - directory to scan
 * @param {{value: number}} [_counter] - running file count shared across the
 *   recursion; callers normally omit it
 * @returns {string[]} paths of the collected files (each is `dir` joined with
 *   the relative location)
 * @throws {Error} once more than MAX_COLLECT_FILES files have been counted
 */
function collectFiles(dir, _counter = { value: 0 }) {
  const collected = [];
  const entries = fs.readdirSync(dir, { withFileTypes: true });

  entries.forEach((entry) => {
    const { name } = entry;
    if (IGNORED_NAMES.has(name)) return;

    const entryPath = path.join(dir, name);

    if (entry.isDirectory()) {
      const skipDir = name.startsWith('.') || IGNORED_DIRS.has(name);
      if (!skipDir) {
        collected.push(...collectFiles(entryPath, _counter));
      }
      return;
    }

    if (!entry.isFile()) return;

    _counter.value += 1;
    if (_counter.value > MAX_COLLECT_FILES) {
      throw new Error(`Too many files (>${MAX_COLLECT_FILES}). Please upload a smaller directory or archive.`);
    }
    collected.push(entryPath);
  });

  return collected;
}
|
|
371
|
+
|
|
372
|
+
/** Max number of concurrent uploads */
const UPLOAD_CONCURRENCY = 10;

/**
 * Upload multiple files with a sliding-window concurrency pool.
 * Up to UPLOAD_CONCURRENCY workers run simultaneously; each worker takes the
 * next unclaimed file as soon as its current upload settles, so there is no
 * waiting for a full batch.
 *
 * A failed upload is logged and counted but does not stop the remaining files.
 *
 * @param {object} opts
 * @param {string} opts.host
 * @param {string} opts.apiKey
 * @param {string} opts.projectId
 * @param {Array<string>} opts.files - absolute paths of files to upload
 * @param {string} opts.label - label for the summary line (e.g. "Archive" / "Directory")
 * @returns {Promise<Array>} results of the successful uploads, in the order
 *   of `files` (not in completion order)
 */
async function uploadMany({ host, apiKey, projectId, files, label }) {
  const completed = []; // { index, doc } for every successful upload
  let success = 0;
  let failed = 0;
  let nextIndex = 0;

  // One worker = one concurrency slot. Claiming the index happens
  // synchronously before the await, so two workers never take the same file.
  async function worker() {
    while (nextIndex < files.length) {
      const index = nextIndex++;
      const f = files[index];
      try {
        const doc = await upload({
          host, apiKey, projectId,
          filePath: f,
          fileName: path.basename(f),
        });
        completed.push({ index, doc });
        success++;
      } catch (err) {
        // Rejections are not guaranteed to be Error instances.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`Failed to upload ${path.basename(f)}: ${reason}`);
        failed++;
      }
    }
  }

  // Kick off the initial pool of workers
  const poolSize = Math.min(UPLOAD_CONCURRENCY, files.length);
  await Promise.all(Array.from({ length: poolSize }, () => worker()));

  console.log(`\n${label} upload complete: ${success} succeeded, ${failed} failed.`);

  // Restore input order so the returned list is reproducible across runs.
  return completed
    .sort((a, b) => a.index - b.index)
    .map(entry => entry.doc);
}
|
|
415
|
+
|
|
416
|
+
/**
 * Upload whatever `filePath` points at, choosing the strategy from its type:
 *
 * - Directory → every file found by collectFiles() is uploaded
 * - Archive (recognized by extension) → extracted into a temporary directory,
 *   the extracted files are uploaded, and the directory is removed afterwards
 * - Anything else → uploaded as a single file
 *
 * @returns a single doc object for plain files, or an array for the
 *   directory/archive cases (empty when nothing was found to upload)
 * @throws {Error} "Path not found: …" when `filePath` does not exist
 */
export async function uploadAuto({ host, apiKey, projectId, filePath, fileName }) {
  if (!fs.existsSync(filePath)) {
    throw new Error(`Path not found: ${filePath}`);
  }

  const isDirectory = fs.statSync(filePath).isDirectory();
  const target = { host, apiKey, projectId };

  // --- Directory ---
  if (isDirectory) {
    const shownName = fileName || path.basename(filePath);
    console.log(`\nUploading directory: ${shownName}`);

    const dirFiles = collectFiles(filePath);
    if (dirFiles.length === 0) {
      console.log('Directory is empty, nothing to upload.');
      return [];
    }
    console.log(`Found ${dirFiles.length} file(s)\n`);

    return uploadMany({ ...target, files: dirFiles, label: 'Directory' });
  }

  // --- Plain file ---
  if (!getArchiveExtension(filePath)) {
    return upload({ ...target, filePath, fileName });
  }

  // --- Archive ---
  const shownName = fileName || path.basename(filePath);
  console.log(`\nDetected archive: ${shownName}`);
  console.log('Extracting...');

  const scratchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'memorylake-upload-'));
  try {
    await extractArchive(filePath, scratchDir);

    const extracted = collectFiles(scratchDir);
    if (extracted.length === 0) {
      console.log('Archive is empty, nothing to upload.');
      return [];
    }
    console.log(`Extracted ${extracted.length} file(s)\n`);

    // Awaited here so the scratch directory outlives the uploads.
    return await uploadMany({ ...target, files: extracted, label: 'Archive' });
  } finally {
    fs.rmSync(scratchDir, { recursive: true, force: true });
  }
}
|
|
473
|
+
|
|
231
474
|
// CLI entry point
|
|
232
475
|
async function main() {
|
|
233
476
|
const args = process.argv.slice(2);
|
|
@@ -237,17 +480,21 @@ async function main() {
|
|
|
237
480
|
MemoryLake File Upload
|
|
238
481
|
|
|
239
482
|
Usage:
|
|
240
|
-
node upload.mjs --host <url> --api-key <key> --project-id <id> <
|
|
483
|
+
node upload.mjs --host <url> --api-key <key> --project-id <id> <path>
|
|
241
484
|
|
|
242
485
|
Arguments:
|
|
243
486
|
--host Base URL (e.g., http://10.71.10.71:3002)
|
|
244
487
|
--api-key API key for authentication
|
|
245
488
|
--project-id Project ID to associate the document with (required)
|
|
246
|
-
--file-name Custom file name (default: basename of
|
|
247
|
-
|
|
489
|
+
--file-name Custom file name (default: basename of path)
|
|
490
|
+
path Path to a file, archive, or directory to upload
|
|
491
|
+
|
|
492
|
+
Archives are auto-detected and extracted; directories are recursively uploaded.
|
|
248
493
|
|
|
249
494
|
Examples:
|
|
250
495
|
node upload.mjs --host http://10.71.10.71:3002 --api-key sk-xxx --project-id proj-abc123 document.pdf
|
|
496
|
+
node upload.mjs --host http://10.71.10.71:3002 --api-key sk-xxx --project-id proj-abc123 docs.zip
|
|
497
|
+
node upload.mjs --host http://10.71.10.71:3002 --api-key sk-xxx --project-id proj-abc123 ./my-folder/
|
|
251
498
|
`);
|
|
252
499
|
process.exit(0);
|
|
253
500
|
}
|
|
@@ -262,7 +509,7 @@ Examples:
|
|
|
262
509
|
}
|
|
263
510
|
|
|
264
511
|
try {
|
|
265
|
-
await
|
|
512
|
+
await uploadAuto({ host, apiKey, projectId, filePath, fileName });
|
|
266
513
|
console.log('\nDone!\n');
|
|
267
514
|
} catch (err) {
|
|
268
515
|
console.error(`\nError: ${err.message}\n`);
|