@memvid/sdk 2.0.146 → 2.0.148
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/documents/docx.d.ts +6 -0
- package/dist/documents/docx.js +71 -0
- package/dist/documents/index.d.ts +44 -0
- package/dist/documents/index.js +114 -0
- package/dist/documents/pdf.d.ts +7 -0
- package/dist/documents/pdf.js +75 -0
- package/dist/documents/pptx.d.ts +6 -0
- package/dist/documents/pptx.js +89 -0
- package/dist/documents/types.d.ts +61 -0
- package/dist/documents/types.js +5 -0
- package/dist/documents/xlsx.d.ts +6 -0
- package/dist/documents/xlsx.js +117 -0
- package/dist/index.d.ts +107 -0
- package/dist/index.js +336 -0
- package/dist/types.d.ts +35 -0
- package/package.json +8 -5
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Word Document Parser with error handling
|
|
3
|
+
* Uses officeparser v6 - no Rust fallback available for DOCX
|
|
4
|
+
*/
|
|
5
|
+
import type { ParseOptions, ParseResult } from "./types";
|
|
6
|
+
export declare function parseDocx(filePath: string, _options?: ParseOptions): Promise<ParseResult>;
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Word Document Parser with error handling
|
|
4
|
+
* Uses officeparser v6 - no Rust fallback available for DOCX
|
|
5
|
+
*/
|
|
6
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
7
|
+
if (k2 === undefined) k2 = k;
|
|
8
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
9
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
10
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
11
|
+
}
|
|
12
|
+
Object.defineProperty(o, k2, desc);
|
|
13
|
+
}) : (function(o, m, k, k2) {
|
|
14
|
+
if (k2 === undefined) k2 = k;
|
|
15
|
+
o[k2] = m[k];
|
|
16
|
+
}));
|
|
17
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
18
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
19
|
+
}) : function(o, v) {
|
|
20
|
+
o["default"] = v;
|
|
21
|
+
});
|
|
22
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
23
|
+
var ownKeys = function(o) {
|
|
24
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
25
|
+
var ar = [];
|
|
26
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
27
|
+
return ar;
|
|
28
|
+
};
|
|
29
|
+
return ownKeys(o);
|
|
30
|
+
};
|
|
31
|
+
return function (mod) {
|
|
32
|
+
if (mod && mod.__esModule) return mod;
|
|
33
|
+
var result = {};
|
|
34
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
35
|
+
__setModuleDefault(result, mod);
|
|
36
|
+
return result;
|
|
37
|
+
};
|
|
38
|
+
})();
|
|
39
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
40
|
+
exports.parseDocx = parseDocx;
|
|
41
|
+
/**
 * Extract the text of a Word document as a single item.
 *
 * @param filePath - Path to the Word file
 * @param _options - Accepted for signature parity with the other parsers; not read here
 * @returns A "docx" ParseResult. `items` holds one entry with the full text, or is
 *          empty (after a warning) when the document has no text.
 * @throws Error with a descriptive message when officeparser cannot be loaded or
 *         fails to parse the file (there is no fallback parser for Word files)
 */
async function parseDocx(filePath, _options) {
    // path.basename understands the platform's separators; splitting on "/"
    // left the whole path in `filename` for Windows-style paths.
    const filename = require("path").basename(filePath) || filePath;
    try {
        // Loaded inside the try so a missing or broken dependency surfaces as the
        // descriptive error below.
        const officeParser = await Promise.resolve().then(() => __importStar(require("officeparser")));
        const ast = await officeParser.parseOffice(filePath);
        const content = ast.toText();
        const items = [];
        if (content && content.trim().length > 0) {
            items.push({ number: 1, text: content });
        }
        else {
            console.warn(`[memvid] No text content found in ${filename}`);
        }
        return {
            type: "docx",
            filename,
            // The document itself is always counted as one item, even when empty.
            totalItems: 1,
            items,
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to parse Word file "${filename}": ${message}. ` +
            `Ensure the file is a valid .docx/.doc file.`);
    }
}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Parsing Module
|
|
3
|
+
*
|
|
4
|
+
* Auto-detects file type and parses PDF, XLSX, PPTX, DOCX documents.
|
|
5
|
+
* Includes fallback support for PDF using Rust core.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* ```typescript
|
|
9
|
+
* import { parse } from "@memvid/sdk/documents";
|
|
10
|
+
*
|
|
11
|
+
* const result = await parse("./report.pdf");
|
|
12
|
+
* if (result) {
|
|
13
|
+
* console.log(result.items.length); // number of pages that contain text
|
|
14
|
+
* }
|
|
15
|
+
* ```
|
|
16
|
+
*/
|
|
17
|
+
import type { ParseOptions, ParseResult } from "./types";
|
|
18
|
+
export * from "./types";
|
|
19
|
+
export { parsePdf } from "./pdf";
|
|
20
|
+
export { parseXlsx } from "./xlsx";
|
|
21
|
+
export { parsePptx } from "./pptx";
|
|
22
|
+
export { parseDocx } from "./docx";
|
|
23
|
+
/**
|
|
24
|
+
* Parse a document file with automatic format detection.
|
|
25
|
+
*
|
|
26
|
+
* Supported formats:
|
|
27
|
+
* - PDF (.pdf) - per-page extraction (with Rust fallback)
|
|
28
|
+
* - Excel (.xlsx, .xls) - per-sheet extraction
|
|
29
|
+
* - PowerPoint (.pptx, .ppt) - per-slide extraction
|
|
30
|
+
* - Word (.docx, .doc) - full document extraction
|
|
31
|
+
*
|
|
32
|
+
* @param filePath - Path to the document file
|
|
33
|
+
* @param options - Parsing options
|
|
34
|
+
* @returns ParseResult with items, or null for PDF if JS parser failed (signals Rust fallback)
|
|
35
|
+
*/
|
|
36
|
+
export declare function parse(filePath: string, options?: ParseOptions): Promise<ParseResult | null>;
|
|
37
|
+
/**
|
|
38
|
+
* Check if a file extension is supported for document parsing.
|
|
39
|
+
*/
|
|
40
|
+
export declare function isSupportedFormat(filePath: string): boolean;
|
|
41
|
+
/**
|
|
42
|
+
* Get the document type from a file path.
|
|
43
|
+
*/
|
|
44
|
+
export declare function getDocumentType(filePath: string): string | null;
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Document Parsing Module
|
|
4
|
+
*
|
|
5
|
+
* Auto-detects file type and parses PDF, XLSX, PPTX, DOCX documents.
|
|
6
|
+
* Includes fallback support for PDF using Rust core.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* ```typescript
|
|
10
|
+
* import { parse } from "@memvid/sdk/documents";
|
|
11
|
+
*
|
|
12
|
+
* const result = await parse("./report.pdf");
|
|
13
|
+
* if (result) {
|
|
14
|
+
* console.log(result.items.length); // number of pages that contain text
|
|
15
|
+
* }
|
|
16
|
+
* ```
|
|
17
|
+
*/
|
|
18
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
19
|
+
if (k2 === undefined) k2 = k;
|
|
20
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
21
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
22
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
23
|
+
}
|
|
24
|
+
Object.defineProperty(o, k2, desc);
|
|
25
|
+
}) : (function(o, m, k, k2) {
|
|
26
|
+
if (k2 === undefined) k2 = k;
|
|
27
|
+
o[k2] = m[k];
|
|
28
|
+
}));
|
|
29
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
30
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
|
31
|
+
};
|
|
32
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
33
|
+
exports.parseDocx = exports.parsePptx = exports.parseXlsx = exports.parsePdf = void 0;
|
|
34
|
+
exports.parse = parse;
|
|
35
|
+
exports.isSupportedFormat = isSupportedFormat;
|
|
36
|
+
exports.getDocumentType = getDocumentType;
|
|
37
|
+
const fs_1 = require("fs");
|
|
38
|
+
const path_1 = require("path");
|
|
39
|
+
const pdf_1 = require("./pdf");
|
|
40
|
+
const xlsx_1 = require("./xlsx");
|
|
41
|
+
const pptx_1 = require("./pptx");
|
|
42
|
+
const docx_1 = require("./docx");
|
|
43
|
+
__exportStar(require("./types"), exports);
|
|
44
|
+
var pdf_2 = require("./pdf");
|
|
45
|
+
Object.defineProperty(exports, "parsePdf", { enumerable: true, get: function () { return pdf_2.parsePdf; } });
|
|
46
|
+
var xlsx_2 = require("./xlsx");
|
|
47
|
+
Object.defineProperty(exports, "parseXlsx", { enumerable: true, get: function () { return xlsx_2.parseXlsx; } });
|
|
48
|
+
var pptx_2 = require("./pptx");
|
|
49
|
+
Object.defineProperty(exports, "parsePptx", { enumerable: true, get: function () { return pptx_2.parsePptx; } });
|
|
50
|
+
var docx_2 = require("./docx");
|
|
51
|
+
Object.defineProperty(exports, "parseDocx", { enumerable: true, get: function () { return docx_2.parseDocx; } });
|
|
52
|
+
/**
 * Parse a document, choosing the parser from the file extension
 * (compared case-insensitively).
 *
 * Extension routing:
 * - .pdf          -> parsePdf  (may resolve to null, signalling the Rust fallback)
 * - .xlsx, .xls   -> parseXlsx
 * - .pptx, .ppt   -> parsePptx
 * - .docx, .doc   -> parseDocx
 *
 * @param filePath - Path to the document file
 * @param options - Parsing options, forwarded unchanged to the selected parser
 * @returns The selected parser's result
 * @throws Error when the file does not exist or the extension is not supported
 */
async function parse(filePath, options) {
    const fileExists = (0, fs_1.existsSync)(filePath);
    if (!fileExists) {
        throw new Error(`File not found: ${filePath}`);
    }
    const ext = (0, path_1.extname)(filePath).toLowerCase();
    if (ext === ".pdf") {
        // parsePdf resolves to null when unpdf fails, signalling the Rust fallback.
        return (0, pdf_1.parsePdf)(filePath, options);
    }
    if (ext === ".xlsx" || ext === ".xls") {
        return (0, xlsx_1.parseXlsx)(filePath, options);
    }
    if (ext === ".pptx" || ext === ".ppt") {
        return (0, pptx_1.parsePptx)(filePath, options);
    }
    if (ext === ".docx" || ext === ".doc") {
        return (0, docx_1.parseDocx)(filePath, options);
    }
    throw new Error(`Unsupported file format: ${ext}. Supported: .pdf, .xlsx, .xls, .pptx, .ppt, .docx, .doc`);
}
|
|
87
|
+
/**
 * Report whether the path's extension (case-insensitive) is one that
 * parse() can handle.
 *
 * @param filePath - Path or file name to inspect; the file is not accessed
 * @returns true for .pdf, .xlsx, .xls, .pptx, .ppt, .docx and .doc
 */
function isSupportedFormat(filePath) {
    switch ((0, path_1.extname)(filePath).toLowerCase()) {
        case ".pdf":
        case ".xlsx":
        case ".xls":
        case ".pptx":
        case ".ppt":
        case ".docx":
        case ".doc":
            return true;
        default:
            return false;
    }
}
|
|
94
|
+
/**
 * Map a path's extension (case-insensitive) to its document type.
 *
 * @param filePath - Path or file name to inspect; the file is not accessed
 * @returns "pdf", "xlsx", "pptx" or "docx"; null when the extension is not supported
 */
function getDocumentType(filePath) {
    const typeByExtension = new Map([
        [".pdf", "pdf"],
        [".xlsx", "xlsx"],
        [".xls", "xlsx"],
        [".pptx", "pptx"],
        [".ppt", "pptx"],
        [".docx", "docx"],
        [".doc", "docx"],
    ]);
    const ext = (0, path_1.extname)(filePath).toLowerCase();
    const documentType = typeByExtension.get(ext);
    return documentType === undefined ? null : documentType;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF Parser with fallback support
|
|
3
|
+
* Primary: unpdf (Mozilla pdf.js)
|
|
4
|
+
* Fallback: Returns null to signal using Rust core's pdf_extract
|
|
5
|
+
*/
|
|
6
|
+
import type { ParseOptions, ParseResult } from "./types";
|
|
7
|
+
export declare function parsePdf(filePath: string, options?: ParseOptions): Promise<ParseResult | null>;
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* PDF Parser with fallback support
|
|
4
|
+
* Primary: unpdf (Mozilla pdf.js)
|
|
5
|
+
* Fallback: Returns null to signal using Rust core's pdf_extract
|
|
6
|
+
*/
|
|
7
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
8
|
+
if (k2 === undefined) k2 = k;
|
|
9
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
10
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
11
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
12
|
+
}
|
|
13
|
+
Object.defineProperty(o, k2, desc);
|
|
14
|
+
}) : (function(o, m, k, k2) {
|
|
15
|
+
if (k2 === undefined) k2 = k;
|
|
16
|
+
o[k2] = m[k];
|
|
17
|
+
}));
|
|
18
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
19
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
20
|
+
}) : function(o, v) {
|
|
21
|
+
o["default"] = v;
|
|
22
|
+
});
|
|
23
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
24
|
+
var ownKeys = function(o) {
|
|
25
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
26
|
+
var ar = [];
|
|
27
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
28
|
+
return ar;
|
|
29
|
+
};
|
|
30
|
+
return ownKeys(o);
|
|
31
|
+
};
|
|
32
|
+
return function (mod) {
|
|
33
|
+
if (mod && mod.__esModule) return mod;
|
|
34
|
+
var result = {};
|
|
35
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
36
|
+
__setModuleDefault(result, mod);
|
|
37
|
+
return result;
|
|
38
|
+
};
|
|
39
|
+
})();
|
|
40
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
41
|
+
exports.parsePdf = parsePdf;
|
|
42
|
+
const fs_1 = require("fs");
|
|
43
|
+
/**
 * Extract per-page text from a PDF using unpdf.
 *
 * @param filePath - Path to the PDF file
 * @param options - `maxItems` limits how many pages are inspected; a falsy
 *                  value (including 0) means "all pages"
 * @returns A "pdf" ParseResult whose `items` contain only pages with text
 *          (numbered from 1) and whose `totalItems` is the page count reported
 *          by unpdf; or null when anything fails, which tells the caller to
 *          fall back to the Rust core.
 */
async function parsePdf(filePath, options) {
    // path.basename understands the platform's separators; splitting on "/"
    // left the whole path in `filename` for Windows-style paths.
    const filename = require("path").basename(filePath) || filePath;
    try {
        const { extractText } = await Promise.resolve().then(() => __importStar(require("unpdf")));
        // Read without blocking the event loop; this function is already async.
        const buffer = await fs_1.promises.readFile(filePath);
        // mergePages: false makes `text` an array with one string per page.
        const { text, totalPages } = await extractText(new Uint8Array(buffer), {
            mergePages: false,
        });
        const pages = text;
        const limit = Math.min(pages.length, options?.maxItems || pages.length);
        const items = [];
        for (let i = 0; i < limit; i++) {
            const pageText = pages[i];
            // Blank pages are skipped but keep their page number.
            if (pageText && pageText.trim().length > 0) {
                items.push({ number: i + 1, text: pageText });
            }
        }
        return {
            type: "pdf",
            filename,
            totalItems: totalPages,
            items,
        };
    }
    catch (err) {
        // Deliberate best-effort: log and return null to signal the Rust fallback.
        console.warn(`[memvid] unpdf failed for ${filename}, using Rust fallback: ${err instanceof Error ? err.message : String(err)}`);
        return null;
    }
}
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PowerPoint Parser with error handling
|
|
3
|
+
* Uses officeparser v6 - no Rust fallback available for PPTX
|
|
4
|
+
*/
|
|
5
|
+
import type { ParseOptions, ParseResult } from "./types";
|
|
6
|
+
export declare function parsePptx(filePath: string, options?: ParseOptions): Promise<ParseResult>;
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* PowerPoint Parser with error handling
|
|
4
|
+
* Uses officeparser v6 - no Rust fallback available for PPTX
|
|
5
|
+
*/
|
|
6
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
7
|
+
if (k2 === undefined) k2 = k;
|
|
8
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
9
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
10
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
11
|
+
}
|
|
12
|
+
Object.defineProperty(o, k2, desc);
|
|
13
|
+
}) : (function(o, m, k, k2) {
|
|
14
|
+
if (k2 === undefined) k2 = k;
|
|
15
|
+
o[k2] = m[k];
|
|
16
|
+
}));
|
|
17
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
18
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
19
|
+
}) : function(o, v) {
|
|
20
|
+
o["default"] = v;
|
|
21
|
+
});
|
|
22
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
23
|
+
var ownKeys = function(o) {
|
|
24
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
25
|
+
var ar = [];
|
|
26
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
27
|
+
return ar;
|
|
28
|
+
};
|
|
29
|
+
return ownKeys(o);
|
|
30
|
+
};
|
|
31
|
+
return function (mod) {
|
|
32
|
+
if (mod && mod.__esModule) return mod;
|
|
33
|
+
var result = {};
|
|
34
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
35
|
+
__setModuleDefault(result, mod);
|
|
36
|
+
return result;
|
|
37
|
+
};
|
|
38
|
+
})();
|
|
39
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
40
|
+
exports.parsePptx = parsePptx;
|
|
41
|
+
/**
 * Extract slide text (speaker notes included) from a PowerPoint file.
 *
 * The text returned by officeparser is split on blank lines and each chunk
 * is treated as one slide; the first line of a chunk is used as its title.
 *
 * @param filePath - Path to the PowerPoint file
 * @param options - `maxItems` limits how many slides are returned; a falsy
 *                  value (including 0) means "all slides"
 * @returns A "pptx" ParseResult. `totalItems` is the number of slides found,
 *          independent of `maxItems`, matching the PDF and Excel parsers.
 * @throws Error with a descriptive message when officeparser cannot be loaded
 *         or fails to parse the file (there is no fallback parser)
 */
async function parsePptx(filePath, options) {
    // path.basename understands the platform's separators; splitting on "/"
    // left the whole path in `filename` for Windows-style paths.
    const filename = require("path").basename(filePath) || filePath;
    try {
        const officeParser = await Promise.resolve().then(() => __importStar(require("officeparser")));
        const ast = await officeParser.parseOffice(filePath, {
            ignoreNotes: false, // Include speaker notes
        });
        const content = ast.toText();
        if (!content || content.trim().length === 0) {
            console.warn(`[memvid] No text content found in ${filename}`);
            return {
                type: "pptx",
                filename,
                totalItems: 0,
                items: [],
            };
        }
        // Blank-line separated chunks; whitespace-only chunks are dropped here,
        // so every remaining chunk is non-empty after trimming.
        const slides = content.split(/\n\s*\n/).filter((s) => s.trim());
        const limit = Math.min(slides.length, options?.maxItems || slides.length);
        const items = [];
        for (let i = 0; i < limit; i++) {
            const slideText = slides[i].trim();
            const title = slideText.split("\n")[0]?.trim();
            items.push({
                number: i + 1,
                title: title || undefined,
                text: slideText,
            });
        }
        return {
            type: "pptx",
            filename,
            // Report every slide found, not just the ones kept after maxItems.
            totalItems: slides.length,
            items,
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to parse PowerPoint file "${filename}": ${message}. ` +
            `Ensure the file is a valid .pptx/.ppt file.`);
    }
}
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Parsing Types
|
|
3
|
+
*/
|
|
4
|
+
export interface ParseOptions {
|
|
5
|
+
/** Enable OCR for scanned pages (default: false) */
|
|
6
|
+
ocr?: boolean;
|
|
7
|
+
/** Extract tables as structured arrays (default: true) */
|
|
8
|
+
extractTables?: boolean;
|
|
9
|
+
/** Limit pages/sheets/slides to process */
|
|
10
|
+
maxItems?: number;
|
|
11
|
+
}
|
|
12
|
+
export interface DocumentItem {
|
|
13
|
+
/** 1-based item number (page/slide) or 0-based (sheet) */
|
|
14
|
+
number: number;
|
|
15
|
+
/** Sheet name for XLSX */
|
|
16
|
+
name?: string;
|
|
17
|
+
/** Extracted text content */
|
|
18
|
+
text: string;
|
|
19
|
+
/** Slide title for PPTX */
|
|
20
|
+
title?: string;
|
|
21
|
+
}
|
|
22
|
+
export interface ParseResult {
|
|
23
|
+
/** Detected document type */
|
|
24
|
+
type: "pdf" | "xlsx" | "pptx" | "docx";
|
|
25
|
+
/** Original filename */
|
|
26
|
+
filename: string;
|
|
27
|
+
/** Total items (pages/sheets/slides) */
|
|
28
|
+
totalItems: number;
|
|
29
|
+
/** Parsed items */
|
|
30
|
+
items: DocumentItem[];
|
|
31
|
+
}
|
|
32
|
+
export interface PutFileOptions {
|
|
33
|
+
/** Label for all frames (default: "document") */
|
|
34
|
+
label?: string;
|
|
35
|
+
/** Additional metadata to attach */
|
|
36
|
+
metadata?: Record<string, unknown>;
|
|
37
|
+
}
|
|
38
|
+
export interface PutFileResult {
|
|
39
|
+
/** Number of frames added */
|
|
40
|
+
framesAdded: number;
|
|
41
|
+
/** Document type detected */
|
|
42
|
+
type: string;
|
|
43
|
+
/** Original filename */
|
|
44
|
+
filename: string;
|
|
45
|
+
}
|
|
46
|
+
export interface PutFilesOptions extends PutFileOptions {
|
|
47
|
+
/** File extensions to process (default: all supported) */
|
|
48
|
+
extensions?: string[];
|
|
49
|
+
}
|
|
50
|
+
export interface PutFilesResult {
|
|
51
|
+
/** Number of files processed */
|
|
52
|
+
filesProcessed: number;
|
|
53
|
+
/** Total frames added */
|
|
54
|
+
framesAdded: number;
|
|
55
|
+
/** Results per file */
|
|
56
|
+
files: Array<{
|
|
57
|
+
filename: string;
|
|
58
|
+
framesAdded: number;
|
|
59
|
+
type: string;
|
|
60
|
+
}>;
|
|
61
|
+
}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* Excel Parser with error handling
|
|
4
|
+
* Uses exceljs - no Rust fallback available for XLSX
|
|
5
|
+
*/
|
|
6
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
|
7
|
+
if (k2 === undefined) k2 = k;
|
|
8
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
|
9
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
|
10
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
|
11
|
+
}
|
|
12
|
+
Object.defineProperty(o, k2, desc);
|
|
13
|
+
}) : (function(o, m, k, k2) {
|
|
14
|
+
if (k2 === undefined) k2 = k;
|
|
15
|
+
o[k2] = m[k];
|
|
16
|
+
}));
|
|
17
|
+
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
|
|
18
|
+
Object.defineProperty(o, "default", { enumerable: true, value: v });
|
|
19
|
+
}) : function(o, v) {
|
|
20
|
+
o["default"] = v;
|
|
21
|
+
});
|
|
22
|
+
var __importStar = (this && this.__importStar) || (function () {
|
|
23
|
+
var ownKeys = function(o) {
|
|
24
|
+
ownKeys = Object.getOwnPropertyNames || function (o) {
|
|
25
|
+
var ar = [];
|
|
26
|
+
for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
|
|
27
|
+
return ar;
|
|
28
|
+
};
|
|
29
|
+
return ownKeys(o);
|
|
30
|
+
};
|
|
31
|
+
return function (mod) {
|
|
32
|
+
if (mod && mod.__esModule) return mod;
|
|
33
|
+
var result = {};
|
|
34
|
+
if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
|
|
35
|
+
__setModuleDefault(result, mod);
|
|
36
|
+
return result;
|
|
37
|
+
};
|
|
38
|
+
})();
|
|
39
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
40
|
+
exports.parseXlsx = parseXlsx;
|
|
41
|
+
/**
 * Convert an exceljs cell value into the text to record for that cell.
 *
 * Handled shapes:
 * - null / undefined / ""           -> null (cell is skipped)
 * - primitives                      -> String(value)
 * - Date                            -> "YYYY-MM-DD" (null for an invalid Date)
 * - { formula, result }             -> the converted result, so Date and error
 *                                      results are rendered like plain cells
 * - { richText: [{ text }, ...] }   -> the runs concatenated
 * - { error }                       -> the error code with a single leading "#"
 * - { text, hyperlink }             -> the converted link text
 * - any other object                -> null, to avoid emitting "[object Object]"
 *
 * @param cellValue - Raw `cell.value` as provided by exceljs
 * @returns The display text, or null when the cell has nothing to show
 */
function getCellValue(cellValue) {
    if (cellValue == null || cellValue === "")
        return null;
    if (typeof cellValue !== "object")
        return String(cellValue);
    if (cellValue instanceof Date) {
        // toISOString() throws a RangeError on an invalid Date.
        return Number.isNaN(cellValue.getTime())
            ? null
            : cellValue.toISOString().split("T")[0];
    }
    const obj = cellValue;
    // Formula cell: convert the result recursively instead of String()-ing it,
    // which turned object results into "[object Object]".
    if ("result" in obj && obj.result != null) {
        return getCellValue(obj.result);
    }
    if ("richText" in obj && Array.isArray(obj.richText)) {
        return obj.richText.map((run) => run.text || "").join("");
    }
    if ("error" in obj) {
        // exceljs documents error codes as already starting with "#" (e.g.
        // "#N/A"); only add the prefix when it is missing so we never emit "##N/A".
        const code = String(obj.error);
        return code.startsWith("#") ? code : `#${code}`;
    }
    // Hyperlink cell: keep the visible text rather than dropping the cell.
    if ("text" in obj && obj.text != null) {
        return getCellValue(obj.text);
    }
    return null;
}
|
|
69
|
+
/**
 * Extract per-sheet text from an Excel workbook using exceljs.
 *
 * Each row becomes one line with its non-empty cell values joined by " | ";
 * sheets without any such row produce no item.
 *
 * @param filePath - Path to the workbook
 * @param options - `maxItems` limits how many worksheets are inspected; a
 *                  falsy value (including 0) means "all worksheets"
 * @returns An "xlsx" ParseResult. Items are numbered from 0 by worksheet
 *          index and carry the sheet name; `totalItems` is the worksheet count.
 * @throws Error with a descriptive message when exceljs cannot be loaded or
 *         fails to read the file (there is no fallback parser)
 */
async function parseXlsx(filePath, options) {
    // path.basename understands the platform's separators; splitting on "/"
    // left the whole path in `filename` for Windows-style paths.
    const filename = require("path").basename(filePath) || filePath;
    try {
        const ExcelJS = await Promise.resolve().then(() => __importStar(require("exceljs")));
        const workbook = new ExcelJS.default.Workbook();
        await workbook.xlsx.readFile(filePath);
        const sheetCount = workbook.worksheets.length;
        const limit = Math.min(sheetCount, options?.maxItems || sheetCount);
        const items = [];
        for (let i = 0; i < limit; i++) {
            const worksheet = workbook.worksheets[i];
            const textLines = [];
            worksheet.eachRow((row) => {
                const values = [];
                row.eachCell((cell) => {
                    const value = getCellValue(cell.value);
                    if (value != null) {
                        values.push(value);
                    }
                });
                if (values.length > 0) {
                    textLines.push(values.join(" | "));
                }
            });
            if (textLines.length > 0) {
                items.push({
                    number: i,
                    name: worksheet.name,
                    text: textLines.join("\n"),
                });
            }
        }
        // An empty workbook is still a valid result; just make it visible.
        if (items.length === 0) {
            console.warn(`[memvid] No content found in ${filename}`);
        }
        return {
            type: "xlsx",
            filename,
            totalItems: sheetCount,
            items,
        };
    }
    catch (err) {
        const message = err instanceof Error ? err.message : String(err);
        throw new Error(`Failed to parse Excel file "${filename}": ${message}. ` +
            `Ensure the file is a valid .xlsx/.xls file.`);
    }
}
|
package/dist/index.d.ts
CHANGED
|
@@ -264,6 +264,113 @@ export declare function listMemories(options?: {
|
|
|
264
264
|
apiKey?: string;
|
|
265
265
|
dashboardUrl?: string;
|
|
266
266
|
}): Promise<MemoryInfo[]>;
|
|
267
|
+
/**
|
|
268
|
+
* Options for creating a project on the cloud.
|
|
269
|
+
*/
|
|
270
|
+
export interface CreateProjectOptions {
|
|
271
|
+
/** Name for the project */
|
|
272
|
+
name: string;
|
|
273
|
+
/** Optional description */
|
|
274
|
+
description?: string;
|
|
275
|
+
/** API key (uses global config or env var if not specified) */
|
|
276
|
+
apiKey?: string;
|
|
277
|
+
/** Dashboard URL (uses global config or env var if not specified) */
|
|
278
|
+
dashboardUrl?: string;
|
|
279
|
+
}
|
|
280
|
+
/**
|
|
281
|
+
* Result of creating a project on the cloud.
|
|
282
|
+
*/
|
|
283
|
+
export interface CreateProjectResult {
|
|
284
|
+
/** The project ID (24-char ObjectId format) */
|
|
285
|
+
id: string;
|
|
286
|
+
/** Organisation ID */
|
|
287
|
+
organisationId: string;
|
|
288
|
+
/** URL-friendly slug */
|
|
289
|
+
slug: string;
|
|
290
|
+
/** Project name */
|
|
291
|
+
name: string;
|
|
292
|
+
/** Description */
|
|
293
|
+
description: string | null;
|
|
294
|
+
/** Creation timestamp */
|
|
295
|
+
createdAt: string;
|
|
296
|
+
/** Last update timestamp */
|
|
297
|
+
updatedAt: string;
|
|
298
|
+
}
|
|
299
|
+
/**
|
|
300
|
+
* Project info returned from listProjects.
|
|
301
|
+
*/
|
|
302
|
+
export interface ProjectInfo {
|
|
303
|
+
/** The project ID (24-char ObjectId format) */
|
|
304
|
+
id: string;
|
|
305
|
+
/** Organisation ID */
|
|
306
|
+
organisationId: string;
|
|
307
|
+
/** URL-friendly slug */
|
|
308
|
+
slug: string;
|
|
309
|
+
/** Project name */
|
|
310
|
+
name: string;
|
|
311
|
+
/** Description */
|
|
312
|
+
description: string | null;
|
|
313
|
+
/** Creation timestamp */
|
|
314
|
+
createdAt: string;
|
|
315
|
+
/** Last update timestamp */
|
|
316
|
+
updatedAt: string;
|
|
317
|
+
}
|
|
318
|
+
/**
|
|
319
|
+
* Create a new project on the cloud dashboard.
|
|
320
|
+
*
|
|
321
|
+
* Projects are containers that organize multiple memories together.
|
|
322
|
+
* Use projects to group related memories (e.g., by environment, team, or use case).
|
|
323
|
+
*
|
|
324
|
+
* @param options - Project creation options
|
|
325
|
+
* @returns The created project details
|
|
326
|
+
*
|
|
327
|
+
* @example
|
|
328
|
+
* ```typescript
|
|
329
|
+
* import { configure, createProject, createMemory } from '@memvid/sdk';
|
|
330
|
+
*
|
|
331
|
+
* configure({
|
|
332
|
+
* apiKey: 'mv2_your_api_key',
|
|
333
|
+
* dashboardUrl: 'http://localhost:3001',
|
|
334
|
+
* });
|
|
335
|
+
*
|
|
336
|
+
* // Create a project
|
|
337
|
+
* const project = await createProject({
|
|
338
|
+
* name: 'Production Environment',
|
|
339
|
+
* description: 'All production memories',
|
|
340
|
+
* });
|
|
341
|
+
*
|
|
342
|
+
* console.log('Created project:', project.id);
|
|
343
|
+
*
|
|
344
|
+
* // Create a memory in this project
|
|
345
|
+
* const memory = await createMemory({
|
|
346
|
+
* name: 'Customer Support KB',
|
|
347
|
+
* projectId: project.id,
|
|
348
|
+
* });
|
|
349
|
+
* ```
|
|
350
|
+
*/
|
|
351
|
+
export declare function createProject(options: CreateProjectOptions): Promise<CreateProjectResult>;
|
|
352
|
+
/**
|
|
353
|
+
* List all projects for the authenticated organisation.
|
|
354
|
+
*
|
|
355
|
+
* @param options - Optional API key and dashboard URL overrides
|
|
356
|
+
* @returns Array of project info objects
|
|
357
|
+
*
|
|
358
|
+
* @example
|
|
359
|
+
* ```typescript
|
|
360
|
+
* import { configure, listProjects } from '@memvid/sdk';
|
|
361
|
+
*
|
|
362
|
+
* configure({ apiKey: 'mv2_your_api_key' });
|
|
363
|
+
*
|
|
364
|
+
* const projects = await listProjects();
|
|
365
|
+
* for (const project of projects) {
|
|
366
|
+
* console.log(`${project.name} (${project.id})`);
|
|
367
|
+
* }
|
|
368
|
+
* ```
|
|
369
|
+
*/
|
|
370
|
+
export declare function listProjects(options?: {
|
|
371
|
+
apiKey?: string;
|
|
372
|
+
dashboardUrl?: string;
|
|
373
|
+
}): Promise<ProjectInfo[]>;
|
|
267
374
|
type UseFunction = {
|
|
268
375
|
(kind: Kind, filename: string, apiKeyOrOptions?: ApiKey | UseOptions, options?: UseOptions): Promise<Memvid>;
|
|
269
376
|
verify(path: string, options?: UseVerifyOptions): Promise<unknown>;
|
package/dist/index.js
CHANGED
|
@@ -44,6 +44,8 @@ exports.validateConfig = validateConfig;
|
|
|
44
44
|
exports.resolveMemory = resolveMemory;
|
|
45
45
|
exports.createMemory = createMemory;
|
|
46
46
|
exports.listMemories = listMemories;
|
|
47
|
+
exports.createProject = createProject;
|
|
48
|
+
exports.listProjects = listProjects;
|
|
47
49
|
exports.lockWho = lockWho;
|
|
48
50
|
exports.lockNudge = lockNudge;
|
|
49
51
|
exports.lock = lock;
|
|
@@ -454,6 +456,136 @@ async function listMemories(options) {
|
|
|
454
456
|
updatedAt: m.updated_at,
|
|
455
457
|
}));
|
|
456
458
|
}
|
|
459
|
+
/**
 * Create a new project on the cloud dashboard.
 *
 * Projects are containers that organize multiple memories together.
 * Use projects to group related memories (e.g., by environment, team, or use case).
 *
 * The API key is taken from `options.apiKey`, then the global configuration,
 * then the MEMVID_API_KEY environment variable. The dashboard URL is resolved
 * the same way (MEMVID_DASHBOARD_URL) and defaults to "https://memvid.com".
 *
 * @param options - Project creation options
 * @returns The created project details; `description` is null when the
 *          server response does not include one
 * @throws ApiKeyRequiredError when no API key can be resolved
 * @throws MemvidError (code "MV022") when the dashboard responds with a non-2xx status
 *
 * @example
 * ```typescript
 * import { configure, createProject, createMemory } from '@memvid/sdk';
 *
 * configure({
 *   apiKey: 'mv2_your_api_key',
 *   dashboardUrl: 'http://localhost:3001',
 * });
 *
 * // Create a project
 * const project = await createProject({
 *   name: 'Production Environment',
 *   description: 'All production memories',
 * });
 *
 * console.log('Created project:', project.id);
 *
 * // Create a memory in this project
 * const memory = await createMemory({
 *   name: 'Customer Support KB',
 *   projectId: project.id,
 * });
 * ```
 */
async function createProject(options) {
    const { name, description } = options;
    // Resolve API key
    const apiKey = options.apiKey || globalConfig.apiKey || process.env.MEMVID_API_KEY;
    if (!apiKey) {
        throw new error_1.ApiKeyRequiredError("API key required for createProject(). " +
            "Set via configure({ apiKey: 'mv2_...' }), MEMVID_API_KEY env var, or options.apiKey. " +
            "Get your API key at https://memvid.com/dashboard/api-keys");
    }
    // Resolve dashboard URL; strip every trailing slash so the endpoint path
    // below never ends up with "//" in it.
    const dashboardUrl = (options.dashboardUrl ||
        globalConfig.dashboardUrl ||
        process.env.MEMVID_DASHBOARD_URL ||
        "https://memvid.com").replace(/\/+$/, "");
    const url = `${dashboardUrl}/api/projects`;
    // The description is only sent when one was provided.
    const body = { name };
    if (description)
        body.description = description;
    const response = await fetch(url, {
        method: "POST",
        headers: {
            "Content-Type": "application/json",
            "x-api-key": apiKey,
        },
        body: JSON.stringify(body),
    });
    if (!response.ok) {
        // The response body is best-effort context for the error message.
        const text = await response.text().catch(() => "");
        throw new error_1.MemvidError("MV022", `Failed to create project: ${response.status} ${response.statusText}. ${text}`);
    }
    const json = await response.json();
    // Accept both an enveloped ({ data: {...} }) and a bare payload.
    const data = json.data || json;
    return {
        id: data.id,
        organisationId: data.organisation_id,
        slug: data.slug,
        name: data.name,
        // Declared as `string | null`; never hand back undefined.
        description: data.description ?? null,
        createdAt: data.created_at,
        updatedAt: data.updated_at,
    };
}
|
|
534
|
+
/**
 * List all projects for the authenticated organisation.
 *
 * Sends a GET to `<dashboardUrl>/api/projects` with the `x-api-key` header.
 * The API key and dashboard URL are resolved from `options`, then the global
 * config, then the `MEMVID_API_KEY` / `MEMVID_DASHBOARD_URL` environment
 * variables (the URL finally defaults to `https://memvid.com`).
 *
 * @param options - Optional API key and dashboard URL overrides
 * @returns Array of project info objects (empty when the response carries no `projects` list)
 * @throws ApiKeyRequiredError if no API key can be resolved
 * @throws MemvidError (code MV023) if the response is not OK
 *
 * @example
 * ```typescript
 * import { configure, listProjects } from '@memvid/sdk';
 *
 * configure({ apiKey: 'mv2_your_api_key' });
 *
 * const projects = await listProjects();
 * for (const project of projects) {
 *   console.log(`${project.name} (${project.id})`);
 * }
 * ```
 */
async function listProjects(options) {
    // Explicit option wins, then global config, then the environment.
    const key = options?.apiKey || globalConfig.apiKey || process.env.MEMVID_API_KEY;
    if (!key) {
        throw new error_1.ApiKeyRequiredError("API key required for listProjects(). " +
            "Set via configure({ apiKey: 'mv2_...' }), MEMVID_API_KEY env var, or options.apiKey. " +
            "Get your API key at https://memvid.com/dashboard/api-keys");
    }
    const baseUrl = options?.dashboardUrl ||
        globalConfig.dashboardUrl ||
        process.env.MEMVID_DASHBOARD_URL ||
        "https://memvid.com";
    // Strip one trailing slash so the joined path does not contain "//".
    const endpoint = `${baseUrl.replace(/\/$/, "")}/api/projects`;
    const response = await fetch(endpoint, {
        method: "GET",
        headers: {
            "x-api-key": key,
        },
    });
    if (!response.ok) {
        // Best effort: include the response body in the error, or nothing if it cannot be read.
        const detail = await response.text().catch(() => "");
        throw new error_1.MemvidError("MV023", `Failed to list projects: ${response.status} ${response.statusText}. ${detail}`);
    }
    const parsed = await response.json();
    // Accept both an enveloped ({ data: { projects } }) and a bare ({ projects }) payload.
    const container = parsed.data || parsed;
    const rows = container.projects || [];
    // Map one snake_case API row to the camelCase shape returned to callers.
    const toProjectInfo = (row) => ({
        id: row.id,
        organisationId: row.organisation_id,
        slug: row.slug,
        name: row.name,
        description: row.description,
        createdAt: row.created_at,
        updatedAt: row.updated_at,
    });
    return rows.map((row) => toProjectInfo(row));
}
|
|
457
589
|
// ===========================================================================
|
|
458
590
|
// Load platform-specific native binary
|
|
459
591
|
function loadNativeAddon() {
|
|
@@ -944,6 +1076,23 @@ class MemvidImpl {
|
|
|
944
1076
|
return result;
|
|
945
1077
|
});
|
|
946
1078
|
}
|
|
1079
|
+
/**
|
|
1080
|
+
* Remove a frame by its ID.
|
|
1081
|
+
*
|
|
1082
|
+
* This performs a soft delete (tombstone) - the frame is marked as deleted
|
|
1083
|
+
* and won't appear in search results or timeline, but the data remains
|
|
1084
|
+
* in the file until compaction.
|
|
1085
|
+
*
|
|
1086
|
+
* @param frameId - The frame ID to remove (returned by put())
|
|
1087
|
+
* @returns The WAL sequence number of the tombstone operation
|
|
1088
|
+
*/
|
|
1089
|
+
async remove(frameId) {
|
|
1090
|
+
return wrapAsync(async () => {
|
|
1091
|
+
const result = await this.core.remove(frameId);
|
|
1092
|
+
(0, analytics_1.trackCommand)(this.filename, "remove", true);
|
|
1093
|
+
return result;
|
|
1094
|
+
});
|
|
1095
|
+
}
|
|
947
1096
|
async putMany(requests, options) {
|
|
948
1097
|
return wrapAsync(async () => {
|
|
949
1098
|
const embedder = options?.embedder;
|
|
@@ -1541,6 +1690,13 @@ class MemvidImpl {
|
|
|
1541
1690
|
async seal() {
|
|
1542
1691
|
return wrapAsync(() => this.core.seal());
|
|
1543
1692
|
}
|
|
1693
|
+
/**
|
|
1694
|
+
* Rebuild the time index. Call this after using putMany() if you need
|
|
1695
|
+
* time-based queries (like ask() with temporal context).
|
|
1696
|
+
*/
|
|
1697
|
+
async rebuildTimeIndex() {
|
|
1698
|
+
await this.doctor({ rebuildTimeIndex: true, quiet: true });
|
|
1699
|
+
}
|
|
1544
1700
|
async enableLex() {
|
|
1545
1701
|
return wrapAsync(() => this.core.enableLex());
|
|
1546
1702
|
}
|
|
@@ -1550,6 +1706,9 @@ class MemvidImpl {
|
|
|
1550
1706
|
async applyTicket(ticket) {
|
|
1551
1707
|
return wrapAsync(() => this.core.applyTicket(ticket));
|
|
1552
1708
|
}
|
|
1709
|
+
async applySignedTicket(ticket) {
|
|
1710
|
+
return wrapAsync(() => this.core.applySignedTicket(ticket));
|
|
1711
|
+
}
|
|
1553
1712
|
async getMemoryBinding() {
|
|
1554
1713
|
return wrapAsync(() => this.core.getMemoryBinding());
|
|
1555
1714
|
}
|
|
@@ -1687,6 +1846,182 @@ class MemvidImpl {
|
|
|
1687
1846
|
async addMemoryCards(cards) {
|
|
1688
1847
|
return wrapAsync(() => this.core.addMemoryCards(cards));
|
|
1689
1848
|
}
|
|
1849
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1850
|
+
// Document Parsing & Ingestion
|
|
1851
|
+
// ─────────────────────────────────────────────────────────────────────────
|
|
1852
|
+
/**
|
|
1853
|
+
* Ingest a document file with automatic parsing and per-page/sheet/slide storage.
|
|
1854
|
+
*
|
|
1855
|
+
* Supported formats:
|
|
1856
|
+
* - PDF (.pdf) - stores each page as a separate frame
|
|
1857
|
+
* - Excel (.xlsx, .xls) - stores each sheet as a separate frame
|
|
1858
|
+
* - PowerPoint (.pptx, .ppt) - stores each slide as a separate frame
|
|
1859
|
+
* - Word (.docx, .doc) - stores the document as a single frame
|
|
1860
|
+
*
|
|
1861
|
+
* @example
|
|
1862
|
+
* ```typescript
|
|
1863
|
+
* const mv = await create('memory.mv2');
|
|
1864
|
+
* await mv.putFile('./report.pdf');
|
|
1865
|
+
* await mv.putFile('./data.xlsx');
|
|
1866
|
+
* await mv.putFile('./slides.pptx');
|
|
1867
|
+
* ```
|
|
1868
|
+
*
|
|
1869
|
+
* @param filePath - Path to the document file
|
|
1870
|
+
* @param options - Optional label and metadata
|
|
1871
|
+
* @returns Result with framesAdded count
|
|
1872
|
+
*/
|
|
1873
|
+
async putFile(filePath, options) {
|
|
1874
|
+
const { parse, getDocumentType } = await Promise.resolve().then(() => __importStar(require("./documents/index.js")));
|
|
1875
|
+
const { basename } = await Promise.resolve().then(() => __importStar(require("path")));
|
|
1876
|
+
const filename = basename(filePath);
|
|
1877
|
+
const docType = getDocumentType(filePath);
|
|
1878
|
+
const label = options?.label || "document";
|
|
1879
|
+
const baseMetadata = options?.metadata || {};
|
|
1880
|
+
const embedder = options?.embedder;
|
|
1881
|
+
// Try JS-based parsing first
|
|
1882
|
+
const result = await parse(filePath);
|
|
1883
|
+
// PDF fallback: if parse() returns null, use Rust core's built-in pdf_extract
|
|
1884
|
+
if (result === null && docType === "pdf") {
|
|
1885
|
+
console.log(`[memvid] Using Rust pdf_extract for ${filename}`);
|
|
1886
|
+
// For PDF fallback with external embedder, we can't easily extract text first
|
|
1887
|
+
// so we fall back to the internal embedding model
|
|
1888
|
+
await this.put({
|
|
1889
|
+
file: filePath,
|
|
1890
|
+
label,
|
|
1891
|
+
metadata: {
|
|
1892
|
+
...baseMetadata,
|
|
1893
|
+
doc_name: filename,
|
|
1894
|
+
doc_type: "pdf",
|
|
1895
|
+
fallback: "rust_pdf_extract",
|
|
1896
|
+
},
|
|
1897
|
+
enableEmbedding: embedder ? true : options?.enableEmbedding,
|
|
1898
|
+
embeddingModel: embedder ? undefined : options?.embeddingModel,
|
|
1899
|
+
vectorCompression: options?.vectorCompression,
|
|
1900
|
+
autoTag: options?.autoTag,
|
|
1901
|
+
extractDates: options?.extractDates,
|
|
1902
|
+
});
|
|
1903
|
+
(0, analytics_1.trackCommand)(this.filename, "putFile", true);
|
|
1904
|
+
return { framesAdded: 1, type: "pdf", filename };
|
|
1905
|
+
}
|
|
1906
|
+
// If result is null and not PDF, something went wrong
|
|
1907
|
+
if (result === null) {
|
|
1908
|
+
throw new Error(`Failed to parse document: ${filename}`);
|
|
1909
|
+
}
|
|
1910
|
+
// Build items for batch processing with putMany (6x faster than individual put())
|
|
1911
|
+
const items = [];
|
|
1912
|
+
for (const item of result.items) {
|
|
1913
|
+
let title;
|
|
1914
|
+
let metadata;
|
|
1915
|
+
if (result.type === "pdf") {
|
|
1916
|
+
title = `${result.filename} [Page ${item.number}]`;
|
|
1917
|
+
metadata = {
|
|
1918
|
+
...baseMetadata,
|
|
1919
|
+
doc_name: result.filename,
|
|
1920
|
+
doc_type: result.type,
|
|
1921
|
+
page_number: item.number,
|
|
1922
|
+
total_pages: result.totalItems,
|
|
1923
|
+
};
|
|
1924
|
+
}
|
|
1925
|
+
else if (result.type === "xlsx") {
|
|
1926
|
+
title = `${result.filename} [Sheet: ${item.name}]`;
|
|
1927
|
+
metadata = {
|
|
1928
|
+
...baseMetadata,
|
|
1929
|
+
doc_name: result.filename,
|
|
1930
|
+
doc_type: result.type,
|
|
1931
|
+
sheet_name: item.name,
|
|
1932
|
+
sheet_index: item.number,
|
|
1933
|
+
total_sheets: result.totalItems,
|
|
1934
|
+
};
|
|
1935
|
+
}
|
|
1936
|
+
else if (result.type === "pptx") {
|
|
1937
|
+
title = `${result.filename} [Slide ${item.number}]`;
|
|
1938
|
+
metadata = {
|
|
1939
|
+
...baseMetadata,
|
|
1940
|
+
doc_name: result.filename,
|
|
1941
|
+
doc_type: result.type,
|
|
1942
|
+
slide_number: item.number,
|
|
1943
|
+
slide_title: item.title,
|
|
1944
|
+
total_slides: result.totalItems,
|
|
1945
|
+
};
|
|
1946
|
+
}
|
|
1947
|
+
else {
|
|
1948
|
+
// docx
|
|
1949
|
+
title = result.filename;
|
|
1950
|
+
metadata = {
|
|
1951
|
+
...baseMetadata,
|
|
1952
|
+
doc_name: result.filename,
|
|
1953
|
+
doc_type: result.type,
|
|
1954
|
+
};
|
|
1955
|
+
}
|
|
1956
|
+
items.push({
|
|
1957
|
+
title,
|
|
1958
|
+
labels: label ? [label] : undefined,
|
|
1959
|
+
text: item.text,
|
|
1960
|
+
metadata,
|
|
1961
|
+
});
|
|
1962
|
+
}
|
|
1963
|
+
// Use putMany for fast batch ingestion
|
|
1964
|
+
// Note: Call rebuildTimeIndex() after seal() if using ask() with temporal queries
|
|
1965
|
+
await this.putMany(items, {
|
|
1966
|
+
embedder,
|
|
1967
|
+
enableEmbedding: embedder ? undefined : options?.enableEmbedding,
|
|
1968
|
+
embeddingModel: embedder ? undefined : options?.embeddingModel,
|
|
1969
|
+
});
|
|
1970
|
+
(0, analytics_1.trackCommand)(this.filename, "putFile", true);
|
|
1971
|
+
return { framesAdded: items.length, type: result.type, filename: result.filename };
|
|
1972
|
+
}
|
|
1973
|
+
/**
|
|
1974
|
+
* Ingest multiple document files from a directory.
|
|
1975
|
+
*
|
|
1976
|
+
* @example
|
|
1977
|
+
* ```typescript
|
|
1978
|
+
* const mv = await create('memory.mv2');
|
|
1979
|
+
* const result = await mv.putFiles('./documents/');
|
|
1980
|
+
* console.log(`Processed ${result.filesProcessed} files, ${result.framesAdded} frames`);
|
|
1981
|
+
* ```
|
|
1982
|
+
*
|
|
1983
|
+
* @param dirPath - Path to directory containing documents
|
|
1984
|
+
* @param options - Optional label, extensions filter, and metadata
|
|
1985
|
+
* @returns Result with filesProcessed and framesAdded counts
|
|
1986
|
+
*/
|
|
1987
|
+
async putFiles(dirPath, options) {
|
|
1988
|
+
const { readdirSync } = await Promise.resolve().then(() => __importStar(require("fs")));
|
|
1989
|
+
const { join } = await Promise.resolve().then(() => __importStar(require("path")));
|
|
1990
|
+
const extensions = options?.extensions || [
|
|
1991
|
+
".pdf",
|
|
1992
|
+
".xlsx",
|
|
1993
|
+
".xls",
|
|
1994
|
+
".pptx",
|
|
1995
|
+
".ppt",
|
|
1996
|
+
".docx",
|
|
1997
|
+
".doc",
|
|
1998
|
+
];
|
|
1999
|
+
const files = readdirSync(dirPath).filter((f) => extensions.some((ext) => f.toLowerCase().endsWith(ext)));
|
|
2000
|
+
let filesProcessed = 0;
|
|
2001
|
+
let framesAdded = 0;
|
|
2002
|
+
const results = [];
|
|
2003
|
+
for (const file of files) {
|
|
2004
|
+
const result = await this.putFile(join(dirPath, file), {
|
|
2005
|
+
label: options?.label,
|
|
2006
|
+
metadata: options?.metadata,
|
|
2007
|
+
enableEmbedding: options?.enableEmbedding,
|
|
2008
|
+
embeddingModel: options?.embeddingModel,
|
|
2009
|
+
embedder: options?.embedder,
|
|
2010
|
+
vectorCompression: options?.vectorCompression,
|
|
2011
|
+
autoTag: options?.autoTag,
|
|
2012
|
+
extractDates: options?.extractDates,
|
|
2013
|
+
});
|
|
2014
|
+
filesProcessed++;
|
|
2015
|
+
framesAdded += result.framesAdded;
|
|
2016
|
+
results.push({
|
|
2017
|
+
filename: result.filename,
|
|
2018
|
+
framesAdded: result.framesAdded,
|
|
2019
|
+
type: result.type,
|
|
2020
|
+
});
|
|
2021
|
+
}
|
|
2022
|
+
(0, analytics_1.trackCommand)(this.filename, "putFiles", true);
|
|
2023
|
+
return { filesProcessed, framesAdded, files: results };
|
|
2024
|
+
}
|
|
1690
2025
|
}
|
|
1691
2026
|
const useImpl = (async (kind, filename, apiKeyOrOptions, options) => {
|
|
1692
2027
|
const { apiKey, options: resolvedOptions } = splitUseArgs(apiKeyOrOptions, options);
|
|
@@ -1735,6 +2070,7 @@ useImpl.doctor = async (path, options) => {
|
|
|
1735
2070
|
rebuildVecIndex: options.rebuildVecIndex,
|
|
1736
2071
|
vacuum: options.vacuum,
|
|
1737
2072
|
dryRun: options.dryRun,
|
|
2073
|
+
quiet: options.quiet,
|
|
1738
2074
|
};
|
|
1739
2075
|
return wrapAsync(() => addon.doctorMemvid(path, nativeOptions));
|
|
1740
2076
|
};
|
package/dist/types.d.ts
CHANGED
|
@@ -351,6 +351,7 @@ export interface SessionReplayResult {
|
|
|
351
351
|
}
|
|
352
352
|
export interface NativeMemvid {
|
|
353
353
|
put(args: NativePutArgs): Promise<string>;
|
|
354
|
+
remove(frameId: string): Promise<number>;
|
|
354
355
|
putMany(requests: NativePutManyRequest[], options?: NativePutManyOptions): Promise<string[]>;
|
|
355
356
|
find(query: string, options?: NativeFindOptions): Promise<unknown>;
|
|
356
357
|
findWithEmbedding(query: string, embedding: number[], options?: NativeFindOptions): Promise<unknown>;
|
|
@@ -365,7 +366,10 @@ export interface NativeMemvid {
|
|
|
365
366
|
seal(): Promise<void>;
|
|
366
367
|
enableLex(): Promise<void>;
|
|
367
368
|
setVectorCompression(enabled: boolean): Promise<void>;
|
|
369
|
+
/** @deprecated Use applySignedTicket() for cryptographically verified tickets */
|
|
368
370
|
applyTicket(ticket: string): Promise<void>;
|
|
371
|
+
/** Apply a cryptographically signed ticket (verifies signature against Memvid public key) */
|
|
372
|
+
applySignedTicket(ticket: string): Promise<void>;
|
|
369
373
|
getMemoryBinding(): Promise<MemoryBinding | null>;
|
|
370
374
|
unbindMemory(): Promise<void>;
|
|
371
375
|
getCapacity(): Promise<number>;
|
|
@@ -404,6 +408,8 @@ export interface TicketInfo {
|
|
|
404
408
|
seq_no: number;
|
|
405
409
|
expires_in_secs: number;
|
|
406
410
|
capacity_bytes: number;
|
|
411
|
+
/** Whether the ticket was cryptographically verified against Memvid's public key */
|
|
412
|
+
verified: boolean;
|
|
407
413
|
}
|
|
408
414
|
export interface SyncTicketResult {
|
|
409
415
|
already_bound: boolean;
|
|
@@ -411,6 +417,19 @@ export interface SyncTicketResult {
|
|
|
411
417
|
issuer: string;
|
|
412
418
|
seq_no: number;
|
|
413
419
|
capacity_bytes: number;
|
|
420
|
+
/** Whether the ticket was cryptographically verified against Memvid's public key */
|
|
421
|
+
verified: boolean;
|
|
422
|
+
}
|
|
423
|
+
/** A cryptographically signed ticket from the Memvid control plane */
|
|
424
|
+
export interface SignedTicket {
|
|
425
|
+
issuer: string;
|
|
426
|
+
seq_no: number;
|
|
427
|
+
expires_in_secs: number;
|
|
428
|
+
capacity_bytes?: number;
|
|
429
|
+
/** The memory ID this ticket is bound to */
|
|
430
|
+
memory_id: string;
|
|
431
|
+
/** Base64-encoded Ed25519 signature */
|
|
432
|
+
signature: string;
|
|
414
433
|
}
|
|
415
434
|
export interface HeatmapEntry {
|
|
416
435
|
frame_id: number;
|
|
@@ -553,6 +572,17 @@ export interface Memvid {
|
|
|
553
572
|
/** Absolute or relative path to the backing `.mv2` file. */
|
|
554
573
|
path(): Promise<string>;
|
|
555
574
|
put(data: PutInput): Promise<string>;
|
|
575
|
+
/**
|
|
576
|
+
* Remove a frame by its ID.
|
|
577
|
+
*
|
|
578
|
+
* This performs a soft delete (tombstone) - the frame is marked as deleted
|
|
579
|
+
* and won't appear in search results or timeline, but the data remains
|
|
580
|
+
* in the file until compaction.
|
|
581
|
+
*
|
|
582
|
+
* @param frameId - The frame ID to remove (returned by put())
|
|
583
|
+
* @returns The WAL sequence number of the tombstone operation
|
|
584
|
+
*/
|
|
585
|
+
remove(frameId: string): Promise<number>;
|
|
556
586
|
/**
|
|
557
587
|
* Batch ingest multiple documents using parallel segment building.
|
|
558
588
|
* Much more efficient than calling put() multiple times.
|
|
@@ -580,7 +610,10 @@ export interface Memvid {
|
|
|
580
610
|
seal(): Promise<void>;
|
|
581
611
|
enableLex(): Promise<void>;
|
|
582
612
|
setVectorCompression(enabled: boolean): Promise<void>;
|
|
613
|
+
/** @deprecated Use applySignedTicket() for cryptographically verified tickets */
|
|
583
614
|
applyTicket(ticket: string): Promise<void>;
|
|
615
|
+
/** Apply a cryptographically signed ticket (verifies signature against Memvid public key) */
|
|
616
|
+
applySignedTicket(ticket: string): Promise<void>;
|
|
584
617
|
getMemoryBinding(): Promise<MemoryBinding | null>;
|
|
585
618
|
unbindMemory(): Promise<void>;
|
|
586
619
|
getCapacity(): Promise<number>;
|
|
@@ -660,6 +693,7 @@ export interface UseDoctorOptions {
|
|
|
660
693
|
rebuildVecIndex?: boolean;
|
|
661
694
|
vacuum?: boolean;
|
|
662
695
|
dryRun?: boolean;
|
|
696
|
+
quiet?: boolean;
|
|
663
697
|
}
|
|
664
698
|
export interface LockOptions {
|
|
665
699
|
/** Output file path (default: input.mv2e) */
|
|
@@ -786,6 +820,7 @@ export type NativeDoctorOptions = {
|
|
|
786
820
|
rebuildVecIndex?: boolean;
|
|
787
821
|
vacuum?: boolean;
|
|
788
822
|
dryRun?: boolean;
|
|
823
|
+
quiet?: boolean;
|
|
789
824
|
};
|
|
790
825
|
export type NativeCapsuleOptions = {
|
|
791
826
|
output?: string;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@memvid/sdk",
|
|
3
|
-
"version": "2.0.
|
|
3
|
+
"version": "2.0.148",
|
|
4
4
|
"description": "Single-file AI memory system for Node.js. Store, search, and query documents with built-in RAG.",
|
|
5
5
|
"main": "./dist/index.js",
|
|
6
6
|
"types": "./dist/index.d.ts",
|
|
@@ -41,10 +41,10 @@
|
|
|
41
41
|
"node": ">=18"
|
|
42
42
|
},
|
|
43
43
|
"optionalDependencies": {
|
|
44
|
-
"@memvid/sdk-darwin-arm64": "2.0.
|
|
45
|
-
"@memvid/sdk-darwin-x64": "2.0.
|
|
46
|
-
"@memvid/sdk-linux-x64-gnu": "2.0.
|
|
47
|
-
"@memvid/sdk-win32-x64-msvc": "2.0.
|
|
44
|
+
"@memvid/sdk-darwin-arm64": "2.0.148",
|
|
45
|
+
"@memvid/sdk-darwin-x64": "2.0.148",
|
|
46
|
+
"@memvid/sdk-linux-x64-gnu": "2.0.148",
|
|
47
|
+
"@memvid/sdk-win32-x64-msvc": "2.0.148"
|
|
48
48
|
},
|
|
49
49
|
"peerDependencies": {
|
|
50
50
|
"@langchain/core": ">=0.3.0",
|
|
@@ -76,6 +76,9 @@
|
|
|
76
76
|
"typescript": "^5.4.0"
|
|
77
77
|
},
|
|
78
78
|
"dependencies": {
|
|
79
|
+
"unpdf": "^1.4.0",
|
|
80
|
+
"exceljs": "^4.4.0",
|
|
81
|
+
"officeparser": "^6.0.2",
|
|
79
82
|
"@ai-sdk/openai": "^1.0.0",
|
|
80
83
|
"@google/generative-ai": "^0.24.0",
|
|
81
84
|
"@langchain/langgraph": ">=0.2.0",
|