@hamster-note/document-parser 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,53 @@
1
+ import { IntermediateDocument, Number2 } from "@hamster-note/types";
2
+
3
+ //#region src/types/DocumentAnchor.d.ts
4
+ interface DocumentAnchorWithTextId {
5
+ pageId: string;
6
+ textId?: string;
7
+ }
8
+ interface DocumentAnchorWithPosition {
9
+ pageId: string;
10
+ position: Number2;
11
+ }
12
+ type DocumentAnchor = DocumentAnchorWithPosition | DocumentAnchorWithTextId;
13
+ //#endregion
14
+ //#region src/Parser/index.d.ts
15
+ type ParserInput = ArrayBuffer | ArrayBufferView | Blob;
16
+ declare abstract class DocumentParser {
17
+ static readonly exts: readonly string[]; // extensions handled by a concrete parser; the base class ships an empty list
18
+ /**
19
+ * Parse the raw input into the intermediate document structure.
20
+ * - The input must be in-memory data (ArrayBuffer / ArrayBufferView / Blob).
21
+ * - Throw on failure; do not return undefined.
22
+ */
23
+ abstract encode(input: ParserInput): Promise<IntermediateDocument>;
24
+ /**
25
+ * Optional: serialize an intermediate document back into raw file data.
26
+ * Not supported by default; subclasses that need it implement it themselves.
27
+ */
28
+ decode(_intermediateDocument: IntermediateDocument): Promise<ParserInput>;
29
+ protected static toArrayBuffer(input: ParserInput): Promise<ArrayBuffer>; // ArrayBuffer is returned as-is, a view is copied, a Blob is read
30
+ protected static toUint8Array(input: ParserInput): Promise<Uint8Array>; // wraps the result of toArrayBuffer in a Uint8Array
31
+ }
32
+ //#endregion
33
+ //#region src/register/index.d.ts
34
+ interface DocumentParserConstructor<T extends DocumentParser = DocumentParser> {
35
+ new (): T;
36
+ readonly exts: readonly string[];
37
+ }
38
+ declare const registerParser: (Parser: DocumentParserConstructor) => void;
39
+ declare function unregisterParser(Parser: DocumentParserConstructor): void;
40
+ declare function getParserByExt(ext?: string): DocumentParserConstructor | undefined;
41
+ interface ParseOptions {
42
+ filename?: string;
43
+ ext?: string;
44
+ parser?: DocumentParserConstructor;
45
+ }
46
+ declare function parse(input: ParserInput, options?: ParseOptions): Promise<IntermediateDocument>;
47
+ declare function listRegisteredParsers(): {
48
+ ext: string;
49
+ parser: DocumentParserConstructor<DocumentParser>;
50
+ }[];
51
+ //#endregion
52
+ export { DocumentAnchor, DocumentAnchorWithPosition, DocumentAnchorWithTextId, DocumentParser, DocumentParserConstructor, ParseOptions, ParserInput, getParserByExt, listRegisteredParsers, parse, registerParser, unregisterParser };
53
+ //# sourceMappingURL=index.d.ts.map
package/dist/index.js ADDED
@@ -0,0 +1,66 @@
1
+ //#region src/Parser/index.ts
2
/**
 * Base class for document parsers.
 *
 * Concrete parsers extend it, list the file extensions they handle in the
 * static `exts` array and implement `encode(input)`, which turns in-memory
 * binary input into an intermediate document.
 */
var DocumentParser = class DocumentParser {
  /** Extensions handled by the parser; empty on the base class. */
  static exts = [];

  /**
   * Optional: serialize an intermediate document back into raw file data.
   * Not supported by default; subclasses that need it override this method.
   *
   * @param _intermediateDocument The document to serialize (unused here).
   * @returns A promise that always rejects with `Error("decode is not implemented")`.
   */
  decode(_intermediateDocument) {
    return Promise.reject(/* @__PURE__ */ new Error("decode is not implemented"));
  }

  /**
   * Convert any supported parser input into an ArrayBuffer.
   *
   * - An ArrayBuffer is returned as-is (no copy).
   * - An ArrayBufferView is copied into a fresh ArrayBuffer covering exactly
   *   the bytes of the view.
   * - A Blob (or File) is read through `arrayBuffer()`.
   *
   * @param input ArrayBuffer, ArrayBufferView or Blob.
   * @returns A promise resolving to the binary content.
   * @throws {TypeError} (as a rejection) when the input is none of the above.
   */
  static async toArrayBuffer(input) {
    // `instanceof` only recognises objects created in the current realm. The
    // built-in tag also identifies buffers and blobs that come from another
    // realm (vm context, jsdom/jest environment, iframe).
    const tag = Object.prototype.toString.call(input);

    if (input instanceof ArrayBuffer || tag === "[object ArrayBuffer]") return input;

    if (ArrayBuffer.isView(input)) {
      // Copy into an independent ArrayBuffer so the result is never a
      // SharedArrayBuffer and never exposes bytes outside the view.
      const buffer = new ArrayBuffer(input.byteLength);
      new Uint8Array(buffer).set(new Uint8Array(input.buffer, input.byteOffset, input.byteLength));
      return buffer;
    }

    const isLocalBlob = typeof Blob !== "undefined" && input instanceof Blob;
    const isForeignBlob =
      (tag === "[object Blob]" || tag === "[object File]") && typeof input.arrayBuffer === "function";
    if (isLocalBlob || isForeignBlob) return input.arrayBuffer();

    throw new TypeError("Unsupported parser input");
  }

  /**
   * Convert any supported parser input into a Uint8Array.
   *
   * @param input ArrayBuffer, ArrayBufferView or Blob.
   * @returns A promise resolving to a Uint8Array over the buffer produced by `toArrayBuffer`.
   */
  static async toUint8Array(input) {
    const buffer = await DocumentParser.toArrayBuffer(input);
    return new Uint8Array(buffer);
  }
};
26
+
27
+ //#endregion
28
+ //#region src/register/index.ts
29
/** Registry of parser constructors, keyed by normalized extension. */
const parserMap = /* @__PURE__ */ new Map();

/** Normalize an extension: drop one leading dot and lower-case it. */
const normalizeExt = (ext) => ext.replace(/^\./, "").toLowerCase();

/**
 * Extract the extension from a file name or path.
 *
 * Only the last path segment is inspected, so a dot inside a directory name
 * is never mistaken for an extension separator.
 *
 * @param filename File name or path; anything that is not a string yields `undefined`.
 * @returns The text after the last dot, or `undefined` when the name has no
 *          dot or ends with one.
 */
const extFromFilename = (filename) => {
  if (typeof filename !== "string") return void 0;
  const lastSeparator = Math.max(filename.lastIndexOf("/"), filename.lastIndexOf("\\"));
  const base = filename.slice(lastSeparator + 1);
  const dot = base.lastIndexOf(".");
  if (dot < 0 || dot === base.length - 1) return void 0;
  return base.slice(dot + 1);
};

/**
 * Register a parser class for every extension listed in its static `exts`.
 *
 * Registration is all-or-nothing: every extension is checked for conflicts
 * before the registry is touched, so a failed call leaves it unchanged.
 * Registering the same class again is a no-op.
 *
 * @param Parser Parser constructor exposing a non-empty static `exts` array.
 * @throws {Error} When `exts` is not a non-empty array, or when one of the
 *         extensions is already owned by a different parser class.
 */
const registerParser = (Parser) => {
  if (!Array.isArray(Parser.exts) || Parser.exts.length === 0) throw new Error("Parser.exts 必须是非空数组");
  const keys = Parser.exts.map((ext) => normalizeExt(ext));
  // First pass: validate only, so a conflict cannot leave a partial registration behind.
  for (const key of keys) {
    const exists = parserMap.get(key);
    if (exists && exists !== Parser) throw new Error(`扩展名 ${key} 已被其他解析器占用`);
  }
  // Second pass: commit.
  for (const key of keys) parserMap.set(key, Parser);
};

/**
 * Remove a parser class from the registry.
 *
 * Only entries that still point at `Parser` are removed; extensions owned by
 * another class are left alone.
 *
 * @param Parser Parser constructor that was previously registered.
 */
function unregisterParser(Parser) {
  Parser.exts.forEach((ext) => {
    const key = normalizeExt(ext);
    if (parserMap.get(key) === Parser) parserMap.delete(key);
  });
}

/**
 * Look up the parser class registered for an extension.
 *
 * @param ext Extension with or without a leading dot, in any letter case.
 * @returns The registered constructor, or `undefined` when `ext` is empty or unknown.
 */
function getParserByExt(ext) {
  if (!ext) return void 0;
  return parserMap.get(normalizeExt(ext));
}

/**
 * Parse binary input with the matching parser.
 *
 * The parser is chosen in this order: `options.parser`, then the parser
 * registered for `options.ext`, then the parser registered for the extension
 * of `options.filename`.
 *
 * @param input In-memory data handed to the parser's `encode`.
 * @param options Optional `filename`, `ext` and `parser` hints.
 * @returns Whatever the selected parser's `encode(input)` resolves to.
 * @throws {Error} (as a rejection) when no parser can be selected.
 */
async function parse(input, options = {}) {
  // Fill in missing fields so every hint can be read uniformly; a null `options` is tolerated.
  const opts = Object.assign({ filename: void 0, ext: void 0, parser: void 0 }, options);
  const ext = opts.ext ?? extFromFilename(opts.filename);
  const Parser = opts.parser ?? getParserByExt(ext);
  if (!Parser) throw new Error(`未找到匹配的解析器,扩展名: ${ext ?? "未知"},已注册: ${[...parserMap.keys()].join(", ")}`);
  return new Parser().encode(input);
}

/**
 * Snapshot of the registry, useful for tests and runtime diagnostics.
 *
 * @returns One `{ ext, parser }` entry per registered extension, in registration order.
 */
function listRegisteredParsers() {
  return [...parserMap.entries()].map(([ext, ctor]) => ({
    ext,
    parser: ctor
  }));
}
63
+
64
+ //#endregion
65
+ export { DocumentParser, getParserByExt, listRegisteredParsers, parse, registerParser, unregisterParser };
66
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","names":[],"sources":["../src/Parser/index.ts","../src/register/index.ts"],"sourcesContent":["import type { IntermediateDocument } from '@hamster-note/types'\n\n// 解析器通用输入,兼容 Browser / Worker / Node(Buffer 属于 ArrayBufferView)\nexport type ParserInput = ArrayBuffer | ArrayBufferView | Blob\n\nexport abstract class DocumentParser {\n // 子类必须声明支持的扩展名列表(不带点),用于注册分发\n static readonly exts: readonly string[] = []\n\n /**\n * 将原始输入解析为中间文档结构。\n * - 输入必须是内存态数据(ArrayBuffer / ArrayBufferView / Blob)\n * - 失败时抛出异常;不要返回 undefined。\n */\n abstract encode(input: ParserInput): Promise<IntermediateDocument>\n\n /**\n * 可选:从中间文档逆序列化回原始文件数据。\n * 默认不支持,需要的子类自行实现。\n */\n decode(_intermediateDocument: IntermediateDocument): Promise<ParserInput> {\n return Promise.reject(new Error('decode is not implemented'))\n }\n\n // 将各种输入转换为 ArrayBuffer,方便子类直接消费二进制\n protected static async toArrayBuffer(\n input: ParserInput\n ): Promise<ArrayBuffer> {\n if (input instanceof ArrayBuffer) {\n return input\n }\n if (ArrayBuffer.isView(input)) {\n // 拷贝为独立的 ArrayBuffer,避免 SharedArrayBuffer 兼容性问题\n const buffer = new ArrayBuffer(input.byteLength)\n new Uint8Array(buffer).set(\n new Uint8Array(input.buffer, input.byteOffset, input.byteLength)\n )\n return buffer\n }\n if (typeof Blob !== 'undefined' && input instanceof Blob) {\n return input.arrayBuffer()\n }\n throw new TypeError('Unsupported parser input')\n }\n\n // 子类若需要 Uint8Array,可直接复用\n protected static async toUint8Array(input: ParserInput): Promise<Uint8Array> {\n const buffer = await DocumentParser.toArrayBuffer(input)\n return new Uint8Array(buffer)\n }\n}\n","import type { DocumentParser, ParserInput } from '../Parser/index'\nimport type { IntermediateDocument } from '@hamster-note/types'\n\nexport interface DocumentParserConstructor<\n T extends DocumentParser = DocumentParser\n> {\n new (): T\n readonly exts: readonly string[]\n}\n\nconst parserMap: Map<string, DocumentParserConstructor> = new Map()\n\nconst 
normalizeExt = (ext: string) => ext.replace(/^\\./, '').toLowerCase()\n\n// 注册解析器:防止重复注册同扩展名的不同类\nexport const registerParser = (Parser: DocumentParserConstructor) => {\n if (!Array.isArray(Parser.exts) || Parser.exts.length === 0) {\n throw new Error('Parser.exts 必须是非空数组')\n }\n Parser.exts.forEach((ext) => {\n const key = normalizeExt(ext)\n const exists = parserMap.get(key)\n if (exists && exists !== Parser) {\n throw new Error(`扩展名 ${key} 已被其他解析器占用`)\n }\n parserMap.set(key, Parser)\n })\n}\n\nexport function unregisterParser(Parser: DocumentParserConstructor) {\n Parser.exts.forEach((ext) => {\n const key = normalizeExt(ext)\n const exists = parserMap.get(key)\n if (exists === Parser) {\n parserMap.delete(key)\n }\n })\n}\n\nexport function getParserByExt(\n ext?: string\n): DocumentParserConstructor | undefined {\n if (!ext) return undefined\n return parserMap.get(normalizeExt(ext))\n}\n\nexport interface ParseOptions {\n filename?: string\n ext?: string\n parser?: DocumentParserConstructor\n}\n\nexport async function parse(\n input: ParserInput,\n options: ParseOptions = {}\n): Promise<IntermediateDocument> {\n const extFromName = options.filename?.split('.').pop()\n const ext = options.ext ?? extFromName\n const Parser = options.parser ?? getParserByExt(ext)\n\n if (!Parser) {\n throw new Error(\n `未找到匹配的解析器,扩展名: ${ext ?? 
'未知'},已注册: ${[\n ...parserMap.keys()\n ].join(', ')}`\n )\n }\n\n const parser = new Parser()\n return parser.encode(input)\n}\n\n// 便于测试或运行时可观测性\nexport function listRegisteredParsers() {\n return [...parserMap.entries()].map(([ext, ctor]) => ({ ext, parser: ctor }))\n}\n"],"mappings":";AAKA,IAAsB,iBAAtB,MAAsB,eAAe;CAEnC,OAAgB,OAA0B,EAAE;;;;;CAa5C,OAAO,uBAAmE;AACxE,SAAO,QAAQ,uBAAO,IAAI,MAAM,4BAA4B,CAAC;;CAI/D,aAAuB,cACrB,OACsB;AACtB,MAAI,iBAAiB,YACnB,QAAO;AAET,MAAI,YAAY,OAAO,MAAM,EAAE;GAE7B,MAAM,SAAS,IAAI,YAAY,MAAM,WAAW;AAChD,OAAI,WAAW,OAAO,CAAC,IACrB,IAAI,WAAW,MAAM,QAAQ,MAAM,YAAY,MAAM,WAAW,CACjE;AACD,UAAO;;AAET,MAAI,OAAO,SAAS,eAAe,iBAAiB,KAClD,QAAO,MAAM,aAAa;AAE5B,QAAM,IAAI,UAAU,2BAA2B;;CAIjD,aAAuB,aAAa,OAAyC;EAC3E,MAAM,SAAS,MAAM,eAAe,cAAc,MAAM;AACxD,SAAO,IAAI,WAAW,OAAO;;;;;;ACtCjC,MAAM,4BAAoD,IAAI,KAAK;AAEnE,MAAM,gBAAgB,QAAgB,IAAI,QAAQ,OAAO,GAAG,CAAC,aAAa;AAG1E,MAAa,kBAAkB,WAAsC;AACnE,KAAI,CAAC,MAAM,QAAQ,OAAO,KAAK,IAAI,OAAO,KAAK,WAAW,EACxD,OAAM,IAAI,MAAM,sBAAsB;AAExC,QAAO,KAAK,SAAS,QAAQ;EAC3B,MAAM,MAAM,aAAa,IAAI;EAC7B,MAAM,SAAS,UAAU,IAAI,IAAI;AACjC,MAAI,UAAU,WAAW,OACvB,OAAM,IAAI,MAAM,OAAO,IAAI,YAAY;AAEzC,YAAU,IAAI,KAAK,OAAO;GAC1B;;AAGJ,SAAgB,iBAAiB,QAAmC;AAClE,QAAO,KAAK,SAAS,QAAQ;EAC3B,MAAM,MAAM,aAAa,IAAI;AAE7B,MADe,UAAU,IAAI,IAAI,KAClB,OACb,WAAU,OAAO,IAAI;GAEvB;;AAGJ,SAAgB,eACd,KACuC;AACvC,KAAI,CAAC,IAAK,QAAO;AACjB,QAAO,UAAU,IAAI,aAAa,IAAI,CAAC;;AASzC,eAAsB,MACpB,OACA,UAAwB,EAAE,EACK;CAC/B,MAAM,cAAc,QAAQ,UAAU,MAAM,IAAI,CAAC,KAAK;CACtD,MAAM,MAAM,QAAQ,OAAO;CAC3B,MAAM,SAAS,QAAQ,UAAU,eAAe,IAAI;AAEpD,KAAI,CAAC,OACH,OAAM,IAAI,MACR,kBAAkB,OAAO,KAAK,QAAQ,CACpC,GAAG,UAAU,MAAM,CACpB,CAAC,KAAK,KAAK,GACb;AAIH,QADe,IAAI,QAAQ,CACb,OAAO,MAAM;;AAI7B,SAAgB,wBAAwB;AACtC,QAAO,CAAC,GAAG,UAAU,SAAS,CAAC,CAAC,KAAK,CAAC,KAAK,WAAW;EAAE;EAAK,QAAQ;EAAM,EAAE"}
package/package.json CHANGED
@@ -1,16 +1,20 @@
1
1
  {
2
2
  "name": "@hamster-note/document-parser",
3
- "version": "0.1.0",
3
+ "version": "0.2.1",
4
4
  "description": "",
5
5
  "scripts": {
6
- "build:all": "echo 'no-build'",
7
- "test": "echo 'no-test'"
6
+ "build:all": "rolldown --config rolldown.config.ts",
7
+ "prepublishOnly": "yarn build:all",
8
+ "test": "echo 'no-test'",
9
+ "lint": "echo 'no-lint'"
8
10
  },
9
11
  "dependencies": {
10
- "@hamster-note/types": "^0.1.0"
12
+ "@hamster-note/types": "^0.5.1"
11
13
  },
12
14
  "devDependencies": {
13
15
  "@eslint/js": "^9.39.2",
16
+ "rolldown": "^1.0.0-beta.58",
17
+ "rolldown-plugin-dts": "^0.20.0",
14
18
  "@system-ui-js/development-base": "^0.1.2",
15
19
  "@typescript-eslint/eslint-plugin": "^8.50.0",
16
20
  "@typescript-eslint/parser": "^8.50.0",
@@ -27,11 +31,18 @@
27
31
  "typescript": "^5.0.2",
28
32
  "typescript-eslint": "^8.50.0"
29
33
  },
30
- "main": "src/index.ts",
31
- "module": "src/index.ts",
32
- "types": "src/index.ts",
34
+ "type": "module",
35
+ "main": "./dist/index.js",
36
+ "module": "./dist/index.js",
37
+ "types": "./dist/index.d.ts",
38
+ "exports": {
39
+ ".": {
40
+ "types": "./dist/index.d.ts",
41
+ "import": "./dist/index.js"
42
+ }
43
+ },
33
44
  "files": [
34
- "src"
45
+ "dist"
35
46
  ],
36
47
  "packageManager": "yarn@1.22.22+sha512.a6b2f7906b721bba3d67d4aff083df04dad64c399707841b7acf00f6b133b7ac24255f2652fa22ae3534329dc6180534e98d17432037ff6fd140556e2bb3137e"
37
- }
48
+ }
@@ -1,13 +0,0 @@
1
- import { HamsterPage } from '../Page'
2
- import { IntermediateOutline } from '@hamster-note/types'
3
-
4
- export abstract class HamsterDocument {
5
- // 获取 Page 对象列表
6
- abstract getPages(): Promise<HamsterPage[]>
7
- // 获取 Page 对象
8
- abstract getPage(pageNumber: number): Promise<HamsterPage | undefined>
9
- // 获取大纲(PDF如果有内置的话)
10
- abstract getOutline(): Promise<IntermediateOutline | undefined>
11
- // 获取封面
12
- abstract getCover(): Promise<HTMLCanvasElement | HTMLImageElement>
13
- }
package/src/Page/index.ts DELETED
@@ -1,22 +0,0 @@
1
- import { Number2 } from '@hamster-note/types'
2
-
3
- export enum RenderViews {
4
- THUMBNAIL = 'thumbnail',
5
- TEXT = 'text'
6
- }
7
-
8
- export interface RenderOptions {
9
- views: RenderViews[]
10
- scale: number
11
- }
12
-
13
- export abstract class HamsterPage {
14
- abstract render(
15
- container: HTMLDivElement,
16
- options?: RenderOptions
17
- ): Promise<void>
18
- abstract getNumber(): number
19
- abstract getSize(scale: number): Number2
20
- abstract getPureText(): string
21
- // abstract getTextDom
22
- }
@@ -1,28 +0,0 @@
1
- import { HamsterDocument } from '../Document'
2
- import { IntermediateDocument } from '@hamster-note/types'
3
-
4
- export class DocumentParser {
5
- static readonly ext: string
6
- // Base static methods to be overridden by concrete parsers
7
- static async encode(
8
- _file: File | ArrayBuffer
9
- ): Promise<IntermediateDocument | HamsterDocument | undefined> {
10
- return Promise.resolve(undefined)
11
- }
12
- static async decode(
13
- _intermediateDocument: IntermediateDocument
14
- ): Promise<File | ArrayBuffer | undefined> {
15
- return Promise.resolve(undefined)
16
- }
17
- protected static async toArrayBuffer(
18
- fileOrBuffer: File | ArrayBuffer
19
- ): Promise<ArrayBuffer> {
20
- if (fileOrBuffer instanceof ArrayBuffer) return fileOrBuffer
21
- return await new Promise<ArrayBuffer>((resolve, reject) => {
22
- const reader = new FileReader()
23
- reader.readAsArrayBuffer(fileOrBuffer)
24
- reader.onload = () => resolve(reader.result as ArrayBuffer)
25
- reader.onerror = (e) => reject(e)
26
- })
27
- }
28
- }
package/src/index.ts DELETED
@@ -1,5 +0,0 @@
1
- export * from './Document'
2
- export * from './Page'
3
- export * from './types/DocumentAnchor'
4
- export * from './Parser'
5
- export * from './register'
@@ -1,30 +0,0 @@
1
- import { DocumentParser } from '../Parser'
2
-
3
- // Store constructors (classes), not instances
4
- const parserMap: Map<string, typeof DocumentParser> = new Map()
5
-
6
- // Register a parser class (constructor) which exposes a static `ext`
7
- export const registerParser = (parser: typeof DocumentParser) => {
8
- console.log(parser.ext, parser)
9
- parserMap.set(parser.ext, parser)
10
- }
11
-
12
- export function getParser(file: File): typeof DocumentParser | undefined {
13
- const extension = file.name.split('.').pop()
14
- if (!extension) {
15
- return undefined
16
- }
17
- return parserMap.get(extension)
18
- }
19
-
20
- export function parse(file: File) {
21
- const Parser = getParser(file)
22
- console.log('Parse:', Parser)
23
- if (!Parser) {
24
- return undefined
25
- }
26
- // call static encode on the parser class
27
- const res = Parser.encode(file)
28
- // res.then((_doc) => { /* window.doc1 = _doc */ })
29
- return res
30
- }
@@ -1,17 +0,0 @@
1
- // 文档锚(标记了文字id)
2
- import { Number2 } from '@hamster-note/types'
3
-
4
- export interface DocumentAnchorWithTextId {
5
- pageId: string
6
- textId?: string
7
- }
8
-
9
- // 文档锚(标记了位置)
10
- export interface DocumentAnchorWithPosition {
11
- pageId: string
12
- position: Number2
13
- }
14
-
15
- export type DocumentAnchor =
16
- | DocumentAnchorWithPosition
17
- | DocumentAnchorWithTextId