@dragon708/docmind-node 0.1.0-alpha.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,30 +1,34 @@
1
- import { DetectFileKindInput, AnalyzeOptions, AnalysisResult } from '@dragon708/docmind-core';
2
- export { AnalysisAnalyzer, AnalysisResult, AnalyzeOptions, DetectFileKindInput, FileKind, FileKindMetadata, GenericAnalysisResult, TextAnalysisResult } from '@dragon708/docmind-core';
3
- import { NamedInput } from '@dragon708/docmind-shared';
1
+ import { DetectFileKindInput, NamedInput, AnalysisResult } from '@dragon708/docmind-shared';
2
+ export { AnalysisAnalyzer, AnalysisResult, DetectFileKindInput, DocxAnalysisCoreResult, FileKind, FileKindMetadata, GenericAnalysisResult, ImageAnalysisCoreResult, PdfAnalysisCoreResult, TextAnalysisResult } from '@dragon708/docmind-shared';
3
+ import { OcrOptions } from '@dragon708/docmind-ocr';
4
+ import { PdfAnalyzeOptions } from '@dragon708/docmind-pdf';
5
+
6
+ /** Options for {@link analyzeFile} in the Node entry (`pdf` / `ocr` forwarded to format packages). */
7
+ interface NodeAnalyzeOptions {
8
+ readonly signal?: AbortSignal;
9
+ readonly pdf?: PdfAnalyzeOptions;
10
+ readonly ocr?: OcrOptions;
11
+ }
4
12
 
5
13
  /**
6
14
  * Inputs accepted by {@link analyzeFile} in this package.
7
- * Paths and `file:` URLs are read with `fs`; other values pass through as core’s
8
- * {@link DetectFileKindInput}.
15
+ * Paths and `file:` URLs are read with `fs`; other values pass through as {@link DetectFileKindInput}.
9
16
  */
10
17
  type NodeAnalyzeInput = string | URL | DetectFileKindInput;
11
18
  /**
12
- * Reads a file from disk into a {@link NamedInput} suitable for `@dragon708/docmind-core`
13
- * (binary `Buffer`, basename as `name` for extension / MIME hints).
19
+ * Reads a file from disk into a {@link NamedInput} (binary `Buffer`, basename as `name` for hints).
14
20
  */
15
21
  declare function readFileToInput(path: string | URL): Promise<NamedInput<Buffer>>;
16
22
  /** Wraps a `Buffer` as a named payload when you already know the filename. */
17
23
  declare function bufferToInput(buffer: Buffer, name?: string): NamedInput<Buffer>;
18
24
  /**
19
- * Resolves paths / `file:` URLs to core’s internal shape; leaves other
20
- * {@link DetectFileKindInput} values untouched.
25
+ * Resolves paths / `file:` URLs to a {@link DetectFileKindInput}; leaves other values untouched.
21
26
  */
22
27
  declare function resolveNodeAnalyzeInput(input: NodeAnalyzeInput): Promise<DetectFileKindInput>;
23
28
 
24
29
  /**
25
- * Like `@dragon708/docmind-core`’s `analyzeFile`, but accepts filesystem paths and `file:` URLs
26
- * in addition to buffers and browser-style inputs.
30
+ * Node router: PDF, DOCX, images (OCR), and text. Paths and `file:` URLs are read via `fs`.
27
31
  */
28
- declare function analyzeFile(input: NodeAnalyzeInput, options?: AnalyzeOptions): Promise<AnalysisResult>;
32
+ declare function analyzeFile(input: NodeAnalyzeInput, options?: NodeAnalyzeOptions): Promise<AnalysisResult>;
29
33
 
30
- export { type NodeAnalyzeInput, analyzeFile, bufferToInput, readFileToInput, resolveNodeAnalyzeInput };
34
+ export { type NodeAnalyzeInput, type NodeAnalyzeOptions, analyzeFile, bufferToInput, readFileToInput, resolveNodeAnalyzeInput };
package/dist/index.js CHANGED
@@ -1,9 +1,129 @@
1
- import { analyzeFile as analyzeFile$1 } from '@dragon708/docmind-core';
1
+ import { assertValidAnalyzeFileInput, detectFileKind, notImplementedResult, UNKNOWN_FORMAT_WARNING, analyzeText, toUint8Array, isNamedInput, isBinaryInput, isBlob, isFile } from '@dragon708/docmind-shared';
2
+ import { analyzeDocx } from '@dragon708/docmind-docx';
3
+ import { ocr } from '@dragon708/docmind-ocr';
4
+ import { analyzePdf } from '@dragon708/docmind-pdf';
2
5
  import { readFile } from 'fs/promises';
3
6
  import { basename } from 'path';
4
7
  import { fileURLToPath } from 'url';
5
8
 
6
9
  // src/analyze.ts
10
/**
 * Reports whether `input` is accepted by at least one of the shared input
 * guards: `isNamedInput`, `isBinaryInput`, `isBlob` or `isFile`.
 *
 * @param input Candidate value to classify.
 * @returns `true` as soon as one guard matches, otherwise `false`.
 */
function isByteBackedInput(input) {
  // Order matters only for short-circuiting: `some` stops at the first guard that matches.
  const guards = [isNamedInput, isBinaryInput, isBlob, isFile];
  return guards.some((guard) => guard(input));
}
13
/**
 * Produces the bytes carried by `input`.
 *
 * @param input Value to read from.
 * @returns The result of `toUint8Array(input)` when `isByteBackedInput(input)`
 *          holds; otherwise a zero-length `Uint8Array`.
 */
async function bytesFromDetectInput(input) {
  // Inputs that no guard recognizes yield an empty buffer instead of an error.
  return isByteBackedInput(input) ? toUint8Array(input) : new Uint8Array(0);
}
19
+
20
+ // src/analyzers/docx.ts
21
+ async function analyzeDocxForNode(input, signal) {
22
+ if (signal?.aborted) {
23
+ const err = new Error("The operation was aborted");
24
+ err.name = "AbortError";
25
+ throw err;
26
+ }
27
+ const data = await bytesFromDetectInput(input);
28
+ if (data.byteLength === 0) {
29
+ return {
30
+ fileKind: "docx",
31
+ analyzer: "docx",
32
+ status: "ok",
33
+ kind: "docx",
34
+ text: "",
35
+ html: "",
36
+ warnings: ["No document bytes were provided for analysis."]
37
+ };
38
+ }
39
+ const r = await analyzeDocx(data);
40
+ return {
41
+ fileKind: "docx",
42
+ analyzer: "docx",
43
+ status: "ok",
44
+ kind: "docx",
45
+ text: r.text,
46
+ html: r.html,
47
+ warnings: [...r.warnings]
48
+ };
49
+ }
50
+ async function analyzeImageForNode(input, options) {
51
+ if (options?.signal?.aborted) {
52
+ const err = new Error("The operation was aborted");
53
+ err.name = "AbortError";
54
+ throw err;
55
+ }
56
+ const data = await bytesFromDetectInput(input);
57
+ if (data.byteLength === 0) {
58
+ return {
59
+ fileKind: "image",
60
+ analyzer: "image",
61
+ status: "ok",
62
+ kind: "image",
63
+ text: "",
64
+ confidence: 0,
65
+ ocrUsed: true,
66
+ warnings: ["No image bytes were provided for analysis."]
67
+ };
68
+ }
69
+ const ocrOpts = {
70
+ ...options?.ocr ?? {},
71
+ signal: options?.ocr?.signal ?? options?.signal
72
+ };
73
+ const r = await ocr(data, ocrOpts);
74
+ return {
75
+ fileKind: "image",
76
+ analyzer: "image",
77
+ status: "ok",
78
+ kind: "image",
79
+ text: r.text,
80
+ confidence: r.confidence,
81
+ ocrUsed: r.ocrUsed,
82
+ warnings: []
83
+ };
84
+ }
85
/**
 * PDF analyzer for the Node entry: delegates to `analyzePdf`.
 *
 * @param input   Input whose bytes are obtained through `bytesFromDetectInput`.
 * @param options Optional settings. `options.pdf` is forwarded to `analyzePdf`
 *                with three fallbacks applied: `ocr` defaults to `"auto"`,
 *                `ocrLangs` falls back to `options.ocr.langs`, and `signal`
 *                falls back to `options.signal`.
 * @returns A `"pdf"` result with `text`, `pages`, `metadata`, a copy of the
 *          warnings, `needsOCR` and `ocrUsed` taken from the `analyzePdf` result.
 *          When no bytes are available, an empty zero-page result with a single
 *          warning is returned instead.
 * @throws An `Error` named `"AbortError"` when `options.signal` is already aborted.
 */
async function analyzePdfForNode(input, options) {
  if (options?.signal?.aborted) {
    throw Object.assign(new Error("The operation was aborted"), { name: "AbortError" });
  }

  const bytes = await bytesFromDetectInput(input);
  const hasBytes = bytes.byteLength !== 0;
  if (!hasBytes) {
    // Nothing to parse: report an empty document rather than calling the parser.
    return {
      fileKind: "pdf",
      analyzer: "pdf",
      status: "ok",
      kind: "pdf",
      text: "",
      pages: 0,
      metadata: { info: {} },
      warnings: ["No document bytes were provided for analysis."],
      needsOCR: false,
      ocrUsed: false
    };
  }

  const callerPdf = options?.pdf;
  const { text, pages, metadata, warnings, needsOCR, ocrUsed } = await analyzePdf(bytes, {
    ...callerPdf,
    ocr: callerPdf?.ocr ?? "auto",
    ocrLangs: callerPdf?.ocrLangs ?? options?.ocr?.langs,
    signal: callerPdf?.signal ?? options?.signal
  });

  return {
    fileKind: "pdf",
    analyzer: "pdf",
    status: "ok",
    kind: "pdf",
    text,
    pages,
    metadata,
    warnings: [...warnings],
    needsOCR,
    ocrUsed
  };
}
7
127
  function toPathString(pathOrUrl) {
8
128
  return pathOrUrl instanceof URL ? fileURLToPath(pathOrUrl) : pathOrUrl;
9
129
  }
@@ -27,8 +147,26 @@ async function resolveNodeAnalyzeInput(input) {
27
147
 
28
148
  // src/analyze.ts
29
149
  async function analyzeFile(input, options) {
150
+ if (options?.signal?.aborted) {
151
+ const err = new Error("The operation was aborted");
152
+ err.name = "AbortError";
153
+ throw err;
154
+ }
30
155
  const resolved = await resolveNodeAnalyzeInput(input);
31
- return analyzeFile$1(resolved, options);
156
+ assertValidAnalyzeFileInput(resolved);
157
+ const fileKind = detectFileKind(resolved);
158
+ switch (fileKind) {
159
+ case "pdf":
160
+ return analyzePdfForNode(resolved, options);
161
+ case "docx":
162
+ return analyzeDocxForNode(resolved, options?.signal);
163
+ case "image":
164
+ return analyzeImageForNode(resolved, options);
165
+ case "text":
166
+ return analyzeText(resolved, { signal: options?.signal });
167
+ default:
168
+ return notImplementedResult(fileKind, "none", [UNKNOWN_FORMAT_WARNING]);
169
+ }
32
170
  }
33
171
 
34
172
  export { analyzeFile, bufferToInput, readFileToInput, resolveNodeAnalyzeInput };
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"sources":["../src/toCoreInput.ts","../src/analyze.ts"],"names":["analyzeWithCore"],"mappings":";;;;;;AAaA,SAAS,aAAa,SAAA,EAAiC;AACrD,EAAA,OAAO,SAAA,YAAqB,GAAA,GAAM,aAAA,CAAc,SAAS,CAAA,GAAI,SAAA;AAC/D;AAMA,eAAsB,gBAAgB,IAAA,EAAiD;AACrF,EAAA,MAAM,MAAA,GAAS,aAAa,IAAI,CAAA;AAChC,EAAA,MAAM,IAAA,GAAO,MAAM,QAAA,CAAS,MAAM,CAAA;AAClC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,IAAA,EAAM,SAAS,MAAM;AAAA,GACvB;AACF;AAGO,SAAS,aAAA,CAAc,QAAgB,IAAA,EAAmC;AAC/E,EAAA,OAAO,IAAA,KAAS,SAAY,EAAE,IAAA,EAAM,QAAQ,IAAA,EAAK,GAAI,EAAE,IAAA,EAAM,MAAA,EAAO;AACtE;AAMA,eAAsB,wBAAwB,KAAA,EAAuD;AACnG,EAAA,IAAI,OAAO,KAAA,KAAU,QAAA,IAAY,KAAA,YAAiB,GAAA,EAAK;AACrD,IAAA,OAAO,gBAAgB,KAAK,CAAA;AAAA,EAC9B;AACA,EAAA,OAAO,KAAA;AACT;;;ACpCA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,MAAM,QAAA,GAAW,MAAM,uBAAA,CAAwB,KAAK,CAAA;AACpD,EAAA,OAAOA,aAAA,CAAgB,UAAU,OAAO,CAAA;AAC1C","file":"index.js","sourcesContent":["import type { DetectFileKindInput } from \"@dragon708/docmind-core\";\nimport type { NamedInput } from \"@dragon708/docmind-shared\";\nimport { readFile } from \"node:fs/promises\";\nimport { basename } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\n\n/**\n * Inputs accepted by {@link analyzeFile} in this package.\n * Paths and `file:` URLs are read with `fs`; other values pass through as core’s\n * {@link DetectFileKindInput}.\n */\nexport type NodeAnalyzeInput = string | URL | DetectFileKindInput;\n\nfunction toPathString(pathOrUrl: string | URL): string {\n return pathOrUrl instanceof URL ? 
fileURLToPath(pathOrUrl) : pathOrUrl;\n}\n\n/**\n * Reads a file from disk into a {@link NamedInput} suitable for `@dragon708/docmind-core`\n * (binary `Buffer`, basename as `name` for extension / MIME hints).\n */\nexport async function readFileToInput(path: string | URL): Promise<NamedInput<Buffer>> {\n const fsPath = toPathString(path);\n const data = await readFile(fsPath);\n return {\n data,\n name: basename(fsPath),\n };\n}\n\n/** Wraps a `Buffer` as a named payload when you already know the filename. */\nexport function bufferToInput(buffer: Buffer, name?: string): NamedInput<Buffer> {\n return name !== undefined ? { data: buffer, name } : { data: buffer };\n}\n\n/**\n * Resolves paths / `file:` URLs to core’s internal shape; leaves other\n * {@link DetectFileKindInput} values untouched.\n */\nexport async function resolveNodeAnalyzeInput(input: NodeAnalyzeInput): Promise<DetectFileKindInput> {\n if (typeof input === \"string\" || input instanceof URL) {\n return readFileToInput(input);\n }\n return input;\n}\n","import { analyzeFile as analyzeWithCore } from \"@dragon708/docmind-core\";\nimport type { AnalyzeOptions, AnalysisResult } from \"@dragon708/docmind-core\";\nimport { resolveNodeAnalyzeInput, type NodeAnalyzeInput } from \"./toCoreInput.js\";\n\n/**\n * Like `@dragon708/docmind-core`’s `analyzeFile`, but accepts filesystem paths and `file:` URLs\n * in addition to buffers and browser-style inputs.\n */\nexport async function analyzeFile(\n input: NodeAnalyzeInput,\n options?: AnalyzeOptions,\n): Promise<AnalysisResult> {\n const resolved = await resolveNodeAnalyzeInput(input);\n return analyzeWithCore(resolved, options);\n}\n"]}
1
+ {"version":3,"sources":["../src/inputBytes.ts","../src/analyzers/docx.ts","../src/analyzers/image.ts","../src/analyzers/pdf.ts","../src/resolveNodeInput.ts","../src/analyze.ts"],"names":["extractDocx","runPdf"],"mappings":";;;;;;;;;AASO,SAAS,kBAAkB,KAAA,EAAoD;AACpF,EAAA,OAAO,YAAA,CAAa,KAAK,CAAA,IAAK,aAAA,CAAc,KAAK,KAAK,MAAA,CAAO,KAAK,CAAA,IAAK,MAAA,CAAO,KAAK,CAAA;AACrF;AAGA,eAAsB,qBAAqB,KAAA,EAAiD;AAC1F,EAAA,IAAI,CAAC,iBAAA,CAAkB,KAAK,CAAA,EAAG;AAC7B,IAAA,OAAO,IAAI,WAAW,CAAC,CAAA;AAAA,EACzB;AACA,EAAA,OAAO,aAAa,KAAK,CAAA;AAC3B;;;ACZA,eAAsB,kBAAA,CACpB,OACA,MAAA,EACyB;AACzB,EAAA,IAAI,QAAQ,OAAA,EAAS;AACnB,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,KAAK,CAAA;AAC7C,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,MAAA;AAAA,MACV,QAAA,EAAU,MAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,MAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,QAAA,EAAU,CAAC,+CAA+C;AAAA,KAC5D;AAAA,EACF;AAEA,EAAA,MAAM,CAAA,GAAI,MAAMA,WAAA,CAAY,IAAI,CAAA;AAChC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,MAAA;AAAA,IACV,QAAA,EAAU,MAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,MAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,QAAA,EAAU,CAAC,GAAG,CAAA,CAAE,QAAQ;AAAA,GAC1B;AACF;AChCA,eAAsB,mBAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,KAAK,CAAA;AAC7C,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,OAAA;AAAA,MACV,QAAA,EAAU,OAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,OAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,UAAA,EAAY,CAAA;AAAA,MACZ,OAAA,EAAS,IAAA;AAAA,MACT,QAAA,EAAU,CAAC,4CAA4C;AAAA,KACzD;AAAA,EACF;AAEA,EAAA,MAAM,OAAA,GAAU;AAAA,IACd,GAAI,OAAA,EAAS,GAAA,IAAO,EAAC;AAAA,IACrB,MAAA,EAAQ,OAAA,EAAS,GAAA,EAAK,MAAA,IAAU,OAAA,EAAS;AAAA,GAC3C;A
AEA,EAAA,MAAM,CAAA,GAAI,MAAM,GAAA,CAAI,IAAA,EAAM,OAAO,CAAA;AACjC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,OAAA;AAAA,IACV,QAAA,EAAU,OAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,OAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,YAAY,CAAA,CAAE,UAAA;AAAA,IACd,SAAS,CAAA,CAAE,OAAA;AAAA,IACX,UAAU;AAAC,GACb;AACF;AClCA,eAAsB,iBAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,IAAA,GAAO,MAAM,oBAAA,CAAqB,KAAK,CAAA;AAC7C,EAAA,IAAI,IAAA,CAAK,eAAe,CAAA,EAAG;AACzB,IAAA,OAAO;AAAA,MACL,QAAA,EAAU,KAAA;AAAA,MACV,QAAA,EAAU,KAAA;AAAA,MACV,MAAA,EAAQ,IAAA;AAAA,MACR,IAAA,EAAM,KAAA;AAAA,MACN,IAAA,EAAM,EAAA;AAAA,MACN,KAAA,EAAO,CAAA;AAAA,MACP,QAAA,EAAU,EAAE,IAAA,EAAM,EAAC,EAAE;AAAA,MACrB,QAAA,EAAU,CAAC,+CAA+C,CAAA;AAAA,MAC1D,QAAA,EAAU,KAAA;AAAA,MACV,OAAA,EAAS;AAAA,KACX;AAAA,EACF;AAEA,EAAA,MAAM,UAAU,OAAA,EAAS,GAAA;AACzB,EAAA,MAAM,OAAA,GAA6B;AAAA,IACjC,GAAG,OAAA;AAAA,IACH,GAAA,EAAK,SAAS,GAAA,IAAO,MAAA;AAAA,IACrB,QAAA,EAAU,OAAA,EAAS,QAAA,IAAY,OAAA,EAAS,GAAA,EAAK,KAAA;AAAA,IAC7C,MAAA,EAAQ,OAAA,EAAS,MAAA,IAAU,OAAA,EAAS;AAAA,GACtC;AAEA,EAAA,MAAM,CAAA,GAAI,MAAMC,UAAA,CAAO,IAAA,EAAM,OAAO,CAAA;AACpC,EAAA,OAAO;AAAA,IACL,QAAA,EAAU,KAAA;AAAA,IACV,QAAA,EAAU,KAAA;AAAA,IACV,MAAA,EAAQ,IAAA;AAAA,IACR,IAAA,EAAM,KAAA;AAAA,IACN,MAAM,CAAA,CAAE,IAAA;AAAA,IACR,OAAO,CAAA,CAAE,KAAA;AAAA,IACT,UAAU,CAAA,CAAE,QAAA;AAAA,IACZ,QAAA,EAAU,CAAC,GAAG,CAAA,CAAE,QAAQ,CAAA;AAAA,IACxB,UAAU,CAAA,CAAE,QAAA;AAAA,IACZ,SAAS,CAAA,CAAE;AAAA,GACb;AACF;AClDA,SAAS,aAAa,SAAA,EAAiC;AACrD,EAAA,OAAO,SAAA,YAAqB,GAAA,GAAM,aAAA,CAAc,SAAS,CAAA,GAAI,SAAA;AAC/D;AAKA,eAAsB,gBAAgB,IAAA,EAAiD;AACrF,EAAA,MAAM,MAAA,GAAS,aAAa,IAAI,CAAA;AAChC,EAAA,MAAM,IAAA,GAAO,MAAM,QAAA,CAAS,MAAM,CAAA;AAClC,EAAA,OAAO;AAAA,IACL,IAAA;AAAA,IACA,IAAA,EAAM,SAAS,MAAM;AAAA,GACvB;AACF;AAGO,SAAS,aAAA,CAAc,QAAgB,IAAA,EAAmC;AAC/E,EAAA,OAAO,IAAA,KAAS,SAAY,EAAE,IAAA,EAAM,QAAQ,IAAA,EAAK,GAAI,EAAE,IAAA,EAAM,MAAA,EAAO;AACtE;AAKA,eAAsB,wBAAwB,KAAA,EAA
uD;AACnG,EAAA,IAAI,OAAO,KAAA,KAAU,QAAA,IAAY,KAAA,YAAiB,GAAA,EAAK;AACrD,IAAA,OAAO,gBAAgB,KAAK,CAAA;AAAA,EAC9B;AACA,EAAA,OAAO,KAAA;AACT;;;ACvBA,eAAsB,WAAA,CACpB,OACA,OAAA,EACyB;AACzB,EAAA,IAAI,OAAA,EAAS,QAAQ,OAAA,EAAS;AAC5B,IAAA,MAAM,GAAA,GAAM,IAAI,KAAA,CAAM,2BAA2B,CAAA;AACjD,IAAA,GAAA,CAAI,IAAA,GAAO,YAAA;AACX,IAAA,MAAM,GAAA;AAAA,EACR;AAEA,EAAA,MAAM,QAAA,GAAW,MAAM,uBAAA,CAAwB,KAAK,CAAA;AACpD,EAAA,2BAAA,CAA4B,QAAQ,CAAA;AAEpC,EAAA,MAAM,QAAA,GAAW,eAAe,QAAQ,CAAA;AAExC,EAAA,QAAQ,QAAA;AAAU,IAChB,KAAK,KAAA;AACH,MAAA,OAAO,iBAAA,CAAkB,UAAiC,OAAO,CAAA;AAAA,IACnE,KAAK,MAAA;AACH,MAAA,OAAO,kBAAA,CAAmB,QAAA,EAAiC,OAAA,EAAS,MAAM,CAAA;AAAA,IAC5E,KAAK,OAAA;AACH,MAAA,OAAO,mBAAA,CAAoB,UAAiC,OAAO,CAAA;AAAA,IACrE,KAAK,MAAA;AACH,MAAA,OAAO,YAAY,QAAA,EAAiC,EAAE,MAAA,EAAQ,OAAA,EAAS,QAAQ,CAAA;AAAA,IACjF;AACE,MAAA,OAAO,oBAAA,CAAqB,QAAA,EAAU,MAAA,EAAQ,CAAC,sBAAsB,CAAC,CAAA;AAAA;AAE5E","file":"index.js","sourcesContent":["import type { DetectFileKindInput, FileLikeInput } from \"@dragon708/docmind-shared\";\nimport {\n isBinaryInput,\n isBlob,\n isFile,\n isNamedInput,\n toUint8Array,\n} from \"@dragon708/docmind-shared\";\n\nexport function isByteBackedInput(input: DetectFileKindInput): input is FileLikeInput {\n return isNamedInput(input) || isBinaryInput(input) || isBlob(input) || isFile(input);\n}\n\n/** Resolves bytes when the input carries a body; otherwise an empty `Uint8Array`. 
*/\nexport async function bytesFromDetectInput(input: DetectFileKindInput): Promise<Uint8Array> {\n if (!isByteBackedInput(input)) {\n return new Uint8Array(0);\n }\n return toUint8Array(input);\n}\n","import { analyzeDocx as extractDocx } from \"@dragon708/docmind-docx\";\nimport type { AnalysisResult, DetectFileKindInput } from \"@dragon708/docmind-shared\";\nimport { bytesFromDetectInput } from \"../inputBytes.js\";\n\n/**\n * DOCX → `@dragon708/docmind-docx`.\n */\nexport async function analyzeDocxForNode(\n input: DetectFileKindInput,\n signal?: AbortSignal,\n): Promise<AnalysisResult> {\n if (signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await bytesFromDetectInput(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: \"\",\n html: \"\",\n warnings: [\"No document bytes were provided for analysis.\"],\n };\n }\n\n const r = await extractDocx(data);\n return {\n fileKind: \"docx\",\n analyzer: \"docx\",\n status: \"ok\",\n kind: \"docx\",\n text: r.text,\n html: r.html,\n warnings: [...r.warnings],\n };\n}\n","import { ocr } from \"@dragon708/docmind-ocr\";\nimport type { AnalysisResult, DetectFileKindInput } from \"@dragon708/docmind-shared\";\nimport type { NodeAnalyzeOptions } from \"../nodeAnalyzeOptions.js\";\nimport { bytesFromDetectInput } from \"../inputBytes.js\";\n\n/**\n * Image → `@dragon708/docmind-ocr`.\n */\nexport async function analyzeImageForNode(\n input: DetectFileKindInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await bytesFromDetectInput(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: \"\",\n 
confidence: 0,\n ocrUsed: true,\n warnings: [\"No image bytes were provided for analysis.\"],\n };\n }\n\n const ocrOpts = {\n ...(options?.ocr ?? {}),\n signal: options?.ocr?.signal ?? options?.signal,\n };\n\n const r = await ocr(data, ocrOpts);\n return {\n fileKind: \"image\",\n analyzer: \"image\",\n status: \"ok\",\n kind: \"image\",\n text: r.text,\n confidence: r.confidence,\n ocrUsed: r.ocrUsed,\n warnings: [],\n };\n}\n","import { analyzePdf as runPdf } from \"@dragon708/docmind-pdf\";\nimport type { PdfAnalyzeOptions } from \"@dragon708/docmind-pdf\";\nimport type { AnalysisResult, DetectFileKindInput } from \"@dragon708/docmind-shared\";\nimport type { NodeAnalyzeOptions } from \"../nodeAnalyzeOptions.js\";\nimport { bytesFromDetectInput } from \"../inputBytes.js\";\n\n/**\n * PDF → `@dragon708/docmind-pdf` (Node / pdf-parse + OCR).\n *\n * Unlike `analyzePdf` from `@dragon708/docmind-pdf` (OCR off unless set), `analyzeFile` defaults\n * to `pdf.ocr: \"auto\"`: when the PDF has\n * pages but almost no extractable text (typical scan), the raster OCR pipeline runs. Pass\n * `pdf: { ocr: \"off\" }` to skip OCR for speed.\n */\nexport async function analyzePdfForNode(\n input: DetectFileKindInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const data = await bytesFromDetectInput(input);\n if (data.byteLength === 0) {\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: \"\",\n pages: 0,\n metadata: { info: {} },\n warnings: [\"No document bytes were provided for analysis.\"],\n needsOCR: false,\n ocrUsed: false,\n };\n }\n\n const userPdf = options?.pdf;\n const pdfOpts: PdfAnalyzeOptions = {\n ...userPdf,\n ocr: userPdf?.ocr ?? \"auto\",\n ocrLangs: userPdf?.ocrLangs ?? options?.ocr?.langs,\n signal: userPdf?.signal ?? 
options?.signal,\n };\n\n const r = await runPdf(data, pdfOpts);\n return {\n fileKind: \"pdf\",\n analyzer: \"pdf\",\n status: \"ok\",\n kind: \"pdf\",\n text: r.text,\n pages: r.pages,\n metadata: r.metadata,\n warnings: [...r.warnings],\n needsOCR: r.needsOCR,\n ocrUsed: r.ocrUsed,\n };\n}\n","import type { DetectFileKindInput, NamedInput } from \"@dragon708/docmind-shared\";\nimport { readFile } from \"node:fs/promises\";\nimport { basename } from \"node:path\";\nimport { fileURLToPath } from \"node:url\";\n\n/**\n * Inputs accepted by {@link analyzeFile} in this package.\n * Paths and `file:` URLs are read with `fs`; other values pass through as {@link DetectFileKindInput}.\n */\nexport type NodeAnalyzeInput = string | URL | DetectFileKindInput;\n\nfunction toPathString(pathOrUrl: string | URL): string {\n return pathOrUrl instanceof URL ? fileURLToPath(pathOrUrl) : pathOrUrl;\n}\n\n/**\n * Reads a file from disk into a {@link NamedInput} (binary `Buffer`, basename as `name` for hints).\n */\nexport async function readFileToInput(path: string | URL): Promise<NamedInput<Buffer>> {\n const fsPath = toPathString(path);\n const data = await readFile(fsPath);\n return {\n data,\n name: basename(fsPath),\n };\n}\n\n/** Wraps a `Buffer` as a named payload when you already know the filename. */\nexport function bufferToInput(buffer: Buffer, name?: string): NamedInput<Buffer> {\n return name !== undefined ? 
{ data: buffer, name } : { data: buffer };\n}\n\n/**\n * Resolves paths / `file:` URLs to a {@link DetectFileKindInput}; leaves other values untouched.\n */\nexport async function resolveNodeAnalyzeInput(input: NodeAnalyzeInput): Promise<DetectFileKindInput> {\n if (typeof input === \"string\" || input instanceof URL) {\n return readFileToInput(input);\n }\n return input;\n}\n","import type { AnalysisResult, DetectFileKindInput } from \"@dragon708/docmind-shared\";\nimport {\n analyzeText,\n assertValidAnalyzeFileInput,\n detectFileKind,\n notImplementedResult,\n UNKNOWN_FORMAT_WARNING,\n} from \"@dragon708/docmind-shared\";\nimport { analyzeDocxForNode } from \"./analyzers/docx.js\";\nimport { analyzeImageForNode } from \"./analyzers/image.js\";\nimport { analyzePdfForNode } from \"./analyzers/pdf.js\";\nimport type { NodeAnalyzeOptions } from \"./nodeAnalyzeOptions.js\";\nimport { resolveNodeAnalyzeInput, type NodeAnalyzeInput } from \"./resolveNodeInput.js\";\n\n/**\n * Node router: PDF, DOCX, images (OCR), and text. Paths and `file:` URLs are read via `fs`.\n */\nexport async function analyzeFile(\n input: NodeAnalyzeInput,\n options?: NodeAnalyzeOptions,\n): Promise<AnalysisResult> {\n if (options?.signal?.aborted) {\n const err = new Error(\"The operation was aborted\");\n err.name = \"AbortError\";\n throw err;\n }\n\n const resolved = await resolveNodeAnalyzeInput(input);\n assertValidAnalyzeFileInput(resolved);\n\n const fileKind = detectFileKind(resolved);\n\n switch (fileKind) {\n case \"pdf\":\n return analyzePdfForNode(resolved as DetectFileKindInput, options);\n case \"docx\":\n return analyzeDocxForNode(resolved as DetectFileKindInput, options?.signal);\n case \"image\":\n return analyzeImageForNode(resolved as DetectFileKindInput, options);\n case \"text\":\n return analyzeText(resolved as DetectFileKindInput, { signal: options?.signal });\n default:\n return notImplementedResult(fileKind, \"none\", [UNKNOWN_FORMAT_WARNING]);\n }\n}\n"]}
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@dragon708/docmind-node",
3
- "version": "0.1.0-alpha.1",
4
- "description": "Node.js file I/O helpers and core integration for DocMind (does not run in browsers).",
3
+ "version": "1.0.0",
4
+ "description": "Node.js DocMind entry: PDF, DOCX, OCR, text, and fs helpers.",
5
5
  "type": "module",
6
6
  "main": "./dist/index.js",
7
7
  "module": "./dist/index.js",
@@ -31,13 +31,16 @@
31
31
  ],
32
32
  "license": "MIT",
33
33
  "dependencies": {
34
- "@dragon708/docmind-core": "^0.1.0-alpha.1",
35
- "@dragon708/docmind-shared": "^0.1.0-alpha.1"
34
+ "@dragon708/docmind-docx": "^1.0.0",
35
+ "@dragon708/docmind-ocr": "^1.0.0",
36
+ "@dragon708/docmind-pdf": "^1.0.0",
37
+ "@dragon708/docmind-shared": "^1.0.0"
36
38
  },
37
39
  "devDependencies": {
38
40
  "@types/node": "^20.19.37",
39
41
  "tsup": "^8.5.1",
40
- "typescript": "^5.9.3"
42
+ "typescript": "^5.9.3",
43
+ "vitest": "^1.6.1"
41
44
  },
42
45
  "engines": {
43
46
  "node": ">=18"