@echofiles/echo-pdf 0.4.3 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/README.md +85 -562
- package/bin/echo-pdf.js +130 -525
- package/dist/file-utils.d.ts +0 -3
- package/dist/file-utils.js +0 -18
- package/dist/local/document.d.ts +10 -0
- package/dist/local/document.js +133 -0
- package/dist/local/index.d.ts +3 -135
- package/dist/local/index.js +2 -555
- package/dist/local/semantic.d.ts +2 -0
- package/dist/local/semantic.js +231 -0
- package/dist/local/shared.d.ts +50 -0
- package/dist/local/shared.js +173 -0
- package/dist/local/types.d.ts +183 -0
- package/dist/local/types.js +2 -0
- package/dist/node/pdfium-local.js +30 -6
- package/dist/pdf-config.js +2 -65
- package/dist/pdf-types.d.ts +1 -58
- package/dist/types.d.ts +1 -87
- package/echo-pdf.config.json +1 -21
- package/package.json +25 -22
- package/bin/lib/http.js +0 -97
- package/bin/lib/mcp-stdio.js +0 -99
- package/dist/auth.d.ts +0 -18
- package/dist/auth.js +0 -36
- package/dist/core/index.d.ts +0 -50
- package/dist/core/index.js +0 -7
- package/dist/file-ops.d.ts +0 -11
- package/dist/file-ops.js +0 -36
- package/dist/file-store-do.d.ts +0 -36
- package/dist/file-store-do.js +0 -298
- package/dist/http-error.d.ts +0 -9
- package/dist/http-error.js +0 -14
- package/dist/index.d.ts +0 -1
- package/dist/index.js +0 -1
- package/dist/mcp-server.d.ts +0 -3
- package/dist/mcp-server.js +0 -124
- package/dist/node/semantic-local.d.ts +0 -16
- package/dist/node/semantic-local.js +0 -113
- package/dist/pdf-agent.d.ts +0 -18
- package/dist/pdf-agent.js +0 -217
- package/dist/pdf-storage.d.ts +0 -8
- package/dist/pdf-storage.js +0 -86
- package/dist/pdfium-engine.d.ts +0 -9
- package/dist/pdfium-engine.js +0 -180
- package/dist/r2-file-store.d.ts +0 -20
- package/dist/r2-file-store.js +0 -176
- package/dist/response-schema.d.ts +0 -15
- package/dist/response-schema.js +0 -159
- package/dist/tool-registry.d.ts +0 -16
- package/dist/tool-registry.js +0 -175
- package/dist/worker.d.ts +0 -7
- package/dist/worker.js +0 -386
- package/scripts/export-fixtures.sh +0 -204
- package/wrangler.toml +0 -19
package/dist/file-utils.d.ts
CHANGED
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
import type { ReturnMode, StoredFileRecord } from "./types.js";
|
|
2
1
|
export declare const fromBase64: (value: string) => Uint8Array;
|
|
3
2
|
export declare const toBase64: (bytes: Uint8Array) => string;
|
|
4
3
|
export declare const toDataUrl: (bytes: Uint8Array, mimeType: string) => string;
|
|
5
|
-
export declare const normalizeReturnMode: (value: unknown) => ReturnMode;
|
|
6
|
-
export declare const toInlineFilePayload: (file: StoredFileRecord, includeBase64: boolean) => Record<string, unknown>;
|
package/dist/file-utils.js
CHANGED
|
@@ -16,21 +16,3 @@ export const toBase64 = (bytes) => {
|
|
|
16
16
|
return btoa(binary);
|
|
17
17
|
};
|
|
18
18
|
export const toDataUrl = (bytes, mimeType) => `data:${mimeType};base64,${toBase64(bytes)}`;
|
|
19
|
-
export const normalizeReturnMode = (value) => {
|
|
20
|
-
if (value === "file_id" || value === "url" || value === "inline") {
|
|
21
|
-
return value;
|
|
22
|
-
}
|
|
23
|
-
return "inline";
|
|
24
|
-
};
|
|
25
|
-
export const toInlineFilePayload = (file, includeBase64) => ({
|
|
26
|
-
file: {
|
|
27
|
-
id: file.id,
|
|
28
|
-
filename: file.filename,
|
|
29
|
-
mimeType: file.mimeType,
|
|
30
|
-
sizeBytes: file.sizeBytes,
|
|
31
|
-
createdAt: file.createdAt,
|
|
32
|
-
},
|
|
33
|
-
dataUrl: file.mimeType.startsWith("image/") ? toDataUrl(file.bytes, file.mimeType) : undefined,
|
|
34
|
-
base64: includeBase64 ? toBase64(file.bytes) : undefined,
|
|
35
|
-
text: file.mimeType.startsWith("text/") ? new TextDecoder().decode(file.bytes) : undefined,
|
|
36
|
-
});
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { LocalDocumentMetadata, LocalDocumentRequest, LocalDocumentStructure, LocalPageContent, LocalPageContentRequest, LocalPageRenderArtifact, LocalPageRenderRequest, StoredDocumentRecord } from "./types.js";
|
|
2
|
+
export declare const indexDocumentInternal: (request: LocalDocumentRequest) => Promise<{
|
|
3
|
+
record: StoredDocumentRecord;
|
|
4
|
+
reused: boolean;
|
|
5
|
+
}>;
|
|
6
|
+
export declare const ensureRenderArtifact: (request: LocalPageRenderRequest) => Promise<LocalPageRenderArtifact>;
|
|
7
|
+
export declare const get_document: (request: LocalDocumentRequest) => Promise<LocalDocumentMetadata>;
|
|
8
|
+
export declare const get_document_structure: (request: LocalDocumentRequest) => Promise<LocalDocumentStructure>;
|
|
9
|
+
export declare const get_page_content: (request: LocalPageContentRequest) => Promise<LocalPageContent>;
|
|
10
|
+
export declare const get_page_render: (request: LocalPageRenderRequest) => Promise<LocalPageRenderArtifact>;
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
/// <reference path="../node/compat.d.ts" />
|
|
2
|
+
import { mkdir, stat, writeFile } from "node:fs/promises";
|
|
3
|
+
import path from "node:path";
|
|
4
|
+
import { extractLocalPdfPageText, getLocalPdfPageCount, renderLocalPdfPageToPng } from "../node/pdfium-local.js";
|
|
5
|
+
import { buildArtifactPaths, buildRenderArtifactPaths, createPageTitle, createPreview, ensurePageNumber, fileExists, isReusableRecord, loadStoredDocument, matchesSourceSnapshot, pageLabel, readJson, readSourceBytes, resolveConfig, resolveRenderScale, resolveWorkspaceDir, toDocumentId, toPublicArtifactPaths, writeJson, } from "./shared.js";
|
|
6
|
+
export const indexDocumentInternal = async (request) => {
|
|
7
|
+
const config = resolveConfig(request.config);
|
|
8
|
+
const sourcePath = path.resolve(process.cwd(), request.pdfPath);
|
|
9
|
+
const workspaceDir = resolveWorkspaceDir(request.workspaceDir);
|
|
10
|
+
const documentId = toDocumentId(sourcePath);
|
|
11
|
+
const artifactPaths = buildArtifactPaths(workspaceDir, documentId);
|
|
12
|
+
const sourceStats = await stat(sourcePath);
|
|
13
|
+
const stored = await loadStoredDocument(artifactPaths);
|
|
14
|
+
const sourceMeta = {
|
|
15
|
+
sizeBytes: sourceStats.size,
|
|
16
|
+
mtimeMs: sourceStats.mtimeMs,
|
|
17
|
+
};
|
|
18
|
+
if (!request.forceRefresh && stored && await isReusableRecord(stored, sourceMeta, artifactPaths)) {
|
|
19
|
+
return { record: stored, reused: true };
|
|
20
|
+
}
|
|
21
|
+
await mkdir(artifactPaths.pagesDir, { recursive: true });
|
|
22
|
+
const bytes = await readSourceBytes(sourcePath);
|
|
23
|
+
const pageCount = await getLocalPdfPageCount(config, bytes);
|
|
24
|
+
const pageNodes = [];
|
|
25
|
+
for (let pageNumber = 1; pageNumber <= pageCount; pageNumber += 1) {
|
|
26
|
+
const text = await extractLocalPdfPageText(config, bytes, pageNumber - 1);
|
|
27
|
+
const preview = createPreview(text);
|
|
28
|
+
const title = createPageTitle(pageNumber, text);
|
|
29
|
+
const artifactPath = path.join(artifactPaths.pagesDir, `${pageLabel(pageNumber)}.json`);
|
|
30
|
+
const pageArtifact = {
|
|
31
|
+
documentId,
|
|
32
|
+
pageNumber,
|
|
33
|
+
title,
|
|
34
|
+
preview,
|
|
35
|
+
text,
|
|
36
|
+
chars: text.length,
|
|
37
|
+
artifactPath,
|
|
38
|
+
};
|
|
39
|
+
await writeJson(artifactPath, pageArtifact);
|
|
40
|
+
pageNodes.push({
|
|
41
|
+
id: `page-${pageNumber}`,
|
|
42
|
+
type: "page",
|
|
43
|
+
title,
|
|
44
|
+
pageNumber,
|
|
45
|
+
preview,
|
|
46
|
+
artifactPath,
|
|
47
|
+
});
|
|
48
|
+
}
|
|
49
|
+
const structure = {
|
|
50
|
+
documentId,
|
|
51
|
+
generatedAt: new Date().toISOString(),
|
|
52
|
+
root: {
|
|
53
|
+
id: documentId,
|
|
54
|
+
type: "document",
|
|
55
|
+
title: path.basename(sourcePath),
|
|
56
|
+
children: pageNodes,
|
|
57
|
+
},
|
|
58
|
+
};
|
|
59
|
+
await writeJson(artifactPaths.structureJsonPath, structure);
|
|
60
|
+
const documentRecord = {
|
|
61
|
+
documentId,
|
|
62
|
+
sourcePath,
|
|
63
|
+
filename: path.basename(sourcePath),
|
|
64
|
+
sizeBytes: sourceMeta.sizeBytes,
|
|
65
|
+
mtimeMs: sourceMeta.mtimeMs,
|
|
66
|
+
pageCount,
|
|
67
|
+
indexedAt: structure.generatedAt,
|
|
68
|
+
artifactPaths,
|
|
69
|
+
};
|
|
70
|
+
await writeJson(artifactPaths.documentJsonPath, {
|
|
71
|
+
...documentRecord,
|
|
72
|
+
artifactPaths: toPublicArtifactPaths(documentRecord.artifactPaths),
|
|
73
|
+
});
|
|
74
|
+
return { record: documentRecord, reused: false };
|
|
75
|
+
};
|
|
76
|
+
const toMetadata = (record, cacheStatus) => ({
|
|
77
|
+
...record,
|
|
78
|
+
artifactPaths: toPublicArtifactPaths(record.artifactPaths),
|
|
79
|
+
cacheStatus,
|
|
80
|
+
});
|
|
81
|
+
export const ensureRenderArtifact = async (request) => {
|
|
82
|
+
const config = resolveConfig(request.config);
|
|
83
|
+
const { record } = await indexDocumentInternal(request);
|
|
84
|
+
ensurePageNumber(record.pageCount, request.pageNumber);
|
|
85
|
+
const renderScale = resolveRenderScale(config, request.renderScale);
|
|
86
|
+
const renderPaths = buildRenderArtifactPaths(record.artifactPaths, request.pageNumber, renderScale);
|
|
87
|
+
if (!request.forceRefresh && await fileExists(renderPaths.artifactPath) && await fileExists(renderPaths.imagePath)) {
|
|
88
|
+
const cached = await readJson(renderPaths.artifactPath);
|
|
89
|
+
if (matchesSourceSnapshot(cached, record)) {
|
|
90
|
+
return {
|
|
91
|
+
...cached,
|
|
92
|
+
cacheStatus: "reused",
|
|
93
|
+
};
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
const bytes = await readSourceBytes(record.sourcePath);
|
|
97
|
+
const rendered = await renderLocalPdfPageToPng(config, bytes, request.pageNumber - 1, renderScale);
|
|
98
|
+
await mkdir(path.dirname(renderPaths.imagePath), { recursive: true });
|
|
99
|
+
await writeFile(renderPaths.imagePath, rendered.png);
|
|
100
|
+
const artifact = {
|
|
101
|
+
documentId: record.documentId,
|
|
102
|
+
pageNumber: request.pageNumber,
|
|
103
|
+
renderScale,
|
|
104
|
+
sourceSizeBytes: record.sizeBytes,
|
|
105
|
+
sourceMtimeMs: record.mtimeMs,
|
|
106
|
+
width: rendered.width,
|
|
107
|
+
height: rendered.height,
|
|
108
|
+
mimeType: "image/png",
|
|
109
|
+
imagePath: renderPaths.imagePath,
|
|
110
|
+
artifactPath: renderPaths.artifactPath,
|
|
111
|
+
generatedAt: new Date().toISOString(),
|
|
112
|
+
};
|
|
113
|
+
await writeJson(renderPaths.artifactPath, artifact);
|
|
114
|
+
return {
|
|
115
|
+
...artifact,
|
|
116
|
+
cacheStatus: "fresh",
|
|
117
|
+
};
|
|
118
|
+
};
|
|
119
|
+
export const get_document = async (request) => {
|
|
120
|
+
const { record, reused } = await indexDocumentInternal(request);
|
|
121
|
+
return toMetadata(record, reused ? "reused" : "fresh");
|
|
122
|
+
};
|
|
123
|
+
export const get_document_structure = async (request) => {
|
|
124
|
+
const { record } = await indexDocumentInternal(request);
|
|
125
|
+
return readJson(record.artifactPaths.structureJsonPath);
|
|
126
|
+
};
|
|
127
|
+
export const get_page_content = async (request) => {
|
|
128
|
+
const { record } = await indexDocumentInternal(request);
|
|
129
|
+
ensurePageNumber(record.pageCount, request.pageNumber);
|
|
130
|
+
const pagePath = path.join(record.artifactPaths.pagesDir, `${pageLabel(request.pageNumber)}.json`);
|
|
131
|
+
return readJson(pagePath);
|
|
132
|
+
};
|
|
133
|
+
export const get_page_render = async (request) => ensureRenderArtifact(request);
|
package/dist/local/index.d.ts
CHANGED
|
@@ -1,135 +1,3 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
export
|
|
4
|
-
readonly workspaceDir: string;
|
|
5
|
-
readonly documentDir: string;
|
|
6
|
-
readonly documentJsonPath: string;
|
|
7
|
-
readonly structureJsonPath: string;
|
|
8
|
-
readonly semanticStructureJsonPath: string;
|
|
9
|
-
readonly pagesDir: string;
|
|
10
|
-
readonly rendersDir: string;
|
|
11
|
-
readonly ocrDir: string;
|
|
12
|
-
}
|
|
13
|
-
export interface LocalDocumentMetadata {
|
|
14
|
-
readonly documentId: string;
|
|
15
|
-
readonly sourcePath: string;
|
|
16
|
-
readonly filename: string;
|
|
17
|
-
readonly sizeBytes: number;
|
|
18
|
-
readonly mtimeMs: number;
|
|
19
|
-
readonly pageCount: number;
|
|
20
|
-
readonly indexedAt: string;
|
|
21
|
-
readonly cacheStatus: "fresh" | "reused";
|
|
22
|
-
readonly artifactPaths: LocalDocumentArtifactPaths;
|
|
23
|
-
}
|
|
24
|
-
export interface LocalDocumentStructureNode {
|
|
25
|
-
readonly id: string;
|
|
26
|
-
readonly type: "document" | "page";
|
|
27
|
-
readonly title: string;
|
|
28
|
-
readonly pageNumber?: number;
|
|
29
|
-
readonly preview?: string;
|
|
30
|
-
readonly artifactPath?: string;
|
|
31
|
-
readonly children?: ReadonlyArray<LocalDocumentStructureNode>;
|
|
32
|
-
}
|
|
33
|
-
export interface LocalDocumentStructure {
|
|
34
|
-
readonly documentId: string;
|
|
35
|
-
readonly generatedAt: string;
|
|
36
|
-
readonly root: LocalDocumentStructureNode;
|
|
37
|
-
}
|
|
38
|
-
export interface LocalSemanticStructureNode {
|
|
39
|
-
readonly id: string;
|
|
40
|
-
readonly type: "document" | "section";
|
|
41
|
-
readonly title: string;
|
|
42
|
-
readonly level?: number;
|
|
43
|
-
readonly pageNumber?: number;
|
|
44
|
-
readonly pageArtifactPath?: string;
|
|
45
|
-
readonly excerpt?: string;
|
|
46
|
-
readonly children?: ReadonlyArray<LocalSemanticStructureNode>;
|
|
47
|
-
}
|
|
48
|
-
export interface LocalSemanticDocumentStructure {
|
|
49
|
-
readonly documentId: string;
|
|
50
|
-
readonly generatedAt: string;
|
|
51
|
-
readonly detector: "agent-structured-v1" | "heading-heuristic-v1";
|
|
52
|
-
readonly strategyKey: string;
|
|
53
|
-
readonly sourceSizeBytes: number;
|
|
54
|
-
readonly sourceMtimeMs: number;
|
|
55
|
-
readonly pageIndexArtifactPath: string;
|
|
56
|
-
readonly artifactPath: string;
|
|
57
|
-
readonly root: LocalSemanticStructureNode;
|
|
58
|
-
readonly cacheStatus: "fresh" | "reused";
|
|
59
|
-
}
|
|
60
|
-
export interface LocalPageContent {
|
|
61
|
-
readonly documentId: string;
|
|
62
|
-
readonly pageNumber: number;
|
|
63
|
-
readonly title: string;
|
|
64
|
-
readonly preview: string;
|
|
65
|
-
readonly text: string;
|
|
66
|
-
readonly chars: number;
|
|
67
|
-
readonly artifactPath: string;
|
|
68
|
-
}
|
|
69
|
-
export interface LocalPageRenderArtifact {
|
|
70
|
-
readonly documentId: string;
|
|
71
|
-
readonly pageNumber: number;
|
|
72
|
-
readonly renderScale: number;
|
|
73
|
-
readonly sourceSizeBytes: number;
|
|
74
|
-
readonly sourceMtimeMs: number;
|
|
75
|
-
readonly width: number;
|
|
76
|
-
readonly height: number;
|
|
77
|
-
readonly mimeType: "image/png";
|
|
78
|
-
readonly imagePath: string;
|
|
79
|
-
readonly artifactPath: string;
|
|
80
|
-
readonly generatedAt: string;
|
|
81
|
-
readonly cacheStatus: "fresh" | "reused";
|
|
82
|
-
}
|
|
83
|
-
export interface LocalPageOcrArtifact {
|
|
84
|
-
readonly documentId: string;
|
|
85
|
-
readonly pageNumber: number;
|
|
86
|
-
readonly renderScale: number;
|
|
87
|
-
readonly sourceSizeBytes: number;
|
|
88
|
-
readonly sourceMtimeMs: number;
|
|
89
|
-
readonly provider: string;
|
|
90
|
-
readonly model: string;
|
|
91
|
-
readonly prompt: string;
|
|
92
|
-
readonly text: string;
|
|
93
|
-
readonly chars: number;
|
|
94
|
-
readonly imagePath: string;
|
|
95
|
-
readonly renderArtifactPath: string;
|
|
96
|
-
readonly artifactPath: string;
|
|
97
|
-
readonly generatedAt: string;
|
|
98
|
-
readonly cacheStatus: "fresh" | "reused";
|
|
99
|
-
}
|
|
100
|
-
export interface LocalDocumentRequest {
|
|
101
|
-
readonly pdfPath: string;
|
|
102
|
-
readonly workspaceDir?: string;
|
|
103
|
-
readonly forceRefresh?: boolean;
|
|
104
|
-
readonly config?: EchoPdfConfig;
|
|
105
|
-
}
|
|
106
|
-
export interface LocalPageContentRequest extends LocalDocumentRequest {
|
|
107
|
-
readonly pageNumber: number;
|
|
108
|
-
}
|
|
109
|
-
export interface LocalSemanticDocumentRequest extends LocalDocumentRequest {
|
|
110
|
-
readonly provider?: string;
|
|
111
|
-
readonly model?: string;
|
|
112
|
-
readonly semanticExtraction?: {
|
|
113
|
-
readonly pageSelection?: "all";
|
|
114
|
-
readonly chunkMaxChars?: number;
|
|
115
|
-
readonly chunkOverlapChars?: number;
|
|
116
|
-
};
|
|
117
|
-
readonly env?: Env;
|
|
118
|
-
readonly providerApiKeys?: Record<string, string>;
|
|
119
|
-
}
|
|
120
|
-
export interface LocalPageRenderRequest extends LocalPageContentRequest {
|
|
121
|
-
readonly renderScale?: number;
|
|
122
|
-
}
|
|
123
|
-
export interface LocalPageOcrRequest extends LocalPageRenderRequest {
|
|
124
|
-
readonly provider?: string;
|
|
125
|
-
readonly model?: string;
|
|
126
|
-
readonly prompt?: string;
|
|
127
|
-
readonly env?: Env;
|
|
128
|
-
readonly providerApiKeys?: Record<string, string>;
|
|
129
|
-
}
|
|
130
|
-
export declare const get_document: (request: LocalDocumentRequest) => Promise<LocalDocumentMetadata>;
|
|
131
|
-
export declare const get_document_structure: (request: LocalDocumentRequest) => Promise<LocalDocumentStructure>;
|
|
132
|
-
export declare const get_semantic_document_structure: (request: LocalSemanticDocumentRequest) => Promise<LocalSemanticDocumentStructure>;
|
|
133
|
-
export declare const get_page_content: (request: LocalPageContentRequest) => Promise<LocalPageContent>;
|
|
134
|
-
export declare const get_page_render: (request: LocalPageRenderRequest) => Promise<LocalPageRenderArtifact>;
|
|
135
|
-
export declare const get_page_ocr: (request: LocalPageOcrRequest) => Promise<LocalPageOcrArtifact>;
|
|
1
|
+
export type { LocalDocumentArtifactPaths, LocalDocumentMetadata, LocalDocumentRequest, LocalDocumentStructure, LocalDocumentStructureNode, LocalPageContent, LocalPageContentRequest, LocalPageRenderArtifact, LocalPageRenderRequest, LocalSemanticDocumentRequest, LocalSemanticDocumentStructure, LocalSemanticStructureNode, } from "./types.js";
|
|
2
|
+
export { get_document, get_document_structure, get_page_content, get_page_render } from "./document.js";
|
|
3
|
+
export { get_semantic_document_structure } from "./semantic.js";
|