@uurtech/jdf-cli 0.1.6 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +1093 -28
- package/dist/jdf-schema.json +104 -1
- package/package.json +10 -3
package/dist/index.js
CHANGED
|
@@ -5,6 +5,8 @@ import { fileURLToPath } from 'url';
|
|
|
5
5
|
import Ajv from 'ajv';
|
|
6
6
|
import addFormats from 'ajv-formats';
|
|
7
7
|
import JSZip from 'jszip';
|
|
8
|
+
import { createHash } from 'crypto';
|
|
9
|
+
import { readFile } from 'fs/promises';
|
|
8
10
|
|
|
9
11
|
// ../../packages/jdf-core/src/manifest.ts
|
|
10
12
|
var JDFX_MANIFEST_VERSION = "1.0.0";
|
|
@@ -38,7 +40,14 @@ async function loadDocument(filePath) {
|
|
|
38
40
|
} catch {
|
|
39
41
|
}
|
|
40
42
|
}
|
|
41
|
-
const
|
|
43
|
+
const assetPrefix = `${JDFX_ASSET_DIR}/`;
|
|
44
|
+
const assetCount = Object.values(zip.files).filter((f) => !f.dir && f.name.startsWith(assetPrefix)).length;
|
|
45
|
+
for (const fname of Object.keys(zip.files)) {
|
|
46
|
+
if (fname.includes("..") || fname.startsWith("/")) {
|
|
47
|
+
console.error(`\u2717 Refusing zip entry with traversal: ${fname}`);
|
|
48
|
+
return null;
|
|
49
|
+
}
|
|
50
|
+
}
|
|
42
51
|
return { doc, bundle: { manifest, assetCount } };
|
|
43
52
|
}
|
|
44
53
|
return { doc: JSON.parse(fs.readFileSync(filePath, "utf-8")) };
|
|
@@ -95,21 +104,59 @@ var GENERATOR = "@uurtech/jdf-cli";
|
|
|
95
104
|
function decodeBase64(s) {
|
|
96
105
|
return Buffer.from(s, "base64");
|
|
97
106
|
}
|
|
107
|
+
/**
 * Short content fingerprint for a blob of bytes.
 *
 * @param {Buffer|Uint8Array|string} bytes - Data handed to `hash.update()`.
 * @returns {string} The first 16 hex characters of the SHA-1 digest.
 */
function hashBytes(bytes) {
  const hasher = createHash("sha1");
  hasher.update(bytes);
  const hexDigest = hasher.digest("hex");
  return hexDigest.slice(0, 16);
}
|
|
110
|
+
/**
 * Re-point every element that references resource `oldKey` at `newKey`.
 *
 * Walks `doc.pages[*].elements` and descends into each element's
 * `elements` and `children` arrays. The document is mutated in place.
 *
 * @param {object} doc - Document whose pages are scanned.
 * @param {string} oldKey - Resource id to replace.
 * @param {string} newKey - Resource id to write instead.
 * @returns {void}
 */
function rewriteResourceRefs(doc, oldKey, newKey) {
  const visit = (nodes) => {
    if (!nodes) return;
    for (const node of nodes) {
      if (node?.resource === oldKey) node.resource = newKey;
      // Nested content can live under either property.
      for (const childKey of ["elements", "children"]) {
        if (node?.[childKey]) visit(node[childKey]);
      }
    }
  };
  const pages = doc.pages || [];
  for (const page of pages) visit(page.elements);
}
|
|
98
121
|
function extractAssets(doc) {
|
|
99
122
|
const assets = [];
|
|
100
123
|
const cloned = JSON.parse(JSON.stringify(doc));
|
|
101
|
-
|
|
124
|
+
const usedIds = /* @__PURE__ */ new Set();
|
|
125
|
+
const hashToId = /* @__PURE__ */ new Map();
|
|
126
|
+
let inlineCounter = 0;
|
|
127
|
+
if (cloned.resources?.images) {
|
|
128
|
+
for (const key of Object.keys(cloned.resources.images)) {
|
|
129
|
+
usedIds.add(key);
|
|
130
|
+
}
|
|
131
|
+
}
|
|
132
|
+
function nextInlineId() {
|
|
133
|
+
while (true) {
|
|
134
|
+
inlineCounter++;
|
|
135
|
+
const id = `asset-${inlineCounter}`;
|
|
136
|
+
if (!usedIds.has(id)) {
|
|
137
|
+
usedIds.add(id);
|
|
138
|
+
return id;
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
}
|
|
142
|
+
function intern(bytes, mimeType, ext) {
|
|
143
|
+
const h = hashBytes(bytes);
|
|
144
|
+
const existing = hashToId.get(h);
|
|
145
|
+
if (existing) return existing;
|
|
146
|
+
const id = nextInlineId();
|
|
147
|
+
hashToId.set(h, id);
|
|
148
|
+
assets.push({ id, bytes, mimeType, ext });
|
|
149
|
+
return id;
|
|
150
|
+
}
|
|
102
151
|
function walk(els) {
|
|
103
152
|
if (!els) return;
|
|
104
153
|
for (const el of els) {
|
|
105
154
|
if (el?.type === "image" && typeof el.src === "string" && el.src.startsWith("data:")) {
|
|
106
155
|
const m = el.src.match(/^data:([^;]+);base64,(.*)$/);
|
|
107
156
|
if (m) {
|
|
108
|
-
counter++;
|
|
109
|
-
const id = `asset-${counter}`;
|
|
110
157
|
const mimeType = m[1];
|
|
111
158
|
const ext = mimeType.split("/")[1]?.replace("jpeg", "jpg") || "bin";
|
|
112
|
-
|
|
159
|
+
const id = intern(decodeBase64(m[2]), mimeType, ext);
|
|
113
160
|
delete el.src;
|
|
114
161
|
el.resource = id;
|
|
115
162
|
}
|
|
@@ -127,11 +174,25 @@ function extractAssets(doc) {
|
|
|
127
174
|
const b64 = m ? m[2] : data;
|
|
128
175
|
const mimeType = m ? m[1] : res.mimeType || "image/png";
|
|
129
176
|
const ext = mimeType.split("/")[1]?.replace("jpeg", "jpg") || "bin";
|
|
130
|
-
|
|
131
|
-
const
|
|
132
|
-
|
|
177
|
+
const bytes = decodeBase64(b64);
|
|
178
|
+
const h = hashBytes(bytes);
|
|
179
|
+
let canonicalId = hashToId.get(h);
|
|
180
|
+
if (!canonicalId) {
|
|
181
|
+
canonicalId = key;
|
|
182
|
+
hashToId.set(h, canonicalId);
|
|
183
|
+
usedIds.add(canonicalId);
|
|
184
|
+
assets.push({ id: canonicalId, bytes, mimeType, ext });
|
|
185
|
+
}
|
|
186
|
+
const updated = { ...res };
|
|
187
|
+
delete updated.data;
|
|
188
|
+
updated.src = "embedded";
|
|
189
|
+
if (canonicalId !== key) {
|
|
190
|
+
delete cloned.resources.images[key];
|
|
191
|
+
rewriteResourceRefs(cloned, key, canonicalId);
|
|
192
|
+
} else {
|
|
193
|
+
cloned.resources.images[key] = updated;
|
|
194
|
+
}
|
|
133
195
|
}
|
|
134
|
-
cloned.resources.images = {};
|
|
135
196
|
}
|
|
136
197
|
return { doc: cloned, assets };
|
|
137
198
|
}
|
|
@@ -257,8 +318,12 @@ function convertMarkdownToJdf(md, title, baseDir = process.cwd()) {
|
|
|
257
318
|
content: text,
|
|
258
319
|
position: { x: 0, y },
|
|
259
320
|
width: contentWidth,
|
|
260
|
-
heading: true
|
|
321
|
+
// Emit the actual H1..H6 level instead of `heading: true`. RAG
|
|
322
|
+
// chunkers and the JDF TOC builder both key on the numeric level for
|
|
323
|
+
// hierarchy; the boolean form collapsed every heading to one bucket.
|
|
324
|
+
heading: level,
|
|
261
325
|
tocEntry: text,
|
|
326
|
+
tocLevel: level,
|
|
262
327
|
style: { fontFamily: "Inter", fontSize, fontWeight: "bold", color: level <= 2 ? "#0f172a" : "#1e293b" }
|
|
263
328
|
});
|
|
264
329
|
y += height2 + 4;
|
|
@@ -339,12 +404,20 @@ function convertMarkdownToJdf(md, title, baseDir = process.cwd()) {
|
|
|
339
404
|
}
|
|
340
405
|
if (line.startsWith("```")) {
|
|
341
406
|
const codeLines = [];
|
|
407
|
+
const fenceLine = i;
|
|
342
408
|
i++;
|
|
409
|
+
let closed = false;
|
|
343
410
|
while (i < lines.length && !lines[i].startsWith("```")) {
|
|
344
411
|
codeLines.push(lines[i]);
|
|
345
412
|
i++;
|
|
346
413
|
}
|
|
347
|
-
i
|
|
414
|
+
if (i < lines.length) {
|
|
415
|
+
closed = true;
|
|
416
|
+
i++;
|
|
417
|
+
}
|
|
418
|
+
if (!closed) {
|
|
419
|
+
console.warn(`[jdf-cli] warning: unterminated code fence at line ${fenceLine + 1} \u2014 content to EOF treated as code`);
|
|
420
|
+
}
|
|
348
421
|
const code = codeLines.join("\n");
|
|
349
422
|
const height2 = codeLines.length * 4 + 8;
|
|
350
423
|
if (y + height2 > maxY) {
|
|
@@ -369,7 +442,7 @@ function convertMarkdownToJdf(md, title, baseDir = process.cwd()) {
|
|
|
369
442
|
continue;
|
|
370
443
|
}
|
|
371
444
|
let para = "";
|
|
372
|
-
while (i < lines.length && lines[i].trim() !== "" && !lines[i].match(/^#{1,6}\s/) && !lines[i].match(/^\s*[-*+]\s/) && !lines[i].startsWith("```")) {
|
|
445
|
+
while (i < lines.length && lines[i].trim() !== "" && !lines[i].match(/^#{1,6}\s/) && !lines[i].match(/^\s*[-*+]\s/) && !lines[i].match(/^\s*\d+\.\s/) && !lines[i].startsWith("```")) {
|
|
373
446
|
para += (para ? " " : "") + lines[i].trim();
|
|
374
447
|
i++;
|
|
375
448
|
}
|
|
@@ -409,30 +482,1001 @@ function convertMarkdownToJdf(md, title, baseDir = process.cwd()) {
|
|
|
409
482
|
};
|
|
410
483
|
}
|
|
411
484
|
|
|
485
|
+
// ../../packages/jdf-pdf-import/src/core.ts
|
|
486
|
+
// Millimetres per point: 25.4 mm per inch / 72 points per inch ≈ 0.352778.
var PT_TO_MM = 0.352778;
|
|
487
|
+
/**
 * Map a font name onto one of three CSS font stacks plus weight/style hints.
 *
 * Matching is done on the lower-cased name using substring/regex tests, so
 * names such as "Times-BoldItalic" or "DejaVuSansMono" are recognised.
 *
 * @param {string|null|undefined} name - Font name; falsy values are treated as "".
 * @returns {{family: string, weight: ("bold"|undefined), style: ("italic"|undefined)}}
 *   The chosen font stack, `weight: "bold"` when the name looks bold, and
 *   `style: "italic"` when it looks italic/oblique.
 */
function classifyFont(name) {
  const SANS_STACK = "Inter, Helvetica, Arial, sans-serif";
  const SERIF_STACK = "Times New Roman, serif";
  const MONO_STACK = "JetBrains Mono, ui-monospace, monospace";
  const n = (name || "").toLowerCase();
  const bold = /bold|black|heavy|semibold|demibold|extrabold/.test(n);
  const italic = /italic|oblique/.test(n);
  const mentions = (hint) => n.includes(hint);
  // "sans-serif", "MS Sans Serif", ... contain the substring "serif" but are
  // sans faces, so "serif" only counts as a serif hint when "sans" is absent.
  const looksSerif = ["times", "roman", "georgia", "garamond", "baskerville"].some(mentions) || mentions("serif") && !mentions("sans");
  const looksMono = ["courier", "mono", "consolas", "menlo", "source code", "fira code"].some(mentions);
  // Anything that is neither serif nor monospace (including the explicit
  // helvetica/arial/sans/roboto/inter/noto names) uses the sans stack.
  let family = SANS_STACK;
  if (looksSerif) {
    family = SERIF_STACK;
  } else if (looksMono) {
    family = MONO_STACK;
  }
  return {
    family,
    weight: bold ? "bold" : void 0,
    style: italic ? "italic" : void 0
  };
}
|
|
505
|
+
/**
 * Round a number and clamp it into the 0..255 byte range.
 *
 * @param {number} v - Channel value; anything that rounds to NaN
 *   (NaN, undefined, non-numeric input) is treated as 0 so callers never
 *   format "NaN" into a colour string.
 * @returns {number} An integer between 0 and 255 inclusive.
 */
function clampByte(v) {
  const rounded = Math.round(v);
  // Math.min/Math.max propagate NaN, so it has to be caught explicitly.
  if (Number.isNaN(rounded)) return 0;
  return Math.max(0, Math.min(255, rounded));
}
|
|
508
|
+
/**
 * Format three colour channels as a `#rrggbb` string.
 *
 * Each channel goes through `clampByte` and is written as two lower-case
 * hex digits.
 *
 * @param {number} r - Red channel.
 * @param {number} g - Green channel.
 * @param {number} b - Blue channel.
 * @returns {string} Colour in `#rrggbb` form.
 */
function rgbToHex(r, g, b) {
  const channels = [r, g, b];
  const hexPairs = channels.map((channel) => clampByte(channel).toString(16).padStart(2, "0"));
  return `#${hexPairs.join("")}`;
}
|
|
512
|
+
/**
 * Multiply two 2D affine matrices given in six-element
 * `[m0, m1, m2, m3, m4, m5]` form and return the product `a · b`.
 *
 * The result's translation is `a`'s translation pushed through `b`'s linear
 * part plus `b`'s own translation, so with points treated as row vectors the
 * product maps a point through `a` first and `b` second.
 *
 * @param {ArrayLike<number>} a - Left operand.
 * @param {ArrayLike<number>} b - Right operand.
 * @returns {number[]} New six-element matrix; inputs are not modified.
 */
function multiplyCtm(a, b) {
  // One row of `a` (starting at index i) times the 2x2 linear part of `b`.
  const rowTimesLinear = (i) => [
    a[i] * b[0] + a[i + 1] * b[2],
    a[i] * b[1] + a[i + 1] * b[3]
  ];
  const [m0, m1] = rowTimesLinear(0);
  const [m2, m3] = rowTimesLinear(2);
  const [shiftX, shiftY] = rowTimesLinear(4);
  return [m0, m1, m2, m3, shiftX + b[4], shiftY + b[5]];
}
|
|
522
|
+
/**
 * Apply a six-element affine matrix to the point (x, y).
 *
 * @param {ArrayLike<number>} ctm - Matrix `[m0, m1, m2, m3, m4, m5]`.
 * @param {number} x - Input x coordinate.
 * @param {number} y - Input y coordinate.
 * @returns {{x: number, y: number}} The transformed point.
 */
function tx(ctm, x, y) {
  const mappedX = ctm[0] * x + ctm[2] * y + ctm[4];
  const mappedY = ctm[1] * x + ctm[3] * y + ctm[5];
  return { x: mappedX, y: mappedY };
}
|
|
525
|
+
/**
 * Replay a page's operator list and collect what the importer needs from it:
 * per-text-run paint state, painted vector shapes and image placements.
 *
 * A small graphics state (CTM, fill/stroke colour, line width, alphas, text
 * rendering mode) is tracked across save/restore/transform/colour operators.
 * Coordinates are pushed through the tracked CTM, then through
 * `viewport.convertToViewportPoint`, then scaled by `PT_TO_MM`.
 *
 * @param {object} page - Object exposing `getOperatorList()` which resolves to
 *   `{ fnArray, argsArray }` (presumably a PDF.js page proxy — confirm with caller).
 * @param {object} OPS - Operator-name → operator-code table matching `fnArray`.
 * @param {object} viewport - Object exposing `convertToViewportPoint(x, y)`.
 * @returns {Promise<{textColors: string[], textOpacities: number[],
 *   textRenderingModes: number[], shapes: object[], imagePositions: object[]}>}
 *   The three text arrays are indexed by the order in which text-showing
 *   operators were encountered. `shapes` holds `rect`/`path` records and
 *   `imagePositions` holds `{ name, x, y, w, h }` bounding boxes.
 */
async function walkOps(page, OPS, viewport) {
  const toViewport = (x, y) => {
    const [vx, vy] = viewport.convertToViewportPoint(x, y);
    return { x: vx, y: vy };
  };
  const opList = await page.getOperatorList();
  const fnArr = opList.fnArray;
  const argsArr = opList.argsArray;
  const gs = {
    ctm: [1, 0, 0, 1, 0, 0],
    fill: "#000000",
    stroke: "#000000",
    lineWidth: 1,
    fillAlpha: 1,
    strokeAlpha: 1,
    textRenderingMode: 0
  };
  const stack = [];
  const textColors = [];
  const textOpacities = [];
  const textRenderingModes = [];
  const shapes = [];
  const imagePositions = [];
  let textIdx = 0;
  let pathSegments = [];
  let pathRect = null;
  let pathStart = null;
  let pathLast = null;
  // Drop the path under construction (after painting it, or when it is
  // discarded by endPath/clip).
  function resetPath() {
    pathSegments = [];
    pathRect = null;
    pathStart = null;
    pathLast = null;
  }
  // Remember an axis-aligned rectangle, already mapped through the CTM.
  function recordRect(x, y, w, h) {
    const p1 = tx(gs.ctm, x, y);
    const p3 = tx(gs.ctm, x + w, y + h);
    pathRect = {
      x: Math.min(p1.x, p3.x),
      y: Math.min(p1.y, p3.y),
      w: Math.abs(p3.x - p1.x),
      h: Math.abs(p3.y - p1.y)
    };
  }
  // Turn the current path into a shape record. A recorded rectangle wins over
  // free-form segments; a lone M+L pair becomes a straight-line path; anything
  // else becomes a general path whose coordinates are relative to its bounding box.
  function flushPath(isFill, isStroke) {
    if (pathRect) {
      const tl = toViewport(pathRect.x, pathRect.y + pathRect.h);
      const br = toViewport(pathRect.x + pathRect.w, pathRect.y);
      const x = Math.min(tl.x, br.x);
      const y = Math.min(tl.y, br.y);
      const w = Math.abs(br.x - tl.x);
      const h = Math.abs(br.y - tl.y);
      shapes.push({
        kind: "rect",
        x: x * PT_TO_MM,
        y: y * PT_TO_MM,
        width: w * PT_TO_MM,
        height: h * PT_TO_MM,
        fill: isFill ? gs.fill : void 0,
        stroke: isStroke ? gs.stroke : void 0,
        strokeWidth: isStroke ? gs.lineWidth * PT_TO_MM : void 0,
        opacity: isFill ? gs.fillAlpha : gs.strokeAlpha
      });
    } else if (pathSegments.length === 2 && pathSegments[0].type === "M" && pathSegments[1].type === "L") {
      const a = pathSegments[0].pts;
      const b = pathSegments[1].pts;
      const va = toViewport(a[0], a[1]);
      const vb = toViewport(b[0], b[1]);
      const minX = Math.min(va.x, vb.x);
      const minY = Math.min(va.y, vb.y);
      const maxX = Math.max(va.x, vb.x);
      const maxY = Math.max(va.y, vb.y);
      const x1Local = (va.x - minX) * PT_TO_MM;
      const y1Local = (va.y - minY) * PT_TO_MM;
      const x2Local = (vb.x - minX) * PT_TO_MM;
      const y2Local = (vb.y - minY) * PT_TO_MM;
      shapes.push({
        kind: "path",
        x: minX * PT_TO_MM,
        y: minY * PT_TO_MM,
        // Horizontal/vertical lines have a zero-sized box; keep a minimum extent.
        width: Math.max(0.05, (maxX - minX) * PT_TO_MM),
        height: Math.max(0.05, (maxY - minY) * PT_TO_MM),
        stroke: isStroke ? gs.stroke : void 0,
        strokeWidth: isStroke ? gs.lineWidth * PT_TO_MM : void 0,
        opacity: gs.strokeAlpha,
        path: `M ${x1Local.toFixed(2)} ${y1Local.toFixed(2)} L ${x2Local.toFixed(2)} ${y2Local.toFixed(2)}`
      });
    } else if (pathSegments.length > 0) {
      const vpSegments = pathSegments.map((seg) => {
        if (seg.type === "Z") return seg;
        const out = [];
        for (let i = 0; i < seg.pts.length; i += 2) {
          const v = toViewport(seg.pts[i], seg.pts[i + 1]);
          out.push(v.x, v.y);
        }
        return { type: seg.type, pts: out };
      });
      let minX = Infinity, minY = Infinity, maxX = -Infinity, maxY = -Infinity;
      for (const seg of vpSegments) {
        for (let i = 0; i < seg.pts.length; i += 2) {
          const x = seg.pts[i];
          const y = seg.pts[i + 1];
          if (x < minX) minX = x;
          if (x > maxX) maxX = x;
          if (y < minY) minY = y;
          if (y > maxY) maxY = y;
        }
      }
      if (isFinite(minX) && isFinite(minY)) {
        const bw = maxX - minX;
        const bh = maxY - minY;
        const d = vpSegments.map((seg) => {
          if (seg.type === "Z") return "Z";
          const p = [];
          for (let i = 0; i < seg.pts.length; i += 2) {
            p.push(((seg.pts[i] - minX) * PT_TO_MM).toFixed(2));
            p.push(((seg.pts[i + 1] - minY) * PT_TO_MM).toFixed(2));
          }
          return `${seg.type} ${p.join(" ")}`;
        }).join(" ");
        shapes.push({
          kind: "path",
          x: minX * PT_TO_MM,
          y: minY * PT_TO_MM,
          width: bw * PT_TO_MM,
          height: bh * PT_TO_MM,
          fill: isFill ? gs.fill : void 0,
          stroke: isStroke ? gs.stroke : void 0,
          strokeWidth: isStroke ? gs.lineWidth * PT_TO_MM : void 0,
          opacity: isFill ? gs.fillAlpha : gs.strokeAlpha,
          path: d
        });
      }
    }
    resetPath();
  }
  for (let i = 0; i < fnArr.length; i++) {
    const fn = fnArr[i];
    const args = argsArr[i] || [];
    if (fn === OPS.save) {
      stack.push({ ctm: [...gs.ctm], fill: gs.fill, stroke: gs.stroke, lineWidth: gs.lineWidth, fillAlpha: gs.fillAlpha, strokeAlpha: gs.strokeAlpha, textRenderingMode: gs.textRenderingMode });
    } else if (fn === OPS.restore) {
      const s = stack.pop();
      if (s) Object.assign(gs, s);
    } else if (fn === OPS.transform) {
      // A `cm`-style transform is applied BEFORE the existing CTM
      // (CTM' = M x CTM), so the new matrix must be the left operand.
      // The previous operand order mis-placed content under nested transforms.
      gs.ctm = multiplyCtm(args, gs.ctm);
    } else if (fn === OPS.setFillRGBColor) {
      gs.fill = rgbToHex(args[0], args[1], args[2]);
    } else if (fn === OPS.setStrokeRGBColor) {
      gs.stroke = rgbToHex(args[0], args[1], args[2]);
    } else if (fn === OPS.setFillGray) {
      gs.fill = rgbToHex(args[0], args[0], args[0]);
    } else if (fn === OPS.setStrokeGray) {
      gs.stroke = rgbToHex(args[0], args[0], args[0]);
    } else if (fn === OPS.setFillCMYKColor || fn === OPS.setStrokeCMYKColor) {
      // Naive CMYK -> RGB conversion.
      // NOTE(review): assumes c/m/y/k are in 0..1 — confirm against the producer.
      const c = args[0], m = args[1], y = args[2], k = args[3];
      const r = (1 - c) * (1 - k) * 255;
      const g = (1 - m) * (1 - k) * 255;
      const b = (1 - y) * (1 - k) * 255;
      const hex = rgbToHex(r, g, b);
      if (fn === OPS.setFillCMYKColor) gs.fill = hex;
      else gs.stroke = hex;
    } else if (fn === OPS.setLineWidth) {
      gs.lineWidth = args[0];
    } else if (fn === OPS.setTextRenderingMode) {
      gs.textRenderingMode = args[0];
    } else if (fn === OPS.setGState) {
      // The graphics-state dictionary arrives as an array of [key, value] pairs.
      const dict = args[0];
      if (Array.isArray(dict)) {
        for (const entry of dict) {
          if (!Array.isArray(entry)) continue;
          const [key, val] = entry;
          if (key === "LW") gs.lineWidth = val;
          else if (key === "ca") gs.fillAlpha = val;
          else if (key === "CA") gs.strokeAlpha = val;
        }
      }
    } else if (fn === OPS.showText || fn === OPS.showSpacedText || fn === OPS.nextLineShowText || fn === OPS.nextLineSetSpacingShowText) {
      // Snapshot the paint state for this text-showing operator.
      textColors[textIdx] = gs.fill;
      textOpacities[textIdx] = gs.fillAlpha;
      textRenderingModes[textIdx] = gs.textRenderingMode;
      textIdx++;
    } else if (fn === OPS.rectangle) {
      const [x, y, w, h] = args;
      recordRect(x, y, w, h);
    } else if (fn === OPS.constructPath) {
      const [pathOps, pathArgs] = args;
      let ai = 0;
      for (const op of pathOps) {
        if (op === OPS.moveTo) {
          const p = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          pathSegments.push({ type: "M", pts: [p.x, p.y] });
          pathStart = { x: p.x, y: p.y };
          pathLast = { x: p.x, y: p.y };
        } else if (op === OPS.lineTo) {
          const p = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          pathSegments.push({ type: "L", pts: [p.x, p.y] });
          pathLast = { x: p.x, y: p.y };
        } else if (op === OPS.curveTo) {
          const p1 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p2 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p3 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          pathSegments.push({ type: "C", pts: [p1.x, p1.y, p2.x, p2.y, p3.x, p3.y] });
          pathLast = { x: p3.x, y: p3.y };
        } else if (op === OPS.curveTo2) {
          // First control point is the current point.
          const p2 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p3 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p1 = pathLast || { x: 0, y: 0 };
          pathSegments.push({ type: "C", pts: [p1.x, p1.y, p2.x, p2.y, p3.x, p3.y] });
          pathLast = { x: p3.x, y: p3.y };
        } else if (op === OPS.curveTo3) {
          // Second control point coincides with the end point.
          const p1 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p3 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          pathSegments.push({ type: "C", pts: [p1.x, p1.y, p3.x, p3.y, p3.x, p3.y] });
          pathLast = { x: p3.x, y: p3.y };
        } else if (op === OPS.rectangle) {
          // Rectangles can be folded into a constructPath op list. Consume the
          // four arguments so later segments stay aligned, and treat the
          // rectangle exactly like a stand-alone rectangle operator.
          recordRect(pathArgs[ai], pathArgs[ai + 1], pathArgs[ai + 2], pathArgs[ai + 3]);
          ai += 4;
        } else if (op === OPS.closePath) {
          pathSegments.push({ type: "Z", pts: [] });
          if (pathStart) pathLast = { ...pathStart };
        }
      }
    } else if (fn === OPS.fill || fn === OPS.stroke || fn === OPS.fillStroke || fn === OPS.eoFill || fn === OPS.eoFillStroke || fn === OPS.closeFillStroke || fn === OPS.closeStroke || fn === OPS.closeEOFillStroke) {
      const isFill = fn === OPS.fill || fn === OPS.fillStroke || fn === OPS.eoFill || fn === OPS.eoFillStroke || fn === OPS.closeFillStroke || fn === OPS.closeEOFillStroke;
      const isStroke = fn === OPS.stroke || fn === OPS.fillStroke || fn === OPS.eoFillStroke || fn === OPS.closeFillStroke || fn === OPS.closeStroke || fn === OPS.closeEOFillStroke;
      flushPath(isFill, isStroke);
    } else if (fn === OPS.endPath || fn === OPS.clip || fn === OPS.eoClip) {
      // Path is not painted: discard it without emitting a shape.
      resetPath();
    } else if (fn === OPS.paintImageXObject || fn === OPS.paintImageMaskXObject || fn === OPS.paintInlineImageXObject) {
      // Images are drawn into the unit square; its transformed corners give the
      // bounding box on the page.
      const name = args[0];
      const c = gs.ctm;
      const corners = [tx(c, 0, 0), tx(c, 1, 0), tx(c, 1, 1), tx(c, 0, 1)];
      const vpCorners = corners.map((p) => toViewport(p.x, p.y));
      const xs = vpCorners.map((p) => p.x), ys = vpCorners.map((p) => p.y);
      const minX = Math.min(...xs), maxX = Math.max(...xs);
      const minY = Math.min(...ys), maxY = Math.max(...ys);
      imagePositions.push({
        name,
        x: minX * PT_TO_MM,
        y: minY * PT_TO_MM,
        w: (maxX - minX) * PT_TO_MM,
        h: (maxY - minY) * PT_TO_MM
      });
    }
  }
  return { textColors, textOpacities, textRenderingModes, shapes, imagePositions };
}
|
|
776
|
+
/**
 * Resolve the image objects referenced by `positions` and encode each one as
 * a PNG data URL.
 *
 * Lookups go through `page.objs.get(name, callback)`, falling back to
 * `page.commonObjs.get` when the first call throws, and give up after 250 ms.
 * Entries whose image cannot be resolved, lacks `data`/`width`/`height`, or
 * fails to encode are dropped from the result.
 *
 * An image painted several times on the same page is looked up and encoded
 * only once: all positions sharing a name wait on the same in-flight promise.
 *
 * @param {object} page - Object exposing `objs.get` / `commonObjs.get`.
 * @param {Array<{name: *}>} positions - Image placements; `name` is the lookup key.
 * @param {{encodePng: Function}} runtime - Provides
 *   `encodePng(width, height, kind, data)` returning a data URL (falsy on failure).
 * @param {Map<*, string>} dataUrlCache - Cache of encoded images; read and
 *   updated by this call.
 * @returns {Promise<Array<{pos: object, dataUrl: string}>>} One entry per
 *   resolvable position, in the order of `positions`.
 */
async function extractImages(page, positions, runtime, dataUrlCache) {
  const LOOKUP_TIMEOUT_MS = 250;
  // name -> promise of the encoded data URL (or null), scoped to this call.
  const inFlight = /* @__PURE__ */ new Map();
  // Adapt the callback-style object store to a promise that always settles.
  const lookupImage = (name) => new Promise((resolve) => {
    let settled = false;
    const finish = (value) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      resolve(value);
    };
    const timer = setTimeout(() => finish(null), LOOKUP_TIMEOUT_MS);
    try {
      page.objs.get(name, (img) => finish(img));
    } catch {
      try {
        page.commonObjs.get(name, (img) => finish(img));
      } catch {
        finish(null);
      }
    }
  });
  const resolveDataUrl = async (name) => {
    const imgObj = await lookupImage(name);
    if (!imgObj || !imgObj.data || !imgObj.width || !imgObj.height) return null;
    const dataUrl = runtime.encodePng(imgObj.width, imgObj.height, imgObj.kind || 0, imgObj.data);
    if (!dataUrl) return null;
    dataUrlCache.set(name, dataUrl);
    return dataUrl;
  };
  const tasks = positions.map(async (pos) => {
    if (dataUrlCache.has(pos.name)) {
      return { pos, dataUrl: dataUrlCache.get(pos.name) };
    }
    let pending = inFlight.get(pos.name);
    if (!pending) {
      pending = resolveDataUrl(pos.name);
      inFlight.set(pos.name, pending);
    }
    const dataUrl = await pending;
    return dataUrl ? { pos, dataUrl } : null;
  });
  const results = await Promise.all(tasks);
  return results.filter((entry) => entry !== null);
}
|
|
814
|
+
/**
 * Collect the page's "Link" annotations as rectangles plus target URL.
 *
 * Each annotation's `rect` corners are mapped through the viewport, turned
 * into a min/max box and scaled by `PT_TO_MM`. If `page.getAnnotations()`
 * rejects, an empty list is returned.
 *
 * @param {object} page - Object exposing `getAnnotations()`.
 * @param {object} viewport - Object exposing `convertToViewportPoint(x, y)`.
 * @returns {Promise<Array<{rectMm: {x: number, y: number, w: number, h: number},
 *   url: (string|undefined)}>>} One entry per link annotation with a usable
 *   rect; `url` is `a.url`, else `a.unsafeUrl`, and may be undefined.
 */
async function extractLinks(page, viewport) {
  let annotations;
  try {
    annotations = await page.getAnnotations();
  } catch {
    return [];
  }
  const toViewportPoint = (px, py) => {
    const [vx, vy] = viewport.convertToViewportPoint(px, py);
    return { x: vx, y: vy };
  };
  const links = [];
  for (const annotation of annotations) {
    if (annotation.subtype !== "Link" || !annotation.rect || annotation.rect.length < 4) continue;
    const [x1, y1, x2, y2] = annotation.rect;
    const cornerA = toViewportPoint(x1, y1);
    const cornerB = toViewportPoint(x2, y2);
    const left = Math.min(cornerA.x, cornerB.x);
    const top = Math.min(cornerA.y, cornerB.y);
    const right = Math.max(cornerA.x, cornerB.x);
    const bottom = Math.max(cornerA.y, cornerB.y);
    links.push({
      rectMm: {
        x: left * PT_TO_MM,
        y: top * PT_TO_MM,
        w: (right - left) * PT_TO_MM,
        h: (bottom - top) * PT_TO_MM
      },
      url: annotation.url || annotation.unsafeUrl
    });
  }
  return links;
}
|
|
847
|
+
/**
 * Collect the page's "Widget" (form field) annotations.
 *
 * For each widget with a usable `rect`, the rectangle is mapped through the
 * viewport and scaled by `PT_TO_MM`, and the numeric `fieldFlags` bitmask is
 * decoded into booleans. If `page.getAnnotations()` rejects, an empty list
 * is returned.
 *
 * @param {object} page - Object exposing `getAnnotations()`.
 * @param {object} viewport - Object exposing `convertToViewportPoint(x, y)`.
 * @returns {Promise<object[]>} Records with `rectMm`, `fieldType`,
 *   `fieldName` (defaults to `field-<n>`), `fieldValue`, the decoded flag
 *   booleans and a normalised `options` list.
 */
async function extractFormWidgets(page, viewport) {
  let annotations;
  try {
    annotations = await page.getAnnotations();
  } catch {
    return [];
  }
  const toViewportPoint = (px, py) => {
    const [vx, vy] = viewport.convertToViewportPoint(px, py);
    return { x: vx, y: vy };
  };
  // Reduce a choice option to { value, label }; value prefers exportValue.
  const normaliseOption = (option) => {
    let value = "";
    if (typeof option?.exportValue === "string") {
      value = option.exportValue;
    } else if (typeof option?.value === "string") {
      value = option.value;
    }
    const label = typeof option?.displayValue === "string" ? option.displayValue : void 0;
    return { value, label };
  };
  const widgets = [];
  for (const annotation of annotations) {
    if (annotation.subtype !== "Widget" || !annotation.rect || annotation.rect.length < 4) continue;
    const [x1, y1, x2, y2] = annotation.rect;
    const cornerA = toViewportPoint(x1, y1);
    const cornerB = toViewportPoint(x2, y2);
    const left = Math.min(cornerA.x, cornerB.x);
    const top = Math.min(cornerA.y, cornerB.y);
    const right = Math.max(cornerA.x, cornerB.x);
    const bottom = Math.max(cornerA.y, cornerB.y);
    const flags = typeof annotation.fieldFlags === "number" ? annotation.fieldFlags : 0;
    const isSet = (mask) => (flags & mask) !== 0;
    let options = [];
    if (Array.isArray(annotation.options)) {
      // Options without a usable value are dropped.
      options = annotation.options.map((option) => normaliseOption(option)).filter((option) => option.value !== "");
    }
    widgets.push({
      rectMm: {
        x: left * PT_TO_MM,
        y: top * PT_TO_MM,
        w: (right - left) * PT_TO_MM,
        h: (bottom - top) * PT_TO_MM
      },
      fieldType: annotation.fieldType || "",
      fieldName: annotation.fieldName || `field-${widgets.length + 1}`,
      fieldValue: annotation.fieldValue ?? annotation.buttonValue ?? "",
      multiLine: isSet(4096),
      multiSelect: isSet(2097152),
      combo: isSet(131072),
      pushButton: isSet(65536),
      radio: isSet(32768),
      options,
      readonly: isSet(1),
      required: isSet(2)
    });
  }
  return widgets;
}
|
|
896
|
+
/**
 * Flatten a nested outline into a depth-first list of `{ title, pageIndex }`.
 *
 * String destinations are resolved with `doc.getDestination`; the first
 * element of the resolved destination array is passed to `doc.getPageIndex`.
 * Items whose destination cannot be resolved (or whose resolution throws) are
 * skipped, but their children are still visited.
 *
 * @param {object} doc - Object exposing `getDestination(name)` and `getPageIndex(ref)`.
 * @param {Array<object>|null|undefined} outline - Outline tree; each item may
 *   carry `title`, `dest` and nested `items`.
 * @returns {Promise<Array<{title: *, pageIndex: number}>>} Flattened entries;
 *   empty when `outline` is falsy.
 */
async function flattenOutline(doc, outline) {
  if (!outline) return [];
  const entries = [];
  // Resolve an item's destination to a page index, or undefined when it has none.
  const resolvePageIndex = async (rawDest) => {
    const dest = typeof rawDest === "string" ? await doc.getDestination(rawDest) : rawDest;
    if (!Array.isArray(dest) || !dest[0]) return void 0;
    return doc.getPageIndex(dest[0]);
  };
  const visit = async (nodes) => {
    for (const node of nodes) {
      try {
        const pageIndex = await resolvePageIndex(node.dest);
        if (typeof pageIndex === "number") entries.push({ title: node.title, pageIndex });
      } catch {
        // Best effort: an unresolvable destination just omits this entry.
      }
      if (node.items?.length) await visit(node.items);
    }
  };
  await visit(outline);
  return entries;
}
|
|
919
|
+
async function importPdfToJdf(source, title, runtime, options = {}) {
|
|
920
|
+
const pdfjs = options.pdfjs || runtime.pdfjs;
|
|
921
|
+
if (!pdfjs) {
|
|
922
|
+
throw new Error("[@jdf/pdf-import] runtime did not provide a pdfjs module");
|
|
923
|
+
}
|
|
924
|
+
const OPS = pdfjs.OPS;
|
|
925
|
+
let data;
|
|
926
|
+
if (typeof source === "string") {
|
|
927
|
+
if (source.startsWith("data:") || source.startsWith("http")) {
|
|
928
|
+
const r = await fetch(source);
|
|
929
|
+
data = await r.arrayBuffer();
|
|
930
|
+
} else {
|
|
931
|
+
if (!runtime.readFile) {
|
|
932
|
+
throw new Error("[@jdf/pdf-import] cannot read file path: runtime has no readFile()");
|
|
933
|
+
}
|
|
934
|
+
data = await runtime.readFile(source);
|
|
935
|
+
}
|
|
936
|
+
} else {
|
|
937
|
+
data = source;
|
|
938
|
+
}
|
|
939
|
+
const doc = await pdfjs.getDocument({
|
|
940
|
+
data,
|
|
941
|
+
// The runtime adapter declares whether it supports a real Web Worker.
|
|
942
|
+
// We don't sniff `typeof Worker` here because Node 22+ exposes a global
|
|
943
|
+
// `Worker` (worker_threads) that isn't compatible with PDF.js's worker
|
|
944
|
+
// protocol — the sniff would silently re-enable the broken path on
|
|
945
|
+
// newer Node. Browser entry leaves this unset (= false = real worker
|
|
946
|
+
// via GlobalWorkerOptions.workerSrc); node entry sets `true`.
|
|
947
|
+
disableWorker: runtime.disableWorker === true,
|
|
948
|
+
isEvalSupported: false
|
|
949
|
+
}).promise;
|
|
950
|
+
const pages = [];
|
|
951
|
+
const imageResources = {};
|
|
952
|
+
let imgCounter = 0;
|
|
953
|
+
const dataUrlCache = /* @__PURE__ */ new Map();
|
|
954
|
+
const resourceKeyByName = /* @__PURE__ */ new Map();
|
|
955
|
+
const outline = await doc.getOutline().catch(() => null);
|
|
956
|
+
await flattenOutline(doc, outline);
|
|
957
|
+
for (let pi = 1; pi <= doc.numPages; pi++) {
|
|
958
|
+
let findLinkForRun2 = function(r) {
|
|
959
|
+
const cx = r.x + r.width / 2;
|
|
960
|
+
const cy = r.y + r.height / 2;
|
|
961
|
+
for (const a of links) {
|
|
962
|
+
if (cx >= a.rectMm.x && cx <= a.rectMm.x + a.rectMm.w && cy >= a.rectMm.y && cy <= a.rectMm.y + a.rectMm.h) {
|
|
963
|
+
return a;
|
|
964
|
+
}
|
|
965
|
+
}
|
|
966
|
+
return null;
|
|
967
|
+
};
|
|
968
|
+
const page = await doc.getPage(pi);
|
|
969
|
+
const viewport = page.getViewport({ scale: 1 });
|
|
970
|
+
const pageW = viewport.width;
|
|
971
|
+
const pageH = viewport.height;
|
|
972
|
+
const { canvas, context } = runtime.createCanvas(Math.ceil(pageW), Math.ceil(pageH));
|
|
973
|
+
try {
|
|
974
|
+
await page.render({ canvasContext: context, viewport, canvas }).promise;
|
|
975
|
+
} catch {
|
|
976
|
+
}
|
|
977
|
+
const ops = await walkOps(page, OPS, viewport);
|
|
978
|
+
const links = await extractLinks(page, viewport);
|
|
979
|
+
const formWidgets = await extractFormWidgets(page, viewport);
|
|
980
|
+
const textContent = await page.getTextContent({ disableCombineTextItems: false });
|
|
981
|
+
const items = textContent.items;
|
|
982
|
+
const fontMap = /* @__PURE__ */ new Map();
|
|
983
|
+
for (const k of Object.keys(textContent.styles || {})) {
|
|
984
|
+
const s = textContent.styles[k];
|
|
985
|
+
let realName = s.fontFamily || k;
|
|
986
|
+
try {
|
|
987
|
+
const has = typeof page.commonObjs.has === "function" ? page.commonObjs.has(k) : false;
|
|
988
|
+
if (has) {
|
|
989
|
+
await new Promise((resolve) => {
|
|
990
|
+
let settled = false;
|
|
991
|
+
const done = () => {
|
|
992
|
+
if (!settled) {
|
|
993
|
+
settled = true;
|
|
994
|
+
resolve();
|
|
995
|
+
}
|
|
996
|
+
};
|
|
997
|
+
try {
|
|
998
|
+
page.commonObjs.get(k, (font) => {
|
|
999
|
+
if (font?.name) realName = font.name;
|
|
1000
|
+
else if (font?.loadedName) realName = font.loadedName;
|
|
1001
|
+
done();
|
|
1002
|
+
});
|
|
1003
|
+
} catch {
|
|
1004
|
+
done();
|
|
1005
|
+
}
|
|
1006
|
+
setTimeout(done, 100);
|
|
1007
|
+
});
|
|
1008
|
+
}
|
|
1009
|
+
} catch {
|
|
1010
|
+
}
|
|
1011
|
+
const cls = classifyFont(realName);
|
|
1012
|
+
if (!cls.weight && /bold/i.test(s.fontFamily || "")) cls.weight = "bold";
|
|
1013
|
+
if (!cls.style && /italic|oblique/i.test(s.fontFamily || "")) cls.style = "italic";
|
|
1014
|
+
fontMap.set(k, cls);
|
|
1015
|
+
}
|
|
1016
|
+
const runs = [];
|
|
1017
|
+
const safeNum = (v, fallback) => {
|
|
1018
|
+
const n = typeof v === "number" ? v : Number(v);
|
|
1019
|
+
return Number.isFinite(n) ? n : fallback;
|
|
1020
|
+
};
|
|
1021
|
+
items.forEach((it, idx) => {
|
|
1022
|
+
if (!it.str || !it.str.length) return;
|
|
1023
|
+
if ((ops.textRenderingModes[idx] ?? 0) === 3) return;
|
|
1024
|
+
const tr = it.transform;
|
|
1025
|
+
const fontSize = safeNum(Math.hypot(safeNum(tr?.[2], 0), safeNum(tr?.[3], 0)), 0) || safeNum(it.height, 0) || 10;
|
|
1026
|
+
const baseX = safeNum(tr?.[4], 0);
|
|
1027
|
+
const baseY = safeNum(tr?.[5], 0);
|
|
1028
|
+
const conv = viewport.convertToViewportPoint(baseX, baseY);
|
|
1029
|
+
const vx = safeNum(conv?.[0], 0);
|
|
1030
|
+
const vy = safeNum(conv?.[1], 0);
|
|
1031
|
+
const ascent = it.height ? safeNum(it.height, fontSize) * 0.78 : fontSize * 0.78;
|
|
1032
|
+
const yTop = vy - ascent;
|
|
1033
|
+
const w = safeNum(it.width, 0);
|
|
1034
|
+
runs.push({
|
|
1035
|
+
text: it.str,
|
|
1036
|
+
x: safeNum(vx * PT_TO_MM, 0),
|
|
1037
|
+
y: safeNum(yTop * PT_TO_MM, 0),
|
|
1038
|
+
fontSize: safeNum(fontSize, 10),
|
|
1039
|
+
fontName: it.fontName,
|
|
1040
|
+
width: safeNum(w * PT_TO_MM, 0),
|
|
1041
|
+
height: safeNum((it.height || fontSize) * PT_TO_MM, fontSize * PT_TO_MM),
|
|
1042
|
+
color: ops.textColors[idx] || "#000000",
|
|
1043
|
+
opacity: safeNum(ops.textOpacities[idx], 1)
|
|
1044
|
+
});
|
|
1045
|
+
});
|
|
1046
|
+
runs.sort((a, b) => a.y - b.y || a.x - b.x);
|
|
1047
|
+
const lines = [];
|
|
1048
|
+
const Y_TOL = 0.6;
|
|
1049
|
+
for (const r of runs) {
|
|
1050
|
+
if (!r.text.length) continue;
|
|
1051
|
+
const last = lines[lines.length - 1];
|
|
1052
|
+
if (!last) {
|
|
1053
|
+
lines.push({ ...r });
|
|
1054
|
+
continue;
|
|
1055
|
+
}
|
|
1056
|
+
const sameLine = Math.abs(last.y - r.y) <= Y_TOL;
|
|
1057
|
+
const sameStyle = Math.abs(last.fontSize - r.fontSize) < 0.4 && last.fontName === r.fontName && last.color === r.color && Math.abs(last.opacity - r.opacity) < 0.05;
|
|
1058
|
+
const gapMm = r.x - (last.x + last.width);
|
|
1059
|
+
const emMm = r.fontSize * PT_TO_MM;
|
|
1060
|
+
const mergeOk = sameLine && sameStyle && gapMm >= -0.2 && gapMm <= emMm * 0.45;
|
|
1061
|
+
if (mergeOk) {
|
|
1062
|
+
const lastEndsSpace = /\s$/.test(last.text);
|
|
1063
|
+
const currStartsSpace = /^\s/.test(r.text);
|
|
1064
|
+
const sep = gapMm > emMm * 0.08 && !lastEndsSpace && !currStartsSpace ? " " : "";
|
|
1065
|
+
last.text = last.text + sep + r.text;
|
|
1066
|
+
const newExtent = r.x - last.x + r.width;
|
|
1067
|
+
last.width = Math.max(last.width, newExtent);
|
|
1068
|
+
} else {
|
|
1069
|
+
lines.push({ ...r });
|
|
1070
|
+
}
|
|
1071
|
+
}
|
|
1072
|
+
const elements = [];
|
|
1073
|
+
for (const sh of ops.shapes) {
|
|
1074
|
+
if (sh.width < 0.3 && sh.height < 0.3) continue;
|
|
1075
|
+
const shapeType = sh.kind;
|
|
1076
|
+
const shape = {
|
|
1077
|
+
type: "shape",
|
|
1078
|
+
shape: shapeType,
|
|
1079
|
+
position: { x: Math.round(sh.x * 100) / 100, y: Math.round(sh.y * 100) / 100 },
|
|
1080
|
+
width: Math.max(0.1, Math.round(sh.width * 100) / 100),
|
|
1081
|
+
height: Math.max(0.1, Math.round(sh.height * 100) / 100)
|
|
1082
|
+
};
|
|
1083
|
+
if (sh.fill) shape.fill = sh.fill;
|
|
1084
|
+
if (sh.stroke) shape.stroke = { color: sh.stroke, width: sh.strokeWidth || 0.3 };
|
|
1085
|
+
if (shapeType === "path" && sh.path) shape.path = sh.path;
|
|
1086
|
+
if (sh.opacity != null && sh.opacity < 0.999) {
|
|
1087
|
+
shape.style = { opacity: Math.round(sh.opacity * 100) / 100 };
|
|
1088
|
+
}
|
|
1089
|
+
elements.push(shape);
|
|
1090
|
+
}
|
|
1091
|
+
const imgs = await extractImages(page, ops.imagePositions, runtime, dataUrlCache);
|
|
1092
|
+
for (const { pos, dataUrl } of imgs) {
|
|
1093
|
+
let resourceKey = resourceKeyByName.get(pos.name);
|
|
1094
|
+
if (!resourceKey) {
|
|
1095
|
+
resourceKey = `img${imgCounter++}`;
|
|
1096
|
+
resourceKeyByName.set(pos.name, resourceKey);
|
|
1097
|
+
const base64 = dataUrl.replace(/^data:image\/[a-zA-Z+]+;base64,/, "");
|
|
1098
|
+
imageResources[resourceKey] = {
|
|
1099
|
+
src: "embedded",
|
|
1100
|
+
mimeType: "image/png",
|
|
1101
|
+
data: base64
|
|
1102
|
+
};
|
|
1103
|
+
}
|
|
1104
|
+
elements.push({
|
|
1105
|
+
type: "image",
|
|
1106
|
+
resource: resourceKey,
|
|
1107
|
+
position: { x: Math.round(pos.x * 100) / 100, y: Math.round(pos.y * 100) / 100 },
|
|
1108
|
+
width: Math.max(1, Math.round(pos.w * 100) / 100),
|
|
1109
|
+
height: Math.max(1, Math.round(pos.h * 100) / 100),
|
|
1110
|
+
fit: "fill"
|
|
1111
|
+
});
|
|
1112
|
+
}
|
|
1113
|
+
for (const l of lines) {
|
|
1114
|
+
const cls = fontMap.get(l.fontName) || classifyFont(l.fontName || "");
|
|
1115
|
+
const style = {
|
|
1116
|
+
fontSize: Math.round(l.fontSize * 10) / 10,
|
|
1117
|
+
fontFamily: cls.family
|
|
1118
|
+
};
|
|
1119
|
+
if (cls.weight === "bold") style.fontWeight = "bold";
|
|
1120
|
+
if (cls.style === "italic") style.fontStyle = "italic";
|
|
1121
|
+
if (l.color !== "#000000") style.color = l.color;
|
|
1122
|
+
if (l.opacity < 0.999) style.opacity = Math.round(l.opacity * 100) / 100;
|
|
1123
|
+
const link = findLinkForRun2(l);
|
|
1124
|
+
const pageWmm = pageW * PT_TO_MM;
|
|
1125
|
+
const measured = Math.max(l.width + l.fontSize * PT_TO_MM * 0.4, l.fontSize * PT_TO_MM);
|
|
1126
|
+
const remaining = Math.max(measured, pageWmm - l.x);
|
|
1127
|
+
const elWidth = Math.min(measured, remaining);
|
|
1128
|
+
const text = {
|
|
1129
|
+
type: "text",
|
|
1130
|
+
content: l.text,
|
|
1131
|
+
position: { x: Math.max(0, Math.round(l.x * 100) / 100), y: Math.max(0, Math.round(l.y * 100) / 100) },
|
|
1132
|
+
width: Math.max(2, Math.round(elWidth * 100) / 100),
|
|
1133
|
+
style
|
|
1134
|
+
};
|
|
1135
|
+
if (cls.weight === "bold") {
|
|
1136
|
+
if (l.fontSize >= 22) text.heading = 1;
|
|
1137
|
+
else if (l.fontSize >= 17) text.heading = 2;
|
|
1138
|
+
else if (l.fontSize >= 16) text.heading = 3;
|
|
1139
|
+
}
|
|
1140
|
+
if (text.heading) text.tocEntry = text.content;
|
|
1141
|
+
if (link) {
|
|
1142
|
+
if (link.url) text.link = link.url;
|
|
1143
|
+
else if (link.destPage != null) text.link = { type: "internal", target: `#page-${link.destPage + 1}` };
|
|
1144
|
+
}
|
|
1145
|
+
elements.push(text);
|
|
1146
|
+
}
|
|
1147
|
+
for (const w of formWidgets) {
|
|
1148
|
+
if (w.pushButton) continue;
|
|
1149
|
+
const baseEl = {
|
|
1150
|
+
name: w.fieldName,
|
|
1151
|
+
position: { x: Math.max(0, Math.round(w.rectMm.x * 100) / 100), y: Math.max(0, Math.round(w.rectMm.y * 100) / 100) },
|
|
1152
|
+
width: Math.max(2, Math.round(w.rectMm.w * 100) / 100),
|
|
1153
|
+
height: Math.max(2, Math.round(w.rectMm.h * 100) / 100)
|
|
1154
|
+
};
|
|
1155
|
+
if (w.readonly) baseEl.readonly = true;
|
|
1156
|
+
if (w.required) baseEl.required = true;
|
|
1157
|
+
if (w.fieldType === "Tx") {
|
|
1158
|
+
if (w.multiLine) {
|
|
1159
|
+
elements.push({ type: "textarea", ...baseEl, value: typeof w.fieldValue === "string" ? w.fieldValue : "" });
|
|
1160
|
+
} else {
|
|
1161
|
+
elements.push({ type: "input", ...baseEl, inputType: "text", value: typeof w.fieldValue === "string" ? w.fieldValue : "" });
|
|
1162
|
+
}
|
|
1163
|
+
} else if (w.fieldType === "Btn" && !w.radio) {
|
|
1164
|
+
const checked = w.fieldValue !== "Off" && !!w.fieldValue;
|
|
1165
|
+
elements.push({ type: "checkbox", ...baseEl, checked });
|
|
1166
|
+
} else if (w.fieldType === "Ch") {
|
|
1167
|
+
const value = typeof w.fieldValue === "string" ? w.fieldValue : "";
|
|
1168
|
+
const values = Array.isArray(w.fieldValue) ? w.fieldValue : void 0;
|
|
1169
|
+
const opts = w.options.length > 0 ? w.options : value ? [{ value }] : [];
|
|
1170
|
+
if (w.multiSelect) {
|
|
1171
|
+
elements.push({ type: "select", ...baseEl, options: opts, multiple: true, values: values ?? (value ? [value] : []) });
|
|
1172
|
+
} else {
|
|
1173
|
+
elements.push({ type: "select", ...baseEl, options: opts, value });
|
|
1174
|
+
}
|
|
1175
|
+
} else if (w.fieldType === "Sig") {
|
|
1176
|
+
elements.push({ type: "signature", ...baseEl, value: "" });
|
|
1177
|
+
}
|
|
1178
|
+
}
|
|
1179
|
+
pages.push({
|
|
1180
|
+
id: `page-${pi}`,
|
|
1181
|
+
pageSize: { width: Math.round(pageW * PT_TO_MM * 100) / 100, height: Math.round(pageH * PT_TO_MM * 100) / 100 },
|
|
1182
|
+
margins: { top: 0, right: 0, bottom: 0, left: 0 },
|
|
1183
|
+
elements
|
|
1184
|
+
});
|
|
1185
|
+
}
|
|
1186
|
+
const result = {
|
|
1187
|
+
$jdf: "1.0.0",
|
|
1188
|
+
meta: {
|
|
1189
|
+
title,
|
|
1190
|
+
pageSize: pages[0]?.pageSize || "A4",
|
|
1191
|
+
unit: "mm",
|
|
1192
|
+
margins: { top: 0, right: 0, bottom: 0, left: 0 }
|
|
1193
|
+
},
|
|
1194
|
+
pages
|
|
1195
|
+
};
|
|
1196
|
+
if (Object.keys(imageResources).length > 0) {
|
|
1197
|
+
result.resources = { images: imageResources };
|
|
1198
|
+
}
|
|
1199
|
+
return result;
|
|
1200
|
+
}
|
|
1201
|
+
|
|
1202
|
+
// ../../packages/jdf-pdf-import/src/node.ts
|
|
1203
|
+
// Resolved pdf.js module, cached after the first successful load.
var pdfjsModule = null;
// In-flight (or completed) load, shared so concurrent callers trigger only one import.
var pdfjsLoadPromise = null;

/**
 * Lazily imports pdf.js for Node and points its worker at the on-disk worker file.
 *
 * The module is cached after the first successful load, and concurrent callers
 * share one in-flight import. A failed load is NOT cached: the shared promise is
 * cleared on rejection so a later call can try again.
 *
 * @returns {Promise<object>} The imported `pdfjs-dist/build/pdf.mjs` module namespace.
 * @throws Rejects with whatever the dynamic import or `require.resolve` throws.
 */
async function loadNodePdfJs() {
  if (pdfjsModule) return pdfjsModule;
  if (pdfjsLoadPromise) return pdfjsLoadPromise;
  const loading = (async () => {
    const { createRequire } = await import('module');
    const require_ = createRequire(import.meta.url);
    // Temporarily filter warnings whose first argument mentions "legacy" while
    // the module is being imported; everything else is passed through.
    const origWarn = console.warn;
    console.warn = (...args) => {
      if (typeof args[0] === "string" && args[0].includes("legacy")) return;
      origWarn.apply(console, args);
    };
    let lib;
    try {
      lib = await import('pdfjs-dist/build/pdf.mjs');
    } finally {
      // Always restore the real console.warn, even when the import fails.
      console.warn = origWarn;
    }
    const workerPath = require_.resolve("pdfjs-dist/build/pdf.worker.mjs");
    if (lib.GlobalWorkerOptions) {
      lib.GlobalWorkerOptions.workerSrc = workerPath;
    }
    pdfjsModule = lib;
    return lib;
  })();
  pdfjsLoadPromise = loading;
  // Drop the cached promise on failure so the next call retries instead of
  // replaying the same rejection. The caller still sees the original rejection.
  loading.catch(() => {
    if (pdfjsLoadPromise === loading) pdfjsLoadPromise = null;
  });
  return loading;
}
|
|
1231
|
+
// Cached `@napi-rs/canvas` module namespace; null until the first successful import.
var canvasModule = null;

/**
 * Lazily imports `@napi-rs/canvas` and caches the module.
 *
 * @returns {Promise<object>} The imported `@napi-rs/canvas` module namespace.
 * @throws {Error} When the import fails. The underlying failure is kept on
 *   `error.cause`, so a broken install can be told apart from a missing package.
 */
async function loadCanvas() {
  if (canvasModule) return canvasModule;
  try {
    canvasModule = await import('@napi-rs/canvas');
    return canvasModule;
  } catch (err) {
    throw new Error(
      "[@jdf/pdf-import/node] @napi-rs/canvas is required for the node entry point. Install it: pnpm add @napi-rs/canvas",
      { cause: err }
    );
  }
}
|
|
1243
|
+
/**
 * Builds a PNG encoder backed by the given canvas module.
 *
 * The returned function paints raw pixel data onto a canvas and returns it as a
 * `data:image/png;base64,...` URL. `kind` selects the input layout:
 *   - 1: 1 bit per pixel, most-significant bit first, a set bit is white.
 *        Each row starts on a byte boundary (row stride is `ceil(width / 8)`).
 *   - 2: 3 bytes per pixel (RGB); alpha is forced to 255.
 *   - 3 or anything else: bytes are copied straight into the RGBA buffer.
 *
 * @param {object} canvasMod Module exposing `createCanvas(width, height)`.
 * @returns {(width: number, height: number, kind: number, data: ArrayLike<number>) => (string|null)}
 *   Encoder that returns the data URL, or `null` when no 2D context is
 *   available or PNG serialisation throws.
 */
function makeNodeEncoder(canvasMod) {
  return function encodePngNode(width, height, kind, data) {
    const canvas = canvasMod.createCanvas(width, height);
    const ctx = canvas.getContext("2d");
    if (!ctx) return null;
    const imgData = ctx.createImageData(width, height);
    const buf = imgData.data;
    if (kind === 2) {
      for (let i = 0, j = 0; i < data.length; i += 3, j += 4) {
        buf[j] = data[i];
        buf[j + 1] = data[i + 1];
        buf[j + 2] = data[i + 2];
        buf[j + 3] = 255;
      }
    } else if (kind === 1) {
      // Rows are padded to whole bytes, so bits must be indexed per row.
      // Treating the data as one continuous bit stream skews every row after
      // the first when width is not a multiple of 8.
      const rowBytes = (width + 7) >> 3;
      let j = 0;
      for (let y = 0; y < height; y++) {
        const rowStart = y * rowBytes;
        for (let x = 0; x < width; x++, j += 4) {
          const byte = data[rowStart + (x >> 3)];
          const v = (byte >> (7 - (x & 7))) & 1 ? 255 : 0;
          buf[j] = v;
          buf[j + 1] = v;
          buf[j + 2] = v;
          buf[j + 3] = 255;
        }
      }
    } else {
      // kind 3 (already RGBA) and unknown kinds: plain byte copy, bounded by
      // both the source and the destination length.
      for (let i = 0; i < data.length && i < buf.length; i++) buf[i] = data[i];
    }
    ctx.putImageData(imgData, 0, 0);
    try {
      const png = canvas.toBuffer("image/png");
      return `data:image/png;base64,${png.toString("base64")}`;
    } catch {
      return null;
    }
  };
}
|
|
1281
|
+
/**
 * Node entry point for PDF import: assembles a Node-specific runtime (pdf.js,
 * canvas factory, PNG encoder, file reader) and delegates to `importPdfToJdf`.
 *
 * @param {*} source Forwarded unchanged to `importPdfToJdf`.
 * @param {string} title Forwarded unchanged to `importPdfToJdf`.
 * @param {object} [options] Forwarded unchanged; `options.pdfjs`, when truthy,
 *   is used instead of loading pdf.js.
 * @returns {Promise<*>} Whatever `importPdfToJdf` resolves to.
 */
async function importPdfToJdf2(source, title, options = {}) {
  const pdfjs = options.pdfjs || await loadNodePdfJs();
  const canvasMod = await loadCanvas();

  // Canvas factory: returns the canvas together with its 2D context.
  const createCanvas = (width, height) => {
    const canvas = canvasMod.createCanvas(width, height);
    return { canvas, context: canvas.getContext("2d") };
  };

  // Reads a file from disk and exposes it as a plain Uint8Array.
  const readBytes = async (filePath) => new Uint8Array(await readFile(filePath));

  return importPdfToJdf(
    source,
    title,
    {
      pdfjs,
      disableWorker: true,
      createCanvas,
      encodePng: makeNodeEncoder(canvasMod),
      readFile: readBytes
    },
    options
  );
}
|
|
1300
|
+
|
|
412
1301
|
// src/commands/import-pdf.ts
|
|
413
|
-
async function
|
|
414
|
-
|
|
415
|
-
|
|
416
|
-
|
|
1302
|
+
/**
 * CLI handler for `jdf import <file.pdf>`: converts a PDF and writes the result
 * as `.jdf` (JSON) or `.jdfx` (bundle), chosen by the output extension.
 *
 * Exits the process with code 1 when the input file does not exist.
 *
 * @param {string} inputPath Path to the PDF; resolved to an absolute path.
 * @param {string} [outputPath] Explicit destination. When omitted, the input
 *   path with its `.pdf` suffix replaced by `.jdfx` or `.jdf` is used.
 * @param {{ forceJson?: boolean }} [options] `forceJson` makes the default
 *   destination `.jdf` without consulting `shouldUseJdfx`.
 * @returns {Promise<void>}
 */
async function importPdf(inputPath, outputPath, options = {}) {
  const input = path2.resolve(inputPath);
  if (!fs.existsSync(input)) {
    console.error(`File not found: ${input}`);
    process.exit(1);
  }
  console.log(`Importing: ${input}`);

  const title = path2.basename(input, path2.extname(input));
  const startedAt = Date.now();
  const doc = await importPdfToJdf2(input, title);
  const elapsedSec = ((Date.now() - startedAt) / 1e3).toFixed(1);
  console.log(`Parsed in ${elapsedSec}s \u2014 ${doc.pages.length} page(s)`);

  // Default destination sits next to the input; the bundle check only runs
  // when JSON output has not been forced.
  const defaultOutput = () => {
    const bundle = !options.forceJson && shouldUseJdfx(doc);
    return input.replace(/\.pdf$/i, "") + (bundle ? ".jdfx" : ".jdf");
  };
  const output = outputPath ? path2.resolve(outputPath) : defaultOutput();
  console.log(`Output: ${output}`);

  if (!output.toLowerCase().endsWith(".jdfx")) {
    fs.writeFileSync(output, JSON.stringify(doc, null, 2));
    console.log(`\nDone! Created ${doc.pages.length} page(s)`);
  } else {
    const packed = await packJdfx(doc);
    fs.writeFileSync(output, packed.bytes);
    console.log(`\nDone! Created ${doc.pages.length} page(s), ${packed.manifest.assets.length} asset(s) bundled`);
  }
  console.log(`Open with: open -a "JDF Reader" "${output}"`);
}
|
|
1334
|
+
/**
 * Error raised for user-facing failures of the JSON import path (missing file,
 * invalid JSON, unrecognised shape, failed validation).
 */
var ImportJsonError = class extends Error {
  /**
   * @param {string} message Human-readable description.
   * @param {{ cause?: unknown }} [options] Forwarded to `Error`, so the
   *   underlying failure can be attached as `cause`.
   */
  constructor(message, options) {
    super(message, options);
    this.name = "ImportJsonError";
  }
};

/**
 * CLI handler for `jdf import <file.json>`: reads JSON, normalises it into a JDF
 * document, writes it as `.jdf` or `.jdfx` (by output extension), then validates
 * the written file unless told not to.
 *
 * @param {string} inputPath Path to the JSON file; resolved to an absolute path.
 * @param {string} [outputPath] Explicit destination. When omitted, the input
 *   path with its `.json` suffix replaced by `.jdfx` or `.jdf` is used.
 * @param {{ forceJson?: boolean, skipValidate?: boolean }} [options]
 *   `forceJson` makes the default destination `.jdf`; `skipValidate` skips the
 *   post-write `validate` call.
 * @returns {Promise<void>}
 * @throws {ImportJsonError} When the file is missing, is not valid JSON (the
 *   parser's error is on `cause`), or `validate` reports failure. Errors thrown
 *   by `normaliseToJdf` propagate unchanged.
 */
async function importJson(inputPath, outputPath, options = {}) {
  const input = path2.resolve(inputPath);
  if (!fs.existsSync(input)) {
    throw new ImportJsonError(`File not found: ${input}`);
  }
  console.log(`Importing: ${input}`);
  const raw = fs.readFileSync(input, "utf-8");
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (e) {
    // Keep the parser's error so the original stack is not lost.
    throw new ImportJsonError(`Not valid JSON: ${e.message}`, { cause: e });
  }
  const title = path2.basename(input, path2.extname(input));
  const doc = normaliseToJdf(parsed, title);
  let output;
  if (outputPath) {
    output = path2.resolve(outputPath);
  } else {
    const stem = input.replace(/\.json$/i, "");
    const wantJdfx = !options.forceJson && shouldUseJdfx(doc);
    output = stem + (wantJdfx ? ".jdfx" : ".jdf");
  }
  console.log(`Output: ${output}`);
  if (output.toLowerCase().endsWith(".jdfx")) {
    const { bytes, manifest } = await packJdfx(doc);
    fs.writeFileSync(output, bytes);
    console.log(`\nDone! Created ${doc.pages.length} page(s), ${manifest.assets.length} asset(s) bundled`);
  } else {
    fs.writeFileSync(output, JSON.stringify(doc, null, 2));
    console.log(`\nDone! Created ${doc.pages.length} page(s)`);
  }
  if (!options.skipValidate) {
    console.log("");
    // Validation runs against the file that was just written.
    const ok = await validate(output);
    if (!ok) throw new ImportJsonError(`Schema validation failed for ${output}`);
  }
  console.log(`Open with: open -a "JDF Reader" "${output}"`);
}
|
|
1381
|
+
/**
 * Coerces parsed JSON into a JDF document object.
 *
 * Accepted shapes, checked in this order:
 *   1. A JDF document: object with a string `$jdf` and a `pages` array.
 *   2. A bare array of elements, wrapped into a single page.
 *   3. An object with a `pages` array (`$jdf` defaults to "1.0.0").
 *   4. An object with an `elements` array, wrapped into a single page.
 *
 * `meta` is built from the defaults `{ title, pageSize: "A4", unit: "mm" }`
 * with the input's own `meta` keys laid on top. A null or missing `meta.title`
 * falls back to `title`. Only `styles`, `resources`, `header`, `footer` and
 * `pages` are carried over from the input, besides `$jdf` and `meta`.
 *
 * @param {*} input Parsed JSON value.
 * @param {string} title Fallback document title.
 * @returns {object} The JDF document.
 * @throws {ImportJsonError} When `pages` / `elements` / the element array is
 *   empty, or the shape is not one of the above.
 */
function normaliseToJdf(input, title) {
  // Defaults first, then caller-supplied keys. The spread would let an explicit
  // `title: null` clobber the default, so the fallback is re-applied afterwards.
  const buildMeta = (meta) => {
    const supplied = meta && typeof meta === "object" ? meta : {};
    const merged = { title, pageSize: "A4", unit: "mm", ...supplied };
    if (merged.title == null) merged.title = title;
    return merged;
  };
  // Optional top-level sections are copied only when truthy.
  const optionalSections = () => ({
    ...(input.styles ? { styles: input.styles } : {}),
    ...(input.resources ? { resources: input.resources } : {}),
    ...(input.header ? { header: input.header } : {}),
    ...(input.footer ? { footer: input.footer } : {})
  });

  if (input && typeof input === "object" && typeof input.$jdf === "string" && Array.isArray(input.pages)) {
    if (input.pages.length === 0) {
      throw new ImportJsonError("JDF document has zero pages \u2014 `pages` must contain at least one page");
    }
    return {
      $jdf: input.$jdf,
      meta: buildMeta(input.meta),
      ...optionalSections(),
      pages: input.pages
    };
  }
  if (Array.isArray(input)) {
    if (input.length === 0) {
      throw new ImportJsonError("Element array is empty \u2014 wrap at least one element");
    }
    return wrapElements(input, title);
  }
  if (input && typeof input === "object") {
    if (Array.isArray(input.pages)) {
      if (input.pages.length === 0) {
        throw new ImportJsonError("`pages` is empty \u2014 provide at least one page");
      }
      return {
        $jdf: input.$jdf || "1.0.0",
        meta: buildMeta(input.meta),
        ...optionalSections(),
        pages: input.pages
      };
    }
    if (Array.isArray(input.elements)) {
      if (input.elements.length === 0) {
        throw new ImportJsonError("`elements` is empty \u2014 provide at least one element");
      }
      return wrapElements(input.elements, title, input.meta);
    }
  }
  throw new ImportJsonError(
    "Unrecognised JSON shape \u2014 expected a JDF document, an element array, or { pages: [...] } / { elements: [...] }"
  );
}
|
|
1429
|
+
/**
 * Wraps a list of elements in a minimal single-page JDF 1.0.0 document.
 *
 * `meta` keys are laid over the defaults `{ title, pageSize: "A4", unit: "mm" }`.
 * A non-object `meta` is ignored, and a null or missing `meta.title` falls back
 * to `title` rather than overwriting it.
 *
 * @param {Array} elements Elements placed on the single page (`id: "page-1"`).
 * @param {string} title Fallback document title.
 * @param {object} [meta] Extra or overriding meta fields.
 * @returns {object} The JDF document.
 */
function wrapElements(elements, title, meta) {
  const supplied = meta && typeof meta === "object" ? meta : {};
  const mergedMeta = { title, pageSize: "A4", unit: "mm", ...supplied };
  // The spread lets an explicit `title: null` replace the default; restore it.
  if (mergedMeta.title == null) mergedMeta.title = title;
  return {
    $jdf: "1.0.0",
    meta: mergedMeta,
    pages: [
      {
        id: "page-1",
        elements
      }
    ]
  };
}
|
|
418
1446
|
|
|
419
1447
|
// src/index.ts
|
|
420
1448
|
var HELP = `jdf \u2014 JSON Document Format CLI
|
|
421
1449
|
|
|
1450
|
+
The CLI exists for two workflows:
|
|
1451
|
+
\u2022 PDF \u2192 JDF legacy documents become a structured JSON tree your
|
|
1452
|
+
RAG / agent / pipeline can read natively.
|
|
1453
|
+
\u2022 JSON \u2192 JDF LLMs and code emit JSON; this command wraps that JSON
|
|
1454
|
+
into a validated .jdf (or .jdfx) you can ship.
|
|
1455
|
+
|
|
422
1456
|
Usage:
|
|
423
1457
|
jdf validate <file.jdf>
|
|
424
|
-
jdf import <file.{md
|
|
1458
|
+
jdf import <file.{pdf,json,md}> [-o output.{jdf,jdfx}] [--json]
|
|
425
1459
|
jdf --help
|
|
426
1460
|
|
|
427
1461
|
Commands:
|
|
428
|
-
validate Validate a .jdf file against the JDF schema
|
|
429
|
-
import Convert a
|
|
1462
|
+
validate Validate a .jdf / .jdfx file against the JDF schema
|
|
1463
|
+
import Convert a PDF, JSON, or Markdown file into JDF
|
|
1464
|
+
|
|
1465
|
+
Flags:
|
|
1466
|
+
-o, --output <path> Explicit output path (extension picks .jdf vs .jdfx)
|
|
1467
|
+
--json Force pure JSON .jdf output (documents with embedded
|
|
1468
|
+
images stay as a single base64-inlined .jdf instead
|
|
1469
|
+
of a .jdfx bundle). Useful for RAG / CI consumers
|
|
1470
|
+
that prefer one text file over a zip.
|
|
430
1471
|
|
|
431
1472
|
Examples:
|
|
432
1473
|
jdf validate spec/examples/hello-world.jdf
|
|
1474
|
+
jdf import paper.pdf # PDF \u2192 JDF (or .jdfx for images)
|
|
1475
|
+
jdf import contract.pdf --json | jq . # PDF \u2192 pure JSON, pipe-friendly
|
|
1476
|
+
jdf import response.json -o response.jdf # LLM JSON output \u2192 validated JDF
|
|
433
1477
|
jdf import README.md
|
|
434
|
-
jdf import paper.pdf -o paper.jdf
|
|
435
1478
|
`;
|
|
1479
|
+
// Names of `--flag` options that take no value: parseArgs stores `true` for
// these instead of reading the next argv entry as the flag's value.
var BOOLEAN_FLAGS = /* @__PURE__ */ new Set(["help", "h", "json", "verbose", "skip-validate"]);
|
|
436
1480
|
function parseArgs(argv) {
|
|
437
1481
|
const positional = [];
|
|
438
1482
|
const flags = {};
|
|
@@ -448,7 +1492,17 @@ function parseArgs(argv) {
|
|
|
448
1492
|
continue;
|
|
449
1493
|
}
|
|
450
1494
|
if (a.startsWith("--")) {
|
|
451
|
-
const
|
|
1495
|
+
const eq = a.indexOf("=");
|
|
1496
|
+
const k = eq > 0 ? a.slice(2, eq) : a.slice(2);
|
|
1497
|
+
const inlineVal = eq > 0 ? a.slice(eq + 1) : void 0;
|
|
1498
|
+
if (inlineVal !== void 0) {
|
|
1499
|
+
flags[k] = inlineVal;
|
|
1500
|
+
continue;
|
|
1501
|
+
}
|
|
1502
|
+
if (BOOLEAN_FLAGS.has(k)) {
|
|
1503
|
+
flags[k] = true;
|
|
1504
|
+
continue;
|
|
1505
|
+
}
|
|
452
1506
|
const next = argv[i + 1];
|
|
453
1507
|
if (next && !next.startsWith("-")) {
|
|
454
1508
|
flags[k] = next;
|
|
@@ -458,10 +1512,12 @@ function parseArgs(argv) {
|
|
|
458
1512
|
}
|
|
459
1513
|
if (a === "-o" || a === "--output") {
|
|
460
1514
|
const next = argv[i + 1];
|
|
461
|
-
if (next) {
|
|
462
|
-
|
|
463
|
-
|
|
1515
|
+
if (next === void 0 || next.startsWith("-")) {
|
|
1516
|
+
console.error(`Error: ${a} requires a path argument`);
|
|
1517
|
+
process.exit(1);
|
|
464
1518
|
}
|
|
1519
|
+
flags["output"] = next;
|
|
1520
|
+
i++;
|
|
465
1521
|
continue;
|
|
466
1522
|
}
|
|
467
1523
|
positional.push(a);
|
|
@@ -487,20 +1543,25 @@ async function main() {
|
|
|
487
1543
|
case "import": {
|
|
488
1544
|
const input = positional[0];
|
|
489
1545
|
if (!input) {
|
|
490
|
-
console.error("Usage: jdf import <file.{md
|
|
1546
|
+
console.error("Usage: jdf import <file.{pdf,json,md}> [-o output.jdf] [--json]");
|
|
491
1547
|
process.exit(1);
|
|
492
1548
|
}
|
|
493
1549
|
const output = typeof flags.output === "string" ? flags.output : void 0;
|
|
1550
|
+
const forceJson = flags.json === true;
|
|
494
1551
|
const lower = input.toLowerCase();
|
|
495
1552
|
if (lower.endsWith(".md") || lower.endsWith(".markdown")) {
|
|
496
1553
|
await importMarkdown(input, output);
|
|
1554
|
+
process.exit(0);
|
|
497
1555
|
} else if (lower.endsWith(".pdf")) {
|
|
498
|
-
await
|
|
1556
|
+
await importPdf(input, output, { forceJson });
|
|
1557
|
+
process.exit(0);
|
|
1558
|
+
} else if (lower.endsWith(".json")) {
|
|
1559
|
+
await importJson(input, output, { forceJson });
|
|
1560
|
+
process.exit(0);
|
|
499
1561
|
} else {
|
|
500
1562
|
console.error(`Unsupported file type: ${input}`);
|
|
501
1563
|
process.exit(1);
|
|
502
1564
|
}
|
|
503
|
-
break;
|
|
504
1565
|
}
|
|
505
1566
|
default:
|
|
506
1567
|
console.error(`Unknown command: ${command}`);
|
|
@@ -508,7 +1569,11 @@ async function main() {
|
|
|
508
1569
|
process.exit(1);
|
|
509
1570
|
}
|
|
510
1571
|
} catch (e) {
|
|
511
|
-
|
|
1572
|
+
if (e?.name === "ImportJsonError") {
|
|
1573
|
+
console.error(`\u2717 ${e.message}`);
|
|
1574
|
+
} else {
|
|
1575
|
+
console.error(`Error: ${e?.message || e}`);
|
|
1576
|
+
}
|
|
512
1577
|
process.exit(1);
|
|
513
1578
|
}
|
|
514
1579
|
}
|