@uurtech/jdf-cli 0.1.5 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,6 +5,8 @@ import { fileURLToPath } from 'url';
5
5
  import Ajv from 'ajv';
6
6
  import addFormats from 'ajv-formats';
7
7
  import JSZip from 'jszip';
8
+ import { createHash } from 'crypto';
9
+ import { readFile } from 'fs/promises';
8
10
 
9
11
  // ../../packages/jdf-core/src/manifest.ts
10
12
  var JDFX_MANIFEST_VERSION = "1.0.0";
@@ -38,7 +40,14 @@ async function loadDocument(filePath) {
38
40
  } catch {
39
41
  }
40
42
  }
41
- const assetCount = Object.values(zip.files).filter((f) => !f.dir && f.name.startsWith("assets/")).length;
43
+ const assetPrefix = `${JDFX_ASSET_DIR}/`;
44
+ const assetCount = Object.values(zip.files).filter((f) => !f.dir && f.name.startsWith(assetPrefix)).length;
45
+ for (const fname of Object.keys(zip.files)) {
46
+ if (fname.includes("..") || fname.startsWith("/")) {
47
+ console.error(`\u2717 Refusing zip entry with traversal: ${fname}`);
48
+ return null;
49
+ }
50
+ }
42
51
  return { doc, bundle: { manifest, assetCount } };
43
52
  }
44
53
  return { doc: JSON.parse(fs.readFileSync(filePath, "utf-8")) };
@@ -95,21 +104,59 @@ var GENERATOR = "@uurtech/jdf-cli";
95
104
  function decodeBase64(s) {
96
105
  return Buffer.from(s, "base64");
97
106
  }
107
/**
 * Compute a short content fingerprint for a byte payload.
 *
 * @param {Buffer|Uint8Array|string} bytes - Data fed to the hash.
 * @returns {string} The first 16 hex characters of the SHA-1 digest.
 */
function hashBytes(bytes) {
  const hasher = createHash("sha1");
  hasher.update(bytes);
  const fullHex = hasher.digest("hex");
  return fullHex.substring(0, 16);
}
110
/**
 * Re-point every element that references `oldKey` to `newKey`, in place.
 *
 * Visits `page.elements` of every page in `doc.pages` and descends through
 * both `elements` and `children` arrays of each node.
 *
 * @param {object} doc - Document whose pages are mutated.
 * @param {string} oldKey - Resource key to look for.
 * @param {string} newKey - Resource key to write instead.
 * @returns {void}
 */
function rewriteResourceRefs(doc, oldKey, newKey) {
  const visit = (nodes) => {
    if (!nodes) {
      return;
    }
    for (const node of nodes) {
      if (node?.resource === oldKey) {
        node.resource = newKey;
      }
      if (node?.elements) {
        visit(node.elements);
      }
      if (node?.children) {
        visit(node.children);
      }
    }
  };
  const pageList = doc.pages || [];
  for (const page of pageList) {
    visit(page.elements);
  }
}
98
121
  function extractAssets(doc) {
99
122
  const assets = [];
100
123
  const cloned = JSON.parse(JSON.stringify(doc));
101
- let counter = 0;
124
+ const usedIds = /* @__PURE__ */ new Set();
125
+ const hashToId = /* @__PURE__ */ new Map();
126
+ let inlineCounter = 0;
127
+ if (cloned.resources?.images) {
128
+ for (const key of Object.keys(cloned.resources.images)) {
129
+ usedIds.add(key);
130
+ }
131
+ }
132
+ function nextInlineId() {
133
+ while (true) {
134
+ inlineCounter++;
135
+ const id = `asset-${inlineCounter}`;
136
+ if (!usedIds.has(id)) {
137
+ usedIds.add(id);
138
+ return id;
139
+ }
140
+ }
141
+ }
142
+ function intern(bytes, mimeType, ext) {
143
+ const h = hashBytes(bytes);
144
+ const existing = hashToId.get(h);
145
+ if (existing) return existing;
146
+ const id = nextInlineId();
147
+ hashToId.set(h, id);
148
+ assets.push({ id, bytes, mimeType, ext });
149
+ return id;
150
+ }
102
151
  function walk(els) {
103
152
  if (!els) return;
104
153
  for (const el of els) {
105
154
  if (el?.type === "image" && typeof el.src === "string" && el.src.startsWith("data:")) {
106
155
  const m = el.src.match(/^data:([^;]+);base64,(.*)$/);
107
156
  if (m) {
108
- counter++;
109
- const id = `asset-${counter}`;
110
157
  const mimeType = m[1];
111
158
  const ext = mimeType.split("/")[1]?.replace("jpeg", "jpg") || "bin";
112
- assets.push({ id, bytes: decodeBase64(m[2]), mimeType, ext });
159
+ const id = intern(decodeBase64(m[2]), mimeType, ext);
113
160
  delete el.src;
114
161
  el.resource = id;
115
162
  }
@@ -127,11 +174,25 @@ function extractAssets(doc) {
127
174
  const b64 = m ? m[2] : data;
128
175
  const mimeType = m ? m[1] : res.mimeType || "image/png";
129
176
  const ext = mimeType.split("/")[1]?.replace("jpeg", "jpg") || "bin";
130
- counter++;
131
- const id = key || `asset-${counter}`;
132
- assets.push({ id, bytes: decodeBase64(b64), mimeType, ext });
177
+ const bytes = decodeBase64(b64);
178
+ const h = hashBytes(bytes);
179
+ let canonicalId = hashToId.get(h);
180
+ if (!canonicalId) {
181
+ canonicalId = key;
182
+ hashToId.set(h, canonicalId);
183
+ usedIds.add(canonicalId);
184
+ assets.push({ id: canonicalId, bytes, mimeType, ext });
185
+ }
186
+ const updated = { ...res };
187
+ delete updated.data;
188
+ updated.src = "embedded";
189
+ if (canonicalId !== key) {
190
+ delete cloned.resources.images[key];
191
+ rewriteResourceRefs(cloned, key, canonicalId);
192
+ } else {
193
+ cloned.resources.images[key] = updated;
194
+ }
133
195
  }
134
- cloned.resources.images = {};
135
196
  }
136
197
  return { doc: cloned, assets };
137
198
  }
@@ -257,8 +318,12 @@ function convertMarkdownToJdf(md, title, baseDir = process.cwd()) {
257
318
  content: text,
258
319
  position: { x: 0, y },
259
320
  width: contentWidth,
260
- heading: true,
321
+ // Emit the actual H1..H6 level instead of `heading: true`. RAG
322
+ // chunkers and the JDF TOC builder both key on the numeric level for
323
+ // hierarchy; the boolean form collapsed every heading to one bucket.
324
+ heading: level,
261
325
  tocEntry: text,
326
+ tocLevel: level,
262
327
  style: { fontFamily: "Inter", fontSize, fontWeight: "bold", color: level <= 2 ? "#0f172a" : "#1e293b" }
263
328
  });
264
329
  y += height2 + 4;
@@ -339,12 +404,20 @@ function convertMarkdownToJdf(md, title, baseDir = process.cwd()) {
339
404
  }
340
405
  if (line.startsWith("```")) {
341
406
  const codeLines = [];
407
+ const fenceLine = i;
342
408
  i++;
409
+ let closed = false;
343
410
  while (i < lines.length && !lines[i].startsWith("```")) {
344
411
  codeLines.push(lines[i]);
345
412
  i++;
346
413
  }
347
- i++;
414
+ if (i < lines.length) {
415
+ closed = true;
416
+ i++;
417
+ }
418
+ if (!closed) {
419
+ console.warn(`[jdf-cli] warning: unterminated code fence at line ${fenceLine + 1} \u2014 content to EOF treated as code`);
420
+ }
348
421
  const code = codeLines.join("\n");
349
422
  const height2 = codeLines.length * 4 + 8;
350
423
  if (y + height2 > maxY) {
@@ -369,7 +442,7 @@ function convertMarkdownToJdf(md, title, baseDir = process.cwd()) {
369
442
  continue;
370
443
  }
371
444
  let para = "";
372
- while (i < lines.length && lines[i].trim() !== "" && !lines[i].match(/^#{1,6}\s/) && !lines[i].match(/^\s*[-*+]\s/) && !lines[i].startsWith("```")) {
445
+ while (i < lines.length && lines[i].trim() !== "" && !lines[i].match(/^#{1,6}\s/) && !lines[i].match(/^\s*[-*+]\s/) && !lines[i].match(/^\s*\d+\.\s/) && !lines[i].startsWith("```")) {
373
446
  para += (para ? " " : "") + lines[i].trim();
374
447
  i++;
375
448
  }
@@ -409,30 +482,1001 @@ function convertMarkdownToJdf(md, title, baseDir = process.cwd()) {
409
482
  };
410
483
  }
411
484
 
485
+ // ../../packages/jdf-pdf-import/src/core.ts
486
+ var PT_TO_MM = 0.352778;
487
/**
 * Map a font name to a generic family stack plus weight/style hints.
 *
 * Matching is case-insensitive substring matching on the name. The word
 * "serif" only counts as a serif hint when the name does not also contain
 * "sans", so "sans-serif" or "MS Sans Serif" are no longer classified as
 * Times New Roman.
 *
 * @param {string} [name] - Font name; falsy values are treated as "".
 * @returns {{family: string, weight: ("bold"|undefined), style: ("italic"|undefined)}}
 */
function classifyFont(name) {
  const n = (name || "").toLowerCase();
  const bold = /bold|black|heavy|semibold|demibold|extrabold/.test(n);
  const italic = /italic|oblique/.test(n);

  const SANS_STACK = "Inter, Helvetica, Arial, sans-serif";
  const SERIF_STACK = "Times New Roman, serif";
  const MONO_STACK = "JetBrains Mono, ui-monospace, monospace";

  const namedSerif = ["times", "roman", "georgia", "garamond", "baskerville"].some((hint) => n.includes(hint));
  // "sans-serif" / "sansserif" contain "serif" but are sans fonts.
  const genericSerif = n.includes("serif") && !n.includes("sans");
  const mono = ["courier", "mono", "consolas", "menlo", "source code", "fira code"].some((hint) => n.includes(hint));

  // Everything that is neither serif nor monospace falls back to the sans
  // stack, which is also what explicit sans names resolve to.
  let family = SANS_STACK;
  if (namedSerif || genericSerif) {
    family = SERIF_STACK;
  } else if (mono) {
    family = MONO_STACK;
  }

  return {
    family,
    weight: bold ? "bold" : void 0,
    style: italic ? "italic" : void 0
  };
}
505
/**
 * Round a number to the nearest integer and clamp it to the 0..255 range.
 *
 * @param {number} v - Value to clamp.
 * @returns {number} Integer in [0, 255]; NaN is passed through as NaN.
 */
function clampByte(v) {
  const rounded = Math.round(v);
  const atMost255 = Math.min(255, rounded);
  return Math.max(0, atMost255);
}
508
/**
 * Format three colour channels as a `#rrggbb` hex string.
 *
 * Each channel is passed through `clampByte` before being written as two
 * lowercase hex digits.
 *
 * @param {number} r - Red channel.
 * @param {number} g - Green channel.
 * @param {number} b - Blue channel.
 * @returns {string} Colour string starting with "#".
 */
function rgbToHex(r, g, b) {
  const digits = [r, g, b].map((channel) => clampByte(channel).toString(16).padStart(2, "0"));
  return `#${digits.join("")}`;
}
512
/**
 * Multiply two 2-D affine matrices given as six-element arrays
 * `[a, b, c, d, e, f]`.
 *
 * With the point mapping used by `tx` (x' = a*x + c*y + e,
 * y' = b*x + d*y + f), the result is the transform that applies `a` first
 * and `b` second.
 *
 * @param {ArrayLike<number>} a - Left-hand matrix.
 * @param {ArrayLike<number>} b - Right-hand matrix.
 * @returns {number[]} New six-element matrix.
 */
function multiplyCtm(a, b) {
  const a0 = a[0];
  const a1 = a[1];
  const a2 = a[2];
  const a3 = a[3];
  const a4 = a[4];
  const a5 = a[5];
  const linear = [
    a0 * b[0] + a1 * b[2],
    a0 * b[1] + a1 * b[3],
    a2 * b[0] + a3 * b[2],
    a2 * b[1] + a3 * b[3]
  ];
  const translateX = a4 * b[0] + a5 * b[2] + b[4];
  const translateY = a4 * b[1] + a5 * b[3] + b[5];
  return [...linear, translateX, translateY];
}
522
/**
 * Apply a six-element affine matrix `[a, b, c, d, e, f]` to a point.
 *
 * @param {ArrayLike<number>} ctm - Matrix to apply.
 * @param {number} x - Input x coordinate.
 * @param {number} y - Input y coordinate.
 * @returns {{x: number, y: number}} The transformed point.
 */
function tx(ctm, x, y) {
  const mappedX = ctm[0] * x + ctm[2] * y + ctm[4];
  const mappedY = ctm[1] * x + ctm[3] * y + ctm[5];
  return { x: mappedX, y: mappedY };
}
525
/**
 * Walk a page's operator list while tracking a minimal graphics state, and
 * collect per-text-operator paint attributes, vector shapes and image
 * placements.
 *
 * Shape and image geometry is transformed by the tracked CTM, converted with
 * `viewport.convertToViewportPoint`, and scaled by `PT_TO_MM`.
 *
 * @param {object} page - Object exposing `getOperatorList()` that resolves to
 *   `{ fnArray, argsArray }`.
 * @param {object} OPS - Operator-name → operator-code table that `fnArray`
 *   entries are compared against.
 * @param {object} viewport - Object exposing `convertToViewportPoint(x, y)`.
 * @returns {Promise<{textColors: string[], textOpacities: number[],
 *   textRenderingModes: number[], shapes: object[], imagePositions: object[]}>}
 *   The three text arrays are indexed by the order in which show-text
 *   operators were encountered.
 */
async function walkOps(page, OPS, viewport) {
  const toViewport = (x, y) => {
    const [vx, vy] = viewport.convertToViewportPoint(x, y);
    return { x: vx, y: vy };
  };
  const opList = await page.getOperatorList();
  const fnArr = opList.fnArray;
  const argsArr = opList.argsArray;
  // Current graphics state; snapshots are pushed/popped on save/restore.
  const gs = {
    ctm: [1, 0, 0, 1, 0, 0],
    fill: "#000000",
    stroke: "#000000",
    lineWidth: 1,
    fillAlpha: 1,
    strokeAlpha: 1,
    textRenderingMode: 0
  };
  const stack = [];
  const textColors = [];
  const textOpacities = [];
  const textRenderingModes = [];
  const shapes = [];
  const imagePositions = [];
  let textIdx = 0;
  // Path under construction; coordinates are already CTM-transformed.
  let pathSegments = [];
  let pathRect = null;
  let pathStart = null;
  let pathLast = null;

  // Discard the path under construction.
  function resetPath() {
    pathSegments = [];
    pathRect = null;
    pathStart = null;
    pathLast = null;
  }

  // Emit the current path as a shape (rect, 2-point line, or generic path)
  // and then reset the path state.
  function flushPath(isFill, isStroke) {
    if (pathRect) {
      const tl = toViewport(pathRect.x, pathRect.y + pathRect.h);
      const br = toViewport(pathRect.x + pathRect.w, pathRect.y);
      const x = Math.min(tl.x, br.x);
      const y = Math.min(tl.y, br.y);
      const w = Math.abs(br.x - tl.x);
      const h = Math.abs(br.y - tl.y);
      shapes.push({
        kind: "rect",
        x: x * PT_TO_MM,
        y: y * PT_TO_MM,
        width: w * PT_TO_MM,
        height: h * PT_TO_MM,
        fill: isFill ? gs.fill : void 0,
        stroke: isStroke ? gs.stroke : void 0,
        strokeWidth: isStroke ? gs.lineWidth * PT_TO_MM : void 0,
        opacity: isFill ? gs.fillAlpha : gs.strokeAlpha
      });
    } else if (pathSegments.length === 2 && pathSegments[0].type === "M" && pathSegments[1].type === "L") {
      // Single straight line: path data is expressed relative to its
      // bounding-box origin.
      const a = pathSegments[0].pts;
      const b = pathSegments[1].pts;
      const va = toViewport(a[0], a[1]);
      const vb = toViewport(b[0], b[1]);
      const minX = Math.min(va.x, vb.x);
      const minY = Math.min(va.y, vb.y);
      const maxX = Math.max(va.x, vb.x);
      const maxY = Math.max(va.y, vb.y);
      const x1Local = (va.x - minX) * PT_TO_MM;
      const y1Local = (va.y - minY) * PT_TO_MM;
      const x2Local = (vb.x - minX) * PT_TO_MM;
      const y2Local = (vb.y - minY) * PT_TO_MM;
      shapes.push({
        kind: "path",
        x: minX * PT_TO_MM,
        y: minY * PT_TO_MM,
        // Keep a minimal extent so axis-aligned lines do not collapse to 0.
        width: Math.max(0.05, (maxX - minX) * PT_TO_MM),
        height: Math.max(0.05, (maxY - minY) * PT_TO_MM),
        stroke: isStroke ? gs.stroke : void 0,
        strokeWidth: isStroke ? gs.lineWidth * PT_TO_MM : void 0,
        opacity: gs.strokeAlpha,
        path: `M ${x1Local.toFixed(2)} ${y1Local.toFixed(2)} L ${x2Local.toFixed(2)} ${y2Local.toFixed(2)}`
      });
    } else if (pathSegments.length > 0) {
      const vpSegments = pathSegments.map((seg) => {
        if (seg.type === "Z") return seg;
        const out = [];
        for (let i = 0; i < seg.pts.length; i += 2) {
          const v = toViewport(seg.pts[i], seg.pts[i + 1]);
          out.push(v.x, v.y);
        }
        return { type: seg.type, pts: out };
      });
      let minX = Infinity;
      let minY = Infinity;
      let maxX = -Infinity;
      let maxY = -Infinity;
      for (const seg of vpSegments) {
        for (let i = 0; i < seg.pts.length; i += 2) {
          const x = seg.pts[i];
          const y = seg.pts[i + 1];
          if (x < minX) minX = x;
          if (x > maxX) maxX = x;
          if (y < minY) minY = y;
          if (y > maxY) maxY = y;
        }
      }
      if (isFinite(minX) && isFinite(minY)) {
        const bw = maxX - minX;
        const bh = maxY - minY;
        const d = vpSegments.map((seg) => {
          if (seg.type === "Z") return "Z";
          const p = [];
          for (let i = 0; i < seg.pts.length; i += 2) {
            p.push(((seg.pts[i] - minX) * PT_TO_MM).toFixed(2));
            p.push(((seg.pts[i + 1] - minY) * PT_TO_MM).toFixed(2));
          }
          return `${seg.type} ${p.join(" ")}`;
        }).join(" ");
        shapes.push({
          kind: "path",
          x: minX * PT_TO_MM,
          y: minY * PT_TO_MM,
          width: bw * PT_TO_MM,
          height: bh * PT_TO_MM,
          fill: isFill ? gs.fill : void 0,
          stroke: isStroke ? gs.stroke : void 0,
          strokeWidth: isStroke ? gs.lineWidth * PT_TO_MM : void 0,
          opacity: isFill ? gs.fillAlpha : gs.strokeAlpha,
          path: d
        });
      }
    }
    resetPath();
  }

  for (let i = 0; i < fnArr.length; i++) {
    const fn = fnArr[i];
    const args = argsArr[i] || [];
    if (fn === OPS.save) {
      stack.push({ ctm: [...gs.ctm], fill: gs.fill, stroke: gs.stroke, lineWidth: gs.lineWidth, fillAlpha: gs.fillAlpha, strokeAlpha: gs.strokeAlpha, textRenderingMode: gs.textRenderingMode });
    } else if (fn === OPS.restore) {
      const s = stack.pop();
      if (s) Object.assign(gs, s);
    } else if (fn === OPS.transform) {
      // PDF `cm` concatenates as CTM' = M x CTM: the new matrix is applied to
      // a point first, then the previous CTM. multiplyCtm(a, b) yields
      // "apply a, then b", so the operand matrix must be the first argument.
      gs.ctm = multiplyCtm(args, gs.ctm);
    } else if (fn === OPS.setFillRGBColor) {
      gs.fill = rgbToHex(args[0], args[1], args[2]);
    } else if (fn === OPS.setStrokeRGBColor) {
      gs.stroke = rgbToHex(args[0], args[1], args[2]);
    } else if (fn === OPS.setFillGray) {
      gs.fill = rgbToHex(args[0], args[0], args[0]);
    } else if (fn === OPS.setStrokeGray) {
      gs.stroke = rgbToHex(args[0], args[0], args[0]);
    } else if (fn === OPS.setFillCMYKColor || fn === OPS.setStrokeCMYKColor) {
      // Naive CMYK -> RGB conversion (no colour profile).
      const c = args[0];
      const m = args[1];
      const y = args[2];
      const k = args[3];
      const r = (1 - c) * (1 - k) * 255;
      const g = (1 - m) * (1 - k) * 255;
      const b = (1 - y) * (1 - k) * 255;
      const hex = rgbToHex(r, g, b);
      if (fn === OPS.setFillCMYKColor) gs.fill = hex;
      else gs.stroke = hex;
    } else if (fn === OPS.setLineWidth) {
      gs.lineWidth = args[0];
    } else if (fn === OPS.setTextRenderingMode) {
      gs.textRenderingMode = args[0];
    } else if (fn === OPS.setGState) {
      // args[0] is read as a list of [key, value] pairs; other shapes are ignored.
      const dict = args[0];
      if (Array.isArray(dict)) {
        for (const entry of dict) {
          if (!Array.isArray(entry)) continue;
          const [key, val] = entry;
          if (key === "LW") gs.lineWidth = val;
          else if (key === "ca") gs.fillAlpha = val;
          else if (key === "CA") gs.strokeAlpha = val;
        }
      }
    } else if (fn === OPS.showText || fn === OPS.showSpacedText || fn === OPS.nextLineShowText || fn === OPS.nextLineSetSpacingShowText) {
      // Record the paint state in effect for this show-text operator.
      textColors[textIdx] = gs.fill;
      textOpacities[textIdx] = gs.fillAlpha;
      textRenderingModes[textIdx] = gs.textRenderingMode;
      textIdx++;
    } else if (fn === OPS.rectangle) {
      const [x, y, w, h] = args;
      const p1 = tx(gs.ctm, x, y);
      const p3 = tx(gs.ctm, x + w, y + h);
      pathRect = {
        x: Math.min(p1.x, p3.x),
        y: Math.min(p1.y, p3.y),
        w: Math.abs(p3.x - p1.x),
        h: Math.abs(p3.y - p1.y)
      };
    } else if (fn === OPS.constructPath) {
      const [pathOps, pathArgs] = args;
      let ai = 0;
      for (const op of pathOps) {
        if (op === OPS.moveTo) {
          const p = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          pathSegments.push({ type: "M", pts: [p.x, p.y] });
          pathStart = { x: p.x, y: p.y };
          pathLast = { x: p.x, y: p.y };
        } else if (op === OPS.lineTo) {
          const p = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          pathSegments.push({ type: "L", pts: [p.x, p.y] });
          pathLast = { x: p.x, y: p.y };
        } else if (op === OPS.curveTo) {
          const p1 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p2 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p3 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          pathSegments.push({ type: "C", pts: [p1.x, p1.y, p2.x, p2.y, p3.x, p3.y] });
          pathLast = { x: p3.x, y: p3.y };
        } else if (op === OPS.curveTo2) {
          // First control point is the current point.
          const p2 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p3 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p1 = pathLast || { x: 0, y: 0 };
          pathSegments.push({ type: "C", pts: [p1.x, p1.y, p2.x, p2.y, p3.x, p3.y] });
          pathLast = { x: p3.x, y: p3.y };
        } else if (op === OPS.curveTo3) {
          // Second control point coincides with the end point.
          const p1 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          const p3 = tx(gs.ctm, pathArgs[ai], pathArgs[ai + 1]);
          ai += 2;
          pathSegments.push({ type: "C", pts: [p1.x, p1.y, p3.x, p3.y, p3.x, p3.y] });
          pathLast = { x: p3.x, y: p3.y };
        } else if (op === OPS.closePath) {
          pathSegments.push({ type: "Z", pts: [] });
          if (pathStart) pathLast = { ...pathStart };
        }
      }
    } else if (fn === OPS.fill || fn === OPS.stroke || fn === OPS.fillStroke || fn === OPS.eoFill || fn === OPS.eoFillStroke || fn === OPS.closeFillStroke || fn === OPS.closeStroke || fn === OPS.closeEOFillStroke) {
      const isFill = fn === OPS.fill || fn === OPS.fillStroke || fn === OPS.eoFill || fn === OPS.eoFillStroke || fn === OPS.closeFillStroke || fn === OPS.closeEOFillStroke;
      const isStroke = fn === OPS.stroke || fn === OPS.fillStroke || fn === OPS.eoFillStroke || fn === OPS.closeFillStroke || fn === OPS.closeStroke || fn === OPS.closeEOFillStroke;
      flushPath(isFill, isStroke);
    } else if (fn === OPS.endPath || fn === OPS.clip || fn === OPS.eoClip) {
      // Path is consumed without being painted.
      resetPath();
    } else if (fn === OPS.paintImageXObject || fn === OPS.paintImageMaskXObject || fn === OPS.paintInlineImageXObject) {
      // Images occupy the unit square in their own space; map its corners
      // and take the bounding box.
      const name = args[0];
      const c = gs.ctm;
      const corners = [tx(c, 0, 0), tx(c, 1, 0), tx(c, 1, 1), tx(c, 0, 1)];
      const vpCorners = corners.map((p) => toViewport(p.x, p.y));
      const xs = vpCorners.map((p) => p.x);
      const ys = vpCorners.map((p) => p.y);
      const minX = Math.min(...xs);
      const maxX = Math.max(...xs);
      const minY = Math.min(...ys);
      const maxY = Math.max(...ys);
      imagePositions.push({
        name,
        x: minX * PT_TO_MM,
        y: minY * PT_TO_MM,
        w: (maxX - minX) * PT_TO_MM,
        h: (maxY - minY) * PT_TO_MM
      });
    }
  }
  return { textColors, textOpacities, textRenderingModes, shapes, imagePositions };
}
776
/**
 * Resolve each image placement to a PNG data URL.
 *
 * For every position the image object is looked up via `page.objs.get`
 * (falling back to `page.commonObjs.get` when that call throws), giving up
 * after 250 ms. Found images are encoded with `runtime.encodePng` and the
 * result is memoised in `dataUrlCache` under the image name.
 *
 * @param {object} page - Object exposing `objs.get` / `commonObjs.get`.
 * @param {Array<{name: string}>} positions - Image placements to resolve.
 * @param {object} runtime - Object exposing `encodePng(width, height, kind, data)`.
 * @param {Map<string, string>} dataUrlCache - Name → data URL cache (mutated).
 * @returns {Promise<Array<{pos: object, dataUrl: string}>>} One entry per
 *   position that could be resolved, in input order.
 */
async function extractImages(page, positions, runtime, dataUrlCache) {
  // Callback-style lookup wrapped in a promise that resolves exactly once.
  const lookupImage = (name) => new Promise((resolve) => {
    let finished = false;
    const timeoutHandle = setTimeout(() => finish(null), 250);
    const finish = (value) => {
      if (finished) return;
      finished = true;
      clearTimeout(timeoutHandle);
      resolve(value);
    };
    try {
      page.objs.get(name, (img) => finish(img));
    } catch {
      try {
        page.commonObjs.get(name, (img) => finish(img));
      } catch {
        finish(null);
      }
    }
  });

  const resolveOne = async (pos) => {
    if (dataUrlCache.has(pos.name)) {
      return { pos, dataUrl: dataUrlCache.get(pos.name) };
    }
    let image;
    try {
      image = await lookupImage(pos.name);
    } catch {
      image = null;
    }
    const usable = image && image.data && image.width && image.height;
    if (!usable) {
      return null;
    }
    const encoded = runtime.encodePng(image.width, image.height, image.kind || 0, image.data);
    if (!encoded) {
      return null;
    }
    dataUrlCache.set(pos.name, encoded);
    return { pos, dataUrl: encoded };
  };

  const results = await Promise.all(positions.map((pos) => resolveOne(pos)));
  return results.filter((entry) => entry !== null);
}
814
/**
 * Collect the rectangles and URLs of a page's Link annotations.
 *
 * Annotation rectangles are converted with `viewport.convertToViewportPoint`
 * and scaled by `PT_TO_MM`. If `page.getAnnotations()` rejects, an empty
 * list is returned.
 *
 * @param {object} page - Object exposing `getAnnotations()`.
 * @param {object} viewport - Object exposing `convertToViewportPoint(x, y)`.
 * @returns {Promise<Array<{rectMm: {x: number, y: number, w: number, h: number}, url: (string|undefined)}>>}
 */
async function extractLinks(page, viewport) {
  let annotations;
  try {
    annotations = await page.getAnnotations();
  } catch {
    return [];
  }
  const toPoint = (px, py) => {
    const [vx, vy] = viewport.convertToViewportPoint(px, py);
    return { x: vx, y: vy };
  };
  const found = [];
  for (const annot of annotations) {
    const isUsableLink = annot.subtype === "Link" && annot.rect && annot.rect.length >= 4;
    if (!isUsableLink) continue;
    const [x1, y1, x2, y2] = annot.rect;
    const first = toPoint(x1, y1);
    const second = toPoint(x2, y2);
    const left = Math.min(first.x, second.x);
    const top = Math.min(first.y, second.y);
    const right = Math.max(first.x, second.x);
    const bottom = Math.max(first.y, second.y);
    found.push({
      rectMm: {
        x: left * PT_TO_MM,
        y: top * PT_TO_MM,
        w: (right - left) * PT_TO_MM,
        h: (bottom - top) * PT_TO_MM
      },
      url: annot.url || annot.unsafeUrl
    });
  }
  return found;
}
847
/**
 * Collect descriptors for a page's Widget (form field) annotations.
 *
 * Each descriptor carries the field rectangle (converted through `viewport`
 * and scaled by `PT_TO_MM`), the field type/name/value, the option list and
 * booleans decoded from the `fieldFlags` bit mask. If
 * `page.getAnnotations()` rejects, an empty list is returned.
 *
 * @param {object} page - Object exposing `getAnnotations()`.
 * @param {object} viewport - Object exposing `convertToViewportPoint(x, y)`.
 * @returns {Promise<object[]>} One descriptor per Widget annotation that has
 *   a rect of at least four entries.
 */
async function extractFormWidgets(page, viewport) {
  let annotations;
  try {
    annotations = await page.getAnnotations();
  } catch {
    return [];
  }
  const toPoint = (px, py) => {
    const [vx, vy] = viewport.convertToViewportPoint(px, py);
    return { x: vx, y: vy };
  };
  // Normalise one raw option entry to { value, label }.
  const toOption = (raw) => {
    let value = "";
    if (typeof raw?.exportValue === "string") {
      value = raw.exportValue;
    } else if (typeof raw?.value === "string") {
      value = raw.value;
    }
    const label = typeof raw?.displayValue === "string" ? raw.displayValue : void 0;
    return { value, label };
  };
  const widgets = [];
  for (const annot of annotations) {
    const isUsableWidget = annot.subtype === "Widget" && annot.rect && annot.rect.length >= 4;
    if (!isUsableWidget) continue;
    const [x1, y1, x2, y2] = annot.rect;
    const first = toPoint(x1, y1);
    const second = toPoint(x2, y2);
    const left = Math.min(first.x, second.x);
    const top = Math.min(first.y, second.y);
    const right = Math.max(first.x, second.x);
    const bottom = Math.max(first.y, second.y);
    const flags = typeof annot.fieldFlags === "number" ? annot.fieldFlags : 0;
    const isSet = (mask) => (flags & mask) !== 0;
    let options = [];
    if (Array.isArray(annot.options)) {
      options = annot.options.map((raw) => toOption(raw)).filter((opt) => opt.value !== "");
    }
    widgets.push({
      rectMm: {
        x: left * PT_TO_MM,
        y: top * PT_TO_MM,
        w: (right - left) * PT_TO_MM,
        h: (bottom - top) * PT_TO_MM
      },
      fieldType: annot.fieldType || "",
      // Unnamed fields get a 1-based positional name.
      fieldName: annot.fieldName || `field-${widgets.length + 1}`,
      fieldValue: annot.fieldValue ?? annot.buttonValue ?? "",
      multiLine: isSet(4096),
      multiSelect: isSet(2097152),
      combo: isSet(131072),
      pushButton: isSet(65536),
      radio: isSet(32768),
      options,
      readonly: isSet(1),
      required: isSet(2)
    });
  }
  return widgets;
}
896
/**
 * Flatten an outline tree into a pre-order list of `{ title, pageIndex }`.
 *
 * String destinations are resolved through `doc.getDestination`; the first
 * entry of the destination array is passed to `doc.getPageIndex`. Items whose
 * destination cannot be resolved are left out, but their children are still
 * visited.
 *
 * @param {object} doc - Object exposing `getDestination` and `getPageIndex`.
 * @param {Array<object>|null|undefined} outline - Root outline items.
 * @returns {Promise<Array<{title: string, pageIndex: number}>>}
 */
async function flattenOutline(doc, outline) {
  if (!outline) {
    return [];
  }
  const flat = [];

  // Resolve one entry's page index and append it when it is numeric.
  const record = async (entry) => {
    const target = typeof entry.dest === "string" ? await doc.getDestination(entry.dest) : entry.dest;
    if (!Array.isArray(target) || !target[0]) {
      return;
    }
    const pageIndex = await doc.getPageIndex(target[0]);
    if (typeof pageIndex === "number") {
      flat.push({ title: entry.title, pageIndex });
    }
  };

  const descend = async (entries) => {
    for (const entry of entries) {
      try {
        await record(entry);
      } catch {
        // Best effort: an unresolvable destination just skips this entry.
      }
      if (entry.items?.length) {
        await descend(entry.items);
      }
    }
  };

  await descend(outline);
  return flat;
}
919
+ async function importPdfToJdf(source, title, runtime, options = {}) {
920
+ const pdfjs = options.pdfjs || runtime.pdfjs;
921
+ if (!pdfjs) {
922
+ throw new Error("[@jdf/pdf-import] runtime did not provide a pdfjs module");
923
+ }
924
+ const OPS = pdfjs.OPS;
925
+ let data;
926
+ if (typeof source === "string") {
927
+ if (source.startsWith("data:") || source.startsWith("http")) {
928
+ const r = await fetch(source);
929
+ data = await r.arrayBuffer();
930
+ } else {
931
+ if (!runtime.readFile) {
932
+ throw new Error("[@jdf/pdf-import] cannot read file path: runtime has no readFile()");
933
+ }
934
+ data = await runtime.readFile(source);
935
+ }
936
+ } else {
937
+ data = source;
938
+ }
939
+ const doc = await pdfjs.getDocument({
940
+ data,
941
+ // The runtime adapter declares whether it supports a real Web Worker.
942
+ // We don't sniff `typeof Worker` here because Node 22+ exposes a global
943
+ // `Worker` (worker_threads) that isn't compatible with PDF.js's worker
944
+ // protocol — the sniff would silently re-enable the broken path on
945
+ // newer Node. Browser entry leaves this unset (= false = real worker
946
+ // via GlobalWorkerOptions.workerSrc); node entry sets `true`.
947
+ disableWorker: runtime.disableWorker === true,
948
+ isEvalSupported: false
949
+ }).promise;
950
+ const pages = [];
951
+ const imageResources = {};
952
+ let imgCounter = 0;
953
+ const dataUrlCache = /* @__PURE__ */ new Map();
954
+ const resourceKeyByName = /* @__PURE__ */ new Map();
955
+ const outline = await doc.getOutline().catch(() => null);
956
+ await flattenOutline(doc, outline);
957
+ for (let pi = 1; pi <= doc.numPages; pi++) {
958
+ let findLinkForRun2 = function(r) {
959
+ const cx = r.x + r.width / 2;
960
+ const cy = r.y + r.height / 2;
961
+ for (const a of links) {
962
+ if (cx >= a.rectMm.x && cx <= a.rectMm.x + a.rectMm.w && cy >= a.rectMm.y && cy <= a.rectMm.y + a.rectMm.h) {
963
+ return a;
964
+ }
965
+ }
966
+ return null;
967
+ };
968
+ const page = await doc.getPage(pi);
969
+ const viewport = page.getViewport({ scale: 1 });
970
+ const pageW = viewport.width;
971
+ const pageH = viewport.height;
972
+ const { canvas, context } = runtime.createCanvas(Math.ceil(pageW), Math.ceil(pageH));
973
+ try {
974
+ await page.render({ canvasContext: context, viewport, canvas }).promise;
975
+ } catch {
976
+ }
977
+ const ops = await walkOps(page, OPS, viewport);
978
+ const links = await extractLinks(page, viewport);
979
+ const formWidgets = await extractFormWidgets(page, viewport);
980
+ const textContent = await page.getTextContent({ disableCombineTextItems: false });
981
+ const items = textContent.items;
982
+ const fontMap = /* @__PURE__ */ new Map();
983
+ for (const k of Object.keys(textContent.styles || {})) {
984
+ const s = textContent.styles[k];
985
+ let realName = s.fontFamily || k;
986
+ try {
987
+ const has = typeof page.commonObjs.has === "function" ? page.commonObjs.has(k) : false;
988
+ if (has) {
989
+ await new Promise((resolve) => {
990
+ let settled = false;
991
+ const done = () => {
992
+ if (!settled) {
993
+ settled = true;
994
+ resolve();
995
+ }
996
+ };
997
+ try {
998
+ page.commonObjs.get(k, (font) => {
999
+ if (font?.name) realName = font.name;
1000
+ else if (font?.loadedName) realName = font.loadedName;
1001
+ done();
1002
+ });
1003
+ } catch {
1004
+ done();
1005
+ }
1006
+ setTimeout(done, 100);
1007
+ });
1008
+ }
1009
+ } catch {
1010
+ }
1011
+ const cls = classifyFont(realName);
1012
+ if (!cls.weight && /bold/i.test(s.fontFamily || "")) cls.weight = "bold";
1013
+ if (!cls.style && /italic|oblique/i.test(s.fontFamily || "")) cls.style = "italic";
1014
+ fontMap.set(k, cls);
1015
+ }
1016
+ const runs = [];
1017
+ const safeNum = (v, fallback) => {
1018
+ const n = typeof v === "number" ? v : Number(v);
1019
+ return Number.isFinite(n) ? n : fallback;
1020
+ };
1021
+ items.forEach((it, idx) => {
1022
+ if (!it.str || !it.str.length) return;
1023
+ if ((ops.textRenderingModes[idx] ?? 0) === 3) return;
1024
+ const tr = it.transform;
1025
+ const fontSize = safeNum(Math.hypot(safeNum(tr?.[2], 0), safeNum(tr?.[3], 0)), 0) || safeNum(it.height, 0) || 10;
1026
+ const baseX = safeNum(tr?.[4], 0);
1027
+ const baseY = safeNum(tr?.[5], 0);
1028
+ const conv = viewport.convertToViewportPoint(baseX, baseY);
1029
+ const vx = safeNum(conv?.[0], 0);
1030
+ const vy = safeNum(conv?.[1], 0);
1031
+ const ascent = it.height ? safeNum(it.height, fontSize) * 0.78 : fontSize * 0.78;
1032
+ const yTop = vy - ascent;
1033
+ const w = safeNum(it.width, 0);
1034
+ runs.push({
1035
+ text: it.str,
1036
+ x: safeNum(vx * PT_TO_MM, 0),
1037
+ y: safeNum(yTop * PT_TO_MM, 0),
1038
+ fontSize: safeNum(fontSize, 10),
1039
+ fontName: it.fontName,
1040
+ width: safeNum(w * PT_TO_MM, 0),
1041
+ height: safeNum((it.height || fontSize) * PT_TO_MM, fontSize * PT_TO_MM),
1042
+ color: ops.textColors[idx] || "#000000",
1043
+ opacity: safeNum(ops.textOpacities[idx], 1)
1044
+ });
1045
+ });
1046
+ runs.sort((a, b) => a.y - b.y || a.x - b.x);
1047
+ const lines = [];
1048
+ const Y_TOL = 0.6;
1049
+ for (const r of runs) {
1050
+ if (!r.text.length) continue;
1051
+ const last = lines[lines.length - 1];
1052
+ if (!last) {
1053
+ lines.push({ ...r });
1054
+ continue;
1055
+ }
1056
+ const sameLine = Math.abs(last.y - r.y) <= Y_TOL;
1057
+ const sameStyle = Math.abs(last.fontSize - r.fontSize) < 0.4 && last.fontName === r.fontName && last.color === r.color && Math.abs(last.opacity - r.opacity) < 0.05;
1058
+ const gapMm = r.x - (last.x + last.width);
1059
+ const emMm = r.fontSize * PT_TO_MM;
1060
+ const mergeOk = sameLine && sameStyle && gapMm >= -0.2 && gapMm <= emMm * 0.45;
1061
+ if (mergeOk) {
1062
+ const lastEndsSpace = /\s$/.test(last.text);
1063
+ const currStartsSpace = /^\s/.test(r.text);
1064
+ const sep = gapMm > emMm * 0.08 && !lastEndsSpace && !currStartsSpace ? " " : "";
1065
+ last.text = last.text + sep + r.text;
1066
+ const newExtent = r.x - last.x + r.width;
1067
+ last.width = Math.max(last.width, newExtent);
1068
+ } else {
1069
+ lines.push({ ...r });
1070
+ }
1071
+ }
1072
+ const elements = [];
1073
+ for (const sh of ops.shapes) {
1074
+ if (sh.width < 0.3 && sh.height < 0.3) continue;
1075
+ const shapeType = sh.kind;
1076
+ const shape = {
1077
+ type: "shape",
1078
+ shape: shapeType,
1079
+ position: { x: Math.round(sh.x * 100) / 100, y: Math.round(sh.y * 100) / 100 },
1080
+ width: Math.max(0.1, Math.round(sh.width * 100) / 100),
1081
+ height: Math.max(0.1, Math.round(sh.height * 100) / 100)
1082
+ };
1083
+ if (sh.fill) shape.fill = sh.fill;
1084
+ if (sh.stroke) shape.stroke = { color: sh.stroke, width: sh.strokeWidth || 0.3 };
1085
+ if (shapeType === "path" && sh.path) shape.path = sh.path;
1086
+ if (sh.opacity != null && sh.opacity < 0.999) {
1087
+ shape.style = { opacity: Math.round(sh.opacity * 100) / 100 };
1088
+ }
1089
+ elements.push(shape);
1090
+ }
1091
+ const imgs = await extractImages(page, ops.imagePositions, runtime, dataUrlCache);
1092
+ for (const { pos, dataUrl } of imgs) {
1093
+ let resourceKey = resourceKeyByName.get(pos.name);
1094
+ if (!resourceKey) {
1095
+ resourceKey = `img${imgCounter++}`;
1096
+ resourceKeyByName.set(pos.name, resourceKey);
1097
+ const base64 = dataUrl.replace(/^data:image\/[a-zA-Z+]+;base64,/, "");
1098
+ imageResources[resourceKey] = {
1099
+ src: "embedded",
1100
+ mimeType: "image/png",
1101
+ data: base64
1102
+ };
1103
+ }
1104
+ elements.push({
1105
+ type: "image",
1106
+ resource: resourceKey,
1107
+ position: { x: Math.round(pos.x * 100) / 100, y: Math.round(pos.y * 100) / 100 },
1108
+ width: Math.max(1, Math.round(pos.w * 100) / 100),
1109
+ height: Math.max(1, Math.round(pos.h * 100) / 100),
1110
+ fit: "fill"
1111
+ });
1112
+ }
1113
+ for (const l of lines) {
1114
+ const cls = fontMap.get(l.fontName) || classifyFont(l.fontName || "");
1115
+ const style = {
1116
+ fontSize: Math.round(l.fontSize * 10) / 10,
1117
+ fontFamily: cls.family
1118
+ };
1119
+ if (cls.weight === "bold") style.fontWeight = "bold";
1120
+ if (cls.style === "italic") style.fontStyle = "italic";
1121
+ if (l.color !== "#000000") style.color = l.color;
1122
+ if (l.opacity < 0.999) style.opacity = Math.round(l.opacity * 100) / 100;
1123
+ const link = findLinkForRun2(l);
1124
+ const pageWmm = pageW * PT_TO_MM;
1125
+ const measured = Math.max(l.width + l.fontSize * PT_TO_MM * 0.4, l.fontSize * PT_TO_MM);
1126
+ const remaining = Math.max(measured, pageWmm - l.x);
1127
+ const elWidth = Math.min(measured, remaining);
1128
+ const text = {
1129
+ type: "text",
1130
+ content: l.text,
1131
+ position: { x: Math.max(0, Math.round(l.x * 100) / 100), y: Math.max(0, Math.round(l.y * 100) / 100) },
1132
+ width: Math.max(2, Math.round(elWidth * 100) / 100),
1133
+ style
1134
+ };
1135
+ if (cls.weight === "bold") {
1136
+ if (l.fontSize >= 22) text.heading = 1;
1137
+ else if (l.fontSize >= 17) text.heading = 2;
1138
+ else if (l.fontSize >= 16) text.heading = 3;
1139
+ }
1140
+ if (text.heading) text.tocEntry = text.content;
1141
+ if (link) {
1142
+ if (link.url) text.link = link.url;
1143
+ else if (link.destPage != null) text.link = { type: "internal", target: `#page-${link.destPage + 1}` };
1144
+ }
1145
+ elements.push(text);
1146
+ }
1147
+ for (const w of formWidgets) {
1148
+ if (w.pushButton) continue;
1149
+ const baseEl = {
1150
+ name: w.fieldName,
1151
+ position: { x: Math.max(0, Math.round(w.rectMm.x * 100) / 100), y: Math.max(0, Math.round(w.rectMm.y * 100) / 100) },
1152
+ width: Math.max(2, Math.round(w.rectMm.w * 100) / 100),
1153
+ height: Math.max(2, Math.round(w.rectMm.h * 100) / 100)
1154
+ };
1155
+ if (w.readonly) baseEl.readonly = true;
1156
+ if (w.required) baseEl.required = true;
1157
+ if (w.fieldType === "Tx") {
1158
+ if (w.multiLine) {
1159
+ elements.push({ type: "textarea", ...baseEl, value: typeof w.fieldValue === "string" ? w.fieldValue : "" });
1160
+ } else {
1161
+ elements.push({ type: "input", ...baseEl, inputType: "text", value: typeof w.fieldValue === "string" ? w.fieldValue : "" });
1162
+ }
1163
+ } else if (w.fieldType === "Btn" && !w.radio) {
1164
+ const checked = w.fieldValue !== "Off" && !!w.fieldValue;
1165
+ elements.push({ type: "checkbox", ...baseEl, checked });
1166
+ } else if (w.fieldType === "Ch") {
1167
+ const value = typeof w.fieldValue === "string" ? w.fieldValue : "";
1168
+ const values = Array.isArray(w.fieldValue) ? w.fieldValue : void 0;
1169
+ const opts = w.options.length > 0 ? w.options : value ? [{ value }] : [];
1170
+ if (w.multiSelect) {
1171
+ elements.push({ type: "select", ...baseEl, options: opts, multiple: true, values: values ?? (value ? [value] : []) });
1172
+ } else {
1173
+ elements.push({ type: "select", ...baseEl, options: opts, value });
1174
+ }
1175
+ } else if (w.fieldType === "Sig") {
1176
+ elements.push({ type: "signature", ...baseEl, value: "" });
1177
+ }
1178
+ }
1179
+ pages.push({
1180
+ id: `page-${pi}`,
1181
+ pageSize: { width: Math.round(pageW * PT_TO_MM * 100) / 100, height: Math.round(pageH * PT_TO_MM * 100) / 100 },
1182
+ margins: { top: 0, right: 0, bottom: 0, left: 0 },
1183
+ elements
1184
+ });
1185
+ }
1186
+ const result = {
1187
+ $jdf: "1.0.0",
1188
+ meta: {
1189
+ title,
1190
+ pageSize: pages[0]?.pageSize || "A4",
1191
+ unit: "mm",
1192
+ margins: { top: 0, right: 0, bottom: 0, left: 0 }
1193
+ },
1194
+ pages
1195
+ };
1196
+ if (Object.keys(imageResources).length > 0) {
1197
+ result.resources = { images: imageResources };
1198
+ }
1199
+ return result;
1200
+ }
1201
+
1202
+ // ../../packages/jdf-pdf-import/src/node.ts
1203
// pdf.js module, cached once a load has fully succeeded.
var pdfjsModule = null;
// In-flight load shared by concurrent callers; cleared again if that load fails.
var pdfjsLoadPromise = null;
/**
 * Lazily imports `pdfjs-dist/build/pdf.mjs` and points its
 * `GlobalWorkerOptions.workerSrc` at the resolved `pdf.worker.mjs` file.
 *
 * The module is cached after the first successful load, and concurrent callers
 * share one in-flight promise. A failed load is NOT cached: the in-flight slot
 * is cleared on rejection so a later call can retry instead of receiving the
 * same stale rejection forever.
 *
 * @returns {Promise<object>} The imported pdf.js module namespace.
 * @throws Whatever the dynamic import or `require.resolve` throws
 *   (for example when `pdfjs-dist` is not installed).
 */
async function loadNodePdfJs() {
  if (pdfjsModule) return pdfjsModule;
  if (pdfjsLoadPromise) return pdfjsLoadPromise;
  const pending = (async () => {
    const { createRequire } = await import('module');
    const require_ = createRequire(import.meta.url);
    // Temporarily filter console warnings whose first argument mentions
    // "legacy" while pdf.js is being imported; everything else passes through.
    const origWarn = console.warn;
    console.warn = (...args) => {
      if (typeof args[0] === "string" && args[0].includes("legacy")) return;
      origWarn.apply(console, args);
    };
    let lib;
    try {
      lib = await import('pdfjs-dist/build/pdf.mjs');
    } finally {
      // Always restore the real console.warn, even when the import fails.
      console.warn = origWarn;
    }
    const workerPath = require_.resolve("pdfjs-dist/build/pdf.worker.mjs");
    if (lib.GlobalWorkerOptions) {
      lib.GlobalWorkerOptions.workerSrc = workerPath;
    }
    pdfjsModule = lib;
    return lib;
  })();
  pdfjsLoadPromise = pending;
  // Drop the cached promise on failure (only if it is still ours) so the next
  // call starts a fresh load. Callers still observe the rejection via `pending`.
  pending.catch(() => {
    if (pdfjsLoadPromise === pending) pdfjsLoadPromise = null;
  });
  return pending;
}
1231
// Cached `@napi-rs/canvas` module after the first successful import.
var canvasModule = null;
/**
 * Lazily imports `@napi-rs/canvas` and caches the module.
 *
 * @returns {Promise<object>} The imported canvas module namespace.
 * @throws {Error} With an install hint when the import fails; the underlying
 *   import failure is preserved on `error.cause` so the real reason
 *   (missing package, broken native binary, ...) is not lost.
 */
async function loadCanvas() {
  if (canvasModule) return canvasModule;
  try {
    canvasModule = await import('@napi-rs/canvas');
    return canvasModule;
  } catch (err) {
    throw new Error(
      "[@jdf/pdf-import/node] @napi-rs/canvas is required for the node entry point. Install it: pnpm add @napi-rs/canvas",
      { cause: err }
    );
  }
}
1243
/**
 * Builds a PNG encoder backed by the given canvas module.
 *
 * The returned function paints raw decoded image bytes onto a canvas and
 * returns them as a `data:image/png;base64,...` URL.
 *
 * `kind` selects the input pixel layout (the values follow pdf.js `ImageKind`):
 *   - 1: 1 bit per pixel, a set bit becomes white (255), a clear bit black (0)
 *   - 2: 3 bytes per pixel (R, G, B); alpha is forced to 255
 *   - 3 or anything else: bytes are copied as-is into the RGBA buffer
 *
 * @param {object} canvasMod Module exposing `createCanvas(width, height)`.
 * @returns {(width: number, height: number, kind: number, data: ArrayLike<number>) => (string|null)}
 *   Encoder returning a PNG data URL, or `null` when no 2D context is
 *   available or PNG encoding throws.
 */
function makeNodeEncoder(canvasMod) {
  return function encodePngNode(width, height, kind, data) {
    const canvas = canvasMod.createCanvas(width, height);
    const ctx = canvas.getContext("2d");
    if (!ctx) return null;
    const imgData = ctx.createImageData(width, height);
    const buf = imgData.data;
    if (kind === 2) {
      for (let i = 0, j = 0; i < data.length; i += 3, j += 4) {
        buf[j] = data[i];
        buf[j + 1] = data[i + 1];
        buf[j + 2] = data[i + 2];
        buf[j + 3] = 255;
      }
    } else if (kind === 1) {
      // Each 1-bpp row is padded to a whole number of bytes, so bits must be
      // addressed per row. Indexing them linearly across the image shears
      // every row after the first whenever width is not a multiple of 8.
      const rowBytes = (width + 7) >> 3;
      for (let y = 0, j = 0; y < height; y++) {
        const rowStart = y * rowBytes;
        for (let x = 0; x < width; x++, j += 4) {
          const byte = data[rowStart + (x >> 3)];
          // Most significant bit is the left-most pixel of the byte.
          const v = (byte >> (7 - (x & 7))) & 1 ? 255 : 0;
          buf[j] = v;
          buf[j + 1] = v;
          buf[j + 2] = v;
          buf[j + 3] = 255;
        }
      }
    } else {
      // kind 3 and unknown kinds: straight byte copy, bounded by both buffers.
      for (let i = 0; i < data.length && i < buf.length; i++) buf[i] = data[i];
    }
    ctx.putImageData(imgData, 0, 0);
    try {
      const png = canvas.toBuffer("image/png");
      return `data:image/png;base64,${png.toString("base64")}`;
    } catch {
      return null;
    }
  };
}
1281
/**
 * Node entry point for PDF import: assembles a runtime object (pdf.js module,
 * canvas factory, PNG encoder, file reader) and hands it to `importPdfToJdf`.
 *
 * @param {*} source Forwarded unchanged to `importPdfToJdf`.
 * @param {string} title Forwarded unchanged to `importPdfToJdf`.
 * @param {object} [options] Forwarded to `importPdfToJdf`; `options.pdfjs`,
 *   when truthy, is used instead of loading pdf.js here.
 * @returns {Promise<*>} Whatever `importPdfToJdf` resolves to.
 */
async function importPdfToJdf2(source, title, options = {}) {
  // Resolve pdf.js first, then the canvas module (same order as before).
  let pdfjs = options.pdfjs;
  if (!pdfjs) {
    pdfjs = await loadNodePdfJs();
  }
  const canvasMod = await loadCanvas();
  const encodePng = makeNodeEncoder(canvasMod);
  const runtime = {
    pdfjs,
    disableWorker: true,
    createCanvas(width, height) {
      const canvas = canvasMod.createCanvas(width, height);
      return { canvas, context: canvas.getContext("2d") };
    },
    encodePng,
    async readFile(filePath) {
      // Copy the file contents into a plain Uint8Array.
      return new Uint8Array(await readFile(filePath));
    }
  };
  return importPdfToJdf(source, title, runtime, options);
}
1300
+
412
1301
  // src/commands/import-pdf.ts
413
- async function importPdfPlaceholder(_inputPath, _outputPath) {
414
- console.error("PDF import via CLI is not yet wired up \u2014 run the JDF Reader app and use Open / drag-drop, which uses the Rust pdf-extract pipeline.");
415
- console.error("Track progress at: https://github.com/uurtech/jdf");
416
- process.exit(2);
1302
/**
 * CLI handler that converts a PDF file into a `.jdf` or `.jdfx` file on disk.
 *
 * Prints progress to stdout. When the input file does not exist it prints an
 * error and exits the process with status 1.
 *
 * @param {string} inputPath Path to the PDF (resolved against the cwd).
 * @param {string} [outputPath] Explicit output path; its extension decides
 *   between `.jdfx` (bundle) and plain JSON output. When omitted, the output
 *   sits next to the input with the `.pdf` suffix replaced.
 * @param {{ forceJson?: boolean }} [options] `forceJson` forces a `.jdf`
 *   extension when no explicit output path is given.
 * @returns {Promise<void>}
 */
async function importPdf(inputPath, outputPath, options = {}) {
  const input = path2.resolve(inputPath);
  const inputExists = fs.existsSync(input);
  if (!inputExists) {
    console.error(`File not found: ${input}`);
    process.exit(1);
  }
  console.log(`Importing: ${input}`);

  // Document title is the file name without its extension.
  const title = path2.basename(input, path2.extname(input));
  const startedAt = Date.now();
  const doc = await importPdfToJdf2(input, title);
  const elapsedSeconds = ((Date.now() - startedAt) / 1e3).toFixed(1);
  console.log(`Parsed in ${elapsedSeconds}s \u2014 ${doc.pages.length} page(s)`);

  let output;
  if (outputPath) {
    output = path2.resolve(outputPath);
  } else {
    // shouldUseJdfx is only consulted when JSON output was not forced.
    const extension = !options.forceJson && shouldUseJdfx(doc) ? ".jdfx" : ".jdf";
    output = input.replace(/\.pdf$/i, "") + extension;
  }
  console.log(`Output: ${output}`);

  const isBundle = output.toLowerCase().endsWith(".jdfx");
  if (!isBundle) {
    fs.writeFileSync(output, JSON.stringify(doc, null, 2));
    console.log(`\nDone! Created ${doc.pages.length} page(s)`);
  } else {
    const { bytes, manifest } = await packJdfx(doc);
    fs.writeFileSync(output, bytes);
    console.log(`\nDone! Created ${doc.pages.length} page(s), ${manifest.assets.length} asset(s) bundled`);
  }
  console.log(`Open with: open -a "JDF Reader" "${output}"`);
}
1334
/**
 * Error raised for user-facing JSON import failures (missing file, invalid
 * JSON, unsupported shape, failed validation). The `name` is used by the CLI
 * entry point to pick how the message is printed.
 */
var ImportJsonError = class extends Error {
  // Own instance property, set right after the Error base has been built.
  name = "ImportJsonError";

  /** @param {string} message Human-readable description of the failure. */
  constructor(message) {
    super(message);
  }
};
1340
/**
 * CLI handler that wraps a JSON file into a `.jdf` or `.jdfx` file on disk and
 * (unless skipped) validates the written output.
 *
 * @param {string} inputPath Path to the JSON file (resolved against the cwd).
 * @param {string} [outputPath] Explicit output path; its extension decides
 *   between `.jdfx` (bundle) and plain JSON output. When omitted, the output
 *   sits next to the input with the `.json` suffix replaced.
 * @param {{ forceJson?: boolean, skipValidate?: boolean }} [options]
 *   `forceJson` forces a `.jdf` extension when no explicit output is given;
 *   `skipValidate` skips the validation pass on the written file.
 * @returns {Promise<void>}
 * @throws {ImportJsonError} When the file is missing, is not valid JSON, or
 *   the written output fails validation. Errors thrown by `normaliseToJdf`
 *   propagate unchanged.
 */
async function importJson(inputPath, outputPath, options = {}) {
  const input = path2.resolve(inputPath);
  if (!fs.existsSync(input)) {
    throw new ImportJsonError(`File not found: ${input}`);
  }
  console.log(`Importing: ${input}`);
  // Drop a leading UTF-8 byte-order mark: JSON.parse rejects it, which would
  // make otherwise valid JSON saved "with BOM" fail as "Not valid JSON".
  const raw = fs.readFileSync(input, "utf-8").replace(/^\uFEFF/, "");
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch (e) {
    const failure = new ImportJsonError(`Not valid JSON: ${e.message}`);
    // Keep the original SyntaxError reachable for debugging.
    failure.cause = e;
    throw failure;
  }
  // Document title defaults to the file name without its extension.
  const title = path2.basename(input, path2.extname(input));
  const doc = normaliseToJdf(parsed, title);
  let output;
  if (outputPath) {
    output = path2.resolve(outputPath);
  } else {
    const stem = input.replace(/\.json$/i, "");
    const wantJdfx = !options.forceJson && shouldUseJdfx(doc);
    output = stem + (wantJdfx ? ".jdfx" : ".jdf");
  }
  console.log(`Output: ${output}`);
  if (output.toLowerCase().endsWith(".jdfx")) {
    const { bytes, manifest } = await packJdfx(doc);
    fs.writeFileSync(output, bytes);
    console.log(`\nDone! Created ${doc.pages.length} page(s), ${manifest.assets.length} asset(s) bundled`);
  } else {
    fs.writeFileSync(output, JSON.stringify(doc, null, 2));
    console.log(`\nDone! Created ${doc.pages.length} page(s)`);
  }
  if (!options.skipValidate) {
    console.log("");
    const ok = await validate(output);
    if (!ok) throw new ImportJsonError(`Schema validation failed for ${output}`);
  }
  console.log(`Open with: open -a "JDF Reader" "${output}"`);
}
1381
/**
 * Coerces parsed JSON into a JDF document object.
 *
 * Accepted shapes:
 *   - a JDF document (`$jdf` string + `pages` array) or any object with a
 *     `pages` array: top-level sections are kept, `meta` defaults are filled in
 *   - a bare array of elements: wrapped into a single page
 *   - an object with an `elements` array: wrapped into a single page, keeping `meta`
 *
 * Meta handling: `title`, `pageSize` ("A4") and `unit` ("mm") are defaults that
 * the input's `meta` may override. A `meta` that is not a plain object is
 * ignored rather than spread key-by-key, and a `null`/missing `meta.title`
 * falls back to `title` instead of overwriting it.
 *
 * @param {*} input Parsed JSON value.
 * @param {string} title Fallback document title.
 * @returns {object} JDF document with `$jdf`, `meta` and `pages`.
 * @throws {ImportJsonError} When `pages`/`elements`/the array is empty or the
 *   shape is not recognised.
 */
function normaliseToJdf(input, title) {
  const isRecord = (value) => value !== null && typeof value === "object" && !Array.isArray(value);

  // Defaults first, then the caller's meta; finally repair a nullish title
  // that the spread may have written over the fallback.
  const buildMeta = (rawMeta) => {
    const meta = { title, pageSize: "A4", unit: "mm", ...isRecord(rawMeta) ? rawMeta : {} };
    if (meta.title == null) meta.title = title;
    return meta;
  };

  // Optional top-level sections are copied only when truthy.
  const optionalSections = (src) => ({
    ...src.styles ? { styles: src.styles } : {},
    ...src.resources ? { resources: src.resources } : {},
    ...src.header ? { header: src.header } : {},
    ...src.footer ? { footer: src.footer } : {}
  });

  if (Array.isArray(input)) {
    if (input.length === 0) {
      throw new ImportJsonError("Element array is empty \u2014 wrap at least one element");
    }
    return wrapElements(input, title);
  }
  if (isRecord(input)) {
    if (Array.isArray(input.pages)) {
      // A string `$jdf` marks a full JDF document and is passed through
      // verbatim; otherwise a falsy `$jdf` defaults to "1.0.0".
      const isTaggedDocument = typeof input.$jdf === "string";
      if (input.pages.length === 0) {
        throw new ImportJsonError(
          isTaggedDocument ? "JDF document has zero pages \u2014 `pages` must contain at least one page" : "`pages` is empty \u2014 provide at least one page"
        );
      }
      return {
        $jdf: isTaggedDocument ? input.$jdf : input.$jdf || "1.0.0",
        meta: buildMeta(input.meta),
        ...optionalSections(input),
        pages: input.pages
      };
    }
    if (Array.isArray(input.elements)) {
      if (input.elements.length === 0) {
        throw new ImportJsonError("`elements` is empty \u2014 provide at least one element");
      }
      return wrapElements(input.elements, title, buildMeta(input.meta));
    }
  }
  throw new ImportJsonError(
    "Unrecognised JSON shape \u2014 expected a JDF document, an element array, or { pages: [...] } / { elements: [...] }"
  );
}
1429
/**
 * Wraps a list of elements into a minimal single-page JDF document.
 *
 * @param {Array} elements Elements placed on the only page (`page-1`).
 * @param {string} title Default document title.
 * @param {object} [meta] Extra meta entries; these override the defaults
 *   (`title`, `pageSize: "A4"`, `unit: "mm"`).
 * @returns {object} Document with `$jdf` "1.0.0", `meta` and one page.
 */
function wrapElements(elements, title, meta) {
  const overrides = meta || {};
  // Defaults first so that caller-provided meta wins.
  const documentMeta = { title, pageSize: "A4", unit: "mm", ...overrides };
  const onlyPage = { id: "page-1", elements };
  return { $jdf: "1.0.0", meta: documentMeta, pages: [onlyPage] };
}
418
1446
 
419
1447
  // src/index.ts
420
1448
  var HELP = `jdf \u2014 JSON Document Format CLI
421
1449
 
1450
+ The CLI exists for two workflows:
1451
+ \u2022 PDF \u2192 JDF legacy documents become a structured JSON tree your
1452
+ RAG / agent / pipeline can read natively.
1453
+ \u2022 JSON \u2192 JDF LLMs and code emit JSON; this command wraps that JSON
1454
+ into a validated .jdf (or .jdfx) you can ship.
1455
+
422
1456
  Usage:
423
1457
  jdf validate <file.jdf>
424
- jdf import <file.{md,pdf}> [-o output.jdf]
1458
+ jdf import <file.{pdf,json,md}> [-o output.{jdf,jdfx}] [--json]
425
1459
  jdf --help
426
1460
 
427
1461
  Commands:
428
- validate Validate a .jdf file against the JDF schema
429
- import Convert a markdown or PDF file to JDF
1462
+ validate Validate a .jdf / .jdfx file against the JDF schema
1463
+ import Convert a PDF, JSON, or Markdown file into JDF
1464
+
1465
+ Flags:
1466
+ -o, --output <path> Explicit output path (extension picks .jdf vs .jdfx)
1467
+ --json Force pure JSON .jdf output (documents with embedded
1468
+ images stay as a single base64-inlined .jdf instead
1469
+ of a .jdfx bundle). Useful for RAG / CI consumers
1470
+ that prefer one text file over a zip.
430
1471
 
431
1472
  Examples:
432
1473
  jdf validate spec/examples/hello-world.jdf
1474
+ jdf import paper.pdf # PDF \u2192 JDF (or .jdfx for images)
1475
+ jdf import contract.pdf --json | jq . # PDF \u2192 pure JSON, pipe-friendly
1476
+ jdf import response.json -o response.jdf # LLM JSON output \u2192 validated JDF
433
1477
  jdf import README.md
434
- jdf import paper.pdf -o paper.jdf
435
1478
  `;
1479
+ var BOOLEAN_FLAGS = /* @__PURE__ */ new Set(["help", "h", "json", "verbose", "skip-validate"]);
436
1480
  function parseArgs(argv) {
437
1481
  const positional = [];
438
1482
  const flags = {};
@@ -448,7 +1492,17 @@ function parseArgs(argv) {
448
1492
  continue;
449
1493
  }
450
1494
  if (a.startsWith("--")) {
451
- const k = a.slice(2);
1495
+ const eq = a.indexOf("=");
1496
+ const k = eq > 0 ? a.slice(2, eq) : a.slice(2);
1497
+ const inlineVal = eq > 0 ? a.slice(eq + 1) : void 0;
1498
+ if (inlineVal !== void 0) {
1499
+ flags[k] = inlineVal;
1500
+ continue;
1501
+ }
1502
+ if (BOOLEAN_FLAGS.has(k)) {
1503
+ flags[k] = true;
1504
+ continue;
1505
+ }
452
1506
  const next = argv[i + 1];
453
1507
  if (next && !next.startsWith("-")) {
454
1508
  flags[k] = next;
@@ -458,10 +1512,12 @@ function parseArgs(argv) {
458
1512
  }
459
1513
  if (a === "-o" || a === "--output") {
460
1514
  const next = argv[i + 1];
461
- if (next) {
462
- flags["output"] = next;
463
- i++;
1515
+ if (next === void 0 || next.startsWith("-")) {
1516
+ console.error(`Error: ${a} requires a path argument`);
1517
+ process.exit(1);
464
1518
  }
1519
+ flags["output"] = next;
1520
+ i++;
465
1521
  continue;
466
1522
  }
467
1523
  positional.push(a);
@@ -487,20 +1543,25 @@ async function main() {
487
1543
  case "import": {
488
1544
  const input = positional[0];
489
1545
  if (!input) {
490
- console.error("Usage: jdf import <file.{md,pdf}> [-o output.jdf]");
1546
+ console.error("Usage: jdf import <file.{pdf,json,md}> [-o output.jdf] [--json]");
491
1547
  process.exit(1);
492
1548
  }
493
1549
  const output = typeof flags.output === "string" ? flags.output : void 0;
1550
+ const forceJson = flags.json === true;
494
1551
  const lower = input.toLowerCase();
495
1552
  if (lower.endsWith(".md") || lower.endsWith(".markdown")) {
496
1553
  await importMarkdown(input, output);
1554
+ process.exit(0);
497
1555
  } else if (lower.endsWith(".pdf")) {
498
- await importPdfPlaceholder(input, output);
1556
+ await importPdf(input, output, { forceJson });
1557
+ process.exit(0);
1558
+ } else if (lower.endsWith(".json")) {
1559
+ await importJson(input, output, { forceJson });
1560
+ process.exit(0);
499
1561
  } else {
500
1562
  console.error(`Unsupported file type: ${input}`);
501
1563
  process.exit(1);
502
1564
  }
503
- break;
504
1565
  }
505
1566
  default:
506
1567
  console.error(`Unknown command: ${command}`);
@@ -508,7 +1569,11 @@ async function main() {
508
1569
  process.exit(1);
509
1570
  }
510
1571
  } catch (e) {
511
- console.error(`Error: ${e.message || e}`);
1572
+ if (e?.name === "ImportJsonError") {
1573
+ console.error(`\u2717 ${e.message}`);
1574
+ } else {
1575
+ console.error(`Error: ${e?.message || e}`);
1576
+ }
512
1577
  process.exit(1);
513
1578
  }
514
1579
  }