koten-layout-detector 1.0.0

package/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,130 @@
+ # koten-layout-detector
+
+ Japanese classical document layout analysis library using ONNX Runtime for detecting text regions, illustrations, and stamps in historical Japanese documents.
+
+ ## Features
+
+ - 🏯 Specialized for Japanese classical documents (古典籍)
+ - 🚀 Browser-based inference using ONNX Runtime Web
+ - 📦 Lightweight and easy to integrate
+ - 🎯 Detects 5 types of regions:
+   - Overall layout (全体)
+   - Handwritten text (手書き)
+   - Typographic text (活字)
+   - Illustrations (図版)
+   - Stamps/Seals (印判)
+
+ ## See It In Action
+
+ Check out the live demo at [https://koten-layout.netlify.app/](https://koten-layout.netlify.app/).
+
+ ## Installation
+
+ ```bash
+ npm install koten-layout-detector onnxruntime-web
+ ```
+
+ ## Usage
+
+ ```javascript
+ import {
+   loadModel,
+   preprocess,
+   runInference,
+   postprocess,
+   drawDetections,
+   CLASSES,
+   COLORS
+ } from 'koten-layout-detector'
+
+ // Load the ONNX model
+ const session = await loadModel('/path/to/your/model.onnx')
+
+ // Load an image
+ const img = new Image()
+ img.src = '/path/to/classical-document.jpg'
+ await img.decode()
+
+ // Preprocess the image
+ const { tensor, meta } = preprocess(img)
+
+ // Run inference
+ const outputTensor = await runInference(session, tensor)
+
+ // Postprocess results
+ const detections = postprocess(outputTensor, meta, 0.5, 0.45)
+
+ // Draw detections on canvas
+ const canvas = document.getElementById('output-canvas')
+ drawDetections(canvas, img, detections)
+
+ console.log('Detected regions:', detections)
+ ```
+
+ ## API Reference
+
+ ### `loadModel(modelUrl: string): Promise<InferenceSession>`
+
+ Loads an ONNX model from the specified URL.
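+
+ Internally, `loadModel` pins the WASM asset path and creates the session with the `wasm` execution provider (see `src/inference.js`), so the onnxruntime-web `.wasm` files must be served from your site root. The equivalent manual setup looks like this:
+
+ ```javascript
+ import * as ort from 'onnxruntime-web'
+
+ // What loadModel does internally: resolve .wasm files from '/' and use the WASM backend.
+ ort.env.wasm.wasmPaths = '/'
+ const session = await ort.InferenceSession.create('/path/to/your/model.onnx', {
+   executionProviders: ['wasm'],
+   graphOptimizationLevel: 'all',
+ })
+ ```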
+
+ ### `preprocess(img: HTMLImageElement): { tensor: Tensor, meta: Object }`
+
+ Preprocesses an image for inference with letterbox resizing.
+
+ Returns:
+ - `tensor`: ONNX tensor ready for inference
+ - `meta`: Metadata for postprocessing (scale, padding, original dimensions); see the sketch after this list
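+
+ Concretely, `meta` has the shape `{ scale, padX, padY, origW, origH }`. As a minimal sketch (the helper name `toOriginalCoords` is just for illustration), it can map any point from the 640×640 letterboxed model space back to original-image coordinates, which is exactly what `postprocess` does for box corners:
+
+ ```javascript
+ // Illustrative helper: undo the letterbox transform for a single model-space point.
+ function toOriginalCoords(x, y, meta) {
+   return {
+     x: (x - meta.padX) / meta.scale,
+     y: (y - meta.padY) / meta.scale,
+   }
+ }
+ ```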
+
+ ### `runInference(session: InferenceSession, tensor: Tensor): Promise<Tensor>`
+
+ Runs inference on the preprocessed tensor.
+
+ ### `postprocess(outputTensor: Tensor, meta: Object, confThreshold?: number, iouThreshold?: number): Array<Detection>`
+
+ Postprocesses the model output into detection results.
+
+ Parameters:
+ - `confThreshold`: Confidence threshold (default: 0.5)
+ - `iouThreshold`: IoU threshold for NMS (default: 0.45)
+
+ Returns an array of detections with:
+ - `x1, y1, x2, y2`: Bounding box coordinates (in original-image pixels)
+ - `conf`: Confidence score
+ - `classId`: Class ID
+ - `label`: Japanese label
+ - `color`: Color for visualization
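+
+ Detections are plain objects, so they can be filtered and sorted with ordinary array methods. A small illustrative sketch, reusing `detections` from the Usage example (the label strings come from `CLASSES`):
+
+ ```javascript
+ // Keep only high-confidence handwritten-text regions, largest area first.
+ const handwritten = detections
+   .filter((d) => d.label === '手書き' && d.conf > 0.7)
+   .sort((a, b) => (b.x2 - b.x1) * (b.y2 - b.y1) - (a.x2 - a.x1) * (a.y2 - a.y1))
+ ```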
+
+ ### `drawDetections(canvas: HTMLCanvasElement, img: HTMLImageElement, detections: Array<Detection>): void`
+
+ Draws the original image and detection boxes on a canvas.
+
+ ### `CLASSES`
+
+ Array of class definitions with ID, key, and Japanese label.
+
+ ### `COLORS`
+
+ Array of per-class colors for visualization.
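+
+ `CLASSES` and `COLORS` are index-aligned, so a color legend can be built by zipping them. A minimal sketch (assuming both are imported from the package as in the Usage example):
+
+ ```javascript
+ // Build { label, color } pairs for a legend; CLASSES[i] corresponds to COLORS[i].
+ const legend = CLASSES.map((cls, i) => ({ label: cls.ja, color: COLORS[i] }))
+ console.log(legend) // e.g. [{ label: '全体', color: '#e74c3c' }, ...]
+ ```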
+
+ ## Dataset
+
+ This model is trained on the [NDL-DocL Layout Dataset](https://github.com/ndl-lab/layout-dataset) provided by the National Diet Library of Japan. The dataset contains annotated layout information for Japanese classical documents.
+
+ ## Model
+
+ The detection model is based on YOLOv12, optimized for classical Japanese document analysis.
+
+ **Note:** You need to provide your own trained ONNX model file. This library provides the inference pipeline but does not include the model weights.
+
+ ## License
+
+ MIT
+
+ ## Contributing
+
+ Contributions are welcome! Please feel free to submit a Pull Request.
+
+ ## Acknowledgments
+
+ - [NDL-DocL Layout Dataset](https://github.com/ndl-lab/layout-dataset) - National Diet Library of Japan
+ - ONNX Runtime Web team for the excellent inference engine
package/package.json ADDED
@@ -0,0 +1,41 @@
+ {
+   "name": "koten-layout-detector",
+   "version": "1.0.0",
+   "description": "Japanese classical document layout analysis library using ONNX Runtime for detecting text regions, illustrations, and stamps in historical Japanese documents",
+   "main": "src/index.js",
+   "type": "module",
+   "scripts": {
+     "test": "echo \"Error: no test specified\" && exit 1"
+   },
+   "keywords": [
+     "onnx",
+     "layout-analysis",
+     "document-analysis",
+     "japanese",
+     "classical-documents",
+     "yolo",
+     "object-detection",
+     "koten"
+   ],
+   "author": "",
+   "license": "MIT",
+   "repository": {
+     "type": "git",
+     "url": "https://github.com/yuta1984/koten-layout-detector.git"
+   },
+   "bugs": {
+     "url": "https://github.com/yuta1984/koten-layout-detector/issues"
+   },
+   "homepage": "https://github.com/yuta1984/koten-layout-detector#readme",
+   "peerDependencies": {
+     "onnxruntime-web": "^1.20.0"
+   },
+   "devDependencies": {
+     "onnxruntime-web": "^1.20.1"
+   },
+   "files": [
+     "src/**/*",
+     "README.md",
+     "LICENSE"
+   ]
+ }
package/src/index.js ADDED
@@ -0,0 +1,19 @@
+ /**
+  * koten-layout-detector
+  * Japanese classical document layout analysis library
+  *
+  * Trained on NDL-DocL Layout Dataset
+  * https://github.com/ndl-lab/layout-dataset
+  */
+
+ export {
+   loadModel,
+   preprocess,
+   runInference,
+   postprocess,
+   drawDetections,
+   CLASSES,
+   COLORS
+ } from './inference.js'
+
+ export { iou, nms } from './nms.js'
package/src/inference.js ADDED
@@ -0,0 +1,204 @@
+ import * as ort from 'onnxruntime-web'
+ import { nms } from './nms.js'
+
+ /** Model input size */
+ const MODEL_SIZE = 640
+
+ /** Letterbox padding color (YOLO default: gray 114) */
+ const PAD_COLOR = 114
+
+ /**
+  * Class definitions
+  * The 5 classes of the NDL-DocL classical-documents dataset
+  */
+ export const CLASSES = [
+   { id: 0, key: '1_overall', ja: '全体' },
+   { id: 1, key: '2_handwritten', ja: '手書き' },
+   { id: 2, key: '3_typography', ja: '活字' },
+   { id: 3, key: '4_illustration', ja: '図版' },
+   { id: 4, key: '5_stamp', ja: '印判' },
+ ]
+
+ export const COLORS = ['#e74c3c', '#3498db', '#2ecc71', '#f39c12', '#9b59b6']
+
+ // ---------------------------------------------------------------------------
+ // Model loading
+ // ---------------------------------------------------------------------------
+
+ /**
+  * Create and return an ONNX inference session
+  * @param {string} modelUrl - URL of the model file
+  * @returns {Promise<ort.InferenceSession>}
+  */
+ export async function loadModel(modelUrl) {
+   // Point ONNX Runtime at the WASM files explicitly (matches Vite's static-copy destination)
+   ort.env.wasm.wasmPaths = '/'
+
+   const session = await ort.InferenceSession.create(modelUrl, {
+     executionProviders: ['wasm'],
+     graphOptimizationLevel: 'all',
+   })
+   return session
+ }
+
+ // ---------------------------------------------------------------------------
+ // Preprocessing
+ // ---------------------------------------------------------------------------
+
+ /**
+  * Letterbox-resize an image and convert it to a Float32Array tensor
+  * @param {HTMLImageElement} img
+  * @returns {{ tensor: ort.Tensor, meta: Object }}
+  *   meta: { scale, padX, padY, origW, origH }
+  */
+ export function preprocess(img) {
+   const canvas = document.createElement('canvas')
+   canvas.width = MODEL_SIZE
+   canvas.height = MODEL_SIZE
+   const ctx = canvas.getContext('2d')
+
+   // Fill with the padding color
+   ctx.fillStyle = `rgb(${PAD_COLOR},${PAD_COLOR},${PAD_COLOR})`
+   ctx.fillRect(0, 0, MODEL_SIZE, MODEL_SIZE)
+
+   // Scale down while preserving aspect ratio
+   const scale = Math.min(MODEL_SIZE / img.width, MODEL_SIZE / img.height)
+   const newW = Math.round(img.width * scale)
+   const newH = Math.round(img.height * scale)
+   const padX = Math.floor((MODEL_SIZE - newW) / 2)
+   const padY = Math.floor((MODEL_SIZE - newH) / 2)
+
+   ctx.drawImage(img, padX, padY, newW, newH)
+
+   const { data } = ctx.getImageData(0, 0, MODEL_SIZE, MODEL_SIZE)
+
+   // HWC (RGBA) → CHW (RGB) Float32, normalized by ÷255
+   const float32 = new Float32Array(3 * MODEL_SIZE * MODEL_SIZE)
+   const pixelCount = MODEL_SIZE * MODEL_SIZE
+   for (let i = 0; i < pixelCount; i++) {
+     float32[i] = data[i * 4] / 255.0 // R
+     float32[i + pixelCount] = data[i * 4 + 1] / 255.0 // G
+     float32[i + pixelCount * 2] = data[i * 4 + 2] / 255.0 // B
+   }
+
+   return {
+     tensor: new ort.Tensor('float32', float32, [1, 3, MODEL_SIZE, MODEL_SIZE]),
+     meta: { scale, padX, padY, origW: img.width, origH: img.height },
+   }
+ }
+
+ // ---------------------------------------------------------------------------
+ // Inference
+ // ---------------------------------------------------------------------------
+
+ /**
+  * Run inference with the ONNX session
+  * @param {ort.InferenceSession} session
+  * @param {ort.Tensor} tensor
+  * @returns {Promise<ort.Tensor>} Output tensor [1, 9, 8400]
+  */
+ export async function runInference(session, tensor) {
+   const inputName = session.inputNames[0]
+   const feeds = { [inputName]: tensor }
+   const results = await session.run(feeds)
+   return results[session.outputNames[0]]
+ }
+
+ // ---------------------------------------------------------------------------
+ // Postprocessing
+ // ---------------------------------------------------------------------------
+
+ /**
+  * Convert the ONNX output tensor into detection results
+  * @param {ort.Tensor} outputTensor - shape [1, 4+nc, 8400]
+  * @param {Object} meta - meta object returned by preprocess()
+  * @param {number} confThreshold - confidence threshold (default 0.5)
+  * @param {number} iouThreshold - NMS IoU threshold (default 0.45)
+  * @returns {Array} List of detections { x1, y1, x2, y2, conf, classId, label, color }
+  */
+ export function postprocess(outputTensor, meta, confThreshold = 0.5, iouThreshold = 0.45) {
+   const [, numChannels, numPreds] = outputTensor.dims
+   const data = outputTensor.data
+   const nc = numChannels - 4 // number of classes
+
+   const raw = []
+
+   for (let i = 0; i < numPreds; i++) {
+     // Find the maximum per-class score and its class ID
+     let maxScore = -Infinity
+     let classId = 0
+     for (let c = 0; c < nc; c++) {
+       const score = data[(4 + c) * numPreds + i]
+       if (score > maxScore) {
+         maxScore = score
+         classId = c
+       }
+     }
+
+     if (maxScore < confThreshold) continue
+
+     // cx, cy, w, h (640px scale) → x1, y1, x2, y2 (original-image scale)
+     const cx = data[0 * numPreds + i]
+     const cy = data[1 * numPreds + i]
+     const w = data[2 * numPreds + i]
+     const h = data[3 * numPreds + i]
+
+     // Undo the letterbox padding and scaling
+     const x1 = ((cx - w / 2) - meta.padX) / meta.scale
+     const y1 = ((cy - h / 2) - meta.padY) / meta.scale
+     const x2 = ((cx + w / 2) - meta.padX) / meta.scale
+     const y2 = ((cy + h / 2) - meta.padY) / meta.scale
+
+     raw.push({ x1, y1, x2, y2, conf: maxScore, classId })
+   }
+
+   const kept = nms(raw, iouThreshold)
+
+   return kept.map((d) => ({
+     ...d,
+     label: CLASSES[d.classId]?.ja ?? String(d.classId),
+     color: COLORS[d.classId] ?? '#ffffff',
+   }))
+ }
+
+ // ---------------------------------------------------------------------------
+ // Canvas drawing
+ // ---------------------------------------------------------------------------
+
+ /**
+  * Draw the original image and detection results onto a canvas
+  * @param {HTMLCanvasElement} canvas
+  * @param {HTMLImageElement} img
+  * @param {Array} detections - return value of postprocess()
+  */
+ export function drawDetections(canvas, img, detections) {
+   canvas.width = img.width
+   canvas.height = img.height
+   const ctx = canvas.getContext('2d')
+   ctx.drawImage(img, 0, 0)
+
+   for (const d of detections) {
+     const x1 = Math.max(0, d.x1)
+     const y1 = Math.max(0, d.y1)
+     const bw = d.x2 - x1
+     const bh = d.y2 - y1
+
+     // Bounding box
+     ctx.strokeStyle = d.color
+     ctx.lineWidth = Math.max(2, img.width / 300)
+     ctx.strokeRect(x1, y1, bw, bh)
+
+     // Label background
+     const fontSize = Math.max(14, img.width / 50)
+     ctx.font = `bold ${fontSize}px sans-serif`
+     const text = `${d.label} ${(d.conf * 100).toFixed(0)}%`
+     const textW = ctx.measureText(text).width
+     const textH = fontSize * 1.4
+     ctx.fillStyle = d.color
+     ctx.fillRect(x1, y1 - textH, textW + 8, textH)
+
+     // Label text
+     ctx.fillStyle = '#ffffff'
+     ctx.fillText(text, x1 + 4, y1 - fontSize * 0.2)
+   }
+ }
package/src/nms.js ADDED
@@ -0,0 +1,48 @@
+ /**
+  * Compute IoU (Intersection over Union)
+  * @param {Object} a - { x1, y1, x2, y2 }
+  * @param {Object} b - { x1, y1, x2, y2 }
+  * @returns {number} IoU value (0–1)
+  */
+ export function iou(a, b) {
+   const ix1 = Math.max(a.x1, b.x1)
+   const iy1 = Math.max(a.y1, b.y1)
+   const ix2 = Math.min(a.x2, b.x2)
+   const iy2 = Math.min(a.y2, b.y2)
+
+   const interW = Math.max(0, ix2 - ix1)
+   const interH = Math.max(0, iy2 - iy1)
+   const interArea = interW * interH
+
+   const areaA = (a.x2 - a.x1) * (a.y2 - a.y1)
+   const areaB = (b.x2 - b.x1) * (b.y2 - b.y1)
+   const unionArea = areaA + areaB - interArea
+
+   return unionArea <= 0 ? 0 : interArea / unionArea
+ }
+
+ /**
+  * Apply Non-Maximum Suppression
+  * @param {Array} detections - array of detections ({ x1, y1, x2, y2, conf, classId })
+  * @param {number} iouThreshold - IoU threshold (default 0.45)
+  * @returns {Array} Detections remaining after NMS
+  */
+ export function nms(detections, iouThreshold = 0.45) {
+   // Apply NMS independently per class
+   const classIds = [...new Set(detections.map((d) => d.classId))]
+   const result = []
+
+   for (const cid of classIds) {
+     let boxes = detections
+       .filter((d) => d.classId === cid)
+       .sort((a, b) => b.conf - a.conf) // sort by confidence, highest first
+
+     while (boxes.length > 0) {
+       const best = boxes.shift()
+       result.push(best)
+       boxes = boxes.filter((b) => iou(best, b) < iouThreshold)
+     }
+   }
+
+   return result
+ }