@make-u-free/migi 0.5.10 → 0.5.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2):
  1. package/package.json +1 -1
  2. package/src/tools.js +29 -15
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@make-u-free/migi",
3
- "version": "0.5.10",
3
+ "version": "0.5.12",
4
4
  "description": "Your AI right-hand agent. Works anywhere, with any LLM API.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/tools.js CHANGED
@@ -8,7 +8,8 @@ import chalk from 'chalk'
8
8
  import xlsxPkg from 'xlsx'
9
9
  import { createRequire } from 'module'
10
10
  const require = createRequire(import.meta.url)
11
- const pdfParse = require('pdf-parse')
11
+ const _pdfParseModule = require('pdf-parse')
12
+ const pdfParse = typeof _pdfParseModule === 'function' ? _pdfParseModule : _pdfParseModule.default
12
13
  import AdmZip from 'adm-zip'
13
14
  import OpenAI from 'openai'
14
15
  import { httpsAgent } from './tls.js'
@@ -134,14 +135,22 @@ function extractImagesFromPdf(buf) {
134
135
  const images = []
135
136
  let i = 0
136
137
 
137
- while (i < buf.length - 1) {
138
- // JPEG: FF D8 で始まり FF D9 で終わる
139
- if (buf[i] === 0xFF && buf[i + 1] === 0xD8) {
140
- const eoiIdx = buf.indexOf(Buffer.from([0xFF, 0xD9]), i + 2)
141
- if (eoiIdx === -1) break
142
- images.push({ data: buf.slice(i, eoiIdx + 2), mime: 'image/jpeg' })
143
- i = eoiIdx + 2
144
- continue
138
+ while (i < buf.length - 3) {
139
+ // JPEG: FF D8 で始まり、直後に FF E0〜EF(APPマーカー)か FF DB(DQT)が続く本物だけ拾う
140
+ if (buf[i] === 0xFF && buf[i + 1] === 0xD8 && buf[i + 2] === 0xFF) {
141
+ const nextMarker = buf[i + 3]
142
+ const isApp = nextMarker >= 0xE0 && nextMarker <= 0xEF // JFIF / EXIF 等
143
+ const isDqt = nextMarker === 0xDB // 量子化テーブル
144
+ if (isApp || isDqt) {
145
+ const eoiIdx = buf.indexOf(Buffer.from([0xFF, 0xD9]), i + 4)
146
+ if (eoiIdx === -1) break
147
+ const data = buf.slice(i, eoiIdx + 2)
148
+ if (data.length > 1024) { // 1KB未満はアイコン等のゴミなので除外
149
+ images.push({ data, mime: 'image/jpeg' })
150
+ }
151
+ i = eoiIdx + 2
152
+ continue
153
+ }
145
154
  }
146
155
 
147
156
  // PNG: 89 50 4E 47 0D 0A 1A 0A で始まる
@@ -152,7 +161,10 @@ function extractImagesFromPdf(buf) {
152
161
  ) {
153
162
  const iend = buf.indexOf(Buffer.from([0x49, 0x45, 0x4E, 0x44, 0xAE, 0x42, 0x60, 0x82]), i + 8)
154
163
  if (iend === -1) break
155
- images.push({ data: buf.slice(i, iend + 8), mime: 'image/png' })
164
+ const data = buf.slice(i, iend + 8)
165
+ if (data.length > 1024) {
166
+ images.push({ data, mime: 'image/png' })
167
+ }
156
168
  i = iend + 8
157
169
  continue
158
170
  }
@@ -222,11 +234,13 @@ export async function executeTool(name, args, opts = {}) {
222
234
  const buf = readFileSync(args.path)
223
235
 
224
236
  // Step 1: テキストPDFとして抽出を試みる
225
- try {
226
- const data = await pdfParse(buf)
227
- const text = data.text?.trim()
228
- if (text) return text
229
- } catch (_) {}
237
+ if (typeof pdfParse === 'function') {
238
+ try {
239
+ const data = await pdfParse(buf)
240
+ const text = data.text?.trim()
241
+ if (text) return text
242
+ } catch (_) {}
243
+ }
230
244
 
231
245
  // Step 2: 画像PDFとしてVision APIでOCR(ネイティブ依存なし)
232
246
  if (!opts.apiKey) return '(テキストが抽出できませんでした)'