@make-u-free/migi 0.5.12 → 0.5.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/tools.js +16 -2
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@make-u-free/migi",
|
|
3
|
-
"version": "0.5.12",
|
|
3
|
+
"version": "0.5.13",
|
|
4
4
|
"description": "Your AI right-hand agent. Works anywhere, with any LLM API.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -17,6 +17,7 @@
|
|
|
17
17
|
"glob": "^11.0.0",
|
|
18
18
|
"openai": "^4.0.0",
|
|
19
19
|
"pdf-parse": "^2.4.5",
|
|
20
|
+
"pdfjs-dist": "^5.5.207",
|
|
20
21
|
"xlsx": "^0.18.5"
|
|
21
22
|
},
|
|
22
23
|
"engines": {
|
package/src/tools.js
CHANGED
|
@@ -11,6 +11,7 @@ const require = createRequire(import.meta.url)
|
|
|
11
11
|
const _pdfParseModule = require('pdf-parse')
|
|
12
12
|
const pdfParse = typeof _pdfParseModule === 'function' ? _pdfParseModule : _pdfParseModule.default
|
|
13
13
|
import AdmZip from 'adm-zip'
|
|
14
|
+
import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf.mjs'
|
|
14
15
|
import OpenAI from 'openai'
|
|
15
16
|
import { httpsAgent } from './tls.js'
|
|
16
17
|
const { readFile: xlsxReadFile, utils: xlsxUtils } = xlsxPkg
|
|
@@ -233,7 +234,20 @@ export async function executeTool(name, args, opts = {}) {
|
|
|
233
234
|
if (ext === '.pdf') {
|
|
234
235
|
const buf = readFileSync(args.path)
|
|
235
236
|
|
|
236
|
-
// Step 1:
|
|
237
|
+
// Step 1: pdfjs-dist でテキスト抽出(最も信頼性が高い)
|
|
238
|
+
try {
|
|
239
|
+
const doc = await pdfjsLib.getDocument({ data: new Uint8Array(buf) }).promise
|
|
240
|
+
const pages = []
|
|
241
|
+
for (let p = 1; p <= doc.numPages; p++) {
|
|
242
|
+
const page = await doc.getPage(p)
|
|
243
|
+
const content = await page.getTextContent()
|
|
244
|
+
pages.push(content.items.map(item => item.str).join(''))
|
|
245
|
+
}
|
|
246
|
+
const text = pages.join('\n').trim()
|
|
247
|
+
if (text) return text
|
|
248
|
+
} catch (_) {}
|
|
249
|
+
|
|
250
|
+
// Step 2: pdf-parse でも試みる(フォールバック)
|
|
237
251
|
if (typeof pdfParse === 'function') {
|
|
238
252
|
try {
|
|
239
253
|
const data = await pdfParse(buf)
|
|
@@ -242,7 +256,7 @@ export async function executeTool(name, args, opts = {}) {
|
|
|
242
256
|
} catch (_) {}
|
|
243
257
|
}
|
|
244
258
|
|
|
245
|
-
// Step
|
|
259
|
+
// Step 3: 画像PDFとしてVision APIでOCR(ネイティブ依存なし)
|
|
246
260
|
if (!opts.apiKey) return '(テキストが抽出できませんでした)'
|
|
247
261
|
const images = extractImagesFromPdf(buf)
|
|
248
262
|
if (images.length === 0) return '(テキストも画像も抽出できませんでした)'
|