@make-u-free/migi 0.5.7 → 0.5.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/tools.js +67 -5
package/package.json
CHANGED
package/src/tools.js
CHANGED
|
@@ -128,6 +128,41 @@ export const teamsToolSchema = {
|
|
|
128
128
|
}
|
|
129
129
|
}
|
|
130
130
|
|
|
131
|
+
// ---- PDFから埋め込み画像を抽出(ネイティブ依存なし) ----
|
|
132
|
+
|
|
133
|
+
/**
 * Extract embedded JPEG and PNG images from raw PDF bytes by scanning for
 * their magic numbers. The PDF object structure is not parsed, so only images
 * stored as complete, unencoded JPEG/PNG byte sequences are found.
 *
 * @param {Buffer} buf - Full contents of the PDF file.
 * @returns {Array<{data: Buffer, mime: string}>} Images in file order. `data`
 *   is a view onto `buf` (shared memory, not a copy); `mime` is either
 *   'image/jpeg' or 'image/png'. Empty when nothing is found.
 */
function extractImagesFromPdf(buf) {
  // Search needles are built once instead of on every candidate match.
  const JPEG_EOI = Buffer.from([0xFF, 0xD9])
  const PNG_SIGNATURE = Buffer.from([0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A])
  // "IEND" chunk type followed by its fixed CRC (AE 42 60 82).
  const PNG_IEND = Buffer.from([0x49, 0x45, 0x4E, 0x44, 0xAE, 0x42, 0x60, 0x82])

  const images = []
  // Once a forward search for a terminator fails, no later start marker of
  // that format can be terminated either, so that format is switched off.
  // This keeps the scan linear and lets the other format still be found.
  let jpegPossible = true
  let pngPossible = true
  let i = 0

  while (i < buf.length - 1 && (jpegPossible || pngPossible)) {
    // JPEG: starts with SOI (FF D8) and ends with EOI (FF D9). SOI is always
    // followed by another marker, so requiring the third byte to be FF
    // rejects most accidental FF D8 pairs in unrelated binary data.
    if (jpegPossible && buf[i] === 0xFF && buf[i + 1] === 0xD8 && buf[i + 2] === 0xFF) {
      const eoiIdx = buf.indexOf(JPEG_EOI, i + 2)
      if (eoiIdx === -1) {
        jpegPossible = false
      } else {
        images.push({ data: buf.subarray(i, eoiIdx + 2), mime: 'image/jpeg' })
        i = eoiIdx + 2
        continue
      }
    }

    // PNG: starts with the 8-byte signature 89 50 4E 47 0D 0A 1A 0A and ends
    // with the IEND chunk.
    if (
      pngPossible &&
      buf[i] === 0x89 &&
      i + 8 <= buf.length &&
      buf.compare(PNG_SIGNATURE, 0, 8, i, i + 8) === 0
    ) {
      const iendIdx = buf.indexOf(PNG_IEND, i + 8)
      if (iendIdx === -1) {
        pngPossible = false
      } else {
        images.push({ data: buf.subarray(i, iendIdx + 8), mime: 'image/png' })
        i = iendIdx + 8
        continue
      }
    }

    i++
  }

  return images
}
|
|
165
|
+
|
|
131
166
|
// ---- diff 表示 ----
|
|
132
167
|
|
|
133
168
|
function showDiff(path, oldContent, newContent) {
|
|
@@ -184,13 +219,40 @@ export async function executeTool(name, args, opts = {}) {
|
|
|
184
219
|
|
|
185
220
|
// PDF
|
|
186
221
|
if (ext === '.pdf') {
|
|
222
|
+
const buf = readFileSync(args.path)
|
|
223
|
+
|
|
224
|
+
// Step 1: テキストPDFとして抽出を試みる
|
|
187
225
|
try {
|
|
188
|
-
const buf = readFileSync(args.path)
|
|
189
226
|
const data = await pdfParse(buf)
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
227
|
+
const text = data.text?.trim()
|
|
228
|
+
if (text) return text
|
|
229
|
+
} catch (_) {}
|
|
230
|
+
|
|
231
|
+
// Step 2: 画像PDFとしてVision APIでOCR(ネイティブ依存なし)
|
|
232
|
+
if (!opts.apiKey) return '(テキストが抽出できませんでした)'
|
|
233
|
+
const images = extractImagesFromPdf(buf)
|
|
234
|
+
if (images.length === 0) return '(テキストも画像も抽出できませんでした)'
|
|
235
|
+
|
|
236
|
+
const client = new OpenAI({
|
|
237
|
+
apiKey: opts.apiKey,
|
|
238
|
+
...(httpsAgent ? { httpAgent: httpsAgent } : {})
|
|
239
|
+
})
|
|
240
|
+
const targets = images.slice(0, 10) // 最大10ページ
|
|
241
|
+
const res = await client.chat.completions.create({
|
|
242
|
+
model: opts.model || 'gpt-4.1-2025-04-14',
|
|
243
|
+
messages: [{
|
|
244
|
+
role: 'user',
|
|
245
|
+
content: [
|
|
246
|
+
{ type: 'text', text: 'このPDFのページ画像です。すべてのテキストを正確に書き起こしてください。' },
|
|
247
|
+
...targets.map(img => ({
|
|
248
|
+
type: 'image_url',
|
|
249
|
+
image_url: { url: `data:${img.mime};base64,${img.data.toString('base64')}` }
|
|
250
|
+
}))
|
|
251
|
+
]
|
|
252
|
+
}],
|
|
253
|
+
max_tokens: 4000
|
|
254
|
+
})
|
|
255
|
+
return res.choices[0].message.content
|
|
194
256
|
}
|
|
195
257
|
|
|
196
258
|
// PowerPoint(PPTX)/ Word(DOCX)→ ZIPを展開してXMLからテキスト抽出
|