npm - @make-u-free/migi - Versions diffs - 0.3.2 → 0.3.4 - Mend

@make-u-free/migi 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2)

package/package.json +3 -2
package/src/tools.js +32 -5

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@make-u-free/migi",
-  "version": "0.3.2",
+  "version": "0.3.4",
   "description": "Your AI right-hand agent. Works anywhere, with any LLM API.",
   "type": "module",
   "bin": {
@@ -10,11 +10,12 @@
     "start": "node bin/migi.js"
   },
   "dependencies": {
+    "adm-zip": "^0.5.16",
     "chalk": "^5.3.0",
     "dotenv": "^16.4.0",
     "glob": "^11.0.0",
-    "officeparser": "^6.0.7",
     "openai": "^4.0.0",
+    "pdf-parse": "^2.4.5",
     "xlsx": "^0.18.5"
   },
   "engines": {

package/src/tools.js CHANGED

@@ -4,7 +4,10 @@ import { dirname, extname } from 'path'
 import { request } from 'https'
 import { glob } from 'glob'
 import xlsxPkg from 'xlsx'
-import officeParser from 'officeparser'
+import { createRequire } from 'module'
+const require = createRequire(import.meta.url)
+const pdfParse = require('pdf-parse')
+import AdmZip from 'adm-zip'
 import OpenAI from 'openai'
 import { httpsAgent } from './tls.js'
 const { readFile: xlsxReadFile, utils: xlsxUtils } = xlsxPkg
@@ -143,11 +146,35 @@ export async function executeTool(name, args, opts = {}) {
         return result.join('\n\n')
       }
-      // PDF / PowerPoint / Word
-      if (OFFICE_EXTS.has(ext)) {
+      // PDF
+      if (ext === '.pdf') {
         try {
-          const text = await officeParser.parseOfficeAsync(args.path)
-          return text?.trim() || '(テキストが抽出できませんでした)'
+          const buf = readFileSync(args.path)
+          const data = await pdfParse(buf)
+          return data.text?.trim() || '(テキストが抽出できませんでした)'
+        } catch (err) {
+          return `エラー: PDFの解析に失敗しました: ${err.message}`
+        }
+      }
+      // PowerPoint（PPTX）/ Word（DOCX）→ ZIPを展開してXMLからテキスト抽出
+      if (['.pptx', '.ppt', '.docx', '.doc', '.odp', '.odt'].includes(ext)) {
+        try {
+          const zip = new AdmZip(args.path)
+          const entries = zip.getEntries()
+          const xmlTexts = []
+          for (const entry of entries) {
+            const name = entry.entryName
+            const isSlide = name.startsWith('ppt/slides/slide') && name.endsWith('.xml')
+            const isDoc = name === 'word/document.xml'
+            const isOdp = name === 'content.xml'
+            if (isSlide || isDoc || isOdp) {
+              const xml = entry.getData().toString('utf-8')
+              const text = xml.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim()
+              if (text) xmlTexts.push(text)
+            }
+          }
+          return xmlTexts.join('\n\n') || '(テキストが抽出できませんでした)'
         } catch (err) {
           return `エラー: ファイルの解析に失敗しました: ${err.message}`
         }