@make-u-free/migi 0.3.2 → 0.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +3 -2
  2. package/src/tools.js +32 -5
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@make-u-free/migi",
3
- "version": "0.3.2",
3
+ "version": "0.3.4",
4
4
  "description": "Your AI right-hand agent. Works anywhere, with any LLM API.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -10,11 +10,12 @@
10
10
  "start": "node bin/migi.js"
11
11
  },
12
12
  "dependencies": {
13
+ "adm-zip": "^0.5.16",
13
14
  "chalk": "^5.3.0",
14
15
  "dotenv": "^16.4.0",
15
16
  "glob": "^11.0.0",
16
- "officeparser": "^6.0.7",
17
17
  "openai": "^4.0.0",
18
+ "pdf-parse": "^2.4.5",
18
19
  "xlsx": "^0.18.5"
19
20
  },
20
21
  "engines": {
package/src/tools.js CHANGED
@@ -4,7 +4,10 @@ import { dirname, extname } from 'path'
4
4
  import { request } from 'https'
5
5
  import { glob } from 'glob'
6
6
  import xlsxPkg from 'xlsx'
7
- import officeParser from 'officeparser'
7
+ import { createRequire } from 'module'
8
+ const require = createRequire(import.meta.url)
9
+ const pdfParse = require('pdf-parse')
10
+ import AdmZip from 'adm-zip'
8
11
  import OpenAI from 'openai'
9
12
  import { httpsAgent } from './tls.js'
10
13
  const { readFile: xlsxReadFile, utils: xlsxUtils } = xlsxPkg
@@ -143,11 +146,35 @@ export async function executeTool(name, args, opts = {}) {
143
146
  return result.join('\n\n')
144
147
  }
145
148
 
146
- // PDF / PowerPoint / Word
147
- if (OFFICE_EXTS.has(ext)) {
149
+ // PDF
150
+ if (ext === '.pdf') {
148
151
  try {
149
- const text = await officeParser.parseOfficeAsync(args.path)
150
- return text?.trim() || '(テキストが抽出できませんでした)'
152
+ const buf = readFileSync(args.path)
153
+ const data = await pdfParse(buf)
154
+ return data.text?.trim() || '(テキストが抽出できませんでした)'
155
+ } catch (err) {
156
+ return `エラー: PDFの解析に失敗しました: ${err.message}`
157
+ }
158
+ }
159
+
160
+ // PowerPoint(PPTX)/ Word(DOCX)→ ZIPを展開してXMLからテキスト抽出
161
+ if (['.pptx', '.ppt', '.docx', '.doc', '.odp', '.odt'].includes(ext)) {
162
+ try {
163
+ const zip = new AdmZip(args.path)
164
+ const entries = zip.getEntries()
165
+ const xmlTexts = []
166
+ for (const entry of entries) {
167
+ const name = entry.entryName
168
+ const isSlide = name.startsWith('ppt/slides/slide') && name.endsWith('.xml')
169
+ const isDoc = name === 'word/document.xml'
170
+ const isOdp = name === 'content.xml'
171
+ if (isSlide || isDoc || isOdp) {
172
+ const xml = entry.getData().toString('utf-8')
173
+ const text = xml.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim()
174
+ if (text) xmlTexts.push(text)
175
+ }
176
+ }
177
+ return xmlTexts.join('\n\n') || '(テキストが抽出できませんでした)'
151
178
  } catch (err) {
152
179
  return `エラー: ファイルの解析に失敗しました: ${err.message}`
153
180
  }