@make-u-free/migi 0.3.2 → 0.3.3

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +3 -2
  2. package/src/tools.js +30 -5
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@make-u-free/migi",
3
- "version": "0.3.2",
3
+ "version": "0.3.3",
4
4
  "description": "Your AI right-hand agent. Works anywhere, with any LLM API.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -10,11 +10,12 @@
10
10
  "start": "node bin/migi.js"
11
11
  },
12
12
  "dependencies": {
13
+ "adm-zip": "^0.5.16",
13
14
  "chalk": "^5.3.0",
14
15
  "dotenv": "^16.4.0",
15
16
  "glob": "^11.0.0",
16
- "officeparser": "^6.0.7",
17
17
  "openai": "^4.0.0",
18
+ "pdf-parse": "^2.4.5",
18
19
  "xlsx": "^0.18.5"
19
20
  },
20
21
  "engines": {
package/src/tools.js CHANGED
@@ -4,7 +4,8 @@ import { dirname, extname } from 'path'
4
4
  import { request } from 'https'
5
5
  import { glob } from 'glob'
6
6
  import xlsxPkg from 'xlsx'
7
- import officeParser from 'officeparser'
7
+ import pdfParse from 'pdf-parse'
8
+ import AdmZip from 'adm-zip'
8
9
  import OpenAI from 'openai'
9
10
  import { httpsAgent } from './tls.js'
10
11
  const { readFile: xlsxReadFile, utils: xlsxUtils } = xlsxPkg
@@ -143,11 +144,35 @@ export async function executeTool(name, args, opts = {}) {
143
144
  return result.join('\n\n')
144
145
  }
145
146
 
146
- // PDF / PowerPoint / Word
147
- if (OFFICE_EXTS.has(ext)) {
147
+ // PDF
148
+ if (ext === '.pdf') {
148
149
  try {
149
- const text = await officeParser.parseOfficeAsync(args.path)
150
- return text?.trim() || '(テキストが抽出できませんでした)'
150
+ const buf = readFileSync(args.path)
151
+ const data = await pdfParse(buf)
152
+ return data.text?.trim() || '(テキストが抽出できませんでした)'
153
+ } catch (err) {
154
+ return `エラー: PDFの解析に失敗しました: ${err.message}`
155
+ }
156
+ }
157
+
158
+ // PowerPoint(PPTX)/ Word(DOCX)→ ZIPを展開してXMLからテキスト抽出
159
+ if (['.pptx', '.ppt', '.docx', '.doc', '.odp', '.odt'].includes(ext)) {
160
+ try {
161
+ const zip = new AdmZip(args.path)
162
+ const entries = zip.getEntries()
163
+ const xmlTexts = []
164
+ for (const entry of entries) {
165
+ const name = entry.entryName
166
+ const isSlide = name.startsWith('ppt/slides/slide') && name.endsWith('.xml')
167
+ const isDoc = name === 'word/document.xml'
168
+ const isOdp = name === 'content.xml'
169
+ if (isSlide || isDoc || isOdp) {
170
+ const xml = entry.getData().toString('utf-8')
171
+ const text = xml.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim()
172
+ if (text) xmlTexts.push(text)
173
+ }
174
+ }
175
+ return xmlTexts.join('\n\n') || '(テキストが抽出できませんでした)'
151
176
  } catch (err) {
152
177
  return `エラー: ファイルの解析に失敗しました: ${err.message}`
153
178
  }