@iola_adm/iola-cli 0.1.27 → 0.1.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -108,6 +108,7 @@ iola version --check
108
108
  - планы выполнения, traces, tasks, artifacts, snapshots и policy-профили;
109
109
  - экспорт отчетов в Excel/Word-совместимые файлы;
110
110
  - staged changes, импорт локальных CSV/JSON, индекс локальных документов, report packs, plugins и локальный MCP endpoint;
111
+ - чтение и индексирование `.docx`, `.xlsx`, `.pptx`, `.pdf`, `.md`, `.txt`, `.csv`, `.json`, `.html`;
111
112
  - cron-задачи, локальный daemon и RPC для автоматизаций;
112
113
  - контекстные файлы `IOLA.md` и `.iola/context.md`;
113
114
  - интеграция с публичным MCP-сервером Йошкар-Олы.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@iola_adm/iola-cli",
3
- "version": "0.1.27",
3
+ "version": "0.1.28",
4
4
  "description": "CLI и AI-агент для работы с открытыми данными городского округа Йошкар-Ола.",
5
5
  "license": "MIT",
6
6
  "homepage": "https://github.com/adm-iola/iola-cli#readme",
package/src/cli.js CHANGED
@@ -8,6 +8,7 @@ import readline from "node:readline/promises";
8
8
  import { stdin as input, stdout as output } from "node:process";
9
9
  import { DatabaseSync } from "node:sqlite";
10
10
  import { fileURLToPath } from "node:url";
11
+ import { inflateRawSync, inflateSync } from "node:zlib";
11
12
 
12
13
  const API_BASE_URL = process.env.IOLA_API_BASE_URL || "https://apiiola.yasg.ru/api/v1";
13
14
  const MCP_BASE_URL = process.env.IOLA_MCP_BASE_URL || "https://apiiola.yasg.ru";
@@ -17,7 +18,8 @@ const CONFIG_FILE = path.join(CONFIG_DIR, "config.json");
17
18
  const LAST_GOOD_CONFIG_FILE = path.join(CONFIG_DIR, "config.last-good.json");
18
19
  const SECRETS_FILE = path.join(CONFIG_DIR, "secrets.json");
19
20
  const DB_FILE = path.join(CONFIG_DIR, "iola.db");
20
- const DB_SCHEMA_VERSION = 6;
21
+ const DB_SCHEMA_VERSION = 7;
22
+ const INDEXABLE_EXTENSIONS = /\.(md|txt|csv|json|html|docx|xlsx|pptx|pdf)$/i;
21
23
  const LOCAL_TOOLS = ["search_local", "get_card", "export_data", "run_report", "save_view"];
22
24
  const FILE_TOOLS = ["files_tree", "files_read", "files_search", "files_write", "files_patch"];
23
25
  const ALL_LOCAL_TOOLS = [...LOCAL_TOOLS, ...FILE_TOOLS];
@@ -5833,7 +5835,7 @@ async function filesRead(target, options = {}) {
5833
5835
  if (!info.isFile()) throw new Error(`Это не файл: ${target}`);
5834
5836
  const maxBytes = Number(options.maxBytes || config.files?.maxReadBytes || 200000);
5835
5837
  if (info.size > maxBytes) throw new Error(`Файл слишком большой: ${info.size} байт. Лимит: ${maxBytes}`);
5836
- return readFile(resolved, "utf8");
5838
+ return extractReadableText(resolved);
5837
5839
  }
5838
5840
 
5839
5841
  async function filesSearch(query, options = {}) {
@@ -5882,6 +5884,150 @@ async function filesPatch(target, search, replace) {
5882
5884
  return { path: relative, replacements };
5883
5885
  }
5884
5886
 
5887
/**
 * Read a file and return its contents as text.
 *
 * Office documents and PDFs are read as raw bytes and handed to the matching
 * extractor; every other file is read directly as UTF-8 text.
 *
 * @param {string} file - Path of the file to read.
 * @returns {Promise<string>} Extracted or decoded text.
 */
async function extractReadableText(file) {
  const extension = path.extname(file).toLocaleLowerCase("ru-RU");
  switch (extension) {
    case ".docx":
      return extractDocxText(await readFile(file));
    case ".xlsx":
      return extractXlsxText(await readFile(file));
    case ".pptx":
      return extractPptxText(await readFile(file));
    case ".pdf":
      return extractPdfText(await readFile(file));
    default:
      return readFile(file, "utf8");
  }
}
5895
+
5896
/**
 * Extract text from a .docx archive.
 *
 * The main document part comes first, followed by every part under `word/`
 * whose name mentions footnotes, endnotes or comments.
 *
 * @param {Buffer} buffer - Raw bytes of the .docx file.
 * @returns {string} Text produced by xmlToText for the combined parts.
 */
function extractDocxText(buffer) {
  const parts = readZipEntries(buffer);
  const body = parts.get("word/document.xml") || "";
  const annotations = [];
  for (const [partName, partXml] of parts.entries()) {
    const isAnnotation = partName.startsWith("word/") && /footnotes|endnotes|comments/.test(partName);
    if (isAnnotation) annotations.push(partXml);
  }
  return xmlToText(`${body}\n${annotations.join("\n")}`);
}
5902
+
5903
/**
 * Extract text from an .xlsx archive.
 *
 * Every `xl/worksheets/sheetN.xml` part is emitted as its part name followed
 * by the sheet text, with shared-string cells replaced by the string they
 * reference. Sheets are ordered numerically by name (sheet2 before sheet10),
 * the same way slides are ordered in extractPptxText.
 *
 * @param {Buffer} buffer - Raw bytes of the .xlsx file.
 * @returns {string} Normalized text of all worksheets.
 */
function extractXlsxText(buffer) {
  const entries = readZipEntries(buffer);
  const sharedStrings = parseSharedStrings(entries.get("xl/sharedStrings.xml") || "");

  // Shared strings are already decoded text. They are spliced back into XML
  // that xmlToText decodes once more, so they must be re-escaped; otherwise a
  // value such as "a < b > c" loses "< b >" as if it were a tag, and a literal
  // "&lt;" is decoded twice.
  const escapeXml = (text) => String(text)
    .replace(/&/g, "&amp;")
    .replace(/</g, "&lt;")
    .replace(/>/g, "&gt;");

  // A non-self-closing <c ... t="s"> cell whose <v> holds a shared-string
  // index. The tempered groups keep the match inside a single cell.
  const sharedCell = /<c\b[^>]*\st="s"[^>]*(?<!\/)>(?:(?!<\/c>)[\s\S])*?<v>(\d+)<\/v>(?:(?!<\/c>)[\s\S])*?<\/c>/g;

  const sheets = [...entries.entries()]
    .filter(([name]) => /^xl\/worksheets\/sheet\d+\.xml$/i.test(name))
    .sort(([left], [right]) => left.localeCompare(right, undefined, { numeric: true }));

  const chunks = [];
  for (const [name, xml] of sheets) {
    chunks.push(name);
    const resolved = xml.replace(sharedCell, (_, index) => ` ${escapeXml(sharedStrings[Number(index)] ?? "")} `);
    chunks.push(xmlToText(resolved));
  }
  return normalizeExtractedText(chunks.join("\n"));
}
5915
+
5916
/**
 * Extract text from a .pptx archive.
 *
 * Each `ppt/slides/slideN.xml` part contributes its part name followed by the
 * slide text; slides are ordered numerically by name.
 *
 * @param {Buffer} buffer - Raw bytes of the .pptx file.
 * @returns {string} Normalized text of all slides.
 */
function extractPptxText(buffer) {
  const slideName = /^ppt\/slides\/slide\d+\.xml$/i;
  const byNumericName = (a, b) => a[0].localeCompare(b[0], undefined, { numeric: true });

  const sections = [];
  const slideParts = [...readZipEntries(buffer).entries()].filter((part) => slideName.test(part[0]));
  for (const [partName, partXml] of slideParts.sort(byNumericName)) {
    sections.push(`${partName}\n${xmlToText(partXml)}`);
  }
  return normalizeExtractedText(sections.join("\n\n"));
}
5923
+
5924
/**
 * Best-effort text extraction from a PDF without a full PDF parser.
 *
 * Every `<< ... >> stream ... endstream` section is located with a regular
 * expression. If the dictionary text in front of the `stream` keyword mentions
 * FlateDecode, the body is inflated (zlib first, raw deflate as a fallback).
 * Text-showing strings are then collected from each stream and, finally, from
 * whatever lies outside the streams.
 *
 * @param {Buffer} buffer - Raw bytes of the PDF file.
 * @returns {string} Normalized text; empty when nothing could be extracted.
 */
function extractPdfText(buffer) {
  const latin = buffer.toString("latin1");
  const chunks = [];
  const outsideStreams = [];
  const streamPattern = /<<([\s\S]*?)>>\s*stream\r?\n([\s\S]*?)\r?\nendstream/g;
  let cursor = 0;

  for (const match of latin.matchAll(streamPattern)) {
    const [section, dictionary, body] = match;
    outsideStreams.push(latin.slice(cursor, match.index), dictionary);
    cursor = match.index + section.length;

    let data = Buffer.from(body, "latin1");
    // The filter is looked up in the text that actually precedes `stream`.
    // A fixed window around the match start can sit far away from the
    // stream's own dictionary (the lazy match may begin at an earlier `<<`),
    // which left compressed streams undecoded.
    if (dictionary.includes("FlateDecode")) {
      try {
        data = inflateSync(data);
      } catch {
        try {
          data = inflateRawSync(data);
        } catch {
          // Leave compressed stream unreadable.
        }
      }
    }
    chunks.push(extractPdfStrings(data.toString("latin1")));
  }

  // Scan only what is outside the streams: re-scanning the whole file would
  // repeat the text of every uncompressed stream.
  outsideStreams.push(latin.slice(cursor));
  chunks.push(extractPdfStrings(outsideStreams.join("\n")));
  return normalizeExtractedText(chunks.join("\n"));
}
5948
+
5949
/**
 * Collect the literal strings shown by `Tj` / `TJ` operators in a PDF content
 * stream, in the order they appear.
 *
 * A single pass is used so that `(a) Tj [(b)] TJ (c) Tj` yields "a b c".
 * Array operands are matched structurally (numbers, strings, no nested
 * brackets), so an unrelated `[` earlier in the stream, such as a dash
 * pattern, cannot swallow the `Tj` strings that follow it.
 *
 * @param {string} text - Content-stream text (latin1-decoded bytes).
 * @returns {string} The unescaped strings joined with single spaces.
 */
function extractPdfStrings(text) {
  const strings = [];
  // Alternative 1: `[ ... ] TJ` array operand. Alternative 2: `( ... ) Tj`.
  const showText = /\[((?:[^\[\]()]|\((?:[^()\\]|\\[\s\S])*\))*)\]\s*TJ|\(((?:[^()\\]|\\[\s\S])*)\)\s*T[jJ]?/g;
  const literal = /\(((?:[^()\\]|\\[\s\S])*)\)/g;

  for (const match of String(text).matchAll(showText)) {
    const [, arrayOperand, single] = match;
    if (arrayOperand === undefined) {
      strings.push(unescapePdfString(single));
      continue;
    }
    for (const item of arrayOperand.matchAll(literal)) {
      strings.push(unescapePdfString(item[1]));
    }
  }
  return strings.join(" ");
}
5961
+
5962
/**
 * Resolve the backslash escapes of a PDF literal string.
 *
 * All escapes are handled in one left-to-right pass, so an escaped backslash
 * followed by `n` stays a backslash plus `n` instead of turning into a
 * newline. Handled escapes: `\n \r \t \b \f`, `\(`, `\)`, `\\`, one to three
 * octal digits, and backslash + end-of-line (line continuation, dropped).
 * For any other escaped character the backslash is dropped.
 *
 * @param {string} value - String body without the enclosing parentheses.
 * @returns {string} Unescaped text, re-decoded as UTF-8 when the bytes allow it.
 */
function unescapePdfString(value) {
  const controlEscapes = new Map([["n", "\n"], ["r", "\r"], ["t", "\t"], ["b", "\b"], ["f", "\f"]]);
  const unescaped = String(value).replace(
    /\\(?:([0-7]{1,3})|(\r\n?|\n)|([\s\S]))/g,
    (_, octal, lineBreak, char) => {
      // Octal escapes encode a single byte; overflow above 0xFF is discarded.
      if (octal !== undefined) return String.fromCharCode(Number.parseInt(octal, 8) & 0xff);
      if (lineBreak !== undefined) return "";
      return controlEscapes.get(char) ?? char;
    },
  );
  return decodePossiblyUtf8(unescaped);
}
5971
+
5972
/**
 * Reinterpret a latin1-decoded string as UTF-8 when its bytes form valid UTF-8.
 *
 * @param {string} value - Text whose characters stand for raw bytes.
 * @returns {string} The UTF-8 decoding, or `value` unchanged if that decoding
 *   contains the U+FFFD replacement character.
 */
function decodePossiblyUtf8(value) {
  const bytes = Buffer.from(value, "latin1");
  const asUtf8 = bytes.toString("utf8");
  if (asUtf8.includes("\uFFFD")) {
    return value;
  }
  return asUtf8;
}
5976
+
5977
/**
 * Read every entry of a ZIP archive as UTF-8 text.
 *
 * Entry sizes and offsets are taken from the central directory. Local file
 * headers carry zero sizes when the writer uses a data descriptor
 * (general-purpose flag bit 3), so relying on them dropped every entry of such
 * archives. If no central directory can be read, the archive is scanned for
 * local headers instead. ZIP64 archives are not supported.
 *
 * @param {Buffer} buffer - Raw bytes of the archive.
 * @returns {Map<string, string>} Entry name (with `/` separators) to entry text.
 *   Entries that fail to decompress are skipped.
 */
function readZipEntries(buffer) {
  const entries = new Map();
  let records = zipRecordsFromCentralDirectory(buffer);
  if (records.length === 0) records = zipRecordsFromLocalHeaders(buffer);

  for (const { name, method, dataStart, compressedSize } of records) {
    const compressed = buffer.subarray(dataStart, dataStart + compressedSize);
    try {
      // Method 8 is deflate; anything else is treated as stored data.
      const data = method === 8 ? inflateRawSync(compressed) : compressed;
      entries.set(name.replace(/\\/g, "/"), data.toString("utf8"));
    } catch {
      // Skip unreadable ZIP entry.
    }
  }
  return entries;
}

/** List entry records from the ZIP central directory; empty when it is missing or malformed. */
function zipRecordsFromCentralDirectory(buffer) {
  const records = [];
  const endRecord = buffer.lastIndexOf(Buffer.from([0x50, 0x4b, 0x05, 0x06]));
  if (endRecord < 0 || endRecord + 22 > buffer.length) return records;

  const total = buffer.readUInt16LE(endRecord + 10);
  let cursor = buffer.readUInt32LE(endRecord + 16);
  for (let index = 0; index < total; index += 1) {
    if (cursor + 46 > buffer.length || buffer.readUInt32LE(cursor) !== 0x02014b50) break;
    const method = buffer.readUInt16LE(cursor + 10);
    const compressedSize = buffer.readUInt32LE(cursor + 20);
    const nameLength = buffer.readUInt16LE(cursor + 28);
    const extraLength = buffer.readUInt16LE(cursor + 30);
    const commentLength = buffer.readUInt16LE(cursor + 32);
    const localOffset = buffer.readUInt32LE(cursor + 42);
    const name = buffer.subarray(cursor + 46, cursor + 46 + nameLength).toString("utf8");
    cursor += 46 + nameLength + extraLength + commentLength;

    // The data position still comes from the local header: its name/extra
    // lengths may differ from the central directory copy.
    if (localOffset + 30 > buffer.length || buffer.readUInt32LE(localOffset) !== 0x04034b50) continue;
    const dataStart = localOffset + 30 + buffer.readUInt16LE(localOffset + 26) + buffer.readUInt16LE(localOffset + 28);
    records.push({ name, method, dataStart, compressedSize });
  }
  return records;
}

/** List entry records by scanning for local file headers (fallback for archives without a central directory). */
function zipRecordsFromLocalHeaders(buffer) {
  const records = [];
  const signature = Buffer.from([0x50, 0x4b, 0x03, 0x04]);
  let offset = buffer.indexOf(signature);
  while (offset !== -1 && offset < buffer.length - 30) {
    const method = buffer.readUInt16LE(offset + 8);
    const compressedSize = buffer.readUInt32LE(offset + 18);
    const nameLength = buffer.readUInt16LE(offset + 26);
    const extraLength = buffer.readUInt16LE(offset + 28);
    const nameStart = offset + 30;
    const name = buffer.subarray(nameStart, nameStart + nameLength).toString("utf8");
    const dataStart = nameStart + nameLength + extraLength;
    records.push({ name, method, dataStart, compressedSize });
    offset = buffer.indexOf(signature, dataStart + compressedSize);
  }
  return records;
}
6005
+
6006
/**
 * Parse the shared-string table of a workbook (`xl/sharedStrings.xml`).
 *
 * The position of each returned string is the index worksheet cells refer to,
 * so every `<si>` item must produce exactly one element, including an empty
 * self-closing `<si/>`, which would otherwise pair with the next `</si>` and
 * shift all later indexes.
 *
 * @param {string} xml - Contents of the shared-strings part; may be empty.
 * @returns {string[]} Text of each shared string, in table order.
 */
function parseSharedStrings(xml) {
  const items = String(xml).matchAll(/<si\b[^>]*?(?:\/>|>([\s\S]*?)<\/si>)/g);
  return [...items].map((match) => xmlToText(match[1] ?? ""));
}
6009
+
6010
/**
 * Reduce OOXML markup to plain text.
 *
 * - Adjacent text runs of the same paragraph, cell or shared string are joined
 *   without a separator. Word and PowerPoint routinely split one word across
 *   several runs, and replacing the tags between them with a space broke such
 *   words apart.
 * - `<w:tab/>` becomes a tab; line breaks (with or without attributes),
 *   paragraph ends and spreadsheet row ends become newlines.
 * - Every remaining tag becomes a space.
 * - Named and numeric character references are decoded in a single pass, after
 *   tag removal, so decoded `<` / `&` are never re-interpreted.
 *
 * @param {string} xml - XML fragment or document.
 * @returns {string} Normalized plain text.
 */
function xmlToText(xml) {
  // From a closing text tag to the next opening text tag of the same
  // namespace, with nothing but tags in between and without crossing a
  // paragraph, cell, shared-string item, tab, line break or phonetic run.
  const runBoundary = /<\/(w:|a:|)t>(?:(?!<\/(?:w:p|a:p|c|si)>|<(?:w:tab|w:br|w:cr|a:br|rPh)\b)<[^>]+>)*<\1t(?:\s[^>]*)?>/g;
  const namedEntities = new Map([["quot", "\""], ["apos", "'"], ["lt", "<"], ["gt", ">"], ["amp", "&"]]);
  const decodeEntity = (whole, decimal, hex, named) => {
    if (named !== undefined) return namedEntities.get(named);
    const codePoint = decimal !== undefined ? Number.parseInt(decimal, 10) : Number.parseInt(hex, 16);
    // Leave references outside the Unicode range untouched.
    return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : whole;
  };

  return normalizeExtractedText(String(xml)
    .replace(runBoundary, "")
    // Only the bare element is a tab character; `<w:tab w:val=.../>` inside
    // `<w:tabs>` defines a tab stop and must not produce text.
    .replace(/<w:tab\/>/g, "\t")
    .replace(/<(?:w:br|a:br)\b[^>]*\/>|<\/w:p>|<\/a:p>|<\/row>/g, "\n")
    .replace(/<[^>]+>/g, " ")
    .replace(/&(?:#(\d+)|#x([0-9a-fA-F]+)|(quot|apos|lt|gt|amp));/g, decodeEntity));
}
6021
+
6022
/**
 * Tidy extracted text: drop NUL characters, collapse runs of spaces and tabs,
 * strip whitespace around line breaks, keep at most one blank line between
 * blocks, and trim both ends.
 *
 * Whitespace around a newline is stripped without consuming neighbouring
 * newlines. Matching `\s*` there swallowed the newlines too, so every blank
 * line vanished and the "three or more newlines" rule could never apply.
 *
 * @param {string} text - Raw extracted text (any value is coerced to string).
 * @returns {string} Normalized text.
 */
function normalizeExtractedText(text) {
  return String(text)
    .replace(/\u0000/g, "")
    .replace(/[ \t]+/g, " ")
    // [^\S\n] is "any whitespace except a newline".
    .replace(/[^\S\n]*\n[^\S\n]*/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
6030
+
5885
6031
  async function maybeConfirmFileOperation(operation, target, preview) {
5886
6032
  const config = await loadConfig();
5887
6033
  const approvals = config.files?.approvals || "on-write";
@@ -6157,7 +6303,7 @@ function saveCustomRecords(dataset, rows) {
6157
6303
  async function indexFolder(target, options = {}) {
6158
6304
  const rows = await filesTree(target, { depth: Number(options.depth || 5), limit: Number(options.limit || 1000) });
6159
6305
  let count = 0;
6160
- for (const row of rows.filter((item) => item.type === "file" && /\.(md|txt|csv|json|html)$/i.test(item.path))) {
6306
+ for (const row of rows.filter((item) => item.type === "file" && INDEXABLE_EXTENSIONS.test(item.path))) {
6161
6307
  try {
6162
6308
  const text = await filesRead(row.path, { maxBytes: 1_000_000 });
6163
6309
  saveIndexedDoc(row.path, path.basename(row.path), text);
@@ -36,6 +36,18 @@ iola files write report.md --text "Текст отчета"
36
36
  iola files patch README.md --search old --replace new
37
37
  ```
38
38
 
39
+ Чтение и индексирование поддерживают следующие форматы:
40
+
41
+ - `.docx`
42
+ - `.xlsx`
43
+ - `.pptx`
44
+ - `.pdf`
45
+ - `.md`
46
+ - `.txt`
47
+ - `.csv`
48
+ - `.json`
49
+ - `.html`
50
+
39
51
  AI/tool-agent:
40
52
 
41
53
  ```bash
@@ -43,4 +55,3 @@ iola ask "найди в текущей папке упоминания школ"
43
55
  ```
44
56
 
45
57
  По умолчанию файловый режим `locked`. Запись требует включения `workspace-write` или `full-access`.
46
-
@@ -27,6 +27,18 @@ iola index status
27
27
  iola index search "школа 29"
28
28
  ```
29
29
 
30
+ Поддерживаемые форматы для чтения и индекса:
31
+
32
+ - `.docx`
33
+ - `.xlsx`
34
+ - `.pptx`
35
+ - `.pdf`
36
+ - `.md`
37
+ - `.txt`
38
+ - `.csv`
39
+ - `.json`
40
+ - `.html`
41
+
30
42
  Пакеты отчетов:
31
43
 
32
44
  ```bash
@@ -51,4 +63,3 @@ iola mcp serve
51
63
  ```
52
64
 
53
65
  По умолчанию MCP запускается на порту `daemon.port + 1`.
54
-