npm - @cesarandreslopez/occ - Versions diffs - 0.1.0 → 0.1.2 - Mend

@cesarandreslopez/occ 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md CHANGED Viewed

@@ -77,24 +77,24 @@ occ --ci docs/
 ## Example Output
 ```
--- Documents -----------------------------------------------------------
-Format        Files    Words    Pages    Extra          Size
---------------------------------------------------------------------
-Word             12   34,210      137    1,203 paras    1.2 MB
-PDF               8   22,540       64                   4.5 MB
-Excel             3                      12 sheets      890 KB
---------------------------------------------------------------------
-Total            23   56,750      201    1,203 paras    6.5 MB
---------------------------------------------------------------------
--- Code (via scc) ------------------------------------------------------
-Language         Files    Lines   Blanks  Comments     Code
---------------------------------------------------------------------
-JavaScript          15     2340      180       320     1840
-Python               8     1200       90       150      960
---------------------------------------------------------------------
-Total               23     3540      270       470     2800
---------------------------------------------------------------------
+-- Documents ---------------------------------------------------------------
+  Format    Files    Words    Pages                  Details      Size
+----------------------------------------------------------------------------
+  Word         12   34,210      137              1,203 paras    1.2 MB
+  PDF           8   22,540       64                             4.5 MB
+  Excel         3                                12 sheets      890 KB
+----------------------------------------------------------------------------
+  Total        23   56,750      201              1,203 paras    6.5 MB
+-- Code (via scc) ----------------------------------------------------------
+  Language    Files    Lines   Blanks  Comments     Code
+----------------------------------------------------------------------------
+  JavaScript     15     2340      180       320     1840
+  Python          8     1200       90       150      960
+----------------------------------------------------------------------------
+  Total          23     3540      270       470     2800
+Scanned 23 documents (56,750 words, 201 pages) in 120ms
 ```
 ## Supported Formats
@@ -156,7 +156,7 @@ Tools like `scc`, `cloc`, and `tokei` give you instant visibility into codebases
 ## How It Works
-OCC uses [fast-glob](https://github.com/mrmlnc/fast-glob) for file discovery, dispatches to format-specific parsers (mammoth for DOCX, pdf-parse for PDF, ExcelJS for XLSX, JSZip + officeparser for PPTX/ODF), aggregates metrics, and renders output via cli-table3. For code metrics, it shells out to a vendored [scc](https://github.com/boyter/scc) binary (auto-downloaded during `npm install`, with PATH fallback).
+OCC uses [fast-glob](https://github.com/mrmlnc/fast-glob) for file discovery, dispatches to format-specific parsers (mammoth for DOCX, pdf-parse for PDF, SheetJS for XLSX, JSZip + officeparser for PPTX/ODF), aggregates metrics, and renders output via cli-table3. For code metrics, it shells out to a vendored [scc](https://github.com/boyter/scc) binary (auto-downloaded during `npm install`, with PATH fallback).
 ## Contributing

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@cesarandreslopez/occ",
-  "version": "0.1.0",
+  "version": "0.1.2",
   "description": "Office Cloc and Count — scc-style summary tables for office documents",
   "type": "module",
   "bin": {
@@ -20,8 +20,17 @@
     "url": "https://github.com/cesarandreslopez/occ/issues"
   },
   "keywords": [
-    "office", "documents", "cloc", "count", "metrics",
-    "docx", "xlsx", "pptx", "pdf", "scc", "cli"
+    "office",
+    "documents",
+    "cloc",
+    "count",
+    "metrics",
+    "docx",
+    "xlsx",
+    "pptx",
+    "pdf",
+    "scc",
+    "cli"
   ],
   "files": [
     "bin/",
@@ -39,12 +48,12 @@
     "chalk": "^5.3.0",
     "cli-table3": "^0.6.5",
     "commander": "^12.1.0",
-    "exceljs": "^4.4.0",
     "fast-glob": "^3.3.2",
     "jszip": "^3.10.1",
     "mammoth": "^1.8.0",
     "officeparser": "^6.0.1",
-    "pdf-parse": "^1.1.1"
+    "pdf-parse": "^1.1.1",
+    "xlsx": "^0.18.5"
   },
   "devDependencies": {
     "docx": "^9.6.0"

package/scripts/postinstall.js CHANGED Viewed

@@ -3,7 +3,8 @@
 // Downloads the scc binary for the current platform during npm install.
 // Falls back gracefully — if download fails, occ will look for scc on PATH.
-import { createWriteStream, existsSync, mkdirSync, chmodSync, unlinkSync } from 'node:fs';
+import { createWriteStream, readFileSync, existsSync, mkdirSync, chmodSync, unlinkSync } from 'node:fs';
+import { createHash } from 'node:crypto';
 import { pipeline } from 'node:stream/promises';
 import { execFile } from 'node:child_process';
 import { promisify } from 'node:util';
@@ -27,6 +28,18 @@ const ARCH_MAP = {
   ia32: 'i386',
 };
+// SHA-256 checksums from the official scc v3.7.0 release
+const CHECKSUMS = {
+  'scc_Darwin_arm64.tar.gz':  '376cbae670be59ee64f398de20e0694ec434bf8a9b842642952b0ab0be5f3961',
+  'scc_Darwin_x86_64.tar.gz': 'c3f7457856b9169ccb3c1dd14198e67f730bee065f24d9051bf52cdc2a719ecc',
+  'scc_Linux_arm64.tar.gz':   'dcb05c6e993bb2d8d2da4765ff018f2e752325dd205a41698929c55e4123575d',
+  'scc_Linux_i386.tar.gz':    '1de91dae8a927ac2063a99b520d9a474644db6827fe6f85e3d8f87a1def3b14d',
+  'scc_Linux_x86_64.tar.gz':  '3d9d65b00ca874c2b29151abe7e1480736f5229edc3ce8e4b2791460cdfabf5a',
+  'scc_Windows_arm64.zip':    'fd114614c10382c9ed2e32d5455cc4b51960a9f71691c5c1ca42b31adea5b84d',
+  'scc_Windows_i386.zip':     '7b887022c37dc79e79ae51897030a6ff2515ab7b124e7b2aabcb0fba15412b05',
+  'scc_Windows_x86_64.zip':   '97abf9d55d4b79d3310536d576ccbdf5017aeb425780e850336120b6e67622e1',
+};
 function getAssetName() {
   const platform = PLATFORM_MAP[process.platform];
   const arch = ARCH_MAP[process.arch];
@@ -53,6 +66,19 @@ async function download(url, dest) {
   await pipeline(res.body, createWriteStream(dest));
 }
+function verifyChecksum(filePath, assetName) {
+  const expected = CHECKSUMS[assetName];
+  if (!expected) {
+    console.warn(`occ: No checksum available for ${assetName}, skipping verification`);
+    return;
+  }
+  const data = readFileSync(filePath);
+  const actual = createHash('sha256').update(data).digest('hex');
+  if (actual !== expected) {
+    throw new Error(`Checksum mismatch for ${assetName}\n  Expected: ${expected}\n  Got:      ${actual}`);
+  }
+}
 async function extract(archive, destDir) {
   if (archive.endsWith('.tar.gz')) {
     await execFileAsync('tar', ['xzf', archive, '-C', destDir]);
@@ -92,6 +118,7 @@ async function main() {
   try {
     console.log(`Downloading scc v${SCC_VERSION} for ${process.platform}-${process.arch}...`);
     await download(url, archivePath);
+    verifyChecksum(archivePath, assetName);
     console.log('Extracting...');
     await extract(archivePath, vendorDir);

package/src/cli.js CHANGED Viewed

@@ -1,20 +1,22 @@
-import { Command } from 'commander';
-import { writeFile } from 'node:fs/promises';
+import { Command, Option } from 'commander';
+import { readFile, writeFile } from 'node:fs/promises';
 import { findFiles } from './walker.js';
 import { parseFiles } from './parsers/index.js';
 import { aggregate } from './stats.js';
-import { formatDocumentTable, formatSccTable } from './output/tabular.js';
+import { formatDocumentTable, formatSccTable, formatSummaryLine } from './output/tabular.js';
 import { formatJson } from './output/json.js';
 import { checkScc, runScc } from './scc.js';
 import { createProgress } from './progress.js';
+const pkg = JSON.parse(await readFile(new URL('../package.json', import.meta.url), 'utf8'));
 export async function run(argv) {
   const program = new Command();
   program
     .name('occ')
     .description('Office Cloc and Count — scc-style summary tables for office documents')
-    .version('0.1.0')
+    .version(pkg.version)
     .argument('[directories...]', 'directories to scan', [])
     .option('-f, --by-file', 'show a row per file instead of grouped by type')
     .option('--format <type>', 'output format: tabular or json', 'tabular')
@@ -22,7 +24,7 @@ export async function run(argv) {
     .option('--exclude-ext <exts>', 'comma-separated extensions to exclude')
     .option('--exclude-dir <dirs>', 'directories to skip (comma-separated)', 'node_modules,.git')
     .option('--no-gitignore', 'disable .gitignore respect')
-    .option('--sort <col>', 'sort by: files, name, words, size', 'files')
+    .addOption(new Option('--sort <col>', 'sort by: files, name, words, size').choices(['files', 'name', 'words', 'size']).default('files'))
     .option('-o, --output <file>', 'write output to file')
     .option('--ci', 'ASCII-only output, no colors')
     .option('--large-file-limit <mb>', 'skip files over this size in MB', '50')
@@ -39,14 +41,25 @@ export async function run(argv) {
   await program.parseAsync(argv);
 }
+function validateLargeFileLimit(value) {
+  const n = parseFloat(value);
+  if (Number.isNaN(n) || n <= 0) {
+    throw new Error(`Invalid --large-file-limit value: "${value}" (must be a positive number)`);
+  }
+  return n;
+}
 async function execute(directories, opts) {
+  const startTime = Date.now();
   const excludeDirs = opts.excludeDir
     ? opts.excludeDir.split(',').map(d => d.trim())
     : ['node_modules', '.git'];
-  // Check scc availability (unless --no-code)
-  if (opts.code !== false) {
-    await checkScc();
+  const includeCode = opts.code !== false;
+  let sccBinary = null;
+  if (includeCode) {
+    sccBinary = await checkScc();
   }
   // Find and parse office documents
@@ -55,7 +68,7 @@ async function execute(directories, opts) {
     excludeExt: opts.excludeExt,
     excludeDir: excludeDirs,
     noGitignore: !opts.gitignore,
-    largeFileLimit: parseFloat(opts.largeFileLimit),
+    largeFileLimit: validateLargeFileLimit(opts.largeFileLimit),
   });
   const showProgress = opts.format !== 'json' && process.stderr.isTTY;
@@ -71,11 +84,10 @@ async function execute(directories, opts) {
     sort: opts.sort,
   });
-  // Run scc for code files
   let sccData = null;
-  if (opts.code !== false) {
+  if (includeCode) {
     if (showProgress) process.stderr.write('\rAnalyzing code with scc...');
-    sccData = await runScc(directories, {
+    sccData = await runScc(sccBinary, directories, {
       byFile: opts.byFile,
       excludeDir: excludeDirs,
       sort: opts.sort,
@@ -106,9 +118,9 @@ async function execute(directories, opts) {
         parts.push(formatSccTable(sccData, { ci: opts.ci, byFile: opts.byFile }));
       }
-      if (files.length === 0) {
-        parts.unshift('\nNo office documents found.');
-      }
+      const elapsed = Date.now() - startTime;
+      const summary = formatSummaryLine(stats, sccData, elapsed, { ci: opts.ci });
+      if (summary) parts.push(summary);
     }
     if (skipped.length > 0) {

package/src/output/json.js CHANGED Viewed

@@ -1,31 +1,35 @@
+import { METRIC_FIELDS, hasKey } from '../utils.js';
 export function formatJson(stats, sccData = null) {
+  const { columns } = stats;
+  const mapRow = (r) => {
+    const entry = {
+      type: r.fileType,
+      ...(r.fileName ? { name: r.fileName } : {}),
+      ...(r.filePath ? { path: r.filePath } : {}),
+      count: r.files,
+    };
+    for (const f of METRIC_FIELDS) {
+      if (r[hasKey(f)]) entry[f] = r[f] || 0;
+    }
+    entry.size = r.size;
+    return entry;
+  };
+  const mapTotals = (t) => {
+    const entry = { files: t.files };
+    for (const f of METRIC_FIELDS) {
+      if (columns[hasKey(f)]) entry[f] = t[f];
+    }
+    entry.size = t.size;
+    return entry;
+  };
   const output = {
     documents: {
-      files: stats.rows.map(r => ({
-        type: r.fileType,
-        ...(r.fileName ? { name: r.fileName } : {}),
-        ...(r.filePath ? { path: r.filePath } : {}),
-        count: r.files,
-        words: r.words || 0,
-        pages: r.pages || 0,
-        paragraphs: r.paragraphs || 0,
-        sheets: r.sheets || 0,
-        rows: r.rows || 0,
-        cells: r.cells || 0,
-        slides: r.slides || 0,
-        size: r.size,
-      })),
-      totals: {
-        files: stats.totals.files,
-        words: stats.totals.words,
-        pages: stats.totals.pages,
-        paragraphs: stats.totals.paragraphs,
-        sheets: stats.totals.sheets,
-        rows: stats.totals.rows,
-        cells: stats.totals.cells,
-        slides: stats.totals.slides,
-        size: stats.totals.size,
-      },
+      files: stats.rows.map(mapRow),
+      totals: mapTotals(stats.totals),
     },
   };

package/src/output/tabular.js CHANGED Viewed

@@ -6,25 +6,30 @@ export function formatDocumentTable(stats, options = {}) {
   const { ci = false } = options;
   const c = ci ? noColor : colorize;
-  const headers = buildHeaders(stats.columns, stats.mode === 'by-file', c);
+  const isByFile = stats.mode === 'by-file';
+  const headers = buildHeaders(stats.columns, isByFile, c);
+  const colAligns = buildColAligns(stats.columns, isByFile);
   const table = new Table({
     head: headers.map(h => h.label),
-    chars: ci ? asciiChars() : unicodeChars(),
+    chars: tableChars(ci),
     style: { head: [], border: [] },
+    colAligns,
   });
   for (const row of stats.rows) {
-    table.push(buildRow(row, stats.columns, stats.mode === 'by-file', c));
+    table.push(buildRow(row, stats.columns, isByFile, c));
   }
-  // Totals row
-  const isByFile = stats.mode === 'by-file';
   table.push(buildRow(stats.totals, stats.columns, isByFile, c, true));
+  const tableStr = addSeparators(table.toString(), ci ? '-' : '─');
+  const tableWidth = stripAnsi(tableStr.split('\n')[0]).length;
   const lines = [];
   lines.push('');
-  lines.push(c.header(`── Documents ${'─'.repeat(56)}`));
-  lines.push(table.toString());
+  lines.push(c.header(sectionHeader('Documents', tableWidth, ci)));
+  lines.push(tableStr);
   // Footnotes
   const hasEstimatedPages = stats.rows.some(r =>
@@ -52,7 +57,7 @@ export function formatSccTable(sccData, options = {}) {
       c.headerCell('Comments'),
       c.headerCell('Code'),
     ],
-    chars: ci ? asciiChars() : unicodeChars(),
+    chars: tableChars(ci),
     style: { head: [], border: [] },
     colAligns: ['left', 'right', 'right', 'right', 'right', 'right'],
   });
@@ -66,8 +71,8 @@ export function formatSccTable(sccData, options = {}) {
           c.type(file.Filename || file.Location || ''),
           formatNumber(1),
           c.number(formatNumber(file.Lines)),
-          formatNumber(file.Blank),
-          formatNumber(file.Comment),
+          c.number(formatNumber(file.Blank)),
+          c.number(formatNumber(file.Comment)),
           c.number(formatNumber(file.Code)),
         ]);
       }
@@ -76,8 +81,8 @@ export function formatSccTable(sccData, options = {}) {
         c.type(lang.Name),
         formatNumber(lang.Count),
         c.number(formatNumber(lang.Lines)),
-        formatNumber(lang.Blank),
-        formatNumber(lang.Comment),
+        c.number(formatNumber(lang.Blank)),
+        c.number(formatNumber(lang.Comment)),
         c.number(formatNumber(lang.Code)),
       ]);
     }
@@ -97,30 +102,117 @@ export function formatSccTable(sccData, options = {}) {
     c.total(formatNumber(totalCode)),
   ]);
+  const tableStr = addSeparators(table.toString(), ci ? '-' : '─');
+  const tableWidth = stripAnsi(tableStr.split('\n')[0]).length;
   const lines = [];
   lines.push('');
-  lines.push(c.header(`── Code (via scc) ${'─'.repeat(51)}`));
-  lines.push(table.toString());
+  lines.push(c.header(sectionHeader('Code (via scc)', tableWidth, ci)));
+  lines.push(tableStr);
   return lines.join('\n');
 }
+export function formatSummaryLine(stats, sccData, elapsed, options = {}) {
+  const { ci = false } = options;
+  const c = ci ? noColor : colorize;
+  const parts = [];
+  if (stats && stats.totals.files > 0) {
+    let docPart = `${stats.totals.files} document${stats.totals.files !== 1 ? 's' : ''}`;
+    const details = [];
+    if (stats.totals.words > 0) details.push(`${formatNumber(stats.totals.words)} word${stats.totals.words !== 1 ? 's' : ''}`);
+    if (stats.totals.pages > 0) details.push(`${formatNumber(stats.totals.pages)} page${stats.totals.pages !== 1 ? 's' : ''}`);
+    if (details.length > 0) docPart += ` (${details.join(', ')})`;
+    parts.push(docPart);
+  }
+  if (sccData && sccData.length > 0) {
+    const totalCode = sccData.reduce((sum, l) => sum + (l.Code || 0), 0);
+    parts.push(`${formatNumber(totalCode)} lines of code`);
+  }
+  if (parts.length === 0) return '';
+  const time = elapsed >= 1000
+    ? `${(elapsed / 1000).toFixed(1)}s`
+    : `${elapsed}ms`;
+  return '\n' + c.dim(`Scanned ${parts.join(', ')} in ${time}`) + '\n';
+}
+/**
+ * Post-process table string to insert separator lines after the header row
+ * and before the totals row (last data row).
+ *
+ * Table layout from cli-table3 (with empty mid chars):
+ *   line 0: top border
+ *   line 1: header row
+ *   lines 2..N-2: data rows
+ *   line N-1: totals row
+ *   line N: bottom border
+ */
+function addSeparators(tableStr, char) {
+  const lines = tableStr.split('\n');
+  if (lines.length < 4) return tableStr;
+  // Use header row width — top border is narrower due to single-char top-mid vs 2-char middle
+  const width = stripAnsi(lines[1]).length;
+  const sep = char.repeat(width);
+  const result = [];
+  // Skip lines[0] (top border) — section header already serves as delimiter
+  result.push(lines[1]); // header row
+  result.push(sep);      // header separator
+  // Data rows (everything except first 2 and last 2)
+  for (let i = 2; i < lines.length - 2; i++) {
+    result.push(lines[i]);
+  }
+  result.push(sep);                    // totals separator
+  result.push(lines[lines.length - 2]); // totals row
+  // Skip bottom border — totals row is the natural end
+  return result.join('\n');
+}
+function stripAnsi(str) {
+  return str.replace(/\x1b\[[0-9;]*m/g, '');
+}
+function sectionHeader(title, width, ci = false) {
+  const dash = ci ? '-' : '─';
+  const prefix = `${dash}${dash} ${title} `;
+  const padLen = Math.max(0, width - prefix.length);
+  return prefix + dash.repeat(padLen);
+}
+function hasExtraColumns(columns) {
+  return columns.hasParagraphs || columns.hasSheets || columns.hasSlides ||
+         columns.hasRows || columns.hasCells;
+}
 function buildHeaders(columns, byFile, c) {
   const headers = [];
   headers.push({ key: 'format', label: c.headerCell(byFile ? 'File' : 'Format') });
   if (!byFile) headers.push({ key: 'files', label: c.headerCell('Files') });
   if (columns.hasWords) headers.push({ key: 'words', label: c.headerCell('Words') });
   if (columns.hasPages) headers.push({ key: 'pages', label: c.headerCell('Pages') });
-  // Extra column for type-specific metrics
-  const hasExtra = columns.hasParagraphs || columns.hasSheets || columns.hasSlides ||
-                   columns.hasRows || columns.hasCells;
-  if (hasExtra) headers.push({ key: 'extra', label: c.headerCell('Extra') });
+  if (hasExtraColumns(columns)) headers.push({ key: 'extra', label: c.headerCell('Details') });
   headers.push({ key: 'size', label: c.headerCell('Size') });
   return headers;
 }
+function buildColAligns(columns, byFile) {
+  const aligns = ['left']; // Format/File
+  if (!byFile) aligns.push('right'); // Files
+  if (columns.hasWords) aligns.push('right');
+  if (columns.hasPages) aligns.push('right');
+  if (hasExtraColumns(columns)) aligns.push('right');
+  aligns.push('right'); // Size
+  return aligns;
+}
 function buildRow(row, columns, byFile, c, isTotal = false) {
   const fmt = isTotal ? c.total : (v) => v;
   const fmtType = isTotal ? c.total : c.type;
@@ -142,9 +234,7 @@ function buildRow(row, columns, byFile, c, isTotal = false) {
   if (columns.hasWords) cells.push(fmtNum(row.words ? formatNumber(row.words) : ''));
   if (columns.hasPages) cells.push(fmtNum(row.pages ? formatNumber(row.pages) : ''));
-  const hasExtra = columns.hasParagraphs || columns.hasSheets || columns.hasSlides ||
-                   columns.hasRows || columns.hasCells;
-  if (hasExtra) {
+  if (hasExtraColumns(columns)) {
     const parts = [];
     if (row.paragraphs) parts.push(`${formatNumber(row.paragraphs)} paras`);
     if (row.sheets) parts.push(`${formatNumber(row.sheets)} sheets`);
@@ -158,21 +248,13 @@ function buildRow(row, columns, byFile, c, isTotal = false) {
   return cells;
 }
-function unicodeChars() {
+function tableChars(ci) {
+  const ch = ci ? '-' : '─';
   return {
-    top: '─', 'top-mid': '─', 'top-left': '─', 'top-right': '─',
-    bottom: '─', 'bottom-mid': '─', 'bottom-left': '─', 'bottom-right': '─',
-    left: ' ', 'left-mid': '─', mid: '─', 'mid-mid': '─',
-    right: ' ', 'right-mid': '─', middle: '  ',
-  };
-}
-function asciiChars() {
-  return {
-    top: '-', 'top-mid': '-', 'top-left': '-', 'top-right': '-',
-    bottom: '-', 'bottom-mid': '-', 'bottom-left': '-', 'bottom-right': '-',
-    left: ' ', 'left-mid': '-', mid: '-', 'mid-mid': '-',
-    right: ' ', 'right-mid': '-', middle: '  ',
+    top: ch, 'top-mid': ch, 'top-left': ch, 'top-right': ch,
+    bottom: ch, 'bottom-mid': ch, 'bottom-left': ch, 'bottom-right': ch,
+    left: ' ', 'left-mid': '', mid: '', 'mid-mid': '',
+    right: ' ', 'right-mid': '', middle: '  ',
   };
 }
@@ -186,12 +268,5 @@ const colorize = {
   dim: (s) => chalk.dim(s),
 };
-const noColor = {
-  header: (s) => s,
-  headerCell: (s) => s,
-  type: (s) => s,
-  number: (s) => s,
-  total: (s) => s,
-  error: (s) => s,
-  dim: (s) => s,
-};
+const identity = (s) => s;
+const noColor = Object.fromEntries(Object.keys(colorize).map(k => [k, identity]));

package/src/parsers/docx.js CHANGED Viewed

@@ -10,14 +10,6 @@ export async function parseDocx(filePath) {
   return {
     fileType: 'Word',
-    metrics: {
-      words,
-      pages,
-      paragraphs,
-      sheets: null,
-      rows: null,
-      cells: null,
-      slides: null,
-    },
+    metrics: { words, pages, paragraphs },
   };
 }

package/src/parsers/index.js CHANGED Viewed

@@ -15,18 +15,22 @@ const PARSER_MAP = {
   odp: parseOdf,
 };
+function failureResult(filePath, size, ext) {
+  return {
+    filePath,
+    size,
+    success: false,
+    fileType: EXTENSION_TO_TYPE[ext] || ext.toUpperCase(),
+    metrics: null,
+  };
+}
 export async function parseFile(filePath, size) {
   const ext = getExtension(filePath);
   const parser = PARSER_MAP[ext];
   if (!parser) {
-    return {
-      filePath,
-      size,
-      success: false,
-      fileType: EXTENSION_TO_TYPE[ext] || ext.toUpperCase(),
-      metrics: null,
-    };
+    return failureResult(filePath, size, ext);
   }
   try {
@@ -39,13 +43,7 @@ export async function parseFile(filePath, size) {
       metrics: result.metrics,
     };
   } catch {
-    return {
-      filePath,
-      size,
-      success: false,
-      fileType: EXTENSION_TO_TYPE[ext] || ext.toUpperCase(),
-      metrics: null,
-    };
+    return failureResult(filePath, size, ext);
   }
 }

package/src/parsers/odf.js CHANGED Viewed

@@ -5,11 +5,12 @@ import { countWords, getExtension } from '../utils.js';
 export async function parseOdf(filePath) {
   const ext = getExtension(filePath);
-  const buffer = await readFile(filePath);
   if (ext === 'odt') return parseOdt(filePath);
-  if (ext === 'ods') return parseOds(filePath, buffer);
-  if (ext === 'odp') return parseOdp(filePath, buffer);
+  const buffer = await readFile(filePath);
+  if (ext === 'ods') return parseOds(buffer);
+  if (ext === 'odp') return parseOdp(buffer);
   throw new Error(`Unsupported ODF format: ${ext}`);
 }
@@ -22,19 +23,11 @@ async function parseOdt(filePath) {
   return {
     fileType: 'ODT',
-    metrics: {
-      words,
-      pages,
-      paragraphs,
-      sheets: null,
-      rows: null,
-      cells: null,
-      slides: null,
-    },
+    metrics: { words, pages, paragraphs },
   };
 }
-async function parseOds(filePath, buffer) {
+async function parseOds(buffer) {
   const zip = await JSZip.loadAsync(buffer);
   const contentXml = await zip.file('content.xml')?.async('text');
   if (!contentXml) throw new Error('No content.xml found in ODS');
@@ -42,44 +35,28 @@ async function parseOds(filePath, buffer) {
   const sheets = (contentXml.match(/<table:table /g) || []).length;
   const rows = (contentXml.match(/<table:table-row/g) || []).length;
-  // Use officeparser for cell text count
-  const text = await officeparser.parseOffice(filePath);
+  // Use officeparser with buffer to avoid re-reading from disk
+  const text = await officeparser.parseOffice(buffer);
   const cells = text.split(/\n/).filter(s => s.trim().length > 0).length;
   return {
     fileType: 'ODS',
-    metrics: {
-      words: null,
-      pages: null,
-      paragraphs: null,
-      sheets,
-      rows,
-      cells,
-      slides: null,
-    },
+    metrics: { sheets, rows, cells },
   };
 }
-async function parseOdp(filePath, buffer) {
+async function parseOdp(buffer) {
   const zip = await JSZip.loadAsync(buffer);
   const contentXml = await zip.file('content.xml')?.async('text');
   if (!contentXml) throw new Error('No content.xml found in ODP');
   const slides = (contentXml.match(/<draw:page /g) || []).length;
-  const text = await officeparser.parseOffice(filePath);
+  const text = await officeparser.parseOffice(buffer);
   const words = countWords(text);
   return {
     fileType: 'ODP',
-    metrics: {
-      words,
-      pages: null,
-      paragraphs: null,
-      sheets: null,
-      rows: null,
-      cells: null,
-      slides,
-    },
+    metrics: { words, slides },
   };
 }

package/src/parsers/pdf.js CHANGED Viewed

@@ -25,6 +25,7 @@ function beginSuppression() {
 function endSuppression() {
   if (--suppressionDepth === 0) {
     console.log = originalLog;
+    capturedWarnings.length = 0;
   }
 }
@@ -43,14 +44,6 @@ export async function parsePdf(filePath) {
   return {
     fileType: 'PDF',
-    metrics: {
-      words,
-      pages: data.numpages,
-      paragraphs: null,
-      sheets: null,
-      rows: null,
-      cells: null,
-      slides: null,
-    },
+    metrics: { words, pages: data.numpages },
   };
 }

package/src/parsers/pptx.js CHANGED Viewed

@@ -13,20 +13,12 @@ export async function parsePptx(filePath) {
   );
   const slides = slideFiles.length;
-  // Extract text via officeparser
-  const text = await officeparser.parseOffice(filePath);
+  // Extract text via officeparser (reuse buffer to avoid re-reading)
+  const text = await officeparser.parseOffice(buffer);
   const words = countWords(text);
   return {
     fileType: 'PowerPoint',
-    metrics: {
-      words,
-      pages: null,
-      paragraphs: null,
-      sheets: null,
-      rows: null,
-      cells: null,
-      slides,
-    },
+    metrics: { words, slides },
   };
 }

package/src/parsers/xlsx.js CHANGED Viewed

@@ -1,32 +1,23 @@
-import ExcelJS from 'exceljs';
+import XLSX from 'xlsx';
 export async function parseXlsx(filePath) {
-  const workbook = new ExcelJS.Workbook();
-  await workbook.xlsx.readFile(filePath);
+  const workbook = XLSX.readFile(filePath);
-  const sheets = workbook.worksheets.length;
+  const sheets = workbook.SheetNames.length;
   let rows = 0;
   let cells = 0;
-  for (const worksheet of workbook.worksheets) {
-    rows += worksheet.actualRowCount || 0;
-    worksheet.eachRow((row) => {
-      row.eachCell(() => {
-        cells++;
-      });
-    });
+  for (const name of workbook.SheetNames) {
+    const sheet = workbook.Sheets[name];
+    const ref = sheet['!ref'];
+    if (!ref) continue;
+    const range = XLSX.utils.decode_range(ref);
+    rows += range.e.r - range.s.r + 1;
+    cells += (range.e.r - range.s.r + 1) * (range.e.c - range.s.c + 1);
   }
   return {
     fileType: 'Excel',
-    metrics: {
-      words: null,
-      pages: null,
-      paragraphs: null,
-      sheets,
-      rows,
-      cells,
-      slides: null,
-    },
+    metrics: { sheets, rows, cells },
   };
 }

package/src/scc.js CHANGED Viewed

@@ -28,19 +28,18 @@ async function findScc() {
   }
 }
-let sccBinary = null;
 export async function checkScc() {
-  sccBinary = await findScc();
-  if (!sccBinary) {
+  const binary = await findScc();
+  if (!binary) {
     throw new Error(
       'scc is required but not found.\n' +
       'Run "npm install" to auto-download it, or install manually from https://github.com/boyter/scc'
     );
   }
+  return binary;
 }
-export async function runScc(directories, options = {}) {
+export async function runScc(sccBinary, directories, options = {}) {
   const {
     byFile = false,
     excludeDir = [],
@@ -49,7 +48,6 @@ export async function runScc(directories, options = {}) {
     noGitignore = false,
   } = options;
-  if (!sccBinary) sccBinary = await findScc();
   if (!sccBinary) return [];
   const args = ['--format', 'json'];

package/src/stats.js CHANGED Viewed

@@ -1,5 +1,7 @@
 import path from 'node:path';
-import { EXTENSION_TO_TYPE } from './utils.js';
+import { METRIC_FIELDS, hasKey } from './utils.js';
+const SUM_FIELDS = ['files', ...METRIC_FIELDS, 'size'];
 export function aggregate(results, options = {}) {
   const { byFile = false, sort = 'files' } = options;
@@ -16,25 +18,9 @@ function aggregateByType(results, sort) {
   for (const r of results) {
     const key = r.success ? r.fileType : 'Unreadable';
     if (!groups[key]) {
-      groups[key] = {
-        fileType: key,
-        files: 0,
-        words: 0,
-        pages: 0,
-        paragraphs: 0,
-        sheets: 0,
-        rows: 0,
-        cells: 0,
-        slides: 0,
-        size: 0,
-        hasWords: false,
-        hasPages: false,
-        hasParagraphs: false,
-        hasSheets: false,
-        hasRows: false,
-        hasCells: false,
-        hasSlides: false,
-      };
+      const g = { fileType: key, files: 0, size: 0 };
+      for (const f of METRIC_FIELDS) { g[f] = 0; g[hasKey(f)] = false; }
+      groups[key] = g;
     }
     const g = groups[key];
     g.files++;
@@ -42,54 +28,39 @@ function aggregateByType(results, sort) {
     if (r.success && r.metrics) {
       const m = r.metrics;
-      if (m.words != null) { g.words += m.words; g.hasWords = true; }
-      if (m.pages != null) { g.pages += m.pages; g.hasPages = true; }
-      if (m.paragraphs != null) { g.paragraphs += m.paragraphs; g.hasParagraphs = true; }
-      if (m.sheets != null) { g.sheets += m.sheets; g.hasSheets = true; }
-      if (m.rows != null) { g.rows += m.rows; g.hasRows = true; }
-      if (m.cells != null) { g.cells += m.cells; g.hasCells = true; }
-      if (m.slides != null) { g.slides += m.slides; g.hasSlides = true; }
+      for (const f of METRIC_FIELDS) {
+        if (m[f] != null) { g[f] += m[f]; g[hasKey(f)] = true; }
+      }
     }
   }
-  const rows = Object.values(groups);
-  sortRows(rows, sort);
-  const totals = computeTotals(rows);
-  const columns = detectColumns(rows);
-  return { rows, totals, columns, mode: 'grouped' };
+  return finalize(Object.values(groups), sort, 'grouped');
 }
 function aggregateByFile(results, sort) {
-  const rows = results.map(r => ({
-    fileType: r.success ? r.fileType : 'Unreadable',
-    fileName: path.basename(r.filePath),
-    filePath: r.filePath,
-    files: 1,
-    words: r.metrics?.words || 0,
-    pages: r.metrics?.pages || 0,
-    paragraphs: r.metrics?.paragraphs || 0,
-    sheets: r.metrics?.sheets || 0,
-    rows: r.metrics?.rows || 0,
-    cells: r.metrics?.cells || 0,
-    slides: r.metrics?.slides || 0,
-    size: r.size || 0,
-    hasWords: r.metrics?.words != null,
-    hasPages: r.metrics?.pages != null,
-    hasParagraphs: r.metrics?.paragraphs != null,
-    hasSheets: r.metrics?.sheets != null,
-    hasRows: r.metrics?.rows != null,
-    hasCells: r.metrics?.cells != null,
-    hasSlides: r.metrics?.slides != null,
-  }));
+  const rows = results.map(r => {
+    const row = {
+      fileType: r.success ? r.fileType : 'Unreadable',
+      fileName: path.basename(r.filePath),
+      filePath: r.filePath,
+      files: 1,
+      size: r.size || 0,
+    };
+    for (const f of METRIC_FIELDS) {
+      row[f] = r.metrics?.[f] || 0;
+      row[hasKey(f)] = r.metrics?.[f] != null;
+    }
+    return row;
+  });
-  sortRows(rows, sort);
+  return finalize(rows, sort, 'by-file');
+}
+function finalize(rows, sort, mode) {
+  sortRows(rows, sort);
   const totals = computeTotals(rows);
   const columns = detectColumns(rows);
-  return { rows, totals, columns, mode: 'by-file' };
+  return { rows, totals, columns, mode };
 }
 function sortRows(rows, sort) {
@@ -104,40 +75,18 @@ function sortRows(rows, sort) {
 }
 function computeTotals(rows) {
-  const totals = {
-    fileType: 'Total',
-    files: 0,
-    words: 0,
-    pages: 0,
-    paragraphs: 0,
-    sheets: 0,
-    rows: 0,
-    cells: 0,
-    slides: 0,
-    size: 0,
-  };
+  const totals = { fileType: 'Total' };
+  for (const f of SUM_FIELDS) totals[f] = 0;
   for (const r of rows) {
-    totals.files += r.files;
-    totals.words += r.words;
-    totals.pages += r.pages;
-    totals.paragraphs += r.paragraphs;
-    totals.sheets += r.sheets;
-    totals.rows += r.rows;
-    totals.cells += r.cells;
-    totals.slides += r.slides;
-    totals.size += r.size;
+    for (const f of SUM_FIELDS) totals[f] += r[f];
   }
   return totals;
 }
 function detectColumns(rows) {
-  return {
-    hasWords: rows.some(r => r.hasWords),
-    hasPages: rows.some(r => r.hasPages),
-    hasParagraphs: rows.some(r => r.hasParagraphs),
-    hasSheets: rows.some(r => r.hasSheets),
-    hasRows: rows.some(r => r.hasRows),
-    hasCells: rows.some(r => r.hasCells),
-    hasSlides: rows.some(r => r.hasSlides),
-  };
+  const columns = {};
+  for (const f of METRIC_FIELDS) {
+    columns[hasKey(f)] = rows.some(r => r[hasKey(f)]);
+  }
+  return columns;
 }

package/src/utils.js CHANGED Viewed

@@ -22,8 +22,6 @@ export function getExtension(filePath) {
   return path.extname(filePath).toLowerCase().replace('.', '');
 }
-export const OFFICE_EXTENSIONS = ['docx', 'xlsx', 'pptx', 'pdf', 'odt', 'ods', 'odp'];
 export const EXTENSION_TO_TYPE = {
   docx: 'Word',
   pdf: 'PDF',
@@ -33,3 +31,11 @@ export const EXTENSION_TO_TYPE = {
   ods: 'ODS',
   odp: 'ODP',
 };
+export const OFFICE_EXTENSIONS = Object.keys(EXTENSION_TO_TYPE);
+export const METRIC_FIELDS = ['words', 'pages', 'paragraphs', 'sheets', 'rows', 'cells', 'slides'];
+export function hasKey(field) {
+  return `has${field[0].toUpperCase()}${field.slice(1)}`;
+}

package/src/walker.js CHANGED Viewed

@@ -30,55 +30,40 @@ export async function findFiles(directories, options = {}) {
   const ignore = excludeDir.map(d => `**/${d}/**`);
-  const paths = await fg(pattern, {
-    cwd: undefined,
-    absolute: true,
-    ignore,
-    dot: false,
-    onlyFiles: true,
-    followSymbolicLinks: false,
-    ...(directories.length > 0 ? {} : { cwd: process.cwd() }),
-  });
-  // If directories specified, search each one
-  let allPaths = [];
-  if (directories.length > 0) {
-    for (const dir of directories) {
-      const found = await fg(pattern, {
-        cwd: dir,
-        absolute: true,
-        ignore,
-        dot: false,
-        onlyFiles: true,
-        followSymbolicLinks: false,
-      });
-      allPaths.push(...found);
-    }
-  } else {
-    allPaths = await fg(pattern, {
-      cwd: process.cwd(),
+  const dirs = directories.length > 0 ? directories : [process.cwd()];
+  const allPaths = [];
+  for (const dir of dirs) {
+    const found = await fg(pattern, {
+      cwd: dir,
       absolute: true,
       ignore,
       dot: false,
       onlyFiles: true,
       followSymbolicLinks: false,
     });
+    allPaths.push(...found);
   }
   const limitBytes = largeFileLimit * 1024 * 1024;
   const files = [];
   const skipped = [];
-  for (const p of allPaths) {
-    try {
-      const s = await stat(p);
-      if (s.size > limitBytes) {
-        skipped.push({ path: p, reason: `Exceeds ${largeFileLimit}MB limit`, size: s.size });
+  // Batch stat calls for better throughput on large directories
+  const BATCH_SIZE = 50;
+  for (let i = 0; i < allPaths.length; i += BATCH_SIZE) {
+    const batch = allPaths.slice(i, i + BATCH_SIZE);
+    const results = await Promise.allSettled(batch.map(p => stat(p)));
+    for (let j = 0; j < results.length; j++) {
+      const p = batch[j];
+      const r = results[j];
+      if (r.status === 'rejected') {
+        const err = r.reason;
+        skipped.push({ path: p, reason: err.code === 'EACCES' ? 'Permission denied' : err.message, size: 0 });
+      } else if (r.value.size > limitBytes) {
+        skipped.push({ path: p, reason: `Exceeds ${largeFileLimit}MB limit`, size: r.value.size });
       } else {
-        files.push({ path: p, size: s.size });
+        files.push({ path: p, size: r.value.size });
       }
-    } catch (err) {
-      skipped.push({ path: p, reason: err.code === 'EACCES' ? 'Permission denied' : err.message, size: 0 });
     }
   }