@cesarandreslopez/occ 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +19 -19
- package/package.json +14 -5
- package/scripts/postinstall.js +28 -1
- package/src/cli.js +27 -15
- package/src/output/json.js +29 -25
- package/src/output/tabular.js +121 -46
- package/src/parsers/docx.js +1 -9
- package/src/parsers/index.js +12 -14
- package/src/parsers/odf.js +12 -35
- package/src/parsers/pdf.js +2 -9
- package/src/parsers/pptx.js +3 -11
- package/src/parsers/xlsx.js +11 -20
- package/src/scc.js +4 -6
- package/src/stats.js +37 -88
- package/src/utils.js +8 -2
- package/src/walker.js +20 -35
package/README.md
CHANGED
|
@@ -77,24 +77,24 @@ occ --ci docs/
|
|
|
77
77
|
## Example Output
|
|
78
78
|
|
|
79
79
|
```
|
|
80
|
-
-- Documents
|
|
81
|
-
Format
|
|
82
|
-
|
|
83
|
-
Word
|
|
84
|
-
PDF
|
|
85
|
-
Excel
|
|
86
|
-
|
|
87
|
-
Total
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
80
|
+
-- Documents ---------------------------------------------------------------
|
|
81
|
+
Format Files Words Pages Details Size
|
|
82
|
+
----------------------------------------------------------------------------
|
|
83
|
+
Word 12 34,210 137 1,203 paras 1.2 MB
|
|
84
|
+
PDF 8 22,540 64 4.5 MB
|
|
85
|
+
Excel 3 12 sheets 890 KB
|
|
86
|
+
----------------------------------------------------------------------------
|
|
87
|
+
Total 23 56,750 201 1,203 paras 6.5 MB
|
|
88
|
+
|
|
89
|
+
-- Code (via scc) ----------------------------------------------------------
|
|
90
|
+
Language Files Lines Blanks Comments Code
|
|
91
|
+
----------------------------------------------------------------------------
|
|
92
|
+
JavaScript 15 2340 180 320 1840
|
|
93
|
+
Python 8 1200 90 150 960
|
|
94
|
+
----------------------------------------------------------------------------
|
|
95
|
+
Total 23 3540 270 470 2800
|
|
96
|
+
|
|
97
|
+
Scanned 23 documents (56,750 words, 201 pages) in 120ms
|
|
98
98
|
```
|
|
99
99
|
|
|
100
100
|
## Supported Formats
|
|
@@ -156,7 +156,7 @@ Tools like `scc`, `cloc`, and `tokei` give you instant visibility into codebases
|
|
|
156
156
|
|
|
157
157
|
## How It Works
|
|
158
158
|
|
|
159
|
-
OCC uses [fast-glob](https://github.com/mrmlnc/fast-glob) for file discovery, dispatches to format-specific parsers (mammoth for DOCX, pdf-parse for PDF,
|
|
159
|
+
OCC uses [fast-glob](https://github.com/mrmlnc/fast-glob) for file discovery, dispatches to format-specific parsers (mammoth for DOCX, pdf-parse for PDF, SheetJS for XLSX, JSZip + officeparser for PPTX/ODF), aggregates metrics, and renders output via cli-table3. For code metrics, it shells out to a vendored [scc](https://github.com/boyter/scc) binary (auto-downloaded during `npm install`, with PATH fallback).
|
|
160
160
|
|
|
161
161
|
## Contributing
|
|
162
162
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@cesarandreslopez/occ",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.2",
|
|
4
4
|
"description": "Office Cloc and Count — scc-style summary tables for office documents",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -20,8 +20,17 @@
|
|
|
20
20
|
"url": "https://github.com/cesarandreslopez/occ/issues"
|
|
21
21
|
},
|
|
22
22
|
"keywords": [
|
|
23
|
-
"office",
|
|
24
|
-
"
|
|
23
|
+
"office",
|
|
24
|
+
"documents",
|
|
25
|
+
"cloc",
|
|
26
|
+
"count",
|
|
27
|
+
"metrics",
|
|
28
|
+
"docx",
|
|
29
|
+
"xlsx",
|
|
30
|
+
"pptx",
|
|
31
|
+
"pdf",
|
|
32
|
+
"scc",
|
|
33
|
+
"cli"
|
|
25
34
|
],
|
|
26
35
|
"files": [
|
|
27
36
|
"bin/",
|
|
@@ -39,12 +48,12 @@
|
|
|
39
48
|
"chalk": "^5.3.0",
|
|
40
49
|
"cli-table3": "^0.6.5",
|
|
41
50
|
"commander": "^12.1.0",
|
|
42
|
-
"exceljs": "^4.4.0",
|
|
43
51
|
"fast-glob": "^3.3.2",
|
|
44
52
|
"jszip": "^3.10.1",
|
|
45
53
|
"mammoth": "^1.8.0",
|
|
46
54
|
"officeparser": "^6.0.1",
|
|
47
|
-
"pdf-parse": "^1.1.1"
|
|
55
|
+
"pdf-parse": "^1.1.1",
|
|
56
|
+
"xlsx": "^0.18.5"
|
|
48
57
|
},
|
|
49
58
|
"devDependencies": {
|
|
50
59
|
"docx": "^9.6.0"
|
package/scripts/postinstall.js
CHANGED
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
// Downloads the scc binary for the current platform during npm install.
|
|
4
4
|
// Falls back gracefully — if download fails, occ will look for scc on PATH.
|
|
5
5
|
|
|
6
|
-
import { createWriteStream, existsSync, mkdirSync, chmodSync, unlinkSync } from 'node:fs';
|
|
6
|
+
import { createWriteStream, readFileSync, existsSync, mkdirSync, chmodSync, unlinkSync } from 'node:fs';
|
|
7
|
+
import { createHash } from 'node:crypto';
|
|
7
8
|
import { pipeline } from 'node:stream/promises';
|
|
8
9
|
import { execFile } from 'node:child_process';
|
|
9
10
|
import { promisify } from 'node:util';
|
|
@@ -27,6 +28,18 @@ const ARCH_MAP = {
|
|
|
27
28
|
ia32: 'i386',
|
|
28
29
|
};
|
|
29
30
|
|
|
31
|
+
// SHA-256 checksums from the official scc v3.7.0 release
|
|
32
|
+
const CHECKSUMS = {
|
|
33
|
+
'scc_Darwin_arm64.tar.gz': '376cbae670be59ee64f398de20e0694ec434bf8a9b842642952b0ab0be5f3961',
|
|
34
|
+
'scc_Darwin_x86_64.tar.gz': 'c3f7457856b9169ccb3c1dd14198e67f730bee065f24d9051bf52cdc2a719ecc',
|
|
35
|
+
'scc_Linux_arm64.tar.gz': 'dcb05c6e993bb2d8d2da4765ff018f2e752325dd205a41698929c55e4123575d',
|
|
36
|
+
'scc_Linux_i386.tar.gz': '1de91dae8a927ac2063a99b520d9a474644db6827fe6f85e3d8f87a1def3b14d',
|
|
37
|
+
'scc_Linux_x86_64.tar.gz': '3d9d65b00ca874c2b29151abe7e1480736f5229edc3ce8e4b2791460cdfabf5a',
|
|
38
|
+
'scc_Windows_arm64.zip': 'fd114614c10382c9ed2e32d5455cc4b51960a9f71691c5c1ca42b31adea5b84d',
|
|
39
|
+
'scc_Windows_i386.zip': '7b887022c37dc79e79ae51897030a6ff2515ab7b124e7b2aabcb0fba15412b05',
|
|
40
|
+
'scc_Windows_x86_64.zip': '97abf9d55d4b79d3310536d576ccbdf5017aeb425780e850336120b6e67622e1',
|
|
41
|
+
};
|
|
42
|
+
|
|
30
43
|
function getAssetName() {
|
|
31
44
|
const platform = PLATFORM_MAP[process.platform];
|
|
32
45
|
const arch = ARCH_MAP[process.arch];
|
|
@@ -53,6 +66,19 @@ async function download(url, dest) {
|
|
|
53
66
|
await pipeline(res.body, createWriteStream(dest));
|
|
54
67
|
}
|
|
55
68
|
|
|
69
|
+
function verifyChecksum(filePath, assetName) {
|
|
70
|
+
const expected = CHECKSUMS[assetName];
|
|
71
|
+
if (!expected) {
|
|
72
|
+
console.warn(`occ: No checksum available for ${assetName}, skipping verification`);
|
|
73
|
+
return;
|
|
74
|
+
}
|
|
75
|
+
const data = readFileSync(filePath);
|
|
76
|
+
const actual = createHash('sha256').update(data).digest('hex');
|
|
77
|
+
if (actual !== expected) {
|
|
78
|
+
throw new Error(`Checksum mismatch for ${assetName}\n Expected: ${expected}\n Got: ${actual}`);
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
56
82
|
async function extract(archive, destDir) {
|
|
57
83
|
if (archive.endsWith('.tar.gz')) {
|
|
58
84
|
await execFileAsync('tar', ['xzf', archive, '-C', destDir]);
|
|
@@ -92,6 +118,7 @@ async function main() {
|
|
|
92
118
|
try {
|
|
93
119
|
console.log(`Downloading scc v${SCC_VERSION} for ${process.platform}-${process.arch}...`);
|
|
94
120
|
await download(url, archivePath);
|
|
121
|
+
verifyChecksum(archivePath, assetName);
|
|
95
122
|
|
|
96
123
|
console.log('Extracting...');
|
|
97
124
|
await extract(archivePath, vendorDir);
|
package/src/cli.js
CHANGED
|
@@ -1,20 +1,22 @@
|
|
|
1
|
-
import { Command } from 'commander';
|
|
2
|
-
import { writeFile } from 'node:fs/promises';
|
|
1
|
+
import { Command, Option } from 'commander';
|
|
2
|
+
import { readFile, writeFile } from 'node:fs/promises';
|
|
3
3
|
import { findFiles } from './walker.js';
|
|
4
4
|
import { parseFiles } from './parsers/index.js';
|
|
5
5
|
import { aggregate } from './stats.js';
|
|
6
|
-
import { formatDocumentTable, formatSccTable } from './output/tabular.js';
|
|
6
|
+
import { formatDocumentTable, formatSccTable, formatSummaryLine } from './output/tabular.js';
|
|
7
7
|
import { formatJson } from './output/json.js';
|
|
8
8
|
import { checkScc, runScc } from './scc.js';
|
|
9
9
|
import { createProgress } from './progress.js';
|
|
10
10
|
|
|
11
|
+
const pkg = JSON.parse(await readFile(new URL('../package.json', import.meta.url), 'utf8'));
|
|
12
|
+
|
|
11
13
|
export async function run(argv) {
|
|
12
14
|
const program = new Command();
|
|
13
15
|
|
|
14
16
|
program
|
|
15
17
|
.name('occ')
|
|
16
18
|
.description('Office Cloc and Count — scc-style summary tables for office documents')
|
|
17
|
-
.version(
|
|
19
|
+
.version(pkg.version)
|
|
18
20
|
.argument('[directories...]', 'directories to scan', [])
|
|
19
21
|
.option('-f, --by-file', 'show a row per file instead of grouped by type')
|
|
20
22
|
.option('--format <type>', 'output format: tabular or json', 'tabular')
|
|
@@ -22,7 +24,7 @@ export async function run(argv) {
|
|
|
22
24
|
.option('--exclude-ext <exts>', 'comma-separated extensions to exclude')
|
|
23
25
|
.option('--exclude-dir <dirs>', 'directories to skip (comma-separated)', 'node_modules,.git')
|
|
24
26
|
.option('--no-gitignore', 'disable .gitignore respect')
|
|
25
|
-
.
|
|
27
|
+
.addOption(new Option('--sort <col>', 'sort by: files, name, words, size').choices(['files', 'name', 'words', 'size']).default('files'))
|
|
26
28
|
.option('-o, --output <file>', 'write output to file')
|
|
27
29
|
.option('--ci', 'ASCII-only output, no colors')
|
|
28
30
|
.option('--large-file-limit <mb>', 'skip files over this size in MB', '50')
|
|
@@ -39,14 +41,25 @@ export async function run(argv) {
|
|
|
39
41
|
await program.parseAsync(argv);
|
|
40
42
|
}
|
|
41
43
|
|
|
44
|
+
function validateLargeFileLimit(value) {
|
|
45
|
+
const n = parseFloat(value);
|
|
46
|
+
if (Number.isNaN(n) || n <= 0) {
|
|
47
|
+
throw new Error(`Invalid --large-file-limit value: "${value}" (must be a positive number)`);
|
|
48
|
+
}
|
|
49
|
+
return n;
|
|
50
|
+
}
|
|
51
|
+
|
|
42
52
|
async function execute(directories, opts) {
|
|
53
|
+
const startTime = Date.now();
|
|
43
54
|
const excludeDirs = opts.excludeDir
|
|
44
55
|
? opts.excludeDir.split(',').map(d => d.trim())
|
|
45
56
|
: ['node_modules', '.git'];
|
|
46
57
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
58
|
+
const includeCode = opts.code !== false;
|
|
59
|
+
|
|
60
|
+
let sccBinary = null;
|
|
61
|
+
if (includeCode) {
|
|
62
|
+
sccBinary = await checkScc();
|
|
50
63
|
}
|
|
51
64
|
|
|
52
65
|
// Find and parse office documents
|
|
@@ -55,7 +68,7 @@ async function execute(directories, opts) {
|
|
|
55
68
|
excludeExt: opts.excludeExt,
|
|
56
69
|
excludeDir: excludeDirs,
|
|
57
70
|
noGitignore: !opts.gitignore,
|
|
58
|
-
largeFileLimit:
|
|
71
|
+
largeFileLimit: validateLargeFileLimit(opts.largeFileLimit),
|
|
59
72
|
});
|
|
60
73
|
|
|
61
74
|
const showProgress = opts.format !== 'json' && process.stderr.isTTY;
|
|
@@ -71,11 +84,10 @@ async function execute(directories, opts) {
|
|
|
71
84
|
sort: opts.sort,
|
|
72
85
|
});
|
|
73
86
|
|
|
74
|
-
// Run scc for code files
|
|
75
87
|
let sccData = null;
|
|
76
|
-
if (
|
|
88
|
+
if (includeCode) {
|
|
77
89
|
if (showProgress) process.stderr.write('\rAnalyzing code with scc...');
|
|
78
|
-
sccData = await runScc(directories, {
|
|
90
|
+
sccData = await runScc(sccBinary, directories, {
|
|
79
91
|
byFile: opts.byFile,
|
|
80
92
|
excludeDir: excludeDirs,
|
|
81
93
|
sort: opts.sort,
|
|
@@ -106,9 +118,9 @@ async function execute(directories, opts) {
|
|
|
106
118
|
parts.push(formatSccTable(sccData, { ci: opts.ci, byFile: opts.byFile }));
|
|
107
119
|
}
|
|
108
120
|
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
121
|
+
const elapsed = Date.now() - startTime;
|
|
122
|
+
const summary = formatSummaryLine(stats, sccData, elapsed, { ci: opts.ci });
|
|
123
|
+
if (summary) parts.push(summary);
|
|
112
124
|
}
|
|
113
125
|
|
|
114
126
|
if (skipped.length > 0) {
|
package/src/output/json.js
CHANGED
|
@@ -1,31 +1,35 @@
|
|
|
1
|
+
import { METRIC_FIELDS, hasKey } from '../utils.js';
|
|
2
|
+
|
|
1
3
|
export function formatJson(stats, sccData = null) {
|
|
4
|
+
const { columns } = stats;
|
|
5
|
+
|
|
6
|
+
const mapRow = (r) => {
|
|
7
|
+
const entry = {
|
|
8
|
+
type: r.fileType,
|
|
9
|
+
...(r.fileName ? { name: r.fileName } : {}),
|
|
10
|
+
...(r.filePath ? { path: r.filePath } : {}),
|
|
11
|
+
count: r.files,
|
|
12
|
+
};
|
|
13
|
+
for (const f of METRIC_FIELDS) {
|
|
14
|
+
if (r[hasKey(f)]) entry[f] = r[f] || 0;
|
|
15
|
+
}
|
|
16
|
+
entry.size = r.size;
|
|
17
|
+
return entry;
|
|
18
|
+
};
|
|
19
|
+
|
|
20
|
+
const mapTotals = (t) => {
|
|
21
|
+
const entry = { files: t.files };
|
|
22
|
+
for (const f of METRIC_FIELDS) {
|
|
23
|
+
if (columns[hasKey(f)]) entry[f] = t[f];
|
|
24
|
+
}
|
|
25
|
+
entry.size = t.size;
|
|
26
|
+
return entry;
|
|
27
|
+
};
|
|
28
|
+
|
|
2
29
|
const output = {
|
|
3
30
|
documents: {
|
|
4
|
-
files: stats.rows.map(
|
|
5
|
-
|
|
6
|
-
...(r.fileName ? { name: r.fileName } : {}),
|
|
7
|
-
...(r.filePath ? { path: r.filePath } : {}),
|
|
8
|
-
count: r.files,
|
|
9
|
-
words: r.words || 0,
|
|
10
|
-
pages: r.pages || 0,
|
|
11
|
-
paragraphs: r.paragraphs || 0,
|
|
12
|
-
sheets: r.sheets || 0,
|
|
13
|
-
rows: r.rows || 0,
|
|
14
|
-
cells: r.cells || 0,
|
|
15
|
-
slides: r.slides || 0,
|
|
16
|
-
size: r.size,
|
|
17
|
-
})),
|
|
18
|
-
totals: {
|
|
19
|
-
files: stats.totals.files,
|
|
20
|
-
words: stats.totals.words,
|
|
21
|
-
pages: stats.totals.pages,
|
|
22
|
-
paragraphs: stats.totals.paragraphs,
|
|
23
|
-
sheets: stats.totals.sheets,
|
|
24
|
-
rows: stats.totals.rows,
|
|
25
|
-
cells: stats.totals.cells,
|
|
26
|
-
slides: stats.totals.slides,
|
|
27
|
-
size: stats.totals.size,
|
|
28
|
-
},
|
|
31
|
+
files: stats.rows.map(mapRow),
|
|
32
|
+
totals: mapTotals(stats.totals),
|
|
29
33
|
},
|
|
30
34
|
};
|
|
31
35
|
|
package/src/output/tabular.js
CHANGED
|
@@ -6,25 +6,30 @@ export function formatDocumentTable(stats, options = {}) {
|
|
|
6
6
|
const { ci = false } = options;
|
|
7
7
|
const c = ci ? noColor : colorize;
|
|
8
8
|
|
|
9
|
-
const
|
|
9
|
+
const isByFile = stats.mode === 'by-file';
|
|
10
|
+
const headers = buildHeaders(stats.columns, isByFile, c);
|
|
11
|
+
const colAligns = buildColAligns(stats.columns, isByFile);
|
|
10
12
|
const table = new Table({
|
|
11
13
|
head: headers.map(h => h.label),
|
|
12
|
-
chars: ci
|
|
14
|
+
chars: tableChars(ci),
|
|
13
15
|
style: { head: [], border: [] },
|
|
16
|
+
colAligns,
|
|
14
17
|
});
|
|
15
18
|
|
|
16
19
|
for (const row of stats.rows) {
|
|
17
|
-
table.push(buildRow(row, stats.columns,
|
|
20
|
+
table.push(buildRow(row, stats.columns, isByFile, c));
|
|
18
21
|
}
|
|
19
22
|
|
|
20
|
-
// Totals row
|
|
21
|
-
const isByFile = stats.mode === 'by-file';
|
|
22
23
|
table.push(buildRow(stats.totals, stats.columns, isByFile, c, true));
|
|
23
24
|
|
|
25
|
+
const tableStr = addSeparators(table.toString(), ci ? '-' : '─');
|
|
26
|
+
|
|
27
|
+
const tableWidth = stripAnsi(tableStr.split('\n')[0]).length;
|
|
28
|
+
|
|
24
29
|
const lines = [];
|
|
25
30
|
lines.push('');
|
|
26
|
-
lines.push(c.header(
|
|
27
|
-
lines.push(
|
|
31
|
+
lines.push(c.header(sectionHeader('Documents', tableWidth, ci)));
|
|
32
|
+
lines.push(tableStr);
|
|
28
33
|
|
|
29
34
|
// Footnotes
|
|
30
35
|
const hasEstimatedPages = stats.rows.some(r =>
|
|
@@ -52,7 +57,7 @@ export function formatSccTable(sccData, options = {}) {
|
|
|
52
57
|
c.headerCell('Comments'),
|
|
53
58
|
c.headerCell('Code'),
|
|
54
59
|
],
|
|
55
|
-
chars: ci
|
|
60
|
+
chars: tableChars(ci),
|
|
56
61
|
style: { head: [], border: [] },
|
|
57
62
|
colAligns: ['left', 'right', 'right', 'right', 'right', 'right'],
|
|
58
63
|
});
|
|
@@ -66,8 +71,8 @@ export function formatSccTable(sccData, options = {}) {
|
|
|
66
71
|
c.type(file.Filename || file.Location || ''),
|
|
67
72
|
formatNumber(1),
|
|
68
73
|
c.number(formatNumber(file.Lines)),
|
|
69
|
-
formatNumber(file.Blank),
|
|
70
|
-
formatNumber(file.Comment),
|
|
74
|
+
c.number(formatNumber(file.Blank)),
|
|
75
|
+
c.number(formatNumber(file.Comment)),
|
|
71
76
|
c.number(formatNumber(file.Code)),
|
|
72
77
|
]);
|
|
73
78
|
}
|
|
@@ -76,8 +81,8 @@ export function formatSccTable(sccData, options = {}) {
|
|
|
76
81
|
c.type(lang.Name),
|
|
77
82
|
formatNumber(lang.Count),
|
|
78
83
|
c.number(formatNumber(lang.Lines)),
|
|
79
|
-
formatNumber(lang.Blank),
|
|
80
|
-
formatNumber(lang.Comment),
|
|
84
|
+
c.number(formatNumber(lang.Blank)),
|
|
85
|
+
c.number(formatNumber(lang.Comment)),
|
|
81
86
|
c.number(formatNumber(lang.Code)),
|
|
82
87
|
]);
|
|
83
88
|
}
|
|
@@ -97,30 +102,117 @@ export function formatSccTable(sccData, options = {}) {
|
|
|
97
102
|
c.total(formatNumber(totalCode)),
|
|
98
103
|
]);
|
|
99
104
|
|
|
105
|
+
const tableStr = addSeparators(table.toString(), ci ? '-' : '─');
|
|
106
|
+
const tableWidth = stripAnsi(tableStr.split('\n')[0]).length;
|
|
107
|
+
|
|
100
108
|
const lines = [];
|
|
101
109
|
lines.push('');
|
|
102
|
-
lines.push(c.header(
|
|
103
|
-
lines.push(
|
|
110
|
+
lines.push(c.header(sectionHeader('Code (via scc)', tableWidth, ci)));
|
|
111
|
+
lines.push(tableStr);
|
|
104
112
|
|
|
105
113
|
return lines.join('\n');
|
|
106
114
|
}
|
|
107
115
|
|
|
116
|
+
export function formatSummaryLine(stats, sccData, elapsed, options = {}) {
|
|
117
|
+
const { ci = false } = options;
|
|
118
|
+
const c = ci ? noColor : colorize;
|
|
119
|
+
|
|
120
|
+
const parts = [];
|
|
121
|
+
if (stats && stats.totals.files > 0) {
|
|
122
|
+
let docPart = `${stats.totals.files} document${stats.totals.files !== 1 ? 's' : ''}`;
|
|
123
|
+
const details = [];
|
|
124
|
+
if (stats.totals.words > 0) details.push(`${formatNumber(stats.totals.words)} word${stats.totals.words !== 1 ? 's' : ''}`);
|
|
125
|
+
if (stats.totals.pages > 0) details.push(`${formatNumber(stats.totals.pages)} page${stats.totals.pages !== 1 ? 's' : ''}`);
|
|
126
|
+
if (details.length > 0) docPart += ` (${details.join(', ')})`;
|
|
127
|
+
parts.push(docPart);
|
|
128
|
+
}
|
|
129
|
+
if (sccData && sccData.length > 0) {
|
|
130
|
+
const totalCode = sccData.reduce((sum, l) => sum + (l.Code || 0), 0);
|
|
131
|
+
parts.push(`${formatNumber(totalCode)} lines of code`);
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
if (parts.length === 0) return '';
|
|
135
|
+
|
|
136
|
+
const time = elapsed >= 1000
|
|
137
|
+
? `${(elapsed / 1000).toFixed(1)}s`
|
|
138
|
+
: `${elapsed}ms`;
|
|
139
|
+
|
|
140
|
+
return '\n' + c.dim(`Scanned ${parts.join(', ')} in ${time}`) + '\n';
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
/**
|
|
144
|
+
* Post-process table string to insert separator lines after the header row
|
|
145
|
+
* and before the totals row (last data row).
|
|
146
|
+
*
|
|
147
|
+
* Table layout from cli-table3 (with empty mid chars):
|
|
148
|
+
* line 0: top border
|
|
149
|
+
* line 1: header row
|
|
150
|
+
* lines 2..N-2: data rows
|
|
151
|
+
* line N-1: totals row
|
|
152
|
+
* line N: bottom border
|
|
153
|
+
*/
|
|
154
|
+
function addSeparators(tableStr, char) {
|
|
155
|
+
const lines = tableStr.split('\n');
|
|
156
|
+
if (lines.length < 4) return tableStr;
|
|
157
|
+
|
|
158
|
+
// Use header row width — top border is narrower due to single-char top-mid vs 2-char middle
|
|
159
|
+
const width = stripAnsi(lines[1]).length;
|
|
160
|
+
const sep = char.repeat(width);
|
|
161
|
+
|
|
162
|
+
const result = [];
|
|
163
|
+
// Skip lines[0] (top border) — section header already serves as delimiter
|
|
164
|
+
result.push(lines[1]); // header row
|
|
165
|
+
result.push(sep); // header separator
|
|
166
|
+
|
|
167
|
+
// Data rows (everything except first 2 and last 2)
|
|
168
|
+
for (let i = 2; i < lines.length - 2; i++) {
|
|
169
|
+
result.push(lines[i]);
|
|
170
|
+
}
|
|
171
|
+
|
|
172
|
+
result.push(sep); // totals separator
|
|
173
|
+
result.push(lines[lines.length - 2]); // totals row
|
|
174
|
+
// Skip bottom border — totals row is the natural end
|
|
175
|
+
|
|
176
|
+
return result.join('\n');
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
function stripAnsi(str) {
|
|
180
|
+
return str.replace(/\x1b\[[0-9;]*m/g, '');
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
function sectionHeader(title, width, ci = false) {
|
|
184
|
+
const dash = ci ? '-' : '─';
|
|
185
|
+
const prefix = `${dash}${dash} ${title} `;
|
|
186
|
+
const padLen = Math.max(0, width - prefix.length);
|
|
187
|
+
return prefix + dash.repeat(padLen);
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
function hasExtraColumns(columns) {
|
|
191
|
+
return columns.hasParagraphs || columns.hasSheets || columns.hasSlides ||
|
|
192
|
+
columns.hasRows || columns.hasCells;
|
|
193
|
+
}
|
|
194
|
+
|
|
108
195
|
function buildHeaders(columns, byFile, c) {
|
|
109
196
|
const headers = [];
|
|
110
197
|
headers.push({ key: 'format', label: c.headerCell(byFile ? 'File' : 'Format') });
|
|
111
198
|
if (!byFile) headers.push({ key: 'files', label: c.headerCell('Files') });
|
|
112
199
|
if (columns.hasWords) headers.push({ key: 'words', label: c.headerCell('Words') });
|
|
113
200
|
if (columns.hasPages) headers.push({ key: 'pages', label: c.headerCell('Pages') });
|
|
114
|
-
|
|
115
|
-
// Extra column for type-specific metrics
|
|
116
|
-
const hasExtra = columns.hasParagraphs || columns.hasSheets || columns.hasSlides ||
|
|
117
|
-
columns.hasRows || columns.hasCells;
|
|
118
|
-
if (hasExtra) headers.push({ key: 'extra', label: c.headerCell('Extra') });
|
|
119
|
-
|
|
201
|
+
if (hasExtraColumns(columns)) headers.push({ key: 'extra', label: c.headerCell('Details') });
|
|
120
202
|
headers.push({ key: 'size', label: c.headerCell('Size') });
|
|
121
203
|
return headers;
|
|
122
204
|
}
|
|
123
205
|
|
|
206
|
+
function buildColAligns(columns, byFile) {
|
|
207
|
+
const aligns = ['left']; // Format/File
|
|
208
|
+
if (!byFile) aligns.push('right'); // Files
|
|
209
|
+
if (columns.hasWords) aligns.push('right');
|
|
210
|
+
if (columns.hasPages) aligns.push('right');
|
|
211
|
+
if (hasExtraColumns(columns)) aligns.push('right');
|
|
212
|
+
aligns.push('right'); // Size
|
|
213
|
+
return aligns;
|
|
214
|
+
}
|
|
215
|
+
|
|
124
216
|
function buildRow(row, columns, byFile, c, isTotal = false) {
|
|
125
217
|
const fmt = isTotal ? c.total : (v) => v;
|
|
126
218
|
const fmtType = isTotal ? c.total : c.type;
|
|
@@ -142,9 +234,7 @@ function buildRow(row, columns, byFile, c, isTotal = false) {
|
|
|
142
234
|
if (columns.hasWords) cells.push(fmtNum(row.words ? formatNumber(row.words) : ''));
|
|
143
235
|
if (columns.hasPages) cells.push(fmtNum(row.pages ? formatNumber(row.pages) : ''));
|
|
144
236
|
|
|
145
|
-
|
|
146
|
-
columns.hasRows || columns.hasCells;
|
|
147
|
-
if (hasExtra) {
|
|
237
|
+
if (hasExtraColumns(columns)) {
|
|
148
238
|
const parts = [];
|
|
149
239
|
if (row.paragraphs) parts.push(`${formatNumber(row.paragraphs)} paras`);
|
|
150
240
|
if (row.sheets) parts.push(`${formatNumber(row.sheets)} sheets`);
|
|
@@ -158,21 +248,13 @@ function buildRow(row, columns, byFile, c, isTotal = false) {
|
|
|
158
248
|
return cells;
|
|
159
249
|
}
|
|
160
250
|
|
|
161
|
-
function
|
|
251
|
+
function tableChars(ci) {
|
|
252
|
+
const ch = ci ? '-' : '─';
|
|
162
253
|
return {
|
|
163
|
-
top:
|
|
164
|
-
bottom:
|
|
165
|
-
left: ' ', 'left-mid': '
|
|
166
|
-
right: ' ', 'right-mid': '
|
|
167
|
-
};
|
|
168
|
-
}
|
|
169
|
-
|
|
170
|
-
function asciiChars() {
|
|
171
|
-
return {
|
|
172
|
-
top: '-', 'top-mid': '-', 'top-left': '-', 'top-right': '-',
|
|
173
|
-
bottom: '-', 'bottom-mid': '-', 'bottom-left': '-', 'bottom-right': '-',
|
|
174
|
-
left: ' ', 'left-mid': '-', mid: '-', 'mid-mid': '-',
|
|
175
|
-
right: ' ', 'right-mid': '-', middle: ' ',
|
|
254
|
+
top: ch, 'top-mid': ch, 'top-left': ch, 'top-right': ch,
|
|
255
|
+
bottom: ch, 'bottom-mid': ch, 'bottom-left': ch, 'bottom-right': ch,
|
|
256
|
+
left: ' ', 'left-mid': '', mid: '', 'mid-mid': '',
|
|
257
|
+
right: ' ', 'right-mid': '', middle: ' ',
|
|
176
258
|
};
|
|
177
259
|
}
|
|
178
260
|
|
|
@@ -186,12 +268,5 @@ const colorize = {
|
|
|
186
268
|
dim: (s) => chalk.dim(s),
|
|
187
269
|
};
|
|
188
270
|
|
|
189
|
-
const
|
|
190
|
-
|
|
191
|
-
headerCell: (s) => s,
|
|
192
|
-
type: (s) => s,
|
|
193
|
-
number: (s) => s,
|
|
194
|
-
total: (s) => s,
|
|
195
|
-
error: (s) => s,
|
|
196
|
-
dim: (s) => s,
|
|
197
|
-
};
|
|
271
|
+
const identity = (s) => s;
|
|
272
|
+
const noColor = Object.fromEntries(Object.keys(colorize).map(k => [k, identity]));
|
package/src/parsers/docx.js
CHANGED
|
@@ -10,14 +10,6 @@ export async function parseDocx(filePath) {
|
|
|
10
10
|
|
|
11
11
|
return {
|
|
12
12
|
fileType: 'Word',
|
|
13
|
-
metrics: {
|
|
14
|
-
words,
|
|
15
|
-
pages,
|
|
16
|
-
paragraphs,
|
|
17
|
-
sheets: null,
|
|
18
|
-
rows: null,
|
|
19
|
-
cells: null,
|
|
20
|
-
slides: null,
|
|
21
|
-
},
|
|
13
|
+
metrics: { words, pages, paragraphs },
|
|
22
14
|
};
|
|
23
15
|
}
|
package/src/parsers/index.js
CHANGED
|
@@ -15,18 +15,22 @@ const PARSER_MAP = {
|
|
|
15
15
|
odp: parseOdf,
|
|
16
16
|
};
|
|
17
17
|
|
|
18
|
+
function failureResult(filePath, size, ext) {
|
|
19
|
+
return {
|
|
20
|
+
filePath,
|
|
21
|
+
size,
|
|
22
|
+
success: false,
|
|
23
|
+
fileType: EXTENSION_TO_TYPE[ext] || ext.toUpperCase(),
|
|
24
|
+
metrics: null,
|
|
25
|
+
};
|
|
26
|
+
}
|
|
27
|
+
|
|
18
28
|
export async function parseFile(filePath, size) {
|
|
19
29
|
const ext = getExtension(filePath);
|
|
20
30
|
const parser = PARSER_MAP[ext];
|
|
21
31
|
|
|
22
32
|
if (!parser) {
|
|
23
|
-
return
|
|
24
|
-
filePath,
|
|
25
|
-
size,
|
|
26
|
-
success: false,
|
|
27
|
-
fileType: EXTENSION_TO_TYPE[ext] || ext.toUpperCase(),
|
|
28
|
-
metrics: null,
|
|
29
|
-
};
|
|
33
|
+
return failureResult(filePath, size, ext);
|
|
30
34
|
}
|
|
31
35
|
|
|
32
36
|
try {
|
|
@@ -39,13 +43,7 @@ export async function parseFile(filePath, size) {
|
|
|
39
43
|
metrics: result.metrics,
|
|
40
44
|
};
|
|
41
45
|
} catch {
|
|
42
|
-
return
|
|
43
|
-
filePath,
|
|
44
|
-
size,
|
|
45
|
-
success: false,
|
|
46
|
-
fileType: EXTENSION_TO_TYPE[ext] || ext.toUpperCase(),
|
|
47
|
-
metrics: null,
|
|
48
|
-
};
|
|
46
|
+
return failureResult(filePath, size, ext);
|
|
49
47
|
}
|
|
50
48
|
}
|
|
51
49
|
|
package/src/parsers/odf.js
CHANGED
|
@@ -5,11 +5,12 @@ import { countWords, getExtension } from '../utils.js';
|
|
|
5
5
|
|
|
6
6
|
export async function parseOdf(filePath) {
|
|
7
7
|
const ext = getExtension(filePath);
|
|
8
|
-
const buffer = await readFile(filePath);
|
|
9
8
|
|
|
10
9
|
if (ext === 'odt') return parseOdt(filePath);
|
|
11
|
-
|
|
12
|
-
|
|
10
|
+
|
|
11
|
+
const buffer = await readFile(filePath);
|
|
12
|
+
if (ext === 'ods') return parseOds(buffer);
|
|
13
|
+
if (ext === 'odp') return parseOdp(buffer);
|
|
13
14
|
|
|
14
15
|
throw new Error(`Unsupported ODF format: ${ext}`);
|
|
15
16
|
}
|
|
@@ -22,19 +23,11 @@ async function parseOdt(filePath) {
|
|
|
22
23
|
|
|
23
24
|
return {
|
|
24
25
|
fileType: 'ODT',
|
|
25
|
-
metrics: {
|
|
26
|
-
words,
|
|
27
|
-
pages,
|
|
28
|
-
paragraphs,
|
|
29
|
-
sheets: null,
|
|
30
|
-
rows: null,
|
|
31
|
-
cells: null,
|
|
32
|
-
slides: null,
|
|
33
|
-
},
|
|
26
|
+
metrics: { words, pages, paragraphs },
|
|
34
27
|
};
|
|
35
28
|
}
|
|
36
29
|
|
|
37
|
-
async function parseOds(
|
|
30
|
+
async function parseOds(buffer) {
|
|
38
31
|
const zip = await JSZip.loadAsync(buffer);
|
|
39
32
|
const contentXml = await zip.file('content.xml')?.async('text');
|
|
40
33
|
if (!contentXml) throw new Error('No content.xml found in ODS');
|
|
@@ -42,44 +35,28 @@ async function parseOds(filePath, buffer) {
|
|
|
42
35
|
const sheets = (contentXml.match(/<table:table /g) || []).length;
|
|
43
36
|
const rows = (contentXml.match(/<table:table-row/g) || []).length;
|
|
44
37
|
|
|
45
|
-
// Use officeparser
|
|
46
|
-
const text = await officeparser.parseOffice(
|
|
38
|
+
// Use officeparser with buffer to avoid re-reading from disk
|
|
39
|
+
const text = await officeparser.parseOffice(buffer);
|
|
47
40
|
const cells = text.split(/\n/).filter(s => s.trim().length > 0).length;
|
|
48
41
|
|
|
49
42
|
return {
|
|
50
43
|
fileType: 'ODS',
|
|
51
|
-
metrics: {
|
|
52
|
-
words: null,
|
|
53
|
-
pages: null,
|
|
54
|
-
paragraphs: null,
|
|
55
|
-
sheets,
|
|
56
|
-
rows,
|
|
57
|
-
cells,
|
|
58
|
-
slides: null,
|
|
59
|
-
},
|
|
44
|
+
metrics: { sheets, rows, cells },
|
|
60
45
|
};
|
|
61
46
|
}
|
|
62
47
|
|
|
63
|
-
async function parseOdp(
|
|
48
|
+
async function parseOdp(buffer) {
|
|
64
49
|
const zip = await JSZip.loadAsync(buffer);
|
|
65
50
|
const contentXml = await zip.file('content.xml')?.async('text');
|
|
66
51
|
if (!contentXml) throw new Error('No content.xml found in ODP');
|
|
67
52
|
|
|
68
53
|
const slides = (contentXml.match(/<draw:page /g) || []).length;
|
|
69
54
|
|
|
70
|
-
const text = await officeparser.parseOffice(
|
|
55
|
+
const text = await officeparser.parseOffice(buffer);
|
|
71
56
|
const words = countWords(text);
|
|
72
57
|
|
|
73
58
|
return {
|
|
74
59
|
fileType: 'ODP',
|
|
75
|
-
metrics: {
|
|
76
|
-
words,
|
|
77
|
-
pages: null,
|
|
78
|
-
paragraphs: null,
|
|
79
|
-
sheets: null,
|
|
80
|
-
rows: null,
|
|
81
|
-
cells: null,
|
|
82
|
-
slides,
|
|
83
|
-
},
|
|
60
|
+
metrics: { words, slides },
|
|
84
61
|
};
|
|
85
62
|
}
|
package/src/parsers/pdf.js
CHANGED
|
@@ -25,6 +25,7 @@ function beginSuppression() {
|
|
|
25
25
|
function endSuppression() {
|
|
26
26
|
if (--suppressionDepth === 0) {
|
|
27
27
|
console.log = originalLog;
|
|
28
|
+
capturedWarnings.length = 0;
|
|
28
29
|
}
|
|
29
30
|
}
|
|
30
31
|
|
|
@@ -43,14 +44,6 @@ export async function parsePdf(filePath) {
|
|
|
43
44
|
|
|
44
45
|
return {
|
|
45
46
|
fileType: 'PDF',
|
|
46
|
-
metrics: {
|
|
47
|
-
words,
|
|
48
|
-
pages: data.numpages,
|
|
49
|
-
paragraphs: null,
|
|
50
|
-
sheets: null,
|
|
51
|
-
rows: null,
|
|
52
|
-
cells: null,
|
|
53
|
-
slides: null,
|
|
54
|
-
},
|
|
47
|
+
metrics: { words, pages: data.numpages },
|
|
55
48
|
};
|
|
56
49
|
}
|
package/src/parsers/pptx.js
CHANGED
|
@@ -13,20 +13,12 @@ export async function parsePptx(filePath) {
|
|
|
13
13
|
);
|
|
14
14
|
const slides = slideFiles.length;
|
|
15
15
|
|
|
16
|
-
// Extract text via officeparser
|
|
17
|
-
const text = await officeparser.parseOffice(
|
|
16
|
+
// Extract text via officeparser (reuse buffer to avoid re-reading)
|
|
17
|
+
const text = await officeparser.parseOffice(buffer);
|
|
18
18
|
const words = countWords(text);
|
|
19
19
|
|
|
20
20
|
return {
|
|
21
21
|
fileType: 'PowerPoint',
|
|
22
|
-
metrics: {
|
|
23
|
-
words,
|
|
24
|
-
pages: null,
|
|
25
|
-
paragraphs: null,
|
|
26
|
-
sheets: null,
|
|
27
|
-
rows: null,
|
|
28
|
-
cells: null,
|
|
29
|
-
slides,
|
|
30
|
-
},
|
|
22
|
+
metrics: { words, slides },
|
|
31
23
|
};
|
|
32
24
|
}
|
package/src/parsers/xlsx.js
CHANGED
|
@@ -1,32 +1,23 @@
|
|
|
1
|
-
import
|
|
1
|
+
import XLSX from 'xlsx';
|
|
2
2
|
|
|
3
3
|
export async function parseXlsx(filePath) {
|
|
4
|
-
const workbook =
|
|
5
|
-
await workbook.xlsx.readFile(filePath);
|
|
4
|
+
const workbook = XLSX.readFile(filePath);
|
|
6
5
|
|
|
7
|
-
const sheets = workbook.
|
|
6
|
+
const sheets = workbook.SheetNames.length;
|
|
8
7
|
let rows = 0;
|
|
9
8
|
let cells = 0;
|
|
10
9
|
|
|
11
|
-
for (const
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
10
|
+
for (const name of workbook.SheetNames) {
|
|
11
|
+
const sheet = workbook.Sheets[name];
|
|
12
|
+
const ref = sheet['!ref'];
|
|
13
|
+
if (!ref) continue;
|
|
14
|
+
const range = XLSX.utils.decode_range(ref);
|
|
15
|
+
rows += range.e.r - range.s.r + 1;
|
|
16
|
+
cells += (range.e.r - range.s.r + 1) * (range.e.c - range.s.c + 1);
|
|
18
17
|
}
|
|
19
18
|
|
|
20
19
|
return {
|
|
21
20
|
fileType: 'Excel',
|
|
22
|
-
metrics: {
|
|
23
|
-
words: null,
|
|
24
|
-
pages: null,
|
|
25
|
-
paragraphs: null,
|
|
26
|
-
sheets,
|
|
27
|
-
rows,
|
|
28
|
-
cells,
|
|
29
|
-
slides: null,
|
|
30
|
-
},
|
|
21
|
+
metrics: { sheets, rows, cells },
|
|
31
22
|
};
|
|
32
23
|
}
|
package/src/scc.js
CHANGED
|
@@ -28,19 +28,18 @@ async function findScc() {
|
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
-
let sccBinary = null;
|
|
32
|
-
|
|
33
31
|
export async function checkScc() {
|
|
34
|
-
|
|
35
|
-
if (!
|
|
32
|
+
const binary = await findScc();
|
|
33
|
+
if (!binary) {
|
|
36
34
|
throw new Error(
|
|
37
35
|
'scc is required but not found.\n' +
|
|
38
36
|
'Run "npm install" to auto-download it, or install manually from https://github.com/boyter/scc'
|
|
39
37
|
);
|
|
40
38
|
}
|
|
39
|
+
return binary;
|
|
41
40
|
}
|
|
42
41
|
|
|
43
|
-
export async function runScc(directories, options = {}) {
|
|
42
|
+
export async function runScc(sccBinary, directories, options = {}) {
|
|
44
43
|
const {
|
|
45
44
|
byFile = false,
|
|
46
45
|
excludeDir = [],
|
|
@@ -49,7 +48,6 @@ export async function runScc(directories, options = {}) {
|
|
|
49
48
|
noGitignore = false,
|
|
50
49
|
} = options;
|
|
51
50
|
|
|
52
|
-
if (!sccBinary) sccBinary = await findScc();
|
|
53
51
|
if (!sccBinary) return [];
|
|
54
52
|
|
|
55
53
|
const args = ['--format', 'json'];
|
package/src/stats.js
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
import path from 'node:path';
|
|
2
|
-
import {
|
|
2
|
+
import { METRIC_FIELDS, hasKey } from './utils.js';
|
|
3
|
+
|
|
4
|
+
const SUM_FIELDS = ['files', ...METRIC_FIELDS, 'size'];
|
|
3
5
|
|
|
4
6
|
export function aggregate(results, options = {}) {
|
|
5
7
|
const { byFile = false, sort = 'files' } = options;
|
|
@@ -16,25 +18,9 @@ function aggregateByType(results, sort) {
|
|
|
16
18
|
for (const r of results) {
|
|
17
19
|
const key = r.success ? r.fileType : 'Unreadable';
|
|
18
20
|
if (!groups[key]) {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
words: 0,
|
|
23
|
-
pages: 0,
|
|
24
|
-
paragraphs: 0,
|
|
25
|
-
sheets: 0,
|
|
26
|
-
rows: 0,
|
|
27
|
-
cells: 0,
|
|
28
|
-
slides: 0,
|
|
29
|
-
size: 0,
|
|
30
|
-
hasWords: false,
|
|
31
|
-
hasPages: false,
|
|
32
|
-
hasParagraphs: false,
|
|
33
|
-
hasSheets: false,
|
|
34
|
-
hasRows: false,
|
|
35
|
-
hasCells: false,
|
|
36
|
-
hasSlides: false,
|
|
37
|
-
};
|
|
21
|
+
const g = { fileType: key, files: 0, size: 0 };
|
|
22
|
+
for (const f of METRIC_FIELDS) { g[f] = 0; g[hasKey(f)] = false; }
|
|
23
|
+
groups[key] = g;
|
|
38
24
|
}
|
|
39
25
|
const g = groups[key];
|
|
40
26
|
g.files++;
|
|
@@ -42,54 +28,39 @@ function aggregateByType(results, sort) {
|
|
|
42
28
|
|
|
43
29
|
if (r.success && r.metrics) {
|
|
44
30
|
const m = r.metrics;
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
if (m.sheets != null) { g.sheets += m.sheets; g.hasSheets = true; }
|
|
49
|
-
if (m.rows != null) { g.rows += m.rows; g.hasRows = true; }
|
|
50
|
-
if (m.cells != null) { g.cells += m.cells; g.hasCells = true; }
|
|
51
|
-
if (m.slides != null) { g.slides += m.slides; g.hasSlides = true; }
|
|
31
|
+
for (const f of METRIC_FIELDS) {
|
|
32
|
+
if (m[f] != null) { g[f] += m[f]; g[hasKey(f)] = true; }
|
|
33
|
+
}
|
|
52
34
|
}
|
|
53
35
|
}
|
|
54
36
|
|
|
55
|
-
|
|
56
|
-
sortRows(rows, sort);
|
|
57
|
-
|
|
58
|
-
const totals = computeTotals(rows);
|
|
59
|
-
const columns = detectColumns(rows);
|
|
60
|
-
|
|
61
|
-
return { rows, totals, columns, mode: 'grouped' };
|
|
37
|
+
return finalize(Object.values(groups), sort, 'grouped');
|
|
62
38
|
}
|
|
63
39
|
|
|
64
40
|
function aggregateByFile(results, sort) {
|
|
65
|
-
const rows = results.map(r =>
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
hasPages: r.metrics?.pages != null,
|
|
80
|
-
hasParagraphs: r.metrics?.paragraphs != null,
|
|
81
|
-
hasSheets: r.metrics?.sheets != null,
|
|
82
|
-
hasRows: r.metrics?.rows != null,
|
|
83
|
-
hasCells: r.metrics?.cells != null,
|
|
84
|
-
hasSlides: r.metrics?.slides != null,
|
|
85
|
-
}));
|
|
41
|
+
const rows = results.map(r => {
|
|
42
|
+
const row = {
|
|
43
|
+
fileType: r.success ? r.fileType : 'Unreadable',
|
|
44
|
+
fileName: path.basename(r.filePath),
|
|
45
|
+
filePath: r.filePath,
|
|
46
|
+
files: 1,
|
|
47
|
+
size: r.size || 0,
|
|
48
|
+
};
|
|
49
|
+
for (const f of METRIC_FIELDS) {
|
|
50
|
+
row[f] = r.metrics?.[f] || 0;
|
|
51
|
+
row[hasKey(f)] = r.metrics?.[f] != null;
|
|
52
|
+
}
|
|
53
|
+
return row;
|
|
54
|
+
});
|
|
86
55
|
|
|
87
|
-
|
|
56
|
+
return finalize(rows, sort, 'by-file');
|
|
57
|
+
}
|
|
88
58
|
|
|
59
|
+
function finalize(rows, sort, mode) {
|
|
60
|
+
sortRows(rows, sort);
|
|
89
61
|
const totals = computeTotals(rows);
|
|
90
62
|
const columns = detectColumns(rows);
|
|
91
|
-
|
|
92
|
-
return { rows, totals, columns, mode: 'by-file' };
|
|
63
|
+
return { rows, totals, columns, mode };
|
|
93
64
|
}
|
|
94
65
|
|
|
95
66
|
function sortRows(rows, sort) {
|
|
@@ -104,40 +75,18 @@ function sortRows(rows, sort) {
|
|
|
104
75
|
}
|
|
105
76
|
|
|
106
77
|
function computeTotals(rows) {
|
|
107
|
-
const totals = {
|
|
108
|
-
|
|
109
|
-
files: 0,
|
|
110
|
-
words: 0,
|
|
111
|
-
pages: 0,
|
|
112
|
-
paragraphs: 0,
|
|
113
|
-
sheets: 0,
|
|
114
|
-
rows: 0,
|
|
115
|
-
cells: 0,
|
|
116
|
-
slides: 0,
|
|
117
|
-
size: 0,
|
|
118
|
-
};
|
|
78
|
+
const totals = { fileType: 'Total' };
|
|
79
|
+
for (const f of SUM_FIELDS) totals[f] = 0;
|
|
119
80
|
for (const r of rows) {
|
|
120
|
-
totals
|
|
121
|
-
totals.words += r.words;
|
|
122
|
-
totals.pages += r.pages;
|
|
123
|
-
totals.paragraphs += r.paragraphs;
|
|
124
|
-
totals.sheets += r.sheets;
|
|
125
|
-
totals.rows += r.rows;
|
|
126
|
-
totals.cells += r.cells;
|
|
127
|
-
totals.slides += r.slides;
|
|
128
|
-
totals.size += r.size;
|
|
81
|
+
for (const f of SUM_FIELDS) totals[f] += r[f];
|
|
129
82
|
}
|
|
130
83
|
return totals;
|
|
131
84
|
}
|
|
132
85
|
|
|
133
86
|
function detectColumns(rows) {
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
hasRows: rows.some(r => r.hasRows),
|
|
140
|
-
hasCells: rows.some(r => r.hasCells),
|
|
141
|
-
hasSlides: rows.some(r => r.hasSlides),
|
|
142
|
-
};
|
|
87
|
+
const columns = {};
|
|
88
|
+
for (const f of METRIC_FIELDS) {
|
|
89
|
+
columns[hasKey(f)] = rows.some(r => r[hasKey(f)]);
|
|
90
|
+
}
|
|
91
|
+
return columns;
|
|
143
92
|
}
|
package/src/utils.js
CHANGED
|
@@ -22,8 +22,6 @@ export function getExtension(filePath) {
|
|
|
22
22
|
return path.extname(filePath).toLowerCase().replace('.', '');
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
export const OFFICE_EXTENSIONS = ['docx', 'xlsx', 'pptx', 'pdf', 'odt', 'ods', 'odp'];
|
|
26
|
-
|
|
27
25
|
export const EXTENSION_TO_TYPE = {
|
|
28
26
|
docx: 'Word',
|
|
29
27
|
pdf: 'PDF',
|
|
@@ -33,3 +31,11 @@ export const EXTENSION_TO_TYPE = {
|
|
|
33
31
|
ods: 'ODS',
|
|
34
32
|
odp: 'ODP',
|
|
35
33
|
};
|
|
34
|
+
|
|
35
|
+
export const OFFICE_EXTENSIONS = Object.keys(EXTENSION_TO_TYPE);
|
|
36
|
+
|
|
37
|
+
export const METRIC_FIELDS = ['words', 'pages', 'paragraphs', 'sheets', 'rows', 'cells', 'slides'];
|
|
38
|
+
|
|
39
|
+
export function hasKey(field) {
|
|
40
|
+
return `has${field[0].toUpperCase()}${field.slice(1)}`;
|
|
41
|
+
}
|
package/src/walker.js
CHANGED
|
@@ -30,55 +30,40 @@ export async function findFiles(directories, options = {}) {
|
|
|
30
30
|
|
|
31
31
|
const ignore = excludeDir.map(d => `**/${d}/**`);
|
|
32
32
|
|
|
33
|
-
const
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
onlyFiles: true,
|
|
39
|
-
followSymbolicLinks: false,
|
|
40
|
-
...(directories.length > 0 ? {} : { cwd: process.cwd() }),
|
|
41
|
-
});
|
|
42
|
-
|
|
43
|
-
// If directories specified, search each one
|
|
44
|
-
let allPaths = [];
|
|
45
|
-
if (directories.length > 0) {
|
|
46
|
-
for (const dir of directories) {
|
|
47
|
-
const found = await fg(pattern, {
|
|
48
|
-
cwd: dir,
|
|
49
|
-
absolute: true,
|
|
50
|
-
ignore,
|
|
51
|
-
dot: false,
|
|
52
|
-
onlyFiles: true,
|
|
53
|
-
followSymbolicLinks: false,
|
|
54
|
-
});
|
|
55
|
-
allPaths.push(...found);
|
|
56
|
-
}
|
|
57
|
-
} else {
|
|
58
|
-
allPaths = await fg(pattern, {
|
|
59
|
-
cwd: process.cwd(),
|
|
33
|
+
const dirs = directories.length > 0 ? directories : [process.cwd()];
|
|
34
|
+
const allPaths = [];
|
|
35
|
+
for (const dir of dirs) {
|
|
36
|
+
const found = await fg(pattern, {
|
|
37
|
+
cwd: dir,
|
|
60
38
|
absolute: true,
|
|
61
39
|
ignore,
|
|
62
40
|
dot: false,
|
|
63
41
|
onlyFiles: true,
|
|
64
42
|
followSymbolicLinks: false,
|
|
65
43
|
});
|
|
44
|
+
allPaths.push(...found);
|
|
66
45
|
}
|
|
67
46
|
|
|
68
47
|
const limitBytes = largeFileLimit * 1024 * 1024;
|
|
69
48
|
const files = [];
|
|
70
49
|
const skipped = [];
|
|
71
50
|
|
|
72
|
-
for
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
51
|
+
// Batch stat calls for better throughput on large directories
|
|
52
|
+
const BATCH_SIZE = 50;
|
|
53
|
+
for (let i = 0; i < allPaths.length; i += BATCH_SIZE) {
|
|
54
|
+
const batch = allPaths.slice(i, i + BATCH_SIZE);
|
|
55
|
+
const results = await Promise.allSettled(batch.map(p => stat(p)));
|
|
56
|
+
for (let j = 0; j < results.length; j++) {
|
|
57
|
+
const p = batch[j];
|
|
58
|
+
const r = results[j];
|
|
59
|
+
if (r.status === 'rejected') {
|
|
60
|
+
const err = r.reason;
|
|
61
|
+
skipped.push({ path: p, reason: err.code === 'EACCES' ? 'Permission denied' : err.message, size: 0 });
|
|
62
|
+
} else if (r.value.size > limitBytes) {
|
|
63
|
+
skipped.push({ path: p, reason: `Exceeds ${largeFileLimit}MB limit`, size: r.value.size });
|
|
77
64
|
} else {
|
|
78
|
-
files.push({ path: p, size:
|
|
65
|
+
files.push({ path: p, size: r.value.size });
|
|
79
66
|
}
|
|
80
|
-
} catch (err) {
|
|
81
|
-
skipped.push({ path: p, reason: err.code === 'EACCES' ? 'Permission denied' : err.message, size: 0 });
|
|
82
67
|
}
|
|
83
68
|
}
|
|
84
69
|
|