@cesarandreslopez/occ 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. package/README.md +58 -28
  2. package/dist/bin/occ.d.ts +2 -0
  3. package/{bin → dist/bin}/occ.js +1 -0
  4. package/dist/bin/occ.js.map +1 -0
  5. package/dist/src/cli.d.ts +1 -0
  6. package/dist/src/cli.js +184 -0
  7. package/dist/src/cli.js.map +1 -0
  8. package/dist/src/markdown/convert.d.ts +2 -0
  9. package/dist/src/markdown/convert.js +117 -0
  10. package/dist/src/markdown/convert.js.map +1 -0
  11. package/dist/src/output/json.d.ts +4 -0
  12. package/dist/src/output/json.js +42 -0
  13. package/dist/src/output/json.js.map +1 -0
  14. package/dist/src/output/tabular.d.ts +12 -0
  15. package/dist/src/output/tabular.js +238 -0
  16. package/dist/src/output/tabular.js.map +1 -0
  17. package/dist/src/output/tree.d.ts +11 -0
  18. package/dist/src/output/tree.js +79 -0
  19. package/dist/src/output/tree.js.map +1 -0
  20. package/dist/src/parsers/docx.d.ts +2 -0
  21. package/dist/src/parsers/docx.js +14 -0
  22. package/dist/src/parsers/docx.js.map +1 -0
  23. package/dist/src/parsers/index.d.ts +4 -0
  24. package/dist/src/parsers/index.js +65 -0
  25. package/dist/src/parsers/index.js.map +1 -0
  26. package/dist/src/parsers/odf.d.ts +2 -0
  27. package/dist/src/parsers/odf.js +54 -0
  28. package/dist/src/parsers/odf.js.map +1 -0
  29. package/dist/src/parsers/pdf.d.ts +2 -0
  30. package/dist/src/parsers/pdf.js +43 -0
  31. package/dist/src/parsers/pdf.js.map +1 -0
  32. package/dist/src/parsers/pptx.d.ts +2 -0
  33. package/dist/src/parsers/pptx.js +19 -0
  34. package/dist/src/parsers/pptx.js.map +1 -0
  35. package/dist/src/parsers/xlsx.d.ts +2 -0
  36. package/dist/src/parsers/xlsx.js +21 -0
  37. package/dist/src/parsers/xlsx.js.map +1 -0
  38. package/dist/src/progress.d.ts +10 -0
  39. package/dist/src/progress.js +38 -0
  40. package/dist/src/progress.js.map +1 -0
  41. package/dist/src/scc.d.ts +28 -0
  42. package/dist/src/scc.js +83 -0
  43. package/dist/src/scc.js.map +1 -0
  44. package/dist/src/stats.d.ts +30 -0
  45. package/dist/src/stats.js +88 -0
  46. package/dist/src/stats.js.map +1 -0
  47. package/dist/src/structure/extract.d.ts +7 -0
  48. package/dist/src/structure/extract.js +176 -0
  49. package/dist/src/structure/extract.js.map +1 -0
  50. package/dist/src/structure/index.d.ts +3 -0
  51. package/dist/src/structure/index.js +3 -0
  52. package/dist/src/structure/index.js.map +1 -0
  53. package/dist/src/structure/types.d.ts +29 -0
  54. package/dist/src/structure/types.js +72 -0
  55. package/dist/src/structure/types.js.map +1 -0
  56. package/dist/src/types.d.ts +20 -0
  57. package/dist/src/types.js +2 -0
  58. package/dist/src/types.js.map +1 -0
  59. package/dist/src/utils.d.ts +9 -0
  60. package/dist/src/utils.js +37 -0
  61. package/dist/src/utils.js.map +1 -0
  62. package/dist/src/walker.d.ts +13 -0
  63. package/dist/src/walker.js +59 -0
  64. package/dist/src/walker.js.map +1 -0
  65. package/package.json +13 -6
  66. package/scripts/postinstall.js +28 -1
  67. package/src/cli.js +0 -126
  68. package/src/output/json.js +0 -37
  69. package/src/output/tabular.js +0 -197
  70. package/src/parsers/docx.js +0 -23
  71. package/src/parsers/index.js +0 -72
  72. package/src/parsers/odf.js +0 -85
  73. package/src/parsers/pdf.js +0 -56
  74. package/src/parsers/pptx.js +0 -32
  75. package/src/parsers/xlsx.js +0 -31
  76. package/src/progress.js +0 -45
  77. package/src/scc.js +0 -94
  78. package/src/stats.js +0 -143
  79. package/src/utils.js +0 -35
  80. package/src/walker.js +0 -86
package/README.md CHANGED
@@ -19,6 +19,7 @@ OCC scans directories for office documents (DOCX, XLSX, PPTX, PDF, ODT, ODS, ODP
19
19
 
20
20
  - **Office document metrics** — words, pages, paragraphs, slides, sheets, rows, cells
21
21
  - **Seven formats supported** — DOCX, XLSX, PPTX, PDF, ODT, ODS, ODP
22
+ - **Document structure extraction** — `--structure` parses heading hierarchy into a navigable tree with dotted section codes (1, 1.1, 1.2, ...)
22
23
  - **Code metrics via scc** — auto-detects code files and integrates scc output
23
24
  - **Multiple output modes** — grouped by type, per-file breakdown, or JSON
24
25
  - **CI-friendly** — ASCII-only, no-color mode for pipelines
@@ -46,6 +47,7 @@ npx @cesarandreslopez/occ docs/ reports/
46
47
  ```bash
47
48
  git clone https://github.com/cesarandreslopez/occ.git && cd occ
48
49
  npm install
50
+ npm run build
49
51
  npm start
50
52
  ```
51
53
 
@@ -64,6 +66,12 @@ occ --by-file docs/
64
66
  # JSON output
65
67
  occ --format json docs/
66
68
 
69
+ # Extract document structure (heading hierarchy)
70
+ occ --structure docs/
71
+
72
+ # Structure as JSON
73
+ occ --structure --format json docs/
74
+
67
75
  # Only specific formats
68
76
  occ --include-ext pdf,docx docs/
69
77
 
@@ -77,37 +85,55 @@ occ --ci docs/
77
85
  ## Example Output
78
86
 
79
87
  ```
80
- -- Documents -----------------------------------------------------------
81
- Format Files Words Pages Extra Size
82
- --------------------------------------------------------------------
83
- Word 12 34,210 137 1,203 paras 1.2 MB
84
- PDF 8 22,540 64 4.5 MB
85
- Excel 3 12 sheets 890 KB
86
- --------------------------------------------------------------------
87
- Total 23 56,750 201 1,203 paras 6.5 MB
88
- --------------------------------------------------------------------
89
-
90
- -- Code (via scc) ------------------------------------------------------
91
- Language Files Lines Blanks Comments Code
92
- --------------------------------------------------------------------
93
- JavaScript 15 2340 180 320 1840
94
- Python 8 1200 90 150 960
95
- --------------------------------------------------------------------
96
- Total 23 3540 270 470 2800
97
- --------------------------------------------------------------------
88
+ -- Documents ---------------------------------------------------------------
89
+ Format Files Words Pages Details Size
90
+ ----------------------------------------------------------------------------
91
+ Word 12 34,210 137 1,203 paras 1.2 MB
92
+ PDF 8 22,540 64 4.5 MB
93
+ Excel 3 12 sheets 890 KB
94
+ ----------------------------------------------------------------------------
95
+ Total 23 56,750 201 1,203 paras 6.5 MB
96
+
97
+ -- Code (via scc) ----------------------------------------------------------
98
+ Language Files Lines Blanks Comments Code
99
+ ----------------------------------------------------------------------------
100
+ JavaScript 15 2340 180 320 1840
101
+ Python 8 1200 90 150 960
102
+ ----------------------------------------------------------------------------
103
+ Total 23 3540 270 470 2800
104
+
105
+ Scanned 23 documents (56,750 words, 201 pages) in 120ms
106
+ ```
107
+
108
+ ### Structure Output (`--structure`)
109
+
110
+ ```
111
+ -- Structure: report.docx --------------------------------------------------
112
+ 1 Executive Summary
113
+ 1.1 Background ......................................... p.1
114
+ 1.2 Key Findings ....................................... p.1-2
115
+ 2 Methodology
116
+ 2.1 Data Collection .................................... p.3
117
+ 2.2 Analysis Framework ................................. p.4
118
+ 2.2.1 Quantitative Methods ........................... p.4
119
+ 2.2.2 Qualitative Methods ............................ p.5
120
+ 3 Results ................................................ p.6-8
121
+ 4 Conclusions ............................................ p.9
122
+
123
+ 4 sections, 10 nodes, max depth 3
98
124
  ```
99
125
 
100
126
  ## Supported Formats
101
127
 
102
- | Format | Extension | Metrics |
103
- |--------|-----------|---------|
104
- | Word | `.docx` | words, pages*, paragraphs |
105
- | PDF | `.pdf` | words, pages |
106
- | Excel | `.xlsx` | sheets, rows, cells |
107
- | PowerPoint | `.pptx` | words, slides |
108
- | ODT | `.odt` | words, pages*, paragraphs |
109
- | ODS | `.ods` | sheets, rows, cells |
110
- | ODP | `.odp` | words, slides |
128
+ | Format | Extension | Metrics | Structure |
129
+ |--------|-----------|---------|-----------|
130
+ | Word | `.docx` | words, pages*, paragraphs | Yes |
131
+ | PDF | `.pdf` | words, pages | Yes (with page mapping) |
132
+ | Excel | `.xlsx` | sheets, rows, cells | — |
133
+ | PowerPoint | `.pptx` | words, slides | Yes (slide headers) |
134
+ | ODT | `.odt` | words, pages*, paragraphs | Yes (best-effort) |
135
+ | ODS | `.ods` | sheets, rows, cells | — |
136
+ | ODP | `.odp` | words, slides | Yes (slide headers) |
111
137
 
112
138
  \* Pages for Word/ODT are estimated at 250 words/page.
113
139
 
@@ -117,6 +143,7 @@ Total 23 3540 270 470 2800
117
143
  |------|-------------|---------|
118
144
  | `--by-file` / `-f` | Row per file | grouped by type |
119
145
  | `--format <type>` | `tabular` or `json` | `tabular` |
146
+ | `--structure` | Extract and display document heading hierarchy | off |
120
147
  | `--include-ext <exts>` | Comma-separated extensions | all supported |
121
148
  | `--exclude-ext <exts>` | Comma-separated to skip | none |
122
149
  | `--exclude-dir <dirs>` | Directories to skip | `node_modules,.git` |
@@ -151,12 +178,15 @@ Tools like `scc`, `cloc`, and `tokei` give you instant visibility into codebases
151
178
 
152
179
  - **Context budgeting** — LLMs have finite context windows. OCC's word and page counts let agents estimate how much of a document set they can ingest before hitting token limits
153
180
  - **Prioritization** — an agent deciding which documents to read can use OCC's JSON output to rank files by size, word count, or type, focusing on the most relevant content first
181
+ - **RAG chunk mapping** — `--structure --format json` outputs heading trees with character offsets, enabling chunk-to-section mapping, scoped retrieval, and citation paths in RAG pipelines
154
182
  - **Repository mapping** — agents exploring an unfamiliar codebase can run `occ --format json` to build a structured inventory of all non-code content alongside `scc` code metrics
155
183
  - **Pipeline integration** — JSON output pipes directly into agent toolchains for automated document analysis, summarization, or compliance checking
156
184
 
157
185
  ## How It Works
158
186
 
159
- OCC uses [fast-glob](https://github.com/mrmlnc/fast-glob) for file discovery, dispatches to format-specific parsers (mammoth for DOCX, pdf-parse for PDF, SheetJS for XLSX, JSZip + officeparser for PPTX/ODF), aggregates metrics, and renders output via cli-table3. For code metrics, it shells out to a vendored [scc](https://github.com/boyter/scc) binary (auto-downloaded during `npm install`, with PATH fallback).
187
+ OCC is written in TypeScript. It uses [fast-glob](https://github.com/mrmlnc/fast-glob) for file discovery, dispatches to format-specific parsers (mammoth for DOCX, pdf-parse for PDF, SheetJS for XLSX, JSZip + officeparser for PPTX/ODF), aggregates metrics, and renders output via cli-table3. For code metrics, it shells out to a vendored [scc](https://github.com/boyter/scc) binary (auto-downloaded during `npm install`, with PATH fallback).
188
+
189
+ For structure extraction (`--structure`), documents are first converted to markdown (mammoth + [turndown](https://github.com/mixmark-io/turndown) for DOCX, pdf-parse with page markers for PDF), then headings are extracted and assembled into a tree with dotted section codes.
160
190
 
161
191
  ## Contributing
162
192
 
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ export {};
@@ -1,3 +1,4 @@
1
1
  #!/usr/bin/env node
2
2
  import { run } from '../src/cli.js';
3
3
  run(process.argv);
4
+ //# sourceMappingURL=occ.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"occ.js","sourceRoot":"","sources":["../../bin/occ.ts"],"names":[],"mappings":";AACA,OAAO,EAAE,GAAG,EAAE,MAAM,eAAe,CAAC;AACpC,GAAG,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC"}
@@ -0,0 +1 @@
1
+ export declare function run(argv: string[]): Promise<void>;
@@ -0,0 +1,184 @@
1
+ import { Command, Option } from 'commander';
2
+ import { readFile, writeFile } from 'node:fs/promises';
3
+ import { fileURLToPath } from 'node:url';
4
+ import path from 'node:path';
5
+ import { findFiles } from './walker.js';
6
+ import { parseFiles } from './parsers/index.js';
7
+ import { aggregate } from './stats.js';
8
+ import { formatDocumentTable, formatSccTable, formatSummaryLine } from './output/tabular.js';
9
+ import { formatJson } from './output/json.js';
10
+ import { checkScc, runScc } from './scc.js';
11
+ import { createProgress } from './progress.js';
12
+ import { documentToMarkdown } from './markdown/convert.js';
13
+ import { extractFromMarkdown } from './structure/index.js';
14
+ import { formatStructureTree } from './output/tree.js';
15
+ import { getExtension } from './utils.js';
16
+ // Find package.json — works from both src/ (dev) and dist/src/ (built)
17
+ const __dirname = path.dirname(fileURLToPath(import.meta.url));
18
+ async function loadPkg() {
19
+ for (const rel of ['..', '../..']) {
20
+ try {
21
+ return JSON.parse(await readFile(path.resolve(__dirname, rel, 'package.json'), 'utf8'));
22
+ }
23
+ catch { /* try next */ }
24
+ }
25
+ return { version: '0.0.0' };
26
+ }
27
+ const pkg = await loadPkg();
28
+ export async function run(argv) {
29
+ const program = new Command();
30
+ program
31
+ .name('occ')
32
+ .description('Office Cloc and Count — scc-style summary tables for office documents')
33
+ .version(pkg.version)
34
+ .argument('[directories...]', 'directories to scan', [])
35
+ .option('-f, --by-file', 'show a row per file instead of grouped by type')
36
+ .option('--format <type>', 'output format: tabular or json', 'tabular')
37
+ .option('--include-ext <exts>', 'comma-separated extensions to include')
38
+ .option('--exclude-ext <exts>', 'comma-separated extensions to exclude')
39
+ .option('--exclude-dir <dirs>', 'directories to skip (comma-separated)', 'node_modules,.git')
40
+ .option('--no-gitignore', 'disable .gitignore respect')
41
+ .addOption(new Option('--sort <col>', 'sort by: files, name, words, size').choices(['files', 'name', 'words', 'size']).default('files'))
42
+ .option('-o, --output <file>', 'write output to file')
43
+ .option('--ci', 'ASCII-only output, no colors')
44
+ .option('--large-file-limit <mb>', 'skip files over this size in MB', '50')
45
+ .option('--no-code', 'skip scc code analysis')
46
+ .option('--structure', 'extract and display document structure')
47
+ .action(async (directories, opts) => {
48
+ try {
49
+ await execute(directories, opts);
50
+ }
51
+ catch (err) {
52
+ const error = err;
53
+ process.stderr.write(`Error: ${error.message}\n`);
54
+ process.exit(1);
55
+ }
56
+ });
57
+ await program.parseAsync(argv);
58
+ }
59
+ function validateLargeFileLimit(value) {
60
+ const n = parseFloat(value);
61
+ if (Number.isNaN(n) || n <= 0) {
62
+ throw new Error(`Invalid --large-file-limit value: "${value}" (must be a positive number)`);
63
+ }
64
+ return n;
65
+ }
66
+ const STRUCTURABLE_EXTS = new Set(['docx', 'pdf', 'pptx', 'odt', 'odp']);
67
+ async function extractStructures(files, concurrency, onProgress) {
68
+ const results = [];
69
+ for (let i = 0; i < files.length; i += concurrency) {
70
+ const batch = files.slice(i, i + concurrency);
71
+ const batchResults = await Promise.allSettled(batch.map(async (f) => {
72
+ const markdown = await documentToMarkdown(f.path);
73
+ if (markdown == null)
74
+ return null;
75
+ const structure = extractFromMarkdown(markdown);
76
+ return { file: f.path, structure, markdown };
77
+ }));
78
+ for (let j = 0; j < batchResults.length; j++) {
79
+ const r = batchResults[j];
80
+ if (r.status === 'fulfilled' && r.value) {
81
+ results.push(r.value);
82
+ }
83
+ if (onProgress)
84
+ onProgress(1, batch[j]?.path);
85
+ }
86
+ }
87
+ return results;
88
+ }
89
+ async function execute(directories, opts) {
90
+ const startTime = Date.now();
91
+ const excludeDirs = opts.excludeDir
92
+ ? opts.excludeDir.split(',').map(d => d.trim())
93
+ : ['node_modules', '.git'];
94
+ const includeCode = opts.code !== false;
95
+ let sccBinary = null;
96
+ if (includeCode) {
97
+ sccBinary = await checkScc();
98
+ }
99
+ // Find and parse office documents
100
+ const { files, skipped } = await findFiles(directories, {
101
+ includeExt: opts.includeExt,
102
+ excludeExt: opts.excludeExt,
103
+ excludeDir: excludeDirs,
104
+ noGitignore: !opts.gitignore,
105
+ largeFileLimit: validateLargeFileLimit(opts.largeFileLimit),
106
+ });
107
+ const showProgress = opts.format !== 'json' && process.stderr.isTTY;
108
+ let results = [];
109
+ if (files.length > 0) {
110
+ const progress = createProgress({ total: files.length, label: 'Parsing', enabled: showProgress });
111
+ results = await parseFiles(files, 10, (inc, detail) => progress.update(inc, detail));
112
+ progress.done();
113
+ }
114
+ const stats = aggregate(results, {
115
+ byFile: opts.byFile,
116
+ sort: opts.sort,
117
+ });
118
+ let sccData = null;
119
+ if (includeCode) {
120
+ if (showProgress)
121
+ process.stderr.write('\rAnalyzing code with scc...');
122
+ sccData = await runScc(sccBinary, directories, {
123
+ byFile: opts.byFile,
124
+ excludeDir: excludeDirs,
125
+ sort: opts.sort,
126
+ ci: opts.ci,
127
+ noGitignore: !opts.gitignore,
128
+ });
129
+ if (showProgress) {
130
+ const cols = process.stderr.columns || 80;
131
+ process.stderr.write('\r' + ' '.repeat(cols) + '\r');
132
+ }
133
+ }
134
+ // Structure extraction
135
+ let structureResults = [];
136
+ if (opts.structure) {
137
+ const structurableFiles = files.filter(f => STRUCTURABLE_EXTS.has(getExtension(f.path)));
138
+ if (structurableFiles.length > 0) {
139
+ const progress = createProgress({ total: structurableFiles.length, label: 'Extracting structure', enabled: showProgress });
140
+ structureResults = await extractStructures(structurableFiles, 10, (inc, detail) => progress.update(inc, detail));
141
+ progress.done();
142
+ }
143
+ }
144
+ // Format output
145
+ let output;
146
+ if (opts.format === 'json') {
147
+ output = formatJson(stats, sccData, opts.structure ? structureResults : undefined);
148
+ }
149
+ else {
150
+ const parts = [];
151
+ if (files.length === 0 && (!sccData || sccData.length === 0)) {
152
+ parts.push('No files found.');
153
+ }
154
+ else {
155
+ if (files.length > 0) {
156
+ parts.push(formatDocumentTable(stats, { ci: opts.ci }));
157
+ }
158
+ if (sccData && sccData.length > 0) {
159
+ parts.push(formatSccTable(sccData, { ci: opts.ci, byFile: opts.byFile }));
160
+ }
161
+ // Structure trees
162
+ if (structureResults.length > 0) {
163
+ for (const sr of structureResults) {
164
+ parts.push(formatStructureTree(sr, { ci: opts.ci }));
165
+ }
166
+ }
167
+ const elapsed = Date.now() - startTime;
168
+ const summary = formatSummaryLine(stats, sccData, elapsed, { ci: opts.ci });
169
+ if (summary)
170
+ parts.push(summary);
171
+ }
172
+ if (skipped.length > 0) {
173
+ parts.push(`\n${skipped.length} file(s) skipped (use --large-file-limit to adjust)`);
174
+ }
175
+ output = parts.join('\n') + '\n';
176
+ }
177
+ if (opts.output) {
178
+ await writeFile(opts.output, output);
179
+ }
180
+ else {
181
+ process.stdout.write(output);
182
+ }
183
+ }
184
+ //# sourceMappingURL=cli.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"cli.js","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,WAAW,CAAC;AAC5C,OAAO,EAAE,QAAQ,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AACvD,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,IAAI,MAAM,WAAW,CAAC;AAC7B,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AACxC,OAAO,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAChD,OAAO,EAAE,SAAS,EAAE,MAAM,YAAY,CAAC;AACvC,OAAO,EAAE,mBAAmB,EAAE,cAAc,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AAC7F,OAAO,EAAE,UAAU,EAAE,MAAM,kBAAkB,CAAC;AAC9C,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,MAAM,UAAU,CAAC;AAC5C,OAAO,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAC/C,OAAO,EAAE,kBAAkB,EAAE,MAAM,uBAAuB,CAAC;AAC3D,OAAO,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAC3D,OAAO,EAAE,mBAAmB,EAAuB,MAAM,kBAAkB,CAAC;AAK5E,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAiB1C,uEAAuE;AACvE,MAAM,SAAS,GAAG,IAAI,CAAC,OAAO,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAC/D,KAAK,UAAU,OAAO;IACpB,KAAK,MAAM,GAAG,IAAI,CAAC,IAAI,EAAE,OAAO,CAAC,EAAE,CAAC;QAClC,IAAI,CAAC;YAAC,OAAO,IAAI,CAAC,KAAK,CAAC,MAAM,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,SAAS,EAAE,GAAG,EAAE,cAAc,CAAC,EAAE,MAAM,CAAC,CAAC,CAAC;QAAC,CAAC;QAChG,MAAM,CAAC,CAAC,cAAc,CAAC,CAAC;IAC1B,CAAC;IACD,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,CAAC;AAC9B,CAAC;AACD,MAAM,GAAG,GAAG,MAAM,OAAO,EAAE,CAAC;AAE5B,MAAM,CAAC,KAAK,UAAU,GAAG,CAAC,IAAc;IACtC,MAAM,OAAO,GAAG,IAAI,OAAO,EAAE,CAAC;IAE9B,OAAO;SACJ,IAAI,CAAC,KAAK,CAAC;SACX,WAAW,CAAC,uEAAuE,CAAC;SACpF,OAAO,CAAC,GAAG,CAAC,OAAO,CAAC;SACpB,QAAQ,CAAC,kBAAkB,EAAE,qBAAqB,EAAE,EAAE,CAAC;SACvD,MAAM,CAAC,eAAe,EAAE,gDAAgD,CAAC;SACzE,MAAM,CAAC,iBAAiB,EAAE,gCAAgC,EAAE,SAAS,CAAC;SACtE,MAAM,CAAC,sBAAsB,EAAE,uCAAuC,CAAC;SACvE,MAAM,CAAC,sBAAsB,EAAE,uCAAuC,CAAC;SACvE,MAAM,CAAC,sBAAsB,EAAE,uCAAuC,EAAE,mBAAmB,CAAC;SAC5F,MAAM,CAAC,gBAAgB,EAAE,4BAA4B,CAAC;SACtD,SAAS,CAAC,IAAI,MAAM,CAAC,cAAc,EAAE,mCAAmC,CAAC,CAAC,OAAO,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;SACvI,MAAM,CAAC,qBAAqB,EAAE,sBAAsB,CAAC;SACrD,MAAM,CAAC,MAAM,EAAE,8BAA8B,CAAC;SAC9C,MAAM,CAAC,yBAAyB,EAAE,iCAA
iC,EAAE,IAAI,CAAC;SAC1E,MAAM,CAAC,WAAW,EAAE,wBAAwB,CAAC;SAC7C,MAAM,CAAC,aAAa,EAAE,wCAAwC,CAAC;SAC/D,MAAM,CAAC,KAAK,EAAE,WAAqB,EAAE,IAAgB,EAAE,EAAE;QACxD,IAAI,CAAC;YACH,MAAM,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC,CAAC;QACnC,CAAC;QAAC,OAAO,GAAY,EAAE,CAAC;YACtB,MAAM,KAAK,GAAG,GAAY,CAAC;YAC3B,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,UAAU,KAAK,CAAC,OAAO,IAAI,CAAC,CAAC;YAClD,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QAClB,CAAC;IACH,CAAC,CAAC,CAAC;IAEL,MAAM,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,SAAS,sBAAsB,CAAC,KAAa;IAC3C,MAAM,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;IAC5B,IAAI,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;QAC9B,MAAM,IAAI,KAAK,CAAC,sCAAsC,KAAK,+BAA+B,CAAC,CAAC;IAC9F,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED,MAAM,iBAAiB,GAAG,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC,CAAC;AAEzE,KAAK,UAAU,iBAAiB,CAC9B,KAAkB,EAClB,WAAmB,EACnB,UAAmD;IAEnD,MAAM,OAAO,GAAsB,EAAE,CAAC;IAEtC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,WAAW,EAAE,CAAC;QACnD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,WAAW,CAAC,CAAC;QAC9C,MAAM,YAAY,GAAG,MAAM,OAAO,CAAC,UAAU,CAC3C,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,EAAE,EAAE;YACpB,MAAM,QAAQ,GAAG,MAAM,kBAAkB,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAClD,IAAI,QAAQ,IAAI,IAAI;gBAAE,OAAO,IAAI,CAAC;YAClC,MAAM,SAAS,GAAG,mBAAmB,CAAC,QAAQ,CAAC,CAAC;YAChD,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC,IAAI,EAAE,SAAS,EAAE,QAAQ,EAAqB,CAAC;QAClE,CAAC,CAAC,CACH,CAAC;QACF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,YAAY,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAC7C,MAAM,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC,CAAC;YAC1B,IAAI,CAAC,CAAC,MAAM,KAAK,WAAW,IAAI,CAAC,CAAC,KAAK,EAAE,CAAC;gBACxC,OAAO,CAAC,IAAI,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC;YACxB,CAAC;YACD,IAAI,UAAU;gBAAE,UAAU,CAAC,CAAC,EAAE,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC;QAChD,CAAC;IACH,CAAC;IAED,OAAO,OAAO,CAAC;AACjB,CAAC;AAED,KAAK,UAAU,OAAO,CAAC,WAAqB,EAAE,IAAgB;IAC5D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAC7B,MAAM,WAAW,GAAG,IAAI,CAAC,UAAU;QACjC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,
GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;QAC/C,CAAC,CAAC,CAAC,cAAc,EAAE,MAAM,CAAC,CAAC;IAE7B,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,KAAK,KAAK,CAAC;IAExC,IAAI,SAAS,GAAkB,IAAI,CAAC;IACpC,IAAI,WAAW,EAAE,CAAC;QAChB,SAAS,GAAG,MAAM,QAAQ,EAAE,CAAC;IAC/B,CAAC;IAED,kCAAkC;IAClC,MAAM,EAAE,KAAK,EAAE,OAAO,EAAE,GAAG,MAAM,SAAS,CAAC,WAAW,EAAE;QACtD,UAAU,EAAE,IAAI,CAAC,UAAU;QAC3B,UAAU,EAAE,IAAI,CAAC,UAAU;QAC3B,UAAU,EAAE,WAAW;QACvB,WAAW,EAAE,CAAC,IAAI,CAAC,SAAS;QAC5B,cAAc,EAAE,sBAAsB,CAAC,IAAI,CAAC,cAAc,CAAC;KAC5D,CAAC,CAAC;IAEH,MAAM,YAAY,GAAG,IAAI,CAAC,MAAM,KAAK,MAAM,IAAI,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC;IACpE,IAAI,OAAO,GAAkB,EAAE,CAAC;IAChC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACrB,MAAM,QAAQ,GAAG,cAAc,CAAC,EAAE,KAAK,EAAE,KAAK,CAAC,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,YAAY,EAAE,CAAC,CAAC;QAClG,OAAO,GAAG,MAAM,UAAU,CAAC,KAAK,EAAE,EAAE,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC;QACrF,QAAQ,CAAC,IAAI,EAAE,CAAC;IAClB,CAAC;IAED,MAAM,KAAK,GAAG,SAAS,CAAC,OAAO,EAAE;QAC/B,MAAM,EAAE,IAAI,CAAC,MAAM;QACnB,IAAI,EAAE,IAAI,CAAC,IAAI;KAChB,CAAC,CAAC;IAEH,IAAI,OAAO,GAAyB,IAAI,CAAC;IACzC,IAAI,WAAW,EAAE,CAAC;QAChB,IAAI,YAAY;YAAE,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,8BAA8B,CAAC,CAAC;QACvE,OAAO,GAAG,MAAM,MAAM,CAAC,SAAS,EAAE,WAAW,EAAE;YAC7C,MAAM,EAAE,IAAI,CAAC,MAAM;YACnB,UAAU,EAAE,WAAW;YACvB,IAAI,EAAE,IAAI,CAAC,IAAI;YACf,EAAE,EAAE,IAAI,CAAC,EAAE;YACX,WAAW,EAAE,CAAC,IAAI,CAAC,SAAS;SAC7B,CAAC,CAAC;QACH,IAAI,YAAY,EAAE,CAAC;YACjB,MAAM,IAAI,GAAG,OAAO,CAAC,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;YAC1C,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,GAAG,GAAG,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC,CAAC;QACvD,CAAC;IACH,CAAC;IAED,uBAAuB;IACvB,IAAI,gBAAgB,GAAsB,EAAE,CAAC;IAC7C,IAAI,IAAI,CAAC,SAAS,EAAE,CAAC;QACnB,MAAM,iBAAiB,GAAG,KAAK,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,iBAAiB,CAAC,GAAG,CAAC,YAAY,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;QACzF,IAAI,iBAAiB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACjC,MAAM,QAAQ,GAAG,cAAc,CAAC,EAAE,KAAK,EAAE,iBAAiB,CAAC,MAAM,EAAE,KAAK,EAAE,sBAAsB,EAAE,OAAO,EAAE,YAAY,EAAE
,CAAC,CAAC;YAC3H,gBAAgB,GAAG,MAAM,iBAAiB,CAAC,iBAAiB,EAAE,EAAE,EAAE,CAAC,GAAG,EAAE,MAAM,EAAE,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,GAAG,EAAE,MAAM,CAAC,CAAC,CAAC;YACjH,QAAQ,CAAC,IAAI,EAAE,CAAC;QAClB,CAAC;IACH,CAAC;IAED,gBAAgB;IAChB,IAAI,MAAc,CAAC;IACnB,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM,EAAE,CAAC;QAC3B,MAAM,GAAG,UAAU,CAAC,KAAK,EAAE,OAAO,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,gBAAgB,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC;IACrF,CAAC;SAAM,CAAC;QACN,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,IAAI,CAAC,CAAC,OAAO,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,CAAC,EAAE,CAAC;YAC7D,KAAK,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QAChC,CAAC;aAAM,CAAC;YACN,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACrB,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,KAAK,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;YAC1D,CAAC;YAED,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClC,KAAK,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC,CAAC,CAAC;YAC5E,CAAC;YAED,kBAAkB;YAClB,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAChC,KAAK,MAAM,EAAE,IAAI,gBAAgB,EAAE,CAAC;oBAClC,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC,EAAE,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC;gBACvD,CAAC;YACH,CAAC;YAED,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;YACvC,MAAM,OAAO,GAAG,iBAAiB,CAAC,KAAK,EAAE,OAAO,EAAE,OAAO,EAAE,EAAE,EAAE,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,CAAC;YAC5E,IAAI,OAAO;gBAAE,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QACnC,CAAC;QAED,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACvB,KAAK,CAAC,IAAI,CAAC,KAAK,OAAO,CAAC,MAAM,qDAAqD,CAAC,CAAC;QACvF,CAAC;QAED,MAAM,GAAG,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;IACnC,CAAC;IAED,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QAChB,MAAM,SAAS,CAAC,IAAI,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IACvC,CAAC;SAAM,CAAC;QACN,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;IAC/B,CAAC;AACH,CAAC"}
@@ -0,0 +1,2 @@
1
+ /** Convert a document to markdown. Returns null for unsupported formats (xlsx, ods). */
2
+ export declare function documentToMarkdown(filePath: string): Promise<string | null>;
@@ -0,0 +1,117 @@
1
+ import { readFile } from 'node:fs/promises';
2
+ import mammoth from 'mammoth';
3
+ import pdf from 'pdf-parse';
4
+ import JSZip from 'jszip';
5
+ import officeparser from 'officeparser';
6
+ import TurndownService from 'turndown';
7
+ import { getExtension } from '../utils.js';
8
+ const turndown = new TurndownService({ headingStyle: 'atx' });
9
+ /** Convert a DOCX file to markdown via mammoth → HTML → turndown */
10
+ async function docxToMarkdown(filePath) {
11
+ const result = await mammoth.convertToHtml({ path: filePath });
12
+ const html = result.value || '';
13
+ if (!html.trim())
14
+ return '';
15
+ return turndown.turndown(html);
16
+ }
17
+ /** Convert a PDF to markdown with [Page N] markers */
18
+ async function pdfToMarkdown(filePath) {
19
+ const buffer = await readFile(filePath);
20
+ // Suppress pdf.js warnings
21
+ const originalLog = console.log;
22
+ console.log = (...args) => {
23
+ if (typeof args[0] === 'string' && (args[0].startsWith('Warning: ') || args[0].startsWith('Info: ') || args[0].startsWith('Deprecated API usage: '))) {
24
+ return;
25
+ }
26
+ originalLog.apply(console, args);
27
+ };
28
+ let data;
29
+ try {
30
+ data = await pdf(buffer, {
31
+ pagerender: async (pageData) => {
32
+ const textContent = await pageData.getTextContent();
33
+ const strings = textContent.items.map(item => item.str);
34
+ return `[Page ${pageData.pageIndex + 1}]\n${strings.join(' ')}`;
35
+ },
36
+ });
37
+ }
38
+ finally {
39
+ console.log = originalLog;
40
+ }
41
+ return data.text;
42
+ }
43
+ /** Convert a PPTX to markdown with slide headers */
44
+ async function pptxToMarkdown(filePath) {
45
+ const buffer = await readFile(filePath);
46
+ const zip = await JSZip.loadAsync(buffer);
47
+ // Get slide filenames sorted by number
48
+ const slideFiles = Object.keys(zip.files)
49
+ .filter(name => /^ppt\/slides\/slide\d+\.xml$/.test(name))
50
+ .sort((a, b) => {
51
+ const numA = parseInt(a.match(/slide(\d+)/)?.[1] || '0', 10);
52
+ const numB = parseInt(b.match(/slide(\d+)/)?.[1] || '0', 10);
53
+ return numA - numB;
54
+ });
55
+ // Get full text via officeparser
56
+ const fullText = await officeparser.parseOffice(buffer);
57
+ const slideCount = slideFiles.length;
58
+ if (slideCount <= 1) {
59
+ return `# Slide 1\n\n${fullText}`;
60
+ }
61
+ // Split text roughly between slides
62
+ const lines = fullText.split('\n');
63
+ const linesPerSlide = Math.max(1, Math.ceil(lines.length / slideCount));
64
+ const parts = [];
65
+ for (let i = 0; i < slideCount; i++) {
66
+ const start = i * linesPerSlide;
67
+ const end = Math.min((i + 1) * linesPerSlide, lines.length);
68
+ const slideText = lines.slice(start, end).join('\n').trim();
69
+ parts.push(`# Slide ${i + 1}\n\n${slideText}`);
70
+ }
71
+ return parts.join('\n\n');
72
+ }
73
+ /** Convert an ODT file to markdown (best-effort heading detection) */
74
+ async function odtToMarkdown(filePath) {
75
+ const text = await officeparser.parseOffice(filePath);
76
+ return text;
77
+ }
78
+ /** Convert an ODP file to markdown with slide headers */
79
+ async function odpToMarkdown(filePath) {
80
+ const buffer = await readFile(filePath);
81
+ const zip = await JSZip.loadAsync(buffer);
82
+ const contentXml = await zip.file('content.xml')?.async('text');
83
+ if (!contentXml)
84
+ return '';
85
+ const slides = (contentXml.match(/<draw:page /g) || []).length;
86
+ const text = await officeparser.parseOffice(buffer);
87
+ if (slides <= 1) {
88
+ return `# Slide 1\n\n${text}`;
89
+ }
90
+ const lines = text.split('\n');
91
+ const linesPerSlide = Math.max(1, Math.ceil(lines.length / slides));
92
+ const parts = [];
93
+ for (let i = 0; i < slides; i++) {
94
+ const start = i * linesPerSlide;
95
+ const end = Math.min((i + 1) * linesPerSlide, lines.length);
96
+ const slideText = lines.slice(start, end).join('\n').trim();
97
+ parts.push(`# Slide ${i + 1}\n\n${slideText}`);
98
+ }
99
+ return parts.join('\n\n');
100
+ }
101
+ /** Convert a document to markdown. Returns null for unsupported formats (xlsx, ods). */
102
+ export async function documentToMarkdown(filePath) {
103
+ const ext = getExtension(filePath);
104
+ switch (ext) {
105
+ case 'docx': return docxToMarkdown(filePath);
106
+ case 'pdf': return pdfToMarkdown(filePath);
107
+ case 'pptx': return pptxToMarkdown(filePath);
108
+ case 'odt': return odtToMarkdown(filePath);
109
+ case 'odp': return odpToMarkdown(filePath);
110
+ case 'xlsx':
111
+ case 'ods':
112
+ return null;
113
+ default:
114
+ return null;
115
+ }
116
+ }
117
+ //# sourceMappingURL=convert.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"convert.js","sourceRoot":"","sources":["../../../src/markdown/convert.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,OAAO,MAAM,SAAS,CAAC;AAC9B,OAAO,GAAG,MAAM,WAAW,CAAC;AAC5B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,YAAY,MAAM,cAAc,CAAC;AACxC,OAAO,eAAe,MAAM,UAAU,CAAC;AACvC,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,MAAM,QAAQ,GAAG,IAAI,eAAe,CAAC,EAAE,YAAY,EAAE,KAAK,EAAE,CAAC,CAAC;AAE9D,oEAAoE;AACpE,KAAK,UAAU,cAAc,CAAC,QAAgB;IAC5C,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC,CAAC;IAC/D,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,IAAI,EAAE,CAAC;IAChC,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE;QAAE,OAAO,EAAE,CAAC;IAC5B,OAAO,QAAQ,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC;AACjC,CAAC;AAED,sDAAsD;AACtD,KAAK,UAAU,aAAa,CAAC,QAAgB;IAC3C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;IAExC,2BAA2B;IAC3B,MAAM,WAAW,GAAG,OAAO,CAAC,GAAG,CAAC;IAChC,OAAO,CAAC,GAAG,GAAG,CAAC,GAAG,IAAe,EAAE,EAAE;QACnC,IAAI,OAAO,IAAI,CAAC,CAAC,CAAC,KAAK,QAAQ,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,wBAAwB,CAAC,CAAC,EAAE,CAAC;YACrJ,OAAO;QACT,CAAC;QACD,WAAW,CAAC,KAAK,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IACnC,CAAC,CAAC;IAEF,IAAI,IAAwC,CAAC;IAC7C,IAAI,CAAC;QACH,IAAI,GAAG,MAAM,GAAG,CAAC,MAAM,EAAE;YACvB,UAAU,EAAE,KAAK,EAAE,QAAuE,EAAE,EAAE;gBAC5F,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,cAAc,EAAuC,CAAC;gBACzF,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;gBACxD,OAAO,SAAS,QAAQ,CAAC,SAAS,GAAG,CAAC,MAAM,OAAO,CAAC,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC;YAClE,CAAC;SACF,CAAC,CAAC;IACL,CAAC;YAAS,CAAC;QACT,OAAO,CAAC,GAAG,GAAG,WAAW,CAAC;IAC5B,CAAC;IAED,OAAO,IAAI,CAAC,IAAI,CAAC;AACnB,CAAC;AAED,oDAAoD;AACpD,KAAK,UAAU,cAAc,CAAC,QAAgB;IAC5C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACxC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAE1C,uCAAuC;IACvC,MAAM,UAAU,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC;SACtC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,8BAA8B,CAAC,IAAI,CAA
C,IAAI,CAAC,CAAC;SACzD,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE;QACb,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;QAC7D,MAAM,IAAI,GAAG,QAAQ,CAAC,CAAC,CAAC,KAAK,CAAC,YAAY,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,GAAG,EAAE,EAAE,CAAC,CAAC;QAC7D,OAAO,IAAI,GAAG,IAAI,CAAC;IACrB,CAAC,CAAC,CAAC;IAEL,iCAAiC;IACjC,MAAM,QAAQ,GAAG,MAAM,YAAY,CAAC,WAAW,CAAC,MAAM,CAAsB,CAAC;IAC7E,MAAM,UAAU,GAAG,UAAU,CAAC,MAAM,CAAC;IAErC,IAAI,UAAU,IAAI,CAAC,EAAE,CAAC;QACpB,OAAO,gBAAgB,QAAQ,EAAE,CAAC;IACpC,CAAC;IAED,oCAAoC;IACpC,MAAM,KAAK,GAAG,QAAQ,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IACnC,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAAG,UAAU,CAAC,CAAC,CAAC;IACxE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,UAAU,EAAE,CAAC,EAAE,EAAE,CAAC;QACpC,MAAM,KAAK,GAAG,CAAC,GAAG,aAAa,CAAC;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,aAAa,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5D,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,SAAS,EAAE,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC;AAED,sEAAsE;AACtE,KAAK,UAAU,aAAa,CAAC,QAAgB;IAC3C,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,WAAW,CAAC,QAAQ,CAAsB,CAAC;IAC3E,OAAO,IAAI,CAAC;AACd,CAAC;AAED,yDAAyD;AACzD,KAAK,UAAU,aAAa,CAAC,QAAgB;IAC3C,MAAM,MAAM,GAAG,MAAM,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACxC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;IAC1C,MAAM,UAAU,GAAG,MAAM,GAAG,CAAC,IAAI,CAAC,aAAa,CAAC,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;IAChE,IAAI,CAAC,UAAU;QAAE,OAAO,EAAE,CAAC;IAE3B,MAAM,MAAM,GAAG,CAAC,UAAU,CAAC,KAAK,CAAC,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,MAAM,CAAC;IAC/D,MAAM,IAAI,GAAG,MAAM,YAAY,CAAC,WAAW,CAAC,MAAM,CAAsB,CAAC;IAEzE,IAAI,MAAM,IAAI,CAAC,EAAE,CAAC;QAChB,OAAO,gBAAgB,IAAI,EAAE,CAAC;IAChC,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;IAC/B,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,MAAM,GAA
G,MAAM,CAAC,CAAC,CAAC;IACpE,MAAM,KAAK,GAAa,EAAE,CAAC;IAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;QAChC,MAAM,KAAK,GAAG,CAAC,GAAG,aAAa,CAAC;QAChC,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,GAAG,aAAa,EAAE,KAAK,CAAC,MAAM,CAAC,CAAC;QAC5D,MAAM,SAAS,GAAG,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;QAC5D,KAAK,CAAC,IAAI,CAAC,WAAW,CAAC,GAAG,CAAC,OAAO,SAAS,EAAE,CAAC,CAAC;IACjD,CAAC;IAED,OAAO,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;AAC5B,CAAC;AAED,wFAAwF;AACxF,MAAM,CAAC,KAAK,UAAU,kBAAkB,CAAC,QAAgB;IACvD,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,CAAC;IAEnC,QAAQ,GAAG,EAAE,CAAC;QACZ,KAAK,MAAM,CAAC,CAAC,OAAO,cAAc,CAAC,QAAQ,CAAC,CAAC;QAC7C,KAAK,KAAK,CAAC,CAAC,OAAO,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC3C,KAAK,MAAM,CAAC,CAAC,OAAO,cAAc,CAAC,QAAQ,CAAC,CAAC;QAC7C,KAAK,KAAK,CAAC,CAAC,OAAO,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC3C,KAAK,KAAK,CAAC,CAAC,OAAO,aAAa,CAAC,QAAQ,CAAC,CAAC;QAC3C,KAAK,MAAM,CAAC;QACZ,KAAK,KAAK;YACR,OAAO,IAAI,CAAC;QACd;YACE,OAAO,IAAI,CAAC;IAChB,CAAC;AACH,CAAC"}
@@ -0,0 +1,4 @@
1
+ import type { AggregateResult } from '../stats.js';
2
+ import type { SccLanguage } from '../scc.js';
3
+ import type { StructureResult } from './tree.js';
4
/**
 * Serialize aggregated document statistics as a JSON string.
 *
 * The emitted object always has a `documents` member holding `files` and
 * `totals`. A `code` member is added when `sccData` is a non-empty array, and
 * a `structures` member when `structureResults` is a non-empty array.
 *
 * @param stats - Aggregate statistics to serialize.
 * @param sccData - Optional code statistics; omitted, null or empty leaves `code` out.
 * @param structureResults - Optional structure results; omitted or empty leaves `structures` out.
 * @returns The JSON text.
 */
export declare function formatJson(
    stats: AggregateResult,
    sccData?: SccLanguage[] | null,
    structureResults?: StructureResult[]
): string;
@@ -0,0 +1,42 @@
1
+ import { METRIC_FIELDS, hasKey } from '../utils.js';
2
+ import { formatStructureJson } from './tree.js';
3
/**
 * Render aggregate document statistics as pretty-printed JSON.
 *
 * The result always contains `documents.files` (one entry per row of
 * `stats.rows`) and `documents.totals`. `code` is added when `sccData` is a
 * non-empty array, and `structures` when `structureResults` is a non-empty
 * array (converted with `formatStructureJson`).
 *
 * @param stats - Aggregate result supplying `columns`, `rows` and `totals`.
 * @param sccData - Optional code statistics, emitted unchanged under `code`.
 * @param structureResults - Optional structure results for the `structures` member.
 * @returns JSON text indented with two spaces.
 */
export function formatJson(stats, sccData = null, structureResults) {
    const activeColumns = stats.columns;

    // Per-file entry. Key order is type, name, path, count, metrics, size.
    // A metric is included only when the row's own flag for it is truthy.
    function toFileEntry(row) {
        const item = { type: row.fileType };
        if (row.fileName) {
            item.name = row.fileName;
        }
        if (row.filePath) {
            item.path = row.filePath;
        }
        item.count = row.files;
        for (const field of METRIC_FIELDS) {
            if (!row[hasKey(field)]) {
                continue;
            }
            // Falsy metric values are reported as 0.
            item[field] = row[field] || 0;
        }
        item.size = row.size;
        return item;
    }

    // Totals entry. Metrics are gated by the aggregate-level column flags
    // rather than per-row flags, and values are copied without a default.
    function toTotalsEntry(totals) {
        const summary = { files: totals.files };
        for (const field of METRIC_FIELDS) {
            if (activeColumns[hasKey(field)]) {
                summary[field] = totals[field];
            }
        }
        summary.size = totals.size;
        return summary;
    }

    const files = stats.rows.map(toFileEntry);
    const totals = toTotalsEntry(stats.totals);
    const result = { documents: { files, totals } };

    const hasCode = sccData && sccData.length > 0;
    if (hasCode) {
        result.code = sccData;
    }
    const hasStructures = structureResults && structureResults.length > 0;
    if (hasStructures) {
        result.structures = formatStructureJson(structureResults);
    }
    return JSON.stringify(result, null, 2);
}
42
+ //# sourceMappingURL=json.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"json.js","sourceRoot":"","sources":["../../../src/output/json.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,aAAa,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,EAAE,mBAAmB,EAAE,MAAM,WAAW,CAAC;AAKhD,MAAM,UAAU,UAAU,CACxB,KAAsB,EACtB,UAAgC,IAAI,EACpC,gBAAoC;IAEpC,MAAM,EAAE,OAAO,EAAE,GAAG,KAAK,CAAC;IAE1B,MAAM,MAAM,GAAG,CAAC,CAAW,EAAE,EAAE;QAC7B,MAAM,KAAK,GAA4B;YACrC,IAAI,EAAE,CAAC,CAAC,QAAQ;YAChB,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3C,GAAG,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;YAC3C,KAAK,EAAE,CAAC,CAAC,KAAK;SACf,CAAC;QACF,KAAK,MAAM,CAAC,IAAI,aAAa,EAAE,CAAC;YAC9B,IAAI,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBAAE,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QACzC,CAAC;QACD,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC;QACpB,OAAO,KAAK,CAAC;IACf,CAAC,CAAC;IAEF,MAAM,SAAS,GAAG,CAAC,CAAW,EAAE,EAAE;QAChC,MAAM,KAAK,GAA4B,EAAE,KAAK,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC;QAC1D,KAAK,MAAM,CAAC,IAAI,aAAa,EAAE,CAAC;YAC9B,IAAI,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;gBAAE,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;QAC1C,CAAC;QACD,KAAK,CAAC,IAAI,GAAG,CAAC,CAAC,IAAI,CAAC;QACpB,OAAO,KAAK,CAAC;IACf,CAAC,CAAC;IAEF,MAAM,MAAM,GAA4B;QACtC,SAAS,EAAE;YACT,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,MAAM,CAAC;YAC7B,MAAM,EAAE,SAAS,CAAC,KAAK,CAAC,MAAM,CAAC;SAChC;KACF,CAAC;IAEF,IAAI,OAAO,IAAI,OAAO,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClC,MAAM,CAAC,IAAI,GAAG,OAAO,CAAC;IACxB,CAAC;IAED,IAAI,gBAAgB,IAAI,gBAAgB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACpD,MAAM,CAAC,UAAU,GAAG,mBAAmB,CAAC,gBAAgB,CAAC,CAAC;IAC5D,CAAC;IAED,OAAO,IAAI,CAAC,SAAS,CAAC,MAAM,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AACzC,CAAC"}
@@ -0,0 +1,12 @@
1
+ import type { AggregateResult } from '../stats.js';
2
+ import type { SccLanguage } from '../scc.js';
3
/**
 * Optional switches accepted by the table formatters declared in this file.
 * Both members may be omitted.
 */
export interface TableOptions {
    /** NOTE(review): presumably selects plain, CI-friendly output; confirm in tabular.js. */
    ci?: boolean;
    /** NOTE(review): presumably lists one row per file rather than per type; confirm in tabular.js. */
    byFile?: boolean;
}
7
// Declarations only: the implementations live in tabular.js and are not
// visible here, so the notes below restate the signatures and mark anything
// inferred from names as unconfirmed.

/** Format aggregate document statistics as a string; `options` is optional. */
export declare function formatDocumentTable(
    stats: AggregateResult,
    options?: TableOptions
): string;

/** Format a list of `SccLanguage` entries as a string; `options` is optional. */
export declare function formatSccTable(
    sccData: SccLanguage[],
    options?: TableOptions
): string;

/**
 * Build a summary string from the document statistics, the code statistics
 * (which may be null) and an elapsed value.
 * NOTE(review): the unit of `elapsed` is not stated in this declaration; confirm.
 */
export declare function formatSummaryLine(
    stats: AggregateResult,
    sccData: SccLanguage[] | null,
    elapsed: number,
    options?: TableOptions
): string;

/** NOTE(review): presumably returns `str` with ANSI escape sequences removed; confirm. */
export declare function stripAnsi(str: string): string;

/** Build a section header string from a title and a width; `ci` is optional. */
export declare function sectionHeader(
    title: string,
    width: number,
    ci?: boolean
): string;

/** Return a string-to-string lookup selected by the `ci` flag. */
export declare function tableChars(ci: boolean): Record<string, string>;