officecat 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- officecat-0.1.0/.github/workflows/ci.yml +50 -0
- officecat-0.1.0/.github/workflows/release.yml +56 -0
- officecat-0.1.0/.gitignore +41 -0
- officecat-0.1.0/PKG-INFO +142 -0
- officecat-0.1.0/README.md +107 -0
- officecat-0.1.0/officecat/__init__.py +3 -0
- officecat-0.1.0/officecat/cli.py +152 -0
- officecat-0.1.0/officecat/detect.py +58 -0
- officecat-0.1.0/officecat/readers/__init__.py +31 -0
- officecat-0.1.0/officecat/readers/csv_.py +100 -0
- officecat-0.1.0/officecat/readers/docx.py +120 -0
- officecat-0.1.0/officecat/readers/pptx.py +106 -0
- officecat-0.1.0/officecat/readers/xlsx.py +144 -0
- officecat-0.1.0/officecat/renderers/__init__.py +0 -0
- officecat-0.1.0/officecat/renderers/json_.py +11 -0
- officecat-0.1.0/officecat/renderers/plain.py +12 -0
- officecat-0.1.0/officecat/renderers/rich.py +24 -0
- officecat-0.1.0/officecat/tui/__init__.py +0 -0
- officecat-0.1.0/officecat/tui/app.py +34 -0
- officecat-0.1.0/pyproject.toml +63 -0
- officecat-0.1.0/tests/__init__.py +0 -0
- officecat-0.1.0/tests/fixtures/corrupt.xlsx +1 -0
- officecat-0.1.0/tests/fixtures/empty.docx +0 -0
- officecat-0.1.0/tests/fixtures/empty.pptx +0 -0
- officecat-0.1.0/tests/fixtures/empty.xlsx +0 -0
- officecat-0.1.0/tests/fixtures/sample.csv +6 -0
- officecat-0.1.0/tests/fixtures/sample.docx +0 -0
- officecat-0.1.0/tests/fixtures/sample.pptx +0 -0
- officecat-0.1.0/tests/fixtures/sample.tsv +4 -0
- officecat-0.1.0/tests/fixtures/sample.xlsx +0 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, dev]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
lint:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
steps:
|
|
13
|
+
- uses: actions/checkout@v5
|
|
14
|
+
- uses: actions/setup-python@v6
|
|
15
|
+
with:
|
|
16
|
+
python-version: "3.12"
|
|
17
|
+
- run: pip install ruff mypy
|
|
18
|
+
- run: ruff check officecat/
|
|
19
|
+
- run: mypy officecat/ --ignore-missing-imports
|
|
20
|
+
|
|
21
|
+
test:
|
|
22
|
+
runs-on: ${{ matrix.os }}
|
|
23
|
+
strategy:
|
|
24
|
+
matrix:
|
|
25
|
+
os:
|
|
26
|
+
- ubuntu-latest
|
|
27
|
+
- ubuntu-24.04-arm
|
|
28
|
+
- macos-latest
|
|
29
|
+
- windows-latest
|
|
30
|
+
- windows-11-arm
|
|
31
|
+
python-version: ["3.10", "3.12", "3.13"]
|
|
32
|
+
exclude:
|
|
33
|
+
- os: windows-11-arm
|
|
34
|
+
python-version: "3.10"
|
|
35
|
+
- os: ubuntu-24.04-arm
|
|
36
|
+
python-version: "3.10"
|
|
37
|
+
steps:
|
|
38
|
+
- uses: actions/checkout@v5
|
|
39
|
+
- uses: actions/setup-python@v6
|
|
40
|
+
with:
|
|
41
|
+
python-version: ${{ matrix.python-version }}
|
|
42
|
+
- run: pip install -e .
|
|
43
|
+
- run: officecat --help
|
|
44
|
+
- run: officecat tests/fixtures/sample.csv --plain
|
|
45
|
+
- run: officecat tests/fixtures/sample.docx --plain
|
|
46
|
+
- run: officecat tests/fixtures/sample.pptx --plain
|
|
47
|
+
- run: officecat tests/fixtures/sample.xlsx --plain
|
|
48
|
+
- run: officecat tests/fixtures/sample.tsv --plain
|
|
49
|
+
- run: officecat tests/fixtures/sample.csv --json
|
|
50
|
+
- run: officecat tests/fixtures/empty.docx --plain
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
name: Release & Publish
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: write
|
|
9
|
+
id-token: write
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
release:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
environment: pypi
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v5
|
|
17
|
+
with:
|
|
18
|
+
fetch-depth: 0
|
|
19
|
+
fetch-tags: true
|
|
20
|
+
|
|
21
|
+
- name: Get version from pyproject.toml
|
|
22
|
+
id: version
|
|
23
|
+
run: |
|
|
24
|
+
VERSION=$(grep '^version' pyproject.toml | head -1 | sed 's/.*"\(.*\)".*/\1/')
|
|
25
|
+
echo "version=$VERSION" >> "$GITHUB_OUTPUT"
|
|
26
|
+
echo "tag=v$VERSION" >> "$GITHUB_OUTPUT"
|
|
27
|
+
|
|
28
|
+
- name: Check if tag exists
|
|
29
|
+
id: check
|
|
30
|
+
run: |
|
|
31
|
+
if git rev-parse "v${{ steps.version.outputs.version }}" >/dev/null 2>&1; then
|
|
32
|
+
echo "exists=true" >> "$GITHUB_OUTPUT"
|
|
33
|
+
else
|
|
34
|
+
echo "exists=false" >> "$GITHUB_OUTPUT"
|
|
35
|
+
fi
|
|
36
|
+
|
|
37
|
+
- name: Create release
|
|
38
|
+
if: steps.check.outputs.exists == 'false'
|
|
39
|
+
run: |
|
|
40
|
+
gh release create "${{ steps.version.outputs.tag }}" \
|
|
41
|
+
--title "${{ steps.version.outputs.tag }}" \
|
|
42
|
+
--generate-notes
|
|
43
|
+
env:
|
|
44
|
+
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
45
|
+
|
|
46
|
+
- uses: actions/setup-python@v6
|
|
47
|
+
with:
|
|
48
|
+
python-version: "3.12"
|
|
49
|
+
|
|
50
|
+
- name: Build package
|
|
51
|
+
run: pip install build && python -m build
|
|
52
|
+
|
|
53
|
+
- name: Publish to PyPI
|
|
54
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
55
|
+
with:
|
|
56
|
+
skip-existing: true
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
*.egg-info/
|
|
7
|
+
*.egg
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
eggs/
|
|
11
|
+
*.whl
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
.venv/
|
|
15
|
+
venv/
|
|
16
|
+
ENV/
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.vscode/
|
|
20
|
+
.idea/
|
|
21
|
+
*.swp
|
|
22
|
+
*.swo
|
|
23
|
+
*~
|
|
24
|
+
|
|
25
|
+
# OS
|
|
26
|
+
.DS_Store
|
|
27
|
+
Thumbs.db
|
|
28
|
+
|
|
29
|
+
# Testing / Coverage
|
|
30
|
+
.pytest_cache/
|
|
31
|
+
.coverage
|
|
32
|
+
htmlcov/
|
|
33
|
+
|
|
34
|
+
# Distribution
|
|
35
|
+
*.tar.gz
|
|
36
|
+
*.zip
|
|
37
|
+
|
|
38
|
+
# Environment
|
|
39
|
+
.env
|
|
40
|
+
.env.*
|
|
41
|
+
dist/
|
officecat-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: officecat
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: View Office files in the terminal
|
|
5
|
+
Project-URL: Homepage, https://github.com/mubbie/officecat
|
|
6
|
+
Project-URL: Repository, https://github.com/mubbie/officecat
|
|
7
|
+
Project-URL: Issues, https://github.com/mubbie/officecat/issues
|
|
8
|
+
Author: Mubbie Idoko
|
|
9
|
+
License: MIT
|
|
10
|
+
Keywords: cli,csv,docx,office,pptx,terminal,viewer,xlsx
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Environment :: Console
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Operating System :: OS Independent
|
|
16
|
+
Classifier: Programming Language :: Python :: 3
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Office/Business
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Requires-Python: >=3.10
|
|
24
|
+
Requires-Dist: python-calamine>=0.2
|
|
25
|
+
Requires-Dist: python-docx>=1.0
|
|
26
|
+
Requires-Dist: python-pptx>=0.6
|
|
27
|
+
Requires-Dist: rich
|
|
28
|
+
Requires-Dist: textual>=0.50
|
|
29
|
+
Requires-Dist: typer>=0.9
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: mypy; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest; extra == 'dev'
|
|
33
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# officecat 🐱
|
|
37
|
+
|
|
38
|
+
A CLI tool to view Office files in the terminal. Think `cat` but for `.docx`, `.pptx`, `.xlsx`, `.csv`, and `.tsv` files.
|
|
39
|
+
|
|
40
|
+
Every supported format is converted to markdown internally, then rendered through a single unified pipeline.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install officecat
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Or install from source:
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
git clone https://github.com/mubbie/officecat.git
|
|
52
|
+
cd officecat
|
|
53
|
+
pip install -e .
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
## Usage
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
officecat report.docx # colored formatted output (default)
|
|
60
|
+
officecat budget.xlsx # spreadsheet as markdown table
|
|
61
|
+
officecat slides.pptx # presentation content
|
|
62
|
+
officecat data.csv # CSV and TSV
|
|
63
|
+
officecat report.docx --tui # interactive full-screen viewer
|
|
64
|
+
officecat budget.xlsx | head # plain text (auto-detected pipe)
|
|
65
|
+
officecat slides.pptx --json # JSON output
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
### Output Modes
|
|
69
|
+
|
|
70
|
+
- **Rich** (default): Colored, formatted output to stdout. Works with `less -R`.
|
|
71
|
+
- **TUI** (`--tui`): Full-screen interactive viewer with scrolling.
|
|
72
|
+
- **Plain** (auto when piped, or `--plain`): Raw markdown for piping to `grep`, `head`, `awk`.
|
|
73
|
+
- **JSON** (`--json`): `{"source": "...", "markdown": "..."}` for scripting.
|
|
74
|
+
|
|
75
|
+
### Options
|
|
76
|
+
|
|
77
|
+
| Flag | Short | Description |
|
|
78
|
+
|---|---|---|
|
|
79
|
+
| `--tui` | `-t` | Interactive full-screen viewer |
|
|
80
|
+
| `--plain` | `-p` | Raw markdown text, no colors |
|
|
81
|
+
| `--json` | `-j` | JSON output |
|
|
82
|
+
| `--head N` | `-n N` | Show first N lines |
|
|
83
|
+
| `--sheet S` | `-s S` | Select sheet by name or 1-based index (xlsx only) |
|
|
84
|
+
| `--slide N` | | Show only slide N (pptx only) |
|
|
85
|
+
| `--headers N` | `-h N` | Promote row N as headers (xlsx/csv, default: 1, 0 to disable) |
|
|
86
|
+
| `--all` | `-a` | Disable the default 500-row cap |
|
|
87
|
+
|
|
88
|
+
### TUI Key Bindings
|
|
89
|
+
|
|
90
|
+
| Key | Action |
|
|
91
|
+
|---|---|
|
|
92
|
+
| `q` | Quit |
|
|
93
|
+
| `Up` / `Down` | Scroll |
|
|
94
|
+
| `PgUp` / `PgDn` | Page scroll |
|
|
95
|
+
| `Home` / `End` | Jump to top/bottom |
|
|
96
|
+
|
|
97
|
+
### Examples
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
# Quick view of a document
|
|
101
|
+
officecat report.docx
|
|
102
|
+
|
|
103
|
+
# Browse interactively
|
|
104
|
+
officecat report.docx --tui
|
|
105
|
+
|
|
106
|
+
# Specific sheet
|
|
107
|
+
officecat budget.xlsx --sheet "Q4 Summary"
|
|
108
|
+
|
|
109
|
+
# Specific slide
|
|
110
|
+
officecat deck.pptx --slide 3
|
|
111
|
+
|
|
112
|
+
# First 10 lines
|
|
113
|
+
officecat budget.xlsx --head 10
|
|
114
|
+
|
|
115
|
+
# JSON output
|
|
116
|
+
officecat report.docx --json | jq '.markdown'
|
|
117
|
+
|
|
118
|
+
# Pipe to grep
|
|
119
|
+
officecat data.xlsx --plain | grep "revenue"
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
## Supported Formats
|
|
123
|
+
|
|
124
|
+
- Word (.docx): headings, paragraphs, lists, tables in document order
|
|
125
|
+
- PowerPoint (.pptx): slides, shapes, images, speaker notes, hidden slides
|
|
126
|
+
- Excel (.xlsx): all sheets, row cap, header promotion
|
|
127
|
+
- CSV (.csv): comma-delimited; the delimiter is auto-detected only when the first line contains no commas
|
|
128
|
+
- TSV (.tsv): tab-delimited
|
|
129
|
+
|
|
130
|
+
Legacy binary formats (`.doc`, `.ppt`, `.xls`) show a conversion hint.
|
|
131
|
+
|
|
132
|
+
## Known Limitations
|
|
133
|
+
|
|
134
|
+
- All content is rendered as markdown. Spreadsheet tables are markdown tables, not interactive grids.
|
|
135
|
+
- DOCX list detection is style-name-based and may miss custom list styles.
|
|
136
|
+
- PPTX grouped shapes and embedded tables show as placeholders.
|
|
137
|
+
- PPTX charts and SmartArt are not extracted.
|
|
138
|
+
- XLSX formulas show cached/computed values, not formula strings.
|
|
139
|
+
- Large spreadsheets are capped at 500 rows by default. Use `--all` to show everything.
|
|
140
|
+
- With `--all`, the TUI truncates spreadsheet and CSV/TSV output to the first 1000 lines for performance. Use `--plain --all` for full output.
|
|
141
|
+
- No decryption of password-protected files.
|
|
142
|
+
- Legacy binary formats (.doc, .ppt, .xls) are not supported.
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# officecat 🐱
|
|
2
|
+
|
|
3
|
+
A CLI tool to view Office files in the terminal. Think `cat` but for `.docx`, `.pptx`, `.xlsx`, `.csv`, and `.tsv` files.
|
|
4
|
+
|
|
5
|
+
Every supported format is converted to markdown internally, then rendered through a single unified pipeline.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
pip install officecat
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
Or install from source:
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
git clone https://github.com/mubbie/officecat.git
|
|
17
|
+
cd officecat
|
|
18
|
+
pip install -e .
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
officecat report.docx # colored formatted output (default)
|
|
25
|
+
officecat budget.xlsx # spreadsheet as markdown table
|
|
26
|
+
officecat slides.pptx # presentation content
|
|
27
|
+
officecat data.csv # CSV and TSV
|
|
28
|
+
officecat report.docx --tui # interactive full-screen viewer
|
|
29
|
+
officecat budget.xlsx | head # plain text (auto-detected pipe)
|
|
30
|
+
officecat slides.pptx --json # JSON output
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
### Output Modes
|
|
34
|
+
|
|
35
|
+
- **Rich** (default): Colored, formatted output to stdout. Works with `less -R`.
|
|
36
|
+
- **TUI** (`--tui`): Full-screen interactive viewer with scrolling.
|
|
37
|
+
- **Plain** (auto when piped, or `--plain`): Raw markdown for piping to `grep`, `head`, `awk`.
|
|
38
|
+
- **JSON** (`--json`): `{"source": "...", "markdown": "..."}` for scripting.
|
|
39
|
+
|
|
40
|
+
### Options
|
|
41
|
+
|
|
42
|
+
| Flag | Short | Description |
|
|
43
|
+
|---|---|---|
|
|
44
|
+
| `--tui` | `-t` | Interactive full-screen viewer |
|
|
45
|
+
| `--plain` | `-p` | Raw markdown text, no colors |
|
|
46
|
+
| `--json` | `-j` | JSON output |
|
|
47
|
+
| `--head N` | `-n N` | Show first N lines |
|
|
48
|
+
| `--sheet S` | `-s S` | Select sheet by name or 1-based index (xlsx only) |
|
|
49
|
+
| `--slide N` | | Show only slide N (pptx only) |
|
|
50
|
+
| `--headers N` | `-h N` | Promote row N as headers (xlsx/csv, default: 1, 0 to disable) |
|
|
51
|
+
| `--all` | `-a` | Disable the default 500-row cap |
|
|
52
|
+
|
|
53
|
+
### TUI Key Bindings
|
|
54
|
+
|
|
55
|
+
| Key | Action |
|
|
56
|
+
|---|---|
|
|
57
|
+
| `q` | Quit |
|
|
58
|
+
| `Up` / `Down` | Scroll |
|
|
59
|
+
| `PgUp` / `PgDn` | Page scroll |
|
|
60
|
+
| `Home` / `End` | Jump to top/bottom |
|
|
61
|
+
|
|
62
|
+
### Examples
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# Quick view of a document
|
|
66
|
+
officecat report.docx
|
|
67
|
+
|
|
68
|
+
# Browse interactively
|
|
69
|
+
officecat report.docx --tui
|
|
70
|
+
|
|
71
|
+
# Specific sheet
|
|
72
|
+
officecat budget.xlsx --sheet "Q4 Summary"
|
|
73
|
+
|
|
74
|
+
# Specific slide
|
|
75
|
+
officecat deck.pptx --slide 3
|
|
76
|
+
|
|
77
|
+
# First 10 lines
|
|
78
|
+
officecat budget.xlsx --head 10
|
|
79
|
+
|
|
80
|
+
# JSON output
|
|
81
|
+
officecat report.docx --json | jq '.markdown'
|
|
82
|
+
|
|
83
|
+
# Pipe to grep
|
|
84
|
+
officecat data.xlsx --plain | grep "revenue"
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
## Supported Formats
|
|
88
|
+
|
|
89
|
+
- Word (.docx): headings, paragraphs, lists, tables in document order
|
|
90
|
+
- PowerPoint (.pptx): slides, shapes, images, speaker notes, hidden slides
|
|
91
|
+
- Excel (.xlsx): all sheets, row cap, header promotion
|
|
92
|
+
- CSV (.csv): comma-delimited; the delimiter is auto-detected only when the first line contains no commas
|
|
93
|
+
- TSV (.tsv): tab-delimited
|
|
94
|
+
|
|
95
|
+
Legacy binary formats (`.doc`, `.ppt`, `.xls`) show a conversion hint.
|
|
96
|
+
|
|
97
|
+
## Known Limitations
|
|
98
|
+
|
|
99
|
+
- All content is rendered as markdown. Spreadsheet tables are markdown tables, not interactive grids.
|
|
100
|
+
- DOCX list detection is style-name-based and may miss custom list styles.
|
|
101
|
+
- PPTX grouped shapes and embedded tables show as placeholders.
|
|
102
|
+
- PPTX charts and SmartArt are not extracted.
|
|
103
|
+
- XLSX formulas show cached/computed values, not formula strings.
|
|
104
|
+
- Large spreadsheets are capped at 500 rows by default. Use `--all` to show everything.
|
|
105
|
+
- With `--all`, the TUI truncates spreadsheet and CSV/TSV output to the first 1000 lines for performance. Use `--plain --all` for full output.
|
|
106
|
+
- No decryption of password-protected files.
|
|
107
|
+
- Legacy binary formats (.doc, .ppt, .xls) are not supported.
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""CLI entry point — read and render pipeline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Annotated, Optional
|
|
8
|
+
|
|
9
|
+
import typer
|
|
10
|
+
|
|
11
|
+
app = typer.Typer(add_completion=False)
|
|
12
|
+
|
|
13
|
+
# Formats that support tabular flags
|
|
14
|
+
_TABULAR_FMTS = {"xlsx", "csv"}
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@app.command()
def run(
    file: Annotated[
        Path, typer.Argument(help="File to view.")
    ],
    tui: Annotated[
        bool, typer.Option("--tui", "-t", help="Interactive viewer.")
    ] = False,
    plain: Annotated[
        bool, typer.Option("--plain", "-p", help="Raw markdown, no colors.")
    ] = False,
    json: Annotated[
        bool, typer.Option("--json", "-j", help="JSON output.")
    ] = False,
    head: Annotated[
        Optional[int], typer.Option("--head", "-n", help="Show first N lines.")
    ] = None,
    sheet: Annotated[
        Optional[str],
        typer.Option("--sheet", "-s", help="Sheet by name or index (xlsx)."),
    ] = None,
    slide: Annotated[
        Optional[int],
        typer.Option("--slide", help="Show only slide N (pptx)."),
    ] = None,
    headers: Annotated[
        int,
        typer.Option("--headers", "-h", help="Row N as headers (default: 1)."),
    ] = 1,
    show_all: Annotated[
        bool, typer.Option("--all", "-a", help="Disable the row cap.")
    ] = False,
) -> None:
    """View Office files in the terminal.

    Supports .docx, .pptx, .xlsx, .csv, and .tsv files.
    """
    # NOTE: the docstring above is user-facing help text; implementation
    # notes therefore live in comments.
    #
    # Pipeline: validate flags -> detect format -> pick an output mode ->
    # convert the file to markdown -> hand the markdown to one renderer.
    # Every validation failure goes through _error(), which exits with 1.

    # ── Validate output flags ──
    if sum([tui, plain, json]) > 1:
        _error("--tui, --plain, and --json are mutually exclusive.")

    if not file.exists():
        _error(f"File '{file}' not found.")

    # ── Validate format ──
    # Imported lazily, like the readers/renderers below, so that only the
    # modules needed for this invocation are loaded.
    from officecat.detect import detect_format
    fmt = detect_format(file)

    # ── Validate format-specific flags ──
    fmt_name = fmt.value
    # Single source of truth for "does this format take tabular flags".
    is_tabular = fmt_name in _TABULAR_FMTS
    if sheet is not None and not is_tabular:
        _error("--sheet is only valid for xlsx, csv, and tsv files.")
    if slide is not None and fmt_name != "pptx":
        _error("--slide is only valid for pptx files.")
    if headers != 1 and not is_tabular:
        _error("--headers is only valid for xlsx, csv, and tsv files.")

    # ── Mode resolution ──
    # Explicit flags win; otherwise use rich output on a terminal and plain
    # markdown when stdout is piped or redirected.
    if json:
        mode = "json"
    elif plain:
        mode = "plain"
    elif tui:
        mode = "tui"
    elif sys.stdout.isatty():
        mode = "rich"
    else:
        mode = "plain"

    # ── Build reader options ──
    reader_opts: dict = {}
    if is_tabular:
        reader_opts["headers"] = headers
        reader_opts["show_all"] = show_all
        if sheet is not None:
            reader_opts["sheet"] = sheet
    if fmt_name == "pptx" and slide is not None:
        reader_opts["slide"] = slide
    # Every reader receives ``head`` when it was given; each one decides what
    # unit it limits.
    if head is not None:
        reader_opts["head"] = head

    # ── Convert (with spinner for TTY) ──
    from officecat.readers import convert

    if sys.stderr.isatty() and mode != "tui":
        from rich.console import Console
        # The spinner is drawn on stderr so it never mixes with the document
        # written to stdout.
        console = Console(stderr=True)
        with console.status(f"Reading {file.name}...", spinner="dots"):
            markdown = convert(file, **reader_opts)
    else:
        markdown = convert(file, **reader_opts)

    # An empty document gets a human-readable notice, except in JSON mode:
    # there stdout must stay machine-parseable, so the (empty) markdown is
    # passed on to the JSON renderer instead of printing plain text.
    is_empty = not markdown or not markdown.strip()
    if is_empty and mode != "json":
        print("Document is empty.")
        return

    # ── TUI guardrail for large tables ──
    if mode == "tui" and show_all and is_tabular:
        line_count = markdown.count("\n")
        if line_count > 1000:
            # Truncate to ~1000 lines for TUI performance
            lines = markdown.splitlines()
            markdown = "\n".join(lines[:1000])
            markdown += (
                "\n\n*TUI limited to first 1000 lines."
                " Use --plain with --all to view everything.*"
            )

    # ── Render ──
    if mode == "tui":
        from officecat.tui.app import OfficeCatApp
        tui_app = OfficeCatApp(source=str(file), markdown=markdown)
        tui_app.run()
    elif mode == "rich":
        from officecat.renderers.rich import render
        render(markdown, head=head)
    elif mode == "json":
        from officecat.renderers.json_ import render
        render(str(file), markdown)
    else:
        from officecat.renderers.plain import render
        render(markdown, head=head)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def _error(msg: str) -> None:
|
|
147
|
+
print(f"Error: {msg}", file=sys.stderr)
|
|
148
|
+
raise SystemExit(1)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def main() -> None:
    """Invoke the module-level Typer application."""
    app()
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""File type detection and validation."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class FileFormat(Enum):
    """Document formats that have a reader.

    Each member's value is the lowercase format name as a plain string.
    There is no separate member for TSV.
    """

    # Word-processing and presentation documents
    DOCX = "docx"
    PPTX = "pptx"

    # Tabular data
    XLSX = "xlsx"
    CSV = "csv"
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Lowercase file suffix (with leading dot) -> format used to read it.
# ".tsv" is routed to the CSV format.
EXTENSION_MAP: dict[str, FileFormat] = {
    ".docx": FileFormat.DOCX,
    ".pptx": FileFormat.PPTX,
    ".xlsx": FileFormat.XLSX,
    ".csv": FileFormat.CSV,
    ".tsv": FileFormat.CSV,
}

# Legacy binary suffix -> name of the modern format to convert it to.
LEGACY_FORMATS: dict[str, str] = {
    ".doc": "docx",
    ".ppt": "pptx",
    ".xls": "xlsx",
}

# Alphabetically sorted, comma-separated suffix list for error messages.
SUPPORTED_EXTENSIONS = ", ".join(sorted(EXTENSION_MAP))
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def detect_format(path: Path) -> FileFormat:
    """Return the format for *path*, decided only by its lowercased suffix.

    Nothing is read from disk. Both failure cases print a message to stderr
    and raise ``SystemExit(2)``:

    * a legacy binary suffix listed in ``LEGACY_FORMATS`` (the message
      includes a LibreOffice conversion command), and
    * any suffix missing from ``EXTENSION_MAP``.
    """
    suffix = path.suffix.lower()

    # Legacy formats are checked first so they get the conversion hint
    # rather than the generic "unsupported" message.
    modern = LEGACY_FORMATS.get(suffix)
    if modern is not None:
        hint = (
            f"Error: Legacy binary format ({suffix}) is not supported.\n"
            f"Convert to .{modern} using LibreOffice: "
            f"libreoffice --headless --convert-to {modern} {path.name}\n"
            f"Or use specialized tools like antiword or catdoc."
        )
        print(hint, file=sys.stderr)
        raise SystemExit(2)

    if suffix in EXTENSION_MAP:
        return EXTENSION_MAP[suffix]

    print(
        f"Error: Unsupported file type '{suffix}'. "
        f"Supported: {SUPPORTED_EXTENSIONS}",
        file=sys.stderr,
    )
    raise SystemExit(2)
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""Reader dispatch — routes a file path to the correct format reader."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from officecat.detect import FileFormat, detect_format
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def convert(path: Path, **options) -> str:
    """Turn the file at *path* into markdown using the reader for its format.

    The format comes from ``detect_format``. The matching reader module is
    imported only once the format is known.

    Args:
        path: Path to the file.
        **options: Passed unchanged to the reader's ``to_markdown``
            (head, sheet, slide, headers, show_all).

    Returns:
        A markdown string.
    """
    fmt = detect_format(path)

    if fmt is FileFormat.CSV:
        from officecat.readers import csv_ as reader
    elif fmt is FileFormat.XLSX:
        from officecat.readers import xlsx as reader
    elif fmt is FileFormat.DOCX:
        from officecat.readers import docx as reader
    elif fmt is FileFormat.PPTX:
        from officecat.readers import pptx as reader

    return reader.to_markdown(path, **options)
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
"""CSV/TSV to markdown reader."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import csv
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
DEFAULT_ROW_CAP = 500
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _escape_pipe(text: str) -> str:
|
|
12
|
+
"""Escape pipe characters for markdown tables."""
|
|
13
|
+
return text.replace("|", "\\|")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _col_letter(index: int) -> str:
|
|
17
|
+
"""Convert 0-based index to Excel-style letter (A, B, ... Z, AA, ...)."""
|
|
18
|
+
result = ""
|
|
19
|
+
i = index
|
|
20
|
+
while True:
|
|
21
|
+
result = chr(65 + i % 26) + result
|
|
22
|
+
i = i // 26 - 1
|
|
23
|
+
if i < 0:
|
|
24
|
+
break
|
|
25
|
+
return result
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def to_markdown(
    path: Path,
    *,
    head: int | None = None,
    headers: int = 1,
    show_all: bool = False,
    **_kwargs,
) -> str:
    """Read a CSV/TSV file and return it as one markdown pipe table.

    Args:
        path: File to read, decoded as UTF-8 (a leading BOM is dropped).
        head: Maximum number of data rows to keep; overrides every other cap.
        headers: 1-based number of the row to use as the header row; 0 or
            less disables promotion, and columns are then labelled A, B, C...
            Rows above the header row are kept as ordinary data rows.
        show_all: With no ``head``, lift the ``DEFAULT_ROW_CAP`` limit.
        **_kwargs: Options meant for other readers; ignored.

    Returns:
        The table, followed by a "Showing X of Y rows" note when rows were
        dropped, or "" when the file has neither a header nor data rows.
    """
    is_tsv = path.suffix.lower() == ".tsv"

    if head is not None:
        limit = head
    elif show_all:
        limit = None
    else:
        limit = DEFAULT_ROW_CAP

    header_cells: list[str] = []
    body: list[list[str]] = []
    seen = 0

    with open(path, newline="", encoding="utf-8-sig") as handle:
        sep = ","
        if is_tsv:
            sep = "\t"
        else:
            probe = handle.readline()
            handle.seek(0)
            # Only sniff when the first line holds no comma at all.
            if "," not in probe:
                try:
                    sep = csv.Sniffer().sniff(probe).delimiter
                except csv.Error:
                    sep = ","

        for line_no, record in enumerate(csv.reader(handle, delimiter=sep), start=1):
            if headers > 0 and line_no == headers:
                header_cells = [_escape_pipe(cell) for cell in record]
                continue

            # Rows past the cap are still counted so the footer can report
            # the true total.
            seen += 1
            if limit is None or len(body) < limit:
                body.append([_escape_pipe(cell) for cell in record])

    if not header_cells:
        if not body:
            return ""
        # No usable header row: label columns by position.
        width = max(len(record) for record in body)
        header_cells = [_col_letter(n) for n in range(width)]

    return _build_table(header_cells, body, seen)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _build_table(
|
|
83
|
+
headers: list[str], rows: list[list[str]], total_rows: int
|
|
84
|
+
) -> str:
|
|
85
|
+
"""Build a markdown pipe table from headers and rows."""
|
|
86
|
+
col_count = len(headers)
|
|
87
|
+
lines: list[str] = []
|
|
88
|
+
|
|
89
|
+
lines.append("| " + " | ".join(headers) + " |")
|
|
90
|
+
lines.append("| " + " | ".join(["---"] * col_count) + " |")
|
|
91
|
+
|
|
92
|
+
for row in rows:
|
|
93
|
+
padded = row + [""] * (col_count - len(row))
|
|
94
|
+
lines.append("| " + " | ".join(padded[:col_count]) + " |")
|
|
95
|
+
|
|
96
|
+
if total_rows > len(rows):
|
|
97
|
+
lines.append("")
|
|
98
|
+
lines.append(f"*Showing {len(rows)} of {total_rows:,} rows.*")
|
|
99
|
+
|
|
100
|
+
return "\n".join(lines)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Word (.docx) to markdown reader."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _classify_style(style_name: str | None) -> str | None:
|
|
10
|
+
"""Map a docx paragraph style name to a markdown heading prefix or None."""
|
|
11
|
+
if style_name is None:
|
|
12
|
+
return None
|
|
13
|
+
name = style_name.lower()
|
|
14
|
+
if "heading 1" in name or name == "title":
|
|
15
|
+
return "# "
|
|
16
|
+
if "heading 2" in name:
|
|
17
|
+
return "## "
|
|
18
|
+
if "heading 3" in name:
|
|
19
|
+
return "### "
|
|
20
|
+
if "heading 4" in name or "heading 5" in name or "heading 6" in name:
|
|
21
|
+
return "#### "
|
|
22
|
+
if "list" in name:
|
|
23
|
+
return "- "
|
|
24
|
+
return None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _escape_pipe(text: str) -> str:
|
|
28
|
+
return text.replace("|", "\\|")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _table_to_markdown(table) -> str:
    """Render a docx table as a markdown pipe table, first row as header.

    Cell text is stripped and pipe-escaped. Every row is right-padded with
    empty cells to the width of the widest row. Returns "" when the table
    has no rows.
    """

    def _distinct_cells(row) -> list[str]:
        # A cell whose ``_tc`` element is the same object as the previous
        # cell's is emitted only once (presumably a horizontally merged
        # cell repeated by python-docx; confirm against its docs).
        texts: list[str] = []
        last_tc = None
        for cell in row.cells:
            tc = cell._tc
            if last_tc is not None and tc is last_tc:
                continue
            texts.append(_escape_pipe(cell.text.strip()))
            last_tc = tc
        return texts

    def _as_line(cells: list[str]) -> str:
        return "| " + " | ".join(cells) + " |"

    grid = [_distinct_cells(row) for row in table.rows]
    if not grid:
        return ""

    width = max(len(cells) for cells in grid)
    lines = [_as_line(cells + [""] * (width - len(cells))) for cells in grid]
    # The separator goes directly under the first (header) row.
    lines.insert(1, _as_line(["---"] * width))
    return "\n".join(lines)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def to_markdown(path: Path, *, head: int | None = None, **_kwargs) -> str:
    """Convert a .docx file to markdown, keeping paragraphs and tables in order.

    Args:
        path: The document to open.
        head: Stop once this many blocks (non-empty paragraphs or tables)
            have been collected. The check runs after a block is added, so
            at least one block is returned whenever the document has one.
        **_kwargs: Options meant for other readers; ignored.

    Returns:
        The collected blocks joined by blank lines ("" when there are none).

    Raises:
        SystemExit: With code 3 when the package cannot be opened, or when
            the failure message mentions "password" or "encrypt". Any other
            exception from opening the document propagates unchanged.
    """
    from docx import Document
    from docx.opc.exceptions import PackageNotFoundError
    from docx.table import Table as DocxTable
    from docx.text.paragraph import Paragraph

    try:
        document = Document(str(path))
    except PackageNotFoundError:
        print(
            f"Error: '{path.name}' appears to be corrupt or invalid.",
            file=sys.stderr,
        )
        raise SystemExit(3)
    except Exception as exc:
        reason = str(exc).lower()
        if "password" not in reason and "encrypt" not in reason:
            raise
        print(
            f"Error: '{path.name}' is password-protected. "
            f"officecat cannot open encrypted files.",
            file=sys.stderr,
        )
        raise SystemExit(3)

    rendered: list[str] = []

    # Iterate the body's direct children rather than separate paragraph and
    # table collections, so interleaved content keeps its document order.
    for node in document.element.body:
        # Drop the "{namespace}" part of the tag, if there is one.
        kind = node.tag.rpartition("}")[2]

        if kind == "p":
            para = Paragraph(node, document)
            piece = para.text.strip()
            if piece:
                prefix = _classify_style(para.style.name if para.style else None)
                if prefix:
                    piece = f"{prefix}{piece}"
        elif kind == "tbl":
            piece = _table_to_markdown(DocxTable(node, document))
        else:
            continue

        # Blank paragraphs and empty tables are not blocks and do not count
        # towards ``head``.
        if not piece:
            continue

        rendered.append(piece)
        if head is not None and len(rendered) >= head:
            break

    return "\n\n".join(rendered)
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
"""PowerPoint (.pptx) to markdown reader."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def to_markdown(
    path: Path,
    *,
    head: int | None = None,
    slide: int | None = None,
    **_kwargs,
) -> str:
    """Convert a pptx file to markdown, one section per slide.

    Args:
        path: Location of the ``.pptx`` file to read.
        head: Stop after this many slides have been rendered. Has no effect
            when ``slide`` is given.
        slide: 1-based number of the single slide to render. ``None`` renders
            slides in order.
        **_kwargs: Ignored; accepted so all readers share one call shape.

    Returns:
        Slide sections joined by a ``---`` rule. Each section has a heading,
        the slide's text, placeholders for pictures, groups and tables, and
        the speaker notes (when present) as a blockquote.

    Raises:
        SystemExit: With code 3 when the file cannot be opened as a package,
            or when the load error mentions a password or encryption; with
            code 1 when ``slide`` is outside the deck's range.
    """
    from pptx import Presentation
    from pptx.enum.shapes import MSO_SHAPE_TYPE
    from pptx.exc import PackageNotFoundError

    try:
        prs = Presentation(str(path))
    except PackageNotFoundError:
        print(
            f"Error: '{path.name}' appears to be corrupt or invalid.",
            file=sys.stderr,
        )
        raise SystemExit(3)
    except Exception as e:
        msg = str(e).lower()
        if "password" in msg or "encrypt" in msg:
            print(
                f"Error: '{path.name}' is password-protected. "
                f"officecat cannot open encrypted files.",
                file=sys.stderr,
            )
            raise SystemExit(3)
        raise

    total_slides = len(prs.slides)

    if slide is not None and not 1 <= slide <= total_slides:
        print(
            f"Error: Slide {slide} not found. "
            f"Document has {total_slides} slides.",
            file=sys.stderr,
        )
        raise SystemExit(1)

    sections: list[str] = []

    for i, sld in enumerate(prs.slides, start=1):
        if slide is not None and i != slide:
            continue

        # A slide element carrying show="0" is treated as hidden.
        is_hidden = sld._element.get("show") == "0"

        title_shape = sld.shapes.title
        title_text = title_shape.text.strip() if title_shape else None
        title_shape_id = title_shape.shape_id if title_shape else None

        hidden_tag = " (Hidden)" if is_hidden else ""
        if title_text:
            heading = f"## Slide {i}{hidden_tag}: {title_text}"
        else:
            heading = f"## Slide {i}{hidden_tag}"

        body_lines: list[str] = [heading, ""]

        for shape in sld.shapes:
            # The title is already in the heading; do not repeat it.
            if title_shape_id is not None and shape.shape_id == title_shape_id:
                continue

            st = shape.shape_type
            if st == MSO_SHAPE_TYPE.PICTURE:
                body_lines.append(f"*[Image: {shape.name}]*")
            elif st == MSO_SHAPE_TYPE.GROUP:
                body_lines.append("*[Grouped content]*")
            elif hasattr(shape, "has_table") and shape.has_table:
                tbl = shape.table
                rows = len(tbl.rows)
                cols = len(tbl.columns)
                body_lines.append(f"*[Table: {rows}x{cols}]*")
            elif shape.has_text_frame:
                text = shape.text_frame.text.strip()
                if text:
                    body_lines.append(text)

        # Notes
        if sld.has_notes_slide:
            # notes_text_frame is None when the notes slide has no notes
            # placeholder; treat that the same as empty notes.
            notes_frame = sld.notes_slide.notes_text_frame
            notes_text = notes_frame.text.strip() if notes_frame is not None else ""
            if notes_text:
                body_lines.append("")
                body_lines.append(f"> **Notes:** {notes_text}")

        sections.append("\n".join(body_lines))

        # A requested slide has now been rendered; nothing else to do.
        if slide is not None:
            break
        if head is not None and len(sections) >= head:
            break

    return "\n\n---\n\n".join(sections)
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""Excel (.xlsx) to markdown reader using python-calamine."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
# Maximum number of data rows rendered per sheet when the caller supplies
# neither an explicit `head` limit nor `show_all`.
DEFAULT_ROW_CAP = 500
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _escape_pipe(text: str) -> str:
|
|
12
|
+
return text.replace("|", "\\|")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _col_letter(index: int) -> str:
|
|
16
|
+
result = ""
|
|
17
|
+
i = index
|
|
18
|
+
while True:
|
|
19
|
+
result = chr(65 + i % 26) + result
|
|
20
|
+
i = i // 26 - 1
|
|
21
|
+
if i < 0:
|
|
22
|
+
break
|
|
23
|
+
return result
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def _format_cell(value: object) -> str:
|
|
27
|
+
if value is None:
|
|
28
|
+
return ""
|
|
29
|
+
if isinstance(value, float) and value == int(value):
|
|
30
|
+
return str(int(value))
|
|
31
|
+
if hasattr(value, "isoformat"):
|
|
32
|
+
return value.isoformat()
|
|
33
|
+
return str(value)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def to_markdown(
    path: Path,
    *,
    head: int | None = None,
    sheet: str | None = None,
    headers: int = 1,
    show_all: bool = False,
    **_kwargs,
) -> str:
    """Convert an xlsx file to markdown tables, one section per sheet.

    Args:
        path: Location of the workbook to read.
        head: Maximum number of data rows rendered per sheet. Takes
            precedence over ``show_all`` and the default cap.
        sheet: Sheet selector: a 1-based index or a sheet name. An in-range
            index wins; otherwise the value is looked up as a name, so
            sheets with numeric names stay reachable. ``None`` reads every
            sheet.
        headers: 1-based number of the row used as the table header. With a
            value of ``0`` or less no row is treated as a header and column
            letters are used instead.
        show_all: Render every row when ``head`` is not given.
        **_kwargs: Ignored; accepted so all readers share one call shape.

    Returns:
        Sheet sections joined by a ``---`` rule.

    Raises:
        SystemExit: With code 3 when the load error indicates a
            password-protected, corrupt or invalid file; with code 1 when
            the requested sheet does not exist.
    """
    from python_calamine import CalamineWorkbook

    try:
        wb = CalamineWorkbook.from_path(str(path))
    except Exception as e:
        msg = str(e).lower()
        if "password" in msg or "encrypt" in msg:
            print(
                f"Error: '{path.name}' is password-protected. "
                f"officecat cannot open encrypted files.",
                file=sys.stderr,
            )
            raise SystemExit(3)
        if "zip" in msg or "invalid" in msg or "corrupt" in msg:
            print(
                f"Error: '{path.name}' appears to be corrupt or invalid.",
                file=sys.stderr,
            )
            raise SystemExit(3)
        raise

    all_sheet_names = wb.sheet_names

    if sheet is None:
        sheets_to_read = list(all_sheet_names)
    else:
        # Try as 1-based index first
        try:
            idx: int | None = int(sheet)
        except ValueError:
            idx = None

        if idx is not None and 1 <= idx <= len(all_sheet_names):
            sheets_to_read = [all_sheet_names[idx - 1]]
        elif sheet in all_sheet_names:
            # Also reached for numeric-looking names such as "2024" that are
            # not a valid index.
            sheets_to_read = [sheet]
        else:
            if idx is not None:
                problem = f"Error: Sheet index {idx} out of range. "
            else:
                problem = f"Error: Sheet '{sheet}' not found. "
            print(
                problem + f"Available: {', '.join(all_sheet_names)}",
                file=sys.stderr,
            )
            raise SystemExit(1)

    row_cap = head if head is not None else (None if show_all else DEFAULT_ROW_CAP)
    sections: list[str] = []

    for sheet_name in sheets_to_read:
        ws = wb.get_sheet_by_name(sheet_name)
        header_row: list[str] = []
        data_rows: list[list[str]] = []
        total_rows = 0

        for i, row in enumerate(ws.iter_rows(), start=1):
            if headers > 0 and i == headers:
                header_row = [_escape_pipe(_format_cell(c)) for c in row]
                continue

            # Keep counting past the cap so the truncation note can report
            # the real number of rows.
            total_rows += 1
            if row_cap is not None and len(data_rows) >= row_cap:
                continue
            data_rows.append([_escape_pipe(_format_cell(c)) for c in row])

        if not header_row:
            # No header row was found: label columns A, B, C, ...
            col_count = max((len(r) for r in data_rows), default=0)
            header_row = [_col_letter(i) for i in range(col_count)]

        section_lines: list[str] = [f"## Sheet: {sheet_name}", ""]

        if not header_row:
            section_lines.append("*Empty sheet.*")
        else:
            col_count = len(header_row)
            section_lines.append("| " + " | ".join(header_row) + " |")
            section_lines.append("| " + " | ".join(["---"] * col_count) + " |")

            for row in data_rows:
                # Pad short rows and trim long ones to the header width.
                padded = row + [""] * (col_count - len(row))
                section_lines.append(
                    "| " + " | ".join(padded[:col_count]) + " |"
                )

            if total_rows > len(data_rows):
                section_lines.append("")
                section_lines.append(
                    f"*Showing {len(data_rows)} of {total_rows:,} rows.*"
                )

        sections.append("\n".join(section_lines))

    return "\n\n---\n\n".join(sections)
|
|
File without changes
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""JSON renderer."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def render(source: str, markdown_text: str) -> None:
    """Write a JSON object with ``source`` and ``markdown`` keys to stdout."""
    payload = dict(source=source, markdown=markdown_text)
    serialized = json.dumps(payload, indent=2)
    print(serialized)
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""Plain text renderer — raw markdown via print()."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def render(markdown_text: str, head: int | None = None) -> None:
|
|
7
|
+
"""Print raw markdown text to stdout."""
|
|
8
|
+
lines = markdown_text.splitlines()
|
|
9
|
+
if head is not None:
|
|
10
|
+
lines = lines[:head]
|
|
11
|
+
for line in lines:
|
|
12
|
+
print(line)
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Rich colored markdown renderer."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def render(markdown_text: str, head: int | None = None) -> None:
    """Render the markdown to stdout through Rich.

    When *head* is given, only that leading slice of lines is rendered.
    """
    stream = sys.stdout
    if sys.platform == "win32" and hasattr(stream, "reconfigure"):
        # Best effort: a failure to switch the stream to UTF-8 is ignored.
        try:
            stream.reconfigure(encoding="utf-8")
        except Exception:
            pass

    from rich.console import Console
    from rich.markdown import Markdown

    if head is None:
        shown = markdown_text
    else:
        shown = "\n".join(markdown_text.splitlines()[:head])

    Console().print(Markdown(shown))
|
|
File without changes
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Textual TUI app for officecat — full-screen markdown viewer."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from textual.app import App, ComposeResult
|
|
6
|
+
from textual.binding import Binding
|
|
7
|
+
from textual.containers import VerticalScroll
|
|
8
|
+
from textual.widgets import Footer, Header, Markdown
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class OfficeCatApp(App):
    """Full-screen Textual viewer showing one document's markdown."""

    BINDINGS = [Binding("q", "quit", "Quit")]

    CSS = """
    #md-scroll {
        height: 1fr;
    }
    """

    def __init__(self, source: str, markdown: str, **kwargs: object) -> None:
        """Store the source label and markdown text, then set the app title.

        Extra keyword arguments are forwarded to ``App.__init__``.
        """
        self._source = source
        self._markdown = markdown
        super().__init__(**kwargs)
        # The title is assigned once the base class has been initialised.
        self.title = f"officecat — {self._source}"

    def compose(self) -> ComposeResult:
        """Yield a header, the markdown inside a scroll container, and a footer."""
        yield Header()
        with VerticalScroll(id="md-scroll"):
            yield Markdown(self._markdown, id="md-view")
        yield Footer()
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "officecat"
|
|
3
|
+
version = "0.1.0"
|
|
4
|
+
description = "View Office files in the terminal"
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
license = {text = "MIT"}
|
|
7
|
+
requires-python = ">=3.10"
|
|
8
|
+
authors = [
|
|
9
|
+
{name = "Mubbie Idoko"},
|
|
10
|
+
]
|
|
11
|
+
keywords = ["office", "cli", "docx", "xlsx", "pptx", "csv", "terminal", "viewer"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Environment :: Console",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.10",
|
|
20
|
+
"Programming Language :: Python :: 3.11",
|
|
21
|
+
"Programming Language :: Python :: 3.12",
|
|
22
|
+
"Programming Language :: Python :: 3.13",
|
|
23
|
+
"Topic :: Office/Business",
|
|
24
|
+
"Topic :: Utilities",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"typer>=0.9",
|
|
28
|
+
"textual>=0.50",
|
|
29
|
+
"python-docx>=1.0",
|
|
30
|
+
"python-pptx>=0.6",
|
|
31
|
+
"python-calamine>=0.2",
|
|
32
|
+
"rich",
|
|
33
|
+
]
|
|
34
|
+
|
|
35
|
+
[project.urls]
|
|
36
|
+
Homepage = "https://github.com/mubbie/officecat"
|
|
37
|
+
Repository = "https://github.com/mubbie/officecat"
|
|
38
|
+
Issues = "https://github.com/mubbie/officecat/issues"
|
|
39
|
+
|
|
40
|
+
[project.scripts]
|
|
41
|
+
officecat = "officecat.cli:main"
|
|
42
|
+
|
|
43
|
+
[project.optional-dependencies]
|
|
44
|
+
dev = ["pytest", "ruff", "mypy"]
|
|
45
|
+
|
|
46
|
+
[build-system]
|
|
47
|
+
requires = ["hatchling"]
|
|
48
|
+
build-backend = "hatchling.build"
|
|
49
|
+
|
|
50
|
+
[tool.ruff]
|
|
51
|
+
target-version = "py310"
|
|
52
|
+
line-length = 88
|
|
53
|
+
|
|
54
|
+
[tool.ruff.lint]
|
|
55
|
+
select = ["E", "F", "I", "W"]
|
|
56
|
+
|
|
57
|
+
[tool.mypy]
|
|
58
|
+
python_version = "3.10"
|
|
59
|
+
warn_return_any = true
|
|
60
|
+
warn_unused_configs = true
|
|
61
|
+
|
|
62
|
+
[tool.pytest.ini_options]
|
|
63
|
+
testpaths = ["tests"]
|
|
File without changes
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
not a real xlsx
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|