officecat 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, dev]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v5
14
+ - uses: actions/setup-python@v6
15
+ with:
16
+ python-version: "3.12"
17
+ - run: pip install ruff mypy
18
+ - run: ruff check officecat/
19
+ - run: mypy officecat/ --ignore-missing-imports
20
+
21
+ test:
22
+ runs-on: ${{ matrix.os }}
23
+ strategy:
24
+ matrix:
25
+ os:
26
+ - ubuntu-latest
27
+ - ubuntu-24.04-arm
28
+ - macos-latest
29
+ - windows-latest
30
+ - windows-11-arm
31
+ python-version: ["3.10", "3.12", "3.13"]
32
+ exclude:
33
+ - os: windows-11-arm
34
+ python-version: "3.10"
35
+ - os: ubuntu-24.04-arm
36
+ python-version: "3.10"
37
+ steps:
38
+ - uses: actions/checkout@v5
39
+ - uses: actions/setup-python@v6
40
+ with:
41
+ python-version: ${{ matrix.python-version }}
42
+ - run: pip install -e .
43
+ - run: officecat --help
44
+ - run: officecat tests/fixtures/sample.csv --plain
45
+ - run: officecat tests/fixtures/sample.docx --plain
46
+ - run: officecat tests/fixtures/sample.pptx --plain
47
+ - run: officecat tests/fixtures/sample.xlsx --plain
48
+ - run: officecat tests/fixtures/sample.tsv --plain
49
+ - run: officecat tests/fixtures/sample.csv --json
50
+ - run: officecat tests/fixtures/empty.docx --plain
@@ -0,0 +1,56 @@
1
+ name: Release & Publish
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+
7
+ permissions:
8
+ contents: write
9
+ id-token: write
10
+
11
+ jobs:
12
+ release:
13
+ runs-on: ubuntu-latest
14
+ environment: pypi
15
+ steps:
16
+ - uses: actions/checkout@v5
17
+ with:
18
+ fetch-depth: 0
19
+ fetch-tags: true
20
+
21
+ - name: Get version from pyproject.toml
22
+ id: version
23
+ run: |
24
+ VERSION=$(grep '^version' pyproject.toml | head -1 | sed 's/.*"\(.*\)".*/\1/')
25
+ echo "version=$VERSION" >> "$GITHUB_OUTPUT"
26
+ echo "tag=v$VERSION" >> "$GITHUB_OUTPUT"
27
+
28
+ - name: Check if tag exists
29
+ id: check
30
+ run: |
31
+ if git rev-parse "v${{ steps.version.outputs.version }}" >/dev/null 2>&1; then
32
+ echo "exists=true" >> "$GITHUB_OUTPUT"
33
+ else
34
+ echo "exists=false" >> "$GITHUB_OUTPUT"
35
+ fi
36
+
37
+ - name: Create release
38
+ if: steps.check.outputs.exists == 'false'
39
+ run: |
40
+ gh release create "${{ steps.version.outputs.tag }}" \
41
+ --title "${{ steps.version.outputs.tag }}" \
42
+ --generate-notes
43
+ env:
44
+ GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
45
+
46
+ - uses: actions/setup-python@v6
47
+ with:
48
+ python-version: "3.12"
49
+
50
+ - name: Build package
51
+ run: pip install build && python -m build
52
+
53
+ - name: Publish to PyPI
54
+ uses: pypa/gh-action-pypi-publish@release/v1
55
+ with:
56
+ skip-existing: true
@@ -0,0 +1,41 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ *.egg
8
+ dist/
9
+ build/
10
+ eggs/
11
+ *.whl
12
+
13
+ # Virtual environments
14
+ .venv/
15
+ venv/
16
+ ENV/
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+ *.swp
22
+ *.swo
23
+ *~
24
+
25
+ # OS
26
+ .DS_Store
27
+ Thumbs.db
28
+
29
+ # Testing / Coverage
30
+ .pytest_cache/
31
+ .coverage
32
+ htmlcov/
33
+
34
+ # Distribution
35
+ *.tar.gz
36
+ *.zip
37
+
38
+ # Environment
39
+ .env
40
+ .env.*
41
+ dist/
@@ -0,0 +1,142 @@
1
+ Metadata-Version: 2.4
2
+ Name: officecat
3
+ Version: 0.1.0
4
+ Summary: View Office files in the terminal
5
+ Project-URL: Homepage, https://github.com/mubbie/officecat
6
+ Project-URL: Repository, https://github.com/mubbie/officecat
7
+ Project-URL: Issues, https://github.com/mubbie/officecat/issues
8
+ Author: Mubbie Idoko
9
+ License: MIT
10
+ Keywords: cli,csv,docx,office,pptx,terminal,viewer,xlsx
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Office/Business
22
+ Classifier: Topic :: Utilities
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: python-calamine>=0.2
25
+ Requires-Dist: python-docx>=1.0
26
+ Requires-Dist: python-pptx>=0.6
27
+ Requires-Dist: rich
28
+ Requires-Dist: textual>=0.50
29
+ Requires-Dist: typer>=0.9
30
+ Provides-Extra: dev
31
+ Requires-Dist: mypy; extra == 'dev'
32
+ Requires-Dist: pytest; extra == 'dev'
33
+ Requires-Dist: ruff; extra == 'dev'
34
+ Description-Content-Type: text/markdown
35
+
36
+ # officecat 🐱
37
+
38
+ A CLI tool to view Office files in the terminal. Think `cat` but for `.docx`, `.pptx`, `.xlsx`, `.csv`, and `.tsv` files.
39
+
40
+ Every supported format is converted to markdown internally, then rendered through a single unified pipeline.
41
+
42
+ ## Installation
43
+
44
+ ```bash
45
+ pip install officecat
46
+ ```
47
+
48
+ Or install from source:
49
+
50
+ ```bash
51
+ git clone https://github.com/mubbie/officecat.git
52
+ cd officecat
53
+ pip install -e .
54
+ ```
55
+
56
+ ## Usage
57
+
58
+ ```bash
59
+ officecat report.docx # colored formatted output (default)
60
+ officecat budget.xlsx # spreadsheet as markdown table
61
+ officecat slides.pptx # presentation content
62
+ officecat data.csv # CSV and TSV
63
+ officecat report.docx --tui # interactive full-screen viewer
64
+ officecat budget.xlsx | head # plain text (auto-detected pipe)
65
+ officecat slides.pptx --json # JSON output
66
+ ```
67
+
68
+ ### Output Modes
69
+
70
+ - **Rich** (default): Colored, formatted output to stdout. Works with `less -R`.
71
+ - **TUI** (`--tui`): Full-screen interactive viewer with scrolling.
72
+ - **Plain** (auto when piped, or `--plain`): Raw markdown for piping to `grep`, `head`, `awk`.
73
+ - **JSON** (`--json`): `{"source": "...", "markdown": "..."}` for scripting.
74
+
75
+ ### Options
76
+
77
+ | Flag | Short | Description |
78
+ |---|---|---|
79
+ | `--tui` | `-t` | Interactive full-screen viewer |
80
+ | `--plain` | `-p` | Raw markdown text, no colors |
81
+ | `--json` | `-j` | JSON output |
82
+ | `--head N` | `-n N` | Show first N lines |
83
+ | `--sheet S` | `-s S` | Select sheet by name or 1-based index (xlsx only) |
84
+ | `--slide N` | | Show only slide N (pptx only) |
85
+ | `--headers N` | `-h N` | Promote row N as headers (xlsx/csv, default: 1, 0 to disable) |
86
+ | `--all` | `-a` | Disable the default 500-row cap |
87
+
88
+ ### TUI Key Bindings
89
+
90
+ | Key | Action |
91
+ |---|---|
92
+ | `q` | Quit |
93
+ | `Up` / `Down` | Scroll |
94
+ | `PgUp` / `PgDn` | Page scroll |
95
+ | `Home` / `End` | Jump to top/bottom |
96
+
97
+ ### Examples
98
+
99
+ ```bash
100
+ # Quick view of a document
101
+ officecat report.docx
102
+
103
+ # Browse interactively
104
+ officecat report.docx --tui
105
+
106
+ # Specific sheet
107
+ officecat budget.xlsx --sheet "Q4 Summary"
108
+
109
+ # Specific slide
110
+ officecat deck.pptx --slide 3
111
+
112
+ # First 10 lines
113
+ officecat budget.xlsx --head 10
114
+
115
+ # JSON output
116
+ officecat report.docx --json | jq '.markdown'
117
+
118
+ # Pipe to grep
119
+ officecat data.xlsx --plain | grep "revenue"
120
+ ```
121
+
122
+ ## Supported Formats
123
+
124
+ - Word (.docx): headings, paragraphs, lists, tables in document order
125
+ - PowerPoint (.pptx): slides, shapes, images, speaker notes, hidden slides
126
+ - Excel (.xlsx): all sheets, row cap, header promotion
127
+ - CSV (.csv): delimiter auto-detected, falling back to comma
128
+ - TSV (.tsv): tab-delimited
129
+
130
+ Legacy binary formats (`.doc`, `.ppt`, `.xls`) show a conversion hint.
131
+
132
+ ## Known Limitations
133
+
134
+ - All content is rendered as markdown. Spreadsheet tables are markdown tables, not interactive grids.
135
+ - DOCX list detection is style-name-based and may miss custom list styles.
136
+ - PPTX grouped shapes and embedded tables show as placeholders.
137
+ - PPTX charts and SmartArt are not extracted.
138
+ - XLSX formulas show cached/computed values, not formula strings.
139
+ - Large spreadsheets are capped at 500 rows by default. Use `--all` to show everything.
140
+ - Even with `--all`, the TUI shows at most the first 1000 lines of a spreadsheet for performance. Use `--plain --all` for the full output.
141
+ - No decryption of password-protected files.
142
+ - Legacy binary formats (.doc, .ppt, .xls) are not supported.
@@ -0,0 +1,107 @@
1
+ # officecat 🐱
2
+
3
+ A CLI tool to view Office files in the terminal. Think `cat` but for `.docx`, `.pptx`, `.xlsx`, `.csv`, and `.tsv` files.
4
+
5
+ Every supported format is converted to markdown internally, then rendered through a single unified pipeline.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ pip install officecat
11
+ ```
12
+
13
+ Or install from source:
14
+
15
+ ```bash
16
+ git clone https://github.com/mubbie/officecat.git
17
+ cd officecat
18
+ pip install -e .
19
+ ```
20
+
21
+ ## Usage
22
+
23
+ ```bash
24
+ officecat report.docx # colored formatted output (default)
25
+ officecat budget.xlsx # spreadsheet as markdown table
26
+ officecat slides.pptx # presentation content
27
+ officecat data.csv # CSV and TSV
28
+ officecat report.docx --tui # interactive full-screen viewer
29
+ officecat budget.xlsx | head # plain text (auto-detected pipe)
30
+ officecat slides.pptx --json # JSON output
31
+ ```
32
+
33
+ ### Output Modes
34
+
35
+ - **Rich** (default): Colored, formatted output to stdout. Works with `less -R`.
36
+ - **TUI** (`--tui`): Full-screen interactive viewer with scrolling.
37
+ - **Plain** (auto when piped, or `--plain`): Raw markdown for piping to `grep`, `head`, `awk`.
38
+ - **JSON** (`--json`): `{"source": "...", "markdown": "..."}` for scripting.
39
+
40
+ ### Options
41
+
42
+ | Flag | Short | Description |
43
+ |---|---|---|
44
+ | `--tui` | `-t` | Interactive full-screen viewer |
45
+ | `--plain` | `-p` | Raw markdown text, no colors |
46
+ | `--json` | `-j` | JSON output |
47
+ | `--head N` | `-n N` | Show first N lines |
48
+ | `--sheet S` | `-s S` | Select sheet by name or 1-based index (xlsx only) |
49
+ | `--slide N` | | Show only slide N (pptx only) |
50
+ | `--headers N` | `-h N` | Promote row N as headers (xlsx/csv, default: 1, 0 to disable) |
51
+ | `--all` | `-a` | Disable the default 500-row cap |
52
+
53
+ ### TUI Key Bindings
54
+
55
+ | Key | Action |
56
+ |---|---|
57
+ | `q` | Quit |
58
+ | `Up` / `Down` | Scroll |
59
+ | `PgUp` / `PgDn` | Page scroll |
60
+ | `Home` / `End` | Jump to top/bottom |
61
+
62
+ ### Examples
63
+
64
+ ```bash
65
+ # Quick view of a document
66
+ officecat report.docx
67
+
68
+ # Browse interactively
69
+ officecat report.docx --tui
70
+
71
+ # Specific sheet
72
+ officecat budget.xlsx --sheet "Q4 Summary"
73
+
74
+ # Specific slide
75
+ officecat deck.pptx --slide 3
76
+
77
+ # First 10 lines
78
+ officecat budget.xlsx --head 10
79
+
80
+ # JSON output
81
+ officecat report.docx --json | jq '.markdown'
82
+
83
+ # Pipe to grep
84
+ officecat data.xlsx --plain | grep "revenue"
85
+ ```
86
+
87
+ ## Supported Formats
88
+
89
+ - Word (.docx): headings, paragraphs, lists, tables in document order
90
+ - PowerPoint (.pptx): slides, shapes, images, speaker notes, hidden slides
91
+ - Excel (.xlsx): all sheets, row cap, header promotion
92
+ - CSV (.csv): delimiter auto-detected, falling back to comma
93
+ - TSV (.tsv): tab-delimited
94
+
95
+ Legacy binary formats (`.doc`, `.ppt`, `.xls`) show a conversion hint.
96
+
97
+ ## Known Limitations
98
+
99
+ - All content is rendered as markdown. Spreadsheet tables are markdown tables, not interactive grids.
100
+ - DOCX list detection is style-name-based and may miss custom list styles.
101
+ - PPTX grouped shapes and embedded tables show as placeholders.
102
+ - PPTX charts and SmartArt are not extracted.
103
+ - XLSX formulas show cached/computed values, not formula strings.
104
+ - Large spreadsheets are capped at 500 rows by default. Use `--all` to show everything.
105
+ - Even with `--all`, the TUI shows at most the first 1000 lines of a spreadsheet for performance. Use `--plain --all` for the full output.
106
+ - No decryption of password-protected files.
107
+ - Legacy binary formats (.doc, .ppt, .xls) are not supported.
@@ -0,0 +1,3 @@
1
+ """officecat — View Office files in the terminal."""
2
+
3
+ __version__ = "0.1.0"
@@ -0,0 +1,152 @@
1
+ """CLI entry point — read and render pipeline."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Annotated, Optional
8
+
9
+ import typer
10
+
11
+ app = typer.Typer(add_completion=False)
12
+
13
+ # Formats that support tabular flags
14
+ _TABULAR_FMTS = {"xlsx", "csv"}
15
+
16
+
17
@app.command()
def run(
    file: Annotated[
        Path, typer.Argument(help="File to view.")
    ],
    tui: Annotated[
        bool, typer.Option("--tui", "-t", help="Interactive viewer.")
    ] = False,
    plain: Annotated[
        bool, typer.Option("--plain", "-p", help="Raw markdown, no colors.")
    ] = False,
    json: Annotated[
        bool, typer.Option("--json", "-j", help="JSON output.")
    ] = False,
    head: Annotated[
        Optional[int], typer.Option("--head", "-n", help="Show first N lines.")
    ] = None,
    sheet: Annotated[
        Optional[str],
        typer.Option("--sheet", "-s", help="Sheet by name or index (xlsx)."),
    ] = None,
    slide: Annotated[
        Optional[int],
        typer.Option("--slide", help="Show only slide N (pptx)."),
    ] = None,
    headers: Annotated[
        int,
        typer.Option("--headers", "-h", help="Row N as headers (default: 1)."),
    ] = 1,
    show_all: Annotated[
        bool, typer.Option("--all", "-a", help="Disable the row cap.")
    ] = False,
) -> None:
    """View Office files in the terminal.

    Supports .docx, .pptx, .xlsx, .csv, and .tsv files.
    """
    # NOTE: the docstring above is kept verbatim because it is runtime text
    # (the command's help).
    #
    # Pipeline: validate flags -> detect format -> pick an output mode ->
    # convert the file to markdown -> hand the markdown to one renderer.
    # Validation failures go through _error(), which exits with status 1.

    # ── Validate output flags ──
    if sum([tui, plain, json]) > 1:
        _error("--tui, --plain, and --json are mutually exclusive.")

    if not file.exists():
        _error(f"File '{file}' not found.")

    # ── Validate format ──
    # detect_format() exits on legacy/unsupported extensions.
    from officecat.detect import detect_format
    fmt_name = detect_format(file).value
    # .tsv files are reported as "csv", so one membership test covers both.
    is_tabular = fmt_name in _TABULAR_FMTS

    # ── Validate format-specific flags ──
    if sheet is not None and not is_tabular:
        _error("--sheet is only valid for xlsx, csv, and tsv files.")
    if slide is not None and fmt_name != "pptx":
        _error("--slide is only valid for pptx files.")
    if headers != 1 and not is_tabular:
        _error("--headers is only valid for xlsx, csv, and tsv files.")

    # ── Mode resolution ──
    # With no explicit flag, use rich output on a terminal and plain text
    # when stdout is piped or redirected.
    if json:
        mode = "json"
    elif plain:
        mode = "plain"
    elif tui:
        mode = "tui"
    elif sys.stdout.isatty():
        mode = "rich"
    else:
        mode = "plain"

    # ── Build reader options ──
    reader_opts: dict = {}
    if is_tabular:
        reader_opts["headers"] = headers
        reader_opts["show_all"] = show_all
        if sheet is not None:
            reader_opts["sheet"] = sheet
    if fmt_name == "pptx" and slide is not None:
        reader_opts["slide"] = slide
    # Every reader accepts `head` (rows for tabular formats, blocks/slides
    # for docx/pptx), so a single branch is enough.
    if head is not None:
        reader_opts["head"] = head

    # ── Convert (with spinner for TTY) ──
    from officecat.readers import convert

    if sys.stderr.isatty() and mode != "tui":
        from rich.console import Console
        # The spinner is drawn on stderr so it never mixes with the output.
        console = Console(stderr=True)
        with console.status(f"Reading {file.name}...", spinner="dots"):
            markdown = convert(file, **reader_opts)
    else:
        markdown = convert(file, **reader_opts)

    if not markdown or not markdown.strip():
        if mode == "json":
            # Keep stdout machine-readable: emit the usual JSON shape with
            # an empty markdown payload instead of a bare sentence.
            from officecat.renderers.json_ import render as render_json
            render_json(str(file), "")
        else:
            print("Document is empty.")
        return

    # ── TUI guardrail for large tables ──
    if mode == "tui" and show_all and is_tabular:
        tui_line_cap = 1000
        lines = markdown.splitlines()
        # Compare the real line count (not the number of "\n" characters)
        # so a document of cap+1 lines is truncated as well.
        if len(lines) > tui_line_cap:
            markdown = "\n".join(lines[:tui_line_cap])
            markdown += (
                "\n\n*TUI limited to first 1000 lines."
                " Use --plain with --all to view everything.*"
            )

    # ── Render ──
    if mode == "tui":
        from officecat.tui.app import OfficeCatApp
        tui_app = OfficeCatApp(source=str(file), markdown=markdown)
        tui_app.run()
    elif mode == "rich":
        from officecat.renderers.rich import render
        render(markdown, head=head)
    elif mode == "json":
        from officecat.renderers.json_ import render
        render(str(file), markdown)
    else:
        from officecat.renderers.plain import render
        render(markdown, head=head)
144
+
145
+
146
+ def _error(msg: str) -> None:
147
+ print(f"Error: {msg}", file=sys.stderr)
148
+ raise SystemExit(1)
149
+
150
+
151
def main() -> None:
    """Invoke the module-level Typer ``app``."""
    app()
@@ -0,0 +1,58 @@
1
+ """File type detection and validation."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from enum import Enum
7
+ from pathlib import Path
8
+
9
+
10
+ class FileFormat(Enum):
11
+ DOCX = "docx"
12
+ PPTX = "pptx"
13
+ XLSX = "xlsx"
14
+ CSV = "csv"
15
+
16
+
17
+ EXTENSION_MAP: dict[str, FileFormat] = {
18
+ ".docx": FileFormat.DOCX,
19
+ ".pptx": FileFormat.PPTX,
20
+ ".xlsx": FileFormat.XLSX,
21
+ ".csv": FileFormat.CSV,
22
+ ".tsv": FileFormat.CSV,
23
+ }
24
+
25
+ LEGACY_FORMATS: dict[str, str] = {
26
+ ".doc": "docx",
27
+ ".ppt": "pptx",
28
+ ".xls": "xlsx",
29
+ }
30
+
31
+ SUPPORTED_EXTENSIONS = ", ".join(sorted(EXTENSION_MAP.keys()))
32
+
33
+
34
def detect_format(path: Path) -> FileFormat:
    """Map a file's extension to a ``FileFormat``.

    The lookup is case-insensitive and looks only at the suffix of ``path``;
    the file contents are not inspected.

    Args:
        path: File whose suffix should be classified.

    Returns:
        The ``FileFormat`` registered for the suffix in ``EXTENSION_MAP``.

    Raises:
        SystemExit: With code 2 when the suffix is a legacy binary format
            (a conversion hint is printed to stderr) or is not supported.
    """
    suffix = path.suffix.lower()

    # Legacy binary formats get a dedicated message with a conversion hint.
    modern = LEGACY_FORMATS.get(suffix)
    if modern is not None:
        hint = (
            f"Error: Legacy binary format ({suffix}) is not supported.\n"
            f"Convert to .{modern} using LibreOffice: "
            f"libreoffice --headless --convert-to {modern} {path.name}\n"
            f"Or use specialized tools like antiword or catdoc."
        )
        print(hint, file=sys.stderr)
        raise SystemExit(2)

    detected = EXTENSION_MAP.get(suffix)
    if detected is not None:
        return detected

    print(
        f"Error: Unsupported file type '{suffix}'. "
        f"Supported: {SUPPORTED_EXTENSIONS}",
        file=sys.stderr,
    )
    raise SystemExit(2)
@@ -0,0 +1,31 @@
1
+ """Reader dispatch — routes a file path to the correct format reader."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+
7
+ from officecat.detect import FileFormat, detect_format
8
+
9
+
10
+ def convert(path: Path, **options) -> str:
11
+ """Convert a file to a markdown string.
12
+
13
+ Args:
14
+ path: Path to the file.
15
+ **options: Format-specific options (head, sheet, slide, headers, show_all).
16
+
17
+ Returns:
18
+ A markdown string.
19
+ """
20
+ fmt = detect_format(path)
21
+
22
+ if fmt == FileFormat.CSV:
23
+ from officecat.readers.csv_ import to_markdown
24
+ elif fmt == FileFormat.XLSX:
25
+ from officecat.readers.xlsx import to_markdown
26
+ elif fmt == FileFormat.DOCX:
27
+ from officecat.readers.docx import to_markdown
28
+ elif fmt == FileFormat.PPTX:
29
+ from officecat.readers.pptx import to_markdown
30
+
31
+ return to_markdown(path, **options)
@@ -0,0 +1,100 @@
1
+ """CSV/TSV to markdown reader."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import csv
6
+ from pathlib import Path
7
+
8
+ DEFAULT_ROW_CAP = 500
9
+
10
+
11
+ def _escape_pipe(text: str) -> str:
12
+ """Escape pipe characters for markdown tables."""
13
+ return text.replace("|", "\\|")
14
+
15
+
16
+ def _col_letter(index: int) -> str:
17
+ """Convert 0-based index to Excel-style letter (A, B, ... Z, AA, ...)."""
18
+ result = ""
19
+ i = index
20
+ while True:
21
+ result = chr(65 + i % 26) + result
22
+ i = i // 26 - 1
23
+ if i < 0:
24
+ break
25
+ return result
26
+
27
+
28
def to_markdown(
    path: Path,
    *,
    head: int | None = None,
    headers: int = 1,
    show_all: bool = False,
    **_kwargs,
) -> str:
    """Convert a CSV/TSV file to a markdown table string.

    Args:
        path: File to read; a ``.tsv`` suffix forces tab as the delimiter.
        head: Maximum number of data rows to include. Takes precedence over
            ``show_all`` and the default cap.
        headers: 1-based row number to use as the header row; 0 disables
            header promotion and columns are labelled A, B, C, ...
        show_all: When true (and ``head`` is not given) include every row
            instead of stopping at ``DEFAULT_ROW_CAP``.
        **_kwargs: Options meant for other readers; ignored here.

    Returns:
        The markdown table, or an empty string when the file has no rows.
    """
    is_tsv = path.suffix.lower() == ".tsv"
    row_cap = head if head is not None else (None if show_all else DEFAULT_ROW_CAP)

    header_row: list[str] = []
    rows: list[list[str]] = []
    total_rows = 0

    # utf-8-sig strips a leading byte-order mark if one is present.
    with open(path, newline="", encoding="utf-8-sig") as f:
        if is_tsv:
            delimiter = "\t"
        else:
            first_line = f.readline()
            f.seek(0)
            delimiter = ","
            # Only sniff if comma-split yields a single column.
            if len(first_line.split(",")) <= 1:
                try:
                    # Restrict the candidates: an unrestricted sniff of a
                    # one-column header such as "name" can settle on a
                    # letter as the delimiter and shred every row.
                    delimiter = csv.Sniffer().sniff(
                        first_line, delimiters=";\t|"
                    ).delimiter
                except csv.Error:
                    delimiter = ","

        reader = csv.reader(f, delimiter=delimiter)
        for i, row in enumerate(reader, start=1):
            if headers > 0 and i == headers:
                header_row = [_escape_pipe(c) for c in row]
                continue

            total_rows += 1
            if row_cap is not None and len(rows) >= row_cap:
                continue  # keep counting total_rows
            rows.append([_escape_pipe(c) for c in row])

    if not header_row and rows:
        # No header was promoted: label the columns A, B, C, ...
        col_count = max(len(r) for r in rows)
        header_row = [_col_letter(i) for i in range(col_count)]

    if not header_row and not rows:
        return ""

    return _build_table(header_row, rows, total_rows)
80
+
81
+
82
+ def _build_table(
83
+ headers: list[str], rows: list[list[str]], total_rows: int
84
+ ) -> str:
85
+ """Build a markdown pipe table from headers and rows."""
86
+ col_count = len(headers)
87
+ lines: list[str] = []
88
+
89
+ lines.append("| " + " | ".join(headers) + " |")
90
+ lines.append("| " + " | ".join(["---"] * col_count) + " |")
91
+
92
+ for row in rows:
93
+ padded = row + [""] * (col_count - len(row))
94
+ lines.append("| " + " | ".join(padded[:col_count]) + " |")
95
+
96
+ if total_rows > len(rows):
97
+ lines.append("")
98
+ lines.append(f"*Showing {len(rows)} of {total_rows:,} rows.*")
99
+
100
+ return "\n".join(lines)
@@ -0,0 +1,120 @@
1
+ """Word (.docx) to markdown reader."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+
9
+ def _classify_style(style_name: str | None) -> str | None:
10
+ """Map a docx paragraph style name to a markdown heading prefix or None."""
11
+ if style_name is None:
12
+ return None
13
+ name = style_name.lower()
14
+ if "heading 1" in name or name == "title":
15
+ return "# "
16
+ if "heading 2" in name:
17
+ return "## "
18
+ if "heading 3" in name:
19
+ return "### "
20
+ if "heading 4" in name or "heading 5" in name or "heading 6" in name:
21
+ return "#### "
22
+ if "list" in name:
23
+ return "- "
24
+ return None
25
+
26
+
27
+ def _escape_pipe(text: str) -> str:
28
+ return text.replace("|", "\\|")
29
+
30
+
31
def _table_to_markdown(table) -> str:
    """Render a docx table as a markdown pipe table.

    The first row becomes the header row. Short rows are padded with empty
    cells up to the width of the widest row.

    Args:
        table: Object exposing ``rows``; each row exposes ``cells`` and each
            cell exposes ``text`` and ``_tc``.

    Returns:
        The markdown table, or an empty string when the table has no rows.
    """
    grid: list[list[str]] = []
    for table_row in table.rows:
        texts: list[str] = []
        last_tc = None
        for cell in table_row.cells:
            # Consecutive cells backed by the same element are emitted once.
            if last_tc is not None and cell._tc is last_tc:
                continue
            texts.append(_escape_pipe(cell.text.strip()))
            last_tc = cell._tc
        grid.append(texts)

    if not grid:
        return ""

    width = max(map(len, grid))

    def _as_line(values: list[str]) -> str:
        filled = values + [""] * (width - len(values))
        return "| " + " | ".join(filled[:width]) + " |"

    header, *body = grid
    out = [_as_line(header), "| " + " | ".join(["---"] * width) + " |"]
    out.extend(_as_line(values) for values in body)
    return "\n".join(out)
60
+
61
+
62
def to_markdown(path: Path, *, head: int | None = None, **_kwargs) -> str:
    """Render a .docx file as markdown, keeping paragraphs and tables in order.

    Args:
        path: Location of the .docx file.
        head: When given, stop after this many emitted blocks (non-empty
            paragraphs or tables).
        **_kwargs: Options meant for other readers; ignored here.

    Returns:
        The blocks joined by blank lines; an empty string if none were found.

    Raises:
        SystemExit: With code 3 when the package cannot be found/opened or
            the open error mentions a password or encryption.
    """
    from docx import Document
    from docx.opc.exceptions import PackageNotFoundError
    from docx.table import Table as DocxTable
    from docx.text.paragraph import Paragraph

    try:
        document = Document(str(path))
    except PackageNotFoundError:
        print(
            f"Error: '{path.name}' appears to be corrupt or invalid.",
            file=sys.stderr,
        )
        raise SystemExit(3)
    except Exception as exc:
        reason = str(exc).lower()
        # Anything not recognisably about encryption is re-raised untouched.
        if "password" not in reason and "encrypt" not in reason:
            raise
        print(
            f"Error: '{path.name}' is password-protected. "
            f"officecat cannot open encrypted files.",
            file=sys.stderr,
        )
        raise SystemExit(3)

    rendered: list[str] = []

    # Iterate the body's direct children so tables stay where they occur
    # between paragraphs.
    for element in document.element.body:
        # Drop the "{namespace}" part of the tag, if any.
        local_tag = element.tag.rpartition("}")[2]

        if local_tag == "p":
            paragraph = Paragraph(element, document)
            content = paragraph.text.strip()
            if not content:
                continue
            style_name = paragraph.style.name if paragraph.style else None
            marker = _classify_style(style_name)
            rendered.append(f"{marker}{content}" if marker else content)
        elif local_tag == "tbl":
            table_md = _table_to_markdown(DocxTable(element, document))
            if not table_md:
                continue
            rendered.append(table_md)
        else:
            continue

        # One entry is appended per counted block, so the list length is
        # the running block count.
        if head is not None and len(rendered) >= head:
            break

    return "\n\n".join(rendered)
@@ -0,0 +1,106 @@
1
+ """PowerPoint (.pptx) to markdown reader."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+
9
def to_markdown(
    path: Path,
    *,
    head: int | None = None,
    slide: int | None = None,
    **_kwargs,
) -> str:
    """Render a .pptx file as markdown, one section per slide.

    Args:
        path: Location of the .pptx file.
        head: When given, stop after this many slides have been rendered.
        slide: 1-based number of the only slide to render.
        **_kwargs: Options meant for other readers; ignored here.

    Returns:
        Slide sections separated by ``---`` rules.

    Raises:
        SystemExit: With code 3 when the file cannot be opened (invalid
            package or an error mentioning password/encryption), or code 1
            when ``slide`` is outside the deck.
    """
    from pptx import Presentation
    from pptx.enum.shapes import MSO_SHAPE_TYPE
    from pptx.exc import PackageNotFoundError

    try:
        deck = Presentation(str(path))
    except PackageNotFoundError:
        print(
            f"Error: '{path.name}' appears to be corrupt or invalid.",
            file=sys.stderr,
        )
        raise SystemExit(3)
    except Exception as exc:
        reason = str(exc).lower()
        # Anything not recognisably about encryption is re-raised untouched.
        if "password" not in reason and "encrypt" not in reason:
            raise
        print(
            f"Error: '{path.name}' is password-protected. "
            f"officecat cannot open encrypted files.",
            file=sys.stderr,
        )
        raise SystemExit(3)

    total_slides = len(deck.slides)

    if slide is not None and not (1 <= slide <= total_slides):
        print(
            f"Error: Slide {slide} not found. "
            f"Document has {total_slides} slides.",
            file=sys.stderr,
        )
        raise SystemExit(1)

    rendered: list[str] = []

    for number, current in enumerate(deck.slides, start=1):
        if slide is not None and number != slide:
            continue

        # A slide element carrying show="0" is reported as hidden.
        hidden_tag = " (Hidden)" if current._element.get("show") == "0" else ""

        title_shape = current.shapes.title
        title_text = title_shape.text.strip() if title_shape else None
        title_id = title_shape.shape_id if title_shape else None

        heading = f"## Slide {number}{hidden_tag}"
        if title_text:
            heading += f": {title_text}"

        lines: list[str] = [heading, ""]

        for shape in current.shapes:
            # The title is already part of the heading.
            if title_id is not None and shape.shape_id == title_id:
                continue

            kind = shape.shape_type
            if kind == MSO_SHAPE_TYPE.PICTURE:
                lines.append(f"*[Image: {shape.name}]*")
            elif kind == MSO_SHAPE_TYPE.GROUP:
                lines.append("*[Grouped content]*")
            elif hasattr(shape, "has_table") and shape.has_table:
                grid = shape.table
                rows = len(grid.rows)
                cols = len(grid.columns)
                lines.append(f"*[Table: {rows}x{cols}]*")
            elif shape.has_text_frame:
                text = shape.text_frame.text.strip()
                if text:
                    lines.append(text)

        # Speaker notes go last, as a block quote.
        if current.has_notes_slide:
            notes_text = current.notes_slide.notes_text_frame.text.strip()
            if notes_text:
                lines.extend(["", f"> **Notes:** {notes_text}"])

        rendered.append("\n".join(lines))

        # Stop once the requested slide, or the requested number of
        # slides, has been rendered.
        if slide is not None or (head is not None and len(rendered) >= head):
            break

    return "\n\n---\n\n".join(rendered)
@@ -0,0 +1,144 @@
1
+ """Excel (.xlsx) to markdown reader using python-calamine."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+ from pathlib import Path
7
+
8
+ DEFAULT_ROW_CAP = 500
9
+
10
+
11
+ def _escape_pipe(text: str) -> str:
12
+ return text.replace("|", "\\|")
13
+
14
+
15
+ def _col_letter(index: int) -> str:
16
+ result = ""
17
+ i = index
18
+ while True:
19
+ result = chr(65 + i % 26) + result
20
+ i = i // 26 - 1
21
+ if i < 0:
22
+ break
23
+ return result
24
+
25
+
26
+ def _format_cell(value: object) -> str:
27
+ if value is None:
28
+ return ""
29
+ if isinstance(value, float) and value == int(value):
30
+ return str(int(value))
31
+ if hasattr(value, "isoformat"):
32
+ return value.isoformat()
33
+ return str(value)
34
+
35
+
36
def _select_sheets(sheet: str | None, all_sheet_names: list[str]) -> list[str]:
    """Resolve a ``--sheet`` selector to the sheet names to render.

    An exact name match is tried first, then a 1-based index. Exits with
    status 1 (after printing the available names) when neither matches.
    """
    if sheet is None:
        return list(all_sheet_names)

    # Name first: otherwise a sheet literally called "2024" could never be
    # selected, because the selector would only ever be read as an index.
    if sheet in all_sheet_names:
        return [sheet]

    try:
        idx = int(sheet)
    except ValueError:
        print(
            f"Error: Sheet '{sheet}' not found. "
            f"Available: {', '.join(all_sheet_names)}",
            file=sys.stderr,
        )
        raise SystemExit(1)

    if 1 <= idx <= len(all_sheet_names):
        return [all_sheet_names[idx - 1]]

    print(
        f"Error: Sheet index {idx} out of range. "
        f"Available: {', '.join(all_sheet_names)}",
        file=sys.stderr,
    )
    raise SystemExit(1)


def to_markdown(
    path: Path,
    *,
    head: int | None = None,
    sheet: str | None = None,
    headers: int = 1,
    show_all: bool = False,
    **_kwargs,
) -> str:
    """Convert an xlsx file to markdown tables, one section per sheet.

    Args:
        path: Location of the workbook.
        head: Maximum number of data rows per sheet. Takes precedence over
            ``show_all`` and the default cap.
        sheet: Sheet name or 1-based index; all sheets when omitted.
        headers: 1-based row number to use as the header row; 0 disables
            header promotion and columns are labelled A, B, C, ...
        show_all: When true (and ``head`` is not given) include every row
            instead of stopping at ``DEFAULT_ROW_CAP``.
        **_kwargs: Options meant for other readers; ignored here.

    Returns:
        Sheet sections separated by ``---`` rules.

    Raises:
        SystemExit: With code 3 when the open error mentions
            password/encryption or zip/invalid/corrupt; with code 1 when
            the requested sheet does not exist.
    """
    from python_calamine import CalamineWorkbook

    try:
        wb = CalamineWorkbook.from_path(str(path))
    except Exception as e:
        # Classify the failure from the error text.
        msg = str(e).lower()
        if "password" in msg or "encrypt" in msg:
            print(
                f"Error: '{path.name}' is password-protected. "
                f"officecat cannot open encrypted files.",
                file=sys.stderr,
            )
            raise SystemExit(3)
        if "zip" in msg or "invalid" in msg or "corrupt" in msg:
            print(
                f"Error: '{path.name}' appears to be corrupt or invalid.",
                file=sys.stderr,
            )
            raise SystemExit(3)
        raise

    sheets_to_read = _select_sheets(sheet, wb.sheet_names)

    row_cap = head if head is not None else (None if show_all else DEFAULT_ROW_CAP)
    sections: list[str] = []

    for sheet_name in sheets_to_read:
        ws = wb.get_sheet_by_name(sheet_name)
        header_row: list[str] = []
        data_rows: list[list[str]] = []
        total_rows = 0

        for i, row in enumerate(ws.iter_rows(), start=1):
            if headers > 0 and i == headers:
                header_row = [_escape_pipe(_format_cell(c)) for c in row]
                continue

            total_rows += 1
            if row_cap is not None and len(data_rows) >= row_cap:
                continue  # keep counting total_rows for the footer
            data_rows.append([_escape_pipe(_format_cell(c)) for c in row])

        if not header_row:
            # No header was promoted: label the columns A, B, C, ...
            col_count = max((len(r) for r in data_rows), default=0)
            header_row = [_col_letter(i) for i in range(col_count)]

        section_lines: list[str] = [f"## Sheet: {sheet_name}", ""]

        if not header_row:
            section_lines.append("*Empty sheet.*")
        else:
            col_count = len(header_row)
            section_lines.append("| " + " | ".join(header_row) + " |")
            section_lines.append("| " + " | ".join(["---"] * col_count) + " |")

            for row in data_rows:
                padded = row + [""] * (col_count - len(row))
                section_lines.append(
                    "| " + " | ".join(padded[:col_count]) + " |"
                )

            if total_rows > len(data_rows):
                section_lines.append("")
                section_lines.append(
                    f"*Showing {len(data_rows)} of {total_rows:,} rows.*"
                )

        sections.append("\n".join(section_lines))

    return "\n\n---\n\n".join(sections)
File without changes
@@ -0,0 +1,11 @@
1
+ """JSON renderer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+
7
+
8
def render(source: str, markdown_text: str) -> None:
    """Write a JSON object describing the converted file to stdout.

    The object has exactly two keys, in this order: ``source`` (the value
    passed as *source*) and ``markdown`` (the converted markdown text).
    Output is pretty-printed with a two-space indent.
    """
    payload = dict(source=source, markdown=markdown_text)
    serialized = json.dumps(payload, indent=2)
    print(serialized)
@@ -0,0 +1,12 @@
1
+ """Plain text renderer — raw markdown via print()."""
2
+
3
+ from __future__ import annotations
4
+
5
+
6
def render(markdown_text: str, head: int | None = None) -> None:
    """Write the markdown to stdout unmodified, one line per print call.

    When *head* is given, only the slice ``lines[:head]`` of the text is
    printed; when it is ``None`` every line is printed.
    """
    all_lines = markdown_text.splitlines()
    visible = all_lines if head is None else all_lines[:head]
    for text_line in visible:
        print(text_line)
+ print(line)
@@ -0,0 +1,24 @@
1
+ """Rich colored markdown renderer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import sys
6
+
7
+
8
def render(markdown_text: str, head: int | None = None) -> None:
    """Render markdown to stdout through Rich.

    When *head* is given, only the slice ``lines[:head]`` of the text is
    rendered; when it is ``None`` the whole text is rendered.
    """
    on_windows = sys.platform == "win32"
    if on_windows and hasattr(sys.stdout, "reconfigure"):
        try:
            sys.stdout.reconfigure(encoding="utf-8")
        except Exception:
            # Best effort only: if stdout cannot be switched to UTF-8,
            # carry on with whatever encoding it already has.
            pass

    # Imported only after the stdout reconfiguration attempt above.
    from rich.console import Console
    from rich.markdown import Markdown

    if head is None:
        text = markdown_text
    else:
        kept_lines = markdown_text.splitlines()[:head]
        text = "\n".join(kept_lines)

    Console().print(Markdown(text))
File without changes
@@ -0,0 +1,34 @@
1
+ """Textual TUI app for officecat — full-screen markdown viewer."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from textual.app import App, ComposeResult
6
+ from textual.binding import Binding
7
+ from textual.containers import VerticalScroll
8
+ from textual.widgets import Footer, Header, Markdown
9
+
10
+
11
class OfficeCatApp(App):
    """Full-screen Textual viewer that shows one converted document."""

    # The scroll container takes one fractional unit of height (1fr).
    CSS = """
    #md-scroll {
        height: 1fr;
    }
    """

    BINDINGS = [
        Binding(key="q", action="quit", description="Quit"),
    ]

    def __init__(self, source: str, markdown: str, **kwargs: object) -> None:
        """Store the document and set the window title.

        Args:
            source: Label for the document; shown in the app title.
            markdown: Markdown text to display.
            **kwargs: Forwarded unchanged to ``App.__init__``.
        """
        self._source = source
        self._markdown = markdown
        super().__init__(**kwargs)
        # Title is assigned only after the base class has been initialised.
        self.title = f"officecat — {self._source}"

    def compose(self) -> ComposeResult:
        """Yield a header, the scrollable markdown view, and a footer."""
        yield Header()
        scroll_area = VerticalScroll(id="md-scroll")
        with scroll_area:
            yield Markdown(self._markdown, id="md-view")
        yield Footer()
@@ -0,0 +1,63 @@
1
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[project]
name = "officecat"
version = "0.1.0"
description = "View Office files in the terminal"
readme = "README.md"
license = { text = "MIT" }
requires-python = ">=3.10"
authors = [
    { name = "Mubbie Idoko" },
]
keywords = ["office", "cli", "docx", "xlsx", "pptx", "csv", "terminal", "viewer"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Environment :: Console",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: MIT License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
    "Topic :: Office/Business",
    "Topic :: Utilities",
]
# Runtime requirements, kept in alphabetical order.
dependencies = [
    "python-calamine>=0.2",
    "python-docx>=1.0",
    "python-pptx>=0.6",
    "rich",
    "textual>=0.50",
    "typer>=0.9",
]

[project.optional-dependencies]
# Tooling for local development: type checking, tests, and linting.
dev = ["mypy", "pytest", "ruff"]

[project.urls]
Homepage = "https://github.com/mubbie/officecat"
Repository = "https://github.com/mubbie/officecat"
Issues = "https://github.com/mubbie/officecat/issues"

[project.scripts]
# Installs the `officecat` console command, which calls officecat.cli:main.
officecat = "officecat.cli:main"

[tool.ruff]
# Matches the minimum interpreter declared in requires-python.
target-version = "py310"
line-length = 88

[tool.ruff.lint]
# E/W: pycodestyle errors and warnings, F: Pyflakes, I: isort.
select = ["E", "F", "I", "W"]

[tool.mypy]
# Matches the minimum interpreter declared in requires-python.
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true

[tool.pytest.ini_options]
testpaths = ["tests"]
File without changes
@@ -0,0 +1 @@
1
+ not a real xlsx
@@ -0,0 +1,6 @@
1
+ Name,Age,City,Role,Salary
2
+ Alice,30,New York,Engineer,120000
3
+ Bob,25,Los Angeles,Designer,95000
4
+ Charlie,35,Chicago,Manager,140000
5
+ Diana,28,Seattle,Engineer,125000
6
+ Eve,32,Boston,Analyst,105000
@@ -0,0 +1,4 @@
1
+ Name Age City Role Salary
2
+ Alice 30 New York Engineer 120000
3
+ Bob 25 Los Angeles Designer 95000
4
+ Charlie 35 Chicago Manager 140000