PyPI - doc-to-md-cli - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

doc-to-md-cli 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

doc2md/__init__.py CHANGED Viewed

@@ -0,0 +1,84 @@
+"""
+doc-to-md-cli: Convert DOCX files to Markdown using Playwright
+"""
+from pathlib import Path
+from playwright.sync_api import sync_playwright
+__version__ = "0.1.1"
+def convert_docx_to_md(
+    input_path: str | Path,
+    output_dir: str | Path = "./md",
+    headless: bool = True,
+) -> list[Path]:
+    """
+    Convert DOCX file(s) to Markdown.
+    Args:
+        input_path: Path to a DOCX file or directory containing DOCX files
+        output_dir: Directory where Markdown files will be saved
+        headless: Whether to run browser in headless mode
+    Returns:
+        List of paths to the generated Markdown files
+    Raises:
+        FileNotFoundError: If input_path doesn't exist
+        ValueError: If no DOCX files found
+    Example:
+        >>> from doc2md import convert_docx_to_md
+        >>> convert_docx_to_md("document.docx", "output")
+        ['output/document.md']
+    """
+    input_path = Path(input_path).expanduser()
+    output_dir = Path(output_dir).expanduser()
+    output_dir.mkdir(parents=True, exist_ok=True)
+    if not input_path.exists():
+        raise FileNotFoundError(f"Path not found: {input_path}")
+    # Determine files to convert
+    if input_path.is_file():
+        if input_path.suffix.lower() != ".docx":
+            raise ValueError("Input file must be a .docx file")
+        files = [input_path]
+    else:
+        files = list(input_path.glob("*.docx"))
+        if not files:
+            raise ValueError("No .docx files found in directory")
+    converted_files = []
+    with sync_playwright() as p:
+        browser = p.chromium.launch(headless=headless)
+        context = browser.new_context(
+            permissions=["clipboard-read", "clipboard-write"]
+        )
+        page = context.new_page()
+        page.goto("https://word2md.com/")
+        for file_path in files:
+            with page.expect_file_chooser() as fc_info:
+                page.click('input[type="file"]')
+            file_chooser = fc_info.value
+            file_chooser.set_files(str(file_path))
+            page.wait_for_selector("#copy-button", state="visible")
+            page.click("#copy-button")
+            md_content = page.evaluate("navigator.clipboard.readText()")
+            out_file = output_dir / (file_path.stem + ".md")
+            out_file.write_text(md_content, encoding="utf-8")
+            converted_files.append(out_file)
+        browser.close()
+    return converted_files
+__all__ = ["convert_docx_to_md", "__version__"]

doc2md/cli.py CHANGED Viewed

@@ -1,44 +1,13 @@
 #!/usr/bin/env python3
 import argparse
 from pathlib import Path
-from playwright.sync_api import sync_playwright
-def convert_files(files, output_dir: Path, headless: bool):
-    output_dir.mkdir(parents=True, exist_ok=True)
-    with sync_playwright() as p:
-        browser = p.chromium.launch(headless=headless)
-        context = browser.new_context(
-            permissions=["clipboard-read", "clipboard-write"]
-        )
-        page = context.new_page()
-        page.goto("https://word2md.com/")
-        for file_path in files:
-            print(f"⬇ Converting: {file_path.name}")
-            with page.expect_file_chooser() as fc_info:
-                page.click('input[type="file"]')
-            file_chooser = fc_info.value
-            file_chooser.set_files(str(file_path))
-            page.wait_for_selector("#copy-button", state="visible")
-            page.click("#copy-button")
-            md_content = page.evaluate("navigator.clipboard.readText()")
-            out_file = output_dir / (file_path.stem + ".md")
-            out_file.write_text(md_content, encoding="utf-8")
-            print(f"✔ Saved: {out_file}")
-        browser.close()
+from . import convert_docx_to_md
 def main():
     parser = argparse.ArgumentParser(
-        prog="doc2md",
+        prog="doc-to-md",
         description="Convert DOCX files to Markdown using word2md.com"
     )
@@ -56,29 +25,29 @@ def main():
     parser.add_argument(
         "--headless",
         action="store_true",
-        help="Run browser in headless mode"
+        default=True,
+        help="Run browser in headless mode (default: True)"
     )
-    args = parser.parse_args()
-    input_path = Path(args.input).expanduser()
-    output_dir = Path(args.out).expanduser()
-    if not input_path.exists():
-        raise SystemExit(f"❌ Path not found: {input_path}")
-    # 🔥 NEW LOGIC
-    if input_path.is_file():
-        if input_path.suffix.lower() != ".docx":
-            raise SystemExit("❌ Input file must be a .docx file")
-        files = [input_path]
+    parser.add_argument(
+        "--no-headless",
+        action="store_false",
+        dest="headless",
+        help="Show browser window"
+    )
-    else:
-        files = list(input_path.glob("*.docx"))
-        if not files:
-            raise SystemExit("❌ No .docx files found")
+    args = parser.parse_args()
-    convert_files(files, output_dir, args.headless)
+    try:
+        converted_files = convert_docx_to_md(
+            args.input,
+            output_dir=args.out,
+            headless=args.headless,
+        )
+        for f in converted_files:
+            print(f"✔ Saved: {f}")
+    except (FileNotFoundError, ValueError) as e:
+        raise SystemExit(f"❌ {e}")
 if __name__ == "__main__":

{doc_to_md_cli-0.1.0.dist-info → doc_to_md_cli-0.1.1.dist-info}/METADATA RENAMED Viewed

@@ -1,10 +1,10 @@
 Metadata-Version: 2.4
 Name: doc-to-md-cli
-Version: 0.1.0
+Version: 0.1.1
 Summary: Convert DOCX files to Markdown using Playwright
-Project-URL: Homepage, https://github.com/YOUR_USERNAME/doc2md-cli
-Project-URL: Repository, https://github.com/YOUR_USERNAME/doc2md-cli
-Project-URL: Issues, https://github.com/YOUR_USERNAME/doc2md-cli/issues
+Project-URL: Homepage, https://github.com/ebinesh25/doc2md-cli
+Project-URL: Repository, https://github.com/ebinesh25/doc2md-cli
+Project-URL: Issues, https://github.com/ebinesh25/doc2md-cli/issues
 License: MIT
 License-File: LICENSE
 Keywords: cli,converter,docx,markdown,word
@@ -26,7 +26,7 @@ Description-Content-Type: text/markdown
 # doc-to-md-cli
-A command-line tool to convert DOCX files to Markdown using Playwright automation.
+A Python library and CLI tool to convert DOCX files to Markdown using Playwright automation.
 ## Features
@@ -34,6 +34,7 @@ A command-line tool to convert DOCX files to Markdown using Playwright automatio
 - Batch processing support
 - Headless browser mode for automation
 - Clean Markdown output powered by word2md.com
+- Use as a CLI tool or import as a Python library
 ## Installation
@@ -41,43 +42,62 @@ A command-line tool to convert DOCX files to Markdown using Playwright automatio
 pip install doc-to-md-cli
 ```
+Install Playwright browser (required):
+```bash
+playwright install chromium
+```
 ## Usage
-### Convert a single file
+### CLI
+Convert a single file:
 ```bash
 doc-to-md document.docx
 ```
-### Convert to a specific output directory
+Convert to a specific output directory:
 ```bash
 doc-to-md document.docx --out ./output
 ```
-### Convert all DOCX files in a directory
+Convert all DOCX files in a directory:
 ```bash
 doc-to-md ./my-docs --out ./converted
 ```
-### Run in headless mode (no visible browser)
+Show browser window (non-headless mode):
 ```bash
-doc-to-md document.docx --headless
+doc-to-md document.docx --no-headless
 ```
-## Requirements
+### Python API
-- Python 3.8 or higher
-- Playwright (installed automatically)
+```python
+from doc2md import convert_docx_to_md
-After installing, you may need to install Playwright browsers:
+# Convert a single file
+files = convert_docx_to_md("document.docx", "output")
+print(files)  # ['output/document.md']
-```bash
-playwright install chromium
+# Convert all files in a directory
+files = convert_docx_to_md("./my-docs", "output")
+# ['output/file1.md', 'output/file2.md', ...]
+# Convert with browser visible
+files = convert_docx_to_md("document.docx", headless=False)
 ```
+## Requirements
+- Python 3.8 or higher
+- Playwright (installed automatically)
 ## License
 MIT License - see LICENSE file for details.

doc_to_md_cli-0.1.1.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,7 @@
+doc2md/__init__.py,sha256=ZzsCLzhzHV0fKtIVNFNHT0dFPGzXeNgLSIBnUzwc55g,2478
+doc2md/cli.py,sha256=y8PNOErH6cVm_4zOJgzlLj26Wo5Riue2u4dM735VN_I,1183
+doc_to_md_cli-0.1.1.dist-info/METADATA,sha256=3EqwwL0WYKKUwqwbMZne03PqMgMdzAnQ2PzCmfldjwI,2447
+doc_to_md_cli-0.1.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+doc_to_md_cli-0.1.1.dist-info/entry_points.txt,sha256=YyynNzAcKo_GQg_JVs7gKf3JKJvjf1Tg6HPPmRhzBvc,46
+doc_to_md_cli-0.1.1.dist-info/licenses/LICENSE,sha256=iDvN_LXmDQT00XUgh0zJHPIWQqWpwd-YXOhR_7WD7uQ,1063
+doc_to_md_cli-0.1.1.dist-info/RECORD,,

{doc_to_md_cli-0.1.0.dist-info → doc_to_md_cli-0.1.1.dist-info}/licenses/LICENSE RENAMED Viewed

@@ -1,6 +1,6 @@
 MIT License
-Copyright (c) 2025 [Your Name]
+Copyright (c) 2025Ebinesh
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal

doc_to_md_cli-0.1.0.dist-info/RECORD DELETED Viewed

@@ -1,7 +0,0 @@
-doc2md/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-doc2md/cli.py,sha256=GLfepA1qHLdPb1Jjl630srrDAAGL6NW86MW0S5ihp2s,2319
-doc_to_md_cli-0.1.0.dist-info/METADATA,sha256=rhWZqq_MsfmkGaWF5YNEUBuy8RLoTUF_s7VDYqLXLys,2014
-doc_to_md_cli-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-doc_to_md_cli-0.1.0.dist-info/entry_points.txt,sha256=YyynNzAcKo_GQg_JVs7gKf3JKJvjf1Tg6HPPmRhzBvc,46
-doc_to_md_cli-0.1.0.dist-info/licenses/LICENSE,sha256=mnxgbhYn7K71v_xzsVUFRVeXIMYebeqdmubgALMnuIg,1068
-doc_to_md_cli-0.1.0.dist-info/RECORD,,

{doc_to_md_cli-0.1.0.dist-info → doc_to_md_cli-0.1.1.dist-info}/WHEEL RENAMED Viewed

File without changes

{doc_to_md_cli-0.1.0.dist-info → doc_to_md_cli-0.1.1.dist-info}/entry_points.txt RENAMED Viewed

File without changes

doc-to-md-cli 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

doc-to-md-cli 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl