ebk 0.4.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ebk/__init__.py +35 -0
- ebk/ai/__init__.py +23 -0
- ebk/ai/knowledge_graph.py +450 -0
- ebk/ai/llm_providers/__init__.py +26 -0
- ebk/ai/llm_providers/anthropic.py +209 -0
- ebk/ai/llm_providers/base.py +295 -0
- ebk/ai/llm_providers/gemini.py +285 -0
- ebk/ai/llm_providers/ollama.py +294 -0
- ebk/ai/metadata_enrichment.py +394 -0
- ebk/ai/question_generator.py +328 -0
- ebk/ai/reading_companion.py +224 -0
- ebk/ai/semantic_search.py +433 -0
- ebk/ai/text_extractor.py +393 -0
- ebk/calibre_import.py +66 -0
- ebk/cli.py +6433 -0
- ebk/config.py +230 -0
- ebk/db/__init__.py +37 -0
- ebk/db/migrations.py +507 -0
- ebk/db/models.py +725 -0
- ebk/db/session.py +144 -0
- ebk/decorators.py +1 -0
- ebk/exports/__init__.py +0 -0
- ebk/exports/base_exporter.py +218 -0
- ebk/exports/echo_export.py +279 -0
- ebk/exports/html_library.py +1743 -0
- ebk/exports/html_utils.py +87 -0
- ebk/exports/hugo.py +59 -0
- ebk/exports/jinja_export.py +286 -0
- ebk/exports/multi_facet_export.py +159 -0
- ebk/exports/opds_export.py +232 -0
- ebk/exports/symlink_dag.py +479 -0
- ebk/exports/zip.py +25 -0
- ebk/extract_metadata.py +341 -0
- ebk/ident.py +89 -0
- ebk/library_db.py +1440 -0
- ebk/opds.py +748 -0
- ebk/plugins/__init__.py +42 -0
- ebk/plugins/base.py +502 -0
- ebk/plugins/hooks.py +442 -0
- ebk/plugins/registry.py +499 -0
- ebk/repl/__init__.py +9 -0
- ebk/repl/find.py +126 -0
- ebk/repl/grep.py +173 -0
- ebk/repl/shell.py +1677 -0
- ebk/repl/text_utils.py +320 -0
- ebk/search_parser.py +413 -0
- ebk/server.py +3608 -0
- ebk/services/__init__.py +28 -0
- ebk/services/annotation_extraction.py +351 -0
- ebk/services/annotation_service.py +380 -0
- ebk/services/export_service.py +577 -0
- ebk/services/import_service.py +447 -0
- ebk/services/personal_metadata_service.py +347 -0
- ebk/services/queue_service.py +253 -0
- ebk/services/tag_service.py +281 -0
- ebk/services/text_extraction.py +317 -0
- ebk/services/view_service.py +12 -0
- ebk/similarity/__init__.py +77 -0
- ebk/similarity/base.py +154 -0
- ebk/similarity/core.py +471 -0
- ebk/similarity/extractors.py +168 -0
- ebk/similarity/metrics.py +376 -0
- ebk/skills/SKILL.md +182 -0
- ebk/skills/__init__.py +1 -0
- ebk/vfs/__init__.py +101 -0
- ebk/vfs/base.py +298 -0
- ebk/vfs/library_vfs.py +122 -0
- ebk/vfs/nodes/__init__.py +54 -0
- ebk/vfs/nodes/authors.py +196 -0
- ebk/vfs/nodes/books.py +480 -0
- ebk/vfs/nodes/files.py +155 -0
- ebk/vfs/nodes/metadata.py +385 -0
- ebk/vfs/nodes/root.py +100 -0
- ebk/vfs/nodes/similar.py +165 -0
- ebk/vfs/nodes/subjects.py +184 -0
- ebk/vfs/nodes/tags.py +371 -0
- ebk/vfs/resolver.py +228 -0
- ebk/vfs_router.py +275 -0
- ebk/views/__init__.py +32 -0
- ebk/views/dsl.py +668 -0
- ebk/views/service.py +619 -0
- ebk-0.4.4.dist-info/METADATA +755 -0
- ebk-0.4.4.dist-info/RECORD +87 -0
- ebk-0.4.4.dist-info/WHEEL +5 -0
- ebk-0.4.4.dist-info/entry_points.txt +2 -0
- ebk-0.4.4.dist-info/licenses/LICENSE +21 -0
- ebk-0.4.4.dist-info/top_level.txt +1 -0
ebk/db/session.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Database session management for ebk.
|
|
3
|
+
|
|
4
|
+
Provides session factory and initialization utilities.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Optional
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
|
|
11
|
+
from sqlalchemy import create_engine, event, text
|
|
12
|
+
from sqlalchemy.orm import sessionmaker, Session
|
|
13
|
+
from sqlalchemy.engine import Engine
|
|
14
|
+
|
|
15
|
+
from .models import Base
|
|
16
|
+
|
|
17
|
+
# Global session factory
|
|
18
|
+
_SessionFactory: Optional[sessionmaker] = None
|
|
19
|
+
_engine: Optional[Engine] = None
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def init_db(library_path: Path, echo: bool = False) -> Engine:
    """
    Initialize database and create all tables.

    Args:
        library_path: Path to library directory
        echo: If True, log all SQL statements (debug mode)

    Returns:
        SQLAlchemy engine
    """
    global _engine, _SessionFactory

    library_path = Path(library_path)
    library_path.mkdir(parents=True, exist_ok=True)

    db_file = library_path / 'library.db'
    _engine = create_engine(f'sqlite:///{db_file}', echo=echo)

    @event.listens_for(_engine, "connect")
    def _enable_foreign_keys(dbapi_conn, connection_record):
        # SQLite does not enforce foreign keys unless asked per-connection.
        cursor = dbapi_conn.cursor()
        cursor.execute("PRAGMA foreign_keys=ON")
        cursor.close()

    # Create all ORM-mapped tables.
    Base.metadata.create_all(_engine)

    # Create the FTS5 virtual table for full-text search, once.
    with _engine.connect() as conn:
        row = conn.execute(
            text("SELECT name FROM sqlite_master WHERE type='table' AND name='books_fts'")
        ).fetchone()
        if row is None:
            conn.execute(text("""
                CREATE VIRTUAL TABLE books_fts USING fts5(
                    book_id UNINDEXED,
                    title,
                    description,
                    extracted_text,
                    tokenize='porter unicode61'
                )
            """))
            conn.commit()

    _SessionFactory = sessionmaker(bind=_engine)

    return _engine
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_session() -> Session:
    """
    Get a new database session.

    Returns:
        SQLAlchemy session

    Raises:
        RuntimeError: If database not initialized
    """
    factory = _SessionFactory
    if factory is None:
        raise RuntimeError(
            "Database not initialized. Call init_db() first."
        )
    return factory()
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
@contextmanager
def session_scope():
    """
    Provide a transactional scope around a series of operations.

    Usage:
        with session_scope() as session:
            session.add(book)
            # Automatically commits or rolls back
    """
    db = get_session()
    try:
        yield db
        # Commit inside the try so a failing commit is also rolled back.
        db.commit()
    except Exception:
        db.rollback()
        raise
    finally:
        db.close()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def close_db():
    """Close database connection and cleanup."""
    global _engine, _SessionFactory

    engine = _engine
    if engine:
        engine.dispose()
        _engine = None

    _SessionFactory = None
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def get_or_create(session: Session, model, **kwargs):
    """
    Get existing instance or create new one.

    Args:
        session: Database session
        model: SQLAlchemy model class
        **kwargs: Filter criteria and/or values to set

    Returns:
        Tuple of (instance, created: bool)
    """
    found = session.query(model).filter_by(**kwargs).first()
    if found:
        return found, False

    created = model(**kwargs)
    session.add(created)
    return created, True
|
ebk/decorators.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Decorators for ebk functionality."""
|
ebk/exports/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""Base exporter class for ebk library exports."""
|
|
2
|
+
|
|
3
|
+
from abc import ABC, abstractmethod
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Dict, List, Optional, Any
|
|
6
|
+
import json
|
|
7
|
+
import shutil
|
|
8
|
+
import re
|
|
9
|
+
from .html_utils import create_safe_filename, sanitize_for_html
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BaseExporter(ABC):
    """
    Abstract base class for all ebk exporters.

    Provides common functionality for exporting library data:
    - Loading metadata
    - File operations (copy/symlink)
    - Filename sanitization
    - Directory management

    Subclasses must implement :meth:`export`.
    """

    def __init__(self):
        """Initialize the base exporter."""
        self.library_path = None  # set by load_metadata()
        self.output_path = None   # set by prepare_output_directory()
        self.entries = []         # entries loaded from metadata.json

    def load_metadata(self, library_path: Path) -> List[Dict]:
        """
        Load metadata from the library.

        Args:
            library_path: Path to the ebk library

        Returns:
            List of entry dictionaries

        Raises:
            FileNotFoundError: If metadata.json doesn't exist
            json.JSONDecodeError: If metadata is invalid
        """
        self.library_path = Path(library_path)
        metadata_path = self.library_path / "metadata.json"

        if not metadata_path.exists():
            raise FileNotFoundError(f"Metadata file not found at {metadata_path}")

        with open(metadata_path, 'r', encoding='utf-8') as f:
            self.entries = json.load(f)

        return self.entries

    def prepare_output_directory(self, output_path: Path, clean: bool = True):
        """
        Prepare the output directory.

        Args:
            output_path: Path for output
            clean: Whether to remove an existing directory first
        """
        self.output_path = Path(output_path)

        if clean and self.output_path.exists():
            shutil.rmtree(self.output_path)

        self.output_path.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _iter_entry_files(entry: Dict, source_dir: Path):
        """
        Yield (source_path, dest_name) pairs for an entry's existing files.

        Covers the ebook files listed under 'file_paths' followed by the
        optional 'cover_path' image; sources that don't exist are skipped.
        """
        for file_path in entry.get('file_paths', []):
            src_file = source_dir / file_path
            if src_file.exists():
                yield src_file, Path(file_path).name

        cover_path = entry.get('cover_path')
        if cover_path:
            src_cover = source_dir / cover_path
            if src_cover.exists():
                yield src_cover, Path(cover_path).name

    def copy_entry_files(self, entry: Dict, source_dir: Path, dest_dir: Path):
        """
        Copy entry files (ebooks and covers) to destination.

        Args:
            entry: Entry dictionary
            source_dir: Source library directory
            dest_dir: Destination directory
        """
        for src, name in self._iter_entry_files(entry, source_dir):
            shutil.copy2(src, dest_dir / name)

    def symlink_entry_files(self, entry: Dict, source_dir: Path, dest_dir: Path):
        """
        Create symlinks to entry files instead of copying.

        Args:
            entry: Entry dictionary
            source_dir: Source library directory
            dest_dir: Destination directory
        """
        for src, name in self._iter_entry_files(entry, source_dir):
            dest = dest_dir / name
            # is_symlink() guards against broken links: exists() follows the
            # link target and returns False for them, which would then make
            # symlink_to() raise FileExistsError.
            if not dest.exists() and not dest.is_symlink():
                dest.symlink_to(src.absolute())

    def sanitize_filename(self, name: str, max_length: int = 100) -> str:
        """
        Sanitize filename to be filesystem-safe.

        Args:
            name: Original filename
            max_length: Maximum length for filename

        Returns:
            Sanitized filename
        """
        return create_safe_filename(name, max_length=max_length)

    def get_readable_name(self, entry: Dict) -> str:
        """
        Get a human-readable name for an entry.

        Args:
            entry: Entry dictionary

        Returns:
            Readable name combining title and author
        """
        title = entry.get('title', 'Unknown')
        creators = entry.get('creators', [])

        if creators:
            author = creators[0]
            if len(creators) > 1:
                author += " et al."
            return f"{title} - {author}"

        return title

    def write_json(self, data: Any, file_path: Path):
        """
        Write JSON data to file with proper encoding.

        Args:
            data: Data to serialize
            file_path: Output file path
        """
        with open(file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def create_readme(self, output_dir: Path, stats: Dict):
        """
        Create a README file with export information.

        Args:
            output_dir: Output directory
            stats: Statistics dictionary
        """
        readme_path = output_dir / "README.md"

        content = f"""# EBK Library Export

This directory contains an export of an EBK library.

## Statistics
- Total entries: {stats.get('total_entries', 0)}
- Export date: {stats.get('export_date', 'Unknown')}
- Export type: {stats.get('export_type', 'Unknown')}

## Structure
{stats.get('structure_description', 'See directory contents for structure.')}

---
Generated by EBK Library Manager
"""

        with open(readme_path, 'w', encoding='utf-8') as f:
            f.write(content)

    @abstractmethod
    def export(self, library_path: Path, output_path: Path, **options):
        """
        Export the library.

        This method must be implemented by subclasses.

        Args:
            library_path: Path to source library
            output_path: Path for output
            **options: Additional export options
        """
        pass

    def validate_export(self) -> bool:
        """
        Validate that the export was successful.

        Returns:
            True if validation passes
        """
        if not self.output_path or not self.output_path.exists():
            return False

        # Check if at least some files were created
        return any(self.output_path.iterdir())
|
|
@@ -0,0 +1,279 @@
|
|
|
1
|
+
"""
|
|
2
|
+
ECHO format exporter for ebk e-book library.
|
|
3
|
+
|
|
4
|
+
Exports library in an ECHO-compliant directory structure with:
|
|
5
|
+
- README.md explaining the archive
|
|
6
|
+
- library.db (SQLite database copy)
|
|
7
|
+
- books.jsonl (one book per line)
|
|
8
|
+
- covers/ directory with cover images
|
|
9
|
+
- by-author/ directory with markdown indexes
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
import json
|
|
13
|
+
import shutil
|
|
14
|
+
from datetime import datetime, timezone
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import List, Dict, Any, Optional
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def export_echo(
    library_path: Path,
    output_dir: Path,
    db_path: Optional[Path] = None,
    entries: Optional[List[Dict[str, Any]]] = None,
    owner_name: str = "Unknown"
) -> Dict[str, Any]:
    """
    Export library to ECHO-compliant directory structure.

    Args:
        library_path: Source library path
        output_dir: Output directory
        db_path: Path to SQLite database (optional, for copy)
        entries: List of book entries (if not provided, reads from library)
        owner_name: Name of archive owner for README

    Returns:
        Summary dict with export statistics
    """
    # Coerce to Path so plain-string arguments from CLI callers also work.
    library_path = Path(library_path)
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    if entries is None:
        entries = _load_entries(library_path)

    db_included = _copy_database(db_path, output_path)

    _write_books_jsonl(entries, output_path / "books.jsonl")

    covers_copied = _copy_covers(entries, library_path, output_path / "covers")

    author_books = _group_by_author(entries)
    _write_author_indexes(author_books, output_path / "by-author")

    # Generate README
    readme_content = _generate_echo_readme(
        owner_name=owner_name,
        total_books=len(entries),
        total_authors=len(author_books),
        covers_included=covers_copied,
        db_included=db_included
    )
    (output_path / "README.md").write_text(readme_content, encoding="utf-8")

    return {
        "total_exported": len(entries),
        "covers_copied": covers_copied,
        "authors": len(author_books),
        "db_included": db_included,
        "output_dir": str(output_path)
    }


def _load_entries(library_path: Path) -> List[Dict[str, Any]]:
    """Read entries from the library's metadata.json, or [] when absent."""
    metadata_path = library_path / "metadata.json"
    if not metadata_path.exists():
        return []
    with open(metadata_path, 'r', encoding='utf-8') as f:
        return json.load(f)


def _copy_database(db_path: Optional[Path], output_path: Path) -> bool:
    """Copy the SQLite database into the export; return True when copied."""
    if not db_path:
        return False
    db_path = Path(db_path)
    if not db_path.exists():
        return False
    shutil.copy2(db_path, output_path / "library.db")
    return True


def _write_books_jsonl(entries: List[Dict[str, Any]], jsonl_path: Path) -> None:
    """Write one JSON object per line with the documented ECHO field set."""
    with open(jsonl_path, "w", encoding="utf-8") as f:
        for entry in entries:
            record = {
                "id": entry.get("id"),
                "title": entry.get("title", "Unknown"),
                "creators": entry.get("creators", []),
                "language": entry.get("language"),
                "publisher": entry.get("publisher"),
                "published_date": entry.get("published_date"),
                "isbn": entry.get("isbn"),
                "subjects": entry.get("subjects", []),
                "description": entry.get("description"),
                "file_paths": entry.get("file_paths", []),
                "file_formats": entry.get("file_formats", []),
                "cover_path": entry.get("cover_path"),
                "added_at": entry.get("added_at"),
                "status": entry.get("status"),
                "rating": entry.get("rating"),
                "favorite": entry.get("favorite"),
                "tags": entry.get("tags", []),
            }
            f.write(json.dumps(record, ensure_ascii=False) + "\n")


def _copy_covers(entries: List[Dict[str, Any]], library_path: Path, covers_dir: Path) -> int:
    """Copy cover images into *covers_dir*, named by entry id; return count."""
    covers_dir.mkdir(exist_ok=True)
    covers_copied = 0
    for entry in entries:
        cover_path = entry.get("cover_path")
        if not cover_path:
            continue
        src_cover = library_path / cover_path
        if not src_cover.exists():
            continue
        # Use entry ID as filename; keep the original image format.
        entry_id = entry.get("id", "unknown")
        suffix = src_cover.suffix or ".jpg"
        shutil.copy2(src_cover, covers_dir / f"{entry_id}{suffix}")
        covers_copied += 1
    return covers_copied


def _group_by_author(entries: List[Dict[str, Any]]) -> Dict[str, List[Dict[str, Any]]]:
    """Group entries by author name; entries with no creators go under 'Unknown'."""
    author_books: Dict[str, List[Dict[str, Any]]] = {}
    for entry in entries:
        creators = entry.get("creators", []) or ["Unknown"]
        for author in creators:
            author_books.setdefault(author, []).append(entry)
    return author_books


def _write_author_indexes(author_books: Dict[str, List[Dict[str, Any]]], by_author_dir: Path) -> None:
    """Write one Markdown index file per author listing their books."""
    by_author_dir.mkdir(exist_ok=True)

    for author, books in sorted(author_books.items()):
        # Create a filesystem-safe filename from the author name.
        safe_name = "".join(c if c.isalnum() or c in " -_" else "_" for c in author)
        safe_name = safe_name[:100].strip() or "unknown"

        md_path = by_author_dir / f"{safe_name}.md"

        lines = [f"# {author}", "", f"Books by {author} ({len(books)} total)", ""]

        for book in sorted(books, key=lambda x: x.get("title", "")):
            title = book.get("title", "Unknown")
            year = ""
            pub_date = book.get("published_date")
            if pub_date:
                year = f" ({pub_date[:4]})" if len(pub_date) >= 4 else ""

            lines.append(f"## {title}{year}")
            lines.append("")

            if book.get("description"):
                desc = book["description"]
                if len(desc) > 300:
                    desc = desc[:297] + "..."
                lines.append(desc)
                lines.append("")

            formats = book.get("file_formats", [])
            if formats:
                lines.append(f"Formats: {', '.join(formats)}")
                lines.append("")

        with open(md_path, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def _generate_echo_readme(
    owner_name: str,
    total_books: int,
    total_authors: int,
    covers_included: int,
    db_included: bool
) -> str:
    """Generate ECHO-compliant README for ebook archive.

    Args:
        owner_name: Archive owner's name, shown in the README heading.
        total_books: Number of books exported.
        total_authors: Number of distinct authors in the export.
        covers_included: Number of cover images copied into covers/.
        db_included: Whether library.db was copied; controls the SQLite
            section and the tree-diagram line for it.

    Returns:
        The complete README.md contents as a Markdown string.
    """
    # Optional SQLite section, only inserted when the database copy exists.
    db_section = ""
    if db_included:
        db_section = """
### SQLite Database

The `library.db` file is a copy of the source database.

Key tables:
- `books`: id, title, language, publisher, published_date, isbn, ...
- `authors`: id, name
- `book_authors`: book_id, author_id (many-to-many)
- `subjects`: id, name
- `book_subjects`: book_id, subject_id

Query examples:
```sql
-- List all books
sqlite3 library.db "SELECT title, published_date FROM books ORDER BY title"

-- Books by author
sqlite3 library.db "SELECT b.title FROM books b
  JOIN book_authors ba ON b.id = ba.book_id
  JOIN authors a ON ba.author_id = a.id
  WHERE a.name LIKE '%Tolkien%'"
```
"""

    # NOTE: literal braces in the template ({{id}}, the JSON example) are
    # doubled for the f-string; everything else interpolates at call time.
    return f"""# E-Book Library Archive

{owner_name}'s e-book collection.

Exported: {datetime.now(timezone.utc).strftime('%Y-%m-%d')}
Total books: {total_books}
Total authors: {total_authors}
Covers included: {covers_included}

## Format

This is an ECHO-compliant archive. All data is in durable, open formats.

### Directory Structure

```
├── README.md # This file
├── books.jsonl # One book per line
{"├── library.db # SQLite database" if db_included else ""}
├── covers/ # Cover images
│ └── {{id}}.jpg
└── by-author/ # Markdown index by author
 ├── author-name.md
 └── ...
```

### books.jsonl

Each line is a JSON object:

```json
{{"id": "...", "title": "...", "creators": ["..."], "subjects": [...], ...}}
```

Fields:
- `id`: Unique identifier
- `title`: Book title
- `creators`: Array of author names
- `language`: ISO language code
- `publisher`: Publisher name
- `published_date`: Publication date
- `isbn`: ISBN (if available)
- `subjects`: Array of subject/genre tags
- `description`: Book description
- `file_paths`: Relative paths to ebook files
- `file_formats`: Array of formats (epub, pdf, etc.)
- `cover_path`: Relative path to cover image
- `status`: Reading status (read, reading, to-read)
- `rating`: User rating (1-5)
- `favorite`: Boolean
- `tags`: User tags
{db_section}
### covers/ Directory

Cover images named by book ID. Original format preserved.

### by-author/ Directory

Markdown files for each author listing their books.

## Exploring

1. **Browse authors**: Look in `by-author/` directory
2. **Search**: `grep -l "search term" by-author/*.md`
3. **Parse**: Process `books.jsonl` with any JSON tool
4. **Query**: Use SQLite browser on `library.db` (if included)
5. **View covers**: Browse `covers/` directory

## About ECHO

ECHO is a philosophy for durable personal data archives.
Learn more: https://github.com/alextowell/longecho

---

*Generated by ebk (E-Book Library Manager)*
"""
|