ebk 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ebk might be problematic. Click here for more details.
- ebk/__init__.py +35 -0
- ebk/cli.py +1724 -664
- ebk/config.py +260 -22
- ebk/decorators.py +132 -0
- ebk/extract_metadata.py +76 -7
- ebk/library_db.py +744 -0
- ebk/plugins/__init__.py +42 -0
- ebk/plugins/base.py +502 -0
- ebk/plugins/hooks.py +444 -0
- ebk/plugins/registry.py +500 -0
- ebk/search_parser.py +413 -0
- ebk/server.py +1633 -0
- ebk-0.3.1.dist-info/METADATA +755 -0
- ebk-0.3.1.dist-info/RECORD +19 -0
- {ebk-0.1.0.dist-info → ebk-0.3.1.dist-info}/WHEEL +1 -1
- ebk-0.3.1.dist-info/entry_points.txt +6 -0
- ebk-0.3.1.dist-info/licenses/LICENSE +21 -0
- ebk-0.3.1.dist-info/top_level.txt +2 -0
- ebk/exports/__init__.py +0 -0
- ebk/exports/hugo.py +0 -55
- ebk/exports/zip.py +0 -25
- ebk/imports/__init__.py +0 -0
- ebk/imports/calibre.py +0 -144
- ebk/imports/ebooks.py +0 -116
- ebk/llm.py +0 -58
- ebk/manager.py +0 -44
- ebk/merge.py +0 -308
- ebk/streamlit/__init__.py +0 -0
- ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
- ebk/streamlit/app.py +0 -185
- ebk/streamlit/display.py +0 -168
- ebk/streamlit/filters.py +0 -151
- ebk/streamlit/utils.py +0 -58
- ebk/utils.py +0 -311
- ebk-0.1.0.dist-info/METADATA +0 -457
- ebk-0.1.0.dist-info/RECORD +0 -29
- ebk-0.1.0.dist-info/entry_points.txt +0 -2
- ebk-0.1.0.dist-info/top_level.txt +0 -1
ebk/streamlit/filters.py
DELETED
|
@@ -1,151 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
import streamlit as st
|
|
3
|
-
import logging
|
|
4
|
-
|
|
5
|
-
logger = logging.getLogger(__name__)
|
|
6
|
-
|
|
7
|
-
def sanitize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Sanitize the DataFrame so every expected metadata column exists with a
    predictable cell type, creating missing columns as needed.

    Columns handled:
        * list-valued:   'creators', 'subjects', 'file_paths'
        * dict-valued:   'identifiers'
        * str-valued:    'language', 'cover_path', 'title', 'description'
        * numeric:       'date' (unparseable values become NaN)

    Args:
        df: Raw metadata DataFrame (mutated in place).

    Returns:
        pd.DataFrame: The same DataFrame with normalized columns.
    """

    def _coerce_column(col: str, check, empty_factory) -> None:
        # Replace every cell that fails `check` with a fresh empty value;
        # create the whole column when it is absent.  This replaces eight
        # near-identical copy-pasted branches from the previous version.
        if col in df.columns:
            df[col] = df[col].apply(lambda x: x if check(x) else empty_factory())
            logger.debug(f"Sanitized '{col}' column.")
        else:
            df[col] = [empty_factory() for _ in range(len(df))]
            logger.debug(f"Created empty '{col}' column.")

    # Columns whose cells must be lists.
    for col in ('creators', 'subjects', 'file_paths'):
        _coerce_column(col, lambda x: isinstance(x, list), list)

    # 'identifiers' cells must be dicts.
    _coerce_column('identifiers', lambda x: isinstance(x, dict), dict)

    # Plain string columns; fillna/astype keep the dtype uniformly `str`
    # (matches the previous apply -> fillna -> astype pipeline).
    for col in ('language', 'cover_path', 'title', 'description'):
        _coerce_column(col, lambda x: isinstance(x, str), str)
        df[col] = df[col].fillna('').astype(str)

    # 'date' must be numeric so range filters can compare it.
    if 'date' in df.columns:
        df['date'] = pd.to_numeric(df['date'], errors='coerce')
        logger.debug("Sanitized 'date' column to ensure numeric types.")
    else:
        df['date'] = [None for _ in range(len(df))]
        logger.debug("Created empty 'date' column.")

    return df
|
|
71
|
-
|
|
72
|
-
def create_filters(df: pd.DataFrame) -> pd.DataFrame:
    """
    Creates and applies advanced filters to the DataFrame based on user inputs.
    Returns the filtered DataFrame.

    Renders all filter widgets in the Streamlit sidebar (title search, author /
    subject / virtual-library / language multiselects, publication-year slider,
    identifier search), then applies each non-empty filter in sequence to a
    copy of *df*.

    NOTE(review): this function indexes df['creators'], df['subjects'],
    df['virtual_libs'], df['language'] and df['identifiers'] directly —
    presumably the caller has already run sanitize_dataframe, but that helper
    does not create 'virtual_libs'; a missing column would raise KeyError.
    Confirm against the caller.

    Args:
        df: Metadata DataFrame with list-valued 'creators'/'subjects'/
            'virtual_libs', string 'title'/'language', dict 'identifiers',
            and (optionally) numeric 'date' columns.

    Returns:
        pd.DataFrame: A filtered copy of *df*; the original is not mutated.
    """
    # Sidebar for Filters
    st.sidebar.header("🔍 Filters")

    # Title Search
    title_search = st.sidebar.text_input("🔎 Search by Title")

    # Author Filter (Multi-select) — options are the union of all creators.
    all_creators = sorted(set(creator for creators in df['creators'] for creator in creators))
    selected_authors = st.sidebar.multiselect("👤 Filter by Author(s)", all_creators, default=[])

    # Subjects Filter (Multi-select)
    all_subjects = sorted(set(subject for subjects in df['subjects'] for subject in subjects))
    selected_subjects = st.sidebar.multiselect("📚 Filter by Subject(s)", all_subjects, default=[])

    # Search by Various Libraries
    all_libraries = sorted(set(lib for libs in df['virtual_libs'] for lib in libs))
    selected_libraries = st.sidebar.multiselect("📚 Filter by Virtual Library(s)", all_libraries, default=[])

    # Language Filter (Multi-select) — empty strings are excluded from options.
    all_languages = sorted(set(lang for lang in df['language'] if lang))
    selected_languages = st.sidebar.multiselect("🌐 Filter by Language(s)", all_languages, default=[])

    # Publication Date Filter (Range Slider) — only shown when 'date' is
    # numeric and has at least one non-NaN, non-zero min and max.
    selected_years = None
    if 'date' in df.columns and pd.api.types.is_numeric_dtype(df['date']):
        min_year = int(df['date'].min()) if pd.notna(df['date'].min()) else 0
        max_year = int(df['date'].max()) if pd.notna(df['date'].max()) else 0
        if min_year and max_year:
            selected_years = st.sidebar.slider("📅 Publication Year Range", min_year, max_year, (min_year, max_year))
            logger.debug(f"Publication year range selected: {selected_years}")
        else:
            st.sidebar.info("📅 No valid publication year data available.")
            logger.warning("Publication year data is not available or entirely NaN.")
    else:
        st.sidebar.info("📅 Publication date data is not available or not in a numeric format.")
        logger.warning("Publication date data is not available or not numeric.")

    # Identifier Search
    identifier_search = st.sidebar.text_input("🔑 Search by Identifier (e.g., ISBN)")

    # Apply Filters — each active widget narrows the copy further.
    filtered_df = df.copy()

    if title_search:
        # Case-insensitive substring match on the title.
        filtered_df = filtered_df[filtered_df['title'].str.contains(title_search, case=False, na=False)]
        logger.debug(f"Applied title search filter: '{title_search}'")

    if selected_authors:
        # Keep rows where ANY creator is among the selected authors.
        filtered_df = filtered_df[filtered_df['creators'].apply(lambda x: any(creator in selected_authors for creator in x))]
        logger.debug(f"Applied author filter: {selected_authors}")

    if selected_subjects:
        filtered_df = filtered_df[filtered_df['subjects'].apply(lambda x: any(subject in selected_subjects for subject in x))]
        logger.debug(f"Applied subject filter: {selected_subjects}")

    if selected_libraries:
        filtered_df = filtered_df[filtered_df['virtual_libs'].apply(lambda x: any(lib in selected_libraries for lib in x))]
        logger.debug(f"Applied library filter: {selected_libraries}")

    if selected_languages:
        filtered_df = filtered_df[filtered_df['language'].isin(selected_languages)]
        logger.debug(f"Applied language filter: {selected_languages}")

    if selected_years:
        # Inclusive year range from the slider.
        filtered_df = filtered_df[(filtered_df['date'] >= selected_years[0]) & (filtered_df['date'] <= selected_years[1])]
        logger.debug(f"Applied publication year range filter: {selected_years}")

    if identifier_search:
        # Flatten each identifiers dict to "key:value key:value ..." so a
        # plain substring search covers both keys and values.
        idents = filtered_df['identifiers']
        idents_stringified = idents.apply(
            lambda x: ' '.join(f"{k}:{v}" for k, v in x.items()) if isinstance(x, dict) else str(x)
        )
        filtered_df = filtered_df[idents_stringified.str.contains(identifier_search)]

    return filtered_df
|
ebk/streamlit/utils.py
DELETED
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
import zipfile
|
|
4
|
-
from io import BytesIO
|
|
5
|
-
import streamlit as st
|
|
6
|
-
import logging
|
|
7
|
-
import streamlit as st
|
|
8
|
-
from typing import List, Dict
|
|
9
|
-
from collections import Counter
|
|
10
|
-
from pathlib import Path
|
|
11
|
-
|
|
12
|
-
logger = logging.getLogger(__name__)
|
|
13
|
-
|
|
14
|
-
def load_metadata(metadata_content: BytesIO) -> list:
    """
    Parse the uploaded metadata.json content.

    Args:
        metadata_content: In-memory file object holding JSON bytes.

    Returns:
        list: The parsed metadata entries, or an empty list on any error
        (the error is also surfaced to the user via Streamlit).
    """
    try:
        parsed = json.load(metadata_content)
        logger.debug("Metadata loaded successfully.")
        return parsed
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        logger.error(f"JSONDecodeError: {e}")
        return []
    except Exception as e:
        st.error(f"Unexpected error loading metadata.json: {e}")
        logger.error(f"Unexpected error: {e}")
        return []
|
|
31
|
-
|
|
32
|
-
def extract_zip(zip_bytes: BytesIO) -> dict:
    """
    Extracts a ZIP file in-memory and returns a dictionary of its contents.
    Keys are normalized member paths, values are BytesIO objects with the data.

    Args:
        zip_bytes: In-memory ZIP archive.

    Returns:
        dict: Mapping of normalized member path -> BytesIO content.
        Empty dict on any error (reported to the user via Streamlit).
    """
    extracted_files = {}
    try:
        with zipfile.ZipFile(zip_bytes) as z:
            for file_info in z.infolist():
                if file_info.is_dir():
                    continue
                normalized_path = os.path.normpath(file_info.filename)
                # BUG FIX: the old guard compared commonprefix(path,
                # basename(path)) != "", which silently dropped most nested
                # members and was no real traversal check.  Reject absolute
                # paths and any member that escapes the root via "..".
                if os.path.isabs(normalized_path) or ".." in normalized_path.split(os.sep):
                    logger.warning(f"Skipped potentially unsafe member: {normalized_path}")
                    continue
                with z.open(file_info) as f:
                    extracted_files[normalized_path] = BytesIO(f.read())
                logger.debug(f"Extracted: {normalized_path}")
        logger.debug("ZIP archive extracted successfully.")
        return extracted_files
    except zipfile.BadZipFile:
        st.error("The uploaded file is not a valid ZIP archive.")
        logger.error("BadZipFile encountered.")
        return {}
    except Exception as e:
        st.error(f"Error extracting ZIP file: {e}")
        logger.error(f"Exception during ZIP extraction: {e}")
        return {}
|
|
58
|
-
|
ebk/utils.py
DELETED
|
@@ -1,311 +0,0 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import os
|
|
3
|
-
from collections import Counter
|
|
4
|
-
from pathlib import Path
|
|
5
|
-
from typing import List, Dict, Optional
|
|
6
|
-
import logging
|
|
7
|
-
from jmespath import search as jmes_search
|
|
8
|
-
import sys
|
|
9
|
-
from rich.console import Console
|
|
10
|
-
from rich.table import Table
|
|
11
|
-
from rich.markdown import Markdown
|
|
12
|
-
from rich import print
|
|
13
|
-
import re
|
|
14
|
-
|
|
15
|
-
RICH_AVAILABLE = True
|
|
16
|
-
|
|
17
|
-
logger = logging.getLogger(__name__)
|
|
18
|
-
|
|
19
|
-
def search_jmes(lib_dir: str, expression: str):
    """
    Search entries in an ebk library using a JMESPath expression. This is a
    very flexible way to search for entries in the library, but may have a
    steep learning curve.

    Args:
        lib_dir (str): Path to the ebk library directory
        expression (str): Search expression (JMESPath)

    Returns:
        Any: Result of the JMESPath search ([] if the library can't load)
    """
    entries = load_library(lib_dir)
    if not entries:
        logger.error(f"Failed to load the library at {lib_dir}")
        return []

    return jmes_search(expression, entries)
|
|
40
|
-
|
|
41
|
-
def search_regex(lib_dir: str, expression: str, fields: List[str] = ["title"]):
    """
    Return library entries whose selected string fields match *expression*.

    Args:
        lib_dir: Path to the ebk library directory.
        expression: Regular expression to search for.
        fields: Entry keys to inspect (only non-empty string values count).

    Returns:
        List[Dict]: Entries with at least one matching field.
    """
    def _matches(entry: Dict) -> bool:
        # An entry qualifies as soon as one selected field matches.
        return any(
            isinstance(value, str) and re.search(expression, value)
            for key, value in entry.items()
            if key in fields and value
        )

    return [entry for entry in load_library(lib_dir) if _matches(entry)]
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def load_library(lib_dir: str) -> List[Dict]:
    """
    Load an ebk library from the specified directory.

    The library lives in a single ``metadata.json`` file inside *lib_dir*.

    Args:
        lib_dir (str): Path to the ebk library directory

    Returns:
        List[Dict]: List of entries in the library; [] when the file is
        missing or contains invalid JSON (errors are logged, not raised).
    """
    metadata_path = Path(lib_dir) / "metadata.json"
    if not metadata_path.exists():
        logger.error(f"Metadata file not found at {metadata_path}")
        return []

    with open(metadata_path, "r") as f:
        try:
            return json.load(f)
        except json.JSONDecodeError as e:
            logger.error(f"Error decoding JSON from {metadata_path}: {e}")
            return []
|
|
78
|
-
|
|
79
|
-
def get_library_statistics(lib_dir: str,
                           keywords: List[str] = None) -> Dict:
    """
    Compute statistics for an ebk library.

    Args:
        lib_dir (str): Path to the ebk library directory.
        keywords (List[str]): Keywords to count in titles (default: None,
            which disables keyword counting).

    Returns:
        dict: A dictionary with statistics about the library; empty dict
        if the library cannot be loaded.
    """
    # BUG FIX: with the default keywords=None, the per-title loop below
    # raised "TypeError: 'NoneType' object is not iterable".
    keywords = keywords or []

    # Load the library
    library = load_library(lib_dir)
    if not library:
        logger.error(f"Failed to load the library at {lib_dir}")
        return {}

    # Initialize counters and statistics
    stats = {
        "total_entries": 0,
        "languages": Counter(),
        "creators_count": 0,
        "average_creators_per_entry": 0,
        "most_creators_in_entry": 0,
        "least_creators_in_entry": 0,
        "top_creators": Counter(),
        "subjects": Counter(),
        "most_common_subjects": [],
        "average_title_length": 0,
        "longest_title": "",
        "shortest_title": "",
        "virtual_libs": Counter(),
        "titles_with_keywords": Counter(),
    }

    title_lengths = []

    for entry in library:
        # Total entries
        stats["total_entries"] += 1

        # Languages
        language = entry.get("language", "unknown")
        stats["languages"][language] += 1

        # Creators
        creators = entry.get("creators", [])
        stats["creators_count"] += len(creators)
        stats["top_creators"].update(creators)
        stats["most_creators_in_entry"] = max(stats["most_creators_in_entry"], len(creators))
        if stats["least_creators_in_entry"] == 0 or len(creators) < stats["least_creators_in_entry"]:
            stats["least_creators_in_entry"] = len(creators)

        # Subjects
        subjects = entry.get("subjects", [])
        stats["subjects"].update(subjects)

        # Titles
        title = entry.get("title", "")
        if title:
            title_lengths.append(len(title))
            if len(title) > len(stats["longest_title"]):
                stats["longest_title"] = title
            if not stats["shortest_title"] or len(title) < len(stats["shortest_title"]):
                stats["shortest_title"] = title

            # Keywords (case-insensitive substring match against the title)
            for keyword in keywords:
                if keyword.lower() in title.lower():
                    stats["titles_with_keywords"][keyword] += 1

        # Virtual Libraries
        virtual_libs = entry.get("virtual_libs", [])
        stats["virtual_libs"].update(virtual_libs)

    # Post-process statistics (library is non-empty here, so
    # total_entries >= 1 and the division is safe).
    stats["average_creators_per_entry"] = round(stats["creators_count"] / stats["total_entries"], 2)
    stats["average_title_length"] = round(sum(title_lengths) / len(title_lengths), 2) if title_lengths else 0
    stats["most_common_subjects"] = stats["subjects"].most_common(5)
    stats["languages"] = dict(stats["languages"])
    stats["top_creators"] = dict(stats["top_creators"].most_common(5))
    stats["titles_with_keywords"] = dict(stats["titles_with_keywords"])
    stats["virtual_libs"] = dict(stats["virtual_libs"])

    return stats
|
|
166
|
-
|
|
167
|
-
def get_unique_filename(target_path: str) -> str:
    """
    If target_path already exists, generate a new path with (1), (2), etc.
    Otherwise just return target_path.

    Example:
        'myfile.pdf' -> if it exists -> 'myfile (1).pdf' -> if that exists -> 'myfile (2).pdf'
    """
    if not os.path.exists(target_path):
        return target_path

    stem, suffix = os.path.splitext(target_path)
    # Probe increasing counters until we find a free name.
    counter = 1
    while True:
        candidate = f"{stem} ({counter}){suffix}"
        if not os.path.exists(candidate):
            return candidate
        counter += 1
|
|
186
|
-
|
|
187
|
-
def enumerate_ebooks(metadata_list: List[Dict],
                     lib_path: Path,
                     indices: Optional[List[int]] = None,
                     detailed: Optional[bool] = False) -> None:
    """
    Enumerates and displays the ebooks in the specified library directory.

    For each ebook, displays its index, title, creators, and a clickable link
    to the first associated file. With ``detailed=True`` extra metadata
    columns (subjects, language, date, identifiers, publisher, file size,
    virtual libraries, UID) are shown as well.

    Args:
        metadata_list (List[Dict]): List of metadata dictionaries for each ebook.
        lib_path (Path): Library root; 'file_paths' entries are relative to it.
        indices (Optional[List[int]]): Indices to display (default: None = all).
        detailed (Optional[bool]): Include extra metadata columns when True.
    """
    console = Console()

    total_books = len(metadata_list)
    if total_books == 0:
        console.print("[yellow]No ebooks found in the library.[/yellow]")
        return

    if indices is None:
        indices = range(total_books)

    console.print(f"📚 [bold]Found {total_books} ebook(s) in the library:[/bold]\n")

    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("#", style="dim")
    table.add_column("Title")
    table.add_column("Creators")
    table.add_column("Link")

    if detailed:
        table.add_column("Subjects")
        table.add_column("Language")
        table.add_column("Date")
        table.add_column("Identifiers")
        table.add_column("Publisher")
        table.add_column("File Size")
        table.add_column("Virtual Libraries")
        table.add_column("UID")

    for i, book in enumerate(metadata_list):

        if i not in indices:
            continue

        title = book.get('title', '-')
        creators = book.get('creators', ['-'])
        if not isinstance(creators, list):
            creators = [str(creators)]
        creators_str = ', '.join(creators)

        ebook_paths = book.get('file_paths', [])
        ebook_path = ebook_paths[0] if ebook_paths else None

        file_size = "-"
        if ebook_path:
            ebook_full_path = lib_path / ebook_path
            if ebook_full_path.exists():
                # Resolve the path to an absolute path
                resolved_path = ebook_full_path.resolve()
                # Convert Windows paths to URL format if necessary
                if sys.platform.startswith('win'):
                    ebook_link = resolved_path.as_uri()
                else:
                    ebook_link = f"file://{resolved_path}"
                link_display = f"[link={ebook_link}]🔗 Open[/link]"
                file_size = f"{resolved_path.stat().st_size:,} B"
            else:
                link_display = "[red]🔗 Not Found[/red]"
        else:
            link_display = "[red]🔗 Unknown[/red]"

        row = [str(i), title, creators_str, link_display]

        if detailed:
            # BUG FIX: the detailed columns were declared above but never
            # populated, so detailed mode rendered eight empty cells per row.
            identifiers = book.get('identifiers', {})
            if isinstance(identifiers, dict):
                identifiers_str = ', '.join(f"{k}: {v}" for k, v in identifiers.items())
            else:
                identifiers_str = str(identifiers)
            row.extend([
                ', '.join(book.get('subjects', [])),
                str(book.get('language', '-')),
                str(book.get('date', '-')),
                identifiers_str,
                str(book.get('publisher', '-')),
                file_size,
                ', '.join(book.get('virtual_libs', [])),
                str(book.get('unique_id', '-')),
            ])

        table.add_row(*row)

    console.print(table)
    console.print("\n")  # Add some spacing
|
|
264
|
-
|
|
265
|
-
def get_index_by_unique_id(lib_dir: str, id: str) -> int:
    """
    Get the index of an entry in the library by its unique ID.

    Args:
        lib_dir (str): Path to the ebk library directory.
        id (str): Unique ID to search for.

    Returns:
        int: Index of the entry with the specified unique ID. -1 if not found.

    Raises:
        ValueError: If the library cannot be loaded.
    """
    entries = load_library(lib_dir)
    if not entries:
        raise ValueError("Failed to load the library.")

    # First matching index, or -1 when no entry carries this ID.
    return next(
        (i for i, entry in enumerate(entries) if entry.get('unique_id') == id),
        -1,
    )
|
|
289
|
-
|
|
290
|
-
def print_json_as_table(data):
    """
    Pretty print JSON data as a table using Rich.

    Args:
        data: JSON data to print
    """
    # Without Rich, fall back to plain indented JSON.
    if not RICH_AVAILABLE:
        print(json.dumps(data, indent=2))
        return

    if not isinstance(data, dict):
        # Non-mapping payloads have no key/value structure to tabulate.
        print(data)
        return

    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("Key", style="dim", width=20)
    table.add_column("Value", width=80)
    for key, value in data.items():
        table.add_row(str(key), str(value))
    Console().print(table)
|
|
311
|
-
|