PyPI - ebk - Versions diffs - 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl - Mend

ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ebk might be problematic. Click here for more details.

Files changed (84) hide show

ebk/__init__.py +35 -0
ebk/ai/__init__.py +23 -0
ebk/ai/knowledge_graph.py +443 -0
ebk/ai/llm_providers/__init__.py +21 -0
ebk/ai/llm_providers/base.py +230 -0
ebk/ai/llm_providers/ollama.py +362 -0
ebk/ai/metadata_enrichment.py +396 -0
ebk/ai/question_generator.py +328 -0
ebk/ai/reading_companion.py +224 -0
ebk/ai/semantic_search.py +434 -0
ebk/ai/text_extractor.py +394 -0
ebk/cli.py +2828 -680
ebk/config.py +260 -22
ebk/db/__init__.py +37 -0
ebk/db/migrations.py +180 -0
ebk/db/models.py +526 -0
ebk/db/session.py +144 -0
ebk/decorators.py +132 -0
ebk/exports/base_exporter.py +218 -0
ebk/exports/html_library.py +1390 -0
ebk/exports/html_utils.py +117 -0
ebk/exports/hugo.py +7 -3
ebk/exports/jinja_export.py +287 -0
ebk/exports/multi_facet_export.py +164 -0
ebk/exports/symlink_dag.py +479 -0
ebk/extract_metadata.py +76 -7
ebk/library_db.py +899 -0
ebk/plugins/__init__.py +42 -0
ebk/plugins/base.py +502 -0
ebk/plugins/hooks.py +444 -0
ebk/plugins/registry.py +500 -0
ebk/repl/__init__.py +9 -0
ebk/repl/find.py +126 -0
ebk/repl/grep.py +174 -0
ebk/repl/shell.py +1677 -0
ebk/repl/text_utils.py +320 -0
ebk/search_parser.py +413 -0
ebk/server.py +1633 -0
ebk/services/__init__.py +11 -0
ebk/services/import_service.py +442 -0
ebk/services/tag_service.py +282 -0
ebk/services/text_extraction.py +317 -0
ebk/similarity/__init__.py +77 -0
ebk/similarity/base.py +154 -0
ebk/similarity/core.py +445 -0
ebk/similarity/extractors.py +168 -0
ebk/similarity/metrics.py +376 -0
ebk/vfs/__init__.py +101 -0
ebk/vfs/base.py +301 -0
ebk/vfs/library_vfs.py +124 -0
ebk/vfs/nodes/__init__.py +54 -0
ebk/vfs/nodes/authors.py +196 -0
ebk/vfs/nodes/books.py +480 -0
ebk/vfs/nodes/files.py +155 -0
ebk/vfs/nodes/metadata.py +385 -0
ebk/vfs/nodes/root.py +100 -0
ebk/vfs/nodes/similar.py +165 -0
ebk/vfs/nodes/subjects.py +184 -0
ebk/vfs/nodes/tags.py +371 -0
ebk/vfs/resolver.py +228 -0
ebk-0.3.2.dist-info/METADATA +755 -0
ebk-0.3.2.dist-info/RECORD +69 -0
{ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
ebk/imports/__init__.py +0 -0
ebk/imports/calibre.py +0 -144
ebk/imports/ebooks.py +0 -116
ebk/llm.py +0 -58
ebk/manager.py +0 -44
ebk/merge.py +0 -308
ebk/streamlit/__init__.py +0 -0
ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
ebk/streamlit/app.py +0 -185
ebk/streamlit/display.py +0 -168
ebk/streamlit/filters.py +0 -151
ebk/streamlit/utils.py +0 -58
ebk/utils.py +0 -311
ebk-0.1.0.dist-info/METADATA +0 -457
ebk-0.1.0.dist-info/RECORD +0 -29
{ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
{ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0

ebk/search_parser.py ADDED Viewed

@@ -0,0 +1,413 @@
+"""
+Advanced search query parser for ebk.
+Supports field-specific searches, boolean logic, and comparison operators.
+Examples:
+    title:Python rating:>=4 format:pdf
+    author:"Donald Knuth" series:TAOCP
+    tag:programming favorite:true NOT java
+    "machine learning" OR "deep learning"
+"""
+import re
+from typing import List, Dict, Any, Optional, Tuple
+from dataclasses import dataclass, field
@dataclass
class SearchToken:
    """One lexical unit produced while scanning a search query.

    Attributes:
        type: Token category: 'field', 'text', 'operator' or 'phrase'.
        value: The token's text (field value, word, phrase body, or
            operator keyword).
        field: Field name for 'field' tokens; None otherwise.
        operator: Comparison operator attached to a field token
            ('=', '>', '>=', '<', '<=', or '-' for a range).
        negated: True when the token was prefixed by NOT or '-'.
    """

    # Positional/required members first; the order is part of the
    # generated __init__ signature and must not change.
    type: str
    value: str

    # Optional members.
    field: Optional[str] = None
    operator: Optional[str] = None
    negated: bool = False
@dataclass
class ParsedQuery:
    """Structured result of parsing a search query.

    Attributes:
        tokens: Tokens in the order they were read from the query.
        fts_query: Combined full-text query string (title/description/text),
            or None when the query has no full-text terms.
        filters: Exact/structured filter conditions keyed by name
            (language, format, etc.).
    """

    tokens: List["SearchToken"] = field(default_factory=list)
    fts_query: Optional[str] = None
    filters: Dict[str, Any] = field(default_factory=dict)

    def has_fts_terms(self) -> bool:
        """Return True when a non-empty full-text query was built."""
        if self.fts_query:
            return True
        return False

    def has_filters(self) -> bool:
        """Return True when at least one filter entry is present."""
        if self.filters:
            return True
        return False
class SearchQueryParser:
    """
    Parser for advanced search queries with field specifiers and boolean logic.

    Syntax:
        - Field searches: field:value (e.g., title:Python, author:Knuth)
        - Phrases: "quoted text" (e.g., "machine learning")
        - Boolean: AND (implicit), OR (explicit), NOT/-prefix (negation)
        - Comparisons: rating:>=4, rating:3-5
        - Multiple fields: title:python format:pdf (implicit AND)

    Field mappings:
        - title: Book title
        - author: Author names
        - tag/subject: Subjects/tags
        - description: Book description
        - series: Series name
        - publisher: Publisher name
        - language: Language code (exact match)
        - format: File format (exact match)
        - rating: Personal rating (numeric comparison)
        - favorite: Favorite status (boolean)
        - status: Reading status (exact match)

    Notes:
        - Field names that are neither FTS fields nor filter fields are
          still tokenized, but contribute nothing to the FTS query or the
          filters.
        - Negation is honoured for text/phrase terms, FTS column terms,
          subject/author filters and boolean filters. It is NOT applied to
          numeric or exact-match filters (rating, language, format, series,
          publisher, status): their filter shape has no way to express it.
    """

    # Field aliases
    FIELD_ALIASES = {
        'tag': 'subject',
        'tags': 'subject',
        'subjects': 'subject',
        'lang': 'language',
        'fmt': 'format',
        'type': 'format',
    }
    # Fields that support FTS (full-text search)
    FTS_FIELDS = {'title', 'description', 'text', 'author', 'subject'}
    # Fields that are exact filters (not FTS)
    FILTER_FIELDS = {'language', 'format', 'series', 'publisher', 'rating', 'favorite', 'status'}
    # Numeric fields that support comparison operators
    NUMERIC_FIELDS = {'rating'}
    # Boolean fields
    BOOLEAN_FIELDS = {'favorite'}

    # Comparison operators that may be written into SQL, mapped to the suffix
    # of their bind-parameter name. Doubles as a whitelist: an operator that
    # is not a key here is never interpolated into a statement.
    _COMPARISON_PARAM_SUFFIX = {
        '=': 'eq',
        '>': 'gt',
        '>=': 'gteq',
        '<': 'lt',
        '<=': 'lteq',
    }

    # EXISTS sub-queries for the many-to-many filters; ``{param}`` is replaced
    # by the bind-parameter name.
    _SUBJECT_EXISTS = (
        "EXISTS (SELECT 1 FROM book_subjects bs "
        "JOIN subjects s ON bs.subject_id = s.id "
        "WHERE bs.book_id = books.id AND s.name LIKE :{param})"
    )
    _AUTHOR_EXISTS = (
        "EXISTS (SELECT 1 FROM book_authors ba "
        "JOIN authors a ON ba.author_id = a.id "
        "WHERE ba.book_id = books.id AND a.name LIKE :{param})"
    )

    def __init__(self):
        # field:value, with an optional comparison operator and an optionally
        # double-quoted value.
        self.field_pattern = re.compile(r'(\w+):(>=|<=|>|<|=)?("[^"]+"|[\S]+)')
        # "quoted phrase"
        self.phrase_pattern = re.compile(r'"([^"]+)"')
        # Stand-alone boolean keywords.
        self.operator_pattern = re.compile(r'\b(AND|OR|NOT)\b', re.IGNORECASE)

    def parse(self, query: str) -> "ParsedQuery":
        """
        Parse search query into structured format.

        Args:
            query: Search query string

        Returns:
            ParsedQuery with tokens, FTS query, and filters. An empty or
            whitespace-only query yields an empty ParsedQuery.
        """
        if not query or not query.strip():
            return ParsedQuery()

        query = query.strip()
        length = len(query)
        tokens = []
        pos = 0

        while pos < length:
            # Skip whitespace
            if query[pos].isspace():
                pos += 1
                continue

            # NOT keyword or '-' prefix negates the token that follows.
            negated = False
            if query[pos:pos + 4].upper() == 'NOT ' or query[pos] == '-':
                negated = True
                pos += 1 if query[pos] == '-' else 4
                while pos < length and query[pos].isspace():
                    pos += 1

            # Explicit OR operator
            if query[pos:pos + 3].upper() == 'OR ':
                tokens.append(SearchToken(type='operator', value='OR'))
                pos += 3
                continue

            # Explicit AND operator (AND is otherwise implicit)
            if query[pos:pos + 4].upper() == 'AND ':
                tokens.append(SearchToken(type='operator', value='AND'))
                pos += 4
                continue

            # field:value
            field_match = self.field_pattern.match(query, pos)
            if field_match:
                field_name = field_match.group(1).lower()
                field_name = self.FIELD_ALIASES.get(field_name, field_name)
                tokens.append(SearchToken(
                    type='field',
                    field=field_name,
                    value=field_match.group(3).strip('"'),
                    operator=field_match.group(2) or '=',
                    negated=negated,
                ))
                pos = field_match.end()
                continue

            # "quoted phrase"
            phrase_match = self.phrase_pattern.match(query, pos)
            if phrase_match:
                tokens.append(SearchToken(
                    type='phrase',
                    value=phrase_match.group(1),
                    negated=negated,
                ))
                pos = phrase_match.end()
                continue

            # Bare word: everything up to the next whitespace.
            end_pos = pos
            while end_pos < length and not query[end_pos].isspace():
                end_pos += 1
            if end_pos > pos:
                tokens.append(SearchToken(
                    type='text',
                    value=query[pos:end_pos],
                    negated=negated,
                ))
                pos = end_pos
                continue

            # Nothing matched (e.g. a trailing '-'); step forward so the
            # loop always terminates.
            pos += 1

        parsed = ParsedQuery(tokens=tokens)
        self._build_fts_and_filters(parsed)
        return parsed

    def _build_fts_and_filters(self, parsed: "ParsedQuery") -> None:
        """
        Build FTS query and filters from parsed tokens.

        Modifies ``parsed`` in place: ``filters`` is always replaced, and
        ``fts_query`` is set only when at least one full-text term exists.
        """
        fts_parts: List[str] = []  # Parts for FTS5 query
        filters: Dict[str, Any] = {}

        for token in parsed.tokens:
            if token.type == 'operator':
                # AND is implicit in the joined query, so only OR is emitted.
                # An OR needs a left operand and must not repeat.
                if token.value == 'OR' and fts_parts and fts_parts[-1] != 'OR':
                    fts_parts.append('OR')
            elif token.type == 'field':
                self._add_field_token(token, fts_parts, filters)
            elif token.type in ('text', 'phrase'):
                term = token.value
                if ' ' in term or token.type == 'phrase':
                    # Quoted phrase for FTS5
                    term = f'"{term}"'
                fts_parts.append(f"NOT {term}" if token.negated else term)

        # An OR whose right-hand operand turned out to be a filter (or was
        # missing) would leave the FTS expression syntactically incomplete.
        while fts_parts and fts_parts[-1] == 'OR':
            fts_parts.pop()

        if fts_parts:
            parsed.fts_query = ' '.join(fts_parts)
        parsed.filters = filters

    def _add_field_token(self, token, fts_parts: List[str], filters: Dict[str, Any]) -> None:
        """Route one 'field' token into either the FTS parts or the filters."""
        name = token.field
        value = token.value

        if name in self.FTS_FIELDS:
            if name == 'subject':
                # Subjects are handled separately (join table)
                filters.setdefault('subjects', []).append((value, token.negated))
            elif name == 'author':
                # Authors are not in FTS table, handle via SQL join
                filters.setdefault('authors', []).append((value, token.negated))
            elif value:
                # title, description, text - these ARE in FTS table.
                # 'text' maps to the 'extracted_text' column name.
                column = 'extracted_text' if name == 'text' else name
                if any(ch.isspace() for ch in value):
                    # Keep a multi-word value as one phrase so every word
                    # stays scoped to this column.
                    value = f'"{value}"'
                term = f"{column}:{value}"
                fts_parts.append(f"NOT {term}" if token.negated else term)
        elif name in self.FILTER_FIELDS:
            if name in self.NUMERIC_FIELDS:
                bounds = self._parse_numeric_filter(value, token.operator)
                if bounds:
                    # Merge so "rating:>=3 rating:<=5" keeps both bounds;
                    # an unparseable value adds no filter at all.
                    filters.setdefault(name, {}).update(bounds)
            elif name in self.BOOLEAN_FIELDS:
                truthy = value.lower() in ('true', 'yes', '1')
                # NOT / '-' flips the requested boolean.
                filters[name] = truthy != token.negated
            else:
                # Exact match
                filters[name] = value

    def _parse_numeric_filter(self, value: str, operator: str) -> Dict[str, Any]:
        """
        Parse numeric filter with comparison operator.

        Examples:
            rating:5 -> {'=': 5}
            rating:>=4 -> {'>=': 4}
            rating:3-5 -> {'>=': 3, '<=': 5}
            rating:5-3 -> {'>=': 3, '<=': 5}

        Returns:
            Mapping of comparison operator to float bound, or an empty dict
            when ``value`` is not numeric.
        """
        # Range form (e.g., 3-5); only meaningful without an explicit operator.
        if '-' in value and operator == '=':
            parts = value.split('-')
            if len(parts) == 2:
                try:
                    low, high = sorted(float(part.strip()) for part in parts)
                except ValueError:
                    # Not a range (e.g. "-1" splits into '' and '1'); fall
                    # through to the single-value parse below.
                    pass
                else:
                    return {'>=': low, '<=': high}

        # Single value with operator
        try:
            return {operator: float(value)}
        except ValueError:
            return {}

    @staticmethod
    def _add_link_conditions(template: str, prefix: str, entries,
                             conditions: List[str], params: Dict[str, Any]) -> None:
        """Append one (NOT) EXISTS clause per (value, negated) entry."""
        for index, (needle, negated) in enumerate(entries):
            param_name = f'{prefix}_{index}'
            clause = template.format(param=param_name)
            conditions.append(f"NOT {clause}" if negated else clause)
            params[param_name] = f"%{needle}%"

    def to_sql_conditions(self, parsed: "ParsedQuery") -> Tuple[str, Dict[str, Any]]:
        """
        Convert parsed query to SQL WHERE conditions.

        Returns:
            Tuple of (where_clause, params_dict). ``where_clause`` is the
            conditions joined with ' AND ' (empty string when there are
            none); ``params_dict`` maps bind-parameter names to values.

        This is used by Library.search() to build the final SQL query.
        """
        conditions: List[str] = []
        params: Dict[str, Any] = {}

        for name, value in parsed.filters.items():
            if name == 'subjects':
                # Subject filtering (many-to-many)
                self._add_link_conditions(
                    self._SUBJECT_EXISTS, 'subject', value, conditions, params)
            elif name == 'authors':
                # Author filtering (many-to-many)
                self._add_link_conditions(
                    self._AUTHOR_EXISTS, 'author', value, conditions, params)
            elif name == 'rating':
                # Numeric comparison via personal_metadata
                for op, bound in value.items():
                    suffix = self._COMPARISON_PARAM_SUFFIX.get(op)
                    if suffix is None:
                        # The operator is written into the SQL text itself,
                        # so anything outside the whitelist is dropped.
                        continue
                    param_name = f'rating_{suffix}'
                    conditions.append(
                        "EXISTS (SELECT 1 FROM personal_metadata pm "
                        f"WHERE pm.book_id = books.id AND pm.rating {op} :{param_name})"
                    )
                    params[param_name] = bound
            elif name == 'favorite':
                # Boolean via personal_metadata
                conditions.append(
                    "EXISTS (SELECT 1 FROM personal_metadata pm "
                    "WHERE pm.book_id = books.id AND pm.favorite = :favorite)"
                )
                params['favorite'] = value
            elif name == 'status':
                # Reading status via personal_metadata
                conditions.append(
                    "EXISTS (SELECT 1 FROM personal_metadata pm "
                    "WHERE pm.book_id = books.id AND pm.reading_status = :status)"
                )
                params['status'] = value
            elif name == 'format':
                # File format, compared case-insensitively
                conditions.append(
                    "EXISTS (SELECT 1 FROM files f "
                    "WHERE f.book_id = books.id AND LOWER(f.format) = :format)"
                )
                params['format'] = value.lower()
            elif name == 'language':
                conditions.append("books.language = :language")
                params['language'] = value
            elif name == 'series':
                conditions.append("books.series LIKE :series")
                params['series'] = f"%{value}%"
            elif name == 'publisher':
                conditions.append("books.publisher LIKE :publisher")
                params['publisher'] = f"%{value}%"

        return ' AND '.join(conditions), params
# Module-level shortcut so callers need not manage a parser instance.
def parse_search_query(query: str) -> ParsedQuery:
    """Parse ``query`` with a fresh SearchQueryParser and return the result."""
    return SearchQueryParser().parse(query)

ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

Potentially problematic release.

ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl