PyPI - ebk - Versions diffs - 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl - Mend

ebk 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ebk might be problematic. Click here for more details.

Files changed (61) hide show

ebk/ai/__init__.py +23 -0
ebk/ai/knowledge_graph.py +443 -0
ebk/ai/llm_providers/__init__.py +21 -0
ebk/ai/llm_providers/base.py +230 -0
ebk/ai/llm_providers/ollama.py +362 -0
ebk/ai/metadata_enrichment.py +396 -0
ebk/ai/question_generator.py +328 -0
ebk/ai/reading_companion.py +224 -0
ebk/ai/semantic_search.py +434 -0
ebk/ai/text_extractor.py +394 -0
ebk/cli.py +1097 -9
ebk/db/__init__.py +37 -0
ebk/db/migrations.py +180 -0
ebk/db/models.py +526 -0
ebk/db/session.py +144 -0
ebk/exports/__init__.py +0 -0
ebk/exports/base_exporter.py +218 -0
ebk/exports/html_library.py +1390 -0
ebk/exports/html_utils.py +117 -0
ebk/exports/hugo.py +59 -0
ebk/exports/jinja_export.py +287 -0
ebk/exports/multi_facet_export.py +164 -0
ebk/exports/symlink_dag.py +479 -0
ebk/exports/zip.py +25 -0
ebk/library_db.py +155 -0
ebk/repl/__init__.py +9 -0
ebk/repl/find.py +126 -0
ebk/repl/grep.py +174 -0
ebk/repl/shell.py +1677 -0
ebk/repl/text_utils.py +320 -0
ebk/services/__init__.py +11 -0
ebk/services/import_service.py +442 -0
ebk/services/tag_service.py +282 -0
ebk/services/text_extraction.py +317 -0
ebk/similarity/__init__.py +77 -0
ebk/similarity/base.py +154 -0
ebk/similarity/core.py +445 -0
ebk/similarity/extractors.py +168 -0
ebk/similarity/metrics.py +376 -0
ebk/vfs/__init__.py +101 -0
ebk/vfs/base.py +301 -0
ebk/vfs/library_vfs.py +124 -0
ebk/vfs/nodes/__init__.py +54 -0
ebk/vfs/nodes/authors.py +196 -0
ebk/vfs/nodes/books.py +480 -0
ebk/vfs/nodes/files.py +155 -0
ebk/vfs/nodes/metadata.py +385 -0
ebk/vfs/nodes/root.py +100 -0
ebk/vfs/nodes/similar.py +165 -0
ebk/vfs/nodes/subjects.py +184 -0
ebk/vfs/nodes/tags.py +371 -0
ebk/vfs/resolver.py +228 -0
{ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/METADATA +1 -1
ebk-0.3.2.dist-info/RECORD +69 -0
ebk-0.3.2.dist-info/entry_points.txt +2 -0
ebk-0.3.2.dist-info/top_level.txt +1 -0
ebk-0.3.1.dist-info/RECORD +0 -19
ebk-0.3.1.dist-info/entry_points.txt +0 -6
ebk-0.3.1.dist-info/top_level.txt +0 -2
{ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/WHEEL +0 -0
{ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/licenses/LICENSE +0 -0

ebk/ai/question_generator.py ADDED Viewed

@@ -0,0 +1,328 @@
+"""
+Question generator for active recall and comprehension testing.
+"""
+import random
+import re
+from typing import List, Dict, Any, Optional
+from dataclasses import dataclass
@dataclass
class Question:
    """A single active-recall question together with its reference answer."""
    question_text: str  # Prompt shown to the reader
    answer: str  # Reference answer a response is compared against
    question_type: str  # 'factual', 'conceptual', 'application', 'synthesis'
    difficulty: str  # 'easy', 'medium', 'hard'
    context: Optional[str] = None  # Source text the question was derived from
    hints: Optional[List[str]] = None  # None means no hints were supplied


class QuestionGenerator:
    """
    Generate questions from text for active recall and comprehension testing.

    Everything here is a regex heuristic over plain text; no NLP library is
    used, so the generated questions are only as good as the patterns below.
    """

    # (regex, question template, answer template); the templates are filled
    # with the regex groups. The first matching pattern wins.
    _FACT_PATTERNS = [
        (r"(\w+) is (\w+)", "What is {0}?", "{1}"),
        (r"(\w+) was (\w+)", "What was {0}?", "{1}"),
        (r"In (\d+), (\w+)", "When did {1} occur?", "{0}"),
    ]

    # Each pattern captures (term, definition). "refers to" is included so
    # that every phrase recognised by _is_definition can yield a question.
    _DEFINITION_PATTERNS = [
        r"(\w+) is defined as (.+)",
        r"(\w+) means (.+)",
        r"(\w+) refers to (.+)",
        r"(\w+): (.+)",
    ]

    # A list item is a bullet at the start of a line. '-' and '*' must be
    # followed by whitespace so hyphenated words ("well-known") and
    # *emphasis* markers are not mistaken for bullets.
    _LIST_ITEM_RE = re.compile(
        r'^[ \t]*(?:•[ \t]*|[\-\*][ \t]+)(\S.*)$', re.MULTILINE
    )

    # Case-sensitive markers that flag a sentence as a factual statement.
    _FACT_MARKER_RE = re.compile(
        r'\bis\b|\bwas\b|\bare\b|\bwere\b|In \d+|\bdefined as\b'
    )

    def __init__(self):
        self.question_templates = {
            'factual': [
                "What is {concept}?",
                "Define {term}.",
                "Who {action}?",
                "When did {event} occur?",
                "List the main characteristics of {topic}."
            ],
            'conceptual': [
                "Explain the relationship between {concept1} and {concept2}.",
                "Why is {concept} important?",
                "What is the main idea of {topic}?",
                "How does {concept} work?",
                "Compare and contrast {item1} and {item2}."
            ],
            'application': [
                "How would you apply {concept} to {scenario}?",
                "Give an example of {concept} in practice.",
                "What would happen if {condition}?",
                "How could {concept} be used to solve {problem}?"
            ],
            'synthesis': [
                "What conclusions can you draw from {evidence}?",
                "How would you combine {concept1} and {concept2}?",
                "What pattern emerges from {data}?",
                "Predict the outcome if {scenario}."
            ]
        }

    def generate_from_text(self, text: str, num_questions: int = 5) -> List[Question]:
        """Generate up to ``num_questions`` questions from a text passage.

        Up to half of the budget (rounded down) is spent on factual
        questions; whatever remains is filled with conceptual questions, so
        odd budgets and passages with few extractable facts can still reach
        ``num_questions``.

        Returns an empty list for empty text or a non-positive budget.
        """
        if not text or num_questions <= 0:
            return []

        sentences = self._split_sentences(text)
        key_terms = self._extract_key_terms(text)
        facts = self._extract_facts(sentences)

        questions: List[Question] = []

        # Factual questions: keep scanning facts until the quota is met,
        # because not every fact matches a question pattern.
        factual_quota = num_questions // 2
        for fact in facts:
            if len(questions) >= factual_quota:
                break
            question = self._create_factual_question(fact)
            if question:
                questions.append(question)

        # Conceptual questions take the rest of the budget.
        remaining = num_questions - len(questions)
        for term in key_terms[:remaining]:
            questions.append(self._create_conceptual_question(term, text))

        return questions

    def generate_from_highlights(self, highlights: List[str]) -> List[Question]:
        """Generate one question per non-blank user highlight.

        A highlight is tried as a definition, then as a list, and finally
        falls back to a generic explanation question, so a highlight that
        merely looks like a definition or list is never silently dropped.
        """
        questions = []
        for highlight in highlights:
            if not highlight or not highlight.strip():
                continue  # Nothing to ask about

            question = None
            if self._is_definition(highlight):
                question = self._create_definition_question(highlight)
            if question is None and self._is_list(highlight):
                question = self._create_list_question(highlight)
            if question is None:
                question = self._create_explanation_question(highlight)
            questions.append(question)
        return questions

    def _create_factual_question(self, fact: str) -> Optional[Question]:
        """Create a factual question from a fact, or None if no pattern fits."""
        for pattern, question_template, answer_template in self._FACT_PATTERNS:
            match = re.search(pattern, fact, re.IGNORECASE)
            if match:
                groups = match.groups()
                return Question(
                    question_text=question_template.format(*groups),
                    answer=answer_template.format(*groups),
                    question_type='factual',
                    difficulty='easy',
                    context=fact
                )
        return None

    def _create_conceptual_question(self, term: str, context: str) -> Question:
        """Create a conceptual question about a term.

        The answer is built from the first two sentences of ``context`` that
        mention the term (case-insensitive), or a generic fallback sentence.
        """
        question_text = f"Explain the concept of {term} based on the text."

        needle = term.lower()
        # Use the shared sentence splitter so decimals such as "3.14" are not
        # cut in half, and normalise trailing punctuation before re-joining.
        matching = [
            s.rstrip('.!?')
            for s in self._split_sentences(context)
            if needle in s.lower()
        ]
        if matching:
            answer = '. '.join(matching[:2]) + '.'
        else:
            answer = f"The text discusses {term}."

        return Question(
            question_text=question_text,
            answer=answer,
            question_type='conceptual',
            difficulty='medium',
            context=context[:200]
        )

    def _create_definition_question(self, highlight: str) -> Optional[Question]:
        """Create a "Define X." question, or None if no definition pattern fits."""
        for pattern in self._DEFINITION_PATTERNS:
            match = re.search(pattern, highlight, re.IGNORECASE)
            if match:
                term, definition = match.groups()
                return Question(
                    question_text=f"Define {term}.",
                    answer=definition,
                    question_type='factual',
                    difficulty='easy',
                    context=highlight
                )
        return None

    def _create_list_question(self, highlight: str) -> Optional[Question]:
        """Create a question from a bulleted list, or None if there are no items."""
        list_items = [item.strip() for item in self._LIST_ITEM_RE.findall(highlight)]
        if list_items:
            return Question(
                question_text="List the main points mentioned.",
                answer='\n'.join(list_items),
                question_type='factual',
                difficulty='easy',
                context=highlight
            )
        return None

    def _create_explanation_question(self, highlight: str) -> Question:
        """Create an explanation question from a highlight."""
        # The text before the first period stands in for the subject; only
        # its first 50 characters appear in the prompt.
        first_sentence = highlight.split('.')[0]
        return Question(
            question_text=f"Explain the following concept: {first_sentence[:50]}...",
            answer=highlight,
            question_type='conceptual',
            difficulty='medium',
            context=highlight
        )

    def _is_definition(self, text: str) -> bool:
        """Check if text looks like a definition."""
        definition_patterns = [
            r'\bis defined as\b',
            r'\bmeans\b',
            r'\brefers to\b',
            # Colon followed by a letter. NOTE: because re.IGNORECASE is
            # applied below, this matches lower-case letters as well.
            r':\s*[A-Z]'
        ]
        return any(re.search(pattern, text, re.IGNORECASE) for pattern in definition_patterns)

    def _is_list(self, text: str) -> bool:
        """Check if text contains at least one bulleted line."""
        return bool(self._LIST_ITEM_RE.search(text))

    def _split_sentences(self, text: str) -> List[str]:
        """Split text into sentences on '.', '!' or '?' followed by whitespace."""
        sentences = re.split(r'[.!?]\s+', text)
        return [s.strip() for s in sentences if s.strip()]

    def _extract_key_terms(self, text: str) -> List[str]:
        """Extract up to 10 distinct capitalised words, in order of appearance."""
        # Simple capitalised-word extraction.
        # In production, use NLP libraries like spaCy
        words = re.findall(r'\b[A-Z][a-z]+\b', text)
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # result is the same on every run (set ordering of strings is not).
        return list(dict.fromkeys(words))[:10]

    def _extract_facts(self, sentences: List[str]) -> List[str]:
        """Return the sentences that contain a factual marker (is/was/are/...)."""
        return [s for s in sentences if self._FACT_MARKER_RE.search(s)]
class QuizBuilder:
    """
    Build and manage quizzes from questions.
    """

    def __init__(self):
        self.question_generator = QuestionGenerator()

    def create_quiz(self, questions: List["Question"],
                   quiz_type: str = 'mixed',
                   num_questions: int = 10) -> Dict[str, Any]:
        """Create a quiz from a random sample of ``questions``.

        Args:
            questions: Candidate questions.
            quiz_type: 'factual' keeps only factual questions; 'conceptual'
                keeps conceptual and synthesis questions; any other value
                keeps everything.
            num_questions: Maximum number of questions to include. Values
                below zero are treated as zero.

        Returns:
            A dict with 'quiz_id', 'questions' (answer-free payloads),
            'answers' (index -> reference answer) and 'total_questions'.
        """
        if quiz_type == 'factual':
            filtered = [q for q in questions if q.question_type == 'factual']
        elif quiz_type == 'conceptual':
            filtered = [q for q in questions if q.question_type in ['conceptual', 'synthesis']]
        else:
            filtered = list(questions)

        # random.sample raises ValueError for a negative sample size.
        sample_size = max(0, min(num_questions, len(filtered)))
        selected = random.sample(filtered, sample_size)

        return {
            'quiz_id': self._generate_quiz_id(),
            'questions': [
                {
                    'id': i,
                    'question': q.question_text,
                    'type': q.question_type,
                    'difficulty': q.difficulty,
                    'hints': q.hints or []
                }
                for i, q in enumerate(selected)
            ],
            'answers': {
                i: q.answer for i, q in enumerate(selected)
            },
            'total_questions': len(selected)
        }

    def grade_quiz(self, quiz: Dict[str, Any],
                  responses: Dict[int, str]) -> Dict[str, Any]:
        """Grade a quiz based on responses.

        Only the questions present in ``responses`` are graded, so 'score' is
        the percentage of *answered* questions that were correct. A response
        whose question id is not in the quiz is always marked incorrect.
        """
        answers = quiz['answers']
        correct = 0
        results = []

        for q_id, response in responses.items():
            known, correct_answer = self._lookup_answer(answers, q_id)
            is_correct = known and self._check_answer(response, correct_answer)
            if is_correct:
                correct += 1
            results.append({
                'question_id': q_id,
                'response': response,
                'correct_answer': correct_answer,
                'is_correct': is_correct
            })

        score = (correct / len(responses)) * 100 if responses else 0
        return {
            'score': score,
            'correct': correct,
            'total': len(responses),
            'results': results
        }

    @staticmethod
    def _lookup_answer(answers: Dict[Any, str], q_id: Any):
        """Return ``(found, answer)`` for ``q_id``.

        Ids are ints when the quiz is built in memory but become strings
        after a JSON round-trip, so both spellings are tried.
        """
        candidates = [q_id, str(q_id)]
        if isinstance(q_id, str) and q_id.lstrip('-').isdigit():
            candidates.append(int(q_id))
        for key in candidates:
            if key in answers:
                return True, answers[key]
        return False, ''

    def _check_answer(self, response: str, correct: str) -> bool:
        """Check if response matches correct answer (fuzzy matching).

        An exact case-insensitive match passes. Otherwise at least 60% of the
        reference answer's words longer than four characters must occur in
        the response as substrings.
        """
        # Simple check - can be improved with NLP
        response_lower = (response or '').lower().strip()
        correct_lower = (correct or '').lower().strip()

        # Exact match
        if response_lower == correct_lower:
            return True

        # Check if key terms are present
        key_terms = re.findall(r'\b\w+\b', correct_lower)
        important_terms = [t for t in key_terms if len(t) > 4]
        if important_terms:
            matches = sum(1 for term in important_terms if term in response_lower)
            return matches >= len(important_terms) * 0.6
        return False

    def _generate_quiz_id(self) -> str:
        """Generate a random 8-hex-character quiz ID."""
        # uuid4 is random, so two quizzes created within the same clock tick
        # no longer share an ID, and no md5 is needed (md5 can be unavailable
        # on FIPS-restricted Python builds).
        import uuid
        return uuid.uuid4().hex[:8]

ebk/ai/reading_companion.py ADDED Viewed

@@ -0,0 +1,224 @@
+"""
+Reading Companion - Track reading sessions and provide intelligent assistance.
+"""
+import json
+from pathlib import Path
+from datetime import datetime, timedelta
+from typing import Dict, List, Any, Optional
+from dataclasses import dataclass, field
+import hashlib
@dataclass
class ReadingSession:
    """Represents a reading session with tracking and insights."""
    session_id: str
    book_id: str
    chapter: Optional[str] = None
    start_time: datetime = field(default_factory=datetime.now)
    end_time: Optional[datetime] = None  # None while the session is running
    pages_read: int = 0
    highlights: List[str] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
    comprehension_score: Optional[float] = None
    quiz_results: List[Dict] = field(default_factory=list)

    @property
    def duration(self) -> timedelta:
        """Session duration; measured up to now while the session is open."""
        if self.end_time:
            return self.end_time - self.start_time
        return datetime.now() - self.start_time

    @property
    def reading_speed(self) -> float:
        """Reading speed in pages per hour (0.0 for a zero-length session)."""
        duration_hours = self.duration.total_seconds() / 3600
        if duration_hours > 0:
            return self.pages_read / duration_hours
        return 0.0


class ReadingCompanion:
    """
    AI-powered reading companion that tracks sessions and provides assistance.

    Sessions are persisted as JSON in ``<library_path>/.reading_sessions/sessions.json``.
    """

    def __init__(self, library_path: Path):
        self.library_path = Path(library_path)
        self.sessions_path = self.library_path / '.reading_sessions'
        # parents=True so a not-yet-created library directory does not make
        # construction fail.
        self.sessions_path.mkdir(parents=True, exist_ok=True)
        self.active_sessions: Dict[str, ReadingSession] = {}
        self.completed_sessions: List[ReadingSession] = []
        self.load_sessions()

    def start_session(self, book_id: str, chapter: str = None) -> ReadingSession:
        """Start a new reading session and register it as active."""
        session_id = self._generate_session_id(book_id)
        session = ReadingSession(
            session_id=session_id,
            book_id=book_id,
            chapter=chapter
        )
        self.active_sessions[session_id] = session
        return session

    def end_session(self, session_id: str) -> ReadingSession:
        """End a reading session and save it.

        Raises:
            ValueError: If ``session_id`` is not an active session.
        """
        if session_id not in self.active_sessions:
            raise ValueError(f"No active session with ID {session_id}")

        session = self.active_sessions.pop(session_id)
        session.end_time = datetime.now()
        self.completed_sessions.append(session)
        self.save_sessions()
        return session

    def add_highlight(self, session_id: str, text: str):
        """Add a highlight to an active session; unknown IDs are ignored."""
        if session_id in self.active_sessions:
            self.active_sessions[session_id].highlights.append(text)

    def add_note(self, session_id: str, note: str):
        """Add a note to an active session; unknown IDs are ignored."""
        if session_id in self.active_sessions:
            self.active_sessions[session_id].notes.append(note)

    def get_reading_stats(self, book_id: str = None) -> Dict[str, Any]:
        """Get statistics over completed sessions for one book or all books.

        Returns an empty dict when there are no matching sessions.
        """
        sessions = self.completed_sessions
        if book_id:
            sessions = [s for s in sessions if s.book_id == book_id]
        if not sessions:
            return {}

        total_time = sum((s.duration for s in sessions), timedelta())
        total_pages = sum(s.pages_read for s in sessions)
        total_hours = total_time.total_seconds() / 3600
        avg_speed = total_pages / total_hours if total_hours > 0 else 0

        return {
            'total_sessions': len(sessions),
            'total_time': str(total_time),
            'total_pages': total_pages,
            'average_speed': avg_speed,
            'total_highlights': sum(len(s.highlights) for s in sessions),
            'total_notes': sum(len(s.notes) for s in sessions)
        }

    def get_reading_streak(self) -> int:
        """Calculate the current reading streak in days.

        Counts consecutive days with a completed session, ending today. If
        nothing was read today yet, the streak may instead end yesterday.
        """
        days_read = {s.start_time.date() for s in self.completed_sessions}
        if not days_read:
            return 0

        # Read the clock once so the result cannot change if midnight passes
        # while the streak is being counted.
        day = datetime.now().date()
        one_day = timedelta(days=1)
        if day not in days_read:
            day -= one_day  # Today is not over yet; start from yesterday

        streak = 0
        while day in days_read:
            streak += 1
            day -= one_day
        return streak

    @staticmethod
    def _session_to_dict(session: ReadingSession) -> Dict[str, Any]:
        """Convert a session into a JSON-serializable dict."""
        return {
            'session_id': session.session_id,
            'book_id': session.book_id,
            'chapter': session.chapter,
            'start_time': session.start_time.isoformat(),
            'end_time': session.end_time.isoformat() if session.end_time else None,
            'pages_read': session.pages_read,
            'highlights': session.highlights,
            'notes': session.notes,
            'comprehension_score': session.comprehension_score,
            'quiz_results': session.quiz_results
        }

    @staticmethod
    def _session_from_dict(sdata: Dict[str, Any]) -> ReadingSession:
        """Rebuild a session from a dict produced by ``_session_to_dict``.

        Optional keys may be absent, which keeps older session files loadable.
        """
        end_raw = sdata.get('end_time')
        return ReadingSession(
            session_id=sdata['session_id'],
            book_id=sdata['book_id'],
            chapter=sdata.get('chapter'),
            start_time=datetime.fromisoformat(sdata['start_time']),
            end_time=datetime.fromisoformat(end_raw) if end_raw else None,
            pages_read=sdata.get('pages_read', 0),
            highlights=list(sdata.get('highlights', [])),
            notes=list(sdata.get('notes', [])),
            comprehension_score=sdata.get('comprehension_score'),
            quiz_results=list(sdata.get('quiz_results', []))
        )

    def save_sessions(self):
        """Save active and completed sessions to disk."""
        sessions_file = self.sessions_path / 'sessions.json'
        data = {
            'active': {
                sid: self._session_to_dict(s)
                for sid, s in self.active_sessions.items()
            },
            'completed': [
                self._session_to_dict(s) for s in self.completed_sessions
            ]
        }
        # Write to a sibling temp file and rename it into place so an
        # interrupted write cannot leave a truncated sessions.json behind.
        tmp_file = sessions_file.with_name(sessions_file.name + '.tmp')
        with open(tmp_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        tmp_file.replace(sessions_file)

    def load_sessions(self):
        """Load sessions from disk; does nothing if no file has been saved.

        Active sessions from the file are merged into the in-memory ones by
        session ID. The completed list is replaced rather than appended to,
        so calling this more than once does not duplicate sessions.
        """
        sessions_file = self.sessions_path / 'sessions.json'
        if not sessions_file.exists():
            return

        with open(sessions_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        for sid, sdata in data.get('active', {}).items():
            self.active_sessions[sid] = self._session_from_dict(sdata)

        self.completed_sessions = [
            self._session_from_dict(sdata) for sdata in data.get('completed', [])
        ]

    def _generate_session_id(self, book_id: str) -> str:
        """Generate a random 12-hex-character session ID.

        ``book_id`` is accepted for interface compatibility but unused.
        """
        # uuid4 is random, so two sessions started for the same book within
        # one clock tick cannot collide and overwrite each other, and no md5
        # is needed (md5 can be unavailable on FIPS-restricted builds).
        import uuid
        return uuid.uuid4().hex[:12]

ebk 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

Potentially problematic release.

ebk 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl