ebk 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

Files changed (61) hide show
  1. ebk/ai/__init__.py +23 -0
  2. ebk/ai/knowledge_graph.py +443 -0
  3. ebk/ai/llm_providers/__init__.py +21 -0
  4. ebk/ai/llm_providers/base.py +230 -0
  5. ebk/ai/llm_providers/ollama.py +362 -0
  6. ebk/ai/metadata_enrichment.py +396 -0
  7. ebk/ai/question_generator.py +328 -0
  8. ebk/ai/reading_companion.py +224 -0
  9. ebk/ai/semantic_search.py +434 -0
  10. ebk/ai/text_extractor.py +394 -0
  11. ebk/cli.py +1097 -9
  12. ebk/db/__init__.py +37 -0
  13. ebk/db/migrations.py +180 -0
  14. ebk/db/models.py +526 -0
  15. ebk/db/session.py +144 -0
  16. ebk/exports/__init__.py +0 -0
  17. ebk/exports/base_exporter.py +218 -0
  18. ebk/exports/html_library.py +1390 -0
  19. ebk/exports/html_utils.py +117 -0
  20. ebk/exports/hugo.py +59 -0
  21. ebk/exports/jinja_export.py +287 -0
  22. ebk/exports/multi_facet_export.py +164 -0
  23. ebk/exports/symlink_dag.py +479 -0
  24. ebk/exports/zip.py +25 -0
  25. ebk/library_db.py +155 -0
  26. ebk/repl/__init__.py +9 -0
  27. ebk/repl/find.py +126 -0
  28. ebk/repl/grep.py +174 -0
  29. ebk/repl/shell.py +1677 -0
  30. ebk/repl/text_utils.py +320 -0
  31. ebk/services/__init__.py +11 -0
  32. ebk/services/import_service.py +442 -0
  33. ebk/services/tag_service.py +282 -0
  34. ebk/services/text_extraction.py +317 -0
  35. ebk/similarity/__init__.py +77 -0
  36. ebk/similarity/base.py +154 -0
  37. ebk/similarity/core.py +445 -0
  38. ebk/similarity/extractors.py +168 -0
  39. ebk/similarity/metrics.py +376 -0
  40. ebk/vfs/__init__.py +101 -0
  41. ebk/vfs/base.py +301 -0
  42. ebk/vfs/library_vfs.py +124 -0
  43. ebk/vfs/nodes/__init__.py +54 -0
  44. ebk/vfs/nodes/authors.py +196 -0
  45. ebk/vfs/nodes/books.py +480 -0
  46. ebk/vfs/nodes/files.py +155 -0
  47. ebk/vfs/nodes/metadata.py +385 -0
  48. ebk/vfs/nodes/root.py +100 -0
  49. ebk/vfs/nodes/similar.py +165 -0
  50. ebk/vfs/nodes/subjects.py +184 -0
  51. ebk/vfs/nodes/tags.py +371 -0
  52. ebk/vfs/resolver.py +228 -0
  53. {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/METADATA +1 -1
  54. ebk-0.3.2.dist-info/RECORD +69 -0
  55. ebk-0.3.2.dist-info/entry_points.txt +2 -0
  56. ebk-0.3.2.dist-info/top_level.txt +1 -0
  57. ebk-0.3.1.dist-info/RECORD +0 -19
  58. ebk-0.3.1.dist-info/entry_points.txt +0 -6
  59. ebk-0.3.1.dist-info/top_level.txt +0 -2
  60. {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/WHEEL +0 -0
  61. {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,328 @@
1
+ """
2
+ Question generator for active recall and comprehension testing.
3
+ """
4
+
5
+ import random
6
+ import re
7
+ from typing import List, Dict, Any, Optional
8
+ from dataclasses import dataclass
9
+
10
+
11
@dataclass
class Question:
    """A single active-recall question together with its reference answer.

    Attributes:
        question_text: The prompt shown to the reader.
        answer: The reference answer for the prompt.
        question_type: 'factual', 'conceptual', 'application' or 'synthesis'.
        difficulty: 'easy', 'medium' or 'hard'.
        context: The source text the question was derived from, if any.
        hints: Optional hints; ``None`` when none were supplied.
    """
    question_text: str
    answer: str
    question_type: str  # 'factual', 'conceptual', 'application', 'synthesis'
    difficulty: str  # 'easy', 'medium', 'hard'
    context: Optional[str] = None
    # The default stays None (rather than a list) so existing callers that
    # test for "no hints" keep working; only the annotation was wrong.
    hints: Optional[List[str]] = None


class QuestionGenerator:
    """
    Generate questions from text for active recall and comprehension testing.

    All extraction is done with regular-expression heuristics; no NLP
    library is involved.
    """

    # (regex, question template, answer template). Patterns are tried in
    # order and the first one that matches wins.
    _FACT_PATTERNS = (
        (r"(\w+) is (\w+)", "What is {0}?", "{1}"),
        (r"(\w+) was (\w+)", "What was {0}?", "{1}"),
        (r"In (\d+), (\w+)", "When did {1} occur?", "{0}"),
    )

    # "X is defined as Y", "X means Y", "X refers to Y" or "X: Y".
    # Kept in step with the phrases recognised by _is_definition.
    _DEFINITION_PATTERNS = (
        r"(\w+) is defined as (.+)",
        r"(\w+) means (.+)",
        r"(\w+) refers to (.+)",
        r"(\w+): (.+)",
    )

    # The wording cues are case-insensitive, but the colon cue must stay
    # case-sensitive: it is specifically "colon followed by a capital letter".
    _DEFINITION_PHRASE_RE = re.compile(
        r'\b(?:is defined as|means|refers to)\b', re.IGNORECASE
    )
    _DEFINITION_COLON_RE = re.compile(r':\s*[A-Z]')

    # "-" and "*" only count as bullets at the start of a line and followed
    # by whitespace, so hyphenated words ("well-known") are not mistaken for
    # list items. "•" is unambiguous and is accepted anywhere.
    _LIST_ITEM_RE = re.compile(
        r'(?:^[ \t]*[\-\*][ \t]+|•[ \t]*)(.+)', re.MULTILINE
    )

    # Case-sensitive cues that mark a sentence as a factual statement.
    _FACT_CUE_RE = re.compile(r'\b(?:is|was|are|were|defined as)\b|In \d+')

    def __init__(self):
        self.question_templates = {
            'factual': [
                "What is {concept}?",
                "Define {term}.",
                "Who {action}?",
                "When did {event} occur?",
                "List the main characteristics of {topic}."
            ],
            'conceptual': [
                "Explain the relationship between {concept1} and {concept2}.",
                "Why is {concept} important?",
                "What is the main idea of {topic}?",
                "How does {concept} work?",
                "Compare and contrast {item1} and {item2}."
            ],
            'application': [
                "How would you apply {concept} to {scenario}?",
                "Give an example of {concept} in practice.",
                "What would happen if {condition}?",
                "How could {concept} be used to solve {problem}?"
            ],
            'synthesis': [
                "What conclusions can you draw from {evidence}?",
                "How would you combine {concept1} and {concept2}?",
                "What pattern emerges from {data}?",
                "Predict the outcome if {scenario}."
            ]
        }

    def generate_from_text(self, text: str, num_questions: int = 5) -> List[Question]:
        """Generate up to ``num_questions`` questions from a text passage.

        At most ``num_questions // 2`` factual questions are produced first;
        conceptual questions then fill the remaining slots, so an odd
        ``num_questions`` can actually be reached.

        Args:
            text: The passage to generate questions from.
            num_questions: Maximum number of questions to return.

        Returns:
            Factual questions followed by conceptual ones. Empty when the
            text is empty or ``num_questions`` is not positive.
        """
        if num_questions <= 0 or not text:
            return []

        # Extract key information
        sentences = self._split_sentences(text)
        key_terms = self._extract_key_terms(text)
        facts = self._extract_facts(sentences)

        questions: List[Question] = []

        # Generate factual questions; facts that match no pattern are skipped
        # without using up the factual quota.
        factual_quota = num_questions // 2
        for fact in facts:
            if len(questions) >= factual_quota:
                break
            question = self._create_factual_question(fact)
            if question:
                questions.append(question)

        # Generate conceptual questions for whatever room is left.
        for term in key_terms:
            if len(questions) >= num_questions:
                break
            questions.append(self._create_conceptual_question(term, text))

        return questions

    def generate_from_highlights(self, highlights: List[str]) -> List[Question]:
        """Generate one question per non-blank user highlight.

        A highlight is treated as a definition, a list, or free text, in that
        order. When the specialised builder cannot extract anything, the
        highlight falls back to an explanation question instead of being
        dropped.
        """
        questions = []

        for highlight in highlights:
            if not highlight or not highlight.strip():
                continue

            question = None
            if self._is_definition(highlight):
                question = self._create_definition_question(highlight)
            elif self._is_list(highlight):
                question = self._create_list_question(highlight)

            if question is None:
                question = self._create_explanation_question(highlight)

            questions.append(question)

        return questions

    def _create_factual_question(self, fact: str) -> Optional[Question]:
        """Create a factual question from a fact, or None if no pattern fits."""
        for pattern, question_template, answer_template in self._FACT_PATTERNS:
            match = re.search(pattern, fact, re.IGNORECASE)
            if match:
                groups = match.groups()
                return Question(
                    question_text=question_template.format(*groups),
                    answer=answer_template.format(*groups),
                    question_type='factual',
                    difficulty='easy',
                    context=fact
                )

        return None

    def _create_conceptual_question(self, term: str, context: str) -> Question:
        """Create a conceptual question about a term.

        The answer is built from the first two '.'-separated fragments of
        ``context`` that mention the term (case-insensitively); the stored
        context is the first 200 characters of ``context``.
        """
        question_text = f"Explain the concept of {term} based on the text."

        # Extract sentences containing the term for the answer
        needle = term.lower()
        sentences = [s for s in context.split('.') if needle in s.lower()]
        answer = ' '.join(sentences[:2]) if sentences else f"The text discusses {term}."

        return Question(
            question_text=question_text,
            answer=answer,
            question_type='conceptual',
            difficulty='medium',
            context=context[:200]
        )

    def _create_definition_question(self, highlight: str) -> Optional[Question]:
        """Create a "Define X." question, or None if no definition pattern fits."""
        for pattern in self._DEFINITION_PATTERNS:
            match = re.search(pattern, highlight, re.IGNORECASE)
            if match:
                term, definition = match.groups()
                return Question(
                    question_text=f"Define {term}.",
                    answer=definition,
                    question_type='factual',
                    difficulty='easy',
                    context=highlight
                )

        return None

    def _create_list_question(self, highlight: str) -> Optional[Question]:
        """Create a question from a bulleted list, or None if no bullets are found."""
        list_items = [item.strip() for item in self._LIST_ITEM_RE.findall(highlight)]

        if list_items:
            return Question(
                question_text="List the main points mentioned.",
                answer='\n'.join(list_items),
                question_type='factual',
                difficulty='easy',
                context=highlight
            )

        return None

    def _create_explanation_question(self, highlight: str) -> Question:
        """Create an explanation question from a highlight.

        The prompt quotes the first 50 characters of the text before the
        first '.'; the full highlight is the answer.
        """
        # Extract the main subject
        first_sentence = highlight.split('.')[0]

        return Question(
            question_text=f"Explain the following concept: {first_sentence[:50]}...",
            answer=highlight,
            question_type='conceptual',
            difficulty='medium',
            context=highlight
        )

    def _is_definition(self, text: str) -> bool:
        """Check if text looks like a definition."""
        return bool(
            self._DEFINITION_PHRASE_RE.search(text)
            or self._DEFINITION_COLON_RE.search(text)
        )

    def _is_list(self, text: str) -> bool:
        """Check if text contains at least one bulleted item."""
        return bool(self._LIST_ITEM_RE.search(text))

    def _split_sentences(self, text: str) -> List[str]:
        """Split text into sentences on '.', '!' or '?' followed by whitespace."""
        sentences = re.split(r'[.!?]\s+', text)
        return [s.strip() for s in sentences if s.strip()]

    def _extract_key_terms(self, text: str) -> List[str]:
        """Extract up to 10 distinct capitalised words, in order of first appearance."""
        # Simple noun phrase extraction
        # In production, use NLP libraries like spaCy
        words = re.findall(r'\b[A-Z][a-z]+\b', text)
        # dict.fromkeys de-duplicates while keeping order, so the same text
        # always yields the same terms (a set's order varies between runs).
        return list(dict.fromkeys(words))[:10]

    def _extract_facts(self, sentences: List[str]) -> List[str]:
        """Extract the sentences that contain a factual cue word."""
        return [s for s in sentences if self._FACT_CUE_RE.search(s)]
232
+
233
+
234
class QuizBuilder:
    """
    Build and manage quizzes from questions.
    """

    def __init__(self):
        self.question_generator = QuestionGenerator()

    def create_quiz(self, questions: "List[Question]",
                    quiz_type: str = 'mixed',
                    num_questions: int = 10) -> Dict[str, Any]:
        """Create a quiz from a random selection of questions.

        Args:
            questions: Candidate questions.
            quiz_type: 'factual' keeps factual questions only; 'conceptual'
                keeps conceptual and synthesis questions; any other value
                keeps everything.
            num_questions: Maximum number of questions to select. Values
                below zero are treated as zero.

        Returns:
            A dict with 'quiz_id', 'questions' (id, question, type,
            difficulty, hints), 'answers' (id -> answer) and
            'total_questions'.
        """
        if quiz_type == 'factual':
            filtered = [q for q in questions if q.question_type == 'factual']
        elif quiz_type == 'conceptual':
            filtered = [q for q in questions if q.question_type in ['conceptual', 'synthesis']]
        else:
            filtered = list(questions)

        # Randomly select questions. The count is clamped on both sides
        # because random.sample raises ValueError for a negative size.
        count = max(0, min(num_questions, len(filtered)))
        selected = random.sample(filtered, count)

        return {
            'quiz_id': self._generate_quiz_id(),
            'questions': [
                {
                    'id': i,
                    'question': q.question_text,
                    'type': q.question_type,
                    'difficulty': q.difficulty,
                    'hints': q.hints or []
                }
                for i, q in enumerate(selected)
            ],
            'answers': {
                i: q.answer for i, q in enumerate(selected)
            },
            'total_questions': len(selected)
        }

    def grade_quiz(self, quiz: Dict[str, Any],
                   responses: Dict[int, str]) -> Dict[str, Any]:
        """Grade a quiz based on responses.

        Only the questions present in ``responses`` are graded, and the
        score is the percentage of those responses that are correct. A
        response whose id does not belong to the quiz is always incorrect
        and is reported with an empty 'correct_answer'.

        Args:
            quiz: A quiz dict containing an 'answers' mapping.
            responses: Mapping of question id to the given answer. Ids may be
                ints or their string form.

        Returns:
            A dict with 'score', 'correct', 'total' and per-question
            'results'.
        """
        answers = quiz['answers']
        correct = 0
        results = []

        for q_id, response in responses.items():
            found, correct_answer = self._lookup_answer(answers, q_id)
            # An unknown id must not be compared against '' - an empty
            # response would otherwise count as an exact match.
            is_correct = found and self._check_answer(response, correct_answer)

            if is_correct:
                correct += 1

            results.append({
                'question_id': q_id,
                'response': response,
                'correct_answer': correct_answer,
                'is_correct': is_correct
            })

        score = (correct / len(responses)) * 100 if responses else 0

        return {
            'score': score,
            'correct': correct,
            'total': len(responses),
            'results': results
        }

    @staticmethod
    def _lookup_answer(answers: Dict[Any, str], q_id: Any):
        """Return ``(found, answer)`` for a question id.

        The id is tried as given, then as a string, then (for numeric
        strings) as an int, so a quiz whose keys were turned into strings by
        a JSON round trip can still be graded.
        """
        candidates = [q_id, str(q_id)]
        if isinstance(q_id, str):
            try:
                candidates.append(int(q_id))
            except ValueError:
                pass

        for key in candidates:
            if key in answers:
                return True, answers[key]
        return False, ''

    def _check_answer(self, response: str, correct: str) -> bool:
        """Check if response matches correct answer (fuzzy matching).

        A response matches when it equals the answer ignoring case and
        surrounding whitespace, or when it contains at least 60% of the
        answer's words that are longer than four characters. A missing
        (None) response is treated as empty.
        """
        # Simple check - can be improved with NLP
        response_lower = (response or '').lower().strip()
        correct_lower = (correct or '').lower().strip()

        # Exact match
        if response_lower == correct_lower:
            return True

        # Check if key terms are present
        key_terms = re.findall(r'\b\w+\b', correct_lower)
        important_terms = [t for t in key_terms if len(t) > 4]

        if important_terms:
            matches = sum(1 for term in important_terms if term in response_lower)
            return matches >= len(important_terms) * 0.6

        return False

    def _generate_quiz_id(self) -> str:
        """Generate a unique 8-character hexadecimal quiz ID."""
        # Random rather than timestamp-derived: two quizzes built within the
        # same clock tick would otherwise share an id.
        import uuid
        return uuid.uuid4().hex[:8]
@@ -0,0 +1,224 @@
1
+ """
2
+ Reading Companion - Track reading sessions and provide intelligent assistance.
3
+ """
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from datetime import datetime, timedelta
8
+ from typing import Dict, List, Any, Optional
9
+ from dataclasses import dataclass, field
10
+ import hashlib
11
+
12
+
13
@dataclass
class ReadingSession:
    """Represents a reading session with tracking and insights.

    Attributes:
        session_id: Identifier of the session.
        book_id: Identifier of the book being read.
        chapter: Chapter being read, if given.
        start_time: When the session started; defaults to the creation time.
        end_time: When the session ended; None while it is still open.
        pages_read: Number of pages read in the session.
        highlights: Highlighted passages.
        notes: Free-form notes.
        comprehension_score: Optional comprehension score.
        quiz_results: Quiz result dicts attached to the session.
    """
    session_id: str
    book_id: str
    chapter: Optional[str] = None
    start_time: datetime = field(default_factory=datetime.now)
    end_time: Optional[datetime] = None
    pages_read: int = 0
    highlights: List[str] = field(default_factory=list)
    notes: List[str] = field(default_factory=list)
    comprehension_score: Optional[float] = None
    quiz_results: List[Dict] = field(default_factory=list)

    @property
    def duration(self) -> timedelta:
        """Session duration; for an open session, the time elapsed so far."""
        if self.end_time is not None:
            return self.end_time - self.start_time
        # Take "now" in the same timezone flavour as start_time: subtracting
        # a timezone-aware datetime from a naive one raises TypeError.
        return datetime.now(self.start_time.tzinfo) - self.start_time

    @property
    def reading_speed(self) -> float:
        """Reading speed in pages per hour (0.0 when no time has elapsed)."""
        duration_hours = self.duration.total_seconds() / 3600
        if duration_hours > 0:
            return self.pages_read / duration_hours
        return 0.0


class ReadingCompanion:
    """
    AI-powered reading companion that tracks sessions and provides assistance.

    Sessions are kept in memory and persisted as JSON in
    ``<library_path>/.reading_sessions/sessions.json``.
    """

    def __init__(self, library_path: Path):
        """Create the sessions directory if needed and load saved sessions.

        Args:
            library_path: Existing library directory.
        """
        self.library_path = Path(library_path)
        self.sessions_path = self.library_path / '.reading_sessions'
        self.sessions_path.mkdir(exist_ok=True)

        self.active_sessions: Dict[str, ReadingSession] = {}
        self.completed_sessions: List[ReadingSession] = []

        self.load_sessions()

    def start_session(self, book_id: str, chapter: Optional[str] = None) -> ReadingSession:
        """Start a new reading session and register it as active.

        The session is held in memory only; it is written to disk the next
        time sessions are saved.
        """
        session_id = self._generate_session_id(book_id)

        session = ReadingSession(
            session_id=session_id,
            book_id=book_id,
            chapter=chapter
        )

        self.active_sessions[session_id] = session
        return session

    def end_session(self, session_id: str) -> ReadingSession:
        """End a reading session and save it.

        Raises:
            ValueError: If there is no active session with this ID.
        """
        if session_id not in self.active_sessions:
            raise ValueError(f"No active session with ID {session_id}")

        # Move to completed
        session = self.active_sessions.pop(session_id)
        session.end_time = datetime.now()
        self.completed_sessions.append(session)

        self.save_sessions()
        return session

    def add_highlight(self, session_id: str, text: str):
        """Add a highlight to an active session; unknown IDs are ignored."""
        session = self.active_sessions.get(session_id)
        if session is not None:
            session.highlights.append(text)

    def add_note(self, session_id: str, note: str):
        """Add a note to an active session; unknown IDs are ignored."""
        session = self.active_sessions.get(session_id)
        if session is not None:
            session.notes.append(note)

    def get_reading_stats(self, book_id: Optional[str] = None) -> Dict[str, Any]:
        """Get statistics over completed sessions for one book or all books.

        Returns:
            An empty dict when there are no matching sessions, otherwise a
            dict with 'total_sessions', 'total_time' (as a string),
            'total_pages', 'average_speed' (pages per hour),
            'total_highlights' and 'total_notes'.
        """
        sessions = self.completed_sessions

        if book_id:
            sessions = [s for s in sessions if s.book_id == book_id]

        if not sessions:
            return {}

        total_time = sum((s.duration for s in sessions), timedelta())
        total_pages = sum(s.pages_read for s in sessions)
        total_hours = total_time.total_seconds() / 3600
        avg_speed = total_pages / total_hours if total_hours > 0 else 0

        return {
            'total_sessions': len(sessions),
            'total_time': str(total_time),
            'total_pages': total_pages,
            'average_speed': avg_speed,
            'total_highlights': sum(len(s.highlights) for s in sessions),
            'total_notes': sum(len(s.notes) for s in sessions)
        }

    def get_reading_streak(self) -> int:
        """Calculate the current reading streak in days.

        Counts consecutive days with at least one completed session, going
        backwards from today. A day without a session today does not break
        the streak: counting then starts from yesterday.
        """
        reading_days = {s.start_time.date() for s in self.completed_sessions}
        if not reading_days:
            return 0

        one_day = timedelta(days=1)
        today = datetime.now().date()
        day = today if today in reading_days else today - one_day

        streak = 0
        while day in reading_days:
            streak += 1
            day -= one_day

        return streak

    def save_sessions(self):
        """Save active and completed sessions to disk."""
        sessions_file = self.sessions_path / 'sessions.json'

        data = {
            'active': {
                sid: self._session_to_dict(s)
                for sid, s in self.active_sessions.items()
            },
            'completed': [
                self._session_to_dict(s) for s in self.completed_sessions
            ]
        }

        # Write to a temporary file and rename it over the real one, so an
        # interrupted write cannot leave a truncated sessions.json behind.
        tmp_file = sessions_file.with_suffix('.json.tmp')
        with open(tmp_file, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
        tmp_file.replace(sessions_file)

    def load_sessions(self):
        """Load sessions from disk, if a sessions file exists.

        Active sessions on disk replace in-memory ones with the same ID.
        Completed sessions that are already in memory are not added again,
        so calling this more than once does not duplicate them.
        """
        sessions_file = self.sessions_path / 'sessions.json'

        if not sessions_file.exists():
            return

        with open(sessions_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Load active sessions
        for sid, sdata in data.get('active', {}).items():
            self.active_sessions[sid] = self._session_from_dict(sdata)

        # Load completed sessions
        already_loaded = {s.session_id for s in self.completed_sessions}
        for sdata in data.get('completed', []):
            if sdata['session_id'] in already_loaded:
                continue
            self.completed_sessions.append(self._session_from_dict(sdata))

    @staticmethod
    def _session_to_dict(session: ReadingSession) -> Dict[str, Any]:
        """Convert a session into a JSON-serialisable dict."""
        return {
            'session_id': session.session_id,
            'book_id': session.book_id,
            'chapter': session.chapter,
            'start_time': session.start_time.isoformat(),
            'end_time': session.end_time.isoformat() if session.end_time else None,
            'pages_read': session.pages_read,
            'highlights': session.highlights,
            'notes': session.notes,
            'comprehension_score': session.comprehension_score,
            'quiz_results': session.quiz_results
        }

    @staticmethod
    def _session_from_dict(sdata: Dict[str, Any]) -> ReadingSession:
        """Rebuild a session from a saved dict; optional keys may be absent."""
        end_time = sdata.get('end_time')
        return ReadingSession(
            session_id=sdata['session_id'],
            book_id=sdata['book_id'],
            chapter=sdata.get('chapter'),
            start_time=datetime.fromisoformat(sdata['start_time']),
            end_time=datetime.fromisoformat(end_time) if end_time else None,
            pages_read=sdata.get('pages_read', 0),
            highlights=sdata.get('highlights', []),
            notes=sdata.get('notes', []),
            comprehension_score=sdata.get('comprehension_score'),
            quiz_results=sdata.get('quiz_results', [])
        )

    def _generate_session_id(self, book_id: str) -> str:
        """Generate a unique 12-character hexadecimal session ID."""
        # Random rather than derived from book_id and the clock: two sessions
        # for the same book started within one clock tick would otherwise get
        # the same id, and the second would overwrite the first.
        import uuid
        return uuid.uuid4().hex[:12]