ebk 0.1.0__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

Files changed (84)
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +443 -0
  4. ebk/ai/llm_providers/__init__.py +21 -0
  5. ebk/ai/llm_providers/base.py +230 -0
  6. ebk/ai/llm_providers/ollama.py +362 -0
  7. ebk/ai/metadata_enrichment.py +396 -0
  8. ebk/ai/question_generator.py +328 -0
  9. ebk/ai/reading_companion.py +224 -0
  10. ebk/ai/semantic_search.py +434 -0
  11. ebk/ai/text_extractor.py +394 -0
  12. ebk/cli.py +2828 -680
  13. ebk/config.py +260 -22
  14. ebk/db/__init__.py +37 -0
  15. ebk/db/migrations.py +180 -0
  16. ebk/db/models.py +526 -0
  17. ebk/db/session.py +144 -0
  18. ebk/decorators.py +132 -0
  19. ebk/exports/base_exporter.py +218 -0
  20. ebk/exports/html_library.py +1390 -0
  21. ebk/exports/html_utils.py +117 -0
  22. ebk/exports/hugo.py +7 -3
  23. ebk/exports/jinja_export.py +287 -0
  24. ebk/exports/multi_facet_export.py +164 -0
  25. ebk/exports/symlink_dag.py +479 -0
  26. ebk/extract_metadata.py +76 -7
  27. ebk/library_db.py +899 -0
  28. ebk/plugins/__init__.py +42 -0
  29. ebk/plugins/base.py +502 -0
  30. ebk/plugins/hooks.py +444 -0
  31. ebk/plugins/registry.py +500 -0
  32. ebk/repl/__init__.py +9 -0
  33. ebk/repl/find.py +126 -0
  34. ebk/repl/grep.py +174 -0
  35. ebk/repl/shell.py +1677 -0
  36. ebk/repl/text_utils.py +320 -0
  37. ebk/search_parser.py +413 -0
  38. ebk/server.py +1633 -0
  39. ebk/services/__init__.py +11 -0
  40. ebk/services/import_service.py +442 -0
  41. ebk/services/tag_service.py +282 -0
  42. ebk/services/text_extraction.py +317 -0
  43. ebk/similarity/__init__.py +77 -0
  44. ebk/similarity/base.py +154 -0
  45. ebk/similarity/core.py +445 -0
  46. ebk/similarity/extractors.py +168 -0
  47. ebk/similarity/metrics.py +376 -0
  48. ebk/vfs/__init__.py +101 -0
  49. ebk/vfs/base.py +301 -0
  50. ebk/vfs/library_vfs.py +124 -0
  51. ebk/vfs/nodes/__init__.py +54 -0
  52. ebk/vfs/nodes/authors.py +196 -0
  53. ebk/vfs/nodes/books.py +480 -0
  54. ebk/vfs/nodes/files.py +155 -0
  55. ebk/vfs/nodes/metadata.py +385 -0
  56. ebk/vfs/nodes/root.py +100 -0
  57. ebk/vfs/nodes/similar.py +165 -0
  58. ebk/vfs/nodes/subjects.py +184 -0
  59. ebk/vfs/nodes/tags.py +371 -0
  60. ebk/vfs/resolver.py +228 -0
  61. ebk-0.3.2.dist-info/METADATA +755 -0
  62. ebk-0.3.2.dist-info/RECORD +69 -0
  63. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/WHEEL +1 -1
  64. ebk-0.3.2.dist-info/licenses/LICENSE +21 -0
  65. ebk/imports/__init__.py +0 -0
  66. ebk/imports/calibre.py +0 -144
  67. ebk/imports/ebooks.py +0 -116
  68. ebk/llm.py +0 -58
  69. ebk/manager.py +0 -44
  70. ebk/merge.py +0 -308
  71. ebk/streamlit/__init__.py +0 -0
  72. ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
  73. ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
  74. ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
  75. ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
  76. ebk/streamlit/app.py +0 -185
  77. ebk/streamlit/display.py +0 -168
  78. ebk/streamlit/filters.py +0 -151
  79. ebk/streamlit/utils.py +0 -58
  80. ebk/utils.py +0 -311
  81. ebk-0.1.0.dist-info/METADATA +0 -457
  82. ebk-0.1.0.dist-info/RECORD +0 -29
  83. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/entry_points.txt +0 -0
  84. {ebk-0.1.0.dist-info → ebk-0.3.2.dist-info}/top_level.txt +0 -0
ebk/search_parser.py ADDED
@@ -0,0 +1,413 @@
1
+ """
2
+ Advanced search query parser for ebk.
3
+
4
+ Supports field-specific searches, boolean logic, and comparison operators.
5
+
6
+ Examples:
7
+ title:Python rating:>=4 format:pdf
8
+ author:"Donald Knuth" series:TAOCP
9
+ tag:programming favorite:true NOT java
10
+ "machine learning" OR "deep learning"
11
+ """
12
+
13
+ import re
14
+ from typing import List, Dict, Any, Optional, Tuple
15
+ from dataclasses import dataclass, field
16
+
17
+
18
@dataclass
class SearchToken:
    """One lexical unit produced while scanning a search query.

    Positional order is ``type, value, field, operator, negated``; only the
    first two are required.
    """

    # Token category: 'field', 'text', 'operator' or 'phrase'.
    type: str
    # Token text (field value, bare word, phrase body or operator keyword).
    value: str
    # Field name for 'field' tokens; left as None for every other category.
    field: Optional[str] = None
    # For comparisons: '=', '>', '>=', '<', '<=', '-' (range).
    operator: Optional[str] = None
    # True when the token was preceded by NOT or a '-' prefix.
    negated: bool = False
26
+
27
+
28
@dataclass
class ParsedQuery:
    """Structured outcome of parsing a search query.

    Holds the token stream together with the two artefacts derived from it:
    a combined full-text-search string and a mapping of filter conditions.
    """

    # Tokens in the order they were read from the query.
    tokens: List[SearchToken] = field(default_factory=list)
    # Combined FTS query for title/description/text; None when there is none.
    fts_query: Optional[str] = None
    # Exact filters (language, format, etc.), keyed by filter name.
    filters: Dict[str, Any] = field(default_factory=dict)

    def has_fts_terms(self) -> bool:
        """Return True when a non-empty full-text query was built."""
        return True if self.fts_query else False

    def has_filters(self) -> bool:
        """Return True when at least one filter condition is present."""
        return True if self.filters else False
42
+
43
+
44
class SearchQueryParser:
    """
    Parser for advanced search queries with field specifiers and boolean logic.

    Syntax:
    - Field searches: field:value (e.g., title:Python, author:Knuth)
    - Phrases: "quoted text" (e.g., "machine learning")
    - Boolean: AND (implicit), OR (explicit), NOT/-prefix (negation)
    - Comparisons: rating:>=4, rating:3-5
    - Multiple fields: title:python format:pdf (implicit AND)

    Field mappings:
    - title: Book title
    - author: Author names
    - tag/subject: Subjects/tags
    - description: Book description
    - series: Series name
    - publisher: Publisher name
    - language: Language code (exact match)
    - format: File format (exact match)
    - rating: Personal rating (numeric comparison)
    - favorite: Favorite status (boolean)
    - status: Reading status (exact match)

    Notes:
    - A ``field:value`` token whose field is in neither FTS_FIELDS nor
      FILTER_FIELDS is tokenized but contributes nothing to the FTS query
      or to the filters.
    - The negated flag is honoured for FTS terms and for subject/author
      filters only; the remaining filter fields do not consult it.
    """

    # Field aliases
    FIELD_ALIASES = {
        'tag': 'subject',
        'tags': 'subject',
        'subjects': 'subject',
        'lang': 'language',
        'fmt': 'format',
        'type': 'format',
    }

    # Fields that support FTS (full-text search)
    FTS_FIELDS = {'title', 'description', 'text', 'author', 'subject'}

    # Fields that are exact filters (not FTS)
    FILTER_FIELDS = {'language', 'format', 'series', 'publisher', 'rating', 'favorite', 'status'}

    # Numeric fields that support comparison operators
    NUMERIC_FIELDS = {'rating'}

    # Boolean fields
    BOOLEAN_FIELDS = {'favorite'}

    # SQL templates for the many-to-many filters; ``{param}`` is replaced by
    # the bind-parameter name. Prefixing with "NOT " yields the negated form.
    _SUBJECT_EXISTS = (
        "EXISTS (SELECT 1 FROM book_subjects bs "
        "JOIN subjects s ON bs.subject_id = s.id "
        "WHERE bs.book_id = books.id AND s.name LIKE :{param})"
    )
    _AUTHOR_EXISTS = (
        "EXISTS (SELECT 1 FROM book_authors ba "
        "JOIN authors a ON ba.author_id = a.id "
        "WHERE ba.book_id = books.id AND a.name LIKE :{param})"
    )

    # filter key -> (bind-parameter prefix, SQL template)
    _LINK_FILTERS = {
        'subjects': ('subject', _SUBJECT_EXISTS),
        'authors': ('author', _AUTHOR_EXISTS),
    }

    def __init__(self):
        """Compile the regular expressions used while scanning a query."""
        # field:value with an optional comparison operator; the value is
        # either a double-quoted string or a run of non-whitespace.
        self.field_pattern = re.compile(r'(\w+):(>=|<=|>|<|=)?("[^"]+"|[\S]+)')
        self.phrase_pattern = re.compile(r'"([^"]+)"')
        self.operator_pattern = re.compile(r'\b(AND|OR|NOT)\b', re.IGNORECASE)

    def parse(self, query: str) -> ParsedQuery:
        """
        Parse search query into structured format.

        Args:
            query: Search query string

        Returns:
            ParsedQuery with tokens, FTS query, and filters. An empty or
            whitespace-only query yields an empty ParsedQuery.
        """
        if not query or not query.strip():
            return ParsedQuery()

        query = query.strip()
        length = len(query)
        tokens = []
        pos = 0

        while pos < length:
            # Skip whitespace
            if query[pos].isspace():
                pos += 1
                continue

            # Check for NOT operator or -prefix
            negated = False
            if query[pos:pos + 4].upper() == 'NOT ':
                negated = True
                pos += 4
                # Only the keyword form swallows the whitespace that follows.
                while pos < length and query[pos].isspace():
                    pos += 1
            elif query[pos] == '-':
                negated = True
                pos += 1

            # Check for OR operator
            if query[pos:pos + 3].upper() == 'OR ':
                tokens.append(SearchToken(type='operator', value='OR'))
                pos += 3
                continue

            # Check for AND operator (usually implicit, but can be explicit)
            if query[pos:pos + 4].upper() == 'AND ':
                tokens.append(SearchToken(type='operator', value='AND'))
                pos += 4
                continue

            # Try to match field:value
            field_match = self.field_pattern.match(query, pos)
            if field_match:
                field_name = field_match.group(1).lower()
                tokens.append(SearchToken(
                    type='field',
                    # Apply field aliases
                    field=self.FIELD_ALIASES.get(field_name, field_name),
                    value=field_match.group(3).strip('"'),
                    # A missing comparison operator means equality.
                    operator=field_match.group(2) or '=',
                    negated=negated,
                ))
                pos = field_match.end()
                continue

            # Try to match quoted phrase
            phrase_match = self.phrase_pattern.match(query, pos)
            if phrase_match:
                tokens.append(SearchToken(
                    type='phrase',
                    value=phrase_match.group(1),
                    negated=negated,
                ))
                pos = phrase_match.end()
                continue

            # Match single word
            end_pos = pos
            while end_pos < length and not query[end_pos].isspace():
                end_pos += 1

            if end_pos > pos:
                tokens.append(SearchToken(
                    type='text',
                    value=query[pos:end_pos],
                    negated=negated,
                ))
                pos = end_pos
                continue

            # Nothing consumed (e.g. a '-' followed by whitespace or the end
            # of the query): step forward so the loop always makes progress.
            pos += 1

        # Build ParsedQuery from tokens
        parsed = ParsedQuery(tokens=tokens)
        self._build_fts_and_filters(parsed)

        return parsed

    def _build_fts_and_filters(self, parsed: ParsedQuery):
        """
        Build FTS query and filters from parsed tokens.

        Modifies parsed query in place: ``parsed.filters`` is always replaced
        and ``parsed.fts_query`` is set only when at least one FTS term exists.

        An OR is written only between two FTS terms, so an OR that is leading,
        trailing, repeated, or followed solely by filter fields never leaves a
        dangling operator in the FTS string. AND tokens are implicit and are
        not written out.

        NOTE(review): SQLite FTS5 documents NOT as a binary operator, so an
        FTS string that starts with a negated term (e.g. ``NOT java``) may be
        rejected by the engine -- confirm how the caller handles this.
        """
        fts_parts = []  # Parts for FTS5 query
        filters = {}
        pending_or = False  # an OR was seen and awaits its right-hand term

        for token in parsed.tokens:
            if token.type == 'operator':
                if token.value == 'OR':
                    # An OR with nothing on its left is dropped outright.
                    pending_or = bool(fts_parts)
                continue

            if token.type == 'field':
                fts_term = self._apply_field_token(token, filters)
            elif token.type in ('text', 'phrase'):
                fts_term = token.value
                if ' ' in fts_term or token.type == 'phrase':
                    # Quoted phrase for FTS5
                    fts_term = f'"{fts_term}"'
            else:
                fts_term = None

            if fts_term is None:
                # Filter-only token: leave any pending OR for the next FTS term.
                continue

            if token.negated:
                fts_term = f"NOT {fts_term}"
            if pending_or:
                fts_parts.append('OR')
                pending_or = False
            fts_parts.append(fts_term)

        # Build final FTS query
        if fts_parts:
            parsed.fts_query = ' '.join(fts_parts)

        parsed.filters = filters

    def _apply_field_token(self, token, filters):
        """Record one 'field' token in *filters* or return its FTS term.

        Returns the column-qualified FTS5 term (without any NOT prefix) for
        title/description/text tokens, and None for every token that was
        stored in *filters* or whose field is unknown.
        """
        name = token.field
        value = token.value

        if name in self.FTS_FIELDS:
            if name == 'subject':
                # Subjects are handled separately (join table)
                filters.setdefault('subjects', []).append((value, token.negated))
                return None
            if name == 'author':
                # Authors are not in FTS table, handle via SQL join
                filters.setdefault('authors', []).append((value, token.negated))
                return None

            # title, description, text - these ARE in FTS table
            # Map 'text' to 'extracted_text' column name
            fts_column = 'extracted_text' if name == 'text' else name
            if any(ch.isspace() for ch in value):
                # Keep a multi-word value bound to its column as one phrase;
                # unquoted, only the first word would carry the column filter.
                value = f'"{value}"'
            return f"{fts_column}:{value}"

        if name in self.FILTER_FIELDS:
            if name in self.NUMERIC_FIELDS:
                # Merge rather than overwrite so that e.g.
                # "rating:>=3 rating:<=5" keeps both bounds.
                filters.setdefault(name, {}).update(
                    self._parse_numeric_filter(value, token.operator)
                )
            elif name in self.BOOLEAN_FIELDS:
                filters[name] = value.lower() in ('true', 'yes', '1')
            else:
                # Exact match
                filters[name] = value

        return None

    def _parse_numeric_filter(self, value: str, operator: str) -> Dict[str, Any]:
        """
        Parse numeric filter with comparison operator.

        Values are converted with ``float``; an unparseable value yields an
        empty dict.

        Examples:
            rating:5 -> {'=': 5.0}
            rating:>=4 -> {'>=': 4.0}
            rating:3-5 -> {'>=': 3.0, '<=': 5.0}
        """
        # Check for range (e.g., 3-5)
        if '-' in value and operator == '=':
            parts = value.split('-')
            if len(parts) == 2:
                try:
                    return {
                        '>=': float(parts[0].strip()),
                        '<=': float(parts[1].strip()),
                    }
                except ValueError:
                    # Not a range after all (e.g. "-5"): fall through and
                    # try the whole value as a single number.
                    pass

        # Single value with operator
        try:
            return {operator: float(value)}
        except ValueError:
            return {}

    def to_sql_conditions(self, parsed: ParsedQuery) -> Tuple[str, Dict[str, Any]]:
        """
        Convert parsed query to SQL WHERE conditions.

        Returns:
            Tuple of (where_clause, params_dict). The clause joins every
            condition with ' AND ' and is '' when there are no conditions;
            filter values are passed through named bind parameters.

        This is used by Library.search() to build the final SQL query.
        """
        conditions = []
        params = {}

        for name, value in parsed.filters.items():
            if name in self._LINK_FILTERS:
                # Subject/author filtering (many-to-many)
                prefix, template = self._LINK_FILTERS[name]
                for index, (needle, negated) in enumerate(value):
                    param_name = f'{prefix}_{index}'
                    clause = template.format(param=param_name)
                    conditions.append(f"NOT {clause}" if negated else clause)
                    params[param_name] = f"%{needle}%"

            elif name == 'rating':
                # Numeric comparison via personal_metadata. The operator text
                # is interpolated into the SQL; when it comes from parse() it
                # is limited to the alternatives in field_pattern.
                for op, val in value.items():
                    suffix = op.replace("<", "lt").replace(">", "gt").replace("=", "eq")
                    param_name = f'rating_{suffix}'
                    conditions.append(
                        "EXISTS (SELECT 1 FROM personal_metadata pm "
                        f"WHERE pm.book_id = books.id AND pm.rating {op} :{param_name})"
                    )
                    params[param_name] = val

            elif name == 'favorite':
                # Boolean via personal_metadata
                conditions.append(
                    "EXISTS (SELECT 1 FROM personal_metadata pm "
                    "WHERE pm.book_id = books.id AND pm.favorite = :favorite)"
                )
                params['favorite'] = value

            elif name == 'status':
                # Reading status via personal_metadata
                conditions.append(
                    "EXISTS (SELECT 1 FROM personal_metadata pm "
                    "WHERE pm.book_id = books.id AND pm.reading_status = :status)"
                )
                params['status'] = value

            elif name == 'format':
                # File format, compared case-insensitively
                conditions.append(
                    "EXISTS (SELECT 1 FROM files f "
                    "WHERE f.book_id = books.id AND LOWER(f.format) = :format)"
                )
                params['format'] = value.lower()

            elif name == 'language':
                conditions.append("books.language = :language")
                params['language'] = value

            elif name == 'series':
                conditions.append("books.series LIKE :series")
                params['series'] = f"%{value}%"

            elif name == 'publisher':
                conditions.append("books.publisher LIKE :publisher")
                params['publisher'] = f"%{value}%"

        where_clause = ' AND '.join(conditions) if conditions else ''
        return where_clause, params
407
+
408
+
409
+ # Convenience function for parsing queries
410
def parse_search_query(query: str) -> ParsedQuery:
    """Parse *query* with a freshly constructed SearchQueryParser."""
    return SearchQueryParser().parse(query)