ebk 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ebk might be problematic. Click here for more details.
- ebk/ai/__init__.py +23 -0
- ebk/ai/knowledge_graph.py +443 -0
- ebk/ai/llm_providers/__init__.py +21 -0
- ebk/ai/llm_providers/base.py +230 -0
- ebk/ai/llm_providers/ollama.py +362 -0
- ebk/ai/metadata_enrichment.py +396 -0
- ebk/ai/question_generator.py +328 -0
- ebk/ai/reading_companion.py +224 -0
- ebk/ai/semantic_search.py +434 -0
- ebk/ai/text_extractor.py +394 -0
- ebk/cli.py +1097 -9
- ebk/db/__init__.py +37 -0
- ebk/db/migrations.py +180 -0
- ebk/db/models.py +526 -0
- ebk/db/session.py +144 -0
- ebk/exports/__init__.py +0 -0
- ebk/exports/base_exporter.py +218 -0
- ebk/exports/html_library.py +1390 -0
- ebk/exports/html_utils.py +117 -0
- ebk/exports/hugo.py +59 -0
- ebk/exports/jinja_export.py +287 -0
- ebk/exports/multi_facet_export.py +164 -0
- ebk/exports/symlink_dag.py +479 -0
- ebk/exports/zip.py +25 -0
- ebk/library_db.py +155 -0
- ebk/repl/__init__.py +9 -0
- ebk/repl/find.py +126 -0
- ebk/repl/grep.py +174 -0
- ebk/repl/shell.py +1677 -0
- ebk/repl/text_utils.py +320 -0
- ebk/services/__init__.py +11 -0
- ebk/services/import_service.py +442 -0
- ebk/services/tag_service.py +282 -0
- ebk/services/text_extraction.py +317 -0
- ebk/similarity/__init__.py +77 -0
- ebk/similarity/base.py +154 -0
- ebk/similarity/core.py +445 -0
- ebk/similarity/extractors.py +168 -0
- ebk/similarity/metrics.py +376 -0
- ebk/vfs/__init__.py +101 -0
- ebk/vfs/base.py +301 -0
- ebk/vfs/library_vfs.py +124 -0
- ebk/vfs/nodes/__init__.py +54 -0
- ebk/vfs/nodes/authors.py +196 -0
- ebk/vfs/nodes/books.py +480 -0
- ebk/vfs/nodes/files.py +155 -0
- ebk/vfs/nodes/metadata.py +385 -0
- ebk/vfs/nodes/root.py +100 -0
- ebk/vfs/nodes/similar.py +165 -0
- ebk/vfs/nodes/subjects.py +184 -0
- ebk/vfs/nodes/tags.py +371 -0
- ebk/vfs/resolver.py +228 -0
- {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/METADATA +1 -1
- ebk-0.3.2.dist-info/RECORD +69 -0
- ebk-0.3.2.dist-info/entry_points.txt +2 -0
- ebk-0.3.2.dist-info/top_level.txt +1 -0
- ebk-0.3.1.dist-info/RECORD +0 -19
- ebk-0.3.1.dist-info/entry_points.txt +0 -6
- ebk-0.3.1.dist-info/top_level.txt +0 -2
- {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/WHEEL +0 -0
- {ebk-0.3.1.dist-info → ebk-0.3.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,479 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Export library as a navigable directory structure using symlinks to represent tag hierarchies.
|
|
3
|
+
|
|
4
|
+
This module creates a filesystem view of the library where:
|
|
5
|
+
- Tags are represented as directories in a hierarchy
|
|
6
|
+
- Books appear in all relevant tag directories via symlinks
|
|
7
|
+
- The DAG structure of tags is preserved through the directory tree
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import json
|
|
12
|
+
import shutil
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
from typing import Dict, List, Set, Optional, Tuple
|
|
15
|
+
import re
|
|
16
|
+
from collections import defaultdict
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SymlinkDAGExporter:
    """Creates a navigable directory structure using symlinks to represent tag hierarchies."""

    def __init__(self):
        self.tag_separator = "/"  # Separator for hierarchical tags
        self.books_dir_name = "_books"  # Directory to store actual book files

    def export(self, lib_dir: str, output_dir: str,
               tag_field: str = "subjects",
               include_files: bool = False,  # Changed default to False
               create_index: bool = True,
               flatten: bool = False,
               min_books: int = 0):
        """
        Export library as symlink-based directory structure.

        Args:
            lib_dir: Path to the ebk library
            output_dir: Output directory for the symlink structure
            tag_field: Field to use for tags (default: "subjects")
            include_files: Whether to copy actual ebook files (default: False)
            create_index: Whether to create index.html files in directories
            flatten: Whether to create direct symlinks to files instead of _books structure
            min_books: Minimum books per tag folder; smaller folders go to _misc (default: 0)
        """
        lib_path = Path(lib_dir)
        output_path = Path(output_dir)

        # Load metadata
        metadata_file = lib_path / "metadata.json"
        with open(metadata_file, "r") as f:
            entries = json.load(f)

        # Create output directory
        output_path.mkdir(parents=True, exist_ok=True)

        # Create books directory for actual files (unless flattening)
        if not flatten:
            books_path = output_path / self.books_dir_name
            books_path.mkdir(exist_ok=True)

        # Process each entry
        entry_paths = {}  # Map entry ID to its path in _books (or file list when flattening)
        tag_entries = defaultdict(list)  # Map tag to list of entries

        for i, entry in enumerate(entries):
            entry_id = entry.get("unique_id", f"entry_{i}")

            if not flatten:
                # Create entry directory in _books
                entry_dir = books_path / self._sanitize_filename(entry_id)
                entry_dir.mkdir(exist_ok=True)
                entry_paths[entry_id] = entry_dir

                # Save metadata
                with open(entry_dir / "metadata.json", "w") as f:
                    json.dump(entry, f, indent=2)

                # Handle files - either copy or symlink
                if include_files:
                    self._copy_entry_files(entry, lib_path, entry_dir)
                else:
                    # Create symlinks to original files
                    self._symlink_entry_files(entry, lib_path, entry_dir)
            else:
                # For flatten mode, store original file paths
                entry_paths[entry_id] = entry.get("file_paths", [])

            # Create a readable symlink name
            title = entry.get("title", "Unknown Title")
            creators = entry.get("creators", [])
            if creators:
                readable_name = f"{self._sanitize_filename(title)} - {self._sanitize_filename(creators[0])}"
            else:
                readable_name = self._sanitize_filename(title)

            # Store readable name for later use
            entry["_readable_name"] = readable_name
            entry["_entry_id"] = entry_id

            # Extract tags and build hierarchy
            tags = entry.get(tag_field, [])
            if isinstance(tags, str):
                tags = [tags]

            for tag in tags:
                # Add to this tag and all parent tags.
                # FIX: use `j`, not `i`, so we don't shadow the outer
                # enumerate index over entries.
                tag_parts = tag.split(self.tag_separator)
                for j in range(len(tag_parts)):
                    parent_tag = self.tag_separator.join(tag_parts[:j + 1])
                    tag_entries[parent_tag].append(entry)

        # Consolidate small tag folders if min_books is set
        if min_books > 0:
            tag_entries = self._consolidate_small_tags(tag_entries, min_books)

        # Create tag directory structure with symlinks
        self._create_tag_structure(output_path, tag_entries, entry_paths, flatten, lib_path)

        # Create root index if requested
        if create_index:
            self._create_index_files(output_path, tag_entries, entries)

        # Create a README
        self._create_readme(output_path, len(entries), len(tag_entries))

    def _consolidate_small_tags(self, tag_entries: Dict[str, List[Dict]],
                                min_books: int) -> Dict[str, List[Dict]]:
        """Consolidate tags with fewer than min_books into a _misc folder."""
        consolidated = defaultdict(list)
        misc_entries = []

        for tag, entries in tag_entries.items():
            # Get unique entries for this tag
            seen_ids = set()
            unique_entries = []
            for entry in entries:
                entry_id = entry.get("_entry_id", entry.get("unique_id"))
                if entry_id not in seen_ids:
                    seen_ids.add(entry_id)
                    unique_entries.append(entry)

            # Check if this tag has enough unique books
            if len(unique_entries) < min_books:
                # Check if it's a leaf tag (no children with enough books)
                tag_prefix = tag + self.tag_separator
                has_large_children = any(
                    other_tag.startswith(tag_prefix) and
                    len(set(e.get("_entry_id", e.get("unique_id")) for e in tag_entries[other_tag])) >= min_books
                    for other_tag in tag_entries.keys()
                )

                if not has_large_children:
                    # Add to misc folder with tag prefix
                    for entry in unique_entries:
                        misc_entry = entry.copy()
                        # Store original tag for display in misc folder
                        misc_entry["_original_tag"] = tag
                        misc_entries.append(misc_entry)
                else:
                    # Keep it as is because it has large children
                    consolidated[tag] = entries
            else:
                # Keep tags with enough books
                consolidated[tag] = entries

        # Add misc entries if any
        if misc_entries:
            consolidated["_misc"] = misc_entries

        return dict(consolidated)

    def _sanitize_filename(self, name: str) -> str:
        """Sanitize a string to be safe as a filename."""
        # Replace problematic characters
        name = re.sub(r'[<>:"/\\|?*]', '-', str(name))
        # Remove leading/trailing spaces and dots
        name = name.strip('. ')
        # Limit length (being more conservative)
        if len(name) > 150:
            name = name[:147] + "..."
        return name or "unnamed"

    def _copy_entry_files(self, entry: Dict, lib_path: Path, entry_dir: Path):
        """Copy ebook and cover files for an entry."""
        # Copy ebook files
        for file_path in entry.get("file_paths", []):
            src_file = lib_path / file_path
            if src_file.exists():
                dest_file = entry_dir / src_file.name
                shutil.copy2(src_file, dest_file)

        # Copy cover file
        cover_path = entry.get("cover_path")
        if cover_path:
            src_cover = lib_path / cover_path
            if src_cover.exists():
                dest_cover = entry_dir / src_cover.name
                shutil.copy2(src_cover, dest_cover)

    def _symlink_entry_files(self, entry: Dict, lib_path: Path, entry_dir: Path):
        """Create symlinks to ebook and cover files for an entry."""
        # Symlink ebook files
        for file_path in entry.get("file_paths", []):
            src_file = lib_path / file_path
            if src_file.exists():
                # Get absolute path of source file
                abs_src = src_file.resolve()
                dest_link = entry_dir / src_file.name

                # Remove existing symlink if it exists
                if dest_link.exists() or dest_link.is_symlink():
                    dest_link.unlink()

                try:
                    # Create symlink using absolute path
                    dest_link.symlink_to(abs_src)
                except OSError as e:
                    print(f"Warning: Could not create symlink for '{file_path}': {e}")

        # Symlink cover file
        cover_path = entry.get("cover_path")
        if cover_path:
            src_cover = lib_path / cover_path
            if src_cover.exists():
                # Get absolute path of source cover
                abs_cover = src_cover.resolve()
                dest_link = entry_dir / src_cover.name

                if dest_link.exists() or dest_link.is_symlink():
                    dest_link.unlink()

                try:
                    # Create symlink using absolute path
                    dest_link.symlink_to(abs_cover)
                except OSError as e:
                    print(f"Warning: Could not create symlink for cover '{cover_path}': {e}")

    def _create_tag_structure(self, output_path: Path,
                              tag_entries: Dict[str, List[Dict]],
                              entry_paths: Dict[str, Path],
                              flatten: bool = False,
                              lib_path: Path = None):
        """Create the hierarchical tag directory structure with symlinks."""
        # Sort tags to ensure parents are created before children
        sorted_tags = sorted(tag_entries.keys())

        for tag in sorted_tags:
            # Create tag directory path
            tag_parts = tag.split(self.tag_separator)
            tag_dir = output_path
            for part in tag_parts:
                tag_dir = tag_dir / self._sanitize_filename(part)
            tag_dir.mkdir(parents=True, exist_ok=True)

            # Get unique entries for this tag (avoid duplicates)
            seen_ids = set()
            unique_entries = []
            for entry in tag_entries[tag]:
                entry_id = entry["_entry_id"]
                if entry_id not in seen_ids:
                    seen_ids.add(entry_id)
                    unique_entries.append(entry)

            # Create symlinks to entries
            for entry in unique_entries:
                entry_id = entry["_entry_id"]
                readable_name = entry["_readable_name"]

                # For _misc folder, include original tag in the name
                if tag == "_misc" and "_original_tag" in entry:
                    original_tag = entry["_original_tag"]
                    # BUG FIX: use a separate variable for the original tag's
                    # parts. The previous code reassigned `tag_parts` here,
                    # which corrupted the relative-depth computation
                    # (`[".."] * len(tag_parts)`) used for the symlink target
                    # below, producing broken links for _misc entries.
                    orig_parts = original_tag.split(self.tag_separator)
                    if len(orig_parts) > 2:
                        # Use only the last two parts of hierarchical tags
                        short_tag = self.tag_separator.join(orig_parts[-2:])
                    else:
                        short_tag = original_tag

                    # Further limit tag length
                    if len(short_tag) > 50:
                        short_tag = short_tag[:47] + "..."

                    tag_prefix = f"[{short_tag.replace(self.tag_separator, '-')}] "

                    # Ensure the total name isn't too long
                    max_name_length = 200  # Safe limit for most filesystems
                    if len(tag_prefix + readable_name) > max_name_length:
                        # Truncate the readable name to fit
                        available_length = max_name_length - len(tag_prefix) - 3
                        readable_name = readable_name[:available_length] + "..."

                    # BUG FIX: actually apply the prefix. It was computed (and
                    # accounted for in the length check) but never prepended,
                    # so _misc entries lost their tag label.
                    readable_name = tag_prefix + readable_name

                if not flatten:
                    # Path to actual entry in _books (relative, so the export
                    # tree stays relocatable)
                    target_path = Path(*[".."] * len(tag_parts)) / self.books_dir_name / self._sanitize_filename(entry_id)
                    # Create symlink
                    symlink_path = tag_dir / readable_name
                else:
                    # For flatten mode, create direct symlinks to original files
                    file_paths = entry_paths.get(entry_id, [])
                    if file_paths:
                        # Use the first file path (usually the main ebook file)
                        original_file = file_paths[0]
                        # Get absolute path to the original file
                        abs_file_path = (lib_path / original_file).resolve()
                        # Use original filename as symlink name
                        symlink_path = tag_dir / Path(original_file).name
                        target_path = abs_file_path
                    else:
                        continue  # Skip if no files

                # Remove existing symlink if it exists
                if symlink_path.exists() or symlink_path.is_symlink():
                    symlink_path.unlink()

                # Create relative symlink
                try:
                    symlink_path.symlink_to(target_path)
                except OSError as e:
                    # On Windows, creating symlinks might require admin privileges
                    print(f"Warning: Could not create symlink for '{readable_name}': {e}")

    def _create_index_files(self, output_path: Path,
                            tag_entries: Dict[str, List[Dict]],
                            all_entries: List[Dict]):
        """Create index.html files in each directory for web browsing."""
        # Create root index with tag counts
        root_child_tags = {}
        for tag, entries in tag_entries.items():
            if self.tag_separator not in tag:  # Top-level tags only
                unique_count = len(set(e.get("_entry_id", e.get("unique_id"))
                                       for e in entries))
                root_child_tags[tag] = unique_count
        self._write_index_file(output_path, "Library Root", all_entries, root_child_tags, output_path)

        # Create index for each tag directory
        for tag, entries in tag_entries.items():
            tag_parts = tag.split(self.tag_separator)
            tag_dir = output_path
            for part in tag_parts:
                tag_dir = tag_dir / self._sanitize_filename(part)

            # Get child tags with counts
            child_tags = {}
            tag_prefix = tag + self.tag_separator
            for other_tag, other_entries in tag_entries.items():
                if other_tag.startswith(tag_prefix) and other_tag != tag:
                    # Check if it's a direct child
                    remaining = other_tag[len(tag_prefix):]
                    if self.tag_separator not in remaining:
                        # Count unique entries for this tag
                        unique_count = len(set(e.get("_entry_id", e.get("unique_id"))
                                               for e in other_entries))
                        child_tags[other_tag] = unique_count

            # Get unique entries
            seen_ids = set()
            unique_entries = []
            for entry in entries:
                entry_id = entry.get("_entry_id", entry.get("unique_id"))
                if entry_id not in seen_ids:
                    seen_ids.add(entry_id)
                    unique_entries.append(entry)

            self._write_index_file(tag_dir, tag, unique_entries, child_tags, output_path)

    def _write_index_file(self, directory: Path, title: str,
                          entries: List[Dict], child_tags: Dict[str, int], output_path: Path):
        """Write an index.html file for a directory using Jinja2 template."""
        from jinja2 import Environment, FileSystemLoader
        import json
        import re

        # Prepare entries for JSON (clean and escape)
        clean_entries = []
        for entry in entries:
            clean_entry = {}
            for key, value in entry.items():
                if isinstance(value, str):
                    # Remove problematic HTML from descriptions
                    if key == "description":
                        # Strip HTML tags from description for JSON
                        value = re.sub(r'<[^>]+>', '', value)
                        # Limit description length
                        if len(value) > 500:
                            value = value[:500] + "..."
                    clean_entry[key] = value
                elif isinstance(value, list):
                    clean_entry[key] = [str(v) for v in value]
                else:
                    clean_entry[key] = str(value)
            clean_entries.append(clean_entry)

        # Convert to JSON for JavaScript
        entries_json = json.dumps(clean_entries, ensure_ascii=True)

        # Set up Jinja2 environment
        template_dir = Path(__file__).parent / "templates"
        env = Environment(loader=FileSystemLoader(str(template_dir)))
        template = env.get_template("advanced_index.html")

        # Calculate if we're in a subdirectory (for proper _books path)
        is_subdir = directory != output_path

        # Render template
        html_content = template.render(
            title=title,
            entries=entries,
            entries_json=entries_json,
            child_tags=child_tags,
            tag_separator=self.tag_separator,
            is_subdir=is_subdir
        )

        # Write the file
        index_path = directory / "index.html"
        with open(index_path, "w", encoding="utf-8") as f:
            f.write(html_content)

    def _create_readme(self, output_path: Path, num_entries: int, num_tags: int):
        """Create a README file explaining the structure."""
        readme_content = f"""# EBK Library - Symlink Navigation Structure

This directory contains a navigable view of your ebook library organized by tags.

## Statistics
- Total books: {num_entries}
- Total tags/categories: {num_tags}

## Structure

- **_books/**: Contains the actual ebook files and metadata
- **Tag directories**: Each tag becomes a directory, with hierarchical tags creating nested directories
- **Symlinks**: Books appear in multiple tag directories via symbolic links

## Navigation

You can navigate this structure using:
1. Your file explorer (Finder, Windows Explorer, etc.)
2. Command line tools (cd, ls, etc.)
3. Web browser (open index.html files)

## Hierarchical Tags

Tags like "Programming/Python/Web" create a nested structure:
```
Programming/
    Python/
        Web/
            (books tagged with Programming/Python/Web)
        (books tagged with Programming/Python)
    (books tagged with Programming)
```

Books appear at each relevant level in the hierarchy.

## Notes

- This is a read-only view. Modifying files here won't affect the original library.
- Symlinks point to files in the _books directory.
- On Windows, you may need administrator privileges to create symlinks.

Generated by EBK - https://github.com/queelius/ebk
"""

        with open(output_path / "README.md", "w") as f:
            f.write(readme_content)
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def export_symlink_dag(lib_dir: str, output_dir: str, **kwargs):
    """
    Convenience wrapper: export a library as a symlink DAG.

    Args:
        lib_dir: Path to ebk library
        output_dir: Output directory
        **kwargs: Additional arguments passed to SymlinkDAGExporter.export()
    """
    SymlinkDAGExporter().export(lib_dir, output_dir, **kwargs)
|
ebk/exports/zip.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import zipfile
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
import logging
|
|
5
|
+
|
|
6
|
+
logger = logging.getLogger(__name__)


def export_zipfile(lib_dir, zip_file):
    """
    Export ebk library to a ZIP archive.

    Walks the whole library directory and stores every file in the
    archive with a path relative to the library root.

    Args:
        lib_dir (str): Path to the ebk library directory to export (contains `metadata.json` and ebook-related files)
        zip_file (str): Path to the output ZIP file
    """
    lib_dir = Path(lib_dir)

    # just want to take the entire directory and zip it
    with zipfile.ZipFile(zip_file, "w") as z:
        for root, _, files in os.walk(lib_dir):
            for file in files:
                file_path = Path(root) / file
                # BUG FIX: use the module-level named logger (was
                # `logging.debug`, which logs via the root logger and
                # bypasses per-module configuration); lazy %-formatting
                # avoids building the message when DEBUG is disabled.
                logger.debug("Adding file to zip: %s", file_path)
                z.write(file_path, arcname=file_path.relative_to(lib_dir))
|
ebk/library_db.py
CHANGED
|
@@ -627,6 +627,113 @@ class Library:
|
|
|
627
627
|
self.session.commit()
|
|
628
628
|
logger.info(f"Deleted book: {book.title}")
|
|
629
629
|
|
|
630
|
+
def find_similar(
    self,
    book_id: int,
    top_k: int = 10,
    similarity_config: Optional[Any] = None,
    filter_language: bool = True,
) -> List[Tuple[Book, float]]:
    """
    Find books similar to the given book.

    Uses semantic similarity based on content, metadata, etc.

    Args:
        book_id: ID of the query book
        top_k: Number of similar books to return (default 10)
        similarity_config: Optional BookSimilarity instance
            (default: balanced preset)
        filter_language: If True, only return books in same language

    Returns:
        List of (book, similarity_score) tuples, sorted by similarity

    Example:
        >>> similar = lib.find_similar(42, top_k=5)
        >>> for book, score in similar:
        ...     print(f"{book.title}: {score:.2f}")
    """
    from ebk.similarity import BookSimilarity

    # Look up the query book; bail out early if it doesn't exist.
    query_book = self.get_book(book_id)
    if query_book is None:
        logger.warning(f"Book {book_id} not found")
        return []

    # Build the candidate pool, optionally restricted to the same language.
    builder = self.query()
    if filter_language and query_book.language:
        builder = builder.filter_by_language(query_book.language)
    candidates = builder.all()
    if not candidates:
        return []

    # Fall back to the balanced preset when no config was supplied.
    config = similarity_config
    if config is None:
        config = BookSimilarity().balanced()

    # Fit once over the whole pool, then rank against the query book.
    config.fit(candidates)
    results = config.find_similar(query_book, candidates, top_k=top_k)

    logger.info(
        f"Found {len(results)} similar books to '{query_book.title}'"
    )
    return results
|
|
690
|
+
|
|
691
|
+
def compute_similarity_matrix(
    self,
    book_ids: Optional[List[int]] = None,
    similarity_config: Optional[Any] = None,
) -> Tuple[List[Book], Any]:
    """
    Compute pairwise similarity matrix for books.

    Args:
        book_ids: Optional list of book IDs (default: all books)
        similarity_config: Optional BookSimilarity instance
            (default: balanced preset)

    Returns:
        Tuple of (books, similarity_matrix)
        where similarity_matrix[i][j] = similarity(books[i], books[j])

    Example:
        >>> books, matrix = lib.compute_similarity_matrix()
        >>> # matrix[0][1] is similarity between books[0] and books[1]
    """
    from ebk.similarity import BookSimilarity

    # Resolve the book list: the requested IDs (dropping any that don't
    # exist) or the whole library.
    if book_ids:
        fetched = (self.get_book(bid) for bid in book_ids)
        books = [b for b in fetched if b is not None]
    else:
        books = self.query().all()

    if not books:
        logger.warning("No books found for similarity matrix")
        return [], None

    # Default to the balanced preset when no config was supplied.
    config = similarity_config
    if config is None:
        config = BookSimilarity().balanced()

    # Fit once, then compute the full pairwise matrix.
    config.fit(books)
    matrix = config.similarity_matrix(books)

    logger.info(f"Computed {len(books)}x{len(books)} similarity matrix")

    return books, matrix
|
|
736
|
+
|
|
630
737
|
|
|
631
738
|
class QueryBuilder:
|
|
632
739
|
"""Fluent query builder for books."""
|
|
@@ -667,6 +774,54 @@ class QueryBuilder:
|
|
|
667
774
|
self._query = self._query.filter(Book.publisher.ilike(f"%{publisher}%"))
|
|
668
775
|
return self
|
|
669
776
|
|
|
777
|
+
def filter_by_year(self, year: int) -> 'QueryBuilder':
    """Filter by publication year.

    Args:
        year: Publication year (e.g., 1975)

    Returns:
        Self for chaining
    """
    # publication_date may be stored as "YYYY", "YYYY-MM" or "YYYY-MM-DD",
    # so a prefix match on the year string covers all three forms.
    self._query = self._query.filter(Book.publication_date.like(f"{year}%"))
    return self
|
|
791
|
+
|
|
792
|
+
def filter_by_text(self, search_text: str) -> 'QueryBuilder':
    """Filter by full-text search.

    Uses FTS5 to search across title, description, and extracted text.

    Args:
        search_text: Text to search for

    Returns:
        Self for chaining
    """
    from sqlalchemy import text as sql_text

    # Ask the FTS5 shadow table for matching book IDs, best match first.
    rows = self.session.execute(
        sql_text("""
            SELECT book_id
            FROM books_fts
            WHERE books_fts MATCH :query
            ORDER BY rank
        """),
        {"query": search_text}
    )
    matched_ids = [row[0] for row in rows]

    if matched_ids:
        self._query = self._query.filter(Book.id.in_(matched_ids))
    else:
        # No FTS hits: constrain to an impossible ID so the query
        # deterministically returns nothing.
        self._query = self._query.filter(Book.id == -1)

    return self
|
|
824
|
+
|
|
670
825
|
def filter_by_reading_status(self, status: str) -> 'QueryBuilder':
|
|
671
826
|
"""Filter by reading status."""
|
|
672
827
|
self._query = self._query.join(Book.personal).filter(
|
ebk/repl/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
"""REPL shell for interactive library navigation.
|
|
2
|
+
|
|
3
|
+
This module provides an interactive shell for navigating and managing
|
|
4
|
+
the ebook library through a virtual filesystem interface.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from ebk.repl.shell import LibraryShell
|
|
8
|
+
|
|
9
|
+
__all__ = ["LibraryShell"]
|