ebk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ebk might be problematic. Click here for more details.
- ebk/__init__.py +0 -0
- ebk/cli.py +879 -0
- ebk/config.py +35 -0
- ebk/exports/__init__.py +0 -0
- ebk/exports/hugo.py +55 -0
- ebk/exports/zip.py +25 -0
- ebk/extract_metadata.py +273 -0
- ebk/ident.py +96 -0
- ebk/imports/__init__.py +0 -0
- ebk/imports/calibre.py +144 -0
- ebk/imports/ebooks.py +116 -0
- ebk/llm.py +58 -0
- ebk/manager.py +44 -0
- ebk/merge.py +308 -0
- ebk/streamlit/__init__.py +0 -0
- ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
- ebk/streamlit/app.py +185 -0
- ebk/streamlit/display.py +168 -0
- ebk/streamlit/filters.py +151 -0
- ebk/streamlit/utils.py +58 -0
- ebk/utils.py +311 -0
- ebk-0.1.0.dist-info/METADATA +457 -0
- ebk-0.1.0.dist-info/RECORD +29 -0
- ebk-0.1.0.dist-info/WHEEL +5 -0
- ebk-0.1.0.dist-info/entry_points.txt +2 -0
- ebk-0.1.0.dist-info/top_level.txt +1 -0
ebk/utils.py
ADDED
|
@@ -0,0 +1,311 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from collections import Counter
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import List, Dict, Optional
|
|
6
|
+
import logging
|
|
7
|
+
from jmespath import search as jmes_search
|
|
8
|
+
import sys
|
|
9
|
+
from rich.console import Console
|
|
10
|
+
from rich.table import Table
|
|
11
|
+
from rich.markdown import Markdown
|
|
12
|
+
from rich import print
|
|
13
|
+
import re
|
|
14
|
+
|
|
15
|
+
# rich is imported unconditionally at the top of this module, so this flag is
# always True; print_json_as_table() consults it before building a table.
RICH_AVAILABLE: bool = True

# Module-level logger shared by every helper in this file.
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
def search_jmes(lib_dir: str, expression: str):
    """
    Run a JMESPath query against the entries of an ebk library.

    JMESPath is very expressive, which makes this the most flexible search
    entry point, at the cost of a steeper learning curve.

    Args:
        lib_dir (str): Path to the ebk library directory
        expression (str): Search expression (JMESPath)

    Returns:
        Any: Whatever the JMESPath query evaluates to, or an empty list when
        the library is missing, unreadable or empty.
    """
    entries = load_library(lib_dir)
    if entries:
        return jmes_search(expression, entries)

    # load_library() reports failure (and an empty library) as a falsy value.
    logger.error(f"Failed to load the library at {lib_dir}")
    return []
|
|
40
|
+
|
|
41
|
+
def search_regex(lib_dir: str, expression: str, fields: Optional[List[str]] = None):
    """
    Search entries in an ebk library with a regular expression.

    An entry is returned when at least one of the selected fields holds a
    non-empty string in which ``expression`` matches (``re.search``
    semantics, i.e. the match may start anywhere). Non-string values such as
    lists are not inspected.

    Args:
        lib_dir (str): Path to the ebk library directory.
        expression (str): Regular expression to look for.
        fields (List[str]): Metadata keys to inspect. Defaults to None, which
            means ``["title"]``.

    Returns:
        List[Dict]: Matching entries, in library order. Empty when nothing
        matches or the library could not be loaded.

    Raises:
        re.error: If ``expression`` is not a valid regular expression.
    """
    # None stands in for the former ["title"] default so that no mutable list
    # object is shared between calls.
    wanted_fields = ["title"] if fields is None else fields

    # Compile once up front instead of re-resolving the pattern per value.
    pattern = re.compile(expression)

    results = []
    for entry in load_library(lib_dir):
        for key, value in entry.items():
            if key not in wanted_fields or not value:
                continue
            if isinstance(value, str) and pattern.search(value):
                results.append(entry)
                break  # one matching field is enough; avoid duplicates

    return results
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def load_library(lib_dir: str) -> List[Dict]:
    """
    Load an ebk library from the specified directory.

    The library is read from ``metadata.json`` directly inside ``lib_dir``.

    Args:
        lib_dir (str): Path to the ebk library directory

    Returns:
        List[Dict]: The parsed content of ``metadata.json`` (expected to be
        a list of entries). An empty list is returned, and an error logged,
        when the file is missing or does not contain valid JSON.
    """
    metadata_path = Path(lib_dir) / "metadata.json"
    if not metadata_path.exists():
        logger.error(f"Metadata file not found at {metadata_path}")
        return []

    # Decode explicitly as UTF-8: without an encoding argument open() uses the
    # locale default, which mis-decodes non-ASCII titles and creators on
    # platforms whose default is not UTF-8 (e.g. Windows).
    with open(metadata_path, "r", encoding="utf-8") as f:
        try:
            return json.load(f)
        except json.JSONDecodeError as e:
            logger.error(f"Error decoding JSON from {metadata_path}: {e}")
            return []
|
|
78
|
+
|
|
79
|
+
def get_library_statistics(lib_dir: str,
                           keywords: List[str] = None) -> Dict:
    """
    Compute statistics for an ebk library.

    Args:
        lib_dir (str): Path to the ebk library directory.
        keywords (List[str]): Keywords to count in titles, using a
            case-insensitive substring match (default: None, meaning no
            keyword counting).

    Returns:
        dict: Statistics about the library, or an empty dict when the
        library could not be loaded or has no entries. Keys:
        total_entries, languages, creators_count,
        average_creators_per_entry, most_creators_in_entry,
        least_creators_in_entry, top_creators (five most frequent),
        subjects, most_common_subjects (five most frequent),
        average_title_length, longest_title, shortest_title, virtual_libs,
        titles_with_keywords.
    """
    library = load_library(lib_dir)
    if not library:
        logger.error(f"Failed to load the library at {lib_dir}")
        return {}

    # The default keywords=None used to be iterated directly, which raised
    # TypeError; treat None as "no keywords". Lower-case each keyword once.
    keyword_pairs = [(keyword, keyword.lower()) for keyword in (keywords or [])]

    languages = Counter()
    top_creators = Counter()
    subjects = Counter()
    virtual_libs = Counter()
    titles_with_keywords = Counter()

    creator_counts = []  # number of creators per entry, one item per entry
    title_lengths = []
    longest_title = ""
    shortest_title = ""

    for entry in library:
        languages[entry.get("language", "unknown")] += 1

        # "or []" also covers keys that are present but hold null.
        creators = entry.get("creators") or []
        creator_counts.append(len(creators))
        top_creators.update(creators)

        subjects.update(entry.get("subjects") or [])
        virtual_libs.update(entry.get("virtual_libs") or [])

        title = entry.get("title") or ""
        if title:
            title_lengths.append(len(title))
            if len(title) > len(longest_title):
                longest_title = title
            if not shortest_title or len(title) < len(shortest_title):
                shortest_title = title

            lowered_title = title.lower()
            for keyword, lowered_keyword in keyword_pairs:
                if lowered_keyword in lowered_title:
                    titles_with_keywords[keyword] += 1

    total_entries = len(creator_counts)
    creators_count = sum(creator_counts)

    return {
        "total_entries": total_entries,
        "languages": dict(languages),
        "creators_count": creators_count,
        # library is non-empty here, so the division is safe.
        "average_creators_per_entry": round(creators_count / total_entries, 2),
        "most_creators_in_entry": max(creator_counts),
        # A true minimum: the earlier running update used 0 as "not set yet",
        # so an entry without creators was overwritten by the next entry.
        "least_creators_in_entry": min(creator_counts),
        "top_creators": dict(top_creators.most_common(5)),
        "subjects": subjects,
        "most_common_subjects": subjects.most_common(5),
        "average_title_length": round(sum(title_lengths) / len(title_lengths), 2) if title_lengths else 0,
        "longest_title": longest_title,
        "shortest_title": shortest_title,
        "virtual_libs": dict(virtual_libs),
        "titles_with_keywords": dict(titles_with_keywords),
    }
|
|
166
|
+
|
|
167
|
+
def get_unique_filename(target_path: str) -> str:
    """
    Return a path that does not exist yet, derived from target_path.

    When target_path is free it is returned unchanged. Otherwise a counter
    in parentheses is inserted before the extension and incremented until
    an unused name is found.

    Example:
        'myfile.pdf' -> if it exists -> 'myfile (1).pdf' -> if that exists -> 'myfile (2).pdf'
    """
    stem, suffix = os.path.splitext(target_path)

    candidate = target_path
    attempt = 0
    while os.path.exists(candidate):
        attempt += 1
        candidate = f"{stem} ({attempt}){suffix}"

    return candidate
|
|
186
|
+
|
|
187
|
+
def enumerate_ebooks(metadata_list: List[Dict],
                     lib_path: Path,
                     indices: Optional[List[int]] = None,
                     detailed: Optional[bool] = False) -> None:
    """
    Enumerates and displays the ebooks of a library as a Rich table.

    For each ebook, displays its index, title, creators, and a clickable link
    to the first file listed under its 'file_paths' key.

    Args:
        metadata_list (List[Dict]): List of metadata dictionaries for each ebook.
        lib_path (Path): Library directory that the entries' file paths are
            relative to.
        indices (List[int]): Zero-based indices to display (default: None,
            meaning every entry).
        detailed (bool): Add the extra column headers (subjects, language,
            date, ...) to the table (default: False).
    """
    console = Console()

    total_books = len(metadata_list)
    if total_books == 0:
        console.print("[yellow]No ebooks found in the library.[/yellow]")
        return

    # A set gives constant-time membership tests; None means "show all".
    selected = None if indices is None else set(indices)
    lib_path = Path(lib_path)  # tolerate callers that pass a plain string

    console.print(f"📚 [bold]Found {total_books} ebook(s) in the library:[/bold]\n")

    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("#", style="dim")
    table.add_column("Title")
    table.add_column("Creators")
    table.add_column("Link")

    if detailed:
        # NOTE(review): the rows added below only supply the first four
        # cells, so these columns get no values (presumably rendered blank
        # by rich - confirm). Filling them needs the metadata schema.
        for heading in ("Subjects", "Language", "Date", "Identifiers",
                        "Publisher", "File Size", "Virtual Libraries", "UID"):
            table.add_column(heading)

    for i, book in enumerate(metadata_list):
        if selected is not None and i not in selected:
            continue

        title = book.get('title', '-')
        creators = book.get('creators', ['-'])
        if not isinstance(creators, list):
            creators = [str(creators)]
        # str() keeps a stray non-string creator from breaking join().
        creators_str = ', '.join(str(creator) for creator in creators)

        ebook_paths = book.get('file_paths', [])
        ebook_path = ebook_paths[0] if ebook_paths else None

        if not ebook_path:
            link_display = "[red]🔗 Unknown[/red]"
        else:
            ebook_full_path = lib_path / ebook_path
            if ebook_full_path.exists():
                # as_uri() yields a percent-encoded file:// URI on every
                # platform; interpolating the raw path left spaces, '#' or
                # ']' unescaped, which breaks the link and the rich markup.
                ebook_link = ebook_full_path.resolve().as_uri()
                link_display = f"[link={ebook_link}]🔗 Open[/link]"
            else:
                link_display = "[red]🔗 Not Found[/red]"

        table.add_row(str(i), title, creators_str, link_display)

    console.print(table)
    console.print("\n")  # Add some spacing
|
|
264
|
+
|
|
265
|
+
def get_index_by_unique_id(lib_dir: str, id: str) -> int:
    """
    Locate an entry in the library by its unique ID.

    Args:
        lib_dir (str): Path to the ebk library directory.
        id (str): Unique ID to search for, compared against each entry's
            'unique_id' value.

    Returns:
        int: Zero-based position of the first entry whose unique ID equals
        ``id``, or -1 when no entry matches.

    Raises:
        ValueError: If the library cannot be loaded (or is empty).
    """
    entries = load_library(lib_dir)
    if not entries:
        raise ValueError("Failed to load the library.")

    matching_positions = (
        position
        for position, record in enumerate(entries)
        if record.get('unique_id') == id
    )
    # next() stops at the first hit; -1 is the documented "not found" value.
    return next(matching_positions, -1)
|
|
289
|
+
|
|
290
|
+
def print_json_as_table(data):
    """
    Pretty print JSON data, rendering dictionaries as a Rich table.

    A dict becomes a two-column Key/Value table with every key and value
    converted via str(). Any other value is handed to print() as is. When
    RICH_AVAILABLE is false the data is printed as indented JSON text
    instead.

    Args:
        data: JSON data to print
    """
    if not RICH_AVAILABLE:
        print(json.dumps(data, indent=2))
        return

    if not isinstance(data, dict):
        print(data)
        return

    grid = Table(show_header=True, header_style="bold magenta")
    grid.add_column("Key", style="dim", width=20)
    grid.add_column("Value", width=80)
    for field, content in data.items():
        grid.add_row(str(field), str(content))

    Console().print(grid)
|
|
311
|
+
|