ebk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

ebk/utils.py ADDED
@@ -0,0 +1,311 @@
1
+ import json
2
+ import os
3
+ from collections import Counter
4
+ from pathlib import Path
5
+ from typing import List, Dict, Optional
6
+ import logging
7
+ from jmespath import search as jmes_search
8
+ import sys
9
+ from rich.console import Console
10
+ from rich.table import Table
11
+ from rich.markdown import Markdown
12
+ from rich import print
13
+ import re
14
+
15
+ RICH_AVAILABLE = True
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
def search_jmes(lib_dir: str, expression: str):
    """
    Query an ebk library with a JMESPath expression.

    JMESPath is flexible enough to express almost any lookup over the
    library's metadata, at the cost of a steeper learning curve than the
    simpler search helpers.

    Args:
        lib_dir (str): Path to the ebk library directory
        expression (str): JMESPath expression evaluated against the list of
            library entries

    Returns:
        Any: Whatever the JMESPath expression evaluates to, or an empty list
        when the library could not be loaded (or is empty).
    """
    entries = load_library(lib_dir)
    if entries:
        return jmes_search(expression, entries)

    # An empty result from load_library is reported as a load failure.
    logger.error(f"Failed to load the library at {lib_dir}")
    return []
40
+
41
def search_regex(lib_dir: str, expression: str, fields: Optional[List[str]] = None):
    """
    Search entries in an ebk library with a regular expression.

    An entry matches when at least one of the requested fields holds a
    non-empty string in which the pattern is found (``re.search`` semantics,
    so the pattern may match anywhere in the value). Non-string values are
    ignored.

    Args:
        lib_dir (str): Path to the ebk library directory
        expression (str): Regular expression to look for
        fields (List[str]): Names of the entry fields to search
            (default: ``["title"]``). A single field name given as a plain
            string is treated as a one-element list.

    Returns:
        List[Dict]: Matching entries, in library order, each at most once.

    Raises:
        re.error: If ``expression`` is not a valid regular expression.
    """
    # None replaces the former mutable list default; the effective default
    # is unchanged.
    if fields is None:
        fields = ["title"]
    elif isinstance(fields, str):
        # A bare string would otherwise turn `key in fields` into a
        # substring test against the field name.
        fields = [fields]
    wanted = set(fields)

    # Compile once instead of resolving the pattern for every value.
    pattern = re.compile(expression)

    results = []
    for entry in load_library(lib_dir):
        for key, value in entry.items():
            if key in wanted and isinstance(value, str) and value and pattern.search(value):
                results.append(entry)
                break  # one hit is enough; avoid adding the entry twice

    return results
53
+
54
+
55
def load_library(lib_dir: str) -> List[Dict]:
    """
    Load an ebk library from the specified directory.

    The library is the parsed content of ``metadata.json`` inside
    ``lib_dir``. Problems with that file are logged rather than raised.

    Args:
        lib_dir (str): Path to the ebk library directory

    Returns:
        List[Dict]: The parsed content of ``metadata.json``, or an empty
        list when the file is missing, is not valid UTF-8, or is not valid
        JSON.
    """
    metadata_path = Path(lib_dir) / "metadata.json"
    if not metadata_path.exists():
        logger.error(f"Metadata file not found at {metadata_path}")
        return []

    try:
        # JSON is UTF-8 by specification; relying on the locale default
        # breaks non-ASCII metadata on platforms whose default differs.
        with open(metadata_path, "r", encoding="utf-8") as f:
            return json.load(f)
    except json.JSONDecodeError as e:
        logger.error(f"Error decoding JSON from {metadata_path}: {e}")
        return []
    except UnicodeDecodeError as e:
        # Same contract as malformed JSON: log and hand back an empty library.
        logger.error(f"Error decoding JSON from {metadata_path}: {e}")
        return []
78
+
79
def _as_list(value) -> list:
    """Normalize a metadata field to a list (empty/None -> [], scalar -> [scalar])."""
    if not value:
        return []
    if isinstance(value, (list, tuple, set)):
        return list(value)
    return [value]


def get_library_statistics(lib_dir: str,
                           keywords: Optional[List[str]] = None) -> Dict:
    """
    Compute statistics for an ebk library.

    Args:
        lib_dir (str): Path to the ebk library directory.
        keywords (List[str]): Keywords to count in titles, matched
            case-insensitively as substrings (default: None, meaning no
            keyword statistics).

    Returns:
        dict: Statistics about the library, or an empty dict when the
        library could not be loaded (or is empty). Keys:
        ``total_entries``, ``languages``, ``creators_count``,
        ``average_creators_per_entry``, ``most_creators_in_entry``,
        ``least_creators_in_entry``, ``top_creators`` (top 5),
        ``subjects`` (a Counter), ``most_common_subjects`` (top 5),
        ``average_title_length``, ``longest_title``, ``shortest_title``,
        ``virtual_libs`` and ``titles_with_keywords``.
    """
    library = load_library(lib_dir)
    if not library:
        logger.error(f"Failed to load the library at {lib_dir}")
        return {}

    # The default None used to be iterated directly, raising TypeError.
    # Lower-case each keyword once instead of once per entry.
    keyword_pairs = [(kw, kw.lower()) for kw in (keywords or [])]

    languages = Counter()
    top_creators = Counter()
    subjects = Counter()
    virtual_libs = Counter()
    titles_with_keywords = Counter()

    creator_counts = []  # one value per entry; min/max are taken at the end
    title_lengths = []   # only entries that actually have a title
    longest_title = ""
    shortest_title = ""

    for entry in library:
        # A missing or null language is reported as "unknown".
        languages[entry.get("language") or "unknown"] += 1

        # A scalar field is wrapped so a lone string counts as one item
        # rather than one item per character.
        creators = _as_list(entry.get("creators"))
        creator_counts.append(len(creators))
        top_creators.update(creators)

        subjects.update(_as_list(entry.get("subjects")))

        title = entry.get("title") or ""
        if title:
            title_lengths.append(len(title))
            # Strict comparisons keep the first title seen on ties.
            if len(title) > len(longest_title):
                longest_title = title
            if not shortest_title or len(title) < len(shortest_title):
                shortest_title = title

        lowered_title = title.lower()
        for keyword, keyword_lower in keyword_pairs:
            if keyword_lower in lowered_title:
                titles_with_keywords[keyword] += 1

        virtual_libs.update(_as_list(entry.get("virtual_libs")))

    total_entries = len(creator_counts)
    creators_count = sum(creator_counts)

    return {
        "total_entries": total_entries,
        "languages": dict(languages),
        "creators_count": creators_count,
        "average_creators_per_entry": (
            round(creators_count / total_entries, 2) if total_entries else 0
        ),
        "most_creators_in_entry": max(creator_counts, default=0),
        # A true minimum: the old 0-as-"unset" sentinel let a later entry
        # overwrite a genuine zero-creator minimum.
        "least_creators_in_entry": min(creator_counts, default=0),
        "top_creators": dict(top_creators.most_common(5)),
        "subjects": subjects,
        "most_common_subjects": subjects.most_common(5),
        "average_title_length": (
            round(sum(title_lengths) / len(title_lengths), 2) if title_lengths else 0
        ),
        "longest_title": longest_title,
        "shortest_title": shortest_title,
        "virtual_libs": dict(virtual_libs),
        "titles_with_keywords": dict(titles_with_keywords),
    }
166
+
167
def get_unique_filename(target_path: str) -> str:
    """
    Return a path that does not yet exist, derived from target_path.

    When target_path is free it is returned unchanged. Otherwise a numeric
    suffix " (1)", " (2)", ... is inserted before the extension until an
    unused name is found.

    Example:
        'myfile.pdf' taken -> 'myfile (1).pdf'; that taken too -> 'myfile (2).pdf'
    """
    if not os.path.exists(target_path):
        return target_path

    stem, extension = os.path.splitext(target_path)
    attempt = 1
    while True:
        candidate = f"{stem} ({attempt}){extension}"
        if not os.path.exists(candidate):
            return candidate
        attempt += 1
186
+
187
def enumerate_ebooks(metadata_list: List[Dict],
                     lib_path: Path,
                     indices: Optional[List[int]] = None,
                     detailed: Optional[bool] = False) -> None:
    """
    Enumerates and displays the ebooks in the specified library directory.

    For each selected ebook, prints a table row with its index, title,
    creators, and a clickable link to the first file listed under the
    entry's ``file_paths``.

    Args:
        metadata_list (List[Dict]): List of metadata dictionaries for each ebook.
        lib_path (Path): Library directory that the entries' relative
            ``file_paths`` are resolved against. A plain string is accepted.
        indices (List[int]): Positions in ``metadata_list`` to display
            (default: None, meaning all entries).
        detailed (bool): When true, the extra column headers (Subjects,
            Language, Date, ...) are added to the table.
    """
    console = Console()

    total_books = len(metadata_list)
    if total_books == 0:
        console.print("[yellow]No ebooks found in the library.[/yellow]")
        return

    # A set gives O(1) membership per row; None means "show everything".
    selected = None if indices is None else set(indices)
    library_root = Path(lib_path)

    console.print(f"📚 [bold]Found {total_books} ebook(s) in the library:[/bold]\n")

    table = Table(show_header=True, header_style="bold magenta")
    table.add_column("#", style="dim")
    table.add_column("Title")
    table.add_column("Creators")
    table.add_column("Link")

    if detailed:
        # NOTE(review): these headers are declared but the rows below only
        # supply the first four cells, so the detailed columns stay empty.
        # The metadata keys for several of them are not visible here.
        table.add_column("Subjects")
        table.add_column("Language")
        table.add_column("Date")
        table.add_column("Identifiers")
        table.add_column("Publisher")
        table.add_column("File Size")
        table.add_column("Virtual Libraries")
        table.add_column("UID")

    for i, book in enumerate(metadata_list):
        if selected is not None and i not in selected:
            continue

        title = str(book.get('title') or '-')
        creators = book.get('creators', ['-'])
        if not isinstance(creators, list):
            creators = [str(creators)]
        # str() guards against non-string items, which would make join() raise.
        creators_str = ', '.join(str(creator) for creator in creators)

        ebook_paths = book.get('file_paths', [])
        ebook_path = ebook_paths[0] if ebook_paths else None

        if not ebook_path:
            link_display = "[red]🔗 Unknown[/red]"
        else:
            ebook_full_path = library_root / ebook_path
            if ebook_full_path.exists():
                # as_uri() percent-encodes spaces and other special
                # characters on every platform; the hand-built
                # "file://<path>" form produced invalid links for such paths.
                ebook_link = ebook_full_path.resolve().as_uri()
                link_display = f"[link={ebook_link}]🔗 Open[/link]"
            else:
                link_display = "[red]🔗 Not Found[/red]"

        table.add_row(str(i), title, creators_str, link_display)

    console.print(table)
    console.print("\n")  # Add some spacing
264
+
265
def get_index_by_unique_id(lib_dir: str, id: str) -> int:
    """
    Find the position of the library entry carrying a given unique ID.

    Args:
        lib_dir (str): Path to the ebk library directory.
        id (str): Unique ID to look up (compared with each entry's
            ``unique_id`` field).

    Returns:
        int: Zero-based index of the first entry whose ``unique_id`` equals
        ``id``, or -1 when no entry matches.

    Raises:
        ValueError: If the library cannot be loaded (or is empty).
    """
    entries = load_library(lib_dir)
    if not entries:
        raise ValueError("Failed to load the library.")

    matches = (
        position
        for position, record in enumerate(entries)
        if record.get('unique_id') == id
    )
    # First hit wins; -1 signals "not found".
    return next(matches, -1)
289
+
290
def print_json_as_table(data):
    """
    Pretty print JSON data, rendering dictionaries as a two-column Rich table.

    Args:
        data: JSON data to print. A dict becomes a Key/Value table; anything
            else is printed as-is.
    """
    # Plain-text fallback when Rich is flagged as unavailable.
    if not RICH_AVAILABLE:
        print(json.dumps(data, indent=2))
        return

    # Only mappings have a natural key/value layout.
    if not isinstance(data, dict):
        print(data)
        return

    grid = Table(show_header=True, header_style="bold magenta")
    grid.add_column("Key", style="dim", width=20)
    grid.add_column("Value", width=80)
    for name, content in data.items():
        grid.add_row(str(name), str(content))

    Console().print(grid)
311
+