ebk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic; review the diff details below.

ebk/imports/ebooks.py ADDED
@@ -0,0 +1,116 @@
1
+ import os
2
+ import json
3
+ import shutil
4
+
5
+ from pathlib import Path
6
+
7
+ import fitz
8
+ from PIL import Image
9
+ from io import BytesIO
10
+
11
+ from rich.console import Console
12
+
13
+ from typing import Dict
14
+ from slugify import slugify
15
+ from ..extract_metadata import extract_metadata_from_pdf
16
+ from ..ident import add_unique_id
17
+ from ..utils import get_unique_filename
18
+
19
+ import logging
20
+
21
def import_ebooks(ebooks_dir, output_dir, output_formats):
    """
    Import ebooks from a directory into a new library folder.

    Walks ``ebooks_dir`` recursively, extracts metadata and a cover image
    from each supported file, copies the file (and its cover) into
    ``output_dir`` under a slugified name, and finally writes a
    ``metadata.json`` index describing every imported entry.

    Args:
        ebooks_dir (str): Path to the directory containing the ebooks.
        output_dir (str): Path to the output directory (must not already exist).
        output_formats (list): File extensions (without the dot) to import.
    """
    logger = logging.getLogger(__name__)

    # Refuse to clobber an existing library.
    if os.path.exists(output_dir):
        logger.error(f"Output directory already exists: {output_dir}")
        return
    os.makedirs(output_dir)

    metadata_list = []
    for root, _, files in os.walk(ebooks_dir):
        for file in files:
            try:
                # Seed the entry with the filename as a fallback title.
                item = {
                    "title": file
                }
                path = Path(root) / Path(file)

                # Infer the format of the file from its extension.
                _, ext = os.path.splitext(file)
                ext = ext.lower().strip(".")
                if ext not in output_formats:
                    continue

                cover_image = None
                if ext == "pdf":
                    metadata = extract_metadata_from_pdf(path)
                    cover_image = extract_cover_from_pdf(path)
                else:
                    # Only PDF extraction is implemented so far.
                    continue

                logger.debug(f"Importing ebook {file} in {root}")
                # Merge extracted metadata into the item, preferring values
                # already present on the item.  BUG FIX: previously the
                # merged dict was computed but never stored, so extracted
                # metadata (including "creators") was silently discarded.
                merged = {key: item.get(key) or metadata.get(key) or value
                          for key, value in metadata.items()}
                item.update(merged)

                item["root"] = root
                item["source_folder"] = ebooks_dir
                item["output_folder"] = output_dir
                item["imported_from"] = "ebooks"
                item["virtual_libs"] = [slugify(output_dir)]

                # Build a stable, filesystem-safe base name for the copies.
                title_slug = slugify(item.get("title", "unknown_title"))
                creator_slug = slugify(item.get("creators", ["unknown_creator"])[0])
                base_name = f"{title_slug}__{creator_slug}"

                _, ext = os.path.splitext(file)
                src = os.path.join(root, file)
                dst = os.path.join(output_dir, f"{base_name}{ext}")
                dst = get_unique_filename(dst)
                shutil.copy(src, dst)
                # Paths stored in metadata.json are relative to the library root.
                item["file_paths"] = [os.path.relpath(dst, output_dir)]

                if cover_image:
                    cover_image_file = os.path.join(output_dir, f"{base_name}_cover.jpg")
                    with open(cover_image_file, "wb") as cover:
                        cover.write(cover_image)
                    item["cover_path"] = os.path.relpath(cover_image_file, output_dir)

                # NOTE(review): entries are recorded even without a cover;
                # confirm this matches the intended (indentation-ambiguous)
                # original behaviour.
                metadata_list.append(item)

            except Exception as e:
                # Best-effort import: log and continue with the next file.
                logger.error(f"Error processing file {file} in {root}: {e}")

    # Assign each entry a content-based unique identifier.
    for entry in metadata_list:
        add_unique_id(entry)

    metadata_file = os.path.join(output_dir, "metadata.json")
    with open(metadata_file, "w") as f:
        json.dump(metadata_list, f, indent=2)
99
+
100
+
101
def extract_cover_from_pdf(pdf_path):
    """
    Render the first page of a PDF as a JPEG thumbnail.

    Args:
        pdf_path: Path to the PDF file.

    Returns:
        bytes: JPEG-encoded thumbnail (at most 256x256, aspect preserved)
        of the first page.
    """
    # Open the PDF and close it deterministically.  BUG FIX: the original
    # never closed the document, leaking the handle until GC.
    pdf_document = fitz.open(pdf_path)
    try:
        first_page = pdf_document[0]

        # Render the first page as a PNG image
        pix = first_page.get_pixmap()
        image = Image.open(BytesIO(pix.tobytes(output="png")))
    finally:
        pdf_document.close()

    # Create a thumbnail (in place)
    image.thumbnail((256, 256))

    # Convert the image to JPEG bytes
    image_bytes = BytesIO()
    image.save(image_bytes, format="JPEG")
    return image_bytes.getvalue()
ebk/llm.py ADDED
@@ -0,0 +1,58 @@
1
+ import os
2
+ import requests
3
+ from string import Template
4
+ from .config import load_ebkrc_config
5
+
6
+
7
def query_llm(lib_dir, prompt):
    """
    Query an OpenAI-compatible LLM endpoint about an ebk library.

    :param lib_dir: Path to the library directory the query concerns.
    :param prompt: The user query or conversation prompt text.
    :return: The JSON response from the endpoint.  Currently a stub: the
        HTTP integration is disabled and an empty dict is always returned.
    """
    # NOTE: the real endpoint call (config lookup, instruction-template
    # expansion from llm-instructions.md, POST via requests) is disabled
    # for now; this stub keeps the public interface stable.
    return {}
ebk/manager.py ADDED
@@ -0,0 +1,44 @@
1
+ import json
2
+
3
class LibraryManager:
    """Thin CRUD wrapper around a JSON file holding a list of book dicts.

    Each book is expected to be a dict with "Title", "Author" and "Tags"
    string fields -- TODO confirm against the producers of the JSON file;
    missing fields are now tolerated instead of raising KeyError.
    """

    def __init__(self, json_file):
        # Path to the backing JSON file; the library is loaded eagerly.
        self.json_file = json_file
        self._load_library()

    def _load_library(self):
        """Load the JSON library into memory."""
        with open(self.json_file, "r") as f:
            self.library = json.load(f)

    def save_library(self):
        """Save the in-memory library back to the JSON file."""
        with open(self.json_file, "w") as f:
            json.dump(self.library, f, indent=4)

    def list_books(self):
        """List all books in the library."""
        return self.library

    def search_books(self, query):
        """Search for books by title, author, or tags (case-insensitive).

        Books missing any of the searched fields are matched on the
        fields they do have (previously a missing key raised KeyError).
        """
        needle = query.lower()
        return [
            book for book in self.library
            if needle in (
                str(book.get("Title", "")).lower()
                + str(book.get("Author", "")).lower()
                + str(book.get("Tags", "")).lower()
            )
        ]

    def add_book(self, book_metadata):
        """Add a new book to the library and persist the change."""
        self.library.append(book_metadata)
        self.save_library()

    def delete_book(self, title):
        """Delete every book whose title equals *title* and persist."""
        self.library = [book for book in self.library if book.get("Title") != title]
        self.save_library()

    def update_book(self, title, new_metadata):
        """Update metadata for every book matching *title*.

        The file is written once after all matches are updated
        (previously it was re-saved once per matching book).
        """
        changed = False
        for book in self.library:
            if book.get("Title") == title:
                book.update(new_metadata)
                changed = True
        if changed:
            self.save_library()
ebk/merge.py ADDED
@@ -0,0 +1,308 @@
1
+ import os
2
+ import json
3
+ import shutil
4
+ from slugify import slugify
5
+ from typing import List, Dict, Tuple
6
+ from .ident import add_unique_id
7
+ import logging
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
def load_all_metadata(source_folders: List[str]) -> List[Tuple[Dict, str]]:
    """
    Collect every metadata entry from the given library folders.

    Each folder is expected to contain a 'metadata.json' holding a list
    of entries; the result pairs every entry with the folder it came from.
    Folders without a manifest are logged and skipped, as are manifests
    that fail to parse.
    """
    collected: List[Tuple[Dict, str]] = []
    for library in source_folders:
        manifest = os.path.join(library, "metadata.json")
        if not os.path.exists(manifest):
            logger.warning(f"No metadata.json found in {library}")
            continue
        with open(manifest, "r", encoding="utf-8") as handle:
            try:
                entries = json.load(handle)
            except json.JSONDecodeError as e:
                logger.error(f"Error decoding JSON from {manifest}: {e}")
                continue
        collected.extend((entry, library) for entry in entries)
    return collected
30
+
31
def perform_set_operation(
    entries: List[Dict],
    operation: str,
    source_counts: Dict[str, int],
    num_sources=None
) -> List[Dict]:
    """
    Perform the specified set operation on the list of entries.

    Args:
        entries (List[Dict]): List of eBook entries with 'unique_id'.
        operation (str): One of 'union', 'intersect', 'diff', 'symdiff'.
        source_counts (Dict[str, int]): Counts of how many sources each
            unique_id appears in.
        num_sources (int, optional): Number of source libraries.  When
            omitted, falls back to ``len(source_counts)`` -- the number of
            *unique ids*, not libraries -- which preserves the historical
            behaviour but makes 'intersect' almost always return [].
            Callers should pass the real library count.

    Returns:
        List[Dict]: Filtered list of entries based on the set operation.
    """
    # Backward-compatible default; see the num_sources docstring note.
    if num_sources is None:
        num_sources = len(source_counts)

    if operation == "union":
        # Every unique entry qualifies.
        return entries
    if operation == "intersect":
        # Entries present in all source libraries.
        return [entry for entry in entries
                if source_counts.get(entry['unique_id'], 0) == num_sources]
    if operation == "diff":
        # Historical semantics: entries appearing in exactly one library.
        # NOTE(review): a true lib1 - lib2 difference would need
        # per-library membership information, which this function lacks.
        return [entry for entry in entries
                if source_counts.get(entry['unique_id'], 0) == 1]
    if operation == "symdiff":
        # Symmetric difference: entries present in exactly one library
        # (identical to 'diff' above, by design of the count-based model).
        return [entry for entry in entries
                if source_counts.get(entry['unique_id'], 0) == 1]
    logger.error(f"Unsupported set operation: {operation}")
    return []
64
+
65
def merge_libraries(
    source_folders: List[str],
    merged_folder: str,
    operation: str
):
    """
    Merges multiple ebook libraries (each in a separate folder) into a single library
    based on the specified set-theoretic operation.

    Pipeline: load every library's metadata.json, deduplicate by
    'unique_id' (first occurrence wins), filter via the set operation,
    copy the surviving ebook files and covers into the merged folder
    (renaming clashes), and write the merged metadata.json.

    Args:
        source_folders (List[str]): List of source library folders to merge.
        merged_folder (str): Path to the folder where the merged library will be saved.
        operation (str): Set operation to apply ('union', 'intersect', 'diff', 'symdiff').
    """
    if not os.path.exists(merged_folder):
        os.makedirs(merged_folder)
        logger.info(f"Created merged folder at {merged_folder}")

    # Load all (entry, source_folder) pairs from every library.
    entries_with_sources = load_all_metadata(source_folders)

    # Index entries by unique_id, counting occurrences per id.
    # NOTE(review): source_counts counts *occurrences*, so an id listed
    # twice within one library is indistinguishable from one present in
    # two libraries -- confirm this is acceptable for the set operations.
    unique_entries = {}
    source_counts = {}

    for entry, source in entries_with_sources:
        uid = entry['unique_id']
        if uid not in unique_entries:
            unique_entries[uid] = entry
            source_counts[uid] = 1
        else:
            source_counts[uid] += 1
            # Metadata conflicts are not merged: the first occurrence is kept
            # and later duplicates are only counted.
            logger.debug(f"Duplicate entry found for unique_id {uid} in {source}. Ignoring.")

    all_unique_entries = list(unique_entries.values())

    # Perform the set operation
    filtered_entries = perform_set_operation(all_unique_entries, operation, source_counts)

    logger.info(f"Performing '{operation}' operation. {len(filtered_entries)} entries selected.")

    # Preprocess filenames to identify conflicts: a filename occurring
    # once may keep its name; duplicates get disambiguated during copy.
    filename_counts = {}
    cover_filename_counts = {}

    for entry in filtered_entries:
        # Count ebook filenames
        for file_rel_path in entry.get('file_paths', []):
            filename = os.path.basename(file_rel_path)
            filename_counts[filename] = filename_counts.get(filename, 0) + 1
        # Count cover filenames
        cover_path = entry.get('cover_path')
        if cover_path:
            cover_filename = os.path.basename(cover_path)
            cover_filename_counts[cover_filename] = cover_filename_counts.get(cover_filename, 0) + 1

    logger.debug(f"Ebook filename counts: {filename_counts}")
    logger.debug(f"Cover filename counts: {cover_filename_counts}")

    # Copy files and prepare merged metadata
    merged_metadata = []

    for entry in filtered_entries:
        # Copy eBook files with awareness of filename uniqueness
        new_entry = copy_entry_files(entry, source_folders, merged_folder, filename_counts)
        # Copy cover image with awareness of filename uniqueness
        new_entry = copy_cover_image(new_entry, source_folders, merged_folder, cover_filename_counts)
        merged_metadata.append(new_entry)

    # Write merged metadata.json
    merged_meta_path = os.path.join(merged_folder, "metadata.json")
    with open(merged_meta_path, "w", encoding="utf-8") as f:
        json.dump(merged_metadata, f, indent=2, ensure_ascii=False)

    logger.info(f"Merged {len(merged_metadata)} entries into {merged_folder}")
143
+
144
def copy_entry_files(
    entry: Dict,
    source_folders: List[str],
    dst_folder: str,
    filename_counts: Dict[str, int]
) -> Dict:
    """
    Copies all relevant files for an entry from its source folder to the destination folder.

    Args:
        entry (Dict): The eBook entry metadata.
        source_folders (List[str]): List of source library folders.
        dst_folder (str): Destination folder to copy files to.
        filename_counts (Dict[str, int]): Counts of each ebook filename across all entries.

    Returns:
        Dict: The updated entry with new file paths (basenames only: the
        merged library is flat).
    """
    # Shallow copy: nested values remain shared with the input entry.
    new_entry = entry.copy()

    # Find the source folder containing this entry
    source_folder = find_source_folder(entry, source_folders)
    if not source_folder:
        # Origin unknown; return the entry with its original file_paths intact.
        logger.warning(f"Source folder not found for entry with unique_id {entry['unique_id']}")
        return new_entry

    # Copy eBook files
    new_file_paths = []
    for file_rel_path in entry.get('file_paths', []):
        src_path = os.path.join(source_folder, file_rel_path)
        if not os.path.exists(src_path):
            # Missing files are dropped from the merged entry.
            logger.warning(f"Ebook file '{src_path}' does not exist.")
            continue
        original_filename = os.path.basename(file_rel_path)

        if filename_counts.get(original_filename, 0) == 1:
            # Filename is unique; keep it as is
            dst_filename = original_filename
        else:
            # Filename is duplicated; append unique_id to disambiguate
            name, ext = os.path.splitext(original_filename)
            safe_name = slugify(name)
            safe_unique_id = slugify(entry['unique_id'])
            dst_filename = f"{safe_name}__{safe_unique_id}{ext}"

        dst_path = os.path.join(dst_folder, dst_filename)
        # Collisions are still possible (e.g. repeated merges into the same
        # folder); fall back to a " (n)" suffix.
        dst_path = get_unique_filename(dst_path)
        try:
            shutil.copy(src_path, dst_path)
        except OSError as e:
            logger.error(f"Error copying file '{src_path}' to '{dst_path}': {e}")
            continue
        new_file_paths.append(os.path.basename(dst_path))
        logger.debug(f"Copied ebook file '{src_path}' to '{dst_path}'")

    new_entry['file_paths'] = new_file_paths

    return new_entry
202
+
203
def copy_cover_image(
    entry: Dict,
    source_folders: List[str],
    dst_folder: str,
    cover_filename_counts: Dict[str, int]
) -> Dict:
    """
    Copies the cover image for an entry from its source folder to the destination folder.

    On any failure (unknown source folder, missing file, copy error) the
    returned entry's 'cover_path' is set to None rather than raising.

    Args:
        entry (Dict): The eBook entry metadata.
        source_folders (List[str]): List of source library folders.
        dst_folder (str): Destination folder to copy files to.
        cover_filename_counts (Dict[str, int]): Counts of each cover filename across all entries.

    Returns:
        Dict: The updated entry with new cover path (basename only).
    """
    cover_path = entry.get('cover_path')
    if not cover_path:
        return entry  # No cover to copy

    # Shallow copy: nested values remain shared with the input entry.
    new_entry = entry.copy()

    # Find the source folder containing this entry
    source_folder = find_source_folder(entry, source_folders)
    if not source_folder:
        logger.warning(f"Source folder not found for entry with unique_id {entry['unique_id']} (cover)")
        new_entry['cover_path'] = None
        return new_entry

    src_cover = os.path.join(source_folder, cover_path)
    if not os.path.exists(src_cover):
        logger.warning(f"Cover image '{src_cover}' does not exist.")
        new_entry['cover_path'] = None
        return new_entry

    original_cover_filename = os.path.basename(cover_path)

    if cover_filename_counts.get(original_cover_filename, 0) == 1:
        # Cover filename is unique; keep it as is
        dst_cover_filename = original_cover_filename
    else:
        # Cover filename is duplicated; append unique_id to disambiguate
        name, ext = os.path.splitext(original_cover_filename)
        safe_name = slugify(name)
        safe_unique_id = slugify(entry['unique_id'])
        dst_cover_filename = f"{safe_name}__{safe_unique_id}{ext}"

    dst_cover_path = os.path.join(dst_folder, dst_cover_filename)
    # Collisions are still possible (e.g. repeated merges); add " (n)" suffix.
    dst_cover_path = get_unique_filename(dst_cover_path)
    try:
        shutil.copy(src_cover, dst_cover_path)
    except OSError as e:
        logger.error(f"Error copying cover image '{src_cover}' to '{dst_cover_path}': {e}")
        new_entry['cover_path'] = None
        return new_entry
    new_entry['cover_path'] = os.path.basename(dst_cover_path)
    logger.debug(f"Copied cover image '{src_cover}' to '{dst_cover_path}'")

    return new_entry
264
+
265
def find_source_folder(entry: Dict, source_folders: List[str]) -> str:
    """
    Locate which source library folder an entry's files live in.

    Scans each folder's metadata.json for a record whose 'unique_id'
    matches the entry's, returning the first folder that contains one.

    Args:
        entry (Dict): The eBook entry metadata.
        source_folders (List[str]): List of source library folders.

    Returns:
        str: The path to the source folder, or None if not found.
    """
    wanted = entry.get('unique_id')
    for candidate in source_folders:
        manifest = os.path.join(candidate, "metadata.json")
        if not os.path.exists(manifest):
            continue
        with open(manifest, "r", encoding="utf-8") as handle:
            try:
                records = json.load(handle)
            except json.JSONDecodeError as e:
                # Unreadable manifest: log and keep searching other folders.
                logger.error(f"Error decoding JSON from {manifest}: {e}")
                continue
        if any(record.get('unique_id') == wanted for record in records):
            return candidate
    return None
289
+
290
def get_unique_filename(target_path: str) -> str:
    """
    Return target_path, or a numbered variant that does not yet exist.

    Example:
        'myfile.pdf' -> if it exists -> 'myfile (1).pdf' -> if that exists -> 'myfile (2).pdf'
    """
    stem, suffix = os.path.splitext(target_path)
    candidate = target_path
    attempt = 0
    # Probe increasing suffixes until we find a free path.
    while os.path.exists(candidate):
        attempt += 1
        candidate = f"{stem} ({attempt}){suffix}"
    return candidate
File without changes