ebk 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ebk might be problematic.
- ebk/__init__.py +35 -0
- ebk/cli.py +1724 -664
- ebk/config.py +260 -22
- ebk/decorators.py +132 -0
- ebk/extract_metadata.py +76 -7
- ebk/library_db.py +744 -0
- ebk/plugins/__init__.py +42 -0
- ebk/plugins/base.py +502 -0
- ebk/plugins/hooks.py +444 -0
- ebk/plugins/registry.py +500 -0
- ebk/search_parser.py +413 -0
- ebk/server.py +1633 -0
- ebk-0.3.1.dist-info/METADATA +755 -0
- ebk-0.3.1.dist-info/RECORD +19 -0
- {ebk-0.1.0.dist-info → ebk-0.3.1.dist-info}/WHEEL +1 -1
- ebk-0.3.1.dist-info/entry_points.txt +6 -0
- ebk-0.3.1.dist-info/licenses/LICENSE +21 -0
- ebk-0.3.1.dist-info/top_level.txt +2 -0
- ebk/exports/__init__.py +0 -0
- ebk/exports/hugo.py +0 -55
- ebk/exports/zip.py +0 -25
- ebk/imports/__init__.py +0 -0
- ebk/imports/calibre.py +0 -144
- ebk/imports/ebooks.py +0 -116
- ebk/llm.py +0 -58
- ebk/manager.py +0 -44
- ebk/merge.py +0 -308
- ebk/streamlit/__init__.py +0 -0
- ebk/streamlit/__pycache__/__init__.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/display.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/filters.cpython-310.pyc +0 -0
- ebk/streamlit/__pycache__/utils.cpython-310.pyc +0 -0
- ebk/streamlit/app.py +0 -185
- ebk/streamlit/display.py +0 -168
- ebk/streamlit/filters.py +0 -151
- ebk/streamlit/utils.py +0 -58
- ebk/utils.py +0 -311
- ebk-0.1.0.dist-info/METADATA +0 -457
- ebk-0.1.0.dist-info/RECORD +0 -29
- ebk-0.1.0.dist-info/entry_points.txt +0 -2
- ebk-0.1.0.dist-info/top_level.txt +0 -1
ebk/merge.py
DELETED
@@ -1,308 +0,0 @@

import os
import json
import shutil
from slugify import slugify
from typing import List, Dict, Tuple
from .ident import add_unique_id
import logging

logger = logging.getLogger(__name__)

def load_all_metadata(source_folders: List[str]) -> List[Tuple[Dict, str]]:
    """
    Given a list of source folders, load all 'metadata.json' files and
    return them as a list of (metadata_entry, source_folder).
    """
    all_entries = []
    for folder in source_folders:
        meta_path = os.path.join(folder, "metadata.json")
        if os.path.exists(meta_path):
            with open(meta_path, "r", encoding="utf-8") as f:
                try:
                    data = json.load(f)
                    for entry in data:
                        all_entries.append((entry, folder))
                except json.JSONDecodeError as e:
                    logger.error(f"Error decoding JSON from {meta_path}: {e}")
        else:
            logger.warning(f"No metadata.json found in {folder}")
    return all_entries

def perform_set_operation(
    entries: List[Dict],
    operation: str,
    source_counts: Dict[str, int]
) -> List[Dict]:
    """
    Perform the specified set operation on the list of entries.

    Args:
        entries (List[Dict]): List of eBook entries with 'unique_id'.
        operation (str): One of 'union', 'intersect', 'diff', 'symdiff'.
        source_counts (Dict[str, int]): Counts of how many sources each unique_id appears in.

    Returns:
        List[Dict]: Filtered list of entries based on the set operation.
    """
    if operation == "union":
        # All unique entries
        return entries
    elif operation == "intersect":
        # Entries present in all source libraries
        return [entry for entry in entries if source_counts.get(entry['unique_id'], 0) == len(source_counts)]
    elif operation == "diff":
        # Set difference: entries present in the first library but not in others
        # Assuming 'diff' is lib1 - lib2
        # Modify the function signature to pass specific libraries if needed
        return [entry for entry in entries if source_counts.get(entry['unique_id'], 0) == 1]
    elif operation == "symdiff":
        # Symmetric difference: entries present in one library but not in both
        return [entry for entry in entries if source_counts.get(entry['unique_id'], 0) == 1]
    else:
        logger.error(f"Unsupported set operation: {operation}")
        return []

def merge_libraries(
    source_folders: List[str],
    merged_folder: str,
    operation: str
):
    """
    Merges multiple ebook libraries (each in a separate folder) into a single library
    based on the specified set-theoretic operation.

    Args:
        source_folders (List[str]): List of source library folders to merge.
        merged_folder (str): Path to the folder where the merged library will be saved.
        operation (str): Set operation to apply ('union', 'intersect', 'diff', 'symdiff').
    """
    if not os.path.exists(merged_folder):
        os.makedirs(merged_folder)
        logger.info(f"Created merged folder at {merged_folder}")

    # Load all entries
    entries_with_sources = load_all_metadata(source_folders)

    # Index entries by unique_id
    unique_entries = {}
    source_counts = {}

    for entry, source in entries_with_sources:
        uid = entry['unique_id']
        if uid not in unique_entries:
            unique_entries[uid] = entry
            source_counts[uid] = 1
        else:
            source_counts[uid] += 1
            # Optionally, handle metadata conflicts here
            # For example, you could merge metadata fields or prioritize certain sources
            # Here, we'll assume the first occurrence is kept
            logger.debug(f"Duplicate entry found for unique_id {uid} in {source}. Ignoring.")

    all_unique_entries = list(unique_entries.values())

    # Perform the set operation
    filtered_entries = perform_set_operation(all_unique_entries, operation, source_counts)

    logger.info(f"Performing '{operation}' operation. {len(filtered_entries)} entries selected.")

    # **New Step:** Preprocess filenames to identify conflicts
    filename_counts = {}
    cover_filename_counts = {}

    for entry in filtered_entries:
        # Count ebook filenames
        for file_rel_path in entry.get('file_paths', []):
            filename = os.path.basename(file_rel_path)
            filename_counts[filename] = filename_counts.get(filename, 0) + 1
        # Count cover filenames
        cover_path = entry.get('cover_path')
        if cover_path:
            cover_filename = os.path.basename(cover_path)
            cover_filename_counts[cover_filename] = cover_filename_counts.get(cover_filename, 0) + 1

    logger.debug(f"Ebook filename counts: {filename_counts}")
    logger.debug(f"Cover filename counts: {cover_filename_counts}")

    # Copy files and prepare merged metadata
    merged_metadata = []

    for entry in filtered_entries:
        # Copy eBook files with awareness of filename uniqueness
        new_entry = copy_entry_files(entry, source_folders, merged_folder, filename_counts)
        # Copy cover image with awareness of filename uniqueness
        new_entry = copy_cover_image(new_entry, source_folders, merged_folder, cover_filename_counts)
        merged_metadata.append(new_entry)

    # Write merged metadata.json
    merged_meta_path = os.path.join(merged_folder, "metadata.json")
    with open(merged_meta_path, "w", encoding="utf-8") as f:
        json.dump(merged_metadata, f, indent=2, ensure_ascii=False)

    logger.info(f"Merged {len(merged_metadata)} entries into {merged_folder}")

def copy_entry_files(
    entry: Dict,
    source_folders: List[str],
    dst_folder: str,
    filename_counts: Dict[str, int]
) -> Dict:
    """
    Copies all relevant files for an entry from its source folder to the destination folder.

    Args:
        entry (Dict): The eBook entry metadata.
        source_folders (List[str]): List of source library folders.
        dst_folder (str): Destination folder to copy files to.
        filename_counts (Dict[str, int]): Counts of each ebook filename across all entries.

    Returns:
        Dict: The updated entry with new file paths.
    """
    new_entry = entry.copy()

    # Find the source folder containing this entry
    source_folder = find_source_folder(entry, source_folders)
    if not source_folder:
        logger.warning(f"Source folder not found for entry with unique_id {entry['unique_id']}")
        return new_entry

    # Copy eBook files
    new_file_paths = []
    for file_rel_path in entry.get('file_paths', []):
        src_path = os.path.join(source_folder, file_rel_path)
        if not os.path.exists(src_path):
            logger.warning(f"Ebook file '{src_path}' does not exist.")
            continue
        original_filename = os.path.basename(file_rel_path)

        if filename_counts.get(original_filename, 0) == 1:
            # Filename is unique; keep it as is
            dst_filename = original_filename
        else:
            # Filename is duplicated; append unique_id to disambiguate
            name, ext = os.path.splitext(original_filename)
            safe_name = slugify(name)
            safe_unique_id = slugify(entry['unique_id'])
            dst_filename = f"{safe_name}__{safe_unique_id}{ext}"

        dst_path = os.path.join(dst_folder, dst_filename)
        dst_path = get_unique_filename(dst_path)
        try:
            shutil.copy(src_path, dst_path)
        except OSError as e:
            logger.error(f"Error copying file '{src_path}' to '{dst_path}': {e}")
            continue
        new_file_paths.append(os.path.basename(dst_path))
        logger.debug(f"Copied ebook file '{src_path}' to '{dst_path}'")

    new_entry['file_paths'] = new_file_paths

    return new_entry

def copy_cover_image(
    entry: Dict,
    source_folders: List[str],
    dst_folder: str,
    cover_filename_counts: Dict[str, int]
) -> Dict:
    """
    Copies the cover image for an entry from its source folder to the destination folder.

    Args:
        entry (Dict): The eBook entry metadata.
        source_folders (List[str]): List of source library folders.
        dst_folder (str): Destination folder to copy files to.
        cover_filename_counts (Dict[str, int]): Counts of each cover filename across all entries.

    Returns:
        Dict: The updated entry with new cover path.
    """
    cover_path = entry.get('cover_path')
    if not cover_path:
        return entry  # No cover to copy

    new_entry = entry.copy()

    # Find the source folder containing this entry
    source_folder = find_source_folder(entry, source_folders)
    if not source_folder:
        logger.warning(f"Source folder not found for entry with unique_id {entry['unique_id']} (cover)")
        new_entry['cover_path'] = None
        return new_entry

    src_cover = os.path.join(source_folder, cover_path)
    if not os.path.exists(src_cover):
        logger.warning(f"Cover image '{src_cover}' does not exist.")
        new_entry['cover_path'] = None
        return new_entry

    original_cover_filename = os.path.basename(cover_path)

    if cover_filename_counts.get(original_cover_filename, 0) == 1:
        # Cover filename is unique; keep it as is
        dst_cover_filename = original_cover_filename
    else:
        # Cover filename is duplicated; append unique_id to disambiguate
        name, ext = os.path.splitext(original_cover_filename)
        safe_name = slugify(name)
        safe_unique_id = slugify(entry['unique_id'])
        dst_cover_filename = f"{safe_name}__{safe_unique_id}{ext}"

    dst_cover_path = os.path.join(dst_folder, dst_cover_filename)
    dst_cover_path = get_unique_filename(dst_cover_path)
    try:
        shutil.copy(src_cover, dst_cover_path)
    except OSError as e:
        logger.error(f"Error copying cover image '{src_cover}' to '{dst_cover_path}': {e}")
        new_entry['cover_path'] = None
        return new_entry
    new_entry['cover_path'] = os.path.basename(dst_cover_path)
    logger.debug(f"Copied cover image '{src_cover}' to '{dst_cover_path}'")

    return new_entry

def find_source_folder(entry: Dict, source_folders: List[str]) -> str:
    """
    Identifies the source folder where the entry's files are located.

    Args:
        entry (Dict): The eBook entry metadata.
        source_folders (List[str]): List of source library folders.

    Returns:
        str: The path to the source folder, or None if not found.
    """
    for folder in source_folders:
        meta_path = os.path.join(folder, "metadata.json")
        if not os.path.exists(meta_path):
            continue
        with open(meta_path, "r", encoding="utf-8") as f:
            try:
                data = json.load(f)
                for src_entry in data:
                    if src_entry.get('unique_id') == entry.get('unique_id'):
                        return folder
            except json.JSONDecodeError as e:
                logger.error(f"Error decoding JSON from {meta_path}: {e}")
    return None

def get_unique_filename(target_path: str) -> str:
    """
    If target_path already exists, generate a new path with (1), (2), etc.
    Otherwise just return target_path.

    Example:
        'myfile.pdf' -> if it exists -> 'myfile (1).pdf' -> if that exists -> 'myfile (2).pdf'
    """
    if not os.path.exists(target_path):
        return target_path

    base, ext = os.path.splitext(target_path)
    counter = 1
    new_path = f"{base} ({counter}){ext}"
    while os.path.exists(new_path):
        counter += 1
        new_path = f"{base} ({counter}){ext}"

    return new_path
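For readers comparing versions, here is a minimal, hypothetical sketch of how the removed `merge_libraries` API above could have been driven in 0.1.0. The folder names are placeholders, and the call shape is taken only from the signature shown in the deleted source; this module no longer exists in 0.3.1.

```python
# Hypothetical usage sketch for the removed ebk.merge module (present in 0.1.0,
# deleted in 0.3.1). The library paths below are placeholders.
import logging

from ebk.merge import merge_libraries

logging.basicConfig(level=logging.INFO)

# Combine two ebk libraries into a third folder, keeping every unique entry.
# Each source folder is expected to contain a metadata.json file.
merge_libraries(
    source_folders=["library_a", "library_b"],
    merged_folder="library_merged",       # created if it does not exist
    operation="union",                    # or "intersect", "diff", "symdiff"
)
```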
ebk/streamlit/__init__.py
DELETED
File without changes

ebk/streamlit/__pycache__/__init__.cpython-310.pyc
DELETED
Binary file

ebk/streamlit/__pycache__/display.cpython-310.pyc
DELETED
Binary file

ebk/streamlit/__pycache__/filters.cpython-310.pyc
DELETED
Binary file

ebk/streamlit/__pycache__/utils.cpython-310.pyc
DELETED
Binary file
ebk/streamlit/app.py
DELETED
@@ -1,185 +0,0 @@

import streamlit as st
import pandas as pd
import os
import logging
from utils import load_metadata, extract_zip
from filters import sanitize_dataframe, create_filters
from display import display_books_tab, display_statistics_tab

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)

#def display_footer():
#    st.markdown("---")
#    st.write("Developed with ❤️ using Streamlit.")

def display_dashboard(metadata_list: list, cover_images: dict, ebook_files: dict):
    """
    Displays the main dashboard with advanced filtering and a compact UI layout using tabs.
    """
    # Convert metadata list to DataFrame
    df = pd.DataFrame(metadata_list)
    logger.debug("Converted metadata list to DataFrame.")

    # Sanitize DataFrame
    df = sanitize_dataframe(df)
    logger.debug("Sanitized DataFrame.")

    # Apply Filters
    filtered_df = create_filters(df)
    logger.debug("Applied filters to DataFrame.")

    # Create Tabs
    tabs = st.tabs(["📚 Books", "📊 Statistics", "Advanced Search", "📖 Table", "📝 Instructions"])

    with tabs[0]:
        # Display Books
        display_books_tab(filtered_df, cover_images, ebook_files)

    with tabs[1]:
        # Display Statistics
        display_statistics_tab(filtered_df)

    with tabs[2]:
        # Display Advanced Search
        display_advanced_search_tab(metadata_list)

    with tabs[3]:
        # Display Table
        display_table_view_tab(filtered_df)

    with tabs[4]:
        # Display Instructions
        st.header("📝 Instructions")
        st.markdown("""
        1. **Prepare a ZIP Archive** of an ebk library using the following process:
            - Go to the directory containing the desired ebk library (should have 'metadata.json` and associated files).
            - Compress the directory into a ZIP archive.
            - The `ebk` CLI tool can also autoatically output a ZIP archive,
              e.g., `ebk import calibre <calibre-library> --output.zip`.
        2. **Upload the ZIP Archive** using the uploader below.
        3. **Use the Sidebar** to apply filters and search your library.
        4. **Interact** with the dashboard to view details and download ebooks.
        """)

    # Display Footer
    # display_footer()

def main():
    st.set_page_config(page_title="ebk Dashboard", layout="wide")
    st.title("📚 ebk Dashoard")
    st.write("""
    Upload a **ZIP archive** containing your `metadata.json`, all associated cover images, and ebook files.
    The app will automatically process and display your library with advanced search and filtering options.
    """)

    # File uploader for ZIP archive
    st.subheader("📁 Upload ZIP Archive")
    zip_file = st.file_uploader(
        label="Upload a ZIP file containing `metadata.json`, cover images, and ebook files",
        type=["zip"],
        key="zip_upload"
    )

    MAX_ZIP_SIZE = 8 * 1024 * 1024 * 1024  # 1 GB

    if zip_file:
        print("Uploaded ZIP file:", zip_file.name)
        print("🔄 File size:", zip_file.size)
        if zip_file.size > MAX_ZIP_SIZE:
            st.error(f"❌ Uploaded ZIP file is {zip_file.size / 1024 / 1024 / 1024:.2f} GB, which exceeds the size limit of 1 GB.")
            logger.error("Uploaded ZIP file exceeds the size limit.")
            st.stop()

        with st.spinner("🔄 Extracting and processing ZIP archive..."):
            extracted_files = extract_zip(zip_file)
            if not extracted_files:
                logger.error("No files extracted from the ZIP archive.")
                st.stop()  # Stop if extraction failed

            # Locate metadata.json (case-insensitive search)
            metadata_key = next((k for k in extracted_files if os.path.basename(k).lower() == "metadata.json"), None)
            if not metadata_key:
                st.error("❌ `metadata.json` not found in the uploaded ZIP archive.")
                logger.error("`metadata.json` not found in the uploaded ZIP archive.")
                st.stop()

            metadata_content = extracted_files[metadata_key]
            metadata_list = load_metadata(metadata_content)
            if not metadata_list:
                logger.error("Failed to load metadata from `metadata.json`.")
                st.stop()

            # Collect cover images and ebook files
            cover_images = {}
            ebook_files = {}
            for filename, file_bytes in extracted_files.items():
                lower_filename = filename.lower()
                basename = os.path.basename(filename)
                if lower_filename.endswith(('.jpg', '.jpeg', '.png')):
                    cover_images[basename] = file_bytes
                    logger.debug(f"Added cover image: {basename}")
                elif lower_filename.endswith(('.pdf', '.epub', '.mobi', '.azw3', '.txt')):
                    ebook_files[basename] = file_bytes
                    logger.debug(f"Added ebook file: {basename}")
                else:
                    # Ignore other file types or handle as needed
                    logger.debug(f"Ignored unsupported file type: {basename}")
                    pass

            # Inform user about unmatched cover images
            expected_covers = {os.path.basename(md.get("cover_path", "")) for md in metadata_list if md.get("cover_path")}
            uploaded_covers = set(cover_images.keys())
            missing_covers = expected_covers - uploaded_covers
            if missing_covers:
                st.warning(f"⚠️ The following cover images are referenced in `metadata.json` but were not uploaded: {', '.join(missing_covers)}")
                logger.warning(f"Missing cover images: {missing_covers}")

            # Inform user about unmatched ebook files
            expected_ebooks = {os.path.basename(path) for md in metadata_list for path in md.get("file_paths", [])}
            uploaded_ebooks = set(ebook_files.keys())
            missing_ebooks = expected_ebooks - uploaded_ebooks
            if missing_ebooks:
                st.warning(f"⚠️ The following ebook files are referenced in `metadata.json` but were not uploaded: {', '.join(missing_ebooks)}")
                logger.warning(f"Missing ebook files: {missing_ebooks}")

            # Display the dashboard with metadata and cover images
            display_dashboard(metadata_list, cover_images, ebook_files)
    else:
        st.info("📥 Please upload a ZIP archive to get started.")
        logger.debug("No ZIP archive uploaded yet.")

def display_table_view_tab(filtered_df: pd.DataFrame):
    """
    Displays the Table tab with a searchable table of metadata.
    """
    st.header("📖 Table")
    st.write("Explore the metadata of your library using the interactive table below.")
    st.dataframe(filtered_df)

def display_advanced_search_tab(metadata_list: list):
    """
    Using JMESPath to search the metadata list.
    """
    import jmespath

    st.header("Advanced Search")
    st.write("Use JMESPath queries to search the metadata list.")
    query = st.text_input("Enter a JMESPath query", "[].[?date > `2020-01-01`]")
    try:
        result = jmespath.search(query, metadata_list)
        st.write("Search Results:")
        st.write(result)
    except Exception as e:
        st.error(f"An error occurred: {e}")
        logger.error(f"JMESPath search error: {e}")


if __name__ == "__main__":
    main()
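The deleted dashboard above expects a ZIP archive that bundles `metadata.json` together with the cover images and ebook files it references. A standard-library sketch of preparing such an archive is below; the library folder name is a placeholder (the in-app instructions also mention that the `ebk` CLI can emit a ZIP directly).

```python
# Sketch: zip up an ebk library folder for upload to the Streamlit dashboard.
# "my-ebk-library" is a placeholder path; it should contain metadata.json,
# cover images, and the ebook files referenced by metadata.json.
import shutil
from pathlib import Path

library_dir = Path("my-ebk-library")
assert (library_dir / "metadata.json").exists(), "not an ebk library folder"

# Creates my-ebk-library.zip next to the folder, preserving relative paths.
archive_path = shutil.make_archive(str(library_dir), "zip", root_dir=library_dir)
print(f"Upload {archive_path} via the dashboard's ZIP uploader")
```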
ebk/streamlit/display.py
DELETED
@@ -1,168 +0,0 @@

import streamlit as st
from PIL import Image
import pandas as pd
import altair as alt
import logging
import os

logger = logging.getLogger(__name__)

def display_books_tab(filtered_df: pd.DataFrame, cover_images: dict, ebook_files: dict):
    """
    Displays the Books tab with book entries and download/view links.
    """
    total_size = len(filtered_df)
    st.subheader(f"📚 Book Entries (Total: {total_size})")
    if not filtered_df.empty:
        for idx, row in filtered_df.iterrows():
            with st.expander(f"**{row.get('title', 'No Title')}**"):
                # Layout: Cover Image & Downloads | Metadata
                cols = st.columns([1.5, 3])

                # Left Column: Cover Image
                with cols[0]:
                    # Cover Image
                    cover_path = row.get("cover_path", "")
                    cover_filename = os.path.basename(cover_path)
                    cover_data = cover_images.get(cover_filename)
                    if cover_data:
                        try:
                            image = Image.open(cover_data)
                            st.image(image, use_container_width=True, caption="🖼️ Cover")
                            logger.debug(f"Displayed cover image: {cover_filename}")
                        except Exception as e:
                            st.error(f"🖼️ Error loading image: {e}")
                            logger.error(f"Error loading image {cover_filename}: {e}")
                    else:
                        st.info("🖼️ No cover image available.")
                        logger.debug(f"No cover image available for {cover_filename}.")

                # Right Column: Metadata Details and Ebook Links
                with cols[1]:

                    # show title in a header style
                    title = row.get("title", "No Title")
                    st.markdown(f"# 📖 {title}")

                    metadata_details = {
                        "👤 **Author(s)**": ", ".join(row.get("creators", ["N/A"])),
                        "📚 **Subjects**": ", ".join(row.get("subjects", ["N/A"])),
                        "📝 **Description**": row.get("description", "N/A"),
                        "🌐 **Language**": row.get("language", "N/A"),
                        "📅 **Publication Date**": row.get("date", "N/A") if pd.notna(row.get("date", None)) else "N/A",
                        "📖 **Publisher**": row.get("publisher", "N/A"),
                        "📏 **File Size**": row.get("file_size", "N/A"),
                        "📚 **Virtual Libraries**": ", ".join(row.get("virtual_libs", ["N/A"])),
                        "🔑 **Identifiers**": ", ".join([f"{k}: {v}" for k, v in row.get("identifiers", {}).items()]),
                        "🔑 **Unique ID**": row.get("unique_id", "NA"),
                    }

                    for key, value in metadata_details.items():
                        st.markdown(f"{key}: {value}")

                    # Ebook Download and View Links
                    ebook_paths = row.get("file_paths", [])
                    if ebook_paths:
                        st.markdown("### 📥 Ebook Links")
                        for ebook_path in ebook_paths:
                            ebook_filename = os.path.basename(ebook_path)
                            ebook_data = ebook_files.get(ebook_filename)
                            if ebook_data:
                                # Determine MIME type based on file extension
                                _, ext = os.path.splitext(ebook_filename.lower())
                                mime_types = {
                                    '.pdf': 'application/pdf',
                                    '.epub': 'application/epub+zip',
                                    '.mobi': 'application/x-mobipocket-ebook',
                                    '.azw3': 'application/vnd.amazon.ebook',
                                    '.txt': 'text/plain'
                                }
                                mime_type = mime_types.get(ext, 'application/octet-stream')

                                st.download_button(
                                    label=f"💾 Download {ebook_filename}",
                                    data=ebook_data.getvalue(),
                                    file_name=ebook_filename,
                                    mime=mime_type
                                )
                                logger.debug(f"Provided link for {ebook_filename}.")
                            else:
                                st.warning(f"Ebook file '{ebook_filename}' not found in the uploaded ZIP.")
                                logger.warning(f"Ebook file '{ebook_filename}' not found in the uploaded ZIP.")
                    else:
                        st.info("📄 No ebook files available for download.")
                        logger.debug("No ebook files available for download.")
    else:
        st.info("📚 No books match the current filter criteria.")
        logger.debug("No books match the current filter criteria.")

def display_statistics_tab(filtered_df: pd.DataFrame):
    """
    Displays the Statistics tab with various visualizations.
    """
    st.subheader("📊 Statistics")

    if not filtered_df.empty:
        # Visualization: Books per Author (Top 10)
        st.markdown("### 📈 Top 10 Authors by Number of Books")
        author_counts = pd.Series([creator for creators in filtered_df['creators'] for creator in creators]).value_counts().nlargest(10).reset_index()
        author_counts.columns = ['Author', 'Number of Books']

        chart = alt.Chart(author_counts).mark_bar().encode(
            x=alt.X('Number of Books:Q', title='Number of Books'),
            y=alt.Y('Author:N', sort='-x', title='Author'),
            tooltip=['Author', 'Number of Books']
        ).properties(
            width=600,
            height=400
        )

        st.altair_chart(chart, use_container_width=True)
        logger.debug("Displayed Top 10 Authors chart.")

        # Visualization: Books per Subject (Top 10)
        st.markdown("### 📊 Top 10 Subjects by Number of Books")
        subject_counts = pd.Series([subject for subjects in filtered_df['subjects'] for subject in subjects]).value_counts().nlargest(10).reset_index()
        subject_counts.columns = ['Subject', 'Number of Books']

        subject_chart = alt.Chart(subject_counts).mark_bar().encode(
            x=alt.X('Number of Books:Q', title='Number of Books'),
            y=alt.Y('Subject:N', sort='-x', title='Subject'),
            tooltip=['Subject', 'Number of Books']
        ).properties(
            width=600,
            height=400
        )

        st.altair_chart(subject_chart, use_container_width=True)
        logger.debug("Displayed Top 10 Subjects chart.")

        # Visualization: Books Published Over Time
        st.markdown("### 📈 Books Published Over Time")
        if 'date' in filtered_df.columns and pd.api.types.is_numeric_dtype(filtered_df['date']):
            publication_years = filtered_df['date'].dropna().astype(int)
            if not publication_years.empty:
                year_counts = publication_years.value_counts().sort_index().reset_index()
                year_counts.columns = ['Year', 'Number of Books']

                time_chart = alt.Chart(year_counts).mark_line(point=True).encode(
                    x=alt.X('Year:O', title='Year'),
                    y=alt.Y('Number of Books:Q', title='Number of Books'),
                    tooltip=['Year', 'Number of Books']
                ).properties(
                    width=800,
                    height=400
                )

                st.altair_chart(time_chart, use_container_width=True)
                logger.debug("Displayed Books Published Over Time chart.")
            else:
                st.info("📅 No publication date data available.")
                logger.warning("Publication year data is empty after filtering.")
        else:
            st.info("📅 Publication date data is not available or not in a numeric format.")
            logger.warning("Publication date data is not available or not numeric.")
    else:
        st.info("📊 No statistics to display as no books match the current filter criteria.")
        logger.debug("No statistics to display due to empty filtered DataFrame.")
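The statistics tab in the deleted `display.py` above counts authors and subjects by flattening the list-valued columns before aggregating. A self-contained toy illustration of that aggregation pattern, with invented entries, is shown below.

```python
# Toy illustration of the author-count aggregation used in display_statistics_tab.
# The metadata entries here are invented examples, not real library data.
import pandas as pd

metadata_list = [
    {"title": "Book A", "creators": ["Alice", "Bob"]},
    {"title": "Book B", "creators": ["Alice"]},
    {"title": "Book C", "creators": ["Carol"]},
]
df = pd.DataFrame(metadata_list)

# Flatten the list-valued column, then count occurrences per author.
author_counts = (
    pd.Series([creator for creators in df["creators"] for creator in creators])
    .value_counts()
    .nlargest(10)
    .reset_index()
)
author_counts.columns = ["Author", "Number of Books"]
print(author_counts)
# Expected counts: Alice 2, Bob 1, Carol 1
```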