ebk 0.1.0__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ebk might be problematic.

ebk/merge.py DELETED
@@ -1,308 +0,0 @@
- import os
- import json
- import shutil
- from slugify import slugify
- from typing import List, Dict, Tuple
- from .ident import add_unique_id
- import logging
-
- logger = logging.getLogger(__name__)
-
- def load_all_metadata(source_folders: List[str]) -> List[Tuple[Dict, str]]:
-     """
-     Given a list of source folders, load all 'metadata.json' files and
-     return them as a list of (metadata_entry, source_folder).
-     """
-     all_entries = []
-     for folder in source_folders:
-         meta_path = os.path.join(folder, "metadata.json")
-         if os.path.exists(meta_path):
-             with open(meta_path, "r", encoding="utf-8") as f:
-                 try:
-                     data = json.load(f)
-                     for entry in data:
-                         all_entries.append((entry, folder))
-                 except json.JSONDecodeError as e:
-                     logger.error(f"Error decoding JSON from {meta_path}: {e}")
-         else:
-             logger.warning(f"No metadata.json found in {folder}")
-     return all_entries
-
- def perform_set_operation(
-     entries: List[Dict],
-     operation: str,
-     source_counts: Dict[str, int]
- ) -> List[Dict]:
-     """
-     Perform the specified set operation on the list of entries.
-
-     Args:
-         entries (List[Dict]): List of eBook entries with 'unique_id'.
-         operation (str): One of 'union', 'intersect', 'diff', 'symdiff'.
-         source_counts (Dict[str, int]): Counts of how many sources each unique_id appears in.
-
-     Returns:
-         List[Dict]: Filtered list of entries based on the set operation.
-     """
-     if operation == "union":
-         # All unique entries
-         return entries
-     elif operation == "intersect":
-         # Entries present in all source libraries
-         return [entry for entry in entries if source_counts.get(entry['unique_id'], 0) == len(source_counts)]
-     elif operation == "diff":
-         # Set difference: entries present in the first library but not in others
-         # Assuming 'diff' is lib1 - lib2
-         # Modify the function signature to pass specific libraries if needed
-         return [entry for entry in entries if source_counts.get(entry['unique_id'], 0) == 1]
-     elif operation == "symdiff":
-         # Symmetric difference: entries present in one library but not in both
-         return [entry for entry in entries if source_counts.get(entry['unique_id'], 0) == 1]
-     else:
-         logger.error(f"Unsupported set operation: {operation}")
-         return []
-
- def merge_libraries(
-     source_folders: List[str],
-     merged_folder: str,
-     operation: str
- ):
-     """
-     Merges multiple ebook libraries (each in a separate folder) into a single library
-     based on the specified set-theoretic operation.
-
-     Args:
-         source_folders (List[str]): List of source library folders to merge.
-         merged_folder (str): Path to the folder where the merged library will be saved.
-         operation (str): Set operation to apply ('union', 'intersect', 'diff', 'symdiff').
-     """
-     if not os.path.exists(merged_folder):
-         os.makedirs(merged_folder)
-         logger.info(f"Created merged folder at {merged_folder}")
-
-     # Load all entries
-     entries_with_sources = load_all_metadata(source_folders)
-
-     # Index entries by unique_id
-     unique_entries = {}
-     source_counts = {}
-
-     for entry, source in entries_with_sources:
-         uid = entry['unique_id']
-         if uid not in unique_entries:
-             unique_entries[uid] = entry
-             source_counts[uid] = 1
-         else:
-             source_counts[uid] += 1
-             # Optionally, handle metadata conflicts here
-             # For example, you could merge metadata fields or prioritize certain sources
-             # Here, we'll assume the first occurrence is kept
-             logger.debug(f"Duplicate entry found for unique_id {uid} in {source}. Ignoring.")
-
-     all_unique_entries = list(unique_entries.values())
-
-     # Perform the set operation
-     filtered_entries = perform_set_operation(all_unique_entries, operation, source_counts)
-
-     logger.info(f"Performing '{operation}' operation. {len(filtered_entries)} entries selected.")
-
-     # **New Step:** Preprocess filenames to identify conflicts
-     filename_counts = {}
-     cover_filename_counts = {}
-
-     for entry in filtered_entries:
-         # Count ebook filenames
-         for file_rel_path in entry.get('file_paths', []):
-             filename = os.path.basename(file_rel_path)
-             filename_counts[filename] = filename_counts.get(filename, 0) + 1
-         # Count cover filenames
-         cover_path = entry.get('cover_path')
-         if cover_path:
-             cover_filename = os.path.basename(cover_path)
-             cover_filename_counts[cover_filename] = cover_filename_counts.get(cover_filename, 0) + 1
-
-     logger.debug(f"Ebook filename counts: {filename_counts}")
-     logger.debug(f"Cover filename counts: {cover_filename_counts}")
-
-     # Copy files and prepare merged metadata
-     merged_metadata = []
-
-     for entry in filtered_entries:
-         # Copy eBook files with awareness of filename uniqueness
-         new_entry = copy_entry_files(entry, source_folders, merged_folder, filename_counts)
-         # Copy cover image with awareness of filename uniqueness
-         new_entry = copy_cover_image(new_entry, source_folders, merged_folder, cover_filename_counts)
-         merged_metadata.append(new_entry)
-
-     # Write merged metadata.json
-     merged_meta_path = os.path.join(merged_folder, "metadata.json")
-     with open(merged_meta_path, "w", encoding="utf-8") as f:
-         json.dump(merged_metadata, f, indent=2, ensure_ascii=False)
-
-     logger.info(f"Merged {len(merged_metadata)} entries into {merged_folder}")
-
- def copy_entry_files(
-     entry: Dict,
-     source_folders: List[str],
-     dst_folder: str,
-     filename_counts: Dict[str, int]
- ) -> Dict:
-     """
-     Copies all relevant files for an entry from its source folder to the destination folder.
-
-     Args:
-         entry (Dict): The eBook entry metadata.
-         source_folders (List[str]): List of source library folders.
-         dst_folder (str): Destination folder to copy files to.
-         filename_counts (Dict[str, int]): Counts of each ebook filename across all entries.
-
-     Returns:
-         Dict: The updated entry with new file paths.
-     """
-     new_entry = entry.copy()
-
-     # Find the source folder containing this entry
-     source_folder = find_source_folder(entry, source_folders)
-     if not source_folder:
-         logger.warning(f"Source folder not found for entry with unique_id {entry['unique_id']}")
-         return new_entry
-
-     # Copy eBook files
-     new_file_paths = []
-     for file_rel_path in entry.get('file_paths', []):
-         src_path = os.path.join(source_folder, file_rel_path)
-         if not os.path.exists(src_path):
-             logger.warning(f"Ebook file '{src_path}' does not exist.")
-             continue
-         original_filename = os.path.basename(file_rel_path)
-
-         if filename_counts.get(original_filename, 0) == 1:
-             # Filename is unique; keep it as is
-             dst_filename = original_filename
-         else:
-             # Filename is duplicated; append unique_id to disambiguate
-             name, ext = os.path.splitext(original_filename)
-             safe_name = slugify(name)
-             safe_unique_id = slugify(entry['unique_id'])
-             dst_filename = f"{safe_name}__{safe_unique_id}{ext}"
-
-         dst_path = os.path.join(dst_folder, dst_filename)
-         dst_path = get_unique_filename(dst_path)
-         try:
-             shutil.copy(src_path, dst_path)
-         except OSError as e:
-             logger.error(f"Error copying file '{src_path}' to '{dst_path}': {e}")
-             continue
-         new_file_paths.append(os.path.basename(dst_path))
-         logger.debug(f"Copied ebook file '{src_path}' to '{dst_path}'")
-
-     new_entry['file_paths'] = new_file_paths
-
-     return new_entry
-
- def copy_cover_image(
-     entry: Dict,
-     source_folders: List[str],
-     dst_folder: str,
-     cover_filename_counts: Dict[str, int]
- ) -> Dict:
-     """
-     Copies the cover image for an entry from its source folder to the destination folder.
-
-     Args:
-         entry (Dict): The eBook entry metadata.
-         source_folders (List[str]): List of source library folders.
-         dst_folder (str): Destination folder to copy files to.
-         cover_filename_counts (Dict[str, int]): Counts of each cover filename across all entries.
-
-     Returns:
-         Dict: The updated entry with new cover path.
-     """
-     cover_path = entry.get('cover_path')
-     if not cover_path:
-         return entry  # No cover to copy
-
-     new_entry = entry.copy()
-
-     # Find the source folder containing this entry
-     source_folder = find_source_folder(entry, source_folders)
-     if not source_folder:
-         logger.warning(f"Source folder not found for entry with unique_id {entry['unique_id']} (cover)")
-         new_entry['cover_path'] = None
-         return new_entry
-
-     src_cover = os.path.join(source_folder, cover_path)
-     if not os.path.exists(src_cover):
-         logger.warning(f"Cover image '{src_cover}' does not exist.")
-         new_entry['cover_path'] = None
-         return new_entry
-
-     original_cover_filename = os.path.basename(cover_path)
-
-     if cover_filename_counts.get(original_cover_filename, 0) == 1:
-         # Cover filename is unique; keep it as is
-         dst_cover_filename = original_cover_filename
-     else:
-         # Cover filename is duplicated; append unique_id to disambiguate
-         name, ext = os.path.splitext(original_cover_filename)
-         safe_name = slugify(name)
-         safe_unique_id = slugify(entry['unique_id'])
-         dst_cover_filename = f"{safe_name}__{safe_unique_id}{ext}"
-
-     dst_cover_path = os.path.join(dst_folder, dst_cover_filename)
-     dst_cover_path = get_unique_filename(dst_cover_path)
-     try:
-         shutil.copy(src_cover, dst_cover_path)
-     except OSError as e:
-         logger.error(f"Error copying cover image '{src_cover}' to '{dst_cover_path}': {e}")
-         new_entry['cover_path'] = None
-         return new_entry
-     new_entry['cover_path'] = os.path.basename(dst_cover_path)
-     logger.debug(f"Copied cover image '{src_cover}' to '{dst_cover_path}'")
-
-     return new_entry
-
- def find_source_folder(entry: Dict, source_folders: List[str]) -> str:
-     """
-     Identifies the source folder where the entry's files are located.
-
-     Args:
-         entry (Dict): The eBook entry metadata.
-         source_folders (List[str]): List of source library folders.
-
-     Returns:
-         str: The path to the source folder, or None if not found.
-     """
-     for folder in source_folders:
-         meta_path = os.path.join(folder, "metadata.json")
-         if not os.path.exists(meta_path):
-             continue
-         with open(meta_path, "r", encoding="utf-8") as f:
-             try:
-                 data = json.load(f)
-                 for src_entry in data:
-                     if src_entry.get('unique_id') == entry.get('unique_id'):
-                         return folder
-             except json.JSONDecodeError as e:
-                 logger.error(f"Error decoding JSON from {meta_path}: {e}")
-     return None
-
- def get_unique_filename(target_path: str) -> str:
-     """
-     If target_path already exists, generate a new path with (1), (2), etc.
-     Otherwise just return target_path.
-
-     Example:
-         'myfile.pdf' -> if it exists -> 'myfile (1).pdf' -> if that exists -> 'myfile (2).pdf'
-     """
-     if not os.path.exists(target_path):
-         return target_path
-
-     base, ext = os.path.splitext(target_path)
-     counter = 1
-     new_path = f"{base} ({counter}){ext}"
-     while os.path.exists(new_path):
-         counter += 1
-         new_path = f"{base} ({counter}){ext}"
-
-     return new_path
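
The module above was removed outright in 0.3.1. For reference, a minimal, hypothetical driver for it is sketched below; the import path, function name, and operation values come from the deleted file, while the folder paths are invented. Two quirks are visible in the source: 'diff' and 'symdiff' filter identically, and 'intersect' compares each ID's source count against len(source_counts) (the number of distinct IDs) rather than the number of source folders.

```python
# Hypothetical driver for the removed ebk.merge module (folder paths are
# illustrative; only merge_libraries and its 'operation' values come from
# the deleted file above).
from ebk.merge import merge_libraries

# Deduplicates by 'unique_id' and keeps every entry from both libraries;
# ebook/cover files whose basenames collide are renamed to
# "<slug>__<unique-id><ext>" before being copied into the merged folder.
merge_libraries(
    source_folders=["libs/fiction", "libs/textbooks"],
    merged_folder="libs/merged",
    operation="union",  # or "intersect", "diff", "symdiff"
)
```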
ebk/streamlit/__init__.py DELETED
File without changes
ebk/streamlit/app.py DELETED
@@ -1,185 +0,0 @@
- import streamlit as st
- import pandas as pd
- import os
- import logging
- from utils import load_metadata, extract_zip
- from filters import sanitize_dataframe, create_filters
- from display import display_books_tab, display_statistics_tab
-
- # Configure logging
- logging.basicConfig(
-     level=logging.INFO,
-     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
- )
- logger = logging.getLogger(__name__)
-
- # def display_footer():
- #     st.markdown("---")
- #     st.write("Developed with ❤️ using Streamlit.")
-
- def display_dashboard(metadata_list: list, cover_images: dict, ebook_files: dict):
-     """
-     Displays the main dashboard with advanced filtering and a compact UI layout using tabs.
-     """
-     # Convert metadata list to DataFrame
-     df = pd.DataFrame(metadata_list)
-     logger.debug("Converted metadata list to DataFrame.")
-
-     # Sanitize DataFrame
-     df = sanitize_dataframe(df)
-     logger.debug("Sanitized DataFrame.")
-
-     # Apply Filters
-     filtered_df = create_filters(df)
-     logger.debug("Applied filters to DataFrame.")
-
-     # Create Tabs
-     tabs = st.tabs(["📚 Books", "📊 Statistics", "Advanced Search", "📖 Table", "📝 Instructions"])
-
-     with tabs[0]:
-         # Display Books
-         display_books_tab(filtered_df, cover_images, ebook_files)
-
-     with tabs[1]:
-         # Display Statistics
-         display_statistics_tab(filtered_df)
-
-     with tabs[2]:
-         # Display Advanced Search
-         display_advanced_search_tab(metadata_list)
-
-     with tabs[3]:
-         # Display Table
-         display_table_view_tab(filtered_df)
-
-     with tabs[4]:
-         # Display Instructions
-         st.header("📝 Instructions")
-         st.markdown("""
-         1. **Prepare a ZIP Archive** of an ebk library using the following process:
-             - Go to the directory containing the desired ebk library (should have `metadata.json` and associated files).
-             - Compress the directory into a ZIP archive.
-             - The `ebk` CLI tool can also automatically output a ZIP archive,
-               e.g., `ebk import calibre <calibre-library> --output.zip`.
-         2. **Upload the ZIP Archive** using the uploader below.
-         3. **Use the Sidebar** to apply filters and search your library.
-         4. **Interact** with the dashboard to view details and download ebooks.
-         """)
-
-     # Display Footer
-     # display_footer()
-
- def main():
-     st.set_page_config(page_title="ebk Dashboard", layout="wide")
-     st.title("📚 ebk Dashboard")
-     st.write("""
-     Upload a **ZIP archive** containing your `metadata.json`, all associated cover images, and ebook files.
-     The app will automatically process and display your library with advanced search and filtering options.
-     """)
-
-     # File uploader for ZIP archive
-     st.subheader("📁 Upload ZIP Archive")
-     zip_file = st.file_uploader(
-         label="Upload a ZIP file containing `metadata.json`, cover images, and ebook files",
-         type=["zip"],
-         key="zip_upload"
-     )
-
-     MAX_ZIP_SIZE = 8 * 1024 * 1024 * 1024  # 8 GB
-
-     if zip_file:
-         print("Uploaded ZIP file:", zip_file.name)
-         print("🔄 File size:", zip_file.size)
-         if zip_file.size > MAX_ZIP_SIZE:
-             st.error(f"❌ Uploaded ZIP file is {zip_file.size / 1024 / 1024 / 1024:.2f} GB, which exceeds the size limit of 8 GB.")
-             logger.error("Uploaded ZIP file exceeds the size limit.")
-             st.stop()
-
-         with st.spinner("🔄 Extracting and processing ZIP archive..."):
-             extracted_files = extract_zip(zip_file)
-             if not extracted_files:
-                 logger.error("No files extracted from the ZIP archive.")
-                 st.stop()  # Stop if extraction failed
-
-             # Locate metadata.json (case-insensitive search)
-             metadata_key = next((k for k in extracted_files if os.path.basename(k).lower() == "metadata.json"), None)
-             if not metadata_key:
-                 st.error("❌ `metadata.json` not found in the uploaded ZIP archive.")
-                 logger.error("`metadata.json` not found in the uploaded ZIP archive.")
-                 st.stop()
-
-             metadata_content = extracted_files[metadata_key]
-             metadata_list = load_metadata(metadata_content)
-             if not metadata_list:
-                 logger.error("Failed to load metadata from `metadata.json`.")
-                 st.stop()
-
-             # Collect cover images and ebook files
-             cover_images = {}
-             ebook_files = {}
-             for filename, file_bytes in extracted_files.items():
-                 lower_filename = filename.lower()
-                 basename = os.path.basename(filename)
-                 if lower_filename.endswith(('.jpg', '.jpeg', '.png')):
-                     cover_images[basename] = file_bytes
-                     logger.debug(f"Added cover image: {basename}")
-                 elif lower_filename.endswith(('.pdf', '.epub', '.mobi', '.azw3', '.txt')):
-                     ebook_files[basename] = file_bytes
-                     logger.debug(f"Added ebook file: {basename}")
-                 else:
-                     # Ignore other file types or handle as needed
-                     logger.debug(f"Ignored unsupported file type: {basename}")
-
-             # Inform user about unmatched cover images
-             expected_covers = {os.path.basename(md.get("cover_path", "")) for md in metadata_list if md.get("cover_path")}
-             uploaded_covers = set(cover_images.keys())
-             missing_covers = expected_covers - uploaded_covers
-             if missing_covers:
-                 st.warning(f"⚠️ The following cover images are referenced in `metadata.json` but were not uploaded: {', '.join(missing_covers)}")
-                 logger.warning(f"Missing cover images: {missing_covers}")
-
-             # Inform user about unmatched ebook files
-             expected_ebooks = {os.path.basename(path) for md in metadata_list for path in md.get("file_paths", [])}
-             uploaded_ebooks = set(ebook_files.keys())
-             missing_ebooks = expected_ebooks - uploaded_ebooks
-             if missing_ebooks:
-                 st.warning(f"⚠️ The following ebook files are referenced in `metadata.json` but were not uploaded: {', '.join(missing_ebooks)}")
-                 logger.warning(f"Missing ebook files: {missing_ebooks}")
-
-             # Display the dashboard with metadata and cover images
-             display_dashboard(metadata_list, cover_images, ebook_files)
-     else:
-         st.info("📥 Please upload a ZIP archive to get started.")
-         logger.debug("No ZIP archive uploaded yet.")
-
- def display_table_view_tab(filtered_df: pd.DataFrame):
-     """
-     Displays the Table tab with a searchable table of metadata.
-     """
-     st.header("📖 Table")
-     st.write("Explore the metadata of your library using the interactive table below.")
-     st.dataframe(filtered_df)
-
- def display_advanced_search_tab(metadata_list: list):
-     """
-     Using JMESPath to search the metadata list.
-     """
-     import jmespath
-
-     st.header("Advanced Search")
-     st.write("Use JMESPath queries to search the metadata list.")
-     query = st.text_input("Enter a JMESPath query", "[?date > `2020-01-01`]")
-     try:
-         result = jmespath.search(query, metadata_list)
-         st.write("Search Results:")
-         st.write(result)
-     except Exception as e:
-         st.error(f"An error occurred: {e}")
-         logger.error(f"JMESPath search error: {e}")
-
- if __name__ == "__main__":
-     main()
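
The Advanced Search tab above feeds user queries straight to jmespath.search. Below is a standalone sketch of that call; the sample entries are invented but use the metadata.json field names seen elsewhere in this diff. One caveat: JMESPath defines its ordering operators (>, >=, <, <=) for numbers only, so date-string comparisons like the tab's default query match nothing, while equality and contains() do work on strings.

```python
# Standalone sketch of the removed Advanced Search behaviour; the two
# sample entries are invented for illustration.
import jmespath

metadata_list = [
    {"title": "Book A", "creators": ["Alice"], "language": "en"},
    {"title": "Book B", "creators": ["Bob"], "language": "de"},
]

# String equality and contains() are valid JMESPath filters:
print(jmespath.search("[?language == 'en'].title", metadata_list))
# ['Book A']
print(jmespath.search("[?contains(creators, 'Bob')].title", metadata_list))
# ['Book B']
```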
ebk/streamlit/display.py DELETED
@@ -1,168 +0,0 @@
- import streamlit as st
- from PIL import Image
- import pandas as pd
- import altair as alt
- import logging
- import os
-
- logger = logging.getLogger(__name__)
-
- def display_books_tab(filtered_df: pd.DataFrame, cover_images: dict, ebook_files: dict):
-     """
-     Displays the Books tab with book entries and download/view links.
-     """
-     total_size = len(filtered_df)
-     st.subheader(f"📚 Book Entries (Total: {total_size})")
-     if not filtered_df.empty:
-         for idx, row in filtered_df.iterrows():
-             with st.expander(f"**{row.get('title', 'No Title')}**"):
-                 # Layout: Cover Image & Downloads | Metadata
-                 cols = st.columns([1.5, 3])
-
-                 # Left Column: Cover Image
-                 with cols[0]:
-                     # Cover Image
-                     cover_path = row.get("cover_path", "")
-                     cover_filename = os.path.basename(cover_path)
-                     cover_data = cover_images.get(cover_filename)
-                     if cover_data:
-                         try:
-                             image = Image.open(cover_data)
-                             st.image(image, use_container_width=True, caption="🖼️ Cover")
-                             logger.debug(f"Displayed cover image: {cover_filename}")
-                         except Exception as e:
-                             st.error(f"🖼️ Error loading image: {e}")
-                             logger.error(f"Error loading image {cover_filename}: {e}")
-                     else:
-                         st.info("🖼️ No cover image available.")
-                         logger.debug(f"No cover image available for {cover_filename}.")
-
-                 # Right Column: Metadata Details and Ebook Links
-                 with cols[1]:
-                     # Show title in a header style
-                     title = row.get("title", "No Title")
-                     st.markdown(f"# 📖 {title}")
-
-                     metadata_details = {
-                         "👤 **Author(s)**": ", ".join(row.get("creators", ["N/A"])),
-                         "📚 **Subjects**": ", ".join(row.get("subjects", ["N/A"])),
-                         "📝 **Description**": row.get("description", "N/A"),
-                         "🌐 **Language**": row.get("language", "N/A"),
-                         "📅 **Publication Date**": row.get("date", "N/A") if pd.notna(row.get("date", None)) else "N/A",
-                         "📖 **Publisher**": row.get("publisher", "N/A"),
-                         "📏 **File Size**": row.get("file_size", "N/A"),
-                         "📚 **Virtual Libraries**": ", ".join(row.get("virtual_libs", ["N/A"])),
-                         "🔑 **Identifiers**": ", ".join([f"{k}: {v}" for k, v in row.get("identifiers", {}).items()]),
-                         "🔑 **Unique ID**": row.get("unique_id", "NA"),
-                     }
-
-                     for key, value in metadata_details.items():
-                         st.markdown(f"{key}: {value}")
-
-                     # Ebook Download and View Links
-                     ebook_paths = row.get("file_paths", [])
-                     if ebook_paths:
-                         st.markdown("### 📥 Ebook Links")
-                         for ebook_path in ebook_paths:
-                             ebook_filename = os.path.basename(ebook_path)
-                             ebook_data = ebook_files.get(ebook_filename)
-                             if ebook_data:
-                                 # Determine MIME type based on file extension
-                                 _, ext = os.path.splitext(ebook_filename.lower())
-                                 mime_types = {
-                                     '.pdf': 'application/pdf',
-                                     '.epub': 'application/epub+zip',
-                                     '.mobi': 'application/x-mobipocket-ebook',
-                                     '.azw3': 'application/vnd.amazon.ebook',
-                                     '.txt': 'text/plain'
-                                 }
-                                 mime_type = mime_types.get(ext, 'application/octet-stream')
-
-                                 st.download_button(
-                                     label=f"💾 Download {ebook_filename}",
-                                     data=ebook_data.getvalue(),
-                                     file_name=ebook_filename,
-                                     mime=mime_type
-                                 )
-                                 logger.debug(f"Provided link for {ebook_filename}.")
-                             else:
-                                 st.warning(f"Ebook file '{ebook_filename}' not found in the uploaded ZIP.")
-                                 logger.warning(f"Ebook file '{ebook_filename}' not found in the uploaded ZIP.")
-                     else:
-                         st.info("📄 No ebook files available for download.")
-                         logger.debug("No ebook files available for download.")
-     else:
-         st.info("📚 No books match the current filter criteria.")
-         logger.debug("No books match the current filter criteria.")
-
- def display_statistics_tab(filtered_df: pd.DataFrame):
-     """
-     Displays the Statistics tab with various visualizations.
-     """
-     st.subheader("📊 Statistics")
-
-     if not filtered_df.empty:
-         # Visualization: Books per Author (Top 10)
-         st.markdown("### 📈 Top 10 Authors by Number of Books")
-         author_counts = pd.Series([creator for creators in filtered_df['creators'] for creator in creators]).value_counts().nlargest(10).reset_index()
-         author_counts.columns = ['Author', 'Number of Books']
-
-         chart = alt.Chart(author_counts).mark_bar().encode(
-             x=alt.X('Number of Books:Q', title='Number of Books'),
-             y=alt.Y('Author:N', sort='-x', title='Author'),
-             tooltip=['Author', 'Number of Books']
-         ).properties(
-             width=600,
-             height=400
-         )
-
-         st.altair_chart(chart, use_container_width=True)
-         logger.debug("Displayed Top 10 Authors chart.")
-
-         # Visualization: Books per Subject (Top 10)
-         st.markdown("### 📊 Top 10 Subjects by Number of Books")
-         subject_counts = pd.Series([subject for subjects in filtered_df['subjects'] for subject in subjects]).value_counts().nlargest(10).reset_index()
-         subject_counts.columns = ['Subject', 'Number of Books']
-
-         subject_chart = alt.Chart(subject_counts).mark_bar().encode(
-             x=alt.X('Number of Books:Q', title='Number of Books'),
-             y=alt.Y('Subject:N', sort='-x', title='Subject'),
-             tooltip=['Subject', 'Number of Books']
-         ).properties(
-             width=600,
-             height=400
-         )
-
-         st.altair_chart(subject_chart, use_container_width=True)
-         logger.debug("Displayed Top 10 Subjects chart.")
-
-         # Visualization: Books Published Over Time
-         st.markdown("### 📈 Books Published Over Time")
-         if 'date' in filtered_df.columns and pd.api.types.is_numeric_dtype(filtered_df['date']):
-             publication_years = filtered_df['date'].dropna().astype(int)
-             if not publication_years.empty:
-                 year_counts = publication_years.value_counts().sort_index().reset_index()
-                 year_counts.columns = ['Year', 'Number of Books']
-
-                 time_chart = alt.Chart(year_counts).mark_line(point=True).encode(
-                     x=alt.X('Year:O', title='Year'),
-                     y=alt.Y('Number of Books:Q', title='Number of Books'),
-                     tooltip=['Year', 'Number of Books']
-                 ).properties(
-                     width=800,
-                     height=400
-                 )
-
-                 st.altair_chart(time_chart, use_container_width=True)
-                 logger.debug("Displayed Books Published Over Time chart.")
-             else:
-                 st.info("📅 No publication date data available.")
-                 logger.warning("Publication year data is empty after filtering.")
-         else:
-             st.info("📅 Publication date data is not available or not in a numeric format.")
-             logger.warning("Publication date data is not available or not numeric.")
-     else:
-         st.info("📊 No statistics to display as no books match the current filter criteria.")
-         logger.debug("No statistics to display due to empty filtered DataFrame.")
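
The extension-to-MIME lookup in display_books_tab is self-contained enough to lift out. A standalone sketch follows; the mimetypes fallback is an addition for illustration, not part of the deleted code.

```python
# Standalone version of the extension-to-MIME lookup used by the removed
# Books tab; the stdlib mimetypes fallback is an embellishment.
import mimetypes
import os

EBOOK_MIME = {
    ".pdf": "application/pdf",
    ".epub": "application/epub+zip",
    ".mobi": "application/x-mobipocket-ebook",
    ".azw3": "application/vnd.amazon.ebook",
    ".txt": "text/plain",
}

def guess_mime(filename: str) -> str:
    """Return a MIME type for an ebook filename, defaulting to octet-stream."""
    _, ext = os.path.splitext(filename.lower())
    return EBOOK_MIME.get(ext) or mimetypes.guess_type(filename)[0] or "application/octet-stream"

print(guess_mime("book.epub"))   # application/epub+zip
print(guess_mime("notes.docx"))  # stdlib guess, else application/octet-stream
```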