ebk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic. Click here for more details.

ebk/streamlit/app.py ADDED
@@ -0,0 +1,185 @@
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import os
4
+ import logging
5
+ from utils import load_metadata, extract_zip
6
+ from filters import sanitize_dataframe, create_filters
7
+ from display import display_books_tab, display_statistics_tab
8
+
9
# Configure root logging once at import time so every module logger in the
# app (app.py, display, filters, utils) inherits the same level and format.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)
15
+
16
+ #def display_footer():
17
+ # st.markdown("---")
18
+ # st.write("Developed with ❤️ using Streamlit.")
19
+
20
def display_dashboard(metadata_list: list, cover_images: dict, ebook_files: dict):
    """
    Displays the main dashboard with advanced filtering and a compact UI layout using tabs.

    Args:
        metadata_list: List of metadata dicts loaded from ``metadata.json``.
        cover_images: Mapping of cover-image basename -> in-memory file data.
        ebook_files: Mapping of ebook-file basename -> in-memory file data.
    """
    # Convert metadata list to DataFrame
    df = pd.DataFrame(metadata_list)
    logger.debug("Converted metadata list to DataFrame.")

    # Normalize column types and fill missing values before filtering.
    df = sanitize_dataframe(df)
    logger.debug("Sanitized DataFrame.")

    # Sidebar filter widgets; returns the filtered view of the library.
    filtered_df = create_filters(df)
    logger.debug("Applied filters to DataFrame.")

    # Create Tabs
    tabs = st.tabs(["📚 Books", "📊 Statistics", "Advanced Search", "📖 Table", "📝 Instructions"])

    with tabs[0]:
        # Display Books
        display_books_tab(filtered_df, cover_images, ebook_files)

    with tabs[1]:
        # Display Statistics
        display_statistics_tab(filtered_df)

    with tabs[2]:
        # Advanced search operates on the raw metadata list, not the filtered frame.
        display_advanced_search_tab(metadata_list)

    with tabs[3]:
        # Display Table
        display_table_view_tab(filtered_df)

    with tabs[4]:
        # Display Instructions
        st.header("📝 Instructions")
        # Fixed from the original text: matched backticks around metadata.json,
        # "autoatically" -> "automatically", and the mangled CLI example.
        st.markdown("""
        1. **Prepare a ZIP Archive** of an ebk library using the following process:
            - Go to the directory containing the desired ebk library (should have `metadata.json` and associated files).
            - Compress the directory into a ZIP archive.
            - The `ebk` CLI tool can also automatically output a ZIP archive,
              e.g., `ebk import calibre <calibre-library> --output <archive>.zip`.
        2. **Upload the ZIP Archive** using the uploader below.
        3. **Use the Sidebar** to apply filters and search your library.
        4. **Interact** with the dashboard to view details and download ebooks.
        """)
72
+
73
def main():
    """
    Entry point: configure the page, accept a ZIP upload of an ebk library,
    validate and extract it, then render the dashboard.
    """
    st.set_page_config(page_title="ebk Dashboard", layout="wide")
    # Typo fix: "Dashoard" -> "Dashboard".
    st.title("📚 ebk Dashboard")
    st.write("""
        Upload a **ZIP archive** containing your `metadata.json`, all associated cover images, and ebook files.
        The app will automatically process and display your library with advanced search and filtering options.
    """)

    # File uploader for ZIP archive
    st.subheader("📁 Upload ZIP Archive")
    zip_file = st.file_uploader(
        label="Upload a ZIP file containing `metadata.json`, cover images, and ebook files",
        type=["zip"],
        key="zip_upload"
    )

    MAX_ZIP_SIZE = 8 * 1024 * 1024 * 1024  # 8 GB upload cap

    if zip_file:
        # Use the module logger instead of bare print() so output respects
        # the configured logging format and level.
        logger.info("Uploaded ZIP file: %s (%d bytes)", zip_file.name, zip_file.size)
        if zip_file.size > MAX_ZIP_SIZE:
            # Derive the reported limit from the constant so the message can
            # never drift out of sync with MAX_ZIP_SIZE again (it previously
            # claimed "1 GB" while the constant allowed 8 GB).
            st.error(f"❌ Uploaded ZIP file is {zip_file.size / 1024 / 1024 / 1024:.2f} GB, which exceeds the size limit of {MAX_ZIP_SIZE / 1024 / 1024 / 1024:.0f} GB.")
            logger.error("Uploaded ZIP file exceeds the size limit.")
            st.stop()

        with st.spinner("🔄 Extracting and processing ZIP archive..."):
            extracted_files = extract_zip(zip_file)
            if not extracted_files:
                logger.error("No files extracted from the ZIP archive.")
                st.stop()  # Stop if extraction failed

            # Locate metadata.json (case-insensitive search)
            metadata_key = next((k for k in extracted_files if os.path.basename(k).lower() == "metadata.json"), None)
            if not metadata_key:
                st.error("❌ `metadata.json` not found in the uploaded ZIP archive.")
                logger.error("`metadata.json` not found in the uploaded ZIP archive.")
                st.stop()

            metadata_content = extracted_files[metadata_key]
            metadata_list = load_metadata(metadata_content)
            if not metadata_list:
                logger.error("Failed to load metadata from `metadata.json`.")
                st.stop()

            # Bucket the remaining archive entries by extension. Keys are
            # basenames because metadata references files by relative path.
            cover_images = {}
            ebook_files = {}
            for filename, file_bytes in extracted_files.items():
                lower_filename = filename.lower()
                basename = os.path.basename(filename)
                if lower_filename.endswith(('.jpg', '.jpeg', '.png')):
                    cover_images[basename] = file_bytes
                    logger.debug(f"Added cover image: {basename}")
                elif lower_filename.endswith(('.pdf', '.epub', '.mobi', '.azw3', '.txt')):
                    ebook_files[basename] = file_bytes
                    logger.debug(f"Added ebook file: {basename}")
                else:
                    # Ignore other file types (e.g. metadata.json itself).
                    logger.debug(f"Ignored unsupported file type: {basename}")

            # Inform user about unmatched cover images
            expected_covers = {os.path.basename(md.get("cover_path", "")) for md in metadata_list if md.get("cover_path")}
            uploaded_covers = set(cover_images.keys())
            missing_covers = expected_covers - uploaded_covers
            if missing_covers:
                st.warning(f"⚠️ The following cover images are referenced in `metadata.json` but were not uploaded: {', '.join(missing_covers)}")
                logger.warning(f"Missing cover images: {missing_covers}")

            # Inform user about unmatched ebook files
            expected_ebooks = {os.path.basename(path) for md in metadata_list for path in md.get("file_paths", [])}
            uploaded_ebooks = set(ebook_files.keys())
            missing_ebooks = expected_ebooks - uploaded_ebooks
            if missing_ebooks:
                st.warning(f"⚠️ The following ebook files are referenced in `metadata.json` but were not uploaded: {', '.join(missing_ebooks)}")
                logger.warning(f"Missing ebook files: {missing_ebooks}")

            # Display the dashboard with metadata, covers and ebooks.
            display_dashboard(metadata_list, cover_images, ebook_files)
    else:
        st.info("📥 Please upload a ZIP archive to get started.")
        logger.debug("No ZIP archive uploaded yet.")
156
+
157
def display_table_view_tab(filtered_df: pd.DataFrame):
    """Render the Table tab: an interactive dataframe view of the library metadata."""
    header_text = "📖 Table"
    intro_text = "Explore the metadata of your library using the interactive table below."
    st.header(header_text)
    st.write(intro_text)
    st.dataframe(filtered_df)
164
+
165
def display_advanced_search_tab(metadata_list: list):
    """
    Advanced search over the raw metadata list using JMESPath queries.

    Args:
        metadata_list: List of metadata dicts as loaded from ``metadata.json``.
    """
    import jmespath  # local import: only needed when this tab is rendered

    st.header("Advanced Search")
    st.write("Use JMESPath queries to search the metadata list.")
    # The default must be a valid JMESPath expression. The previous default,
    # "[].[?date > `2020-01-01`]", was invalid: a filter cannot follow "[]."
    # that way, and backtick literals must contain valid JSON. A raw string
    # literal in single quotes is the correct way to compare against a date
    # string: [?date > '2020-01-01'].
    query = st.text_input("Enter a JMESPath query", "[?date > '2020-01-01']")
    try:
        result = jmespath.search(query, metadata_list)
        st.write("Search Results:")
        st.write(result)
    except Exception as e:
        # Surface malformed queries to the user instead of crashing the app.
        st.error(f"An error occurred: {e}")
        logger.error(f"JMESPath search error: {e}")
181
+
182
+
183
+
184
# Script entry point: launch the Streamlit dashboard.
if __name__ == "__main__":
    main()
@@ -0,0 +1,168 @@
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import pandas as pd
4
+ import altair as alt
5
+ import logging
6
+ import os
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
def display_books_tab(filtered_df: pd.DataFrame, cover_images: dict, ebook_files: dict):
    """
    Displays the Books tab with book entries and download/view links.

    Args:
        filtered_df: Sanitized, filtered library metadata (one row per book).
        cover_images: Mapping of cover-image basename -> file-like data
            (assumed openable by PIL — TODO confirm all entries are images).
        ebook_files: Mapping of ebook-file basename -> file-like data with a
            .getvalue() method (e.g. BytesIO).
    """
    total_size = len(filtered_df)
    st.subheader(f"📚 Book Entries (Total: {total_size})")
    if not filtered_df.empty:
        # One collapsible expander per book row.
        for idx, row in filtered_df.iterrows():
            with st.expander(f"**{row.get('title', 'No Title')}**"):
                # Layout: Cover Image & Downloads | Metadata
                cols = st.columns([1.5, 3])

                # Left Column: Cover Image
                with cols[0]:
                    # Covers are keyed by basename; metadata stores a path.
                    cover_path = row.get("cover_path", "")
                    cover_filename = os.path.basename(cover_path)
                    cover_data = cover_images.get(cover_filename)
                    if cover_data:
                        try:
                            image = Image.open(cover_data)
                            st.image(image, use_container_width=True, caption="🖼️ Cover")
                            logger.debug(f"Displayed cover image: {cover_filename}")
                        except Exception as e:
                            # Corrupt or non-image data: report but keep rendering.
                            st.error(f"🖼️ Error loading image: {e}")
                            logger.error(f"Error loading image {cover_filename}: {e}")
                    else:
                        st.info("🖼️ No cover image available.")
                        logger.debug(f"No cover image available for {cover_filename}.")

                # Right Column: Metadata Details and Ebook Links
                with cols[1]:
                    # Show the title in a header style.
                    title = row.get("title", "No Title")
                    st.markdown(f"# 📖 {title}")

                    # Label -> formatted value; rendered as one markdown line each.
                    metadata_details = {
                        "👤 **Author(s)**": ", ".join(row.get("creators", ["N/A"])),
                        "📚 **Subjects**": ", ".join(row.get("subjects", ["N/A"])),
                        "📝 **Description**": row.get("description", "N/A"),
                        "🌐 **Language**": row.get("language", "N/A"),
                        "📅 **Publication Date**": row.get("date", "N/A") if pd.notna(row.get("date", None)) else "N/A",
                        "📖 **Publisher**": row.get("publisher", "N/A"),
                        "📏 **File Size**": row.get("file_size", "N/A"),
                        "📚 **Virtual Libraries**": ", ".join(row.get("virtual_libs", ["N/A"])),
                        "🔑 **Identifiers**": ", ".join([f"{k}: {v}" for k, v in row.get("identifiers", {}).items()]),
                        "🔑 **Unique ID**": row.get("unique_id", "NA"),
                    }

                    for key, value in metadata_details.items():
                        st.markdown(f"{key}: {value}")

                    # Ebook Download and View Links
                    ebook_paths = row.get("file_paths", [])
                    if ebook_paths:
                        st.markdown("### 📥 Ebook Links")
                        for ebook_path in ebook_paths:
                            # Ebooks are keyed by basename, like covers.
                            ebook_filename = os.path.basename(ebook_path)
                            ebook_data = ebook_files.get(ebook_filename)
                            if ebook_data:
                                # Determine MIME type based on file extension
                                _, ext = os.path.splitext(ebook_filename.lower())
                                mime_types = {
                                    '.pdf': 'application/pdf',
                                    '.epub': 'application/epub+zip',
                                    '.mobi': 'application/x-mobipocket-ebook',
                                    '.azw3': 'application/vnd.amazon.ebook',
                                    '.txt': 'text/plain'
                                }
                                mime_type = mime_types.get(ext, 'application/octet-stream')

                                st.download_button(
                                    label=f"💾 Download {ebook_filename}",
                                    data=ebook_data.getvalue(),
                                    file_name=ebook_filename,
                                    mime=mime_type
                                )
                                logger.debug(f"Provided link for {ebook_filename}.")
                            else:
                                st.warning(f"Ebook file '{ebook_filename}' not found in the uploaded ZIP.")
                                logger.warning(f"Ebook file '{ebook_filename}' not found in the uploaded ZIP.")
                    else:
                        st.info("📄 No ebook files available for download.")
                        logger.debug("No ebook files available for download.")
    else:
        st.info("📚 No books match the current filter criteria.")
        logger.debug("No books match the current filter criteria.")
99
+
100
def display_statistics_tab(filtered_df: pd.DataFrame):
    """
    Displays the Statistics tab with various visualizations:
    top-10 authors, top-10 subjects, and books published over time.

    Args:
        filtered_df: Sanitized, filtered library metadata. 'creators' and
            'subjects' are expected to hold lists; 'date' numeric years.
    """
    st.subheader("📊 Statistics")

    if not filtered_df.empty:
        # Visualization: Books per Author (Top 10).
        # Flatten the per-row author lists before counting.
        st.markdown("### 📈 Top 10 Authors by Number of Books")
        author_counts = pd.Series([creator for creators in filtered_df['creators'] for creator in creators]).value_counts().nlargest(10).reset_index()
        author_counts.columns = ['Author', 'Number of Books']

        chart = alt.Chart(author_counts).mark_bar().encode(
            x=alt.X('Number of Books:Q', title='Number of Books'),
            y=alt.Y('Author:N', sort='-x', title='Author'),
            tooltip=['Author', 'Number of Books']
        ).properties(
            width=600,
            height=400
        )

        st.altair_chart(chart, use_container_width=True)
        logger.debug("Displayed Top 10 Authors chart.")

        # Visualization: Books per Subject (Top 10), same flattening approach.
        st.markdown("### 📊 Top 10 Subjects by Number of Books")
        subject_counts = pd.Series([subject for subjects in filtered_df['subjects'] for subject in subjects]).value_counts().nlargest(10).reset_index()
        subject_counts.columns = ['Subject', 'Number of Books']

        subject_chart = alt.Chart(subject_counts).mark_bar().encode(
            x=alt.X('Number of Books:Q', title='Number of Books'),
            y=alt.Y('Subject:N', sort='-x', title='Subject'),
            tooltip=['Subject', 'Number of Books']
        ).properties(
            width=600,
            height=400
        )

        st.altair_chart(subject_chart, use_container_width=True)
        logger.debug("Displayed Top 10 Subjects chart.")

        # Visualization: Books Published Over Time.
        # Only drawn when 'date' is a numeric column (year values).
        st.markdown("### 📈 Books Published Over Time")
        if 'date' in filtered_df.columns and pd.api.types.is_numeric_dtype(filtered_df['date']):
            publication_years = filtered_df['date'].dropna().astype(int)
            if not publication_years.empty:
                year_counts = publication_years.value_counts().sort_index().reset_index()
                year_counts.columns = ['Year', 'Number of Books']

                time_chart = alt.Chart(year_counts).mark_line(point=True).encode(
                    x=alt.X('Year:O', title='Year'),
                    y=alt.Y('Number of Books:Q', title='Number of Books'),
                    tooltip=['Year', 'Number of Books']
                ).properties(
                    width=800,
                    height=400
                )

                st.altair_chart(time_chart, use_container_width=True)
                logger.debug("Displayed Books Published Over Time chart.")
            else:
                st.info("📅 No publication date data available.")
                logger.warning("Publication year data is empty after filtering.")
        else:
            st.info("📅 Publication date data is not available or not in a numeric format.")
            logger.warning("Publication date data is not available or not numeric.")
    else:
        st.info("📊 No statistics to display as no books match the current filter criteria.")
        logger.debug("No statistics to display due to empty filtered DataFrame.")
@@ -0,0 +1,151 @@
1
+ import pandas as pd
2
+ import streamlit as st
3
+ import logging
4
+
5
+ logger = logging.getLogger(__name__)
6
+
7
def sanitize_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """
    Sanitizes the DataFrame by ensuring correct data types and handling missing values.

    Guarantees after this call:
      - 'creators', 'subjects', 'file_paths', 'virtual_libs' hold lists
      - 'identifiers' holds dicts
      - 'language', 'cover_path', 'title', 'description' hold strings
      - 'date' is numeric (NaN where unparseable)

    Returns a sanitized copy; the caller's DataFrame is not mutated.
    """
    # Work on a copy so callers keep their original frame intact (the original
    # implementation added/overwrote columns on the caller's object).
    df = df.copy()

    # Columns that should contain lists. 'virtual_libs' is included because
    # create_filters() and the books display iterate it unconditionally and
    # would raise KeyError if it were missing from the metadata.
    list_columns = ['creators', 'subjects', 'file_paths', 'virtual_libs']

    def ensure_list(column):
        """
        Ensures that each entry in the column is a list. If not, replaces it with an empty list.
        """
        return column.apply(lambda x: x if isinstance(x, list) else [])

    for col in list_columns:
        if col in df.columns:
            df[col] = ensure_list(df[col])
            logger.debug(f"Processed list column: {col}")
        else:
            df[col] = [[] for _ in range(len(df))]
            logger.debug(f"Created empty list column: {col}")

    # Handle 'identifiers' column: each entry must be a dict.
    if 'identifiers' in df.columns:
        df['identifiers'] = df['identifiers'].apply(lambda x: x if isinstance(x, dict) else {})
        logger.debug("Sanitized 'identifiers' column.")
    else:
        df['identifiers'] = [{} for _ in range(len(df))]
        logger.debug("Created empty 'identifiers' column.")

    # Plain string columns. The lambda already maps NaN and other non-str
    # values to '', so the previous redundant .fillna('') step is dropped.
    string_fields = ['language', 'cover_path', 'title', 'description']
    for field in string_fields:
        if field in df.columns:
            df[field] = df[field].apply(lambda x: x if isinstance(x, str) else '').astype(str)
            logger.debug(f"Sanitized '{field}' column.")
        else:
            df[field] = ['' for _ in range(len(df))]
            logger.debug(f"Created empty '{field}' column.")

    # 'date' column: coerce to numeric (NaN when unparseable) so the year
    # range slider and time chart can rely on a numeric dtype.
    if 'date' in df.columns:
        df['date'] = pd.to_numeric(df['date'], errors='coerce')
        logger.debug("Sanitized 'date' column to ensure numeric types.")
    else:
        df['date'] = [None for _ in range(len(df))]
        logger.debug("Created empty 'date' column.")

    return df
71
+
72
def create_filters(df: pd.DataFrame) -> pd.DataFrame:
    """
    Creates and applies advanced filters to the DataFrame based on user inputs.
    Returns the filtered DataFrame.

    Expects `df` to have passed through sanitize_dataframe() so that
    'creators', 'subjects', 'language', 'title' and 'identifiers' exist
    with list/str/dict entries respectively.
    """
    # Sidebar for Filters
    st.sidebar.header("🔍 Filters")

    # Title Search (case-insensitive substring match)
    title_search = st.sidebar.text_input("🔎 Search by Title")

    # Author Filter (Multi-select)
    all_creators = sorted(set(creator for creators in df['creators'] for creator in creators))
    selected_authors = st.sidebar.multiselect("👤 Filter by Author(s)", all_creators, default=[])

    # Subjects Filter (Multi-select)
    all_subjects = sorted(set(subject for subjects in df['subjects'] for subject in subjects))
    selected_subjects = st.sidebar.multiselect("📚 Filter by Subject(s)", all_subjects, default=[])

    # Virtual Libraries Filter. Guard against the column being absent so a
    # library without 'virtual_libs' metadata cannot raise a KeyError here.
    if 'virtual_libs' in df.columns:
        all_libraries = sorted(set(lib for libs in df['virtual_libs'] for lib in libs))
    else:
        all_libraries = []
    selected_libraries = st.sidebar.multiselect("📚 Filter by Virtual Library(s)", all_libraries, default=[])

    # Language Filter (Multi-select); empty strings are excluded from options.
    all_languages = sorted(set(lang for lang in df['language'] if lang))
    selected_languages = st.sidebar.multiselect("🌐 Filter by Language(s)", all_languages, default=[])

    # Publication Date Filter (Range Slider); only offered when 'date' is numeric.
    selected_years = None
    if 'date' in df.columns and pd.api.types.is_numeric_dtype(df['date']):
        min_year = int(df['date'].min()) if pd.notna(df['date'].min()) else 0
        max_year = int(df['date'].max()) if pd.notna(df['date'].max()) else 0
        if min_year and max_year:
            selected_years = st.sidebar.slider("📅 Publication Year Range", min_year, max_year, (min_year, max_year))
            logger.debug(f"Publication year range selected: {selected_years}")
        else:
            st.sidebar.info("📅 No valid publication year data available.")
            logger.warning("Publication year data is not available or entirely NaN.")
    else:
        st.sidebar.info("📅 Publication date data is not available or not in a numeric format.")
        logger.warning("Publication date data is not available or not numeric.")

    # Identifier Search
    identifier_search = st.sidebar.text_input("🔑 Search by Identifier (e.g., ISBN)")

    # Apply Filters
    filtered_df = df.copy()

    if title_search:
        filtered_df = filtered_df[filtered_df['title'].str.contains(title_search, case=False, na=False)]
        logger.debug(f"Applied title search filter: '{title_search}'")

    if selected_authors:
        filtered_df = filtered_df[filtered_df['creators'].apply(lambda x: any(creator in selected_authors for creator in x))]
        logger.debug(f"Applied author filter: {selected_authors}")

    if selected_subjects:
        filtered_df = filtered_df[filtered_df['subjects'].apply(lambda x: any(subject in selected_subjects for subject in x))]
        logger.debug(f"Applied subject filter: {selected_subjects}")

    if selected_libraries:
        # Non-empty only when 'virtual_libs' existed above, so this lookup is safe.
        filtered_df = filtered_df[filtered_df['virtual_libs'].apply(lambda x: any(lib in selected_libraries for lib in x))]
        logger.debug(f"Applied library filter: {selected_libraries}")

    if selected_languages:
        filtered_df = filtered_df[filtered_df['language'].isin(selected_languages)]
        logger.debug(f"Applied language filter: {selected_languages}")

    if selected_years:
        filtered_df = filtered_df[(filtered_df['date'] >= selected_years[0]) & (filtered_df['date'] <= selected_years[1])]
        logger.debug(f"Applied publication year range filter: {selected_years}")

    if identifier_search:
        idents_stringified = filtered_df['identifiers'].apply(
            lambda x: ' '.join(f"{k}:{v}" for k, v in x.items()) if isinstance(x, dict) else str(x)
        )
        # Treat the query as a literal substring, not a regex: raw user input
        # such as "isbn(10)" previously reached str.contains as a pattern and
        # could raise re.error. Match case-insensitively for consistency with
        # the title search.
        filtered_df = filtered_df[idents_stringified.str.contains(identifier_search, case=False, regex=False)]
        logger.debug(f"Applied identifier filter: '{identifier_search}'")

    return filtered_df
ebk/streamlit/utils.py ADDED
@@ -0,0 +1,58 @@
1
+ import json
2
+ import os
3
+ import zipfile
4
+ from io import BytesIO
5
+ import streamlit as st
6
+ import logging
7
+ import streamlit as st
8
+ from typing import List, Dict
9
+ from collections import Counter
10
+ from pathlib import Path
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
def load_metadata(metadata_content: BytesIO) -> list:
    """
    Loads metadata from the uploaded JSON file.
    Returns a list of dictionaries, or an empty list on any failure
    (the error is shown in the UI and logged).
    """
    try:
        parsed = json.load(metadata_content)
    except json.JSONDecodeError as e:
        st.error(f"JSON decoding error: {e}")
        logger.error(f"JSONDecodeError: {e}")
        return []
    except Exception as e:
        st.error(f"Unexpected error loading metadata.json: {e}")
        logger.error(f"Unexpected error: {e}")
        return []
    logger.debug("Metadata loaded successfully.")
    return parsed
31
+
32
def extract_zip(zip_bytes: BytesIO) -> dict:
    """
    Extracts a ZIP file in-memory and returns a dictionary of its contents.
    Keys are normalized archive paths, and values are BytesIO objects
    containing the file data.

    Entries with absolute paths or `..` components are skipped to prevent
    path traversal. Returns an empty dict (and reports via the UI/log) on
    any extraction failure.
    """
    extracted_files = {}
    try:
        with zipfile.ZipFile(zip_bytes) as z:
            for file_info in z.infolist():
                if file_info.is_dir():
                    continue
                normalized_path = os.path.normpath(file_info.filename)
                # Prevent path traversal: reject absolute paths and any path
                # containing a '..' component after normalization. The old
                # commonprefix()-based check was broken — it compared the path
                # with its own basename character by character, which both
                # failed to detect traversal reliably and silently dropped
                # legitimate files stored in subdirectories.
                parts = normalized_path.split(os.sep)
                if os.path.isabs(normalized_path) or os.pardir in parts:
                    logger.warning(f"Skipped potentially unsafe archive entry: {file_info.filename}")
                    continue
                with z.open(file_info) as f:
                    extracted_files[normalized_path] = BytesIO(f.read())
                logger.debug(f"Extracted: {normalized_path}")
        logger.debug("ZIP archive extracted successfully.")
        return extracted_files
    except zipfile.BadZipFile:
        st.error("The uploaded file is not a valid ZIP archive.")
        logger.error("BadZipFile encountered.")
        return {}
    except Exception as e:
        st.error(f"Error extracting ZIP file: {e}")
        logger.error(f"Exception during ZIP extraction: {e}")
        return {}
58
+