ebk 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ebk might be problematic; see the registry's advisory page for details.

ebk/config.py ADDED
@@ -0,0 +1,35 @@
1
+ import configparser
2
+ import os
3
+
4
def load_ebkrc_config(config_path: str = "~/.ebkrc"):
    """
    Load configuration from the ebk config file (default ``~/.ebkrc``).

    The file must contain an ``[llm]`` section with at least ``endpoint``
    and ``api_key`` keys; ``model`` is optional and defaults to
    ``gpt-3.5-turbo``. A ``[cloud]`` section may also be present for other
    features, but is not read here.

    Args:
        config_path: Path to the config file; ``~`` is expanded.
            (New optional parameter; default preserves the old behavior.)

    Returns:
        tuple: ``(endpoint, api_key, model)``.

    Raises:
        FileNotFoundError: If the config file does not exist.
        ValueError: If the [llm] section or a required key is missing.
    """
    # NOTE: earlier messages referred to "~/.btkrc" while the code read
    # "~/.ebkrc"; all messages now name the path actually read.
    config_path = os.path.expanduser(config_path)
    parser = configparser.ConfigParser()

    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Could not find config file at {config_path}")

    parser.read(config_path)

    if "llm" not in parser:
        raise ValueError(
            f"Config file {config_path} is missing the [llm] section. "
            "Please add it with 'endpoint' and 'api_key' keys."
        )

    endpoint = parser["llm"].get("endpoint", "")
    api_key = parser["llm"].get("api_key", "")
    model = parser["llm"].get("model", "gpt-3.5-turbo")

    if not endpoint or not api_key or not model:
        raise ValueError(
            f"Please make sure your [llm] section in {config_path} "
            "includes 'endpoint', 'api_key', and 'model' keys."
        )

    return endpoint, api_key, model
File without changes
ebk/exports/hugo.py ADDED
@@ -0,0 +1,55 @@
1
+ import os
2
+ import json
3
+ from pathlib import Path
4
+ from typing import List
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
def export_hugo(lib_dir, hugo_dir):
    """
    Export an ebk library to Hugo-compatible Markdown files.

    Reads ``metadata.json`` from *lib_dir*, writes one Markdown page per
    book under ``<hugo_dir>/content/library``, and copies ebook/cover
    files into ``<hugo_dir>/static/ebooks``.

    Args:
        lib_dir (str): Path to the ebk library directory to export
            (contains ``metadata.json`` and ebook-related files).
        hugo_dir (str): Path to the Hugo site directory.

    Note:
        The original implementation mixed key spellings ('title' vs
        'Title', 'Tags', 'File Path', 'Cover Path') that do not exist in
        the metadata.json written by the importers; this version uses the
        lowercase schema ('title', 'creators', 'file_paths', 'cover_path')
        and tolerates missing fields. File copies use pathlib instead of
        shelling out to ``cp`` (portable, no quoting issues).
    """
    lib_dir = Path(lib_dir)
    hugo_dir = Path(hugo_dir)

    with open(lib_dir / "metadata.json", "r", encoding="utf-8") as f:
        books = json.load(f)

    content_dir = hugo_dir / "content" / "library"
    static_dir = hugo_dir / "static" / "ebooks"
    content_dir.mkdir(parents=True, exist_ok=True)
    static_dir.mkdir(parents=True, exist_ok=True)

    for book in books:
        title = book.get("title") or "untitled"
        creators = book.get("creators") or []
        subjects = book.get("subjects") or []
        file_paths = book.get("file_paths") or []
        cover_path = book.get("cover_path")
        primary = file_paths[0] if file_paths else None

        slug = title.replace(" ", "-").lower()
        md_file = content_dir / f"{slug}.md"

        with open(md_file, "w", encoding="utf-8") as md:
            md.write("---\n")
            md.write(f"title: {title}\n")
            md.write(f"creators: [{', '.join(creators)}]\n")
            md.write(f"subjects: [{', '.join(subjects)}]\n")
            md.write(f"description: {book.get('description') or ''}\n")
            md.write(f"date: {book.get('date') or ''}\n")
            if primary:
                md.write(f"ebook_file: /ebooks/{Path(primary).name}\n")
            md.write(f"cover_image: /ebooks/{Path(cover_path).name if cover_path else ''}\n")
            md.write("---\n\n")
            md.write(f"# {title}\n\n")
            md.write(f"Author: {', '.join(creators)}\n\n")
            if primary:
                md.write(f"[Download eBook](/ebooks/{Path(primary).name})\n")

        # Copy ebook files and cover into the static directory.
        # file_paths/cover_path are stored relative to the library dir.
        for rel_path in file_paths:
            src = lib_dir / rel_path
            if src.exists():
                (static_dir / src.name).write_bytes(src.read_bytes())
        if cover_path:
            cover_src = lib_dir / cover_path
            if cover_src.exists():
                (static_dir / cover_src.name).write_bytes(cover_src.read_bytes())

    logging.getLogger(__name__).debug(
        "Exported %d books to Hugo site at '%s'", len(books), hugo_dir
    )
+
ebk/exports/zip.py ADDED
@@ -0,0 +1,25 @@
1
+ import os
2
+ import zipfile
3
+ from pathlib import Path
4
+ import logging
5
+
6
+ logger = logging.getLogger(__name__)
7
+
8
def export_zipfile(lib_dir, zip_file):
    """
    Export an ebk library to a ZIP archive.

    Walks *lib_dir* recursively and writes every file into *zip_file*,
    with archive names relative to *lib_dir*.

    Args:
        lib_dir (str): Path to the ebk library directory to export
            (contains ``metadata.json`` and ebook-related files).
        zip_file (str): Path to the output ZIP file.
    """
    lib_dir = Path(lib_dir)
    # FIX: the original called logging.debug (root logger) instead of the
    # module logger; use a module-scoped logger for per-file log control.
    log = logging.getLogger(__name__)

    with zipfile.ZipFile(zip_file, "w") as zf:
        for root, _, files in os.walk(lib_dir):
            for name in files:
                file_path = Path(root) / name
                log.debug("Adding file to zip: %s", file_path)
                zf.write(file_path, arcname=file_path.relative_to(lib_dir))
@@ -0,0 +1,273 @@
1
+ import os
2
+ import xmltodict
3
+ from typing import Dict, Optional
4
+ from slugify import slugify
5
+ import PyPDF2
6
+ from ebooklib import epub
7
+
8
def extract_metadata_from_opf(opf_file: str) -> Dict:
    """
    Parse a Calibre OPF file into a simplified dictionary structure (Dublin Core).

    Returns a dict with keys:
        - title
        - creators
        - subjects
        - description
        - language
        - date
        - identifiers
        - publisher

    Returns an empty dict if the file cannot be read or parsed.
    """
    try:
        with open(opf_file, "r", encoding="utf-8") as f:
            opf_dict = xmltodict.parse(f.read(), process_namespaces=False)
    except Exception as e:
        print(f"[extract_metadata_from_opf] Error reading '{opf_file}': {e}")
        return {}

    package = opf_dict.get("package", {})
    metadata = package.get("metadata", {})

    # Prepare simplified structure; fields default to None when absent.
    simplified = {
        "title": metadata.get("dc:title", metadata.get("title")),
        "creators": None,
        "subjects": None,
        "description": metadata.get("dc:description", metadata.get("description")),
        "language": metadata.get("dc:language", metadata.get("language")),
        "date": metadata.get("dc:date", metadata.get("date")),
        "publisher": metadata.get("dc:publisher", metadata.get("publisher")),
        "identifiers": None
    }

    # -- Creators: xmltodict yields a list, a dict with '#text', or a string.
    creators = metadata.get("dc:creator", metadata.get("creator"))
    if isinstance(creators, list):
        simplified["creators"] = [
            c.get("#text", "").strip() if isinstance(c, dict) else c
            for c in creators
        ]
    elif isinstance(creators, dict):
        simplified["creators"] = [creators.get("#text", "").strip()]
    elif isinstance(creators, str):
        simplified["creators"] = [creators.strip()]

    # -- Subjects
    subjects = metadata.get("dc:subject", metadata.get("subject"))
    if isinstance(subjects, list):
        simplified["subjects"] = [s.strip() for s in subjects]
    elif isinstance(subjects, str):
        simplified["subjects"] = [subjects.strip()]

    # -- Identifiers
    identifiers = metadata.get("dc:identifier", metadata.get("identifier"))
    if isinstance(identifiers, list):
        simplified["identifiers"] = {}
        for identifier in identifiers:
            if isinstance(identifier, dict):
                scheme = identifier.get("@opf:scheme", "unknown")
                text = identifier.get("#text", "").strip()
                simplified["identifiers"][scheme] = text
            else:
                simplified["identifiers"]["unknown"] = identifier
    elif isinstance(identifiers, dict):
        # BUG FIX: simplified["identifiers"] was still None in this branch,
        # so the original raised TypeError on item assignment.
        scheme = identifiers.get("@opf:scheme", "unknown")
        text = identifiers.get("#text", "").strip()
        simplified["identifiers"] = {scheme: text}
    elif isinstance(identifiers, str):
        # A single bare-string identifier (no scheme attribute).
        simplified["identifiers"] = {"unknown": identifiers.strip()}

    return simplified
79
+
80
+
81
def extract_metadata_from_pdf(pdf_path: str) -> Dict:
    """
    Extract metadata from a PDF file using PyPDF2.
    Returns a dictionary with the same keys as the OPF-based dict, plus
    'keywords' and (on success) 'file_paths'.
    Fields that cannot be read remain None; read errors are printed and
    the partially-filled dict is returned.
    """

    metadata = {
        "title": None,
        "creators": None,
        "subjects": None,
        "description": None,
        "language": None,
        "date": None,
        "publisher": None,
        "identifiers": None,
        "keywords": None,
    }

    try:
        with open(pdf_path, "rb") as f:
            reader = PyPDF2.PdfReader(f)
            info = reader.metadata or {}

            # NOTE: Depending on PyPDF2 version, metadata keys can differ
            # e.g. info.title vs info.get('/Title')
            pdf_title = info.get("/Title", None) or info.get("title", None)
            pdf_author = info.get("/Author", None) or info.get("author", None)
            pdf_subject = info.get("/Subject", None) or info.get("subject", None)
            pdf_keywords = info.get("/Keywords", None) or info.get("keywords", None)
            # /Producer is used as a fallback for a publisher-like field.
            pdf_publisher = info.get("/Producer", None) or info.get("producer", None) or info.get("/Publisher", None) or info.get("publisher", None)
            pdf_creation_date = info.get("/CreationDate", None)

            if pdf_title:
                metadata["title"] = pdf_title.strip()
            if pdf_author:
                # PDFs store one author string; wrap in a list to match
                # the OPF-style 'creators' field.
                metadata["creators"] = [pdf_author.strip()]
            if pdf_subject:
                # The PDF subject doubles as subject list and description.
                metadata["subjects"] = [sub.strip() for sub in pdf_subject.split(",")]
                metadata["description"] = pdf_subject.strip()

            if pdf_creation_date and len(pdf_creation_date) >= 10:
                # Format: 'D:YYYYMMDDhhmmss'
                # We'll extract 'YYYY-MM-DD'
                date_str = pdf_creation_date[2:10]  # e.g., 20210101
                metadata["date"] = f"{date_str[:4]}-{date_str[4:6]}-{date_str[6:]}"
            # Language not typically stored in PDF metadata
            metadata["language"] = "unknown-language"

            # For an "identifier," we don't really have a built-in PDF field, so it's optional
            metadata["identifiers"] = {"pdf:identifier": pdf_path}

            if pdf_keywords:
                metadata["keywords"] = [kw.strip() for kw in pdf_keywords.split(",")]

            if pdf_publisher:
                metadata["publisher"] = pdf_publisher.strip()

            metadata["file_paths"] = [pdf_path]


    except Exception as e:
        print(f"[extract_metadata_from_pdf] Error reading '{pdf_path}': {e}")

    return metadata
145
+
146
+
147
def extract_metadata_from_epub(epub_path: str) -> Dict:
    """
    Extract metadata from an EPUB file using ebooklib.
    Returns a dictionary with the same keys as the OPF-based dict.
    Read errors are printed and the partially-filled dict is returned.
    """
    metadata = {
        "title": None,
        "creators": [],
        "subjects": [],
        "description": None,
        "language": None,
        "date": None,
        "identifiers": {},
    }

    try:
        book = epub.read_epub(epub_path)

        # ebooklib returns a list of (value, attrs) tuples per DC field.
        def first_value(field):
            entries = book.get_metadata("DC", field)
            return entries[0][0] if entries else None

        def all_values(field):
            return [entry[0] for entry in book.get_metadata("DC", field)]

        # Single-valued fields: take the first declared value, if any.
        for field in ("title", "description", "language", "date"):
            value = first_value(field)
            if value is not None:
                metadata[field] = value

        # Multi-valued fields.
        creators = all_values("creator")
        if creators:
            metadata["creators"] = creators

        subjects = all_values("subject")
        if subjects:
            metadata["subjects"] = subjects

        # Identifiers carry an optional 'scheme' attribute.
        for ident_value, ident_attrs in book.get_metadata("DC", "identifier"):
            scheme = ident_attrs.get("scheme", "unknown")
            metadata["identifiers"][scheme] = ident_value
    except Exception as e:
        print(f"[extract_metadata_from_epub] Error reading '{epub_path}': {e}")

    return metadata
207
+
208
+
209
def extract_metadata_from_path(file_path: str) -> Dict:
    """
    Fallback metadata extraction by interpreting the path as
    ``<...>/<author>/<title>/<filename>``.

    The title is the directory containing the file; the creators are taken
    from the directory above it (split on commas for multiple authors).
    On any failure the error is printed and defaults are returned.
    """
    metadata = {
        "title": None,
        "creators": [],
        "subjects": [],
        "description": "",
        "language": "unknown-language",
        "date": "unknown-date",
        "identifiers": {}
    }

    try:
        path_parts = file_path.split(os.sep)
        # path layout: <base...>/<author>/<title>/<title - author.ext>
        title = path_parts[-2]
        # BUG FIX: the original read path_parts[1], which is the author
        # directory only when the base path is exactly one component deep;
        # index -3 is the author directory regardless of nesting.
        creators = path_parts[-3].split(",")
        metadata["title"] = title
        metadata["creators"] = creators
    except Exception as e:
        print(f"[extract_metadata_from_path] Error with '{file_path}': {e}")

    return metadata
235
+
236
def extract_metadata(ebook_file: str, opf_file: Optional[str] = None) -> Dict:
    """
    High-level function to extract metadata from either:
      - OPF file (if provided)
      - The ebook_file (PDF, EPUB, or fallback from path)
    Then merges them, giving priority to OPF data, then format-specific
    data, then the path-based fallback.

    Returns a final merged dictionary with keys such as: title, creators,
    subjects, description, language, date, identifiers (plus any extra
    fields the format-specific extractor produced, e.g. file_paths).
    """

    # 1. Extract from OPF if we have it.
    opf_metadata = {}
    if opf_file and os.path.isfile(opf_file):
        opf_metadata = extract_metadata_from_opf(opf_file)

    # 2. Format-specific extraction. BUG FIX: the original left
    # 'ebook_metadata' unbound (NameError) for any extension other than
    # .pdf/.epub.
    _, ext = os.path.splitext(ebook_file.lower())
    if ext == ".pdf":
        ebook_metadata = extract_metadata_from_pdf(ebook_file)
    elif ext == ".epub":
        ebook_metadata = extract_metadata_from_epub(ebook_file)
    else:
        ebook_metadata = {}

    # 3. Path-based fallback defaults.
    path_metadata = extract_metadata_from_path(ebook_file)

    # Merge lowest-priority first, overwriting with truthy higher-priority
    # values. BUG FIX: the original's second merge rebuilt the dict from
    # path_metadata's keys only, dropping fields present only in the
    # format-specific metadata (e.g. 'file_paths', 'keywords', 'publisher').
    metadata = dict(path_metadata)
    for source in (ebook_metadata, opf_metadata):
        for key, value in source.items():
            if value:
                metadata[key] = value
    return metadata
273
+
ebk/ident.py ADDED
@@ -0,0 +1,96 @@
1
+ import hashlib
2
+ import re
3
+ from typing import List, Dict
4
+ import uuid
5
+
6
def canonicalize_text(text: str) -> str:
    """
    Return a canonical form of *text*: lowercased, punctuation stripped,
    whitespace runs collapsed, and words joined with underscores.
    """
    lowered = text.lower()
    # Keep only word characters and whitespace (drops punctuation).
    cleaned = re.sub(r'[^\w\s]', '', lowered)
    # str.split() collapses arbitrary whitespace runs and trims the ends;
    # joining with '_' yields the canonical underscore-separated form.
    return '_'.join(cleaned.split())
21
+
22
def canonicalize_creators(creators: List[str]) -> str:
    """
    Canonicalize a list of creator (author) names.

    Names are sorted alphabetically so the result is order-independent,
    each is individually canonicalized, and all are joined with underscores.
    """
    return '_'.join(canonicalize_text(name) for name in sorted(creators))
32
+
33
def generate_composite_string(entry: Dict) -> str:
    """
    Build a composite identity string from an entry's language, creators,
    and title. The field order is fixed for consistency.

    Returns:
        str | None: "<language>__<creators>__<title>" with canonicalized
        parts, or None when none of the three fields carries a value
        (the caller then falls back to a random id).
    """
    # ROBUSTNESS FIX: the extractors can store these fields as None, so
    # use `or` fallbacks before calling .strip(). Also removed an unused
    # 'identifiers' local and dead commented-out ISBN/date/publisher code.
    language = (entry.get('language') or '').strip()
    creators = entry.get('creators') or []
    title = (entry.get('title') or '').strip()

    language_c = canonicalize_text(language) if language else 'no_language'
    creators_c = canonicalize_creators(creators) if creators else 'no_creators'
    title_c = canonicalize_text(title) if title else 'no_title'

    if language_c == 'no_language' and creators_c == 'no_creators' and title_c == 'no_title':
        return None

    # Double underscores delimit the fields.
    return f"{language_c}__{creators_c}__{title_c}"
62
+
63
def generate_hash_id(entry: Dict) -> str:
    """
    Generate a unique hash ID for an eBook entry by hashing its composite
    identity string; when no composite string can be built, a random UUID
    is hashed instead so the entry still gets a well-formed id.

    Args:
        entry (Dict): The eBook entry metadata.

    Returns:
        str: The SHA-256 hash hexadecimal string.
    """
    composite = generate_composite_string(entry)
    source = composite if composite else str(uuid.uuid4())
    return hashlib.sha256(source.encode('utf-8')).hexdigest()
83
+
84
def add_unique_id(entry: Dict) -> Dict:
    """
    Attach a 'unique_id' field (SHA-256 hash id) to the eBook entry.

    Note: the given dict is mutated in place and also returned.

    Args:
        entry (Dict): The original eBook entry metadata.

    Returns:
        Dict: The same entry with an added 'unique_id' field.
    """
    entry['unique_id'] = generate_hash_id(entry)
    return entry
File without changes
ebk/imports/calibre.py ADDED
@@ -0,0 +1,144 @@
1
+ import os
2
+ import shutil
3
+ import json
4
+ from slugify import slugify
5
+ from typing import Dict
6
+ import logging
7
+ from ..extract_metadata import extract_metadata
8
+ from ..ident import add_unique_id
9
+ from ..utils import get_unique_filename
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
# File extensions recognized as ebooks when scanning a Calibre library.
# FIX: the original tuple listed ".pdb" twice; the duplicate is removed.
ebook_exts = (".pdf", ".epub", ".mobi", ".azw3", ".txt", ".docx", ".odt",
              ".html", ".rtf", ".md", ".fb2", ".cbz", ".cbr", ".djvu",
              ".xps", ".ibooks", ".azw", ".lit", ".pdb", ".prc", ".lrf",
              ".pml", ".rb", ".snb", ".tcr", ".txtz", ".azw1")
17
+
18
def import_calibre(calibre_dir: str,
                   output_dir: str,
                   ebook_exts: tuple = ebook_exts):
    """
    Import a Calibre library into an ebk library directory.

    Walks *calibre_dir*, extracts metadata for each book folder (preferring
    a Calibre ``metadata.opf`` when present), copies the ebook files and
    cover into *output_dir* under slugified names, assigns unique ids, and
    writes a combined ``metadata.json``.

    Args:
        calibre_dir: Root of the Calibre library to import.
        output_dir: Destination ebk library directory (created if missing).
        ebook_exts: File extensions treated as ebooks.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    metadata_list = []

    for root, _, files in os.walk(calibre_dir):
        # Look for OPF
        opf_file_path = os.path.join(root, "metadata.opf")

        # Gather valid ebook files
        ebook_files = [f for f in files if f.lower().endswith(ebook_exts)]
        if not ebook_files:
            logger.debug(f"No recognized ebook files found in {root}. Skipping.")
            continue  # skip if no recognized ebook files

        # Pick the "primary" ebook file. This is arbitrary and can be changed.
        primary_ebook_file = ebook_files[0]
        ebook_full_path = os.path.join(root, primary_ebook_file)

        # Extract metadata exactly once, preferring the OPF when it exists.
        # BUG FIX: the original extracted in this if/else and then AGAIN
        # unconditionally with the OPF path, even when it did not exist.
        if os.path.exists(opf_file_path):
            logger.debug(f"Found metadata.opf in {root}. Extracting metadata from OPF.")
            metadata = extract_metadata(ebook_full_path, opf_file_path)
        else:
            logger.warning(f"No metadata.opf found in {root}. Inferring metadata from ebook files.")
            metadata = extract_metadata(ebook_full_path)  # Only ebook file path is provided

        metadata["root"] = root
        metadata["source_folder"] = calibre_dir
        metadata["output_folder"] = output_dir
        metadata["imported_from"] = "calibre"
        metadata["virtual_libs"] = [slugify(output_dir)]

        # Generate base name for the copied files.
        # ROBUSTNESS FIX: guard against a present-but-None title.
        title_slug = slugify(metadata.get("title") or "unknown_title")
        creator_slug = slugify(
            metadata["creators"][0]) if metadata.get("creators") else "unknown_creator"
        base_name = f"{title_slug}__{creator_slug}"

        # Copy ebooks into the output directory.
        file_paths = []
        for ebook_file in ebook_files:
            _, ext = os.path.splitext(ebook_file)
            src = os.path.join(root, ebook_file)
            dst = os.path.join(output_dir, f"{base_name}{ext}")
            dst = get_unique_filename(dst)
            shutil.copy(src, dst)
            file_paths.append(os.path.relpath(dst, output_dir))

        # Optionally handle cover.jpg
        if "cover.jpg" in files:
            cover_src = os.path.join(root, "cover.jpg")
            cover_dst = os.path.join(output_dir, f"{base_name}_cover.jpg")
            shutil.copy(cover_src, cover_dst)
            metadata["cover_path"] = os.path.relpath(cover_dst, output_dir)

        # Store relative paths in metadata
        metadata["file_paths"] = file_paths
        metadata_list.append(metadata)

    for entry in metadata_list:
        add_unique_id(entry)

    # Write out metadata.json
    output_json = os.path.join(output_dir, "metadata.json")
    with open(output_json, "w", encoding="utf-8") as f:
        json.dump(metadata_list, f, indent=2, ensure_ascii=False)
92
+
93
+
94
def ensure_metadata_completeness(metadata: Dict) -> Dict:
    """
    Ensure that required metadata fields are present.

    For each missing field that has a known default, insert the default.
    Fields listed as required but without a defined default in the original
    implementation ("root", "source_folder", "output_folder",
    "imported_from", "virtual_libs") are intentionally left untouched,
    matching the original behavior.

    Args:
        metadata (Dict): The metadata dictionary extracted from OPF or inferred.

    Returns:
        Dict: The updated metadata dictionary (mutated in place).
    """
    log = logging.getLogger(__name__)
    # Defaults table replaces the original 10-branch if/elif chain.
    # Mutable defaults are wrapped in factories so calls never share state.
    defaults = {
        "title": "Unknown Title",
        "creators": lambda: ["Unknown Author"],
        "subjects": lambda: [],
        "description": "No description available.",
        "language": "en",  # Default to English
        "date": None,  # Unknown date
        "identifiers": lambda: {},
        "file_paths": lambda: [],
        "cover_path": None,
        "unique_id": None,
    }
    for field, default in defaults.items():
        if field not in metadata:
            metadata[field] = default() if callable(default) else default
            log.debug(f"Set default value for '{field}'.")

    return metadata