ebk 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. ebk/__init__.py +35 -0
  2. ebk/ai/__init__.py +23 -0
  3. ebk/ai/knowledge_graph.py +450 -0
  4. ebk/ai/llm_providers/__init__.py +26 -0
  5. ebk/ai/llm_providers/anthropic.py +209 -0
  6. ebk/ai/llm_providers/base.py +295 -0
  7. ebk/ai/llm_providers/gemini.py +285 -0
  8. ebk/ai/llm_providers/ollama.py +294 -0
  9. ebk/ai/metadata_enrichment.py +394 -0
  10. ebk/ai/question_generator.py +328 -0
  11. ebk/ai/reading_companion.py +224 -0
  12. ebk/ai/semantic_search.py +433 -0
  13. ebk/ai/text_extractor.py +393 -0
  14. ebk/calibre_import.py +66 -0
  15. ebk/cli.py +6433 -0
  16. ebk/config.py +230 -0
  17. ebk/db/__init__.py +37 -0
  18. ebk/db/migrations.py +507 -0
  19. ebk/db/models.py +725 -0
  20. ebk/db/session.py +144 -0
  21. ebk/decorators.py +1 -0
  22. ebk/exports/__init__.py +0 -0
  23. ebk/exports/base_exporter.py +218 -0
  24. ebk/exports/echo_export.py +279 -0
  25. ebk/exports/html_library.py +1743 -0
  26. ebk/exports/html_utils.py +87 -0
  27. ebk/exports/hugo.py +59 -0
  28. ebk/exports/jinja_export.py +286 -0
  29. ebk/exports/multi_facet_export.py +159 -0
  30. ebk/exports/opds_export.py +232 -0
  31. ebk/exports/symlink_dag.py +479 -0
  32. ebk/exports/zip.py +25 -0
  33. ebk/extract_metadata.py +341 -0
  34. ebk/ident.py +89 -0
  35. ebk/library_db.py +1440 -0
  36. ebk/opds.py +748 -0
  37. ebk/plugins/__init__.py +42 -0
  38. ebk/plugins/base.py +502 -0
  39. ebk/plugins/hooks.py +442 -0
  40. ebk/plugins/registry.py +499 -0
  41. ebk/repl/__init__.py +9 -0
  42. ebk/repl/find.py +126 -0
  43. ebk/repl/grep.py +173 -0
  44. ebk/repl/shell.py +1677 -0
  45. ebk/repl/text_utils.py +320 -0
  46. ebk/search_parser.py +413 -0
  47. ebk/server.py +3608 -0
  48. ebk/services/__init__.py +28 -0
  49. ebk/services/annotation_extraction.py +351 -0
  50. ebk/services/annotation_service.py +380 -0
  51. ebk/services/export_service.py +577 -0
  52. ebk/services/import_service.py +447 -0
  53. ebk/services/personal_metadata_service.py +347 -0
  54. ebk/services/queue_service.py +253 -0
  55. ebk/services/tag_service.py +281 -0
  56. ebk/services/text_extraction.py +317 -0
  57. ebk/services/view_service.py +12 -0
  58. ebk/similarity/__init__.py +77 -0
  59. ebk/similarity/base.py +154 -0
  60. ebk/similarity/core.py +471 -0
  61. ebk/similarity/extractors.py +168 -0
  62. ebk/similarity/metrics.py +376 -0
  63. ebk/skills/SKILL.md +182 -0
  64. ebk/skills/__init__.py +1 -0
  65. ebk/vfs/__init__.py +101 -0
  66. ebk/vfs/base.py +298 -0
  67. ebk/vfs/library_vfs.py +122 -0
  68. ebk/vfs/nodes/__init__.py +54 -0
  69. ebk/vfs/nodes/authors.py +196 -0
  70. ebk/vfs/nodes/books.py +480 -0
  71. ebk/vfs/nodes/files.py +155 -0
  72. ebk/vfs/nodes/metadata.py +385 -0
  73. ebk/vfs/nodes/root.py +100 -0
  74. ebk/vfs/nodes/similar.py +165 -0
  75. ebk/vfs/nodes/subjects.py +184 -0
  76. ebk/vfs/nodes/tags.py +371 -0
  77. ebk/vfs/resolver.py +228 -0
  78. ebk/vfs_router.py +275 -0
  79. ebk/views/__init__.py +32 -0
  80. ebk/views/dsl.py +668 -0
  81. ebk/views/service.py +619 -0
  82. ebk-0.4.4.dist-info/METADATA +755 -0
  83. ebk-0.4.4.dist-info/RECORD +87 -0
  84. ebk-0.4.4.dist-info/WHEEL +5 -0
  85. ebk-0.4.4.dist-info/entry_points.txt +2 -0
  86. ebk-0.4.4.dist-info/licenses/LICENSE +21 -0
  87. ebk-0.4.4.dist-info/top_level.txt +1 -0
ebk/exports/html_utils.py ADDED
@@ -0,0 +1,87 @@
1
+ """HTML sanitization utilities for secure template rendering."""
2
+
3
+ import json
4
+ import html
5
+ from typing import Any, Dict, List
6
+ import re
7
+
8
+
9
def sanitize_for_html(text: str) -> str:
    """
    Return *text* escaped for safe inclusion in HTML.

    Any falsy input (``None``, ``""``, ``0`` ...) yields an empty string.
    Everything else is converted with ``str()`` and run through
    ``html.escape`` so it cannot inject markup (XSS).
    """
    return html.escape(str(text)) if text else ""
18
+
19
+
20
def sanitize_for_javascript(obj: Any) -> str:
    """
    Safely encode data for embedding in JavaScript.

    The object is serialised with ``json.dumps`` and every character that
    could terminate or confuse an inline ``<script>`` block is then replaced
    by its ``\\uXXXX`` escape. Those characters can only occur inside JSON
    string values, so the result is still valid JSON *and* a valid
    JavaScript literal that decodes to the original data.

    Args:
        obj: Any JSON-serialisable value.

    Returns:
        A JSON string containing no raw ``<``, ``>``, ``&``, U+2028 or U+2029.

    Raises:
        TypeError: If *obj* is not JSON-serialisable (raised by ``json.dumps``).
    """
    json_str = json.dumps(obj, ensure_ascii=False)

    # Escaping the characters themselves (rather than the literal text
    # "</script>") also covers "</SCRIPT>", "</script >", "<!--" and "-->",
    # and avoids emitting backslash escapes that JSON does not define.
    replacements = (
        ("<", "\\u003c"),
        (">", "\\u003e"),
        ("&", "\\u0026"),
        # Line/paragraph separators terminate string literals in older
        # JavaScript engines; ensure_ascii=False would emit them verbatim.
        ("\u2028", "\\u2028"),
        ("\u2029", "\\u2029"),
    )
    for char, escaped in replacements:
        json_str = json_str.replace(char, escaped)

    return json_str
36
+
37
+
38
def sanitize_entries_for_javascript(entries: List[Dict]) -> str:
    """
    Prepare entries for safe embedding in JavaScript.

    Builds a reduced copy of every entry containing only the keys listed
    below, HTML-escapes the free-text fields with ``sanitize_for_html`` and
    serialises the list with ``sanitize_for_javascript``.

    ``unique_id``, ``cover_path``, ``file_paths`` and ``_entry_id`` are
    passed through without HTML escaping; they are still JSON-encoded.

    Args:
        entries: Entry dictionaries; missing keys fall back to empty values.

    Returns:
        The JSON text produced by ``sanitize_for_javascript``.
    """

    def escaped_list(value: Any) -> List[str]:
        # A missing/None field must not crash the export, and a bare string
        # must not be iterated character by character.
        if not value:
            return []
        if isinstance(value, str):
            value = [value]
        return [sanitize_for_html(item) for item in value]

    def escaped_text(value: Any) -> str:
        # str(None) would otherwise surface as the literal text "None".
        return "" if value is None else sanitize_for_html(str(value))

    sanitized_entries = [
        {
            'unique_id': entry.get('unique_id', ''),
            'title': sanitize_for_html(entry.get('title', '')),
            'creators': escaped_list(entry.get('creators', [])),
            'subjects': escaped_list(entry.get('subjects', [])),
            'language': sanitize_for_html(entry.get('language', '')),
            'date': escaped_text(entry.get('date', '')),
            'publisher': escaped_text(entry.get('publisher', '')),
            'description': sanitize_for_html(entry.get('description', '')),
            'cover_path': entry.get('cover_path', ''),
            'file_paths': entry.get('file_paths', []),
            '_readable_name': sanitize_for_html(entry.get('_readable_name', '')),
            '_entry_id': entry.get('_entry_id', ''),
        }
        for entry in entries
    ]

    return sanitize_for_javascript(sanitized_entries)
66
+
67
+
68
def create_safe_filename(text: str, max_length: int = 255) -> str:
    """
    Create a safe filename from text.

    Steps, in order: strip anything that looks like an HTML tag, replace the
    characters ``< > : " / \\ | ? *`` with ``_``, drop control characters
    (code points below 32), truncate to *max_length*, then strip surrounding
    whitespace.

    Args:
        text: Source text; ``None`` is treated as empty.
        max_length: Maximum length of the result. Over-long names end in
            ``...`` when there is room for it (``max_length >= 3``).

    Returns:
        The sanitised name, never longer than *max_length* (may be empty).
    """
    if text is None:
        return ""

    # Remove HTML tags if any
    text = re.sub(r'<[^>]+>', '', text)

    # Replace unsafe characters
    safe_chars = re.sub(r'[<>:"/\\|?*]', '_', text)

    # Remove control characters
    safe_chars = ''.join(char for char in safe_chars if ord(char) >= 32)

    # Truncate if too long
    if len(safe_chars) > max_length:
        if max_length >= 3:
            safe_chars = safe_chars[:max_length - 3] + '...'
        else:
            # No room for an ellipsis: a negative slice bound here would
            # keep almost the whole string and exceed the limit.
            safe_chars = safe_chars[:max(max_length, 0)]

    return safe_chars.strip()
ebk/exports/hugo.py ADDED
@@ -0,0 +1,59 @@
1
+ import json
2
+ import shutil
3
+ from pathlib import Path
4
+ from typing import List
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
def export_hugo(lib_dir, hugo_dir):
    """
    Export ebk library to Hugo-compatible Markdown files.

    One Markdown page per book is written to ``<hugo_dir>/content/library``
    and the referenced ebook and cover files are copied to
    ``<hugo_dir>/static/ebooks``.

    Each field is looked up under several keys so that a record does not
    need to carry every spelling at once: ``title``/``Title``,
    ``creators``/``Author``, ``tags``/``Tags``,
    ``file_path``/``File Path``/first item of ``file_paths`` and
    ``cover_path``/``Cover Path``. Missing fields are written as empty
    values instead of raising ``KeyError``.

    Front matter values are emitted as JSON scalars/lists (valid YAML flow
    syntax) so titles or descriptions containing ``:``, quotes or brackets
    cannot corrupt the front matter.

    Args:
        lib_dir (str): Path to the ebk library directory to export (contains `metadata.json` and ebook-related files)
        hugo_dir (str): Path to the Hugo site directory
    """
    lib_dir = Path(lib_dir)
    hugo_dir = Path(hugo_dir)

    with open(lib_dir / "metadata.json", "r", encoding="utf-8") as f:
        books = json.load(f)

    content_dir = hugo_dir / "content" / "library"
    static_dir = hugo_dir / "static" / "ebooks"
    content_dir.mkdir(parents=True, exist_ok=True)
    static_dir.mkdir(parents=True, exist_ok=True)

    def pick(book, *keys):
        """Return the first non-empty value found under *keys*, else ''."""
        for key in keys:
            value = book.get(key)
            if value:
                return value
        return ""

    def as_list(value, split_on=None):
        """Coerce a list or (optionally delimited) string into a list of str."""
        if not value:
            return []
        if isinstance(value, str):
            parts = value.split(split_on) if split_on else [value]
            return [part.strip() for part in parts if part.strip()]
        return [str(item) for item in value]

    def locate(raw_path):
        """Resolve a stored path: as given if it exists, else under lib_dir."""
        if not raw_path:
            return None
        candidate = Path(raw_path)
        if candidate.exists():
            return candidate
        return lib_dir / candidate

    def quoted(value):
        """Serialise a front matter value as JSON (a subset of YAML flow style)."""
        return json.dumps(value, ensure_ascii=False)

    for book in books:
        title = str(pick(book, "title", "Title") or "Unknown Title")
        creators = as_list(pick(book, "creators", "Author"))
        subjects = as_list(pick(book, "subjects"))
        tags = as_list(pick(book, "tags", "Tags"), split_on=",")
        description = pick(book, "description")
        date = pick(book, "date")

        file_paths = pick(book, "file_paths")
        raw_file = pick(book, "file_path", "File Path")
        if not raw_file and isinstance(file_paths, list) and file_paths:
            raw_file = file_paths[0]
        ebook_source = locate(raw_file)
        cover_source = locate(pick(book, "cover_path", "Cover Path"))

        ebook_url = f"/ebooks/{ebook_source.name}" if ebook_source else ""
        cover_url = f"/ebooks/{cover_source.name}" if cover_source else ""

        # Path separators in a title would otherwise point outside content_dir.
        slug = title.replace(" ", "-").lower()
        for separator in ("/", "\\"):
            slug = slug.replace(separator, "-")
        md_file = content_dir / f"{slug}.md"

        lines = [
            "---",
            f"title: {quoted(title)}",
            f"creators: {quoted(creators)}",
            f"subjects: {quoted(subjects)}",
            f"description: {quoted(description)}",
        ]
        if date:
            # Omitted when unknown rather than emitting an empty date value.
            lines.append(f"date: {quoted(date)}")
        lines.extend([
            f"tags: {quoted(tags)}",
            f"ebook_file: {quoted(ebook_url)}",
            f"cover_image: {quoted(cover_url)}",
            "---",
            "",
            f"# {title}",
            "",
            f"Author: {', '.join(creators)}",
            "",
        ])
        if ebook_url:
            lines.append(f"[Download eBook]({ebook_url})")

        with open(md_file, "w", encoding="utf-8") as md:
            md.write("\n".join(lines) + "\n")

        # Copy eBook and cover to static directory
        for source in (ebook_source, cover_source):
            if source is not None and source.is_file():
                shutil.copy2(source, static_dir)

    # Same logger object as the module-level ``logger`` (same name).
    logging.getLogger(__name__).debug(
        f"Exported {len(books)} books to Hugo site at '{hugo_dir}'"
    )
59
+
ebk/exports/jinja_export.py ADDED
@@ -0,0 +1,286 @@
1
+ """
2
+ Flexible Jinja2-based export system for ebk libraries.
3
+
4
+ This module provides a template-driven approach to exporting ebook metadata
5
+ in various formats, with Hugo as the primary implementation.
6
+ """
7
+
8
+ import json
9
+ import shutil
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional
12
+ from jinja2 import Environment, FileSystemLoader, select_autoescape
13
+ import logging
14
+ from slugify import slugify
15
+ from collections import defaultdict
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class JinjaExporter:
    """Flexible export system using Jinja2 templates.

    Renders the templates ``hugo/book.md``, ``hugo/index.md`` and
    ``hugo/library.md`` from the configured template directory to build a
    Hugo content tree for an ebk library.
    """

    def __init__(self, template_dir: Optional[Path] = None):
        """
        Initialize the exporter with a template directory.

        Args:
            template_dir: Path to custom templates. If None, uses built-in templates.
        """
        if template_dir is None:
            template_dir = Path(__file__).parent / "templates"

        self.env = Environment(
            loader=FileSystemLoader(template_dir),
            autoescape=select_autoescape(['html', 'xml']),
            trim_blocks=True,
            lstrip_blocks=True
        )

        # Add custom filters
        self.env.filters['slugify'] = slugify
        self.env.filters['join_list'] = lambda x: ', '.join(x) if isinstance(x, list) else x
        self.env.filters['default_if_none'] = lambda x, default='': x if x is not None else default

    def export_hugo(self, lib_dir: str, hugo_dir: str,
                    organize_by: str = "flat",
                    create_indexes: bool = True,
                    copy_files: bool = True):
        """
        Export library to Hugo with flexible organization options.

        Args:
            lib_dir: Path to ebk library
            hugo_dir: Path to Hugo site directory
            organize_by: Organization method - "flat", "year", "language", "subject", "creator"
            create_indexes: Whether to create index pages for categories
            copy_files: Whether to copy ebook and cover files
        """
        lib_path = Path(lib_dir)
        hugo_path = Path(hugo_dir)

        # Load metadata. The encoding is explicit so the read does not depend
        # on the platform default; every file this class writes is UTF-8.
        with open(lib_path / "metadata.json", "r", encoding="utf-8") as f:
            books = json.load(f)

        # Prepare books with normalized fields
        books = self._normalize_metadata(books)

        # Create directory structure
        content_dir = hugo_path / "content" / "library"
        static_dir = hugo_path / "static" / "ebooks"
        content_dir.mkdir(parents=True, exist_ok=True)
        static_dir.mkdir(parents=True, exist_ok=True)

        # Group books by organization method
        grouped_books = self._group_books(books, organize_by)

        # Export individual book pages
        for group_key, group_books in grouped_books.items():
            group_dir = content_dir / group_key if organize_by != "flat" else content_dir
            group_dir.mkdir(parents=True, exist_ok=True)

            for book in group_books:
                self._export_book(book, group_dir, static_dir, lib_path, copy_files)

        # Create index pages
        if create_indexes:
            self._create_indexes(grouped_books, content_dir, organize_by)

        # Create main library index
        self._create_main_index(books, content_dir, organize_by)

        logger.info(f"Exported {len(books)} books to Hugo site at '{hugo_dir}'")

    def _normalize_metadata(self, books: List[Dict]) -> List[Dict]:
        """Normalize metadata fields for consistent access.

        Returns one new dict per input book with a fixed key set (defaults
        applied for missing keys), plus the derived ``year`` and ``slug``
        fields and the untouched input under ``_original``.
        """
        normalized = []

        for book in books:
            # Create a normalized version with consistent field names
            norm = {
                'title': book.get('title', 'Unknown Title'),
                'creators': book.get('creators', []),
                'subjects': book.get('subjects', []),
                'description': book.get('description', ''),
                'language': book.get('language', 'en'),
                'date': book.get('date', ''),
                'publisher': book.get('publisher', ''),
                'identifiers': book.get('identifiers', {}),
                'file_paths': book.get('file_paths', []),
                'cover_path': book.get('cover_path', ''),
                'unique_id': book.get('unique_id', ''),
                # Keep original data for backward compatibility
                '_original': book
            }

            # Year is the first four characters of the date. str() keeps
            # numeric dates (e.g. 2020) usable instead of discarding them.
            norm['year'] = str(norm['date'])[:4] if norm['date'] else ''

            # Slug combines the title with a short id prefix; a None or
            # non-string id must not raise when sliced.
            id_prefix = str(norm['unique_id'] or '')[:8]
            norm['slug'] = slugify(f"{norm['title']}-{id_prefix}")

            normalized.append(norm)

        return normalized

    def _group_books(self, books: List[Dict], organize_by: str) -> Dict[str, List[Dict]]:
        """Group books by specified organization method.

        Returns a dict mapping a directory-style group key to its books.
        ``"flat"`` and any unrecognised method return every book under the
        key ``""``. Books whose grouping field is empty are placed in an
        explicit fallback group (``unknown-year``, ``unknown-language``,
        ``uncategorized``, ``unknown-creator``) so they are still exported.
        """
        grouped = defaultdict(list)

        # Normalised books always carry these keys (possibly empty), so the
        # fallbacks use ``or`` rather than a ``dict.get`` default, which
        # would never trigger.
        if organize_by == "year":
            for book in books:
                grouped[book.get('year') or 'unknown-year'].append(book)
        elif organize_by == "language":
            for book in books:
                grouped[book.get('language') or 'unknown-language'].append(book)
        elif organize_by == "subject":
            for book in books:
                for subject in book.get('subjects') or ['uncategorized']:
                    grouped[slugify(subject) or 'uncategorized'].append(book)
        elif organize_by == "creator":
            for book in books:
                for creator in book.get('creators') or ['unknown-creator']:
                    grouped[slugify(creator) or 'unknown-creator'].append(book)
        else:
            # "flat" and anything unrecognised: a single unnamed group
            grouped[""] = books

        return dict(grouped)

    def _export_book(self, book: Dict, output_dir: Path, static_dir: Path,
                     lib_path: Path, copy_files: bool):
        """Export a single book.

        Copies the book's files and cover into *static_dir* (only when
        *copy_files* is true and the source exists under *lib_path*), renders
        ``hugo/book.md`` and writes it to ``<output_dir>/<slug>.md``.
        """
        # Load book template
        template = self.env.get_template('hugo/book.md')

        # Prepare file paths for Hugo; URLs are only produced for files that
        # were actually copied.
        ebook_urls = []
        for file_path in book['file_paths'] or []:
            if not (copy_files and file_path):
                continue
            src = lib_path / file_path
            if src.exists():
                shutil.copy2(src, static_dir / src.name)
                ebook_urls.append(f"/ebooks/{src.name}")

        cover_url = ""
        if book['cover_path'] and copy_files:
            src = lib_path / book['cover_path']
            if src.exists():
                shutil.copy2(src, static_dir / src.name)
                cover_url = f"/ebooks/{src.name}"

        # Render template
        content = template.render(
            book=book,
            ebook_urls=ebook_urls,
            cover_url=cover_url
        )

        # Write file
        output_file = output_dir / f"{book['slug']}.md"
        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(content)

    def _create_indexes(self, grouped_books: Dict[str, List[Dict]],
                        content_dir: Path, organize_by: str):
        """Create index pages for each group.

        Writes ``_index.md`` (rendered from ``hugo/index.md``) into each
        group's directory. Does nothing for the flat layout and skips the
        unnamed ``""`` group.
        """
        if organize_by == "flat":
            return

        template = self.env.get_template('hugo/index.md')

        for group_key, books in grouped_books.items():
            if not group_key:  # Skip empty group
                continue

            group_dir = content_dir / group_key
            index_file = group_dir / "_index.md"

            # Determine group title
            if organize_by == "year":
                group_title = f"Books from {group_key}"
            elif organize_by == "language":
                group_title = f"Books in {group_key}"
            elif organize_by == "subject":
                group_title = f"Subject: {group_key.replace('-', ' ').title()}"
            elif organize_by == "creator":
                group_title = f"Books by {group_key.replace('-', ' ').title()}"
            else:
                group_title = group_key.replace('-', ' ').title()

            content = template.render(
                title=group_title,
                organize_by=organize_by,
                group_key=group_key,
                books=books,
                book_count=len(books)
            )

            with open(index_file, 'w', encoding='utf-8') as f:
                f.write(content)

    def _create_main_index(self, books: List[Dict], content_dir: Path, organize_by: str):
        """Create main library index page.

        Computes library statistics (totals, per-language and per-year
        counts, ten most frequent creators and subjects), renders
        ``hugo/library.md`` and writes ``<content_dir>/_index.md``.
        """
        template = self.env.get_template('hugo/library.md')

        # Calculate statistics
        stats = {
            'total_books': len(books),
            'total_creators': len(set(creator for book in books for creator in book.get('creators', []))),
            'total_subjects': len(set(subject for book in books for subject in book.get('subjects', []))),
            'languages': defaultdict(int),
            'years': defaultdict(int),
            'top_creators': defaultdict(int),
            'top_subjects': defaultdict(int)
        }

        for book in books:
            # Language stats
            lang = book.get('language', 'unknown')
            stats['languages'][lang] += 1

            # Year stats (books without a year are not counted)
            year = book.get('year', 'unknown')
            if year:
                stats['years'][year] += 1

            # Creator stats
            for creator in book.get('creators', []):
                stats['top_creators'][creator] += 1

            # Subject stats
            for subject in book.get('subjects', []):
                stats['top_subjects'][subject] += 1

        # Sort and limit top items; these become lists of (name, count) pairs
        stats['top_creators'] = sorted(stats['top_creators'].items(),
                                       key=lambda x: x[1], reverse=True)[:10]
        stats['top_subjects'] = sorted(stats['top_subjects'].items(),
                                       key=lambda x: x[1], reverse=True)[:10]

        content = template.render(
            title="Library",
            books=books,
            stats=stats,
            organize_by=organize_by
        )

        index_file = content_dir / "_index.md"
        with open(index_file, 'w', encoding='utf-8') as f:
            f.write(content)
ebk/exports/multi_facet_export.py ADDED
@@ -0,0 +1,159 @@
1
+ """Multi-faceted export for ebk libraries with sidebar navigation."""
2
+
3
+ from pathlib import Path
4
+ from typing import Dict, List, Set, Optional
5
+ import json
6
+ import shutil
7
+ from collections import defaultdict
8
+ import re
9
+ from datetime import datetime
10
+ from jinja2 import Environment, FileSystemLoader
11
+ from .html_utils import sanitize_entries_for_javascript, sanitize_for_html, create_safe_filename
12
+ from .base_exporter import BaseExporter
13
+
14
+
15
class MultiFacetExporter(BaseExporter):
    """Export library with multiple faceted navigation (subjects, authors, etc.)."""

    def __init__(self, facets: Optional[Dict[str, str]] = None):
        """
        Initialize the multi-facet exporter.

        Args:
            facets: Dictionary mapping facet names to metadata fields
                   e.g., {"Subjects": "subjects", "Authors": "creators", "Years": "date"}
        """
        super().__init__()
        self.facets = facets or {
            "Subjects": "subjects",
            "Authors": "creators",
            "Publishers": "publisher",
            "Languages": "language"
        }

    def export(self, library_path: Path, output_path: Path,
               include_files: bool = False,
               create_index: bool = True, **options):
        """Export the library with multi-faceted navigation.

        Args:
            library_path: Library directory passed to the base-class loaders.
            output_path: Destination directory for the export.
            include_files: Copy entry files when true, otherwise symlink them.
            create_index: Whether to render ``index.html``.
            **options: Accepted for interface compatibility; not used here.
        """
        # Use base class methods (defined on BaseExporter, not in this module)
        entries = self.load_metadata(library_path)
        self.prepare_output_directory(output_path)

        # Build facet data
        facet_data = self._build_facet_data(entries)

        # Create _books directory structure. Tolerate an existing directory,
        # matching the per-entry mkdir below, so re-running an export into
        # the same location does not fail here.
        books_dir = output_path / "_books"
        books_dir.mkdir(parents=True, exist_ok=True)

        # Process each entry
        for entry in entries:
            entry_id = entry.get("unique_id", "")
            if not entry_id:
                continue

            # Create entry directory
            entry_dir = books_dir / self._sanitize_filename(entry_id)
            entry_dir.mkdir(exist_ok=True)

            # Use base class file operations
            if include_files:
                self.copy_entry_files(entry, library_path, entry_dir)
            else:
                self.symlink_entry_files(entry, library_path, entry_dir)

            # Write entry metadata using base class method
            self.write_json(entry, entry_dir / "metadata.json")

            # Add computed fields for template. Done after write_json so the
            # per-entry metadata.json does not contain them.
            entry["_entry_id"] = entry_id
            entry["_readable_name"] = self.get_readable_name(entry)

        # Create index.html if requested
        if create_index:
            self._create_index_file(output_path, entries, facet_data)

        # Create README using base class method
        stats = {
            'total_entries': len(entries),
            'export_date': datetime.now().isoformat(),
            'export_type': 'Multi-Faceted Export',
            'structure_description': f"Organized by {len(self.facets)} facets with {len(entries)} entries"
        }
        self.create_readme(output_path, stats)

    def _build_facet_data(self, entries: List[Dict]) -> Dict[str, Dict]:
        """Build facet data structure from entries.

        Returns a dict keyed by metadata field name; each value holds the
        facet's ``display_name`` and ``items``, a mapping of facet value to
        the number of entries carrying it. For the ``date`` field only the
        four-character year prefix is counted, and only when it is numeric.
        """
        facet_data = {}

        for facet_name, field_name in self.facets.items():
            items = defaultdict(int)

            for entry in entries:
                values = entry.get(field_name, [])
                if not isinstance(values, list):
                    values = [values] if values else []

                for value in values:
                    if not value:  # Skip empty values
                        continue
                    if field_name == "date":
                        # Special handling for dates - extract year
                        year = str(value)[:4]
                        if year.isdigit():
                            items[year] += 1
                    else:
                        items[str(value)] += 1

            facet_data[field_name] = {
                "display_name": facet_name,
                "items": dict(items)
            }

        return facet_data

    def _create_index_file(self, output_path: Path, entries: List[Dict],
                           facet_data: Dict[str, Dict]):
        """Create the multi-faceted index.html file.

        Builds a string-only copy of the entries for embedding as JSON,
        renders ``multi_facet_index.html`` from the package ``templates``
        directory and writes ``<output_path>/index.html``.
        """
        # Prepare entries for JSON
        clean_entries = []
        for entry in entries:
            clean_entry = {}
            for key, value in entry.items():
                if value is None:
                    # str(None) would reach the page as the text "None"
                    # (e.g. a bogus cover path); use an empty value instead.
                    clean_entry[key] = ""
                elif isinstance(value, str):
                    if key == "description":
                        # Strip HTML and limit length
                        value = re.sub(r'<[^>]+>', '', value)
                        if len(value) > 500:
                            value = value[:500] + "..."
                    clean_entry[key] = value
                elif isinstance(value, list):
                    clean_entry[key] = [str(v) for v in value if v is not None]
                else:
                    clean_entry[key] = str(value)
            clean_entries.append(clean_entry)

        # Use safe JSON encoding for JavaScript embedding
        entries_json = sanitize_entries_for_javascript(clean_entries)

        # Set up Jinja2
        # NOTE(review): this Environment has autoescape disabled while the raw
        # ``entries`` are also handed to the template. Whether that is safe
        # depends on how the template renders them - confirm against
        # multi_facet_index.html before enabling autoescape, since that would
        # also escape ``entries_json``.
        template_dir = Path(__file__).parent / "templates"
        env = Environment(loader=FileSystemLoader(str(template_dir)))
        template = env.get_template("multi_facet_index.html")

        # Render template with sanitized data
        html_content = template.render(
            title=sanitize_for_html("EBK Library"),
            entries=entries,
            entries_json=entries_json,  # Already sanitized
            facets=facet_data,
            is_subdir=False
        )

        # Write the file
        index_path = output_path / "index.html"
        with open(index_path, "w", encoding="utf-8") as f:
            f.write(html_content)