shoko 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.bundle/config +4 -0
- data/.bundle/config.bak +3 -0
- data/.rspec_status +42 -0
- data/.rubocop.yml +124 -0
- data/Gemfile +19 -0
- data/LICENSE +21 -0
- data/README.md +82 -0
- data/Rakefile +29 -0
- data/bin/start +15 -0
- data/lib/shoko/adapters/book_sources/document_service.rb +201 -0
- data/lib/shoko/adapters/book_sources/download_service.rb +95 -0
- data/lib/shoko/adapters/book_sources/epub/epub_resource_loader.rb +137 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/html_processor.rb +151 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/metadata_extractor.rb +53 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/entry_reader.rb +77 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/metadata_extractor.rb +67 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_context.rb +86 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_document_index.rb +75 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_document_scanner.rb +47 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_extractor.rb +46 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_label_resolver.rb +83 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_list_item.rb +55 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_result.rb +8 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_selector.rb +100 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_source_locator.rb +93 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_traversal.rb +103 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_walker.rb +56 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/opf_processor.rb +102 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/xhtml_content_parser.rb +661 -0
- data/lib/shoko/adapters/book_sources/epub/parsers/xml_text_normalizer.rb +41 -0
- data/lib/shoko/adapters/book_sources/epub_document.rb +253 -0
- data/lib/shoko/adapters/book_sources/epub_finder/directory_scanner.rb +134 -0
- data/lib/shoko/adapters/book_sources/epub_finder/scanner_context.rb +28 -0
- data/lib/shoko/adapters/book_sources/epub_finder.rb +161 -0
- data/lib/shoko/adapters/book_sources/epub_importer.rb +268 -0
- data/lib/shoko/adapters/book_sources/gutendex_client.rb +150 -0
- data/lib/shoko/adapters/book_sources/library_scanner.rb +93 -0
- data/lib/shoko/adapters/book_sources/source_fingerprint.rb +57 -0
- data/lib/shoko/adapters/input/annotations/mouse_handler.rb +84 -0
- data/lib/shoko/adapters/input/command_bridge.rb +148 -0
- data/lib/shoko/adapters/input/command_factory.rb +255 -0
- data/lib/shoko/adapters/input/commands.rb +60 -0
- data/lib/shoko/adapters/input/dispatcher.rb +69 -0
- data/lib/shoko/adapters/input/input_controller.rb +250 -0
- data/lib/shoko/adapters/input/key_definitions.rb +108 -0
- data/lib/shoko/adapters/input/validators/file_path_validator.rb +81 -0
- data/lib/shoko/adapters/input/validators/terminal_size_validator.rb +76 -0
- data/lib/shoko/adapters/monitoring/logger.rb +150 -0
- data/lib/shoko/adapters/monitoring/perf_tracer.rb +183 -0
- data/lib/shoko/adapters/monitoring/performance_monitor.rb +110 -0
- data/lib/shoko/adapters/output/clipboard/clipboard_service.rb +125 -0
- data/lib/shoko/adapters/output/formatting/formatting_service/line_assembler/image_builder.rb +149 -0
- data/lib/shoko/adapters/output/formatting/formatting_service/line_assembler/text_wrapper.rb +149 -0
- data/lib/shoko/adapters/output/formatting/formatting_service/line_assembler/tokenizer.rb +91 -0
- data/lib/shoko/adapters/output/formatting/formatting_service/line_assembler.rb +144 -0
- data/lib/shoko/adapters/output/formatting/formatting_service/plain_lines_builder.rb +54 -0
- data/lib/shoko/adapters/output/formatting/formatting_service.rb +247 -0
- data/lib/shoko/adapters/output/formatting/wrapping_service.rb +228 -0
- data/lib/shoko/adapters/output/instrumentation_service.rb +52 -0
- data/lib/shoko/adapters/output/kitty/image_transcoder.rb +71 -0
- data/lib/shoko/adapters/output/kitty/kitty_graphics.rb +114 -0
- data/lib/shoko/adapters/output/kitty/kitty_image_renderer.rb +239 -0
- data/lib/shoko/adapters/output/kitty/kitty_unicode_placeholders.rb +139 -0
- data/lib/shoko/adapters/output/kitty/kitty_unicode_placeholders_diacritic_codepoints.txt +26 -0
- data/lib/shoko/adapters/output/notification_service.rb +58 -0
- data/lib/shoko/adapters/output/render_registry.rb +45 -0
- data/lib/shoko/adapters/output/rendering/models/line_geometry.rb +60 -0
- data/lib/shoko/adapters/output/rendering/models/page_rendering_context.rb +22 -0
- data/lib/shoko/adapters/output/rendering/models/render_params.rb +28 -0
- data/lib/shoko/adapters/output/rendering/models/rendering_context.rb +58 -0
- data/lib/shoko/adapters/output/terminal/buffer.rb +275 -0
- data/lib/shoko/adapters/output/terminal/constants/terminal_defaults.rb +11 -0
- data/lib/shoko/adapters/output/terminal/input/decoder.rb +347 -0
- data/lib/shoko/adapters/output/terminal/input.rb +161 -0
- data/lib/shoko/adapters/output/terminal/output.rb +105 -0
- data/lib/shoko/adapters/output/terminal/terminal.rb +167 -0
- data/lib/shoko/adapters/output/terminal/terminal_sanitizer.rb +243 -0
- data/lib/shoko/adapters/output/terminal/terminal_service.rb +138 -0
- data/lib/shoko/adapters/output/terminal/text_metrics.rb +273 -0
- data/lib/shoko/adapters/output/ui/builders/page_setup_builder.rb +47 -0
- data/lib/shoko/adapters/output/ui/components/annotation_editor_overlay/footer_renderer.rb +80 -0
- data/lib/shoko/adapters/output/ui/components/annotation_editor_overlay/geometry.rb +61 -0
- data/lib/shoko/adapters/output/ui/components/annotation_editor_overlay/note_renderer.rb +86 -0
- data/lib/shoko/adapters/output/ui/components/annotation_editor_overlay_component.rb +234 -0
- data/lib/shoko/adapters/output/ui/components/annotations_overlay/list_renderer.rb +142 -0
- data/lib/shoko/adapters/output/ui/components/annotations_overlay_component.rb +185 -0
- data/lib/shoko/adapters/output/ui/components/base_component.rb +110 -0
- data/lib/shoko/adapters/output/ui/components/component_interface.rb +80 -0
- data/lib/shoko/adapters/output/ui/components/content_component.rb +61 -0
- data/lib/shoko/adapters/output/ui/components/enhanced_popup_menu.rb +191 -0
- data/lib/shoko/adapters/output/ui/components/footer_component.rb +120 -0
- data/lib/shoko/adapters/output/ui/components/header_component.rb +46 -0
- data/lib/shoko/adapters/output/ui/components/layouts/horizontal.rb +63 -0
- data/lib/shoko/adapters/output/ui/components/layouts/vertical.rb +73 -0
- data/lib/shoko/adapters/output/ui/components/main_menu_component.rb +103 -0
- data/lib/shoko/adapters/output/ui/components/reading/base_view_renderer.rb +199 -0
- data/lib/shoko/adapters/output/ui/components/reading/config_helpers.rb +42 -0
- data/lib/shoko/adapters/output/ui/components/reading/help_renderer.rb +62 -0
- data/lib/shoko/adapters/output/ui/components/reading/inline_segment_highlighter.rb +144 -0
- data/lib/shoko/adapters/output/ui/components/reading/kitty_image_line_renderer.rb +262 -0
- data/lib/shoko/adapters/output/ui/components/reading/line_content_composer.rb +114 -0
- data/lib/shoko/adapters/output/ui/components/reading/line_drawer.rb +87 -0
- data/lib/shoko/adapters/output/ui/components/reading/line_geometry_builder.rb +41 -0
- data/lib/shoko/adapters/output/ui/components/reading/rendered_lines_recorder.rb +64 -0
- data/lib/shoko/adapters/output/ui/components/reading/single_view_renderer.rb +156 -0
- data/lib/shoko/adapters/output/ui/components/reading/split_view_renderer.rb +221 -0
- data/lib/shoko/adapters/output/ui/components/reading/view_renderer_factory.rb +20 -0
- data/lib/shoko/adapters/output/ui/components/reading/wrapped_lines_fetcher.rb +139 -0
- data/lib/shoko/adapters/output/ui/components/rect.rb +15 -0
- data/lib/shoko/adapters/output/ui/components/render_style.rb +84 -0
- data/lib/shoko/adapters/output/ui/components/screen_component.rb +24 -0
- data/lib/shoko/adapters/output/ui/components/screens/annotation_detail_screen_component.rb +175 -0
- data/lib/shoko/adapters/output/ui/components/screens/annotation_edit_screen_component.rb +221 -0
- data/lib/shoko/adapters/output/ui/components/screens/annotation_editor_screen_component.rb +205 -0
- data/lib/shoko/adapters/output/ui/components/screens/annotation_rendering_helpers.rb +190 -0
- data/lib/shoko/adapters/output/ui/components/screens/annotations_screen_component.rb +266 -0
- data/lib/shoko/adapters/output/ui/components/screens/base_screen_component.rb +49 -0
- data/lib/shoko/adapters/output/ui/components/screens/browse_screen_component.rb +319 -0
- data/lib/shoko/adapters/output/ui/components/screens/download_books_screen_component.rb +340 -0
- data/lib/shoko/adapters/output/ui/components/screens/library_screen_component.rb +205 -0
- data/lib/shoko/adapters/output/ui/components/screens/loading_overlay_component.rb +49 -0
- data/lib/shoko/adapters/output/ui/components/screens/menu_screen_component.rb +107 -0
- data/lib/shoko/adapters/output/ui/components/screens/settings_screen_component.rb +238 -0
- data/lib/shoko/adapters/output/ui/components/sidebar/annotations_tab_renderer.rb +159 -0
- data/lib/shoko/adapters/output/ui/components/sidebar/bookmarks_tab_renderer.rb +139 -0
- data/lib/shoko/adapters/output/ui/components/sidebar/tab_header_component.rb +157 -0
- data/lib/shoko/adapters/output/ui/components/sidebar/toc_tab_renderer.rb +111 -0
- data/lib/shoko/adapters/output/ui/components/sidebar/toc_tab_support.rb +1606 -0
- data/lib/shoko/adapters/output/ui/components/sidebar_panel_component.rb +217 -0
- data/lib/shoko/adapters/output/ui/components/surface.rb +88 -0
- data/lib/shoko/adapters/output/ui/components/tooltip_overlay_component.rb +224 -0
- data/lib/shoko/adapters/output/ui/components/ui/box_drawer.rb +32 -0
- data/lib/shoko/adapters/output/ui/components/ui/list_helpers.rb +33 -0
- data/lib/shoko/adapters/output/ui/components/ui/overlay_layout.rb +79 -0
- data/lib/shoko/adapters/output/ui/components/ui/text_utils.rb +46 -0
- data/lib/shoko/adapters/output/ui/constants/highlighting.rb +21 -0
- data/lib/shoko/adapters/output/ui/constants/messages.rb +12 -0
- data/lib/shoko/adapters/output/ui/constants/themes.rb +79 -0
- data/lib/shoko/adapters/output/ui/constants/ui_constants.rb +85 -0
- data/lib/shoko/adapters/output/ui/rendering/frame_coordinator.rb +42 -0
- data/lib/shoko/adapters/output/ui/rendering/reader_render_coordinator.rb +169 -0
- data/lib/shoko/adapters/output/ui/rendering/render_pipeline.rb +55 -0
- data/lib/shoko/adapters/storage/atomic_file_writer.rb +43 -0
- data/lib/shoko/adapters/storage/background_worker.rb +66 -0
- data/lib/shoko/adapters/storage/book_cache_pipeline.rb +653 -0
- data/lib/shoko/adapters/storage/cache/epub/memory_cache.rb +99 -0
- data/lib/shoko/adapters/storage/cache/epub/persistence.rb +131 -0
- data/lib/shoko/adapters/storage/cache/epub/serializer/deserialize.rb +225 -0
- data/lib/shoko/adapters/storage/cache/epub/serializer/helpers.rb +63 -0
- data/lib/shoko/adapters/storage/cache/epub/serializer/serialize.rb +83 -0
- data/lib/shoko/adapters/storage/cache/epub/serializer.rb +5 -0
- data/lib/shoko/adapters/storage/cache/epub/source_reference.rb +58 -0
- data/lib/shoko/adapters/storage/cache_paths.rb +21 -0
- data/lib/shoko/adapters/storage/cache_pointer_manager.rb +60 -0
- data/lib/shoko/adapters/storage/config_paths.rb +30 -0
- data/lib/shoko/adapters/storage/epub_cache.rb +195 -0
- data/lib/shoko/adapters/storage/file_writer_service.rb +47 -0
- data/lib/shoko/adapters/storage/json_cache_store/chapters.rb +141 -0
- data/lib/shoko/adapters/storage/json_cache_store/layouts.rb +67 -0
- data/lib/shoko/adapters/storage/json_cache_store/manifest.rb +42 -0
- data/lib/shoko/adapters/storage/json_cache_store/payload_helpers.rb +113 -0
- data/lib/shoko/adapters/storage/json_cache_store/resources.rb +84 -0
- data/lib/shoko/adapters/storage/json_cache_store.rb +167 -0
- data/lib/shoko/adapters/storage/lazy_file_string.rb +65 -0
- data/lib/shoko/adapters/storage/pagination_cache.rb +127 -0
- data/lib/shoko/adapters/storage/recent_files.rb +78 -0
- data/lib/shoko/adapters/storage/repositories/annotation_repository.rb +182 -0
- data/lib/shoko/adapters/storage/repositories/base_repository.rb +81 -0
- data/lib/shoko/adapters/storage/repositories/bookmark_repository.rb +132 -0
- data/lib/shoko/adapters/storage/repositories/cached_library_repository.rb +129 -0
- data/lib/shoko/adapters/storage/repositories/config_repository.rb +262 -0
- data/lib/shoko/adapters/storage/repositories/progress_repository.rb +166 -0
- data/lib/shoko/adapters/storage/repositories/storage/annotation_file_store.rb +128 -0
- data/lib/shoko/adapters/storage/repositories/storage/bookmark_file_store.rb +109 -0
- data/lib/shoko/adapters/storage/repositories/storage/file_store_utils.rb +20 -0
- data/lib/shoko/adapters/storage/repositories/storage/progress_file_store.rb +59 -0
- data/lib/shoko/application/annotation_editor_overlay_session.rb +138 -0
- data/lib/shoko/application/cli.rb +134 -0
- data/lib/shoko/application/controllers/menu/input_controller.rb +189 -0
- data/lib/shoko/application/controllers/menu/state_controller.rb +642 -0
- data/lib/shoko/application/controllers/menu_controller.rb +469 -0
- data/lib/shoko/application/controllers/mouseable_reader.rb +377 -0
- data/lib/shoko/application/controllers/reader_controller.rb +449 -0
- data/lib/shoko/application/controllers/state_controller.rb +410 -0
- data/lib/shoko/application/controllers/ui_controller.rb +782 -0
- data/lib/shoko/application/dependency_container.rb +301 -0
- data/lib/shoko/application/infrastructure/event_bus.rb +80 -0
- data/lib/shoko/application/infrastructure/observer_state_store.rb +136 -0
- data/lib/shoko/application/infrastructure/state_store.rb +413 -0
- data/lib/shoko/application/main_menu/menu_progress_presenter.rb +83 -0
- data/lib/shoko/application/pending_jump_handler.rb +122 -0
- data/lib/shoko/application/reader_lifecycle.rb +65 -0
- data/lib/shoko/application/reader_startup_orchestrator.rb +113 -0
- data/lib/shoko/application/selectors/config_selectors.rb +62 -0
- data/lib/shoko/application/selectors/menu_selectors.rb +62 -0
- data/lib/shoko/application/selectors/reader_selectors.rb +186 -0
- data/lib/shoko/application/state/actions/base_action.rb +24 -0
- data/lib/shoko/application/state/actions/quit_to_menu_action.rb +16 -0
- data/lib/shoko/application/state/actions/switch_reader_mode_action.rb +22 -0
- data/lib/shoko/application/state/actions/toggle_view_mode_action.rb +31 -0
- data/lib/shoko/application/state/actions/update_annotation_editor_overlay_action.rb +27 -0
- data/lib/shoko/application/state/actions/update_annotations_action.rb +20 -0
- data/lib/shoko/application/state/actions/update_annotations_overlay_action.rb +27 -0
- data/lib/shoko/application/state/actions/update_bookmarks_action.rb +20 -0
- data/lib/shoko/application/state/actions/update_chapter_action.rb +24 -0
- data/lib/shoko/application/state/actions/update_config_action.rb +22 -0
- data/lib/shoko/application/state/actions/update_field_helpers.rb +26 -0
- data/lib/shoko/application/state/actions/update_menu_action.rb +21 -0
- data/lib/shoko/application/state/actions/update_message_action.rb +35 -0
- data/lib/shoko/application/state/actions/update_page_action.rb +21 -0
- data/lib/shoko/application/state/actions/update_pagination_state_action.rb +21 -0
- data/lib/shoko/application/state/actions/update_popup_menu_action.rb +27 -0
- data/lib/shoko/application/state/actions/update_reader_meta_action.rb +21 -0
- data/lib/shoko/application/state/actions/update_reader_mode_action.rb +20 -0
- data/lib/shoko/application/state/actions/update_rendered_lines_action.rb +40 -0
- data/lib/shoko/application/state/actions/update_selection_action.rb +27 -0
- data/lib/shoko/application/state/actions/update_selections_action.rb +21 -0
- data/lib/shoko/application/state/actions/update_sidebar_action.rb +34 -0
- data/lib/shoko/application/state/actions/update_ui_loading_action.rb +23 -0
- data/lib/shoko/application/ui/reader_view_model_builder.rb +74 -0
- data/lib/shoko/application/ui/view_models/reader_view_model.rb +177 -0
- data/lib/shoko/application/unified_application.rb +48 -0
- data/lib/shoko/application/use_cases/catalog_service.rb +117 -0
- data/lib/shoko/application/use_cases/commands/annotation_editor_commands.rb +105 -0
- data/lib/shoko/application/use_cases/commands/application_commands.rb +208 -0
- data/lib/shoko/application/use_cases/commands/base_command.rb +166 -0
- data/lib/shoko/application/use_cases/commands/bookmark_commands.rb +114 -0
- data/lib/shoko/application/use_cases/commands/conditional_navigation_commands.rb +57 -0
- data/lib/shoko/application/use_cases/commands/menu_commands.rb +170 -0
- data/lib/shoko/application/use_cases/commands/navigation_commands.rb +183 -0
- data/lib/shoko/application/use_cases/commands/reader_commands.rb +46 -0
- data/lib/shoko/application/use_cases/commands/sidebar_commands.rb +55 -0
- data/lib/shoko/application/use_cases/settings_service.rb +123 -0
- data/lib/shoko/core/events/annotation_events.rb +94 -0
- data/lib/shoko/core/events/base_domain_event.rb +169 -0
- data/lib/shoko/core/events/bookmark_events.rb +41 -0
- data/lib/shoko/core/events/domain_event_bus.rb +163 -0
- data/lib/shoko/core/events/progress_events.rb +108 -0
- data/lib/shoko/core/models/bookmark.rb +36 -0
- data/lib/shoko/core/models/bookmark_data.rb +10 -0
- data/lib/shoko/core/models/chapter.rb +25 -0
- data/lib/shoko/core/models/content_block.rb +44 -0
- data/lib/shoko/core/models/reader_settings.rb +20 -0
- data/lib/shoko/core/models/selection_anchor.rb +73 -0
- data/lib/shoko/core/models/toc_entry.rb +14 -0
- data/lib/shoko/core/ports/annotation_repository.rb +0 -0
- data/lib/shoko/core/ports/book_repository.rb +0 -0
- data/lib/shoko/core/ports/book_source.rb +0 -0
- data/lib/shoko/core/ports/bookmark_repository.rb +0 -0
- data/lib/shoko/core/ports/cache.rb +0 -0
- data/lib/shoko/core/ports/input_handler.rb +0 -0
- data/lib/shoko/core/ports/renderer.rb +0 -0
- data/lib/shoko/core/ports/storage.rb +0 -0
- data/lib/shoko/core/services/annotation_service.rb +102 -0
- data/lib/shoko/core/services/base_service.rb +60 -0
- data/lib/shoko/core/services/bookmark_service.rb +267 -0
- data/lib/shoko/core/services/coordinate_service.rb +265 -0
- data/lib/shoko/core/services/layout_service.rb +95 -0
- data/lib/shoko/core/services/navigation/absolute_change_applier.rb +96 -0
- data/lib/shoko/core/services/navigation/absolute_layout.rb +101 -0
- data/lib/shoko/core/services/navigation/absolute_strategy.rb +179 -0
- data/lib/shoko/core/services/navigation/context_builder.rb +52 -0
- data/lib/shoko/core/services/navigation/context_helpers.rb +63 -0
- data/lib/shoko/core/services/navigation/dynamic_change_applier.rb +50 -0
- data/lib/shoko/core/services/navigation/dynamic_strategy.rb +51 -0
- data/lib/shoko/core/services/navigation/image_offset_snapper.rb +150 -0
- data/lib/shoko/core/services/navigation/nav_context.rb +27 -0
- data/lib/shoko/core/services/navigation/state_updater.rb +29 -0
- data/lib/shoko/core/services/navigation/strategy_factory.rb +20 -0
- data/lib/shoko/core/services/navigation_service.rb +150 -0
- data/lib/shoko/core/services/page_calculator_service.rb +242 -0
- data/lib/shoko/core/services/pagination/internal/absolute_page_map_builder.rb +28 -0
- data/lib/shoko/core/services/pagination/internal/chapter_cache.rb +60 -0
- data/lib/shoko/core/services/pagination/internal/dynamic_page_map_builder.rb +157 -0
- data/lib/shoko/core/services/pagination/internal/layout_metrics_calculator.rb +73 -0
- data/lib/shoko/core/services/pagination/internal/page_hydrator.rb +145 -0
- data/lib/shoko/core/services/pagination/internal/pagination_workflow.rb +152 -0
- data/lib/shoko/core/services/pagination/page_info_calculator.rb +247 -0
- data/lib/shoko/core/services/pagination/pagination_cache_preloader.rb +173 -0
- data/lib/shoko/core/services/pagination/pagination_coordinator.rb +202 -0
- data/lib/shoko/core/services/pagination/pagination_orchestrator.rb +291 -0
- data/lib/shoko/core/services/pagination.rb +10 -0
- data/lib/shoko/core/services/progress_helper.rb +22 -0
- data/lib/shoko/core/services/selection_service.rb +126 -0
- data/lib/shoko/core/validator.rb +76 -0
- data/lib/shoko/shared/errors.rb +97 -0
- data/lib/shoko/shared/version.rb +5 -0
- data/lib/shoko/test_support/terminal_double.rb +175 -0
- data/lib/shoko/test_support/test_mode.rb +78 -0
- data/lib/shoko.rb +279 -0
- data/lib/zip.rb +732 -0
- data/zip.rb +5 -0
- metadata +370 -0
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'digest'
|
|
4
|
+
require 'zip'
|
|
5
|
+
|
|
6
|
+
require_relative '../../storage/atomic_file_writer.rb'
|
|
7
|
+
require_relative '../../storage/cache_paths.rb'
|
|
8
|
+
require_relative '../../monitoring/logger.rb'
|
|
9
|
+
|
|
10
|
+
module Shoko
|
|
11
|
+
module Adapters::BookSources::Epub
|
|
12
|
+
# Loads resources (typically images) from an EPUB on demand and optionally
# persists them as per-book blobs under the cache root.
#
# Blobs are stored at
# <cache_root>/resources/<book_sha>/<SHA-256 of the cache key>.bin
class EpubResourceLoader
  SHA256_HEX_PATTERN = /\A[0-9a-f]{64}\z/i

  # @param cache_root [String] directory under which the "resources" tree lives
  def initialize(cache_root: CachePaths.cache_root)
    @cache_root = cache_root
  end

  # Fetch an entry from the per-book blob cache or from the EPUB archive.
  #
  # The blob cache is consulted first (only when book_sha is a valid digest);
  # on a miss the entry is read from the archive.
  #
  # @param book_sha [String,nil] 64-char hex digest identifying the book cache directory
  # @param epub_path [String] filesystem path to the EPUB
  # @param entry_path [String] path inside the EPUB zip
  # @param persist [Boolean] when true (and book_sha is valid) bytes read from
  #   the archive are written to the blob cache; a failed write is ignored
  # @param cache_key [String,nil] logical cache key (defaults to entry_path)
  # @return [String,nil] binary bytes
  def fetch(book_sha:, epub_path:, entry_path:, persist: true, cache_key: nil)
    return nil if entry_path.to_s.empty?

    normalized_sha = normalize_sha(book_sha)
    key = (cache_key || entry_path).to_s
    return nil if key.empty?

    cached = normalized_sha ? read_blob(normalized_sha, key) : nil
    return cached if cached

    bytes = read_from_zip(epub_path, entry_path)
    return nil unless bytes

    write_blob(normalized_sha, key, bytes) if persist && normalized_sha
    bytes
  end

  # Write bytes straight into the blob cache for a book.
  #
  # @param book_sha [String,nil] 64-char hex digest identifying the book cache directory
  # @param entry_path [String] cache key for the blob
  # @param bytes [String] binary content to persist
  # @return [Boolean] true only when the blob was actually written; false for
  #   an invalid digest, an empty entry path, or a failed write
  def store(book_sha:, entry_path:, bytes:)
    normalized_sha = normalize_sha(book_sha)
    return false unless normalized_sha
    return false if entry_path.to_s.empty?

    # write_blob reports success itself, so a failed write is surfaced here
    # instead of being masked by an unconditional `true`.
    write_blob(normalized_sha, entry_path, bytes)
  rescue StandardError
    false
  end

  # Resolve a resource href relative to a chapter (zip entry) path.
  #
  # Query strings and fragments are dropped, hrefs carrying a URI scheme are
  # rejected, and ".." segments cannot climb above the archive root.
  #
  # @param chapter_entry_path [String] zip entry path of the chapter
  # @param href [String] href/src value from XHTML
  # @return [String,nil] normalized zip entry path
  def self.resolve_chapter_relative(chapter_entry_path, href)
    return nil unless chapter_entry_path && href

    core = href.to_s.split(/[?#]/, 2).first.to_s
    return nil if core.empty?
    return nil if core.match?(/\A[a-z][a-z0-9+.-]*:/i) # data:, http:, etc.

    normalized = if core.start_with?('/')
                   core.sub(%r{\A/+}, '')
                 else
                   base = File.dirname(chapter_entry_path.to_s)
                   # Expanding against "/" collapses "." and ".." while
                   # clamping traversal at the root.
                   File.expand_path(File.join('/', base, core), '/').sub(%r{\A/}, '')
                 end

    normalized.empty? ? nil : normalized
  rescue StandardError
    nil
  end

  private

  # @return [String,nil] lower-cased digest, or nil unless it is 64 hex chars
  def normalize_sha(sha)
    value = sha.to_s.strip
    return nil if value.empty?
    return nil unless SHA256_HEX_PATTERN.match?(value)

    value.downcase
  rescue StandardError
    nil
  end

  # Read one entry from the EPUB archive as binary bytes.
  # Returns nil when the file or entry is missing or the read fails; failures
  # are passed to the logger's debug method.
  def read_from_zip(epub_path, entry_path)
    return nil unless epub_path && File.file?(epub_path)
    return nil if entry_path.to_s.empty?

    Zip::File.open(epub_path) do |zip|
      return nil unless zip.find_entry(entry_path.to_s)

      data = zip.read(entry_path.to_s)
      data.force_encoding(Encoding::BINARY)
      data
    end
  rescue Zip::Error => e
    Shoko::Adapters::Monitoring::Logger.debug('EpubResourceLoader: zip read failed', path: epub_path.to_s, entry: entry_path.to_s,
                                              error: e.message)
    nil
  rescue StandardError => e
    Shoko::Adapters::Monitoring::Logger.debug('EpubResourceLoader: read failed', path: epub_path.to_s, entry: entry_path.to_s, error: e.message)
    nil
  end

  # The cache key is hashed so arbitrary entry paths map to a flat, safe
  # file name.
  def blob_path(book_sha, entry_path)
    key = Digest::SHA256.hexdigest(entry_path.to_s)
    File.join(@cache_root, 'resources', book_sha.to_s, "#{key}.bin")
  end

  # @return [String,nil] cached bytes, or nil on a miss or read error
  def read_blob(book_sha, entry_path)
    path = blob_path(book_sha, entry_path)
    return nil unless File.file?(path)

    data = File.binread(path)
    data.force_encoding(Encoding::BINARY)
    data
  rescue StandardError
    nil
  end

  # Persist a blob, never raising.
  #
  # @return [Boolean] true when the write completed, false when it was
  #   skipped (no digest) or raised
  def write_blob(book_sha, entry_path, bytes)
    return false unless book_sha

    AtomicFileWriter.write(blob_path(book_sha, entry_path), bytes, binary: true)
    true
  rescue StandardError
    false
  end
end
|
|
136
|
+
end
|
|
137
|
+
end
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'cgi'
|
|
4
|
+
|
|
5
|
+
require_relative '../../../monitoring/perf_tracer.rb'
|
|
6
|
+
require_relative '../../../output/terminal/terminal_sanitizer.rb'
|
|
7
|
+
|
|
8
|
+
module Shoko
|
|
9
|
+
module Adapters::BookSources::Epub::Parsers
|
|
10
|
+
# Converts (X)HTML fragments to plain, terminal-safe text: title extraction,
# tag stripping, entity decoding and whitespace normalisation.
class HTMLProcessor
  # Return the cleaned text of the first <title>, or failing that the first
  # <h1>-<h3>, in the document.
  #
  # @param html [String,nil] markup to inspect
  # @return [String,nil] cleaned title, or nil when none is found
  def self.extract_title(html)
    source = html.to_s
    match = source.match(%r{<title[^>]*>([^<]+)</title>}i) ||
            source.match(%r{<h[1-3][^>]*>([^<]+)</h[1-3]>}i)
    clean_html(match[1]) if match
  end

  # Convert markup to plain text, timing the work when tracing is enabled.
  #
  # @param html [String,nil] markup to convert
  # @return [String] sanitized plain text
  def self.html_to_text(html)
    if Shoko::Adapters::Monitoring::PerfTracer.enabled?
      Shoko::Adapters::Monitoring::PerfTracer.measure('xhtml.normalize') { normalize_html(html) }
    else
      normalize_html(html)
    end
  end

  # Block-level tags and the line breaks that stand in for them.
  BLOCK_REPLACEMENTS = {
    %r{</p>}i => "\n\n",
    /<p[^>]*>/i => "\n\n",
    /<br[^>]*>/i => "\n",
    %r{</h[1-6]>}i => "\n\n",
    /<h[1-6][^>]*>/i => "\n\n",
    %r{</div>}i => "\n",
    /<div[^>]*>/i => "\n",
  }.freeze

  private_constant :BLOCK_REPLACEMENTS

  # Named entities beyond the five XML built-ins.
  HTML_ENTITY_MAP = {
    'nbsp' => ' ',
    'ensp' => ' ',
    'emsp' => ' ',
    'thinsp' => ' ',
    'shy' => '',
    'mdash' => '—',
    'ndash' => '–',
    'hellip' => '…',
    'ldquo' => '“',
    'rdquo' => '”',
    'lsquo' => '‘',
    'rsquo' => '’',
    'laquo' => '«',
    'raquo' => '»',
    'bull' => '•',
    'middot' => '·',
    'times' => '×',
    'divide' => '÷',
    'deg' => '°',
    'copy' => '©',
    'reg' => '®',
    'trade' => '™',
    'frac14' => '¼',
    'frac12' => '½',
    'frac34' => '¾',
    'sup1' => '¹',
    'sup2' => '²',
    'sup3' => '³',
  }.freeze

  private_constant :HTML_ENTITY_MAP

  # Any character reference: hex (&#x41; / &#X41;), decimal (&#65;) or named.
  ENTITY_REFERENCE_PATTERN = /&(?:#[xX]([0-9A-Fa-f]+)|#(\d+)|([A-Za-z][A-Za-z0-9]+));/

  private_constant :ENTITY_REFERENCE_PATTERN

  # Decode character references in a single pass.
  #
  # One pass matters: text produced by decoding one reference is never
  # re-scanned, so "&#38;lt;" yields the literal "&lt;" rather than "<".
  # Numeric references that are not valid Unicode scalar values (surrogates,
  # values above U+10FFFF) and unknown named references are left untouched,
  # so the result never contains invalid UTF-8.
  #
  # @param text [#to_s] text that may contain character references
  # @return [String] decoded text with no-break spaces turned into spaces
  def self.decode_entities(text)
    str = text.to_s
    return str if str.empty?

    decoded = str.gsub(ENTITY_REFERENCE_PATTERN) do |reference|
      hex, decimal, name = Regexp.last_match.captures
      if name
        # Fall back to CGI for the XML built-ins (amp/lt/gt/quot/apos);
        # anything CGI does not know comes back unchanged.
        HTML_ENTITY_MAP[name] || HTML_ENTITY_MAP[name.downcase] || CGI.unescapeHTML(reference)
      else
        codepoint_to_char(hex ? hex.to_i(16) : decimal.to_i) || reference
      end
    end

    decoded.tr("\u00A0", ' ')
  end

  # @return [String,nil] the UTF-8 character, or nil for an invalid code point
  private_class_method def self.codepoint_to_char(codepoint)
    codepoint.chr(Encoding::UTF_8)
  rescue StandardError
    nil
  end

  # Full markup-to-text pipeline; the step order is significant.
  private_class_method def self.normalize_html(html)
    text = html.to_s.dup
    # Handle CDATA sections BEFORE removing other tags
    text = handle_cdata_sections(text)
    text = remove_scripts_and_styles(text)
    text = replace_block_elements(text)
    text = strip_tags(text)
    text = decode_entities(text)
    cleaned = clean_whitespace(text)
    Shoko::Adapters::Output::Terminal::TerminalSanitizer.sanitize(
      cleaned,
      preserve_newlines: true,
      preserve_tabs: false
    )
  end

  private_class_method def self.handle_cdata_sections(text)
    # Extract CDATA content before other processing
    text.gsub(/<!\[CDATA\[(.*?)\]\]>/m, '\1')
  end

  # The bang methods below return nil when nothing changed, so each helper
  # returns the mutated string explicitly.
  private_class_method def self.remove_scripts_and_styles(text)
    text.gsub!(%r{<script[^>]*>.*?</script>}mi, '')
    text.gsub!(%r{<style[^>]*>.*?</style>}mi, '')
    text
  end

  private_class_method def self.replace_block_elements(text)
    BLOCK_REPLACEMENTS.each { |pattern, rep| text.gsub!(pattern, rep) }
    text
  end

  private_class_method def self.strip_tags(text)
    text.gsub!(/<[^>]+>/, '')
    text
  end

  # Drop carriage returns, cap blank-line runs at one, and collapse runs of
  # spaces/tabs to a single space.
  private_class_method def self.clean_whitespace(text)
    text.delete!("\r")
    text.gsub!(/\n{3,}/, "\n\n")
    text.gsub!(/[ \t]+/, ' ')
    text.strip
  end

  # Decode entities in a short snippet (e.g. a title) and sanitize it as a
  # single line.
  #
  # @param text [#to_s] snippet to clean
  # @return [String] sanitized text
  def self.clean_html(text)
    decoded = decode_entities(text.to_s.strip)
    Shoko::Adapters::Output::Terminal::TerminalSanitizer.sanitize(
      decoded,
      preserve_newlines: false,
      preserve_tabs: false
    )
  end
end
|
|
150
|
+
end
|
|
151
|
+
end
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'zip'
|
|
4
|
+
require 'rexml/document'
|
|
5
|
+
|
|
6
|
+
require_relative 'opf_processor'
|
|
7
|
+
|
|
8
|
+
module Shoko
|
|
9
|
+
module Adapters::BookSources::Epub::Parsers
|
|
10
|
+
# Reads the commonly displayed EPUB metadata (authors, year, title, language)
# by opening the archive and parsing only its OPF package document; chapter
# content is never loaded.
class MetadataExtractor
  class << self
    # @param path [String] filesystem path of the EPUB
    # @return [Hash] normalized metadata, or an empty hash when the OPF cannot
    #   be located or anything raises along the way
    def from_epub(path)
      Zip::File.open(path) do |archive|
        package_path = find_opf_path(archive)
        return {} unless package_path

        raw = OPFProcessor.new(package_path, zip: archive).extract_metadata
        normalize(raw)
      end
    rescue StandardError
      {}
    end

    private

    # Look up the package document named by META-INF/container.xml.
    # Returns its path only when that entry exists in the archive.
    def find_opf_path(zip)
      document = REXML::Document.new(zip.read('META-INF/container.xml'))
      rootfile = document.elements['//rootfile']
      return nil unless rootfile

      candidate = rootfile.attributes['full-path']
      candidate if zip.find_entry(candidate)
    rescue StandardError
      nil
    end

    # Reduce raw OPF metadata to the fields callers display.
    # Authors are stringified with nil/blank entries dropped; the year is the
    # first four characters of the raw value.
    def normalize(meta)
      return {} unless meta.is_a?(Hash)

      names = Array(meta[:authors]).each_with_object([]) do |author, kept|
        next if author.nil?

        label = author.to_s
        kept << label unless label.empty?
      end
      year_text = (meta[:year] || '').to_s

      {
        authors: names,
        author_str: names.join('; '),
        year: year_text[0, 4],
        title: meta[:title],
        language: meta[:language],
      }
    end
  end
end
|
|
52
|
+
end
|
|
53
|
+
end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'pathname'
|
|
4
|
+
|
|
5
|
+
require_relative '../xml_text_normalizer'
|
|
6
|
+
|
|
7
|
+
module Shoko
|
|
8
|
+
module Adapters::BookSources::Epub::Parsers
|
|
9
|
+
# Reads the OPF and related XML entries, either from an archive object or
# straight from the filesystem, and resolves hrefs relative to the
# directory that holds the OPF.
class OPFEntryReader
  # opf_path - location of the OPF (an entry name when +zip+ is given).
  # zip      - optional archive; only #read and #find_entry are called on it.
  def initialize(opf_path, zip: nil)
    @opf_path = opf_path
    @opf_dir = File.dirname(opf_path)
    @zip = zip
  end

  # True when entries are served from an archive rather than from disk.
  def zip?
    !@zip.nil?
  end

  # Returns the unprocessed content stored at +path+.
  def read_raw(path)
    return @zip.read(path) if zip?

    File.read(path)
  end

  # Returns the content at +path+ after XML text normalization.
  def read_entry(path)
    normalize_xml_text(read_raw(path))
  end

  # Like read_entry, but yields nil instead of raising a StandardError.
  def safe_read_entry(path)
    begin
      read_entry(path)
    rescue StandardError
      nil
    end
  end

  # Whether +path+ exists in the archive (zip mode) or on disk.
  def entry_exists?(path)
    return File.exist?(path) unless zip?

    @zip.find_entry(path) ? true : false
  end

  # Resolves +href+ against the OPF directory.
  def join_path(href)
    expand_path(@opf_dir, href)
  end

  # Resolves +href+ against +base_dir+; nil for a nil/empty href.
  #
  # In zip mode the path is resolved against a virtual "/" root and the
  # leading slash is removed again, so the result is an archive-relative
  # entry name. Otherwise an absolute filesystem path is returned.
  def expand_path(base_dir, href)
    return nil if href.to_s.empty?
    return File.expand_path(File.join(base_dir, href)) unless zip?

    rooted = File.join('/', base_dir, href)
    File.expand_path(rooted, '/').sub(%r{^/}, '')
  end

  # Canonicalizes +href+ and expresses it relative to the OPF directory.
  # Falls back to the href itself (as a String) when an ArgumentError is
  # raised; returns nil for a nil/empty href.
  def normalize_opf_relative_href(href)
    return nil if href.to_s.empty?

    begin
      resolved = join_path(href)
      return nil unless resolved

      opf_root = Pathname.new(@opf_dir)
      Pathname.new(resolved).relative_path_from(opf_root).to_s
    rescue ArgumentError
      href.to_s
    end
  end

  # Expresses +path+ relative to the OPF directory; nil when +path+ is
  # nil/empty or when an ArgumentError is raised.
  def opf_relative_path(path)
    return nil if path.to_s.empty?

    begin
      opf_root = Pathname.new(@opf_dir)
      Pathname.new(path).relative_path_from(opf_root).to_s
    rescue ArgumentError
      nil
    end
  end

  # Delegates to XmlTextNormalizer.normalize.
  def normalize_xml_text(content)
    XmlTextNormalizer.normalize(content)
  end
end
|
|
76
|
+
end
|
|
77
|
+
end
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative '../html_processor'
|
|
4
|
+
|
|
5
|
+
module Shoko
|
|
6
|
+
module Adapters::BookSources::Epub::Parsers
|
|
7
|
+
# Extracts metadata fields (title, language, authors, year) from a parsed
# OPF package document.
class OPFMetadataExtractor
  # opf - parsed OPF document. Its #elements must support XPath lookup via
  #       #[] (the code uses it the way a REXML document is used).
  def initialize(opf)
    @opf = opf
  end

  # Returns a Hash holding only the fields that were found and non-empty:
  #
  #   :title    - cleaned text of the first title element
  #   :language - locale-style language code (see #language_from)
  #   :authors  - Array of cleaned creator names
  #   :year     - first four-digit run found in the first date element
  #
  # Returns an empty Hash when the document has no metadata element.
  def extract
    metadata_element = @opf.elements['//metadata']
    return {} unless metadata_element

    children = metadata_element.elements
    {
      title: title_from(children),
      language: language_from(children),
      authors: authors_from(children),
      year: year_from(children)
    }.compact
  end

  private

  # XPath matching direct children by local name, ignoring any namespace
  # prefix.
  def xpath_for(local_name)
    %(*[local-name()="#{local_name}"])
  end

  # Cleaned title text, or nil when missing or empty after cleaning.
  def title_from(children)
    raw_title = children[xpath_for('title')]&.text
    return unless raw_title

    title = HTMLProcessor.clean_html(raw_title.to_s).strip
    title unless title.empty?
  end

  # Locale-style language code, or nil when missing or blank.
  #
  # * a value that already contains "_" is returned as is (stripped)
  # * a hyphenated tag has its hyphens turned into underscores
  #   ("en-US" -> "en_US") instead of being doubled into "en-US_EN-US"
  # * a bare code is expanded the way it always was ("en" -> "en_EN")
  def language_from(children)
    tag = children[xpath_for('language')]&.text.to_s.strip
    return if tag.empty?
    return tag if tag.include?('_')
    return tag.tr('-', '_') if tag.include?('-')

    "#{tag}_#{tag.upcase}"
  end

  # Cleaned, non-empty creator names in document order; nil when none.
  def authors_from(children)
    names = []
    children.each(xpath_for('creator')) do |creator|
      name = HTMLProcessor.clean_html(creator.text.to_s).strip
      names << name unless name.empty?
    end
    names unless names.empty?
  end

  # First run of four digits in the first date element, or nil.
  def year_from(children)
    date_element = children[xpath_for('date')]
    return unless date_element

    date_element.text.to_s[/\d{4}/]
  end
end
|
|
66
|
+
end
|
|
67
|
+
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'rexml/document'
|
|
4
|
+
|
|
5
|
+
require_relative 'navigation_label_resolver'
|
|
6
|
+
require_relative 'navigation_list_item'
|
|
7
|
+
|
|
8
|
+
module Shoko
|
|
9
|
+
module Adapters::BookSources::Epub::Parsers
|
|
10
|
+
# Collects table-of-contents entries and per-href titles while a navigation
# tree is walked. Contexts created via #next_level share the same entry
# list, title map and label resolver; only the level differs.
class OPFNavigationContext
  attr_reader :toc_entries, :titles, :level

  # Builds the level-0 context with empty collections and a label
  # resolver created for +source_path+ and +entry_reader+.
  def self.root(source_path:, entry_reader:)
    new(
      label_resolver: OPFNavigationLabelResolver.new(entry_reader: entry_reader, source_path: source_path),
      level: 0,
      toc_entries: [],
      titles: {}
    )
  end

  def initialize(label_resolver:, level:, toc_entries:, titles:)
    @label_resolver = label_resolver
    @level = level
    @toc_entries = toc_entries
    @titles = titles
  end

  # Path of the navigation source, as reported by the label resolver.
  def source_path
    @label_resolver.source_path
  end

  # Context for the children of the current node: one level deeper,
  # backed by the very same collections.
  def next_level
    self.class.new(
      label_resolver: @label_resolver,
      level: level + 1,
      toc_entries: toc_entries,
      titles: titles
    )
  end

  # Appends a TOC entry for +title+/+href+ and records the title under the
  # resolved OPF href. At level 0 a title that is already recorded for
  # that href is kept; at any other level it is overwritten.
  def add_entry(title:, href:)
    target_path, opf_href = @label_resolver.target_for(href: href)
    @toc_entries.push(
      {
        title: title,
        href: href,
        level: @level,
        source_path: source_path,
        target: target_path,
        opf_href: opf_href
      }
    )

    return if !opf_href || (@level.zero? && @titles.key?(opf_href))

    @titles[opf_href] = title
  end

  # Delegates label clean-up to the resolver.
  def clean_label(text)
    @label_resolver.clean_label(text)
  end

  # Delegates label resolution to the resolver.
  def resolve_label(href:, title:)
    @label_resolver.resolve(href: href, title: title)
  end

  # Returns [title, href] for a nav point, read from its "content"
  # child's src attribute and its "navLabel/text" child.
  def entry_for_nav_point(nav_point)
    children = nav_point.elements
    src = children['content']&.attributes&.[]('src')
    raw_label = children['navLabel/text']&.text.to_s
    [resolve_label(href: src, title: clean_label(raw_label)), src]
  end

  # Returns [title, href] for a list item.
  def entry_for_list_item(list_item)
    item = OPFNavigationListItem.new(list_item, cleaner: self)
    link = item.href
    [resolve_label(href: link, title: item.title), link]
  end

  # Wraps the collected entries and titles in +result_class+.
  def to_result(result_class)
    result_class.new(toc_entries: @toc_entries, titles: @titles)
  end
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'navigation_document_scanner'
|
|
4
|
+
|
|
5
|
+
module Shoko
|
|
6
|
+
module Adapters::BookSources::Epub::Parsers
|
|
7
|
+
# Per-document cache of anchor labels and heading queues, used as a
# fallback source of navigation labels. Each path is read and scanned at
# most once, on first use.
class OPFNavigationDocumentIndex
  # Value object pairing a document path with its (possibly nil) content.
  Document = Struct.new(:path, :content, keyword_init: true)
  private_constant :Document

  # entry_reader - must respond to #safe_read_entry(path).
  # cleaner      - handed to the document scanner.
  def initialize(entry_reader:, cleaner:)
    @entry_reader = entry_reader
    @scanner = OPFNavigationDocumentScanner.new(cleaner: cleaner)
    @anchors = {}
    @headings = {}
  end

  # Label recorded for +anchor+ in the document at +path+, or nil.
  def anchor_label(path, anchor)
    ensure_index(path)
    @anchors.fetch(path)[anchor]
  end

  # Removes and returns the next pending heading of the document at
  # +path+; an empty string once the queue is exhausted.
  def next_heading(path)
    pending = heading_queue(path)
    pending ? pending.shift.to_s : ''
  end

  # Drops the first pending heading equal to +text+ (after stripping).
  # Blank text is ignored.
  def remove_heading(path, text)
    wanted = text.to_s.strip
    return if wanted.empty?

    pending = heading_queue(path)
    position = pending&.index(wanted)
    pending.delete_at(position) if position
  end

  private

  # Reads and indexes the document at +path+ unless already indexed.
  def ensure_index(path)
    return if @anchors.key?(path)

    document = Document.new(path: path, content: @entry_reader.safe_read_entry(path))
    index_document(document)
  end

  # Registers empty slots for the document first, then fills them from
  # the scan result.
  def index_document(document)
    path = document.path
    return if @anchors.key?(path)

    prepare_index(path)
    apply_scan(path, @scanner.scan(document.content))
  end

  def prepare_index(path)
    @anchors[path] = {}
    @headings[path] = []
  end

  def apply_scan(path, scan_result)
    @anchors[path].merge!(scan_result.anchors)
    @headings[path].concat(scan_result.headings)
  end

  # Heading queue for +path+, indexing the document on first access.
  def heading_queue(path)
    ensure_index(path)
    @headings[path]
  end
end
|
|
74
|
+
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Shoko
|
|
4
|
+
module Adapters::BookSources::Epub::Parsers
|
|
5
|
+
# Scans raw document markup for heading elements (h1-h6) and collects
# their labels, both keyed by anchor id and as an ordered list.
class OPFNavigationDocumentScanner
  # Value object for extracted anchor and heading labels.
  ScanResult = Struct.new(:anchors, :headings, keyword_init: true)
  private_constant :ScanResult

  # Heading element carrying an id / name / xml:id attribute. The
  # attribute name must be preceded by whitespace so that look-alikes such
  # as data-pid= or classname= are not mistaken for the anchor attribute.
  # Captures: tag name, attribute value, inner markup.
  ANCHOR_REGEX = %r{<(h[1-6])[^>]*?\s(?:xml:id|id|name)\s*=\s*["']([^"']+)["'][^>]*>(.*?)</\1>}im.freeze
  # Any heading element. Captures: tag name, inner markup.
  HEADING_REGEX = %r{<(h[1-6])[^>]*>(.*?)</\1>}im.freeze
  private_constant :ANCHOR_REGEX, :HEADING_REGEX

  # cleaner - must respond to #clean_label(text) and return a String.
  def initialize(cleaner:)
    @cleaner = cleaner
  end

  # Returns a result object exposing:
  #
  #   anchors  - Hash of anchor id => cleaned heading label
  #   headings - Array of cleaned heading labels in document order
  #
  # Labels that are empty after cleaning are skipped. A nil +content+
  # yields an empty result.
  def scan(content)
    result = ScanResult.new(anchors: {}, headings: [])
    return result unless content

    scan_anchors(content, result.anchors)
    scan_headings(content, result.headings)
    result
  end

  private

  def scan_anchors(content, anchors)
    content.scan(ANCHOR_REGEX) { |_tag, anchor, text| store_anchor(anchors, anchor, text) }
  end

  def scan_headings(content, headings)
    content.scan(HEADING_REGEX) { |_tag, text| store_heading(headings, text) }
  end

  def store_anchor(anchors, anchor, text)
    label = @cleaner.clean_label(text)
    anchors[anchor] = label unless label.empty?
  end

  def store_heading(headings, text)
    label = @cleaner.clean_label(text)
    headings << label unless label.empty?
  end
end
|
|
46
|
+
end
|
|
47
|
+
end
|