shoko 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (294) hide show
  1. checksums.yaml +7 -0
  2. data/.bundle/config +4 -0
  3. data/.bundle/config.bak +3 -0
  4. data/.rspec_status +42 -0
  5. data/.rubocop.yml +124 -0
  6. data/Gemfile +19 -0
  7. data/LICENSE +21 -0
  8. data/README.md +82 -0
  9. data/Rakefile +29 -0
  10. data/bin/start +15 -0
  11. data/lib/shoko/adapters/book_sources/document_service.rb +201 -0
  12. data/lib/shoko/adapters/book_sources/download_service.rb +95 -0
  13. data/lib/shoko/adapters/book_sources/epub/epub_resource_loader.rb +137 -0
  14. data/lib/shoko/adapters/book_sources/epub/parsers/html_processor.rb +151 -0
  15. data/lib/shoko/adapters/book_sources/epub/parsers/metadata_extractor.rb +53 -0
  16. data/lib/shoko/adapters/book_sources/epub/parsers/opf/entry_reader.rb +77 -0
  17. data/lib/shoko/adapters/book_sources/epub/parsers/opf/metadata_extractor.rb +67 -0
  18. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_context.rb +86 -0
  19. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_document_index.rb +75 -0
  20. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_document_scanner.rb +47 -0
  21. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_extractor.rb +46 -0
  22. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_label_resolver.rb +83 -0
  23. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_list_item.rb +55 -0
  24. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_result.rb +8 -0
  25. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_selector.rb +100 -0
  26. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_source_locator.rb +93 -0
  27. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_traversal.rb +103 -0
  28. data/lib/shoko/adapters/book_sources/epub/parsers/opf/navigation_walker.rb +56 -0
  29. data/lib/shoko/adapters/book_sources/epub/parsers/opf_processor.rb +102 -0
  30. data/lib/shoko/adapters/book_sources/epub/parsers/xhtml_content_parser.rb +661 -0
  31. data/lib/shoko/adapters/book_sources/epub/parsers/xml_text_normalizer.rb +41 -0
  32. data/lib/shoko/adapters/book_sources/epub_document.rb +253 -0
  33. data/lib/shoko/adapters/book_sources/epub_finder/directory_scanner.rb +134 -0
  34. data/lib/shoko/adapters/book_sources/epub_finder/scanner_context.rb +28 -0
  35. data/lib/shoko/adapters/book_sources/epub_finder.rb +161 -0
  36. data/lib/shoko/adapters/book_sources/epub_importer.rb +268 -0
  37. data/lib/shoko/adapters/book_sources/gutendex_client.rb +150 -0
  38. data/lib/shoko/adapters/book_sources/library_scanner.rb +93 -0
  39. data/lib/shoko/adapters/book_sources/source_fingerprint.rb +57 -0
  40. data/lib/shoko/adapters/input/annotations/mouse_handler.rb +84 -0
  41. data/lib/shoko/adapters/input/command_bridge.rb +148 -0
  42. data/lib/shoko/adapters/input/command_factory.rb +255 -0
  43. data/lib/shoko/adapters/input/commands.rb +60 -0
  44. data/lib/shoko/adapters/input/dispatcher.rb +69 -0
  45. data/lib/shoko/adapters/input/input_controller.rb +250 -0
  46. data/lib/shoko/adapters/input/key_definitions.rb +108 -0
  47. data/lib/shoko/adapters/input/validators/file_path_validator.rb +81 -0
  48. data/lib/shoko/adapters/input/validators/terminal_size_validator.rb +76 -0
  49. data/lib/shoko/adapters/monitoring/logger.rb +150 -0
  50. data/lib/shoko/adapters/monitoring/perf_tracer.rb +183 -0
  51. data/lib/shoko/adapters/monitoring/performance_monitor.rb +110 -0
  52. data/lib/shoko/adapters/output/clipboard/clipboard_service.rb +125 -0
  53. data/lib/shoko/adapters/output/formatting/formatting_service/line_assembler/image_builder.rb +149 -0
  54. data/lib/shoko/adapters/output/formatting/formatting_service/line_assembler/text_wrapper.rb +149 -0
  55. data/lib/shoko/adapters/output/formatting/formatting_service/line_assembler/tokenizer.rb +91 -0
  56. data/lib/shoko/adapters/output/formatting/formatting_service/line_assembler.rb +144 -0
  57. data/lib/shoko/adapters/output/formatting/formatting_service/plain_lines_builder.rb +54 -0
  58. data/lib/shoko/adapters/output/formatting/formatting_service.rb +247 -0
  59. data/lib/shoko/adapters/output/formatting/wrapping_service.rb +228 -0
  60. data/lib/shoko/adapters/output/instrumentation_service.rb +52 -0
  61. data/lib/shoko/adapters/output/kitty/image_transcoder.rb +71 -0
  62. data/lib/shoko/adapters/output/kitty/kitty_graphics.rb +114 -0
  63. data/lib/shoko/adapters/output/kitty/kitty_image_renderer.rb +239 -0
  64. data/lib/shoko/adapters/output/kitty/kitty_unicode_placeholders.rb +139 -0
  65. data/lib/shoko/adapters/output/kitty/kitty_unicode_placeholders_diacritic_codepoints.txt +26 -0
  66. data/lib/shoko/adapters/output/notification_service.rb +58 -0
  67. data/lib/shoko/adapters/output/render_registry.rb +45 -0
  68. data/lib/shoko/adapters/output/rendering/models/line_geometry.rb +60 -0
  69. data/lib/shoko/adapters/output/rendering/models/page_rendering_context.rb +22 -0
  70. data/lib/shoko/adapters/output/rendering/models/render_params.rb +28 -0
  71. data/lib/shoko/adapters/output/rendering/models/rendering_context.rb +58 -0
  72. data/lib/shoko/adapters/output/terminal/buffer.rb +275 -0
  73. data/lib/shoko/adapters/output/terminal/constants/terminal_defaults.rb +11 -0
  74. data/lib/shoko/adapters/output/terminal/input/decoder.rb +347 -0
  75. data/lib/shoko/adapters/output/terminal/input.rb +161 -0
  76. data/lib/shoko/adapters/output/terminal/output.rb +105 -0
  77. data/lib/shoko/adapters/output/terminal/terminal.rb +167 -0
  78. data/lib/shoko/adapters/output/terminal/terminal_sanitizer.rb +243 -0
  79. data/lib/shoko/adapters/output/terminal/terminal_service.rb +138 -0
  80. data/lib/shoko/adapters/output/terminal/text_metrics.rb +273 -0
  81. data/lib/shoko/adapters/output/ui/builders/page_setup_builder.rb +47 -0
  82. data/lib/shoko/adapters/output/ui/components/annotation_editor_overlay/footer_renderer.rb +80 -0
  83. data/lib/shoko/adapters/output/ui/components/annotation_editor_overlay/geometry.rb +61 -0
  84. data/lib/shoko/adapters/output/ui/components/annotation_editor_overlay/note_renderer.rb +86 -0
  85. data/lib/shoko/adapters/output/ui/components/annotation_editor_overlay_component.rb +234 -0
  86. data/lib/shoko/adapters/output/ui/components/annotations_overlay/list_renderer.rb +142 -0
  87. data/lib/shoko/adapters/output/ui/components/annotations_overlay_component.rb +185 -0
  88. data/lib/shoko/adapters/output/ui/components/base_component.rb +110 -0
  89. data/lib/shoko/adapters/output/ui/components/component_interface.rb +80 -0
  90. data/lib/shoko/adapters/output/ui/components/content_component.rb +61 -0
  91. data/lib/shoko/adapters/output/ui/components/enhanced_popup_menu.rb +191 -0
  92. data/lib/shoko/adapters/output/ui/components/footer_component.rb +120 -0
  93. data/lib/shoko/adapters/output/ui/components/header_component.rb +46 -0
  94. data/lib/shoko/adapters/output/ui/components/layouts/horizontal.rb +63 -0
  95. data/lib/shoko/adapters/output/ui/components/layouts/vertical.rb +73 -0
  96. data/lib/shoko/adapters/output/ui/components/main_menu_component.rb +103 -0
  97. data/lib/shoko/adapters/output/ui/components/reading/base_view_renderer.rb +199 -0
  98. data/lib/shoko/adapters/output/ui/components/reading/config_helpers.rb +42 -0
  99. data/lib/shoko/adapters/output/ui/components/reading/help_renderer.rb +62 -0
  100. data/lib/shoko/adapters/output/ui/components/reading/inline_segment_highlighter.rb +144 -0
  101. data/lib/shoko/adapters/output/ui/components/reading/kitty_image_line_renderer.rb +262 -0
  102. data/lib/shoko/adapters/output/ui/components/reading/line_content_composer.rb +114 -0
  103. data/lib/shoko/adapters/output/ui/components/reading/line_drawer.rb +87 -0
  104. data/lib/shoko/adapters/output/ui/components/reading/line_geometry_builder.rb +41 -0
  105. data/lib/shoko/adapters/output/ui/components/reading/rendered_lines_recorder.rb +64 -0
  106. data/lib/shoko/adapters/output/ui/components/reading/single_view_renderer.rb +156 -0
  107. data/lib/shoko/adapters/output/ui/components/reading/split_view_renderer.rb +221 -0
  108. data/lib/shoko/adapters/output/ui/components/reading/view_renderer_factory.rb +20 -0
  109. data/lib/shoko/adapters/output/ui/components/reading/wrapped_lines_fetcher.rb +139 -0
  110. data/lib/shoko/adapters/output/ui/components/rect.rb +15 -0
  111. data/lib/shoko/adapters/output/ui/components/render_style.rb +84 -0
  112. data/lib/shoko/adapters/output/ui/components/screen_component.rb +24 -0
  113. data/lib/shoko/adapters/output/ui/components/screens/annotation_detail_screen_component.rb +175 -0
  114. data/lib/shoko/adapters/output/ui/components/screens/annotation_edit_screen_component.rb +221 -0
  115. data/lib/shoko/adapters/output/ui/components/screens/annotation_editor_screen_component.rb +205 -0
  116. data/lib/shoko/adapters/output/ui/components/screens/annotation_rendering_helpers.rb +190 -0
  117. data/lib/shoko/adapters/output/ui/components/screens/annotations_screen_component.rb +266 -0
  118. data/lib/shoko/adapters/output/ui/components/screens/base_screen_component.rb +49 -0
  119. data/lib/shoko/adapters/output/ui/components/screens/browse_screen_component.rb +319 -0
  120. data/lib/shoko/adapters/output/ui/components/screens/download_books_screen_component.rb +340 -0
  121. data/lib/shoko/adapters/output/ui/components/screens/library_screen_component.rb +205 -0
  122. data/lib/shoko/adapters/output/ui/components/screens/loading_overlay_component.rb +49 -0
  123. data/lib/shoko/adapters/output/ui/components/screens/menu_screen_component.rb +107 -0
  124. data/lib/shoko/adapters/output/ui/components/screens/settings_screen_component.rb +238 -0
  125. data/lib/shoko/adapters/output/ui/components/sidebar/annotations_tab_renderer.rb +159 -0
  126. data/lib/shoko/adapters/output/ui/components/sidebar/bookmarks_tab_renderer.rb +139 -0
  127. data/lib/shoko/adapters/output/ui/components/sidebar/tab_header_component.rb +157 -0
  128. data/lib/shoko/adapters/output/ui/components/sidebar/toc_tab_renderer.rb +111 -0
  129. data/lib/shoko/adapters/output/ui/components/sidebar/toc_tab_support.rb +1606 -0
  130. data/lib/shoko/adapters/output/ui/components/sidebar_panel_component.rb +217 -0
  131. data/lib/shoko/adapters/output/ui/components/surface.rb +88 -0
  132. data/lib/shoko/adapters/output/ui/components/tooltip_overlay_component.rb +224 -0
  133. data/lib/shoko/adapters/output/ui/components/ui/box_drawer.rb +32 -0
  134. data/lib/shoko/adapters/output/ui/components/ui/list_helpers.rb +33 -0
  135. data/lib/shoko/adapters/output/ui/components/ui/overlay_layout.rb +79 -0
  136. data/lib/shoko/adapters/output/ui/components/ui/text_utils.rb +46 -0
  137. data/lib/shoko/adapters/output/ui/constants/highlighting.rb +21 -0
  138. data/lib/shoko/adapters/output/ui/constants/messages.rb +12 -0
  139. data/lib/shoko/adapters/output/ui/constants/themes.rb +79 -0
  140. data/lib/shoko/adapters/output/ui/constants/ui_constants.rb +85 -0
  141. data/lib/shoko/adapters/output/ui/rendering/frame_coordinator.rb +42 -0
  142. data/lib/shoko/adapters/output/ui/rendering/reader_render_coordinator.rb +169 -0
  143. data/lib/shoko/adapters/output/ui/rendering/render_pipeline.rb +55 -0
  144. data/lib/shoko/adapters/storage/atomic_file_writer.rb +43 -0
  145. data/lib/shoko/adapters/storage/background_worker.rb +66 -0
  146. data/lib/shoko/adapters/storage/book_cache_pipeline.rb +653 -0
  147. data/lib/shoko/adapters/storage/cache/epub/memory_cache.rb +99 -0
  148. data/lib/shoko/adapters/storage/cache/epub/persistence.rb +131 -0
  149. data/lib/shoko/adapters/storage/cache/epub/serializer/deserialize.rb +225 -0
  150. data/lib/shoko/adapters/storage/cache/epub/serializer/helpers.rb +63 -0
  151. data/lib/shoko/adapters/storage/cache/epub/serializer/serialize.rb +83 -0
  152. data/lib/shoko/adapters/storage/cache/epub/serializer.rb +5 -0
  153. data/lib/shoko/adapters/storage/cache/epub/source_reference.rb +58 -0
  154. data/lib/shoko/adapters/storage/cache_paths.rb +21 -0
  155. data/lib/shoko/adapters/storage/cache_pointer_manager.rb +60 -0
  156. data/lib/shoko/adapters/storage/config_paths.rb +30 -0
  157. data/lib/shoko/adapters/storage/epub_cache.rb +195 -0
  158. data/lib/shoko/adapters/storage/file_writer_service.rb +47 -0
  159. data/lib/shoko/adapters/storage/json_cache_store/chapters.rb +141 -0
  160. data/lib/shoko/adapters/storage/json_cache_store/layouts.rb +67 -0
  161. data/lib/shoko/adapters/storage/json_cache_store/manifest.rb +42 -0
  162. data/lib/shoko/adapters/storage/json_cache_store/payload_helpers.rb +113 -0
  163. data/lib/shoko/adapters/storage/json_cache_store/resources.rb +84 -0
  164. data/lib/shoko/adapters/storage/json_cache_store.rb +167 -0
  165. data/lib/shoko/adapters/storage/lazy_file_string.rb +65 -0
  166. data/lib/shoko/adapters/storage/pagination_cache.rb +127 -0
  167. data/lib/shoko/adapters/storage/recent_files.rb +78 -0
  168. data/lib/shoko/adapters/storage/repositories/annotation_repository.rb +182 -0
  169. data/lib/shoko/adapters/storage/repositories/base_repository.rb +81 -0
  170. data/lib/shoko/adapters/storage/repositories/bookmark_repository.rb +132 -0
  171. data/lib/shoko/adapters/storage/repositories/cached_library_repository.rb +129 -0
  172. data/lib/shoko/adapters/storage/repositories/config_repository.rb +262 -0
  173. data/lib/shoko/adapters/storage/repositories/progress_repository.rb +166 -0
  174. data/lib/shoko/adapters/storage/repositories/storage/annotation_file_store.rb +128 -0
  175. data/lib/shoko/adapters/storage/repositories/storage/bookmark_file_store.rb +109 -0
  176. data/lib/shoko/adapters/storage/repositories/storage/file_store_utils.rb +20 -0
  177. data/lib/shoko/adapters/storage/repositories/storage/progress_file_store.rb +59 -0
  178. data/lib/shoko/application/annotation_editor_overlay_session.rb +138 -0
  179. data/lib/shoko/application/cli.rb +134 -0
  180. data/lib/shoko/application/controllers/menu/input_controller.rb +189 -0
  181. data/lib/shoko/application/controllers/menu/state_controller.rb +642 -0
  182. data/lib/shoko/application/controllers/menu_controller.rb +469 -0
  183. data/lib/shoko/application/controllers/mouseable_reader.rb +377 -0
  184. data/lib/shoko/application/controllers/reader_controller.rb +449 -0
  185. data/lib/shoko/application/controllers/state_controller.rb +410 -0
  186. data/lib/shoko/application/controllers/ui_controller.rb +782 -0
  187. data/lib/shoko/application/dependency_container.rb +301 -0
  188. data/lib/shoko/application/infrastructure/event_bus.rb +80 -0
  189. data/lib/shoko/application/infrastructure/observer_state_store.rb +136 -0
  190. data/lib/shoko/application/infrastructure/state_store.rb +413 -0
  191. data/lib/shoko/application/main_menu/menu_progress_presenter.rb +83 -0
  192. data/lib/shoko/application/pending_jump_handler.rb +122 -0
  193. data/lib/shoko/application/reader_lifecycle.rb +65 -0
  194. data/lib/shoko/application/reader_startup_orchestrator.rb +113 -0
  195. data/lib/shoko/application/selectors/config_selectors.rb +62 -0
  196. data/lib/shoko/application/selectors/menu_selectors.rb +62 -0
  197. data/lib/shoko/application/selectors/reader_selectors.rb +186 -0
  198. data/lib/shoko/application/state/actions/base_action.rb +24 -0
  199. data/lib/shoko/application/state/actions/quit_to_menu_action.rb +16 -0
  200. data/lib/shoko/application/state/actions/switch_reader_mode_action.rb +22 -0
  201. data/lib/shoko/application/state/actions/toggle_view_mode_action.rb +31 -0
  202. data/lib/shoko/application/state/actions/update_annotation_editor_overlay_action.rb +27 -0
  203. data/lib/shoko/application/state/actions/update_annotations_action.rb +20 -0
  204. data/lib/shoko/application/state/actions/update_annotations_overlay_action.rb +27 -0
  205. data/lib/shoko/application/state/actions/update_bookmarks_action.rb +20 -0
  206. data/lib/shoko/application/state/actions/update_chapter_action.rb +24 -0
  207. data/lib/shoko/application/state/actions/update_config_action.rb +22 -0
  208. data/lib/shoko/application/state/actions/update_field_helpers.rb +26 -0
  209. data/lib/shoko/application/state/actions/update_menu_action.rb +21 -0
  210. data/lib/shoko/application/state/actions/update_message_action.rb +35 -0
  211. data/lib/shoko/application/state/actions/update_page_action.rb +21 -0
  212. data/lib/shoko/application/state/actions/update_pagination_state_action.rb +21 -0
  213. data/lib/shoko/application/state/actions/update_popup_menu_action.rb +27 -0
  214. data/lib/shoko/application/state/actions/update_reader_meta_action.rb +21 -0
  215. data/lib/shoko/application/state/actions/update_reader_mode_action.rb +20 -0
  216. data/lib/shoko/application/state/actions/update_rendered_lines_action.rb +40 -0
  217. data/lib/shoko/application/state/actions/update_selection_action.rb +27 -0
  218. data/lib/shoko/application/state/actions/update_selections_action.rb +21 -0
  219. data/lib/shoko/application/state/actions/update_sidebar_action.rb +34 -0
  220. data/lib/shoko/application/state/actions/update_ui_loading_action.rb +23 -0
  221. data/lib/shoko/application/ui/reader_view_model_builder.rb +74 -0
  222. data/lib/shoko/application/ui/view_models/reader_view_model.rb +177 -0
  223. data/lib/shoko/application/unified_application.rb +48 -0
  224. data/lib/shoko/application/use_cases/catalog_service.rb +117 -0
  225. data/lib/shoko/application/use_cases/commands/annotation_editor_commands.rb +105 -0
  226. data/lib/shoko/application/use_cases/commands/application_commands.rb +208 -0
  227. data/lib/shoko/application/use_cases/commands/base_command.rb +166 -0
  228. data/lib/shoko/application/use_cases/commands/bookmark_commands.rb +114 -0
  229. data/lib/shoko/application/use_cases/commands/conditional_navigation_commands.rb +57 -0
  230. data/lib/shoko/application/use_cases/commands/menu_commands.rb +170 -0
  231. data/lib/shoko/application/use_cases/commands/navigation_commands.rb +183 -0
  232. data/lib/shoko/application/use_cases/commands/reader_commands.rb +46 -0
  233. data/lib/shoko/application/use_cases/commands/sidebar_commands.rb +55 -0
  234. data/lib/shoko/application/use_cases/settings_service.rb +123 -0
  235. data/lib/shoko/core/events/annotation_events.rb +94 -0
  236. data/lib/shoko/core/events/base_domain_event.rb +169 -0
  237. data/lib/shoko/core/events/bookmark_events.rb +41 -0
  238. data/lib/shoko/core/events/domain_event_bus.rb +163 -0
  239. data/lib/shoko/core/events/progress_events.rb +108 -0
  240. data/lib/shoko/core/models/bookmark.rb +36 -0
  241. data/lib/shoko/core/models/bookmark_data.rb +10 -0
  242. data/lib/shoko/core/models/chapter.rb +25 -0
  243. data/lib/shoko/core/models/content_block.rb +44 -0
  244. data/lib/shoko/core/models/reader_settings.rb +20 -0
  245. data/lib/shoko/core/models/selection_anchor.rb +73 -0
  246. data/lib/shoko/core/models/toc_entry.rb +14 -0
  247. data/lib/shoko/core/ports/annotation_repository.rb +0 -0
  248. data/lib/shoko/core/ports/book_repository.rb +0 -0
  249. data/lib/shoko/core/ports/book_source.rb +0 -0
  250. data/lib/shoko/core/ports/bookmark_repository.rb +0 -0
  251. data/lib/shoko/core/ports/cache.rb +0 -0
  252. data/lib/shoko/core/ports/input_handler.rb +0 -0
  253. data/lib/shoko/core/ports/renderer.rb +0 -0
  254. data/lib/shoko/core/ports/storage.rb +0 -0
  255. data/lib/shoko/core/services/annotation_service.rb +102 -0
  256. data/lib/shoko/core/services/base_service.rb +60 -0
  257. data/lib/shoko/core/services/bookmark_service.rb +267 -0
  258. data/lib/shoko/core/services/coordinate_service.rb +265 -0
  259. data/lib/shoko/core/services/layout_service.rb +95 -0
  260. data/lib/shoko/core/services/navigation/absolute_change_applier.rb +96 -0
  261. data/lib/shoko/core/services/navigation/absolute_layout.rb +101 -0
  262. data/lib/shoko/core/services/navigation/absolute_strategy.rb +179 -0
  263. data/lib/shoko/core/services/navigation/context_builder.rb +52 -0
  264. data/lib/shoko/core/services/navigation/context_helpers.rb +63 -0
  265. data/lib/shoko/core/services/navigation/dynamic_change_applier.rb +50 -0
  266. data/lib/shoko/core/services/navigation/dynamic_strategy.rb +51 -0
  267. data/lib/shoko/core/services/navigation/image_offset_snapper.rb +150 -0
  268. data/lib/shoko/core/services/navigation/nav_context.rb +27 -0
  269. data/lib/shoko/core/services/navigation/state_updater.rb +29 -0
  270. data/lib/shoko/core/services/navigation/strategy_factory.rb +20 -0
  271. data/lib/shoko/core/services/navigation_service.rb +150 -0
  272. data/lib/shoko/core/services/page_calculator_service.rb +242 -0
  273. data/lib/shoko/core/services/pagination/internal/absolute_page_map_builder.rb +28 -0
  274. data/lib/shoko/core/services/pagination/internal/chapter_cache.rb +60 -0
  275. data/lib/shoko/core/services/pagination/internal/dynamic_page_map_builder.rb +157 -0
  276. data/lib/shoko/core/services/pagination/internal/layout_metrics_calculator.rb +73 -0
  277. data/lib/shoko/core/services/pagination/internal/page_hydrator.rb +145 -0
  278. data/lib/shoko/core/services/pagination/internal/pagination_workflow.rb +152 -0
  279. data/lib/shoko/core/services/pagination/page_info_calculator.rb +247 -0
  280. data/lib/shoko/core/services/pagination/pagination_cache_preloader.rb +173 -0
  281. data/lib/shoko/core/services/pagination/pagination_coordinator.rb +202 -0
  282. data/lib/shoko/core/services/pagination/pagination_orchestrator.rb +291 -0
  283. data/lib/shoko/core/services/pagination.rb +10 -0
  284. data/lib/shoko/core/services/progress_helper.rb +22 -0
  285. data/lib/shoko/core/services/selection_service.rb +126 -0
  286. data/lib/shoko/core/validator.rb +76 -0
  287. data/lib/shoko/shared/errors.rb +97 -0
  288. data/lib/shoko/shared/version.rb +5 -0
  289. data/lib/shoko/test_support/terminal_double.rb +175 -0
  290. data/lib/shoko/test_support/test_mode.rb +78 -0
  291. data/lib/shoko.rb +279 -0
  292. data/lib/zip.rb +732 -0
  293. data/zip.rb +5 -0
  294. metadata +370 -0
@@ -0,0 +1,661 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'cgi'
4
+ require 'rexml/document'
5
+ require 'rexml/parsers/pullparser'
6
+
7
+ require_relative '../../../../core/models/content_block.rb'
8
+ require_relative 'html_processor'
9
+ require_relative '../../../output/terminal/terminal_sanitizer.rb'
10
+ require_relative '../../../../shared/errors.rb'
11
+ require_relative '../../../monitoring/logger.rb'
12
+
13
+ module Shoko
14
+ module Adapters::BookSources::Epub::Parsers
15
# Parses XHTML chapter markup into semantic content blocks built from text segments.
#
# Parsing sanitizes the source, loads it with REXML, selects the <body> element
# (or the document root when no body is found) and delegates block construction
# to XHTMLContentTraversal. Markup that REXML rejects is degraded to plain
# paragraphs derived from HTMLProcessor.html_to_text.
class XHTMLContentParser
  # Tag names grouped by role; shared with the segment and block builders.
  TAG_SETS = begin
    block_types = %w[p div section article aside header footer figure figcaption main].freeze
    heading_types = %w[h1 h2 h3 h4 h5 h6].freeze
    list_types = %w[ul ol].freeze
    list_item = 'li'
    blockquote = 'blockquote'
    pre = 'pre'
    hr = 'hr'
    br = 'br'
    img = 'img'
    table = 'table'
    # Note: `br` and `img` are not part of the block-level list.
    block_level_elements = (
      block_types +
      heading_types +
      list_types +
      [
        list_item,
        blockquote,
        pre,
        hr,
        table,
      ]
    ).freeze

    {
      inline_newline: "\n",
      block_types: block_types,
      heading_types: heading_types,
      list_types: list_types,
      list_item: list_item,
      blockquote: blockquote,
      pre: pre,
      hr: hr,
      br: br,
      img: img,
      table: table,
      block_level_elements: block_level_elements,
    }.freeze
  end

  WHITESPACE_PATTERN = /\s+/
  # The five entities predefined by XML; these must reach the XML parser untouched.
  XML_ENTITY_NAMES = %w[amp lt gt apos quot].freeze

  # @param html [#to_s] raw chapter markup; nil is treated as an empty string
  def initialize(html)
    @html = html.to_s
    @segment_builder = XHTMLSegmentBuilder.new(tag_sets: TAG_SETS, whitespace_pattern: WHITESPACE_PATTERN)
    @block_builder = XHTMLBlockBuilder.new(segment_builder: @segment_builder, tag_sets: TAG_SETS)
  end

  # Parses the markup given to the constructor.
  #
  # @return [Array] content blocks; empty when the input is blank or the
  #   document has no root element
  # @raise [Shoko::FormattingError] when the body has direct text but the
  #   traversal produced no blocks (see #ensure_blocks_present)
  def parse
    return [] if html_blank?

    body = parse_body
    return [] unless body

    build_blocks(body)
  rescue REXML::ParseException => e
    # Malformed XHTML: log it and degrade to plain-text paragraphs.
    Adapters::Monitoring::Logger.error('Failed to parse chapter HTML', error: e.message)
    fallback_blocks
  end

  private

  # True when the source contains nothing but whitespace.
  def html_blank?
    @html.strip.empty?
  end

  # Runs the traversal over +body+ and verifies that text did not silently vanish.
  def build_blocks(body)
    blocks = XHTMLContentTraversal.new(block_builder: @block_builder, tag_sets: TAG_SETS).build(body)
    ensure_blocks_present(body, blocks)
    blocks
  end

  # Returns the element to traverse: the body if found, else the document root
  # (which is nil for a document without a root element).
  def parse_body
    document = parse_document(@html)
    return nil unless document

    find_body(document) || document.root
  end

  # Sanitizes +text+ and loads it into a REXML document.
  def parse_document(text)
    safe = Shoko::Adapters::Output::Terminal::TerminalSanitizer.sanitize_xml_source(text.to_s, preserve_newlines: true,
                                                                                               preserve_tabs: true)
    sanitized = sanitize_for_xml(safe)
    # Preserve whitespace-only text nodes so inline element boundaries
    # don't accidentally collapse words (e.g., <em>foo</em>\n<em>bar</em>).
    # We normalize whitespace later in `normalize_text`.
    REXML::Document.new(sanitized)
  end

  # Rewrites every named entity reference (`&name;`) so the result only
  # contains the entities XML predefines.
  def sanitize_for_xml(text)
    text.gsub(/&([A-Za-z][A-Za-z0-9]+);/) do |match|
      sanitize_entity(match)
    end
  end

  # Maps one `&name;` reference to XML-safe text.
  #
  # - XML's predefined entities are returned unchanged.
  # - Entities HTMLProcessor can decode are replaced by the decoded text.
  # - Anything else has its ampersand escaped so it survives as literal text.
  #
  # @param match [String] the full reference, including `&` and `;`
  # @return [String]
  def sanitize_entity(match)
    # Take the name from the matched text itself. Regexp.last_match is
    # method-local, so the capture produced by the caller's gsub is not
    # visible from this method and would always read as nil here.
    name = match[1...-1]
    return match if XML_ENTITY_NAMES.include?(name)

    decoded = Shoko::Adapters::BookSources::Epub::Parsers::HTMLProcessor.decode_entities(match)
    decoded == match ? "&amp;#{name};" : decoded
  end

  # Looks for a direct child of the root named "body", trying a
  # namespace-agnostic match first and then the plain lower/upper-case names.
  def find_body(document)
    root = document&.root
    return nil unless root

    elements = root.elements
    elements['*[local-name()="body"]'] ||
      elements['body'] ||
      elements['BODY']
  end

  # Raises when +body+ has non-blank direct text children but no blocks were
  # produced; a body without direct text, or with at least one block, passes.
  def ensure_blocks_present(body, blocks)
    text_content = body.texts.join.strip
    return if text_content.empty? || blocks.any?

    Adapters::Monitoring::Logger.error(
      'Formatting produced no blocks',
      source: 'XHTMLContentParser',
      sample: text_content.slice(0, 120)
    )
    raise Shoko::FormattingError.new('chapter', 'normalized block list was empty')
  end

  # Best-effort fallback used when XML parsing fails: converts the raw markup
  # to text and emits one paragraph block per blank-line-separated chunk.
  # Any error raised along the way yields an empty list.
  def fallback_blocks
    text = Shoko::Adapters::BookSources::Epub::Parsers::HTMLProcessor.html_to_text(@html)
    return [] if text.to_s.strip.empty?

    paragraphs = text.split(/\n{2,}/).map(&:strip).reject(&:empty?)
    paragraphs.map do |paragraph|
      Shoko::Core::Models::ContentBlock.new(
        type: :paragraph,
        segments: [@segment_builder.text_segment(paragraph)],
        metadata: {}
      )
    end
  rescue StandardError
    []
  end
end
159
+
160
# Walks a parsed element tree depth-first and collects the blocks that the
# block builder produces for each element or text node.
class XHTMLContentTraversal
  # Traversal state handed to the block builder: the stack of enclosing lists
  # and a blockquote flag. This class only ever creates contexts with the flag
  # copied from the parent (initially false).
  Context = Struct.new(:list_stack, :in_blockquote, keyword_init: true)
  private_constant :Context

  # Tracks ordered list numbering as the traversal enters list items.
  ListContext = Struct.new(:ordered, :index, keyword_init: true) do
    # Marker text for the current item: "N." for ordered lists, a bullet otherwise.
    def marker
      ordered ? "#{index}." : '•'
    end

    # Moves to the next number; a no-op for unordered lists.
    def advance
      self.index += 1 if ordered
    end
  end
  private_constant :ListContext

  # Elements whose whole subtree is ignored.
  SKIPPED_ELEMENTS = %w[script style].freeze
  private_constant :SKIPPED_ELEMENTS

  # @param block_builder [#block_for, #list_item, #paragraph, #paragraph_from_segments,
  #   #segments_from_text, #block_via_style?, #contains_block_children?, #compact_blocks]
  # @param tag_sets [Hash] must provide :list_types, :list_item, :block_types
  #   and :block_level_elements
  def initialize(block_builder:, tag_sets:)
    @block_builder = block_builder
    @tag_sets = tag_sets
    @blocks = []
  end

  # Traverses the children of +root+ and returns the compacted block list.
  #
  # @param root [REXML::Element]
  # @return [Array] whatever the block builder's compact_blocks returns
  def build(root)
    # Start from a clean slate so calling build again on the same instance
    # does not return blocks collected by an earlier call.
    @blocks = []
    context = Context.new(list_stack: [], in_blockquote: false)
    traverse_children(root, context)
    @block_builder.compact_blocks(@blocks)
  end

  private

  attr_reader :block_builder, :tag_sets

  def traverse_children(node, context)
    node.children.each { |child| handle_node(child, context) }
  end

  # Dispatches on node type; anything that is neither an element nor text
  # (comments, instructions, ...) is ignored.
  def handle_node(child, context)
    if child.is_a?(REXML::Element)
      handle_element(child, context)
    elsif child.is_a?(REXML::Text)
      append_text_block(child, context)
    end
  end

  # Tries, in order: a dedicated block from the builder, list handling,
  # container handling; otherwise descends into the element's children.
  def handle_element(element, context)
    name = element.name.downcase
    return if skip_element?(name)

    return if append_block_result(block_builder.block_for(name, element, context))
    return if handle_list_element(name, element, context)
    return if handle_container_element(name, element, context)

    traverse_children(element, context)
  end

  # Appends a single block or each block of an array.
  #
  # @return [Boolean] false when +result+ is nil/false, i.e. the element still
  #   needs handling
  def append_block_result(result)
    return false unless result

    if result.is_a?(Array)
      result.each { |block| append_block(block) }
    else
      append_block(result)
    end
    true
  end

  # Handles list containers (by traversing their items in a new list context)
  # and list items. Returns true when the element was consumed.
  def handle_list_element(name, element, context)
    list_types = tag_sets[:list_types]
    if list_types.include?(name)
      traverse_list(element, context, ordered: name == 'ol')
      return true
    end

    return false unless name == tag_sets[:list_item]

    append_block(block_builder.list_item(element, context))
    true
  end

  # Handles block containers (by tag name or inline display style): containers
  # with block-level children are descended into, others become one paragraph.
  def handle_container_element(name, element, context)
    block_types = tag_sets[:block_types]
    block_level = tag_sets[:block_level_elements]
    return false unless block_types.include?(name) || block_builder.block_via_style?(element)

    if block_builder.contains_block_children?(element, block_level)
      traverse_children(element, context)
    else
      append_block(block_builder.paragraph(element, context))
    end
    true
  end

  # Turns a loose text node into a paragraph when it yields any segments.
  def append_text_block(text_node, context)
    segments = block_builder.segments_from_text(text_node.value)
    append_block(block_builder.paragraph_from_segments(segments, context)) if segments
  end

  # Visits the element children of a list with a fresh ListContext pushed on
  # a copy of the list stack; text nodes directly inside the list are not visited.
  def traverse_list(element, context, ordered:)
    list_context = ListContext.new(ordered: ordered, index: ordered ? 1 : nil)
    new_context = Context.new(list_stack: context.list_stack + [list_context],
                              in_blockquote: context.in_blockquote)
    element.each_element { |child| handle_element(child, new_context) }
  end

  def append_block(block)
    @blocks << block if block
  end

  def skip_element?(name)
    SKIPPED_ELEMENTS.include?(name)
  end
end
274
+
275
+ # Builds content blocks and metadata from parsed elements.
276
+ class XHTMLBlockBuilder
277
+ ContentBlock = Shoko::Core::Models::ContentBlock
278
+
279
# Stores the collaborators used to build blocks.
#
# @param segment_builder [Object] builder used for text segments
# @param tag_sets [Hash] tag names grouped by role
def initialize(segment_builder:, tag_sets:)
  @segments, @tag_sets = segment_builder, tag_sets
end
283
+
284
# Builds the block(s) for elements that have dedicated handling: headings,
# blockquotes, images, preformatted text, separators, tables and line breaks.
#
# @return the heading block when +name+ is a heading, otherwise the result of
#   the matching builder, or nil when +name+ has no dedicated handling
def block_for(name, element, context)
  tags = @tag_sets
  heading_block(name, element, context) ||
    case name
    when tags[:blockquote] then quote_block(element, context)
    when tags[:img] then image_block(element, context)
    when tags[:pre] then preformatted_block(element, context)
    when tags[:hr] then separator_block(context)
    when tags[:table] then table_blocks(element, context)
    when tags[:br] then break_block
    end
end
303
+
304
# Builds a :list_item block for +element+.
#
# The marker comes from the innermost list context (which is then advanced);
# a bullet is used when the item is not inside any list. The nesting level
# equals the depth of the list stack.
def list_item(element, context)
  stack = context.list_stack
  innermost = stack.last
  item_segments = segments_for(element)
  bullet = if innermost
             innermost.marker
           else
             '•'
           end
  innermost&.advance

  depth = stack.length
  ContentBlock.new(
    type: :list_item,
    segments: item_segments,
    level: depth,
    metadata: metadata_with_quote(context, marker: bullet, level: depth)
  )
end
315
+
316
# Builds a :paragraph block from the segments of +element+.
#
# @return [ContentBlock, nil] nil when the element yields no segments
def paragraph(element, context)
  built = segments_for(element)
  if built.empty?
    nil
  else
    ContentBlock.new(type: :paragraph, segments: built, metadata: metadata_with_quote(context))
  end
end
322
+
323
+ def paragraph_from_segments(segments, context)
324
+ return nil if segments.nil? || segments.empty?
325
+
326
+ ContentBlock.new(type: :paragraph, segments: segments, metadata: metadata_with_quote(context))
327
+ end
328
+
329
+ def segments_from_text(text)
330
+ segment = @segments.text_segment(text)
331
+ segments = @segments.finalize_segments([segment])
332
+ segments.empty? ? nil : segments
333
+ end
334
+
335
+ def compact_blocks(blocks)
336
+ blocks.reject do |block|
337
+ next false if block&.type == :break
338
+
339
+ block.nil? || block.segments.empty? || block.text.strip.empty?
340
+ end
341
+ end
342
+
343
+ def block_via_style?(element)
344
+ style = element.attributes['style'].to_s
345
+ /display\s*:\s*(block|list-item)/i.match?(style)
346
+ end
347
+
348
+ def contains_block_children?(element, block_level_elements)
349
+ element.children.any? do |child|
350
+ next false unless child.is_a?(REXML::Element)
351
+
352
+ name = child.name.to_s.downcase
353
+ block_level_elements.include?(name) || block_via_style?(child)
354
+ end
355
+ end
356
+
357
+ private
358
+
359
+ def heading_block(name, element, context)
360
+ heading_types = @tag_sets[:heading_types]
361
+ return nil unless heading_types.include?(name)
362
+
363
+ level = name.delete('h').to_i
364
+ segments = segments_for(element)
365
+ metadata = metadata_with_quote(context, level: level)
366
+ ContentBlock.new(type: :heading, segments: segments, level: level, metadata: metadata)
367
+ end
368
+
369
+ def quote_block(element, context)
370
+ segments = segments_for(element)
371
+ return nil if segments.empty?
372
+
373
+ metadata = metadata_with_quote(context, quoted: true)
374
+ ContentBlock.new(type: :quote, segments: segments, metadata: metadata)
375
+ end
376
+
377
+ def preformatted_block(element, context)
378
+ target = code_child_for(element) || element
379
+ text = target.texts.join
380
+ return nil if text.to_s.empty?
381
+
382
+ metadata = metadata_with_quote(context, preserve_whitespace: true)
383
+ segment = @segments.text_segment(text, code: true, preserve_whitespace: true)
384
+ ContentBlock.new(type: :code, segments: [segment], metadata: metadata)
385
+ end
386
+
387
+ def image_block(element, context)
388
+ segments = @segments.finalize_segments([@segments.image_placeholder_segment({})])
389
+ return nil if segments.empty?
390
+
391
+ attrs = element.attributes
392
+ metadata = metadata_with_quote(context, image: { src: attrs['src'], alt: attrs['alt'] })
393
+ ContentBlock.new(type: :image, segments: segments, metadata: metadata)
394
+ end
395
+
396
+ def separator_block(context)
397
+ metadata = metadata_with_quote(context)
398
+ ContentBlock.new(
399
+ type: :separator,
400
+ segments: [@segments.text_segment('─' * 40)],
401
+ metadata: metadata
402
+ )
403
+ end
404
+
405
+ def table_blocks(element, context)
406
+ rows = collect_descendants(element, 'tr')
407
+ return [] if rows.empty?
408
+
409
+ lines = rows.filter_map { |row| table_row_text(row) }
410
+ return [] if lines.empty?
411
+
412
+ inline_newline = @tag_sets[:inline_newline]
413
+ metadata = metadata_with_quote(context, preserve_whitespace: true)
414
+ block = ContentBlock.new(
415
+ type: :table,
416
+ segments: [@segments.text_segment(lines.join(inline_newline), preserve_whitespace: true)],
417
+ metadata: metadata
418
+ )
419
+ [block]
420
+ end
421
+
422
+ def break_block
423
+ ContentBlock.new(
424
+ type: :break,
425
+ segments: [],
426
+ metadata: { spacer: true }
427
+ )
428
+ end
429
+
430
+ def segments_for(element)
431
+ @segments.finalize_segments(@segments.collect_segments(element))
432
+ end
433
+
434
+ def metadata_with_quote(context, base = {})
435
+ metadata = base.dup
436
+ metadata[:quoted] = true if context.in_blockquote
437
+ metadata
438
+ end
439
+
440
+ def code_child_for(element)
441
+ element.elements.find do |child|
442
+ child.is_a?(REXML::Element) && child.name.casecmp('code').zero?
443
+ end
444
+ end
445
+
446
+ def table_row_text(row)
447
+ cells = row.elements.each_with_object([]) do |cell, acc|
448
+ next unless table_cell?(cell)
449
+
450
+ text = @segments.collect_segments(cell).map(&:text).join.strip
451
+ acc << text unless text.empty?
452
+ end
453
+ cells.empty? ? nil : cells.join(' | ')
454
+ end
455
+
456
+ def table_cell?(element)
457
+ %w[td th].include?(element.name.downcase)
458
+ end
459
+
460
+ def collect_descendants(element, name)
461
+ results = []
462
+ element.each_element do |child|
463
+ results << child if child.name.casecmp(name).zero?
464
+ results.concat(collect_descendants(child, name))
465
+ end
466
+ results
467
+ end
468
+ end
469
+
470
# Collects inline content as styled TextSegment models and normalizes their
# text and whitespace.
class XHTMLSegmentBuilder
  TextSegment = Shoko::Core::Models::TextSegment

  # Styles implied by an inline tag name.
  STYLE_MAP = {
    'strong' => { bold: true },
    'b' => { bold: true },
    'em' => { italic: true },
    'i' => { italic: true },
    'u' => { underline: true },
    'code' => { code: true, preserve_whitespace: true },
    'kbd' => { code: true, preserve_whitespace: true },
    'samp' => { code: true, preserve_whitespace: true },
  }.freeze

  # Styles recognised inside an inline "style" attribute.
  SPAN_STYLE_MATCHERS = {
    bold: /font-weight\s*:\s*bold/i,
    italic: /font-style\s*:\s*italic/i,
    underline: /text-decoration\s*:\s*underline/i,
  }.freeze

  PLACEHOLDER_TEXT = '[Image]'

  # @param tag_sets [Hash] read for the :br, :img and :inline_newline entries
  # @param whitespace_pattern [Regexp, String] pattern whose matches are
  #   replaced by a single space when text is normalized
  def initialize(tag_sets:, whitespace_pattern:)
    @whitespace_pattern = whitespace_pattern
    @inline_newline = tag_sets[:inline_newline]
    @img_tag = tag_sets[:img]
    @br_tag = tag_sets[:br]
  end

  # Returns the segments of all children of +element+, in document order,
  # each carrying +inherited_styles+ plus whatever its own tags add.
  def collect_segments(element, inherited_styles = {})
    element.children.flat_map do |node|
      segments_for(node, inherited_styles)
    end
  end

  # Builds one segment; the text is normalized according to +styles+.
  def text_segment(text, styles = {})
    normalized = normalize_text(text.to_s, styles)
    TextSegment.new(text: normalized, styles: styles)
  end

  # Placeholder segment for a block-level image, rendered dim.
  def image_placeholder_segment(inherited_styles)
    placeholder_segment(inherited_styles.merge(dim: true))
  end

  # Placeholder segment for an inline image; src and the stripped alt text
  # travel along in the :inline_image style.
  def inline_image_placeholder_segment(element, inherited_styles)
    attributes = element.attributes
    image = { src: attributes['src'].to_s, alt: attributes['alt'].to_s.strip }
    placeholder_segment(inherited_styles.merge(dim: true, inline_image: image))
  end

  # Drops nil/empty segments, removes the leading spaces of a segment that
  # follows one ending in a space, and trims whitespace at both outer edges.
  #
  # @return [Array<TextSegment>] possibly empty
  def finalize_segments(segments)
    kept = compact_segments(segments)
    kept.empty? ? [] : trim_edge_whitespace(collapse_boundary_spaces(kept))
  end

  private

  # Segments for one child node: text nodes yield at most one segment,
  # elements are expanded, anything else (including nil) yields nothing.
  def segments_for(child, inherited_styles)
    case child
    when REXML::Text then text_node_segments(child, inherited_styles)
    when REXML::Element then segments_for_element(child, inherited_styles)
    else []
    end
  end

  def text_node_segments(node, inherited_styles)
    candidate = text_segment(node.value, inherited_styles)
    return [] if candidate.text.to_s.empty?

    [candidate]
  end

  # Line breaks and images become single dedicated segments; every other
  # element contributes its children with the element's styles merged in.
  def segments_for_element(element, inherited_styles)
    tag = element.name.downcase
    if tag == @br_tag
      [line_break_segment(inherited_styles)]
    elsif tag == @img_tag
      [inline_image_placeholder_segment(element, inherited_styles)]
    else
      collect_segments(element, inherited_styles.merge(styles_for(tag, element)))
    end
  end

  def line_break_segment(inherited_styles)
    break_styles = inherited_styles.merge(break: true)
    text_segment(@inline_newline, break_styles)
  end

  # Styles contributed by a tag: the STYLE_MAP entry when there is one,
  # otherwise inline-style or link styles for span/a, otherwise none.
  def styles_for(name, element)
    STYLE_MAP.fetch(name) do
      case name
      when 'span' then span_styles(element)
      when 'a' then link_styles(element)
      else {}
      end
    end
  end

  def link_styles(element)
    target = { link: element.attributes['href'] }
    target.merge(span_styles(element))
  end

  # Flags every SPAN_STYLE_MATCHERS key whose pattern matches the element's
  # style attribute; empty when the attribute is missing or blank.
  def span_styles(element)
    css = element.attributes['style']
    return {} if css.to_s.empty?

    SPAN_STYLE_MATCHERS.select { |_flag, pattern| pattern.match?(css) }.transform_values { true }
  end

  # Decodes the text, then leaves it alone (whitespace-preserving styles),
  # treats it as a break, or collapses its whitespace.
  def normalize_text(text, styles)
    decoded = decode_text(text)
    if preserve_whitespace?(styles)
      decoded
    elsif styles[:break]
      normalize_break(decoded)
    else
      normalize_whitespace(decoded)
    end
  end

  # Entity decoding followed by terminal sanitizing (newlines and tabs kept).
  def decode_text(text)
    decoded = Shoko::Adapters::BookSources::Epub::Parsers::HTMLProcessor.decode_entities(text)
    Shoko::Adapters::Output::Terminal::TerminalSanitizer.sanitize(
      decoded,
      preserve_newlines: true,
      preserve_tabs: true
    )
  end

  def preserve_whitespace?(styles)
    %i[code preserve_whitespace].any? { |flag| styles[flag] }
  end

  def normalize_break(text)
    return @inline_newline if text == @inline_newline

    text
  end

  # Removes carriage returns, turns newlines into spaces and replaces every
  # match of the whitespace pattern with one space.
  def normalize_whitespace(text)
    single_line = text.delete("\r").tr("\n", ' ')
    single_line.gsub(@whitespace_pattern, ' ')
  end

  def placeholder_segment(styles)
    text_segment(" #{PLACEHOLDER_TEXT} ", styles)
  end

  def compact_segments(segments)
    present = Array(segments).compact
    present.reject { |segment| segment_text(segment).empty? }
  end

  # Prevents doubled spaces where two segments meet. Expects a non-empty list.
  def collapse_boundary_spaces(segments)
    head, *tail = segments
    tail.each_with_object([head]) do |segment, merged|
      candidate = adjust_leading_space(merged.last, segment)
      merged << candidate if candidate && !segment_text(candidate).empty?
    end
  end

  # Returns +segment+ untouched unless the previous text ends with a space and
  # this one starts with one; then the leading spaces are removed. Returns nil
  # when nothing but spaces was there.
  def adjust_leading_space(previous, segment)
    before = segment_text(previous)
    current = segment_text(segment)
    doubled = before.end_with?(' ') && current.start_with?(' ')
    return segment unless doubled

    remainder = current.sub(/\A +/, '')
    remainder.empty? ? nil : TextSegment.new(text: remainder, styles: segment.styles)
  end

  # Trims leading whitespace of the first and trailing whitespace of the last
  # segment (on a copy of the list) and drops segments that end up empty.
  def trim_edge_whitespace(segments)
    return [] if segments.empty?

    trimmed = segments.dup
    trimmed[0] = trim_segment_start(trimmed.first)
    trimmed[-1] = trim_segment_end(trimmed.last)
    trimmed.reject { |segment| segment_text(segment).empty? }
  end

  def trim_segment_start(segment)
    with_text(segment, segment_text(segment).sub(/\A\s+/, ''))
  end

  def trim_segment_end(segment)
    with_text(segment, segment_text(segment).sub(/\s+\z/, ''))
  end

  # New segment with the same styles and the given text.
  def with_text(segment, text)
    TextSegment.new(text: text, styles: segment.styles)
  end

  def segment_text(segment)
    segment.text.to_s
  end
end
660
+ end
661
+ end
@@ -0,0 +1,41 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../../output/terminal/terminal_sanitizer.rb'
4
+
5
module Shoko
  module Adapters::BookSources::Epub::Parsers
    # Normalizes XML or XHTML text into UTF-8 and sanitizes control sequences.
    #
    # The source encoding is chosen in this order:
    # 1. a UTF-16 byte order mark (FF FE / FE FF),
    # 2. the +encoding+ pseudo-attribute of a leading XML declaration,
    # 3. UTF-8.
    # A UTF-8 byte order mark is removed before the declaration is looked up.
    module XmlTextNormalizer
      module_function

      UTF8_BOM = "\xEF\xBB\xBF".b.freeze

      # UTF-16 content is not ASCII-compatible, so its XML declaration can
      # never be matched on the raw bytes; the byte order mark is the only
      # usable signal.
      UTF16_BOMS = {
        "\xFF\xFE".b => Encoding::UTF_16LE,
        "\xFE\xFF".b => Encoding::UTF_16BE,
      }.freeze

      ENCODING_DECLARATION = /\A\s*<\?xml[^>]*encoding=["']([^"']+)["']/i

      # Converts +text+ to sanitized UTF-8.
      #
      # Bytes that are invalid in the detected encoding, or that have no UTF-8
      # counterpart, are replaced by U+FFFD. When anything in the conversion
      # raises, the input is sanitized as-is instead.
      #
      # @param text [#to_s] raw document content
      # @return the result of TerminalSanitizer.sanitize_xml_source
      def normalize(text)
        encoding, payload = detect_encoding(String(text).b)
        decoded = payload.force_encoding(encoding)
                         .encode(Encoding::UTF_8, invalid: :replace, undef: :replace, replace: "\uFFFD")
                         .delete_prefix("\uFEFF") # a mark that survived decoding
        sanitize_source(decoded)
      rescue StandardError
        sanitize_source(text.to_s)
      end

      # Determines the source encoding of binary +bytes+.
      #
      # @return [Array(Encoding, String)] the encoding and the bytes with any
      #   recognised byte order mark removed
      def detect_encoding(bytes)
        UTF16_BOMS.each do |bom, encoding|
          return [encoding, bytes.delete_prefix(bom)] if bytes.start_with?(bom)
        end

        payload = bytes.delete_prefix(UTF8_BOM)
        [declared_encoding(payload), payload]
      end

      # Encoding named by the XML declaration; UTF-8 when there is none or the
      # name is unknown to Ruby.
      def declared_encoding(bytes)
        name = bytes[ENCODING_DECLARATION, 1]
        name ? Encoding.find(name) : Encoding::UTF_8
      rescue StandardError
        Encoding::UTF_8
      end

      def sanitize_source(source)
        Shoko::Adapters::Output::Terminal::TerminalSanitizer.sanitize_xml_source(
          source,
          preserve_newlines: true,
          preserve_tabs: true
        )
      end

      private_class_method :detect_encoding, :declared_encoding, :sanitize_source
    end
  end
end