writebook_pdf_import 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 88b01c32f19a3690d72cd2cafee6aa8c71812925ff9933b30cf59df2fad28955
4
+ data.tar.gz: 9c73f0631abd8805a496ac6ce7e392e14ba9a688a96ab9626e3eb1869588f966
5
+ SHA512:
6
+ metadata.gz: 2285f57116079a0b1d332ae9ba686200e216acbc21716d7e384c3213111aded0ee8e4879849833a7d9805c3314f699f74852fc52e34e77333e35b2180ce5285e
7
+ data.tar.gz: caeeadbaa938ec8bc637592d92d746ef13b989d66523607f7c76b257880c37546014badf3cf6903ffe0d6bfec966560da5a27c9047e575f6cea6331a05e2f967
data/MIT-LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 albertski
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ # WritebookPdfImport
2
+
3
+ A Rails engine that adds PDF import support to [Writebook](https://once.com/writebook). It parses a PDF and creates `Page` and `Picture` leaves in a book.
4
+
5
+ ## Usage
6
+
7
+ An **Import PDF** button is added to the book page, allowing you to upload a PDF directly from the Writebook interface.
8
+
9
+ ![Import PDF button on the book page](docs/import_pdf.png)
10
+
11
+ ## Installation
12
+
13
+ Add to your `Gemfile`:
14
+
15
+ ```ruby
16
+ gem "writebook_pdf_import"
17
+ ```
18
+
19
+ ## What it does
20
+
21
+ - Extracts text from each PDF page and creates a `Page` leaf
22
+ - Extracts embedded images (JPEG, JPEG 2000, raw RGB/grayscale/CMYK) and creates `Picture` leaves
23
+ - Uses the first line of text as the page title if it is 3–100 characters long; otherwise falls back to "Page N", where N is the page number
24
+ - Skips blank pages
25
+
26
+ ## Dependencies
27
+
28
+ - `pdf-reader ~> 2.12`
29
+ - `mini_magick` (for JPEG 2000 and raw image conversion)
30
+ - ImageMagick must be installed on the system
31
+
32
+ ## Background Jobs
33
+
34
+ PDF imports are processed asynchronously using Active Job. You must have Redis and a Solid Queue worker running:
35
+
36
+ ```bash
37
+ redis-server
38
+ ```
@@ -0,0 +1,22 @@
1
+ import { Controller } from "@hotwired/stimulus"
2
+
3
// Stimulus controller for a <dialog> element: closes the dialog when a Turbo
// morph is about to replace it with a version that has no `open` attribute.
export default class extends Controller {
  // Defined as an arrow-function field so the exact same function reference
  // is passed to both addEventListener and removeEventListener.
  _closeOnMorph = (event) => {
    if (event.target !== this.element) return
    if (!this.element.open) return

    const incoming = event.detail.newElement
    if (!incoming || incoming.hasAttribute("open")) return

    this.element.close()
  }

  connect() {
    document.addEventListener("turbo:before-morph-element", this._closeOnMorph)
  }

  disconnect() {
    document.removeEventListener("turbo:before-morph-element", this._closeOnMorph)
  }
}
@@ -0,0 +1,49 @@
1
# Accepts a PDF upload for a book, stores it as an Active Storage blob and
# hands it to PdfImportJob for asynchronous processing.
class Books::ImportsController < ApplicationController
  # NOTE(review): BookScoped and ensure_editable are defined outside this
  # file; presumably they load @book and enforce edit permission - confirm.
  include BookScoped

  before_action :ensure_editable

  # Largest upload accepted by #create.
  MAX_FILE_SIZE = 500.megabytes

  # Validates the uploaded file, enqueues the import and responds with either
  # a Turbo Stream that shows the progress partial at 0% or an HTML redirect.
  # Any error raised along the way is logged and answered with a redirect
  # carrying a generic alert.
  def create
    return unless valid_pdf?(params[:pdf])

    enqueue_import(params[:pdf])

    respond_to do |format|
      format.turbo_stream do
        render turbo_stream: turbo_stream.update(
          helpers.dom_id(@book, :pdf_import_body),
          partial: "books/imports/progress",
          locals: { percent: 0 }
        )
      end
      format.html { redirect_to book_slug_url(@book), notice: "PDF import started. Pages will appear shortly." }
    end
  rescue => e
    Rails.logger.error("PDF import enqueue failed: #{e.class}: #{e.message}")
    redirect_to book_slug_url(@book), alert: "Could not import PDF."
  end

  private

  # Returns true when the upload is present, declares the PDF content type and
  # is within MAX_FILE_SIZE. Otherwise issues the redirect itself and returns
  # false, so the caller only needs to stop.
  def valid_pdf?(file)
    if file.blank? || file.content_type != "application/pdf"
      redirect_to book_slug_url(@book), alert: "Could not import PDF." and return false
    end

    if file.size > MAX_FILE_SIZE
      redirect_to book_slug_url(@book), alert: "PDF exceeds the #{MAX_FILE_SIZE / 1.megabyte}MB size limit." and return false
    end

    true
  end

  # Uploads the file and enqueues the import job for it. The job is the only
  # place that purges the blob, so if enqueueing fails the blob is purged here
  # instead of being left behind in storage; the error is then re-raised for
  # #create to handle.
  def enqueue_import(file)
    blob = upload_pdf(file)
    PdfImportJob.perform_later(@book, blob)
  rescue
    blob&.purge
    raise
  end

  # Stores the uploaded file as an unattached Active Storage blob.
  def upload_pdf(file)
    ActiveStorage::Blob.create_and_upload!(
      io: file,
      filename: file.original_filename,
      content_type: "application/pdf"
    )
  end
end
@@ -0,0 +1,30 @@
1
+ class PdfImportJob < ApplicationJob
2
+ include ActionView::RecordIdentifier
3
+
4
+ queue_as :default
5
+
6
+ def perform(book, blob)
7
+ blob.open do |file|
8
+ PdfImporter.new(book, file).import do |current, total|
9
+ percent = (current.to_f / total * 100).floor
10
+ broadcast_progress(book, percent)
11
+ end
12
+ end
13
+ rescue PdfImporter::InvalidPdfError, ArgumentError => e
14
+ Rails.logger.error("PdfImportJob failed for book #{book.id}: #{e.class}: #{e.message}")
15
+ ensure
16
+ blob.purge
17
+ Turbo::StreamsChannel.broadcast_refresh_to(book, "pdf_import")
18
+ end
19
+
20
+ private
21
+
22
+ def broadcast_progress(book, percent)
23
+ Turbo::StreamsChannel.broadcast_update_to(
24
+ book, "pdf_import",
25
+ target: dom_id(book, :pdf_import_body),
26
+ partial: "books/imports/progress",
27
+ locals: { percent: percent }
28
+ )
29
+ end
30
+ end
@@ -0,0 +1,36 @@
1
# Reads a PDF and adds its content to a book: one Page leaf per page with
# text, plus one Picture leaf per extracted image.
class PdfImporter
  # Raised when no PDF is given or pdf-reader cannot process it.
  class InvalidPdfError < StandardError; end

  # book    - the book that receives the new leaves.
  # pdf_io  - an IO (or anything PDF::Reader.new accepts) holding the PDF.
  #
  # Raises InvalidPdfError if pdf_io is blank or pdf-reader rejects it.
  def initialize(book, pdf_io)
    raise InvalidPdfError, "No PDF file provided" if pdf_io.blank?
    @book = book
    @reader = translating_pdf_errors { PDF::Reader.new(pdf_io) }
  end

  # Imports every non-blank page inside a single database transaction and
  # returns the flat list of values returned by @book.press.
  #
  # If a block is given it is called after each page with the 1-based page
  # number and the total page count.
  #
  # pdf-reader errors raised while pages are being read are translated to
  # InvalidPdfError, the same as errors raised when the reader is built, so
  # callers have a single error class to rescue.
  def import
    translating_pdf_errors do
      total = @reader.page_count
      ActiveRecord::Base.transaction do
        @reader.pages.each_with_index.flat_map do |raw_page, index|
          parsed = PdfPage.new(raw_page, index + 1)
          leaves = parsed.blank? ? [] : leaves_for(parsed)
          yield index + 1, total if block_given?
          leaves
        end
      end
    end
  end

  private

  # Runs the block, re-raising pdf-reader's malformed/unsupported errors as
  # InvalidPdfError with the original message.
  def translating_pdf_errors
    yield
  rescue PDF::Reader::MalformedPDFError, PDF::Reader::UnsupportedFeatureError => e
    raise InvalidPdfError, e.message
  end

  # Presses a Page for the page's text (when present) and a Picture for each
  # extracted image, all titled with the page's title.
  # NOTE(review): @book.press is defined outside this file; presumably it
  # creates and returns the leaf - confirm.
  def leaves_for(pdf_page)
    leaves = []
    leaves << @book.press(Page.new(body: pdf_page.body), title: pdf_page.title) if pdf_page.body.present?
    pdf_page.pictures.each do |attachment|
      picture = Picture.new
      picture.image.attach(attachment)
      leaves << @book.press(picture, title: pdf_page.title)
    end
    leaves
  end
end
@@ -0,0 +1,30 @@
1
# Wraps one pdf-reader page: its stripped text, the pictures extracted from
# it, and a title/body derived from the text.
class PdfPage
  MIN_TITLE_LENGTH = 3
  MAX_TITLE_LENGTH = 100

  attr_reader :pictures

  # page        - the page object; must respond to #text.
  # page_number - 1-based number used for the fallback title.
  def initialize(page, page_number)
    @page = page
    @page_number = page_number
    @text = page.text.strip
    @pictures = PdfPicture.extract_from(page)
  end

  # True when the page has neither text nor pictures.
  def blank?
    return false unless @text.blank?

    @pictures.empty?
  end

  # The first line of text when its length is within the title bounds,
  # otherwise "Page <number>".
  def title
    candidate = @text.lines.first&.strip
    usable = candidate.present? && (MIN_TITLE_LENGTH..MAX_TITLE_LENGTH).cover?(candidate.length)

    usable ? candidate : "Page #{@page_number}"
  end

  # The page text with runs of three or more newlines collapsed to two.
  def body
    collapsed = @text.gsub(/\n{3,}/, "\n\n")
    collapsed.strip
  end
end
@@ -0,0 +1,80 @@
1
# Extracts the image XObjects of a PDF page as attachment hashes
# ({ io:, filename:, content_type: }).
#
# NOTE(review): MiniMagick::Tool::Convert is used while the gem's mini_magick
# dependency is unpinned - confirm it exists in the mini_magick versions that
# may be installed.
class PdfPicture
  # PDF colour space names mapped to the raw-format prefixes passed to convert.
  COLORSPACE_MAP = { DeviceRGB: "rgb", DeviceGray: "gray", DeviceCMYK: "cmyk" }.freeze

  # Convenience wrapper: builds an extractor for +page+ and runs it.
  def self.extract_from(page)
    new(page).extract
  end

  def initialize(page)
    @source = page
  end

  # Returns an attachment hash for every image XObject that could be
  # converted. Any error is logged as a warning and yields an empty list.
  def extract
    @source.xobjects.filter_map do |name, stream|
      attachment_for(stream, name) if stream.hash[:Subtype] == :Image
    end
  rescue => e
    Rails.logger.warn "PDF picture extraction failed (#{e.class}): #{e.message}"
    []
  end

  private

  # Picks the conversion by the stream's first filter: JPEG data is passed
  # through, JPEG 2000 is converted to JPEG, anything else is treated as raw
  # pixel data. Returns nil (after logging) on failure.
  def attachment_for(stream, name)
    first_filter = Array(stream.hash[:Filter]).first

    if first_filter == :DCTDecode
      jpeg_attachment(stream, name)
    elsif first_filter == :JPXDecode
      jp2_to_jpeg(stream, name)
    else
      raw_to_png(stream, name)
    end
  rescue => e
    Rails.logger.warn "PDF picture attachment failed for #{name} (#{e.class}): #{e.message}"
    nil
  end

  # Wraps the stream's bytes, unchanged, as a JPEG attachment.
  def jpeg_attachment(stream, name)
    {
      io: StringIO.new(stream.data),
      filename: "#{name}.jpg",
      content_type: "image/jpeg"
    }
  end

  # Writes the JPEG 2000 bytes to a temp file and converts them to JPEG,
  # using the convert tool's return value as the JPEG bytes.
  # Returns nil (after logging) on failure.
  def jp2_to_jpeg(stream, name)
    converted = Tempfile.create([ name.to_s, ".jp2" ], binmode: true) do |jp2_file|
      jp2_file.write(stream.data)
      jp2_file.flush

      MiniMagick::Tool::Convert.new do |convert|
        convert << jp2_file.path
        convert << "jpeg:-"
      end
    end

    { io: StringIO.new(converted), filename: "#{name}.jpg", content_type: "image/jpeg" }
  rescue => e
    Rails.logger.warn "PDF JP2 conversion failed for #{name} (#{e.class}): #{e.message}"
    nil
  end

  # Converts unfiltered raw pixel data to PNG. Returns nil when the stream
  # lacks a width, a height or a colour space listed in COLORSPACE_MAP, and
  # nil (after logging) on failure. Bit depth defaults to 8.
  def raw_to_png(stream, name)
    attributes = stream.hash
    width, height = attributes.values_at(:Width, :Height)
    bit_depth = attributes[:BitsPerComponent] || 8
    colorspace = COLORSPACE_MAP[attributes[:ColorSpace]]

    return unless width && height && colorspace

    converted = Tempfile.create([ name.to_s, ".raw" ], binmode: true) do |pixels_file|
      pixels_file.write(stream.unfiltered_data)
      pixels_file.flush

      MiniMagick::Tool::Convert.new do |convert|
        convert.size "#{width}x#{height}"
        convert.depth bit_depth.to_s
        convert << "#{colorspace}:#{pixels_file.path}"
        convert << "png:-"
      end
    end

    { io: StringIO.new(converted), filename: "#{name}.png", content_type: "image/png" }
  rescue => e
    Rails.logger.warn "PDF raw picture conversion failed for #{name} (#{e.class}): #{e.message}"
    nil
  end
end
@@ -0,0 +1,52 @@
1
+ <%= turbo_stream_from book, "pdf_import" %>
2
+
3
+ <style>
4
+ [data-show-on-upload] { display: none; }
5
+ form[aria-busy="true"] [data-hide-on-upload] { display: none; }
6
+ form[aria-busy="true"] [data-show-on-upload] { display: flex; }
7
+ dialog:has([data-pdf-import-progress]) [data-hide-on-upload] { display: none; }
8
+ </style>
9
+
10
+ <div data-controller="dialog" data-action="keydown.esc->dialog#close">
11
+ <button type="button" data-action="click->dialog#open" class="btn btn--plain txt-medium fill-transparent disable-when-arranging disable-when-deleting" title="Import PDF" aria-label="Import PDF">
12
+ <svg viewBox="0 0 20 24" xmlns="http://www.w3.org/2000/svg" fill="var(--color-ink)">
13
+ <path d="m15.8 21.7c0 .3-.2.4-.4.4h-13.5-.2v-16.9c0-.3.2-.4.4-.4h6.3c0-.6.1-1.2.3-1.8h-6.9c-1 0-1.8.8-1.8 1.8v17.5c0 1 .8 1.8 1.8 1.8h14c.9 0 1.6-.6 1.8-1.4v-11.6c-.5.2-1.1.4-1.8.5v10.3z"/>
14
+ <path fill="var(--color-positive)" d="m15 0c-2.8 0-5 2.2-5 5s2.2 5 5 5 5-2.2 5-5-2.2-5-5-5zm.7 7.5-2 2-2-2 1-1 .4.4v-2.9h1.1v2.9l.5-.4z"/>
15
+ <path d="m4.5 14h9v1.5h-9zm0 3h6v1.5h-6z"/>
16
+ </svg>
17
+ <span class="for-screen-reader">Import PDF</span>
18
+ </button>
19
+
20
+ <dialog data-dialog-target="dialog" class="dialog panel shadow"
21
+ style="min-width: 600px"
22
+ data-controller="dialog-morph">
23
+ <button type="button" data-action="click->dialog#close" class="btn btn--plain" aria-label="Close" style="position: absolute; top: 0.5rem; right: 0.5rem;">
24
+ &times;
25
+ </button>
26
+
27
+ <%= form_with url: book_import_path(book), method: :post, multipart: true do |form| %>
28
+ <div data-hide-on-upload class="flex align-center gap">
29
+ <label class="flex align-center gap full-width">
30
+ <div class="flex align-center gap input input--actor">
31
+ <%= image_tag "file-pdf.svg", aria: { hidden: true }, size: 24, class: "colorize--black" %>
32
+ <span class="search__input txt-large">
33
+ <%= form.file_field :pdf, accept: "application/pdf", required: true, class: "input--file__input" %>
34
+ </span>
35
+ </div>
36
+
37
+ <button type="submit" class="btn btn--reversed txt-medium" title="Import PDF">
38
+ <%= image_tag "arrow-right.svg", aria: { hidden: true }, size: 24 %>
39
+ <span class="for-screen-reader">Import PDF</span>
40
+ </button>
41
+ </label>
42
+ </div>
43
+
44
+ <div data-show-on-upload class="flex align-center gap">
45
+ <%= image_tag "file-pdf.svg", aria: { hidden: true }, size: 24, class: "colorize--black" %>
46
+ <span class="txt-medium">Uploading PDF&hellip;</span>
47
+ </div>
48
+ <% end %>
49
+
50
+ <div id="<%= dom_id(book, :pdf_import_body) %>"></div>
51
+ </dialog>
52
+ </div>
@@ -0,0 +1,11 @@
1
+ <div data-pdf-import-progress style="flex: 1">
2
+ <div style="background: var(--color-hairline, #e0e0e0); border-radius: 2px; overflow: hidden; height: 6px;">
3
+ <div role="progressbar"
4
+ aria-valuenow="<%= percent %>"
5
+ aria-valuemin="0"
6
+ aria-valuemax="100"
7
+ style="width: <%= percent %>%; height: 100%; background: var(--color-positive, #4caf50); transition: width 0.3s ease-out;">
8
+ </div>
9
+ </div>
10
+ <span class="txt-small txt-muted">Importing PDF&hellip; <%= percent %>%</span>
11
+ </div>
@@ -0,0 +1 @@
1
# Expose the engine's dialog-morph Stimulus controller under the
# "controllers/" importmap namespace.
pin(
  "controllers/dialog_morph_controller",
  to: "writebook_pdf_import/dialog_morph_controller.js"
)
data/config/routes.rb ADDED
@@ -0,0 +1,5 @@
1
# Adds a singular, create-only `import` resource nested under books, handled
# by Books::ImportsController. No routes are generated for books themselves.
Rails.application.routes.draw do
  resources(:books, only: []) do
    resource(:import, controller: "books/imports", only: [ :create ])
  end
end
@@ -0,0 +1,56 @@
1
module WritebookPdfImport
  # Rails engine that wires PDF import into the host application: routes,
  # view paths, the import button, the importmap pin and a pdf-reader patch.
  class Engine < ::Rails::Engine
    # Add the engine's routes file to the list Rails reloads — works in dev too
    initializer "writebook_pdf_import.routes" do |app|
      app.config.paths["config/routes.rb"] << root.join("config/routes.rb")
    end

    # Put the engine's views ahead of the existing controller view paths.
    initializer "writebook_pdf_import.view_paths" do
      ActiveSupport.on_load(:action_controller) do
        prepend_view_path WritebookPdfImport::Engine.root.join("app/views")
      end
    end

    # Inject the import button without editing the host template: any template
    # whose identifier ends with books/_create_buttons.html.erb gets a render
    # call for the import partial appended to its source before compilation.
    initializer "writebook_pdf_import.patch_create_buttons" do
      ActiveSupport.on_load(:action_view) do
        ActionView::Template.prepend(Module.new do
          def initialize(source, identifier, handler, **kwargs)
            if identifier.to_s.end_with?("books/_create_buttons.html.erb")
              source = source.to_s + %(\n<%= render "books/imports/import", book: book %>)
            end
            # Bare super forwards the (possibly reassigned) source together
            # with the remaining arguments.
            super
          end
        end)
      end
    end

    # Register the engine's importmap file, which pins dialog_morph_controller
    # under the host app's "controllers/" importmap namespace so
    # eagerLoadControllersFrom picks it up automatically.
    initializer "writebook_pdf_import.importmap", before: "importmap" do |app|
      if app.config.respond_to?(:importmap)
        app.config.importmap.paths << root.join("config/importmap.rb")
        app.config.importmap.cache_sweepers << root.join("app/assets/javascripts")
      end
    end

    # Patch pdf-reader's word-spacing threshold. The default (font_size * 0.2) is
    # too aggressive for large/bold fonts and drops spaces between words. 0.1 is
    # tight enough to preserve intra-word kerning while keeping word spaces intact.
    initializer "writebook_pdf_import.pdf_reader_patch" do
      ActiveSupport.on_load(:after_initialize) do
        require "pdf/reader"

        PDF::Reader::TextRun.prepend(Module.new do
          # Merges two runs: text is joined directly when the gap between them
          # is below the threshold, otherwise with a single space.
          def +(other)
            raise ArgumentError, "#{other} cannot be merged with this run" unless mergable?(other)
            if (other.x - endx) < (font_size * 0.1)
              self.class.new(x, y, other.endx - x, font_size, text + other.text)
            else
              self.class.new(x, y, other.endx - x, font_size, "#{text} #{other.text}")
            end
          end
        end)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
module WritebookPdfImport
  # Gem version. Frozen so the shared string cannot be mutated at runtime.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,2 @@
1
+ require "writebook_pdf_import/version"
2
+ require "writebook_pdf_import/engine"
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: writebook_pdf_import
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Albert Jankowski
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: rails
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: pdf-reader
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '2.12'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.12'
40
+ - !ruby/object:Gem::Dependency
41
+ name: mini_magick
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: '0'
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ description: A Rails engine that adds the ability to import PDF files into a Writebook
55
+ book, creating pages and pictures from the PDF content.
56
+ email:
57
+ - albertski@hey.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - MIT-LICENSE
63
+ - README.md
64
+ - app/assets/javascripts/writebook_pdf_import/dialog_morph_controller.js
65
+ - app/controllers/books/imports_controller.rb
66
+ - app/jobs/pdf_import_job.rb
67
+ - app/models/pdf_importer.rb
68
+ - app/models/pdf_page.rb
69
+ - app/models/pdf_picture.rb
70
+ - app/views/books/imports/_import.html.erb
71
+ - app/views/books/imports/_progress.html.erb
72
+ - config/importmap.rb
73
+ - config/routes.rb
74
+ - lib/writebook_pdf_import.rb
75
+ - lib/writebook_pdf_import/engine.rb
76
+ - lib/writebook_pdf_import/version.rb
77
+ homepage: https://github.com/albertski/writebook_pdf_import
78
+ licenses:
79
+ - MIT
80
+ metadata:
81
+ homepage_uri: https://github.com/albertski/writebook_pdf_import
82
+ source_code_uri: https://github.com/albertski/writebook_pdf_import
83
+ rdoc_options: []
84
+ require_paths:
85
+ - lib
86
+ required_ruby_version: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ required_rubygems_version: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
96
+ requirements: []
97
+ rubygems_version: 4.0.6
98
+ specification_version: 4
99
+ summary: PDF import support for Writebook
100
+ test_files: []