docpdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,61 @@
1
+ require "tempfile"
2
+ require_relative "base"
3
+
4
module DocPDF
  module Adapters
    module Converters
      # Converter adapter that produces a PDF by running LibreOffice (`soffice`)
      # in headless mode against a temporary copy of the input document.
      class Soffice < Base
        # MIME types this adapter declares support for.
        MIME_TYPES = %w[
          application/msword
          application/vnd.openxmlformats-officedocument.wordprocessingml.document
          application/vnd.ms-excel
          application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
          application/vnd.ms-powerpoint
          application/vnd.openxmlformats-officedocument.presentationml.presentation
          application/vnd.oasis.opendocument.text
          application/vnd.oasis.opendocument.spreadsheet
          application/vnd.oasis.opendocument.presentation
          text/csv
          text/html
          text/rtf
          application/rtf
        ].freeze

        class << self
          # Converts a document to PDF.
          #
          # The bytes are written to a temp file (keeping the source extension),
          # `soffice --headless --convert-to pdf` is run with a throwaway output
          # directory, and the first PDF found in that directory is returned.
          #
          # @param data [String] raw bytes of the source document
          # @param source_filename [String, nil] original filename; only its
          #   extension and its text (for error messages) are used
          # @return [String] binary contents of the generated PDF
          # @raise [SofficeNotFoundError] when the executable could not be run
          #   (`system` returned nil) and no PDF was produced
          # @raise [ConversionError] when the command ran but no PDF was produced
          def convert(data, source_filename)
            Dir.mktmpdir do |output_dir|
              Tempfile.create(["docpdf", input_extension(source_filename)]) do |tempfile|
                tempfile.binmode
                tempfile.write(data)
                # Push buffered bytes to disk so the external process reads the whole document.
                tempfile.flush

                stderr_path = File.join(output_dir, "stderr.log")
                success = system(DocPDF.configuration.soffice_path, "--headless", "--convert-to", "pdf",
                                 "--outdir", output_dir, tempfile.path,
                                 out: File::NULL, err: stderr_path)

                # `base:` keeps the directory out of the pattern, so glob
                # metacharacters in the temp path cannot affect the match.
                pdf_name = Dir.glob("*.pdf", base: output_dir).first

                if pdf_name
                  File.binread(File.join(output_dir, pdf_name))
                else
                  raise_failure(success, source_filename, read_stderr(stderr_path))
                end
              end
            end
          end

          private

          # Suffix for the temp input file: the source extension, or ".tmp" when
          # no filename is known. (File.extname(".tmp") is "", so the fallback
          # must not be routed through File.extname.)
          def input_extension(source_filename)
            source_filename ? File.extname(source_filename) : ".tmp"
          end

          # Captured stderr of the soffice run, stripped; "" when the log file
          # was never created.
          def read_stderr(stderr_path)
            File.exist?(stderr_path) ? File.read(stderr_path).strip : ""
          end

          # Raises the error matching the outcome of `system`:
          # nil => command could not be executed, true => exit 0 without output,
          # false => non-zero exit.
          def raise_failure(success, source_filename, stderr_output)
            detail = stderr_output.empty? ? "" : " (#{stderr_output})"

            if success.nil?
              raise SofficeNotFoundError, "LibreOffice (soffice) not found on PATH. Install LibreOffice or set DocPDF.configuration.soffice_path."
            elsif success
              raise ConversionError, "LibreOffice completed but produced no PDF for #{source_filename || 'document'}#{detail}"
            else
              raise ConversionError, "LibreOffice failed to convert #{source_filename || 'document'} to PDF#{detail}"
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
module DocPDF
  module Adapters
    module Stampers
      # Abstract base for stamper adapters. Subclasses implement `.stamp`;
      # shared geometry helpers live here as private class methods.
      class Base
        # Decimal places kept before taking the ceiling in `rotated_bounds`, so
        # floating-point noise (e.g. cos(90deg) ~ 6e-17) cannot push an exact
        # extent such as 10.000000000000007 up to 11.
        BOUNDS_PRECISION = 6

        class << self
          # Applies the given stamps to a PDF. Must be overridden.
          #
          # @param data [String] source PDF bytes
          # @param stamps [Array<Hash>] stamp definitions
          # @param page_indices [Array<Integer>, nil] pages to stamp
          # @raise [NotImplementedError] always, in this base implementation
          def stamp(data, stamps, page_indices: nil)
            raise NotImplementedError, "#{name} must implement .stamp(data, stamps, page_indices:)"
          end

          private

          # Size of the axis-aligned bounding box of a w x h rectangle rotated
          # by `degrees`.
          #
          # @param w [Numeric] unrotated width
          # @param h [Numeric] unrotated height
          # @param degrees [Numeric] rotation angle in degrees
          # @return [Array(Integer, Integer)] [width, height], each rounded up
          def rotated_bounds(w, h, degrees)
            radians = degrees * Math::PI / 180.0
            cos = Math.cos(radians).abs
            sin = Math.sin(radians).abs

            [w * cos + h * sin, w * sin + h * cos].map do |extent|
              extent.round(BOUNDS_PRECISION).ceil
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,116 @@
1
+ require "combine_pdf"
2
+ require "prawn"
3
+ require_relative "base"
4
+
5
module DocPDF
  module Adapters
    module Stampers
      # Stamper adapter that renders the stamps onto a single Prawn page and
      # overlays that page onto the source PDF with CombinePDF.
      class CombinePdf < Base
        class << self
          # Overlays the stamps on the selected pages of a PDF.
          #
          # The overlay page is sized from the first source page's MediaBox
          # (entries [2] and [3] are used as width and height).
          #
          # @param data [String] source PDF bytes
          # @param stamps [Array<Hash>] stamp definitions; a stamp with :text is
          #   rendered as text, anything else as an image
          # @param page_indices [Array<Integer>, nil] zero-based pages to stamp;
          #   nil stamps every page
          # @return [String] stamped PDF bytes
          # @raise [ConversionError] when CombinePDF cannot parse the input or
          #   the document has no pages
          def stamp(data, stamps, page_indices: nil)
            source = ::CombinePDF.parse(data)
            first_page = source.pages.first
            # Without a page there is no MediaBox to size the overlay from.
            raise ConversionError, "CombinePDF failed to stamp PDF: document has no pages" unless first_page

            page_w = first_page[:MediaBox][2].to_f
            page_h = first_page[:MediaBox][3].to_f

            overlay_data = generate_stamp_page(stamps, page_w, page_h)
            overlay = ::CombinePDF.parse(overlay_data).pages.first

            source.pages.each_with_index do |page, idx|
              page << overlay if page_indices.nil? || page_indices.include?(idx)
            end
            source.to_pdf
          rescue ::CombinePDF::ParsingError => e
            raise ConversionError, "CombinePDF failed to stamp PDF: #{e.message}"
          end

          private

          # Shrinks font_size so the text is no wider than 90% of the larger
          # page dimension; returns font_size unchanged when it already fits.
          def fit_font_size(font_size, text, pdf, page_w, page_h)
            max_dim = [page_w, page_h].max * 0.9
            text_w = pdf.width_of(text, size: font_size)
            return font_size if text_w <= max_dim

            (font_size * max_dim / text_w).floor
          end

          # Renders all stamps onto one margin-less Prawn page of the given size
          # and returns the resulting PDF bytes.
          def generate_stamp_page(stamps, page_w, page_h)
            document = ::Prawn::Document.new(page_size: [page_w, page_h], margin: 0) do |pdf|
              stamps.each do |stamp|
                if stamp[:text]
                  render_text_stamp(pdf, stamp)
                else
                  render_image_stamp(pdf, stamp)
                end
              end
            end
            document.render
          end

          # Returns [width, height] for an image stamp. When :height is absent
          # it is derived from :width and the image's native aspect ratio.
          def image_dimensions(stamp, pdf)
            img_w = stamp[:width]
            img_h = stamp[:height]

            unless img_h
              # Block form closes the handle once the image has been inspected.
              _, info = File.open(stamp[:image], "rb") { |file| pdf.build_image_object(file) }
              native_w = info.width.to_f
              native_h = info.height.to_f
              img_h = (img_w.to_f / native_w * native_h).round
            end

            [img_w, img_h]
          end

          # Draws one image stamp at its calculated position with its opacity.
          def render_image_stamp(pdf, stamp)
            page_w = pdf.bounds.width
            page_h = pdf.bounds.height
            img_w, img_h = image_dimensions(stamp, pdf)

            x, y = Watermarker.calculate_position(
              stamp[:position], page_w, page_h, img_w, img_h,
              stamp[:offset_x], stamp[:offset_y]
            )

            # calculate_position returns bottom-left coordinates, but
            # Prawn's image at: expects the top-left corner
            y += img_h

            image_opts = { at: [x, y], width: img_w }
            image_opts[:height] = img_h if stamp[:height]

            pdf.transparent(stamp[:opacity]) do
              pdf.image stamp[:image], **image_opts
            end
          end

          # Draws one text stamp, rotated about the centre of its (rotated)
          # bounding box, with the stamp's font, colour and opacity.
          def render_text_stamp(pdf, stamp)
            page_w = pdf.bounds.width
            page_h = pdf.bounds.height

            pdf.font(stamp[:font])
            font_size = fit_font_size(stamp[:font_size], stamp[:text], pdf, page_w, page_h)
            text_w = pdf.width_of(stamp[:text], size: font_size)
            # The font size is used as the text height for layout.
            text_h = font_size.to_f

            rotation = stamp[:rotation] || 0
            rot_w, rot_h = rotated_bounds(text_w, text_h, rotation)

            x, y = Watermarker.calculate_position(
              stamp[:position], page_w, page_h, rot_w, rot_h,
              stamp[:offset_x], stamp[:offset_y]
            )

            cx = x + rot_w / 2.0
            cy = y + rot_h / 2.0

            pdf.transparent(stamp[:opacity]) do
              pdf.rotate(rotation, origin: [cx, cy]) do
                pdf.fill_color(stamp[:color])
                pdf.draw_text(stamp[:text], at: [cx - text_w / 2.0, cy - text_h / 2.0], size: font_size)
              end
            end
          end
        end
      end
    end
  end
end
@@ -0,0 +1,127 @@
1
+ require "hexapdf"
2
+ require_relative "base"
3
+
4
module DocPDF
  module Adapters
    module Stampers
      # Stamper adapter built on HexaPDF: the stamps are rendered onto a
      # separate one-page document, imported as a form XObject, and drawn as an
      # overlay on the selected pages.
      class Hexapdf < Base
        class << self
          # Overlays the stamps on the selected pages of a PDF.
          #
          # @param data [String] source PDF bytes
          # @param stamps [Array<Hash>] stamp definitions; a stamp with :text is
          #   rendered as text, anything else as an image
          # @param page_indices [Array<Integer>, nil] zero-based pages to stamp;
          #   nil stamps every page
          # @return [String] stamped PDF bytes
          # @raise [ConversionError] on HexaPDF errors or a document without pages
          def stamp(data, stamps, page_indices: nil)
            doc = HexaPDF::Document.new(io: StringIO.new(data))
            first_page = doc.pages[0]
            # Without a page there is no box to size the overlay from.
            raise ConversionError, "HexaPDF failed to stamp PDF: document has no pages" unless first_page

            page_w = first_page.box.width
            page_h = first_page.box.height

            overlay_data = generate_stamp_page(stamps, page_w, page_h)
            overlay_doc = HexaPDF::Document.new(io: StringIO.new(overlay_data))
            overlay_form = doc.import(overlay_doc.pages[0].to_form_xobject)

            doc.pages.each_with_index do |page, idx|
              next unless page_indices.nil? || page_indices.include?(idx)

              page.canvas(type: :overlay).xobject(overlay_form, at: [0, 0])
            end

            write_to_string(doc)
          rescue HexaPDF::Error => e
            raise ConversionError, "HexaPDF failed to stamp PDF: #{e.message}"
          end

          private

          # Shrinks font_size so the text (glyph_units are per-1000 em widths)
          # is no wider than 90% of the larger page dimension.
          def fit_font_size(font_size, glyph_units, page_w, page_h)
            max_dim = [page_w, page_h].max * 0.9
            text_w = glyph_units * font_size / 1000.0
            return font_size if text_w <= max_dim

            (max_dim * 1000.0 / glyph_units).floor
          end

          # Renders all stamps onto a single page of the given size and returns
          # the resulting PDF bytes.
          def generate_stamp_page(stamps, page_w, page_h)
            doc = HexaPDF::Document.new
            page = doc.pages.add([0, 0, page_w, page_h])
            canvas = page.canvas

            stamps.each do |stamp|
              if stamp[:text]
                render_text_stamp(canvas, page, stamp, doc)
              else
                render_image_stamp(canvas, page, stamp, doc)
              end
            end

            write_to_string(doc)
          end

          # Returns [width, height] for an image stamp. When :height is absent
          # it is derived from :width and the image's native aspect ratio.
          def image_dimensions(stamp, doc)
            img_w = stamp[:width]
            img_h = stamp[:height]

            unless img_h
              # Hand HexaPDF the path rather than an open File so no handle is
              # left for this code to close.
              image = doc.images.add(File.path(stamp[:image]))
              native_w = image.width.to_f
              native_h = image.height.to_f
              img_h = (img_w.to_f / native_w * native_h).round
            end

            [img_w, img_h]
          end

          # Draws one image stamp at its calculated position with its opacity.
          def render_image_stamp(canvas, page, stamp, doc)
            page_w = page.box.width
            page_h = page.box.height
            img_w, img_h = image_dimensions(stamp, doc)

            x, y = Watermarker.calculate_position(
              stamp[:position], page_w, page_h, img_w, img_h,
              stamp[:offset_x], stamp[:offset_y]
            )

            image_opts = { at: [x, y], width: img_w }
            image_opts[:height] = img_h if stamp[:height]

            canvas.opacity(fill_alpha: stamp[:opacity]) do
              # Path instead of File.open(...): avoids leaking a file handle.
              canvas.image(File.path(stamp[:image]), **image_opts)
            end
          end

          # Draws one text stamp centred on (and rotated about) the centre of
          # its rotated bounding box.
          def render_text_stamp(canvas, page, stamp, doc)
            page_w = page.box.width
            page_h = page.box.height

            font = doc.fonts.add(stamp[:font])
            glyph_units = font.decode_utf8(stamp[:text]).sum(&:width)
            font_size = fit_font_size(stamp[:font_size], glyph_units, page_w, page_h)
            text_w = glyph_units * font_size / 1000.0
            # The font size is used as the text height for layout.
            text_h = font_size.to_f

            rotation = stamp[:rotation] || 0
            rot_w, rot_h = rotated_bounds(text_w, text_h, rotation)

            x, y = Watermarker.calculate_position(
              stamp[:position], page_w, page_h, rot_w, rot_h,
              stamp[:offset_x], stamp[:offset_y]
            )

            cx = x + rot_w / 2.0
            cy = y + rot_h / 2.0

            canvas.opacity(fill_alpha: stamp[:opacity]) do
              canvas.save_graphics_state do
                # Move the origin to the box centre so rotation happens around it.
                canvas.translate(cx, cy)
                canvas.rotate(rotation) if rotation != 0
                canvas.font(stamp[:font], size: font_size)
                canvas.fill_color(stamp[:color])
                canvas.text(stamp[:text], at: [-text_w / 2.0, -text_h / 2.0])
              end
            end
          end

          # Serialises a HexaPDF document to a String.
          def write_to_string(doc)
            io = StringIO.new
            doc.write(io)
            io.string
          end
        end
      end
    end
  end
end
@@ -0,0 +1,31 @@
1
module DocPDF
  # Holds the gem's settings. Each instance starts from its own copy of the
  # default option hashes.
  class Configuration
    attr_accessor :soffice_path,
                  :stamper,
                  :page_size,
                  :text_options,
                  :watermark_options

    # Default text options. The nested margins array is frozen so the shared
    # defaults cannot be modified through a reference to it.
    TEXT_DEFAULTS = {
      font: "Courier",
      font_size: 10,
      margins: [50, 50, 50, 50].freeze,
      color: "333333",
    }.freeze

    # Default watermark options.
    WATERMARK_DEFAULTS = {
      font: "Helvetica",
      font_size: 72,
      color: "AAAAAA",
      rotation: 45,
    }.freeze

    # Builds a configuration populated with the defaults: "soffice" as the
    # executable, no explicit stamper, "LETTER" page size, and independent
    # copies of TEXT_DEFAULTS / WATERMARK_DEFAULTS.
    def initialize
      @soffice_path = "soffice"
      @stamper = nil
      @page_size = "LETTER"
      @text_options = copy_defaults(TEXT_DEFAULTS)
      @watermark_options = copy_defaults(WATERMARK_DEFAULTS)
    end

    private

    # Copies the hash and each of its values, so in-place edits on one
    # configuration (e.g. `text_options[:margins] << 10`) never reach the
    # constants or other instances. A plain Hash#dup would share the values.
    def copy_defaults(defaults)
      defaults.transform_values(&:dup)
    end
  end
end
@@ -0,0 +1,28 @@
1
module DocPDF
  # Runs a single conversion: normalizes the input, asks ConverterResolver for
  # the adapter matching its MIME type, and wraps the adapter output in a Result.
  class Converter
    # One-shot form of `new(source, **options).call`.
    def self.call(source = nil, **options)
      new(source, **options).call
    end

    # @param source [Object, nil] forwarded to InputNormalizer
    # @param options [Hash] only :io, :data, :filename and :mime_type are
    #   forwarded to InputNormalizer; other keys are ignored
    def initialize(source = nil, **options)
      normalizer_options = options.slice(:io, :data, :filename, :mime_type)
      @input = InputNormalizer.new(source, **normalizer_options)
    end

    # Converts the input.
    #
    # @return [Result] converted data plus a ".pdf" filename
    def call
      adapter = ConverterResolver.resolve(@input.mime_type)
      converted = adapter.convert(@input.data, @input.source_filename)
      Result.new(data: converted, filename: build_filename(@input.filename))
    end

    private

    # Replaces the extension of `original` with ".pdf" and drops any directory
    # part; yields "converted.pdf" when no name is available.
    def build_filename(original)
      if original
        stem = File.basename(original, File.extname(original))
        "#{stem}.pdf"
      else
        "converted.pdf"
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
module DocPDF
  # Registry that maps MIME types to converter adapters. Adapters are tried in
  # registration order; one whose gem or adapter file fails to load is skipped.
  class ConverterResolver
    @adapters = []

    class << self
      # Adds an adapter to the registry.
      #
      # @param name [#to_sym] adapter name (:fallback is the catch-all)
      # @param require_name [String, nil] gem to require before loading
      # @param mime_types [Array<String>] MIME types the adapter handles
      # @param loader [#call] returns the adapter class when called
      def register(name, require_name: nil, mime_types:, loader:)
        @adapters << {
          name: name.to_sym,
          require_name: require_name,
          mime_types: mime_types.map { |type| normalize_mime_type(type) },
          loader: loader,
        }
      end

      # Finds the adapter for a MIME type.
      #
      # Matching ignores case and any parameters after ";" (so
      # "Text/HTML; charset=utf-8" matches "text/html"). Falls back to the
      # :fallback adapter when nothing matches or every match fails to load.
      #
      # @param mime_type [String, nil]
      # @return [Object] whatever the selected adapter's loader returns
      # @raise [AdapterNotFoundError] when no fallback is registered
      def resolve(mime_type)
        wanted = normalize_mime_type(mime_type)

        @adapters.each do |entry|
          next unless entry[:mime_types].include?(wanted)

          begin
            require entry[:require_name] if entry[:require_name]
            return entry[:loader].call
          rescue LoadError
            # Dependency unavailable: try the next adapter for this type.
            next
          end
        end

        resolve_fallback
      end

      private

      # Canonical form used for lookup: parameters dropped, surrounding
      # whitespace stripped, lower-cased. Non-string values pass through.
      def normalize_mime_type(mime_type)
        return mime_type unless mime_type.respond_to?(:to_str)

        mime_type.to_str.split(";", 2).first.to_s.strip.downcase
      end

      # Loads the adapter registered under :fallback.
      def resolve_fallback
        entry = @adapters.find { |e| e[:name] == :fallback }
        raise AdapterNotFoundError, "No converter found and no fallback registered" unless entry

        entry[:loader].call
      end
    end

    # Registration order determines priority for shared mime types
    register :passthrough,
      mime_types: %w[application/pdf],
      loader: -> { require "docpdf/adapters/converters/passthrough"; Adapters::Converters::Passthrough }

    register :soffice,
      mime_types: %w[
        application/msword
        application/vnd.openxmlformats-officedocument.wordprocessingml.document
        application/vnd.ms-excel
        application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
        application/vnd.ms-powerpoint
        application/vnd.openxmlformats-officedocument.presentationml.presentation
        application/vnd.oasis.opendocument.text
        application/vnd.oasis.opendocument.spreadsheet
        application/vnd.oasis.opendocument.presentation
        text/csv
        text/html
        text/rtf
        application/rtf
      ],
      loader: -> { require "docpdf/adapters/converters/soffice"; Adapters::Converters::Soffice }

    register :prawn,
      require_name: "prawn",
      mime_types: %w[text/plain],
      loader: -> { require "docpdf/adapters/converters/prawn"; Adapters::Converters::Prawn }

    register :hexapdf,
      require_name: "hexapdf",
      mime_types: %w[text/plain],
      loader: -> { require "docpdf/adapters/converters/hexapdf"; Adapters::Converters::Hexapdf }

    register :rmagick,
      require_name: "rmagick",
      mime_types: %w[image/jpeg image/png image/heic image/heif image/webp],
      loader: -> { require "docpdf/adapters/converters/rmagick"; Adapters::Converters::Rmagick }

    register :mini_magick,
      require_name: "mini_magick",
      mime_types: %w[image/jpeg image/png image/heic image/heif image/webp],
      loader: -> { require "docpdf/adapters/converters/mini_magick"; Adapters::Converters::MiniMagick }

    register :fallback,
      mime_types: [],
      loader: -> { require "docpdf/adapters/converters/fallback"; Adapters::Converters::Fallback }
  end
end
@@ -0,0 +1,6 @@
1
module DocPDF
  # Root of the gem's error hierarchy; every error below inherits from it.
  class Error < StandardError
  end

  # A document could not be converted or stamped.
  class ConversionError < Error
  end

  # The LibreOffice (soffice) executable could not be run.
  class SofficeNotFoundError < Error
  end

  # No suitable converter or stamper adapter could be resolved.
  class AdapterNotFoundError < Error
  end
end
@@ -0,0 +1,76 @@
1
module DocPDF
  # Turns the various accepted inputs (file path, Pathname, IO, object with
  # #download, object with #data, or io:/data: keywords) into raw bytes plus a
  # filename and MIME type.
  class InputNormalizer
    attr_reader :data, :filename, :source_filename, :mime_type

    # @param source [String, Pathname, #read, #download, #data, nil] a String
    #   naming an existing file is read from disk, any other String is taken
    #   as the data itself
    # @param io [#read, nil] used when source is nil
    # @param data [String, nil] used when source and io are nil
    # @param filename [String, nil] display-name override
    # @param mime_type [String, nil] explicit MIME type; skips detection
    # @raise [ArgumentError] when no usable input is given
    def initialize(source = nil, io: nil, data: nil, filename: nil, mime_type: nil)
      @data, source_filename, detected_mime = extract(source, io, data)

      # MIME detection uses the source's original filename, not the user's
      # display-name override, unless the source has no filename at all
      # (e.g., raw data: with a filename: hint).
      @source_filename = source_filename
      @filename = filename || source_filename
      @mime_type = mime_type || detected_mime || MimeDetector.detect(source_filename || filename)
    end

    private

    # Dispatches on the kind of source.
    # @return [Array(String, String|nil, String|nil)] data, filename, MIME type
    def extract(source, io, data)
      case source
      # Checked first because Pathname also responds to #read. Guarded with
      # defined? so the check cannot raise NameError when the pathname library
      # has not been loaded (in which case source cannot be a Pathname).
      when ->(s) { defined?(::Pathname) && s.is_a?(::Pathname) } then extract_from_pathname(source)
      when String then extract_from_string(source)
      when ->(s) { s.respond_to?(:read) } then extract_from_io(source)
      when ->(s) { s.respond_to?(:download) } then extract_from_active_storage(source)
      when ->(s) { s.respond_to?(:data) } then extract_from_data_wrapper(source)
      when nil then extract_from_kwargs(io, data)
      else
        raise ArgumentError, "Provide a file path, IO object, io:, or data:"
      end
    end

    # Result of calling `method` on source, or nil when it is not supported.
    def extract_filename(source, method)
      source.respond_to?(method) ? source.send(method) : nil
    end

    # Source responding to #download; filename and content type come from its
    # #blob when it has one.
    def extract_from_active_storage(source)
      blob = source.respond_to?(:blob) ? source.blob : nil
      [source.download, blob&.filename&.to_s, blob&.content_type]
    end

    # Source responding to #data, optionally also #name and #mime_type.
    def extract_from_data_wrapper(source)
      source_filename = (source.respond_to?(:name) && source.name) || nil
      detected_mime = source.respond_to?(:mime_type) ? source.mime_type : nil
      [source.data, source_filename, detected_mime]
    end

    # IO-like source; the filename is taken from #original_filename if present.
    def extract_from_io(source)
      [source.read, extract_filename(source, :original_filename), nil]
    end

    # Input given through the io: / data: keywords.
    def extract_from_kwargs(io, data)
      if io
        [io.read, extract_filename(io, :original_filename), nil]
      elsif data
        [data, nil, nil]
      else
        raise ArgumentError, "Provide a file path, IO object, io:, or data:"
      end
    end

    def extract_from_pathname(source)
      [File.binread(source.to_s), File.basename(source.to_s), nil]
    end

    # A String is a path when it names an existing file, raw data otherwise.
    def extract_from_string(source)
      if file_path?(source)
        [File.binread(source), File.basename(source), nil]
      else
        [source, nil, nil]
      end
    end

    # True when the string contains none of the bytes 0x00-0x08 and names an
    # existing filesystem entry.
    def file_path?(string)
      # Regexp matching raises ArgumentError on a string whose bytes are
      # invalid for its encoding (typical for binary data tagged UTF-8), so
      # such strings are inspected through a binary-encoded copy.
      probe = string.valid_encoding? ? string : string.b
      !probe.match?(/[\x00-\x08]/) && File.exist?(string)
    end
  end
end
@@ -0,0 +1,36 @@
1
module DocPDF
  # Guesses a MIME type from a filename's extension.
  class MimeDetector
    # Lower-case extension (with leading dot) => MIME type.
    EXTENSION_MAP = {
      ".pdf"  => "application/pdf",
      ".doc"  => "application/msword",
      ".docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
      ".xls"  => "application/vnd.ms-excel",
      ".xlsx" => "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
      ".ppt"  => "application/vnd.ms-powerpoint",
      ".pptx" => "application/vnd.openxmlformats-officedocument.presentationml.presentation",
      ".odt"  => "application/vnd.oasis.opendocument.text",
      ".ods"  => "application/vnd.oasis.opendocument.spreadsheet",
      ".odp"  => "application/vnd.oasis.opendocument.presentation",
      ".csv"  => "text/csv",
      ".html" => "text/html",
      ".htm"  => "text/html",
      ".txt"  => "text/plain",
      ".rtf"  => "text/rtf",
      ".jpg"  => "image/jpeg",
      ".jpeg" => "image/jpeg",
      ".png"  => "image/png",
      ".heic" => "image/heic",
      ".heif" => "image/heif",
      ".webp" => "image/webp",
    }.freeze

    # Looks up the MIME type for a filename; the extension is matched
    # case-insensitively.
    #
    # @param filename [String, nil]
    # @return [String, nil] nil when filename is nil or the extension is unknown
    def self.detect(filename)
      return if filename.nil?

      extension = File.extname(filename)
      EXTENSION_MAP[extension.downcase]
    end
  end
end
@@ -0,0 +1,14 @@
1
module DocPDF
  # Outcome of a conversion: the produced bytes and a suggested filename.
  class Result
    attr_reader :data
    attr_reader :filename

    # @param data [String] produced document bytes
    # @param filename [String, nil] suggested filename
    def initialize(data:, filename: nil)
      @data, @filename = data, filename
    end

    # Convenience wrapper: passes this result, followed by the given stamps
    # and options, to DocPDF.watermark and returns whatever it returns.
    def watermark(*stamps, **options)
      DocPDF.watermark(self, *stamps, **options)
    end
  end
end
@@ -0,0 +1,43 @@
1
module DocPDF
  # Registry of stamper adapters. Uses the stamper named in the configuration
  # when one is set; otherwise returns the first registered adapter that loads.
  class StamperResolver
    @adapters = []

    class << self
      # Adds a stamper to the registry.
      #
      # @param name [#to_sym] adapter name
      # @param loader [#call] returns the adapter class when called
      def register(name, loader:)
        @adapters << { name: name.to_sym, loader: loader }
      end

      # Resolves the stamper adapter to use.
      #
      # @return [Object] whatever the selected adapter's loader returns
      # @raise [AdapterNotFoundError] when the configured stamper is unknown,
      #   its dependencies cannot be loaded, or no adapter loads at all
      def resolve
        configured = DocPDF.configuration.stamper
        return auto_detect unless configured

        # Names are stored as symbols by `register`, so a configured String
        # ("hexapdf") must be symbolized to match.
        wanted = configured.respond_to?(:to_sym) ? configured.to_sym : configured
        entry = @adapters.find { |candidate| candidate[:name] == wanted }
        raise AdapterNotFoundError, "Unknown stamper: #{configured}" unless entry

        begin
          entry[:loader].call
        rescue LoadError => e
          raise AdapterNotFoundError, "Stamper '#{configured}' requires gems that are not available: #{e.message}"
        end
      end

      private

      # Tries each registered adapter in order, skipping those whose
      # dependencies fail to load.
      def auto_detect
        @adapters.each do |entry|
          begin
            return entry[:loader].call
          rescue LoadError
            next
          end
        end
        raise AdapterNotFoundError, "No stamper available. Add 'hexapdf' or 'combine_pdf' (with 'prawn') to your Gemfile."
      end
    end

    register :hexapdf,
      loader: -> { require "hexapdf"; require "docpdf/adapters/stampers/hexapdf"; Adapters::Stampers::Hexapdf }

    register :combine_pdf,
      loader: -> { require "combine_pdf"; require "prawn"; require "docpdf/adapters/stampers/combine_pdf"; Adapters::Stampers::CombinePdf }
  end
end
@@ -0,0 +1,3 @@
1
module DocPDF
  # Version string of this release of the gem.
  VERSION = "0.1.0"
end