docpdf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +41 -0
- data/LICENSE.txt +21 -0
- data/README.md +327 -0
- data/lib/docpdf/adapters/converters/base.rb +15 -0
- data/lib/docpdf/adapters/converters/fallback.rb +45 -0
- data/lib/docpdf/adapters/converters/hexapdf.rb +69 -0
- data/lib/docpdf/adapters/converters/mini_magick.rb +31 -0
- data/lib/docpdf/adapters/converters/passthrough.rb +17 -0
- data/lib/docpdf/adapters/converters/prawn.rb +26 -0
- data/lib/docpdf/adapters/converters/rmagick.rb +30 -0
- data/lib/docpdf/adapters/converters/soffice.rb +61 -0
- data/lib/docpdf/adapters/stampers/base.rb +22 -0
- data/lib/docpdf/adapters/stampers/combine_pdf.rb +116 -0
- data/lib/docpdf/adapters/stampers/hexapdf.rb +127 -0
- data/lib/docpdf/configuration.rb +31 -0
- data/lib/docpdf/converter.rb +28 -0
- data/lib/docpdf/converter_resolver.rb +77 -0
- data/lib/docpdf/errors.rb +6 -0
- data/lib/docpdf/input_normalizer.rb +76 -0
- data/lib/docpdf/mime_detector.rb +36 -0
- data/lib/docpdf/result.rb +14 -0
- data/lib/docpdf/stamper_resolver.rb +43 -0
- data/lib/docpdf/version.rb +3 -0
- data/lib/docpdf/watermarker.rb +164 -0
- data/lib/docpdf.rb +116 -0
- metadata +73 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
require "pathname"
require "stringio"

module DocPDF
  # Applies one or more watermark "stamps" (plain Hashes) to a PDF.
  #
  # The class-level .call resolves the input into raw PDF bytes, fills in
  # stamp defaults and wraps the outcome in a Result. The instance-level #call
  # hands the bytes to the stamper returned by StamperResolver.resolve, either
  # in one pass (every stamp targets :all pages) or once per group of pages
  # that share the same list of stamps.
  class Watermarker
    POSITIONS = %i[center top bottom left right top_left top_right bottom_left bottom_right].freeze

    STAMP_DEFAULTS = {
      opacity: 0.1,
      position: :center,
      width: 250,
      offset_x: 0,
      offset_y: 0,
      pages: :all,
    }.freeze

    # Bytes that mark a String as raw data rather than a file path. The NUL
    # byte matters most: File.exist? raises ArgumentError on it.
    CONTROL_BYTE_PATTERN = /[\x00-\x08]/

    # Matches "/Type /Page" but not longer names such as "/Pages" or
    # "/PageLabel". Only used when no PDF library is loaded.
    PAGE_OBJECT_PATTERN = %r{/Type\s*/Page(?![A-Za-z])}

    # Watermark configuration keys copied onto text stamps as defaults.
    TEXT_DEFAULT_KEYS = %i[font font_size color rotation].freeze

    private_constant :CONTROL_BYTE_PATTERN, :PAGE_OBJECT_PATTERN, :TEXT_DEFAULT_KEYS

    class << self
      # Computes the lower-left coordinate at which to draw a stamp.
      #
      # position         - One of POSITIONS; anything else is treated as :center.
      # page_w, page_h   - Page dimensions.
      # img_w, img_h     - Stamp dimensions.
      # offset_x/offset_y - Added to the computed coordinate.
      #
      # The origin is the bottom-left corner (:bottom_left maps to [0, 0]).
      # Returns [x, y]. With Integer arguments the centering division is an
      # integer division.
      def calculate_position(position, page_w, page_h, img_w, img_h, offset_x, offset_y)
        x = case position
            when :left, :top_left, :bottom_left then 0
            when :right, :top_right, :bottom_right then page_w - img_w
            else (page_w - img_w) / 2
            end

        y = case position
            when :bottom, :bottom_left, :bottom_right then 0
            when :top, :top_left, :top_right then page_h - img_h
            else (page_h - img_h) / 2
            end

        [x + offset_x, y + offset_y]
      end

      # Stamps a PDF and returns a Result.
      #
      # source   - Result, Pathname, String (existing file path or raw PDF
      #            bytes), or any object responding to #read. May be nil when
      #            io: or data: is given instead.
      # stamps   - Zero or more stamp Hashes. With no stamps the input bytes
      #            are returned untouched.
      # filename - Overrides the filename derived from the source.
      # input_options - Only :io and :data are consulted.
      #
      # Raises ArgumentError when no readable input was supplied.
      def call(source = nil, *stamps, filename: nil, **input_options)
        pdf_bytes, resolved_filename = resolve_input(source, filename, input_options)

        output =
          if stamps.empty?
            pdf_bytes
          else
            new(pdf_bytes, stamps.map { |stamp| normalize_stamp(stamp) }).call
          end

        Result.new(data: output, filename: resolved_filename)
      end

      private

      # Merges STAMP_DEFAULTS under the caller's stamp. Stamps with a :text
      # entry additionally receive font/font_size/color/rotation from
      # DocPDF.configuration.watermark_options for any key not given.
      # String keys are accepted and converted to symbols so stamps built from
      # parsed JSON/params are not silently ignored.
      def normalize_stamp(stamp)
        merged = STAMP_DEFAULTS.merge(stamp).transform_keys do |key|
          key.respond_to?(:to_sym) ? key.to_sym : key
        end
        return merged unless merged[:text]

        configured = DocPDF.configuration.watermark_options
        TEXT_DEFAULT_KEYS.to_h { |key| [key, configured[key]] }.merge(merged)
      end

      # Turns any supported input into [pdf_bytes, filename_or_nil].
      def resolve_input(source, filename, input_options)
        case source
        when Result
          [source.data, filename || source.filename]
        when Pathname
          path = source.to_s
          [File.binread(path), filename || File.basename(path)]
        when String
          if existing_path?(source)
            [File.binread(source), filename || File.basename(source)]
          else
            # Not a path on disk: treat the string itself as the PDF bytes.
            [source, filename]
          end
        when nil
          if input_options[:io]
            read_io(input_options[:io], filename)
          elsif input_options[:data]
            [input_options[:data], filename]
          else
            raise ArgumentError, "Provide a file path, IO object, Result, or data:"
          end
        else
          unless source.respond_to?(:read)
            raise ArgumentError, "Cannot read PDF from #{source.class}. Provide a file path, IO object, Result, or data:"
          end

          read_io(source, filename)
        end
      end

      # True when the string names an existing file. The control-byte check
      # runs on a binary view so raw PDF bytes tagged with a text encoding
      # (and therefore invalid in it) do not raise during the regexp match.
      def existing_path?(candidate)
        !candidate.b.match?(CONTROL_BYTE_PATTERN) && File.exist?(candidate)
      end

      # Reads an IO-like object, preferring an explicit filename and falling
      # back to #original_filename when the object offers one.
      def read_io(io, filename)
        resolved_filename = filename || (io.respond_to?(:original_filename) && io.original_filename) || nil
        [io.read, resolved_filename]
      end
    end

    # pdf_bytes - Raw PDF data as a String.
    # stamps    - Stamp Hashes, already merged with defaults (see .call).
    def initialize(pdf_bytes, stamps)
      @pdf_bytes = pdf_bytes
      @stamps = stamps
    end

    # Applies the stamps and returns whatever the stamper returns
    # (presumably the stamped PDF bytes; the adapters are not visible here).
    def call
      stamper = StamperResolver.resolve
      return stamper.stamp(@pdf_bytes, @stamps) if all_stamps_target_all_pages?

      stamp_per_page(stamper)
    end

    private

    def all_stamps_target_all_pages?
      @stamps.all? { |stamp| stamp[:pages] == :all }
    end

    # Groups pages that receive an identical list of stamps so each group
    # needs only one stamper pass.
    #
    # Returns an Array of [page_indices, stamps] pairs (0-based indices),
    # ordered by the first page of each group. Pages without stamps are
    # omitted.
    def build_page_stamp_map(page_count)
      per_page = Array.new(page_count) { [] }

      @stamps.each do |stamp|
        target_pages(stamp[:pages], page_count).each { |idx| per_page[idx] << stamp }
      end

      per_page.each_index
              .reject { |idx| per_page[idx].empty? }
              .group_by { |idx| per_page[idx] }
              .map { |stamps, indices| [indices, stamps] }
    end

    # Counts pages with whichever PDF library is loaded, falling back to a
    # scan of the raw bytes for page objects.
    def pdf_page_count
      if defined?(CombinePDF)
        CombinePDF.parse(@pdf_bytes).pages.length
      elsif defined?(HexaPDF)
        HexaPDF::Document.new(io: StringIO.new(@pdf_bytes)).pages.count
      else
        # Binary view: PDF data is rarely valid in the string's tagged
        # encoding, and scanning a broken string raises ArgumentError.
        @pdf_bytes.b.scan(PAGE_OBJECT_PATTERN).length
      end
    end

    # Runs the stamper once per page group, feeding each pass the output of
    # the previous one.
    def stamp_per_page(stamper)
      build_page_stamp_map(pdf_page_count).reduce(@pdf_bytes) do |bytes, (page_indices, stamps_for_pages)|
        stamper.stamp(bytes, stamps_for_pages, page_indices: page_indices)
      end
    end

    # Translates a :pages option into 0-based page indices.
    #
    # pages_option - :all, :first, :last, :odd, :even, a 1-based Integer, an
    #                Array of 1-based Integers, or a Range of 1-based page
    #                numbers (endless/beginless ranges are allowed). Any other
    #                value selects every page.
    #
    # Page numbers outside 1..page_count are dropped rather than wrapping
    # around (page 0) or crashing later (pages past the end), and duplicates
    # are collapsed. Returns an Array of Integers.
    def target_pages(pages_option, page_count)
      every_page = (0...page_count)

      candidates =
        case pages_option
        when :first then [0]
        when :last then [page_count - 1]
        when :odd then every_page.select(&:even?) # odd page numbers sit at even indices
        when :even then every_page.select(&:odd?)
        when Integer then [pages_option - 1]
        when Array then pages_option.map { |page| page - 1 }
        when Range then every_page.select { |idx| pages_option.cover?(idx + 1) }
        else every_page.to_a
        end

      candidates.uniq.select { |idx| idx.is_a?(Integer) && every_page.cover?(idx) }
    end
  end
end
|
data/lib/docpdf.rb
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
require "docpdf/version"
|
|
2
|
+
require "docpdf/configuration"
|
|
3
|
+
require "docpdf/errors"
|
|
4
|
+
require "docpdf/result"
|
|
5
|
+
require "docpdf/input_normalizer"
|
|
6
|
+
require "docpdf/mime_detector"
|
|
7
|
+
require "docpdf/converter_resolver"
|
|
8
|
+
require "docpdf/stamper_resolver"
|
|
9
|
+
require "docpdf/converter"
|
|
10
|
+
require "docpdf/watermarker"
|
|
11
|
+
|
|
12
|
+
# Multi-format document-to-PDF converter with pluggable adapters and optional
# watermarking. Supports Word, Excel, PowerPoint, OpenDocument, CSV, HTML, RTF,
# plain text, images, and PDF passthrough.
#
#   result = DocPDF.convert("report.docx")
#   result = DocPDF.convert(uploaded_file, filename: "report.pdf")
#   result = DocPDF.convert("report.docx").watermark({ image: "logo.png", opacity: 0.1 })
module DocPDF
  class << self
    # Returns the global Configuration instance, creating it on first use.
    #
    #   DocPDF.configuration.soffice_path # => "soffice"
    def configuration
      @configuration ||= Configuration.new
    end

    # Yields the global Configuration instance for modification.
    #
    # Options:
    #   soffice_path      - Path to the LibreOffice binary (default: "soffice")
    #   stamper           - Stamper adapter: :hexapdf, :combine_pdf, or nil for auto-detect (default: nil)
    #   page_size         - Page size for text rendering: "LETTER", "A4", etc. (default: "LETTER")
    #   text_options      - Hash of plain text conversion settings:
    #                       font: "Courier", font_size: 10, margins: [50, 50, 50, 50], color: "333333"
    #   watermark_options - Hash of text watermark defaults:
    #                       font: "Helvetica", font_size: 72, color: "AAAAAA", rotation: 45
    #
    #   DocPDF.configure do |c|
    #     c.soffice_path = "/usr/bin/soffice"
    #     c.stamper = :hexapdf
    #     c.page_size = "A4"
    #     c.text_options = { font: "Helvetica", font_size: 12, margins: [72, 72, 72, 72], color: "000000" }
    #     c.watermark_options = { font: "Times", font_size: 96, color: "FF0000", rotation: 30 }
    #   end
    #
    # Returns the block's value. Called without a block it returns the
    # Configuration instead of raising LocalJumpError.
    def configure
      return configuration unless block_given?

      yield(configuration)
    end

    # Converts a document to PDF. Accepts a file path, Pathname, IO object,
    # Active Storage attachment, Dragonfly attachment, or raw binary data.
    # Returns a Result with the PDF bytes and filename.
    #
    # Options:
    #   io        - An IO object to read from (alternative to positional source)
    #   data      - Raw binary string to convert (alternative to positional source)
    #   filename  - Output filename override (default: derived from source)
    #   mime_type - MIME type override (default: detected from filename extension)
    #
    #   result = DocPDF.convert("report.docx")
    #   result = DocPDF.convert(upload)
    #   result = DocPDF.convert(data: raw_bytes, mime_type: "image/png", filename: "photo.png")
    #
    #   result.data     # => PDF binary string
    #   result.filename # => "report.pdf"
    def convert(source = nil, **options)
      Converter.call(source, **options)
    end

    # Resets configuration to defaults. Primarily used in tests.
    def reset_configuration!
      @configuration = Configuration.new
    end

    # Applies watermark stamps to a PDF. The source may be a Result, a
    # Pathname, a String (path to an existing file, or raw PDF bytes), any
    # object responding to #read, or nil together with io: / data:.
    # Returns a Result.
    #
    # Stamps are positional Hashes, so write them with braces. Keyword-style
    # arguments such as `watermark("doc.pdf", text: "DRAFT")` are collected as
    # options, not as a stamp.
    #
    # Each stamp is a Hash with either an :image or :text key:
    #
    # Image stamp keys:
    #   image    - Path to the watermark image file
    #   opacity  - Transparency level, 0.0 to 1.0 (default: 0.1)
    #   position - Placement on the page (default: :center)
    #              One of: :center, :top, :bottom, :left, :right, :top_left, :top_right, :bottom_left, :bottom_right
    #   width    - Image width in points (default: 250)
    #   height   - Image height in points (default: proportional to width)
    #   offset_x - Horizontal offset in points (default: 0)
    #   offset_y - Vertical offset in points (default: 0)
    #   pages    - Which pages to stamp (default: :all)
    #              One of: :all, :first, :last, :odd, :even, Integer, Array, or Range
    #              (page numbers are 1-based)
    #
    # Text stamp keys:
    #   text      - The text to render (e.g., "DRAFT", "CONFIDENTIAL")
    #   opacity   - Transparency level, 0.0 to 1.0 (default: 0.1)
    #   position  - Placement on the page (default: :center)
    #   font      - Font name (default from watermark_options: "Helvetica")
    #   font_size - Font size in points (default from watermark_options: 72)
    #   color     - Hex color string (default from watermark_options: "AAAAAA")
    #   rotation  - Rotation in degrees, counter-clockwise (default from watermark_options: 45)
    #   offset_x  - Horizontal offset in points (default: 0)
    #   offset_y  - Vertical offset in points (default: 0)
    #   pages     - Which pages to stamp (default: :all)
    #
    # Text is auto-scaled to fit the page when the font size would cause overflow.
    #
    #   result = DocPDF.watermark("doc.pdf",
    #     { image: "logo.png", opacity: 0.1, position: :top_right, width: 80 },
    #     { text: "DRAFT", opacity: 0.1, position: :center, rotation: 45 })
    #
    # Chainable from a convert result:
    #   DocPDF.convert("report.docx").watermark({ text: "DRAFT", opacity: 0.1 })
    def watermark(source = nil, *stamps, **options)
      Watermarker.call(source, *stamps, **options)
    end
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: docpdf
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Velocity Labs, LLC
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies: []
|
|
12
|
+
description: Multi-format document-to-PDF converter with pluggable adapters and zero
|
|
13
|
+
hard dependencies. Supports Word, Excel, PowerPoint, OpenDocument, CSV, HTML, RTF,
|
|
14
|
+
plain text, images, and PDF passthrough. Optional watermarking with position grid,
|
|
15
|
+
offsets, and per-page targeting.
|
|
16
|
+
email:
|
|
17
|
+
- admin@velocitylabs.io
|
|
18
|
+
executables: []
|
|
19
|
+
extensions: []
|
|
20
|
+
extra_rdoc_files: []
|
|
21
|
+
files:
|
|
22
|
+
- CHANGELOG.md
|
|
23
|
+
- LICENSE.txt
|
|
24
|
+
- README.md
|
|
25
|
+
- lib/docpdf.rb
|
|
26
|
+
- lib/docpdf/adapters/converters/base.rb
|
|
27
|
+
- lib/docpdf/adapters/converters/fallback.rb
|
|
28
|
+
- lib/docpdf/adapters/converters/hexapdf.rb
|
|
29
|
+
- lib/docpdf/adapters/converters/mini_magick.rb
|
|
30
|
+
- lib/docpdf/adapters/converters/passthrough.rb
|
|
31
|
+
- lib/docpdf/adapters/converters/prawn.rb
|
|
32
|
+
- lib/docpdf/adapters/converters/rmagick.rb
|
|
33
|
+
- lib/docpdf/adapters/converters/soffice.rb
|
|
34
|
+
- lib/docpdf/adapters/stampers/base.rb
|
|
35
|
+
- lib/docpdf/adapters/stampers/combine_pdf.rb
|
|
36
|
+
- lib/docpdf/adapters/stampers/hexapdf.rb
|
|
37
|
+
- lib/docpdf/configuration.rb
|
|
38
|
+
- lib/docpdf/converter.rb
|
|
39
|
+
- lib/docpdf/converter_resolver.rb
|
|
40
|
+
- lib/docpdf/errors.rb
|
|
41
|
+
- lib/docpdf/input_normalizer.rb
|
|
42
|
+
- lib/docpdf/mime_detector.rb
|
|
43
|
+
- lib/docpdf/result.rb
|
|
44
|
+
- lib/docpdf/stamper_resolver.rb
|
|
45
|
+
- lib/docpdf/version.rb
|
|
46
|
+
- lib/docpdf/watermarker.rb
|
|
47
|
+
homepage: https://github.com/velocity-labs/docpdf
|
|
48
|
+
licenses:
|
|
49
|
+
- MIT
|
|
50
|
+
metadata:
|
|
51
|
+
homepage_uri: https://github.com/velocity-labs/docpdf
|
|
52
|
+
source_code_uri: https://github.com/velocity-labs/docpdf
|
|
53
|
+
changelog_uri: https://github.com/velocity-labs/docpdf/blob/main/CHANGELOG.md
|
|
54
|
+
rubygems_mfa_required: 'true'
|
|
55
|
+
rdoc_options: []
|
|
56
|
+
require_paths:
|
|
57
|
+
- lib
|
|
58
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
59
|
+
requirements:
|
|
60
|
+
- - ">="
|
|
61
|
+
- !ruby/object:Gem::Version
|
|
62
|
+
version: '3.3'
|
|
63
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '0'
|
|
68
|
+
requirements: []
|
|
69
|
+
rubygems_version: 4.0.6
|
|
70
|
+
specification_version: 4
|
|
71
|
+
summary: Convert documents (Word, Excel, PowerPoint, images) to PDF with optional
|
|
72
|
+
watermarking
|
|
73
|
+
test_files: []
|