docpdf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,164 @@
1
# Loaded here so this file does not rely on another file having required
# them first (Pathname is matched in resolve_input, StringIO is used for the
# HexaPDF page count).
require "pathname"
require "stringio"

module DocPDF
  # Applies image and/or text stamps ("watermarks") to an existing PDF.
  #
  # The class-level .call resolves the input into raw PDF bytes, merges every
  # stamp with STAMP_DEFAULTS and delegates the actual drawing to the stamper
  # adapter returned by StamperResolver.resolve.
  class Watermarker
    # Placement names understood by .calculate_position.
    POSITIONS = %i[center top bottom left right top_left top_right bottom_left bottom_right].freeze

    # Values merged underneath every stamp hash; keys supplied by the caller win.
    STAMP_DEFAULTS = {
      opacity: 0.1,
      position: :center,
      width: 250,
      offset_x: 0,
      offset_y: 0,
      pages: :all,
    }.freeze

    class << self
      # Computes where a stamp of size img_w x img_h goes on a page of size
      # page_w x page_h.
      #
      # The y axis grows upwards: :top yields page_h - img_h and :bottom
      # yields 0. Unknown position names fall back to :center. The offsets are
      # added to the computed coordinates.
      #
      # Returns a two-element Array [x, y].
      def calculate_position(position, page_w, page_h, img_w, img_h, offset_x, offset_y)
        x, y = case position
               when :center       then [(page_w - img_w) / 2, (page_h - img_h) / 2]
               when :top          then [(page_w - img_w) / 2, page_h - img_h]
               when :bottom       then [(page_w - img_w) / 2, 0]
               when :left         then [0, (page_h - img_h) / 2]
               when :right        then [page_w - img_w, (page_h - img_h) / 2]
               when :top_left     then [0, page_h - img_h]
               when :top_right    then [page_w - img_w, page_h - img_h]
               when :bottom_left  then [0, 0]
               when :bottom_right then [page_w - img_w, 0]
               else [(page_w - img_w) / 2, (page_h - img_h) / 2]
               end
        [x + offset_x, y + offset_y]
      end

      # Stamps a PDF and wraps the outcome in a Result.
      #
      # source        - Result, Pathname, String (an existing file path or raw
      #                 PDF bytes), an object responding to #read, or nil when
      #                 io: / data: is given instead.
      # stamps        - Zero or more stamp Hashes. With no stamps the input
      #                 bytes are returned unchanged.
      # filename      - Overrides the filename derived from the source.
      # input_options - :io (object responding to #read) or :data (raw bytes),
      #                 consulted only when source is nil.
      #
      # Raises ArgumentError when no readable input is supplied.
      def call(source = nil, *stamps, filename: nil, **input_options)
        pdf_bytes, resolved_filename = resolve_input(source, filename, input_options)

        # Nothing to draw: hand the input back untouched.
        return Result.new(data: pdf_bytes, filename: resolved_filename) if stamps.empty?

        normalized = stamps.map { |stamp| normalize_stamp(stamp) }
        stamped = new(pdf_bytes, normalized).call
        Result.new(data: stamped, filename: resolved_filename)
      end

      private

      # Fills in STAMP_DEFAULTS and, for text stamps, the font settings from
      # DocPDF.configuration.watermark_options. Keys present in the stamp
      # always take precedence over both sets of defaults.
      def normalize_stamp(stamp)
        merged = STAMP_DEFAULTS.merge(stamp)
        return merged unless merged[:text]

        wm = DocPDF.configuration.watermark_options
        { font: wm[:font], font_size: wm[:font_size], color: wm[:color], rotation: wm[:rotation] }.merge(merged)
      end

      # Turns any supported source into [pdf_bytes, filename].
      #
      # An explicit filename always wins over one derived from the source.
      # Raises ArgumentError when the source cannot be read.
      def resolve_input(source, filename, input_options)
        case source
        when Result
          [source.data, filename || source.filename]
        when Pathname
          [File.binread(source.to_s), filename || File.basename(source.to_s)]
        when String
          if file_path?(source)
            [File.binread(source), filename || File.basename(source)]
          else
            # Not an existing path, so the string itself is the PDF data.
            [source, filename]
          end
        when nil
          if input_options[:io]
            read_io(input_options[:io], filename)
          elsif input_options[:data]
            [input_options[:data], filename]
          else
            raise ArgumentError, "Provide a file path, IO object, Result, or data:"
          end
        else
          unless source.respond_to?(:read)
            raise ArgumentError, "Cannot read PDF from #{source.class}. Provide a file path, IO object, Result, or data:"
          end

          read_io(source, filename)
        end
      end

      # True when the string names an existing file rather than holding data.
      #
      # The control-byte test runs on a binary view of the string: raw PDF
      # bytes tagged as UTF-8 are usually not valid UTF-8, and matching a
      # regexp against such a string raises ArgumentError. Strings containing
      # bytes 0x00-0x08 are never passed to File.exist?, which rejects NUL.
      def file_path?(candidate)
        !candidate.b.match?(/[\x00-\x08]/) && File.exist?(candidate)
      end

      # Reads an IO-like object and derives a filename from
      # #original_filename when the object offers one and none was given.
      def read_io(io, filename)
        resolved_filename = filename || (io.respond_to?(:original_filename) && io.original_filename) || nil
        [io.read, resolved_filename]
      end
    end

    # pdf_bytes - String holding the PDF document.
    # stamps    - Array of stamp Hashes, each carrying at least a :pages key
    #             (the class-level .call passes normalized stamps).
    def initialize(pdf_bytes, stamps)
      @pdf_bytes = pdf_bytes
      @stamps = stamps
    end

    # Applies all stamps and returns whatever the stamper returns.
    #
    # When every stamp targets all pages the stamper is invoked once for the
    # whole document; otherwise it is invoked once per group of pages that
    # share the same set of stamps.
    def call
      stamper = StamperResolver.resolve

      if all_stamps_target_all_pages?
        stamper.stamp(@pdf_bytes, @stamps)
      else
        stamp_per_page(stamper)
      end
    end

    private

    def all_stamps_target_all_pages?
      @stamps.all? { |stamp| stamp[:pages] == :all }
    end

    # Groups pages by the exact list of stamps they receive.
    #
    # Returns an Array of [page_indices, stamps] pairs (0-based indices).
    # Pages that receive no stamp are omitted.
    def build_page_stamp_map(page_count)
      per_page = Array.new(page_count) { [] }

      @stamps.each do |stamp|
        # target_pages only yields indices inside 0...page_count, so every
        # per_page[idx] below is an existing Array.
        target_pages(stamp[:pages], page_count).each { |idx| per_page[idx] << stamp }
      end

      per_page.each_index
              .reject { |idx| per_page[idx].empty? }
              .group_by { |idx| per_page[idx] }
              .map { |stamps, indices| [indices, stamps] }
    end

    # Number of pages in @pdf_bytes, using CombinePDF or HexaPDF when either
    # constant is defined and a textual scan for page objects otherwise.
    def pdf_page_count
      if defined?(CombinePDF)
        CombinePDF.parse(@pdf_bytes).pages.length
      elsif defined?(HexaPDF)
        HexaPDF::Document.new(io: StringIO.new(@pdf_bytes)).pages.count
      else
        # Scan a binary view so bytes that are not valid in the string's
        # declared encoding cannot make the regexp engine raise.
        @pdf_bytes.b.scan(%r{/Type\s*/Page[^s]}).length
      end
    end

    # Runs the stamper once per page group, feeding each output into the next
    # call so the stamps accumulate in a single document.
    def stamp_per_page(stamper)
      build_page_stamp_map(pdf_page_count).reduce(@pdf_bytes) do |bytes, (page_indices, stamps_for_pages)|
        stamper.stamp(bytes, stamps_for_pages, page_indices: page_indices)
      end
    end

    # Translates a :pages option into 0-based page indices.
    #
    # pages_option - :all, :first, :last, :odd, :even, a 1-based Integer, an
    #                Array of 1-based Integers, or a Range of 1-based page
    #                numbers (endless and beginless ranges are accepted).
    #                Anything else is treated like :all.
    # page_count   - Number of pages in the document.
    #
    # Page numbers outside 1..page_count are dropped and duplicates collapse,
    # so the result never contains an index the document does not have.
    def target_pages(pages_option, page_count)
      valid = (0...page_count).to_a

      case pages_option
      when :all    then valid
      when :first  then valid.first(1)
      when :last   then valid.last(1)
      # Index 0 is page 1, so odd page numbers live at even indices.
      when :odd    then valid.select(&:even?)
      when :even   then valid.select(&:odd?)
      when Integer then [pages_option - 1] & valid
      when Array   then pages_option.map { |page| page - 1 } & valid
      # Walk the (finite) valid indices instead of the range itself, which
      # may be endless.
      when Range   then valid.select { |idx| pages_option.cover?(idx + 1) }
      else valid
      end
    end
  end
end
data/lib/docpdf.rb ADDED
@@ -0,0 +1,116 @@
1
+ require "docpdf/version"
2
+ require "docpdf/configuration"
3
+ require "docpdf/errors"
4
+ require "docpdf/result"
5
+ require "docpdf/input_normalizer"
6
+ require "docpdf/mime_detector"
7
+ require "docpdf/converter_resolver"
8
+ require "docpdf/stamper_resolver"
9
+ require "docpdf/converter"
10
+ require "docpdf/watermarker"
11
+
12
# DocPDF converts documents to PDF through pluggable adapters and can
# optionally watermark the result. Supported inputs: Word, Excel, PowerPoint,
# OpenDocument, CSV, HTML, RTF, plain text, images, and PDF (passed through).
#
#   result = DocPDF.convert("report.docx")
#   result = DocPDF.convert(uploaded_file, filename: "report.pdf")
#   result = DocPDF.convert("report.docx").watermark({ image: "logo.png", opacity: 0.1 })
module DocPDF
  # The global Configuration instance, created on first access and reused
  # afterwards.
  #
  #   DocPDF.configuration.soffice_path # => "soffice"
  def self.configuration
    @configuration = Configuration.new unless @configuration
    @configuration
  end

  # Hands the global Configuration instance to the given block so it can be
  # modified. Returns the block's value.
  #
  # Settings:
  #   soffice_path      - Path to the LibreOffice binary (default: "soffice")
  #   stamper           - Stamper adapter: :hexapdf, :combine_pdf, or nil for
  #                       auto-detect (default: nil)
  #   page_size         - Page size for text rendering: "LETTER", "A4", etc.
  #                       (default: "LETTER")
  #   text_options      - Hash of plain text conversion settings:
  #                       font: "Courier", font_size: 10,
  #                       margins: [50, 50, 50, 50], color: "333333"
  #   watermark_options - Hash of text watermark defaults:
  #                       font: "Helvetica", font_size: 72, color: "AAAAAA",
  #                       rotation: 45
  #
  #   DocPDF.configure do |c|
  #     c.soffice_path = "/usr/bin/soffice"
  #     c.stamper = :hexapdf
  #     c.page_size = "A4"
  #     c.text_options = { font: "Helvetica", font_size: 12, margins: [72, 72, 72, 72], color: "000000" }
  #     c.watermark_options = { font: "Times", font_size: 96, color: "FF0000", rotation: 30 }
  #   end
  def self.configure
    yield configuration
  end

  # Converts a document to PDF by delegating to Converter.call. The source
  # may be a file path, Pathname, IO object, Active Storage attachment,
  # Dragonfly attachment, or raw binary data. Returns a Result carrying the
  # PDF bytes and a filename.
  #
  # Options:
  #   io        - IO object to read from (instead of the positional source)
  #   data      - Raw binary string to convert (instead of the positional source)
  #   filename  - Output filename override (default: derived from source)
  #   mime_type - MIME type override (default: detected from filename extension)
  #
  #   result = DocPDF.convert("report.docx")
  #   result = DocPDF.convert(upload)
  #   result = DocPDF.convert(data: raw_bytes, mime_type: "image/png", filename: "photo.png")
  #
  #   result.data     # => PDF binary string
  #   result.filename # => "report.pdf"
  def self.convert(source = nil, **options)
    Converter.call(source, **options)
  end

  # Replaces the global configuration with a fresh Configuration, restoring
  # the defaults. Mainly useful in tests.
  def self.reset_configuration!
    @configuration = Configuration.new
  end

  # Stamps one or more watermarks onto a PDF by delegating to
  # Watermarker.call. Takes the same kinds of source as .convert and
  # additionally a Result object. Returns a Result.
  #
  # Every stamp is a Hash containing either an :image or a :text key.
  #
  # Keys for image stamps:
  #   image    - Path to the watermark image file
  #   opacity  - Transparency level, 0.0 to 1.0 (default: 0.1)
  #   position - Placement on the page (default: :center). One of: :center,
  #              :top, :bottom, :left, :right, :top_left, :top_right,
  #              :bottom_left, :bottom_right
  #   width    - Image width in points (default: 250)
  #   height   - Image height in points (default: proportional to width)
  #   offset_x - Horizontal offset in points (default: 0)
  #   offset_y - Vertical offset in points (default: 0)
  #   pages    - Which pages to stamp (default: :all). One of: :all, :first,
  #              :last, :odd, :even, Integer, Array, or Range
  #
  # Keys for text stamps:
  #   text      - The text to render (e.g., "DRAFT", "CONFIDENTIAL")
  #   opacity   - Transparency level, 0.0 to 1.0 (default: 0.1)
  #   position  - Placement on the page (default: :center)
  #   font      - Font name (default from watermark_options: "Helvetica")
  #   font_size - Font size in points (default from watermark_options: 72)
  #   color     - Hex color string (default from watermark_options: "AAAAAA")
  #   rotation  - Rotation in degrees, counter-clockwise
  #               (default from watermark_options: 45)
  #   offset_x  - Horizontal offset in points (default: 0)
  #   offset_y  - Vertical offset in points (default: 0)
  #   pages     - Which pages to stamp (default: :all)
  #
  # Text is auto-scaled to fit the page when the font size would cause
  # overflow.
  #
  #   result = DocPDF.watermark("doc.pdf",
  #     { image: "logo.png", opacity: 0.1, position: :top_right, width: 80 },
  #     { text: "DRAFT", opacity: 0.1, position: :center, rotation: 45 })
  #
  # It can also be chained from a convert result:
  #   DocPDF.convert("report.docx").watermark({ text: "DRAFT", opacity: 0.1 })
  def self.watermark(source = nil, *stamps, **options)
    Watermarker.call(source, *stamps, **options)
  end
end
metadata ADDED
@@ -0,0 +1,73 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: docpdf
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Velocity Labs, LLC
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies: []
12
+ description: Multi-format document-to-PDF converter with pluggable adapters and zero
13
+ hard dependencies. Supports Word, Excel, PowerPoint, OpenDocument, CSV, HTML, RTF,
14
+ plain text, images, and PDF passthrough. Optional watermarking with position grid,
15
+ offsets, and per-page targeting.
16
+ email:
17
+ - admin@velocitylabs.io
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - CHANGELOG.md
23
+ - LICENSE.txt
24
+ - README.md
25
+ - lib/docpdf.rb
26
+ - lib/docpdf/adapters/converters/base.rb
27
+ - lib/docpdf/adapters/converters/fallback.rb
28
+ - lib/docpdf/adapters/converters/hexapdf.rb
29
+ - lib/docpdf/adapters/converters/mini_magick.rb
30
+ - lib/docpdf/adapters/converters/passthrough.rb
31
+ - lib/docpdf/adapters/converters/prawn.rb
32
+ - lib/docpdf/adapters/converters/rmagick.rb
33
+ - lib/docpdf/adapters/converters/soffice.rb
34
+ - lib/docpdf/adapters/stampers/base.rb
35
+ - lib/docpdf/adapters/stampers/combine_pdf.rb
36
+ - lib/docpdf/adapters/stampers/hexapdf.rb
37
+ - lib/docpdf/configuration.rb
38
+ - lib/docpdf/converter.rb
39
+ - lib/docpdf/converter_resolver.rb
40
+ - lib/docpdf/errors.rb
41
+ - lib/docpdf/input_normalizer.rb
42
+ - lib/docpdf/mime_detector.rb
43
+ - lib/docpdf/result.rb
44
+ - lib/docpdf/stamper_resolver.rb
45
+ - lib/docpdf/version.rb
46
+ - lib/docpdf/watermarker.rb
47
+ homepage: https://github.com/velocity-labs/docpdf
48
+ licenses:
49
+ - MIT
50
+ metadata:
51
+ homepage_uri: https://github.com/velocity-labs/docpdf
52
+ source_code_uri: https://github.com/velocity-labs/docpdf
53
+ changelog_uri: https://github.com/velocity-labs/docpdf/blob/main/CHANGELOG.md
54
+ rubygems_mfa_required: 'true'
55
+ rdoc_options: []
56
+ require_paths:
57
+ - lib
58
+ required_ruby_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '3.3'
63
+ required_rubygems_version: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
68
+ requirements: []
69
+ rubygems_version: 4.0.6
70
+ specification_version: 4
71
+ summary: Convert documents (Word, Excel, PowerPoint, images) to PDF with optional
72
+ watermarking
73
+ test_files: []