docpdf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +41 -0
- data/LICENSE.txt +21 -0
- data/README.md +327 -0
- data/lib/docpdf/adapters/converters/base.rb +15 -0
- data/lib/docpdf/adapters/converters/fallback.rb +45 -0
- data/lib/docpdf/adapters/converters/hexapdf.rb +69 -0
- data/lib/docpdf/adapters/converters/mini_magick.rb +31 -0
- data/lib/docpdf/adapters/converters/passthrough.rb +17 -0
- data/lib/docpdf/adapters/converters/prawn.rb +26 -0
- data/lib/docpdf/adapters/converters/rmagick.rb +30 -0
- data/lib/docpdf/adapters/converters/soffice.rb +61 -0
- data/lib/docpdf/adapters/stampers/base.rb +22 -0
- data/lib/docpdf/adapters/stampers/combine_pdf.rb +116 -0
- data/lib/docpdf/adapters/stampers/hexapdf.rb +127 -0
- data/lib/docpdf/configuration.rb +31 -0
- data/lib/docpdf/converter.rb +28 -0
- data/lib/docpdf/converter_resolver.rb +77 -0
- data/lib/docpdf/errors.rb +6 -0
- data/lib/docpdf/input_normalizer.rb +76 -0
- data/lib/docpdf/mime_detector.rb +36 -0
- data/lib/docpdf/result.rb +14 -0
- data/lib/docpdf/stamper_resolver.rb +43 -0
- data/lib/docpdf/version.rb +3 -0
- data/lib/docpdf/watermarker.rb +164 -0
- data/lib/docpdf.rb +116 -0
- metadata +73 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: a534da9974aa240254b1ae28fefb29de094cfff9209ca162259b62c63eb517a5
|
|
4
|
+
data.tar.gz: 3ab77722e521e913cb5dc574ebbc7af2a0436abaf5040c6399e10f2d51311367
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 4c3552cec9c8a2b92f730ec784a3f0117f4eb0a1f6190b4b38c1dfe0bc1e3c7a7a6f7234b57c1d1e59f4d095f52cff99249a586b7402aaccb278240e7da3fb58
|
|
7
|
+
data.tar.gz: cd32abba539b7dea911026697fb62ca3ff56c936306a2850058a926dda7bbaab9bfc54d7f5a2b4732493428a2342a0b8f8a3233f2951b1340c3c7aa19a114c50
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## [0.1.0] - 2026-04-03
|
|
4
|
+
|
|
5
|
+
Initial release.
|
|
6
|
+
|
|
7
|
+
### Added
|
|
8
|
+
- Multi-format document-to-PDF conversion via LibreOffice:
|
|
9
|
+
- Word (.doc, .docx)
|
|
10
|
+
- Excel (.xls, .xlsx)
|
|
11
|
+
- PowerPoint (.ppt, .pptx)
|
|
12
|
+
- OpenDocument (.odt, .ods, .odp)
|
|
13
|
+
- CSV, HTML, RTF
|
|
14
|
+
- Plain text to PDF via Prawn or HexaPDF
|
|
15
|
+
- Image to PDF via RMagick or MiniMagick (JPEG, PNG, HEIC, WebP)
|
|
16
|
+
- PDF passthrough
|
|
17
|
+
- PDF watermarking with image and text stamps
|
|
18
|
+
- Image stamps with configurable width, height (proportional if omitted), and opacity
|
|
19
|
+
- Text stamps with configurable font, font_size, color, and rotation
|
|
20
|
+
- Text auto-scales to fit page when font size would cause overflow
|
|
21
|
+
- Rotated text uses correct bounding box for edge/corner positioning
|
|
22
|
+
- Position grid: `:center`, `:top`, `:bottom`, `:left`, `:right`, `:top_left`, `:top_right`, `:bottom_left`, `:bottom_right`
|
|
23
|
+
- Stamp page matches source PDF page size
|
|
24
|
+
- Per-stamp offsets (`offset_x`, `offset_y`) for precise placement
|
|
25
|
+
- Per-stamp page targeting: `:all`, `:first`, `:last`, `:odd`, `:even`, specific page numbers, arrays, and ranges
|
|
26
|
+
- Multiple stamps per call (image and text can be mixed)
|
|
27
|
+
- Chainable API: `DocPDF.convert("file.docx").watermark({ text: "DRAFT", opacity: 0.1 })`
|
|
28
|
+
- Pluggable adapter architecture with zero hard dependencies
|
|
29
|
+
- **Converter adapters**: Soffice, Prawn, HexaPDF, RMagick, MiniMagick, Passthrough, Fallback
|
|
30
|
+
- **Stamper adapters**: HexaPDF, CombinePDF (with Prawn)
|
|
31
|
+
- Auto-detected at runtime based on MIME type and gem availability
|
|
32
|
+
- Extensible via `ConverterResolver.register` and `StamperResolver.register`
|
|
33
|
+
- Flexible input: file path, Pathname, IO object, raw binary data
|
|
34
|
+
- Works with Dragonfly, Active Storage, CarrierWave, Shrine, and Rails UploadedFile
|
|
35
|
+
- `DocPDF::Result` returned from both `convert` and `watermark` with `.data` and `.filename`
|
|
36
|
+
- Extension-based MIME type detection
|
|
37
|
+
- Grouped configuration: `text_options` for plain text conversion, `watermark_options` for text watermark defaults
|
|
38
|
+
- Consistent error wrapping across all adapters via `DocPDF::ConversionError`
|
|
39
|
+
- Soffice stderr captured in `ConversionError` messages for diagnostics
|
|
40
|
+
- Ruby 3.3, 3.4, and 4.0 support
|
|
41
|
+
- Tested with Appraisal across 6 adapter configurations
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Velocity Labs, LLC
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,327 @@
|
|
|
1
|
+
# DocPDF
|
|
2
|
+
|
|
3
|
+
Convert documents from any common format to PDF, with optional watermarking. Zero hard dependencies; bring your own PDF library.
|
|
4
|
+
|
|
5
|
+
## Supported Formats
|
|
6
|
+
|
|
7
|
+
| Input Format | Conversion Tool | Ruby Gem Required |
|
|
8
|
+
|-------------|----------------|-------------------|
|
|
9
|
+
| PDF | Passthrough | None |
|
|
10
|
+
| Word (.doc, .docx) | LibreOffice | None (system dep) |
|
|
11
|
+
| Excel (.xls, .xlsx) | LibreOffice | None (system dep) |
|
|
12
|
+
| PowerPoint (.ppt, .pptx) | LibreOffice | None (system dep) |
|
|
13
|
+
| OpenDocument (.odt, .ods, .odp) | LibreOffice | None (system dep) |
|
|
14
|
+
| CSV, HTML, RTF | LibreOffice | None (system dep) |
|
|
15
|
+
| Plain text | Prawn or HexaPDF | `prawn` or `hexapdf` |
|
|
16
|
+
| JPEG, PNG | RMagick or MiniMagick | `rmagick` or `mini_magick` |
|
|
17
|
+
| HEIC, WebP | RMagick or MiniMagick | `rmagick` or `mini_magick` |
|
|
18
|
+
|
|
19
|
+
> **Note:** PDF passthrough and all LibreOffice-based conversions (Word, Excel, PowerPoint, OpenDocument, CSV, HTML, RTF) work with zero gem dependencies. You only need adapter gems for plain-text conversion, image conversion, and watermarking. If you try to use a feature without the required gem, you'll get a clear error telling you which gem to add.
|
|
20
|
+
|
|
21
|
+
## Installation
|
|
22
|
+
|
|
23
|
+
```ruby
|
|
24
|
+
gem "docpdf"
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
Then pick the adapters you need:
|
|
28
|
+
|
|
29
|
+
```ruby
|
|
30
|
+
# Minimum for text + watermarking (one gem covers both):
|
|
31
|
+
gem "hexapdf"
|
|
32
|
+
|
|
33
|
+
# Or use two separate gems:
|
|
34
|
+
gem "prawn" # text-to-PDF conversion + watermark stamp generation
|
|
35
|
+
gem "combine_pdf" # PDF watermark stamping
|
|
36
|
+
|
|
37
|
+
# For image conversion (pick one):
|
|
38
|
+
gem "rmagick" # full ImageMagick bindings (handles HEIC, WebP, JPEG, PNG)
|
|
39
|
+
gem "mini_magick" # lighter shell wrapper (same format support)
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
**System dependencies:**
|
|
43
|
+
|
|
44
|
+
- **Ruby >= 3.3**
|
|
45
|
+
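- **LibreOffice** for Office-format conversion (Word, Excel, PowerPoint, OpenDocument, CSV, HTML, RTF). `soffice` must be on PATH, or its location set via `config.soffice_path`.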
|
|
46
|
+
- **ImageMagick** + **Ghostscript** required by RMagick/MiniMagick for image conversion.
|
|
47
|
+
|
|
48
|
+
## Usage
|
|
49
|
+
|
|
50
|
+
### Convert files
|
|
51
|
+
|
|
52
|
+
```ruby
|
|
53
|
+
# From a file path (format is detected automatically)
|
|
54
|
+
result = DocPDF.convert("document.docx") # Word -> PDF via LibreOffice
|
|
55
|
+
result = DocPDF.convert("spreadsheet.xlsx") # Excel -> PDF via LibreOffice
|
|
56
|
+
result = DocPDF.convert("slides.pptx") # PowerPoint -> PDF via LibreOffice
|
|
57
|
+
result = DocPDF.convert("document.odt") # OpenDocument -> PDF via LibreOffice
|
|
58
|
+
result = DocPDF.convert("data.csv") # CSV -> PDF via LibreOffice
|
|
59
|
+
result = DocPDF.convert("page.html") # HTML -> PDF via LibreOffice
|
|
60
|
+
result = DocPDF.convert("notes.txt") # Text -> PDF via Prawn/HexaPDF
|
|
61
|
+
result = DocPDF.convert("scan.heic") # Image -> PDF via RMagick/MiniMagick
|
|
62
|
+
result = DocPDF.convert("existing.pdf") # PDF passthrough
|
|
63
|
+
|
|
64
|
+
# From an IO object (Rails UploadedFile, Tempfile, StringIO, etc.)
|
|
65
|
+
result = DocPDF.convert(params[:file])
|
|
66
|
+
|
|
67
|
+
# From raw binary data
|
|
68
|
+
result = DocPDF.convert(data: file_contents, mime_type: "image/png", filename: "photo.png")
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
The result is a `DocPDF::Result` with `data` (binary string) and `filename` (suggested output name).
|
|
72
|
+
|
|
73
|
+
### Watermark PDFs
|
|
74
|
+
|
|
75
|
+
Pass one or more stamp hashes. Each stamp uses either `image:` or `text:`.
|
|
76
|
+
|
|
77
|
+
```ruby
|
|
78
|
+
# Image watermark
|
|
79
|
+
result = DocPDF.watermark("report.pdf",
|
|
80
|
+
{ image: "logo.png", opacity: 0.06, position: :center })
|
|
81
|
+
|
|
82
|
+
# Text watermark (e.g., "DRAFT" diagonally across the page)
|
|
83
|
+
result = DocPDF.watermark("report.pdf",
|
|
84
|
+
{ text: "DRAFT", opacity: 0.1, position: :center, rotation: 45 })
|
|
85
|
+
|
|
86
|
+
# Mix image and text stamps with page targeting
|
|
87
|
+
result = DocPDF.watermark("report.pdf",
|
|
88
|
+
{ text: "CONFIDENTIAL", opacity: 0.1, position: :center, font_size: 60, rotation: 45 },
|
|
89
|
+
{ image: "logo.png", opacity: 0.3, position: :top_right, width: 80, pages: :first })
|
|
90
|
+
|
|
91
|
+
# Chain onto a conversion
|
|
92
|
+
result = DocPDF.convert("document.docx")
|
|
93
|
+
.watermark({ text: "DRAFT", opacity: 0.1 })
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Both `convert` and `watermark` return a `DocPDF::Result`, so you can chain them. `watermark` accepts file paths, IO objects, raw bytes, or a `Result` from a prior call.
|
|
97
|
+
|
|
98
|
+
Text watermarks auto-scale to fit the page when the font size would cause overflow.
|
|
99
|
+
|
|
100
|
+
#### Image stamp options
|
|
101
|
+
|
|
102
|
+
| Option | Default | Description |
|
|
103
|
+
|--------|---------|-------------|
|
|
104
|
+
| `image` | | Path to the image file |
|
|
105
|
+
| `opacity` | `0.1` | Transparency (0.0 = invisible, 1.0 = opaque) |
|
|
106
|
+
| `position` | `:center` | Anchor point on the page (see below) |
|
|
107
|
+
| `width` | `250` | Image width in points |
|
|
108
|
+
| `height` | proportional | Image height in points (scales proportionally if omitted) |
|
|
109
|
+
| `offset_x` | `0` | Horizontal nudge from anchor (positive = right, negative = left) |
|
|
110
|
+
| `offset_y` | `0` | Vertical nudge from anchor (positive = up, negative = down) |
|
|
111
|
+
| `pages` | `:all` | Which pages to stamp (see below) |
|
|
112
|
+
|
|
113
|
+
#### Text stamp options
|
|
114
|
+
|
|
115
|
+
| Option | Default | Description |
|
|
116
|
+
|--------|---------|-------------|
|
|
117
|
+
| `text` | | The text to render |
|
|
118
|
+
| `opacity` | `0.1` | Transparency (0.0 = invisible, 1.0 = opaque) |
|
|
119
|
+
| `position` | `:center` | Anchor point on the page (see below) |
|
|
120
|
+
| `font` | `"Helvetica"` | Font name (configurable via `watermark_options`) |
|
|
121
|
+
| `font_size` | `72` | Font size in points (configurable via `watermark_options`) |
|
|
122
|
+
| `color` | `"AAAAAA"` | Hex color string (configurable via `watermark_options`) |
|
|
123
|
+
| `rotation` | `45` | Degrees counter-clockwise (configurable via `watermark_options`) |
|
|
124
|
+
| `offset_x` | `0` | Horizontal nudge from anchor |
|
|
125
|
+
| `offset_y` | `0` | Vertical nudge from anchor |
|
|
126
|
+
| `pages` | `:all` | Which pages to stamp (see below) |
|
|
127
|
+
|
|
128
|
+
#### Positions
|
|
129
|
+
|
|
130
|
+
Stamps are centered on the anchor point, not placed by their corner.
|
|
131
|
+
|
|
132
|
+
| Position | Anchor |
|
|
133
|
+
|----------|--------|
|
|
134
|
+
| `:center` | Center of page |
|
|
135
|
+
| `:top` | Top center |
|
|
136
|
+
| `:bottom` | Bottom center |
|
|
137
|
+
| `:left` | Left center |
|
|
138
|
+
| `:right` | Right center |
|
|
139
|
+
| `:top_left` | Top-left corner |
|
|
140
|
+
| `:top_right` | Top-right corner |
|
|
141
|
+
| `:bottom_left` | Bottom-left corner |
|
|
142
|
+
| `:bottom_right` | Bottom-right corner |
|
|
143
|
+
|
|
144
|
+
#### Page targeting
|
|
145
|
+
|
|
146
|
+
| Value | Pages stamped |
|
|
147
|
+
|-------|---------------|
|
|
148
|
+
| `:all` | Every page (default) |
|
|
149
|
+
| `:first` | First page only |
|
|
150
|
+
| `:last` | Last page only |
|
|
151
|
+
| `:odd` | Odd pages (1, 3, 5...) |
|
|
152
|
+
| `:even` | Even pages (2, 4, 6...) |
|
|
153
|
+
| `3` | Specific page (1-indexed) |
|
|
154
|
+
| `[1, 3, 5]` | Array of page numbers |
|
|
155
|
+
| `2..5` | Range of page numbers |
|
|
156
|
+
|
|
157
|
+
### Rails integration
|
|
158
|
+
|
|
159
|
+
DocPDF works with file upload and attachment libraries out of the box:
|
|
160
|
+
|
|
161
|
+
```ruby
|
|
162
|
+
# ActionDispatch::Http::UploadedFile
|
|
163
|
+
result = DocPDF.convert(params[:document])
|
|
164
|
+
|
|
165
|
+
# Dragonfly
|
|
166
|
+
result = DocPDF.convert(record.document, filename: "output.pdf")
|
|
167
|
+
|
|
168
|
+
# Active Storage
|
|
169
|
+
result = DocPDF.convert(user.document)
|
|
170
|
+
|
|
171
|
+
# CarrierWave / Shrine
|
|
172
|
+
result = DocPDF.convert(record.file)
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
MIME type and filename are auto-extracted from each library's metadata. Use `filename:` to set the output name without affecting format detection.
|
|
176
|
+
|
|
177
|
+
## Configuration
|
|
178
|
+
|
|
179
|
+
All configuration is optional. DocPDF works out of the box with sensible defaults.
|
|
180
|
+
|
|
181
|
+
```ruby
|
|
182
|
+
DocPDF.configure do |config|
|
|
183
|
+
# LibreOffice binary path (default: "soffice", found via PATH)
|
|
184
|
+
config.soffice_path = "/usr/bin/soffice"
|
|
185
|
+
|
|
186
|
+
# Stamper adapter for watermarking (default: nil, auto-detects hexapdf then combine_pdf)
|
|
187
|
+
config.stamper = :hexapdf # or :combine_pdf
|
|
188
|
+
|
|
189
|
+
# Page size (default: "LETTER")
|
|
190
|
+
config.page_size = "A4"
|
|
191
|
+
|
|
192
|
+
# Plain text file conversion defaults
|
|
193
|
+
config.text_options = {
|
|
194
|
+
font: "Helvetica", # default: "Courier"
|
|
195
|
+
font_size: 12, # default: 10
|
|
196
|
+
margins: [72, 72, 72, 72], # default: [50, 50, 50, 50] (points: top, right, bottom, left)
|
|
197
|
+
color: "000000", # default: "333333" (hex)
|
|
198
|
+
}
|
|
199
|
+
|
|
200
|
+
# Text watermark defaults (per-stamp options override these)
|
|
201
|
+
config.watermark_options = {
|
|
202
|
+
font: "Times", # default: "Helvetica"
|
|
203
|
+
font_size: 96, # default: 72
|
|
204
|
+
color: "FF0000", # default: "AAAAAA" (hex)
|
|
205
|
+
rotation: 30, # default: 45 (degrees counter-clockwise)
|
|
206
|
+
}
|
|
207
|
+
end
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
Converter adapters (for format-to-PDF conversion) are auto-detected based on MIME type and gem availability. The first available adapter wins, in registration order:
|
|
211
|
+
|
|
212
|
+
- **text/plain**: Prawn, then HexaPDF
|
|
213
|
+
- **image/\***: RMagick, then MiniMagick
|
|
214
|
+
- **Office formats**: LibreOffice (always available if installed)
|
|
215
|
+
- **application/pdf**: Passthrough (returned unchanged)
|
|
216
|
+
- **Unknown formats**: Fallback (delegates to an image converter when the filename has a known image extension, then tries LibreOffice, then returns raw data)
|
|
217
|
+
|
|
218
|
+
## Adapters
|
|
219
|
+
|
|
220
|
+
DocPDF has two types of adapters:
|
|
221
|
+
|
|
222
|
+
**Converter adapters** convert input data to PDF:
|
|
223
|
+
|
|
224
|
+
| Adapter | Gem | Formats |
|
|
225
|
+
|---------|-----|---------|
|
|
226
|
+
| Soffice | None (system) | Word, Excel, PowerPoint, ODF, CSV, HTML, RTF |
|
|
227
|
+
| Prawn | `prawn` | Plain text |
|
|
228
|
+
| HexaPDF | `hexapdf` | Plain text |
|
|
229
|
+
| RMagick | `rmagick` | JPEG, PNG, HEIC, WebP |
|
|
230
|
+
| MiniMagick | `mini_magick` | JPEG, PNG, HEIC, WebP |
|
|
231
|
+
| Passthrough | None | PDF (returned unchanged) |
|
|
232
|
+
| Fallback | None | Unknown formats (tries LibreOffice, then raw data) |
|
|
233
|
+
|
|
234
|
+
**Stamper adapters** apply watermarks to PDFs:
|
|
235
|
+
|
|
236
|
+
| Adapter | Gem(s) | Notes |
|
|
237
|
+
|---------|--------|-------|
|
|
238
|
+
| HexaPDF | `hexapdf` | All-in-one, handles both stamp generation and overlay |
|
|
239
|
+
| CombinePDF | `combine_pdf` + `prawn` | Prawn generates the stamp page, CombinePDF overlays it |
|
|
240
|
+
|
|
241
|
+
### Custom adapters
|
|
242
|
+
|
|
243
|
+
Register your own converter or stamper:
|
|
244
|
+
|
|
245
|
+
```ruby
|
|
246
|
+
# Custom converter for a specific MIME type
|
|
247
|
+
DocPDF::ConverterResolver.register(:my_converter,
|
|
248
|
+
require_name: "my_gem",
|
|
249
|
+
mime_types: %w[application/x-custom],
|
|
250
|
+
loader: -> { require "docpdf/adapters/converters/my_converter"; MyConverter })
|
|
251
|
+
|
|
252
|
+
# Custom stamper
|
|
253
|
+
DocPDF::StamperResolver.register(:my_stamper,
|
|
254
|
+
loader: -> { require "my_stamper"; MyStamper })
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## Error Handling
|
|
258
|
+
|
|
259
|
+
All errors inherit from `DocPDF::Error`, so you can catch everything with one rescue or handle specific cases:
|
|
260
|
+
|
|
261
|
+
```ruby
|
|
262
|
+
begin
|
|
263
|
+
result = DocPDF.convert("file.docx")
|
|
264
|
+
rescue DocPDF::Error => e
|
|
265
|
+
# Catch any docpdf error
|
|
266
|
+
end
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
Specific error classes:
|
|
270
|
+
|
|
271
|
+
| Error | When |
|
|
272
|
+
|-------|------|
|
|
273
|
+
| `DocPDF::ConversionError` | Conversion failed (message includes adapter name and details) |
|
|
274
|
+
| `DocPDF::SofficeNotFoundError` | LibreOffice not installed or not on PATH |
|
|
275
|
+
| `DocPDF::AdapterNotFoundError` | Required gem not installed (message tells you which to add) |
|
|
276
|
+
|
|
277
|
+
## Deployment Notes
|
|
278
|
+
|
|
279
|
+
### ImageMagick policy.xml
|
|
280
|
+
|
|
281
|
+
Most Linux distributions ship ImageMagick with PDF conversion disabled for security. If image-to-PDF conversion fails with a permission error, find your ImageMagick `policy.xml` file (commonly at `/etc/ImageMagick-6/policy.xml` or `/etc/ImageMagick-7/policy.xml`) and change the PDF coder policy from `rights="none"` to `rights="read|write"`.
|
|
282
|
+
|
|
283
|
+
### LibreOffice on Heroku
|
|
284
|
+
|
|
285
|
+
Use the [apt buildpack](https://github.com/heroku/heroku-buildpack-apt) to install LibreOffice, or use the AppImage approach.
|
|
286
|
+
|
|
287
|
+
### LibreOffice on Docker
|
|
288
|
+
|
|
289
|
+
```dockerfile
|
|
290
|
+
RUN apt-get update && apt-get install -y libreoffice-writer
|
|
291
|
+
```
|
|
292
|
+
|
|
293
|
+
## Contributing
|
|
294
|
+
|
|
295
|
+
Bug reports and pull requests are welcome on [GitHub](https://github.com/velocity-labs/docpdf).
|
|
296
|
+
|
|
297
|
+
1. Fork the repo
|
|
298
|
+
2. Create your feature branch (`git checkout -b my-feature`)
|
|
299
|
+
3. Make your changes with tests
|
|
300
|
+
4. Ensure all tests pass (`bundle exec rake test`)
|
|
301
|
+
5. Commit and push
|
|
302
|
+
6. Open a pull request
|
|
303
|
+
|
|
304
|
+
## Testing
|
|
305
|
+
|
|
306
|
+
```bash
|
|
307
|
+
# Install dependencies
|
|
308
|
+
bundle install
|
|
309
|
+
|
|
310
|
+
# Run the full test suite
|
|
311
|
+
bundle exec rake test
|
|
312
|
+
|
|
313
|
+
# Run tests for a specific adapter configuration
|
|
314
|
+
bundle exec appraisal hexapdf-only rake test
|
|
315
|
+
bundle exec appraisal no-adapters rake test
|
|
316
|
+
|
|
317
|
+
# Run all appraisals
|
|
318
|
+
bundle exec appraisal rake test
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
Available appraisals: `all`, `hexapdf-only`, `prawn-combine-pdf`, `rmagick`, `mini-magick`, `no-adapters`.
|
|
322
|
+
|
|
323
|
+
Tests require LibreOffice and ImageMagick installed locally.
|
|
324
|
+
|
|
325
|
+
## License
|
|
326
|
+
|
|
327
|
+
Copyright (c) 2026 Velocity Labs, LLC. Released under the [MIT License](LICENSE.txt).
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module DocPDF
  module Adapters
    module Converters
      # Abstract parent class for converter adapters.
      #
      # Concrete adapters override MIME_TYPES and the class-level +convert+
      # method. Calling +convert+ on a class that has not overridden it raises.
      class Base
        # MIME types handled by the adapter; empty for the abstract base.
        MIME_TYPES = [].freeze

        # Placeholder that concrete adapters must replace.
        #
        # @param data [Object] input payload (ignored here)
        # @param source_filename [Object] name of the source file (ignored here)
        # @raise [NotImplementedError] always; the message names the receiving class
        def self.convert(data, source_filename)
          message = "#{name} must implement .convert(data, source_filename)"
          raise NotImplementedError, message
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
require_relative "base"
|
|
2
|
+
|
|
3
|
+
module DocPDF
  module Adapters
    module Converters
      # Converter of last resort for MIME types nothing else claimed.
      #
      # Strategies are attempted in this order, stopping at the first one that
      # yields a truthy result:
      #
      #   1. When the source filename ends in one of IMAGE_EXTENSIONS, look up
      #      its MIME type and hand the data to whatever converter the resolver
      #      returns for that type.
      #   2. Ask the Soffice adapter to convert the data.
      #   3. Give the caller back the data it passed in, untouched.
      class Fallback < Base
        IMAGE_EXTENSIONS = %w[.heic .heif .webp .jpg .jpeg .png].freeze

        # @param data [String] raw input bytes
        # @param source_filename [String, nil] original filename, if known
        # @return [Object] result of the first strategy that succeeds, or +data+
        def self.convert(data, source_filename)
          from_image = convert_by_extension(data, source_filename)
          return from_image if from_image

          convert_with_soffice(data, source_filename) || data
        end

        # Delegates to the resolver when the filename carries a known image
        # extension. Returns nil when there is no filename, the extension is
        # not listed, or no MIME type is detected.
        def self.convert_by_extension(data, source_filename)
          return unless source_filename

          extension = File.extname(source_filename).downcase
          return unless IMAGE_EXTENSIONS.include?(extension)

          detected_type = MimeDetector.detect(source_filename)
          return unless detected_type

          ConverterResolver.resolve(detected_type).convert(data, source_filename)
        end
        private_class_method :convert_by_extension

        # Tries the Soffice adapter; a missing binary or a failed conversion is
        # reported as nil so the caller can move on to the next strategy.
        def self.convert_with_soffice(data, source_filename)
          Soffice.convert(data, source_filename)
        rescue SofficeNotFoundError, ConversionError
          nil
        end
        private_class_method :convert_with_soffice
      end
    end
  end
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
require "hexapdf"
|
|
2
|
+
require_relative "base"
|
|
3
|
+
|
|
4
|
+
module DocPDF
  module Adapters
    module Converters
      # Plain-text to PDF converter built on HexaPDF.
      #
      # Each input line is drawn at a fixed vertical step; a new page is
      # started whenever the cursor drops below the bottom margin.
      class Hexapdf < Base
        MIME_TYPES = %w[text/plain].freeze

        # Maps upper-cased page size names to the symbols passed to HexaPDF.
        HEXAPDF_PAGE_SIZES = {
          "LETTER" => :Letter, "LEGAL" => :Legal, "TABLOID" => :Tabloid,
          "A0" => :A0, "A1" => :A1, "A2" => :A2, "A3" => :A3, "A4" => :A4,
          "A5" => :A5, "A6" => :A6, "B0" => :B0, "B1" => :B1, "B2" => :B2,
          "B3" => :B3, "B4" => :B4, "B5" => :B5, "B6" => :B6,
        }.freeze

        class << self
          # Renders +data+ as text using the configured page size and
          # text_options (font, font_size, color, margins).
          #
          # @param data [String] text to render; a copy is tagged as UTF-8
          # @param _source_filename [String, nil] unused
          # @return [String] the serialized PDF document
          # @raise [ConversionError] when HexaPDF raises HexaPDF::Error
          def convert(data, _source_filename)
            settings = DocPDF.configuration
            text_opts = settings.text_options
            text = data.dup.force_encoding("UTF-8")
            size = normalize_page_size(settings.page_size)

            document = HexaPDF::Document.new
            page, canvas = start_page(document, size, text_opts)

            # Indices 0, 2 and 3 are read as top, bottom and left margins.
            margins = text_opts[:margins]
            cursor_y = page.box.height - margins[0]
            line_step = text_opts[:font_size] * 1.4

            text.each_line do |raw_line|
              if cursor_y < margins[2]
                page, canvas = start_page(document, size, text_opts)
                cursor_y = page.box.height - margins[0]
              end
              canvas.text(raw_line.chomp, at: [margins[3], cursor_y])
              cursor_y -= line_step
            end

            write_to_string(document)
          rescue HexaPDF::Error => e
            raise ConversionError, "HexaPDF failed to render text to PDF: #{e.message}"
          end

          private

          # Appends a page and prepares its canvas with the configured font
          # and fill color. Returns the page and canvas as a pair.
          def start_page(document, size, text_opts)
            page = document.pages.add(size)
            canvas = page.canvas
            canvas.font(text_opts[:font], size: text_opts[:font_size])
            canvas.fill_color(text_opts[:color])
            [page, canvas]
          end

          # Symbols pass through; anything else is looked up by its upper-cased
          # string form, falling back to a plain symbol conversion.
          def normalize_page_size(size)
            return size if size.is_a?(Symbol)

            HEXAPDF_PAGE_SIZES.fetch(size.to_s.upcase) { size.to_sym }
          end

          # Serializes the document into an in-memory string.
          def write_to_string(doc)
            StringIO.new.tap { |buffer| doc.write(buffer) }.string
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
require "mini_magick"
|
|
2
|
+
require_relative "base"
|
|
3
|
+
|
|
4
|
+
module DocPDF
  module Adapters
    module Converters
      # Image to PDF converter built on MiniMagick.
      class MiniMagick < Base
        MIME_TYPES = %w[image/jpeg image/png image/heic image/heif image/webp].freeze

        class << self
          # Writes +data+ to a temporary file, opens it with MiniMagick,
          # switches the image format to PDF and returns the resulting bytes.
          #
          # @param data [String] raw image bytes
          # @param filename [String, nil] original filename; only its extension
          #   is used, as the temporary file's suffix
          # @return [String] PDF bytes
          # @raise [ConversionError] when MiniMagick raises ::MiniMagick::Error
          def convert(data, filename)
            img = nil
            # File.extname(".tmp") is "" (a leading-dot name has no extension),
            # so the ".tmp" default has to be chosen explicitly rather than by
            # passing ".tmp" through File.extname.
            suffix = filename ? File.extname(filename) : ".tmp"

            Tempfile.create(["docpdf", suffix]) do |tempfile|
              tempfile.binmode
              tempfile.write(data)
              # The image is read back by path, so push buffered bytes to disk.
              tempfile.flush

              img = ::MiniMagick::Image.open(tempfile.path)
              img.format("pdf")
              img.to_blob
            end
          rescue ::MiniMagick::Error => e
            raise ConversionError, "MiniMagick failed to convert #{filename || 'image'} to PDF: #{e.message}"
          ensure
            # Release the image's resources whether or not conversion succeeded.
            img&.destroy!
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
require_relative "base"
|
|
2
|
+
|
|
3
|
+
module DocPDF
  module Adapters
    module Converters
      # Adapter for input that is already a PDF: nothing is converted.
      class Passthrough < Base
        MIME_TYPES = %w[application/pdf].freeze

        # Hands back the very object it was given.
        #
        # @param data [Object] PDF payload
        # @param _source_filename [String, nil] unused
        # @return [Object] +data+, unmodified
        def self.convert(data, _source_filename) = data
      end
    end
  end
end
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
require "prawn"
|
|
2
|
+
require_relative "base"
|
|
3
|
+
|
|
4
|
+
module DocPDF
  module Adapters
    module Converters
      # Plain-text to PDF converter built on Prawn.
      class Prawn < Base
        MIME_TYPES = %w[text/plain].freeze

        # Prawn errors that are reported to callers as ConversionError.
        # IncompatibleStringEncoding is included because arbitrary UTF-8 input
        # may contain characters the selected font cannot encode; without it
        # that failure would escape as a raw Prawn exception.
        RENDER_ERRORS = [
          ::Prawn::Errors::UnknownFont,
          ::Prawn::Errors::CannotFit,
          ::Prawn::Errors::IncompatibleStringEncoding,
        ].freeze

        class << self
          # Renders +data+ as text using the configured page size and
          # text_options (font, font_size, color, margins).
          #
          # @param data [String] text to render; a copy is tagged as UTF-8
          # @param _source_filename [String, nil] unused
          # @return [String] the rendered PDF
          # @raise [ConversionError] when Prawn raises one of RENDER_ERRORS
          def convert(data, _source_filename)
            config = DocPDF.configuration
            opts = config.text_options
            content = data.dup.force_encoding("UTF-8")
            pdf = ::Prawn::Document.new(page_size: config.page_size, margin: opts[:margins])
            pdf.font(opts[:font], size: opts[:font_size])
            pdf.text content, color: opts[:color]
            pdf.render
          rescue *RENDER_ERRORS => e
            raise ConversionError, "Prawn failed to render text to PDF: #{e.message}"
          end
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
require "rmagick"
|
|
2
|
+
require_relative "base"
|
|
3
|
+
|
|
4
|
+
module DocPDF
  module Adapters
    module Converters
      # Image to PDF converter built on RMagick.
      class Rmagick < Base
        MIME_TYPES = %w[image/jpeg image/png image/heic image/heif image/webp].freeze

        class << self
          # Writes +data+ to a temporary file, reads the first image from it
          # with RMagick and returns that image encoded as PDF.
          #
          # @param data [String] raw image bytes
          # @param filename [String, nil] original filename; only its extension
          #   is used, as the temporary file's suffix
          # @return [String] PDF bytes
          # @raise [ConversionError] when RMagick raises Magick::ImageMagickError
          def convert(data, filename)
            img = nil
            # File.extname(".tmp") is "" (a leading-dot name has no extension),
            # so the ".tmp" default has to be chosen explicitly rather than by
            # passing ".tmp" through File.extname.
            suffix = filename ? File.extname(filename) : ".tmp"

            Tempfile.create(["docpdf", suffix]) do |tempfile|
              tempfile.binmode
              tempfile.write(data)
              # The image is read back by path, so push buffered bytes to disk.
              tempfile.flush

              img = Magick::Image.read(tempfile.path).first
              img.to_blob { |attrs| attrs.format = "PDF" }
            end
          rescue Magick::ImageMagickError => e
            raise ConversionError, "RMagick failed to convert #{filename || 'image'} to PDF: #{e.message}"
          ensure
            # Release the image's resources whether or not conversion succeeded.
            img&.destroy!
          end
        end
      end
    end
  end
end
|