hexapdf 0.35.1 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/Rakefile +1 -1
- data/lib/hexapdf/configuration.rb +3 -0
- data/lib/hexapdf/content/canvas_composer.rb +1 -1
- data/lib/hexapdf/document/layout.rb +15 -6
- data/lib/hexapdf/document/metadata.rb +488 -0
- data/lib/hexapdf/document.rb +18 -3
- data/lib/hexapdf/filter.rb +2 -2
- data/lib/hexapdf/layout/box.rb +104 -29
- data/lib/hexapdf/layout/container_box.rb +159 -0
- data/lib/hexapdf/layout/frame.rb +7 -4
- data/lib/hexapdf/layout/list_box.rb +2 -1
- data/lib/hexapdf/layout.rb +1 -0
- data/lib/hexapdf/type/annotation.rb +71 -0
- data/lib/hexapdf/type/catalog.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +2 -1
- data/lib/hexapdf/type/metadata.rb +63 -0
- data/lib/hexapdf/type.rb +1 -0
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/document/test_layout.rb +6 -0
- data/test/hexapdf/document/test_metadata.rb +192 -0
- data/test/hexapdf/layout/test_box.rb +39 -13
- data/test/hexapdf/layout/test_container_box.rb +84 -0
- data/test/hexapdf/layout/test_frame.rb +3 -2
- data/test/hexapdf/test_filter.rb +12 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/test_font_simple.rb +9 -1
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 464355c84e7dd5636fe34bb0feb96038e06d4e0701d0ce9e915a2a9bddd3c18a
|
4
|
+
data.tar.gz: 8bcebc03512574b0fd71396bbe21330f59cb49e75f2194d1cd47ced0648ef7ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ccfbc72734d74178b1eb49da85bcd364410b12abfbb2d2cd730e6a37176dc85c7b87369bd0463869d6815d911e3428a21b0e18e1492f75d75b59c53a68ebf835
|
7
|
+
data.tar.gz: bc7001b82ec40571b6257575923d0fb5ef4c1011ff4067b3b53c40822240328c461c358084f9de8d5271809d9d85b8bd2de91ce219e211cd84154b3156c7c426
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,39 @@
|
|
1
|
+
## 0.37.0 - 2024-01-29
|
2
|
+
|
3
|
+
### Added
|
4
|
+
|
5
|
+
* [HexaPDF::Document::Metadata] for working with metadata (reading the info
|
6
|
+
dictionary and writing it as well as the XMP metadata stream)
|
7
|
+
|
8
|
+
### Changed
|
9
|
+
|
10
|
+
* Minimum Ruby version to be 2.7
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
|
14
|
+
* [HexaPDF::FiberDoubleForString#length] to not assume a binary string
|
15
|
+
|
16
|
+
|
17
|
+
## 0.36.0 - 2024-01-20
|
18
|
+
|
19
|
+
### Added
|
20
|
+
|
21
|
+
* [HexaPDF::Layout::ContainerBox] for grouping child boxes together
|
22
|
+
|
23
|
+
### Changed
|
24
|
+
|
25
|
+
* [HexaPDF::Layout::Frame::FitResult#draw] to allow drawing at an offset
|
26
|
+
* [HexaPDF::Layout::Box#fit] to delegate the actual content fitting to the
|
27
|
+
`#fit_content` method
|
28
|
+
* [HexaPDF::Document::Layout#box] to allow using the block as drawing block for
|
29
|
+
the base box class
|
30
|
+
|
31
|
+
### Fixed
|
32
|
+
|
33
|
+
* [HexaPDF::Type::FontSimple#to_utf8] to work in case the font's encoding cannot
|
34
|
+
be retrieved
|
35
|
+
|
36
|
+
|
1
37
|
## 0.35.1 - 2024-01-11
|
2
38
|
|
3
39
|
### Added
|
data/Rakefile
CHANGED
@@ -47,7 +47,7 @@ namespace :dev do
|
|
47
47
|
end
|
48
48
|
|
49
49
|
task :test_all do
|
50
|
-
versions = `rbenv versions --bare | grep -i ^2.
|
50
|
+
versions = `rbenv versions --bare | grep -i ^2.7\\\\\\|^3.`.split("\n")
|
51
51
|
versions.each do |version|
|
52
52
|
sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
|
53
53
|
end
|
data/lib/hexapdf/configuration.rb
CHANGED
@@ -545,6 +545,7 @@ module HexaPDF
|
|
545
545
|
column: 'HexaPDF::Layout::ColumnBox',
|
546
546
|
list: 'HexaPDF::Layout::ListBox',
|
547
547
|
table: 'HexaPDF::Layout::TableBox',
|
548
|
+
container: 'HexaPDF::Layout::ContainerBox',
|
548
549
|
},
|
549
550
|
'page.default_media_box' => :A4,
|
550
551
|
'page.default_media_orientation' => :portrait,
|
@@ -687,6 +688,7 @@ module HexaPDF
|
|
687
688
|
XXReference: 'HexaPDF::Type::Form::Reference',
|
688
689
|
XXCIDSystemInfo: 'HexaPDF::Type::CIDFont::CIDSystemInfo',
|
689
690
|
Group: 'HexaPDF::Type::Form::Group',
|
691
|
+
Metadata: 'HexaPDF::Type::Metadata',
|
690
692
|
},
|
691
693
|
'object.subtype_map' => {
|
692
694
|
nil => {
|
@@ -705,6 +707,7 @@ module HexaPDF
|
|
705
707
|
Text: 'HexaPDF::Type::Annotations::Text',
|
706
708
|
Link: 'HexaPDF::Type::Annotations::Link',
|
707
709
|
Widget: 'HexaPDF::Type::Annotations::Widget',
|
710
|
+
XML: 'HexaPDF::Type::Metadata'
|
708
711
|
},
|
709
712
|
XObject: {
|
710
713
|
Image: 'HexaPDF::Type::Image',
|
data/lib/hexapdf/content/canvas_composer.rb
CHANGED
@@ -118,7 +118,7 @@ module HexaPDF
|
|
118
118
|
# composer.list(item_spacing: 2) do |list|
|
119
119
|
# composer.document.config['layout.boxes.map'].each do |name, klass|
|
120
120
|
# list.formatted_text([{text: name.to_s, fill_color: "hp-blue-dark"},
|
121
|
-
# {text: "\n#{klass}"}, font_size:
|
121
|
+
# {text: "\n#{klass}"}], font_size: 6)
|
122
122
|
# end
|
123
123
|
# end
|
124
124
|
# end
|
data/lib/hexapdf/document/layout.rb
CHANGED
@@ -238,10 +238,12 @@ module HexaPDF
|
|
238
238
|
#
|
239
239
|
# The +name+ argument refers to the registered name of the box class that is looked up in the
|
240
240
|
# 'layout.boxes.map' configuration option. The +box_options+ are passed as-is to the
|
241
|
-
# initialization method of that box class
|
241
|
+
# initialization method of that box class.
|
242
242
|
#
|
243
243
|
# If a block is provided, a ChildrenCollector is yielded and the collected children are passed
|
244
|
-
# to the box initialization method via the :children keyword argument.
|
244
|
+
# to the box initialization method via the :children keyword argument. There is one exception
|
245
|
+
# to this rule in case +name+ is +base+: The provided block is passed to the initialization
|
246
|
+
# method of the base box class to function as drawing method.
|
245
247
|
#
|
246
248
|
# See #text_box for details on +width+, +height+ and +style+ (note that there is no
|
247
249
|
# +style_properties+ argument).
|
@@ -252,12 +254,19 @@ module HexaPDF
|
|
252
254
|
# layout.box(:column) do |column| # column box with one child
|
253
255
|
# column.lorem_ipsum
|
254
256
|
# end
|
255
|
-
|
256
|
-
|
257
|
-
|
257
|
+
# layout.box(width: 100) do |canvas, box|
|
258
|
+
# canvas.line(0, 0, box.content_width, box.content_height).stroke
|
259
|
+
# end
|
260
|
+
def box(name = :base, width: 0, height: 0, style: nil, **box_options, &block)
|
261
|
+
if block_given?
|
262
|
+
if name == :base
|
263
|
+
box_block = block
|
264
|
+
elsif !box_options.key?(:children)
|
265
|
+
box_options[:children] = ChildrenCollector.collect(self, &block)
|
266
|
+
end
|
258
267
|
end
|
259
268
|
box_class_for_name(name).new(width: width, height: height,
|
260
|
-
style: retrieve_style(style), **box_options)
|
269
|
+
style: retrieve_style(style), **box_options, &box_block)
|
261
270
|
end
|
262
271
|
|
263
272
|
# Creates an array of HexaPDF::Layout::TextFragment objects for the given +text+.
|
data/lib/hexapdf/document/metadata.rb
ADDED
@@ -0,0 +1,488 @@
|
|
1
|
+
# -*- encoding: utf-8; frozen_string_literal: true -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2014-2023 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#
|
33
|
+
# If the GNU Affero General Public License doesn't fit your need,
|
34
|
+
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
|
35
|
+
#++
|
36
|
+
|
37
|
+
require 'securerandom'
|
38
|
+
require 'hexapdf/dictionary'
|
39
|
+
require 'hexapdf/error'
|
40
|
+
|
41
|
+
module HexaPDF
|
42
|
+
class Document
|
43
|
+
|
44
|
+
# This class provides methods for reading and writing the document-level metadata.
|
45
|
+
#
|
46
|
+
# When an instance is created (usually through HexaPDF::Document#metadata), the metadata is read
|
47
|
+
# from the document's information dictionary (see HexaPDF::Type::Info) and made available
|
48
|
+
# through the various methods.
|
49
|
+
#
|
50
|
+
# By default, the metadata is written to the information dictionary as well as to the document's
|
51
|
+
# metadata stream (see HexaPDF::Type::Metadata) once the document is written. This can be
|
52
|
+
# controlled via the #write_info_dict and #write_metadata_stream methods.
|
53
|
+
#
|
54
|
+
# While HexaPDF is able to write an XMP packet (using a limited form) to the document's metadata
|
55
|
+
# stream, it provides no way for reading XMP metadata. If reading functionality or extended
|
56
|
+
# writing functionality is needed, make sure this class does not write the metadata and
|
57
|
+
# read/create the metadata stream yourself.
|
58
|
+
#
|
59
|
+
#
|
60
|
+
# == Caveats
|
61
|
+
#
|
62
|
+
# * Disabling writing to the information dictionary will only prevent parts from being written.
|
63
|
+
# The #producer is always written to the information dictionary as per the AGPL license terms.
|
64
|
+
# The #modification_date may be written depending on the arguments to HexaPDF::Document#write.
|
65
|
+
#
|
66
|
+
# * If writing the metadata stream is enabled, any existing metadata stream is completely
|
67
|
+
# overwritten. This means the metadata stream is *not* updated with the changed information.
|
68
|
+
#
|
69
|
+
#
|
70
|
+
# == Adding custom metadata properties
|
71
|
+
#
|
72
|
+
# All the properties specified for the information dictionary are supported.
|
73
|
+
#
|
74
|
+
# Furthermore, HexaPDF supports writing custom properties to the metadata stream. For this to
|
75
|
+
# work the used XMP namespaces need to be registered using #register_namespace. Additionally,
|
76
|
+
# the types of all used XMP properties need to be registered using #register_property_type.
|
77
|
+
#
|
78
|
+
# The following types for XMP properties are supported:
|
79
|
+
#
|
80
|
+
# String::
|
81
|
+
# Maps to the XMP simple string value. Values need to be of type String.
|
82
|
+
#
|
83
|
+
# Date::
|
84
|
+
# Maps to the XMP simple string value, correctly formatted. Values need to be of type Time,
|
85
|
+
# Date, or DateTime.
|
86
|
+
#
|
87
|
+
# URI::
|
88
|
+
# Maps to the XMP simple value variant of URI. Values need to be of type String or URI.
|
89
|
+
#
|
90
|
+
# Boolean::
|
91
|
+
# Maps to the XMP simple string value, correctly formatted. Values need to be either +true+
|
92
|
+
# or +false+.
|
93
|
+
#
|
94
|
+
# OrderedArray::
|
95
|
+
# Maps to the XMP ordered array. Values need to be of type Array and items must be XMP
|
96
|
+
# simple values.
|
97
|
+
#
|
98
|
+
# UnorderedArray::
|
99
|
+
# Maps to the XMP unordered array. Values need to be of type Array and items must be
|
100
|
+
# XMP simple values.
|
101
|
+
#
|
102
|
+
# LanguageArray::
|
103
|
+
# Maps to the XMP language alternatives array. Values need to be of type Array and items
|
104
|
+
# must either be strings (they are associated with the set default language) or
|
105
|
+
# LocalizedString instances.
|
106
|
+
#
|
107
|
+
#
|
108
|
+
# See: PDF2.0 s14.3, https://www.adobe.com/products/xmp.html
|
109
|
+
class Metadata
|
110
|
+
|
111
|
+
# Represents a localized XMP string, i.e. a string with an attached language.
|
112
|
+
class LocalizedString < String
|
113
|
+
# The language identifier for the string in RFC3066 format.
|
114
|
+
attr_accessor :language
|
115
|
+
end
|
116
|
+
|
117
|
+
# Contains a mapping of predefined prefixes for XMP namespaces for metadata.
|
118
|
+
PREDEFINED_NAMESPACES = {
|
119
|
+
"rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
120
|
+
"xmp" => "http://ns.adobe.com/xap/1.0/",
|
121
|
+
"pdf" => "http://ns.adobe.com/pdf/1.3/",
|
122
|
+
"dc" => "http://purl.org/dc/elements/1.1/",
|
123
|
+
"x" => "adobe:ns:meta/",
|
124
|
+
}.freeze
|
125
|
+
|
126
|
+
# Contains a mapping of predefined XMP properties to their types, i.e. from namespace to
|
127
|
+
# property and then type.
|
128
|
+
PREDEFINED_PROPERTIES = {
|
129
|
+
"http://ns.adobe.com/xap/1.0/" => {
|
130
|
+
'CreatorTool' => 'String',
|
131
|
+
'CreateDate' => 'Date',
|
132
|
+
'ModifyDate' => 'Date',
|
133
|
+
}.freeze,
|
134
|
+
"http://ns.adobe.com/pdf/1.3/" => {
|
135
|
+
'Keywords' => 'String',
|
136
|
+
'Producer' => 'String',
|
137
|
+
'Trapped' => 'Boolean',
|
138
|
+
}.freeze,
|
139
|
+
"http://purl.org/dc/elements/1.1/" => {
|
140
|
+
'creator' => 'OrderedArray',
|
141
|
+
'description' => 'LanguageArray',
|
142
|
+
'title' => 'LanguageArray',
|
143
|
+
}.freeze,
|
144
|
+
}.freeze
|
145
|
+
|
146
|
+
|
147
|
+
# Creates a new Metadata object for the given PDF document.
|
148
|
+
def initialize(document)
|
149
|
+
@document = document
|
150
|
+
@namespaces = PREDEFINED_NAMESPACES.dup
|
151
|
+
@properties = PREDEFINED_PROPERTIES.transform_values {|value| value.dup}
|
152
|
+
@default_language = document.catalog[:Lang] || 'en'
|
153
|
+
@metadata = Hash.new {|h, k| h[k] = {} }
|
154
|
+
write_info_dict(true)
|
155
|
+
write_metadata_stream(true)
|
156
|
+
@document.register_listener(:complete_objects, &method(:write_metadata))
|
157
|
+
parse_metadata
|
158
|
+
end
|
159
|
+
|
160
|
+
# :call-seq:
|
161
|
+
# metadata.default_language -> language
|
162
|
+
# metadata.default_language(value) -> value
|
163
|
+
#
|
164
|
+
# Returns the default language in RFC3066 format used for unlocalized strings if no argument
|
165
|
+
# is given. Otherwise sets the default language to the given language.
|
166
|
+
#
|
167
|
+
# The initial default language is taken from the document catalog's /Lang entry. If that is not
|
168
|
+
# set, the default language is assumed to be English ('en').
|
169
|
+
def default_language(value = :UNSET)
|
170
|
+
if value == :UNSET
|
171
|
+
@default_language
|
172
|
+
else
|
173
|
+
@default_language = value
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
# Returns +true+ if the information dictionary should be written.
|
178
|
+
def write_info_dict?
|
179
|
+
@write_info_dict
|
180
|
+
end
|
181
|
+
|
182
|
+
# Makes HexaPDF write the information dictionary if +value+ is +true+.
|
183
|
+
#
|
184
|
+
# See the class documentation for caveats.
|
185
|
+
def write_info_dict(value)
|
186
|
+
@write_info_dict = value
|
187
|
+
end
|
188
|
+
|
189
|
+
# Returns +true+ if the metadata stream should be written.
|
190
|
+
def write_metadata_stream?
|
191
|
+
@write_metadata_stream
|
192
|
+
end
|
193
|
+
|
194
|
+
# Makes HexaPDF write the metadata stream if +value+ is +true+.
|
195
|
+
#
|
196
|
+
# See the class documentation for caveats.
|
197
|
+
def write_metadata_stream(value)
|
198
|
+
@write_metadata_stream = value
|
199
|
+
end
|
200
|
+
|
201
|
+
# Registers the +prefix+ for the given namespace +uri+.
|
202
|
+
def register_namespace(prefix, uri)
|
203
|
+
@namespaces[prefix] = uri
|
204
|
+
end
|
205
|
+
|
206
|
+
# Returns the namespace URI associated with the given prefix.
|
207
|
+
def namespace(ns)
|
208
|
+
@namespaces.fetch(ns) do
|
209
|
+
raise HexaPDF::Error, "Namespace prefix '#{ns}' not registered"
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
# Registers the +property+ for the namespace specified via +prefix+ as the given +type+.
|
214
|
+
#
|
215
|
+
# The argument +type+ has to be one of the following: 'String', 'Date', 'URI', 'Boolean',
|
216
|
+
# 'OrderedArray', 'UnorderedArray', or 'LanguageArray'.
|
217
|
+
def register_property_type(prefix, property, type)
|
218
|
+
(@properties[namespace(prefix)] ||= {})[property] = type
|
219
|
+
end
|
220
|
+
|
221
|
+
# :call-seq:
|
222
|
+
# metadata.property(ns_prefix, name) -> property_value
|
223
|
+
# metadata.property(ns_prefix, name, value) -> value
|
224
|
+
#
|
225
|
+
# Returns the value for the property specified via the namespace prefix +ns_prefix+ and +name+
|
226
|
+
# if the +value+ argument is not provided. Otherwise sets the property to +value+.
|
227
|
+
#
|
228
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
229
|
+
# property is deleted from the metadata.
|
230
|
+
def property(ns, property, value = :UNSET)
|
231
|
+
ns = @metadata[namespace(ns)]
|
232
|
+
if value == :UNSET
|
233
|
+
ns[property]
|
234
|
+
elsif value.nil?
|
235
|
+
ns.delete(property)
|
236
|
+
else
|
237
|
+
ns[property] = value
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
# :call-seq:
|
242
|
+
# metadata.title -> title or nil
|
243
|
+
# metadata.title(value) -> value
|
244
|
+
#
|
245
|
+
# Returns the document's title if no argument is given. Otherwise sets the document's title to
|
246
|
+
# the given value.
|
247
|
+
#
|
248
|
+
# The language for the title is specified via #default_language.
|
249
|
+
#
|
250
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
251
|
+
# property is deleted from the metadata.
|
252
|
+
#
|
253
|
+
# This metadata property is represented by the XMP name dc:title.
|
254
|
+
def title(value = :UNSET)
|
255
|
+
property('dc', 'title', value)
|
256
|
+
end
|
257
|
+
|
258
|
+
# :call-seq:
|
259
|
+
# metadata.author -> author or nil
|
260
|
+
# metadata.author(value) -> value
|
261
|
+
#
|
262
|
+
# Returns the name of the person who created the document (author) if no argument is given.
|
263
|
+
# Otherwise sets the author to the given value.
|
264
|
+
#
|
265
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
266
|
+
# property is deleted from the metadata.
|
267
|
+
#
|
268
|
+
# This metadata property is represented by the XMP name dc:creator.
|
269
|
+
def author(value = :UNSET)
|
270
|
+
property('dc', 'creator', value)
|
271
|
+
end
|
272
|
+
|
273
|
+
# :call-seq:
|
274
|
+
# metadata.subject -> subject or nil
|
275
|
+
# metadata.subject(value) -> value
|
276
|
+
#
|
277
|
+
# Returns the subject of the document if no argument is given. Otherwise sets the subject to
|
278
|
+
# the given value.
|
279
|
+
#
|
280
|
+
# The language for the subject is specified via #default_language.
|
281
|
+
#
|
282
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
283
|
+
# property is deleted from the metadata.
|
284
|
+
#
|
285
|
+
# This metadata property is represented by the XMP name dc:description.
|
286
|
+
def subject(value = :UNSET)
|
287
|
+
property('dc', 'description', value)
|
288
|
+
end
|
289
|
+
|
290
|
+
# :call-seq:
|
291
|
+
# metadata.keywords -> keywords or nil
|
292
|
+
# metadata.keywords(value) -> value
|
293
|
+
#
|
294
|
+
# Returns the keywords associated with the document if no argument is given. Otherwise sets
|
295
|
+
# keywords to the given value.
|
296
|
+
#
|
297
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
298
|
+
# property is deleted from the metadata.
|
299
|
+
#
|
300
|
+
# This metadata property is represented by the XMP name pdf:Keywords.
|
301
|
+
def keywords(value = :UNSET)
|
302
|
+
property('pdf', 'Keywords', value)
|
303
|
+
end
|
304
|
+
|
305
|
+
# :call-seq:
|
306
|
+
# metadata.creator -> creator or nil
|
307
|
+
# metadata.creator(value) -> value
|
308
|
+
#
|
309
|
+
# Returns the name of the PDF processor that created the original document from which this PDF
|
310
|
+
# was converted if no argument is given. Otherwise sets the name of the creator tool to the
|
311
|
+
# given value.
|
312
|
+
#
|
313
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
314
|
+
# property is deleted from the metadata.
|
315
|
+
#
|
316
|
+
# This metadata property is represented by the XMP name xmp:CreatorTool.
|
317
|
+
def creator(value = :UNSET)
|
318
|
+
property('xmp', 'CreatorTool', value)
|
319
|
+
end
|
320
|
+
|
321
|
+
# :call-seq:
|
322
|
+
# metadata.producer -> producer or nil
|
323
|
+
# metadata.producer(value) -> value
|
324
|
+
#
|
325
|
+
# Returns the name of the PDF processor that converted the original document to PDF if no
|
326
|
+
# argument is given. Otherwise sets the name of the producer to the given value.
|
327
|
+
#
|
328
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
329
|
+
# property is deleted from the metadata.
|
330
|
+
#
|
331
|
+
# This metadata property is represented by the XMP name pdf:Producer.
|
332
|
+
def producer(value = :UNSET)
|
333
|
+
property('pdf', 'Producer', value)
|
334
|
+
end
|
335
|
+
|
336
|
+
# :call-seq:
|
337
|
+
# metadata.creation_date -> creation_date or nil
|
338
|
+
# metadata.creation_date(value) -> value
|
339
|
+
#
|
340
|
+
# Returns the date and time (a Time object) the document was created if no argument is given.
|
341
|
+
# Otherwise sets the creation date to the given value.
|
342
|
+
#
|
343
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
344
|
+
# property is deleted from the metadata.
|
345
|
+
#
|
346
|
+
# This metadata property is represented by the XMP name xmp:CreateDate.
|
347
|
+
def creation_date(value = :UNSET)
|
348
|
+
property('xmp', 'CreateDate', value)
|
349
|
+
end
|
350
|
+
|
351
|
+
# :call-seq:
|
352
|
+
# metadata.modification_date -> modification_date or nil
|
353
|
+
# metadata.modification_date(value) -> value
|
354
|
+
#
|
355
|
+
# Returns the date and time (a Time object) the document was most recently modified if no
|
356
|
+
# argument is given. Otherwise sets the modification date to the given value.
|
357
|
+
#
|
358
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
359
|
+
# property is deleted from the metadata.
|
360
|
+
#
|
361
|
+
# This metadata property is represented by the XMP name xmp:ModifyDate.
|
362
|
+
def modification_date(value = :UNSET)
|
363
|
+
property('xmp', 'ModifyDate', value)
|
364
|
+
end
|
365
|
+
|
366
|
+
# :call-seq:
|
367
|
+
# metadata.trapped -> trapped or nil
|
368
|
+
# metadata.trapped(value) -> value
|
369
|
+
#
|
370
|
+
# Returns +true+ if the document has been modified to include trapping information if no
|
371
|
+
# argument is given. Otherwise sets the trapped status to the given boolean value.
|
372
|
+
#
|
373
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
374
|
+
# property is deleted from the metadata.
|
375
|
+
#
|
376
|
+
# This metadata property is represented by the XMP name pdf:Trapped.
|
377
|
+
def trapped(value = :UNSET)
|
378
|
+
property('pdf', 'Trapped', value)
|
379
|
+
end
|
380
|
+
|
381
|
+
private
|
382
|
+
|
383
|
+
# Parses the metadata from the information dictionary into the internal data structure.
|
384
|
+
def parse_metadata
|
385
|
+
info_dict = @document.trailer.info
|
386
|
+
ns_dc = namespace('dc')
|
387
|
+
ns_xmp = namespace('xmp')
|
388
|
+
ns_pdf = namespace('pdf')
|
389
|
+
@metadata[ns_dc]['title'] = info_dict[:Title] if info_dict.key?(:Title)
|
390
|
+
@metadata[ns_dc]['creator'] = info_dict[:Author] if info_dict.key?(:Author)
|
391
|
+
@metadata[ns_dc]['description'] = info_dict[:Subject] if info_dict.key?(:Subject)
|
392
|
+
@metadata[ns_xmp]['CreatorTool'] = info_dict[:Creator] if info_dict.key?(:Creator)
|
393
|
+
@metadata[ns_xmp]['CreateDate'] = info_dict[:CreationDate] if info_dict.key?(:CreationDate)
|
394
|
+
@metadata[ns_xmp]['ModifyDate'] = info_dict[:ModDate] if info_dict.key?(:ModDate)
|
395
|
+
@metadata[ns_pdf]['Keywords'] = info_dict[:Keywords] if info_dict.key?(:Keywords)
|
396
|
+
@metadata[ns_pdf]['Producer'] = info_dict[:Producer] if info_dict.key?(:Producer)
|
397
|
+
if info_dict.key?(:Trapped) && info_dict[:Trapped] != :Unknown
|
398
|
+
@metadata[ns_pdf]['Trapped'] = (info_dict[:Trapped] == :True)
|
399
|
+
end
|
400
|
+
end
|
401
|
+
|
402
|
+
# Writes the metadata to the specified destinations.
|
403
|
+
def write_metadata
|
404
|
+
ns_dc = namespace('dc')
|
405
|
+
ns_xmp = namespace('xmp')
|
406
|
+
ns_pdf = namespace('pdf')
|
407
|
+
|
408
|
+
if write_info_dict?
|
409
|
+
info_dict = @document.trailer.info
|
410
|
+
info_dict[:Title] = Array(@metadata[ns_dc]['title']).first
|
411
|
+
info_dict[:Author] = Array(@metadata[ns_dc]['creator']).join(', ')
|
412
|
+
info_dict[:Subject] = Array(@metadata[ns_dc]['description']).first
|
413
|
+
info_dict[:Creator] = @metadata[ns_xmp]['CreatorTool']
|
414
|
+
info_dict[:CreationDate] = @metadata[ns_xmp]['CreateDate']
|
415
|
+
info_dict[:ModDate] = @metadata[ns_xmp]['ModifyDate']
|
416
|
+
info_dict[:Keywords] = @metadata[ns_pdf]['Keywords']
|
417
|
+
info_dict[:Producer] = @metadata[ns_pdf]['Producer']
|
418
|
+
info_dict[:Trapped] = @metadata[ns_pdf]['Trapped'] ? :True : :False
|
419
|
+
end
|
420
|
+
|
421
|
+
if write_metadata_stream?
|
422
|
+
descriptions = @metadata.map do |namespace, values|
|
423
|
+
xmp_description(@namespaces.key(namespace), values)
|
424
|
+
end.join("\n")
|
425
|
+
obj = @document.catalog[:Metadata] ||= @document.add({Type: :Metadata, Subtype: :XML})
|
426
|
+
obj.stream = xmp_packet(descriptions)
|
427
|
+
end
|
428
|
+
end
|
429
|
+
|
430
|
+
# Creates an XMP packet with the given payload +data+.
|
431
|
+
def xmp_packet(data)
|
432
|
+
<<~XMP
|
433
|
+
<?xpacket begin="\u{FEFF}" id="#{SecureRandom.uuid.tr('-', '')}"?>
|
434
|
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
435
|
+
#{data}
|
436
|
+
</rdf:RDF>
|
437
|
+
<?xpacket end="r"?>
|
438
|
+
XMP
|
439
|
+
end
|
440
|
+
|
441
|
+
# Creates an 'rdf:Description' element for all metadata +values+ with the given +ns_prefix+.
|
442
|
+
def xmp_description(ns_prefix, values)
|
443
|
+
values = values.map do |name, value|
|
444
|
+
str = +"<#{ns_prefix}:#{name}"
|
445
|
+
case (property_type = @properties[namespace(ns_prefix)][name])
|
446
|
+
when 'String'
|
447
|
+
str << ">#{xmp_escape(value)}</#{ns_prefix}:#{name}>"
|
448
|
+
when 'Date'
|
449
|
+
str << ">#{xmp_date(value)}</#{ns_prefix}:#{name}>"
|
450
|
+
when 'URI'
|
451
|
+
str << " rdf:resource=\"#{xmp_escape(value.to_s)}\" />"
|
452
|
+
when 'Boolean'
|
453
|
+
str << ">#{value ? 'True' : 'False'}</#{ns_prefix}:#{name}>"
|
454
|
+
when 'LanguageArray'
|
455
|
+
value = Array(value).map do |item|
|
456
|
+
lang = item.respond_to?(:language) ? item.language : default_language
|
457
|
+
"<rdf:li xml:lang=\"#{lang}\">#{xmp_escape(item)}</rdf:li>"
|
458
|
+
end.join("\n")
|
459
|
+
str << "><rdf:Alt>\n#{value}\n</rdf:Alt></#{ns_prefix}:#{name}>"
|
460
|
+
when 'OrderedArray', 'UnorderedArray'
|
461
|
+
value = Array(value).map {|item| "<rdf:li>#{xmp_escape(item)}</rdf:li>" }.join("\n")
|
462
|
+
el_type = (property_type == 'OrderedArray' ? 'Seq' : 'Bag')
|
463
|
+
str << "><rdf:#{el_type}>\n#{value}\n</rdf:#{el_type}></#{ns_prefix}:#{name}>"
|
464
|
+
end
|
465
|
+
str
|
466
|
+
end.join("\n")
|
467
|
+
<<~XMP.strip
|
468
|
+
<rdf:Description rdf:about="" xmlns:#{ns_prefix}="#{xmp_escape(namespace(ns_prefix))}">
|
469
|
+
#{values}
|
470
|
+
</rdf:Description>
|
471
|
+
XMP
|
472
|
+
end
|
473
|
+
|
474
|
+
# Escapes the given value so as to be usable as XMP simple value.
|
475
|
+
def xmp_escape(value)
|
476
|
+
value.gsub(/<|>|"/, {'<' => '&lt;', '>' => '&gt;', '"' => '&quot;'})
|
477
|
+
end
|
478
|
+
|
479
|
+
# Formats the given date-time object (Time, Date, or DateTime) to be a valid XMP date-time
|
480
|
+
# value.
|
481
|
+
def xmp_date(date)
|
482
|
+
date.strftime("%Y-%m-%dT%H:%M:%S%:z")
|
483
|
+
end
|
484
|
+
|
485
|
+
end
|
486
|
+
|
487
|
+
end
|
488
|
+
end
|
data/lib/hexapdf/document.rb
CHANGED
@@ -120,6 +120,7 @@ module HexaPDF
|
|
120
120
|
autoload(:Files, 'hexapdf/document/files')
|
121
121
|
autoload(:Destinations, 'hexapdf/document/destinations')
|
122
122
|
autoload(:Layout, 'hexapdf/document/layout')
|
123
|
+
autoload(:Metadata, 'hexapdf/document/metadata')
|
123
124
|
|
124
125
|
# :call-seq:
|
125
126
|
# Document.open(filename, **docargs) -> doc
|
@@ -486,6 +487,16 @@ module HexaPDF
|
|
486
487
|
pdf_data ? @cache[pdf_data].clear : @cache.clear
|
487
488
|
end
|
488
489
|
|
490
|
+
# Returns the Metadata object that provides a convenience interface for working with the
|
491
|
+
# document metadata.
|
492
|
+
#
|
493
|
+
# Note that invoking this method means that, depending on the settings, the info dictionary as
|
494
|
+
# well as the metadata stream will be overwritten when the document gets written. See the
|
495
|
+
# "Caveats" section in the Metadata documentation.
|
496
|
+
def metadata
|
497
|
+
@metadata ||= Metadata.new(self)
|
498
|
+
end
|
499
|
+
|
489
500
|
# Returns the Pages object that provides convenience methods for working with the pages of the
|
490
501
|
# PDF file.
|
491
502
|
#
|
@@ -706,13 +717,17 @@ module HexaPDF
|
|
706
717
|
# Optimize the file size by using object and cross-reference streams. This will raise the PDF
|
707
718
|
# version to at least 1.5.
|
708
719
|
def write(file_or_io, incremental: false, validate: true, update_fields: true, optimize: false)
|
709
|
-
dispatch_message(:complete_objects)
|
710
|
-
|
711
720
|
if update_fields
|
712
721
|
trailer.update_id
|
713
|
-
|
722
|
+
if @metadata
|
723
|
+
metadata.modification_date(Time.now)
|
724
|
+
else
|
725
|
+
trailer.info[:ModDate] = Time.now
|
726
|
+
end
|
714
727
|
end
|
715
728
|
|
729
|
+
dispatch_message(:complete_objects)
|
730
|
+
|
716
731
|
if validate
|
717
732
|
self.validate(auto_correct: true) do |msg, correctable, obj|
|
718
733
|
next if correctable
|
data/lib/hexapdf/filter.rb
CHANGED
@@ -69,11 +69,11 @@ module HexaPDF
|
|
69
69
|
@block_used = false
|
70
70
|
end
|
71
71
|
|
72
|
-
# Returns the length of the wrapped string.
|
72
|
+
# Returns the length in bytes of the wrapped string.
|
73
73
|
#
|
74
74
|
# May only be called before #resume!
|
75
75
|
def length
|
76
|
-
str.length
|
76
|
+
str.bytesize
|
77
77
|
end
|
78
78
|
|
79
79
|
# Returns +true+ if #resume has not yet been called.
|