hexapdf 0.35.1 → 0.37.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +36 -0
- data/Rakefile +1 -1
- data/lib/hexapdf/configuration.rb +3 -0
- data/lib/hexapdf/content/canvas_composer.rb +1 -1
- data/lib/hexapdf/document/layout.rb +15 -6
- data/lib/hexapdf/document/metadata.rb +488 -0
- data/lib/hexapdf/document.rb +18 -3
- data/lib/hexapdf/filter.rb +2 -2
- data/lib/hexapdf/layout/box.rb +104 -29
- data/lib/hexapdf/layout/container_box.rb +159 -0
- data/lib/hexapdf/layout/frame.rb +7 -4
- data/lib/hexapdf/layout/list_box.rb +2 -1
- data/lib/hexapdf/layout.rb +1 -0
- data/lib/hexapdf/type/annotation.rb +71 -0
- data/lib/hexapdf/type/catalog.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +2 -1
- data/lib/hexapdf/type/metadata.rb +63 -0
- data/lib/hexapdf/type.rb +1 -0
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/document/test_layout.rb +6 -0
- data/test/hexapdf/document/test_metadata.rb +192 -0
- data/test/hexapdf/layout/test_box.rb +39 -13
- data/test/hexapdf/layout/test_container_box.rb +84 -0
- data/test/hexapdf/layout/test_frame.rb +3 -2
- data/test/hexapdf/test_filter.rb +12 -0
- data/test/hexapdf/test_writer.rb +2 -2
- data/test/hexapdf/type/test_font_simple.rb +9 -1
- metadata +8 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 464355c84e7dd5636fe34bb0feb96038e06d4e0701d0ce9e915a2a9bddd3c18a
|
4
|
+
data.tar.gz: 8bcebc03512574b0fd71396bbe21330f59cb49e75f2194d1cd47ced0648ef7ad
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ccfbc72734d74178b1eb49da85bcd364410b12abfbb2d2cd730e6a37176dc85c7b87369bd0463869d6815d911e3428a21b0e18e1492f75d75b59c53a68ebf835
|
7
|
+
data.tar.gz: bc7001b82ec40571b6257575923d0fb5ef4c1011ff4067b3b53c40822240328c461c358084f9de8d5271809d9d85b8bd2de91ce219e211cd84154b3156c7c426
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,39 @@
|
|
1
|
+
## 0.37.0 - 2024-01-29
|
2
|
+
|
3
|
+
### Added
|
4
|
+
|
5
|
+
* [HexaPDF::Document::Metadata] for working with metadata (reading the info
|
6
|
+
dictionary and writing it as well as the XMP metadata stream)
|
7
|
+
|
8
|
+
### Changed
|
9
|
+
|
10
|
+
* Minimum Ruby version to be 2.7
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
|
14
|
+
* [HexaPDF::FiberDoubleForString#length] to not assume a binary string
|
15
|
+
|
16
|
+
|
17
|
+
## 0.36.0 - 2024-01-20
|
18
|
+
|
19
|
+
### Added
|
20
|
+
|
21
|
+
* [HexaPDF::Layout::ContainerBox] for grouping child boxes together
|
22
|
+
|
23
|
+
### Changed
|
24
|
+
|
25
|
+
* [HexaPDF::Layout::Frame::FitResult#draw] to allow drawing at an offset
|
26
|
+
* [HexaPDF::Layout::Box#fit] to delegate the actual content fitting to the
|
27
|
+
`#fit_content` method
|
28
|
+
* [HexaPDF::Document::Layout#box] to allow using the block as drawing block for
|
29
|
+
the base box class
|
30
|
+
|
31
|
+
### Fixed
|
32
|
+
|
33
|
+
* [HexaPDF::Type::FontSimple#to_utf8] to work in case the font's encoding cannot
|
34
|
+
be retrieved
|
35
|
+
|
36
|
+
|
1
37
|
## 0.35.1 - 2024-01-11
|
2
38
|
|
3
39
|
### Added
|
data/Rakefile
CHANGED
@@ -47,7 +47,7 @@ namespace :dev do
|
|
47
47
|
end
|
48
48
|
|
49
49
|
task :test_all do
|
50
|
-
versions = `rbenv versions --bare | grep -i ^2.
|
50
|
+
versions = `rbenv versions --bare | grep -i ^2.7\\\\\\|^3.`.split("\n")
|
51
51
|
versions.each do |version|
|
52
52
|
sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
|
53
53
|
end
|
@@ -545,6 +545,7 @@ module HexaPDF
|
|
545
545
|
column: 'HexaPDF::Layout::ColumnBox',
|
546
546
|
list: 'HexaPDF::Layout::ListBox',
|
547
547
|
table: 'HexaPDF::Layout::TableBox',
|
548
|
+
container: 'HexaPDF::Layout::ContainerBox',
|
548
549
|
},
|
549
550
|
'page.default_media_box' => :A4,
|
550
551
|
'page.default_media_orientation' => :portrait,
|
@@ -687,6 +688,7 @@ module HexaPDF
|
|
687
688
|
XXReference: 'HexaPDF::Type::Form::Reference',
|
688
689
|
XXCIDSystemInfo: 'HexaPDF::Type::CIDFont::CIDSystemInfo',
|
689
690
|
Group: 'HexaPDF::Type::Form::Group',
|
691
|
+
Metadata: 'HexaPDF::Type::Metadata',
|
690
692
|
},
|
691
693
|
'object.subtype_map' => {
|
692
694
|
nil => {
|
@@ -705,6 +707,7 @@ module HexaPDF
|
|
705
707
|
Text: 'HexaPDF::Type::Annotations::Text',
|
706
708
|
Link: 'HexaPDF::Type::Annotations::Link',
|
707
709
|
Widget: 'HexaPDF::Type::Annotations::Widget',
|
710
|
+
XML: 'HexaPDF::Type::Metadata'
|
708
711
|
},
|
709
712
|
XObject: {
|
710
713
|
Image: 'HexaPDF::Type::Image',
|
@@ -118,7 +118,7 @@ module HexaPDF
|
|
118
118
|
# composer.list(item_spacing: 2) do |list|
|
119
119
|
# composer.document.config['layout.boxes.map'].each do |name, klass|
|
120
120
|
# list.formatted_text([{text: name.to_s, fill_color: "hp-blue-dark"},
|
121
|
-
# {text: "\n#{klass}"}, font_size:
|
121
|
+
# {text: "\n#{klass}"}], font_size: 6)
|
122
122
|
# end
|
123
123
|
# end
|
124
124
|
# end
|
@@ -238,10 +238,12 @@ module HexaPDF
|
|
238
238
|
#
|
239
239
|
# The +name+ argument refers to the registered name of the box class that is looked up in the
|
240
240
|
# 'layout.boxes.map' configuration option. The +box_options+ are passed as-is to the
|
241
|
-
# initialization method of that box class
|
241
|
+
# initialization method of that box class.
|
242
242
|
#
|
243
243
|
# If a block is provided, a ChildrenCollector is yielded and the collected children are passed
|
244
|
-
# to the box initialization method via the :children keyword argument.
|
244
|
+
# to the box initialization method via the :children keyword argument. There is one exception
|
245
|
+
# to this rule in case +name+ is +base+: The provided block is passed to the initialization
|
246
|
+
# method of the base box class to function as drawing method.
|
245
247
|
#
|
246
248
|
# See #text_box for details on +width+, +height+ and +style+ (note that there is no
|
247
249
|
# +style_properties+ argument).
|
@@ -252,12 +254,19 @@ module HexaPDF
|
|
252
254
|
# layout.box(:column) do |column| # column box with one child
|
253
255
|
# column.lorem_ipsum
|
254
256
|
# end
|
255
|
-
|
256
|
-
|
257
|
-
|
257
|
+
# layout.box(width: 100) do |canvas, box|
|
258
|
+
# canvas.line(0, 0, box.content_width, box.content_height).stroke
|
259
|
+
# end
|
260
|
+
def box(name = :base, width: 0, height: 0, style: nil, **box_options, &block)
|
261
|
+
if block_given?
|
262
|
+
if name == :base
|
263
|
+
box_block = block
|
264
|
+
elsif !box_options.key?(:children)
|
265
|
+
box_options[:children] = ChildrenCollector.collect(self, &block)
|
266
|
+
end
|
258
267
|
end
|
259
268
|
box_class_for_name(name).new(width: width, height: height,
|
260
|
-
style: retrieve_style(style), **box_options)
|
269
|
+
style: retrieve_style(style), **box_options, &box_block)
|
261
270
|
end
|
262
271
|
|
263
272
|
# Creates an array of HexaPDF::Layout::TextFragment objects for the given +text+.
|
@@ -0,0 +1,488 @@
|
|
1
|
+
# -*- encoding: utf-8; frozen_string_literal: true -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2014-2023 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#
|
33
|
+
# If the GNU Affero General Public License doesn't fit your need,
|
34
|
+
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
|
35
|
+
#++
|
36
|
+
|
37
|
+
require 'securerandom'
|
38
|
+
require 'hexapdf/dictionary'
|
39
|
+
require 'hexapdf/error'
|
40
|
+
|
41
|
+
module HexaPDF
|
42
|
+
class Document
|
43
|
+
|
44
|
+
# This class provides methods for reading and writing the document-level metadata.
|
45
|
+
#
|
46
|
+
# When an instance is created (usually through HexaPDF::Document#metadata), the metadata is read
|
47
|
+
# from the document's information dictionary (see HexaPDF::Type::Info) and made available
|
48
|
+
# through the various methods.
|
49
|
+
#
|
50
|
+
# By default, the metadata is written to the information dictionary as well as to the document's
|
51
|
+
# metadata stream (see HexaPDF::Type::Metadata) once the document is written. This can be
|
52
|
+
# controlled via the #write_info_dict and #write_metadata_stream methods.
|
53
|
+
#
|
54
|
+
# While HexaPDF is able to write an XMP packet (using a limited form) to the document's metadata
|
55
|
+
# stream, it provides no way for reading XMP metadata. If reading functionality or extended
|
56
|
+
# writing functionality is needed, make sure this class does not write the metadata and
|
57
|
+
# read/create the metadata stream yourself.
|
58
|
+
#
|
59
|
+
#
|
60
|
+
# == Caveats
|
61
|
+
#
|
62
|
+
# * Disabling writing to the information dictionary will only prevent parts from being written.
|
63
|
+
# The #producer is always written to the information dictionary as per the AGPL license terms.
|
64
|
+
# The #modification_date may be written depending on the arguments to HexaPDF::Document#write.
|
65
|
+
#
|
66
|
+
# * If writing the metadata stream is enabled, any existing metadata stream is completely
|
67
|
+
# overwritten. This means the metadata stream is *not* updated with the changed information.
|
68
|
+
#
|
69
|
+
#
|
70
|
+
# == Adding custom metadata properties
|
71
|
+
#
|
72
|
+
# All the properties specified for the information dictionary are supported.
|
73
|
+
#
|
74
|
+
# Furthermore, HexaPDF supports writing custom properties to the metadata stream. For this to
|
75
|
+
# work the used XMP namespaces need to be registered using #register_namespace. Additionally,
|
76
|
+
# the types of all used XMP properties need to be registered using #register_property_type.
|
77
|
+
#
|
78
|
+
# The following types for XMP properties are supported:
|
79
|
+
#
|
80
|
+
# String::
|
81
|
+
# Maps to the XMP simple string value. Values need to be of type String.
|
82
|
+
#
|
83
|
+
# Date::
|
84
|
+
# Maps to the XMP simple string value, correctly formatted. Values need to be of type Time,
|
85
|
+
# Date, or DateTime.
|
86
|
+
#
|
87
|
+
# URI::
|
88
|
+
# Maps to the XMP simple value variant of URI. Values need to be of type String or URI.
|
89
|
+
#
|
90
|
+
# Boolean::
|
91
|
+
# Maps to the XMP simple string value, correctly formatted. Values need to be either +true+
|
92
|
+
# or +false+.
|
93
|
+
#
|
94
|
+
# OrderedArray::
|
95
|
+
# Maps to the XMP ordered array. Values need to be of type Array and items must be XMP
|
96
|
+
# simple values.
|
97
|
+
#
|
98
|
+
# UnorderedArray::
|
99
|
+
# Maps to the XMP unordered array. Values need to be of type Array and items must be
|
100
|
+
# simple values.
|
101
|
+
#
|
102
|
+
# LanguageArray::
|
103
|
+
# Maps to the XMP language alternatives array. Values need to be of type Array and items
|
104
|
+
# must either be strings (they are associated with the set default language) or
|
105
|
+
# LocalizedString instances.
|
106
|
+
#
|
107
|
+
#
|
108
|
+
# See: PDF2.0 s14.3, https://www.adobe.com/products/xmp.html
|
109
|
+
class Metadata
|
110
|
+
|
111
|
+
# Represents a localized XMP string, i.e. a string with an attached language.
|
112
|
+
class LocalizedString < String
|
113
|
+
# The language identifier for the string in RFC3066 format.
|
114
|
+
attr_accessor :language
|
115
|
+
end
|
116
|
+
|
117
|
+
# Contains a mapping of predefined prefixes for XMP namespaces for metadata.
|
118
|
+
PREDEFINED_NAMESPACES = {
|
119
|
+
"rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
|
120
|
+
"xmp" => "http://ns.adobe.com/xap/1.0/",
|
121
|
+
"pdf" => "http://ns.adobe.com/pdf/1.3/",
|
122
|
+
"dc" => "http://purl.org/dc/elements/1.1/",
|
123
|
+
"x" => "adobe:ns:meta/",
|
124
|
+
}.freeze
|
125
|
+
|
126
|
+
# Contains a mapping of predefined XMP properties to their types, i.e. from namespace to
|
127
|
+
# property and then type.
|
128
|
+
PREDEFINED_PROPERTIES = {
|
129
|
+
"http://ns.adobe.com/xap/1.0/" => {
|
130
|
+
'CreatorTool' => 'String',
|
131
|
+
'CreateDate' => 'Date',
|
132
|
+
'ModifyDate' => 'Date',
|
133
|
+
}.freeze,
|
134
|
+
"http://ns.adobe.com/pdf/1.3/" => {
|
135
|
+
'Keywords' => 'String',
|
136
|
+
'Producer' => 'String',
|
137
|
+
'Trapped' => 'Boolean',
|
138
|
+
}.freeze,
|
139
|
+
"http://purl.org/dc/elements/1.1/" => {
|
140
|
+
'creator' => 'OrderedArray',
|
141
|
+
'description' => 'LanguageArray',
|
142
|
+
'title' => 'LanguageArray',
|
143
|
+
}.freeze,
|
144
|
+
}.freeze
|
145
|
+
|
146
|
+
|
147
|
+
# Creates a new Metadata object for the given PDF document.
|
148
|
+
def initialize(document)
|
149
|
+
@document = document
|
150
|
+
@namespaces = PREDEFINED_NAMESPACES.dup
|
151
|
+
@properties = PREDEFINED_PROPERTIES.transform_values {|value| value.dup}
|
152
|
+
@default_language = document.catalog[:Lang] || 'en'
|
153
|
+
@metadata = Hash.new {|h, k| h[k] = {} }
|
154
|
+
write_info_dict(true)
|
155
|
+
write_metadata_stream(true)
|
156
|
+
@document.register_listener(:complete_objects, &method(:write_metadata))
|
157
|
+
parse_metadata
|
158
|
+
end
|
159
|
+
|
160
|
+
# :call-seq:
|
161
|
+
# metadata.default_language -> language
|
162
|
+
# metadata.default_language(value) -> value
|
163
|
+
#
|
164
|
+
# Returns the default language in RFC3066 format used for unlocalized strings if no argument
|
165
|
+
# is given. Otherwise sets the default language to the given language.
|
166
|
+
#
|
167
|
+
# The initial default language is taken from the document catalog's /Lang entry. If that is not
|
168
|
+
# set, the default language is assumed to be English ('en').
|
169
|
+
def default_language(value = :UNSET)
|
170
|
+
if value == :UNSET
|
171
|
+
@default_language
|
172
|
+
else
|
173
|
+
@default_language = value
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
# Returns +true+ if the information dictionary should be written.
|
178
|
+
def write_info_dict?
|
179
|
+
@write_info_dict
|
180
|
+
end
|
181
|
+
|
182
|
+
# Makes HexaPDF write the information dictionary if +value+ is +true+.
|
183
|
+
#
|
184
|
+
# See the class documentation for caveats.
|
185
|
+
def write_info_dict(value)
|
186
|
+
@write_info_dict = value
|
187
|
+
end
|
188
|
+
|
189
|
+
# Returns +true+ if the metadata stream should be written.
|
190
|
+
def write_metadata_stream?
|
191
|
+
@write_metadata_stream
|
192
|
+
end
|
193
|
+
|
194
|
+
# Makes HexaPDF write the metadata stream if +value+ is +true+.
|
195
|
+
#
|
196
|
+
# See the class documentation for caveats.
|
197
|
+
def write_metadata_stream(value)
|
198
|
+
@write_metadata_stream = value
|
199
|
+
end
|
200
|
+
|
201
|
+
# Registers the +prefix+ for the given namespace +uri+.
|
202
|
+
def register_namespace(prefix, uri)
|
203
|
+
@namespaces[prefix] = uri
|
204
|
+
end
|
205
|
+
|
206
|
+
# Returns the namespace URI associated with the given prefix.
|
207
|
+
def namespace(ns)
|
208
|
+
@namespaces.fetch(ns) do
|
209
|
+
raise HexaPDF::Error, "Namespace prefix '#{ns}' not registered"
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
# Registers the +property+ for the namespace specified via +prefix+ as the given +type+.
|
214
|
+
#
|
215
|
+
# The argument +type+ has to be one of the following: 'String', 'Date', 'URI', 'Boolean',
|
216
|
+
# 'OrderedArray', 'UnorderedArray', or 'LanguageArray'.
|
217
|
+
def register_property_type(prefix, property, type)
|
218
|
+
(@properties[namespace(prefix)] ||= {})[property] = type
|
219
|
+
end
|
220
|
+
|
221
|
+
# :call-seq:
|
222
|
+
# metadata.property(ns_prefix, name) -> property_value
|
223
|
+
# metadata.property(ns_prefix, name, value) -> value
|
224
|
+
#
|
225
|
+
# Returns the value for the property specified via the namespace prefix +ns_prefix+ and +name+
|
226
|
+
# if the +value+ argument is not provided. Otherwise sets the property to +value+.
|
227
|
+
#
|
228
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
229
|
+
# property is deleted from the metadata.
|
230
|
+
def property(ns, property, value = :UNSET)
|
231
|
+
ns = @metadata[namespace(ns)]
|
232
|
+
if value == :UNSET
|
233
|
+
ns[property]
|
234
|
+
elsif value.nil?
|
235
|
+
ns.delete(property)
|
236
|
+
else
|
237
|
+
ns[property] = value
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
# :call-seq:
|
242
|
+
# metadata.title -> title or nil
|
243
|
+
# metadata.title(value) -> value
|
244
|
+
#
|
245
|
+
# Returns the document's title if no argument is given. Otherwise sets the document's title to
|
246
|
+
# the given value.
|
247
|
+
#
|
248
|
+
# The language for the title is specified via #default_language.
|
249
|
+
#
|
250
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
251
|
+
# property is deleted from the metadata.
|
252
|
+
#
|
253
|
+
# This metadata property is represented by the XMP name dc:title.
|
254
|
+
def title(value = :UNSET)
|
255
|
+
property('dc', 'title', value)
|
256
|
+
end
|
257
|
+
|
258
|
+
# :call-seq:
|
259
|
+
# metadata.author -> author or nil
|
260
|
+
# metadata.author(value) -> value
|
261
|
+
#
|
262
|
+
# Returns the name of the person who created the document (author) if no argument is given.
|
263
|
+
# Otherwise sets the author to the given value.
|
264
|
+
#
|
265
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
266
|
+
# property is deleted from the metadata.
|
267
|
+
#
|
268
|
+
# This metadata property is represented by the XMP name dc:creator.
|
269
|
+
def author(value = :UNSET)
|
270
|
+
property('dc', 'creator', value)
|
271
|
+
end
|
272
|
+
|
273
|
+
# :call-seq:
|
274
|
+
# metadata.subject -> subject or nil
|
275
|
+
# metadata.subject(value) -> value
|
276
|
+
#
|
277
|
+
# Returns the subject of the document if no argument is given. Otherwise sets the subject to
|
278
|
+
# the given value.
|
279
|
+
#
|
280
|
+
# The language for the subject is specified via #default_language.
|
281
|
+
#
|
282
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
283
|
+
# property is deleted from the metadata.
|
284
|
+
#
|
285
|
+
# This metadata property is represented by the XMP name dc:description.
|
286
|
+
def subject(value = :UNSET)
|
287
|
+
property('dc', 'description', value)
|
288
|
+
end
|
289
|
+
|
290
|
+
# :call-seq:
|
291
|
+
# metadata.keywords -> keywords or nil
|
292
|
+
# metadata.keywords(value) -> value
|
293
|
+
#
|
294
|
+
# Returns the keywords associated with the document if no argument is given. Otherwise sets
|
295
|
+
# keywords to the given value.
|
296
|
+
#
|
297
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
298
|
+
# property is deleted from the metadata.
|
299
|
+
#
|
300
|
+
# This metadata property is represented by the XMP name pdf:Keywords.
|
301
|
+
def keywords(value = :UNSET)
|
302
|
+
property('pdf', 'Keywords', value)
|
303
|
+
end
|
304
|
+
|
305
|
+
# :call-seq:
|
306
|
+
# metadata.creator -> creator or nil
|
307
|
+
# metadata.creator(value) -> value
|
308
|
+
#
|
309
|
+
# Returns the name of the PDF processor that created the original document from which this PDF
|
310
|
+
# was converted if no argument is given. Otherwise sets the name of the creator tool to the
|
311
|
+
# given value.
|
312
|
+
#
|
313
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
314
|
+
# property is deleted from the metadata.
|
315
|
+
#
|
316
|
+
# This metadata property is represented by the XMP name xmp:CreatorTool.
|
317
|
+
def creator(value = :UNSET)
|
318
|
+
property('xmp', 'CreatorTool', value)
|
319
|
+
end
|
320
|
+
|
321
|
+
# :call-seq:
|
322
|
+
# metadata.producer -> producer or nil
|
323
|
+
# metadata.producer(value) -> value
|
324
|
+
#
|
325
|
+
# Returns the name of the PDF processor that converted the original document to PDF if no
|
326
|
+
# argument is given. Otherwise sets the name of the producer to the given value.
|
327
|
+
#
|
328
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
329
|
+
# property is deleted from the metadata.
|
330
|
+
#
|
331
|
+
# This metadata property is represented by the XMP name pdf:Producer.
|
332
|
+
def producer(value = :UNSET)
|
333
|
+
property('pdf', 'Producer', value)
|
334
|
+
end
|
335
|
+
|
336
|
+
# :call-seq:
|
337
|
+
# metadata.creation_date -> creation_date or nil
|
338
|
+
# metadata.creation_date(value) -> value
|
339
|
+
#
|
340
|
+
# Returns the date and time (a Time object) the document was created if no argument is given.
|
341
|
+
# Otherwise sets the creation date to the given value.
|
342
|
+
#
|
343
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
344
|
+
# property is deleted from the metadata.
|
345
|
+
#
|
346
|
+
# This metadata property is represented by the XMP name xmp:CreateDate.
|
347
|
+
def creation_date(value = :UNSET)
|
348
|
+
property('xmp', 'CreateDate', value)
|
349
|
+
end
|
350
|
+
|
351
|
+
# :call-seq:
|
352
|
+
# metadata.modification_date -> modification_date or nil
|
353
|
+
# metadata.modification_date(value) -> value
|
354
|
+
#
|
355
|
+
# Returns the date and time (a Time object) the document was most recently modified if no
|
356
|
+
# argument is given. Otherwise sets the modification date to the given value.
|
357
|
+
#
|
358
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
359
|
+
# property is deleted from the metadata.
|
360
|
+
#
|
361
|
+
# This metadata property is represented by the XMP name xmp:ModifyDate.
|
362
|
+
def modification_date(value = :UNSET)
|
363
|
+
property('xmp', 'ModifyDate', value)
|
364
|
+
end
|
365
|
+
|
366
|
+
# :call-seq:
|
367
|
+
# metadata.trapped -> trapped or nil
|
368
|
+
# metadata.trapped(value) -> value
|
369
|
+
#
|
370
|
+
# Returns +true+ if the document has been modified to include trapping information if no
|
371
|
+
# argument is given. Otherwise sets the trapped status to the given boolean value.
|
372
|
+
#
|
373
|
+
# The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
|
374
|
+
# property is deleted from the metadata.
|
375
|
+
#
|
376
|
+
# This metadata property is represented by the XMP name pdf:Trapped.
|
377
|
+
def trapped(value = :UNSET)
|
378
|
+
property('pdf', 'Trapped', value)
|
379
|
+
end
|
380
|
+
|
381
|
+
private
|
382
|
+
|
383
|
+
# Parses the metadata from the information dictionary into the internal data structure.
|
384
|
+
def parse_metadata
|
385
|
+
info_dict = @document.trailer.info
|
386
|
+
ns_dc = namespace('dc')
|
387
|
+
ns_xmp = namespace('xmp')
|
388
|
+
ns_pdf = namespace('pdf')
|
389
|
+
@metadata[ns_dc]['title'] = info_dict[:Title] if info_dict.key?(:Title)
|
390
|
+
@metadata[ns_dc]['creator'] = info_dict[:Author] if info_dict.key?(:Author)
|
391
|
+
@metadata[ns_dc]['description'] = info_dict[:Subject] if info_dict.key?(:Subject)
|
392
|
+
@metadata[ns_xmp]['CreatorTool'] = info_dict[:Creator] if info_dict.key?(:Creator)
|
393
|
+
@metadata[ns_xmp]['CreateDate'] = info_dict[:CreationDate] if info_dict.key?(:CreationDate)
|
394
|
+
@metadata[ns_xmp]['ModifyDate'] = info_dict[:ModDate] if info_dict.key?(:ModDate)
|
395
|
+
@metadata[ns_pdf]['Keywords'] = info_dict[:Keywords] if info_dict.key?(:Keywords)
|
396
|
+
@metadata[ns_pdf]['Producer'] = info_dict[:Producer] if info_dict.key?(:Producer)
|
397
|
+
if info_dict.key?(:Trapped) && info_dict[:Trapped] != :Unknown
|
398
|
+
@metadata[ns_pdf]['Trapped'] = (info_dict[:Trapped] == :True)
|
399
|
+
end
|
400
|
+
end
|
401
|
+
|
402
|
+
# Writes the metadata to the specified destinations.
|
403
|
+
def write_metadata
|
404
|
+
ns_dc = namespace('dc')
|
405
|
+
ns_xmp = namespace('xmp')
|
406
|
+
ns_pdf = namespace('pdf')
|
407
|
+
|
408
|
+
if write_info_dict?
|
409
|
+
info_dict = @document.trailer.info
|
410
|
+
info_dict[:Title] = Array(@metadata[ns_dc]['title']).first
|
411
|
+
info_dict[:Author] = Array(@metadata[ns_dc]['creator']).join(', ')
|
412
|
+
info_dict[:Subject] = Array(@metadata[ns_dc]['description']).first
|
413
|
+
info_dict[:Creator] = @metadata[ns_xmp]['CreatorTool']
|
414
|
+
info_dict[:CreationDate] = @metadata[ns_xmp]['CreateDate']
|
415
|
+
info_dict[:ModDate] = @metadata[ns_xmp]['ModifyDate']
|
416
|
+
info_dict[:Keywords] = @metadata[ns_pdf]['Keywords']
|
417
|
+
info_dict[:Producer] = @metadata[ns_pdf]['Producer']
|
418
|
+
info_dict[:Trapped] = @metadata[ns_pdf]['Trapped'] ? :True : :False
|
419
|
+
end
|
420
|
+
|
421
|
+
if write_metadata_stream?
|
422
|
+
descriptions = @metadata.map do |namespace, values|
|
423
|
+
xmp_description(@namespaces.key(namespace), values)
|
424
|
+
end.join("\n")
|
425
|
+
obj = @document.catalog[:Metadata] ||= @document.add({Type: :Metadata, Subtype: :XML})
|
426
|
+
obj.stream = xmp_packet(descriptions)
|
427
|
+
end
|
428
|
+
end
|
429
|
+
|
430
|
+
# Creates an XMP packet with the given payload +data+.
|
431
|
+
def xmp_packet(data)
|
432
|
+
<<~XMP
|
433
|
+
<?xpacket begin="\u{FEFF}" id="#{SecureRandom.uuid.tr('-', '')}"?>
|
434
|
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
435
|
+
#{data}
|
436
|
+
</rdf:RDF>
|
437
|
+
<?xpacket end="r"?>
|
438
|
+
XMP
|
439
|
+
end
|
440
|
+
|
441
|
+
# Creates an 'rdf:Description' element for all metadata +values+ with the given +ns_prefix+.
|
442
|
+
def xmp_description(ns_prefix, values)
|
443
|
+
values = values.map do |name, value|
|
444
|
+
str = +"<#{ns_prefix}:#{name}"
|
445
|
+
case (property_type = @properties[namespace(ns_prefix)][name])
|
446
|
+
when 'String'
|
447
|
+
str << ">#{xmp_escape(value)}</#{ns_prefix}:#{name}>"
|
448
|
+
when 'Date'
|
449
|
+
str << ">#{xmp_date(value)}</#{ns_prefix}:#{name}>"
|
450
|
+
when 'URI'
|
451
|
+
str << " rdf:resource=\"#{xmp_escape(value.to_s)}\" />"
|
452
|
+
when 'Boolean'
|
453
|
+
str << ">#{value ? 'True' : 'False'}</#{ns_prefix}:#{name}>"
|
454
|
+
when 'LanguageArray'
|
455
|
+
value = Array(value).map do |item|
|
456
|
+
lang = item.respond_to?(:language) ? item.language : default_language
|
457
|
+
"<rdf:li xml:lang=\"#{lang}\">#{xmp_escape(item)}</rdf:li>"
|
458
|
+
end.join("\n")
|
459
|
+
str << "><rdf:Alt>\n#{value}\n</rdf:Alt></#{ns_prefix}:#{name}>"
|
460
|
+
when 'OrderedArray', 'UnorderedArray'
|
461
|
+
value = Array(value).map {|item| "<rdf:li>#{xmp_escape(item)}</rdf:li>" }.join("\n")
|
462
|
+
el_type = (property_type == 'OrderedArray' ? 'Seq' : 'Bag')
|
463
|
+
str << "><rdf:#{el_type}>\n#{value}\n</rdf:#{el_type}></#{ns_prefix}:#{name}>"
|
464
|
+
end
|
465
|
+
str
|
466
|
+
end.join("\n")
|
467
|
+
<<~XMP.strip
|
468
|
+
<rdf:Description rdf:about="" xmlns:#{ns_prefix}="#{xmp_escape(namespace(ns_prefix))}">
|
469
|
+
#{values}
|
470
|
+
</rdf:Description>
|
471
|
+
XMP
|
472
|
+
end
|
473
|
+
|
474
|
+
# Escapes the given value so as to be usable as XMP simple value.
|
475
|
+
def xmp_escape(value)
|
476
|
+
value.gsub(/<|>|"/, {'<' => '&lt;', '>' => '&gt;', '"' => '&quot;'})
|
477
|
+
end
|
478
|
+
|
479
|
+
# Formats the given date-time object (Time, Date, or DateTime) to be a valid XMP date-time
|
480
|
+
# value.
|
481
|
+
def xmp_date(date)
|
482
|
+
date.strftime("%Y-%m-%dT%H:%M:%S%:z")
|
483
|
+
end
|
484
|
+
|
485
|
+
end
|
486
|
+
|
487
|
+
end
|
488
|
+
end
|
data/lib/hexapdf/document.rb
CHANGED
@@ -120,6 +120,7 @@ module HexaPDF
|
|
120
120
|
autoload(:Files, 'hexapdf/document/files')
|
121
121
|
autoload(:Destinations, 'hexapdf/document/destinations')
|
122
122
|
autoload(:Layout, 'hexapdf/document/layout')
|
123
|
+
autoload(:Metadata, 'hexapdf/document/metadata')
|
123
124
|
|
124
125
|
# :call-seq:
|
125
126
|
# Document.open(filename, **docargs) -> doc
|
@@ -486,6 +487,16 @@ module HexaPDF
|
|
486
487
|
pdf_data ? @cache[pdf_data].clear : @cache.clear
|
487
488
|
end
|
488
489
|
|
490
|
+
# Returns the Metadata object that provides a convenience interface for working with the
|
491
|
+
# document metadata.
|
492
|
+
#
|
493
|
+
# Note that invoking this method means that, depending on the settings, the info dictionary as
|
494
|
+
# well as the metadata stream will be overwritten when the document gets written. See the
|
495
|
+
# "Caveats" section in the Metadata documentation.
|
496
|
+
def metadata
|
497
|
+
@metadata ||= Metadata.new(self)
|
498
|
+
end
|
499
|
+
|
489
500
|
# Returns the Pages object that provides convenience methods for working with the pages of the
|
490
501
|
# PDF file.
|
491
502
|
#
|
@@ -706,13 +717,17 @@ module HexaPDF
|
|
706
717
|
# Optimize the file size by using object and cross-reference streams. This will raise the PDF
|
707
718
|
# version to at least 1.5.
|
708
719
|
def write(file_or_io, incremental: false, validate: true, update_fields: true, optimize: false)
|
709
|
-
dispatch_message(:complete_objects)
|
710
|
-
|
711
720
|
if update_fields
|
712
721
|
trailer.update_id
|
713
|
-
|
722
|
+
if @metadata
|
723
|
+
metadata.modification_date(Time.now)
|
724
|
+
else
|
725
|
+
trailer.info[:ModDate] = Time.now
|
726
|
+
end
|
714
727
|
end
|
715
728
|
|
729
|
+
dispatch_message(:complete_objects)
|
730
|
+
|
716
731
|
if validate
|
717
732
|
self.validate(auto_correct: true) do |msg, correctable, obj|
|
718
733
|
next if correctable
|
data/lib/hexapdf/filter.rb
CHANGED
@@ -69,11 +69,11 @@ module HexaPDF
|
|
69
69
|
@block_used = false
|
70
70
|
end
|
71
71
|
|
72
|
-
# Returns the length of the wrapped string.
|
72
|
+
# Returns the length in bytes of the wrapped string.
|
73
73
|
#
|
74
74
|
# May only be called before #resume!
|
75
75
|
def length
|
76
|
-
str.
|
76
|
+
str.bytesize
|
77
77
|
end
|
78
78
|
|
79
79
|
# Returns +true+ if #resume has not yet been called.
|