hexapdf 0.35.1 → 0.37.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a7769176f4b753c876ecfb95a6cd2a954249388a7da9e308fbd8c25ba071c9b
4
- data.tar.gz: f39b081ab1408fd08a3dff88f8aa3417283e98478b3e9fad60a692afb16e92c1
3
+ metadata.gz: 464355c84e7dd5636fe34bb0feb96038e06d4e0701d0ce9e915a2a9bddd3c18a
4
+ data.tar.gz: 8bcebc03512574b0fd71396bbe21330f59cb49e75f2194d1cd47ced0648ef7ad
5
5
  SHA512:
6
- metadata.gz: 21fcdbd57cdcf436edd1de50f0268d05573b92b70edd1eafeb00af0414eed3cbaa8f107ac1310419a29a113dc5a33ec991fa49a4fa0b47edea1bd2eb7afb0768
7
- data.tar.gz: 0c6732024ebd325a4216fe8e4a2fdc69d51365b873ec53c926a9a1e575730cdb6d5f45eea5cd211c1cb2a21afd16cb75f3a1782411a2971eed7afe2f61708dfd
6
+ metadata.gz: ccfbc72734d74178b1eb49da85bcd364410b12abfbb2d2cd730e6a37176dc85c7b87369bd0463869d6815d911e3428a21b0e18e1492f75d75b59c53a68ebf835
7
+ data.tar.gz: bc7001b82ec40571b6257575923d0fb5ef4c1011ff4067b3b53c40822240328c461c358084f9de8d5271809d9d85b8bd2de91ce219e211cd84154b3156c7c426
data/CHANGELOG.md CHANGED
@@ -1,3 +1,39 @@
1
+ ## 0.37.0 - 2024-01-29
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Document::Metadata] for working with metadata (reading the info
6
+ dictionary and writing it as well as the XMP metadata stream)
7
+
8
+ ### Changed
9
+
10
+ * Minimum Ruby version to be 2.7
11
+
12
+ ### Fixed
13
+
14
+ * [HexaPDF::FiberDoubleForString#length] to not assume a binary string
15
+
16
+
17
+ ## 0.36.0 - 2024-01-20
18
+
19
+ ### Added
20
+
21
+ * [HexaPDF::Layout::ContainerBox] for grouping child boxes together
22
+
23
+ ### Changed
24
+
25
+ * [HexaPDF::Layout::Frame::FitResult#draw] to allow drawing at an offset
26
+ * [HexaPDF::Layout::Box#fit] to delegate the actual content fitting to the
27
+ `#fit_content` method
28
+ * [HexaPDF::Document::Layout#box] to allow using the block as drawing block for
29
+ the base box class
30
+
31
+ ### Fixed
32
+
33
+ * [HexaPDF::Type::FontSimple#to_utf8] to work in case the font's encoding cannot
34
+ be retrieved
35
+
36
+
1
37
  ## 0.35.1 - 2024-01-11
2
38
 
3
39
  ### Added
data/Rakefile CHANGED
@@ -47,7 +47,7 @@ namespace :dev do
47
47
  end
48
48
 
49
49
  task :test_all do
50
- versions = `rbenv versions --bare | grep -i ^2.[67]\\\\\\|^3.`.split("\n")
50
+ versions = `rbenv versions --bare | grep -i ^2.7\\\\\\|^3.`.split("\n")
51
51
  versions.each do |version|
52
52
  sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
53
53
  end
@@ -545,6 +545,7 @@ module HexaPDF
545
545
  column: 'HexaPDF::Layout::ColumnBox',
546
546
  list: 'HexaPDF::Layout::ListBox',
547
547
  table: 'HexaPDF::Layout::TableBox',
548
+ container: 'HexaPDF::Layout::ContainerBox',
548
549
  },
549
550
  'page.default_media_box' => :A4,
550
551
  'page.default_media_orientation' => :portrait,
@@ -687,6 +688,7 @@ module HexaPDF
687
688
  XXReference: 'HexaPDF::Type::Form::Reference',
688
689
  XXCIDSystemInfo: 'HexaPDF::Type::CIDFont::CIDSystemInfo',
689
690
  Group: 'HexaPDF::Type::Form::Group',
691
+ Metadata: 'HexaPDF::Type::Metadata',
690
692
  },
691
693
  'object.subtype_map' => {
692
694
  nil => {
@@ -705,6 +707,7 @@ module HexaPDF
705
707
  Text: 'HexaPDF::Type::Annotations::Text',
706
708
  Link: 'HexaPDF::Type::Annotations::Link',
707
709
  Widget: 'HexaPDF::Type::Annotations::Widget',
710
+ XML: 'HexaPDF::Type::Metadata'
708
711
  },
709
712
  XObject: {
710
713
  Image: 'HexaPDF::Type::Image',
@@ -118,7 +118,7 @@ module HexaPDF
118
118
  # composer.list(item_spacing: 2) do |list|
119
119
  # composer.document.config['layout.boxes.map'].each do |name, klass|
120
120
  # list.formatted_text([{text: name.to_s, fill_color: "hp-blue-dark"},
121
- # {text: "\n#{klass}"}, font_size: 7])
121
+ # {text: "\n#{klass}"}], font_size: 6)
122
122
  # end
123
123
  # end
124
124
  # end
@@ -238,10 +238,12 @@ module HexaPDF
238
238
  #
239
239
  # The +name+ argument refers to the registered name of the box class that is looked up in the
240
240
  # 'layout.boxes.map' configuration option. The +box_options+ are passed as-is to the
241
- # initialization method of that box class
241
+ # initialization method of that box class.
242
242
  #
243
243
  # If a block is provided, a ChildrenCollector is yielded and the collected children are passed
244
- # to the box initialization method via the :children keyword argument.
244
+ # to the box initialization method via the :children keyword argument. There is one exception
245
+ # to this rule in case +name+ is +base+: The provided block is passed to the initialization
246
+ # method of the base box class to function as drawing method.
245
247
  #
246
248
  # See #text_box for details on +width+, +height+ and +style+ (note that there is no
247
249
  # +style_properties+ argument).
@@ -252,12 +254,19 @@ module HexaPDF
252
254
  # layout.box(:column) do |column| # column box with one child
253
255
  # column.lorem_ipsum
254
256
  # end
255
- def box(name, width: 0, height: 0, style: nil, **box_options, &block)
256
- if block_given? && !box_options.key?(:children)
257
- box_options[:children] = ChildrenCollector.collect(self, &block)
257
+ # layout.box(width: 100) do |canvas, box|
258
+ # canvas.line(0, 0, box.content_width, box.content_height).stroke
259
+ # end
260
+ def box(name = :base, width: 0, height: 0, style: nil, **box_options, &block)
261
+ if block_given?
262
+ if name == :base
263
+ box_block = block
264
+ elsif !box_options.key?(:children)
265
+ box_options[:children] = ChildrenCollector.collect(self, &block)
266
+ end
258
267
  end
259
268
  box_class_for_name(name).new(width: width, height: height,
260
- style: retrieve_style(style), **box_options)
269
+ style: retrieve_style(style), **box_options, &box_block)
261
270
  end
262
271
 
263
272
  # Creates an array of HexaPDF::Layout::TextFragment objects for the given +text+.
@@ -0,0 +1,488 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2023 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'securerandom'
38
+ require 'hexapdf/dictionary'
39
+ require 'hexapdf/error'
40
+
41
+ module HexaPDF
42
+ class Document
43
+
44
+ # This class provides methods for reading and writing the document-level metadata.
45
+ #
46
+ # When an instance is created (usually through HexaPDF::Document#metadata), the metadata is read
47
+ # from the document's information dictionary (see HexaPDF::Type::Info) and made available
48
+ # through the various methods.
49
+ #
50
+ # By default, the metadata is written to the information dictionary as well as to the document's
51
+ # metadata stream (see HexaPDF::Type::Metadata) once the document is written. This can be
52
+ # controlled via the #write_info_dict and #write_metadata_stream methods.
53
+ #
54
+ # While HexaPDF is able to write an XMP packet (using a limited form) to the document's metadata
55
+ # stream, it provides no way for reading XMP metadata. If reading functionality or extended
56
+ # writing functionality is needed, make sure this class does not write the metadata and
57
+ # read/create the metadata stream yourself.
58
+ #
59
+ #
60
+ # == Caveats
61
+ #
62
+ # * Disabling writing to the information dictionary will only prevent parts from being written.
63
+ # The #producer is always written to the information dictionary as per the AGPL license terms.
64
+ # The #modification_date may be written depending on the arguments to HexaPDF::Document#write.
65
+ #
66
+ # * If writing the metadata stream is enabled, any existing metadata stream is completely
67
+ # overwritten. This means the metadata stream is *not* updated with the changed information.
68
+ #
69
+ #
70
+ # == Adding custom metadata properties
71
+ #
72
+ # All the properties specified for the information dictionary are supported.
73
+ #
74
+ # Furthermore, HexaPDF supports writing custom properties to the metadata stream. For this to
75
+ # work the used XMP namespaces need to be registered using #register_namespace. Additionally,
76
+ # the types of all used XMP properties need to be registered using #register_property_type.
77
+ #
78
+ # The following types for XMP properties are supported:
79
+ #
80
+ # String::
81
+ # Maps to the XMP simple string value. Values need to be of type String.
82
+ #
83
+ # Date::
84
+ # Maps to the XMP simple string value, correctly formatted. Values need to be of type Time,
85
+ # Date, or DateTime.
86
+ #
87
+ # URI::
88
+ # Maps to the XMP simple value variant of URI. Values need to be of type String or URI.
89
+ #
90
+ # Boolean::
91
+ # Maps to the XMP simple string value, correctly formatted. Values need to be either +true+
92
+ # or +false+.
93
+ #
94
+ # OrderedArray::
95
+ # Maps to the XMP ordered array. Values need to be of type Array and items must be XMP
96
+ # simple values.
97
+ #
98
+ # UnorderedArray::
99
+ # Maps to the XMP unordered array. Values need to be of type Array and items must be
100
+ # XMP simple values.
101
+ #
102
+ # LanguageArray::
103
+ # Maps to the XMP language alternatives array. Values need to be of type Array and items
104
+ # must either be strings (they are associated with the set default language) or
105
+ # LocalizedString instances.
106
+ #
107
+ #
108
+ # See: PDF2.0 s14.3, https://www.adobe.com/products/xmp.html
109
+ class Metadata
110
+
111
+ # Represents a localized XMP string, i.e. a string with an attached language.
112
+ class LocalizedString < String
113
+ # The language identifier for the string in RFC3066 format.
114
+ attr_accessor :language
115
+ end
116
+
117
+ # Contains a mapping of predefined prefixes for XMP namespaces for metadata.
118
+ PREDEFINED_NAMESPACES = {
119
+ "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
120
+ "xmp" => "http://ns.adobe.com/xap/1.0/",
121
+ "pdf" => "http://ns.adobe.com/pdf/1.3/",
122
+ "dc" => "http://purl.org/dc/elements/1.1/",
123
+ "x" => "adobe:ns:meta/",
124
+ }.freeze
125
+
126
+ # Contains a mapping of predefined XMP properties to their types, i.e. from namespace to
127
+ # property and then type.
128
+ PREDEFINED_PROPERTIES = {
129
+ "http://ns.adobe.com/xap/1.0/" => {
130
+ 'CreatorTool' => 'String',
131
+ 'CreateDate' => 'Date',
132
+ 'ModifyDate' => 'Date',
133
+ }.freeze,
134
+ "http://ns.adobe.com/pdf/1.3/" => {
135
+ 'Keywords' => 'String',
136
+ 'Producer' => 'String',
137
+ 'Trapped' => 'Boolean',
138
+ }.freeze,
139
+ "http://purl.org/dc/elements/1.1/" => {
140
+ 'creator' => 'OrderedArray',
141
+ 'description' => 'LanguageArray',
142
+ 'title' => 'LanguageArray',
143
+ }.freeze,
144
+ }.freeze
145
+
146
+
147
+ # Creates a new Metadata object for the given PDF document.
148
+ def initialize(document)
149
+ @document = document
150
+ @namespaces = PREDEFINED_NAMESPACES.dup
151
+ @properties = PREDEFINED_PROPERTIES.transform_values {|value| value.dup}
152
+ @default_language = document.catalog[:Lang] || 'en'
153
+ @metadata = Hash.new {|h, k| h[k] = {} }
154
+ write_info_dict(true)
155
+ write_metadata_stream(true)
156
+ @document.register_listener(:complete_objects, &method(:write_metadata))
157
+ parse_metadata
158
+ end
159
+
160
+ # :call-seq:
161
+ # metadata.default_language -> language
162
+ # metadata.default_language(value) -> value
163
+ #
164
+ # Returns the default language in RFC3066 format used for unlocalized strings if no argument
165
+ # is given. Otherwise sets the default language to the given language.
166
+ #
167
+ # The initial default language is taken from the document catalog's /Lang entry. If that is not
168
+ # set, the default language is assumed to be English ('en').
169
+ def default_language(value = :UNSET)
170
+ if value == :UNSET
171
+ @default_language
172
+ else
173
+ @default_language = value
174
+ end
175
+ end
176
+
177
+ # Returns +true+ if the information dictionary should be written.
178
+ def write_info_dict?
179
+ @write_info_dict
180
+ end
181
+
182
+ # Makes HexaPDF write the information dictionary if +value+ is +true+.
183
+ #
184
+ # See the class documentation for caveats.
185
+ def write_info_dict(value)
186
+ @write_info_dict = value
187
+ end
188
+
189
+ # Returns +true+ if the metadata stream should be written.
190
+ def write_metadata_stream?
191
+ @write_metadata_stream
192
+ end
193
+
194
+ # Makes HexaPDF write the metadata stream if +value+ is +true+.
195
+ #
196
+ # See the class documentation for caveats.
197
+ def write_metadata_stream(value)
198
+ @write_metadata_stream = value
199
+ end
200
+
201
+ # Registers the +prefix+ for the given namespace +uri+.
202
+ def register_namespace(prefix, uri)
203
+ @namespaces[prefix] = uri
204
+ end
205
+
206
+ # Returns the namespace URI associated with the given prefix.
207
+ def namespace(ns)
208
+ @namespaces.fetch(ns) do
209
+ raise HexaPDF::Error, "Namespace prefix '#{ns}' not registered"
210
+ end
211
+ end
212
+
213
+ # Registers the +property+ for the namespace specified via +prefix+ as the given +type+.
214
+ #
215
+ # The argument +type+ has to be one of the following: 'String', 'Date', 'URI', 'Boolean',
216
+ # 'OrderedArray', 'UnorderedArray', or 'LanguageArray'.
217
+ def register_property_type(prefix, property, type)
218
+ (@properties[namespace(prefix)] ||= {})[property] = type
219
+ end
220
+
221
+ # :call-seq:
222
+ # metadata.property(ns_prefix, name) -> property_value
223
+ # metadata.property(ns_prefix, name, value) -> value
224
+ #
225
+ # Returns the value for the property specified via the namespace prefix +ns_prefix+ and +name+
226
+ # if the +value+ argument is not provided. Otherwise sets the property to +value+.
227
+ #
228
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
229
+ # property is deleted from the metadata.
230
+ def property(ns, property, value = :UNSET)
231
+ ns = @metadata[namespace(ns)]
232
+ if value == :UNSET
233
+ ns[property]
234
+ elsif value.nil?
235
+ ns.delete(property)
236
+ else
237
+ ns[property] = value
238
+ end
239
+ end
240
+
241
+ # :call-seq:
242
+ # metadata.title -> title or nil
243
+ # metadata.title(value) -> value
244
+ #
245
+ # Returns the document's title if no argument is given. Otherwise sets the document's title to
246
+ # the given value.
247
+ #
248
+ # The language for the title is specified via #default_language.
249
+ #
250
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
251
+ # property is deleted from the metadata.
252
+ #
253
+ # This metadata property is represented by the XMP name dc:title.
254
+ def title(value = :UNSET)
255
+ property('dc', 'title', value)
256
+ end
257
+
258
+ # :call-seq:
259
+ # metadata.author -> author or nil
260
+ # metadata.author(value) -> value
261
+ #
262
+ # Returns the name of the person who created the document (author) if no argument is given.
263
+ # Otherwise sets the author to the given value.
264
+ #
265
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
266
+ # property is deleted from the metadata.
267
+ #
268
+ # This metadata property is represented by the XMP name dc:creator.
269
+ def author(value = :UNSET)
270
+ property('dc', 'creator', value)
271
+ end
272
+
273
+ # :call-seq:
274
+ # metadata.subject -> subject or nil
275
+ # metadata.subject(value) -> value
276
+ #
277
+ # Returns the subject of the document if no argument is given. Otherwise sets the subject to
278
+ # the given value.
279
+ #
280
+ # The language for the subject is specified via #default_language.
281
+ #
282
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
283
+ # property is deleted from the metadata.
284
+ #
285
+ # This metadata property is represented by the XMP name dc:description.
286
+ def subject(value = :UNSET)
287
+ property('dc', 'description', value)
288
+ end
289
+
290
+ # :call-seq:
291
+ # metadata.keywords -> keywords or nil
292
+ # metadata.keywords(value) -> value
293
+ #
294
+ # Returns the keywords associated with the document if no argument is given. Otherwise sets
295
+ # keywords to the given value.
296
+ #
297
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
298
+ # property is deleted from the metadata.
299
+ #
300
+ # This metadata property is represented by the XMP name pdf:Keywords.
301
+ def keywords(value = :UNSET)
302
+ property('pdf', 'Keywords', value)
303
+ end
304
+
305
+ # :call-seq:
306
+ # metadata.creator -> creator or nil
307
+ # metadata.creator(value) -> value
308
+ #
309
+ # Returns the name of the PDF processor that created the original document from which this PDF
310
+ # was converted if no argument is given. Otherwise sets the name of the creator tool to the
311
+ # given value.
312
+ #
313
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
314
+ # property is deleted from the metadata.
315
+ #
316
+ # This metadata property is represented by the XMP name xmp:CreatorTool.
317
+ def creator(value = :UNSET)
318
+ property('xmp', 'CreatorTool', value)
319
+ end
320
+
321
+ # :call-seq:
322
+ # metadata.producer -> producer or nil
323
+ # metadata.producer(value) -> value
324
+ #
325
+ # Returns the name of the PDF processor that converted the original document to PDF if no
326
+ # argument is given. Otherwise sets the name of the producer to the given value.
327
+ #
328
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
329
+ # property is deleted from the metadata.
330
+ #
331
+ # This metadata property is represented by the XMP name pdf:Producer.
332
+ def producer(value = :UNSET)
333
+ property('pdf', 'Producer', value)
334
+ end
335
+
336
+ # :call-seq:
337
+ # metadata.creation_date -> creation_date or nil
338
+ # metadata.creation_date(value) -> value
339
+ #
340
+ # Returns the date and time (a Time object) the document was created if no argument is given.
341
+ # Otherwise sets the creation date to the given value.
342
+ #
343
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
344
+ # property is deleted from the metadata.
345
+ #
346
+ # This metadata property is represented by the XMP name xmp:CreateDate.
347
+ def creation_date(value = :UNSET)
348
+ property('xmp', 'CreateDate', value)
349
+ end
350
+
351
+ # :call-seq:
352
+ # metadata.modification_date -> modification_date or nil
353
+ # metadata.modification_date(value) -> value
354
+ #
355
+ # Returns the date and time (a Time object) the document was most recently modified if no
356
+ # argument is given. Otherwise sets the modification date to the given value.
357
+ #
358
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
359
+ # property is deleted from the metadata.
360
+ #
361
+ # This metadata property is represented by the XMP name xmp:ModifyDate.
362
+ def modification_date(value = :UNSET)
363
+ property('xmp', 'ModifyDate', value)
364
+ end
365
+
366
+ # :call-seq:
367
+ # metadata.trapped -> trapped or nil
368
+ # metadata.trapped(value) -> value
369
+ #
370
+ # Returns +true+ if the document has been modified to include trapping information if no
371
+ # argument is given. Otherwise sets the trapped status to the given boolean value.
372
+ #
373
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
374
+ # property is deleted from the metadata.
375
+ #
376
+ # This metadata property is represented by the XMP name pdf:Trapped.
377
+ def trapped(value = :UNSET)
378
+ property('pdf', 'Trapped', value)
379
+ end
380
+
381
+ private
382
+
383
+ # Parses the metadata from the information dictionary into the internal data structure.
384
+ def parse_metadata
385
+ info_dict = @document.trailer.info
386
+ ns_dc = namespace('dc')
387
+ ns_xmp = namespace('xmp')
388
+ ns_pdf = namespace('pdf')
389
+ @metadata[ns_dc]['title'] = info_dict[:Title] if info_dict.key?(:Title)
390
+ @metadata[ns_dc]['creator'] = info_dict[:Author] if info_dict.key?(:Author)
391
+ @metadata[ns_dc]['description'] = info_dict[:Subject] if info_dict.key?(:Subject)
392
+ @metadata[ns_xmp]['CreatorTool'] = info_dict[:Creator] if info_dict.key?(:Creator)
393
+ @metadata[ns_xmp]['CreateDate'] = info_dict[:CreationDate] if info_dict.key?(:CreationDate)
394
+ @metadata[ns_xmp]['ModifyDate'] = info_dict[:ModDate] if info_dict.key?(:ModDate)
395
+ @metadata[ns_pdf]['Keywords'] = info_dict[:Keywords] if info_dict.key?(:Keywords)
396
+ @metadata[ns_pdf]['Producer'] = info_dict[:Producer] if info_dict.key?(:Producer)
397
+ if info_dict.key?(:Trapped) && info_dict[:Trapped] != :Unknown
398
+ @metadata[ns_pdf]['Trapped'] = (info_dict[:Trapped] == :True)
399
+ end
400
+ end
401
+
402
+ # Writes the metadata to the specified destinations.
403
+ def write_metadata
404
+ ns_dc = namespace('dc')
405
+ ns_xmp = namespace('xmp')
406
+ ns_pdf = namespace('pdf')
407
+
408
+ if write_info_dict?
409
+ info_dict = @document.trailer.info
410
+ info_dict[:Title] = Array(@metadata[ns_dc]['title']).first
411
+ info_dict[:Author] = Array(@metadata[ns_dc]['creator']).join(', ')
412
+ info_dict[:Subject] = Array(@metadata[ns_dc]['description']).first
413
+ info_dict[:Creator] = @metadata[ns_xmp]['CreatorTool']
414
+ info_dict[:CreationDate] = @metadata[ns_xmp]['CreateDate']
415
+ info_dict[:ModDate] = @metadata[ns_xmp]['ModifyDate']
416
+ info_dict[:Keywords] = @metadata[ns_pdf]['Keywords']
417
+ info_dict[:Producer] = @metadata[ns_pdf]['Producer']
418
+ info_dict[:Trapped] = @metadata[ns_pdf]['Trapped'] ? :True : :False
419
+ end
420
+
421
+ if write_metadata_stream?
422
+ descriptions = @metadata.map do |namespace, values|
423
+ xmp_description(@namespaces.key(namespace), values)
424
+ end.join("\n")
425
+ obj = @document.catalog[:Metadata] ||= @document.add({Type: :Metadata, Subtype: :XML})
426
+ obj.stream = xmp_packet(descriptions)
427
+ end
428
+ end
429
+
430
+ # Creates an XMP packet with the given payload +data+.
431
+ def xmp_packet(data)
432
+ <<~XMP
433
+ <?xpacket begin="\u{FEFF}" id="#{SecureRandom.uuid.tr('-', '')}"?>
434
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
435
+ #{data}
436
+ </rdf:RDF>
437
+ <?xpacket end="r"?>
438
+ XMP
439
+ end
440
+
441
+ # Creates an 'rdf:Description' element for all metadata +values+ with the given +ns_prefix+.
442
+ def xmp_description(ns_prefix, values)
443
+ values = values.map do |name, value|
444
+ str = +"<#{ns_prefix}:#{name}"
445
+ case (property_type = @properties[namespace(ns_prefix)][name])
446
+ when 'String'
447
+ str << ">#{xmp_escape(value)}</#{ns_prefix}:#{name}>"
448
+ when 'Date'
449
+ str << ">#{xmp_date(value)}</#{ns_prefix}:#{name}>"
450
+ when 'URI'
451
+ str << " rdf:resource=\"#{xmp_escape(value.to_s)}\" />"
452
+ when 'Boolean'
453
+ str << ">#{value ? 'True' : 'False'}</#{ns_prefix}:#{name}>"
454
+ when 'LanguageArray'
455
+ value = Array(value).map do |item|
456
+ lang = item.respond_to?(:language) ? item.language : default_language
457
+ "<rdf:li xml:lang=\"#{lang}\">#{xmp_escape(item)}</rdf:li>"
458
+ end.join("\n")
459
+ str << "><rdf:Alt>\n#{value}\n</rdf:Alt></#{ns_prefix}:#{name}>"
460
+ when 'OrderedArray', 'UnorderedArray'
461
+ value = Array(value).map {|item| "<rdf:li>#{xmp_escape(item)}</rdf:li>" }.join("\n")
462
+ el_type = (property_type == 'OrderedArray' ? 'Seq' : 'Bag')
463
+ str << "><rdf:#{el_type}>\n#{value}\n</rdf:#{el_type}></#{ns_prefix}:#{name}>"
464
+ end
465
+ str
466
+ end.join("\n")
467
+ <<~XMP.strip
468
+ <rdf:Description rdf:about="" xmlns:#{ns_prefix}="#{xmp_escape(namespace(ns_prefix))}">
469
+ #{values}
470
+ </rdf:Description>
471
+ XMP
472
+ end
473
+
474
+ # Escapes the given value so as to be usable as XMP simple value.
475
+ def xmp_escape(value)
476
+ value.gsub(/<|>|"/, {'<' => '&lt;', '>' => '&gt;', '"' => '&quot;'})
477
+ end
478
+
479
+ # Formats the given date-time object (Time, Date, or DateTime) to be a valid XMP date-time
480
+ # value.
481
+ def xmp_date(date)
482
+ date.strftime("%Y-%m-%dT%H:%M:%S%:z")
483
+ end
484
+
485
+ end
486
+
487
+ end
488
+ end
@@ -120,6 +120,7 @@ module HexaPDF
120
120
  autoload(:Files, 'hexapdf/document/files')
121
121
  autoload(:Destinations, 'hexapdf/document/destinations')
122
122
  autoload(:Layout, 'hexapdf/document/layout')
123
+ autoload(:Metadata, 'hexapdf/document/metadata')
123
124
 
124
125
  # :call-seq:
125
126
  # Document.open(filename, **docargs) -> doc
@@ -486,6 +487,16 @@ module HexaPDF
486
487
  pdf_data ? @cache[pdf_data].clear : @cache.clear
487
488
  end
488
489
 
490
+ # Returns the Metadata object that provides a convenience interface for working with the
491
+ # document metadata.
492
+ #
493
+ # Note that invoking this method means that, depending on the settings, the info dictionary as
494
+ # well as the metadata stream will be overwritten when the document gets written. See the
495
+ # "Caveats" section in the Metadata documentation.
496
+ def metadata
497
+ @metadata ||= Metadata.new(self)
498
+ end
499
+
489
500
  # Returns the Pages object that provides convenience methods for working with the pages of the
490
501
  # PDF file.
491
502
  #
@@ -706,13 +717,17 @@ module HexaPDF
706
717
  # Optimize the file size by using object and cross-reference streams. This will raise the PDF
707
718
  # version to at least 1.5.
708
719
  def write(file_or_io, incremental: false, validate: true, update_fields: true, optimize: false)
709
- dispatch_message(:complete_objects)
710
-
711
720
  if update_fields
712
721
  trailer.update_id
713
- trailer.info[:ModDate] = Time.now
722
+ if @metadata
723
+ metadata.modification_date(Time.now)
724
+ else
725
+ trailer.info[:ModDate] = Time.now
726
+ end
714
727
  end
715
728
 
729
+ dispatch_message(:complete_objects)
730
+
716
731
  if validate
717
732
  self.validate(auto_correct: true) do |msg, correctable, obj|
718
733
  next if correctable
@@ -69,11 +69,11 @@ module HexaPDF
69
69
  @block_used = false
70
70
  end
71
71
 
72
- # Returns the length of the wrapped string.
72
+ # Returns the length in bytes of the wrapped string.
73
73
  #
74
74
  # May only be called before #resume!
75
75
  def length
76
- str.length
76
+ str.bytesize
77
77
  end
78
78
 
79
79
  # Returns +true+ if #resume has not yet been called.