hexapdf 0.35.1 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4a7769176f4b753c876ecfb95a6cd2a954249388a7da9e308fbd8c25ba071c9b
4
- data.tar.gz: f39b081ab1408fd08a3dff88f8aa3417283e98478b3e9fad60a692afb16e92c1
3
+ metadata.gz: 464355c84e7dd5636fe34bb0feb96038e06d4e0701d0ce9e915a2a9bddd3c18a
4
+ data.tar.gz: 8bcebc03512574b0fd71396bbe21330f59cb49e75f2194d1cd47ced0648ef7ad
5
5
  SHA512:
6
- metadata.gz: 21fcdbd57cdcf436edd1de50f0268d05573b92b70edd1eafeb00af0414eed3cbaa8f107ac1310419a29a113dc5a33ec991fa49a4fa0b47edea1bd2eb7afb0768
7
- data.tar.gz: 0c6732024ebd325a4216fe8e4a2fdc69d51365b873ec53c926a9a1e575730cdb6d5f45eea5cd211c1cb2a21afd16cb75f3a1782411a2971eed7afe2f61708dfd
6
+ metadata.gz: ccfbc72734d74178b1eb49da85bcd364410b12abfbb2d2cd730e6a37176dc85c7b87369bd0463869d6815d911e3428a21b0e18e1492f75d75b59c53a68ebf835
7
+ data.tar.gz: bc7001b82ec40571b6257575923d0fb5ef4c1011ff4067b3b53c40822240328c461c358084f9de8d5271809d9d85b8bd2de91ce219e211cd84154b3156c7c426
data/CHANGELOG.md CHANGED
@@ -1,3 +1,39 @@
1
+ ## 0.37.0 - 2024-01-29
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Document::Metadata] for working with metadata (reading the info
6
+ dictionary and writing it as well as the XMP metadata stream)
7
+
8
+ ### Changed
9
+
10
+ * Minimum Ruby version to be 2.7
11
+
12
+ ### Fixed
13
+
14
+ * [HexaPDF::FiberDoubleForString#length] to not assume a binary string
15
+
16
+
17
+ ## 0.36.0 - 2024-01-20
18
+
19
+ ### Added
20
+
21
+ * [HexaPDF::Layout::ContainerBox] for grouping child boxes together
22
+
23
+ ### Changed
24
+
25
+ * [HexaPDF::Layout::Frame::FitResult#draw] to allow drawing at an offset
26
+ * [HexaPDF::Layout::Box#fit] to delegate the actual content fitting to the
27
+ `#fit_content` method
28
+ * [HexaPDF::Document::Layout#box] to allow using the block as drawing block for
29
+ the base box class
30
+
31
+ ### Fixed
32
+
33
+ * [HexaPDF::Type::FontSimple#to_utf8] to work in case the font's encoding cannot
34
+ be retrieved
35
+
36
+
1
37
  ## 0.35.1 - 2024-01-11
2
38
 
3
39
  ### Added
data/Rakefile CHANGED
@@ -47,7 +47,7 @@ namespace :dev do
47
47
  end
48
48
 
49
49
  task :test_all do
50
- versions = `rbenv versions --bare | grep -i ^2.[67]\\\\\\|^3.`.split("\n")
50
+ versions = `rbenv versions --bare | grep -i ^2.7\\\\\\|^3.`.split("\n")
51
51
  versions.each do |version|
52
52
  sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
53
53
  end
@@ -545,6 +545,7 @@ module HexaPDF
545
545
  column: 'HexaPDF::Layout::ColumnBox',
546
546
  list: 'HexaPDF::Layout::ListBox',
547
547
  table: 'HexaPDF::Layout::TableBox',
548
+ container: 'HexaPDF::Layout::ContainerBox',
548
549
  },
549
550
  'page.default_media_box' => :A4,
550
551
  'page.default_media_orientation' => :portrait,
@@ -687,6 +688,7 @@ module HexaPDF
687
688
  XXReference: 'HexaPDF::Type::Form::Reference',
688
689
  XXCIDSystemInfo: 'HexaPDF::Type::CIDFont::CIDSystemInfo',
689
690
  Group: 'HexaPDF::Type::Form::Group',
691
+ Metadata: 'HexaPDF::Type::Metadata',
690
692
  },
691
693
  'object.subtype_map' => {
692
694
  nil => {
@@ -705,6 +707,7 @@ module HexaPDF
705
707
  Text: 'HexaPDF::Type::Annotations::Text',
706
708
  Link: 'HexaPDF::Type::Annotations::Link',
707
709
  Widget: 'HexaPDF::Type::Annotations::Widget',
710
+ XML: 'HexaPDF::Type::Metadata'
708
711
  },
709
712
  XObject: {
710
713
  Image: 'HexaPDF::Type::Image',
@@ -118,7 +118,7 @@ module HexaPDF
118
118
  # composer.list(item_spacing: 2) do |list|
119
119
  # composer.document.config['layout.boxes.map'].each do |name, klass|
120
120
  # list.formatted_text([{text: name.to_s, fill_color: "hp-blue-dark"},
121
- # {text: "\n#{klass}"}, font_size: 7])
121
+ # {text: "\n#{klass}"}], font_size: 6)
122
122
  # end
123
123
  # end
124
124
  # end
@@ -238,10 +238,12 @@ module HexaPDF
238
238
  #
239
239
  # The +name+ argument refers to the registered name of the box class that is looked up in the
240
240
  # 'layout.boxes.map' configuration option. The +box_options+ are passed as-is to the
241
- # initialization method of that box class
241
+ # initialization method of that box class.
242
242
  #
243
243
  # If a block is provided, a ChildrenCollector is yielded and the collected children are passed
244
- # to the box initialization method via the :children keyword argument.
244
+ # to the box initialization method via the :children keyword argument. There is one exception
245
+ # to this rule in case +name+ is +base+: The provided block is passed to the initialization
246
+ # method of the base box class to function as drawing method.
245
247
  #
246
248
  # See #text_box for details on +width+, +height+ and +style+ (note that there is no
247
249
  # +style_properties+ argument).
@@ -252,12 +254,19 @@ module HexaPDF
252
254
  # layout.box(:column) do |column| # column box with one child
253
255
  # column.lorem_ipsum
254
256
  # end
255
- def box(name, width: 0, height: 0, style: nil, **box_options, &block)
256
- if block_given? && !box_options.key?(:children)
257
- box_options[:children] = ChildrenCollector.collect(self, &block)
257
+ # layout.box(width: 100) do |canvas, box|
258
+ # canvas.line(0, 0, box.content_width, box.content_height).stroke
259
+ # end
260
+ def box(name = :base, width: 0, height: 0, style: nil, **box_options, &block)
261
+ if block_given?
262
+ if name == :base
263
+ box_block = block
264
+ elsif !box_options.key?(:children)
265
+ box_options[:children] = ChildrenCollector.collect(self, &block)
266
+ end
258
267
  end
259
268
  box_class_for_name(name).new(width: width, height: height,
260
- style: retrieve_style(style), **box_options)
269
+ style: retrieve_style(style), **box_options, &box_block)
261
270
  end
262
271
 
263
272
  # Creates an array of HexaPDF::Layout::TextFragment objects for the given +text+.
@@ -0,0 +1,488 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2023 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'securerandom'
38
+ require 'hexapdf/dictionary'
39
+ require 'hexapdf/error'
40
+
41
+ module HexaPDF
42
+ class Document
43
+
44
+ # This class provides methods for reading and writing the document-level metadata.
45
+ #
46
+ # When an instance is created (usually through HexaPDF::Document#metadata), the metadata is read
47
+ # from the document's information dictionary (see HexaPDF::Type::Info) and made available
48
+ # through the various methods.
49
+ #
50
+ # By default, the metadata is written to the information dictionary as well as to the document's
51
+ # metadata stream (see HexaPDF::Type::Metadata) once the document is written. This can be
52
+ # controlled via the #write_info_dict and #write_metadata_stream methods.
53
+ #
54
+ # While HexaPDF is able to write an XMP packet (using a limited form) to the document's metadata
55
+ # stream, it provides no way for reading XMP metadata. If reading functionality or extended
56
+ # writing functionality is needed, make sure this class does not write the metadata and
57
+ # read/create the metadata stream yourself.
58
+ #
59
+ #
60
+ # == Caveats
61
+ #
62
+ # * Disabling writing to the information dictionary will only prevent parts from being written.
63
+ # The #producer is always written to the information dictionary as per the AGPL license terms.
64
+ # The #modification_date may be written depending on the arguments to HexaPDF::Document#write.
65
+ #
66
+ # * If writing the metadata stream is enabled, any existing metadata stream is completely
67
+ # overwritten. This means the metadata stream is *not* updated with the changed information.
68
+ #
69
+ #
70
+ # == Adding custom metadata properties
71
+ #
72
+ # All the properties specified for the information dictionary are supported.
73
+ #
74
+ # Furthermore, HexaPDF supports writing custom properties to the metadata stream. For this to
75
+ # work the used XMP namespaces need to be registered using #register_namespace. Additionally,
76
+ # the types of all used XMP properties need to be registered using #register_property_type.
77
+ #
78
+ # The following types for XMP properties are supported:
79
+ #
80
+ # String::
81
+ # Maps to the XMP simple string value. Values need to be of type String.
82
+ #
83
+ # Date::
84
+ # Maps to the XMP simple string value, correctly formatted. Values need to be of type Time,
85
+ # Date, or DateTime.
86
+ #
87
+ # URI::
88
+ # Maps to the XMP simple value variant of URI. Values need to be of type String or URI.
89
+ #
90
+ # Boolean::
91
+ # Maps to the XMP simple string value, correctly formatted. Values need to be either +true+
92
+ # or +false+.
93
+ #
94
+ # OrderedArray::
95
+ # Maps to the XMP ordered array. Values need to be of type Array and items must be XMP
96
+ # simple values.
97
+ #
98
+ # UnorderedArray::
99
+ # Maps to the XMP unordered array. Values need to be of type Array and items must be
100
+ # XMP simple values.
101
+ #
102
+ # LanguageArray::
103
+ # Maps to the XMP language alternatives array. Values need to be of type Array and items
104
+ # must either be strings (they are associated with the set default language) or
105
+ # LocalizedString instances.
106
+ #
107
+ #
108
+ # See: PDF2.0 s14.3, https://www.adobe.com/products/xmp.html
109
+ class Metadata
110
+
111
+ # Represents a localized XMP string, i.e. a string with an attached language.
112
+ class LocalizedString < String
113
+ # The language identifier for the string in RFC3066 format.
114
+ attr_accessor :language
115
+ end
116
+
117
+ # Contains a mapping of predefined prefixes for XMP namespaces for metadata.
118
+ PREDEFINED_NAMESPACES = {
119
+ "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
120
+ "xmp" => "http://ns.adobe.com/xap/1.0/",
121
+ "pdf" => "http://ns.adobe.com/pdf/1.3/",
122
+ "dc" => "http://purl.org/dc/elements/1.1/",
123
+ "x" => "adobe:ns:meta/",
124
+ }.freeze
125
+
126
+ # Contains a mapping of predefined XMP properties to their types, i.e. from namespace to
127
+ # property and then type.
128
+ PREDEFINED_PROPERTIES = {
129
+ "http://ns.adobe.com/xap/1.0/" => {
130
+ 'CreatorTool' => 'String',
131
+ 'CreateDate' => 'Date',
132
+ 'ModifyDate' => 'Date',
133
+ }.freeze,
134
+ "http://ns.adobe.com/pdf/1.3/" => {
135
+ 'Keywords' => 'String',
136
+ 'Producer' => 'String',
137
+ 'Trapped' => 'Boolean',
138
+ }.freeze,
139
+ "http://purl.org/dc/elements/1.1/" => {
140
+ 'creator' => 'OrderedArray',
141
+ 'description' => 'LanguageArray',
142
+ 'title' => 'LanguageArray',
143
+ }.freeze,
144
+ }.freeze
145
+
146
+
147
+ # Creates a new Metadata object for the given PDF document.
148
+ def initialize(document)
149
+ @document = document
150
+ @namespaces = PREDEFINED_NAMESPACES.dup
151
+ @properties = PREDEFINED_PROPERTIES.transform_values {|value| value.dup}
152
+ @default_language = document.catalog[:Lang] || 'en'
153
+ @metadata = Hash.new {|h, k| h[k] = {} }
154
+ write_info_dict(true)
155
+ write_metadata_stream(true)
156
+ @document.register_listener(:complete_objects, &method(:write_metadata))
157
+ parse_metadata
158
+ end
159
+
160
+ # :call-seq:
161
+ # metadata.default_language -> language
162
+ # metadata.default_language(value) -> value
163
+ #
164
+ # Returns the default language in RFC3066 format used for unlocalized strings if no argument
165
+ # is given. Otherwise sets the default language to the given language.
166
+ #
167
+ # The initial default language is taken from the document catalog's /Lang entry. If that is not
168
+ # set, the default language is assumed to be English ('en').
169
+ def default_language(value = :UNSET)
170
+ if value == :UNSET
171
+ @default_language
172
+ else
173
+ @default_language = value
174
+ end
175
+ end
176
+
177
+ # Returns +true+ if the information dictionary should be written.
178
+ def write_info_dict?
179
+ @write_info_dict
180
+ end
181
+
182
+ # Makes HexaPDF write the information dictionary if +value+ is +true+.
183
+ #
184
+ # See the class documentation for caveats.
185
+ def write_info_dict(value)
186
+ @write_info_dict = value
187
+ end
188
+
189
+ # Returns +true+ if the metadata stream should be written.
190
+ def write_metadata_stream?
191
+ @write_metadata_stream
192
+ end
193
+
194
+ # Makes HexaPDF write the metadata stream if +value+ is +true+.
195
+ #
196
+ # See the class documentation for caveats.
197
+ def write_metadata_stream(value)
198
+ @write_metadata_stream = value
199
+ end
200
+
201
+ # Registers the +prefix+ for the given namespace +uri+.
202
+ def register_namespace(prefix, uri)
203
+ @namespaces[prefix] = uri
204
+ end
205
+
206
+ # Returns the namespace URI associated with the given prefix.
207
+ def namespace(ns)
208
+ @namespaces.fetch(ns) do
209
+ raise HexaPDF::Error, "Namespace prefix '#{ns}' not registered"
210
+ end
211
+ end
212
+
213
+ # Registers the +property+ for the namespace specified via +prefix+ as the given +type+.
214
+ #
215
+ # The argument +type+ has to be one of the following: 'String', 'Date', 'URI', 'Boolean',
216
+ # 'OrderedArray', 'UnorderedArray', or 'LanguageArray'.
217
+ def register_property_type(prefix, property, type)
218
+ (@properties[namespace(prefix)] ||= {})[property] = type
219
+ end
220
+
221
+ # :call-seq:
222
+ # metadata.property(ns_prefix, name) -> property_value
223
+ # metadata.property(ns_prefix, name, value) -> value
224
+ #
225
+ # Returns the value for the property specified via the namespace prefix +ns_prefix+ and +name+
226
+ # if the +value+ argument is not provided. Otherwise sets the property to +value+.
227
+ #
228
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
229
+ # property is deleted from the metadata.
230
+ def property(ns, property, value = :UNSET)
231
+ ns = @metadata[namespace(ns)]
232
+ if value == :UNSET
233
+ ns[property]
234
+ elsif value.nil?
235
+ ns.delete(property)
236
+ else
237
+ ns[property] = value
238
+ end
239
+ end
240
+
241
+ # :call-seq:
242
+ # metadata.title -> title or nil
243
+ # metadata.title(value) -> value
244
+ #
245
+ # Returns the document's title if no argument is given. Otherwise sets the document's title to
246
+ # the given value.
247
+ #
248
+ # The language for the title is specified via #default_language.
249
+ #
250
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
251
+ # property is deleted from the metadata.
252
+ #
253
+ # This metadata property is represented by the XMP name dc:title.
254
+ def title(value = :UNSET)
255
+ property('dc', 'title', value)
256
+ end
257
+
258
+ # :call-seq:
259
+ # metadata.author -> author or nil
260
+ # metadata.author(value) -> value
261
+ #
262
+ # Returns the name of the person who created the document (author) if no argument is given.
263
+ # Otherwise sets the author to the given value.
264
+ #
265
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
266
+ # property is deleted from the metadata.
267
+ #
268
+ # This metadata property is represented by the XMP name dc:creator.
269
+ def author(value = :UNSET)
270
+ property('dc', 'creator', value)
271
+ end
272
+
273
+ # :call-seq:
274
+ # metadata.subject -> subject or nil
275
+ # metadata.subject(value) -> value
276
+ #
277
+ # Returns the subject of the document if no argument is given. Otherwise sets the subject to
278
+ # the given value.
279
+ #
280
+ # The language for the subject is specified via #default_language.
281
+ #
282
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
283
+ # property is deleted from the metadata.
284
+ #
285
+ # This metadata property is represented by the XMP name dc:description.
286
+ def subject(value = :UNSET)
287
+ property('dc', 'description', value)
288
+ end
289
+
290
+ # :call-seq:
291
+ # metadata.keywords -> keywords or nil
292
+ # metadata.keywords(value) -> value
293
+ #
294
+ # Returns the keywords associated with the document if no argument is given. Otherwise sets
295
+ # keywords to the given value.
296
+ #
297
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
298
+ # property is deleted from the metadata.
299
+ #
300
+ # This metadata property is represented by the XMP name pdf:Keywords.
301
+ def keywords(value = :UNSET)
302
+ property('pdf', 'Keywords', value)
303
+ end
304
+
305
+ # :call-seq:
306
+ # metadata.creator -> creator or nil
307
+ # metadata.creator(value) -> value
308
+ #
309
+ # Returns the name of the PDF processor that created the original document from which this PDF
310
+ # was converted if no argument is given. Otherwise sets the name of the creator tool to the
311
+ # given value.
312
+ #
313
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
314
+ # property is deleted from the metadata.
315
+ #
316
+ # This metadata property is represented by the XMP name xmp:CreatorTool.
317
+ def creator(value = :UNSET)
318
+ property('xmp', 'CreatorTool', value)
319
+ end
320
+
321
+ # :call-seq:
322
+ # metadata.producer -> producer or nil
323
+ # metadata.producer(value) -> value
324
+ #
325
+ # Returns the name of the PDF processor that converted the original document to PDF if no
326
+ # argument is given. Otherwise sets the name of the producer to the given value.
327
+ #
328
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
329
+ # property is deleted from the metadata.
330
+ #
331
+ # This metadata property is represented by the XMP name pdf:Producer.
332
+ def producer(value = :UNSET)
333
+ property('pdf', 'Producer', value)
334
+ end
335
+
336
+ # :call-seq:
337
+ # metadata.creation_date -> creation_date or nil
338
+ # metadata.creation_date(value) -> value
339
+ #
340
+ # Returns the date and time (a Time object) the document was created if no argument is given.
341
+ # Otherwise sets the creation date to the given value.
342
+ #
343
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
344
+ # property is deleted from the metadata.
345
+ #
346
+ # This metadata property is represented by the XMP name xmp:CreateDate.
347
+ def creation_date(value = :UNSET)
348
+ property('xmp', 'CreateDate', value)
349
+ end
350
+
351
+ # :call-seq:
352
+ # metadata.modification_date -> modification_date or nil
353
+ # metadata.modification_date(value) -> value
354
+ #
355
+ # Returns the date and time (a Time object) the document was most recently modified if no
356
+ # argument is given. Otherwise sets the modification date to the given value.
357
+ #
358
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
359
+ # property is deleted from the metadata.
360
+ #
361
+ # This metadata property is represented by the XMP name xmp:ModifyDate.
362
+ def modification_date(value = :UNSET)
363
+ property('xmp', 'ModifyDate', value)
364
+ end
365
+
366
+ # :call-seq:
367
+ # metadata.trapped -> trapped or nil
368
+ # metadata.trapped(value) -> value
369
+ #
370
+ # Returns +true+ if the document has been modified to include trapping information if no
371
+ # argument is given. Otherwise sets the trapped status to the given boolean value.
372
+ #
373
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
374
+ # property is deleted from the metadata.
375
+ #
376
+ # This metadata property is represented by the XMP name pdf:Trapped.
377
+ def trapped(value = :UNSET)
378
+ property('pdf', 'Trapped', value)
379
+ end
380
+
381
+ private
382
+
383
+ # Parses the metadata from the information dictionary into the internal data structure.
384
+ def parse_metadata
385
+ info_dict = @document.trailer.info
386
+ ns_dc = namespace('dc')
387
+ ns_xmp = namespace('xmp')
388
+ ns_pdf = namespace('pdf')
389
+ @metadata[ns_dc]['title'] = info_dict[:Title] if info_dict.key?(:Title)
390
+ @metadata[ns_dc]['creator'] = info_dict[:Author] if info_dict.key?(:Author)
391
+ @metadata[ns_dc]['description'] = info_dict[:Subject] if info_dict.key?(:Subject)
392
+ @metadata[ns_xmp]['CreatorTool'] = info_dict[:Creator] if info_dict.key?(:Creator)
393
+ @metadata[ns_xmp]['CreateDate'] = info_dict[:CreationDate] if info_dict.key?(:CreationDate)
394
+ @metadata[ns_xmp]['ModifyDate'] = info_dict[:ModDate] if info_dict.key?(:ModDate)
395
+ @metadata[ns_pdf]['Keywords'] = info_dict[:Keywords] if info_dict.key?(:Keywords)
396
+ @metadata[ns_pdf]['Producer'] = info_dict[:Producer] if info_dict.key?(:Producer)
397
+ if info_dict.key?(:Trapped) && info_dict[:Trapped] != :Unknown
398
+ @metadata[ns_pdf]['Trapped'] = (info_dict[:Trapped] == :True)
399
+ end
400
+ end
401
+
402
+ # Writes the metadata to the specified destinations.
403
+ def write_metadata
404
+ ns_dc = namespace('dc')
405
+ ns_xmp = namespace('xmp')
406
+ ns_pdf = namespace('pdf')
407
+
408
+ if write_info_dict?
409
+ info_dict = @document.trailer.info
410
+ info_dict[:Title] = Array(@metadata[ns_dc]['title']).first
411
+ info_dict[:Author] = Array(@metadata[ns_dc]['creator']).join(', ')
412
+ info_dict[:Subject] = Array(@metadata[ns_dc]['description']).first
413
+ info_dict[:Creator] = @metadata[ns_xmp]['CreatorTool']
414
+ info_dict[:CreationDate] = @metadata[ns_xmp]['CreateDate']
415
+ info_dict[:ModDate] = @metadata[ns_xmp]['ModifyDate']
416
+ info_dict[:Keywords] = @metadata[ns_pdf]['Keywords']
417
+ info_dict[:Producer] = @metadata[ns_pdf]['Producer']
418
+ info_dict[:Trapped] = @metadata[ns_pdf]['Trapped'] ? :True : :False
419
+ end
420
+
421
+ if write_metadata_stream?
422
+ descriptions = @metadata.map do |namespace, values|
423
+ xmp_description(@namespaces.key(namespace), values)
424
+ end.join("\n")
425
+ obj = @document.catalog[:Metadata] ||= @document.add({Type: :Metadata, Subtype: :XML})
426
+ obj.stream = xmp_packet(descriptions)
427
+ end
428
+ end
429
+
430
+ # Creates an XMP packet with the given payload +data+.
431
+ def xmp_packet(data)
432
+ <<~XMP
433
+ <?xpacket begin="\u{FEFF}" id="#{SecureRandom.uuid.tr('-', '')}"?>
434
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
435
+ #{data}
436
+ </rdf:RDF>
437
+ <?xpacket end="r"?>
438
+ XMP
439
+ end
440
+
441
+ # Creates an 'rdf:Description' element for all metadata +values+ with the given +ns_prefix+.
442
+ def xmp_description(ns_prefix, values)
443
+ values = values.map do |name, value|
444
+ str = +"<#{ns_prefix}:#{name}"
445
+ case (property_type = @properties[namespace(ns_prefix)][name])
446
+ when 'String'
447
+ str << ">#{xmp_escape(value)}</#{ns_prefix}:#{name}>"
448
+ when 'Date'
449
+ str << ">#{xmp_date(value)}</#{ns_prefix}:#{name}>"
450
+ when 'URI'
451
+ str << " rdf:resource=\"#{xmp_escape(value.to_s)}\" />"
452
+ when 'Boolean'
453
+ str << ">#{value ? 'True' : 'False'}</#{ns_prefix}:#{name}>"
454
+ when 'LanguageArray'
455
+ value = Array(value).map do |item|
456
+ lang = item.respond_to?(:language) ? item.language : default_language
457
+ "<rdf:li xml:lang=\"#{lang}\">#{xmp_escape(item)}</rdf:li>"
458
+ end.join("\n")
459
+ str << "><rdf:Alt>\n#{value}\n</rdf:Alt></#{ns_prefix}:#{name}>"
460
+ when 'OrderedArray', 'UnorderedArray'
461
+ value = Array(value).map {|item| "<rdf:li>#{xmp_escape(item)}</rdf:li>" }.join("\n")
462
+ el_type = (property_type == 'OrderedArray' ? 'Seq' : 'Bag')
463
+ str << "><rdf:#{el_type}>\n#{value}\n</rdf:#{el_type}></#{ns_prefix}:#{name}>"
464
+ end
465
+ str
466
+ end.join("\n")
467
+ <<~XMP.strip
468
+ <rdf:Description rdf:about="" xmlns:#{ns_prefix}="#{xmp_escape(namespace(ns_prefix))}">
469
+ #{values}
470
+ </rdf:Description>
471
+ XMP
472
+ end
473
+
474
+ # Escapes the given value so as to be usable as XMP simple value.
475
+ def xmp_escape(value)
476
+ value.gsub(/<|>|"/, {'<' => '&lt;', '>' => '&gt;', '"' => '&quot;'})
477
+ end
478
+
479
+ # Formats the given date-time object (Time, Date, or DateTime) to be a valid XMP date-time
480
+ # value.
481
+ def xmp_date(date)
482
+ date.strftime("%Y-%m-%dT%H:%M:%S%:z")
483
+ end
484
+
485
+ end
486
+
487
+ end
488
+ end
@@ -120,6 +120,7 @@ module HexaPDF
120
120
  autoload(:Files, 'hexapdf/document/files')
121
121
  autoload(:Destinations, 'hexapdf/document/destinations')
122
122
  autoload(:Layout, 'hexapdf/document/layout')
123
+ autoload(:Metadata, 'hexapdf/document/metadata')
123
124
 
124
125
  # :call-seq:
125
126
  # Document.open(filename, **docargs) -> doc
@@ -486,6 +487,16 @@ module HexaPDF
486
487
  pdf_data ? @cache[pdf_data].clear : @cache.clear
487
488
  end
488
489
 
490
+ # Returns the Metadata object that provides a convenience interface for working with the
491
+ # document metadata.
492
+ #
493
+ # Note that invoking this method means that, depending on the settings, the info dictionary as
494
+ # well as the metadata stream will be overwritten when the document gets written. See the
495
+ # "Caveats" section in the Metadata documentation.
496
+ def metadata
497
+ @metadata ||= Metadata.new(self)
498
+ end
499
+
489
500
  # Returns the Pages object that provides convenience methods for working with the pages of the
490
501
  # PDF file.
491
502
  #
@@ -706,13 +717,17 @@ module HexaPDF
706
717
  # Optimize the file size by using object and cross-reference streams. This will raise the PDF
707
718
  # version to at least 1.5.
708
719
  def write(file_or_io, incremental: false, validate: true, update_fields: true, optimize: false)
709
- dispatch_message(:complete_objects)
710
-
711
720
  if update_fields
712
721
  trailer.update_id
713
- trailer.info[:ModDate] = Time.now
722
+ if @metadata
723
+ metadata.modification_date(Time.now)
724
+ else
725
+ trailer.info[:ModDate] = Time.now
726
+ end
714
727
  end
715
728
 
729
+ dispatch_message(:complete_objects)
730
+
716
731
  if validate
717
732
  self.validate(auto_correct: true) do |msg, correctable, obj|
718
733
  next if correctable
@@ -69,11 +69,11 @@ module HexaPDF
69
69
  @block_used = false
70
70
  end
71
71
 
72
- # Returns the length of the wrapped string.
72
+ # Returns the length in bytes of the wrapped string.
73
73
  #
74
74
  # May only be called before #resume!
75
75
  def length
76
- str.length
76
+ str.bytesize
77
77
  end
78
78
 
79
79
  # Returns +true+ if #resume has not yet been called.