hexapdf 0.36.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b7cec7494ffe5e4f8031e5a05f2a31da13741879ff138513f737048880702389
4
- data.tar.gz: 238a71920dcd9bde03497a22f0583bf72f11d358176f28217214f86ca835764d
3
+ metadata.gz: 464355c84e7dd5636fe34bb0feb96038e06d4e0701d0ce9e915a2a9bddd3c18a
4
+ data.tar.gz: 8bcebc03512574b0fd71396bbe21330f59cb49e75f2194d1cd47ced0648ef7ad
5
5
  SHA512:
6
- metadata.gz: 7850251dba07dbae11c280bc87852c91aa72e166da7983c4b4a1508a4c76d99306eaacbc108dad8f0ba54246a2fe6648714cd8bfd5f6d9bddeb0ec67abab9867
7
- data.tar.gz: dace9a43ef57a0d27d33218ef4dc6503a961edfc2c96c04c16ebeb0d8675e09373c21908d159c992c4d2125499bdd1818acdf1150c86106e23888552210f4fc8
6
+ metadata.gz: ccfbc72734d74178b1eb49da85bcd364410b12abfbb2d2cd730e6a37176dc85c7b87369bd0463869d6815d911e3428a21b0e18e1492f75d75b59c53a68ebf835
7
+ data.tar.gz: bc7001b82ec40571b6257575923d0fb5ef4c1011ff4067b3b53c40822240328c461c358084f9de8d5271809d9d85b8bd2de91ce219e211cd84154b3156c7c426
data/CHANGELOG.md CHANGED
@@ -1,3 +1,19 @@
1
+ ## 0.37.0 - 2024-01-29
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Document::Metadata] for working with metadata (reading the info
6
+ dictionary and writing it as well as the XMP metadata stream)
7
+
8
+ ### Changed
9
+
10
+ * Minimum Ruby version to be 2.7
11
+
12
+ ### Fixed
13
+
14
+ * [HexaPDF::FiberDoubleForString#length] to not assume a binary string
15
+
16
+
1
17
  ## 0.36.0 - 2024-01-20
2
18
 
3
19
  ### Added
data/Rakefile CHANGED
@@ -47,7 +47,7 @@ namespace :dev do
47
47
  end
48
48
 
49
49
  task :test_all do
50
- versions = `rbenv versions --bare | grep -i ^2.[67]\\\\\\|^3.`.split("\n")
50
+ versions = `rbenv versions --bare | grep -i ^2.7\\\\\\|^3.`.split("\n")
51
51
  versions.each do |version|
52
52
  sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
53
53
  end
@@ -688,6 +688,7 @@ module HexaPDF
688
688
  XXReference: 'HexaPDF::Type::Form::Reference',
689
689
  XXCIDSystemInfo: 'HexaPDF::Type::CIDFont::CIDSystemInfo',
690
690
  Group: 'HexaPDF::Type::Form::Group',
691
+ Metadata: 'HexaPDF::Type::Metadata',
691
692
  },
692
693
  'object.subtype_map' => {
693
694
  nil => {
@@ -706,6 +707,7 @@ module HexaPDF
706
707
  Text: 'HexaPDF::Type::Annotations::Text',
707
708
  Link: 'HexaPDF::Type::Annotations::Link',
708
709
  Widget: 'HexaPDF::Type::Annotations::Widget',
710
+ XML: 'HexaPDF::Type::Metadata'
709
711
  },
710
712
  XObject: {
711
713
  Image: 'HexaPDF::Type::Image',
@@ -118,7 +118,7 @@ module HexaPDF
118
118
  # composer.list(item_spacing: 2) do |list|
119
119
  # composer.document.config['layout.boxes.map'].each do |name, klass|
120
120
  # list.formatted_text([{text: name.to_s, fill_color: "hp-blue-dark"},
121
- # {text: "\n#{klass}"}, font_size: 7])
121
+ # {text: "\n#{klass}"}], font_size: 6)
122
122
  # end
123
123
  # end
124
124
  # end
@@ -0,0 +1,488 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2023 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'securerandom'
38
+ require 'hexapdf/dictionary'
39
+ require 'hexapdf/error'
40
+
41
+ module HexaPDF
42
+ class Document
43
+
44
+ # This class provides methods for reading and writing the document-level metadata.
45
+ #
46
+ # When an instance is created (usually through HexaPDF::Document#metadata), the metadata is read
47
+ # from the document's information dictionary (see HexaPDF::Type::Info) and made available
48
+ # through the various methods.
49
+ #
50
+ # By default, the metadata is written to the information dictionary as well as to the document's
51
+ # metadata stream (see HexaPDF::Type::Metadata) once the document is written. This can be
52
+ # controlled via the #write_info_dict and #write_metadata_stream methods.
53
+ #
54
+ # While HexaPDF is able to write an XMP packet (using a limited form) to the document's metadata
55
+ # stream, it provides no way for reading XMP metadata. If reading functionality or extended
56
+ # writing functionality is needed, make sure this class does not write the metadata and
57
+ # read/create the metadata stream yourself.
58
+ #
59
+ #
60
+ # == Caveats
61
+ #
62
+ # * Disabling writing to the information dictionary will only prevent parts from being written.
63
+ # The #producer is always written to the information dictionary as per the AGPL license terms.
64
+ # The #modification_date may be written depending on the arguments to HexaPDF::Document#write.
65
+ #
66
+ # * If writing the metadata stream is enabled, any existing metadata stream is completely
67
+ # overwritten. This means the metadata stream is *not* updated with the changed information.
68
+ #
69
+ #
70
+ # == Adding custom metadata properties
71
+ #
72
+ # All the properties specified for the information dictionary are supported.
73
+ #
74
+ # Furthermore, HexaPDF supports writing custom properties to the metadata stream. For this to
75
+ # work the used XMP namespaces need to be registered using #register_namespace. Additionally,
76
+ # the types of all used XMP properties need to be registered using #register_property_type.
77
+ #
78
+ # The following types for XMP properties are supported:
79
+ #
80
+ # String::
81
+ # Maps to the XMP simple string value. Values need to be of type String.
82
+ #
83
+ # Date::
84
+ # Maps to the XMP simple string value, correctly formatted. Values need to be of type Time,
85
+ # Date, or DateTime.
86
+ #
87
+ # URI::
88
+ # Maps to the XMP simple value variant of URI. Values need to be of type String or URI.
89
+ #
90
+ # Boolean::
91
+ # Maps to the XMP simple string value, correctly formatted. Values need to be either +true+
92
+ # or +false+.
93
+ #
94
+ # OrderedArray::
95
+ # Maps to the XMP ordered array. Values need to be of type Array and items must be XMP
96
+ # simple values.
97
+ #
98
+ # UnorderedArray::
99
+ # Maps to the XMP unordered array. Values need to be of type Array and items must be
100
+ # XMP simple values.
101
+ #
102
+ # LanguageArray::
103
+ # Maps to the XMP language alternatives array. Values need to be of type Array and items
104
+ # must either be strings (they are associated with the set default language) or
105
+ # LocalizedString instances.
106
+ #
107
+ #
108
+ # See: PDF2.0 s14.3, https://www.adobe.com/products/xmp.html
109
+ class Metadata
110
+
111
+ # Represents a localized XMP string, i.e. a string with an attached language.
112
+ class LocalizedString < String
113
+ # The language identifier for the string in RFC3066 format.
114
+ attr_accessor :language
115
+ end
116
+
117
+ # Contains a mapping of predefined prefixes for XMP namespaces for metadata.
118
+ PREDEFINED_NAMESPACES = {
119
+ "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
120
+ "xmp" => "http://ns.adobe.com/xap/1.0/",
121
+ "pdf" => "http://ns.adobe.com/pdf/1.3/",
122
+ "dc" => "http://purl.org/dc/elements/1.1/",
123
+ "x" => "adobe:ns:meta/",
124
+ }.freeze
125
+
126
+ # Contains a mapping of predefined XMP properties to their types, i.e. from namespace to
127
+ # property and then type.
128
+ PREDEFINED_PROPERTIES = {
129
+ "http://ns.adobe.com/xap/1.0/" => {
130
+ 'CreatorTool' => 'String',
131
+ 'CreateDate' => 'Date',
132
+ 'ModifyDate' => 'Date',
133
+ }.freeze,
134
+ "http://ns.adobe.com/pdf/1.3/" => {
135
+ 'Keywords' => 'String',
136
+ 'Producer' => 'String',
137
+ 'Trapped' => 'Boolean',
138
+ }.freeze,
139
+ "http://purl.org/dc/elements/1.1/" => {
140
+ 'creator' => 'OrderedArray',
141
+ 'description' => 'LanguageArray',
142
+ 'title' => 'LanguageArray',
143
+ }.freeze,
144
+ }.freeze
145
+
146
+
147
+ # Creates a new Metadata object for the given PDF document.
148
+ def initialize(document)
149
+ @document = document
150
+ @namespaces = PREDEFINED_NAMESPACES.dup
151
+ @properties = PREDEFINED_PROPERTIES.transform_values {|value| value.dup}
152
+ @default_language = document.catalog[:Lang] || 'en'
153
+ @metadata = Hash.new {|h, k| h[k] = {} }
154
+ write_info_dict(true)
155
+ write_metadata_stream(true)
156
+ @document.register_listener(:complete_objects, &method(:write_metadata))
157
+ parse_metadata
158
+ end
159
+
160
+ # :call-seq:
161
+ # metadata.default_language -> language
162
+ # metadata.default_language(value) -> value
163
+ #
164
+ # Returns the default language in RFC3066 format used for unlocalized strings if no argument
165
+ # is given. Otherwise sets the default language to the given language.
166
+ #
167
+ # The initial default language is taken from the document catalog's /Lang entry. If that is not
168
+ # set, the default language is assumed to be English ('en').
169
+ def default_language(value = :UNSET)
170
+ if value == :UNSET
171
+ @default_language
172
+ else
173
+ @default_language = value
174
+ end
175
+ end
176
+
177
+ # Returns +true+ if the information dictionary should be written.
178
+ def write_info_dict?
179
+ @write_info_dict
180
+ end
181
+
182
+ # Makes HexaPDF write the information dictionary if +value+ is +true+.
183
+ #
184
+ # See the class documentation for caveats.
185
+ def write_info_dict(value)
186
+ @write_info_dict = value
187
+ end
188
+
189
+ # Returns +true+ if the metadata stream should be written.
190
+ def write_metadata_stream?
191
+ @write_metadata_stream
192
+ end
193
+
194
+ # Makes HexaPDF write the metadata stream if +value+ is +true+.
195
+ #
196
+ # See the class documentation for caveats.
197
+ def write_metadata_stream(value)
198
+ @write_metadata_stream = value
199
+ end
200
+
201
+ # Registers the +prefix+ for the given namespace +uri+.
202
+ def register_namespace(prefix, uri)
203
+ @namespaces[prefix] = uri
204
+ end
205
+
206
+ # Returns the namespace URI associated with the given prefix.
207
+ def namespace(ns)
208
+ @namespaces.fetch(ns) do
209
+ raise HexaPDF::Error, "Namespace prefix '#{ns}' not registered"
210
+ end
211
+ end
212
+
213
+ # Registers the +property+ for the namespace specified via +prefix+ as the given +type+.
214
+ #
215
+ # The argument +type+ has to be one of the following: 'String', 'Date', 'URI', 'Boolean',
216
+ # 'OrderedArray', 'UnorderedArray', or 'LanguageArray'.
217
+ def register_property_type(prefix, property, type)
218
+ (@properties[namespace(prefix)] ||= {})[property] = type
219
+ end
220
+
221
+ # :call-seq:
222
+ # metadata.property(ns_prefix, name) -> property_value
223
+ # metadata.property(ns_prefix, name, value) -> value
224
+ #
225
+ # Returns the value for the property specified via the namespace prefix +ns_prefix+ and +name+
226
+ # if the +value+ argument is not provided. Otherwise sets the property to +value+.
227
+ #
228
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
229
+ # property is deleted from the metadata.
230
+ def property(ns, property, value = :UNSET)
231
+ ns = @metadata[namespace(ns)]
232
+ if value == :UNSET
233
+ ns[property]
234
+ elsif value.nil?
235
+ ns.delete(property)
236
+ else
237
+ ns[property] = value
238
+ end
239
+ end
240
+
241
+ # :call-seq:
242
+ # metadata.title -> title or nil
243
+ # metadata.title(value) -> value
244
+ #
245
+ # Returns the document's title if no argument is given. Otherwise sets the document's title to
246
+ # the given value.
247
+ #
248
+ # The language for the title is specified via #default_language.
249
+ #
250
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
251
+ # property is deleted from the metadata.
252
+ #
253
+ # This metadata property is represented by the XMP name dc:title.
254
+ def title(value = :UNSET)
255
+ property('dc', 'title', value)
256
+ end
257
+
258
+ # :call-seq:
259
+ # metadata.author -> author or nil
260
+ # metadata.author(value) -> value
261
+ #
262
+ # Returns the name of the person who created the document (author) if no argument is given.
263
+ # Otherwise sets the author to the given value.
264
+ #
265
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
266
+ # property is deleted from the metadata.
267
+ #
268
+ # This metadata property is represented by the XMP name dc:creator.
269
+ def author(value = :UNSET)
270
+ property('dc', 'creator', value)
271
+ end
272
+
273
+ # :call-seq:
274
+ # metadata.subject -> subject or nil
275
+ # metadata.subject(value) -> value
276
+ #
277
+ # Returns the subject of the document if no argument is given. Otherwise sets the subject to
278
+ # the given value.
279
+ #
280
+ # The language for the subject is specified via #default_language.
281
+ #
282
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
283
+ # property is deleted from the metadata.
284
+ #
285
+ # This metadata property is represented by the XMP name dc:description.
286
+ def subject(value = :UNSET)
287
+ property('dc', 'description', value)
288
+ end
289
+
290
+ # :call-seq:
291
+ # metadata.keywords -> keywords or nil
292
+ # metadata.keywords(value) -> value
293
+ #
294
+ # Returns the keywords associated with the document if no argument is given. Otherwise sets
295
+ # keywords to the given value.
296
+ #
297
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
298
+ # property is deleted from the metadata.
299
+ #
300
+ # This metadata property is represented by the XMP name pdf:Keywords.
301
+ def keywords(value = :UNSET)
302
+ property('pdf', 'Keywords', value)
303
+ end
304
+
305
+ # :call-seq:
306
+ # metadata.creator -> creator or nil
307
+ # metadata.creator(value) -> value
308
+ #
309
+ # Returns the name of the PDF processor that created the original document from which this PDF
310
+ # was converted if no argument is given. Otherwise sets the name of the creator tool to the
311
+ # given value.
312
+ #
313
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
314
+ # property is deleted from the metadata.
315
+ #
316
+ # This metadata property is represented by the XMP name xmp:CreatorTool.
317
+ def creator(value = :UNSET)
318
+ property('xmp', 'CreatorTool', value)
319
+ end
320
+
321
+ # :call-seq:
322
+ # metadata.producer -> producer or nil
323
+ # metadata.producer(value) -> value
324
+ #
325
+ # Returns the name of the PDF processor that converted the original document to PDF if no
326
+ # argument is given. Otherwise sets the name of the producer to the given value.
327
+ #
328
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
329
+ # property is deleted from the metadata.
330
+ #
331
+ # This metadata property is represented by the XMP name pdf:Producer.
332
+ def producer(value = :UNSET)
333
+ property('pdf', 'Producer', value)
334
+ end
335
+
336
+ # :call-seq:
337
+ # metadata.creation_date -> creation_date or nil
338
+ # metadata.creation_date(value) -> value
339
+ #
340
+ # Returns the date and time (a Time object) the document was created if no argument is given.
341
+ # Otherwise sets the creation date to the given value.
342
+ #
343
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
344
+ # property is deleted from the metadata.
345
+ #
346
+ # This metadata property is represented by the XMP name xmp:CreateDate.
347
+ def creation_date(value = :UNSET)
348
+ property('xmp', 'CreateDate', value)
349
+ end
350
+
351
+ # :call-seq:
352
+ # metadata.modification_date -> modification_date or nil
353
+ # metadata.modification_date(value) -> value
354
+ #
355
+ # Returns the date and time (a Time object) the document was most recently modified if no
356
+ # argument is given. Otherwise sets the modification date to the given value.
357
+ #
358
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
359
+ # property is deleted from the metadata.
360
+ #
361
+ # This metadata property is represented by the XMP name xmp:ModifyDate.
362
+ def modification_date(value = :UNSET)
363
+ property('xmp', 'ModifyDate', value)
364
+ end
365
+
366
+ # :call-seq:
367
+ # metadata.trapped -> trapped or nil
368
+ # metadata.trapped(value) -> value
369
+ #
370
+ # Returns +true+ if the document has been modified to include trapping information if no
371
+ # argument is given. Otherwise sets the trapped status to the given boolean value.
372
+ #
373
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
374
+ # property is deleted from the metadata.
375
+ #
376
+ # This metadata property is represented by the XMP name pdf:Trapped.
377
+ def trapped(value = :UNSET)
378
+ property('pdf', 'Trapped', value)
379
+ end
380
+
381
+ private
382
+
383
+ # Parses the metadata from the information dictionary into the internal data structure.
384
+ def parse_metadata
385
+ info_dict = @document.trailer.info
386
+ ns_dc = namespace('dc')
387
+ ns_xmp = namespace('xmp')
388
+ ns_pdf = namespace('pdf')
389
+ @metadata[ns_dc]['title'] = info_dict[:Title] if info_dict.key?(:Title)
390
+ @metadata[ns_dc]['creator'] = info_dict[:Author] if info_dict.key?(:Author)
391
+ @metadata[ns_dc]['description'] = info_dict[:Subject] if info_dict.key?(:Subject)
392
+ @metadata[ns_xmp]['CreatorTool'] = info_dict[:Creator] if info_dict.key?(:Creator)
393
+ @metadata[ns_xmp]['CreateDate'] = info_dict[:CreationDate] if info_dict.key?(:CreationDate)
394
+ @metadata[ns_xmp]['ModifyDate'] = info_dict[:ModDate] if info_dict.key?(:ModDate)
395
+ @metadata[ns_pdf]['Keywords'] = info_dict[:Keywords] if info_dict.key?(:Keywords)
396
+ @metadata[ns_pdf]['Producer'] = info_dict[:Producer] if info_dict.key?(:Producer)
397
+ if info_dict.key?(:Trapped) && info_dict[:Trapped] != :Unknown
398
+ @metadata[ns_pdf]['Trapped'] = (info_dict[:Trapped] == :True)
399
+ end
400
+ end
401
+
402
+ # Writes the metadata to the specified destinations.
403
+ def write_metadata
404
+ ns_dc = namespace('dc')
405
+ ns_xmp = namespace('xmp')
406
+ ns_pdf = namespace('pdf')
407
+
408
+ if write_info_dict?
409
+ info_dict = @document.trailer.info
410
+ info_dict[:Title] = Array(@metadata[ns_dc]['title']).first
411
+ info_dict[:Author] = Array(@metadata[ns_dc]['creator']).join(', ')
412
+ info_dict[:Subject] = Array(@metadata[ns_dc]['description']).first
413
+ info_dict[:Creator] = @metadata[ns_xmp]['CreatorTool']
414
+ info_dict[:CreationDate] = @metadata[ns_xmp]['CreateDate']
415
+ info_dict[:ModDate] = @metadata[ns_xmp]['ModifyDate']
416
+ info_dict[:Keywords] = @metadata[ns_pdf]['Keywords']
417
+ info_dict[:Producer] = @metadata[ns_pdf]['Producer']
418
+ info_dict[:Trapped] = @metadata[ns_pdf]['Trapped'] ? :True : :False
419
+ end
420
+
421
+ if write_metadata_stream?
422
+ descriptions = @metadata.map do |namespace, values|
423
+ xmp_description(@namespaces.key(namespace), values)
424
+ end.join("\n")
425
+ obj = @document.catalog[:Metadata] ||= @document.add({Type: :Metadata, Subtype: :XML})
426
+ obj.stream = xmp_packet(descriptions)
427
+ end
428
+ end
429
+
430
+ # Creates an XMP packet with the given payload +data+.
431
+ def xmp_packet(data)
432
+ <<~XMP
433
+ <?xpacket begin="\u{FEFF}" id="#{SecureRandom.uuid.tr('-', '')}"?>
434
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
435
+ #{data}
436
+ </rdf:RDF>
437
+ <?xpacket end="r"?>
438
+ XMP
439
+ end
440
+
441
+ # Creates an 'rdf:Description' element for all metadata +values+ with the given +ns_prefix+.
442
+ def xmp_description(ns_prefix, values)
443
+ values = values.map do |name, value|
444
+ str = +"<#{ns_prefix}:#{name}"
445
+ case (property_type = @properties[namespace(ns_prefix)][name])
446
+ when 'String'
447
+ str << ">#{xmp_escape(value)}</#{ns_prefix}:#{name}>"
448
+ when 'Date'
449
+ str << ">#{xmp_date(value)}</#{ns_prefix}:#{name}>"
450
+ when 'URI'
451
+ str << " rdf:resource=\"#{xmp_escape(value.to_s)}\" />"
452
+ when 'Boolean'
453
+ str << ">#{value ? 'True' : 'False'}</#{ns_prefix}:#{name}>"
454
+ when 'LanguageArray'
455
+ value = Array(value).map do |item|
456
+ lang = item.respond_to?(:language) ? item.language : default_language
457
+ "<rdf:li xml:lang=\"#{lang}\">#{xmp_escape(item)}</rdf:li>"
458
+ end.join("\n")
459
+ str << "><rdf:Alt>\n#{value}\n</rdf:Alt></#{ns_prefix}:#{name}>"
460
+ when 'OrderedArray', 'UnorderedArray'
461
+ value = Array(value).map {|item| "<rdf:li>#{xmp_escape(item)}</rdf:li>" }.join("\n")
462
+ el_type = (property_type == 'OrderedArray' ? 'Seq' : 'Bag')
463
+ str << "><rdf:#{el_type}>\n#{value}\n</rdf:#{el_type}></#{ns_prefix}:#{name}>"
464
+ end
465
+ str
466
+ end.join("\n")
467
+ <<~XMP.strip
468
+ <rdf:Description rdf:about="" xmlns:#{ns_prefix}="#{xmp_escape(namespace(ns_prefix))}">
469
+ #{values}
470
+ </rdf:Description>
471
+ XMP
472
+ end
473
+
474
+ # Escapes the given value so as to be usable as XMP simple value.
475
+ def xmp_escape(value)
476
+ value.gsub(/<|>|"/, {'<' => '&lt;', '>' => '&gt;', '"' => '&quot;'})
477
+ end
478
+
479
+ # Formats the given date-time object (Time, Date, or DateTime) to be a valid XMP date-time
480
+ # value.
481
+ def xmp_date(date)
482
+ date.strftime("%Y-%m-%dT%H:%M:%S%:z")
483
+ end
484
+
485
+ end
486
+
487
+ end
488
+ end
@@ -120,6 +120,7 @@ module HexaPDF
120
120
  autoload(:Files, 'hexapdf/document/files')
121
121
  autoload(:Destinations, 'hexapdf/document/destinations')
122
122
  autoload(:Layout, 'hexapdf/document/layout')
123
+ autoload(:Metadata, 'hexapdf/document/metadata')
123
124
 
124
125
  # :call-seq:
125
126
  # Document.open(filename, **docargs) -> doc
@@ -486,6 +487,16 @@ module HexaPDF
486
487
  pdf_data ? @cache[pdf_data].clear : @cache.clear
487
488
  end
488
489
 
490
+ # Returns the Metadata object that provides a convenience interface for working with the
491
+ # document metadata.
492
+ #
493
+ # Note that invoking this method means that, depending on the settings, the info dictionary as
494
+ # well as the metadata stream will be overwritten when the document gets written. See the
495
+ # "Caveats" section in the Metadata documentation.
496
+ def metadata
497
+ @metadata ||= Metadata.new(self)
498
+ end
499
+
489
500
  # Returns the Pages object that provides convenience methods for working with the pages of the
490
501
  # PDF file.
491
502
  #
@@ -706,13 +717,17 @@ module HexaPDF
706
717
  # Optimize the file size by using object and cross-reference streams. This will raise the PDF
707
718
  # version to at least 1.5.
708
719
  def write(file_or_io, incremental: false, validate: true, update_fields: true, optimize: false)
709
- dispatch_message(:complete_objects)
710
-
711
720
  if update_fields
712
721
  trailer.update_id
713
- trailer.info[:ModDate] = Time.now
722
+ if @metadata
723
+ metadata.modification_date(Time.now)
724
+ else
725
+ trailer.info[:ModDate] = Time.now
726
+ end
714
727
  end
715
728
 
729
+ dispatch_message(:complete_objects)
730
+
716
731
  if validate
717
732
  self.validate(auto_correct: true) do |msg, correctable, obj|
718
733
  next if correctable
@@ -69,11 +69,11 @@ module HexaPDF
69
69
  @block_used = false
70
70
  end
71
71
 
72
- # Returns the length of the wrapped string.
72
+ # Returns the length in bytes of the wrapped string.
73
73
  #
74
74
  # May only be called before #resume!
75
75
  def length
76
- str.length
76
+ str.bytesize
77
77
  end
78
78
 
79
79
  # Returns +true+ if #resume has not yet been called.
@@ -132,6 +132,77 @@ module HexaPDF
132
132
  define_field :StructParent, type: Integer, version: '1.3'
133
133
  define_field :OC, type: Dictionary, version: '1.5'
134
134
 
135
+ ##
136
+ # :method: flags
137
+ #
138
+ # Returns an array of flag names representing the set bit flags for /F.
139
+ #
140
+ # The available flags are:
141
+ #
142
+ # :invisible or 0::
143
+ # Applies only to non-standard annotations. If set, do not render or print the annotation.
144
+ #
145
+ # :hidden or 1::
146
+ # If set, do not render the annotation or allow interactions.
147
+ #
148
+ # :print or 2::
149
+ # If set, print the annotation unless the hidden flag is also set. Otherwise never print
150
+ # the annotation.
151
+ #
152
+ # :no_zoom or 3::
153
+ # If set, do not scale the annotation's appearance to match the magnification of the page.
154
+ #
155
+ # :no_rotate or 4::
156
+ # If set, do not rotate the annotation's appearance to match the rotation of the page.
157
+ #
158
+ # :no_view or 5::
159
+ # If set, do not render the annotation on the screen or allow interactions.
160
+ #
161
+ # :read_only or 6::
162
+ # If set, do not allow user interactions.
163
+ #
164
+ # :locked or 7::
165
+ # If set, do not allow the annotation to be deleted or its properties be modified.
166
+ #
167
+ # :toggle_no_view or 8::
168
+ # If set, invert the interpretation of the :no_view flag for annotation selection and
169
+ # mouse hovering.
170
+ #
171
+ # :locked_contents or 9::
172
+ # If set, do not allow the contents of the annotation to be modified.
173
+ #
174
+
175
+ ##
176
+ # :method: flagged?
177
+ # :call-seq:
178
+ # flagged?(flag)
179
+ #
180
+ # Returns +true+ if the given flag is set on /F. The argument can either be the flag name or
181
+ # the bit index.
182
+ #
183
+ # See #flags for the list of available flags.
184
+ #
185
+
186
+ ##
187
+ # :method: flag
188
+ # :call-seq:
189
+ # flag(*flags, clear_existing: false)
190
+ #
191
+ # Sets the given flags on /F, given as flag names or bit indices. If +clear_existing+ is
192
+ # +true+, all prior flags will be cleared.
193
+ #
194
+ # See #flags for the list of available flags.
195
+ #
196
+
197
+ ##
198
+ # :method: unflag
199
+ # :call-seq:
200
+ # unflag(*flags)
201
+ #
202
+ # Clears the given flags from /F, given as flag names or bit indices.
203
+ #
204
+ # See #flags for the list of available flags.
205
+ #
135
206
  bit_field(:flags, {invisible: 0, hidden: 1, print: 2, no_zoom: 3, no_rotate: 4,
136
207
  no_view: 5, read_only: 6, locked: 7, toggle_no_view: 8,
137
208
  locked_contents: 9},
@@ -71,7 +71,7 @@ module HexaPDF
71
71
  define_field :AA, type: Dictionary, version: '1.4'
72
72
  define_field :URI, type: Dictionary, version: '1.1'
73
73
  define_field :AcroForm, type: :XXAcroForm, version: '1.2'
74
- define_field :Metadata, type: Stream, indirect: true, version: '1.4'
74
+ define_field :Metadata, type: :Metadata, indirect: true, version: '1.4'
75
75
  define_field :StructTreeRoot, type: Dictionary, version: '1.3'
76
76
  define_field :MarkInfo, type: :XXMarkInformation, version: '1.4'
77
77
  define_field :Lang, type: String, version: '1.4'
@@ -0,0 +1,63 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2023 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'hexapdf/stream'
38
+
39
+ module HexaPDF
40
+ module Type
41
+
42
+ # Represents an XMP metadata stream.
43
+ #
44
+ # XMP metadata streams may be attached to most PDF objects, though it only makes sense for some
45
+ # of them.
46
+ #
47
+ # There is also a main XMP metadata stream for the whole document that is accessible via the
48
+ # /Metadata key of the document catalog. That metadata stream should contain the same values as
49
+ # the PDF's info dictionary and may contain additional entries. This can be accomplished via
50
+ # HexaPDF::Document#metadata.
51
+ #
52
+ # See: PDF2.0 s14.3.2
53
+ class Metadata < Stream
54
+
55
+ define_type :Metadata
56
+
57
+ define_field :Type, type: Symbol, default: type, required: true
58
+ define_field :Subtype, type: Symbol, default: :XML, required: true
59
+
60
+ end
61
+
62
+ end
63
+ end
data/lib/hexapdf/type.rb CHANGED
@@ -80,6 +80,7 @@ module HexaPDF
80
80
  autoload(:OptionalContentMembership, 'hexapdf/type/optional_content_membership')
81
81
  autoload(:OptionalContentProperties, 'hexapdf/type/optional_content_properties')
82
82
  autoload(:OptionalContentConfiguration, 'hexapdf/type/optional_content_configuration')
83
+ autoload(:Metadata, 'hexapdf/type/metadata')
83
84
 
84
85
  end
85
86
 
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.36.0'
40
+ VERSION = '0.37.0'
41
41
 
42
42
  end
@@ -0,0 +1,192 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test_helper'
4
+ require 'stringio'
5
+ require 'hexapdf/document'
6
+
7
+ describe HexaPDF::Document::Metadata do
8
+ before do
9
+ @doc = HexaPDF::Document.new
10
+ @doc.trailer.info[:Title] = 'Title'
11
+ @metadata = @doc.metadata
12
+ end
13
+
14
+ it "parses the info dictionary on creation" do
15
+ assert_equal('Title', @metadata.title)
16
+ @doc.trailer.info[:Trapped] = :Unknown
17
+ assert_nil(HexaPDF::Document::Metadata.new(@doc).trapped)
18
+ @doc.trailer.info[:Trapped] = :True
19
+ assert_equal(true, HexaPDF::Document::Metadata.new(@doc).trapped)
20
+ @doc.trailer.info[:Trapped] = :False
21
+ assert_equal(false, HexaPDF::Document::Metadata.new(@doc).trapped)
22
+ end
23
+
24
+ describe "default_language" do
25
+ it "use the document's language as default" do
26
+ @doc.catalog[:Lang] = 'de'
27
+ assert_equal("de", HexaPDF::Document::Metadata.new(@doc).default_language)
28
+ end
29
+
30
+ it "falls back to English if the document doesn't have a default language set" do
31
+ assert_equal('en', @metadata.default_language)
32
+ end
33
+
34
+ it "allows changing the default language" do
35
+ @metadata.default_language('de')
36
+ assert_equal('de', @metadata.default_language)
37
+ end
38
+ end
39
+
40
+ it "enables writing the info dict by default" do
41
+ assert(@metadata.write_info_dict?)
42
+ end
43
+
44
+ it "allows setting whether the info dict is written" do
45
+ @metadata.write_info_dict(false)
46
+ refute(@metadata.write_info_dict?)
47
+ end
48
+
49
+ it "enables writing the metadata stream by default" do
50
+ assert(@metadata.write_metadata_stream?)
51
+ end
52
+
53
+ it "allows setting whether the metadata stream is written" do
54
+ @metadata.write_metadata_stream(false)
55
+ refute(@metadata.write_metadata_stream?)
56
+ end
57
+
58
+ it "resolves namespace URI via a prefix" do
59
+ assert_equal('http://www.w3.org/1999/02/22-rdf-syntax-ns#', @metadata.namespace('rdf'))
60
+ end
61
+
62
+ it "allows registering prefixes for namespaces" do
63
+ err = assert_raises(HexaPDF::Error) { @metadata.namespace('hexa') }
64
+ assert_match(/prefix.*hexa.*not registered/, err.message)
65
+ @metadata.register_namespace('hexa', 'hexa:')
66
+ assert_equal('hexa:', @metadata.namespace('hexa'))
67
+ end
68
+
69
+ it "allows registering property types" do
70
+ @metadata.register_property_type('dc', 'title', 'Boolean')
71
+ assert_equal('Boolean', @metadata.instance_variable_get(:@properties)[@metadata.namespace('dc')]['title'])
72
+ end
73
+
74
+ it "allows reading and setting properties" do
75
+ assert_equal('Title', @metadata.property('dc', 'title'))
76
+ @metadata.property('dc', 'title', 'another')
77
+ assert_equal('another', @metadata.property('dc', 'title'))
78
+ @metadata.property('dc', 'title', nil)
79
+ assert_nil(@metadata.property('dc', 'title'))
80
+ refute(@metadata.instance_variable_get(:@metadata)[@metadata.namespace('dc')].key?('title'))
81
+ end
82
+
83
+ it "allows reading and setting all info dictionary properties" do
84
+ [['title', 'dc', 'title'], ['author', 'dc', 'creator'], ['subject', 'dc', 'description'],
85
+ ['keywords', 'pdf', 'Keywords'], ['creator', 'xmp', 'CreatorTool'],
86
+ ['producer', 'pdf', 'Producer'], ['creation_date', 'xmp', 'CreateDate'],
87
+ ['modification_date', 'xmp', 'ModifyDate'], ['trapped', 'pdf', 'Trapped']].each do |name, ns, property|
88
+ @metadata.property(ns, property, 'value')
89
+ assert_equal('value', @metadata.send(name), name)
90
+ @metadata.send(name, 'modified')
91
+ assert_equal('modified', @metadata.property(ns, property), name)
92
+ end
93
+ end
94
+
95
+ describe "metadata writing" do
96
+ before do
97
+ @time = Time.now.floor
98
+ @metadata.title('Title')
99
+ @metadata.author('Author')
100
+ @metadata.subject('Subject')
101
+ @metadata.keywords('Keywords')
102
+ @metadata.creator('Creator')
103
+ @metadata.producer('Producer')
104
+ @metadata.creation_date(@time)
105
+ @metadata.modification_date(@time)
106
+ @metadata.trapped(true)
107
+ end
108
+
109
+ it "writes the info dictionary properties" do
110
+ info = @doc.trailer.info
111
+ @doc.write(StringIO.new, update_fields: false)
112
+ assert_equal('Title', info[:Title])
113
+ assert_equal('Author', info[:Author])
114
+ assert_equal('Subject', info[:Subject])
115
+ assert_equal('Keywords', info[:Keywords])
116
+ assert_equal('Creator', info[:Creator])
117
+ assert_match(/HexaPDF/, info[:Producer])
118
+ assert_same(@time, info[:CreationDate])
119
+ assert_same(@time, info[:ModDate])
120
+ assert_equal(:True, info[:Trapped])
121
+ end
122
+
123
+ it "uses a correctly updated modification date if set so by Document#write" do
124
+ info = @doc.trailer.info
125
+ sleep(0.1)
126
+ @doc.write(StringIO.new)
127
+ assert_same(@time, info[:CreationDate])
128
+ refute_same(@time, info[:ModDate])
129
+ assert(@time < info[:ModDate])
130
+ end
131
+
132
+ it "correctly handles array values for title, author, and subject for info dictionary" do
133
+ @metadata.title(['Title', 'Another'])
134
+ @metadata.author(['Author', 'Author2'])
135
+ @metadata.subject(['Subject', 'Another'])
136
+ @doc.write(StringIO.new)
137
+ info = @doc.trailer.info
138
+ assert_equal('Title', info[:Title])
139
+ assert_equal('Author, Author2', info[:Author])
140
+ assert_equal('Subject', info[:Subject])
141
+ end
142
+
143
+ it "writes the XMP metadata" do
144
+ title = HexaPDF::Document::Metadata::LocalizedString.new('Der Titel')
145
+ title.language = 'de'
146
+ @metadata.title(['Title', title])
147
+ @metadata.author(['Author 1', 'Author 2'])
148
+ @metadata.register_property_type('dc', 'other', 'URI')
149
+ @metadata.property('dc', 'other', 'https://test.org/example')
150
+ @doc.write(StringIO.new, update_fields: false)
151
+ metadata = <<~XMP
152
+ <?xpacket begin="" id=""?>
153
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
154
+ <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
155
+ <dc:title><rdf:Alt>
156
+ <rdf:li xml:lang="en">Title</rdf:li>
157
+ <rdf:li xml:lang="de">Der Titel</rdf:li>
158
+ </rdf:Alt></dc:title>
159
+ <dc:creator><rdf:Seq>
160
+ <rdf:li>Author 1</rdf:li>
161
+ <rdf:li>Author 2</rdf:li>
162
+ </rdf:Seq></dc:creator>
163
+ <dc:description><rdf:Alt>
164
+ <rdf:li xml:lang="en">Subject</rdf:li>
165
+ </rdf:Alt></dc:description>
166
+ <dc:other rdf:resource="https://test.org/example" />
167
+ </rdf:Description>
168
+ <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
169
+ <pdf:Keywords>Keywords</pdf:Keywords>
170
+ <pdf:Producer>Producer</pdf:Producer>
171
+ <pdf:Trapped>True</pdf:Trapped>
172
+ </rdf:Description>
173
+ <rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/">
174
+ <xmp:CreatorTool>Creator</xmp:CreatorTool>
175
+ <xmp:CreateDate>#{@metadata.send(:xmp_date, @time)}</xmp:CreateDate>
176
+ <xmp:ModifyDate>#{@metadata.send(:xmp_date, @time)}</xmp:ModifyDate>
177
+ </rdf:Description>
178
+ </rdf:RDF>
179
+ <?xpacket end="r"?>
180
+ XMP
181
+ assert_equal(metadata, @doc.catalog[:Metadata].stream.sub(/(?<=id=")\w+/, ''))
182
+ end
183
+
184
+ it "respects the write settings for info dictionary and metadata stream" do
185
+ @metadata.write_info_dict(false)
186
+ @metadata.write_metadata_stream(false)
187
+ @doc.write(StringIO.new)
188
+ assert_nil(@doc.trailer.info[:Author])
189
+ refute(@doc.catalog.key?(:Metadata))
190
+ end
191
+ end
192
+ end
@@ -18,6 +18,12 @@ describe HexaPDF::Filter do
18
18
  assert_equal(@str, collector(fib))
19
19
  assert_equal('', collector(fib))
20
20
  end
21
+
22
+ it "returns the correct length of the fiber" do
23
+ str = "\u{FEFF}Öl"
24
+ fib = @obj.source_from_proc { str }
25
+ assert_equal(6, fib.length)
26
+ end
21
27
  end
22
28
 
23
29
  describe "source_from_string" do
@@ -30,6 +36,12 @@ describe HexaPDF::Filter do
30
36
  it "returns the whole string" do
31
37
  assert_equal(@str, collector(@obj.source_from_string(@str)))
32
38
  end
39
+
40
+ it "returns the correct size of the fiber" do
41
+ str = "\u{FEFF}Öl"
42
+ fib = @obj.source_from_string(str)
43
+ assert_equal(6, fib.length)
44
+ end
33
45
  end
34
46
 
35
47
  describe "source_from_io" do
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.36.0)>>
43
+ <</Producer(HexaPDF version 0.37.0)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.36.0)>>
75
+ <</Producer(HexaPDF version 0.37.0)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.36.0
4
+ version: 0.37.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-20 00:00:00.000000000 Z
11
+ date: 2024-01-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse
@@ -358,6 +358,7 @@ files:
358
358
  - lib/hexapdf/document/fonts.rb
359
359
  - lib/hexapdf/document/images.rb
360
360
  - lib/hexapdf/document/layout.rb
361
+ - lib/hexapdf/document/metadata.rb
361
362
  - lib/hexapdf/document/pages.rb
362
363
  - lib/hexapdf/encryption.rb
363
364
  - lib/hexapdf/encryption/aes.rb
@@ -505,6 +506,7 @@ files:
505
506
  - lib/hexapdf/type/image.rb
506
507
  - lib/hexapdf/type/info.rb
507
508
  - lib/hexapdf/type/mark_information.rb
509
+ - lib/hexapdf/type/metadata.rb
508
510
  - lib/hexapdf/type/names.rb
509
511
  - lib/hexapdf/type/object_stream.rb
510
512
  - lib/hexapdf/type/optional_content_configuration.rb
@@ -631,6 +633,7 @@ files:
631
633
  - test/hexapdf/document/test_fonts.rb
632
634
  - test/hexapdf/document/test_images.rb
633
635
  - test/hexapdf/document/test_layout.rb
636
+ - test/hexapdf/document/test_metadata.rb
634
637
  - test/hexapdf/document/test_pages.rb
635
638
  - test/hexapdf/encryption/common.rb
636
639
  - test/hexapdf/encryption/test_aes.rb
@@ -796,7 +799,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
796
799
  requirements:
797
800
  - - ">="
798
801
  - !ruby/object:Gem::Version
799
- version: '2.6'
802
+ version: '2.7'
800
803
  required_rubygems_version: !ruby/object:Gem::Requirement
801
804
  requirements:
802
805
  - - ">="