hexapdf 0.36.0 → 0.37.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b7cec7494ffe5e4f8031e5a05f2a31da13741879ff138513f737048880702389
4
- data.tar.gz: 238a71920dcd9bde03497a22f0583bf72f11d358176f28217214f86ca835764d
3
+ metadata.gz: 464355c84e7dd5636fe34bb0feb96038e06d4e0701d0ce9e915a2a9bddd3c18a
4
+ data.tar.gz: 8bcebc03512574b0fd71396bbe21330f59cb49e75f2194d1cd47ced0648ef7ad
5
5
  SHA512:
6
- metadata.gz: 7850251dba07dbae11c280bc87852c91aa72e166da7983c4b4a1508a4c76d99306eaacbc108dad8f0ba54246a2fe6648714cd8bfd5f6d9bddeb0ec67abab9867
7
- data.tar.gz: dace9a43ef57a0d27d33218ef4dc6503a961edfc2c96c04c16ebeb0d8675e09373c21908d159c992c4d2125499bdd1818acdf1150c86106e23888552210f4fc8
6
+ metadata.gz: ccfbc72734d74178b1eb49da85bcd364410b12abfbb2d2cd730e6a37176dc85c7b87369bd0463869d6815d911e3428a21b0e18e1492f75d75b59c53a68ebf835
7
+ data.tar.gz: bc7001b82ec40571b6257575923d0fb5ef4c1011ff4067b3b53c40822240328c461c358084f9de8d5271809d9d85b8bd2de91ce219e211cd84154b3156c7c426
data/CHANGELOG.md CHANGED
@@ -1,3 +1,19 @@
1
+ ## 0.37.0 - 2024-01-29
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Document::Metadata] for working with metadata (reading the info
6
+ dictionary and writing it as well as the XMP metadata stream)
7
+
8
+ ### Changed
9
+
10
+ * Minimum Ruby version to be 2.7
11
+
12
+ ### Fixed
13
+
14
+ * [HexaPDF::FiberDoubleForString#length] to not assume a binary string
15
+
16
+
1
17
  ## 0.36.0 - 2024-01-20
2
18
 
3
19
  ### Added
data/Rakefile CHANGED
@@ -47,7 +47,7 @@ namespace :dev do
47
47
  end
48
48
 
49
49
  task :test_all do
50
- versions = `rbenv versions --bare | grep -i ^2.[67]\\\\\\|^3.`.split("\n")
50
+ versions = `rbenv versions --bare | grep -i ^2.7\\\\\\|^3.`.split("\n")
51
51
  versions.each do |version|
52
52
  sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
53
53
  end
@@ -688,6 +688,7 @@ module HexaPDF
688
688
  XXReference: 'HexaPDF::Type::Form::Reference',
689
689
  XXCIDSystemInfo: 'HexaPDF::Type::CIDFont::CIDSystemInfo',
690
690
  Group: 'HexaPDF::Type::Form::Group',
691
+ Metadata: 'HexaPDF::Type::Metadata',
691
692
  },
692
693
  'object.subtype_map' => {
693
694
  nil => {
@@ -706,6 +707,7 @@ module HexaPDF
706
707
  Text: 'HexaPDF::Type::Annotations::Text',
707
708
  Link: 'HexaPDF::Type::Annotations::Link',
708
709
  Widget: 'HexaPDF::Type::Annotations::Widget',
710
+ XML: 'HexaPDF::Type::Metadata'
709
711
  },
710
712
  XObject: {
711
713
  Image: 'HexaPDF::Type::Image',
@@ -118,7 +118,7 @@ module HexaPDF
118
118
  # composer.list(item_spacing: 2) do |list|
119
119
  # composer.document.config['layout.boxes.map'].each do |name, klass|
120
120
  # list.formatted_text([{text: name.to_s, fill_color: "hp-blue-dark"},
121
- # {text: "\n#{klass}"}, font_size: 7])
121
+ # {text: "\n#{klass}"}], font_size: 6)
122
122
  # end
123
123
  # end
124
124
  # end
@@ -0,0 +1,488 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2023 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'securerandom'
38
+ require 'hexapdf/dictionary'
39
+ require 'hexapdf/error'
40
+
41
+ module HexaPDF
42
+ class Document
43
+
44
+ # This class provides methods for reading and writing the document-level metadata.
45
+ #
46
+ # When an instance is created (usually through HexaPDF::Document#metadata), the metadata is read
47
+ # from the document's information dictionary (see HexaPDF::Type::Info) and made available
48
+ # through the various methods.
49
+ #
50
+ # By default, the metadata is written to the information dictionary as well as to the document's
51
+ # metadata stream (see HexaPDF::Type::Metadata) once the document is written. This can be
52
+ # controlled via the #write_info_dict and #write_metadata_stream methods.
53
+ #
54
+ # While HexaPDF is able to write an XMP packet (using a limited form) to the document's metadata
55
+ # stream, it provides no way for reading XMP metadata. If reading functionality or extended
56
+ # writing functionality is needed, make sure this class does not write the metadata and
57
+ # read/create the metadata stream yourself.
58
+ #
59
+ #
60
+ # == Caveats
61
+ #
62
+ # * Disabling writing to the information dictionary will only prevent parts from being written.
63
+ # The #producer is always written to the information dictionary as per the AGPL license terms.
64
+ # The #modification_date may be written depending on the arguments to HexaPDF::Document#write.
65
+ #
66
+ # * If writing the metadata stream is enabled, any existing metadata stream is completely
67
+ # overwritten. This means the metadata stream is *not* updated with the changed information.
68
+ #
69
+ #
70
+ # == Adding custom metadata properties
71
+ #
72
+ # All the properties specified for the information dictionary are supported.
73
+ #
74
+ # Furthermore, HexaPDF supports writing custom properties to the metadata stream. For this to
75
+ # work the used XMP namespaces need to be registered using #register_namespace. Additionally,
76
+ # the types of all used XMP properties need to be registered using #register_property_type.
77
+ #
78
+ # The following types for XMP properties are supported:
79
+ #
80
+ # String::
81
+ # Maps to the XMP simple string value. Values need to be of type String.
82
+ #
83
+ # Date::
84
+ # Maps to the XMP simple string value, correctly formatted. Values need to be of type Time,
85
+ # Date, or DateTime.
86
+ #
87
+ # URI::
88
+ # Maps to the XMP simple value variant of URI. Values need to be of type String or URI.
89
+ #
90
+ # Boolean::
91
+ # Maps to the XMP simple string value, correctly formatted. Values need to be either +true+
92
+ # or +false+.
93
+ #
94
+ # OrderedArray::
95
+ # Maps to the XMP ordered array. Values need to be of type Array and items must be XMP
96
+ # simple values.
97
+ #
98
+ # UnorderedArray::
99
+ # Maps to the XMP unordered array. Values need to be of type Array and items must be XMP
100
+ # simple values.
101
+ #
102
+ # LanguageArray::
103
+ # Maps to the XMP language alternatives array. Values need to be of type Array and items
104
+ # must either be strings (they are associated with the set default language) or
105
+ # LocalizedString instances.
106
+ #
107
+ #
108
+ # See: PDF2.0 s14.3, https://www.adobe.com/products/xmp.html
109
+ class Metadata
110
+
111
+ # Represents a localized XMP string, i.e. a string with an attached language.
112
+ class LocalizedString < String
113
+ # The language identifier for the string in RFC3066 format.
114
+ attr_accessor :language
115
+ end
116
+
117
+ # Contains a mapping of predefined prefixes for XMP namespaces for metadata.
118
+ PREDEFINED_NAMESPACES = {
119
+ "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
120
+ "xmp" => "http://ns.adobe.com/xap/1.0/",
121
+ "pdf" => "http://ns.adobe.com/pdf/1.3/",
122
+ "dc" => "http://purl.org/dc/elements/1.1/",
123
+ "x" => "adobe:ns:meta/",
124
+ }.freeze
125
+
126
+ # Contains a mapping of predefined XMP properties to their types, i.e. from namespace to
127
+ # property and then type.
128
+ PREDEFINED_PROPERTIES = {
129
+ "http://ns.adobe.com/xap/1.0/" => {
130
+ 'CreatorTool' => 'String',
131
+ 'CreateDate' => 'Date',
132
+ 'ModifyDate' => 'Date',
133
+ }.freeze,
134
+ "http://ns.adobe.com/pdf/1.3/" => {
135
+ 'Keywords' => 'String',
136
+ 'Producer' => 'String',
137
+ 'Trapped' => 'Boolean',
138
+ }.freeze,
139
+ "http://purl.org/dc/elements/1.1/" => {
140
+ 'creator' => 'OrderedArray',
141
+ 'description' => 'LanguageArray',
142
+ 'title' => 'LanguageArray',
143
+ }.freeze,
144
+ }.freeze
145
+
146
+
147
+ # Creates a new Metadata object for the given PDF document.
148
+ def initialize(document)
149
+ @document = document
150
+ @namespaces = PREDEFINED_NAMESPACES.dup
151
+ @properties = PREDEFINED_PROPERTIES.transform_values {|value| value.dup}
152
+ @default_language = document.catalog[:Lang] || 'en'
153
+ @metadata = Hash.new {|h, k| h[k] = {} }
154
+ write_info_dict(true)
155
+ write_metadata_stream(true)
156
+ @document.register_listener(:complete_objects, &method(:write_metadata))
157
+ parse_metadata
158
+ end
159
+
160
+ # :call-seq:
161
+ # metadata.default_language -> language
162
+ # metadata.default_language(value) -> value
163
+ #
164
+ # Returns the default language in RFC3066 format used for unlocalized strings if no argument
165
+ # is given. Otherwise sets the default language to the given language.
166
+ #
167
+ # The initial default language is taken from the document catalog's /Lang entry. If that is not
168
+ # set, the default language is assumed to be English ('en').
169
+ def default_language(value = :UNSET)
170
+ if value == :UNSET
171
+ @default_language
172
+ else
173
+ @default_language = value
174
+ end
175
+ end
176
+
177
+ # Returns +true+ if the information dictionary should be written.
178
+ def write_info_dict?
179
+ @write_info_dict
180
+ end
181
+
182
+ # Makes HexaPDF write the information dictionary if +value+ is +true+.
183
+ #
184
+ # See the class documentation for caveats.
185
+ def write_info_dict(value)
186
+ @write_info_dict = value
187
+ end
188
+
189
+ # Returns +true+ if the metadata stream should be written.
190
+ def write_metadata_stream?
191
+ @write_metadata_stream
192
+ end
193
+
194
+ # Makes HexaPDF write the metadata stream if +value+ is +true+.
195
+ #
196
+ # See the class documentation for caveats.
197
+ def write_metadata_stream(value)
198
+ @write_metadata_stream = value
199
+ end
200
+
201
+ # Registers the +prefix+ for the given namespace +uri+.
202
+ def register_namespace(prefix, uri)
203
+ @namespaces[prefix] = uri
204
+ end
205
+
206
+ # Returns the namespace URI associated with the given prefix.
207
+ def namespace(ns)
208
+ @namespaces.fetch(ns) do
209
+ raise HexaPDF::Error, "Namespace prefix '#{ns}' not registered"
210
+ end
211
+ end
212
+
213
+ # Registers the +property+ for the namespace specified via +prefix+ as the given +type+.
214
+ #
215
+ # The argument +type+ has to be one of the following: 'String', 'Date', 'URI', 'Boolean',
216
+ # 'OrderedArray', 'UnorderedArray', or 'LanguageArray'.
217
+ def register_property_type(prefix, property, type)
218
+ (@properties[namespace(prefix)] ||= {})[property] = type
219
+ end
220
+
221
+ # :call-seq:
222
+ # metadata.property(ns_prefix, name) -> property_value
223
+ # metadata.property(ns_prefix, name, value) -> value
224
+ #
225
+ # Returns the value for the property specified via the namespace prefix +ns_prefix+ and +name+
226
+ # if the +value+ argument is not provided. Otherwise sets the property to +value+.
227
+ #
228
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
229
+ # property is deleted from the metadata.
230
+ def property(ns, property, value = :UNSET)
231
+ ns = @metadata[namespace(ns)]
232
+ if value == :UNSET
233
+ ns[property]
234
+ elsif value.nil?
235
+ ns.delete(property)
236
+ else
237
+ ns[property] = value
238
+ end
239
+ end
240
+
241
+ # :call-seq:
242
+ # metadata.title -> title or nil
243
+ # metadata.title(value) -> value
244
+ #
245
+ # Returns the document's title if no argument is given. Otherwise sets the document's title to
246
+ # the given value.
247
+ #
248
+ # The language for the title is specified via #default_language.
249
+ #
250
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
251
+ # property is deleted from the metadata.
252
+ #
253
+ # This metadata property is represented by the XMP name dc:title.
254
+ def title(value = :UNSET)
255
+ property('dc', 'title', value)
256
+ end
257
+
258
+ # :call-seq:
259
+ # metadata.author -> author or nil
260
+ # metadata.author(value) -> value
261
+ #
262
+ # Returns the name of the person who created the document (author) if no argument is given.
263
+ # Otherwise sets the author to the given value.
264
+ #
265
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
266
+ # property is deleted from the metadata.
267
+ #
268
+ # This metadata property is represented by the XMP name dc:creator.
269
+ def author(value = :UNSET)
270
+ property('dc', 'creator', value)
271
+ end
272
+
273
+ # :call-seq:
274
+ # metadata.subject -> subject or nil
275
+ # metadata.subject(value) -> value
276
+ #
277
+ # Returns the subject of the document if no argument is given. Otherwise sets the subject to
278
+ # the given value.
279
+ #
280
+ # The language for the subject is specified via #default_language.
281
+ #
282
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
283
+ # property is deleted from the metadata.
284
+ #
285
+ # This metadata property is represented by the XMP name dc:description.
286
+ def subject(value = :UNSET)
287
+ property('dc', 'description', value)
288
+ end
289
+
290
+ # :call-seq:
291
+ # metadata.keywords -> keywords or nil
292
+ # metadata.keywords(value) -> value
293
+ #
294
+ # Returns the keywords associated with the document if no argument is given. Otherwise sets
295
+ # keywords to the given value.
296
+ #
297
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
298
+ # property is deleted from the metadata.
299
+ #
300
+ # This metadata property is represented by the XMP name pdf:Keywords.
301
+ def keywords(value = :UNSET)
302
+ property('pdf', 'Keywords', value)
303
+ end
304
+
305
+ # :call-seq:
306
+ # metadata.creator -> creator or nil
307
+ # metadata.creator(value) -> value
308
+ #
309
+ # Returns the name of the PDF processor that created the original document from which this PDF
310
+ # was converted if no argument is given. Otherwise sets the name of the creator tool to the
311
+ # given value.
312
+ #
313
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
314
+ # property is deleted from the metadata.
315
+ #
316
+ # This metadata property is represented by the XMP name xmp:CreatorTool.
317
+ def creator(value = :UNSET)
318
+ property('xmp', 'CreatorTool', value)
319
+ end
320
+
321
+ # :call-seq:
322
+ # metadata.producer -> producer or nil
323
+ # metadata.producer(value) -> value
324
+ #
325
+ # Returns the name of the PDF processor that converted the original document to PDF if no
326
+ # argument is given. Otherwise sets the name of the producer to the given value.
327
+ #
328
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
329
+ # property is deleted from the metadata.
330
+ #
331
+ # This metadata property is represented by the XMP name pdf:Producer.
332
+ def producer(value = :UNSET)
333
+ property('pdf', 'Producer', value)
334
+ end
335
+
336
+ # :call-seq:
337
+ # metadata.creation_date -> creation_date or nil
338
+ # metadata.creation_date(value) -> value
339
+ #
340
+ # Returns the date and time (a Time object) the document was created if no argument is given.
341
+ # Otherwise sets the creation date to the given value.
342
+ #
343
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
344
+ # property is deleted from the metadata.
345
+ #
346
+ # This metadata property is represented by the XMP name xmp:CreateDate.
347
+ def creation_date(value = :UNSET)
348
+ property('xmp', 'CreateDate', value)
349
+ end
350
+
351
+ # :call-seq:
352
+ # metadata.modification_date -> modification_date or nil
353
+ # metadata.modification_date(value) -> value
354
+ #
355
+ # Returns the date and time (a Time object) the document was most recently modified if no
356
+ # argument is given. Otherwise sets the modification date to the given value.
357
+ #
358
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
359
+ # property is deleted from the metadata.
360
+ #
361
+ # This metadata property is represented by the XMP name xmp:ModifyDate.
362
+ def modification_date(value = :UNSET)
363
+ property('xmp', 'ModifyDate', value)
364
+ end
365
+
366
+ # :call-seq:
367
+ # metadata.trapped -> trapped or nil
368
+ # metadata.trapped(value) -> value
369
+ #
370
+ # Returns +true+ if the document has been modified to include trapping information if no
371
+ # argument is given. Otherwise sets the trapped status to the given boolean value.
372
+ #
373
+ # The value +nil+ is returned if the property is not set. And by using +nil+ as +value+ the
374
+ # property is deleted from the metadata.
375
+ #
376
+ # This metadata property is represented by the XMP name pdf:Trapped.
377
+ def trapped(value = :UNSET)
378
+ property('pdf', 'Trapped', value)
379
+ end
380
+
381
+ private
382
+
383
+ # Parses the metadata from the information dictionary into the internal data structure.
384
+ def parse_metadata
385
+ info_dict = @document.trailer.info
386
+ ns_dc = namespace('dc')
387
+ ns_xmp = namespace('xmp')
388
+ ns_pdf = namespace('pdf')
389
+ @metadata[ns_dc]['title'] = info_dict[:Title] if info_dict.key?(:Title)
390
+ @metadata[ns_dc]['creator'] = info_dict[:Author] if info_dict.key?(:Author)
391
+ @metadata[ns_dc]['description'] = info_dict[:Subject] if info_dict.key?(:Subject)
392
+ @metadata[ns_xmp]['CreatorTool'] = info_dict[:Creator] if info_dict.key?(:Creator)
393
+ @metadata[ns_xmp]['CreateDate'] = info_dict[:CreationDate] if info_dict.key?(:CreationDate)
394
+ @metadata[ns_xmp]['ModifyDate'] = info_dict[:ModDate] if info_dict.key?(:ModDate)
395
+ @metadata[ns_pdf]['Keywords'] = info_dict[:Keywords] if info_dict.key?(:Keywords)
396
+ @metadata[ns_pdf]['Producer'] = info_dict[:Producer] if info_dict.key?(:Producer)
397
+ if info_dict.key?(:Trapped) && info_dict[:Trapped] != :Unknown
398
+ @metadata[ns_pdf]['Trapped'] = (info_dict[:Trapped] == :True)
399
+ end
400
+ end
401
+
402
+ # Writes the metadata to the specified destinations.
403
+ def write_metadata
404
+ ns_dc = namespace('dc')
405
+ ns_xmp = namespace('xmp')
406
+ ns_pdf = namespace('pdf')
407
+
408
+ if write_info_dict?
409
+ info_dict = @document.trailer.info
410
+ info_dict[:Title] = Array(@metadata[ns_dc]['title']).first
411
+ info_dict[:Author] = Array(@metadata[ns_dc]['creator']).join(', ')
412
+ info_dict[:Subject] = Array(@metadata[ns_dc]['description']).first
413
+ info_dict[:Creator] = @metadata[ns_xmp]['CreatorTool']
414
+ info_dict[:CreationDate] = @metadata[ns_xmp]['CreateDate']
415
+ info_dict[:ModDate] = @metadata[ns_xmp]['ModifyDate']
416
+ info_dict[:Keywords] = @metadata[ns_pdf]['Keywords']
417
+ info_dict[:Producer] = @metadata[ns_pdf]['Producer']
418
+ info_dict[:Trapped] = @metadata[ns_pdf]['Trapped'] ? :True : :False
419
+ end
420
+
421
+ if write_metadata_stream?
422
+ descriptions = @metadata.map do |namespace, values|
423
+ xmp_description(@namespaces.key(namespace), values)
424
+ end.join("\n")
425
+ obj = @document.catalog[:Metadata] ||= @document.add({Type: :Metadata, Subtype: :XML})
426
+ obj.stream = xmp_packet(descriptions)
427
+ end
428
+ end
429
+
430
+ # Creates an XMP packet with the given payload +data+.
431
+ def xmp_packet(data)
432
+ <<~XMP
433
+ <?xpacket begin="\u{FEFF}" id="#{SecureRandom.uuid.tr('-', '')}"?>
434
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
435
+ #{data}
436
+ </rdf:RDF>
437
+ <?xpacket end="r"?>
438
+ XMP
439
+ end
440
+
441
+ # Creates an 'rdf:Description' element for all metadata +values+ with the given +ns_prefix+.
442
+ def xmp_description(ns_prefix, values)
443
+ values = values.map do |name, value|
444
+ str = +"<#{ns_prefix}:#{name}"
445
+ case (property_type = @properties[namespace(ns_prefix)][name])
446
+ when 'String'
447
+ str << ">#{xmp_escape(value)}</#{ns_prefix}:#{name}>"
448
+ when 'Date'
449
+ str << ">#{xmp_date(value)}</#{ns_prefix}:#{name}>"
450
+ when 'URI'
451
+ str << " rdf:resource=\"#{xmp_escape(value.to_s)}\" />"
452
+ when 'Boolean'
453
+ str << ">#{value ? 'True' : 'False'}</#{ns_prefix}:#{name}>"
454
+ when 'LanguageArray'
455
+ value = Array(value).map do |item|
456
+ lang = item.respond_to?(:language) ? item.language : default_language
457
+ "<rdf:li xml:lang=\"#{lang}\">#{xmp_escape(item)}</rdf:li>"
458
+ end.join("\n")
459
+ str << "><rdf:Alt>\n#{value}\n</rdf:Alt></#{ns_prefix}:#{name}>"
460
+ when 'OrderedArray', 'UnorderedArray'
461
+ value = Array(value).map {|item| "<rdf:li>#{xmp_escape(item)}</rdf:li>" }.join("\n")
462
+ el_type = (property_type == 'OrderedArray' ? 'Seq' : 'Bag')
463
+ str << "><rdf:#{el_type}>\n#{value}\n</rdf:#{el_type}></#{ns_prefix}:#{name}>"
464
+ end
465
+ str
466
+ end.join("\n")
467
+ <<~XMP.strip
468
+ <rdf:Description rdf:about="" xmlns:#{ns_prefix}="#{xmp_escape(namespace(ns_prefix))}">
469
+ #{values}
470
+ </rdf:Description>
471
+ XMP
472
+ end
473
+
474
+ # Escapes the given value so as to be usable as XMP simple value.
475
+ def xmp_escape(value)
476
+ value.gsub(/<|>|"/, {'<' => '&lt;', '>' => '&gt;', '"' => '&quot;'})
477
+ end
478
+
479
+ # Formats the given date-time object (Time, Date, or DateTime) to be a valid XMP date-time
480
+ # value.
481
+ def xmp_date(date)
482
+ date.strftime("%Y-%m-%dT%H:%M:%S%:z")
483
+ end
484
+
485
+ end
486
+
487
+ end
488
+ end
@@ -120,6 +120,7 @@ module HexaPDF
120
120
  autoload(:Files, 'hexapdf/document/files')
121
121
  autoload(:Destinations, 'hexapdf/document/destinations')
122
122
  autoload(:Layout, 'hexapdf/document/layout')
123
+ autoload(:Metadata, 'hexapdf/document/metadata')
123
124
 
124
125
  # :call-seq:
125
126
  # Document.open(filename, **docargs) -> doc
@@ -486,6 +487,16 @@ module HexaPDF
486
487
  pdf_data ? @cache[pdf_data].clear : @cache.clear
487
488
  end
488
489
 
490
+ # Returns the Metadata object that provides a convenience interface for working with the
491
+ # document metadata.
492
+ #
493
+ # Note that invoking this method means that, depending on the settings, the info dictionary as
494
+ # well as the metadata stream will be overwritten when the document gets written. See the
495
+ # "Caveats" section in the Metadata documentation.
496
+ def metadata
497
+ @metadata ||= Metadata.new(self)
498
+ end
499
+
489
500
  # Returns the Pages object that provides convenience methods for working with the pages of the
490
501
  # PDF file.
491
502
  #
@@ -706,13 +717,17 @@ module HexaPDF
706
717
  # Optimize the file size by using object and cross-reference streams. This will raise the PDF
707
718
  # version to at least 1.5.
708
719
  def write(file_or_io, incremental: false, validate: true, update_fields: true, optimize: false)
709
- dispatch_message(:complete_objects)
710
-
711
720
  if update_fields
712
721
  trailer.update_id
713
- trailer.info[:ModDate] = Time.now
722
+ if @metadata
723
+ metadata.modification_date(Time.now)
724
+ else
725
+ trailer.info[:ModDate] = Time.now
726
+ end
714
727
  end
715
728
 
729
+ dispatch_message(:complete_objects)
730
+
716
731
  if validate
717
732
  self.validate(auto_correct: true) do |msg, correctable, obj|
718
733
  next if correctable
@@ -69,11 +69,11 @@ module HexaPDF
69
69
  @block_used = false
70
70
  end
71
71
 
72
- # Returns the length of the wrapped string.
72
+ # Returns the length in bytes of the wrapped string.
73
73
  #
74
74
  # May only be called before #resume!
75
75
  def length
76
- str.length
76
+ str.bytesize
77
77
  end
78
78
 
79
79
  # Returns +true+ if #resume has not yet been called.
@@ -132,6 +132,77 @@ module HexaPDF
132
132
  define_field :StructParent, type: Integer, version: '1.3'
133
133
  define_field :OC, type: Dictionary, version: '1.5'
134
134
 
135
+ ##
136
+ # :method: flags
137
+ #
138
+ # Returns an array of flag names representing the set bit flags for /F.
139
+ #
140
+ # The available flags are:
141
+ #
142
+ # :invisible or 0::
143
+ # Applies only to non-standard annotations. If set, do not render or print the annotation.
144
+ #
145
+ # :hidden or 1::
146
+ # If set, do not render the annotation or allow interactions.
147
+ #
148
+ # :print or 2::
149
+ # If set, print the annotation unless the hidden flag is also set. Otherwise never print
150
+ # the annotation.
151
+ #
152
+ # :no_zoom or 3::
153
+ # If set, do not scale the annotation's appearance to match the magnification of the page.
154
+ #
155
+ # :no_rotate or 4::
156
+ # If set, do not rotate the annotation's appearance to match the rotation of the page.
157
+ #
158
+ # :no_view or 5::
159
+ # If set, do not render the annotation on the screen or allow interactions.
160
+ #
161
+ # :read_only or 6::
162
+ # If set, do not allow user interactions.
163
+ #
164
+ # :locked or 7::
165
+ # If set, do not allow the annotation to be deleted or its properties be modified.
166
+ #
167
+ # :toggle_no_view or 8::
168
+ # If set, invert the interpretation of the :no_view flag for annotation selection and
169
+ # mouse hovering.
170
+ #
171
+ # :locked_contents or 9::
172
+ # If set, do not allow the contents of the annotation to be modified.
173
+ #
174
+
175
+ ##
176
+ # :method: flagged?
177
+ # :call-seq:
178
+ # flagged?(flag)
179
+ #
180
+ # Returns +true+ if the given flag is set on /F. The argument can either be the flag name or
181
+ # the bit index.
182
+ #
183
+ # See #flags for the list of available flags.
184
+ #
185
+
186
+ ##
187
+ # :method: flag
188
+ # :call-seq:
189
+ # flag(*flags, clear_existing: false)
190
+ #
191
+ # Sets the given flags on /F, given as flag names or bit indices. If +clear_existing+ is
192
+ # +true+, all prior flags will be cleared.
193
+ #
194
+ # See #flags for the list of available flags.
195
+ #
196
+
197
+ ##
198
+ # :method: unflag
199
+ # :call-seq:
200
+ # unflag(*flags)
201
+ #
202
+ # Clears the given flags from /F, given as flag names or bit indices.
203
+ #
204
+ # See #flags for the list of available flags.
205
+ #
135
206
  bit_field(:flags, {invisible: 0, hidden: 1, print: 2, no_zoom: 3, no_rotate: 4,
136
207
  no_view: 5, read_only: 6, locked: 7, toggle_no_view: 8,
137
208
  locked_contents: 9},
@@ -71,7 +71,7 @@ module HexaPDF
71
71
  define_field :AA, type: Dictionary, version: '1.4'
72
72
  define_field :URI, type: Dictionary, version: '1.1'
73
73
  define_field :AcroForm, type: :XXAcroForm, version: '1.2'
74
- define_field :Metadata, type: Stream, indirect: true, version: '1.4'
74
+ define_field :Metadata, type: :Metadata, indirect: true, version: '1.4'
75
75
  define_field :StructTreeRoot, type: Dictionary, version: '1.3'
76
76
  define_field :MarkInfo, type: :XXMarkInformation, version: '1.4'
77
77
  define_field :Lang, type: String, version: '1.4'
@@ -0,0 +1,63 @@
1
+ # -*- encoding: utf-8; frozen_string_literal: true -*-
2
+ #
3
+ #--
4
+ # This file is part of HexaPDF.
5
+ #
6
+ # HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
7
+ # Copyright (C) 2014-2023 Thomas Leitner
8
+ #
9
+ # HexaPDF is free software: you can redistribute it and/or modify it
10
+ # under the terms of the GNU Affero General Public License version 3 as
11
+ # published by the Free Software Foundation with the addition of the
12
+ # following permission added to Section 15 as permitted in Section 7(a):
13
+ # FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
14
+ # THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
15
+ # INFRINGEMENT OF THIRD PARTY RIGHTS.
16
+ #
17
+ # HexaPDF is distributed in the hope that it will be useful, but WITHOUT
18
+ # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
19
+ # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
20
+ # License for more details.
21
+ #
22
+ # You should have received a copy of the GNU Affero General Public License
23
+ # along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
24
+ #
25
+ # The interactive user interfaces in modified source and object code
26
+ # versions of HexaPDF must display Appropriate Legal Notices, as required
27
+ # under Section 5 of the GNU Affero General Public License version 3.
28
+ #
29
+ # In accordance with Section 7(b) of the GNU Affero General Public
30
+ # License, a covered work must retain the producer line in every PDF that
31
+ # is created or manipulated using HexaPDF.
32
+ #
33
+ # If the GNU Affero General Public License doesn't fit your need,
34
+ # commercial licenses are available at <https://gettalong.at/hexapdf/>.
35
+ #++
36
+
37
+ require 'hexapdf/stream'
38
+
39
+ module HexaPDF
40
+ module Type
41
+
42
+ # Represents an XMP metadata stream.
43
+ #
44
+ # XMP metadata streams may be attached to most PDF objects, though it only makes sense for some
45
+ # of them.
46
+ #
47
+ # There is also a main XMP metadata stream for the whole document that is accessible via the
48
+ # /Metadata key of the document catalog. That metadata stream should contain the same values as
49
+ # the PDF's info dictionary and may contain additional entries. This can be accomplished via
50
+ # HexaPDF::Document#metadata.
51
+ #
52
+ # See: PDF2.0 s14.3.2
53
+ class Metadata < Stream
54
+
55
+ define_type :Metadata
56
+
57
+ define_field :Type, type: Symbol, default: type, required: true
58
+ define_field :Subtype, type: Symbol, default: :XML, required: true
59
+
60
+ end
61
+
62
+ end
63
+ end
data/lib/hexapdf/type.rb CHANGED
@@ -80,6 +80,7 @@ module HexaPDF
80
80
  autoload(:OptionalContentMembership, 'hexapdf/type/optional_content_membership')
81
81
  autoload(:OptionalContentProperties, 'hexapdf/type/optional_content_properties')
82
82
  autoload(:OptionalContentConfiguration, 'hexapdf/type/optional_content_configuration')
83
+ autoload(:Metadata, 'hexapdf/type/metadata')
83
84
 
84
85
  end
85
86
 
@@ -37,6 +37,6 @@
37
37
  module HexaPDF
38
38
 
39
39
  # The version of HexaPDF.
40
- VERSION = '0.36.0'
40
+ VERSION = '0.37.0'
41
41
 
42
42
  end
@@ -0,0 +1,192 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'test_helper'
4
+ require 'stringio'
5
+ require 'hexapdf/document'
6
+
7
+ describe HexaPDF::Document::Metadata do
8
+ before do
9
+ @doc = HexaPDF::Document.new
10
+ @doc.trailer.info[:Title] = 'Title'
11
+ @metadata = @doc.metadata
12
+ end
13
+
14
+ it "parses the info dictionary on creation" do
15
+ assert_equal('Title', @metadata.title)
16
+ @doc.trailer.info[:Trapped] = :Unknown
17
+ assert_nil(HexaPDF::Document::Metadata.new(@doc).trapped)
18
+ @doc.trailer.info[:Trapped] = :True
19
+ assert_equal(true, HexaPDF::Document::Metadata.new(@doc).trapped)
20
+ @doc.trailer.info[:Trapped] = :False
21
+ assert_equal(false, HexaPDF::Document::Metadata.new(@doc).trapped)
22
+ end
23
+
24
+ describe "default_language" do
25
+ it "use the document's language as default" do
26
+ @doc.catalog[:Lang] = 'de'
27
+ assert_equal("de", HexaPDF::Document::Metadata.new(@doc).default_language)
28
+ end
29
+
30
+ it "falls back to English if the document doesn't have a default language set" do
31
+ assert_equal('en', @metadata.default_language)
32
+ end
33
+
34
+ it "allows changing the default language" do
35
+ @metadata.default_language('de')
36
+ assert_equal('de', @metadata.default_language)
37
+ end
38
+ end
39
+
40
+ it "enables writing the info dict by default" do
41
+ assert(@metadata.write_info_dict?)
42
+ end
43
+
44
+ it "allows setting whether the info dict is written" do
45
+ @metadata.write_info_dict(false)
46
+ refute(@metadata.write_info_dict?)
47
+ end
48
+
49
+ it "enables writing the metadata stream by default" do
50
+ assert(@metadata.write_metadata_stream?)
51
+ end
52
+
53
+ it "allows setting whether the metadata stream is written" do
54
+ @metadata.write_metadata_stream(false)
55
+ refute(@metadata.write_metadata_stream?)
56
+ end
57
+
58
+ it "resolves namespace URI via a prefix" do
59
+ assert_equal('http://www.w3.org/1999/02/22-rdf-syntax-ns#', @metadata.namespace('rdf'))
60
+ end
61
+
62
+ it "allows registering prefixes for namespaces" do
63
+ err = assert_raises(HexaPDF::Error) { @metadata.namespace('hexa') }
64
+ assert_match(/prefix.*hexa.*not registered/, err.message)
65
+ @metadata.register_namespace('hexa', 'hexa:')
66
+ assert_equal('hexa:', @metadata.namespace('hexa'))
67
+ end
68
+
69
+ it "allows registering property types" do
70
+ @metadata.register_property_type('dc', 'title', 'Boolean')
71
+ assert_equal('Boolean', @metadata.instance_variable_get(:@properties)[@metadata.namespace('dc')]['title'])
72
+ end
73
+
74
+ it "allows reading and setting properties" do
75
+ assert_equal('Title', @metadata.property('dc', 'title'))
76
+ @metadata.property('dc', 'title', 'another')
77
+ assert_equal('another', @metadata.property('dc', 'title'))
78
+ @metadata.property('dc', 'title', nil)
79
+ assert_nil(@metadata.property('dc', 'title'))
80
+ refute(@metadata.instance_variable_get(:@metadata)[@metadata.namespace('dc')].key?('title'))
81
+ end
82
+
83
+ it "allows reading and setting all info dictionary properties" do
84
+ [['title', 'dc', 'title'], ['author', 'dc', 'creator'], ['subject', 'dc', 'description'],
85
+ ['keywords', 'pdf', 'Keywords'], ['creator', 'xmp', 'CreatorTool'],
86
+ ['producer', 'pdf', 'Producer'], ['creation_date', 'xmp', 'CreateDate'],
87
+ ['modification_date', 'xmp', 'ModifyDate'], ['trapped', 'pdf', 'Trapped']].each do |name, ns, property|
88
+ @metadata.property(ns, property, 'value')
89
+ assert_equal('value', @metadata.send(name), name)
90
+ @metadata.send(name, 'modified')
91
+ assert_equal('modified', @metadata.property(ns, property), name)
92
+ end
93
+ end
94
+
95
+ describe "metadata writing" do
96
+ before do
97
+ @time = Time.now.floor
98
+ @metadata.title('Title')
99
+ @metadata.author('Author')
100
+ @metadata.subject('Subject')
101
+ @metadata.keywords('Keywords')
102
+ @metadata.creator('Creator')
103
+ @metadata.producer('Producer')
104
+ @metadata.creation_date(@time)
105
+ @metadata.modification_date(@time)
106
+ @metadata.trapped(true)
107
+ end
108
+
109
+ it "writes the info dictionary properties" do
110
+ info = @doc.trailer.info
111
+ @doc.write(StringIO.new, update_fields: false)
112
+ assert_equal('Title', info[:Title])
113
+ assert_equal('Author', info[:Author])
114
+ assert_equal('Subject', info[:Subject])
115
+ assert_equal('Keywords', info[:Keywords])
116
+ assert_equal('Creator', info[:Creator])
117
+ assert_match(/HexaPDF/, info[:Producer])
118
+ assert_same(@time, info[:CreationDate])
119
+ assert_same(@time, info[:ModDate])
120
+ assert_equal(:True, info[:Trapped])
121
+ end
122
+
123
+ it "uses a correctly updated modification date if set so by Document#write" do
124
+ info = @doc.trailer.info
125
+ sleep(0.1)
126
+ @doc.write(StringIO.new)
127
+ assert_same(@time, info[:CreationDate])
128
+ refute_same(@time, info[:ModDate])
129
+ assert(@time < info[:ModDate])
130
+ end
131
+
132
+ it "correctly handles array values for title, author, and subject for info dictionary" do
133
+ @metadata.title(['Title', 'Another'])
134
+ @metadata.author(['Author', 'Author2'])
135
+ @metadata.subject(['Subject', 'Another'])
136
+ @doc.write(StringIO.new)
137
+ info = @doc.trailer.info
138
+ assert_equal('Title', info[:Title])
139
+ assert_equal('Author, Author2', info[:Author])
140
+ assert_equal('Subject', info[:Subject])
141
+ end
142
+
143
+ it "writes the XMP metadata" do
144
+ title = HexaPDF::Document::Metadata::LocalizedString.new('Der Titel')
145
+ title.language = 'de'
146
+ @metadata.title(['Title', title])
147
+ @metadata.author(['Author 1', 'Author 2'])
148
+ @metadata.register_property_type('dc', 'other', 'URI')
149
+ @metadata.property('dc', 'other', 'https://test.org/example')
150
+ @doc.write(StringIO.new, update_fields: false)
151
+ metadata = <<~XMP
152
+ <?xpacket begin="" id=""?>
153
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
154
+ <rdf:Description rdf:about="" xmlns:dc="http://purl.org/dc/elements/1.1/">
155
+ <dc:title><rdf:Alt>
156
+ <rdf:li xml:lang="en">Title</rdf:li>
157
+ <rdf:li xml:lang="de">Der Titel</rdf:li>
158
+ </rdf:Alt></dc:title>
159
+ <dc:creator><rdf:Seq>
160
+ <rdf:li>Author 1</rdf:li>
161
+ <rdf:li>Author 2</rdf:li>
162
+ </rdf:Seq></dc:creator>
163
+ <dc:description><rdf:Alt>
164
+ <rdf:li xml:lang="en">Subject</rdf:li>
165
+ </rdf:Alt></dc:description>
166
+ <dc:other rdf:resource="https://test.org/example" />
167
+ </rdf:Description>
168
+ <rdf:Description rdf:about="" xmlns:pdf="http://ns.adobe.com/pdf/1.3/">
169
+ <pdf:Keywords>Keywords</pdf:Keywords>
170
+ <pdf:Producer>Producer</pdf:Producer>
171
+ <pdf:Trapped>True</pdf:Trapped>
172
+ </rdf:Description>
173
+ <rdf:Description rdf:about="" xmlns:xmp="http://ns.adobe.com/xap/1.0/">
174
+ <xmp:CreatorTool>Creator</xmp:CreatorTool>
175
+ <xmp:CreateDate>#{@metadata.send(:xmp_date, @time)}</xmp:CreateDate>
176
+ <xmp:ModifyDate>#{@metadata.send(:xmp_date, @time)}</xmp:ModifyDate>
177
+ </rdf:Description>
178
+ </rdf:RDF>
179
+ <?xpacket end="r"?>
180
+ XMP
181
+ assert_equal(metadata, @doc.catalog[:Metadata].stream.sub(/(?<=id=")\w+/, ''))
182
+ end
183
+
184
+ it "respects the write settings for info dictionary and metadata stream" do
185
+ @metadata.write_info_dict(false)
186
+ @metadata.write_metadata_stream(false)
187
+ @doc.write(StringIO.new)
188
+ assert_nil(@doc.trailer.info[:Author])
189
+ refute(@doc.catalog.key?(:Metadata))
190
+ end
191
+ end
192
+ end
@@ -18,6 +18,12 @@ describe HexaPDF::Filter do
18
18
  assert_equal(@str, collector(fib))
19
19
  assert_equal('', collector(fib))
20
20
  end
21
+
22
+ it "returns the correct length of the fiber" do
23
+ str = "\u{FEFF}Öl"
24
+ fib = @obj.source_from_proc { str }
25
+ assert_equal(6, fib.length)
26
+ end
21
27
  end
22
28
 
23
29
  describe "source_from_string" do
@@ -30,6 +36,12 @@ describe HexaPDF::Filter do
30
36
  it "returns the whole string" do
31
37
  assert_equal(@str, collector(@obj.source_from_string(@str)))
32
38
  end
39
+
40
+ it "returns the correct size of the fiber" do
41
+ str = "\u{FEFF}Öl"
42
+ fib = @obj.source_from_string(str)
43
+ assert_equal(6, fib.length)
44
+ end
33
45
  end
34
46
 
35
47
  describe "source_from_io" do
@@ -40,7 +40,7 @@ describe HexaPDF::Writer do
40
40
  219
41
41
  %%EOF
42
42
  3 0 obj
43
- <</Producer(HexaPDF version 0.36.0)>>
43
+ <</Producer(HexaPDF version 0.37.0)>>
44
44
  endobj
45
45
  xref
46
46
  3 1
@@ -72,7 +72,7 @@ describe HexaPDF::Writer do
72
72
  141
73
73
  %%EOF
74
74
  6 0 obj
75
- <</Producer(HexaPDF version 0.36.0)>>
75
+ <</Producer(HexaPDF version 0.37.0)>>
76
76
  endobj
77
77
  2 0 obj
78
78
  <</Length 10>>stream
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hexapdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.36.0
4
+ version: 0.37.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Thomas Leitner
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-01-20 00:00:00.000000000 Z
11
+ date: 2024-01-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: cmdparse
@@ -358,6 +358,7 @@ files:
358
358
  - lib/hexapdf/document/fonts.rb
359
359
  - lib/hexapdf/document/images.rb
360
360
  - lib/hexapdf/document/layout.rb
361
+ - lib/hexapdf/document/metadata.rb
361
362
  - lib/hexapdf/document/pages.rb
362
363
  - lib/hexapdf/encryption.rb
363
364
  - lib/hexapdf/encryption/aes.rb
@@ -505,6 +506,7 @@ files:
505
506
  - lib/hexapdf/type/image.rb
506
507
  - lib/hexapdf/type/info.rb
507
508
  - lib/hexapdf/type/mark_information.rb
509
+ - lib/hexapdf/type/metadata.rb
508
510
  - lib/hexapdf/type/names.rb
509
511
  - lib/hexapdf/type/object_stream.rb
510
512
  - lib/hexapdf/type/optional_content_configuration.rb
@@ -631,6 +633,7 @@ files:
631
633
  - test/hexapdf/document/test_fonts.rb
632
634
  - test/hexapdf/document/test_images.rb
633
635
  - test/hexapdf/document/test_layout.rb
636
+ - test/hexapdf/document/test_metadata.rb
634
637
  - test/hexapdf/document/test_pages.rb
635
638
  - test/hexapdf/encryption/common.rb
636
639
  - test/hexapdf/encryption/test_aes.rb
@@ -796,7 +799,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
796
799
  requirements:
797
800
  - - ">="
798
801
  - !ruby/object:Gem::Version
799
- version: '2.6'
802
+ version: '2.7'
800
803
  required_rubygems_version: !ruby/object:Gem::Requirement
801
804
  requirements:
802
805
  - - ">="