origamindee 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/COPYING.LESSER +165 -0
  4. data/README.md +131 -0
  5. data/bin/config/pdfcop.conf.yml +236 -0
  6. data/bin/pdf2pdfa +87 -0
  7. data/bin/pdf2ruby +333 -0
  8. data/bin/pdfcop +476 -0
  9. data/bin/pdfdecompress +97 -0
  10. data/bin/pdfdecrypt +91 -0
  11. data/bin/pdfencrypt +113 -0
  12. data/bin/pdfexplode +223 -0
  13. data/bin/pdfextract +277 -0
  14. data/bin/pdfmetadata +143 -0
  15. data/bin/pdfsh +12 -0
  16. data/bin/shell/console.rb +128 -0
  17. data/bin/shell/hexdump.rb +59 -0
  18. data/bin/shell/irbrc +69 -0
  19. data/examples/README.md +34 -0
  20. data/examples/attachments/attachment.rb +38 -0
  21. data/examples/attachments/nested_document.rb +51 -0
  22. data/examples/encryption/encryption.rb +28 -0
  23. data/examples/events/events.rb +72 -0
  24. data/examples/flash/flash.rb +37 -0
  25. data/examples/flash/helloworld.swf +0 -0
  26. data/examples/forms/javascript.rb +54 -0
  27. data/examples/forms/xfa.rb +115 -0
  28. data/examples/javascript/hello_world.rb +22 -0
  29. data/examples/javascript/js_emulation.rb +54 -0
  30. data/examples/loop/goto.rb +32 -0
  31. data/examples/loop/named.rb +33 -0
  32. data/examples/signature/signature.rb +65 -0
  33. data/examples/uri/javascript.rb +56 -0
  34. data/examples/uri/open-uri.rb +21 -0
  35. data/examples/uri/submitform.rb +47 -0
  36. data/lib/origami/3d.rb +364 -0
  37. data/lib/origami/acroform.rb +321 -0
  38. data/lib/origami/actions.rb +318 -0
  39. data/lib/origami/annotations.rb +711 -0
  40. data/lib/origami/array.rb +242 -0
  41. data/lib/origami/boolean.rb +90 -0
  42. data/lib/origami/catalog.rb +418 -0
  43. data/lib/origami/collections.rb +144 -0
  44. data/lib/origami/compound.rb +161 -0
  45. data/lib/origami/destinations.rb +252 -0
  46. data/lib/origami/dictionary.rb +192 -0
  47. data/lib/origami/encryption.rb +1084 -0
  48. data/lib/origami/extensions/fdf.rb +347 -0
  49. data/lib/origami/extensions/ppklite.rb +422 -0
  50. data/lib/origami/filespec.rb +197 -0
  51. data/lib/origami/filters/ascii.rb +211 -0
  52. data/lib/origami/filters/ccitt/tables.rb +267 -0
  53. data/lib/origami/filters/ccitt.rb +357 -0
  54. data/lib/origami/filters/crypt.rb +38 -0
  55. data/lib/origami/filters/dct.rb +54 -0
  56. data/lib/origami/filters/flate.rb +69 -0
  57. data/lib/origami/filters/jbig2.rb +57 -0
  58. data/lib/origami/filters/jpx.rb +47 -0
  59. data/lib/origami/filters/lzw.rb +170 -0
  60. data/lib/origami/filters/predictors.rb +292 -0
  61. data/lib/origami/filters/runlength.rb +129 -0
  62. data/lib/origami/filters.rb +364 -0
  63. data/lib/origami/font.rb +196 -0
  64. data/lib/origami/functions.rb +79 -0
  65. data/lib/origami/graphics/colors.rb +230 -0
  66. data/lib/origami/graphics/instruction.rb +98 -0
  67. data/lib/origami/graphics/path.rb +182 -0
  68. data/lib/origami/graphics/patterns.rb +174 -0
  69. data/lib/origami/graphics/render.rb +62 -0
  70. data/lib/origami/graphics/state.rb +149 -0
  71. data/lib/origami/graphics/text.rb +225 -0
  72. data/lib/origami/graphics/xobject.rb +918 -0
  73. data/lib/origami/graphics.rb +38 -0
  74. data/lib/origami/header.rb +75 -0
  75. data/lib/origami/javascript.rb +713 -0
  76. data/lib/origami/linearization.rb +330 -0
  77. data/lib/origami/metadata.rb +172 -0
  78. data/lib/origami/name.rb +135 -0
  79. data/lib/origami/null.rb +65 -0
  80. data/lib/origami/numeric.rb +181 -0
  81. data/lib/origami/obfuscation.rb +245 -0
  82. data/lib/origami/object.rb +760 -0
  83. data/lib/origami/optionalcontent.rb +183 -0
  84. data/lib/origami/outline.rb +54 -0
  85. data/lib/origami/outputintents.rb +85 -0
  86. data/lib/origami/page.rb +722 -0
  87. data/lib/origami/parser.rb +269 -0
  88. data/lib/origami/parsers/fdf.rb +56 -0
  89. data/lib/origami/parsers/pdf/lazy.rb +176 -0
  90. data/lib/origami/parsers/pdf/linear.rb +122 -0
  91. data/lib/origami/parsers/pdf.rb +118 -0
  92. data/lib/origami/parsers/ppklite.rb +57 -0
  93. data/lib/origami/pdf.rb +1108 -0
  94. data/lib/origami/reference.rb +134 -0
  95. data/lib/origami/signature.rb +702 -0
  96. data/lib/origami/stream.rb +705 -0
  97. data/lib/origami/string.rb +444 -0
  98. data/lib/origami/template/patterns.rb +56 -0
  99. data/lib/origami/template/widgets.rb +151 -0
  100. data/lib/origami/trailer.rb +190 -0
  101. data/lib/origami/tree.rb +62 -0
  102. data/lib/origami/version.rb +23 -0
  103. data/lib/origami/webcapture.rb +100 -0
  104. data/lib/origami/xfa/config.rb +453 -0
  105. data/lib/origami/xfa/connectionset.rb +146 -0
  106. data/lib/origami/xfa/datasets.rb +49 -0
  107. data/lib/origami/xfa/localeset.rb +42 -0
  108. data/lib/origami/xfa/package.rb +59 -0
  109. data/lib/origami/xfa/pdf.rb +73 -0
  110. data/lib/origami/xfa/signature.rb +42 -0
  111. data/lib/origami/xfa/sourceset.rb +43 -0
  112. data/lib/origami/xfa/stylesheet.rb +44 -0
  113. data/lib/origami/xfa/template.rb +1691 -0
  114. data/lib/origami/xfa/xdc.rb +42 -0
  115. data/lib/origami/xfa/xfa.rb +146 -0
  116. data/lib/origami/xfa/xfdf.rb +43 -0
  117. data/lib/origami/xfa/xmpmeta.rb +43 -0
  118. data/lib/origami/xfa.rb +62 -0
  119. data/lib/origami/xreftable.rb +557 -0
  120. data/lib/origami.rb +47 -0
  121. data/test/dataset/calc.pdf +85 -0
  122. data/test/dataset/crypto.pdf +36 -0
  123. data/test/dataset/empty.pdf +49 -0
  124. data/test/test_actions.rb +27 -0
  125. data/test/test_annotations.rb +68 -0
  126. data/test/test_forms.rb +30 -0
  127. data/test/test_native_types.rb +83 -0
  128. data/test/test_object_tree.rb +33 -0
  129. data/test/test_pages.rb +60 -0
  130. data/test/test_pdf.rb +20 -0
  131. data/test/test_pdf_attachment.rb +34 -0
  132. data/test/test_pdf_create.rb +24 -0
  133. data/test/test_pdf_encrypt.rb +102 -0
  134. data/test/test_pdf_parse.rb +134 -0
  135. data/test/test_pdf_parse_lazy.rb +69 -0
  136. data/test/test_pdf_sign.rb +97 -0
  137. data/test/test_streams.rb +184 -0
  138. data/test/test_xrefs.rb +67 -0
  139. metadata +280 -0
@@ -0,0 +1,1108 @@
1
+ =begin
2
+
3
+ This file is part of Origami, PDF manipulation framework for Ruby
4
+ Copyright (C) 2016 Guillaume Delugré.
5
+
6
+ Origami is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU Lesser General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ Origami is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU Lesser General Public License for more details.
15
+
16
+ You should have received a copy of the GNU Lesser General Public License
17
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
18
+
19
+ =end
20
+
21
+ require 'origami/object'
22
+ require 'origami/compound'
23
+ require 'origami/null'
24
+ require 'origami/name'
25
+ require 'origami/dictionary'
26
+ require 'origami/reference'
27
+ require 'origami/boolean'
28
+ require 'origami/numeric'
29
+ require 'origami/string'
30
+ require 'origami/array'
31
+ require 'origami/stream'
32
+ require 'origami/tree'
33
+ require 'origami/filters'
34
+ require 'origami/header'
35
+ require 'origami/metadata'
36
+ require 'origami/functions'
37
+ require 'origami/page'
38
+ require 'origami/font'
39
+ require 'origami/graphics'
40
+ require 'origami/optionalcontent'
41
+ require 'origami/destinations'
42
+ require 'origami/filespec'
43
+ require 'origami/xfa'
44
+ require 'origami/acroform'
45
+ require 'origami/annotations'
46
+ require 'origami/actions'
47
+ require 'origami/3d'
48
+ require 'origami/signature'
49
+ require 'origami/webcapture'
50
+ require 'origami/encryption'
51
+ require 'origami/linearization'
52
+ require 'origami/obfuscation'
53
+ require 'origami/javascript'
54
+ require 'origami/outline'
55
+ require 'origami/outputintents'
56
+ require 'origami/collections'
57
+ require 'origami/catalog'
58
+ require 'origami/xreftable'
59
+ require 'origami/trailer'
60
+
61
+ require 'origami/parsers/pdf/linear'
62
+ require 'origami/parsers/pdf/lazy'
63
+
64
+ module Origami
65
+
66
+ class InvalidPDFError < Error #:nodoc:
67
+ end
68
+
69
+ #
70
+ # Main class representing a PDF file and its inner contents.
71
+ # A PDF file contains a set of Revision.
72
+ #
73
+ class PDF
74
+
75
#
# Class representing a particular revision in a PDF file.
# A Revision holds:
# * A body: a Hash of the indirect objects of the revision, keyed by reference.
# * An optional xref table (+xreftable+) and/or xref stream (+xrefstm+).
# * A Trailer.
#
class Revision
  # Document owning this revision.
  attr_accessor :pdf
  attr_accessor :body, :xreftable, :xrefstm
  attr_reader :trailer

  #
  # Creates an empty revision.
  # _doc_:: The document this revision belongs to.
  #
  def initialize(doc)
    # The +pdf+ accessor was declared but never assigned, so it always
    # returned nil. Keep it in sync with the owning document.
    @pdf = doc
    @document = doc
    @body = {}
    @xreftable = nil
    @xrefstm = nil
    @trailer = nil
  end

  #
  # Attaches a trailer to the revision and binds it to the owning document.
  #
  def trailer=(trl)
    trl.document = @document

    @trailer = trl
  end

  # Returns true if the revision has an xref table.
  def xreftable?
    !@xreftable.nil?
  end

  # Returns true if the revision has an xref stream.
  def xrefstm?
    !@xrefstm.nil?
  end

  # Yields every object stored in the body.
  def each_object(&b)
    @body.each_value(&b)
  end

  # Returns the objects stored in the body as an Array.
  def objects
    @body.values
  end
end
117
+
118
+ #
119
+ # Document header and revisions.
120
+ #
121
+ attr_accessor :header, :revisions
122
+
123
+ class << self
124
#
# Reads and parses a PDF file from disk.
# _path_:: File path (expanded when given as a String), otherwise passed to the parser untouched.
# _options_:: Parser options; a truthy +:lazy+ selects PDF::LazyParser instead of PDF::LinearParser.
#
def read(path, options = {})
  target = path.is_a?(::String) ? File.expand_path(path) : path
  parser = (options[:lazy] ? PDF::LazyParser : PDF::LinearParser).new(options)

  parser.parse(target)
end
139
+
140
#
# Creates a new PDF and saves it.
# When a block is given, the fresh PDF instance is yielded to it before saving.
# _output_:: Destination handed to PDF#save.
# _options_:: Options handed to PDF#save.
#
def create(output, options = {})
  PDF.new.tap { |document| yield(document) if block_given? }.save(output, options)
end
alias write create
150
+ end
151
+
152
+ #
153
+ # Creates a new PDF instance.
154
+ # _parser_:: The Parser object creating the document.
155
+ # If none is specified, some default structures are automatically created to get a minimal working document.
156
+ #
157
+ def initialize(parser = nil)
158
+ @header = PDF::Header.new
159
+ @revisions = []
160
+ @parser = parser
161
+ @loaded = false
162
+
163
+ add_new_revision
164
+ @revisions.first.trailer = Trailer.new
165
+
166
+ init if parser.nil?
167
+ end
168
+
169
+ #
170
+ # Original file name if parsed from disk, nil otherwise.
171
+ #
172
+ def original_filename
173
+ @parser.target_filename if @parser
174
+ end
175
+
176
+ #
177
+ # Original file size if parsed from a data stream, nil otherwise.
178
+ #
179
+ def original_filesize
180
+ @parser.target_filesize if @parser
181
+ end
182
+
183
+ #
184
+ # Original data parsed to create this document, nil if created from scratch.
185
+ #
186
+ def original_data
187
+ @parser.target_data if @parser
188
+ end
189
+
190
#
# Saves the current document.
# _path_:: Destination file path, or any object responding to +write+.
# _params_:: Options merged over the defaults
#            (delinearize: true, recompile: true, decrypt: false).
#
# Returns the document itself.
#
def save(path, params = {})
  options =
    {
      delinearize: true,
      recompile: true,
      decrypt: false
    }
  options.update(params)

  if self.frozen? # incompatible flags with frozen doc (signed)
    options[:recompile] =
      options[:rebuild_xrefs] =
      options[:noindent] =
      options[:obfuscate] = false
  end

  if path.respond_to?(:write)
    fd = path
    close = false
  else
    path = File.expand_path(path)
    fd = File.open(path, 'w').binmode
    close = true
  end

  begin
    load_all_objects unless loaded?

    intents_as_pdfa1 if options[:intent] =~ /pdf[\/-]?A1?/i
    self.delinearize! if options[:delinearize] and self.linearized?
    compile(options) if options[:recompile]

    fd.write output(options)
  ensure
    # Release the descriptor even when loading, compiling or serializing
    # raises. Only files opened here are closed; a caller-supplied writer
    # is left untouched.
    fd.close if close
  end

  self
end
alias write save
230
+
231
+ #
232
+ # Saves the file up to given revision number.
233
+ # This can be useful to visualize the modifications over different incremental updates.
234
+ # _revision_:: The revision number to save.
235
+ # _filename_:: The path where to save this PDF.
236
+ #
237
+ def save_upto(revision, filename)
238
+ save(filename, up_to_revision: revision)
239
+ end
240
+
241
#
# Returns an array of strings, names and streams matching the given pattern.
# _pattern_:: A Regexp, or a String which is matched literally and case-insensitively.
# _streams_: Search into decoded stream data.
# _object_streams_: Search into objects inside object streams.
#
# Raises TypeError if _pattern_ is neither a String nor a Regexp.
#
def grep(pattern, streams: true, object_streams: true) #:nodoc:
  if pattern.is_a?(::String)
    pattern = Regexp.new(Regexp.escape(pattern), Regexp::IGNORECASE)
  end

  unless pattern.is_a?(Regexp)
    raise TypeError, "Expected a String or Regexp"
  end

  self.indirect_objects.each_with_object([]) do |object, matches|
    matches.concat search_object(object, pattern,
                                 streams: streams, object_streams: object_streams)
  end
end
260
+
261
+ #
262
+ # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
263
+ #
264
+ def ls(pattern, follow_references: true)
265
+
266
+ pattern = /#{Regexp.escape(pattern)}/i if pattern.is_a?(::String)
267
+ raise TypeError, "Expected a String or Regexp" unless pattern.is_a?(Regexp)
268
+
269
+ self.grep(pattern, streams: false, object_streams: true)
270
+ .select {|object| object.is_a?(Name) and object.parent.is_a?(Dictionary) and object.parent.key?(object) }
271
+ .collect {|object| result = object.parent[object]; follow_references ? result.solve : result }
272
+ end
273
+
274
#
# Iterates over the objects of the document, revision by revision.
# _compressed_: also iterates over the objects inside object streams.
# _recursive_: also iterates recursively inside objects (through walk_object).
#
# Returns an Enumerator when no block is given.
#
def each_object(compressed: false, recursive: false, &block)
  unless block_given?
    return enum_for(__method__, compressed: compressed, recursive: recursive)
  end

  @revisions.each do |revision|
    revision.each_object do |object|
      block.call(object)

      walk_object(object, &block) if recursive

      next unless object.is_a?(ObjectStream) and compressed

      object.each do |child_obj|
        block.call(child_obj)

        # The block must be forwarded: called without one, walk_object only
        # builds an Enumerator and the children are never visited.
        walk_object(child_obj, &block) if recursive
      end
    end
  end
end
300
+
301
#
# Return an array of the objects of every revision, in revision order.
#
def indirect_objects
  @revisions.each_with_object([]) { |rev, all| all.concat(rev.objects) }
end
alias root_objects indirect_objects
308
+
309
#
# Adds a new object to the PDF file.
# An object already attached to a different document is imported;
# otherwise it is added to the last revision.
#
# Returns whatever PDF#import or PDF#add_to_revision returns.
# _object_:: The object to add.
#
def <<(object)
  owner = object.document
  foreign = owner && !owner.equal?(self)

  return import(object) if foreign

  add_to_revision(object, @revisions.last)
end
alias insert <<
330
+
331
+ #
332
+ # Similar to PDF#insert or PDF#<<, but for an object belonging to another document.
333
+ # Object will be recursively copied and new version numbers will be assigned.
334
+ # Returns the new reference to the imported object.
335
+ # _object_:: The object to import.
336
+ #
337
+ def import(object)
338
+ self.insert(object.export)
339
+ end
340
+
341
+ #
342
+ # Adds a new object to a specific revision.
343
+ # If this object has no version number, then a new one will be automatically
344
+ # computed and assignated to him.
345
+ #
346
+ # It returns a Reference to this Object.
347
+ # _object_:: The object to add.
348
+ # _revision_:: The revision to add the object to.
349
+ #
350
+ def add_to_revision(object, revision)
351
+ object.set_indirect(true)
352
+ object.set_document(self)
353
+
354
+ object.no, object.generation = allocate_new_object_number if object.no == 0
355
+
356
+ revision.body[object.reference] = object
357
+
358
+ object.reference
359
+ end
360
+
361
#
# Ends the current Revision, and starts a new one.
# The new revision receives a fresh Trailer whose Root is copied from
# the trailer of the previous revision (nil when there was none).
#
# Returns the document itself.
#
def add_new_revision
  previous = @revisions.last
  root = previous.trailer[:Root] if previous

  revision = Revision.new(self)
  @revisions << revision

  revision.trailer = Trailer.new
  revision.trailer.Root = root

  self
end
373
+
374
#
# Removes a whole document revision.
# _index_:: Revision index, first is 0.
#
# Raises IndexError if _index_ is out of range, and InvalidPDFError when
# the document only has one revision left. Returns the document itself.
#
def remove_revision(index)
  # Valid indexes are 0...size. The previous check (index > size) accepted
  # index == size, which made delete_at silently remove nothing.
  if index < 0 or index >= @revisions.size
    raise IndexError, "Not a valid revision index"
  end

  if @revisions.size == 1
    raise InvalidPDFError, "Cannot remove last revision"
  end

  @revisions.delete_at(index)
  self
end
390
+
391
+ #
392
+ # Looking for an object present at a specified file offset.
393
+ #
394
+ def get_object_by_offset(offset) #:nodoc:
395
+ self.each_object.find { |obj| obj.file_offset == offset }
396
+ end
397
+
398
#
# Remove an object from the body of every revision.
# _no_:: Reference or number of the object.
# _generation_:: Object generation, used only when _no_ is an Integer.
#
# Raises TypeError for any other kind of _no_.
#
def delete_object(no, generation = 0)
  target =
    case no
    when Reference then no
    when ::Integer then Reference.new(no, generation)
    else raise TypeError, "Invalid parameter type : #{no.class}"
    end

  @revisions.each { |rev| rev.body.delete(target) }
end
415
+
416
#
# Search for an indirect object in the document.
# _no_:: Reference or number of the object. An Origami::Object is returned as is.
# _generation_:: Object generation, used only when _no_ is an Integer.
# _use_xrefstm_:: When false, the search stops after the bodies and xref tables.
#
# Revisions are always scanned from the most recent to the oldest.
# Returns the object, or nil if it cannot be found.
# Raises TypeError for an unsupported _no_.
#
def get_object(no, generation = 0, use_xrefstm: true) #:nodoc:
  case no
  when Reference
    target = no
  when ::Integer
    target = Reference.new(no, generation)
  when Origami::Object
    return no
  else
    raise TypeError, "Invalid parameter type : #{no.class}"
  end

  #
  # Search through accessible indirect objects.
  #
  @revisions.reverse_each do |rev|
    return rev.body[target] if rev.body.include?(target)
  end

  #
  # Search through xref sections.
  #
  @revisions.reverse_each do |rev|
    next unless rev.xreftable?

    xref = rev.xreftable.find(target.refno)
    next if xref.nil? or xref.free?

    # Try loading the object if it is not present.
    object = load_object_at_offset(rev, xref.offset)
    return object unless object.nil?
  end

  return nil unless use_xrefstm

  #
  # Search through xref streams.
  #
  @revisions.reverse_each do |rev|
    next unless rev.xrefstm?

    xref = rev.xrefstm.find(target.refno)
    next if xref.nil?

    #
    # We found a matching XRef.
    #
    if xref.is_a?(XRefToCompressedObject)
      objstm = get_object(xref.objstmno, 0, use_xrefstm: use_xrefstm)

      # The entry may point to an object stream that cannot be resolved;
      # keep searching instead of calling a method on nil.
      next if objstm.nil?

      object = objstm.extract_by_index(xref.index)
      if object.is_a?(Origami::Object) and object.no == target.refno
        return object
      else
        return objstm.extract(target.refno)
      end
    elsif xref.is_a?(XRef)
      object = load_object_at_offset(rev, xref.offset)
      return object unless object.nil?
    end
  end

  #
  # Lastly search directly into Object streams (might be very slow).
  #
  @revisions.reverse_each do |rev|
    stream = rev.objects.find { |obj| obj.is_a?(ObjectStream) and obj.include?(target.refno) }
    return stream.extract(target.refno) unless stream.nil?
  end

  nil
end
alias [] get_object
494
+
495
#
# Casts a PDF object into another object type.
# The target type must be a subtype of the original type.
#
# The first revision holding _reference_ whose object already is a _type_,
# or can be cast to it, decides the result; a cast object replaces the
# original one in that revision body. Returns nil when nothing matches.
#
def cast_object(reference, type) #:nodoc:
  @revisions.each do |rev|
    next unless rev.body.include?(reference)

    current = rev.body[reference]
    return current if current.is_a?(type)

    next unless type < current.class

    return rev.body[reference] = current.cast_to(type, @parser)
  end

  nil
end
515
+
516
#
# Returns a new number/generation pair for a future object.
# The number is one more than the highest object number in the document
# (objects inside object streams included), or 1 for an empty document.
# The generation is always 0.
#
def allocate_new_object_number
  newest = self.each_object(compressed: true).max_by { |object| object.no }
  number = newest.nil? ? 1 : newest.no + 1

  [ number, 0 ]
end
530
+
531
+ #
532
+ # Mark the document as complete.
533
+ # No more objects needs to be fetched by the parser.
534
+ #
535
+ def loaded!
536
+ @loaded = true
537
+ end
538
+
539
+ #
540
+ # Returns if the document as been fully loaded by the parser.
541
+ #
542
+ def loaded?
543
+ @loaded
544
+ end
545
+
546
+ ##########################
547
+ private
548
+ ##########################
549
+
550
#
# Iterates over the children of an object, avoiding cycles.
# _object_:: The object to explore; only CompoundObject and Stream have children.
# _excludes_:: Objects already visited; it is appended to during the walk.
#
# Every child is yielded before being explored itself.
# Returns an Enumerator when no block is given.
#
def walk_object(object, excludes: [], &block)
  unless block_given?
    return enum_for(__method__, object, excludes: excludes)
  end

  return if excludes.include?(object)

  excludes.push(object)

  if object.is_a?(CompoundObject)
    object.each_value do |child|
      block.call(child)
      walk_object(child, excludes: excludes, &block)
    end
  elsif object.is_a?(Stream)
    dictionary = object.dictionary
    block.call(dictionary)
    walk_object(dictionary, excludes: excludes, &block)
  end
end
571
+
572
+ #
573
+ # Searches through an object, possibly going into object streams.
574
+ # Returns an array of matching strings, names and streams.
575
+ #
576
+ def search_object(object, pattern, streams: true, object_streams: true)
577
+ result = []
578
+
579
+ case object
580
+ when Stream
581
+ result.concat object.dictionary.strings_cache.select{|str| str.match(pattern) }
582
+ result.concat object.dictionary.names_cache.select{|name| name.value.match(pattern) }
583
+
584
+ begin
585
+ result.push object if streams and object.data.match(pattern)
586
+ rescue Filter::Error
587
+ return result # Skip object if a decoding error occured.
588
+ end
589
+
590
+ return result unless object.is_a?(ObjectStream) and object_streams
591
+
592
+ object.each do |child|
593
+ result.concat search_object(child, pattern,
594
+ streams: streams, object_streams: object_streams)
595
+ end
596
+
597
+ when Name, String
598
+ result.push object if object.value.match(pattern)
599
+
600
+ when ObjectCache
601
+ result.concat object.strings_cache.select{|str| str.match(pattern) }
602
+ result.concat object.names_cache.select{|name| name.value.match(pattern) }
603
+ end
604
+
605
+ result
606
+ end
607
+
608
+ #
609
+ # Load an object from its given file offset.
610
+ # The document must have an associated Parser.
611
+ #
612
+ def load_object_at_offset(revision, offset)
613
+ return nil if loaded? or @parser.nil?
614
+ pos = @parser.pos
615
+
616
+ begin
617
+ object = @parser.parse_object(offset)
618
+ return nil if object.nil?
619
+
620
+ if self.is_a?(Encryption::EncryptedDocument)
621
+ make_encrypted_object(object)
622
+ end
623
+
624
+ add_to_revision(object, revision)
625
+ ensure
626
+ @parser.pos = pos
627
+ end
628
+
629
+ object
630
+ end
631
+
632
#
# Method called on encrypted objects loaded into the document.
# Extends the object with the matching Encryption mixin: a String or a
# Stream is extended directly, an ObjectCache has every string of its
# strings cache extended. Other objects are left untouched.
#
def make_encrypted_object(object)
  case object
  when String then object.extend(Encryption::EncryptedString)
  when Stream then object.extend(Encryption::EncryptedStream)
  when ObjectCache
    object.strings_cache.each { |string| string.extend(Encryption::EncryptedString) }
  end
end
647
+
648
#
# Force the loading of all objects in the document.
# Does nothing when the document is already loaded or has no parser.
# For each revision, the xref table is used when present, otherwise the
# xref stream; every entry not marked free is fetched through get_object.
# The document is then flagged as loaded.
#
def load_all_objects
  return if loaded? or @parser.nil?

  @revisions.each do |revision|
    xrefs =
      if revision.xreftable?
        revision.xreftable
      elsif revision.xrefstm?
        revision.xrefstm
      end

    next if xrefs.nil?

    xrefs.each_with_number do |xref, no|
      next if xref.free?

      self.get_object(no)
    end
  end

  loaded!
end
670
+
671
+ #
672
+ # Compute and update XRef::Section for each Revision.
673
+ #
674
+ def rebuild_xrefs
675
+ size = 0
676
+ startxref = @header.to_s.size
677
+
678
+ @revisions.each do |revision|
679
+ revision.each_object do |object|
680
+ startxref += object.to_s.size
681
+ end
682
+
683
+ size += revision.body.size
684
+ revision.xreftable = build_xrefs(revision.objects)
685
+
686
+ revision.trailer ||= Trailer.new
687
+ revision.trailer.Size = size + 1
688
+ revision.trailer.startxref = startxref
689
+
690
+ startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
691
+ end
692
+
693
+ self
694
+ end
695
+
696
+ #
697
+ # This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
698
+ # * Allocates objects references.
699
+ # * Sets some objects missing required values.
700
+ #
701
+ def compile(options = {})
702
+
703
+ load_all_objects unless loaded?
704
+
705
+ #
706
+ # A valid document must have at least one page.
707
+ #
708
+ append_page if pages.empty?
709
+
710
+ #
711
+ # Allocates object numbers and creates references.
712
+ # Invokes object finalization methods.
713
+ #
714
+ physicalize(options)
715
+
716
+ #
717
+ # Sets the PDF version header.
718
+ #
719
+ version, level = version_required
720
+ @header.major_version = version[0,1].to_i
721
+ @header.minor_version = version[2,1].to_i
722
+
723
+ set_extension_level(version, level) if level > 0
724
+
725
+ self
726
+ end
727
+
728
+ #
729
+ # Converts a logical PDF view into a physical view ready for writing.
730
+ #
731
+ def physicalize(options = {})
732
+
733
+ @revisions.each do |revision|
734
+ # Do not use each_object here as build_object may modify the iterator.
735
+ revision.objects.each do |obj|
736
+ build_object(obj, revision, options)
737
+ end
738
+ end
739
+
740
+ self
741
+ end
742
+
743
+ def build_object(object, revision, options)
744
+ # Build any compressed object before building the object stream.
745
+ if object.is_a?(ObjectStream)
746
+ object.each do |compressed_obj|
747
+ build_object(compressed_obj, revision, options)
748
+ end
749
+ end
750
+
751
+ object.pre_build
752
+
753
+ case object
754
+ when Stream
755
+ build_object(object.dictionary, revision, options)
756
+ when CompoundObject
757
+ build_compound_object(object, revision, options)
758
+ end
759
+
760
+ object.post_build
761
+ end
762
+
763
+ def build_compound_object(object, revision, options)
764
+ return unless object.is_a?(CompoundObject)
765
+
766
+ # Flatten the object by adding indirect objects to the revision and
767
+ # replacing them with their reference.
768
+ object.update_values! do |child|
769
+ next(child) unless child.indirect?
770
+
771
+ if get_object(child.reference)
772
+ child.reference
773
+ else
774
+ reference = add_to_revision(child, revision)
775
+ build_object(child, revision, options)
776
+ reference
777
+ end
778
+ end
779
+
780
+ # Finalize all the children objects.
781
+ object.each_value do |child|
782
+ build_object(child, revision, options)
783
+ end
784
+ end
785
+
786
+ #
787
+ # Returns the final binary representation of the current document.
788
+ #
789
+ def output(params = {})
790
+
791
+ has_objstm = self.each_object.any?{|obj| obj.is_a?(ObjectStream)}
792
+
793
+ options =
794
+ {
795
+ eol: $/,
796
+ rebuild_xrefs: true,
797
+ noindent: false,
798
+ obfuscate: false,
799
+ use_xrefstm: has_objstm,
800
+ use_xreftable: (not has_objstm),
801
+ up_to_revision: @revisions.size
802
+ }
803
+ options.update(params)
804
+
805
+ # Ensures we are using a valid EOL delimiter.
806
+ assert_valid_eol(options[:eol])
807
+
808
+ # Do not emit more revisions than present in the document.
809
+ options[:up_to_revision] = [ @revisions.size, options[:up_to_revision] ].min
810
+
811
+ # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
812
+ if options[:use_xrefstm] == options[:use_xreftable]
813
+ options[:use_xrefstm] = has_objstm
814
+ options[:use_xreftable] = (not has_objstm)
815
+ end
816
+
817
+ # Indent level for objects.
818
+ indent = (options[:noindent] == true ? 0 : 1)
819
+
820
+ # Get trailer dictionary
821
+ trailer_dict = self.trailer.dictionary
822
+
823
+ prev_xref_offset = nil
824
+ xrefstm_offset = nil
825
+
826
+ # Header
827
+ bin = ""
828
+ bin << @header.to_s(eol: options[:eol])
829
+
830
+ # For each revision
831
+ @revisions[0, options[:up_to_revision]].each do |rev|
832
+
833
+ # Create xref table/stream.
834
+ if options[:rebuild_xrefs] == true
835
+ lastno_table, lastno_stm = 0, 0
836
+ brange_table, brange_stm = 0, 0
837
+
838
+ xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
839
+ xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
840
+
841
+ if options[:use_xreftable] == true
842
+ xrefsection = XRef::Section.new
843
+ end
844
+
845
+ if options[:use_xrefstm] == true
846
+ xrefstm = rev.xrefstm || XRefStream.new
847
+ if xrefstm == rev.xrefstm
848
+ xrefstm.clear
849
+ else
850
+ add_to_revision(xrefstm, rev)
851
+ end
852
+ end
853
+ end
854
+
855
+ objset = rev.objects
856
+
857
+ objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
858
+ objset.concat objstm.objects
859
+ end if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true
860
+
861
+ previous_obj = nil
862
+
863
+ # For each object, in number order
864
+ # Move any XRefStream to the end of the revision.
865
+ objset.sort_by {|obj| [obj.is_a?(XRefStream) ? 1 : 0, obj.no, obj.generation] }
866
+ .each do |obj|
867
+
868
+ # Ensures that every object has a unique reference number.
869
+ # Duplicates should never happen in a well-formed revision and will cause breakage of xrefs.
870
+ if previous_obj and previous_obj.reference == obj.reference
871
+ raise InvalidPDFError, "Duplicate object detected, reference #{obj.reference}"
872
+ else
873
+ previous_obj = obj
874
+ end
875
+
876
+ # Create xref entry.
877
+ if options[:rebuild_xrefs] == true
878
+
879
+ # Adding subsections if needed
880
+ if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
881
+ xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
882
+
883
+ xrefs_table.clear
884
+ brange_table = obj.no
885
+ end
886
+
887
+ if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
888
+ xrefs_stm.each do |xref| xrefstm << xref end
889
+ xrefstm.Index ||= []
890
+ xrefstm.Index << brange_stm << xrefs_stm.length
891
+
892
+ xrefs_stm.clear
893
+ brange_stm = obj.no
894
+ end
895
+
896
+ # Process embedded objects
897
+ if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
898
+ index = obj.parent.index(obj.no)
899
+
900
+ xrefs_stm << XRefToCompressedObject.new(obj.parent.no, index)
901
+
902
+ lastno_stm = obj.no
903
+ else
904
+ xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
905
+ xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)
906
+
907
+ lastno_table = lastno_stm = obj.no
908
+ end
909
+ end
910
+
911
+ if obj.parent == obj or not obj.parent.is_a?(ObjectStream)
912
+
913
+ # Finalize XRefStm
914
+ if options[:rebuild_xrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
915
+ xrefstm_offset = bin.size
916
+
917
+ xrefs_stm.each do |xref| xrefstm << xref end
918
+
919
+ xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
920
+ if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
921
+ xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
922
+ end
923
+
924
+ xrefstm.Index ||= []
925
+ xrefstm.Index << brange_stm << xrefs_stm.size
926
+
927
+ xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
928
+ xrefstm.Prev = prev_xref_offset
929
+ rev.trailer.dictionary = nil
930
+
931
+ add_to_revision(xrefstm, rev)
932
+
933
+ xrefstm.pre_build
934
+ xrefstm.post_build
935
+ end
936
+
937
+ # Output object code
938
+ if (obj.is_a?(Dictionary) or obj.is_a?(Stream))
939
+ bin << obj.to_s(eol: options[:eol], indent: indent)
940
+ else
941
+ bin << obj.to_s(eol: options[:eol])
942
+ end
943
+ end
944
+ end # end each object
945
+
946
+ rev.trailer ||= Trailer.new
947
+
948
+ # XRef table
949
+ if options[:rebuild_xrefs] == true
950
+
951
+ if options[:use_xreftable] == true
952
+ table_offset = bin.size
953
+
954
+ xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
955
+ rev.xreftable = xrefsection
956
+
957
+ rev.trailer.dictionary = trailer_dict
958
+ rev.trailer.Size = objset.size + 1
959
+ rev.trailer.Prev = prev_xref_offset
960
+
961
+ rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
962
+ end
963
+
964
+ startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
965
+ rev.trailer.startxref = prev_xref_offset = startxref
966
+
967
+ end
968
+
969
+ # Trailer
970
+ bin << rev.xreftable.to_s(eol: options[:eol]) if options[:use_xreftable] == true
971
+ bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s(eol: options[:eol], indent: indent))
972
+
973
+ end # end each revision
974
+
975
+ bin
976
+ end
977
+
978
#
# Checks that _d_ is an accepted end-of-line delimiter.
# Returns nil when _d_ is "\n", "\r" or "\r\n"; raises ArgumentError otherwise.
#
def assert_valid_eol(d)
  permitted = [ "\n", "\r", "\r\n" ]
  return if permitted.include?(d)

  raise ArgumentError, "Invalid EOL delimiter #{d.inspect}, allowed: #{permitted.inspect}"
end
984
+
985
#
# Instantiates the basic structures required for a valid PDF file.
#
# Uses the value returned by trailer_key(:Root) as the catalog, or a new
# Catalog when that lookup yields nothing. The catalog is assigned to the
# document, its reference is stored as Root in the last revision's trailer,
# and the document is marked as loaded. Returns self.
#
def init
  root = trailer_key(:Root) || Catalog.new
  self.Catalog = root

  @revisions.last.trailer.Root = root.reference

  loaded!

  self
end
996
+
997
# Size of the document as produced by #output with rebuild_xrefs disabled.
def filesize #:nodoc:
  serialized = output(rebuild_xrefs: false)
  serialized.size
end
1000
+
1001
# Returns the version_required value of the object for which it is greatest,
# looking at every object yielded by each_object.
def version_required #:nodoc:
  most_demanding = each_object.max_by(&:version_required)
  most_demanding.version_required
end
1004
+
1005
#
# Computes and installs a placeholder XRef::Section on each Revision.
#
# Every object gets an XRef.new(0, 0, XRef::FREE) entry. Entries are grouped
# into subsections that are split whenever two consecutive object numbers
# differ by more than one. Each revision's trailer (created if missing)
# receives the cumulative object count plus one as Size, and a startxref
# derived from the serialized sizes of the header, objects, xref tables and
# trailers seen so far. Returns self.
#
def rebuild_dummy_xrefs #:nodoc:
  # Builds the placeholder section covering the given objects.
  dummy_section_for = lambda do |objects|
    section = XRef::Section.new
    entries = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
    range_start = 0
    previous_no = 0

    objects.sort_by(&:reference).each do |object|
      # A gap in the numbering closes the current subsection.
      if (object.no - previous_no).abs > 1
        section << XRef::Subsection.new(range_start, entries)
        range_start = object.no
        entries.clear
      end

      entries << XRef.new(0, 0, XRef::FREE)
      previous_no = object.no
    end

    section << XRef::Subsection.new(range_start, entries)
    section
  end

  object_count = 0
  xref_offset = @header.to_s.size

  @revisions.each do |revision|
    revision.each_object { |object| xref_offset += object.to_s.size }

    object_count += revision.body.size
    revision.xreftable = dummy_section_for.call(revision.objects)

    revision.trailer ||= Trailer.new
    revision.trailer.Size = object_count + 1
    revision.trailer.startxref = xref_offset

    # The next revision starts after this revision's xref table and trailer.
    xref_offset += revision.xreftable.to_s.size + revision.trailer.to_s.size
  end

  self
end
1056
+
1057
#
# Builds an XRef::Section from a set of objects.
#
# Objects are visited in order of their reference. Each one gets a
# XRef::USED entry whose offset comes from get_object_offset. A new
# subsection is started whenever two consecutive object numbers differ by
# more than one. The first subsection starts at 0 with a free entry.
#
def build_xrefs(objects) #:nodoc:
  section = XRef::Section.new
  entries = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
  range_start = 0
  previous_no = 0

  objects.sort_by(&:reference).each do |object|
    # A gap in the numbering closes the current subsection.
    if (object.no - previous_no).abs > 1
      section << XRef::Subsection.new(range_start, entries)
      range_start = object.no
      entries.clear
    end

    offset = get_object_offset(object.no, object.generation)
    entries << XRef.new(offset, object.generation, XRef::USED)

    previous_no = object.no
  end

  section << XRef::Subsection.new(range_start, entries)
  section
end
1086
+
1087
# Returns the offset at which the object with the given number and
# generation would start, or nil when no revision holds such an object.
#
# The offset is accumulated from the sizes of the serialized header, of
# every preceding object (each revision's objects being taken in reference
# order), and of the xref table and trailer of every preceding revision.
def get_object_offset(no, generation) #:nodoc:
  offset = @header.to_s.size

  @revisions.each do |revision|
    revision.objects.sort_by(&:reference).each do |object|
      return offset if object.no == no && object.generation == generation

      offset += object.to_s.size
    end

    offset += revision.xreftable.to_s.size
    offset += revision.trailer.to_s.size
  end

  nil
end
1106
+ end
1107
+
1108
+ end