origami 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. data/COPYING.LESSER +165 -0
  2. data/README +77 -0
  3. data/VERSION +1 -0
  4. data/bin/config/pdfcop.conf.yml +237 -0
  5. data/bin/gui/about.rb +46 -0
  6. data/bin/gui/config.rb +132 -0
  7. data/bin/gui/file.rb +385 -0
  8. data/bin/gui/hexdump.rb +74 -0
  9. data/bin/gui/hexview.rb +91 -0
  10. data/bin/gui/imgview.rb +72 -0
  11. data/bin/gui/menu.rb +392 -0
  12. data/bin/gui/properties.rb +132 -0
  13. data/bin/gui/signing.rb +635 -0
  14. data/bin/gui/textview.rb +107 -0
  15. data/bin/gui/treeview.rb +409 -0
  16. data/bin/gui/walker.rb +282 -0
  17. data/bin/gui/xrefs.rb +79 -0
  18. data/bin/pdf2graph +121 -0
  19. data/bin/pdf2ruby +353 -0
  20. data/bin/pdfcocoon +104 -0
  21. data/bin/pdfcop +455 -0
  22. data/bin/pdfdecompress +104 -0
  23. data/bin/pdfdecrypt +95 -0
  24. data/bin/pdfencrypt +112 -0
  25. data/bin/pdfextract +221 -0
  26. data/bin/pdfmetadata +123 -0
  27. data/bin/pdfsh +13 -0
  28. data/bin/pdfwalker +7 -0
  29. data/bin/shell/.irbrc +104 -0
  30. data/bin/shell/console.rb +136 -0
  31. data/bin/shell/hexdump.rb +83 -0
  32. data/origami.rb +36 -0
  33. data/origami/3d.rb +239 -0
  34. data/origami/acroform.rb +321 -0
  35. data/origami/actions.rb +299 -0
  36. data/origami/adobe/fdf.rb +259 -0
  37. data/origami/adobe/ppklite.rb +489 -0
  38. data/origami/annotations.rb +775 -0
  39. data/origami/array.rb +187 -0
  40. data/origami/boolean.rb +101 -0
  41. data/origami/catalog.rb +486 -0
  42. data/origami/destinations.rb +213 -0
  43. data/origami/dictionary.rb +188 -0
  44. data/origami/docmdp.rb +96 -0
  45. data/origami/encryption.rb +1293 -0
  46. data/origami/export.rb +283 -0
  47. data/origami/file.rb +222 -0
  48. data/origami/filters.rb +250 -0
  49. data/origami/filters/ascii.rb +189 -0
  50. data/origami/filters/ccitt.rb +515 -0
  51. data/origami/filters/crypt.rb +47 -0
  52. data/origami/filters/dct.rb +61 -0
  53. data/origami/filters/flate.rb +112 -0
  54. data/origami/filters/jbig2.rb +63 -0
  55. data/origami/filters/jpx.rb +53 -0
  56. data/origami/filters/lzw.rb +195 -0
  57. data/origami/filters/predictors.rb +276 -0
  58. data/origami/filters/runlength.rb +117 -0
  59. data/origami/font.rb +209 -0
  60. data/origami/functions.rb +93 -0
  61. data/origami/graphics.rb +33 -0
  62. data/origami/graphics/colors.rb +191 -0
  63. data/origami/graphics/instruction.rb +126 -0
  64. data/origami/graphics/path.rb +154 -0
  65. data/origami/graphics/patterns.rb +180 -0
  66. data/origami/graphics/state.rb +164 -0
  67. data/origami/graphics/text.rb +224 -0
  68. data/origami/graphics/xobject.rb +493 -0
  69. data/origami/header.rb +90 -0
  70. data/origami/linearization.rb +318 -0
  71. data/origami/metadata.rb +114 -0
  72. data/origami/name.rb +170 -0
  73. data/origami/null.rb +75 -0
  74. data/origami/numeric.rb +188 -0
  75. data/origami/obfuscation.rb +233 -0
  76. data/origami/object.rb +527 -0
  77. data/origami/outline.rb +59 -0
  78. data/origami/page.rb +559 -0
  79. data/origami/parser.rb +268 -0
  80. data/origami/parsers/fdf.rb +45 -0
  81. data/origami/parsers/pdf.rb +27 -0
  82. data/origami/parsers/pdf/linear.rb +113 -0
  83. data/origami/parsers/ppklite.rb +86 -0
  84. data/origami/pdf.rb +1144 -0
  85. data/origami/reference.rb +113 -0
  86. data/origami/signature.rb +474 -0
  87. data/origami/stream.rb +575 -0
  88. data/origami/string.rb +416 -0
  89. data/origami/trailer.rb +173 -0
  90. data/origami/webcapture.rb +87 -0
  91. data/origami/xfa.rb +3027 -0
  92. data/origami/xreftable.rb +447 -0
  93. data/templates/patterns.rb +66 -0
  94. data/templates/widgets.rb +173 -0
  95. data/templates/xdp.rb +92 -0
  96. data/tests/dataset/test.dummycrt +28 -0
  97. data/tests/dataset/test.dummykey +27 -0
  98. data/tests/tc_actions.rb +32 -0
  99. data/tests/tc_annotations.rb +85 -0
  100. data/tests/tc_pages.rb +37 -0
  101. data/tests/tc_pdfattach.rb +24 -0
  102. data/tests/tc_pdfencrypt.rb +110 -0
  103. data/tests/tc_pdfnew.rb +32 -0
  104. data/tests/tc_pdfparse.rb +98 -0
  105. data/tests/tc_pdfsig.rb +37 -0
  106. data/tests/tc_streams.rb +129 -0
  107. data/tests/ts_pdf.rb +45 -0
  108. metadata +193 -0
@@ -0,0 +1,86 @@
1
+ =begin
2
+
3
+ = File
4
+ parsers/ppklite.rb
5
+
6
+ = Info
7
+ Origami is free software: you can redistribute it and/or modify
8
+ it under the terms of the GNU Lesser General Public License as published by
9
+ the Free Software Foundation, either version 3 of the License, or
10
+ (at your option) any later version.
11
+
12
+ Origami is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public License
18
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
19
+
20
+ =end
21
+
22
+ require 'origami/parser'
23
+ require 'origami/adobe/ppklite'
24
+
25
module Origami

  class Adobe::PPKLite

    #
    # Parser building an Adobe::PPKLite address book from its file representation.
    #
    class Parser < Origami::Parser

      #
      # Parses _stream_ and returns the resulting Adobe::PPKLite instance.
      # The header is parsed first and handed to the :callback option, then
      # objects, xref table and trailer are loaded and known dictionaries retyped.
      #
      def parse(stream) #:nodoc:
        super

        book = Adobe::PPKLite.new
        book.header = Adobe::PPKLite::Header.parse(stream)
        @options[:callback].call(book.header)

        parse_objects(book)
        parse_xreftable(book)
        parse_trailer(book)
        book_specialize_entries(book)

        book
      end

      #
      # Replaces generic dictionaries of the first revision with their
      # PPKLite-specific classes (Catalog, PPK, UserList, AddressList,
      # User, Certificate), keeping number, generation and file offset.
      #
      def book_specialize_entries(addrbk) #:nodoc:
        # Copies the indirect-object identity of +source+ onto +typed+.
        retag = lambda do |typed, source|
          typed.generation, typed.no, typed.file_offset = source.generation, source.no, source.file_offset
          typed
        end

        body = addrbk.revisions.first.body
        body.each_pair do |ref, obj|
          next unless obj.is_a?(Dictionary)

          if obj[:Type] == :Catalog
            catalog = retag.call(Adobe::PPKLite::Catalog.new(obj), obj)

            if catalog.PPK.is_a?(Dictionary) and catalog.PPK[:Type] == :PPK
              catalog.PPK = Adobe::PPKLite::PPK.new(catalog.PPK)

              if catalog.PPK.User.is_a?(Dictionary) and catalog.PPK.User[:Type] == :User
                catalog.PPK.User = Adobe::PPKLite::UserList.new(catalog.PPK.User)
              end

              if catalog.PPK.AddressBook.is_a?(Dictionary) and catalog.PPK.AddressBook[:Type] == :AddressBook
                catalog.PPK.AddressBook = Adobe::PPKLite::AddressList.new(catalog.PPK.AddressBook)
              end
            end

            body[ref] = catalog
          else
            # Address book entries are told apart by their ABEType value.
            entry_class =
              if obj[:ABEType] == Adobe::PPKLite::Descriptor::USER
                Adobe::PPKLite::User
              elsif obj[:ABEType] == Adobe::PPKLite::Descriptor::CERTIFICATE
                Adobe::PPKLite::Certificate
              end

            body[ref] = retag.call(entry_class.new(obj), obj) if entry_class
          end
        end
      end
    end
  end
end
86
+
@@ -0,0 +1,1144 @@
1
+ =begin
2
+
3
+ = File
4
+ pdf.rb
5
+
6
+ = Info
7
+ Origami is free software: you can redistribute it and/or modify
8
+ it under the terms of the GNU Lesser General Public License as published by
9
+ the Free Software Foundation, either version 3 of the License, or
10
+ (at your option) any later version.
11
+
12
+ Origami is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public License
18
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
19
+
20
+ =end
21
+
22
+ require 'origami/object'
23
+ require 'origami/null'
24
+ require 'origami/name'
25
+ require 'origami/dictionary'
26
+ require 'origami/reference'
27
+ require 'origami/boolean'
28
+ require 'origami/numeric'
29
+ require 'origami/string'
30
+ require 'origami/array'
31
+ require 'origami/stream'
32
+ require 'origami/filters'
33
+ require 'origami/trailer'
34
+ require 'origami/xreftable'
35
+ require 'origami/header'
36
+ require 'origami/functions'
37
+ require 'origami/catalog'
38
+ require 'origami/font'
39
+ require 'origami/page'
40
+ require 'origami/graphics'
41
+ require 'origami/destinations'
42
+ require 'origami/outline'
43
+ require 'origami/actions'
44
+ require 'origami/file'
45
+ require 'origami/acroform'
46
+ require 'origami/annotations'
47
+ require 'origami/3d'
48
+ require 'origami/signature'
49
+ require 'origami/webcapture'
50
+ require 'origami/metadata'
51
+ require 'origami/export'
52
+ require 'origami/webcapture'
53
+ require 'origami/encryption'
54
+ require 'origami/linearization'
55
+ require 'origami/obfuscation'
56
+ require 'origami/xfa'
57
+
58
+ module Origami
59
+
60
+ VERSION = "1.0.2"
61
+ REVISION = "$Revision: rev 111/, 2011/05/25 18:01:00 darko $" #:nodoc:
62
+
63
+ #
64
+ # Global options for Origami.
65
+ #
66
+ OPTIONS =
67
+ {
68
+ :enable_type_checking => true, # set to false to disable type consistency checks during compilation.
69
+ :enable_type_guessing => true, # set to false to prevent the parser to guess the type of special dictionary and streams (not recommended).
70
+ :use_openssl => true # set to false to use Origami crypto backend.
71
+ }
72
+
73
+ begin
74
+ require 'openssl'
75
+ OPTIONS[:use_openssl] = true
76
+ rescue LoadError
77
+ OPTIONS[:use_openssl] = false
78
+ end
79
+
80
+ @@dict_special_types =
81
+ {
82
+ :Catalog => Catalog,
83
+ :Pages => PageTreeNode,
84
+ :Page => Page,
85
+ :Filespec => FileSpec,
86
+ :Action => Action,
87
+ :Font => Font,
88
+ :FontDescriptor => FontDescriptor,
89
+ :Encoding => Encoding,
90
+ :Annot => Annotation,
91
+ :Border => Annotation::BorderStyle,
92
+ :Outlines => Outline,
93
+ :Sig => Signature::DigitalSignature,
94
+ :SigRef => Signature::Reference,
95
+ :SigFieldLock => Field::SignatureLock,
96
+ :SV => Field::SignatureSeedValue,
97
+ :SVCert => Field::CertificateSeedValue,
98
+ :ExtGState => Graphics::ExtGState,
99
+ :RichMediaSettings => Annotation::RichMedia::Settings,
100
+ :RichMediaActivation => Annotation::RichMedia::Activation,
101
+ :RichMediaDeactivation => Annotation::RichMedia::Deactivation,
102
+ :RichMediaAnimation => Annotation::RichMedia::Animation,
103
+ :RichMediaPresentation => Annotation::RichMedia::Presentation,
104
+ :RichMediaWindow => Annotation::RichMedia::Window,
105
+ :RichMediaPosition => Annotation::RichMedia::Position,
106
+ :RichMediaContent => Annotation::RichMedia::Content,
107
+ :RichMediaConfiguration => Annotation::RichMedia::Configuration,
108
+ :RichMediaInstance => Annotation::RichMedia::Instance,
109
+ :RichMediaParams => Annotation::RichMedia::Parameters,
110
+ :CuePoint => Annotation::RichMedia::CuePoint
111
+ }
112
+
113
+ @@stm_special_types =
114
+ {
115
+ :ObjStm => ObjectStream,
116
+ :EmbeddedFile => EmbeddedFileStream,
117
+ :Metadata => MetadataStream,
118
+ :XRef => XRefStream,
119
+ :"3D" => U3DStream
120
+ }
121
+
122
+ @@stm_xobj_subtypes =
123
+ {
124
+ :Image => Graphics::ImageXObject,
125
+ :Form => Graphics::FormXObject
126
+ }
127
+
128
#
# Error raised when a document cannot be turned into, or kept as, a valid PDF:
# PDF#to_bin raises it when no trailer information is found, and
# PDF#remove_revision when asked to remove the only remaining revision.
#
# NOTE(review): this derives from Exception rather than StandardError, so a
# bare `rescue` will not catch it; callers must rescue it (or Exception) by name.
#
class InvalidPDFError < Exception #:nodoc:
end
130
+
131
+ #
132
+ # Main class representing a PDF file and its inner contents.
133
+ # A PDF file contains a set of Revision.
134
+ #
135
+ class PDF
136
+
137
+ #
138
+ # Class representing a particular revision in a PDF file.
139
+ # Revision contains :
140
+ # * A Body, which is a sequence of Object.
141
+ # * A XRef::Section, holding XRef information about objects in body.
142
+ # * A Trailer.
143
+ #
144
#
# Class representing a particular revision in a PDF file.
# A Revision holds:
# * a body, a Hash mapping references to indirect objects;
# * an optional xref table and/or xref stream;
# * a trailer, which is bound to the owning PDF when assigned.
#
class Revision
  attr_accessor :pdf
  attr_accessor :body, :xreftable, :xrefstm

  # Only the reader is generated: the writer is hand-written below, and
  # generating it twice triggers a "method redefined" warning.
  attr_reader :trailer

  #
  # _pdf_:: The document this revision belongs to.
  #
  def initialize(pdf)
    @pdf = pdf
    @body = {}
    @xreftable = nil
    @xrefstm = nil
    @trailer = nil
  end

  #
  # Sets the trailer of this revision and attaches it to the owning document.
  # Passing nil simply clears the trailer.
  #
  def trailer=(trl)
    trl.pdf = @pdf unless trl.nil?
    @trailer = trl
  end

  # Whether an xref table has been set for this revision.
  def has_xreftable?
    not @xreftable.nil?
  end

  # Whether an xref stream has been set for this revision.
  def has_xrefstm?
    not @xrefstm.nil?
  end

  # Returns the objects stored in the body, without their references.
  def objects
    @body.values
  end
end
173
+
174
+ attr_accessor :header, :revisions
175
+
176
class << self

  #
  # Reads and parses a PDF file from disk.
  # _filename_:: Passed as-is to PDF::LinearParser#parse.
  # _options_:: Parser options, passed to PDF::LinearParser.new.
  #
  def read(filename, options = {:verbosity => Parser::VERBOSE_INSANE})
    PDF::LinearParser.new(options).parse(filename)
  end

  #
  # Creates a new PDF and saves it.
  # If a block is passed, the PDF instance is yielded to it before saving.
  # _output_:: Destination, passed to PDF#save.
  # _options_:: Saving options, passed to PDF#save.
  #
  def create(output, options = {})
    pdf = PDF.new
    yield(pdf) if block_given?
    pdf.save(output, options)
  end

  #
  # Deserializes a PDF dump (gzip-compressed Marshal data, as written by
  # PDF#serialize) and returns the loaded object.
  #
  # WARNING: Marshal.load can instantiate arbitrary objects; only use this
  # on dumps coming from a trusted source.
  #
  def deserialize(filename)
    # The block value is returned by GzipReader.open; a variable assigned
    # inside the block would not be visible after it.
    Zlib::GzipReader.open(filename) do |gz|
      Marshal.load(gz.read)
    end
  end
end
206
+
207
+ #
208
+ # Creates a new PDF instance.
209
+ # _init_structure_:: If this flag is set, then some structures will be automatically generated while manipulating this PDF. Set it if you are creating a new PDF file, this _must_ _not_ be used when parsing an existing file.
210
+ #
211
#
# Builds an empty document: a fresh header, one revision with a new trailer,
# and, when _init_structure_ is true, whatever #init sets up.
#
def initialize(init_structure = true)
  @header = PDF::Header.new
  @revisions = []

  add_new_revision
  first_revision = @revisions.first
  first_revision.trailer = Trailer.new

  init if init_structure
end
220
+
221
+
222
+ #
223
+ # Serializes the current PDF
224
+ #
225
#
# Dumps this document with Marshal into a gzip-compressed file.
# _filename_:: Path of the dump to write.
# Returns self.
#
def serialize(filename)
  Zlib::GzipWriter.open(filename) do |gz|
    dump = Marshal.dump(self)
    gz.write(dump)
  end

  self
end
232
+
233
+ #
234
+ # Returns the virtual file size as it would be taking on disk.
235
+ #
236
#
# Returns the size in bytes of the document as produced by #to_bin
# (without rebuilding xrefs).
#
def filesize
  # bytesize, not size: String#size counts characters on Ruby >= 1.9, which
  # differs from the on-disk size when the output is not a binary string.
  to_bin(:rebuildxrefs => false).bytesize
end
239
+
240
+ #
241
+ # Saves the current document.
242
+ # _path_:: The path where to save this PDF, or an object responding to #write.
243
+ #
244
#
# Saves the current document.
# _path_:: A file path, or any object responding to #write (it is closed
#          once the document has been written).
# _params_:: Options merged over the defaults :delinearize => true,
#            :recompile => true, :decrypt => false; the resulting hash is
#            also passed to #compile and #to_bin.
# Returns self.
#
def save(path, params = {})

  options =
  {
    :delinearize => true,
    :recompile => true,
    :decrypt => false
  }
  options.update(params)

  if frozen? # incompatible flags with frozen doc (signed)
    [ :recompile, :rebuildxrefs, :noindent, :obfuscate ].each do |flag|
      options[flag] = false
    end
  end

  owns_fd = (not path.respond_to?(:write))
  fd = owns_fd ? File.open(path, 'wb') : path

  begin
    delinearize! if options[:delinearize] and is_linearized?
    compile(options) if options[:recompile]

    fd.write to_bin(options)
    fd.close
  ensure
    # Do not leak the handle we opened ourselves if building the document failed.
    fd.close if owns_fd and not fd.closed?
  end

  self
end
alias saveas save
276
+
277
+ #
278
+ # Saves the file up to given revision number.
279
+ # This can be useful to visualize the modifications over different incremental updates.
280
+ # _revision_:: The revision number to save.
281
+ # _filename_:: The path where to save this PDF.
282
+ #
283
#
# Saves the document, passing _revision_ to #save as the :up_to_revision option.
# _revision_:: The revision number to save up to.
# _filename_:: The destination, as accepted by #save.
#
def save_upto(revision, filename)
  params = { :up_to_revision => revision }
  save(filename, params)
end
286
+
287
+ #
288
+ # Returns an array of Objects whose content is matching _pattern_.
289
+ #
290
+ # def grep(*patterns)
291
+ #
292
+ # patterns.map! do |pattern|
293
+ # pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
294
+ # end
295
+ #
296
+ # unless patterns.all? { |pattern| pattern.is_a?(Regexp) }
297
+ # raise TypeError, "Expected a String or Regexp"
298
+ # end
299
+ #
300
+ # result = []
301
+ # objects.each do |obj|
302
+ # begin
303
+ # case obj
304
+ # when String, Name
305
+ # result << obj if patterns.any?{|pattern| obj.value.to_s.match(pattern)}
306
+ # when Stream
307
+ # result << obj if patterns.any?{|pattern| obj.data.match(pattern)}
308
+ # end
309
+ # rescue Exception => e
310
+ # puts "[#{e.class}] #{e.message}"
311
+ #
312
+ # next
313
+ # end
314
+ # end
315
+ #
316
+ # result
317
+ # end
318
+
319
+ #
320
+ # Returns an array of strings and streams matching the given pattern.
321
+ #
322
#
# Returns the strings, names and streams of the document matching at least
# one of _patterns_. String patterns are matched literally and
# case-insensitively; Regexp patterns are used as given.
# Raises TypeError if a pattern is neither a String nor a Regexp.
#
def grep(*patterns) #:nodoc:
  regexps = patterns.map do |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern), Regexp::IGNORECASE) : pattern
  end

  if regexps.any? { |regexp| not regexp.is_a?(Regexp) }
    raise TypeError, "Expected a String or Regexp"
  end

  # Gather every candidate: indirect strings/names/streams, plus the
  # strings and names cached by containers.
  candidates = self.indirect_objects.inject([]) do |acc, indobj|
    case indobj
    when Stream
      acc << indobj
      acc.concat(indobj.dictionary.strings_cache)
      acc.concat(indobj.dictionary.names_cache)
    when Name, String
      acc << indobj
    when Dictionary, Array
      acc.concat(indobj.strings_cache)
      acc.concat(indobj.names_cache)
    end

    acc
  end

  candidates.delete_if do |obj|
    begin
      case obj
      when String, Name
        regexps.none? { |regexp| obj.value.to_s.match(regexp) }
      when Stream
        regexps.none? { |regexp| obj.data.match(regexp) }
      end
    rescue Exception
      # Any object whose content cannot be read or matched is dropped.
      true
    end
  end
end
362
+
363
+ #
364
+ # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
365
+ #
366
#
# Returns the values stored under dictionary keys whose name matches one of
# _patterns_ (String patterns are matched literally). References are solved.
# Without any pattern, returns #objects without the keys.
#
def ls(*patterns)
  return objects(:include_keys => false) if patterns.empty?

  matchers = patterns.map do |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
  end

  matching_keys = objects(:only_keys => true).select do |key|
    matchers.any? { |matcher| key.value.to_s.match(matcher) }
  end

  matching_keys.map do |key|
    value = key.parent[key]
    value.is_a?(Reference) ? value.solve : value
  end
end
384
+
385
+ #
386
+ # Returns an array of Objects whose name (in a Dictionary) is matching _pattern_.
387
+ # Do not follow references.
388
+ #
389
#
# Same as #ls, except that matching values are returned as stored:
# references are not solved.
#
def ls_no_follow(*patterns)
  return objects(:include_keys => false) if patterns.empty?

  matchers = patterns.map do |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
  end

  objects(:only_keys => true).inject([]) do |found, key|
    found << key.parent[key] if matchers.any? { |matcher| key.value.to_s.match(matcher) }
    found
  end
end
407
+
408
+ #
409
+ # Returns an array of objects matching specified block.
410
+ #
411
#
# Returns the objects for which the given block is true.
# _params_:: Set :only_indirect to true to search #indirect_objects only;
#            otherwise #objects is searched.
#
def find(params = {}, &b)
  options = { :only_indirect => false }
  options.update(params)

  pool =
    if options[:only_indirect] == true
      indirect_objects
    else
      objects
    end

  pool.find_all(&b)
end
424
+
425
+ #
426
+ # Returns an array of objects embedded in the PDF body.
427
+ # _include_objstm_:: Whether it shall return objects embedded in object streams.
428
+ # Note : Shall return to an iterator for Ruby 1.9 comp.
429
+ #
430
+ def objects(params = {})
431
+
432
+ def append_subobj(root, objset, opts)
433
+
434
+ if objset.find{ |o| root.equal?(o) }.nil?
435
+ objset << root unless opts[:only_keys]
436
+
437
+ if root.is_a?(Dictionary)
438
+ root.each_pair { |name, value|
439
+ objset << name if opts[:only_keys]
440
+
441
+ append_subobj(name, objset, opts) if opts[:include_keys] and not opts[:only_keys]
442
+ append_subobj(value, objset, opts)
443
+ }
444
+ elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and opts[:include_objectstreams])
445
+ root.each { |subobj| append_subobj(subobj, objset, opts) }
446
+ end
447
+ end
448
+ end
449
+
450
+ options =
451
+ {
452
+ :include_objectstreams => true,
453
+ :include_keys => true,
454
+ :only_keys => false
455
+ }
456
+ options.update(params)
457
+
458
+ options[:include_keys] |= options[:only_keys]
459
+
460
+ objset = []
461
+ @revisions.each do |revision|
462
+ revision.objects.each do |object|
463
+ append_subobj(object, objset, options)
464
+ end
465
+ end
466
+
467
+ objset
468
+ end
469
+
470
+ #
471
+ # Return an array of indirect objects.
472
+ #
473
#
# Returns a new array holding the objects of every revision, in revision order.
#
def indirect_objects
  gathered = []
  @revisions.each { |rev| gathered.concat(rev.objects) }
  gathered
end
alias :root_objects :indirect_objects
477
+
478
+ #
479
+ # Adds a new object to the PDF file.
480
+ # If this object has no object number yet, a new one is automatically computed and assigned to it.
481
+ # It returns a Reference to this Object.
482
+ # _object_:: The object to add.
483
+ #
484
#
# Adds _object_ to the last revision and returns what #add_to_revision returns.
#
def <<(object)
  current_revision = @revisions.last
  add_to_revision(object, current_revision)
end
alias :insert :<<
488
+
489
+ #
490
+ # Adds a new object to a specific revision.
491
+ # If this object has no object number yet, a new one is automatically computed and assigned to it.
492
+ # It returns a Reference to this Object.
493
+ # _object_:: The object to add.
494
+ # _revision_:: The revision to add the object to.
495
+ #
496
#
# Marks _object_ as indirect, binds it to this document, numbers it if its
# number is 0, and stores it in the body of _revision_ under its reference.
# Returns the object's reference.
#
def add_to_revision(object, revision)
  object.set_indirect(true)
  object.set_pdf(self)

  if object.no == 0
    object.no, object.generation = alloc_new_object_number
  end

  body = revision.body
  body[object.reference] = object

  object.reference
end
507
+
508
+ #
509
+ # Returns a new number/generation for future object.
510
+ #
511
#
# Returns a [ number, generation ] pair for a future object: one more than
# the highest number used by indirect objects and by objects embedded in
# object streams (1 when none is numbered); the generation is always 0.
#
def alloc_new_object_number
  known = indirect_objects

  # Objects embedded in object streams hold numbers as well.
  known.select { |obj| obj.is_a?(ObjectStream) }.each do |objstm|
    objstm.each { |embedded| known << embedded }
  end

  numbers = known.map { |obj| obj.no }.compact
  next_no = numbers.empty? ? 1 : numbers.max + 1

  [ next_no, 0 ]
end
527
+
528
+ #
529
+ # This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
530
+ # * Allocates objects references.
531
+ # * Sets some objects missing required values.
532
+ #
533
#
# Prepares the document for output: appends a page if there is none,
# physicalizes the objects, then sets the header version (and the
# extension level when it is positive) from #version_required.
# _options_:: Only forwarded to #physicalize for encrypted documents.
# Returns self.
#
def compile(options = {})

  # A valid document must have at least one page.
  append_page if pages.empty?

  # Allocates object numbers, creates references and finalizes objects.
  encrypted = self.is_a?(Encryption::EncryptedDocument)
  encrypted ? physicalize(options) : physicalize

  # The version string is read as "<major>.<minor>", one digit each.
  version, level = version_required
  @header.majorversion = version[0,1].to_i
  @header.minorversion = version[2,1].to_i

  set_extension_level(version, level) if level > 0

  self
end
561
+
562
+ #
563
+ # Returns the final binary representation of the current document.
564
+ # _rebuildxrefs_:: Computes xrefs while writing objects (default true).
565
+ # _obfuscate_:: Do some basic syntactic object obfuscation.
566
+ #
567
#
# Returns the final binary representation of the current document as a String:
# the header, then for each revision (up to :up_to_revision) its objects in
# sorted order, its xref table when one is used, and its trailer.
#
# _params_ overrides the defaults listed in the options hash below:
# _rebuildxrefs_:: Computes xrefs while writing objects (default true).
# _noindent_:: Serializes dictionaries and streams with to_s(0) (default false).
# _obfuscate_:: Writes trailers with to_obfuscated_str (default false).
# _use_xrefstm_ / _use_xreftable_:: Kind of xrefs to build; by default an
#                                   xref stream iff the document holds an ObjectStream.
# _up_to_revision_:: Number of revisions to output (default: all).
#
# Raises InvalidPDFError when get_trailer_info returns nil.
#
# NOTE(review): offsets are taken from String#size, which counts characters
# rather than bytes on Ruby >= 1.9 unless the buffer is binary - confirm.
#
def to_bin(params = {})

  has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}

  options =
  {
    :rebuildxrefs => true,
    :noindent => false,
    :obfuscate => false,
    :use_xrefstm => has_objstm,
    :use_xreftable => (not has_objstm),
    :up_to_revision => @revisions.size
  }
  options.update(params)

  # Clamp the requested revision count to what the document actually has.
  options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size

  # Reset to default params if no xrefs are chosen (hybrid files not supported yet).
  # The equality test also catches the case where both kinds are requested.
  if options[:use_xrefstm] == options[:use_xreftable]
    options[:use_xrefstm] = has_objstm
    options[:use_xreftable] = (not has_objstm)
  end

  # Get trailer dictionary
  trailer_info = get_trailer_info
  if trailer_info.nil?
    raise InvalidPDFError, "No trailer information found"
  end
  trailer_dict = trailer_info.dictionary

  # prev_xref_offset carries the startxref of the previous revision into the next one.
  prev_xref_offset = nil
  xrefstm_offset = nil
  xreftable_offset = nil

  # Header
  bin = ""
  bin << @header.to_s

  # For each revision
  @revisions[0, options[:up_to_revision]].each do |rev|

    # Create xref table/stream.
    if options[:rebuildxrefs] == true
      # lastno_* : number of the last object given an entry;
      # brange_* : first object number of the subsection being filled.
      lastno_table, lastno_stm = 0, 0
      brange_table, brange_stm = 0, 0

      # Both entry lists start with the free entry for object 0.
      xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
      xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]

      if options[:use_xreftable] == true
        xrefsection = XRef::Section.new
      end

      if options[:use_xrefstm] == true
        # Reuse (after clearing) the revision's xref stream, or create one
        # and register it as an object of this revision.
        xrefstm = rev.xrefstm || XRefStream.new
        if xrefstm == rev.xrefstm
          xrefstm.clear
        else
          add_to_revision(xrefstm, rev)
        end
      end
    end

    objset = rev.objects

    # With xref streams, objects embedded in object streams get entries too.
    objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
      objset |= objstm.objects
    end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true

    # For each object, in number order
    objset.sort.each do |obj|

      # Create xref entry.
      if options[:rebuildxrefs] == true

        # Adding subsections if needed: a gap in object numbers closes the
        # current subsection and starts a new one at obj.no.
        if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
          xrefsection << XRef::Subsection.new(brange_table, xrefs_table)

          xrefs_table.clear
          brange_table = obj.no
        end
        if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
          xrefs_stm.each do |xref| xrefstm << xref end
          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.length

          xrefs_stm.clear
          brange_stm = obj.no
        end

        # Process embedded objects
        if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
          index = obj.parent.index(obj.no)

          xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)

          lastno_stm = obj.no
        else
          # Regular object: its entry records the current output offset.
          xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
          xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)

          lastno_table = lastno_stm = obj.no
        end

      end

      # Only objects that are not embedded in an object stream are written out.
      if obj.parent == obj or not obj.parent.is_a?(ObjectStream)

        # Finalize XRefStm
        if options[:rebuildxrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
          xrefstm_offset = bin.size

          xrefs_stm.each do |xref| xrefstm << xref end

          # Middle field width: number of bytes needed to hold this offset.
          xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
          if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
            xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
          end

          xrefstm.Index ||= []
          xrefstm.Index << brange_stm << xrefs_stm.size

          # The xref stream dictionary takes over the trailer entries.
          xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
          xrefstm.Prev = prev_xref_offset
          rev.trailer.dictionary = nil

          add_to_revision(xrefstm, rev)

          xrefstm.pre_build
          xrefstm.post_build
        end

        # Output object code
        if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
          bin << obj.to_s(0)
        else
          bin << obj.to_s
        end
      end
    end

    rev.trailer ||= Trailer.new

    # XRef table
    if options[:rebuildxrefs] == true

      if options[:use_xreftable] == true
        table_offset = bin.size

        # Close the last pending subsection.
        xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
        rev.xreftable = xrefsection

        rev.trailer.dictionary = trailer_dict
        rev.trailer.Size = objset.size + 1
        rev.trailer.Prev = prev_xref_offset

        rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
      end

      # startxref points at whichever xref structure this revision uses.
      startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
      rev.trailer.startxref = prev_xref_offset = startxref

    end # end if rebuildxrefs

    # Trailer
    bin << rev.xreftable.to_s if options[:use_xreftable] == true
    bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)

  end

  bin
end
740
+
741
+ #
742
+ # Compute and update XRef::Section for each Revision.
743
+ #
744
#
# Recomputes, for each revision, its xref table (through buildxrefs), its
# trailer Size (cumulated body sizes + 1) and its startxref, which is
# accumulated from the sizes of the serialized header, objects, xref tables
# and trailers. A missing trailer is created.
# Returns self.
#
def rebuildxrefs

  object_count = 0
  offset = @header.to_s.size

  @revisions.each do |revision|

    offset += revision.objects.inject(0) { |sum, object| sum + object.to_s.size }

    object_count += revision.body.size
    revision.xreftable = buildxrefs(revision.objects)

    revision.trailer ||= Trailer.new
    trailer = revision.trailer
    trailer.Size = object_count + 1
    trailer.startxref = offset

    # The next revision starts after this revision's xref table and trailer.
    offset += revision.xreftable.to_s.size + trailer.to_s.size
  end

  self
end
767
+
768
+ #
769
+ # Ends the current Revision, and starts a new one.
770
+ #
771
#
# Appends a new, empty Revision whose fresh trailer receives the Root entry
# of the previous last revision (nil when there was no revision yet).
# Returns self.
#
def add_new_revision

  root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]

  fresh = Revision.new(self)
  @revisions << fresh

  fresh.trailer = Trailer.new
  fresh.trailer.Root = root

  self
end
781
+
782
+ #
783
+ # Removes a whole document revision.
784
+ # _index_:: Revision index, first is 0.
785
+ #
786
#
# Removes a whole document revision.
# _index_:: Revision index, first is 0.
# Raises IndexError if _index_ does not designate an existing revision, and
# InvalidPDFError if the document has a single revision left.
# Returns self.
#
def remove_revision(index)
  # Valid indexes are 0...size: index == size is out of range too.
  if index < 0 or index >= @revisions.size
    raise IndexError, "Not a valid revision index"
  end

  if @revisions.size == 1
    raise InvalidPDFError, "Cannot remove last revision"
  end

  @revisions.delete_at(index)
  self
end
798
+
799
+ #
800
+ # Looking for an object present at a specified file offset.
801
+ #
802
#
# Returns the first indirect object whose file_offset equals _offset_,
# or nil when there is none.
#
def get_object_by_offset(offset) #:nodoc:
  indirect_objects.detect do |candidate|
    candidate.file_offset == offset
  end
end
805
+
806
+ #
807
+ # Remove an object.
808
+ #
809
#
# Removes an object from the body of every revision.
# _no_:: A Reference, or an Integer object number.
# _generation_:: Generation used when _no_ is an Integer.
# Raises TypeError for any other kind of _no_.
#
def delete_object(no, generation = 0)

  target =
    case no
    when Reference then no
    when ::Integer then Reference.new(no, generation)
    else raise TypeError, "Invalid parameter type : #{no.class}"
    end

  @revisions.each { |rev| rev.body.delete(target) }
end
825
+
826
+ #
827
+ # Search for an indirect object in the document.
828
+ # _no_:: Reference or number of the object.
829
+ # _generation_:: Object generation.
830
+ #
831
#
# Looks an indirect object up and returns it, or nil when it is not found.
# _no_:: A Reference, an Integer object number, or an Origami::Object
#        (returned as-is).
# _generation_:: Generation used when _no_ is an Integer.
# _use_xrefstm_:: When true and the object is not in the table of indirect
#                 objects, xref streams and object streams are searched too.
# Raises TypeError for any other kind of _no_.
#
def get_object(no, generation = 0, use_xrefstm = true) #:nodoc:
  case no
  when Reference
    target = no
  when ::Integer
    target = Reference.new(no, generation)
  when Origami::Object
    return no
  else
    raise TypeError, "Invalid parameter type : #{no.class}"
  end

  set = indirect_objects_table

  #
  # Search through accessible indirect objects.
  #
  if set.include?(target)
    set[target]
  elsif use_xrefstm == true
    # Look into XRef streams.

    if @revisions.last.has_xrefstm?
      xrefstm = @revisions.last.xrefstm

      # Walk the chain of xref streams through their Prev entries; `done`
      # remembers visited streams so that a cyclic chain terminates.
      done = []
      while xrefstm.is_a?(XRefStream) and not done.include?(xrefstm)
        xref = xrefstm.find(target.refno)

        #
        # We found a matching XRef.
        #
        if xref.is_a?(XRefToCompressedObj)
          # use_xrefstm is false here so this nested lookup does not search
          # xref streams again.
          # NOTE(review): objstm is used without a nil check - confirm it
          # cannot be nil at this point.
          objstm = get_object(xref.objstmno, 0, false)

          # Trust the index only if it yields the expected object number;
          # otherwise extract by number.
          object = objstm.extract_by_index(xref.index)
          if object.is_a?(Origami::Object) and object.no == target.refno
            return object
          else
            return objstm.extract(target.refno)
          end
        elsif xrefstm.has_field?(:Prev)
          done << xrefstm
          xrefstm = get_object_by_offset(xrefstm.Prev)
        else
          break
        end
      end
    end

    #
    # Lastly search directly into Object streams (might be very slow).
    #
    stream = set.values.find_all{|obj| obj.is_a?(ObjectStream)}.find do |objstm| objstm.include?(target.refno) end
    stream && stream.extract(target.refno)
  end

end

alias :[] :get_object
891
+
892
+ #
893
+ # Converts a logical PDF view into a physical view ready for writing.
894
+ #
895
#
# Walks every indirect object (as returned by indirect_objects_by_rev) and
# builds it: indirect sub-objects are replaced by their references, added to
# the revision when the document does not hold them yet, and pre_build /
# post_build are invoked on each built object.
# Returns self.
#
def physicalize

  #
  # Indirect objects are added to the revision and assigned numbers.
  # NOTE: this nested def (re)defines `build` each time physicalize runs.
  #
  def build(obj, revision) #:nodoc:

    #
    # Finalize any subobjects before building the stream.
    #
    if obj.is_a?(ObjectStream)
      obj.each do |subobj|
        build(subobj, revision)
      end
    end

    obj.pre_build

    if obj.is_a?(Dictionary) or obj.is_a?(Array)

      # Swap each indirect child for its reference; a child unknown to the
      # document is first added to the revision, then built itself.
      obj.map! do |subobj|
        if subobj.is_indirect?
          if get_object(subobj.reference)
            subobj.reference
          else
            ref = add_to_revision(subobj, revision)
            build(subobj, revision)
            ref
          end
        else
          subobj
        end
      end

      # Second pass over the (now substituted) children.
      obj.each do |subobj|
        build(subobj, revision)
      end

    elsif obj.is_a?(Stream)
      build(obj.dictionary, revision)
    end

    obj.post_build

  end

  indirect_objects_by_rev.each do |obj, revision|
    build(obj, revision)
  end

  self
end
947
+
948
+ #
949
+ # Cleans the document from its references.
950
+ # Indirect objects are made direct whenever possible.
951
+ # TODO: Circuit-checking to avoid infinite induction
952
+ #
953
def logicalize #:nodoc:
  # Always raises; everything after this line is an unreachable draft.
  fail "Not yet supported"

  processed = []

  # Draft helper: inlines the references held by a Dictionary or Array and
  # returns the list of references that were replaced.
  def convert(root) #:nodoc:
    replaced = []
    return replaced unless root.is_a?(Dictionary) || root.is_a?(Array)

    root.each { |child| convert(child) }

    root.map! do |child|
      next child unless child.is_a?(Reference)

      target = child.solve
      # Streams can't be direct objects, so their reference is kept.
      next child if target.is_a?(Stream)

      replaced << child
      target
    end

    replaced
  end

  @revisions.each do |revision|
    revision.objects.each { |obj| processed.concat(convert(obj)) }
  end
end
995
+
996
+ ##########################
997
+ private
998
+ ##########################
999
+
1000
+ #
1001
+ # Instantiates basic structures required for a valid PDF file.
1002
+ #
1003
def init
  # Reuse the document's :Root attribute when get_doc_attr returns one,
  # otherwise start from a new Catalog.
  root = get_doc_attr(:Root) || Catalog.new
  self.Catalog = root

  # The catalog gets a new page tree root, flagged as indirect.
  root.Pages = PageTreeNode.new.set_indirect(true)

  # The trailer of the last revision points at the catalog.
  @revisions.last.trailer.Root = root.reference

  self
end
1010
+
1011
# Returns the highest value reported by +pdf_version_required+ across the
# objects of every revision, starting from a floor of [ 1.0, 0 ].
# The first element of the returned array is converted to a String.
def version_required #:nodoc:
  highest = [ 1.0, 0 ]

  @revisions.each do |revision|
    revision.objects.each do |object|
      candidate = object.pdf_version_required
      highest = candidate if (candidate <=> highest) > 0
    end
  end

  # +highest+ may be the very array an object handed back, so stringify a
  # copy rather than modifying that object's data in place.
  result = highest.dup
  result[0] = result[0].to_s

  result
end
1024
+
1025
# Merges the +body+ of every revision into a single new Hash.
# On duplicate keys the entry from the later revision wins.
def indirect_objects_table #:nodoc:
  @revisions.each_with_object({}) do |rev, table|
    table.merge!(rev.body)
  end
end
1028
+
1029
# Returns a flat list of [ object, revision ] pairs covering the +objects+
# of every revision, in revision order.
def indirect_objects_by_rev #:nodoc:
  @revisions.flat_map do |rev|
    rev.objects.map { |obj| [ obj, rev ] }
  end
end
1035
+
1036
+ #
1037
+ # Compute and update XRef::Section for each Revision.
1038
+ #
1039
def rebuild_dummy_xrefs #:nodoc:
  # Builds a placeholder XRef::Section for +objects+: one XRef::FREE entry
  # with a zero offset per object, split into a new subsection whenever
  # two consecutive object numbers differ by more than one.
  # Being a nested +def+, this helper is (re)defined as an instance method
  # whenever rebuild_dummy_xrefs runs.
  def build_dummy_xrefs(objects)
    previous_no = 0
    range_start = 0

    entries = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
    section = XRef::Section.new

    objects.sort.each do |object|
      if (object.no - previous_no).abs > 1
        # The same array is reused after being handed to Subsection.new.
        # NOTE(review): presumably Subsection keeps its own copy - confirm.
        section << XRef::Subsection.new(range_start, entries)
        range_start = object.no
        entries.clear
      end

      entries << XRef.new(0, 0, XRef::FREE)
      previous_no = object.no
    end

    section << XRef::Subsection.new(range_start, entries)

    section
  end

  object_count = 0
  offset = @header.to_s.size

  @revisions.each do |revision|
    # The xref table is positioned after every serialized object.
    revision.objects.each { |object| offset += object.to_s.size }

    object_count += revision.body.size
    revision.xreftable = build_dummy_xrefs(revision.objects)

    revision.trailer ||= Trailer.new
    revision.trailer.Size = object_count + 1
    revision.trailer.startxref = offset

    # The next revision starts after this one's table and trailer.
    offset += revision.xreftable.to_s.size + revision.trailer.to_s.size
  end

  self
end
1086
+
1087
+ #
1088
+ # Build a xref section from a set of objects.
1089
+ #
1090
def buildxrefs(objects) #:nodoc:
  previous_no = 0
  range_start = 0

  # The first subsection starts at 0 and is seeded with a
  # XRef::FIRSTFREE / XRef::FREE entry.
  entries = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
  section = XRef::Section.new

  objects.sort.each do |object|
    # Object numbers more than one apart close the current subsection.
    if (object.no - previous_no).abs > 1
      # NOTE(review): +entries+ is cleared right after being handed over;
      # presumably Subsection keeps its own copy - confirm.
      section << XRef::Subsection.new(range_start, entries)
      range_start = object.no
      entries.clear
    end

    # One XRef::USED entry per object, at the offset get_object_offset reports.
    offset = get_object_offset(object.no, object.generation)
    entries << XRef.new(offset, object.generation, XRef::USED)

    previous_no = object.no
  end

  section << XRef::Subsection.new(range_start, entries)

  section
end
1114
+
1115
# Removes the revision at index +ngen+ from the document.
# Returns the removed revision, or nil when the index is out of range.
def delete_revision(ngen) #:nodoc:
  # delete_at takes the index as an argument. The previous
  # +delete_at[ngen]+ called it with none and raised ArgumentError.
  @revisions.delete_at(ngen)
end
1118
+
1119
# Returns the +body+ of the revision stored at index +ngen+.
def get_revision(ngen) #:nodoc:
  revision = @revisions[ngen]
  revision.body
end
1122
+
1123
# Returns the offset of object (+no+, +generation+), computed by summing
# the +to_s+ sizes of the header and of everything placed before it
# (objects in sorted order, then the xref table and trailer of each
# revision). Returns nil when no object matches.
def get_object_offset(no,generation) #:nodoc:
  offset = @header.to_s.size

  @revisions.each do |revision|
    revision.objects.sort.each do |object|
      return offset if object.no == no && object.generation == generation

      offset += object.to_s.size
    end

    # Skip over this revision's xref table and trailer.
    offset += revision.xreftable.to_s.size
    offset += revision.trailer.to_s.size
  end

  nil
end
1140
+
1141
+ end
1142
+
1143
+ end
1144
+