origami 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (108) hide show
  1. data/COPYING.LESSER +165 -0
  2. data/README +77 -0
  3. data/VERSION +1 -0
  4. data/bin/config/pdfcop.conf.yml +237 -0
  5. data/bin/gui/about.rb +46 -0
  6. data/bin/gui/config.rb +132 -0
  7. data/bin/gui/file.rb +385 -0
  8. data/bin/gui/hexdump.rb +74 -0
  9. data/bin/gui/hexview.rb +91 -0
  10. data/bin/gui/imgview.rb +72 -0
  11. data/bin/gui/menu.rb +392 -0
  12. data/bin/gui/properties.rb +132 -0
  13. data/bin/gui/signing.rb +635 -0
  14. data/bin/gui/textview.rb +107 -0
  15. data/bin/gui/treeview.rb +409 -0
  16. data/bin/gui/walker.rb +282 -0
  17. data/bin/gui/xrefs.rb +79 -0
  18. data/bin/pdf2graph +121 -0
  19. data/bin/pdf2ruby +353 -0
  20. data/bin/pdfcocoon +104 -0
  21. data/bin/pdfcop +455 -0
  22. data/bin/pdfdecompress +104 -0
  23. data/bin/pdfdecrypt +95 -0
  24. data/bin/pdfencrypt +112 -0
  25. data/bin/pdfextract +221 -0
  26. data/bin/pdfmetadata +123 -0
  27. data/bin/pdfsh +13 -0
  28. data/bin/pdfwalker +7 -0
  29. data/bin/shell/.irbrc +104 -0
  30. data/bin/shell/console.rb +136 -0
  31. data/bin/shell/hexdump.rb +83 -0
  32. data/origami.rb +36 -0
  33. data/origami/3d.rb +239 -0
  34. data/origami/acroform.rb +321 -0
  35. data/origami/actions.rb +299 -0
  36. data/origami/adobe/fdf.rb +259 -0
  37. data/origami/adobe/ppklite.rb +489 -0
  38. data/origami/annotations.rb +775 -0
  39. data/origami/array.rb +187 -0
  40. data/origami/boolean.rb +101 -0
  41. data/origami/catalog.rb +486 -0
  42. data/origami/destinations.rb +213 -0
  43. data/origami/dictionary.rb +188 -0
  44. data/origami/docmdp.rb +96 -0
  45. data/origami/encryption.rb +1293 -0
  46. data/origami/export.rb +283 -0
  47. data/origami/file.rb +222 -0
  48. data/origami/filters.rb +250 -0
  49. data/origami/filters/ascii.rb +189 -0
  50. data/origami/filters/ccitt.rb +515 -0
  51. data/origami/filters/crypt.rb +47 -0
  52. data/origami/filters/dct.rb +61 -0
  53. data/origami/filters/flate.rb +112 -0
  54. data/origami/filters/jbig2.rb +63 -0
  55. data/origami/filters/jpx.rb +53 -0
  56. data/origami/filters/lzw.rb +195 -0
  57. data/origami/filters/predictors.rb +276 -0
  58. data/origami/filters/runlength.rb +117 -0
  59. data/origami/font.rb +209 -0
  60. data/origami/functions.rb +93 -0
  61. data/origami/graphics.rb +33 -0
  62. data/origami/graphics/colors.rb +191 -0
  63. data/origami/graphics/instruction.rb +126 -0
  64. data/origami/graphics/path.rb +154 -0
  65. data/origami/graphics/patterns.rb +180 -0
  66. data/origami/graphics/state.rb +164 -0
  67. data/origami/graphics/text.rb +224 -0
  68. data/origami/graphics/xobject.rb +493 -0
  69. data/origami/header.rb +90 -0
  70. data/origami/linearization.rb +318 -0
  71. data/origami/metadata.rb +114 -0
  72. data/origami/name.rb +170 -0
  73. data/origami/null.rb +75 -0
  74. data/origami/numeric.rb +188 -0
  75. data/origami/obfuscation.rb +233 -0
  76. data/origami/object.rb +527 -0
  77. data/origami/outline.rb +59 -0
  78. data/origami/page.rb +559 -0
  79. data/origami/parser.rb +268 -0
  80. data/origami/parsers/fdf.rb +45 -0
  81. data/origami/parsers/pdf.rb +27 -0
  82. data/origami/parsers/pdf/linear.rb +113 -0
  83. data/origami/parsers/ppklite.rb +86 -0
  84. data/origami/pdf.rb +1144 -0
  85. data/origami/reference.rb +113 -0
  86. data/origami/signature.rb +474 -0
  87. data/origami/stream.rb +575 -0
  88. data/origami/string.rb +416 -0
  89. data/origami/trailer.rb +173 -0
  90. data/origami/webcapture.rb +87 -0
  91. data/origami/xfa.rb +3027 -0
  92. data/origami/xreftable.rb +447 -0
  93. data/templates/patterns.rb +66 -0
  94. data/templates/widgets.rb +173 -0
  95. data/templates/xdp.rb +92 -0
  96. data/tests/dataset/test.dummycrt +28 -0
  97. data/tests/dataset/test.dummykey +27 -0
  98. data/tests/tc_actions.rb +32 -0
  99. data/tests/tc_annotations.rb +85 -0
  100. data/tests/tc_pages.rb +37 -0
  101. data/tests/tc_pdfattach.rb +24 -0
  102. data/tests/tc_pdfencrypt.rb +110 -0
  103. data/tests/tc_pdfnew.rb +32 -0
  104. data/tests/tc_pdfparse.rb +98 -0
  105. data/tests/tc_pdfsig.rb +37 -0
  106. data/tests/tc_streams.rb +129 -0
  107. data/tests/ts_pdf.rb +45 -0
  108. metadata +193 -0
@@ -0,0 +1,86 @@
1
+ =begin
2
+
3
+ = File
4
+ parsers/ppklite.rb
5
+
6
+ = Info
7
+ Origami is free software: you can redistribute it and/or modify
8
+ it under the terms of the GNU Lesser General Public License as published by
9
+ the Free Software Foundation, either version 3 of the License, or
10
+ (at your option) any later version.
11
+
12
+ Origami is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public License
18
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
19
+
20
+ =end
21
+
22
+ require 'origami/parser'
23
+ require 'origami/adobe/ppklite'
24
+
25
module Origami

  class Adobe::PPKLite

    #
    # Parser building an Adobe::PPKLite address book out of a stream.
    #
    class Parser < Origami::Parser

      #
      # Parses _stream_ and returns the resulting Adobe::PPKLite instance.
      # The parsed header is handed to the :callback option before the
      # objects, xref table and trailer are read.
      #
      def parse(stream) #:nodoc:
        super

        book = Adobe::PPKLite.new
        book.header = Adobe::PPKLite::Header.parse(stream)
        @options[:callback].call(book.header)

        parse_objects(book)
        parse_xreftable(book)
        parse_trailer(book)
        book_specialize_entries(book)

        book
      end

      #
      # Replaces the generic dictionaries found in the first revision of
      # _addrbk_ by their PPKLite counterparts (catalog, users, certificates).
      #
      def book_specialize_entries(addrbk) #:nodoc:
        addrbk.revisions.first.body.each_pair do |ref, obj|
          next unless obj.is_a?(Dictionary)

          specialized =
            if obj[:Type] == :Catalog
              book_specialize_catalog(obj)
            elsif obj[:ABEType] == Adobe::PPKLite::Descriptor::USER
              book_retype_entry(obj, Adobe::PPKLite::User)
            elsif obj[:ABEType] == Adobe::PPKLite::Descriptor::CERTIFICATE
              book_retype_entry(obj, Adobe::PPKLite::Certificate)
            end

          addrbk.revisions.first.body[ref] = specialized unless specialized.nil?
        end
      end

      private

      #
      # Wraps _obj_ into _klass_ and copies over its number, generation and file offset.
      #
      def book_retype_entry(obj, klass) #:nodoc:
        typed = klass.new(obj)
        typed.generation, typed.no, typed.file_offset = obj.generation, obj.no, obj.file_offset

        typed
      end

      #
      # Builds the PPKLite catalog for _obj_, specializing its PPK dictionary
      # along with the user and address lists nested inside it.
      #
      def book_specialize_catalog(obj) #:nodoc:
        catalog = book_retype_entry(obj, Adobe::PPKLite::Catalog)

        if catalog.PPK.is_a?(Dictionary) and catalog.PPK[:Type] == :PPK
          catalog.PPK = Adobe::PPKLite::PPK.new(catalog.PPK)

          if catalog.PPK.User.is_a?(Dictionary) and catalog.PPK.User[:Type] == :User
            catalog.PPK.User = Adobe::PPKLite::UserList.new(catalog.PPK.User)
          end

          if catalog.PPK.AddressBook.is_a?(Dictionary) and catalog.PPK.AddressBook[:Type] == :AddressBook
            catalog.PPK.AddressBook = Adobe::PPKLite::AddressList.new(catalog.PPK.AddressBook)
          end
        end

        catalog
      end

    end
  end
end
86
+
@@ -0,0 +1,1144 @@
1
+ =begin
2
+
3
+ = File
4
+ pdf.rb
5
+
6
+ = Info
7
+ Origami is free software: you can redistribute it and/or modify
8
+ it under the terms of the GNU Lesser General Public License as published by
9
+ the Free Software Foundation, either version 3 of the License, or
10
+ (at your option) any later version.
11
+
12
+ Origami is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public License
18
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
19
+
20
+ =end
21
+
22
+ require 'origami/object'
23
+ require 'origami/null'
24
+ require 'origami/name'
25
+ require 'origami/dictionary'
26
+ require 'origami/reference'
27
+ require 'origami/boolean'
28
+ require 'origami/numeric'
29
+ require 'origami/string'
30
+ require 'origami/array'
31
+ require 'origami/stream'
32
+ require 'origami/filters'
33
+ require 'origami/trailer'
34
+ require 'origami/xreftable'
35
+ require 'origami/header'
36
+ require 'origami/functions'
37
+ require 'origami/catalog'
38
+ require 'origami/font'
39
+ require 'origami/page'
40
+ require 'origami/graphics'
41
+ require 'origami/destinations'
42
+ require 'origami/outline'
43
+ require 'origami/actions'
44
+ require 'origami/file'
45
+ require 'origami/acroform'
46
+ require 'origami/annotations'
47
+ require 'origami/3d'
48
+ require 'origami/signature'
49
+ require 'origami/webcapture'
50
+ require 'origami/metadata'
51
+ require 'origami/export'
52
+ require 'origami/webcapture'
53
+ require 'origami/encryption'
54
+ require 'origami/linearization'
55
+ require 'origami/obfuscation'
56
+ require 'origami/xfa'
57
+
58
+ module Origami
59
+
60
# Version string of the library.
+ VERSION = "1.0.2"
61
+ REVISION = "$Revision: rev 111/, 2011/05/25 18:01:00 darko $" #:nodoc:
62
+
63
+ #
64
+ # Global options for Origami.
65
+ #
66
+ OPTIONS =
67
+ {
68
+ :enable_type_checking => true, # set to false to disable type consistency checks during compilation.
69
+ :enable_type_guessing => true, # set to false to prevent the parser to guess the type of special dictionary and streams (not recommended).
70
+ :use_openssl => true # set to false to use Origami crypto backend.
71
+ }
72
+
# Probe for the OpenSSL bindings when this file is loaded. The outcome is
# written to OPTIONS[:use_openssl] unconditionally, so the value given in the
# OPTIONS literal is always replaced at load time; to force the other backend
# the entry has to be changed after this file has been required.
73
+ begin
74
+ require 'openssl'
75
+ OPTIONS[:use_openssl] = true
76
+ rescue LoadError
# OpenSSL is not available: record it instead of failing the load.
77
+ OPTIONS[:use_openssl] = false
78
+ end
79
+
80
# Lookup table from a name (Symbol) to the Ruby class specializing a dictionary.
# NOTE(review): presumably keyed by the dictionary's /Type entry and consulted
# when OPTIONS[:enable_type_guessing] is set; the lookup code is not in this file, confirm.
+ @@dict_special_types =
81
+ {
82
+ :Catalog => Catalog,
83
+ :Pages => PageTreeNode,
84
+ :Page => Page,
85
+ :Filespec => FileSpec,
86
+ :Action => Action,
87
+ :Font => Font,
88
+ :FontDescriptor => FontDescriptor,
89
+ :Encoding => Encoding,
90
+ :Annot => Annotation,
91
+ :Border => Annotation::BorderStyle,
92
+ :Outlines => Outline,
93
+ :Sig => Signature::DigitalSignature,
94
+ :SigRef => Signature::Reference,
95
+ :SigFieldLock => Field::SignatureLock,
96
+ :SV => Field::SignatureSeedValue,
97
+ :SVCert => Field::CertificateSeedValue,
98
+ :ExtGState => Graphics::ExtGState,
99
+ :RichMediaSettings => Annotation::RichMedia::Settings,
100
+ :RichMediaActivation => Annotation::RichMedia::Activation,
101
+ :RichMediaDeactivation => Annotation::RichMedia::Deactivation,
102
+ :RichMediaAnimation => Annotation::RichMedia::Animation,
103
+ :RichMediaPresentation => Annotation::RichMedia::Presentation,
104
+ :RichMediaWindow => Annotation::RichMedia::Window,
105
+ :RichMediaPosition => Annotation::RichMedia::Position,
106
+ :RichMediaContent => Annotation::RichMedia::Content,
107
+ :RichMediaConfiguration => Annotation::RichMedia::Configuration,
108
+ :RichMediaInstance => Annotation::RichMedia::Instance,
109
+ :RichMediaParams => Annotation::RichMedia::Parameters,
110
+ :CuePoint => Annotation::RichMedia::CuePoint
111
+ }
112
+
# Lookup table from a name (Symbol) to the Ruby class specializing a stream.
# NOTE(review): presumably keyed by the stream dictionary's /Type entry, confirm.
113
+ @@stm_special_types =
114
+ {
115
+ :ObjStm => ObjectStream,
116
+ :EmbeddedFile => EmbeddedFileStream,
117
+ :Metadata => MetadataStream,
118
+ :XRef => XRefStream,
119
+ :"3D" => U3DStream
120
+ }
121
+
# Lookup table from a name (Symbol) to the Ruby class specializing an XObject stream.
# NOTE(review): the variable name suggests the key is the /Subtype entry, confirm.
122
+ @@stm_xobj_subtypes =
123
+ {
124
+ :Image => Graphics::ImageXObject,
125
+ :Form => Graphics::FormXObject
126
+ }
127
+
128
#
# Raised when a document cannot be processed because its structure is invalid
# (for instance when no trailer information is found, or when the only
# remaining revision is about to be removed).
#
# Derives from StandardError rather than Exception so that a plain +rescue+
# catches it; handlers rescuing InvalidPDFError or Exception keep working.
#
class InvalidPDFError < StandardError #:nodoc:
end
130
+
131
+ #
132
+ # Main class representing a PDF file and its inner contents.
133
+ # A PDF file contains a set of Revision.
134
+ #
135
+ class PDF
136
+
137
#
# A single revision of a PDF document.
# A Revision holds:
# * a body, mapping references to the indirect objects of the revision;
# * optionally a XRef::Section and/or a XRef stream describing them;
# * a Trailer.
#
class Revision
  attr_accessor :pdf, :body, :xreftable, :xrefstm
  attr_reader :trailer

  # Creates an empty revision belonging to the document _pdf_.
  def initialize(pdf)
    @pdf, @body = pdf, {}
    @xreftable = @xrefstm = @trailer = nil
  end

  # Binds _trl_ to the owning document, then stores it as this revision's trailer.
  def trailer=(trl)
    trl.pdf = @pdf
    @trailer = trl
  end

  # True when a XRef section has been attached to this revision.
  def has_xreftable?
    !@xreftable.nil?
  end

  # True when a XRef stream has been attached to this revision.
  def has_xrefstm?
    !@xrefstm.nil?
  end

  # Returns the objects stored in the body, without their references.
  def objects
    @body.values
  end
end
173
+
174
+ attr_accessor :header, :revisions
175
+
176
+ class << self
177
+
178
#
# Parses the file _filename_ with a PDF::LinearParser.
# _options_:: Options handed to the parser; when omitted, only :verbosity is set (to Parser::VERBOSE_INSANE).
# Returns whatever the parser's +parse+ method returns.
#
def read(filename, options = {:verbosity => Parser::VERBOSE_INSANE})
  parser = PDF::LinearParser.new(options)
  parser.parse(filename)
end
184
+
185
#
# Builds a fresh PDF, optionally lets the caller fill it, then saves it.
# _output_:: Destination handed to PDF#save.
# _options_:: Save options handed to PDF#save.
# When a block is given, it receives the new PDF instance before it is saved.
# Returns the value of PDF#save.
#
def create(output, options = {})
  document = PDF.new
  if block_given?
    yield(document)
  end

  document.save(output, options)
end
194
+
195
#
# Deserializes a PDF dump, i.e. a gzip-compressed Marshal dump such as the one
# written by PDF#serialize.
# _filename_:: Path of the dump to load.
# Returns the unmarshalled object.
#
# NOTE: Marshal.load is able to instantiate arbitrary objects. Only load dumps
# coming from a trusted source.
#
def deserialize(filename)
  # Declared outside the block: a variable first assigned inside the block
  # would be block-local and undefined once the block has returned.
  pdf = nil

  Zlib::GzipReader.open(filename) { |gz|
    pdf = Marshal.load(gz.read)
  }

  pdf
end
205
+ end
206
+
207
#
# Creates a new PDF instance holding a default header and one revision with an empty trailer.
# _init_structure_:: If this flag is set, then some structures will be automatically generated while manipulating this PDF. Set it if you are creating a new PDF file, this _must_ _not_ be used when parsing an existing file.
#
def initialize(init_structure = true)
  @header = PDF::Header.new
  @revisions = []

  # First revision, followed by a fresh trailer for it.
  add_new_revision
  first_revision = @revisions.first
  first_revision.trailer = Trailer.new

  if init_structure
    init
  end
end
220
+
221
+
222
#
# Serializes the current PDF: the Marshal dump of the document is written,
# gzip-compressed, to _filename_.
# Returns self.
#
def serialize(filename)
  Zlib::GzipWriter.open(filename) do |gzip|
    payload = Marshal.dump(self)
    gzip.write(payload)
  end

  self
end
232
+
233
#
# Returns the size of the document's binary representation, computed by
# #to_bin without rebuilding the xrefs.
#
def filesize
  binary = to_bin(:rebuildxrefs => false)
  binary.size
end
239
+
240
#
# Saves the current document.
# _path_:: Destination file path, or any object responding to +write+ (it is closed once written).
# _params_:: Options overriding the defaults below; the merged options are also passed to #compile and #to_bin.
#
# Default options are :delinearize => true, :recompile => true and :decrypt => false.
# On a frozen (signed) document :recompile, :rebuildxrefs, :noindent and :obfuscate are forced to false.
#
# Returns self.
#
def save(path, params = {})

  options =
  {
    :delinearize => true,
    :recompile => true,
    :decrypt => false
  }
  options.update(params)

  if self.frozen? # incompatible flags with frozen doc (signed)
    options[:recompile] =
    options[:rebuildxrefs] =
    options[:noindent] =
    options[:obfuscate] = false
  end

  opened_here = !path.respond_to?(:write)
  fd = opened_here ? File.open(path, 'wb') : path

  written = false
  begin
    self.delinearize! if options[:delinearize] and self.is_linearized?
    self.compile(options) if options[:recompile]

    fd.write self.to_bin(options)
    written = true
  ensure
    # A file opened here must never leak, even when building the document fails.
    # A caller-provided IO is only closed after a successful write, as before.
    fd.close if written or opened_here
  end

  self
end
275
+ alias saveas save
276
+
277
#
# Saves the document, passing _revision_ to #save as the :up_to_revision option.
# This can be useful to visualize the modifications over different incremental updates.
# _revision_:: The revision number to save up to.
# _filename_:: The path where to save this PDF.
#
def save_upto(revision, filename)
  params = { :up_to_revision => revision }
  save(filename, params)
end
286
+
287
+ #
288
+ # Returns an array of Objects whose content is matching _pattern_.
289
+ #
290
+ # def grep(*patterns)
291
+ #
292
+ # patterns.map! do |pattern|
293
+ # pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern)) : pattern
294
+ # end
295
+ #
296
+ # unless patterns.all? { |pattern| pattern.is_a?(Regexp) }
297
+ # raise TypeError, "Expected a String or Regexp"
298
+ # end
299
+ #
300
+ # result = []
301
+ # objects.each do |obj|
302
+ # begin
303
+ # case obj
304
+ # when String, Name
305
+ # result << obj if patterns.any?{|pattern| obj.value.to_s.match(pattern)}
306
+ # when Stream
307
+ # result << obj if patterns.any?{|pattern| obj.data.match(pattern)}
308
+ # end
309
+ # rescue Exception => e
310
+ # puts "[#{e.class}] #{e.message}"
311
+ #
312
+ # next
313
+ # end
314
+ # end
315
+ #
316
+ # result
317
+ # end
318
+
319
#
# Returns an array of strings, names and streams matching the given patterns.
# _patterns_:: Regexp objects, or Strings (matched literally, ignoring case).
# An object is kept when at least one pattern matches its value (strings, names)
# or its data (streams). Objects raising while being inspected are dropped.
# Raises TypeError when a pattern is neither a String nor a Regexp.
#
def grep(*patterns) #:nodoc:
  patterns.map! do |pattern|
    pattern.is_a?(::String) ? Regexp.new(Regexp.escape(pattern), Regexp::IGNORECASE) : pattern
  end

  raise TypeError, "Expected a String or Regexp" unless patterns.all? { |pattern| pattern.is_a?(Regexp) }

  # Gather every candidate: streams, plus the strings and names reachable
  # from the indirect objects.
  candidates = []
  self.indirect_objects.each do |indobj|
    if indobj.is_a?(Stream)
      candidates.push(indobj)
      candidates.concat(indobj.dictionary.strings_cache)
      candidates.concat(indobj.dictionary.names_cache)
    elsif indobj.is_a?(Name) or indobj.is_a?(String)
      candidates.push(indobj)
    elsif indobj.is_a?(Dictionary) or indobj.is_a?(Array)
      candidates.concat(indobj.strings_cache)
      candidates.concat(indobj.names_cache)
    end
  end

  # Drop the candidates that no pattern matches.
  candidates.delete_if do |obj|
    begin
      if obj.is_a?(String) or obj.is_a?(Name)
        patterns.none? { |pattern| obj.value.to_s.match(pattern) }
      elsif obj.is_a?(Stream)
        patterns.none? { |pattern| obj.data.match(pattern) }
      end
    rescue Exception
      true
    end
  end
end
362
+
363
#
# Returns an array of Objects whose name (in a Dictionary) is matching one of the patterns.
# _patterns_:: Regexp objects, or Strings (matched literally).
# Values that are references are resolved before being returned.
# Without any pattern, returns #objects with the :include_keys option disabled.
#
def ls(*patterns)
  return objects(:include_keys => false) if patterns.empty?

  matchers = patterns.map do |pattern|
    if pattern.is_a?(::String)
      Regexp.new(Regexp.escape(pattern))
    else
      pattern
    end
  end

  objects(:only_keys => true).inject([]) do |found, key|
    if matchers.any? { |matcher| key.value.to_s.match(matcher) }
      value = key.parent[key]
      found << (value.is_a?(Reference) ? value.solve : value)
    end

    found
  end
end
384
+
385
#
# Returns an array of Objects whose name (in a Dictionary) is matching one of the patterns.
# _patterns_:: Regexp objects, or Strings (matched literally).
# Do not follow references: values are returned as stored in their dictionary.
# Without any pattern, returns #objects with the :include_keys option disabled.
#
def ls_no_follow(*patterns)
  return objects(:include_keys => false) if patterns.empty?

  matchers = patterns.map do |pattern|
    if pattern.is_a?(::String)
      Regexp.new(Regexp.escape(pattern))
    else
      pattern
    end
  end

  objects(:only_keys => true).inject([]) do |found, key|
    found << key.parent[key] if matchers.any? { |matcher| key.value.to_s.match(matcher) }
    found
  end
end
407
+
408
#
# Returns an array of the objects for which the given block is true.
# _params_:: Set :only_indirect to true to search #indirect_objects only; otherwise #objects is searched.
#
def find(params = {}, &b)
  options = { :only_indirect => false }
  options.update(params)

  if options[:only_indirect] == true
    indirect_objects.find_all(&b)
  else
    objects.find_all(&b)
  end
end
424
+
425
+ #
426
+ # Returns an array of objects embedded in the PDF body.
427
+ # _include_objstm_:: Whether it shall return objects embedded in object streams.
428
+ # Note : Shall return to an iterator for Ruby 1.9 comp.
429
+ #
430
+ def objects(params = {})
431
+
432
+ def append_subobj(root, objset, opts)
433
+
434
+ if objset.find{ |o| root.equal?(o) }.nil?
435
+ objset << root unless opts[:only_keys]
436
+
437
+ if root.is_a?(Dictionary)
438
+ root.each_pair { |name, value|
439
+ objset << name if opts[:only_keys]
440
+
441
+ append_subobj(name, objset, opts) if opts[:include_keys] and not opts[:only_keys]
442
+ append_subobj(value, objset, opts)
443
+ }
444
+ elsif root.is_a?(Array) or (root.is_a?(ObjectStream) and opts[:include_objectstreams])
445
+ root.each { |subobj| append_subobj(subobj, objset, opts) }
446
+ end
447
+ end
448
+ end
449
+
450
+ options =
451
+ {
452
+ :include_objectstreams => true,
453
+ :include_keys => true,
454
+ :only_keys => false
455
+ }
456
+ options.update(params)
457
+
458
+ options[:include_keys] |= options[:only_keys]
459
+
460
+ objset = []
461
+ @revisions.each do |revision|
462
+ revision.objects.each do |object|
463
+ append_subobj(object, objset, options)
464
+ end
465
+ end
466
+
467
+ objset
468
+ end
469
+
470
#
# Returns an array gathering the objects of every revision, in revision order.
#
def indirect_objects
  gathered = []
  @revisions.each { |rev| gathered.concat(rev.objects) }
  gathered
end
476
+ alias :root_objects :indirect_objects
477
+
478
#
# Adds a new object to the last revision of the PDF file.
# If this object has no number yet, a new one is automatically computed and assigned to it.
# It returns a Reference to this Object.
# _object_:: The object to add.
#
def <<(object)
  current_revision = @revisions.last
  add_to_revision(object, current_revision)
end
487
+ alias :insert :<<
488
+
489
#
# Adds a new object to a specific revision.
# The object is flagged as indirect and bound to this document. If its number
# is 0, a new number and generation are allocated for it.
# It returns a Reference to this Object.
# _object_:: The object to add.
# _revision_:: The revision to add the object to.
#
def add_to_revision(object, revision)
  object.set_indirect(true)
  object.set_pdf(self)

  if object.no == 0
    object.no, object.generation = alloc_new_object_number
  end

  body = revision.body
  body[object.reference] = object

  object.reference
end
507
+
508
#
# Returns a new [ number, generation ] pair for a future object.
# The number is one above the highest number in use among the indirect objects
# and the objects embedded in object streams, or 1 when none is in use.
# The generation is always 0.
#
def alloc_new_object_number
  # Deprecated number allocation policy (first available)
  #no = no + 1 while get_object(no)

  pool = indirect_objects
  object_streams = indirect_objects.find_all { |candidate| candidate.is_a?(ObjectStream) }
  object_streams.each do |objstm|
    objstm.each { |embedded| pool << embedded }
  end

  numbers = pool.collect { |obj| obj.no }.compact
  highest = numbers.max

  [ highest.nil? ? 1 : highest + 1, 0 ]
end
527
+
528
#
# This method is meant to recompute, verify and correct main PDF structures, in order to output a proper file.
# * Appends a page when the document has none.
# * Allocates objects references (see #physicalize).
# * Updates the header version, and the extension level when one is required.
# _options_:: Only forwarded to #physicalize, and only for encrypted documents.
# Returns self.
#
def compile(options = {})

  # A valid document must have at least one page.
  append_page if pages.empty?

  # Allocates object numbers and creates references.
  # Invokes object finalization methods.
  encrypted = self.is_a?(Encryption::EncryptedDocument)
  encrypted ? physicalize(options) : physicalize

  # Sets the PDF version header: first and third characters of the version string.
  version, level = version_required
  @header.majorversion = version[0,1].to_i
  @header.minorversion = version[2,1].to_i

  if level > 0
    set_extension_level(version, level)
  end

  self
end
561
+
562
+ #
563
+ # Returns the final binary representation of the current document.
564
+ # _rebuildxrefs_:: Computes xrefs while writing objects (default true).
565
+ # _obfuscate_:: Do some basic syntactic object obfuscation.
566
+ #
567
# Builds the binary representation of the document as a String: the header,
# then, for each revision up to options[:up_to_revision], its objects, its
# xref table (when xref tables are used) and its trailer.
# Raises InvalidPDFError when get_trailer_info returns nil.
+ def to_bin(params = {})
568
+
569
+ has_objstm = self.indirect_objects.any?{|obj| obj.is_a?(ObjectStream)}
570
+
# By default xref streams are used if, and only if, the document holds at
# least one ObjectStream; xref tables are used otherwise.
571
+ options =
572
+ {
573
+ :rebuildxrefs => true,
574
+ :noindent => false,
575
+ :obfuscate => false,
576
+ :use_xrefstm => has_objstm,
577
+ :use_xreftable => (not has_objstm),
578
+ :up_to_revision => @revisions.size
579
+ }
580
+ options.update(params)
581
+
# Clamp the requested revision count to the number of revisions available.
582
+ options[:up_to_revision] = @revisions.size if options[:up_to_revision] > @revisions.size
583
+
584
+ # Reset to default params if no xrefs are chosen (hybrid files not supported yet)
# The test below is an equality, so the reset also applies when both flags are set.
585
+ if options[:use_xrefstm] == options[:use_xreftable]
586
+ options[:use_xrefstm] = has_objstm
587
+ options[:use_xreftable] = (not has_objstm)
588
+ end
589
+
590
+ # Get trailer dictionary
591
+ trailer_info = get_trailer_info
592
+ if trailer_info.nil?
593
+ raise InvalidPDFError, "No trailer information found"
594
+ end
595
+ trailer_dict = trailer_info.dictionary
596
+
# Offset of the previous revision's xref data, written as Prev in the next revision.
597
+ prev_xref_offset = nil
598
+ xrefstm_offset = nil
599
+ xreftable_offset = nil
600
+
601
+ # Header
602
+ bin = ""
603
+ bin << @header.to_s
604
+
605
+ # For each revision
606
+ @revisions[0, options[:up_to_revision]].each do |rev|
607
+
608
+ # Create xref table/stream.
609
+ if options[:rebuildxrefs] == true
610
+ lastno_table, lastno_stm = 0, 0
611
+ brange_table, brange_stm = 0, 0
612
+
613
+ xrefs_stm = [ XRef.new(0, 0, XRef::FREE) ]
614
+ xrefs_table = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]
615
+
616
+ if options[:use_xreftable] == true
617
+ xrefsection = XRef::Section.new
618
+ end
619
+
620
+ if options[:use_xrefstm] == true
# Reuse (after clearing it) the revision's own xref stream if it has one;
# otherwise create a new one and register it in the revision.
621
+ xrefstm = rev.xrefstm || XRefStream.new
622
+ if xrefstm == rev.xrefstm
623
+ xrefstm.clear
624
+ else
625
+ add_to_revision(xrefstm, rev)
626
+ end
627
+ end
628
+ end
629
+
630
+ objset = rev.objects
631
+
# Objects embedded in object streams only get xref entries when xref streams are rebuilt.
632
+ objset.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
633
+ objset |= objstm.objects
634
+ end if options[:rebuildxrefs] == true and options[:use_xrefstm] == true
635
+
636
+ # For each object, in number order
637
+ objset.sort.each do |obj|
638
+
639
+ # Create xref entry.
640
+ if options[:rebuildxrefs] == true
641
+
642
+ # Adding subsections if needed
# A gap in the object numbers closes the current subsection and starts a new one at obj.no.
643
+ if options[:use_xreftable] and (obj.no - lastno_table).abs > 1
644
+ xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
645
+
# NOTE(review): xrefs_table is cleared right after being handed to the
# Subsection; this is only safe if XRef::Subsection copies the array. Confirm.
646
+ xrefs_table.clear
647
+ brange_table = obj.no
648
+ end
649
+ if options[:use_xrefstm] and (obj.no - lastno_stm).abs > 1
650
+ xrefs_stm.each do |xref| xrefstm << xref end
651
+ xrefstm.Index ||= []
652
+ xrefstm.Index << brange_stm << xrefs_stm.length
653
+
654
+ xrefs_stm.clear
655
+ brange_stm = obj.no
656
+ end
657
+
658
+ # Process embedded objects
659
+ if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
660
+ index = obj.parent.index(obj.no)
661
+
662
+ xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
663
+
664
+ lastno_stm = obj.no
665
+ else
# Regular object: its offset is the current size of the output.
666
+ xrefs_stm << XRef.new(bin.size, obj.generation, XRef::USED)
667
+ xrefs_table << XRef.new(bin.size, obj.generation, XRef::USED)
668
+
669
+ lastno_table = lastno_stm = obj.no
670
+ end
671
+
672
+ end
673
+
# Only objects that are not embedded in an object stream are written out directly.
674
+ if obj.parent == obj or not obj.parent.is_a?(ObjectStream)
675
+
676
+ # Finalize XRefStm
677
+ if options[:rebuildxrefs] == true and options[:use_xrefstm] == true and obj == xrefstm
678
+ xrefstm_offset = bin.size
679
+
680
+ xrefs_stm.each do |xref| xrefstm << xref end
681
+
# The middle width is the number of bytes needed to encode the xref stream's own offset.
682
+ xrefstm.W = [ 1, (xrefstm_offset.to_s(2).size + 7) >> 3, 2 ]
683
+ if xrefstm.DecodeParms.is_a?(Dictionary) and xrefstm.DecodeParms.has_key?(:Columns)
684
+ xrefstm.DecodeParms[:Columns] = xrefstm.W[0] + xrefstm.W[1] + xrefstm.W[2]
685
+ end
686
+
687
+ xrefstm.Index ||= []
688
+ xrefstm.Index << brange_stm << xrefs_stm.size
689
+
# The xref stream dictionary takes over the trailer entries, so the revision's
# trailer dictionary is dropped.
690
+ xrefstm.dictionary = xrefstm.dictionary.merge(trailer_dict)
691
+ xrefstm.Prev = prev_xref_offset
692
+ rev.trailer.dictionary = nil
693
+
694
+ add_to_revision(xrefstm, rev)
695
+
696
+ xrefstm.pre_build
697
+ xrefstm.post_build
698
+ end
699
+
700
+ # Output object code
701
+ if (obj.is_a?(Dictionary) or obj.is_a?(Stream)) and options[:noindent]
702
+ bin << obj.to_s(0)
703
+ else
704
+ bin << obj.to_s
705
+ end
706
+ end
707
+ end
708
+
709
+ rev.trailer ||= Trailer.new
710
+
711
+ # XRef table
712
+ if options[:rebuildxrefs] == true
713
+
714
+ if options[:use_xreftable] == true
715
+ table_offset = bin.size
716
+
717
+ xrefsection << XRef::Subsection.new(brange_table, xrefs_table)
718
+ rev.xreftable = xrefsection
719
+
720
+ rev.trailer.dictionary = trailer_dict
# NOTE(review): Size is derived from the number of objects, not from the highest
# object number; confirm this is intended when the numbering has gaps.
721
+ rev.trailer.Size = objset.size + 1
722
+ rev.trailer.Prev = prev_xref_offset
723
+
# NOTE(review): use_xrefstm and use_xreftable always differ once the reset near
# the top of the method has run, so this assignment looks unreachable for now.
724
+ rev.trailer.XRefStm = xrefstm_offset if options[:use_xrefstm] == true
725
+ end
726
+
727
+ startxref = options[:use_xreftable] == true ? table_offset : xrefstm_offset
728
+ rev.trailer.startxref = prev_xref_offset = startxref
729
+
# NOTE(review): despite its trailing comment, the `end` on the next line closes
# `if options[:rebuildxrefs] == true`; the per-revision loop is closed further down.
730
+ end # end each rev
731
+
732
+ # Trailer
733
+ bin << rev.xreftable.to_s if options[:use_xreftable] == true
734
+ bin << (options[:obfuscate] == true ? rev.trailer.to_obfuscated_str : rev.trailer.to_s)
735
+
736
+ end
737
+
738
+ bin
739
+ end
740
+
741
#
# Compute and update XRef::Section for each Revision.
# The trailer of each revision (created when missing) gets its Size and
# startxref updated along the way.
# Returns self.
#
def rebuildxrefs
  object_count = 0
  offset = @header.to_s.size

  @revisions.each do |revision|
    # The xref data of a revision starts right after its objects.
    offset = revision.objects.inject(offset) { |sum, object| sum + object.to_s.size }

    object_count += revision.body.size
    revision.xreftable = buildxrefs(revision.objects)

    revision.trailer ||= Trailer.new
    revision.trailer.Size = object_count + 1
    revision.trailer.startxref = offset

    # Skip over this revision's xref table and trailer.
    offset += revision.xreftable.to_s.size + revision.trailer.to_s.size
  end

  self
end
767
+
768
#
# Ends the current Revision, and starts a new one.
# The new revision gets a fresh trailer whose Root is taken from the trailer
# of the previous revision (nil when there was no revision yet).
# Returns self.
#
def add_new_revision
  root = @revisions.empty? ? nil : @revisions.last.trailer[:Root]

  revision = Revision.new(self)
  @revisions << revision

  revision.trailer = Trailer.new
  revision.trailer.Root = root

  self
end
781
+
782
#
# Removes a whole document revision.
# _index_:: Revision index, first is 0.
# Returns self.
# Raises IndexError when _index_ does not designate an existing revision,
# and InvalidPDFError when the document has only one revision left.
#
def remove_revision(index)
  # Valid indexes run from 0 to size - 1: index == size must be rejected too,
  # otherwise delete_at would silently remove nothing.
  if index < 0 or index >= @revisions.size
    raise IndexError, "Not a valid revision index"
  end

  if @revisions.size == 1
    raise InvalidPDFError, "Cannot remove last revision"
  end

  @revisions.delete_at(index)
  self
end
798
+
799
#
# Returns the first indirect object whose file offset equals _offset_, or nil when there is none.
#
def get_object_by_offset(offset) #:nodoc:
  indirect_objects.detect do |candidate|
    candidate.file_offset == offset
  end
end
805
+
806
#
# Removes an object from the body of every revision.
# _no_:: Reference or number of the object.
# _generation_:: Object generation, only used when _no_ is a number.
# Raises TypeError when _no_ is neither a Reference nor an Integer.
#
def delete_object(no, generation = 0)
  target =
    if no.is_a?(Reference)
      no
    elsif no.is_a?(::Integer)
      Reference.new(no, generation)
    else
      raise TypeError, "Invalid parameter type : #{no.class}"
    end

  @revisions.each { |rev| rev.body.delete(target) }
end
825
+
826
#
# Search for an indirect object in the document.
# _no_:: Reference or number of the object. An Origami::Object is returned as is.
# _generation_:: Object generation, only used when _no_ is a number.
# _use_xrefstm_:: When true, objects missing from the indirect objects table are also looked up through XRef streams and inside object streams.
# Returns the object, or nil when it cannot be found.
# Raises TypeError when _no_ has an unsupported type.
#
def get_object(no, generation = 0, use_xrefstm = true) #:nodoc:
  target =
    if no.is_a?(Reference)
      no
    elsif no.is_a?(::Integer)
      Reference.new(no, generation)
    elsif no.is_a?(Origami::Object)
      return no
    else
      raise TypeError, "Invalid parameter type : #{no.class}"
    end

  table = indirect_objects_table

  # Search through accessible indirect objects.
  return table[target] if table.include?(target)
  return nil unless use_xrefstm == true

  # Look into XRef streams, starting from the last revision and following
  # the Prev links; visited streams are remembered to avoid looping forever.
  if @revisions.last.has_xrefstm?
    xrefstm = @revisions.last.xrefstm
    visited = []

    while xrefstm.is_a?(XRefStream) and not visited.include?(xrefstm)
      xref = xrefstm.find(target.refno)

      # We found a matching XRef.
      if xref.is_a?(XRefToCompressedObj)
        objstm = get_object(xref.objstmno, 0, false)
        object = objstm.extract_by_index(xref.index)

        if object.is_a?(Origami::Object) and object.no == target.refno
          return object
        end
        return objstm.extract(target.refno)
      end

      break unless xrefstm.has_field?(:Prev)

      visited << xrefstm
      xrefstm = get_object_by_offset(xrefstm.Prev)
    end
  end

  # Lastly search directly into Object streams (might be very slow).
  object_streams = table.values.find_all { |obj| obj.is_a?(ObjectStream) }
  holder = object_streams.find { |objstm| objstm.include?(target.refno) }
  holder && holder.extract(target.refno)
end
889
+
890
+ alias :[] :get_object
891
+
892
+ #
893
+ # Converts a logical PDF view into a physical view ready for writing.
894
+ #
895
# Walks every (object, revision) pair yielded by indirect_objects_by_rev and
# builds it; returns self.
# NOTE(review): this method takes no argument, while #compile calls
# physicalize(options) for Encryption::EncryptedDocument instances; presumably
# that module provides its own version accepting options. Confirm.
+ def physicalize
896
+
897
+ #
898
+ # Indirect objects are added to the revision and assigned numbers.
899
+ #
# The helper below is declared with a nested def, so it is (re)defined each time physicalize runs.
900
+ def build(obj, revision) #:nodoc:
901
+
902
+ #
903
+ # Finalize any subobjects before building the stream.
904
+ #
905
+ if obj.is_a?(ObjectStream)
906
+ obj.each do |subobj|
907
+ build(subobj, revision)
908
+ end
909
+ end
910
+
911
+ obj.pre_build
912
+
913
+ if obj.is_a?(Dictionary) or obj.is_a?(Array)
914
+
# First pass: every indirect child is replaced in place by its reference.
# A child the document does not know yet is registered in the revision and
# built before its reference is stored.
915
+ obj.map! do |subobj|
916
+ if subobj.is_indirect?
917
+ if get_object(subobj.reference)
918
+ subobj.reference
919
+ else
920
+ ref = add_to_revision(subobj, revision)
921
+ build(subobj, revision)
922
+ ref
923
+ end
924
+ else
925
+ subobj
926
+ end
927
+ end
928
+
# Second pass: build what the container holds now, i.e. the remaining direct
# children and the references stored by the first pass.
929
+ obj.each do |subobj|
930
+ build(subobj, revision)
931
+ end
932
+
933
+ elsif obj.is_a?(Stream)
# For a stream, only its dictionary is walked.
934
+ build(obj.dictionary, revision)
935
+ end
936
+
937
+ obj.post_build
938
+
939
+ end
940
+
941
+ indirect_objects_by_rev.each do |obj, revision|
942
+ build(obj, revision)
943
+ end
944
+
945
+ self
946
+ end
947
+
948
#
# Cleans the document from its references.
# Indirect objects are made direct whenever possible.
# TODO: Circuit-checking to avoid infinite induction
#
# Currently disabled: the method fails immediately, so the draft
# implementation below the +fail+ is never executed.
#
def logicalize #:nodoc:

  fail "Not yet supported"

  processed = []

  # Draft helper: inlines the target of every Reference found in a
  # Dictionary/Array and returns the references it replaced.
  def convert(root) #:nodoc:
    replaced = []
    return replaced unless root.is_a?(Dictionary) || root.is_a?(Array)

    root.each { |obj| convert(obj) }

    root.map! do |obj|
      next obj unless obj.is_a?(Reference)

      target = obj.solve
      # Streams can't be direct objects
      if target.is_a?(Stream)
        obj
      else
        replaced << obj
        target
      end
    end

    replaced
  end

  @revisions.each do |revision|
    revision.objects.each { |obj| processed.concat(convert(obj)) }
  end

end
995
+
996
+ ##########################
997
+ private
998
+ ##########################
999
+
1000
#
# Instantiates the basic structures required for a valid PDF file.
#
# Uses the catalog found through get_doc_attr(:Root) or a new Catalog,
# gives it a new indirect PageTreeNode as Pages, and points the trailer
# Root of the last revision at the catalog. Returns self.
#
def init
  root = get_doc_attr(:Root) || Catalog.new
  self.Catalog = root

  root.Pages = PageTreeNode.new.set_indirect(true)
  @revisions.last.trailer.Root = root.reference

  self
end
1010
+
1011
# Returns the greatest value (by <=>) of pdf_version_required over every
# object of every revision, starting from [ 1.0, 0 ], with its first
# element converted to a String.
#
# The result is a copy: the array returned by pdf_version_required is
# never modified, since it belongs to the object that returned it.
def version_required #:nodoc:

  max = [ 1.0, 0 ]
  @revisions.each do |revision|
    revision.objects.each do |object|
      current = object.pdf_version_required
      max = current if (current <=> max) > 0
    end
  end

  # Stringify the first element on a duplicate rather than in place.
  required = max.dup
  required[0] = required[0].to_s

  required
end
1024
+
1025
# Merges the body of every revision, in order, into one new Hash.
# When a key appears in several revisions, the later revision wins.
def indirect_objects_table #:nodoc:
  # update mutates only the fresh accumulator, so no intermediate copy of
  # the table is allocated per revision and revision bodies stay untouched.
  @revisions.inject({}) { |table, rev| table.update(rev.body) }
end
1028
+
1029
# Returns a flat array of [object, revision] pairs covering the objects
# of every revision, in revision order.
def indirect_objects_by_rev #:nodoc:
  @revisions.inject([]) do |pairs, rev|
    pairs.concat(rev.objects.map { |obj| [obj, rev] })
  end
end
1035
+
1036
#
# Compute and update XRef::Section for each Revision.
#
# Each revision receives a placeholder xref table (free entries only) and
# a trailer (created if absent) whose Size and startxref are derived from
# the string sizes of the header, objects, tables and trailers seen so
# far. Returns self.
#
def rebuild_dummy_xrefs #:nodoc:

  # Builds a section of placeholder entries, opening a new subsection
  # whenever object numbers are not consecutive. Kept as a lambda so that
  # no helper method is (re)defined on the class at each call.
  build_dummy_xrefs = lambda do |objects|

    lastno = 0
    brange = 0

    xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]

    xrefsection = XRef::Section.new
    objects.sort.each do |object|
      if (object.no - lastno).abs > 1
        xrefsection << XRef::Subsection.new(brange, xrefs)
        brange = object.no
        # Start a new array instead of clearing the one just handed over,
        # so the result does not depend on Subsection copying its entries.
        xrefs = []
      end

      xrefs << XRef.new(0, 0, XRef::FREE)

      lastno = object.no
    end

    xrefsection << XRef::Subsection.new(brange, xrefs)

    xrefsection
  end

  size = 0
  startxref = @header.to_s.size

  @revisions.each do |revision|
    revision.objects.each { |object| startxref += object.to_s.size }

    size += revision.body.size
    revision.xreftable = build_dummy_xrefs.call(revision.objects)

    revision.trailer ||= Trailer.new
    revision.trailer.Size = size + 1
    revision.trailer.startxref = startxref

    # The next revision starts after this revision's table and trailer.
    startxref += revision.xreftable.to_s.size + revision.trailer.to_s.size
  end

  self
end
1086
+
1087
#
# Build a xref section from a set of objects.
#
# Objects are taken in sorted order; a new subsection is opened whenever
# two successive object numbers are not consecutive. Every object gets a
# USED entry holding the offset given by get_object_offset and its
# generation. The first subsection starts with the head free entry.
#
def buildxrefs(objects) #:nodoc:

  lastno = 0
  brange = 0

  xrefs = [ XRef.new(0, XRef::FIRSTFREE, XRef::FREE) ]

  xrefsection = XRef::Section.new
  objects.sort.each do |object|
    if (object.no - lastno).abs > 1
      xrefsection << XRef::Subsection.new(brange, xrefs)
      brange = object.no
      # Start a new array instead of clearing the one just handed over,
      # so the result does not depend on Subsection copying its entries.
      xrefs = []
    end

    offset = get_object_offset(object.no, object.generation)
    xrefs << XRef.new(offset, object.generation, XRef::USED)

    lastno = object.no
  end

  xrefsection << XRef::Subsection.new(brange, xrefs)

  xrefsection
end
1114
+
1115
# Removes the revision at index +ngen+ from the document and returns it
# (nil when the index is out of range).
def delete_revision(ngen) #:nodoc:
  # delete_at takes the index as an argument; it is not an indexable
  # collection.
  @revisions.delete_at(ngen)
end
1118
+
1119
# Returns the body of the revision stored at index +ngen+.
def get_revision(ngen) #:nodoc:
  revision = @revisions[ngen]
  revision.body
end
1122
+
1123
# Returns the offset of the object with number +no+ and generation
# +generation+, or nil when no revision holds such an object.
#
# The offset is the sum of the string sizes of everything that precedes
# the object: the header, the objects sorted before it, and the xref
# table and trailer of each earlier revision.
def get_object_offset(no,generation) #:nodoc:
  offset = @header.to_s.size

  @revisions.each do |revision|
    revision.objects.sort.each do |object|
      return offset if object.no == no && object.generation == generation

      offset += object.to_s.size
    end

    offset += revision.xreftable.to_s.size + revision.trailer.to_s.size
  end

  nil
end
1140
+
1141
+ end
1142
+
1143
+ end
1144
+