origami 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (80)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/bin/gui/config.rb +2 -1
  4. data/bin/gui/file.rb +118 -240
  5. data/bin/gui/gtkhex.rb +5 -5
  6. data/bin/gui/hexview.rb +20 -16
  7. data/bin/gui/imgview.rb +1 -1
  8. data/bin/gui/menu.rb +138 -158
  9. data/bin/gui/properties.rb +46 -48
  10. data/bin/gui/signing.rb +183 -214
  11. data/bin/gui/textview.rb +1 -1
  12. data/bin/gui/treeview.rb +13 -7
  13. data/bin/gui/walker.rb +102 -71
  14. data/bin/gui/xrefs.rb +1 -1
  15. data/bin/pdf2ruby +3 -3
  16. data/bin/pdfcop +18 -11
  17. data/bin/pdfextract +14 -5
  18. data/bin/pdfmetadata +3 -3
  19. data/bin/shell/console.rb +8 -8
  20. data/bin/shell/hexdump.rb +4 -4
  21. data/examples/attachments/nested_document.rb +1 -1
  22. data/examples/javascript/hello_world.rb +3 -3
  23. data/lib/origami.rb +0 -1
  24. data/lib/origami/acroform.rb +3 -3
  25. data/lib/origami/array.rb +1 -3
  26. data/lib/origami/boolean.rb +1 -3
  27. data/lib/origami/catalog.rb +3 -9
  28. data/lib/origami/destinations.rb +2 -2
  29. data/lib/origami/dictionary.rb +15 -29
  30. data/lib/origami/encryption.rb +334 -692
  31. data/lib/origami/extensions/fdf.rb +3 -2
  32. data/lib/origami/extensions/ppklite.rb +5 -9
  33. data/lib/origami/filespec.rb +2 -2
  34. data/lib/origami/filters.rb +54 -36
  35. data/lib/origami/filters/ascii.rb +67 -49
  36. data/lib/origami/filters/ccitt.rb +4 -236
  37. data/lib/origami/filters/ccitt/tables.rb +267 -0
  38. data/lib/origami/filters/crypt.rb +1 -1
  39. data/lib/origami/filters/dct.rb +0 -1
  40. data/lib/origami/filters/flate.rb +3 -43
  41. data/lib/origami/filters/lzw.rb +62 -99
  42. data/lib/origami/filters/predictors.rb +135 -105
  43. data/lib/origami/filters/runlength.rb +34 -22
  44. data/lib/origami/graphics.rb +2 -2
  45. data/lib/origami/graphics/colors.rb +89 -63
  46. data/lib/origami/graphics/path.rb +14 -14
  47. data/lib/origami/graphics/patterns.rb +31 -33
  48. data/lib/origami/graphics/render.rb +0 -1
  49. data/lib/origami/graphics/state.rb +9 -9
  50. data/lib/origami/graphics/text.rb +17 -17
  51. data/lib/origami/graphics/xobject.rb +102 -92
  52. data/lib/origami/javascript.rb +91 -68
  53. data/lib/origami/linearization.rb +22 -20
  54. data/lib/origami/metadata.rb +1 -1
  55. data/lib/origami/name.rb +1 -3
  56. data/lib/origami/null.rb +1 -3
  57. data/lib/origami/numeric.rb +3 -13
  58. data/lib/origami/object.rb +100 -72
  59. data/lib/origami/page.rb +24 -28
  60. data/lib/origami/parser.rb +34 -51
  61. data/lib/origami/parsers/fdf.rb +2 -2
  62. data/lib/origami/parsers/pdf.rb +41 -18
  63. data/lib/origami/parsers/pdf/lazy.rb +83 -46
  64. data/lib/origami/parsers/pdf/linear.rb +19 -10
  65. data/lib/origami/parsers/ppklite.rb +1 -1
  66. data/lib/origami/pdf.rb +150 -206
  67. data/lib/origami/reference.rb +4 -6
  68. data/lib/origami/signature.rb +76 -48
  69. data/lib/origami/stream.rb +69 -63
  70. data/lib/origami/string.rb +2 -19
  71. data/lib/origami/trailer.rb +25 -22
  72. data/lib/origami/version.rb +1 -1
  73. data/lib/origami/xfa.rb +6 -4
  74. data/lib/origami/xreftable.rb +29 -29
  75. data/test/test_annotations.rb +16 -38
  76. data/test/test_pdf_attachment.rb +1 -1
  77. data/test/test_pdf_parse.rb +1 -1
  78. data/test/test_xrefs.rb +2 -2
  79. metadata +4 -4
  80. data/lib/origami/export.rb +0 -247
@@ -54,16 +54,7 @@ module Origami
54
54
  trailer = parse_trailer
55
55
  pdf.revisions.last.trailer = trailer
56
56
 
57
- if trailer.startxref != 0
58
- xrefstm = pdf.get_object_by_offset(trailer.startxref)
59
- elsif trailer[:XRefStm].is_a?(Integer)
60
- xrefstm = pdf.get_object_by_offset(trailer[:XRefStm])
61
- end
62
-
63
- if xrefstm.is_a?(XRefStream)
64
- warn "Found a XRefStream for this revision at #{xrefstm.reference}"
65
- pdf.revisions.last.xrefstm = xrefstm
66
- end
57
+ locate_xref_stream(pdf, pdf.revisions.last)
67
58
 
68
59
  rescue
69
60
  error "Cannot read : " + (@data.peek(10) + "...").inspect
@@ -77,6 +68,24 @@ module Origami
77
68
 
78
69
  parse_finalize(pdf)
79
70
  end
71
+
72
+ private
73
+
74
+ def locate_xref_stream(pdf, revision)
75
+ trailer = revision.trailer
76
+
77
+ # Try to match the location of the last startxref / XRefStm with an XRefStream.
78
+ if trailer.startxref != 0
79
+ xrefstm = pdf.get_object_by_offset(trailer.startxref)
80
+ else
81
+ xrefstm = pdf.get_object_by_offset(trailer[:XRefStm])
82
+ end
83
+
84
+ if xrefstm.is_a?(XRefStream)
85
+ warn "Found a XRefStream for this revision at #{xrefstm.reference}"
86
+ revision.xrefstm = xrefstm
87
+ end
88
+ end
80
89
  end
81
90
  end
82
91
 
@@ -44,7 +44,7 @@ module Origami
44
44
  trailer = address_book.revisions.first.trailer
45
45
 
46
46
  if trailer[:Root].is_a?(Reference)
47
- address_book.cast_object(trailer[:Root], PPKLite::Catalog, self)
47
+ address_book.cast_object(trailer[:Root], PPKLite::Catalog)
48
48
  end
49
49
 
50
50
  propagate_types(address_book)
@@ -45,8 +45,6 @@ require 'origami/actions'
45
45
  require 'origami/3d'
46
46
  require 'origami/signature'
47
47
  require 'origami/webcapture'
48
- require 'origami/export'
49
- require 'origami/webcapture'
50
48
  require 'origami/encryption'
51
49
  require 'origami/linearization'
52
50
  require 'origami/obfuscation'
@@ -147,15 +145,6 @@ module Origami
147
145
  pdf.save(output, options)
148
146
  end
149
147
  alias write create
150
-
151
- #
152
- # Deserializes a PDF dump.
153
- #
154
- def deserialize(filename)
155
- Zlib::GzipReader.open(filename) { |gz|
156
- return Marshal.load(gz.read)
157
- }
158
- end
159
148
  end
160
149
 
161
150
  #
@@ -166,16 +155,13 @@ module Origami
166
155
  def initialize(parser = nil)
167
156
  @header = PDF::Header.new
168
157
  @revisions = []
158
+ @parser = parser
159
+ @loaded = false
169
160
 
170
161
  add_new_revision
171
162
  @revisions.first.trailer = Trailer.new
172
163
 
173
- if parser
174
- @loaded = false
175
- @parser = parser
176
- else
177
- init
178
- end
164
+ init if parser.nil?
179
165
  end
180
166
 
181
167
  #
@@ -199,21 +185,6 @@ module Origami
199
185
  @parser.target_data if @parser
200
186
  end
201
187
 
202
- #
203
- # Serializes the current PDF.
204
- #
205
- def serialize(filename)
206
- parser = @parser
207
- @parser = nil # do not serialize the parser
208
-
209
- Zlib::GzipWriter.open(filename) { |gz|
210
- gz.write Marshal.dump(self)
211
- }
212
-
213
- @parser = parser
214
- self
215
- end
216
-
217
188
  #
218
189
  # Saves the current document.
219
190
  # _filename_:: The path where to save this PDF.
@@ -277,35 +248,9 @@ module Origami
277
248
 
278
249
  result = []
279
250
 
280
- search_object = -> (object) do
281
- case object
282
- when Stream
283
- result.concat object.dictionary.strings_cache.select{|str| pattern === str}
284
- result.concat object.dictionary.names_cache.select{|name| pattern === name.value}
285
-
286
- begin
287
- result.push object if streams and object.data.match(pattern)
288
- rescue Filter::Error
289
- next # Skip object if a decoding error occured.
290
- end
291
-
292
- next if object.is_a?(ObjectStream) and not object_streams
293
-
294
- object.each do |subobject|
295
- search_object.call(subobject)
296
- end
297
-
298
- when Name, String
299
- result.push object if object.value.match(pattern)
300
-
301
- when Dictionary, Array then
302
- result.concat object.strings_cache.select{|str| pattern === str}
303
- result.concat object.names_cache.select{|name| pattern === name.value}
304
- end
305
- end
306
-
307
251
  self.indirect_objects.each do |object|
308
- search_object.call(object)
252
+ result.concat search_object(object, pattern,
253
+ streams: streams, object_streams: object_streams)
309
254
  end
310
255
 
311
256
  result
@@ -329,42 +274,22 @@ module Origami
329
274
  # _compressed_: iterates over the objects inside object streams.
330
275
  # _recursive_: iterates recursively inside objects like arrays and dictionaries.
331
276
  #
332
- def each_object(compressed: false, recursive: false)
277
+ def each_object(compressed: false, recursive: false, &block)
333
278
  return enum_for(__method__, compressed: compressed,
334
279
  recursive: recursive
335
280
  ) unless block_given?
336
281
 
337
- walk_object = -> (object) do
338
- case object
339
- when Dictionary
340
- object.each_value do |value|
341
- yield(value)
342
- walk_object.call(value)
343
- end
344
-
345
- when Array
346
- object.each do |child|
347
- yield(child)
348
- walk_object.call(child)
349
- end
350
-
351
- when Stream
352
- yield(object.dictionary)
353
- walk_object.call(object.dictionary)
354
- end
355
- end
356
-
357
282
  @revisions.each do |revision|
358
283
  revision.each_object do |object|
359
- yield(object)
284
+ block.call(object)
360
285
 
361
- walk_object.call(object) if recursive
286
+ walk_object(object, &block) if recursive
362
287
 
363
288
  if object.is_a?(ObjectStream) and compressed
364
289
  object.each do |child_obj|
365
- yield(child_obj)
290
+ block.call(child_obj)
366
291
 
367
- walk_object.call(child_obj) if recursive
292
+ walk_object(child_obj) if recursive
368
293
  end
369
294
  end
370
295
  end
@@ -539,7 +464,7 @@ module Origami
539
464
  #
540
465
  # We found a matching XRef.
541
466
  #
542
- if xref.is_a?(XRefToCompressedObj)
467
+ if xref.is_a?(XRefToCompressedObject)
543
468
  objstm = get_object(xref.objstmno, 0, use_xrefstm: use_xrefstm)
544
469
 
545
470
  object = objstm.extract_by_index(xref.index)
@@ -570,35 +495,35 @@ module Origami
570
495
  # Casts a PDF object into another object type.
571
496
  # The target type must be a subtype of the original type.
572
497
  #
573
- def cast_object(reference, type, parser = nil) #:nodoc:
498
+ def cast_object(reference, type) #:nodoc:
574
499
  @revisions.each do |rev|
575
- if rev.body.include?(reference) and type < rev.body[reference].class
576
- rev.body[reference] = rev.body[reference].cast_to(type, parser)
500
+ if rev.body.include?(reference)
501
+ object = rev.body[reference]
502
+ return object if object.is_a?(type)
577
503
 
578
- rev.body[reference]
579
- else
580
- nil
504
+ if type < rev.body[reference].class
505
+ rev.body[reference] = object.cast_to(type, @parser)
506
+
507
+ return rev.body[reference]
508
+ end
581
509
  end
582
510
  end
511
+
512
+ nil
583
513
  end
584
514
 
585
515
  #
586
516
  # Returns a new number/generation for future object.
587
517
  #
588
518
  def allocate_new_object_number
589
- no = 1
590
519
 
591
- # Deprecated number allocation policy (first available)
592
- #no = no + 1 while get_object(no)
593
-
594
- objset = self.indirect_objects
595
- self.indirect_objects.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
596
- objstm.each{|obj| objset << obj}
520
+ last_object = self.each_object(compressed: true).max_by {|object| object.no }
521
+ if last_object.nil?
522
+ no = 1
523
+ else
524
+ no = last_object.no + 1
597
525
  end
598
526
 
599
- allocated = objset.collect{|obj| obj.no}.compact
600
- no = allocated.max + 1 unless allocated.empty?
601
-
602
527
  [ no, 0 ]
603
528
  end
604
529
 
@@ -614,6 +539,70 @@ module Origami
614
539
  private
615
540
  ##########################
616
541
 
542
+ #
543
+ # Iterates over the children of an object, avoiding cycles.
544
+ #
545
+ def walk_object(object, excludes: [])
546
+ return enum_for(__method__, object, excludes: excludes) unless block_given?
547
+
548
+ return if excludes.include?(object)
549
+ excludes.push(object)
550
+
551
+ case object
552
+ when Dictionary
553
+ object.each_value do |value|
554
+ yield(value)
555
+ walk_object(value, excludes: excludes)
556
+ end
557
+
558
+ when Array
559
+ object.each do |child|
560
+ yield(child)
561
+ walk_object(child, excludes: excludes)
562
+ end
563
+
564
+ when Stream
565
+ yield(object.dictionary)
566
+ walk_object(object.dictionary, excludes: excludes)
567
+ end
568
+ end
569
+
570
+ #
571
+ # Searches through an object, possibly going into object streams.
572
+ # Returns an array of matching strings, names and streams.
573
+ #
574
+ def search_object(object, pattern, streams: true, object_streams: true)
575
+ result = []
576
+
577
+ case object
578
+ when Stream
579
+ result.concat object.dictionary.strings_cache.select{|str| pattern === str}
580
+ result.concat object.dictionary.names_cache.select{|name| pattern === name.value}
581
+
582
+ begin
583
+ result.push object if streams and object.data.match(pattern)
584
+ rescue Filter::Error
585
+ return result # Skip object if a decoding error occured.
586
+ end
587
+
588
+ return result unless object.is_a?(ObjectStream) and object_streams
589
+
590
+ object.each do |child|
591
+ result.concat search_object(child, pattern,
592
+ streams: streams, object_streams: object_streams)
593
+ end
594
+
595
+ when Name, String
596
+ result.push object if object.value.match(pattern)
597
+
598
+ when Dictionary, Array
599
+ result.concat object.strings_cache.select{|str| pattern === str}
600
+ result.concat object.names_cache.select{|name| pattern === name.value}
601
+ end
602
+
603
+ result
604
+ end
605
+
617
606
  #
618
607
  # Load an object from its given file offset.
619
608
  # The document must have an associated Parser.
@@ -627,19 +616,7 @@ module Origami
627
616
  return nil if object.nil?
628
617
 
629
618
  if self.is_a?(Encryption::EncryptedDocument)
630
- case object
631
- when String
632
- object.extend(Encryption::EncryptedString)
633
- object.decrypted = false
634
- when Stream
635
- object.extend(Encryption::EncryptedStream)
636
- object.decrypted = false
637
- when Dictionary, Array
638
- object.strings_cache.each do |string|
639
- string.extend(Encryption::EncryptedString)
640
- string.decrypted = false
641
- end
642
- end
619
+ make_encrypted_object(object)
643
620
  end
644
621
 
645
622
  add_to_revision(object, revision)
@@ -650,6 +627,22 @@ module Origami
650
627
  object
651
628
  end
652
629
 
630
+ #
631
+ # Method called on encrypted objects loaded into the document.
632
+ #
633
+ def make_encrypted_object(object)
634
+ case object
635
+ when String
636
+ object.extend(Encryption::EncryptedString)
637
+ when Stream
638
+ object.extend(Encryption::EncryptedStream)
639
+ when Dictionary, Array
640
+ object.strings_cache.each do |string|
641
+ string.extend(Encryption::EncryptedString)
642
+ end
643
+ end
644
+ end
645
+
653
646
  #
654
647
  # Force the loading of all objects in the document.
655
648
  #
@@ -665,8 +658,8 @@ module Origami
665
658
  next
666
659
  end
667
660
 
668
- xrefs.each_with_number do |_, no|
669
- self.get_object(no)
661
+ xrefs.each_with_number do |xref, no|
662
+ self.get_object(no) unless xref.free?
670
663
  end
671
664
  end
672
665
 
@@ -716,11 +709,7 @@ module Origami
716
709
  # Allocates object numbers and creates references.
717
710
  # Invokes object finalization methods.
718
711
  #
719
- if self.is_a?(Encryption::EncryptedDocument)
720
- physicalize(options)
721
- else
722
- physicalize
723
- end
712
+ physicalize(options)
724
713
 
725
714
  #
726
715
  # Sets the PDF version header.
@@ -735,100 +724,58 @@ module Origami
735
724
  end
736
725
 
737
726
  #
738
- # Cleans the document from its references.
739
- # Indirects objects are made direct whenever possible.
740
- # TODO: Circuit-checking to avoid infinite induction
727
+ # Converts a logical PDF view into a physical view ready for writing.
741
728
  #
742
- def logicalize #:nodoc:
743
- raise NotImplementedError
729
+ def physicalize(options = {})
744
730
 
745
- processed = []
746
-
747
- convert = -> (root) do
748
- replaced = []
749
- if root.is_a?(Dictionary) or root.is_a?(Array)
750
- root.each do |obj|
751
- convert[obj]
752
- end
753
-
754
- root.map! do |obj|
755
- if obj.is_a?(Reference)
756
- target = obj.solve
757
- # Streams can't be direct objects
758
- if target.is_a?(Stream)
759
- obj
760
- else
761
- replaced << obj
762
- target
763
- end
764
- else
765
- obj
766
- end
767
- end
768
- end
769
-
770
- replaced
731
+ indirect_objects_by_rev.each do |obj, revision|
732
+ build_object(obj, revision, options)
771
733
  end
772
734
 
773
- @revisions.each do |revision|
774
- revision.objects.each do |obj|
775
- processed.concat(convert[obj])
776
- end
777
- end
735
+ self
778
736
  end
779
737
 
780
- #
781
- # Converts a logical PDF view into a physical view ready for writing.
782
- #
783
- def physicalize
784
-
785
- #
786
- # Indirect objects are added to the revision and assigned numbers.
787
- #
788
- build = -> (obj, revision) do
789
- #
790
- # Finalize any subobjects before building the stream.
791
- #
792
- if obj.is_a?(ObjectStream)
793
- obj.each do |subobj|
794
- build.call(subobj, revision)
795
- end
738
+ def build_object(object, revision, options)
739
+ # Build any compressed object before building the object stream.
740
+ if object.is_a?(ObjectStream)
741
+ object.each do |compressed_obj|
742
+ build_object(compressed_obj, revision, options)
796
743
  end
744
+ end
797
745
 
798
- obj.pre_build
746
+ object.pre_build
799
747
 
800
- if obj.is_a?(Dictionary) or obj.is_a?(Array)
748
+ case object
749
+ when Stream
750
+ build_object(object.dictionary, revision, options)
751
+ when Dictionary, Array
752
+ build_compound_object(object, revision, options)
753
+ end
801
754
 
802
- obj.map! do |subobj|
803
- if subobj.indirect?
804
- if get_object(subobj.reference)
805
- subobj.reference
806
- else
807
- ref = add_to_revision(subobj, revision)
808
- build.call(subobj, revision)
809
- ref
810
- end
811
- else
812
- subobj
813
- end
814
- end
755
+ object.post_build
756
+ end
815
757
 
816
- obj.each do |subobj|
817
- build.call(subobj, revision)
818
- end
758
+ def build_compound_object(object, revision, options)
759
+ return unless object.is_a?(Dictionary) or object.is_a?(Array)
819
760
 
820
- elsif obj.is_a?(Stream)
821
- build.call(obj.dictionary, revision)
822
- end
761
+ # Flatten the object by adding indirect objects to the revision and
762
+ # replacing them with their reference.
763
+ object.map! do |child|
764
+ next(child) unless child.indirect?
823
765
 
824
- obj.post_build
766
+ if get_object(child.reference)
767
+ child.reference
768
+ else
769
+ reference = add_to_revision(child, revision)
770
+ build_object(child, revision, options)
771
+ reference
772
+ end
825
773
  end
826
774
 
827
- indirect_objects_by_rev.each do |obj, revision|
828
- build.call(obj, revision)
829
- end
830
-
831
- self
775
+ # Finalize all the children objects.
776
+ object.each do |child|
777
+ build_object(child, revision, options)
778
+ end
832
779
  end
833
780
 
834
781
  #
@@ -858,9 +805,7 @@ module Origami
858
805
  end
859
806
 
860
807
  # Get trailer dictionary
861
- trailer_info = get_trailer_info
862
- raise InvalidPDFError, "No trailer information found" if trailer_info.nil?
863
- trailer_dict = trailer_info.dictionary
808
+ trailer_dict = self.trailer.dictionary
864
809
 
865
810
  prev_xref_offset = nil
866
811
  xrefstm_offset = nil
@@ -939,7 +884,7 @@ module Origami
939
884
  if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
940
885
  index = obj.parent.index(obj.no)
941
886
 
942
- xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
887
+ xrefs_stm << XRefToCompressedObject.new(obj.parent.no, index)
943
888
 
944
889
  lastno_stm = obj.no
945
890
  else
@@ -1022,7 +967,6 @@ module Origami
1022
967
  #
1023
968
  def init
1024
969
  catalog = (self.Catalog = (trailer_key(:Root) || Catalog.new))
1025
- catalog.Pages = PageTreeNode.new.set_indirect(true)
1026
970
  @revisions.last.trailer.Root = catalog.reference
1027
971
 
1028
972
  @loaded = true