origami 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/bin/gui/config.rb +2 -1
  4. data/bin/gui/file.rb +118 -240
  5. data/bin/gui/gtkhex.rb +5 -5
  6. data/bin/gui/hexview.rb +20 -16
  7. data/bin/gui/imgview.rb +1 -1
  8. data/bin/gui/menu.rb +138 -158
  9. data/bin/gui/properties.rb +46 -48
  10. data/bin/gui/signing.rb +183 -214
  11. data/bin/gui/textview.rb +1 -1
  12. data/bin/gui/treeview.rb +13 -7
  13. data/bin/gui/walker.rb +102 -71
  14. data/bin/gui/xrefs.rb +1 -1
  15. data/bin/pdf2ruby +3 -3
  16. data/bin/pdfcop +18 -11
  17. data/bin/pdfextract +14 -5
  18. data/bin/pdfmetadata +3 -3
  19. data/bin/shell/console.rb +8 -8
  20. data/bin/shell/hexdump.rb +4 -4
  21. data/examples/attachments/nested_document.rb +1 -1
  22. data/examples/javascript/hello_world.rb +3 -3
  23. data/lib/origami.rb +0 -1
  24. data/lib/origami/acroform.rb +3 -3
  25. data/lib/origami/array.rb +1 -3
  26. data/lib/origami/boolean.rb +1 -3
  27. data/lib/origami/catalog.rb +3 -9
  28. data/lib/origami/destinations.rb +2 -2
  29. data/lib/origami/dictionary.rb +15 -29
  30. data/lib/origami/encryption.rb +334 -692
  31. data/lib/origami/extensions/fdf.rb +3 -2
  32. data/lib/origami/extensions/ppklite.rb +5 -9
  33. data/lib/origami/filespec.rb +2 -2
  34. data/lib/origami/filters.rb +54 -36
  35. data/lib/origami/filters/ascii.rb +67 -49
  36. data/lib/origami/filters/ccitt.rb +4 -236
  37. data/lib/origami/filters/ccitt/tables.rb +267 -0
  38. data/lib/origami/filters/crypt.rb +1 -1
  39. data/lib/origami/filters/dct.rb +0 -1
  40. data/lib/origami/filters/flate.rb +3 -43
  41. data/lib/origami/filters/lzw.rb +62 -99
  42. data/lib/origami/filters/predictors.rb +135 -105
  43. data/lib/origami/filters/runlength.rb +34 -22
  44. data/lib/origami/graphics.rb +2 -2
  45. data/lib/origami/graphics/colors.rb +89 -63
  46. data/lib/origami/graphics/path.rb +14 -14
  47. data/lib/origami/graphics/patterns.rb +31 -33
  48. data/lib/origami/graphics/render.rb +0 -1
  49. data/lib/origami/graphics/state.rb +9 -9
  50. data/lib/origami/graphics/text.rb +17 -17
  51. data/lib/origami/graphics/xobject.rb +102 -92
  52. data/lib/origami/javascript.rb +91 -68
  53. data/lib/origami/linearization.rb +22 -20
  54. data/lib/origami/metadata.rb +1 -1
  55. data/lib/origami/name.rb +1 -3
  56. data/lib/origami/null.rb +1 -3
  57. data/lib/origami/numeric.rb +3 -13
  58. data/lib/origami/object.rb +100 -72
  59. data/lib/origami/page.rb +24 -28
  60. data/lib/origami/parser.rb +34 -51
  61. data/lib/origami/parsers/fdf.rb +2 -2
  62. data/lib/origami/parsers/pdf.rb +41 -18
  63. data/lib/origami/parsers/pdf/lazy.rb +83 -46
  64. data/lib/origami/parsers/pdf/linear.rb +19 -10
  65. data/lib/origami/parsers/ppklite.rb +1 -1
  66. data/lib/origami/pdf.rb +150 -206
  67. data/lib/origami/reference.rb +4 -6
  68. data/lib/origami/signature.rb +76 -48
  69. data/lib/origami/stream.rb +69 -63
  70. data/lib/origami/string.rb +2 -19
  71. data/lib/origami/trailer.rb +25 -22
  72. data/lib/origami/version.rb +1 -1
  73. data/lib/origami/xfa.rb +6 -4
  74. data/lib/origami/xreftable.rb +29 -29
  75. data/test/test_annotations.rb +16 -38
  76. data/test/test_pdf_attachment.rb +1 -1
  77. data/test/test_pdf_parse.rb +1 -1
  78. data/test/test_xrefs.rb +2 -2
  79. metadata +4 -4
  80. data/lib/origami/export.rb +0 -247
@@ -54,16 +54,7 @@ module Origami
54
54
  trailer = parse_trailer
55
55
  pdf.revisions.last.trailer = trailer
56
56
 
57
- if trailer.startxref != 0
58
- xrefstm = pdf.get_object_by_offset(trailer.startxref)
59
- elsif trailer[:XRefStm].is_a?(Integer)
60
- xrefstm = pdf.get_object_by_offset(trailer[:XRefStm])
61
- end
62
-
63
- if xrefstm.is_a?(XRefStream)
64
- warn "Found a XRefStream for this revision at #{xrefstm.reference}"
65
- pdf.revisions.last.xrefstm = xrefstm
66
- end
57
+ locate_xref_stream(pdf, pdf.revisions.last)
67
58
 
68
59
  rescue
69
60
  error "Cannot read : " + (@data.peek(10) + "...").inspect
@@ -77,6 +68,24 @@ module Origami
77
68
 
78
69
  parse_finalize(pdf)
79
70
  end
71
+
72
+ private
73
+
74
+ def locate_xref_stream(pdf, revision)
75
+ trailer = revision.trailer
76
+
77
+ # Try to match the location of the last startxref / XRefStm with an XRefStream.
78
+ if trailer.startxref != 0
79
+ xrefstm = pdf.get_object_by_offset(trailer.startxref)
80
+ else
81
+ xrefstm = pdf.get_object_by_offset(trailer[:XRefStm])
82
+ end
83
+
84
+ if xrefstm.is_a?(XRefStream)
85
+ warn "Found a XRefStream for this revision at #{xrefstm.reference}"
86
+ revision.xrefstm = xrefstm
87
+ end
88
+ end
80
89
  end
81
90
  end
82
91
 
@@ -44,7 +44,7 @@ module Origami
44
44
  trailer = address_book.revisions.first.trailer
45
45
 
46
46
  if trailer[:Root].is_a?(Reference)
47
- address_book.cast_object(trailer[:Root], PPKLite::Catalog, self)
47
+ address_book.cast_object(trailer[:Root], PPKLite::Catalog)
48
48
  end
49
49
 
50
50
  propagate_types(address_book)
@@ -45,8 +45,6 @@ require 'origami/actions'
45
45
  require 'origami/3d'
46
46
  require 'origami/signature'
47
47
  require 'origami/webcapture'
48
- require 'origami/export'
49
- require 'origami/webcapture'
50
48
  require 'origami/encryption'
51
49
  require 'origami/linearization'
52
50
  require 'origami/obfuscation'
@@ -147,15 +145,6 @@ module Origami
147
145
  pdf.save(output, options)
148
146
  end
149
147
  alias write create
150
-
151
- #
152
- # Deserializes a PDF dump.
153
- #
154
- def deserialize(filename)
155
- Zlib::GzipReader.open(filename) { |gz|
156
- return Marshal.load(gz.read)
157
- }
158
- end
159
148
  end
160
149
 
161
150
  #
@@ -166,16 +155,13 @@ module Origami
166
155
  def initialize(parser = nil)
167
156
  @header = PDF::Header.new
168
157
  @revisions = []
158
+ @parser = parser
159
+ @loaded = false
169
160
 
170
161
  add_new_revision
171
162
  @revisions.first.trailer = Trailer.new
172
163
 
173
- if parser
174
- @loaded = false
175
- @parser = parser
176
- else
177
- init
178
- end
164
+ init if parser.nil?
179
165
  end
180
166
 
181
167
  #
@@ -199,21 +185,6 @@ module Origami
199
185
  @parser.target_data if @parser
200
186
  end
201
187
 
202
- #
203
- # Serializes the current PDF.
204
- #
205
- def serialize(filename)
206
- parser = @parser
207
- @parser = nil # do not serialize the parser
208
-
209
- Zlib::GzipWriter.open(filename) { |gz|
210
- gz.write Marshal.dump(self)
211
- }
212
-
213
- @parser = parser
214
- self
215
- end
216
-
217
188
  #
218
189
  # Saves the current document.
219
190
  # _filename_:: The path where to save this PDF.
@@ -277,35 +248,9 @@ module Origami
277
248
 
278
249
  result = []
279
250
 
280
- search_object = -> (object) do
281
- case object
282
- when Stream
283
- result.concat object.dictionary.strings_cache.select{|str| pattern === str}
284
- result.concat object.dictionary.names_cache.select{|name| pattern === name.value}
285
-
286
- begin
287
- result.push object if streams and object.data.match(pattern)
288
- rescue Filter::Error
289
- next # Skip object if a decoding error occured.
290
- end
291
-
292
- next if object.is_a?(ObjectStream) and not object_streams
293
-
294
- object.each do |subobject|
295
- search_object.call(subobject)
296
- end
297
-
298
- when Name, String
299
- result.push object if object.value.match(pattern)
300
-
301
- when Dictionary, Array then
302
- result.concat object.strings_cache.select{|str| pattern === str}
303
- result.concat object.names_cache.select{|name| pattern === name.value}
304
- end
305
- end
306
-
307
251
  self.indirect_objects.each do |object|
308
- search_object.call(object)
252
+ result.concat search_object(object, pattern,
253
+ streams: streams, object_streams: object_streams)
309
254
  end
310
255
 
311
256
  result
@@ -329,42 +274,22 @@ module Origami
329
274
  # _compressed_: iterates over the objects inside object streams.
330
275
  # _recursive_: iterates recursively inside objects like arrays and dictionaries.
331
276
  #
332
- def each_object(compressed: false, recursive: false)
277
+ def each_object(compressed: false, recursive: false, &block)
333
278
  return enum_for(__method__, compressed: compressed,
334
279
  recursive: recursive
335
280
  ) unless block_given?
336
281
 
337
- walk_object = -> (object) do
338
- case object
339
- when Dictionary
340
- object.each_value do |value|
341
- yield(value)
342
- walk_object.call(value)
343
- end
344
-
345
- when Array
346
- object.each do |child|
347
- yield(child)
348
- walk_object.call(child)
349
- end
350
-
351
- when Stream
352
- yield(object.dictionary)
353
- walk_object.call(object.dictionary)
354
- end
355
- end
356
-
357
282
  @revisions.each do |revision|
358
283
  revision.each_object do |object|
359
- yield(object)
284
+ block.call(object)
360
285
 
361
- walk_object.call(object) if recursive
286
+ walk_object(object, &block) if recursive
362
287
 
363
288
  if object.is_a?(ObjectStream) and compressed
364
289
  object.each do |child_obj|
365
- yield(child_obj)
290
+ block.call(child_obj)
366
291
 
367
- walk_object.call(child_obj) if recursive
292
+ walk_object(child_obj) if recursive
368
293
  end
369
294
  end
370
295
  end
@@ -539,7 +464,7 @@ module Origami
539
464
  #
540
465
  # We found a matching XRef.
541
466
  #
542
- if xref.is_a?(XRefToCompressedObj)
467
+ if xref.is_a?(XRefToCompressedObject)
543
468
  objstm = get_object(xref.objstmno, 0, use_xrefstm: use_xrefstm)
544
469
 
545
470
  object = objstm.extract_by_index(xref.index)
@@ -570,35 +495,35 @@ module Origami
570
495
  # Casts a PDF object into another object type.
571
496
  # The target type must be a subtype of the original type.
572
497
  #
573
- def cast_object(reference, type, parser = nil) #:nodoc:
498
+ def cast_object(reference, type) #:nodoc:
574
499
  @revisions.each do |rev|
575
- if rev.body.include?(reference) and type < rev.body[reference].class
576
- rev.body[reference] = rev.body[reference].cast_to(type, parser)
500
+ if rev.body.include?(reference)
501
+ object = rev.body[reference]
502
+ return object if object.is_a?(type)
577
503
 
578
- rev.body[reference]
579
- else
580
- nil
504
+ if type < rev.body[reference].class
505
+ rev.body[reference] = object.cast_to(type, @parser)
506
+
507
+ return rev.body[reference]
508
+ end
581
509
  end
582
510
  end
511
+
512
+ nil
583
513
  end
584
514
 
585
515
  #
586
516
  # Returns a new number/generation for future object.
587
517
  #
588
518
  def allocate_new_object_number
589
- no = 1
590
519
 
591
- # Deprecated number allocation policy (first available)
592
- #no = no + 1 while get_object(no)
593
-
594
- objset = self.indirect_objects
595
- self.indirect_objects.find_all{|obj| obj.is_a?(ObjectStream)}.each do |objstm|
596
- objstm.each{|obj| objset << obj}
520
+ last_object = self.each_object(compressed: true).max_by {|object| object.no }
521
+ if last_object.nil?
522
+ no = 1
523
+ else
524
+ no = last_object.no + 1
597
525
  end
598
526
 
599
- allocated = objset.collect{|obj| obj.no}.compact
600
- no = allocated.max + 1 unless allocated.empty?
601
-
602
527
  [ no, 0 ]
603
528
  end
604
529
 
@@ -614,6 +539,70 @@ module Origami
614
539
  private
615
540
  ##########################
616
541
 
542
+ #
543
+ # Iterates over the children of an object, avoiding cycles.
544
+ #
545
+ def walk_object(object, excludes: [])
546
+ return enum_for(__method__, object, excludes: excludes) unless block_given?
547
+
548
+ return if excludes.include?(object)
549
+ excludes.push(object)
550
+
551
+ case object
552
+ when Dictionary
553
+ object.each_value do |value|
554
+ yield(value)
555
+ walk_object(value, excludes: excludes)
556
+ end
557
+
558
+ when Array
559
+ object.each do |child|
560
+ yield(child)
561
+ walk_object(child, excludes: excludes)
562
+ end
563
+
564
+ when Stream
565
+ yield(object.dictionary)
566
+ walk_object(object.dictionary, excludes: excludes)
567
+ end
568
+ end
569
+
570
+ #
571
+ # Searches through an object, possibly going into object streams.
572
+ # Returns an array of matching strings, names and streams.
573
+ #
574
+ def search_object(object, pattern, streams: true, object_streams: true)
575
+ result = []
576
+
577
+ case object
578
+ when Stream
579
+ result.concat object.dictionary.strings_cache.select{|str| pattern === str}
580
+ result.concat object.dictionary.names_cache.select{|name| pattern === name.value}
581
+
582
+ begin
583
+ result.push object if streams and object.data.match(pattern)
584
+ rescue Filter::Error
585
+ return result # Skip object if a decoding error occured.
586
+ end
587
+
588
+ return result unless object.is_a?(ObjectStream) and object_streams
589
+
590
+ object.each do |child|
591
+ result.concat search_object(child, pattern,
592
+ streams: streams, object_streams: object_streams)
593
+ end
594
+
595
+ when Name, String
596
+ result.push object if object.value.match(pattern)
597
+
598
+ when Dictionary, Array
599
+ result.concat object.strings_cache.select{|str| pattern === str}
600
+ result.concat object.names_cache.select{|name| pattern === name.value}
601
+ end
602
+
603
+ result
604
+ end
605
+
617
606
  #
618
607
  # Load an object from its given file offset.
619
608
  # The document must have an associated Parser.
@@ -627,19 +616,7 @@ module Origami
627
616
  return nil if object.nil?
628
617
 
629
618
  if self.is_a?(Encryption::EncryptedDocument)
630
- case object
631
- when String
632
- object.extend(Encryption::EncryptedString)
633
- object.decrypted = false
634
- when Stream
635
- object.extend(Encryption::EncryptedStream)
636
- object.decrypted = false
637
- when Dictionary, Array
638
- object.strings_cache.each do |string|
639
- string.extend(Encryption::EncryptedString)
640
- string.decrypted = false
641
- end
642
- end
619
+ make_encrypted_object(object)
643
620
  end
644
621
 
645
622
  add_to_revision(object, revision)
@@ -650,6 +627,22 @@ module Origami
650
627
  object
651
628
  end
652
629
 
630
+ #
631
+ # Method called on encrypted objects loaded into the document.
632
+ #
633
+ def make_encrypted_object(object)
634
+ case object
635
+ when String
636
+ object.extend(Encryption::EncryptedString)
637
+ when Stream
638
+ object.extend(Encryption::EncryptedStream)
639
+ when Dictionary, Array
640
+ object.strings_cache.each do |string|
641
+ string.extend(Encryption::EncryptedString)
642
+ end
643
+ end
644
+ end
645
+
653
646
  #
654
647
  # Force the loading of all objects in the document.
655
648
  #
@@ -665,8 +658,8 @@ module Origami
665
658
  next
666
659
  end
667
660
 
668
- xrefs.each_with_number do |_, no|
669
- self.get_object(no)
661
+ xrefs.each_with_number do |xref, no|
662
+ self.get_object(no) unless xref.free?
670
663
  end
671
664
  end
672
665
 
@@ -716,11 +709,7 @@ module Origami
716
709
  # Allocates object numbers and creates references.
717
710
  # Invokes object finalization methods.
718
711
  #
719
- if self.is_a?(Encryption::EncryptedDocument)
720
- physicalize(options)
721
- else
722
- physicalize
723
- end
712
+ physicalize(options)
724
713
 
725
714
  #
726
715
  # Sets the PDF version header.
@@ -735,100 +724,58 @@ module Origami
735
724
  end
736
725
 
737
726
  #
738
- # Cleans the document from its references.
739
- # Indirects objects are made direct whenever possible.
740
- # TODO: Circuit-checking to avoid infinite induction
727
+ # Converts a logical PDF view into a physical view ready for writing.
741
728
  #
742
- def logicalize #:nodoc:
743
- raise NotImplementedError
729
+ def physicalize(options = {})
744
730
 
745
- processed = []
746
-
747
- convert = -> (root) do
748
- replaced = []
749
- if root.is_a?(Dictionary) or root.is_a?(Array)
750
- root.each do |obj|
751
- convert[obj]
752
- end
753
-
754
- root.map! do |obj|
755
- if obj.is_a?(Reference)
756
- target = obj.solve
757
- # Streams can't be direct objects
758
- if target.is_a?(Stream)
759
- obj
760
- else
761
- replaced << obj
762
- target
763
- end
764
- else
765
- obj
766
- end
767
- end
768
- end
769
-
770
- replaced
731
+ indirect_objects_by_rev.each do |obj, revision|
732
+ build_object(obj, revision, options)
771
733
  end
772
734
 
773
- @revisions.each do |revision|
774
- revision.objects.each do |obj|
775
- processed.concat(convert[obj])
776
- end
777
- end
735
+ self
778
736
  end
779
737
 
780
- #
781
- # Converts a logical PDF view into a physical view ready for writing.
782
- #
783
- def physicalize
784
-
785
- #
786
- # Indirect objects are added to the revision and assigned numbers.
787
- #
788
- build = -> (obj, revision) do
789
- #
790
- # Finalize any subobjects before building the stream.
791
- #
792
- if obj.is_a?(ObjectStream)
793
- obj.each do |subobj|
794
- build.call(subobj, revision)
795
- end
738
+ def build_object(object, revision, options)
739
+ # Build any compressed object before building the object stream.
740
+ if object.is_a?(ObjectStream)
741
+ object.each do |compressed_obj|
742
+ build_object(compressed_obj, revision, options)
796
743
  end
744
+ end
797
745
 
798
- obj.pre_build
746
+ object.pre_build
799
747
 
800
- if obj.is_a?(Dictionary) or obj.is_a?(Array)
748
+ case object
749
+ when Stream
750
+ build_object(object.dictionary, revision, options)
751
+ when Dictionary, Array
752
+ build_compound_object(object, revision, options)
753
+ end
801
754
 
802
- obj.map! do |subobj|
803
- if subobj.indirect?
804
- if get_object(subobj.reference)
805
- subobj.reference
806
- else
807
- ref = add_to_revision(subobj, revision)
808
- build.call(subobj, revision)
809
- ref
810
- end
811
- else
812
- subobj
813
- end
814
- end
755
+ object.post_build
756
+ end
815
757
 
816
- obj.each do |subobj|
817
- build.call(subobj, revision)
818
- end
758
+ def build_compound_object(object, revision, options)
759
+ return unless object.is_a?(Dictionary) or object.is_a?(Array)
819
760
 
820
- elsif obj.is_a?(Stream)
821
- build.call(obj.dictionary, revision)
822
- end
761
+ # Flatten the object by adding indirect objects to the revision and
762
+ # replacing them with their reference.
763
+ object.map! do |child|
764
+ next(child) unless child.indirect?
823
765
 
824
- obj.post_build
766
+ if get_object(child.reference)
767
+ child.reference
768
+ else
769
+ reference = add_to_revision(child, revision)
770
+ build_object(child, revision, options)
771
+ reference
772
+ end
825
773
  end
826
774
 
827
- indirect_objects_by_rev.each do |obj, revision|
828
- build.call(obj, revision)
829
- end
830
-
831
- self
775
+ # Finalize all the children objects.
776
+ object.each do |child|
777
+ build_object(child, revision, options)
778
+ end
832
779
  end
833
780
 
834
781
  #
@@ -858,9 +805,7 @@ module Origami
858
805
  end
859
806
 
860
807
  # Get trailer dictionary
861
- trailer_info = get_trailer_info
862
- raise InvalidPDFError, "No trailer information found" if trailer_info.nil?
863
- trailer_dict = trailer_info.dictionary
808
+ trailer_dict = self.trailer.dictionary
864
809
 
865
810
  prev_xref_offset = nil
866
811
  xrefstm_offset = nil
@@ -939,7 +884,7 @@ module Origami
939
884
  if options[:use_xrefstm] and obj.parent != obj and obj.parent.is_a?(ObjectStream)
940
885
  index = obj.parent.index(obj.no)
941
886
 
942
- xrefs_stm << XRefToCompressedObj.new(obj.parent.no, index)
887
+ xrefs_stm << XRefToCompressedObject.new(obj.parent.no, index)
943
888
 
944
889
  lastno_stm = obj.no
945
890
  else
@@ -1022,7 +967,6 @@ module Origami
1022
967
  #
1023
968
  def init
1024
969
  catalog = (self.Catalog = (trailer_key(:Root) || Catalog.new))
1025
- catalog.Pages = PageTreeNode.new.set_indirect(true)
1026
970
  @revisions.last.trailer.Root = catalog.reference
1027
971
 
1028
972
  @loaded = true