origami 2.0.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80)
  1. checksums.yaml +4 -4
  2. data/README.md +1 -0
  3. data/bin/gui/config.rb +2 -1
  4. data/bin/gui/file.rb +118 -240
  5. data/bin/gui/gtkhex.rb +5 -5
  6. data/bin/gui/hexview.rb +20 -16
  7. data/bin/gui/imgview.rb +1 -1
  8. data/bin/gui/menu.rb +138 -158
  9. data/bin/gui/properties.rb +46 -48
  10. data/bin/gui/signing.rb +183 -214
  11. data/bin/gui/textview.rb +1 -1
  12. data/bin/gui/treeview.rb +13 -7
  13. data/bin/gui/walker.rb +102 -71
  14. data/bin/gui/xrefs.rb +1 -1
  15. data/bin/pdf2ruby +3 -3
  16. data/bin/pdfcop +18 -11
  17. data/bin/pdfextract +14 -5
  18. data/bin/pdfmetadata +3 -3
  19. data/bin/shell/console.rb +8 -8
  20. data/bin/shell/hexdump.rb +4 -4
  21. data/examples/attachments/nested_document.rb +1 -1
  22. data/examples/javascript/hello_world.rb +3 -3
  23. data/lib/origami.rb +0 -1
  24. data/lib/origami/acroform.rb +3 -3
  25. data/lib/origami/array.rb +1 -3
  26. data/lib/origami/boolean.rb +1 -3
  27. data/lib/origami/catalog.rb +3 -9
  28. data/lib/origami/destinations.rb +2 -2
  29. data/lib/origami/dictionary.rb +15 -29
  30. data/lib/origami/encryption.rb +334 -692
  31. data/lib/origami/extensions/fdf.rb +3 -2
  32. data/lib/origami/extensions/ppklite.rb +5 -9
  33. data/lib/origami/filespec.rb +2 -2
  34. data/lib/origami/filters.rb +54 -36
  35. data/lib/origami/filters/ascii.rb +67 -49
  36. data/lib/origami/filters/ccitt.rb +4 -236
  37. data/lib/origami/filters/ccitt/tables.rb +267 -0
  38. data/lib/origami/filters/crypt.rb +1 -1
  39. data/lib/origami/filters/dct.rb +0 -1
  40. data/lib/origami/filters/flate.rb +3 -43
  41. data/lib/origami/filters/lzw.rb +62 -99
  42. data/lib/origami/filters/predictors.rb +135 -105
  43. data/lib/origami/filters/runlength.rb +34 -22
  44. data/lib/origami/graphics.rb +2 -2
  45. data/lib/origami/graphics/colors.rb +89 -63
  46. data/lib/origami/graphics/path.rb +14 -14
  47. data/lib/origami/graphics/patterns.rb +31 -33
  48. data/lib/origami/graphics/render.rb +0 -1
  49. data/lib/origami/graphics/state.rb +9 -9
  50. data/lib/origami/graphics/text.rb +17 -17
  51. data/lib/origami/graphics/xobject.rb +102 -92
  52. data/lib/origami/javascript.rb +91 -68
  53. data/lib/origami/linearization.rb +22 -20
  54. data/lib/origami/metadata.rb +1 -1
  55. data/lib/origami/name.rb +1 -3
  56. data/lib/origami/null.rb +1 -3
  57. data/lib/origami/numeric.rb +3 -13
  58. data/lib/origami/object.rb +100 -72
  59. data/lib/origami/page.rb +24 -28
  60. data/lib/origami/parser.rb +34 -51
  61. data/lib/origami/parsers/fdf.rb +2 -2
  62. data/lib/origami/parsers/pdf.rb +41 -18
  63. data/lib/origami/parsers/pdf/lazy.rb +83 -46
  64. data/lib/origami/parsers/pdf/linear.rb +19 -10
  65. data/lib/origami/parsers/ppklite.rb +1 -1
  66. data/lib/origami/pdf.rb +150 -206
  67. data/lib/origami/reference.rb +4 -6
  68. data/lib/origami/signature.rb +76 -48
  69. data/lib/origami/stream.rb +69 -63
  70. data/lib/origami/string.rb +2 -19
  71. data/lib/origami/trailer.rb +25 -22
  72. data/lib/origami/version.rb +1 -1
  73. data/lib/origami/xfa.rb +6 -4
  74. data/lib/origami/xreftable.rb +29 -29
  75. data/test/test_annotations.rb +16 -38
  76. data/test/test_pdf_attachment.rb +1 -1
  77. data/test/test_pdf_parse.rb +1 -1
  78. data/test/test_xrefs.rb +2 -2
  79. metadata +4 -4
  80. data/lib/origami/export.rb +0 -247
@@ -29,9 +29,7 @@ module Origami
29
29
  # Pass the Page object if a block is present.
30
30
  #
31
31
  def append_page(page = Page.new)
32
- unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
33
- raise InvalidPDFError, "Invalid page tree"
34
- end
32
+ init_page_tree
35
33
 
36
34
  treeroot = self.Catalog.Pages
37
35
 
@@ -55,9 +53,7 @@ module Origami
55
53
  # Pass the Page object if a block is present.
56
54
  #
57
55
  def insert_page(index, page = Page.new)
58
- unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
59
- raise InvalidPageTreeError, "Invalid page tree"
60
- end
56
+ init_page_tree
61
57
 
62
58
  # Page from another document must be exported.
63
59
  page = page.export if page.document and page.document != self
@@ -73,9 +69,7 @@ module Origami
73
69
  # Returns an Enumerator of Page
74
70
  #
75
71
  def pages
76
- unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
77
- raise InvalidPageTreeError, "Invalid page tree"
78
- end
72
+ init_page_tree
79
73
 
80
74
  self.Catalog.Pages.pages
81
75
  end
@@ -84,9 +78,7 @@ module Origami
84
78
  # Iterate through each page, returns self.
85
79
  #
86
80
  def each_page(&b)
87
- unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
88
- raise InvalidPageTreeError, "Invalid page tree"
89
- end
81
+ init_page_tree
90
82
 
91
83
  self.Catalog.Pages.each_page(&b)
92
84
  end
@@ -95,9 +87,7 @@ module Origami
95
87
  # Get the n-th Page object.
96
88
  #
97
89
  def get_page(n)
98
- unless self.Catalog and self.Catalog.Pages and self.Catalog.Pages.is_a?(PageTreeNode)
99
- raise InvalidPageTreeError, "Invalid page tree"
100
- end
90
+ init_page_tree
101
91
 
102
92
  self.Catalog.Pages.get_page(n)
103
93
  end
@@ -115,6 +105,19 @@ module Origami
115
105
  def each_named_page(&b)
116
106
  each_name(Names::PAGES, &b)
117
107
  end
108
+
109
+ private
110
+
111
+ def init_page_tree #:nodoc:
112
+ unless self.Catalog.key?(:Pages)
113
+ self.Catalog.Pages = PageTreeNode.new
114
+ return
115
+ end
116
+
117
+ unless self.Catalog.Pages.is_a?(PageTreeNode)
118
+ raise InvalidPageTreeError, "Root page node is not a PageTreeNode"
119
+ end
120
+ end
118
121
  end
119
122
 
120
123
  module ResourcesHolder
@@ -151,11 +154,12 @@ module Origami
151
154
  # If _name_ is not specified, a new name will be automatically generated.
152
155
  #
153
156
  def add_resource(type, rsrc, name = nil)
154
- if name.nil? and existing = self.resources(type).key(rsrc)
155
- return existing
157
+ if name.nil?
158
+ rsrc_name = self.resources(type).key(rsrc)
159
+ return rsrc_name if rsrc_name
156
160
  end
157
161
 
158
- name = new_id(type) unless name
162
+ name ||= new_id(type)
159
163
  target = self.is_a?(Resources) ? self : (self.Resources ||= Resources.new)
160
164
 
161
165
  rsrc_dict = (target[type] and target[type].solve) || (target[type] = Dictionary.new)
@@ -234,14 +238,6 @@ module Origami
234
238
 
235
239
  Name.new(prefix + n)
236
240
  end
237
-
238
- def new_extgstate_id; new_id(Resources::EXTGSTATE) end
239
- def new_colorspace_id; new_id(Resources::COLORSPACE) end
240
- def new_pattern_id; new_id(Resources::PATTERN) end
241
- def new_shading_id; new_id(Resources::SHADING) end
242
- def new_xobject_id; new_id(Resources::XOBJECT) end
243
- def new_font_id; new_name(Resources::FONT) end
244
- def new_properties_id; new_name(Resources::PROPERTIES) end
245
241
  end
246
242
 
247
243
  #
@@ -293,7 +289,7 @@ module Origami
293
289
  self.Count = 0
294
290
  self.Kids = []
295
291
 
296
- super(hash, parser)
292
+ super
297
293
 
298
294
  set_indirect(true)
299
295
  end
@@ -421,7 +417,7 @@ module Origami
421
417
  raise IndexError, "Page not found"
422
418
  end
423
419
 
424
- def << (pageset)
420
+ def <<(pageset)
425
421
  pageset = [pageset] unless pageset.is_a?(::Array)
426
422
  unless pageset.all? {|item| item.is_a?(Page) or item.is_a?(PageTreeNode) }
427
423
  raise TypeError, "Cannot add anything but Page and PageTreeNode to this node"
@@ -67,8 +67,6 @@ module Origami
67
67
  @options.update(options)
68
68
  @logger = @options[:logger]
69
69
  @data = nil
70
-
71
- ::String.disable_colorization(false) if @options[:colorize_log]
72
70
  end
73
71
 
74
72
  def pos
@@ -107,21 +105,8 @@ module Origami
107
105
  obj = Object.parse(@data, self)
108
106
  return if obj.nil?
109
107
 
110
- if Origami::OPTIONS[:enable_type_propagation] and @deferred_casts.key?(obj.reference)
111
- types = @deferred_casts[obj.reference]
112
- types = [ types ] unless types.is_a?(::Array)
113
-
114
- # Promote object if a compatible type is found.
115
- if cast_type = types.find{|type| type < obj.class}
116
- obj = obj.cast_to(cast_type, self)
117
- end
118
- end
119
-
120
- trace "Read #{obj.type} object#{
121
- if obj.class != obj.native_type
122
- " (" + obj.native_type.to_s.split('::').last + ")"
123
- end
124
- }, #{obj.reference}"
108
+ obj = try_object_promotion(obj)
109
+ trace "Read #{obj.type} object, #{obj.reference}"
125
110
 
126
111
  @options[:callback].call(obj)
127
112
  obj
@@ -205,54 +190,52 @@ module Origami
205
190
 
206
191
  private
207
192
 
208
- def error(str = "") #:nodoc:
209
- if @options[:colorize_log]
210
- @logger.puts "[error] #{str}".red
211
- else
212
- @logger.puts "[error] #{str}"
213
- end
214
- end
193
+ #
194
+ # Attempt to promote an object using the deferred casts.
195
+ #
196
+ def try_object_promotion(obj)
197
+ return obj unless Origami::OPTIONS[:enable_type_propagation] and @deferred_casts.key?(obj.reference)
215
198
 
216
- def warn(str = "") #:nodoc:
217
- return unless @options[:verbosity] >= VERBOSE_INFO
199
+ types = @deferred_casts[obj.reference]
200
+ types = [ types ] unless types.is_a?(::Array)
218
201
 
219
- if @options[:colorize_log]
220
- @logger.puts "[info ] Warning: #{str}".yellow
202
+ # Promote object if a compatible type is found.
203
+ cast_type = types.find {|type| type < obj.class }
204
+ if cast_type
205
+ obj = obj.cast_to(cast_type, self)
221
206
  else
222
- @logger.puts "[info ] Warning: #{str}"
207
+ obj
223
208
  end
224
209
  end
225
210
 
226
- def info(str = "") #:nodoc:
227
- return unless @options[:verbosity] >= VERBOSE_INFO
211
+ def error(msg = "") #:nodoc:
212
+ log(VERBOSE_QUIET, 'error', :red, msg.red)
213
+ end
228
214
 
229
- if @options[:colorize_log]
230
- @logger.print "[info ] ".green
231
- @logger.puts str
232
- else
233
- @logger.puts "[info ] #{str}"
234
- end
215
+ def warn(msg = "") #:nodoc:
216
+ log(VERBOSE_INFO, 'warn ', :yellow, msg.yellow)
235
217
  end
236
218
 
237
- def debug(str = "") #:nodoc:
238
- return unless @options[:verbosity] >= VERBOSE_DEBUG
219
+ def info(msg = "") #:nodoc:
220
+ log(VERBOSE_INFO, 'info ', :green, msg)
221
+ end
239
222
 
240
- if @options[:colorize_log]
241
- @logger.print "[debug] ".magenta
242
- @logger.puts str
243
- else
244
- @logger.puts "[debug] #{str}"
245
- end
223
+ def debug(msg = "") #:nodoc:
224
+ log(VERBOSE_DEBUG, 'debug', :magenta, msg)
225
+ end
226
+
227
+ def trace(msg = "") #:nodoc:
228
+ log(VERBOSE_TRACE, 'trace', :cyan, msg)
246
229
  end
247
230
 
248
- def trace(str = "") #:nodoc:
249
- return unless @options[:verbosity] >= VERBOSE_TRACE
231
+ def log(level, prefix, color, message) #:nodoc:
232
+ return unless @options[:verbosity] >= level
250
233
 
251
234
  if @options[:colorize_log]
252
- @logger.print "[trace] ".cyan
253
- @logger.puts str
235
+ @logger.print "[#{prefix}] ".colorize(color)
236
+ @logger.puts message
254
237
  else
255
- @logger.puts "[trace] #{str}"
238
+ @logger.puts "[#{prefix}] #{message}"
256
239
  end
257
240
  end
258
241
 
@@ -266,7 +249,7 @@ module Origami
266
249
  current_state.each_pair do |ref, type|
267
250
  type = [ type ] unless type.is_a?(::Array)
268
251
  type.each do |hint|
269
- break if document.cast_object(ref, hint, self)
252
+ break if document.cast_object(ref, hint)
270
253
  end
271
254
  end
272
255
  end
@@ -27,7 +27,7 @@ module Origami
27
27
  def parse(stream) #:nodoc:
28
28
  super(stream)
29
29
 
30
- fdf = FDF.new
30
+ fdf = FDF.new(self)
31
31
  fdf.header = FDF::Header.parse(@data)
32
32
  @options[:callback].call(fdf.header)
33
33
 
@@ -43,7 +43,7 @@ module Origami
43
43
  trailer = fdf.revisions.first.trailer
44
44
 
45
45
  if trailer[:Root].is_a?(Reference)
46
- fdf.cast_object(trailer[:Root], FDF::Catalog, self)
46
+ fdf.cast_object(trailer[:Root], FDF::Catalog)
47
47
  end
48
48
 
49
49
  propagate_types(fdf)
@@ -27,13 +27,14 @@ module Origami
27
27
  def initialize(params = {})
28
28
  options =
29
29
  {
30
- password: '', # Default password being tried when opening a protected document.
31
- prompt_password: lambda do # Callback procedure to prompt password when document is encrypted.
32
- require 'io/console'
33
- STDERR.print "Password: "
34
- STDIN.noecho(&:gets).chomp
35
- end,
36
- force: false # Force PDF header detection
30
+ decrypt: true, # Attempt to decrypt the document if encrypted (recommended).
31
+ password: '', # Default password being tried when opening a protected document.
32
+ prompt_password: lambda do # Callback procedure to prompt password when document is encrypted.
33
+ require 'io/console'
34
+ STDERR.print "Password: "
35
+ STDIN.noecho(&:gets).chomp
36
+ end,
37
+ force: false # Force PDF header detection
37
38
  }.update(params)
38
39
 
39
40
  super(options)
@@ -62,6 +63,8 @@ module Origami
62
63
  end
63
64
 
64
65
  def parse_finalize(pdf) #:nodoc:
66
+ cast_trailer_objects(pdf)
67
+
65
68
  warn "This file has been linearized." if pdf.linearized?
66
69
 
67
70
  propagate_types(pdf) if Origami::OPTIONS[:enable_type_propagation]
@@ -72,23 +75,43 @@ module Origami
72
75
  if pdf.encrypted?
73
76
  warn "This document contains encrypted data!"
74
77
 
75
- passwd = @options[:password]
76
- begin
77
- pdf.decrypt(passwd)
78
- rescue EncryptionInvalidPasswordError
79
- if passwd.empty?
80
- passwd = @options[:prompt_password].call
81
- retry unless passwd.empty?
82
- end
83
-
84
- raise
85
- end
78
+ decrypt_document(pdf) if @options[:decrypt]
86
79
  end
87
80
 
88
81
  warn "This document has been signed!" if pdf.signed?
89
82
 
90
83
  pdf
91
84
  end
85
+
86
+ def cast_trailer_objects(pdf) #:nodoc:
87
+ trailer = pdf.trailer
88
+
89
+ if trailer[:Root].is_a?(Reference)
90
+ pdf.cast_object(trailer[:Root], Catalog)
91
+ end
92
+
93
+ if trailer[:Info].is_a?(Reference)
94
+ pdf.cast_object(trailer[:Info], Metadata)
95
+ end
96
+
97
+ if trailer[:Encrypt].is_a?(Reference)
98
+ pdf.cast_object(trailer[:Encrypt], Encryption::Standard::Dictionary)
99
+ end
100
+ end
101
+
102
+ def decrypt_document(pdf) #:nodoc:
103
+ passwd = @options[:password]
104
+ begin
105
+ pdf.decrypt(passwd)
106
+ rescue EncryptionInvalidPasswordError
107
+ if passwd.empty?
108
+ passwd = @options[:prompt_password].call
109
+ retry unless passwd.empty?
110
+ end
111
+
112
+ raise
113
+ end
114
+ end
92
115
  end
93
116
  end
94
117
 
@@ -35,20 +35,9 @@ module Origami
35
35
  pdf = parse_initialize
36
36
  revisions = []
37
37
 
38
- # Set the scanner position at the end.
39
- @data.terminate
40
-
41
- # Locate the startxref token.
42
- until @data.match?(/#{Trailer::XREF_TOKEN}/)
43
- raise ParsingError, "No xref token found" if @data.pos == 0
44
- @data.pos -= 1
45
- end
46
-
47
- # Extract the offset of the last xref section.
48
- trailer = Trailer.parse(@data, self)
49
- raise ParsingError, "Cannot locate xref section" if trailer.startxref.zero?
38
+ # Locate the last xref offset at the end of the file.
39
+ xref_offset = locate_last_xref_offset
50
40
 
51
- xref_offset = trailer.startxref
52
41
  while xref_offset and xref_offset != 0
53
42
 
54
43
  # Create a new revision based on the xref section offset.
@@ -78,6 +67,29 @@ module Origami
78
67
 
79
68
  private
80
69
 
70
+ #
71
+ # The document is scanned starting from the end, by locating the last startxref token.
72
+ #
73
+ def locate_last_xref_offset
74
+ # Set the scanner position at the end.
75
+ @data.terminate
76
+
77
+ # Locate the startxref token.
78
+ until @data.match?(/#{Trailer::XREF_TOKEN}/)
79
+ raise ParsingError, "No xref token found" if @data.pos == 0
80
+ @data.pos -= 1
81
+ end
82
+
83
+ # Extract the offset of the last xref section.
84
+ trailer = Trailer.parse(@data, self)
85
+ raise ParsingError, "Cannot locate xref section" if trailer.startxref.zero?
86
+
87
+ trailer.startxref
88
+ end
89
+
90
+ #
91
+ # In the LazyParser, the revisions are parsed by jumping through the cross-references (table or streams).
92
+ #
81
93
  def parse_revision(pdf, offset)
82
94
  raise ParsingError, "Invalid xref offset" if offset < 0 or offset >= @data.string.size
83
95
 
@@ -88,48 +100,73 @@ module Origami
88
100
 
89
101
  # Regular xref section.
90
102
  if @data.match?(/#{XRef::Section::TOKEN}/)
91
- xreftable = parse_xreftable
92
- raise ParsingError, "Cannot parse xref section" if xreftable.nil?
93
-
94
- revision.xreftable = xreftable
95
- revision.trailer = parse_trailer
96
-
97
- # Handle hybrid cross-references.
98
- if revision.trailer[:XRefStm].is_a?(Integer)
99
- begin
100
- offset = revision.trailer[:XRefStm].to_i
101
- xrefstm = parse_object(offset)
102
-
103
- if xrefstm.is_a?(XRefStream)
104
- revision.xrefstm = xrefstm
105
- else
106
- warn "Invalid xref stream at offset #{offset}"
107
- end
108
-
109
- rescue
110
- warn "Cannot parse xref stream at offset #{offset}"
111
- end
112
- end
103
+ parse_revision_from_xreftable(revision)
113
104
 
114
105
  # The xrefs are stored in a stream.
115
106
  else
116
- xrefstm = parse_object
117
- raise ParsingError, "Invalid xref stream" unless xrefstm.is_a?(XRefStream)
107
+ parse_revision_from_xrefstm(revision)
108
+ end
118
109
 
119
- revision.xrefstm = xrefstm
110
+ revision
111
+ end
120
112
 
121
- # Search for the trailer.
122
- if @data.skip_until Regexp.union(Trailer::XREF_TOKEN, *Trailer::TOKENS)
123
- @data.pos -= @data.matched_size
113
+ #
114
+ # Assume the current pointer is at the xreftable of the revision.
115
+ # We are expecting:
116
+ # - a regular xref table, starting with xref
117
+ # - a revision trailer
118
+ #
119
+ # The trailer may hold an XRefStm entry in case of hybrid references.
120
+ #
121
+ def parse_revision_from_xreftable(revision)
122
+ xreftable = parse_xreftable
123
+ raise ParsingError, "Cannot parse xref section" if xreftable.nil?
124
+
125
+ revision.xreftable = xreftable
126
+ revision.trailer = parse_trailer
127
+
128
+ # Handle hybrid cross-references.
129
+ if revision.trailer[:XRefStm].is_a?(Integer)
130
+ begin
131
+ offset = revision.trailer[:XRefStm].to_i
132
+ xrefstm = parse_object(offset)
133
+
134
+ if xrefstm.is_a?(XRefStream)
135
+ revision.xrefstm = xrefstm
136
+ else
137
+ warn "Invalid xref stream at offset #{offset}"
138
+ end
124
139
 
125
- revision.trailer = parse_trailer
126
- else
127
- warn "No trailer found."
128
- revision.trailer = Trailer.new
140
+ rescue
141
+ warn "Cannot parse xref stream at offset #{offset}"
129
142
  end
130
143
  end
144
+ end
131
145
 
132
- revision
146
+ #
147
+ # Assume the current pointer is at the xref stream of the revision.
148
+ #
149
+ # The XRefStream should normally be at the end of the revision.
150
+ # We scan after the object for a trailer token.
151
+ #
152
+ # The revision is allowed not to have a trailer, and the stream
153
+ # dictionary will be used as the trailer dictionary in that case.
154
+ #
155
+ def parse_revision_from_xrefstm(revision)
156
+ xrefstm = parse_object
157
+ raise ParsingError, "Invalid xref stream" unless xrefstm.is_a?(XRefStream)
158
+
159
+ revision.xrefstm = xrefstm
160
+
161
+ # Search for the trailer.
162
+ if @data.skip_until Regexp.union(Trailer::XREF_TOKEN, *Trailer::TOKENS)
163
+ @data.pos -= @data.matched_size
164
+
165
+ revision.trailer = parse_trailer
166
+ else
167
+ warn "No trailer found."
168
+ revision.trailer = Trailer.new
169
+ end
133
170
  end
134
171
  end
135
172
  end