origami 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (108) hide show
  1. data/COPYING.LESSER +165 -0
  2. data/README +77 -0
  3. data/VERSION +1 -0
  4. data/bin/config/pdfcop.conf.yml +237 -0
  5. data/bin/gui/about.rb +46 -0
  6. data/bin/gui/config.rb +132 -0
  7. data/bin/gui/file.rb +385 -0
  8. data/bin/gui/hexdump.rb +74 -0
  9. data/bin/gui/hexview.rb +91 -0
  10. data/bin/gui/imgview.rb +72 -0
  11. data/bin/gui/menu.rb +392 -0
  12. data/bin/gui/properties.rb +132 -0
  13. data/bin/gui/signing.rb +635 -0
  14. data/bin/gui/textview.rb +107 -0
  15. data/bin/gui/treeview.rb +409 -0
  16. data/bin/gui/walker.rb +282 -0
  17. data/bin/gui/xrefs.rb +79 -0
  18. data/bin/pdf2graph +121 -0
  19. data/bin/pdf2ruby +353 -0
  20. data/bin/pdfcocoon +104 -0
  21. data/bin/pdfcop +455 -0
  22. data/bin/pdfdecompress +104 -0
  23. data/bin/pdfdecrypt +95 -0
  24. data/bin/pdfencrypt +112 -0
  25. data/bin/pdfextract +221 -0
  26. data/bin/pdfmetadata +123 -0
  27. data/bin/pdfsh +13 -0
  28. data/bin/pdfwalker +7 -0
  29. data/bin/shell/.irbrc +104 -0
  30. data/bin/shell/console.rb +136 -0
  31. data/bin/shell/hexdump.rb +83 -0
  32. data/origami.rb +36 -0
  33. data/origami/3d.rb +239 -0
  34. data/origami/acroform.rb +321 -0
  35. data/origami/actions.rb +299 -0
  36. data/origami/adobe/fdf.rb +259 -0
  37. data/origami/adobe/ppklite.rb +489 -0
  38. data/origami/annotations.rb +775 -0
  39. data/origami/array.rb +187 -0
  40. data/origami/boolean.rb +101 -0
  41. data/origami/catalog.rb +486 -0
  42. data/origami/destinations.rb +213 -0
  43. data/origami/dictionary.rb +188 -0
  44. data/origami/docmdp.rb +96 -0
  45. data/origami/encryption.rb +1293 -0
  46. data/origami/export.rb +283 -0
  47. data/origami/file.rb +222 -0
  48. data/origami/filters.rb +250 -0
  49. data/origami/filters/ascii.rb +189 -0
  50. data/origami/filters/ccitt.rb +515 -0
  51. data/origami/filters/crypt.rb +47 -0
  52. data/origami/filters/dct.rb +61 -0
  53. data/origami/filters/flate.rb +112 -0
  54. data/origami/filters/jbig2.rb +63 -0
  55. data/origami/filters/jpx.rb +53 -0
  56. data/origami/filters/lzw.rb +195 -0
  57. data/origami/filters/predictors.rb +276 -0
  58. data/origami/filters/runlength.rb +117 -0
  59. data/origami/font.rb +209 -0
  60. data/origami/functions.rb +93 -0
  61. data/origami/graphics.rb +33 -0
  62. data/origami/graphics/colors.rb +191 -0
  63. data/origami/graphics/instruction.rb +126 -0
  64. data/origami/graphics/path.rb +154 -0
  65. data/origami/graphics/patterns.rb +180 -0
  66. data/origami/graphics/state.rb +164 -0
  67. data/origami/graphics/text.rb +224 -0
  68. data/origami/graphics/xobject.rb +493 -0
  69. data/origami/header.rb +90 -0
  70. data/origami/linearization.rb +318 -0
  71. data/origami/metadata.rb +114 -0
  72. data/origami/name.rb +170 -0
  73. data/origami/null.rb +75 -0
  74. data/origami/numeric.rb +188 -0
  75. data/origami/obfuscation.rb +233 -0
  76. data/origami/object.rb +527 -0
  77. data/origami/outline.rb +59 -0
  78. data/origami/page.rb +559 -0
  79. data/origami/parser.rb +268 -0
  80. data/origami/parsers/fdf.rb +45 -0
  81. data/origami/parsers/pdf.rb +27 -0
  82. data/origami/parsers/pdf/linear.rb +113 -0
  83. data/origami/parsers/ppklite.rb +86 -0
  84. data/origami/pdf.rb +1144 -0
  85. data/origami/reference.rb +113 -0
  86. data/origami/signature.rb +474 -0
  87. data/origami/stream.rb +575 -0
  88. data/origami/string.rb +416 -0
  89. data/origami/trailer.rb +173 -0
  90. data/origami/webcapture.rb +87 -0
  91. data/origami/xfa.rb +3027 -0
  92. data/origami/xreftable.rb +447 -0
  93. data/templates/patterns.rb +66 -0
  94. data/templates/widgets.rb +173 -0
  95. data/templates/xdp.rb +92 -0
  96. data/tests/dataset/test.dummycrt +28 -0
  97. data/tests/dataset/test.dummykey +27 -0
  98. data/tests/tc_actions.rb +32 -0
  99. data/tests/tc_annotations.rb +85 -0
  100. data/tests/tc_pages.rb +37 -0
  101. data/tests/tc_pdfattach.rb +24 -0
  102. data/tests/tc_pdfencrypt.rb +110 -0
  103. data/tests/tc_pdfnew.rb +32 -0
  104. data/tests/tc_pdfparse.rb +98 -0
  105. data/tests/tc_pdfsig.rb +37 -0
  106. data/tests/tc_streams.rb +129 -0
  107. data/tests/ts_pdf.rb +45 -0
  108. metadata +193 -0
@@ -0,0 +1,575 @@
1
+ =begin
2
+
3
+ = File
4
+ stream.rb
5
+
6
+ = Info
7
+ Origami is free software: you can redistribute it and/or modify
8
+ it under the terms of the GNU Lesser General Public License as published by
9
+ the Free Software Foundation, either version 3 of the License, or
10
+ (at your option) any later version.
11
+
12
+ Origami is distributed in the hope that it will be useful,
13
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
14
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
+ GNU Lesser General Public License for more details.
16
+
17
+ You should have received a copy of the GNU Lesser General Public License
18
+ along with Origami. If not, see <http://www.gnu.org/licenses/>.
19
+
20
+ =end
21
+
22
+ require 'strscan'
23
+
24
+ module Origami
25
+
26
# Error raised by Stream when a stream object cannot be parsed, filtered or configured.
class InvalidStreamObjectError < InvalidObjectError #:nodoc:
end
28
+
29
+ #
30
+ # Class representing a PDF Stream Object.
31
+ # Streams can be used to hold any kind of data, especially binary data.
32
+ #
33
+ class Stream
34
+
35
include Origami::Object
include StandardObject

# Regexp sources for the keywords that open and close the stream body.
TOKENS = [ "stream" + WHITECHARS_NORET + "\\r?\\n", "endstream" ] #:nodoc:

@@regexp_open = Regexp.new(WHITESPACES + TOKENS[0])
@@regexp_close = Regexp.new(TOKENS[1])

#
# Filter names accepted by the encoding and decoding helpers.
# Original author's note: only the first five are actually implemented, the
# remaining ones mainly concern image data processing (JPEG, JPEG2000 ...).
#
@@defined_filters = [
  :ASCIIHexDecode, :ASCII85Decode, :LZWDecode, :FlateDecode, :RunLengthDecode,
  # TODO
  :CCITTFaxDecode, :JBIG2Decode, :DCTDecode, :JPXDecode
]

attr_accessor :dictionary

# Entries of the stream dictionary.
field :Length, :Type => Integer, :Required => true
field :Filter, :Type => [ Name, Array ]
field :DecodeParms, :Type => [ Dictionary, Array ]
field :F, :Type => Dictionary, :Version => "1.2"
field :FFilter, :Type => [ Name, Array ], :Version => "1.2"
field :FDecodeParms, :Type => [ Dictionary, Array ], :Version => "1.2"
field :DL, :Type => Integer, :Version => "1.5"
69
+
70
#
# Builds a new PDF Stream, flagged as an indirect object.
# _data_:: The uncompressed content of the Stream.
# _dictionary_:: A hash of Stream attributes, wrapped into a Dictionary owned by this Stream.
#
def initialize(data = "", dictionary = {})
  super()

  set_indirect(true)

  @dictionary = Dictionary.new(dictionary)
  @data = data

  # The attribute dictionary is a child of the stream object.
  @dictionary.parent = self
end
83
+
84
#
# Build hook: makes sure the encoded content exists, then runs the inherited hook.
#
def pre_build
  self.encode!

  super
end
89
+
90
#
# Build hook: stores the size of the raw content under /Length, then runs the inherited hook.
#
def post_build
  self.Length = @rawdata.size

  super
end
95
+
96
#
# Parses a stream object from a scanner positioned on its dictionary.
# _stream_:: The scanner to read from (+skip+, +scan_until+, +peek+, +pos+ and +rest_size+ are used).
#
# Returns the parsed Dictionary alone when it is not followed by the "stream" keyword.
# Otherwise returns a Stream, or a specialised class when type guessing is enabled
# and /Type (or /Subtype for XObjects) is registered.
# Raises InvalidStreamObjectError when no "endstream" keyword can be found.
#
def self.parse(stream) #:nodoc:
  dictionary = Dictionary.parse(stream)
  return dictionary if not stream.skip(@@regexp_open)

  # /Length is only trusted when it is a direct integer that fits in the
  # remaining input. A negative or oversized value would make peek / pos=
  # raise ArgumentError / RangeError, so such values (like non-integer ones)
  # fall back to scanning for the closing keyword.
  length = dictionary[:Length]
  declared = length.is_a?(Integer) ? length.value : nil

  if declared and declared >= 0 and declared <= stream.rest_size
    rawdata = stream.peek(declared)
    stream.pos += declared

    unmatched = stream.scan_until(@@regexp_close)
    if unmatched.nil?
      raise InvalidStreamObjectError,
        "Stream shall end with a 'endstream' statement"
    end

    # Whatever lies between the declared end and the keyword is kept.
    rawdata << unmatched
  else
    rawdata = stream.scan_until(@@regexp_close)
    if rawdata.nil?
      raise InvalidStreamObjectError,
        "Stream shall end with a 'endstream' statement"
    end
  end

  # Select the class to instantiate; plain Stream unless type guessing finds a better match.
  klass = Stream
  if Origami::OPTIONS[:enable_type_guessing]
    type, subtype = dictionary[:Type], dictionary[:Subtype]

    if type.is_a?(Name)
      if @@stm_special_types.include?(type.value)
        klass = @@stm_special_types[type.value]
      elsif type == :XObject and subtype.is_a?(Name) and @@stm_xobj_subtypes.include?(subtype.value)
        klass = @@stm_xobj_subtypes[subtype.value]
      end
    end
  end
  stm = klass.new('', dictionary.to_h)

  # Drop the closing keyword, then the single end-of-line marker ("\n" or "\r\n") preceding it.
  rawdata.chomp!(TOKENS.last)

  if rawdata[-1,1] == "\n"
    if rawdata[-2,1] == "\r"
      rawdata = rawdata[0, rawdata.size - 2]
    else
      rawdata = rawdata[0, rawdata.size - 1]
    end
  end

  stm.rawdata = rawdata
  stm.file_offset = dictionary.file_offset

  stm
end
160
+
161
#
# Attaches predictor parameters to the Flate (or, failing that, LZW) filter layer.
# _predictor_:: Value stored under /Predictor.
# _colors_:: Value stored under /Colors, only when different from 1.
# _bitspercomponent_:: Value stored under /BitsPerComponent, only when different from 8.
# _columns_:: Value stored under /Columns, only when different from 1.
#
# Raises InvalidStreamObjectError when the stream uses neither a Flate nor an LZW filter.
# Returns self.
#
def set_predictor(predictor, colors = 1, bitspercomponent = 8, columns = 1)
  filters = self.Filter
  filters = [ filters ] unless filters.is_a?(::Array)

  # `||` is required here: `or` binds looser than `=`, which left the layer
  # unset (nil) for streams that only use LZW.
  layer = filters.index(:FlateDecode) || filters.index(:LZWDecode)
  if layer.nil?
    raise InvalidStreamObjectError, 'Predictor functions can only be used with Flate or LZW filters'
  end

  params = Filter::LZW::DecodeParms.new
  params[:Predictor] = predictor
  params[:Colors] = colors if colors != 1
  params[:BitsPerComponent] = bitspercomponent if bitspercomponent != 8
  params[:Columns] = columns if columns != 1

  set_decode_params(layer, params)

  self
end
182
+
183
# A Stream is its own value: returns the receiver.
def value #:nodoc:
  self
end
186
+
187
#
# Returns the uncompressed stream content, decoding the raw content on first access.
#
def data
  decode! if @data.nil?

  @data
end
195
+
196
#
# Replaces the uncompressed stream content.
# The cached raw content is discarded so that it gets rebuilt from the new data.
# _str_:: The new uncompressed data.
#
def data=(str)
  @rawdata = nil
  @data = str
end
204
+
205
#
# Returns the raw (encoded) stream content, encoding the data on first access.
#
def rawdata
  encode! if @rawdata.nil?

  @rawdata
end
213
+
214
#
# Replaces the raw (encoded) stream content.
# The cached decoded data is discarded so that it gets rebuilt from the new raw content.
# _str_:: The new raw data.
#
def rawdata=(str)
  @rawdata = str
  @data = nil
end
222
+
223
#
# Uncompresses the stream: rebuilds the decoded data from the raw content by
# applying each filter listed under /Filter, first to last.
# An EncryptedStream is decrypted beforehand. Nothing is decoded when the data
# is already available.
# Raises InvalidStreamObjectError when /Filter is neither a Name nor an Array.
# Returns self.
#
def decode!
  self.decrypt! if is_a?(Encryption::EncryptedStream)
  return self if is_decoded?

  filters = self.Filter
  if filters.nil?
    # Unfiltered stream: the data is a plain copy of the raw content.
    @data = @rawdata.dup
    return self
  end

  case filters
  when Array, Name
    dparams = self.DecodeParms || []

    # A single filter or parameter set is handled as a one-element list.
    dparams = [ dparams ] unless dparams.is_a?(::Array)
    filters = [ filters ] unless filters.is_a?(::Array)

    @data = @rawdata.dup
    0.upto(filters.length - 1) do |layer|
      candidate = dparams[layer]
      params = candidate.is_a?(Dictionary) ? candidate : {}

      @data = decode_data(@data, filters[layer], params)
    end
  else
    raise InvalidStreamObjectError, "Invalid Filter type parameter"
  end

  self
end
257
+
258
#
# Compresses the stream: rebuilds the raw content from the decoded data by
# applying each filter listed under /Filter, last to first, then refreshes /Length.
# Nothing is done when the raw content is already available.
# Raises InvalidStreamObjectError when /Filter is neither a Name nor an Array.
# Returns self.
#
def encode!
  return self if is_encoded?

  filters = self.Filter
  if filters.nil?
    # Unfiltered stream: the raw content is a plain copy of the data.
    @rawdata = @data.dup
  else
    case filters
    when Array, Name
      dparams = self.DecodeParms || []

      # A single filter or parameter set is handled as a one-element list.
      dparams = [ dparams ] unless dparams.is_a?(::Array)
      filters = [ filters ] unless filters.is_a?(::Array)

      @rawdata = @data.dup
      (0...filters.length).reverse_each do |layer|
        candidate = dparams[layer]
        params = candidate.is_a?(Dictionary) ? candidate : {}

        @rawdata = encode_data(@rawdata, filters[layer], params)
      end
    else
      raise InvalidStreamObjectError, "Invalid filter type parameter"
    end
  end

  self.Length = @rawdata.length

  self
end
293
+
294
#
# Serializes the stream: dictionary, "stream" keyword, raw content and closing keyword.
# The assembled text is handed to the inherited to_s.
# _indent_:: Indentation level forwarded to the dictionary serialization.
#
def to_s(indent = 1) #:nodoc:
  # The dictionary is rendered before the raw content is requested, as in the
  # original statement order.
  output = ""
  output << @dictionary.to_s(indent)
  output << "stream" << EOL
  output << self.rawdata << EOL << TOKENS.last

  super(output)
end
305
+
306
# Reads the entry _key_ from the stream dictionary.
def [](key) #:nodoc:
  @dictionary[key]
end
309
+
310
# Writes _val_ under _key_ in the stream dictionary.
def []=(key, val) #:nodoc:
  @dictionary[key] = val
end
313
+
314
# Iterates over the keys of the stream dictionary.
def each_key(&b) #:nodoc:
  @dictionary.each_key(&b)
end
317
+
318
# Always reports Stream, whatever the receiver's class.
def real_type
  Stream
end
319
+
320
+ private
321
+
322
# True once the decoded data is available.
def is_decoded? #:nodoc:
  !@data.nil?
end
325
+
326
# True once the raw (encoded) content is available.
def is_encoded? #:nodoc:
  !@rawdata.nil?
end
329
+
330
#
# Stores _params_ as the decode parameters of filter layer _layer_.
# _layer_:: Zero-based index of the filter the parameters belong to.
# _params_:: The parameters to store.
#
# Any existing /DecodeParms that is not an array is replaced. Missing
# intermediate layers are padded with Null objects. A single-layer result is
# stored directly instead of a one-element array.
# Returns self.
#
def set_decode_params(layer, params) #:nodoc:
  dparms = self.DecodeParms
  dparms = [] unless dparms.is_a?(::Array)

  # Pad up to the requested layer. The block form gives each slot its own
  # Null instance instead of sharing one object between all of them.
  missing = layer - dparms.length + 1
  dparms.concat(::Array.new(missing) { Null.new }) if missing > 0

  dparms[layer] = params

  # Always write the final value back: the dictionary may keep a converted
  # copy of what it is given (presumably the case for Origami's Dictionary -
  # to be confirmed), so edits made after an early store could be lost.
  @dictionary[:DecodeParms] = dparms.length == 1 ? dparms.first : dparms

  self
end
345
+
346
#
# Runs _data_ through the decoder of a single filter.
# _data_:: The content to decode.
# _filter_:: The filter name object; its +value+ must be one of the defined filters.
# _params_:: The decode parameters handed to the filter.
#
# Returns the decoded content. On invalid Flate data, returns whatever the
# zlib stream could still flush instead of failing.
# Raises InvalidStreamObjectError for an unknown filter or when decoding fails.
#
def decode_data(data, filter, params) #:nodoc:
  unless @@defined_filters.include?(filter.value)
    raise InvalidStreamObjectError, "Unknown filter : #{filter}"
  end

  begin
    # The filter class name is the filter name without its "Decode" suffix.
    Origami::Filter.const_get(filter.value.to_s.sub(/Decode$/,"")).decode(data, params)

  rescue Filter::InvalidFlateDataError => flate_e
    # Best effort: keep the part that was inflated before the error.
    return flate_e.zlib_stream.flush_next_out

  # Not `rescue Exception`: Interrupt, SystemExit or NoMemoryError must not be
  # turned into a stream error. NotImplementedError is listed explicitly since
  # it is not a StandardError (presumably raised by unimplemented filters -
  # to be confirmed).
  rescue StandardError, NotImplementedError => e
    raise InvalidStreamObjectError, "Error while decoding stream #{self.reference}\n\t-> [#{e.class}] #{e.message}"
  end
end
361
+
362
#
# Runs _data_ through the encoder of a single filter.
# _data_:: The content to encode.
# _filter_:: The filter name object; its +value+ must be one of the defined filters.
# _params_:: The parameters handed to the filter.
#
# Returns the encoded content; the ASCII hex and ASCII85 outputs get the
# filter's EOD marker appended.
# Raises InvalidStreamObjectError for an unknown filter.
#
def encode_data(data, filter, params) #:nodoc:
  name = filter.value
  unless @@defined_filters.include?(name)
    raise InvalidStreamObjectError, "Unknown filter : #{filter}"
  end

  # The filter class name is the filter name without its "Decode" suffix.
  codec = Origami::Filter.const_get(name.to_s.sub(/Decode$/,""))
  encoded = codec.encode(data, params)

  encoded << codec::EOD if name == :ASCIIHexDecode or name == :ASCII85Decode

  encoded
end
375
+
376
+ end
377
+
378
#
# Class representing an external Stream, i.e. a Stream whose /F entry
# designates a file specification.
#
class ExternalStream < Stream

  #
  # Creates a new external Stream with empty inline data.
  # _filespec_:: The file specification stored under /F.
  # _hash_:: Additional Stream attributes. The caller's hash is left untouched.
  #
  def initialize(filespec, hash = {})
    # Work on a copy so that the caller's hash is not modified.
    attributes = hash.dup
    attributes[:F] = filespec

    super('', attributes)
  end

end
390
+
391
# Error raised by ObjectStream when an embedded object cannot be recognized.
class InvalidObjectStreamObjectError < InvalidStreamObjectError #:nodoc:
end
393
+
394
+ #
395
+ # Class representing a Stream containing other Objects.
396
+ #
397
+ class ObjectStream < Stream
398
+
399
include Enumerable

# NOTE(review): presumably the positions of the number and the object inside
# a [number, object] pair - not referenced in this file, confirm against callers.
NUM = 0 #:nodoc:
OBJ = 1 #:nodoc:

# Entries specific to the object stream dictionary.
field :Type, :Type => Name, :Default => :ObjStm, :Required => true, :Version => "1.5"
field :N, :Type => Integer, :Required => true
field :First, :Type => Integer, :Required => true
field :Extends, :Type => Stream
408
+
409
#
# Creates a new Object Stream. The embedded objects are loaded lazily.
# _rawdata_:: The Stream data, forwarded as the first argument of Stream#initialize.
# _dictionary_:: A hash of attributes to set to the Stream.
#
def initialize(rawdata = "", dictionary = {})
  # nil means "not loaded yet"; the accessors load the objects on demand.
  @objects = nil

  super(rawdata, dictionary)
end
419
+
420
#
# Build hook: serializes the embedded objects into the stream data.
# The data starts with a prolog of "number offset" pairs (objects sorted by
# number), followed by the objects themselves. /N receives the object count
# and /First the size of the prolog. The inherited hook runs afterwards.
#
def pre_build #:nodoc:
  load! if @objects.nil?

  header = ""
  body = ""
  offset = 0
  @objects.sort.each do |num, obj|
    # Objects are written in their direct form; the indirect flag is restored afterwards.
    obj.set_indirect(false)
    obj.objstm_offset = offset

    header << "#{num} #{offset} "
    serialized = "#{obj} "

    offset += serialized.size
    body << serialized
    obj.set_indirect(true)
  end

  # Go through the setter so that any previously cached raw content is
  # dropped; assigning @data directly would leave the stale encoding in place
  # and the new content would never be re-encoded.
  self.data = header + body

  @dictionary[:N] = @objects.size
  @dictionary[:First] = header.size

  super
end
446
+
447
#
# Adds a new Object to this Stream and returns a Reference to it.
# _object_:: The Object to append.
#
# Raises InvalidObjectError when the object has a non-zero generation or is a Stream.
#
def <<(object)
  raise InvalidObjectError, "Cannot store an object with generation > 0 in an ObjectStream" unless object.generation == 0
  raise InvalidObjectError, "Cannot store a Stream in an ObjectStream" if object.is_a?(Stream)

  load! if @objects.nil?

  # Objects without a number get a fresh one from the owning PDF.
  if object.no == 0
    object.no, object.generation = @pdf.alloc_new_object_number
  end

  object.set_indirect(true)   # object is indirect
  object.parent = self        # set this stream as the parent
  object.set_pdf(@pdf)        # indirect objects need pdf information
  @objects[object.no] = object

  Reference.new(object.no, 0)
end
alias :insert :<<
472
+
473
#
# Removes Object number _no_ from the Stream and returns it (nil when absent).
#
def delete(no)
  load! if @objects.nil?

  @objects.delete(no)
end
481
+
482
#
# Returns the index of Object _no_ among the embedded objects sorted by
# number, or nil when the Stream does not contain it.
# _no_:: The Object number.
#
def index(no)
  # Load on demand like every other accessor; without this an unloaded
  # stream always answered nil.
  load! if @objects.nil?

  @objects.keys.sort.index(no)
end
495
+
496
#
# Returns the embedded object registered under number _no_ (nil when absent).
# _no_:: The Object number.
#
def extract(no)
  load! if @objects.nil?

  @objects[no]
end
505
+
506
#
# Returns the [number, object] pair found at position _index_ once the
# embedded objects are sorted by number (nil when out of range).
# _index_:: The Object index in the ObjectStream.
#
def extract_by_index(index)
  load! if @objects.nil?

  @objects.sort[index]
end
515
+
516
#
# Tells whether an object numbered _no_ is contained in this stream.
# _no_:: The Object number.
#
def include?(no)
  load! if @objects.nil?

  @objects.key?(no)
end
525
+
526
#
# Yields each embedded object of the stream in turn.
#
def each(&b)
  load! if @objects.nil?

  @objects.values.each(&b)
end
534
+
535
#
# Returns the embedded objects as an array.
#
def objects
  load! if @objects.nil?

  @objects.values
end
543
+
544
+ private
545
+
546
#
# Decodes the stream and parses its embedded objects into @objects, a hash
# keyed by object number.
# The data starts with /N "number offset" pairs; the objects are then parsed
# one after the other, in the order of those pairs.
# Raises InvalidObjectStreamObjectError when the type of an embedded object
# cannot be determined.
#
def load! #:nodoc:
  decode!

  data = StringScanner.new(@data)
  nums = []
  offsets = []

  @dictionary[:N].to_i.times do
    nums << Integer.parse(data).to_i
    # Converted like the numbers, so that objstm_offset holds a plain integer
    # here as it does after pre_build.
    offsets << Integer.parse(data).to_i
  end

  @objects = {}
  nums.each_with_index do |num, i|
    type = Object.typeof(data)
    raise InvalidObjectStreamObjectError,
      "Bad embedded object format in object stream" if type.nil?

    embeddedobj = type.parse(data)
    embeddedobj.set_indirect(true)   # object is indirect
    embeddedobj.no = num             # object number
    embeddedobj.parent = self        # set this stream as the parent
    embeddedobj.set_pdf(@pdf)        # indirect objects need pdf information
    embeddedobj.objstm_offset = offsets[i]
    @objects[num] = embeddedobj
  end

end
574
+ end
575
+ end