pdf-reader 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,11 @@
1
+ v1.2.0 (28th August 2012)
2
+ - Feature: correctly extract text using surrogate pairs and ligatures
3
+ (thanks Nathaniel Madura)
4
+ - Speed optimisation: cache tokenised Form XObjects to avoid re-parsing them
5
+ - Feature: support opening documents with some junk bytes prepended to file
6
+ (thanks Paul Gallagher)
7
+ - Acrobat does this, so it seemed reasonable to add support
8
+
1
9
  v1.1.1 (9th May 2012)
2
10
  - bugfix release to improve parsing of some PDFs
3
11
 
@@ -1,8 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rubygems'
4
- $LOAD_PATH.unshift(File.dirname(__FILE__) + "/../lib")
5
-
6
4
  require 'pdf/reader'
7
5
 
8
6
  if ARGV.empty?
@@ -86,14 +86,15 @@ module ExtractImages
86
86
  tiff = header.dup
87
87
  tiff << short_tag.call( 256, 1, w ) # image width
88
88
  tiff << short_tag.call( 257, 1, h ) # image height
89
- tiff << long_tag.call( 258, 4, (header.size + (tag_count*12))) # bits per pixel
89
+ tiff << long_tag.call( 258, 4, (header.size + (tag_count*12) + 4)) # bits per pixel
90
90
  tiff << short_tag.call( 259, 1, 1 ) # compression
91
91
  tiff << short_tag.call( 262, 1, 5 ) # colorspace - separation
92
- tiff << long_tag.call( 273, 1, (10 + (tag_count*12) + 16) ) # data offset
92
+ tiff << long_tag.call( 273, 1, (10 + (tag_count*12) + 20) ) # data offset
93
93
  tiff << short_tag.call( 277, 1, 4 ) # samples per pixel
94
94
  tiff << long_tag.call( 279, 1, stream.unfiltered_data.size) # data byte size
95
95
  tiff << short_tag.call( 284, 1, 1 ) # planer config
96
96
  tiff << long_tag.call( 332, 1, 1) # inkset - CMYK
97
+ tiff << [0].pack("I") # next IFD pointer
97
98
  tiff << [bpc, bpc, bpc, bpc].pack("IIII")
98
99
  tiff << stream.unfiltered_data
99
100
  File.open(filename, "wb") { |file| file.write tiff }
@@ -119,10 +120,12 @@ module ExtractImages
119
120
  tiff << short_tag.call( 258, 1, 8 ) # bits per pixel
120
121
  tiff << short_tag.call( 259, 1, 1 ) # compression
121
122
  tiff << short_tag.call( 262, 1, 1 ) # colorspace - grayscale
122
- tiff << long_tag.call( 273, 1, (10 + (tag_count*12)) ) # data offset
123
+ tiff << long_tag.call( 273, 1, (10 + (tag_count*12) + 4) ) # data offset
123
124
  tiff << short_tag.call( 277, 1, 1 ) # samples per pixel
124
125
  tiff << long_tag.call( 279, 1, stream.unfiltered_data.size) # data byte size
125
126
  tiff << short_tag.call( 284, 1, 1 ) # planer config
127
+ tiff << [0].pack("I") # next IFD pointer
128
+ p stream.unfiltered_data.size
126
129
  tiff << stream.unfiltered_data
127
130
  File.open(filename, "wb") { |file| file.write tiff }
128
131
  end
@@ -144,12 +147,13 @@ module ExtractImages
144
147
  tiff = header.dup
145
148
  tiff << short_tag.call( 256, 1, w ) # image width
146
149
  tiff << short_tag.call( 257, 1, h ) # image height
147
- tiff << long_tag.call( 258, 3, (header.size + (tag_count*12))) # bits per pixel
150
+ tiff << long_tag.call( 258, 3, (header.size + (tag_count*12) + 4)) # bits per pixel
148
151
  tiff << short_tag.call( 259, 1, 1 ) # compression
149
152
  tiff << short_tag.call( 262, 1, 2 ) # colorspace - RGB
150
- tiff << long_tag.call( 273, 1, (header.size + (tag_count*12) + 12) ) # data offset
153
+ tiff << long_tag.call( 273, 1, (header.size + (tag_count*12) + 16) ) # data offset
151
154
  tiff << short_tag.call( 277, 1, 3 ) # samples per pixel
152
155
  tiff << long_tag.call( 279, 1, stream.unfiltered_data.size) # data byte size
156
+ tiff << [0].pack("I") # next IFD pointer
153
157
  tiff << [bpc, bpc, bpc].pack("III")
154
158
  tiff << stream.unfiltered_data
155
159
  File.open(filename, "wb") { |file| file.write tiff }
@@ -209,8 +213,9 @@ module ExtractImages
209
213
  + short_tag.call( 256, cols ) \
210
214
  + short_tag.call( 257, h ) \
211
215
  + short_tag.call( 259, 4 ) \
212
- + long_tag.call( 273, (10 + (5*12)) ) \
216
+ + long_tag.call( 273, (10 + (5*12) + 4) ) \
213
217
  + long_tag.call( 279, len) \
218
+ + [0].pack("I") \
214
219
  + stream.data
215
220
  File.open(filename, "wb") { |file| file.write tiff }
216
221
  end
@@ -25,9 +25,6 @@
25
25
  ################################################################################
26
26
 
27
27
  require 'stringio'
28
- require 'zlib'
29
-
30
- require 'ascii85'
31
28
 
32
29
  module PDF
33
30
  ################################################################################
@@ -113,6 +110,8 @@ module PDF
113
110
  #
114
111
  def initialize(input = nil, opts = {})
115
112
  if input # support the deprecated Reader API
113
+ @cache = PDF::Reader::ObjectCache.new
114
+ opts.merge!(:cache => @cache)
116
115
  @objects = PDF::Reader::ObjectHash.new(input, opts)
117
116
  end
118
117
  end
@@ -222,7 +221,7 @@ module PDF
222
221
  #
223
222
  def pages
224
223
  (1..self.page_count).map { |num|
225
- PDF::Reader::Page.new(@objects, num)
224
+ PDF::Reader::Page.new(@objects, num, :cache => @cache)
226
225
  }
227
226
  end
228
227
 
@@ -241,7 +240,7 @@ module PDF
241
240
  def page(num)
242
241
  num = num.to_i
243
242
  raise ArgumentError, "valid pages are 1 .. #{self.page_count}" if num < 1 || num > self.page_count
244
- PDF::Reader::Page.new(@objects, num)
243
+ PDF::Reader::Page.new(@objects, num, :cache => @cache)
245
244
  end
246
245
 
247
246
 
@@ -338,6 +337,13 @@ require 'pdf/reader/cmap'
338
337
  require 'pdf/reader/encoding'
339
338
  require 'pdf/reader/error'
340
339
  require 'pdf/reader/filter'
340
+ require 'pdf/reader/filter/ascii85'
341
+ require 'pdf/reader/filter/ascii_hex'
342
+ require 'pdf/reader/filter/depredict'
343
+ require 'pdf/reader/filter/flate'
344
+ require 'pdf/reader/filter/lzw'
345
+ require 'pdf/reader/filter/null'
346
+ require 'pdf/reader/filter/run_length'
341
347
  require 'pdf/reader/font'
342
348
  require 'pdf/reader/form_xobject'
343
349
  require 'pdf/reader/glyph_hash'
@@ -36,7 +36,7 @@ class PDF::Reader
36
36
  # the raw tokens into objects we can work with (strings, ints, arrays, etc)
37
37
  #
38
38
  class Buffer
39
- TOKEN_WHITESPACE=["\x00", "\x09", "\x0A", "\x0C", "\x0D", "\x20"]
39
+ TOKEN_WHITESPACE=[0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20]
40
40
 
41
41
  attr_reader :pos
42
42
 
@@ -232,18 +232,17 @@ class PDF::Reader
232
232
  finished = false
233
233
 
234
234
  while !finished
235
- chr = @io.read(1)
236
- codepoint = chr.to_s.unpack("C*").first
237
- if chr.nil?
235
+ byte = @io.getbyte
236
+ if byte.nil?
238
237
  finished = true # unbalanced params
239
- elsif (48..57).include?(codepoint) || (65..90).include?(codepoint) || (97..122).include?(codepoint)
240
- str << chr
241
- elsif codepoint <= 32
238
+ elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
239
+ str << byte.chr
240
+ elsif byte <= 32
242
241
  # ignore it
243
242
  else
244
243
  @tokens << str if str.size > 0
245
- @tokens << ">" if chr != ">"
246
- @tokens << chr
244
+ @tokens << ">" if byte != 0x3E # '>'
245
+ @tokens << byte.chr
247
246
  finished = true
248
247
  end
249
248
  end
@@ -263,19 +262,19 @@ class PDF::Reader
263
262
  count = 1
264
263
 
265
264
  while count > 0
266
- chr = @io.read(1)
267
- if chr.nil?
265
+ byte = @io.getbyte
266
+ if byte.nil?
268
267
  count = 0 # unbalanced params
269
- elsif chr == "\x5c"
270
- str << chr << @io.read(1).to_s
271
- elsif chr == "("
268
+ elsif byte == 0x5C
269
+ str << byte.chr << @io.getbyte.chr
270
+ elsif byte == 0x28 # "("
272
271
  str << "("
273
272
  count += 1
274
- elsif chr == ")"
273
+ elsif byte == 0x29 # ")"
275
274
  count -= 1
276
275
  str << ")" unless count == 0
277
276
  else
278
- str << chr unless count == 0
277
+ str << byte.chr unless count == 0
279
278
  end
280
279
  end
281
280
 
@@ -291,61 +290,68 @@ class PDF::Reader
291
290
  def prepare_regular_token
292
291
  tok = ""
293
292
 
294
- while chr = @io.read(1)
295
- case chr
296
- when "\x25"
293
+ while byte = @io.getbyte
294
+ case byte
295
+ when 0x25
297
296
  # comment, ignore everything until the next EOL char
298
297
  done = false
299
298
  while !done
300
- chr = @io.read(1)
301
- done = true if chr.nil? || chr == "\x0A" || chr == "\x0D"
299
+ byte = @io.getbyte
300
+ done = true if byte.nil? || byte == 0x0A || byte == 0x0D
302
301
  end
303
302
  when *TOKEN_WHITESPACE
304
303
  # white space, token finished
305
304
  @tokens << tok if tok.size > 0
306
305
 
307
306
  #If the token was empty, chomp the rest of the whitespace too
308
- while TOKEN_WHITESPACE.include?(peek_char) && tok.size == 0
309
- @io.read(1)
307
+ while TOKEN_WHITESPACE.include?(peek_byte) && tok.size == 0
308
+ @io.getbyte
310
309
  end
311
310
  tok = ""
312
311
  break
313
- when "\x3C"
312
+ when 0x3C
314
313
  # opening delimiter '<', start of new token
315
314
  @tokens << tok if tok.size > 0
316
- chr << @io.read(1) if peek_char == "\x3C" # check if token is actually '<<'
317
- @tokens << chr
315
+ if peek_byte == 0x3C # check if token is actually '<<'
316
+ @io.getbyte
317
+ @tokens << "<<"
318
+ else
319
+ @tokens << "<"
320
+ end
318
321
  tok = ""
319
322
  break
320
- when "\x3E"
323
+ when 0x3E
321
324
  # closing delimiter '>', start of new token
322
325
  @tokens << tok if tok.size > 0
323
- chr << @io.read(1) if peek_char == "\x3E" # check if token is actually '>>'
324
- @tokens << chr
326
+ if peek_byte == 0x3E # check if token is actually '>>'
327
+ @io.getbyte
328
+ @tokens << ">>"
329
+ else
330
+ @tokens << byte.chr
331
+ end
325
332
  tok = ""
326
333
  break
327
- when "\x28", "\x5B", "\x7B"
334
+ when 0x28, 0x5B, 0x7B
328
335
  # opening delimiter, start of new token
329
336
  @tokens << tok if tok.size > 0
330
- @tokens << chr
337
+ @tokens << byte.chr
331
338
  tok = ""
332
339
  break
333
- when "\x29", "\x5D", "\x7D"
340
+ when 0x29, 0x5D, 0x7D
334
341
  # closing delimiter
335
342
  @tokens << tok if tok.size > 0
336
- @tokens << chr
343
+ @tokens << byte.chr
337
344
  tok = ""
338
345
  break
339
- when "\x2F"
346
+ when 0x2F
340
347
  # PDF name, start of new token
341
348
  @tokens << tok if tok.size > 0
342
- @tokens << chr
343
- next_char = peek_char
344
- @tokens << "" if chr == "/" && [nil, " ", "\n"].include?(next_char)
349
+ @tokens << byte.chr
350
+ @tokens << "" if byte == 0x2F && [nil, 0x20, 0x0A].include?(peek_byte)
345
351
  tok = ""
346
352
  break
347
353
  else
348
- tok << chr
354
+ tok << byte.chr
349
355
  end
350
356
  end
351
357
 
@@ -355,10 +361,10 @@ class PDF::Reader
355
361
  # peek at the next character in the io stream, leaving the stream position
356
362
  # untouched
357
363
  #
358
- def peek_char
359
- chr = @io.read(1)
360
- @io.seek(-1, IO::SEEK_CUR) unless chr.nil?
361
- chr
364
+ def peek_byte
365
+ byte = @io.getbyte
366
+ @io.seek(-1, IO::SEEK_CUR) if byte
367
+ byte
362
368
  end
363
369
 
364
370
  # for a handful of tokens we want to tell the parser how to convert them
@@ -60,6 +60,10 @@ class PDF::Reader
60
60
  @map.size
61
61
  end
62
62
 
63
+ # Convert a glyph code into one or more Codepoints.
64
+ #
65
+ # Returns an array of Fixnums.
66
+ #
63
67
  def decode(c)
64
68
  # TODO: implement the conversion
65
69
  return c unless c.class == Fixnum
@@ -74,12 +78,23 @@ class PDF::Reader
74
78
  end
75
79
 
76
80
  def str_to_int(str)
77
- return nil if str.nil? || str.size == 0 || str.size >= 3
78
-
79
- if str.size == 1
80
- str.unpack("C*")[0]
81
+ return nil if str.nil? || str.size == 0
82
+ unpacked_string = if str.size == 1 # UTF-8
83
+ str.unpack("C*")
84
+ else # UTF-16
85
+ str.unpack("n*")
86
+ end
87
+ if unpacked_string.length == 1
88
+ unpacked_string
89
+ elsif unpacked_string.length == 2 && (unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF)
90
+ # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
91
+ # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
92
+ # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
93
+ [(unpacked_string[0] - 0xD800) * 0x400 + (unpacked_string[1] - 0xDC00) + 0x10000]
81
94
  else
82
- str.unpack("n*")[0]
95
+ # it is a bad idea to just return the first 16 bits, as this doesn't allow
96
+ # for ligatures for example fi (U+0066 U+0069)
97
+ unpacked_string
83
98
  end
84
99
  end
85
100
 
@@ -88,7 +103,7 @@ class PDF::Reader
88
103
  find = str_to_int(parser.parse_token)
89
104
  replace = str_to_int(parser.parse_token)
90
105
  while find && replace
91
- @map[find] = replace
106
+ @map[find[0]] = replace
92
107
  find = str_to_int(parser.parse_token)
93
108
  replace = str_to_int(parser.parse_token)
94
109
  end
@@ -114,21 +129,21 @@ class PDF::Reader
114
129
  end
115
130
 
116
131
  def bfrange_type_one(start_code, end_code, dst)
117
- start_code = str_to_int(start_code)
118
- end_code = str_to_int(end_code)
132
+ start_code = str_to_int(start_code)[0]
133
+ end_code = str_to_int(end_code)[0]
119
134
  dst = str_to_int(dst)
120
135
 
121
136
  # add all values in the range to our mapping
122
137
  (start_code..end_code).each_with_index do |val, idx|
123
- @map[val] = dst + idx
138
+ @map[val] = dst.length == 1 ? [dst[0] + idx] : [dst[0], dst[1] + 1]
124
139
  # ensure a single range does not exceed 255 chars
125
140
  raise PDF::Reader::MalformedPDFError, "a CMap bfrange cann't exceed 255 chars" if idx > 255
126
141
  end
127
142
  end
128
143
 
129
144
  def bfrange_type_two(start_code, end_code, dst)
130
- start_code = str_to_int(start_code)
131
- end_code = str_to_int(end_code)
145
+ start_code = str_to_int(start_code)[0]
146
+ end_code = str_to_int(end_code)[0]
132
147
  from_range = (start_code..end_code)
133
148
 
134
149
  # add all values in the range to our mapping
@@ -22,7 +22,6 @@
22
22
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
23
  #
24
24
  ################################################################################
25
- require 'zlib'
26
25
 
27
26
  class PDF::Reader
28
27
  ################################################################################
@@ -30,7 +29,7 @@ class PDF::Reader
30
29
  # support for features like compression and encryption. This class is for decoding that
31
30
  # content.
32
31
  #
33
- class Filter # :nodoc:
32
+ module Filter # :nodoc:
34
33
 
35
34
  ################################################################################
36
35
  # creates a new filter for decoding content.
@@ -38,242 +37,20 @@ class PDF::Reader
38
37
  # Filters that are only used to encode image data are accepted, but the data is
39
38
  # returned untouched. At this stage PDF::Reader has no need to decode images.
40
39
  #
41
- def initialize (name, options = nil)
42
- @options = options
43
-
40
+ def self.with(name, options = {})
44
41
  case name.to_sym
45
- when :ASCII85Decode then @filter = :ascii85
46
- when :ASCIIHexDecode then @filter = :asciihex
47
- when :CCITTFaxDecode then @filter = nil
48
- when :DCTDecode then @filter = nil
49
- when :FlateDecode then @filter = :flate
50
- when :JBIG2Decode then @filter = nil
51
- when :JPXDecode then @filter = nil
52
- when :LZWDecode then @filter = :lzw
53
- when :RunLengthDecode then @filter = :runlength
42
+ when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
43
+ when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
44
+ when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
45
+ when :DCTDecode then PDF::Reader::Filter::Null.new(options)
46
+ when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
47
+ when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
48
+ when :JPXDecode then PDF::Reader::Filter::Null.new(options)
49
+ when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
50
+ when :RunLengthDecode then PDF::Reader::Filter::RunLength.new(options)
54
51
  else
55
52
  raise UnsupportedFeatureError, "Unknown filter: #{name}"
56
53
  end
57
54
  end
58
- ################################################################################
59
- # attempts to decode the specified data with the current filter
60
- #
61
- # Filters that are only used to encode image data are accepted, but the data is
62
- # returned untouched. At this stage PDF::Reader has no need to decode images.
63
- #
64
- def filter (data)
65
- # leave the data untouched if we don't support the required filter
66
- return data if @filter.nil?
67
-
68
- # decode the data
69
- self.send(@filter, data)
70
- end
71
- ################################################################################
72
- # Decode the specified data using the Ascii85 algorithm. Relies on the AScii85
73
- # rubygem.
74
- #
75
- def ascii85(data)
76
- data = "<~#{data}" unless data.to_s[0,2] == "<~"
77
- Ascii85::decode(data)
78
- rescue Exception => e
79
- # Oops, there was a problem decoding the stream
80
- raise MalformedPDFError, "Error occured while decoding an ASCII85 stream (#{e.class.to_s}: #{e.to_s})"
81
- end
82
- ################################################################################
83
- # Decode the specified data using the AsciiHex algorithm.
84
- #
85
- def asciihex(data)
86
- data.chop! if data[-1,1] == ">"
87
- data = data[1,data.size] if data[0,1] == "<"
88
- data.gsub!(/[^A-Fa-f0-9]/,"")
89
- data << "0" if data.size % 2 == 1
90
- data.scan(/.{2}/).map { |s| s.hex.chr }.join("")
91
- rescue Exception => e
92
- # Oops, there was a problem decoding the stream
93
- raise MalformedPDFError, "Error occured while decoding an ASCIIHex stream (#{e.class.to_s}: #{e.to_s})"
94
- end
95
- ################################################################################
96
- # Decode the specified data with the Zlib compression algorithm
97
- def flate (data)
98
- deflated = nil
99
- begin
100
- deflated = Zlib::Inflate.new.inflate(data)
101
- rescue Zlib::DataError => e
102
- # by default, Ruby's Zlib assumes the data it's inflating
103
- # is RFC1951 deflated data, wrapped in a RFC1951 zlib container.
104
- # If that fails, then use an undocumented 'feature' to attempt to inflate
105
- # the data as a raw RFC1951 stream.
106
- #
107
- # See
108
- # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
109
- # - http://www.gzip.org/zlib/zlib_faq.html#faq38
110
- deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
111
- end
112
- depredict(deflated, @options)
113
- rescue Exception => e
114
- # Oops, there was a problem inflating the stream
115
- raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
116
- end
117
- ################################################################################
118
- # Decode the specified data with the LZW compression algorithm
119
- def lzw(data)
120
- data = PDF::Reader::LZW.decode(data)
121
- depredict(data, @options)
122
- end
123
- ################################################################################
124
- # Decode the specified data with the RunLengthDecode compression algorithm
125
- def runlength(data)
126
- pos = 0
127
- out = ""
128
-
129
- while pos < data.length
130
- if data.respond_to?(:getbyte)
131
- length = data.getbyte(pos)
132
- else
133
- length = data[pos]
134
- end
135
- pos += 1
136
-
137
- case
138
- when length == 128
139
- break
140
- when length < 128
141
- # When the length is < 128, we copy the following length+1 bytes
142
- # literally.
143
- out << data[pos, length + 1]
144
- pos += length
145
- else
146
- # When the length is > 128, we copy the next byte (257 - length)
147
- # times; i.e., "\xFA\x00" ([250, 0]) will expand to
148
- # "\x00\x00\x00\x00\x00\x00\x00".
149
- out << data[pos, 1] * (257 - length)
150
- end
151
-
152
- pos += 1
153
- end
154
-
155
- out
156
- end
157
- ################################################################################
158
- def depredict(data, opts = {})
159
- predictor = (opts || {})[:Predictor].to_i
160
-
161
- case predictor
162
- when 0, 1 then
163
- data
164
- when 2 then
165
- tiff_depredict(data, opts)
166
- when 10, 11, 12, 13, 14, 15 then
167
- png_depredict(data, opts)
168
- else
169
- raise MalformedPDFError, "Unrecognised predictor value (#{predictor})"
170
- end
171
- end
172
- ################################################################################
173
- def tiff_depredict(data, opts = {})
174
- data = data.unpack("C*")
175
- unfiltered = []
176
- bpc = opts[:BitsPerComponent] || 8
177
- pixel_bits = bpc * opts[:Colors]
178
- pixel_bytes = pixel_bits / 8
179
- line_len = (pixel_bytes * opts[:Columns])
180
- pos = 0
181
-
182
- if bpc != 8
183
- raise UnsupportedFeatureError, "TIFF predictor onlys supports 8 Bits Per Component"
184
- end
185
-
186
- until pos > data.size
187
- row_data = data[pos, line_len]
188
- row_data.each_with_index do |byte, index|
189
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
190
- row_data[index] = (byte + left) % 256
191
- end
192
- unfiltered += row_data
193
- pos += line_len
194
- end
195
-
196
- unfiltered.pack("C*")
197
- end
198
- ################################################################################
199
- def png_depredict(data, opts = {})
200
- return data if opts.nil? || opts[:Predictor].to_i < 10
201
-
202
- data = data.unpack("C*")
203
-
204
- pixel_bytes = opts[:Colors] || 1
205
- scanline_length = (pixel_bytes * opts[:Columns]) + 1
206
- row = 0
207
- pixels = []
208
- paeth, pa, pb, pc = nil
209
- until data.empty? do
210
- row_data = data.slice! 0, scanline_length
211
- filter = row_data.shift
212
- case filter
213
- when 0 # None
214
- when 1 # Sub
215
- row_data.each_with_index do |byte, index|
216
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
217
- row_data[index] = (byte + left) % 256
218
- #p [byte, left, row_data[index]]
219
- end
220
- when 2 # Up
221
- row_data.each_with_index do |byte, index|
222
- col = index / pixel_bytes
223
- upper = row == 0 ? 0 : pixels[row-1][col][index % pixel_bytes]
224
- row_data[index] = (upper + byte) % 256
225
- end
226
- when 3 # Average
227
- row_data.each_with_index do |byte, index|
228
- col = index / pixel_bytes
229
- upper = row == 0 ? 0 : pixels[row-1][col][index % pixel_bytes]
230
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
231
-
232
- row_data[index] = (byte + ((left + upper)/2).floor) % 256
233
- end
234
- when 4 # Paeth
235
- left = upper = upper_left = nil
236
- row_data.each_with_index do |byte, index|
237
- col = index / pixel_bytes
238
-
239
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
240
- if row.zero?
241
- upper = upper_left = 0
242
- else
243
- upper = pixels[row-1][col][index % pixel_bytes]
244
- upper_left = col.zero? ? 0 :
245
- pixels[row-1][col-1][index % pixel_bytes]
246
- end
247
-
248
- p = left + upper - upper_left
249
- pa = (p - left).abs
250
- pb = (p - upper).abs
251
- pc = (p - upper_left).abs
252
-
253
- paeth = if pa <= pb && pa <= pc
254
- left
255
- elsif pb <= pc
256
- upper
257
- else
258
- upper_left
259
- end
260
-
261
- row_data[index] = (byte + paeth) % 256
262
- end
263
- else
264
- raise ArgumentError, "Invalid filter algorithm #{filter}"
265
- end
266
-
267
- s = []
268
- row_data.each_slice pixel_bytes do |slice|
269
- s << slice
270
- end
271
- pixels << s
272
- row += 1
273
- end
274
-
275
- pixels.map { |bytes| bytes.flatten.pack("C*") }.join("")
276
- end
277
55
  end
278
56
  end
279
- ################################################################################