pdf-reader 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,11 @@
1
+ v1.2.0 (28th August 2012)
2
+ - Feature: correctly extract text using surrogate pairs and ligatures
3
+ (thanks Nathaniel Madura)
4
+ - Speed optimisation: cache tokenised Form XObjects to avoid re-parsing them
5
+ - Feature: support opening documents with some junk bytes prepended to file
6
+ (thanks Paul Gallagher)
7
+ - Acrobat does this, so it seemed reasonable to add support
8
+
1
9
  v1.1.1 (9th May 2012)
2
10
  - bugfix release to improve parsing of some PDFs
3
11
 
@@ -1,8 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require 'rubygems'
4
- $LOAD_PATH.unshift(File.dirname(__FILE__) + "/../lib")
5
-
6
4
  require 'pdf/reader'
7
5
 
8
6
  if ARGV.empty?
@@ -86,14 +86,15 @@ module ExtractImages
86
86
  tiff = header.dup
87
87
  tiff << short_tag.call( 256, 1, w ) # image width
88
88
  tiff << short_tag.call( 257, 1, h ) # image height
89
- tiff << long_tag.call( 258, 4, (header.size + (tag_count*12))) # bits per pixel
89
+ tiff << long_tag.call( 258, 4, (header.size + (tag_count*12) + 4)) # bits per pixel
90
90
  tiff << short_tag.call( 259, 1, 1 ) # compression
91
91
  tiff << short_tag.call( 262, 1, 5 ) # colorspace - separation
92
- tiff << long_tag.call( 273, 1, (10 + (tag_count*12) + 16) ) # data offset
92
+ tiff << long_tag.call( 273, 1, (10 + (tag_count*12) + 20) ) # data offset
93
93
  tiff << short_tag.call( 277, 1, 4 ) # samples per pixel
94
94
  tiff << long_tag.call( 279, 1, stream.unfiltered_data.size) # data byte size
95
95
  tiff << short_tag.call( 284, 1, 1 ) # planer config
96
96
  tiff << long_tag.call( 332, 1, 1) # inkset - CMYK
97
+ tiff << [0].pack("I") # next IFD pointer
97
98
  tiff << [bpc, bpc, bpc, bpc].pack("IIII")
98
99
  tiff << stream.unfiltered_data
99
100
  File.open(filename, "wb") { |file| file.write tiff }
@@ -119,10 +120,12 @@ module ExtractImages
119
120
  tiff << short_tag.call( 258, 1, 8 ) # bits per pixel
120
121
  tiff << short_tag.call( 259, 1, 1 ) # compression
121
122
  tiff << short_tag.call( 262, 1, 1 ) # colorspace - grayscale
122
- tiff << long_tag.call( 273, 1, (10 + (tag_count*12)) ) # data offset
123
+ tiff << long_tag.call( 273, 1, (10 + (tag_count*12) + 4) ) # data offset
123
124
  tiff << short_tag.call( 277, 1, 1 ) # samples per pixel
124
125
  tiff << long_tag.call( 279, 1, stream.unfiltered_data.size) # data byte size
125
126
  tiff << short_tag.call( 284, 1, 1 ) # planer config
127
+ tiff << [0].pack("I") # next IFD pointer
128
+ p stream.unfiltered_data.size
126
129
  tiff << stream.unfiltered_data
127
130
  File.open(filename, "wb") { |file| file.write tiff }
128
131
  end
@@ -144,12 +147,13 @@ module ExtractImages
144
147
  tiff = header.dup
145
148
  tiff << short_tag.call( 256, 1, w ) # image width
146
149
  tiff << short_tag.call( 257, 1, h ) # image height
147
- tiff << long_tag.call( 258, 3, (header.size + (tag_count*12))) # bits per pixel
150
+ tiff << long_tag.call( 258, 3, (header.size + (tag_count*12) + 4)) # bits per pixel
148
151
  tiff << short_tag.call( 259, 1, 1 ) # compression
149
152
  tiff << short_tag.call( 262, 1, 2 ) # colorspace - RGB
150
- tiff << long_tag.call( 273, 1, (header.size + (tag_count*12) + 12) ) # data offset
153
+ tiff << long_tag.call( 273, 1, (header.size + (tag_count*12) + 16) ) # data offset
151
154
  tiff << short_tag.call( 277, 1, 3 ) # samples per pixel
152
155
  tiff << long_tag.call( 279, 1, stream.unfiltered_data.size) # data byte size
156
+ tiff << [0].pack("I") # next IFD pointer
153
157
  tiff << [bpc, bpc, bpc].pack("III")
154
158
  tiff << stream.unfiltered_data
155
159
  File.open(filename, "wb") { |file| file.write tiff }
@@ -209,8 +213,9 @@ module ExtractImages
209
213
  + short_tag.call( 256, cols ) \
210
214
  + short_tag.call( 257, h ) \
211
215
  + short_tag.call( 259, 4 ) \
212
- + long_tag.call( 273, (10 + (5*12)) ) \
216
+ + long_tag.call( 273, (10 + (5*12) + 4) ) \
213
217
  + long_tag.call( 279, len) \
218
+ + [0].pack("I") \
214
219
  + stream.data
215
220
  File.open(filename, "wb") { |file| file.write tiff }
216
221
  end
@@ -25,9 +25,6 @@
25
25
  ################################################################################
26
26
 
27
27
  require 'stringio'
28
- require 'zlib'
29
-
30
- require 'ascii85'
31
28
 
32
29
  module PDF
33
30
  ################################################################################
@@ -113,6 +110,8 @@ module PDF
113
110
  #
114
111
  def initialize(input = nil, opts = {})
115
112
  if input # support the deprecated Reader API
113
+ @cache = PDF::Reader::ObjectCache.new
114
+ opts.merge!(:cache => @cache)
116
115
  @objects = PDF::Reader::ObjectHash.new(input, opts)
117
116
  end
118
117
  end
@@ -222,7 +221,7 @@ module PDF
222
221
  #
223
222
  def pages
224
223
  (1..self.page_count).map { |num|
225
- PDF::Reader::Page.new(@objects, num)
224
+ PDF::Reader::Page.new(@objects, num, :cache => @cache)
226
225
  }
227
226
  end
228
227
 
@@ -241,7 +240,7 @@ module PDF
241
240
  def page(num)
242
241
  num = num.to_i
243
242
  raise ArgumentError, "valid pages are 1 .. #{self.page_count}" if num < 1 || num > self.page_count
244
- PDF::Reader::Page.new(@objects, num)
243
+ PDF::Reader::Page.new(@objects, num, :cache => @cache)
245
244
  end
246
245
 
247
246
 
@@ -338,6 +337,13 @@ require 'pdf/reader/cmap'
338
337
  require 'pdf/reader/encoding'
339
338
  require 'pdf/reader/error'
340
339
  require 'pdf/reader/filter'
340
+ require 'pdf/reader/filter/ascii85'
341
+ require 'pdf/reader/filter/ascii_hex'
342
+ require 'pdf/reader/filter/depredict'
343
+ require 'pdf/reader/filter/flate'
344
+ require 'pdf/reader/filter/lzw'
345
+ require 'pdf/reader/filter/null'
346
+ require 'pdf/reader/filter/run_length'
341
347
  require 'pdf/reader/font'
342
348
  require 'pdf/reader/form_xobject'
343
349
  require 'pdf/reader/glyph_hash'
@@ -36,7 +36,7 @@ class PDF::Reader
36
36
  # the raw tokens into objects we can work with (strings, ints, arrays, etc)
37
37
  #
38
38
  class Buffer
39
- TOKEN_WHITESPACE=["\x00", "\x09", "\x0A", "\x0C", "\x0D", "\x20"]
39
+ TOKEN_WHITESPACE=[0x00, 0x09, 0x0A, 0x0C, 0x0D, 0x20]
40
40
 
41
41
  attr_reader :pos
42
42
 
@@ -232,18 +232,17 @@ class PDF::Reader
232
232
  finished = false
233
233
 
234
234
  while !finished
235
- chr = @io.read(1)
236
- codepoint = chr.to_s.unpack("C*").first
237
- if chr.nil?
235
+ byte = @io.getbyte
236
+ if byte.nil?
238
237
  finished = true # unbalanced params
239
- elsif (48..57).include?(codepoint) || (65..90).include?(codepoint) || (97..122).include?(codepoint)
240
- str << chr
241
- elsif codepoint <= 32
238
+ elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
239
+ str << byte.chr
240
+ elsif byte <= 32
242
241
  # ignore it
243
242
  else
244
243
  @tokens << str if str.size > 0
245
- @tokens << ">" if chr != ">"
246
- @tokens << chr
244
+ @tokens << ">" if byte != 0x3E # '>'
245
+ @tokens << byte.chr
247
246
  finished = true
248
247
  end
249
248
  end
@@ -263,19 +262,19 @@ class PDF::Reader
263
262
  count = 1
264
263
 
265
264
  while count > 0
266
- chr = @io.read(1)
267
- if chr.nil?
265
+ byte = @io.getbyte
266
+ if byte.nil?
268
267
  count = 0 # unbalanced params
269
- elsif chr == "\x5c"
270
- str << chr << @io.read(1).to_s
271
- elsif chr == "("
268
+ elsif byte == 0x5C
269
+ str << byte.chr << @io.getbyte.chr
270
+ elsif byte == 0x28 # "("
272
271
  str << "("
273
272
  count += 1
274
- elsif chr == ")"
273
+ elsif byte == 0x29 # ")"
275
274
  count -= 1
276
275
  str << ")" unless count == 0
277
276
  else
278
- str << chr unless count == 0
277
+ str << byte.chr unless count == 0
279
278
  end
280
279
  end
281
280
 
@@ -291,61 +290,68 @@ class PDF::Reader
291
290
  def prepare_regular_token
292
291
  tok = ""
293
292
 
294
- while chr = @io.read(1)
295
- case chr
296
- when "\x25"
293
+ while byte = @io.getbyte
294
+ case byte
295
+ when 0x25
297
296
  # comment, ignore everything until the next EOL char
298
297
  done = false
299
298
  while !done
300
- chr = @io.read(1)
301
- done = true if chr.nil? || chr == "\x0A" || chr == "\x0D"
299
+ byte = @io.getbyte
300
+ done = true if byte.nil? || byte == 0x0A || byte == 0x0D
302
301
  end
303
302
  when *TOKEN_WHITESPACE
304
303
  # white space, token finished
305
304
  @tokens << tok if tok.size > 0
306
305
 
307
306
  #If the token was empty, chomp the rest of the whitespace too
308
- while TOKEN_WHITESPACE.include?(peek_char) && tok.size == 0
309
- @io.read(1)
307
+ while TOKEN_WHITESPACE.include?(peek_byte) && tok.size == 0
308
+ @io.getbyte
310
309
  end
311
310
  tok = ""
312
311
  break
313
- when "\x3C"
312
+ when 0x3C
314
313
  # opening delimiter '<', start of new token
315
314
  @tokens << tok if tok.size > 0
316
- chr << @io.read(1) if peek_char == "\x3C" # check if token is actually '<<'
317
- @tokens << chr
315
+ if peek_byte == 0x3C # check if token is actually '<<'
316
+ @io.getbyte
317
+ @tokens << "<<"
318
+ else
319
+ @tokens << "<"
320
+ end
318
321
  tok = ""
319
322
  break
320
- when "\x3E"
323
+ when 0x3E
321
324
  # closing delimiter '>', start of new token
322
325
  @tokens << tok if tok.size > 0
323
- chr << @io.read(1) if peek_char == "\x3E" # check if token is actually '>>'
324
- @tokens << chr
326
+ if peek_byte == 0x3E # check if token is actually '>>'
327
+ @io.getbyte
328
+ @tokens << ">>"
329
+ else
330
+ @tokens << byte.chr
331
+ end
325
332
  tok = ""
326
333
  break
327
- when "\x28", "\x5B", "\x7B"
334
+ when 0x28, 0x5B, 0x7B
328
335
  # opening delimiter, start of new token
329
336
  @tokens << tok if tok.size > 0
330
- @tokens << chr
337
+ @tokens << byte.chr
331
338
  tok = ""
332
339
  break
333
- when "\x29", "\x5D", "\x7D"
340
+ when 0x29, 0x5D, 0x7D
334
341
  # closing delimiter
335
342
  @tokens << tok if tok.size > 0
336
- @tokens << chr
343
+ @tokens << byte.chr
337
344
  tok = ""
338
345
  break
339
- when "\x2F"
346
+ when 0x2F
340
347
  # PDF name, start of new token
341
348
  @tokens << tok if tok.size > 0
342
- @tokens << chr
343
- next_char = peek_char
344
- @tokens << "" if chr == "/" && [nil, " ", "\n"].include?(next_char)
349
+ @tokens << byte.chr
350
+ @tokens << "" if byte == 0x2F && [nil, 0x20, 0x0A].include?(peek_byte)
345
351
  tok = ""
346
352
  break
347
353
  else
348
- tok << chr
354
+ tok << byte.chr
349
355
  end
350
356
  end
351
357
 
@@ -355,10 +361,10 @@ class PDF::Reader
355
361
  # peek at the next character in the io stream, leaving the stream position
356
362
  # untouched
357
363
  #
358
- def peek_char
359
- chr = @io.read(1)
360
- @io.seek(-1, IO::SEEK_CUR) unless chr.nil?
361
- chr
364
+ def peek_byte
365
+ byte = @io.getbyte
366
+ @io.seek(-1, IO::SEEK_CUR) if byte
367
+ byte
362
368
  end
363
369
 
364
370
  # for a handful of tokens we want to tell the parser how to convert them
@@ -60,6 +60,10 @@ class PDF::Reader
60
60
  @map.size
61
61
  end
62
62
 
63
+ # Convert a glyph code into one or more Codepoints.
64
+ #
65
+ # Returns an array of Fixnums.
66
+ #
63
67
  def decode(c)
64
68
  # TODO: implement the conversion
65
69
  return c unless c.class == Fixnum
@@ -74,12 +78,23 @@ class PDF::Reader
74
78
  end
75
79
 
76
80
  def str_to_int(str)
77
- return nil if str.nil? || str.size == 0 || str.size >= 3
78
-
79
- if str.size == 1
80
- str.unpack("C*")[0]
81
+ return nil if str.nil? || str.size == 0
82
+ unpacked_string = if str.size == 1 # UTF-8
83
+ str.unpack("C*")
84
+ else # UTF-16
85
+ str.unpack("n*")
86
+ end
87
+ if unpacked_string.length == 1
88
+ unpacked_string
89
+ elsif unpacked_string.length == 2 && (unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF)
90
+ # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
91
+ # let's convert to a UTF-32 code point. (the high surrogate is between 0xD800-0xDBFF, the
92
+ # low surrogate is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
93
+ [(unpacked_string[0] - 0xD800) * 0x400 + (unpacked_string[1] - 0xDC00) + 0x10000]
81
94
  else
82
- str.unpack("n*")[0]
95
+ # it is a bad idea to just return the first 16 bits, as this doesn't allow
96
+ # for ligatures for example fi (U+0066 U+0069)
97
+ unpacked_string
83
98
  end
84
99
  end
85
100
 
@@ -88,7 +103,7 @@ class PDF::Reader
88
103
  find = str_to_int(parser.parse_token)
89
104
  replace = str_to_int(parser.parse_token)
90
105
  while find && replace
91
- @map[find] = replace
106
+ @map[find[0]] = replace
92
107
  find = str_to_int(parser.parse_token)
93
108
  replace = str_to_int(parser.parse_token)
94
109
  end
@@ -114,21 +129,21 @@ class PDF::Reader
114
129
  end
115
130
 
116
131
  def bfrange_type_one(start_code, end_code, dst)
117
- start_code = str_to_int(start_code)
118
- end_code = str_to_int(end_code)
132
+ start_code = str_to_int(start_code)[0]
133
+ end_code = str_to_int(end_code)[0]
119
134
  dst = str_to_int(dst)
120
135
 
121
136
  # add all values in the range to our mapping
122
137
  (start_code..end_code).each_with_index do |val, idx|
123
- @map[val] = dst + idx
138
+ @map[val] = dst.length == 1 ? [dst[0] + idx] : [dst[0], dst[1] + 1]
124
139
  # ensure a single range does not exceed 255 chars
125
140
  raise PDF::Reader::MalformedPDFError, "a CMap bfrange cann't exceed 255 chars" if idx > 255
126
141
  end
127
142
  end
128
143
 
129
144
  def bfrange_type_two(start_code, end_code, dst)
130
- start_code = str_to_int(start_code)
131
- end_code = str_to_int(end_code)
145
+ start_code = str_to_int(start_code)[0]
146
+ end_code = str_to_int(end_code)[0]
132
147
  from_range = (start_code..end_code)
133
148
 
134
149
  # add all values in the range to our mapping
@@ -22,7 +22,6 @@
22
22
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
23
  #
24
24
  ################################################################################
25
- require 'zlib'
26
25
 
27
26
  class PDF::Reader
28
27
  ################################################################################
@@ -30,7 +29,7 @@ class PDF::Reader
30
29
  # support for features like compression and encryption. This class is for decoding that
31
30
  # content.
32
31
  #
33
- class Filter # :nodoc:
32
+ module Filter # :nodoc:
34
33
 
35
34
  ################################################################################
36
35
  # creates a new filter for decoding content.
@@ -38,242 +37,20 @@ class PDF::Reader
38
37
  # Filters that are only used to encode image data are accepted, but the data is
39
38
  # returned untouched. At this stage PDF::Reader has no need to decode images.
40
39
  #
41
- def initialize (name, options = nil)
42
- @options = options
43
-
40
+ def self.with(name, options = {})
44
41
  case name.to_sym
45
- when :ASCII85Decode then @filter = :ascii85
46
- when :ASCIIHexDecode then @filter = :asciihex
47
- when :CCITTFaxDecode then @filter = nil
48
- when :DCTDecode then @filter = nil
49
- when :FlateDecode then @filter = :flate
50
- when :JBIG2Decode then @filter = nil
51
- when :JPXDecode then @filter = nil
52
- when :LZWDecode then @filter = :lzw
53
- when :RunLengthDecode then @filter = :runlength
42
+ when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
43
+ when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
44
+ when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
45
+ when :DCTDecode then PDF::Reader::Filter::Null.new(options)
46
+ when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
47
+ when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
48
+ when :JPXDecode then PDF::Reader::Filter::Null.new(options)
49
+ when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
50
+ when :RunLengthDecode then PDF::Reader::Filter::RunLength.new(options)
54
51
  else
55
52
  raise UnsupportedFeatureError, "Unknown filter: #{name}"
56
53
  end
57
54
  end
58
- ################################################################################
59
- # attempts to decode the specified data with the current filter
60
- #
61
- # Filters that are only used to encode image data are accepted, but the data is
62
- # returned untouched. At this stage PDF::Reader has no need to decode images.
63
- #
64
- def filter (data)
65
- # leave the data untouched if we don't support the required filter
66
- return data if @filter.nil?
67
-
68
- # decode the data
69
- self.send(@filter, data)
70
- end
71
- ################################################################################
72
- # Decode the specified data using the Ascii85 algorithm. Relies on the AScii85
73
- # rubygem.
74
- #
75
- def ascii85(data)
76
- data = "<~#{data}" unless data.to_s[0,2] == "<~"
77
- Ascii85::decode(data)
78
- rescue Exception => e
79
- # Oops, there was a problem decoding the stream
80
- raise MalformedPDFError, "Error occured while decoding an ASCII85 stream (#{e.class.to_s}: #{e.to_s})"
81
- end
82
- ################################################################################
83
- # Decode the specified data using the AsciiHex algorithm.
84
- #
85
- def asciihex(data)
86
- data.chop! if data[-1,1] == ">"
87
- data = data[1,data.size] if data[0,1] == "<"
88
- data.gsub!(/[^A-Fa-f0-9]/,"")
89
- data << "0" if data.size % 2 == 1
90
- data.scan(/.{2}/).map { |s| s.hex.chr }.join("")
91
- rescue Exception => e
92
- # Oops, there was a problem decoding the stream
93
- raise MalformedPDFError, "Error occured while decoding an ASCIIHex stream (#{e.class.to_s}: #{e.to_s})"
94
- end
95
- ################################################################################
96
- # Decode the specified data with the Zlib compression algorithm
97
- def flate (data)
98
- deflated = nil
99
- begin
100
- deflated = Zlib::Inflate.new.inflate(data)
101
- rescue Zlib::DataError => e
102
- # by default, Ruby's Zlib assumes the data it's inflating
103
- # is RFC1951 deflated data, wrapped in a RFC1951 zlib container.
104
- # If that fails, then use an undocumented 'feature' to attempt to inflate
105
- # the data as a raw RFC1951 stream.
106
- #
107
- # See
108
- # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
109
- # - http://www.gzip.org/zlib/zlib_faq.html#faq38
110
- deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
111
- end
112
- depredict(deflated, @options)
113
- rescue Exception => e
114
- # Oops, there was a problem inflating the stream
115
- raise MalformedPDFError, "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
116
- end
117
- ################################################################################
118
- # Decode the specified data with the LZW compression algorithm
119
- def lzw(data)
120
- data = PDF::Reader::LZW.decode(data)
121
- depredict(data, @options)
122
- end
123
- ################################################################################
124
- # Decode the specified data with the RunLengthDecode compression algorithm
125
- def runlength(data)
126
- pos = 0
127
- out = ""
128
-
129
- while pos < data.length
130
- if data.respond_to?(:getbyte)
131
- length = data.getbyte(pos)
132
- else
133
- length = data[pos]
134
- end
135
- pos += 1
136
-
137
- case
138
- when length == 128
139
- break
140
- when length < 128
141
- # When the length is < 128, we copy the following length+1 bytes
142
- # literally.
143
- out << data[pos, length + 1]
144
- pos += length
145
- else
146
- # When the length is > 128, we copy the next byte (257 - length)
147
- # times; i.e., "\xFA\x00" ([250, 0]) will expand to
148
- # "\x00\x00\x00\x00\x00\x00\x00".
149
- out << data[pos, 1] * (257 - length)
150
- end
151
-
152
- pos += 1
153
- end
154
-
155
- out
156
- end
157
- ################################################################################
158
- def depredict(data, opts = {})
159
- predictor = (opts || {})[:Predictor].to_i
160
-
161
- case predictor
162
- when 0, 1 then
163
- data
164
- when 2 then
165
- tiff_depredict(data, opts)
166
- when 10, 11, 12, 13, 14, 15 then
167
- png_depredict(data, opts)
168
- else
169
- raise MalformedPDFError, "Unrecognised predictor value (#{predictor})"
170
- end
171
- end
172
- ################################################################################
173
- def tiff_depredict(data, opts = {})
174
- data = data.unpack("C*")
175
- unfiltered = []
176
- bpc = opts[:BitsPerComponent] || 8
177
- pixel_bits = bpc * opts[:Colors]
178
- pixel_bytes = pixel_bits / 8
179
- line_len = (pixel_bytes * opts[:Columns])
180
- pos = 0
181
-
182
- if bpc != 8
183
- raise UnsupportedFeatureError, "TIFF predictor onlys supports 8 Bits Per Component"
184
- end
185
-
186
- until pos > data.size
187
- row_data = data[pos, line_len]
188
- row_data.each_with_index do |byte, index|
189
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
190
- row_data[index] = (byte + left) % 256
191
- end
192
- unfiltered += row_data
193
- pos += line_len
194
- end
195
-
196
- unfiltered.pack("C*")
197
- end
198
- ################################################################################
199
- def png_depredict(data, opts = {})
200
- return data if opts.nil? || opts[:Predictor].to_i < 10
201
-
202
- data = data.unpack("C*")
203
-
204
- pixel_bytes = opts[:Colors] || 1
205
- scanline_length = (pixel_bytes * opts[:Columns]) + 1
206
- row = 0
207
- pixels = []
208
- paeth, pa, pb, pc = nil
209
- until data.empty? do
210
- row_data = data.slice! 0, scanline_length
211
- filter = row_data.shift
212
- case filter
213
- when 0 # None
214
- when 1 # Sub
215
- row_data.each_with_index do |byte, index|
216
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
217
- row_data[index] = (byte + left) % 256
218
- #p [byte, left, row_data[index]]
219
- end
220
- when 2 # Up
221
- row_data.each_with_index do |byte, index|
222
- col = index / pixel_bytes
223
- upper = row == 0 ? 0 : pixels[row-1][col][index % pixel_bytes]
224
- row_data[index] = (upper + byte) % 256
225
- end
226
- when 3 # Average
227
- row_data.each_with_index do |byte, index|
228
- col = index / pixel_bytes
229
- upper = row == 0 ? 0 : pixels[row-1][col][index % pixel_bytes]
230
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
231
-
232
- row_data[index] = (byte + ((left + upper)/2).floor) % 256
233
- end
234
- when 4 # Paeth
235
- left = upper = upper_left = nil
236
- row_data.each_with_index do |byte, index|
237
- col = index / pixel_bytes
238
-
239
- left = index < pixel_bytes ? 0 : row_data[index - pixel_bytes]
240
- if row.zero?
241
- upper = upper_left = 0
242
- else
243
- upper = pixels[row-1][col][index % pixel_bytes]
244
- upper_left = col.zero? ? 0 :
245
- pixels[row-1][col-1][index % pixel_bytes]
246
- end
247
-
248
- p = left + upper - upper_left
249
- pa = (p - left).abs
250
- pb = (p - upper).abs
251
- pc = (p - upper_left).abs
252
-
253
- paeth = if pa <= pb && pa <= pc
254
- left
255
- elsif pb <= pc
256
- upper
257
- else
258
- upper_left
259
- end
260
-
261
- row_data[index] = (byte + paeth) % 256
262
- end
263
- else
264
- raise ArgumentError, "Invalid filter algorithm #{filter}"
265
- end
266
-
267
- s = []
268
- row_data.each_slice pixel_bytes do |slice|
269
- s << slice
270
- end
271
- pixels << s
272
- row += 1
273
- end
274
-
275
- pixels.map { |bytes| bytes.flatten.pack("C*") }.join("")
276
- end
277
55
  end
278
56
  end
279
- ################################################################################