pdfbeads 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,503 @@
1
+ # encoding: UTF-8
2
+
3
+ ######################################################################
4
+ #
5
+ # ImageInspector -- a simple pure Ruby module to detect basic image
6
+ # properties, such as width, height, color space or resolution. It also
7
+ # gives access to TIFF tags and EXIF properties.
8
+ #
9
+ # Version 1.0
10
+ #
11
+ # Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
12
+ # All rights reserved.
13
+ #
14
+ # This program is free software; you can redistribute it and/or modify
15
+ # it under the terms of the GNU General Public License as published by
16
+ # the Free Software Foundation; either version 2 of the License, or
17
+ # (at your option) any later version.
18
+ #
19
+ # This program is distributed in the hope that it will be useful,
20
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
+ # GNU General Public License for more details.
23
+ #
24
+ # You should have received a copy of the GNU General Public License
25
+ # along with this program; if not, write to the Free Software
26
+ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27
+ #
28
+ #######################################################################
29
+
30
+ require 'stringio'
31
+
32
class String
  # Protect strings which are supposed to be treated as a raw sequence of
  # bytes. This is important for Ruby 1.9 and later. For earlier versions
  # (where String has no force_encoding) the method just returns self.
  #
  # A mutable receiver is re-tagged in place and returned. A frozen receiver
  # cannot be re-tagged, so a binary-tagged copy is returned instead of
  # raising.
  unless method_defined? :to_binary
    def to_binary
      return self unless respond_to? :force_encoding

      target = frozen? ? dup : self
      target.force_encoding 'ASCII-8BIT'
    end
  end
end
43
+
44
module ImageInspector
  # Convenience constructor: builds an ImageInspector::Image, forwarding the
  # optional +input+ argument unchanged to Image.new, and returns it.
  def self.new(input = nil)
    Image.new(input)
  end
end
50
+
51
module ImageInspector
  # Parse image header and retrieve its basic properties. The code is inspired
  # by Sam Stephenson's snippet which demonstrates how to determine a JPEG
  # image size ( see http://snippets.dzone.com/posts/show/805) and Paul
  # Schreiber's code for TIFF (see
  # http://paulschreiber.com/blog/2010/06/10/tiff-file-dimensions-in-ruby/)
  #
  # Supported formats are: TIFF, PNG, JPEG and JPEG2000.
  #
  # Parsing errors are not propagated to the caller: they are reported on
  # $stderr and leave the object in its cleared state (width/height are nil).
  class Image
    # Return the obvious.
    attr_reader :width, :height
    # Return image resolution (always in pixels per inch, even if it is
    # differently specified in the source image). Defaults to 72 when the
    # image carries no usable resolution data.
    attr_reader :x_dpi, :y_dpi
    # Image depth, color space, palette (for indexed images) and transparency data (for PNG)
    attr_reader :depth, :cspace, :palette, :trans
    # Image format and compression method
    attr_reader :format, :compression
    # Return TIFF tags as a hash for TIFF images or JPEG images with EXIF
    # data. Otherwise this property is nil.
    attr_reader :tags

    # Name of the IO method used to fetch a single byte as an integer:
    # getbyte where IO defines it, getc otherwise.
    @@gc = (IO.method_defined? :getbyte) ? (:getbyte) : (:getc)

    # Chunk names recognized while scanning a PNG stream.
    PNG_CHUNKS = [ 'IHDR', 'PLTE', 'IDAT', 'IEND', 'tRNS', 'cHRM',
                   'gAMA', 'iCCP', 'sBIT', 'sRGB', 'iTXt', 'tEXt',
                   'zTXt', 'bKGD', 'hIST', 'pHYs', 'sPLT', 'tIME' ].freeze

    # Box names recognized while scanning a JPEG2000 stream.
    JP2_BOXES = [ 'ftyp', 'jp2h', 'ihdr', 'colr', 'res ', 'resc',
                  'resd', 'prfl', 'bpcc', 'pclr', 'cdef', 'jp2i' ].freeze

    # Tags a TIFF image must provide: width, height, photometric
    # interpretation, strip offsets and strip byte counts.
    REQUIRED_TIFF_TAGS = [ 0x0100, 0x0101, 0x0106, 0x0111, 0x0117 ].freeze

    # Set all image attributes to nil and open an image if an optional
    # argument is specified.
    def initialize(input = nil)
      clearData
      open(input) unless input.nil?
    end

    # Accepts either a file name or a stream-like object (IO or StringIO).
    # A stream is expected to be positioned at the beginning of the image.
    # Any previously parsed state is discarded first, so the same object can
    # be reused for several images. Always returns nil.
    def open(input)
      clearData
      @input = input

      begin
        if input.kind_of?(IO) || input.kind_of?(StringIO)
          @fname = '<STREAM>'
          byFormat(input)
        else
          @fname = input
          File.open(input, 'rb') { |io| byFormat(io) }
        end
      # StandardError only: Interrupt, SystemExit and the like must propagate.
      rescue StandardError => e
        $stderr.puts("Could not read data from #{@fname}: #{e.message}")
        clearData
        @input = nil
      end
      nil
    end

    # Return image data (possibly compressed) for a previously initialized
    # image as a string. For JPEG and JPEG2000 this would be the whole image
    # as it is stored on the disk, while for TIFF and PNG all headers are
    # stripped and a raw data stream is returned.
    #
    # Raises RuntimeError if no image has been successfully opened. Read
    # errors are reported on $stderr and make the method return nil.
    def getRawData
      raise "The image has not been properly initialized" if @width.nil? || @input.nil?

      begin
        if @input.kind_of?(IO) || @input.kind_of?(StringIO)
          concatDataBlocks(@input)
        else
          File.open(@input, 'rb') { |io| concatDataBlocks(io) }
        end
      rescue StandardError => e
        $stderr.puts("Could not read data from #{@fname}: #{e.message}")
      end
    end

    private

    # Reset every parsed attribute to its initial value.
    def clearData
      @width = @height = nil
      @x_dpi = @y_dpi = 72
      @data_blocks = []
      @depth = @cspace = @palette = @trans = nil
      @compression = @format = @tags = nil
      @stream = @fname = @next_off = nil
    end

    # Read exactly +len+ bytes from +io+. Raises if the stream ends early, so
    # that truncated files produce a readable message instead of a
    # NoMethodError on nil.
    def readBytes(io, len)
      data = io.read(len)
      raise 'unexpected end of data' if data.nil? || data.size < len
      data
    end

    # Collect the image payload from +io+ using the format detected and the
    # data block list (offset/length pairs) gathered while parsing.
    def concatDataBlocks(io)
      io.set_encoding 'ASCII-8BIT' if io.respond_to? :set_encoding

      # For JPEG/JPEG2000 just return the whole file. A stream which has
      # already been parsed is positioned somewhere past the header, so it
      # has to be rewound first.
      if @format.eql?(:JPEG) || @format.eql?(:JPEG2000)
        io.seek(0, IO::SEEK_SET)
        return io.read
      end

      # For TIFF/PNG extract raw data blocks from the image
      ret = String.new
      @data_blocks.each do |offset, size|
        io.seek(offset, IO::SEEK_SET)
        chunk = io.read(size)
        raise 'unexpected end of data' if chunk.nil?
        ret << chunk
      end
      ret
    end

    # Detect the image format from the leading signature bytes and run the
    # matching parser. Raises if the signature is not recognized.
    def byFormat(io)
      io.set_encoding 'ASCII-8BIT' if io.respond_to? :set_encoding

      # to_s turns a nil read (empty or very short input) into an empty
      # string, so such input ends up in the final "not recognized" error.
      sign = io.read(2).to_s
      if sign.eql? "\xFF\xD8".to_binary
        @format = :JPEG
        @compression = :DCTDecode
        jpgExamine(io)
        return
      end

      sign += io.read(2).to_s
      if sign.eql?("MM\x00\x2a".to_binary) || sign.eql?("II\x2a\x00".to_binary)
        @format = :TIFF
        tiffExamine(io, sign)
        return
      end

      sign += io.read(4).to_s
      if sign.eql? "\x89PNG\x0D\x0A\x1A\x0A".to_binary
        @format = :PNG
        pngExamine(io)
        return
      end

      sign += io.read(4).to_s
      # The JP2 signature box is 12 bytes long: the box type is "jP"
      # followed by TWO spaces (written as \x20\x20 to keep them visible).
      if sign.eql? "\x00\x00\x00\x0CjP\x20\x20\x0D\x0A\x87\x0A".to_binary
        @format = :JPEG2000
        @compression = :JPXDecode
        j2kParseBox(io)
        return
      end

      raise "File format not recognized"
    end

    # Decode the value of a single TIFF directory entry.
    #
    # io::    stream positioned right after the entry
    # intgr:: unpack directive for a 16-bit integer in the file's byte order
    #         ('n' or 'v'); its upper-case form is the 32-bit directive
    # fmt::   unpack directive selected for the entry type
    # cnt::   number of values declared by the entry
    # val::   the raw 4-byte value/offset field of the entry
    #
    # Returns an array of decoded values. Rationals are reduced with integer
    # division; a rational with a zero denominator yields 0. The stream
    # position is restored before returning.
    def tiffReadArray(io, intgr, fmt, cnt, val)
      long = intgr.upcase
      rational = long * 2

      rec_len = case fmt
                when 'C', 'c' then 1
                when intgr    then 2
                when long     then 4
                when rational then 8
                when 'A*'     then cnt
                end
      # A string is read as one record spanning the whole declared length.
      cnt = 1 if fmt.eql? 'A*'

      # Values that fit into 4 bytes are stored in the entry itself.
      return val.unpack(fmt * cnt) if rec_len * cnt <= 4

      # Otherwise the field holds an offset to the actual data.
      ptr = val.unpack(long)[0]
      cur_pos = io.tell
      io.seek(ptr, IO::SEEK_SET)

      ret = []
      cnt.times do
        rec = readBytes(io, rec_len).unpack(fmt)
        if fmt.eql? rational
          num, den = rec
          ret << (den.zero? ? 0 : num / den)
        else
          ret << rec[0]
        end
      end
      io.seek(cur_pos, IO::SEEK_SET)

      ret
    end

    # Read the image file directory located at +offset+ and return its
    # entries as a hash mapping tag code to an array of values. The offset of
    # the following directory is stored in @next_off (nil if not readable).
    def tiffParseIFD(io, offset, intgr)
      long = intgr.upcase
      packspec = [
        nil,      # nothing (shouldn't happen)
        'C',      # BYTE (8-bit unsigned integer)
        'A*',     # ASCII
        intgr,    # SHORT (16-bit unsigned integer)
        long,     # LONG (32-bit unsigned integer)
        long * 2, # RATIONAL (numerator + denominator)
        'c',      # SBYTE (8-bit signed integer)
        'A*',     # undefined, but used for EXIF version
        intgr,    # SSHORT (16-bit signed integer, read as unsigned)
        long,     # SLONG (32-bit signed integer, read as unsigned)
        long * 2, # SRATIONAL (numerator + denominator, read as unsigned)
      ]
      io.seek(offset, IO::SEEK_SET)
      num_dirent = readBytes(io, 2).unpack(intgr)[0]

      tags = {}
      num_dirent.times do
        code, type, length = readBytes(io, 8).unpack("#{intgr}#{intgr}#{long}")
        raise 'malformed TIFF: could not read an IFD entry' if (
          type >= packspec.size || packspec[type].nil? )
        sval = readBytes(io, 4)

        tags[code] = tiffReadArray(io, intgr, packspec[type], length, sval)
      end

      # The link to the next directory is informational only, so a stream
      # ending right after the last entry is tolerated.
      link = io.read(4)
      @next_off = (link.nil? || link.size < 4) ? nil : link.unpack(long)[0]
      tags
    end

    # Parse TIFF structures, either a TIFF file proper or the EXIF block
    # embedded into a JPEG image (in which case @format is :JPEG).
    #
    # sign::   the 4-byte TIFF signature, which selects the byte order
    # offset:: offset of the first directory; read from the stream when nil
    def tiffExamine(io, sign, offset = nil)
      if sign.eql? "MM\x00\x2a".to_binary
        intgr = 'n'
      elsif sign.eql? "II\x2a\x00".to_binary
        intgr = 'v'
      else
        raise 'malformed TIFF: no TIFF signature'
      end

      # Get offset to IFD
      offset = readBytes(io, 4).unpack(intgr.upcase)[0] if offset.nil?
      @tags = tiffParseIFD(io, offset, intgr)

      # We should not expect to find required image properties (such as width
      # or height) in EXIF data of a JPEG image: there they come from the
      # JPEG frame header, and EXIF tags must not override them.
      tiffReadImageLayout unless @format.eql? :JPEG

      # Merge the EXIF sub-directory, if there is one.
      @tags.merge! tiffParseIFD(io, @tags[0x8769][0], intgr) if @tags.has_key? 0x8769

      if @tags.has_key?(0x011A) && @tags.has_key?(0x011B)
        xres = @tags[0x011A][0]
        yres = @tags[0x011B][0]
        # Keep the default when the stored resolution is unusable (missing,
        # non-numeric or zero).
        if xres.kind_of?(Integer) && yres.kind_of?(Integer) && xres > 0 && yres > 0
          @x_dpi = xres
          @y_dpi = yres
          # Resolution unit 3 means pixels per centimeter.
          if @tags.has_key?(0x0128) && @tags[0x0128][0] == 3
            @x_dpi = (@x_dpi * 2.54).round
            @y_dpi = (@y_dpi * 2.54).round
          end
        end
      end
    end

    # Fill in the properties describing the pixel data of a TIFF image
    # (size, data blocks, color space, palette, depth, compression) from
    # @tags. Raises if one of the required tags is missing.
    def tiffReadImageLayout
      raise 'malformed TIFF: a required tag is missing' unless
        REQUIRED_TIFF_TAGS.all? { |tag| @tags.has_key? tag }

      @width = @tags[0x0100][0]
      @height = @tags[0x0101][0]

      # Pair each strip offset with the corresponding byte count.
      @tags[0x0111].zip(@tags[0x0117]).each { |block| @data_blocks << block }

      photometric = @tags[0x0106][0]
      @cspace = case photometric
                when 0, 1 then :DeviceGray
                when 3    then :Indexed
                when 5    then :DeviceCMYK
                else           :DeviceRGB
                end

      if photometric == 3 && @tags.has_key?(0x0140)
        # The color map stores all red values first, then all green ones,
        # then all blue ones, each as a 16-bit number.
        cmap = @tags[0x0140]
        clen = cmap.length / 3
        @palette = (0...clen).map do |i|
          [ cmap[i] / 256, cmap[i + clen] / 256, cmap[i + clen * 2] / 256 ]
        end
      end

      @depth = @tags.has_key?(0x0102) ? @tags[0x0102][0] : 1

      if @tags.has_key? 0x0103
        case @tags[0x0103][0]
        when 1
          @compression = :NoCompression
        when 3, 4
          @compression = :CCITTFaxDecode
        when 5
          @compression = :LZWDecode
        when 8, 32946
          @compression = :FlateDecode
        end
      end
    end

    # Scan a JPEG2000 stream for known boxes and pick up image size, depth
    # and color space. The scan stops after the image header superbox or a
    # color specification box has been processed.
    def j2kParseBox(io)
      buf = [ 0 ] * 8
      while byte = io.send(@@gc)
        # always keep last 8 bytes so that we can check for chunk name and length
        buf.shift
        buf.push(byte)
        tag = buf[4, 4].pack('C*')

        # Currently we can detect only width/height for JP2 images.
        # That's OK, as other parameters aren't needed for PDF generation.
        next unless JP2_BOXES.include? tag

        # Turn the box length into the length of the box contents.
        length = buf[0, 4].pack('C*').unpack('N')[0]
        if length == 1
          # A 64-bit extended length follows the box type.
          hi, lo = readBytes(io, 8).unpack('NN')
          length = ((hi << 32) | lo) - 16
        elsif length == 0
          # The box extends to the end of the stream: read(nil) reads it all.
          length = nil
        else
          length -= 8
        end
        # A negative length means the "box name" was matched by accident.
        next if length && length < 0

        case tag
        when 'jp2h'
          j2kParseBox(StringIO.new(io.read(length) || ''))
          return
        when 'ihdr'
          unless length == 14
            raise 'Malformed JPEG2000: the file is damaged or has an unsupported format'
          end
          @height, @width, _ncomps, bpc = readBytes(io, 11).unpack('NNnC')
          # The high bit flags signed samples, the rest is depth minus one.
          @depth = (bpc & 0x7f) + 1
        when 'colr'
          next unless @cspace.nil?
          meth, = readBytes(io, 3).unpack('CcC')
          if meth == 1
            enumcs = readBytes(io, 4).unpack('N')[0]
            case enumcs
            when 16
              @cspace = :DeviceRGB
            when 17
              @cspace = :DeviceGray
            else
              raise 'Malformed JPEG2000: unknown colorspace'
            end
          end
          return
        else
          io.read(length)
        end
      end
    end

    # Read the PNG header and walk through the chunks, collecting palette,
    # transparency, resolution and the location of the image data.
    def pngExamine(io)
      # The IHDR payload starts right after the signature and the chunk head.
      io.seek(16, IO::SEEK_SET)
      @width, @height, @depth, color, compr, filtr, _interlace =
        readBytes(io, 13).unpack('NNCCCCC')
      @compression = :FlateDecode if compr == 0 && filtr == 0
      @cspace = case color
                when 0, 4 then :DeviceGray
                when 3    then :Indexed
                else           :DeviceRGB
                end

      buf = [ 0 ] * 8
      while byte = io.send(@@gc)
        # always keep last 8 bytes so that we can check for chunk name and length
        buf.shift
        buf.push(byte)
        tag = buf[4, 4].pack('C*')
        next unless PNG_CHUNKS.include? tag

        length = buf[0, 4].pack('C*').unpack('N')[0]
        case tag
        when 'PLTE'
          # Palette entries are unsigned bytes (0..255).
          @palette = []
          (length / 3).times { @palette << readBytes(io, 3).unpack('CCC') }
        when 'IDAT'
          @data_blocks << [ io.tell, length ]
          io.seek(length + 4, IO::SEEK_CUR)
        when 'pHYs'
          x_ppu, y_ppu, unit = readBytes(io, 9).unpack('NNC')
          # Unit 1 means pixels per meter. Any other unit only defines the
          # pixel aspect ratio, so the default resolution is kept.
          if unit == 1 && x_ppu > 0 && y_ppu > 0
            @x_dpi = (x_ppu * 0.0254).round
            @y_dpi = (y_ppu * 0.0254).round
          end
        when 'tRNS'
          case @cspace
          when :Indexed
            # One alpha byte per palette entry, possibly for the leading
            # entries only.
            @trans = readBytes(io, length).unpack('C*')
          when :DeviceGray
            # A single 16-bit gray level to be treated as transparent.
            @trans = readBytes(io, 2).unpack('n')
          when :DeviceRGB
            # A single color (three 16-bit samples) to be treated as
            # transparent.
            @trans = readBytes(io, 6).unpack('nnn')
          end
        when 'IEND'
          break
        else
          io.seek(length + 4, IO::SEEK_CUR)
        end
      end
    end

    # Skip to the next JPEG marker and return its code. Returns nil when the
    # end of the stream is reached first.
    def jpgNextMarker(io)
      c = io.send(@@gc)
      c = io.send(@@gc) until c.nil? || c == 0xFF
      # A marker may be preceded by any number of 0xFF fill bytes.
      c = io.send(@@gc) while c == 0xFF
      c
    end

    # Read a JPEG segment and return its payload (without the length field).
    # A segment cut short by the end of the stream yields what is available,
    # possibly an empty string.
    def jpgReadFrame(io)
      head = io.read(2)
      return '' if head.nil? || head.size < 2

      off = head.unpack('n')[0]
      raise 'malformed JPEG: invalid segment length' if off < 2
      io.read(off - 2).to_s
    end

    # Walk through the JPEG segments preceding the image data and collect
    # image size, depth, color space, resolution and EXIF tags.
    def jpgExamine(io)
      while marker = jpgNextMarker(io)
        case marker
        # SOF markers
        when 0xC0..0xC3, 0xC5..0xC7, 0xC9..0xCB, 0xCD..0xCF
          head = io.read(8)
          raise 'malformed JPEG: could not read a SOF header' if head.nil? || head.size < 8
          length, @depth, @height, @width, components = head.unpack('nCnnC')
          raise 'malformed JPEG: could not read a SOF header' unless length == 8 + components * 3
          # Skip the component specifications so that their bytes cannot be
          # taken for a marker.
          io.read(components * 3)
          @cspace = case components
                    when 1 then :DeviceGray
                    when 4 then :DeviceCMYK
                    else        :DeviceRGB
                    end
        # EOI, SOS
        when 0xD9, 0xDA
          break
        # TEM, RSTn, SOI: standalone markers without a length field
        when 0x01, 0xD0..0xD8
          next
        # APP0, contains JFIF tag (or a JFXX extension, which is ignored)
        when 0xE0
          jfif = jpgReadFrame(io)
          if jfif.size >= 12 && jfif[0, 5].eql?("JFIF\x00".to_binary)
            units, xdens, ydens = jfif[7, 5].unpack('Cnn')
            # Units: 1 is dots per inch, 2 is dots per centimeter. Anything
            # else only defines the pixel aspect ratio, so the default
            # resolution is kept.
            if (units == 1 || units == 2) && xdens > 0 && ydens > 0
              @x_dpi = xdens
              @y_dpi = ydens
              if units == 2
                @x_dpi = (@x_dpi * 2.54).round
                @y_dpi = (@y_dpi * 2.54).round
              end
            end
          end
        # APP1, contains EXIF tag
        when 0xE1
          exif = jpgReadFrame(io)
          if exif[0, 6].eql? "Exif\x00\x00".to_binary
            buf = StringIO.new(exif[6..-1])
            sign = buf.read(4)
            tiffExamine(buf, sign)
          end
        # ignore frame
        else
          jpgReadFrame(io)
        end
      end
    end
  end
end