pdfbeads 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,503 @@
1
+ # encoding: UTF-8
2
+
3
+ ######################################################################
4
+ #
5
+ # ImageInspector -- a simple pure Ruby module to detect basic image
6
+ # properties, such as width, height, color space or resolution. It also
7
+ # gives an access to TIFF tags and EXIF properties.
8
+ #
9
+ # Version 1.0
10
+ #
11
+ # Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
12
+ # All rights reserved.
13
+ #
14
+ # This program is free software; you can redistribute it and/or modify
15
+ # it under the terms of the GNU General Public License as published by
16
+ # the Free Software Foundation; either version 2 of the License, or
17
+ # (at your option) any later version.
18
+ #
19
+ # This program is distributed in the hope that it will be useful,
20
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
21
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22
+ # GNU General Public License for more details.
23
+ #
24
+ # You should have received a copy of the GNU General Public License
25
+ # along with this program; if not, write to the Free Software
26
+ # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
27
+ #
28
+ #######################################################################
29
+
30
+ require 'stringio'
31
+
32
class String
  # Protect strings which are supposed to be treated as a raw sequence of
  # bytes. This is important for Ruby 1.9 and later. For earlier versions
  # (where strings carry no encoding) the method just returns the receiver.
  #
  # The receiver itself is re-tagged as ASCII-8BIT and returned, unless it
  # is frozen: a frozen string cannot be re-tagged in place, so in that case
  # a binary copy is returned and the receiver is left untouched.
  #
  # The method is only defined if nothing else has defined it already.
  unless method_defined? :to_binary
    def to_binary
      return self unless respond_to? :force_encoding

      target = frozen? ? dup : self
      # force_encoding returns its receiver
      target.force_encoding 'ASCII-8BIT'
    end
  end
end
43
+
44
module ImageInspector
  # Shortcut constructor: ImageInspector.new( input ) builds and returns an
  # ImageInspector::Image for the given input (nil by default).
  def self.new( input=nil )
    Image.new( input )
  end
end
50
+
51
+ # Parse image header and retrieve its basic properties. The code is inspired
52
+ # by Sam Stephenson's snippet which demonstrates how to determine a JPEG
53
+ # image size ( see http://snippets.dzone.com/posts/show/805) and Paul
54
+ # Schreiber's code for TIFF (see
55
+ # http://paulschreiber.com/blog/2010/06/10/tiff-file-dimensions-in-ruby/)
56
+ #
57
+ # Supported formats are: TIFF, PNG, JPEG and JPEG2000.
58
+ #
59
module ImageInspector
  # Reads the header of a TIFF, PNG, JPEG or JPEG2000 image and exposes its
  # basic properties (size, resolution, depth, color space, compression).
  # Parsing problems are reported on $stderr and leave the object in its
  # "empty" state (width and height are nil).
  class Image
    # Return the obvious.
    attr_reader :width, :height
    # Return image resolution (always in pixels per inch, even if it is
    # differently specified in the source image). Defaults to 72 when the
    # image does not specify an absolute resolution.
    attr_reader :x_dpi, :y_dpi
    # Image depth, color space, palette (for indexed images) and transparency data (for PNG)
    attr_reader :depth, :cspace, :palette, :trans
    # Image format and compression method
    attr_reader :format, :compression
    # Return TIFF tags as a hash for TIFF images or JPEG images with EXIF
    # data. Otherwise this property is nil.
    attr_reader :tags

    # Name of the method used to fetch a single byte as an integer
    # (IO#getc returns a string since Ruby 1.9, so getbyte is preferred).
    @@gc = (IO.method_defined? :getbyte) ? (:getbyte) : (:getc)

    # Box types recognized while scanning a JPEG2000 file.
    J2K_BOX_TYPES = [ 'ftyp','jp2h','ihdr','colr','res ','resc',
                      'resd','prfl','bpcc','pclr','cdef','jp2i' ].freeze

    # Chunk types recognized while scanning a PNG file.
    PNG_CHUNK_TYPES = [ 'IHDR', 'PLTE', 'IDAT', 'IEND', 'tRNS', 'cHRM',
                        'gAMA', 'iCCP', 'sBIT', 'sRGB', 'iTXt', 'tEXt',
                        'zTXt', 'bKGD', 'hIST', 'pHYs', 'sPLT', 'tIME' ].freeze

    # Set all image attributes to nil and open an image if an optional
    # argument is specified.
    def initialize( input=nil )
      clearData()
      open( input ) unless input.nil?
    end

    # Accepts either a file name or a stream-like object (IO or StringIO).
    # A stream is expected to be positioned at the start of the image.
    # Any state left from a previously opened image is discarded first.
    # On failure a message is written to $stderr and all attributes are reset.
    def open( input )
      clearData()
      @input = input

      begin
        if streamInput?( input )
          @fname = '<STREAM>'
          byFormat( input )
        else
          @fname = input
          File.open( input, 'rb' ) { |io| byFormat( io ) }
        end

      # StandardError only: interrupts and exit requests must propagate
      rescue StandardError => e
        $stderr.puts( "Could not read data from #{@fname}: #{e.message}" )
        clearData()
        @input = nil
      end
    end

    # Return image data (possibly compressed) for a previously initialized
    # image as a string. For JPEG and JPEG2000 this would be the whole image
    # as it is stored on the disk, while for TIFF and PNG all headers are
    # stripped and a raw data stream is returned.
    #
    # Raises a RuntimeError if no image has been successfully opened.
    # Returns nil (after printing a message to $stderr) on a read error.
    def getRawData()
      raise "The image has not been properly initialized" if @width.nil? or @input.nil?

      begin
        if streamInput?( @input )
          concatDataBlocks( @input )
        else
          File.open( @input, 'rb' ) { |io| concatDataBlocks( io ) }
        end
      rescue StandardError => e
        $stderr.puts( "Could not read data from #{@fname}: #{e.message}" )
        nil
      end
    end

    private

    # True if the input should be read directly rather than opened as a file.
    def streamInput?( input )
      input.kind_of? IO or input.kind_of? StringIO
    end

    # Reset every attribute to its initial ("no image") value.
    def clearData
      @width = @height = nil
      @x_dpi = @y_dpi = 72
      @data_blocks = Array.new()
      @depth = @cspace = @palette = @trans = nil
      @compression = @format = @tags = nil
      @stream = @fname = @next_off = nil
    end

    # Collect the image data from the given stream: the whole stream for
    # JPEG/JPEG2000, the concatenated data blocks ([offset, size] pairs
    # gathered while parsing) for TIFF/PNG.
    def concatDataBlocks( io )
      io.set_encoding 'ASCII-8BIT' if io.respond_to? :set_encoding

      # For JPEG/JPEG2000 just return the whole file. A stream which has
      # already been parsed is no longer positioned at its start, so rewind.
      if @format.eql? :JPEG or @format.eql? :JPEG2000
        io.seek( 0,IO::SEEK_SET )
        return io.read
      end

      # For TIFF/PNG extract raw data blocks from the image
      ret = String.new
      @data_blocks.each do |offset, size|
        io.seek( offset,IO::SEEK_SET )
        chunk = io.read( size )
        raise 'unexpected end of file while reading image data' if chunk.nil?
        ret << chunk
      end
      return ret
    end

    # Detect the image format by its signature and run the matching parser.
    # The signature is read incrementally because each parser expects the
    # stream to be positioned right after the bytes consumed here.
    # Raises a RuntimeError if the format is not recognized.
    def byFormat( io )
      io.set_encoding 'ASCII-8BIT' if io.respond_to? :set_encoding

      sign = io.read( 2 )
      raise "File format not recognized" if sign.nil?
      if sign.eql? "\xFF\xD8".to_binary
        @format = :JPEG
        @compression = :DCTDecode
        jpgExamine( io )
        return
      end

      # read returns nil at EOF; to_s keeps the comparison below well defined
      sign << io.read( 2 ).to_s
      if sign.eql? "MM\x00\x2a".to_binary or sign.eql? "II\x2a\x00".to_binary
        @format = :TIFF
        tiffExamine( io,sign )
        return
      end

      sign << io.read( 4 ).to_s
      if sign.eql? "\x89PNG\x0D\x0A\x1A\x0A".to_binary
        @format = :PNG
        pngExamine( io )
        return
      end

      sign << io.read( 4 ).to_s
      # 12-byte JP2 signature box; the box type is 'jP' followed by two spaces
      if sign.eql? "\x00\x00\x00\x0CjP\x20\x20\x0D\x0A\x87\x0A".to_binary
        @format = :JPEG2000
        @compression = :JPXDecode
        j2kParseBox( io )
        return
      end

      raise "File format not recognized"
    end

    # Decode the value of a single TIFF field.
    #
    # io    -- the stream containing the TIFF data
    # intgr -- unpack directive for a 16-bit integer in the file's byte
    #          order ('n' or 'v'); its upper-case form is the 32-bit variant
    # fmt   -- unpack directive describing one element of the field
    # cnt   -- number of elements (number of bytes for 'A*')
    # val   -- the 4-byte value/offset slot of the IFD entry
    #
    # Returns an array of decoded elements. Rationals are reduced with an
    # integer division; a rational with a zero denominator yields 0.
    # The stream position is restored after following an offset.
    def tiffReadArray( io,intgr,fmt,cnt,val )
      long = intgr.upcase
      rational = long * 2

      case fmt
      when 'C', 'c'
        rec_len = 1
      when intgr
        rec_len = 2
      when long
        rec_len = 4
      when rational
        rec_len = 8
      when 'A*'
        # a string is read as a single record of cnt bytes
        rec_len = cnt
        cnt = 1
      else
        raise "malformed TIFF: unsupported field format #{fmt.inspect}"
      end

      # Values which fit into 4 bytes are stored in the entry itself
      return val.unpack( fmt*cnt ) unless rec_len*cnt > 4

      # Otherwise the slot holds an offset to the actual data
      ptr = val.unpack( long )[0]
      cur_pos = io.tell
      io.seek( ptr,IO::SEEK_SET )

      ret = []
      cnt.times do
        rec = io.read( rec_len )
        raise 'malformed TIFF: a tag value lies outside of the file' if rec.nil?
        if fmt.eql? rational
          num, den = rec.unpack( fmt )
          # EXIF writers commonly store 0/0 for "unknown"
          ret << ( (den.nil? or den.zero?) ? 0 : num/den )
        else
          ret << rec.unpack( fmt )[0]
        end
      end
      io.seek( cur_pos,IO::SEEK_SET )

      return ret
    end

    # Read the Image File Directory at the given offset and return its
    # fields as a hash (tag code => array of values). The offset of the
    # next IFD is stored into @next_off (nil if it could not be read).
    # Fields of an unknown or unsupported type are skipped.
    # Signed 16/32-bit types are decoded as unsigned.
    def tiffParseIFD( io,offset,intgr )
      long = intgr.upcase
      packspec = [
        nil,           # nothing (shouldn't happen)
        'C',           # BYTE (8-bit unsigned integer)
        'A*',          # ASCII
        intgr,         # SHORT (16-bit unsigned integer)
        long,          # LONG (32-bit unsigned integer)
        long * 2,      # RATIONAL (numerator + denominator)
        'c',           # SBYTE (8-bit signed integer)
        'A*',          # undefined, but used for EXIF version
        intgr,         # SSHORT (16-bit signed integer)
        long,          # SLONG (32-bit signed integer)
        long * 2,      # SRATIONAL (numerator + denominator)
      ]
      io.seek( offset,IO::SEEK_SET )
      num_dirent = io.read( 2 ).to_s.unpack( intgr )[0]
      raise 'malformed TIFF: could not read an IFD' if num_dirent.nil?

      tags = Hash.new()
      num_dirent.times do
        code, type, length = io.read( 8 ).to_s.unpack( "#{intgr}#{intgr}#{long}" )
        sval = io.read( 4 )
        raise 'malformed TIFF: could not read an IFD entry' if (
          length.nil? or sval.nil? or sval.size < 4 )

        fmt = packspec[type]
        # skip fields whose type we cannot decode (e. g. FLOAT or DOUBLE)
        next if fmt.nil?

        tags[code] = tiffReadArray( io,intgr,fmt,length,sval )
      end

      @next_off = io.read( 4 ).to_s.unpack( long )[0]
      return tags
    end

    # Parse TIFF data: either a standalone TIFF image or the EXIF block
    # embedded into a JPEG file (in which case @format is already :JPEG and
    # only the tags and the resolution are taken from it).
    #
    # sign   -- the 4-byte TIFF signature, which determines the byte order
    # offset -- offset of the first IFD; read from the stream if nil
    def tiffExamine( io,sign,offset=nil )
      if sign.eql? "MM\x00\x2a".to_binary
        intgr = 'n'
      elsif sign.eql? "II\x2a\x00".to_binary
        intgr = 'v'
      else
        raise 'malformed TIFF: no TIFF signature'
      end

      # Get offset to IFD
      offset = io.read( 4 ).to_s.unpack( intgr.upcase )[0] if offset.nil?
      raise 'malformed TIFF: could not read the IFD offset' if offset.nil?
      @tags = tiffParseIFD( io,offset,intgr )

      # We should not expect to find required image properties (such as width
      # or height) in EXIF data of a JPEG image.
      embedded = @format.eql? :JPEG

      unless embedded
        required = [ 0x0100,0x0101,0x0106,0x0111,0x0117 ]
        unless required.all? { |code| @tags.has_key? code }
          raise 'malformed TIFF: a required tag is missing'
        end

        @width = @tags[0x0100][0]; @height = @tags[0x0101][0]

        # pair each strip offset with its byte count
        @data_blocks.concat( @tags[0x0111].zip( @tags[0x0117] ) )

        photometric = @tags[0x0106][0]
        case photometric
        when 0, 1
          @cspace = :DeviceGray
        when 3
          @cspace = :Indexed
        when 5
          @cspace = :DeviceCMYK
        else
          @cspace = :DeviceRGB
        end

        if photometric == 3 and @tags.has_key? 0x0140
          # the color map stores all reds, then all greens, then all blues
          # as 16-bit values; scale them down to 8 bits
          cmap = @tags[0x0140]
          clen = cmap.length / 3
          @palette = (0...clen).map do |i|
            [ cmap[i]/256,cmap[i+clen]/256,cmap[i+clen*2]/256 ]
          end
        end

        @depth = 1
        @depth = @tags[0x0102][0] if @tags.has_key? 0x0102
      end

      # Merge the tags of the EXIF sub-IFD, if there is one
      if @tags.has_key? 0x8769
        exif_off = @tags[0x8769][0]
        @tags.merge! tiffParseIFD( io,exif_off,intgr ) unless exif_off.nil?
      end

      # A JPEG stream is always DCT-encoded, whatever its EXIF data says
      if !embedded and @tags.has_key? 0x0103
        case @tags[0x0103][0]
        when 1
          @compression = :NoCompression
        when 3, 4
          @compression = :CCITTFaxDecode
        when 5
          @compression = :LZWDecode
        when 8, 32946
          @compression = :FlateDecode
        end
      end

      if ( @tags.has_key? 0x011A and @tags.has_key? 0x011B )
        xres = @tags[0x011A][0]; yres = @tags[0x011B][0]
        unless xres.nil? or yres.nil?
          @x_dpi = xres; @y_dpi = yres
          # resolution unit 3 means pixels per centimeter
          if @tags.has_key? 0x0128 and @tags[0x0128][0] == 3
            @x_dpi = (@x_dpi * 2.54).round
            @y_dpi = (@y_dpi * 2.54).round
          end
        end
      end
    end

    # Scan a JPEG2000 stream for known boxes and pick up the image size,
    # depth and color space. Parsing stops after the JP2 header box or
    # after a color specification box has been processed.
    def j2kParseBox( io )
      buf = [ 0 ] * 8
      while byte = io.send( @@gc )
        # always keep last 8 bytes so that we can check for chunk name and length
        buf.shift
        buf.push( byte )
        tag = buf[4..7].pack( 'C*' )

        # Currently we can detect only width/height for JP2 images.
        # That's OK, as other parameters aren't needed for PDF generation.
        next unless J2K_BOX_TYPES.include? tag

        # length becomes the size of the box contents (nil: up to EOF)
        length = buf[0..3].pack( 'C*' ).unpack( 'N' )[0]
        if length == 1
          # the real size follows the box type as a 64-bit integer
          hi, lo = io.read( 8 ).to_s.unpack( 'NN' )
          raise 'Malformed JPEG2000: could not read an extended box length' if lo.nil?
          length = ((hi << 32) | lo) - 16
        elsif length == 0
          # the box extends to the end of the file
          length = nil
        else
          length -= 8
        end
        raise 'Malformed JPEG2000: invalid box length' if !length.nil? and length < 0

        case tag
        when 'jp2h'
          # a superbox: parse the boxes it contains
          iostr = StringIO.new( io.read( length ).to_s )
          j2kParseBox( iostr )
          return
        when 'ihdr'
          unless length == 14
            raise 'Malformed JPEG2000: the file is damaged or has an unsupported format'
          end
          # height, width, number of components, bits per component;
          # the remaining 3 bytes of the box are consumed but not used
          hdr = io.read( 14 ).to_s.unpack( 'NNnC' )
          if hdr[3].nil?
            raise 'Malformed JPEG2000: the file is damaged or has an unsupported format'
          end
          @height = hdr[0]
          @width = hdr[1]
          # the high bit is the signedness flag, the rest is depth - 1
          @depth = (hdr[3] & 0x7f) + 1
        when 'colr'
          next unless @cspace.nil?
          meth = io.read( 3 ).to_s.unpack( 'C' )[0]
          # method 1 is an enumerated color space
          if meth == 1
            enumcs = io.read( 4 ).to_s.unpack( 'N' )[0]
            case enumcs
            when 16
              @cspace = :DeviceRGB
            when 17
              @cspace = :DeviceGray
            else
              raise 'Malformed JPEG2000: unknown colorspace'
            end
          end
          return
        else
          # skip the contents of a box we are not interested in
          io.read( length )
        end
      end
    end

    # Parse a PNG stream: read the IHDR data and then scan the stream for
    # known chunks, collecting the palette, the transparency data, the
    # resolution and the location of image data (IDAT) blocks.
    def pngExamine( io )
      # the IHDR data starts right after the signature, the chunk length
      # and the chunk type
      io.seek( 16,IO::SEEK_SET )
      hdr = io.read( 13 )
      raise 'malformed PNG: could not read the IHDR chunk' if hdr.nil? or hdr.size < 13
      @width, @height, @depth, color, compr, filtr = hdr.unpack( 'NNCCCC' )

      @compression = :FlateDecode if compr == 0 and filtr == 0
      case color
      when 0, 4
        @cspace = :DeviceGray
      when 3
        @cspace = :Indexed
      else
        @cspace = :DeviceRGB
      end

      buf = [ 0 ] * 8
      while byte = io.send( @@gc )
        # always keep last 8 bytes so that we can check for chunk name and length
        buf.shift
        buf.push( byte )
        tag = buf[4..7].pack( 'C*' )
        next unless PNG_CHUNK_TYPES.include? tag

        length = buf[0..3].pack( 'C*' ).unpack( 'N' )[0]
        case tag
        when 'PLTE'
          @palette = Array.new()
          (length / 3).times do
            rgb = io.read( 3 )
            raise 'malformed PNG: truncated PLTE chunk' if rgb.nil? or rgb.size < 3
            # palette entries are unsigned bytes
            @palette << rgb.unpack( 'C3' )
          end
        when 'IDAT'
          @data_blocks << [ io.tell,length ]
          # skip the data and the CRC
          io.seek( length + 4,IO::SEEK_CUR )
        when 'pHYs'
          x_ppu, y_ppu, unit = io.read( 9 ).to_s.unpack( 'NNC' )
          # Unit 1 means pixels per meter. Otherwise only the aspect ratio
          # is known, so the default resolution is kept.
          if unit == 1
            @x_dpi = (x_ppu * 0.0254).round
            @y_dpi = (y_ppu * 0.0254).round
          end
        when 'tRNS'
          case @cspace
          when :Indexed
            # One alpha value per palette entry, up to the last non-opaque
            # entry.
            @trans = io.read( length ).to_s.unpack( 'C*' )
          when :DeviceGray
            # A single two-byte grey level to be treated as transparent.
            @trans = io.read( 2 ).to_s.unpack( 'n' )
          when :DeviceRGB
            # A single RGB color (two bytes per sample) to be treated as
            # transparent.
            @trans = io.read( 6 ).to_s.unpack( 'nnn' )
          end
        when 'IEND'
          break
        else
          io.seek( length + 4,IO::SEEK_CUR )
        end
      end
    end

    # Skip to the next JPEG marker and return its code, or nil if the end
    # of the stream has been reached.
    def jpgNextMarker( io )
      c = io.send( @@gc )
      c = io.send( @@gc ) until c.nil? or c == 0xFF
      # a marker may be preceded by any number of 0xFF fill bytes
      c = io.send( @@gc ) while c == 0xFF
      c
    end

    # Read a JPEG segment (a 2-byte length followed by the data) and return
    # its data as a string. Raises a RuntimeError if the length is missing
    # or invalid.
    def jpgReadFrame( io )
      off = io.read( 2 ).to_s.unpack( 'n' )[0]
      raise 'malformed JPEG: unexpected end of file' if off.nil?
      raise 'malformed JPEG: invalid segment length' if off < 2
      io.read( off - 2 ).to_s
    end

    # Walk through the segments of a JPEG stream up to the start of the
    # image data, picking up the image size, depth and color space from the
    # SOF segment, the resolution from JFIF data and the tags from EXIF data.
    def jpgExamine( io )
      while marker = jpgNextMarker( io )
        case marker
        # SOF markers
        when 0xC0..0xC3, 0xC5..0xC7, 0xC9..0xCB, 0xCD..0xCF
          length, @depth, @height, @width, components = io.read( 8 ).to_s.unpack( 'nCnnC' )
          unless !components.nil? and length == 8 + components * 3
            raise 'malformed JPEG: could not read a SOF header'
          end
          # skip the component specifications
          io.read( components * 3 )
          case components
          when 1
            @cspace = :DeviceGray
          when 4
            @cspace = :DeviceCMYK
          else
            @cspace = :DeviceRGB
          end
        # EOI, SOS
        when 0xD9, 0xDA
          break
        # stuffed zero byte, TEM, RSTn, SOI: standalone, no segment follows
        when 0x00, 0x01, 0xD0..0xD8
          next
        # APP0, contains JFIF tag (other APP0 flavours are ignored)
        when 0xE0
          jfif = jpgReadFrame( io )
          if jfif.size >= 12 and jfif[0,5].eql? "JFIF\x00".to_binary
            # the version (2 bytes) precedes the density fields
            units, xres, yres = jfif[7,5].unpack( 'Cnn' )
            case units
            when 1
              # dots per inch
              @x_dpi = xres; @y_dpi = yres
            when 2
              # dots per centimeter
              @x_dpi = (xres * 2.54).round
              @y_dpi = (yres * 2.54).round
            end
            # units == 0: only the pixel aspect ratio is specified, so
            # the default resolution is kept
          end
        # APP1, contains EXIF tag
        when 0xE1
          exif = jpgReadFrame( io )
          if exif[0...6].eql? "Exif\x00\x00".to_binary
            buf = StringIO.new( exif[6..-1] )
            sign = buf.read( 4 )
            tiffExamine( buf,sign )
          end
        # ignore frame
        else
          jpgReadFrame( io )
        end
      end
    end
  end
end