pdfbeads 1.0.1 → 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/bin/pdfbeads CHANGED
@@ -108,13 +108,13 @@ OptionParser.new() do |opts|
108
108
  "Generate one shared JBIG2 dictionary per NUM pages.",
109
109
  "This option is only applied when JBIG2 compression",
110
110
  "is used. Default value is #{pageargs[:pages_per_dict]}") do |p|
111
- pageargs[:pages_per_dict] = p
111
+ pageargs[:pages_per_dict] = p.to_i
112
112
  end
113
113
  opts.on("-r", "--force-resolution DPI",
114
114
  "Set resolution for foreground mask images to the",
115
115
  "specified value (in pixels per inch). Note that the",
116
116
  "image is not actually resampled.") do |r|
117
- pageargs[:st_resolution] = r
117
+ pageargs[:st_resolution] = r.to_f
118
118
  end
119
119
  opts.on("-x", "--max-colors NUM",
120
120
  "If pdfbeads finds an indexed file with NUM or",
@@ -123,7 +123,7 @@ OptionParser.new() do |opts|
123
123
  "PDF page mask. Otherwise the file is treated just",
124
124
  "like a normal greyscale or color image. Default",
125
125
  "value is #{pageargs[:maxcolors]}") do |num|
126
- pageargs[:maxcolors] = num
126
+ pageargs[:maxcolors] = num.to_i
127
127
  end
128
128
 
129
129
  opts.separator "\n"
@@ -118,6 +118,22 @@ class ImageInspector::Image
118
118
  end
119
119
  end
120
120
 
121
+ def nextImage()
122
+ if @format.eql? :TIFF and @next_off > 0
123
+ begin
124
+ if @input.kind_of? IO or @input.kind_of? StringIO
125
+ tiffNext( @input )
126
+ else
127
+ File.open( @input, 'rb' ) { |io| tiffNext( io ) }
128
+ end
129
+ return true
130
+ rescue Exception => e
131
+ $stderr.puts( "Could not read data from #{@fname}: " << e.message )
132
+ end
133
+ end
134
+ false
135
+ end
136
+
121
137
  private
122
138
 
123
139
  def clearData
@@ -221,6 +237,11 @@ class ImageInspector::Image
221
237
  return ret
222
238
  end
223
239
 
240
+ def tiffNext( io )
241
+ sign = io.read( 4 )
242
+ tiffExamine( io,sign,@next_off )
243
+ end
244
+
224
245
  def tiffParseIFD( io,offset,intgr )
225
246
  packspec = [
226
247
  nil, # nothing (shouldn't happen)
@@ -229,7 +250,7 @@ class ImageInspector::Image
229
250
  intgr, # SHORT (16-bit unsigned integer)
230
251
  intgr.upcase, # LONG (32-bit unsigned integer)
231
252
  intgr.upcase * 2, # RATIONAL (numerator + denominator)
232
- intgr.upcase, # SBYTE (8-bit signed integer)
253
+ 'c', # SBYTE (8-bit signed integer)
233
254
  'A*', # undefined, but used for EXIF version
234
255
  intgr, # SSHORT (16-bit signed integer)
235
256
  intgr.upcase, # SLONG (32-bit signed integer)
@@ -271,7 +292,7 @@ class ImageInspector::Image
271
292
  @tags.has_key? 0x0100 and @tags.has_key? 0x0101 and
272
293
  @tags.has_key? 0x0106 and @tags.has_key? 0x0111 and @tags.has_key? 0x0117 )
273
294
 
274
- unless @tags.has_key? 0x8769
295
+ unless @format.eql? :JPEG
275
296
  @width = @tags[0x0100][0]; @height = @tags[0x0101][0]
276
297
 
277
298
  @tags[0x0111].each_index do |i|
@@ -334,8 +355,8 @@ class ImageInspector::Image
334
355
  buf.push( b )
335
356
  tag = buf[4..7].pack('c*')
336
357
 
337
- # Currently we can detect only width/height for JP2 images.
338
- # That's OK, as other parameters aren't needed for PDF generation.
358
+ # Currently no support for resolution, as I have never seen JP2 images
359
+ # with 'res '/'resc'/'resd' boxes, and not sure if they are ever used.
339
360
  if ['ftyp','jp2h','ihdr','colr','res ','resc',
340
361
  'resd','prfl','bpcc','pclr','cdef','jp2i'].include? tag
341
362
  length = buf[0..4].pack( 'c*' ).unpack( 'N' )[0]
data/lib/pdfbeads.rb CHANGED
@@ -40,8 +40,8 @@ begin
40
40
  require 'hpricot'
41
41
  $has_hpricot = true
42
42
  rescue LoadError
43
- $stderr.puts( "Warning: the hpricot extension is not available." )
44
- $stderr.puts( " pdfbeads will not be able to read OCR data from hOCR files." )
43
+ $stderr.puts( "Warning: the hpricot extension is not available. I'll not be able" )
44
+ $stderr.puts( "\tto create hidden text layer from hOCR files." )
45
45
  $has_hpricot = false
46
46
  end
47
47
 
@@ -70,27 +70,34 @@ class PDFBeads::PageDataProvider < Array
70
70
  else
71
71
  @x_res = @y_res = fres
72
72
  end
73
+
73
74
  if insp.depth == 1
74
75
  @stencils << map
75
76
  $stderr.puts( "Prepared data for processing #{@name}\n" )
76
- return 1
77
- end
77
+ ret = 1
78
+
79
+ else
80
+ img = ImageList.new( @name )
81
+ # ImageMagick incorrectly identifies indexed PNG images as DirectClass.
82
+ # It also assigns a strange color value to fully opaque areas. So
83
+ # we have to use an independent approach to recognize indexed images.
84
+ unless insp.palette.nil?
85
+ img.class_type = PseudoClass
86
+ ret = processIndexed( img,@pageargs[:maxcolors],force )
87
+ end
88
+ ret = processMixed( img,force,map ) if ret == 0
89
+ img.destroy!
78
90
 
79
- img = ImageList.new( @name )
80
- # ImageMagick incorrectly identifies indexed PNG images as DirectClass.
81
- # It also assigns a strange color value to fully opaque areas. So
82
- # we have to use an independent approach to recognize indexed images.
83
- unless insp.palette.nil?
84
- img.class_type = PseudoClass
85
- ret = processIndexed( img,@pageargs[:maxcolors],force )
91
+ # Make sure there are no more RMagick objects
92
+ GC.start
86
93
  end
87
- ret = processMixed( img,force,map ) if ret == 0
88
- img.destroy!
89
94
 
90
- # Make sure there are no more RMagick objects
91
- GC.start
92
95
  $stderr.puts( "Prepared data for processing #{@name}\n" )
93
- return ret
96
+ if insp.nextImage
97
+ $stderr.puts( "Warning: #{@name} contains multiple images, but only the first one")
98
+ $stderr.puts( "\tis going to be used\n" )
99
+ end
100
+ ret
94
101
  end
95
102
 
96
103
  def addSupplementaryFiles()
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfbeads
3
3
  version: !ruby/object:Gem::Version
4
- hash: 21
4
+ hash: 19
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 1
10
- version: 1.0.1
9
+ - 2
10
+ version: 1.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alexey Kryukov
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-11-12 00:00:00 +03:00
18
+ date: 2010-11-13 00:00:00 +03:00
19
19
  default_executable: pdfbeads
20
20
  dependencies: []
21
21