pdfbeads 1.0.2 → 1.0.3

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog CHANGED
@@ -5,3 +5,16 @@
5
5
  2010 November 12 (Alexey Kryukov) Version 1.0.1
6
6
 
7
7
  * JBIG2 support didn't work properly under MS Windows.
8
+
9
+ 2010 November 15 (Alexey Kryukov) Version 1.0.3
10
+
11
+ + Make it possible to specify a binarization threshold for mixed scans
12
+ (it is still not recommended to perform segmentation with pdfbeads,
13
+ unless the image has been previously processed with ScanTailor, so
14
+ the normal value for this parameter is 1).
15
+
16
+ * If a multipage TIFF file is passed to pdfbeads, warn the user that
17
+ only the first image will be processed.
18
+
19
+ * TIFF files with an embedded EXIF block were incorrectly processed
20
+ by ImageInspector.
data/bin/pdfbeads CHANGED
@@ -45,6 +45,7 @@ pdfargs = Hash[
45
45
  :meta => nil
46
46
  ]
47
47
  pageargs = Hash[
48
+ :threshold => 1,
48
49
  :maxcolors => 4,
49
50
  :st_resolution => 0,
50
51
  :bg_resolution => 300,
@@ -108,13 +109,22 @@ OptionParser.new() do |opts|
108
109
  "Generate one shared JBIG2 dictionary per NUM pages.",
109
110
  "This option is only applied when JBIG2 compression",
110
111
  "is used. Default value is #{pageargs[:pages_per_dict]}") do |p|
111
- pageargs[:pages_per_dict] = p.to_i
112
+ pval = p.to_i
113
+ pageargs[:pages_per_dict] = pval if ( pval >= 0 )
112
114
  end
113
115
  opts.on("-r", "--force-resolution DPI",
114
116
  "Set resolution for foreground mask images to the",
115
117
  "specified value (in pixels per inch). Note that the",
116
- "image is not actually resampled.") do |r|
117
- pageargs[:st_resolution] = r.to_f
118
+ "image is not actually resampled.") do |dpi|
119
+ pageargs[:st_resolution] = dpi.to_f
120
+ end
121
+ opts.on("-t", "--threshold VAL",
122
+ "Set binarization threshold for mixed images. Valid",
123
+ "values are between 1 and 255. 1 is used by default,",
124
+ "as the input files are assumed to be preprocessed",
125
+ "with ScanTailor (http://scantailor.sourceforge.net)") do |t|
126
+ tval = t.to_i
127
+ pageargs[:threshold] = tval if ( (1..255).include? tval )
118
128
  end
119
129
  opts.on("-x", "--max-colors NUM",
120
130
  "If pdfbeads finds an indexed file with NUM or",
@@ -123,7 +133,8 @@ OptionParser.new() do |opts|
123
133
  "PDF page mask. Otherwise the file is treated just",
124
134
  "like a normal greyscale or color image. Default",
125
135
  "value is #{pageargs[:maxcolors]}") do |num|
126
- pageargs[:maxcolors] = num.to_i
136
+ cval = num.to_i
137
+ pageargs[:maxcolors] = cval if ( (2..255).include? cval )
127
138
  end
128
139
 
129
140
  opts.separator "\n"
@@ -281,7 +281,7 @@ class PDFBeads::PDFBuilder
281
281
  pages.addToDict( 'Count', page_objs.length )
282
282
  pages.addToDict( 'Kids', '[' << page_objs.map{|x| ref(x.getID).to_s}.join(' ') << ']' )
283
283
 
284
- pkey = pidx + 1
284
+ pkey = (pidx + 1).to_s
285
285
  pkey = labels.getPageLabel( cur_range_id,pidx ) if labels != nil and labels.length > 0
286
286
  pages_by_num[pkey] = page.getID
287
287
  pidx += 1
@@ -547,10 +547,11 @@ class PDFBeads::PDFBuilder
547
547
  return nil if stencil.width.nil?
548
548
 
549
549
  width = stencil.width
550
- height = stencil.height
550
+ height = rows_per_strip = stencil.height
551
551
  xres = stencil.x_dpi
552
552
  yres = stencil.y_dpi
553
- rows_per_strip = stencil.tags[0x116][0]
553
+ rows_per_strip = stencil.tags[0x116][0] if
554
+ stencil.format.eql? :TIFF and stencil.tags.has_key? 0x116
554
555
 
555
556
  unless stencil.compression.eql? :CCITTFaxDecode and rows_per_strip >= height
556
557
  img = ImageList.new( path )
@@ -55,6 +55,7 @@ class PDFBeads::PageDataProvider < Array
55
55
  ret = 0
56
56
  force = @pageargs[:force_update]
57
57
  fres = @pageargs[:st_resolution]
58
+ treshold = @pageargs[:threshold]
58
59
 
59
60
  map = Hash[
60
61
  :path => @name,
@@ -73,7 +74,6 @@ class PDFBeads::PageDataProvider < Array
73
74
 
74
75
  if insp.depth == 1
75
76
  @stencils << map
76
- $stderr.puts( "Prepared data for processing #{@name}\n" )
77
77
  ret = 1
78
78
 
79
79
  else
@@ -85,7 +85,7 @@ class PDFBeads::PageDataProvider < Array
85
85
  img.class_type = PseudoClass
86
86
  ret = processIndexed( img,@pageargs[:maxcolors],force )
87
87
  end
88
- ret = processMixed( img,force,map ) if ret == 0
88
+ ret = processMixed( img,treshold,force,map ) if ret == 0
89
89
  img.destroy!
90
90
 
91
91
  # Make sure there are no more RMagick objects
@@ -221,10 +221,10 @@ class PDFBeads::PageDataProvider < Array
221
221
  return ret
222
222
  end
223
223
 
224
- def processMixed( img,force,map )
224
+ def processMixed( img,treshold,force,map )
225
225
  binpath = "#{@basename}.black.tiff"
226
226
  if not File.exists? binpath or force
227
- im_copy = img.copy; bitonal = im_copy.threshold(1); im_copy.destroy!
227
+ im_copy = img.copy; bitonal = im_copy.threshold(QuantumRange/255*treshold); im_copy.destroy!
228
228
  bitonal.write( binpath ){
229
229
  self.format = 'TIFF'
230
230
  self.define( 'TIFF','rows-per-strip',img.rows )
@@ -237,6 +237,9 @@ class PDFBeads::PageDataProvider < Array
237
237
  bgpath = "#{@basename}.bg." << bgf.downcase
238
238
 
239
239
  if not File.exists? bgpath or force
240
+ if treshold > 1
241
+ bk = img.black_threshold(QuantumRange/255*treshold); img.destroy!; img = bk
242
+ end
240
243
  op = img.opaque( 'black','white' ); img.destroy!; img = op;
241
244
  if @pageargs[:force_grayscale]
242
245
  img.image_type = GrayscaleType
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfbeads
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 2
10
- version: 1.0.2
9
+ - 3
10
+ version: 1.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alexey Kryukov
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-11-13 00:00:00 +03:00
18
+ date: 2010-11-16 00:00:00 +03:00
19
19
  default_executable: pdfbeads
20
20
  dependencies: []
21
21