pdfbeads 1.0.2 → 1.0.3

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions exactly as they appear in their respective public registries.
data/ChangeLog CHANGED
@@ -5,3 +5,16 @@
5
5
  2010 November 12 (Alexey Kryukov) Version 1.0.1
6
6
 
7
7
  * JBIG2 support didn't work properly under MS Windows.
8
+
9
+ 2010 November 15 (Alexey Kryukov) Version 1.0.3
10
+
11
+ + Make it possible to specify binarization threshold for mixed scans
12
+ (it is still not recommended to perform segmentation with pdfbeads,
13
+ unless the image has been previously processed with ScanTailor, so
14
+ the normal value for this parameter is 1).
15
+
16
+ * If a multipage TIFF file is passed to pdfbeads, warn user that
17
+ only the first image will be processed.
18
+
19
+ * TIFF files with an embedded EXIF block were incorrectly processed
20
+ by ImageInspector.
data/bin/pdfbeads CHANGED
@@ -45,6 +45,7 @@ pdfargs = Hash[
45
45
  :meta => nil
46
46
  ]
47
47
  pageargs = Hash[
48
+ :threshold => 1,
48
49
  :maxcolors => 4,
49
50
  :st_resolution => 0,
50
51
  :bg_resolution => 300,
@@ -108,13 +109,22 @@ OptionParser.new() do |opts|
108
109
  "Generate one shared JBIG2 dictionary per NUM pages.",
109
110
  "This option is only applied when JBIG2 compression",
110
111
  "is used. Default value is #{pageargs[:pages_per_dict]}") do |p|
111
- pageargs[:pages_per_dict] = p.to_i
112
+ pval = p.to_i
113
+ pageargs[:pages_per_dict] = pval if ( pval >= 0 )
112
114
  end
113
115
  opts.on("-r", "--force-resolution DPI",
114
116
  "Set resolution for foreground mask images to the",
115
117
  "specified value (in pixels per inch). Note that the",
116
- "image is not actually resampled.") do |r|
117
- pageargs[:st_resolution] = r.to_f
118
+ "image is not actually resampled.") do |dpi|
119
+ pageargs[:st_resolution] = dpi.to_f
120
+ end
121
+ opts.on("-t", "--threshold VAL",
122
+ "Set binarization threshold for mixed images. Valid",
123
+ "values are between 1 and 255. 1 is used by default,",
124
+ "as the input files are assumed to be preprocessed",
125
+ "with ScanTailor (http://scantailor.sourceforge.net)") do |t|
126
+ tval = t.to_i
127
+ pageargs[:threshold] = tval if ( (1..255).include? tval )
118
128
  end
119
129
  opts.on("-x", "--max-colors NUM",
120
130
  "If pdfbeads finds an indexed file with NUM or",
@@ -123,7 +133,8 @@ OptionParser.new() do |opts|
123
133
  "PDF page mask. Otherwise the file is treated just",
124
134
  "like a normal greyscale or color image. Default",
125
135
  "value is #{pageargs[:maxcolors]}") do |num|
126
- pageargs[:maxcolors] = num.to_i
136
+ cval = num.to_i
137
+ pageargs[:maxcolors] = cval if ( (2..255).include? cval )
127
138
  end
128
139
 
129
140
  opts.separator "\n"
@@ -281,7 +281,7 @@ class PDFBeads::PDFBuilder
281
281
  pages.addToDict( 'Count', page_objs.length )
282
282
  pages.addToDict( 'Kids', '[' << page_objs.map{|x| ref(x.getID).to_s}.join(' ') << ']' )
283
283
 
284
- pkey = pidx + 1
284
+ pkey = (pidx + 1).to_s
285
285
  pkey = labels.getPageLabel( cur_range_id,pidx ) if labels != nil and labels.length > 0
286
286
  pages_by_num[pkey] = page.getID
287
287
  pidx += 1
@@ -547,10 +547,11 @@ class PDFBeads::PDFBuilder
547
547
  return nil if stencil.width.nil?
548
548
 
549
549
  width = stencil.width
550
- height = stencil.height
550
+ height = rows_per_strip = stencil.height
551
551
  xres = stencil.x_dpi
552
552
  yres = stencil.y_dpi
553
- rows_per_strip = stencil.tags[0x116][0]
553
+ rows_per_strip = stencil.tags[0x116][0] if
554
+ stencil.format.eql? :TIFF and stencil.tags.has_key? 0x116
554
555
 
555
556
  unless stencil.compression.eql? :CCITTFaxDecode and rows_per_strip >= height
556
557
  img = ImageList.new( path )
@@ -55,6 +55,7 @@ class PDFBeads::PageDataProvider < Array
55
55
  ret = 0
56
56
  force = @pageargs[:force_update]
57
57
  fres = @pageargs[:st_resolution]
58
+ treshold = @pageargs[:threshold]
58
59
 
59
60
  map = Hash[
60
61
  :path => @name,
@@ -73,7 +74,6 @@ class PDFBeads::PageDataProvider < Array
73
74
 
74
75
  if insp.depth == 1
75
76
  @stencils << map
76
- $stderr.puts( "Prepared data for processing #{@name}\n" )
77
77
  ret = 1
78
78
 
79
79
  else
@@ -85,7 +85,7 @@ class PDFBeads::PageDataProvider < Array
85
85
  img.class_type = PseudoClass
86
86
  ret = processIndexed( img,@pageargs[:maxcolors],force )
87
87
  end
88
- ret = processMixed( img,force,map ) if ret == 0
88
+ ret = processMixed( img,treshold,force,map ) if ret == 0
89
89
  img.destroy!
90
90
 
91
91
  # Make sure there are no more RMagick objects
@@ -221,10 +221,10 @@ class PDFBeads::PageDataProvider < Array
221
221
  return ret
222
222
  end
223
223
 
224
- def processMixed( img,force,map )
224
+ def processMixed( img,treshold,force,map )
225
225
  binpath = "#{@basename}.black.tiff"
226
226
  if not File.exists? binpath or force
227
- im_copy = img.copy; bitonal = im_copy.threshold(1); im_copy.destroy!
227
+ im_copy = img.copy; bitonal = im_copy.threshold(QuantumRange/255*treshold); im_copy.destroy!
228
228
  bitonal.write( binpath ){
229
229
  self.format = 'TIFF'
230
230
  self.define( 'TIFF','rows-per-strip',img.rows )
@@ -237,6 +237,9 @@ class PDFBeads::PageDataProvider < Array
237
237
  bgpath = "#{@basename}.bg." << bgf.downcase
238
238
 
239
239
  if not File.exists? bgpath or force
240
+ if treshold > 1
241
+ bk = img.black_threshold(QuantumRange/255*treshold); img.destroy!; img = bk
242
+ end
240
243
  op = img.opaque( 'black','white' ); img.destroy!; img = op;
241
244
  if @pageargs[:force_grayscale]
242
245
  img.image_type = GrayscaleType
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfbeads
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 17
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 2
10
- version: 1.0.2
9
+ - 3
10
+ version: 1.0.3
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alexey Kryukov
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-11-13 00:00:00 +03:00
18
+ date: 2010-11-16 00:00:00 +03:00
19
19
  default_executable: pdfbeads
20
20
  dependencies: []
21
21