pdfbeads 1.0.2 → 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +13 -0
- data/bin/pdfbeads +15 -4
- data/lib/pdfbeads/pdfbuilder.rb +4 -3
- data/lib/pdfbeads/pdfpage.rb +7 -4
- metadata +4 -4
data/ChangeLog
CHANGED
@@ -5,3 +5,16 @@
|
|
5
5
|
2010 November 12 (Alexey Kryukov) Version 1.0.1
|
6
6
|
|
7
7
|
* JBIG2 support didn't work properly under MS Windows.
|
8
|
+
|
9
|
+
2010 November 15 (Alexey Kryukov) Version 1.0.3
|
10
|
+
|
11
|
+
* Make it possible to specify binarization threshold for mixed scans
|
12
|
+
(it is still not recommended to perform segmentation with pdfbeads,
|
13
|
+
unless the image has been previously processed with ScanTailor, so
|
14
|
+
the normal value for this parameter is 1).
|
15
|
+
|
16
|
+
* If a multipage TIFF file is passed to pdfbeads, warn user that
|
17
|
+
only the first image will be processed.
|
18
|
+
|
19
|
+
* TIFF files with an embedded EXIF block were incorrectly processed
|
20
|
+
by ImageInspector.
|
data/bin/pdfbeads
CHANGED
@@ -45,6 +45,7 @@ pdfargs = Hash[
|
|
45
45
|
:meta => nil
|
46
46
|
]
|
47
47
|
pageargs = Hash[
|
48
|
+
:threshold => 1,
|
48
49
|
:maxcolors => 4,
|
49
50
|
:st_resolution => 0,
|
50
51
|
:bg_resolution => 300,
|
@@ -108,13 +109,22 @@ OptionParser.new() do |opts|
|
|
108
109
|
"Generate one shared JBIG2 dictionary per NUM pages.",
|
109
110
|
"This option is only applied when JBIG2 compression",
|
110
111
|
"is used. Default value is #{pageargs[:pages_per_dict]}") do |p|
|
111
|
-
|
112
|
+
pval = p.to_i
|
113
|
+
pageargs[:pages_per_dict] = pval if ( pval >= 0 )
|
112
114
|
end
|
113
115
|
opts.on("-r", "--force-resolution DPI",
|
114
116
|
"Set resolution for foreground mask images to the",
|
115
117
|
"specified value (in pixels per inch). Note that the",
|
116
|
-
"image is not actually resampled.") do |
|
117
|
-
pageargs[:st_resolution] =
|
118
|
+
"image is not actually resampled.") do |dpi|
|
119
|
+
pageargs[:st_resolution] = dpi.to_f
|
120
|
+
end
|
121
|
+
opts.on("-t", "--threshold VAL",
|
122
|
+
"Set binarization threshold for mixed images. Valid",
|
123
|
+
"values are between 1 and 255. 1 is used by default,",
|
124
|
+
"as the input files are assumed to be preprocessed",
|
125
|
+
"with ScanTailor (http://scantailor.sourceforge.net)") do |t|
|
126
|
+
tval = t.to_i
|
127
|
+
pageargs[:threshold] = tval if ( (1..255).include? tval )
|
118
128
|
end
|
119
129
|
opts.on("-x", "--max-colors NUM",
|
120
130
|
"If pdfbeads finds an indexed file with NUM or",
|
@@ -123,7 +133,8 @@ OptionParser.new() do |opts|
|
|
123
133
|
"PDF page mask. Otherwise the file is treated just",
|
124
134
|
"like a normal greyscale or color image. Default",
|
125
135
|
"value is #{pageargs[:maxcolors]}") do |num|
|
126
|
-
|
136
|
+
cval = num.to_i
|
137
|
+
pageargs[:maxcolors] = cval if ( (2..255).include? cval )
|
127
138
|
end
|
128
139
|
|
129
140
|
opts.separator "\n"
|
data/lib/pdfbeads/pdfbuilder.rb
CHANGED
@@ -281,7 +281,7 @@ class PDFBeads::PDFBuilder
|
|
281
281
|
pages.addToDict( 'Count', page_objs.length )
|
282
282
|
pages.addToDict( 'Kids', '[' << page_objs.map{|x| ref(x.getID).to_s}.join(' ') << ']' )
|
283
283
|
|
284
|
-
pkey = pidx + 1
|
284
|
+
pkey = (pidx + 1).to_s
|
285
285
|
pkey = labels.getPageLabel( cur_range_id,pidx ) if labels != nil and labels.length > 0
|
286
286
|
pages_by_num[pkey] = page.getID
|
287
287
|
pidx += 1
|
@@ -547,10 +547,11 @@ class PDFBeads::PDFBuilder
|
|
547
547
|
return nil if stencil.width.nil?
|
548
548
|
|
549
549
|
width = stencil.width
|
550
|
-
height = stencil.height
|
550
|
+
height = rows_per_strip = stencil.height
|
551
551
|
xres = stencil.x_dpi
|
552
552
|
yres = stencil.y_dpi
|
553
|
-
rows_per_strip = stencil.tags[0x116][0]
|
553
|
+
rows_per_strip = stencil.tags[0x116][0] if
|
554
|
+
stencil.format.eql? :TIFF and stencil.tags.has_key? 0x116
|
554
555
|
|
555
556
|
unless stencil.compression.eql? :CCITTFaxDecode and rows_per_strip >= height
|
556
557
|
img = ImageList.new( path )
|
data/lib/pdfbeads/pdfpage.rb
CHANGED
@@ -55,6 +55,7 @@ class PDFBeads::PageDataProvider < Array
|
|
55
55
|
ret = 0
|
56
56
|
force = @pageargs[:force_update]
|
57
57
|
fres = @pageargs[:st_resolution]
|
58
|
+
treshold = @pageargs[:threshold]
|
58
59
|
|
59
60
|
map = Hash[
|
60
61
|
:path => @name,
|
@@ -73,7 +74,6 @@ class PDFBeads::PageDataProvider < Array
|
|
73
74
|
|
74
75
|
if insp.depth == 1
|
75
76
|
@stencils << map
|
76
|
-
$stderr.puts( "Prepared data for processing #{@name}\n" )
|
77
77
|
ret = 1
|
78
78
|
|
79
79
|
else
|
@@ -85,7 +85,7 @@ class PDFBeads::PageDataProvider < Array
|
|
85
85
|
img.class_type = PseudoClass
|
86
86
|
ret = processIndexed( img,@pageargs[:maxcolors],force )
|
87
87
|
end
|
88
|
-
ret = processMixed( img,force,map ) if ret == 0
|
88
|
+
ret = processMixed( img,treshold,force,map ) if ret == 0
|
89
89
|
img.destroy!
|
90
90
|
|
91
91
|
# Make sure there are no more RMagick objects
|
@@ -221,10 +221,10 @@ class PDFBeads::PageDataProvider < Array
|
|
221
221
|
return ret
|
222
222
|
end
|
223
223
|
|
224
|
-
def processMixed( img,force,map )
|
224
|
+
def processMixed( img,treshold,force,map )
|
225
225
|
binpath = "#{@basename}.black.tiff"
|
226
226
|
if not File.exists? binpath or force
|
227
|
-
im_copy = img.copy; bitonal = im_copy.threshold(
|
227
|
+
im_copy = img.copy; bitonal = im_copy.threshold(QuantumRange/255*treshold); im_copy.destroy!
|
228
228
|
bitonal.write( binpath ){
|
229
229
|
self.format = 'TIFF'
|
230
230
|
self.define( 'TIFF','rows-per-strip',img.rows )
|
@@ -237,6 +237,9 @@ class PDFBeads::PageDataProvider < Array
|
|
237
237
|
bgpath = "#{@basename}.bg." << bgf.downcase
|
238
238
|
|
239
239
|
if not File.exists? bgpath or force
|
240
|
+
if treshold > 1
|
241
|
+
bk = img.black_threshold(QuantumRange/255*treshold); img.destroy!; img = bk
|
242
|
+
end
|
240
243
|
op = img.opaque( 'black','white' ); img.destroy!; img = op;
|
241
244
|
if @pageargs[:force_grayscale]
|
242
245
|
img.image_type = GrayscaleType
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfbeads
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 2
|
10
|
-
version: 1.0.2
|
9
|
+
- 3
|
10
|
+
version: 1.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alexey Kryukov
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-11-
|
18
|
+
date: 2010-11-16 00:00:00 +03:00
|
19
19
|
default_executable: pdfbeads
|
20
20
|
dependencies: []
|
21
21
|
|