pdfbeads 1.0.2 → 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +13 -0
- data/bin/pdfbeads +15 -4
- data/lib/pdfbeads/pdfbuilder.rb +4 -3
- data/lib/pdfbeads/pdfpage.rb +7 -4
- metadata +4 -4
data/ChangeLog
CHANGED
@@ -5,3 +5,16 @@
|
|
5
5
|
2010 November 12 (Alexey Kryukov) Version 1.0.1
|
6
6
|
|
7
7
|
* JBIG2 support didn't work properly under MS Windows.
|
8
|
+
|
9
|
+
2010 November 15 (Alexey Kryukov) Version 1.0.3
|
10
|
+
|
11
|
+
+ Make it possible to specify binarization threshold for mixed scans
|
12
|
+
(it is still not recommended to perform segmentation with pdfbeads,
|
13
|
+
unless the image has been previously processed with ScanTailor, so
|
14
|
+
the normal value for this parameter is 1).
|
15
|
+
|
16
|
+
* If a multipage TIFF file is passed to pdfbeads, warn user that
|
17
|
+
only the first image will be processed.
|
18
|
+
|
19
|
+
* TIFF files with an embedded EXIF block were incorrectly processed
|
20
|
+
by ImageInspector.
|
data/bin/pdfbeads
CHANGED
@@ -45,6 +45,7 @@ pdfargs = Hash[
|
|
45
45
|
:meta => nil
|
46
46
|
]
|
47
47
|
pageargs = Hash[
|
48
|
+
:threshold => 1,
|
48
49
|
:maxcolors => 4,
|
49
50
|
:st_resolution => 0,
|
50
51
|
:bg_resolution => 300,
|
@@ -108,13 +109,22 @@ OptionParser.new() do |opts|
|
|
108
109
|
"Generate one shared JBIG2 dictionary per NUM pages.",
|
109
110
|
"This option is only applied when JBIG2 compression",
|
110
111
|
"is used. Default value is #{pageargs[:pages_per_dict]}") do |p|
|
111
|
-
|
112
|
+
pval = p.to_i
|
113
|
+
pageargs[:pages_per_dict] = pval if ( pval >= 0 )
|
112
114
|
end
|
113
115
|
opts.on("-r", "--force-resolution DPI",
|
114
116
|
"Set resolution for foreground mask images to the",
|
115
117
|
"specified value (in pixels per inch). Note that the",
|
116
|
-
"image is not actually resampled.") do |
|
117
|
-
pageargs[:st_resolution] =
|
118
|
+
"image is not actually resampled.") do |dpi|
|
119
|
+
pageargs[:st_resolution] = dpi.to_f
|
120
|
+
end
|
121
|
+
opts.on("-t", "--threshold VAL",
|
122
|
+
"Set binarization threshold for mixed images. Valid",
|
123
|
+
"values are between 1 and 255. 1 is used by default,",
|
124
|
+
"as the input files are assumed to be preprocessed",
|
125
|
+
"with ScanTailor (http://scantailor.sourceforge.net)") do |t|
|
126
|
+
tval = t.to_i
|
127
|
+
pageargs[:threshold] = tval if ( (1..255).include? tval )
|
118
128
|
end
|
119
129
|
opts.on("-x", "--max-colors NUM",
|
120
130
|
"If pdfbeads finds an indexed file with NUM or",
|
@@ -123,7 +133,8 @@ OptionParser.new() do |opts|
|
|
123
133
|
"PDF page mask. Otherwise the file is treated just",
|
124
134
|
"like a normal greyscale or color image. Default",
|
125
135
|
"value is #{pageargs[:maxcolors]}") do |num|
|
126
|
-
|
136
|
+
cval = num.to_i
|
137
|
+
pageargs[:maxcolors] = cval if ( (2..255).include? cval )
|
127
138
|
end
|
128
139
|
|
129
140
|
opts.separator "\n"
|
data/lib/pdfbeads/pdfbuilder.rb
CHANGED
@@ -281,7 +281,7 @@ class PDFBeads::PDFBuilder
|
|
281
281
|
pages.addToDict( 'Count', page_objs.length )
|
282
282
|
pages.addToDict( 'Kids', '[' << page_objs.map{|x| ref(x.getID).to_s}.join(' ') << ']' )
|
283
283
|
|
284
|
-
pkey = pidx + 1
|
284
|
+
pkey = (pidx + 1).to_s
|
285
285
|
pkey = labels.getPageLabel( cur_range_id,pidx ) if labels != nil and labels.length > 0
|
286
286
|
pages_by_num[pkey] = page.getID
|
287
287
|
pidx += 1
|
@@ -547,10 +547,11 @@ class PDFBeads::PDFBuilder
|
|
547
547
|
return nil if stencil.width.nil?
|
548
548
|
|
549
549
|
width = stencil.width
|
550
|
-
height = stencil.height
|
550
|
+
height = rows_per_strip = stencil.height
|
551
551
|
xres = stencil.x_dpi
|
552
552
|
yres = stencil.y_dpi
|
553
|
-
rows_per_strip = stencil.tags[0x116][0]
|
553
|
+
rows_per_strip = stencil.tags[0x116][0] if
|
554
|
+
stencil.format.eql? :TIFF and stencil.tags.has_key? 0x116
|
554
555
|
|
555
556
|
unless stencil.compression.eql? :CCITTFaxDecode and rows_per_strip >= height
|
556
557
|
img = ImageList.new( path )
|
data/lib/pdfbeads/pdfpage.rb
CHANGED
@@ -55,6 +55,7 @@ class PDFBeads::PageDataProvider < Array
|
|
55
55
|
ret = 0
|
56
56
|
force = @pageargs[:force_update]
|
57
57
|
fres = @pageargs[:st_resolution]
|
58
|
+
treshold = @pageargs[:threshold]
|
58
59
|
|
59
60
|
map = Hash[
|
60
61
|
:path => @name,
|
@@ -73,7 +74,6 @@ class PDFBeads::PageDataProvider < Array
|
|
73
74
|
|
74
75
|
if insp.depth == 1
|
75
76
|
@stencils << map
|
76
|
-
$stderr.puts( "Prepared data for processing #{@name}\n" )
|
77
77
|
ret = 1
|
78
78
|
|
79
79
|
else
|
@@ -85,7 +85,7 @@ class PDFBeads::PageDataProvider < Array
|
|
85
85
|
img.class_type = PseudoClass
|
86
86
|
ret = processIndexed( img,@pageargs[:maxcolors],force )
|
87
87
|
end
|
88
|
-
ret = processMixed( img,force,map ) if ret == 0
|
88
|
+
ret = processMixed( img,treshold,force,map ) if ret == 0
|
89
89
|
img.destroy!
|
90
90
|
|
91
91
|
# Make sure there are no more RMagick objects
|
@@ -221,10 +221,10 @@ class PDFBeads::PageDataProvider < Array
|
|
221
221
|
return ret
|
222
222
|
end
|
223
223
|
|
224
|
-
def processMixed( img,force,map )
|
224
|
+
def processMixed( img,treshold,force,map )
|
225
225
|
binpath = "#{@basename}.black.tiff"
|
226
226
|
if not File.exists? binpath or force
|
227
|
-
im_copy = img.copy; bitonal = im_copy.threshold(
|
227
|
+
im_copy = img.copy; bitonal = im_copy.threshold(QuantumRange/255*treshold); im_copy.destroy!
|
228
228
|
bitonal.write( binpath ){
|
229
229
|
self.format = 'TIFF'
|
230
230
|
self.define( 'TIFF','rows-per-strip',img.rows )
|
@@ -237,6 +237,9 @@ class PDFBeads::PageDataProvider < Array
|
|
237
237
|
bgpath = "#{@basename}.bg." << bgf.downcase
|
238
238
|
|
239
239
|
if not File.exists? bgpath or force
|
240
|
+
if treshold > 1
|
241
|
+
bk = img.black_threshold(QuantumRange/255*treshold); img.destroy!; img = bk
|
242
|
+
end
|
240
243
|
op = img.opaque( 'black','white' ); img.destroy!; img = op;
|
241
244
|
if @pageargs[:force_grayscale]
|
242
245
|
img.image_type = GrayscaleType
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfbeads
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 17
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 2
|
10
|
-
version: 1.0.2
|
9
|
+
- 3
|
10
|
+
version: 1.0.3
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alexey Kryukov
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-11-
|
18
|
+
date: 2010-11-16 00:00:00 +03:00
|
19
19
|
default_executable: pdfbeads
|
20
20
|
dependencies: []
|
21
21
|
|