pdfbeads 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/pdfbeads +3 -3
- data/lib/imageinspector.rb +25 -4
- data/lib/pdfbeads.rb +2 -2
- data/lib/pdfbeads/pdfpage.rb +21 -14
- metadata +4 -4
data/bin/pdfbeads
CHANGED
@@ -108,13 +108,13 @@ OptionParser.new() do |opts|
|
|
108
108
|
"Generate one shared JBIG2 dictionary per NUM pages.",
|
109
109
|
"This option is only applied when JBIG2 compression",
|
110
110
|
"is used. Default value is #{pageargs[:pages_per_dict]}") do |p|
|
111
|
-
pageargs[:pages_per_dict] = p
|
111
|
+
pageargs[:pages_per_dict] = p.to_i
|
112
112
|
end
|
113
113
|
opts.on("-r", "--force-resolution DPI",
|
114
114
|
"Set resolution for foreground mask images to the",
|
115
115
|
"specified value (in pixels per inch). Note that the",
|
116
116
|
"image is not actually resampled.") do |r|
|
117
|
-
pageargs[:st_resolution] = r
|
117
|
+
pageargs[:st_resolution] = r.to_f
|
118
118
|
end
|
119
119
|
opts.on("-x", "--max-colors NUM",
|
120
120
|
"If pdfbeads finds an indexed file with NUM or",
|
@@ -123,7 +123,7 @@ OptionParser.new() do |opts|
|
|
123
123
|
"PDF page mask. Otherwise the file is treated just",
|
124
124
|
"like a normal greyscale or color image. Default",
|
125
125
|
"value is #{pageargs[:maxcolors]}") do |num|
|
126
|
-
pageargs[:maxcolors] = num
|
126
|
+
pageargs[:maxcolors] = num.to_i
|
127
127
|
end
|
128
128
|
|
129
129
|
opts.separator "\n"
|
data/lib/imageinspector.rb
CHANGED
@@ -118,6 +118,22 @@ class ImageInspector::Image
|
|
118
118
|
end
|
119
119
|
end
|
120
120
|
|
121
|
+
def nextImage()
|
122
|
+
if @format.eql? :TIFF and @next_off > 0
|
123
|
+
begin
|
124
|
+
if @input.kind_of? IO or @input.kind_of? StringIO
|
125
|
+
tiffNext( @input )
|
126
|
+
else
|
127
|
+
File.open( @input, 'rb' ) { |io| tiffNext( io ) }
|
128
|
+
end
|
129
|
+
return true
|
130
|
+
rescue Exception => e
|
131
|
+
$stderr.puts( "Could not read data from #{@fname}: " << e.message )
|
132
|
+
end
|
133
|
+
end
|
134
|
+
false
|
135
|
+
end
|
136
|
+
|
121
137
|
private
|
122
138
|
|
123
139
|
def clearData
|
@@ -221,6 +237,11 @@ class ImageInspector::Image
|
|
221
237
|
return ret
|
222
238
|
end
|
223
239
|
|
240
|
+
def tiffNext( io )
|
241
|
+
sign = io.read( 4 )
|
242
|
+
tiffExamine( io,sign,@next_off )
|
243
|
+
end
|
244
|
+
|
224
245
|
def tiffParseIFD( io,offset,intgr )
|
225
246
|
packspec = [
|
226
247
|
nil, # nothing (shouldn't happen)
|
@@ -229,7 +250,7 @@ class ImageInspector::Image
|
|
229
250
|
intgr, # SHORT (16-bit unsigned integer)
|
230
251
|
intgr.upcase, # LONG (32-bit unsigned integer)
|
231
252
|
intgr.upcase * 2, # RATIONAL (numerator + denominator)
|
232
|
-
|
253
|
+
'c', # SBYTE (8-bit signed integer)
|
233
254
|
'A*', # undefined, but used for EXIF version
|
234
255
|
intgr, # SSHORT (16-bit signed integer)
|
235
256
|
intgr.upcase, # SLONG (32-bit signed integer)
|
@@ -271,7 +292,7 @@ class ImageInspector::Image
|
|
271
292
|
@tags.has_key? 0x0100 and @tags.has_key? 0x0101 and
|
272
293
|
@tags.has_key? 0x0106 and @tags.has_key? 0x0111 and @tags.has_key? 0x0117 )
|
273
294
|
|
274
|
-
unless @
|
295
|
+
unless @format.eql? :JPEG
|
275
296
|
@width = @tags[0x0100][0]; @height = @tags[0x0101][0]
|
276
297
|
|
277
298
|
@tags[0x0111].each_index do |i|
|
@@ -334,8 +355,8 @@ class ImageInspector::Image
|
|
334
355
|
buf.push( b )
|
335
356
|
tag = buf[4..7].pack('c*')
|
336
357
|
|
337
|
-
# Currently
|
338
|
-
#
|
358
|
+
# Currently no support for resolution, as I have never seen JP2 images
|
359
|
+
# with 'res '/'resc'/'resd' boxes, and not sure if they are ever used.
|
339
360
|
if ['ftyp','jp2h','ihdr','colr','res ','resc',
|
340
361
|
'resd','prfl','bpcc','pclr','cdef','jp2i'].include? tag
|
341
362
|
length = buf[0..4].pack( 'c*' ).unpack( 'N' )[0]
|
data/lib/pdfbeads.rb
CHANGED
@@ -40,8 +40,8 @@ begin
|
|
40
40
|
require 'hpricot'
|
41
41
|
$has_hpricot = true
|
42
42
|
rescue LoadError
|
43
|
-
$stderr.puts( "Warning: the hpricot extension is not available." )
|
44
|
-
$stderr.puts( "
|
43
|
+
$stderr.puts( "Warning: the hpricot extension is not available. I'll not be able" )
|
44
|
+
$stderr.puts( "\tto create hidden text layer from hOCR files." )
|
45
45
|
$has_hpricot = false
|
46
46
|
end
|
47
47
|
|
data/lib/pdfbeads/pdfpage.rb
CHANGED
@@ -70,27 +70,34 @@ class PDFBeads::PageDataProvider < Array
|
|
70
70
|
else
|
71
71
|
@x_res = @y_res = fres
|
72
72
|
end
|
73
|
+
|
73
74
|
if insp.depth == 1
|
74
75
|
@stencils << map
|
75
76
|
$stderr.puts( "Prepared data for processing #{@name}\n" )
|
76
|
-
|
77
|
-
|
77
|
+
ret = 1
|
78
|
+
|
79
|
+
else
|
80
|
+
img = ImageList.new( @name )
|
81
|
+
# ImageMagick incorrectly identifies indexed PNG images as DirectClass.
|
82
|
+
# It also assigns a strange color value to fully opaque areas. So
|
83
|
+
# we have to use an independent approach to recognize indexed images.
|
84
|
+
unless insp.palette.nil?
|
85
|
+
img.class_type = PseudoClass
|
86
|
+
ret = processIndexed( img,@pageargs[:maxcolors],force )
|
87
|
+
end
|
88
|
+
ret = processMixed( img,force,map ) if ret == 0
|
89
|
+
img.destroy!
|
78
90
|
|
79
|
-
|
80
|
-
|
81
|
-
# It also assigns a strange color value to fully opaque areas. So
|
82
|
-
# we have to use an independent approach to recognize indexed images.
|
83
|
-
unless insp.palette.nil?
|
84
|
-
img.class_type = PseudoClass
|
85
|
-
ret = processIndexed( img,@pageargs[:maxcolors],force )
|
91
|
+
# Make sure there are no more RMagick objects
|
92
|
+
GC.start
|
86
93
|
end
|
87
|
-
ret = processMixed( img,force,map ) if ret == 0
|
88
|
-
img.destroy!
|
89
94
|
|
90
|
-
# Make sure there are no more RMagick objects
|
91
|
-
GC.start
|
92
95
|
$stderr.puts( "Prepared data for processing #{@name}\n" )
|
93
|
-
|
96
|
+
if insp.nextImage
|
97
|
+
$stderr.puts( "Warning: #{@name} contains multiple images, but only the first one")
|
98
|
+
$stderr.puts( "\tis going to be used\n" )
|
99
|
+
end
|
100
|
+
ret
|
94
101
|
end
|
95
102
|
|
96
103
|
def addSupplementaryFiles()
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfbeads
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 1
|
10
|
-
version: 1.0.1
|
9
|
+
- 2
|
10
|
+
version: 1.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alexey Kryukov
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-11-
|
18
|
+
date: 2010-11-13 00:00:00 +03:00
|
19
19
|
default_executable: pdfbeads
|
20
20
|
dependencies: []
|
21
21
|
|