pdfbeads 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/pdfbeads +3 -3
- data/lib/imageinspector.rb +25 -4
- data/lib/pdfbeads.rb +2 -2
- data/lib/pdfbeads/pdfpage.rb +21 -14
- metadata +4 -4
data/bin/pdfbeads
CHANGED
@@ -108,13 +108,13 @@ OptionParser.new() do |opts|
|
|
108
108
|
"Generate one shared JBIG2 dictionary per NUM pages.",
|
109
109
|
"This option is only applied when JBIG2 compression",
|
110
110
|
"is used. Default value is #{pageargs[:pages_per_dict]}") do |p|
|
111
|
-
pageargs[:pages_per_dict] = p
|
111
|
+
pageargs[:pages_per_dict] = p.to_i
|
112
112
|
end
|
113
113
|
opts.on("-r", "--force-resolution DPI",
|
114
114
|
"Set resolution for foreground mask images to the",
|
115
115
|
"specified value (in pixels per inch). Note that the",
|
116
116
|
"image is not actually resampled.") do |r|
|
117
|
-
pageargs[:st_resolution] = r
|
117
|
+
pageargs[:st_resolution] = r.to_f
|
118
118
|
end
|
119
119
|
opts.on("-x", "--max-colors NUM",
|
120
120
|
"If pdfbeads finds an indexed file with NUM or",
|
@@ -123,7 +123,7 @@ OptionParser.new() do |opts|
|
|
123
123
|
"PDF page mask. Otherwise the file is treated just",
|
124
124
|
"like a normal greyscale or color image. Default",
|
125
125
|
"value is #{pageargs[:maxcolors]}") do |num|
|
126
|
-
pageargs[:maxcolors] = num
|
126
|
+
pageargs[:maxcolors] = num.to_i
|
127
127
|
end
|
128
128
|
|
129
129
|
opts.separator "\n"
|
data/lib/imageinspector.rb
CHANGED
@@ -118,6 +118,22 @@ class ImageInspector::Image
|
|
118
118
|
end
|
119
119
|
end
|
120
120
|
|
121
|
+
def nextImage()
|
122
|
+
if @format.eql? :TIFF and @next_off > 0
|
123
|
+
begin
|
124
|
+
if @input.kind_of? IO or @input.kind_of? StringIO
|
125
|
+
tiffNext( @input )
|
126
|
+
else
|
127
|
+
File.open( @input, 'rb' ) { |io| tiffNext( io ) }
|
128
|
+
end
|
129
|
+
return true
|
130
|
+
rescue Exception => e
|
131
|
+
$stderr.puts( "Could not read data from #{@fname}: " << e.message )
|
132
|
+
end
|
133
|
+
end
|
134
|
+
false
|
135
|
+
end
|
136
|
+
|
121
137
|
private
|
122
138
|
|
123
139
|
def clearData
|
@@ -221,6 +237,11 @@ class ImageInspector::Image
|
|
221
237
|
return ret
|
222
238
|
end
|
223
239
|
|
240
|
+
def tiffNext( io )
|
241
|
+
sign = io.read( 4 )
|
242
|
+
tiffExamine( io,sign,@next_off )
|
243
|
+
end
|
244
|
+
|
224
245
|
def tiffParseIFD( io,offset,intgr )
|
225
246
|
packspec = [
|
226
247
|
nil, # nothing (shouldn't happen)
|
@@ -229,7 +250,7 @@ class ImageInspector::Image
|
|
229
250
|
intgr, # SHORT (16-bit unsigned integer)
|
230
251
|
intgr.upcase, # LONG (32-bit unsigned integer)
|
231
252
|
intgr.upcase * 2, # RATIONAL (numerator + denominator)
|
232
|
-
|
253
|
+
'c', # SBYTE (8-bit signed integer)
|
233
254
|
'A*', # undefined, but used for EXIF version
|
234
255
|
intgr, # SSHORT (16-bit signed integer)
|
235
256
|
intgr.upcase, # SLONG (32-bit signed integer)
|
@@ -271,7 +292,7 @@ class ImageInspector::Image
|
|
271
292
|
@tags.has_key? 0x0100 and @tags.has_key? 0x0101 and
|
272
293
|
@tags.has_key? 0x0106 and @tags.has_key? 0x0111 and @tags.has_key? 0x0117 )
|
273
294
|
|
274
|
-
unless @
|
295
|
+
unless @format.eql? :JPEG
|
275
296
|
@width = @tags[0x0100][0]; @height = @tags[0x0101][0]
|
276
297
|
|
277
298
|
@tags[0x0111].each_index do |i|
|
@@ -334,8 +355,8 @@ class ImageInspector::Image
|
|
334
355
|
buf.push( b )
|
335
356
|
tag = buf[4..7].pack('c*')
|
336
357
|
|
337
|
-
# Currently
|
338
|
-
#
|
358
|
+
# Currently no support for resolution, as I have never seen JP2 images
|
359
|
+
# with 'res '/'resc'/'resd' boxes, and not sure if they are ever used.
|
339
360
|
if ['ftyp','jp2h','ihdr','colr','res ','resc',
|
340
361
|
'resd','prfl','bpcc','pclr','cdef','jp2i'].include? tag
|
341
362
|
length = buf[0..4].pack( 'c*' ).unpack( 'N' )[0]
|
data/lib/pdfbeads.rb
CHANGED
@@ -40,8 +40,8 @@ begin
|
|
40
40
|
require 'hpricot'
|
41
41
|
$has_hpricot = true
|
42
42
|
rescue LoadError
|
43
|
-
$stderr.puts( "Warning: the hpricot extension is not available." )
|
44
|
-
$stderr.puts( "
|
43
|
+
$stderr.puts( "Warning: the hpricot extension is not available. I'll not be able" )
|
44
|
+
$stderr.puts( "\tto create hidden text layer from hOCR files." )
|
45
45
|
$has_hpricot = false
|
46
46
|
end
|
47
47
|
|
data/lib/pdfbeads/pdfpage.rb
CHANGED
@@ -70,27 +70,34 @@ class PDFBeads::PageDataProvider < Array
|
|
70
70
|
else
|
71
71
|
@x_res = @y_res = fres
|
72
72
|
end
|
73
|
+
|
73
74
|
if insp.depth == 1
|
74
75
|
@stencils << map
|
75
76
|
$stderr.puts( "Prepared data for processing #{@name}\n" )
|
76
|
-
|
77
|
-
|
77
|
+
ret = 1
|
78
|
+
|
79
|
+
else
|
80
|
+
img = ImageList.new( @name )
|
81
|
+
# ImageMagick incorrectly identifies indexed PNG images as DirectClass.
|
82
|
+
# It also assigns a strange color value to fully opaque areas. So
|
83
|
+
# we have to use an independent approach to recognize indexed images.
|
84
|
+
unless insp.palette.nil?
|
85
|
+
img.class_type = PseudoClass
|
86
|
+
ret = processIndexed( img,@pageargs[:maxcolors],force )
|
87
|
+
end
|
88
|
+
ret = processMixed( img,force,map ) if ret == 0
|
89
|
+
img.destroy!
|
78
90
|
|
79
|
-
|
80
|
-
|
81
|
-
# It also assigns a strange color value to fully opaque areas. So
|
82
|
-
# we have to use an independent approach to recognize indexed images.
|
83
|
-
unless insp.palette.nil?
|
84
|
-
img.class_type = PseudoClass
|
85
|
-
ret = processIndexed( img,@pageargs[:maxcolors],force )
|
91
|
+
# Make sure there are no more RMagick objects
|
92
|
+
GC.start
|
86
93
|
end
|
87
|
-
ret = processMixed( img,force,map ) if ret == 0
|
88
|
-
img.destroy!
|
89
94
|
|
90
|
-
# Make sure there are no more RMagick objects
|
91
|
-
GC.start
|
92
95
|
$stderr.puts( "Prepared data for processing #{@name}\n" )
|
93
|
-
|
96
|
+
if insp.nextImage
|
97
|
+
$stderr.puts( "Warning: #{@name} contains multiple images, but only the first one")
|
98
|
+
$stderr.puts( "\tis going to be used\n" )
|
99
|
+
end
|
100
|
+
ret
|
94
101
|
end
|
95
102
|
|
96
103
|
def addSupplementaryFiles()
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfbeads
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 1
|
10
|
-
version: 1.0.1
|
9
|
+
- 2
|
10
|
+
version: 1.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alexey Kryukov
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-11-
|
18
|
+
date: 2010-11-13 00:00:00 +03:00
|
19
19
|
default_executable: pdfbeads
|
20
20
|
dependencies: []
|
21
21
|
|