pdfbeads 1.0.8 → 1.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +12 -2
- data/bin/pdfbeads +6 -1
- data/lib/imageinspector.rb +2 -2
- data/lib/pdfbeads/pdfbuilder.rb +24 -2
- data/lib/pdfbeads/pdfpage.rb +18 -5
- metadata +4 -4
data/ChangeLog
CHANGED
@@ -42,6 +42,16 @@
|
|
42
42
|
|
43
43
|
2012 March 5 (Alexey Kryukov) Version 1.0.8
|
44
44
|
|
45
|
-
Palette colors in PNG images were treated as signed chars and this could cause
|
46
|
-
|
45
|
+
* Palette colors in PNG images were treated as signed chars and this could cause
|
46
|
+
indexed images to be incorrectly displayed in the resulting PDF.
|
47
47
|
|
48
|
+
2012 April 22 (Alexey Kryukov) Version 1.0.9
|
49
|
+
|
50
|
+
+ Add an option to delete image files produced as an intermediate stage
|
51
|
+
during the PDF creation process.
|
52
|
+
|
53
|
+
* Processing indexed images with a small number of colors was broken.
|
54
|
+
|
55
|
+
* Don't attempt to use 'ocrx_word' elements which contain no bounding box
|
56
|
+
data (this should fix the problem with the hOCR output produced by some
|
57
|
+
tesseract versions).
|
data/bin/pdfbeads
CHANGED
@@ -42,7 +42,8 @@ pdfargs = Hash[
|
|
42
42
|
:labels => nil,
|
43
43
|
:toc => nil,
|
44
44
|
:pagelayout => 'TwoPageRight',
|
45
|
-
:meta => nil
|
45
|
+
:meta => nil,
|
46
|
+
:delfiles => false
|
46
47
|
]
|
47
48
|
pageargs = Hash[
|
48
49
|
:threshold => 1,
|
@@ -169,6 +170,10 @@ OptionParser.new() do |opts|
|
|
169
170
|
opts.separator "\n"
|
170
171
|
opts.separator "General options:\n"
|
171
172
|
|
173
|
+
opts.on("-d", "--delete",
|
174
|
+
"Delete intermediate image files used to create PDF") do |d|
|
175
|
+
pdfargs[:delfiles] = d
|
176
|
+
end
|
172
177
|
opts.on("-o", "--output FILE",
|
173
178
|
"Print output to a file instead of STDERR") do |f|
|
174
179
|
outpath = f
|
data/lib/imageinspector.rb
CHANGED
@@ -444,14 +444,14 @@ class ImageInspector::Image
|
|
444
444
|
@x_dpi = (x_dpm/100 * 2.54).round
|
445
445
|
@y_dpi = (y_dpm/100 * 2.54).round
|
446
446
|
when 'tRNS'
|
447
|
-
trans = Hash.new
|
447
|
+
trans = Hash.new()
|
448
448
|
case @cspace
|
449
449
|
when :Indexed
|
450
450
|
# Indexed colour, RGB. Each byte in this chunk is an alpha for
|
451
451
|
# the palette index in the PLTE ("palette") chunk up until the
|
452
452
|
# last non-opaque entry. Set up an array, stretching over all
|
453
453
|
# palette entries which will be 0 (opaque) or 1 (transparent).
|
454
|
-
@trans = io.read(
|
454
|
+
@trans = io.read( length ).unpack( 'C*' )
|
455
455
|
when :DeviceGray
|
456
456
|
# Greyscale. Corresponding to entries in the PLTE chunk.
|
457
457
|
# Grey is two bytes, range 0 .. (2 ^ bit-depth) - 1
|
data/lib/pdfbeads/pdfbuilder.rb
CHANGED
@@ -326,6 +326,17 @@ class PDFBeads::PDFBuilder
|
|
326
326
|
cat.addToDict('Outlines', ref(toc[0][:pdfobj].getID))
|
327
327
|
cat.addToDict('PageMode', "/UseOutlines")
|
328
328
|
end
|
329
|
+
|
330
|
+
if @pdfargs[:delfiles]
|
331
|
+
pagefiles.each do |p|
|
332
|
+
$stderr.puts( "Cleaning up temporary files for #{p.name}" )
|
333
|
+
safe_delete( p.fg_layer ) if p.fg_created
|
334
|
+
safe_delete( p.bg_layer ) if p.bg_created
|
335
|
+
p.stencils.each do |s|
|
336
|
+
safe_delete( s[:path] ) if s[:created]
|
337
|
+
end
|
338
|
+
end
|
339
|
+
end
|
329
340
|
end
|
330
341
|
|
331
342
|
# Output the created PDF file to the disk.
|
@@ -347,6 +358,15 @@ class PDFBeads::PDFBuilder
|
|
347
358
|
|
348
359
|
private
|
349
360
|
|
361
|
+
def safe_delete( path )
|
362
|
+
begin
|
363
|
+
File.delete( path )
|
364
|
+
$stderr.puts( " Deleted #{path}" )
|
365
|
+
rescue Exception => e
|
366
|
+
$stderr.puts( "Could not delete #{path}: #{e.message}" )
|
367
|
+
end
|
368
|
+
end
|
369
|
+
|
350
370
|
def parseMeta( path )
|
351
371
|
ret = Hash.new()
|
352
372
|
return ret if path.nil? or path.eql? ''
|
@@ -390,8 +410,9 @@ class PDFBeads::PDFBuilder
|
|
390
410
|
end
|
391
411
|
|
392
412
|
item_text = item[:title].to_binary
|
393
|
-
item_text.
|
394
|
-
item_text.
|
413
|
+
item_text.gsub!( /\x5C/,"\x5C\x5C" )
|
414
|
+
item_text.gsub!( /\x28/,"\x5C\x28" )
|
415
|
+
item_text.gsub!( /\x29/,"\x5C\x29" )
|
395
416
|
item[:pdfobj] = XObj.new(Hash[
|
396
417
|
'Title' => "(\xFE\xFF#{item_text.to_text})",
|
397
418
|
'Parent' => ref(item[:parent][:pdfobj].getID),
|
@@ -465,6 +486,7 @@ class PDFBeads::PDFBuilder
|
|
465
486
|
if ocr_words.length > 0
|
466
487
|
ocr_words.each do |word|
|
467
488
|
bbox = elementCoordinates( word,xscale,yscale )
|
489
|
+
next if bbox == [0,0,0,0]
|
468
490
|
txt = elementText( word,charset )
|
469
491
|
units << [txt,bbox]
|
470
492
|
end
|
data/lib/pdfbeads/pdfpage.rb
CHANGED
@@ -37,7 +37,7 @@ class PDFBeads::PageDataProvider < Array
|
|
37
37
|
# Collects the data needed for building an individual page
|
38
38
|
# of a PDF document and gives access to those data.
|
39
39
|
class PageData
|
40
|
-
attr_reader :name, :basename, :s_type, :stencils, :hocr_path
|
40
|
+
attr_reader :name, :basename, :s_type, :stencils, :hocr_path, :fg_created, :bg_created
|
41
41
|
attr_accessor :width, :height, :x_res, :y_res, :fg_layer, :bg_layer
|
42
42
|
|
43
43
|
def initialize( path,basename,args,exts,pref )
|
@@ -49,6 +49,7 @@ class PDFBeads::PageDataProvider < Array
|
|
49
49
|
@exts = exts
|
50
50
|
@pref = pref
|
51
51
|
@bg_layer = @fg_layer = nil
|
52
|
+
@bg_created = @fg_created = false
|
52
53
|
end
|
53
54
|
|
54
55
|
def fillStencilArray()
|
@@ -60,6 +61,7 @@ class PDFBeads::PageDataProvider < Array
|
|
60
61
|
map = Hash[
|
61
62
|
:path => @name,
|
62
63
|
:rgb => [0.0, 0.0, 0.0],
|
64
|
+
:created => false
|
63
65
|
]
|
64
66
|
|
65
67
|
insp = ImageInspector.new( @name )
|
@@ -72,7 +74,7 @@ class PDFBeads::PageDataProvider < Array
|
|
72
74
|
@x_res = @y_res = fres
|
73
75
|
end
|
74
76
|
|
75
|
-
if insp.depth == 1
|
77
|
+
if insp.depth == 1 and insp.trans.nil?
|
76
78
|
@stencils << map
|
77
79
|
ret = 1
|
78
80
|
|
@@ -190,6 +192,7 @@ class PDFBeads::PageDataProvider < Array
|
|
190
192
|
px = Pixel.from_color( color )
|
191
193
|
unless color.eql? exc
|
192
194
|
cpath = "#{@basename}.#{color}.tiff"
|
195
|
+
created = false
|
193
196
|
if not File.exists? cpath or force
|
194
197
|
bitonal = img.copy
|
195
198
|
# Caution: replacing colors in the colormap currently only works
|
@@ -208,10 +211,12 @@ class PDFBeads::PageDataProvider < Array
|
|
208
211
|
self.compression = Group4Compression
|
209
212
|
end
|
210
213
|
bitonal.destroy!
|
214
|
+
created = true
|
211
215
|
end
|
212
216
|
cmap = Hash[
|
213
217
|
:path => cpath,
|
214
|
-
:rgb => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange]
|
218
|
+
:rgb => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange],
|
219
|
+
:created => created
|
215
220
|
]
|
216
221
|
@stencils << cmap
|
217
222
|
ret += 1
|
@@ -231,6 +236,7 @@ class PDFBeads::PageDataProvider < Array
|
|
231
236
|
self.compression = Group4Compression
|
232
237
|
}
|
233
238
|
bitonal.destroy!
|
239
|
+
map[:created] = true
|
234
240
|
end
|
235
241
|
|
236
242
|
bgf = @pageargs[:bg_format]
|
@@ -263,6 +269,7 @@ class PDFBeads::PageDataProvider < Array
|
|
263
269
|
end
|
264
270
|
|
265
271
|
writeImage( img,bgpath,bgf )
|
272
|
+
@bg_created = true
|
266
273
|
end
|
267
274
|
|
268
275
|
map[:path] = binpath
|
@@ -327,7 +334,10 @@ class PDFBeads::PageDataProvider < Array
|
|
327
334
|
end
|
328
335
|
|
329
336
|
bgpath = "#{@basename}.bg." << fmt.downcase
|
330
|
-
|
337
|
+
if writeImage( bg,bgpath,fmt )
|
338
|
+
@bg_layer = bgpath
|
339
|
+
@bg_created = true
|
340
|
+
end
|
331
341
|
|
332
342
|
bg.destroy!
|
333
343
|
no_fg.destroy!
|
@@ -352,7 +362,10 @@ class PDFBeads::PageDataProvider < Array
|
|
352
362
|
fg.alpha( DeactivateAlphaChannel )
|
353
363
|
|
354
364
|
fgpath = "#{@basename}.fg." << fmt.downcase
|
355
|
-
|
365
|
+
if writeImage( fg,fgpath,fmt )
|
366
|
+
@fg_layer = fgpath
|
367
|
+
@fg_created = true
|
368
|
+
end
|
356
369
|
|
357
370
|
fg.destroy!
|
358
371
|
no_bg.destroy!
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfbeads
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 5
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 1.0.
|
9
|
+
- 9
|
10
|
+
version: 1.0.9
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alexey Kryukov
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-04-21 00:00:00 +04:00
|
19
19
|
default_executable: pdfbeads
|
20
20
|
dependencies: []
|
21
21
|
|