pdfbeads 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +12 -2
- data/bin/pdfbeads +6 -1
- data/lib/imageinspector.rb +2 -2
- data/lib/pdfbeads/pdfbuilder.rb +24 -2
- data/lib/pdfbeads/pdfpage.rb +18 -5
- metadata +4 -4
data/ChangeLog
CHANGED
@@ -42,6 +42,16 @@
|
|
42
42
|
|
43
43
|
2012 March 5 (Alexey Kryukov) Version 1.0.8
|
44
44
|
|
45
|
-
Palette colors in PNG images were treated as signed chars and this could cause
|
46
|
-
|
45
|
+
* Palette colors in PNG images were treated as signed chars and this could cause
|
46
|
+
indexed images to be incorrectly displayed in the resulting PDF.
|
47
47
|
|
48
|
+
2012 April 22 (Alexey Kryukov) Version 1.0.9
|
49
|
+
|
50
|
+
+ Add an option to delete the image files produced as an intermediate stage
|
51
|
+
during the PDF creation process.
|
52
|
+
|
53
|
+
* Processing indexed images with a small number of colors was broken.
|
54
|
+
|
55
|
+
* Don't attempt to use 'ocrx_word' elements which contain no bounding box
|
56
|
+
data (this should fix the problem with the hOCR output produced by some
|
57
|
+
tesseract versions).
|
data/bin/pdfbeads
CHANGED
@@ -42,7 +42,8 @@ pdfargs = Hash[
|
|
42
42
|
:labels => nil,
|
43
43
|
:toc => nil,
|
44
44
|
:pagelayout => 'TwoPageRight',
|
45
|
-
:meta => nil
|
45
|
+
:meta => nil,
|
46
|
+
:delfiles => false
|
46
47
|
]
|
47
48
|
pageargs = Hash[
|
48
49
|
:threshold => 1,
|
@@ -169,6 +170,10 @@ OptionParser.new() do |opts|
|
|
169
170
|
opts.separator "\n"
|
170
171
|
opts.separator "General options:\n"
|
171
172
|
|
173
|
+
opts.on("-d", "--delete",
|
174
|
+
"Delete intermediate image files used to create PDF") do |d|
|
175
|
+
pdfargs[:delfiles] = d
|
176
|
+
end
|
172
177
|
opts.on("-o", "--output FILE",
|
173
178
|
"Print output to a file instead of STDERR") do |f|
|
174
179
|
outpath = f
|
data/lib/imageinspector.rb
CHANGED
@@ -444,14 +444,14 @@ class ImageInspector::Image
|
|
444
444
|
@x_dpi = (x_dpm/100 * 2.54).round
|
445
445
|
@y_dpi = (y_dpm/100 * 2.54).round
|
446
446
|
when 'tRNS'
|
447
|
-
trans = Hash.new
|
447
|
+
trans = Hash.new()
|
448
448
|
case @cspace
|
449
449
|
when :Indexed
|
450
450
|
# Indexed colour, RGB. Each byte in this chunk is an alpha for
|
451
451
|
# the palette index in the PLTE ("palette") chunk up until the
|
452
452
|
# last non-opaque entry. Set up an array, stretching over all
|
453
453
|
# palette entries which will be 0 (opaque) or 1 (transparent).
|
454
|
-
@trans = io.read(
|
454
|
+
@trans = io.read( length ).unpack( 'C*' )
|
455
455
|
when :DeviceGray
|
456
456
|
# Greyscale. Corresponding to entries in the PLTE chunk.
|
457
457
|
# Grey is two bytes, range 0 .. (2 ^ bit-depth) - 1
|
data/lib/pdfbeads/pdfbuilder.rb
CHANGED
@@ -326,6 +326,17 @@ class PDFBeads::PDFBuilder
|
|
326
326
|
cat.addToDict('Outlines', ref(toc[0][:pdfobj].getID))
|
327
327
|
cat.addToDict('PageMode', "/UseOutlines")
|
328
328
|
end
|
329
|
+
|
330
|
+
if @pdfargs[:delfiles]
|
331
|
+
pagefiles.each do |p|
|
332
|
+
$stderr.puts( "Cleaning up temporary files for #{p.name}" )
|
333
|
+
safe_delete( p.fg_layer ) if p.fg_created
|
334
|
+
safe_delete( p.bg_layer ) if p.bg_created
|
335
|
+
p.stencils.each do |s|
|
336
|
+
safe_delete( s[:path] ) if s[:created]
|
337
|
+
end
|
338
|
+
end
|
339
|
+
end
|
329
340
|
end
|
330
341
|
|
331
342
|
# Output the created PDF file to the disk.
|
@@ -347,6 +358,15 @@ class PDFBeads::PDFBuilder
|
|
347
358
|
|
348
359
|
private
|
349
360
|
|
361
|
+
def safe_delete( path )
|
362
|
+
begin
|
363
|
+
File.delete( path )
|
364
|
+
$stderr.puts( " Deleted #{path}" )
|
365
|
+
rescue Exception => e
|
366
|
+
$stderr.puts( "Could not delete #{path}: #{e.message}" )
|
367
|
+
end
|
368
|
+
end
|
369
|
+
|
350
370
|
def parseMeta( path )
|
351
371
|
ret = Hash.new()
|
352
372
|
return ret if path.nil? or path.eql? ''
|
@@ -390,8 +410,9 @@ class PDFBeads::PDFBuilder
|
|
390
410
|
end
|
391
411
|
|
392
412
|
item_text = item[:title].to_binary
|
393
|
-
item_text.
|
394
|
-
item_text.
|
413
|
+
item_text.gsub!( /\x5C/,"\x5C\x5C" )
|
414
|
+
item_text.gsub!( /\x28/,"\x5C\x28" )
|
415
|
+
item_text.gsub!( /\x29/,"\x5C\x29" )
|
395
416
|
item[:pdfobj] = XObj.new(Hash[
|
396
417
|
'Title' => "(\xFE\xFF#{item_text.to_text})",
|
397
418
|
'Parent' => ref(item[:parent][:pdfobj].getID),
|
@@ -465,6 +486,7 @@ class PDFBeads::PDFBuilder
|
|
465
486
|
if ocr_words.length > 0
|
466
487
|
ocr_words.each do |word|
|
467
488
|
bbox = elementCoordinates( word,xscale,yscale )
|
489
|
+
next if bbox == [0,0,0,0]
|
468
490
|
txt = elementText( word,charset )
|
469
491
|
units << [txt,bbox]
|
470
492
|
end
|
data/lib/pdfbeads/pdfpage.rb
CHANGED
@@ -37,7 +37,7 @@ class PDFBeads::PageDataProvider < Array
|
|
37
37
|
# Allows to collect data needed for building an individual page
|
38
38
|
# of a PDF document and gives access to those data.
|
39
39
|
class PageData
|
40
|
-
attr_reader :name, :basename, :s_type, :stencils, :hocr_path
|
40
|
+
attr_reader :name, :basename, :s_type, :stencils, :hocr_path, :fg_created, :bg_created
|
41
41
|
attr_accessor :width, :height, :x_res, :y_res, :fg_layer, :bg_layer
|
42
42
|
|
43
43
|
def initialize( path,basename,args,exts,pref )
|
@@ -49,6 +49,7 @@ class PDFBeads::PageDataProvider < Array
|
|
49
49
|
@exts = exts
|
50
50
|
@pref = pref
|
51
51
|
@bg_layer = @fg_layer = nil
|
52
|
+
@bg_created = @fg_created = false
|
52
53
|
end
|
53
54
|
|
54
55
|
def fillStencilArray()
|
@@ -60,6 +61,7 @@ class PDFBeads::PageDataProvider < Array
|
|
60
61
|
map = Hash[
|
61
62
|
:path => @name,
|
62
63
|
:rgb => [0.0, 0.0, 0.0],
|
64
|
+
:created => false
|
63
65
|
]
|
64
66
|
|
65
67
|
insp = ImageInspector.new( @name )
|
@@ -72,7 +74,7 @@ class PDFBeads::PageDataProvider < Array
|
|
72
74
|
@x_res = @y_res = fres
|
73
75
|
end
|
74
76
|
|
75
|
-
if insp.depth == 1
|
77
|
+
if insp.depth == 1 and insp.trans.nil?
|
76
78
|
@stencils << map
|
77
79
|
ret = 1
|
78
80
|
|
@@ -190,6 +192,7 @@ class PDFBeads::PageDataProvider < Array
|
|
190
192
|
px = Pixel.from_color( color )
|
191
193
|
unless color.eql? exc
|
192
194
|
cpath = "#{@basename}.#{color}.tiff"
|
195
|
+
created = false
|
193
196
|
if not File.exists? cpath or force
|
194
197
|
bitonal = img.copy
|
195
198
|
# Caution: replacing colors in the colormap currently only works
|
@@ -208,10 +211,12 @@ class PDFBeads::PageDataProvider < Array
|
|
208
211
|
self.compression = Group4Compression
|
209
212
|
end
|
210
213
|
bitonal.destroy!
|
214
|
+
created = true
|
211
215
|
end
|
212
216
|
cmap = Hash[
|
213
217
|
:path => cpath,
|
214
|
-
:rgb => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange]
|
218
|
+
:rgb => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange],
|
219
|
+
:created => created
|
215
220
|
]
|
216
221
|
@stencils << cmap
|
217
222
|
ret += 1
|
@@ -231,6 +236,7 @@ class PDFBeads::PageDataProvider < Array
|
|
231
236
|
self.compression = Group4Compression
|
232
237
|
}
|
233
238
|
bitonal.destroy!
|
239
|
+
map[:created] = true
|
234
240
|
end
|
235
241
|
|
236
242
|
bgf = @pageargs[:bg_format]
|
@@ -263,6 +269,7 @@ class PDFBeads::PageDataProvider < Array
|
|
263
269
|
end
|
264
270
|
|
265
271
|
writeImage( img,bgpath,bgf )
|
272
|
+
@bg_created = true
|
266
273
|
end
|
267
274
|
|
268
275
|
map[:path] = binpath
|
@@ -327,7 +334,10 @@ class PDFBeads::PageDataProvider < Array
|
|
327
334
|
end
|
328
335
|
|
329
336
|
bgpath = "#{@basename}.bg." << fmt.downcase
|
330
|
-
|
337
|
+
if writeImage( bg,bgpath,fmt )
|
338
|
+
@bg_layer = bgpath
|
339
|
+
@bg_created = true
|
340
|
+
end
|
331
341
|
|
332
342
|
bg.destroy!
|
333
343
|
no_fg.destroy!
|
@@ -352,7 +362,10 @@ class PDFBeads::PageDataProvider < Array
|
|
352
362
|
fg.alpha( DeactivateAlphaChannel )
|
353
363
|
|
354
364
|
fgpath = "#{@basename}.fg." << fmt.downcase
|
355
|
-
|
365
|
+
if writeImage( fg,fgpath,fmt )
|
366
|
+
@fg_layer = fgpath
|
367
|
+
@fg_created = true
|
368
|
+
end
|
356
369
|
|
357
370
|
fg.destroy!
|
358
371
|
no_bg.destroy!
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfbeads
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 5
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 8
|
10
|
-
version: 1.0.8
|
9
|
+
- 9
|
10
|
+
version: 1.0.9
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alexey Kryukov
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
18
|
+
date: 2012-04-21 00:00:00 +04:00
|
19
19
|
default_executable: pdfbeads
|
20
20
|
dependencies: []
|
21
21
|
|