pdfbeads 1.0.8 → 1.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog CHANGED
@@ -42,6 +42,16 @@
42
42
 
43
43
  2012 March 5 (Alexey Kryukov) Version 1.0.8
44
44
 
45
- Palette colors in PNG images were treated as signed chars and this could cause
46
- indexed images to be incorrectly displayed in the resulting PDF.
45
+ * Palette colors in PNG images were treated as signed chars and this could cause
46
+ indexed images to be incorrectly displayed in the resulting PDF.
47
47
 
48
+ 2012 April 22 (Alexey Kryukov) Version 1.0.9
49
+
50
+ + Add an option to delete the image files produced as an intermediate stage
51
+ during the PDF creation process.
52
+
53
+ * Processing indexed images with a small number of colors was broken.
54
+
55
+ * Don't attempt to use 'ocrx_word' elements which contain no bounding box
56
+ data (this should fix the problem with the hOCR output produced by some
57
+ tesseract versions).
@@ -42,7 +42,8 @@ pdfargs = Hash[
42
42
  :labels => nil,
43
43
  :toc => nil,
44
44
  :pagelayout => 'TwoPageRight',
45
- :meta => nil
45
+ :meta => nil,
46
+ :delfiles => false
46
47
  ]
47
48
  pageargs = Hash[
48
49
  :threshold => 1,
@@ -169,6 +170,10 @@ OptionParser.new() do |opts|
169
170
  opts.separator "\n"
170
171
  opts.separator "General options:\n"
171
172
 
173
+ opts.on("-d", "--delete",
174
+ "Delete intermediate image files used to create PDF") do |d|
175
+ pdfargs[:delfiles] = d
176
+ end
172
177
  opts.on("-o", "--output FILE",
173
178
  "Print output to a file instead of STDERR") do |f|
174
179
  outpath = f
@@ -444,14 +444,14 @@ class ImageInspector::Image
444
444
  @x_dpi = (x_dpm/100 * 2.54).round
445
445
  @y_dpi = (y_dpm/100 * 2.54).round
446
446
  when 'tRNS'
447
- trans = Hash.new[]
447
+ trans = Hash.new()
448
448
  case @cspace
449
449
  when :Indexed
450
450
  # Indexed colour, RGB. Each byte in this chunk is an alpha for
451
451
  # the palette index in the PLTE ("palette") chunk up until the
452
452
  # last non-opaque entry. Set up an array, stretching over all
453
453
  # palette entries which will be 0 (opaque) or 1 (transparent).
454
- @trans = io.read( size ).unpack( 'C*' )
454
+ @trans = io.read( length ).unpack( 'C*' )
455
455
  when :DeviceGray
456
456
  # Greyscale. Corresponding to entries in the PLTE chunk.
457
457
  # Grey is two bytes, range 0 .. (2 ^ bit-depth) - 1
@@ -326,6 +326,17 @@ class PDFBeads::PDFBuilder
326
326
  cat.addToDict('Outlines', ref(toc[0][:pdfobj].getID))
327
327
  cat.addToDict('PageMode', "/UseOutlines")
328
328
  end
329
+
330
+ if @pdfargs[:delfiles]
331
+ pagefiles.each do |p|
332
+ $stderr.puts( "Cleaning up temporary files for #{p.name}" )
333
+ safe_delete( p.fg_layer ) if p.fg_created
334
+ safe_delete( p.bg_layer ) if p.bg_created
335
+ p.stencils.each do |s|
336
+ safe_delete( s[:path] ) if s[:created]
337
+ end
338
+ end
339
+ end
329
340
  end
330
341
 
331
342
  # Output the created PDF file to the disk.
@@ -347,6 +358,15 @@ class PDFBeads::PDFBuilder
347
358
 
348
359
  private
349
360
 
361
+ def safe_delete( path )
362
+ begin
363
+ File.delete( path )
364
+ $stderr.puts( " Deleted #{path}" )
365
+ rescue Exception => e
366
+ $stderr.puts( "Could not delete #{path}: #{e.message}" )
367
+ end
368
+ end
369
+
350
370
  def parseMeta( path )
351
371
  ret = Hash.new()
352
372
  return ret if path.nil? or path.eql? ''
@@ -390,8 +410,9 @@ class PDFBeads::PDFBuilder
390
410
  end
391
411
 
392
412
  item_text = item[:title].to_binary
393
- item_text.sub!( /\x28/,"\x5C\x28" )
394
- item_text.sub!( /\x29/,"\x5C\x29" )
413
+ item_text.gsub!( /\x5C/,"\x5C\x5C" )
414
+ item_text.gsub!( /\x28/,"\x5C\x28" )
415
+ item_text.gsub!( /\x29/,"\x5C\x29" )
395
416
  item[:pdfobj] = XObj.new(Hash[
396
417
  'Title' => "(\xFE\xFF#{item_text.to_text})",
397
418
  'Parent' => ref(item[:parent][:pdfobj].getID),
@@ -465,6 +486,7 @@ class PDFBeads::PDFBuilder
465
486
  if ocr_words.length > 0
466
487
  ocr_words.each do |word|
467
488
  bbox = elementCoordinates( word,xscale,yscale )
489
+ next if bbox == [0,0,0,0]
468
490
  txt = elementText( word,charset )
469
491
  units << [txt,bbox]
470
492
  end
@@ -37,7 +37,7 @@ class PDFBeads::PageDataProvider < Array
37
37
  # Allows to collect data needed for building an individual page
38
38
  # of a PDF document and gives access to those data.
39
39
  class PageData
40
- attr_reader :name, :basename, :s_type, :stencils, :hocr_path
40
+ attr_reader :name, :basename, :s_type, :stencils, :hocr_path, :fg_created, :bg_created
41
41
  attr_accessor :width, :height, :x_res, :y_res, :fg_layer, :bg_layer
42
42
 
43
43
  def initialize( path,basename,args,exts,pref )
@@ -49,6 +49,7 @@ class PDFBeads::PageDataProvider < Array
49
49
  @exts = exts
50
50
  @pref = pref
51
51
  @bg_layer = @fg_layer = nil
52
+ @bg_created = @fg_created = false
52
53
  end
53
54
 
54
55
  def fillStencilArray()
@@ -60,6 +61,7 @@ class PDFBeads::PageDataProvider < Array
60
61
  map = Hash[
61
62
  :path => @name,
62
63
  :rgb => [0.0, 0.0, 0.0],
64
+ :created => false
63
65
  ]
64
66
 
65
67
  insp = ImageInspector.new( @name )
@@ -72,7 +74,7 @@ class PDFBeads::PageDataProvider < Array
72
74
  @x_res = @y_res = fres
73
75
  end
74
76
 
75
- if insp.depth == 1
77
+ if insp.depth == 1 and insp.trans.nil?
76
78
  @stencils << map
77
79
  ret = 1
78
80
 
@@ -190,6 +192,7 @@ class PDFBeads::PageDataProvider < Array
190
192
  px = Pixel.from_color( color )
191
193
  unless color.eql? exc
192
194
  cpath = "#{@basename}.#{color}.tiff"
195
+ created = false
193
196
  if not File.exists? cpath or force
194
197
  bitonal = img.copy
195
198
  # Caution: replacing colors in the colormap currently only works
@@ -208,10 +211,12 @@ class PDFBeads::PageDataProvider < Array
208
211
  self.compression = Group4Compression
209
212
  end
210
213
  bitonal.destroy!
214
+ created = true
211
215
  end
212
216
  cmap = Hash[
213
217
  :path => cpath,
214
- :rgb => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange]
218
+ :rgb => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange],
219
+ :created => created
215
220
  ]
216
221
  @stencils << cmap
217
222
  ret += 1
@@ -231,6 +236,7 @@ class PDFBeads::PageDataProvider < Array
231
236
  self.compression = Group4Compression
232
237
  }
233
238
  bitonal.destroy!
239
+ map[:created] = true
234
240
  end
235
241
 
236
242
  bgf = @pageargs[:bg_format]
@@ -263,6 +269,7 @@ class PDFBeads::PageDataProvider < Array
263
269
  end
264
270
 
265
271
  writeImage( img,bgpath,bgf )
272
+ @bg_created = true
266
273
  end
267
274
 
268
275
  map[:path] = binpath
@@ -327,7 +334,10 @@ class PDFBeads::PageDataProvider < Array
327
334
  end
328
335
 
329
336
  bgpath = "#{@basename}.bg." << fmt.downcase
330
- @bg_layer = bgpath if writeImage( bg,bgpath,fmt )
337
+ if writeImage( bg,bgpath,fmt )
338
+ @bg_layer = bgpath
339
+ @bg_created = true
340
+ end
331
341
 
332
342
  bg.destroy!
333
343
  no_fg.destroy!
@@ -352,7 +362,10 @@ class PDFBeads::PageDataProvider < Array
352
362
  fg.alpha( DeactivateAlphaChannel )
353
363
 
354
364
  fgpath = "#{@basename}.fg." << fmt.downcase
355
- @fg_layer = fgpath if writeImage( fg,fgpath,fmt )
365
+ if writeImage( fg,fgpath,fmt )
366
+ @fg_layer = fgpath
367
+ @fg_created = true
368
+ end
356
369
 
357
370
  fg.destroy!
358
371
  no_bg.destroy!
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfbeads
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 5
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 8
10
- version: 1.0.8
9
+ - 9
10
+ version: 1.0.9
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alexey Kryukov
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-05 00:00:00 +04:00
18
+ date: 2012-04-21 00:00:00 +04:00
19
19
  default_executable: pdfbeads
20
20
  dependencies: []
21
21