pdfbeads 1.0.8 → 1.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ChangeLog CHANGED
@@ -42,6 +42,16 @@
42
42
 
43
43
  2012 March 5 (Alexey Kryukov) Version 1.0.8
44
44
 
45
- Palette colors in PNG images were treated as signed chars and this could cause
46
- indexed images to be incorrectly displayed in the resulting PDF.
45
+ * Palette colors in PNG images were treated as signed chars and this could cause
46
+ indexed images to be incorrectly displayed in the resulting PDF.
47
47
 
48
+ 2012 April 22 (Alexey Kryukov) Version 1.0.9
49
+
50
+ + Add an option to delete the intermediate image files produced
51
+ during the PDF creation process.
52
+
53
+ * Processing indexed images with a small number of colors was broken.
54
+
55
+ * Don't attempt to use 'ocrx_word' elements which contain no bounding box
56
+ data (this should fix the problem with the hOCR output produced by some
57
+ tesseract versions).
@@ -42,7 +42,8 @@ pdfargs = Hash[
42
42
  :labels => nil,
43
43
  :toc => nil,
44
44
  :pagelayout => 'TwoPageRight',
45
- :meta => nil
45
+ :meta => nil,
46
+ :delfiles => false
46
47
  ]
47
48
  pageargs = Hash[
48
49
  :threshold => 1,
@@ -169,6 +170,10 @@ OptionParser.new() do |opts|
169
170
  opts.separator "\n"
170
171
  opts.separator "General options:\n"
171
172
 
173
+ opts.on("-d", "--delete",
174
+ "Delete intermediate image files used to create PDF") do |d|
175
+ pdfargs[:delfiles] = d
176
+ end
172
177
  opts.on("-o", "--output FILE",
173
178
  "Print output to a file instead of STDERR") do |f|
174
179
  outpath = f
@@ -444,14 +444,14 @@ class ImageInspector::Image
444
444
  @x_dpi = (x_dpm/100 * 2.54).round
445
445
  @y_dpi = (y_dpm/100 * 2.54).round
446
446
  when 'tRNS'
447
- trans = Hash.new[]
447
+ trans = Hash.new()
448
448
  case @cspace
449
449
  when :Indexed
450
450
  # Indexed colour, RGB. Each byte in this chunk is an alpha for
451
451
  # the palette index in the PLTE ("palette") chunk up until the
452
452
  # last non-opaque entry. Set up an array, stretching over all
453
453
  # palette entries which will be 0 (opaque) or 1 (transparent).
454
- @trans = io.read( size ).unpack( 'C*' )
454
+ @trans = io.read( length ).unpack( 'C*' )
455
455
  when :DeviceGray
456
456
  # Greyscale. Corresponding to entries in the PLTE chunk.
457
457
  # Grey is two bytes, range 0 .. (2 ^ bit-depth) - 1
@@ -326,6 +326,17 @@ class PDFBeads::PDFBuilder
326
326
  cat.addToDict('Outlines', ref(toc[0][:pdfobj].getID))
327
327
  cat.addToDict('PageMode', "/UseOutlines")
328
328
  end
329
+
330
+ if @pdfargs[:delfiles]
331
+ pagefiles.each do |p|
332
+ $stderr.puts( "Cleaning up temporary files for #{p.name}" )
333
+ safe_delete( p.fg_layer ) if p.fg_created
334
+ safe_delete( p.bg_layer ) if p.bg_created
335
+ p.stencils.each do |s|
336
+ safe_delete( s[:path] ) if s[:created]
337
+ end
338
+ end
339
+ end
329
340
  end
330
341
 
331
342
  # Output the created PDF file to the disk.
@@ -347,6 +358,15 @@ class PDFBeads::PDFBuilder
347
358
 
348
359
  private
349
360
 
361
# Best-effort removal of an intermediate image file.
#
# The outcome is reported on $stderr. Ordinary failures (missing file,
# insufficient permissions, an invalid path argument) are logged and
# swallowed so that a failed cleanup never aborts the run.
#
# Only StandardError is rescued: Interrupt, SystemExit, NoMemoryError and
# other non-standard exceptions propagate, so the user can still cancel
# the program while files are being cleaned up.
#
# path:: location of the file to remove
def safe_delete( path )
  File.delete( path )
  $stderr.puts( " Deleted #{path}" )
rescue StandardError => e
  $stderr.puts( "Could not delete #{path}: #{e.message}" )
end
369
+
350
370
  def parseMeta( path )
351
371
  ret = Hash.new()
352
372
  return ret if path.nil? or path.eql? ''
@@ -390,8 +410,9 @@ class PDFBeads::PDFBuilder
390
410
  end
391
411
 
392
412
  item_text = item[:title].to_binary
393
- item_text.sub!( /\x28/,"\x5C\x28" )
394
- item_text.sub!( /\x29/,"\x5C\x29" )
413
+ item_text.gsub!( /\x5C/,"\x5C\x5C" )
414
+ item_text.gsub!( /\x28/,"\x5C\x28" )
415
+ item_text.gsub!( /\x29/,"\x5C\x29" )
395
416
  item[:pdfobj] = XObj.new(Hash[
396
417
  'Title' => "(\xFE\xFF#{item_text.to_text})",
397
418
  'Parent' => ref(item[:parent][:pdfobj].getID),
@@ -465,6 +486,7 @@ class PDFBeads::PDFBuilder
465
486
  if ocr_words.length > 0
466
487
  ocr_words.each do |word|
467
488
  bbox = elementCoordinates( word,xscale,yscale )
489
+ next if bbox == [0,0,0,0]
468
490
  txt = elementText( word,charset )
469
491
  units << [txt,bbox]
470
492
  end
@@ -37,7 +37,7 @@ class PDFBeads::PageDataProvider < Array
37
37
  # Collects the data needed for building an individual page
38
38
  # of a PDF document and gives access to that data.
39
39
  class PageData
40
- attr_reader :name, :basename, :s_type, :stencils, :hocr_path
40
+ attr_reader :name, :basename, :s_type, :stencils, :hocr_path, :fg_created, :bg_created
41
41
  attr_accessor :width, :height, :x_res, :y_res, :fg_layer, :bg_layer
42
42
 
43
43
  def initialize( path,basename,args,exts,pref )
@@ -49,6 +49,7 @@ class PDFBeads::PageDataProvider < Array
49
49
  @exts = exts
50
50
  @pref = pref
51
51
  @bg_layer = @fg_layer = nil
52
+ @bg_created = @fg_created = false
52
53
  end
53
54
 
54
55
  def fillStencilArray()
@@ -60,6 +61,7 @@ class PDFBeads::PageDataProvider < Array
60
61
  map = Hash[
61
62
  :path => @name,
62
63
  :rgb => [0.0, 0.0, 0.0],
64
+ :created => false
63
65
  ]
64
66
 
65
67
  insp = ImageInspector.new( @name )
@@ -72,7 +74,7 @@ class PDFBeads::PageDataProvider < Array
72
74
  @x_res = @y_res = fres
73
75
  end
74
76
 
75
- if insp.depth == 1
77
+ if insp.depth == 1 and insp.trans.nil?
76
78
  @stencils << map
77
79
  ret = 1
78
80
 
@@ -190,6 +192,7 @@ class PDFBeads::PageDataProvider < Array
190
192
  px = Pixel.from_color( color )
191
193
  unless color.eql? exc
192
194
  cpath = "#{@basename}.#{color}.tiff"
195
+ created = false
193
196
  if not File.exists? cpath or force
194
197
  bitonal = img.copy
195
198
  # Caution: replacing colors in the colormap currently only works
@@ -208,10 +211,12 @@ class PDFBeads::PageDataProvider < Array
208
211
  self.compression = Group4Compression
209
212
  end
210
213
  bitonal.destroy!
214
+ created = true
211
215
  end
212
216
  cmap = Hash[
213
217
  :path => cpath,
214
- :rgb => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange]
218
+ :rgb => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange],
219
+ :created => created
215
220
  ]
216
221
  @stencils << cmap
217
222
  ret += 1
@@ -231,6 +236,7 @@ class PDFBeads::PageDataProvider < Array
231
236
  self.compression = Group4Compression
232
237
  }
233
238
  bitonal.destroy!
239
+ map[:created] = true
234
240
  end
235
241
 
236
242
  bgf = @pageargs[:bg_format]
@@ -263,6 +269,7 @@ class PDFBeads::PageDataProvider < Array
263
269
  end
264
270
 
265
271
  writeImage( img,bgpath,bgf )
272
+ @bg_created = true
266
273
  end
267
274
 
268
275
  map[:path] = binpath
@@ -327,7 +334,10 @@ class PDFBeads::PageDataProvider < Array
327
334
  end
328
335
 
329
336
  bgpath = "#{@basename}.bg." << fmt.downcase
330
- @bg_layer = bgpath if writeImage( bg,bgpath,fmt )
337
+ if writeImage( bg,bgpath,fmt )
338
+ @bg_layer = bgpath
339
+ @bg_created = true
340
+ end
331
341
 
332
342
  bg.destroy!
333
343
  no_fg.destroy!
@@ -352,7 +362,10 @@ class PDFBeads::PageDataProvider < Array
352
362
  fg.alpha( DeactivateAlphaChannel )
353
363
 
354
364
  fgpath = "#{@basename}.fg." << fmt.downcase
355
- @fg_layer = fgpath if writeImage( fg,fgpath,fmt )
365
+ if writeImage( fg,fgpath,fmt )
366
+ @fg_layer = fgpath
367
+ @fg_created = true
368
+ end
356
369
 
357
370
  fg.destroy!
358
371
  no_bg.destroy!
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfbeads
3
3
  version: !ruby/object:Gem::Version
4
- hash: 7
4
+ hash: 5
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 8
10
- version: 1.0.8
9
+ - 9
10
+ version: 1.0.9
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alexey Kryukov
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-03-05 00:00:00 +04:00
18
+ date: 2012-04-21 00:00:00 +04:00
19
19
  default_executable: pdfbeads
20
20
  dependencies: []
21
21