pdfbeads 1.0.8 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +12 -2
 - data/bin/pdfbeads +6 -1
 - data/lib/imageinspector.rb +2 -2
 - data/lib/pdfbeads/pdfbuilder.rb +24 -2
 - data/lib/pdfbeads/pdfpage.rb +18 -5
 - metadata +4 -4
 
    
        data/ChangeLog
    CHANGED
    
    | 
         @@ -42,6 +42,16 @@ 
     | 
|
| 
       42 
42 
     | 
    
         | 
| 
       43 
43 
     | 
    
         
             
            2012 March 5 (Alexey Kryukov) Version 1.0.8
         
     | 
| 
       44 
44 
     | 
    
         | 
| 
       45 
     | 
    
         
            -
                Palette colors in PNG images were treated as signed chars and this could cause
         
     | 
| 
       46 
     | 
    
         
            -
             
     | 
| 
      
 45 
     | 
    
         
            +
                * Palette colors in PNG images were treated as signed chars and this could cause
         
     | 
| 
      
 46 
     | 
    
         
            +
                  indexed images to be incorrectly displayed in the resulting PDF.
         
     | 
| 
       47 
47 
     | 
    
         | 
| 
      
 48 
     | 
    
         
            +
            2012 April 22 (Alexey Kryukov) Version 1.0.9
         
     | 
| 
      
 49 
     | 
    
         
            +
             
     | 
| 
      
 50 
     | 
    
         
            +
                + Add an option allowing to delete image files produced as an intermediate stage
         
     | 
| 
      
 51 
     | 
    
         
            +
                  during the PDF creation process.
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
                * Processing indexed images with a small number of colors was broken.
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
                * Don't attempt to use 'ocrx_word' elements which contain no bounding box
         
     | 
| 
      
 56 
     | 
    
         
            +
                  data (this should fix the problem with the hOCR output produced by some
         
     | 
| 
      
 57 
     | 
    
         
            +
                  tesseract versions).
         
     | 
    
        data/bin/pdfbeads
    CHANGED
    
    | 
         @@ -42,7 +42,8 @@ pdfargs  = Hash[ 
     | 
|
| 
       42 
42 
     | 
    
         
             
              :labels         => nil,
         
     | 
| 
       43 
43 
     | 
    
         
             
              :toc            => nil,
         
     | 
| 
       44 
44 
     | 
    
         
             
              :pagelayout     => 'TwoPageRight',
         
     | 
| 
       45 
     | 
    
         
            -
              :meta           => nil
         
     | 
| 
      
 45 
     | 
    
         
            +
              :meta           => nil,
         
     | 
| 
      
 46 
     | 
    
         
            +
              :delfiles       => false
         
     | 
| 
       46 
47 
     | 
    
         
             
            ]
         
     | 
| 
       47 
48 
     | 
    
         
             
            pageargs = Hash[
         
     | 
| 
       48 
49 
     | 
    
         
             
              :threshold       => 1,
         
     | 
| 
         @@ -169,6 +170,10 @@ OptionParser.new() do |opts| 
     | 
|
| 
       169 
170 
     | 
    
         
             
              opts.separator "\n"
         
     | 
| 
       170 
171 
     | 
    
         
             
              opts.separator "General options:\n"
         
     | 
| 
       171 
172 
     | 
    
         | 
| 
      
 173 
     | 
    
         
            +
              opts.on("-d", "--delete",
         
     | 
| 
      
 174 
     | 
    
         
            +
                            "Delete intermediate image files used to create PDF") do |d|
         
     | 
| 
      
 175 
     | 
    
         
            +
                pdfargs[:delfiles] = d
         
     | 
| 
      
 176 
     | 
    
         
            +
              end
         
     | 
| 
       172 
177 
     | 
    
         
             
              opts.on("-o", "--output FILE",
         
     | 
| 
       173 
178 
     | 
    
         
             
                            "Print output to a file instead of STDERR") do |f|
         
     | 
| 
       174 
179 
     | 
    
         
             
                outpath = f
         
     | 
    
        data/lib/imageinspector.rb
    CHANGED
    
    | 
         @@ -444,14 +444,14 @@ class ImageInspector::Image 
     | 
|
| 
       444 
444 
     | 
    
         
             
                        @x_dpi = (x_dpm/100 * 2.54).round
         
     | 
| 
       445 
445 
     | 
    
         
             
                        @y_dpi = (y_dpm/100 * 2.54).round
         
     | 
| 
       446 
446 
     | 
    
         
             
                      when 'tRNS'
         
     | 
| 
       447 
     | 
    
         
            -
                        trans = Hash.new 
     | 
| 
      
 447 
     | 
    
         
            +
                        trans = Hash.new()
         
     | 
| 
       448 
448 
     | 
    
         
             
                        case @cspace
         
     | 
| 
       449 
449 
     | 
    
         
             
                          when :Indexed
         
     | 
| 
       450 
450 
     | 
    
         
             
                            # Indexed colour, RGB. Each byte in this chunk is an alpha for
         
     | 
| 
       451 
451 
     | 
    
         
             
                            # the palette index in the PLTE ("palette") chunk up until the
         
     | 
| 
       452 
452 
     | 
    
         
             
                            # last non-opaque entry. Set up an array, stretching over all
         
     | 
| 
       453 
453 
     | 
    
         
             
                            # palette entries which will be 0 (opaque) or 1 (transparent).
         
     | 
| 
       454 
     | 
    
         
            -
                            @trans = io.read(  
     | 
| 
      
 454 
     | 
    
         
            +
                            @trans = io.read( length ).unpack( 'C*' )
         
     | 
| 
       455 
455 
     | 
    
         
             
                          when :DeviceGray
         
     | 
| 
       456 
456 
     | 
    
         
             
                            # Greyscale. Corresponding to entries in the PLTE chunk.
         
     | 
| 
       457 
457 
     | 
    
         
             
                            # Grey is two bytes, range 0 .. (2 ^ bit-depth) - 1
         
     | 
    
        data/lib/pdfbeads/pdfbuilder.rb
    CHANGED
    
    | 
         @@ -326,6 +326,17 @@ class PDFBeads::PDFBuilder 
     | 
|
| 
       326 
326 
     | 
    
         
             
                  cat.addToDict('Outlines', ref(toc[0][:pdfobj].getID))
         
     | 
| 
       327 
327 
     | 
    
         
             
                  cat.addToDict('PageMode', "/UseOutlines")
         
     | 
| 
       328 
328 
     | 
    
         
             
                end
         
     | 
| 
      
 329 
     | 
    
         
            +
             
     | 
| 
      
 330 
     | 
    
         
            +
                if @pdfargs[:delfiles]
         
     | 
| 
      
 331 
     | 
    
         
            +
                  pagefiles.each do |p|
         
     | 
| 
      
 332 
     | 
    
         
            +
                    $stderr.puts( "Cleaning up temporary files for #{p.name}" )
         
     | 
| 
      
 333 
     | 
    
         
            +
                    safe_delete( p.fg_layer ) if p.fg_created
         
     | 
| 
      
 334 
     | 
    
         
            +
                    safe_delete( p.bg_layer ) if p.bg_created
         
     | 
| 
      
 335 
     | 
    
         
            +
                    p.stencils.each do |s|
         
     | 
| 
      
 336 
     | 
    
         
            +
                      safe_delete( s[:path] ) if s[:created]
         
     | 
| 
      
 337 
     | 
    
         
            +
                    end
         
     | 
| 
      
 338 
     | 
    
         
            +
                  end
         
     | 
| 
      
 339 
     | 
    
         
            +
                end
         
     | 
| 
       329 
340 
     | 
    
         
             
              end
         
     | 
| 
       330 
341 
     | 
    
         | 
| 
       331 
342 
     | 
    
         
             
              # Output the created PDF file to the disk.
         
     | 
| 
         @@ -347,6 +358,15 @@ class PDFBeads::PDFBuilder 
     | 
|
| 
       347 
358 
     | 
    
         | 
| 
       348 
359 
     | 
    
         
             
              private
         
     | 
| 
       349 
360 
     | 
    
         | 
| 
      
 361 
     | 
    
         
            +
              def safe_delete( path )
         
     | 
| 
      
 362 
     | 
    
         
            +
                begin
         
     | 
| 
      
 363 
     | 
    
         
            +
                  File.delete( path )
         
     | 
| 
      
 364 
     | 
    
         
            +
                  $stderr.puts( " Deleted #{path}" )
         
     | 
| 
      
 365 
     | 
    
         
            +
                rescue Exception => e
         
     | 
| 
      
 366 
     | 
    
         
            +
                    $stderr.puts( "Could not delete #{path}: #{e.message}" )
         
     | 
| 
      
 367 
     | 
    
         
            +
                end
         
     | 
| 
      
 368 
     | 
    
         
            +
              end
         
     | 
| 
      
 369 
     | 
    
         
            +
             
     | 
| 
       350 
370 
     | 
    
         
             
              def parseMeta( path )
         
     | 
| 
       351 
371 
     | 
    
         
             
                ret = Hash.new()
         
     | 
| 
       352 
372 
     | 
    
         
             
                return ret if path.nil? or path.eql? ''
         
     | 
| 
         @@ -390,8 +410,9 @@ class PDFBeads::PDFBuilder 
     | 
|
| 
       390 
410 
     | 
    
         
             
                  end
         
     | 
| 
       391 
411 
     | 
    
         | 
| 
       392 
412 
     | 
    
         
             
                  item_text = item[:title].to_binary
         
     | 
| 
       393 
     | 
    
         
            -
                  item_text. 
     | 
| 
       394 
     | 
    
         
            -
                  item_text. 
     | 
| 
      
 413 
     | 
    
         
            +
                  item_text.gsub!( /\x5C/,"\x5C\x5C" )
         
     | 
| 
      
 414 
     | 
    
         
            +
                  item_text.gsub!( /\x28/,"\x5C\x28" )
         
     | 
| 
      
 415 
     | 
    
         
            +
                  item_text.gsub!( /\x29/,"\x5C\x29" )
         
     | 
| 
       395 
416 
     | 
    
         
             
                  item[:pdfobj] = XObj.new(Hash[
         
     | 
| 
       396 
417 
     | 
    
         
             
                    'Title'  => "(\xFE\xFF#{item_text.to_text})",
         
     | 
| 
       397 
418 
     | 
    
         
             
                    'Parent' => ref(item[:parent][:pdfobj].getID),
         
     | 
| 
         @@ -465,6 +486,7 @@ class PDFBeads::PDFBuilder 
     | 
|
| 
       465 
486 
     | 
    
         
             
                if ocr_words.length > 0
         
     | 
| 
       466 
487 
     | 
    
         
             
                  ocr_words.each do |word|
         
     | 
| 
       467 
488 
     | 
    
         
             
                    bbox = elementCoordinates( word,xscale,yscale )
         
     | 
| 
      
 489 
     | 
    
         
            +
                    next if bbox == [0,0,0,0]
         
     | 
| 
       468 
490 
     | 
    
         
             
                    txt = elementText( word,charset )
         
     | 
| 
       469 
491 
     | 
    
         
             
                    units << [txt,bbox]
         
     | 
| 
       470 
492 
     | 
    
         
             
                  end
         
     | 
    
        data/lib/pdfbeads/pdfpage.rb
    CHANGED
    
    | 
         @@ -37,7 +37,7 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       37 
37 
     | 
    
         
             
              # Allows to collect data needed for building an individual page
         
     | 
| 
       38 
38 
     | 
    
         
             
              # of a PDF document and gives access to those data.
         
     | 
| 
       39 
39 
     | 
    
         
             
              class PageData
         
     | 
| 
       40 
     | 
    
         
            -
                attr_reader :name, :basename, :s_type, :stencils, :hocr_path
         
     | 
| 
      
 40 
     | 
    
         
            +
                attr_reader :name, :basename, :s_type, :stencils, :hocr_path, :fg_created, :bg_created
         
     | 
| 
       41 
41 
     | 
    
         
             
                attr_accessor :width, :height, :x_res, :y_res, :fg_layer, :bg_layer
         
     | 
| 
       42 
42 
     | 
    
         | 
| 
       43 
43 
     | 
    
         
             
                def initialize( path,basename,args,exts,pref )
         
     | 
| 
         @@ -49,6 +49,7 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       49 
49 
     | 
    
         
             
                  @exts = exts
         
     | 
| 
       50 
50 
     | 
    
         
             
                  @pref = pref
         
     | 
| 
       51 
51 
     | 
    
         
             
                  @bg_layer = @fg_layer = nil
         
     | 
| 
      
 52 
     | 
    
         
            +
                  @bg_created = @fg_created = false
         
     | 
| 
       52 
53 
     | 
    
         
             
                end
         
     | 
| 
       53 
54 
     | 
    
         | 
| 
       54 
55 
     | 
    
         
             
                def fillStencilArray()
         
     | 
| 
         @@ -60,6 +61,7 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       60 
61 
     | 
    
         
             
                  map = Hash[
         
     | 
| 
       61 
62 
     | 
    
         
             
                    :path => @name,
         
     | 
| 
       62 
63 
     | 
    
         
             
                    :rgb  => [0.0, 0.0, 0.0],
         
     | 
| 
      
 64 
     | 
    
         
            +
                    :created => false
         
     | 
| 
       63 
65 
     | 
    
         
             
                  ]
         
     | 
| 
       64 
66 
     | 
    
         | 
| 
       65 
67 
     | 
    
         
             
                  insp = ImageInspector.new( @name )
         
     | 
| 
         @@ -72,7 +74,7 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       72 
74 
     | 
    
         
             
                    @x_res = @y_res = fres
         
     | 
| 
       73 
75 
     | 
    
         
             
                  end
         
     | 
| 
       74 
76 
     | 
    
         | 
| 
       75 
     | 
    
         
            -
                  if insp.depth == 1
         
     | 
| 
      
 77 
     | 
    
         
            +
                  if insp.depth == 1 and insp.trans.nil?
         
     | 
| 
       76 
78 
     | 
    
         
             
                    @stencils << map
         
     | 
| 
       77 
79 
     | 
    
         
             
                    ret = 1
         
     | 
| 
       78 
80 
     | 
    
         | 
| 
         @@ -190,6 +192,7 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       190 
192 
     | 
    
         
             
                      px = Pixel.from_color( color )
         
     | 
| 
       191 
193 
     | 
    
         
             
                      unless color.eql? exc
         
     | 
| 
       192 
194 
     | 
    
         
             
                        cpath = "#{@basename}.#{color}.tiff"
         
     | 
| 
      
 195 
     | 
    
         
            +
                        created = false
         
     | 
| 
       193 
196 
     | 
    
         
             
                        if not File.exists? cpath or force
         
     | 
| 
       194 
197 
     | 
    
         
             
                          bitonal = img.copy
         
     | 
| 
       195 
198 
     | 
    
         
             
                          # Caution: replacing colors in the colormap currently only works
         
     | 
| 
         @@ -208,10 +211,12 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       208 
211 
     | 
    
         
             
                            self.compression = Group4Compression
         
     | 
| 
       209 
212 
     | 
    
         
             
                          end
         
     | 
| 
       210 
213 
     | 
    
         
             
                          bitonal.destroy!
         
     | 
| 
      
 214 
     | 
    
         
            +
                          created = true
         
     | 
| 
       211 
215 
     | 
    
         
             
                        end
         
     | 
| 
       212 
216 
     | 
    
         
             
                        cmap = Hash[
         
     | 
| 
       213 
217 
     | 
    
         
             
                          :path => cpath,
         
     | 
| 
       214 
     | 
    
         
            -
                          :rgb  => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange]
         
     | 
| 
      
 218 
     | 
    
         
            +
                          :rgb  => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange],
         
     | 
| 
      
 219 
     | 
    
         
            +
                          :created => created
         
     | 
| 
       215 
220 
     | 
    
         
             
                        ]
         
     | 
| 
       216 
221 
     | 
    
         
             
                        @stencils << cmap
         
     | 
| 
       217 
222 
     | 
    
         
             
                        ret += 1
         
     | 
| 
         @@ -231,6 +236,7 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       231 
236 
     | 
    
         
             
                      self.compression = Group4Compression
         
     | 
| 
       232 
237 
     | 
    
         
             
                    }
         
     | 
| 
       233 
238 
     | 
    
         
             
                    bitonal.destroy!
         
     | 
| 
      
 239 
     | 
    
         
            +
                    map[:created] = true
         
     | 
| 
       234 
240 
     | 
    
         
             
                  end
         
     | 
| 
       235 
241 
     | 
    
         | 
| 
       236 
242 
     | 
    
         
             
                  bgf = @pageargs[:bg_format]
         
     | 
| 
         @@ -263,6 +269,7 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       263 
269 
     | 
    
         
             
                    end
         
     | 
| 
       264 
270 
     | 
    
         | 
| 
       265 
271 
     | 
    
         
             
                    writeImage( img,bgpath,bgf )
         
     | 
| 
      
 272 
     | 
    
         
            +
                    @bg_created = true
         
     | 
| 
       266 
273 
     | 
    
         
             
                  end
         
     | 
| 
       267 
274 
     | 
    
         | 
| 
       268 
275 
     | 
    
         
             
                  map[:path] = binpath
         
     | 
| 
         @@ -327,7 +334,10 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       327 
334 
     | 
    
         
             
                  end
         
     | 
| 
       328 
335 
     | 
    
         | 
| 
       329 
336 
     | 
    
         
             
                  bgpath = "#{@basename}.bg." << fmt.downcase
         
     | 
| 
       330 
     | 
    
         
            -
                   
     | 
| 
      
 337 
     | 
    
         
            +
                  if writeImage( bg,bgpath,fmt )
         
     | 
| 
      
 338 
     | 
    
         
            +
                    @bg_layer = bgpath
         
     | 
| 
      
 339 
     | 
    
         
            +
                    @bg_created = true
         
     | 
| 
      
 340 
     | 
    
         
            +
                  end
         
     | 
| 
       331 
341 
     | 
    
         | 
| 
       332 
342 
     | 
    
         
             
                  bg.destroy!
         
     | 
| 
       333 
343 
     | 
    
         
             
                  no_fg.destroy!
         
     | 
| 
         @@ -352,7 +362,10 @@ class PDFBeads::PageDataProvider < Array 
     | 
|
| 
       352 
362 
     | 
    
         
             
                    fg.alpha( DeactivateAlphaChannel )
         
     | 
| 
       353 
363 
     | 
    
         | 
| 
       354 
364 
     | 
    
         
             
                    fgpath = "#{@basename}.fg." << fmt.downcase
         
     | 
| 
       355 
     | 
    
         
            -
                     
     | 
| 
      
 365 
     | 
    
         
            +
                    if writeImage( fg,fgpath,fmt )
         
     | 
| 
      
 366 
     | 
    
         
            +
                      @fg_layer = fgpath
         
     | 
| 
      
 367 
     | 
    
         
            +
                      @fg_created = true
         
     | 
| 
      
 368 
     | 
    
         
            +
                    end
         
     | 
| 
       356 
369 
     | 
    
         | 
| 
       357 
370 
     | 
    
         
             
                    fg.destroy!
         
     | 
| 
       358 
371 
     | 
    
         
             
                    no_bg.destroy!
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,13 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: pdfbeads
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
              hash:  
     | 
| 
      
 4 
     | 
    
         
            +
              hash: 5
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
              segments: 
         
     | 
| 
       7 
7 
     | 
    
         
             
              - 1
         
     | 
| 
       8 
8 
     | 
    
         
             
              - 0
         
     | 
| 
       9 
     | 
    
         
            -
              -  
     | 
| 
       10 
     | 
    
         
            -
              version: 1.0. 
     | 
| 
      
 9 
     | 
    
         
            +
              - 9
         
     | 
| 
      
 10 
     | 
    
         
            +
              version: 1.0.9
         
     | 
| 
       11 
11 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       12 
12 
     | 
    
         
             
            authors: 
         
     | 
| 
       13 
13 
     | 
    
         
             
            - Alexey Kryukov
         
     | 
| 
         @@ -15,7 +15,7 @@ autorequire: 
     | 
|
| 
       15 
15 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       16 
16 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
            date: 2012- 
     | 
| 
      
 18 
     | 
    
         
            +
            date: 2012-04-21 00:00:00 +04:00
         
     | 
| 
       19 
19 
     | 
    
         
             
            default_executable: pdfbeads
         
     | 
| 
       20 
20 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       21 
21 
     | 
    
         |