RubyGems - pdfbeads - Versions diffs - 1.0.7 → 1.1.3 - Mend

pdfbeads 1.0.7 → 1.1.3

Files changed (16) hide show

checksums.yaml +7 -0
data/COPYING +0 -0
data/ChangeLog +59 -0
data/README +0 -0
data/bin/pdfbeads +33 -4
data/doc/pdfbeads.en.html +548 -0
data/doc/pdfbeads.ru.html +74 -34
data/lib/imageinspector.rb +24 -21
data/lib/pdfbeads/pdfbuilder.rb +308 -87
data/lib/pdfbeads/pdfdoc.rb +0 -0
data/lib/pdfbeads/pdffont.rb +0 -0
data/lib/pdfbeads/pdflabels.rb +0 -0
data/lib/pdfbeads/pdfpage.rb +45 -32
data/lib/pdfbeads/pdftoc.rb +7 -3
data/lib/pdfbeads.rb +18 -7
metadata +92 -61

data/lib/pdfbeads/pdfpage.rb CHANGED Viewed

@@ -8,7 +8,7 @@
 # Unlike other PDF creation tools, this utility attempts to implement
 # the approach typically used for DjVu books. Its key feature is
 # separating scanned text (typically black, but indexed images with
-# a small number of colors are also accepted) from halftone images
+# a small number of colors are also accepted) from halftone images
 # placed into a background layer.
 #
 # Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
@@ -30,14 +30,14 @@
 #
 #######################################################################
-# Represents a set of page images accompanies with auxiliary files
+# Represents a set of page images accompanied with auxiliary files
 # needed to build a PDF document.
 class PDFBeads::PageDataProvider < Array
   # Collects the data needed to build an individual page
   # of a PDF document and gives access to that data.
   class PageData
-    attr_reader :name, :basename, :s_type, :stencils, :hocr_path
+    attr_reader :name, :basename, :s_type, :stencils, :hocr_path, :fg_created, :bg_created
     attr_accessor :width, :height, :x_res, :y_res, :fg_layer, :bg_layer
     def initialize( path,basename,args,exts,pref )
@@ -49,6 +49,7 @@ class PDFBeads::PageDataProvider < Array
       @exts = exts
       @pref = pref
       @bg_layer = @fg_layer = nil
+      @bg_created = @fg_created = false
     end
     def fillStencilArray()
@@ -60,6 +61,7 @@ class PDFBeads::PageDataProvider < Array
       map = Hash[
         :path => @name,
         :rgb  => [0.0, 0.0, 0.0],
+        :created => false
       ]
       insp = ImageInspector.new( @name )
@@ -72,7 +74,7 @@ class PDFBeads::PageDataProvider < Array
         @x_res = @y_res = fres
       end
-      if insp.depth == 1
+      if insp.depth == 1 and insp.trans.nil?
         @stencils << map
         ret = 1
@@ -94,7 +96,7 @@ class PDFBeads::PageDataProvider < Array
       $stderr.puts( "Prepared data for processing #{@name}\n" )
       if insp.nextImage
-        $stderr.puts( "Warning: #{@name} contains multiple images, but only the first one")
+        $stderr.puts( "Warning: #{@name} contains multiple images, but only the first one")
         $stderr.puts( "\tis going to be used\n" )
       end
       ret
@@ -115,8 +117,8 @@ class PDFBeads::PageDataProvider < Array
         @bg_layer = bgpath unless bgpath.nil?
         # If updating auxiliary files is requested and the base image is
-        # either monochrome or indexed with just a few colors (i. e. doesn't
-        # contain any elements which should be encoded to the background layer),
+        # either bitonal or indexed with just a few colors (i. e. doesn't
+        # contain any elements which should be placed to the background layer),
         # then the *.color.* image (if present) takes priority over any existing
         # *.bg.* and *.fg.* images. So we should regenerate them.
         if bgpath.nil? or ( force and not @s_type.eql? 'c' )
@@ -137,7 +139,7 @@ class PDFBeads::PageDataProvider < Array
         @fg_layer = fgpath unless fgpath.nil?
       end
-      if $has_hpricot
+      if $has_nokogiri and not @pageargs[:pages_per_dict].nil?
         @hocr_path = Dir.entries('.').detect do |f|
           /\A#{@basename}.(HOCR|HTML?)/i.match(f)
         end
@@ -158,19 +160,19 @@ class PDFBeads::PageDataProvider < Array
     def writeImage( img,path,fmt )
       begin
-        img.write( path ) do
+        img.write( path ) do |curimg|
           case fmt
           when 'JP2'
-            self.define( 'JP2','mode','real' )
-            self.define( 'JP2','numrlvls',4 )
-            self.define( 'JP2','rate',0.015625 )
+            curimg.define( 'JP2','mode','real' )
+            curimg.define( 'JP2','numrlvls',4 )
+            curimg.define( 'JP2','rate',0.015625 )
           when 'JPG'
-            self.quality = 50
+            curimg.quality = 50
           else
-            self.compression = ZipCompression
-            self.quality = 95
+            curimg.compression = ZipCompression
+            curimg.quality = 95
           end
-          self.format = fmt
+          curimg.format = fmt
         end
         return true
       rescue
@@ -190,6 +192,7 @@ class PDFBeads::PageDataProvider < Array
           px = Pixel.from_color( color )
           unless color.eql? exc
             cpath = "#{@basename}.#{color}.tiff"
+            created = false
             if not File.exists? cpath or force
               bitonal = img.copy
               # Caution: replacing colors in the colormap currently only works
@@ -202,16 +205,18 @@ class PDFBeads::PageDataProvider < Array
                 bitonal.colormap( j,crepl )
               end
               bitonal.compress_colormap!
-              bitonal.write( cpath ) do
-                self.format = 'TIFF'
-                self.define( 'TIFF','rows-per-strip',img.rows )
-                self.compression = Group4Compression
+              bitonal.write( cpath ) do |curimg|
+                curimg.format = 'TIFF'
+                curimg.define( 'TIFF','rows-per-strip',img.rows )
+                curimg.compression = Group4Compression
               end
               bitonal.destroy!
+              created = true
             end
             cmap = Hash[
               :path => cpath,
-              :rgb  => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange]
+              :rgb  => [px.red.to_f/QuantumRange, px.green.to_f/QuantumRange, px.blue.to_f/QuantumRange],
+              :created => created
             ]
             @stencils << cmap
             ret += 1
@@ -225,12 +230,13 @@ class PDFBeads::PageDataProvider < Array
       binpath = "#{@basename}.black.tiff"
       if not File.exists? binpath or force
         im_copy = img.copy; bitonal = im_copy.threshold(QuantumRange/255*treshold); im_copy.destroy!
-        bitonal.write( binpath ){
-          self.format = 'TIFF'
-          self.define( 'TIFF','rows-per-strip',img.rows )
-          self.compression = Group4Compression
+        bitonal.write( binpath ) { |curimg|
+          curimg.format = 'TIFF'
+          curimg.define( 'TIFF','rows-per-strip',img.rows )
+          curimg.compression = Group4Compression
         }
         bitonal.destroy!
+        map[:created] = true
       end
       bgf = @pageargs[:bg_format]
@@ -263,6 +269,7 @@ class PDFBeads::PageDataProvider < Array
         end
         writeImage( img,bgpath,bgf )
+        @bg_created = true
       end
       map[:path] = binpath
@@ -317,7 +324,7 @@ class PDFBeads::PageDataProvider < Array
       PageData.fixResolution( img )
       mask.resize!( imw,imh ) if mask.columns != imw or mask.rows != imh
-      no_fg = img.composite( mask,CenterGravity,CopyOpacityCompositeOp )
+      no_fg = img.composite( mask,CenterGravity,CopyAlphaCompositeOp )
       bg = no_fg.blur_channel( 0,6,AllChannels )
       bg.alpha( DeactivateAlphaChannel )
@@ -327,7 +334,10 @@ class PDFBeads::PageDataProvider < Array
       end
       bgpath = "#{@basename}.bg." << fmt.downcase
-      @bg_layer = bgpath if writeImage( bg,bgpath,fmt )
+      if writeImage( bg,bgpath,fmt )
+        @bg_layer = bgpath
+        @bg_created = true
+      end
       bg.destroy!
       no_fg.destroy!
@@ -336,14 +346,14 @@ class PDFBeads::PageDataProvider < Array
         ksam = mask.negate
         mask.destroy!
-        no_bg = img.composite( ksam,CenterGravity,CopyOpacityCompositeOp )
+        no_bg = img.composite( ksam,CenterGravity,CopyAlphaCompositeOp )
         fg = no_bg.clone
         # Resize the image to a tiny size and then back to the original size
         # to achieve the desired color diffusion. The idea is inspired by
         # Anthony Thyssen's http://www.imagemagick.org/Usage/scripts/hole_fill_shepards
         # script, which is intended just for this purpose (i. e. removing undesired
-        # areas from the image). However our approach is a bit cruder (but still
+        # areas from the image). However our approach is a bit more crude (but still
         # effective).
         fg.resize!( width=imw/100,height=imh/100,filter=GaussianFilter )
         fg.resize!( width=imw,height=imh,filter=GaussianFilter )
@@ -352,7 +362,10 @@ class PDFBeads::PageDataProvider < Array
         fg.alpha( DeactivateAlphaChannel )
         fgpath = "#{@basename}.fg." << fmt.downcase
-        @fg_layer = fgpath if writeImage( fg,fgpath,fmt )
+        if writeImage( fg,fgpath,fmt )
+          @fg_layer = fgpath
+          @fg_created = true
+        end
         fg.destroy!
         no_bg.destroy!
@@ -390,8 +403,8 @@ class PDFBeads::PageDataProvider < Array
     # A hack for some Windows versions of RMagick, which throw an error the
     # first time Magick.formats is accessed
+    retries = 2
     begin
-      retries = 2
       mfmts = Magick.formats
     rescue
       retry if (retries -= 1 ) > 0
@@ -458,7 +471,7 @@ class PDFBeads::PageDataProvider < Array
       end
       if pidx == per_dict or i == length - 1
-        # The jbig2 encoder processes a bunch of files at once, producing
+        # The jbig2 encoder processes a bunch of files at once, producing
         # pages which depend on a shared dictionary. Thus we can skip this
         # stage only if both the dictionary and each of the individual pages
         # are already found on the disk

data/lib/pdfbeads/pdftoc.rb CHANGED Viewed

@@ -8,7 +8,7 @@
 # Unlike other PDF creation tools, this utility attempts to implement
 # the approach typically used for DjVu books. Its key feature is
 # separating scanned text (typically black, but indexed images with
-# a small number of colors are also accepted) from halftone images
+# a small number of colors are also accepted) from halftone images
 # placed into a background layer.
 #
 # Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
@@ -38,7 +38,7 @@
 # <indent>"Title" "Page Number" [0|-|1|+]
 #
 # The indent is used to determine the level of this outline item: it may
-# consist either of spaces or of tabs, but it is not allowed to
+# consist either of spaces or of tabs, but it is not allowed to
 # mix both characters in the same file. The title and page number are
 # separated with an arbitrary number of whitespace characters and are
 # normally enclosed in double quotes. The third, optional argument
@@ -101,7 +101,11 @@ class PDFBeads::PDFBuilder::PDFTOC < Array
           title = parts[0].gsub(/\A"/m,"").gsub(/"\Z/m, "")
           ref   = parts[1].gsub(/\A"/m,"").gsub(/"\Z/m, "")
           begin
-            title = Iconv.iconv( "utf-16be", "utf-8", title ).first
+            if title.respond_to? :encode
+              title.encode!( "utf-16be", "utf-8" )
+            else
+              title = Iconv.iconv( "utf-16be", "utf-8", title ).first
+            end
           rescue
             $stderr.puts("Error: TOC should be specified in utf-8")
             return

data/lib/pdfbeads.rb CHANGED Viewed

@@ -8,7 +8,7 @@
 # Unlike other PDF creation tools, this utility attempts to implement
 # the approach typically used for DjVu books. Its key feature is
 # separating scanned text (typically black, but indexed images with
-# a small number of colors are also accepted) from halftone images
+# a small number of colors are also accepted) from halftone images
 # placed into a background layer.
 #
 # Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
@@ -30,19 +30,25 @@
 #
 #######################################################################
-require 'iconv'
 require 'zlib'
-require 'RMagick'
+require 'rmagick'
 include Magick
 begin
-  require 'hpricot'
-  $has_hpricot = true
+  require 'nokogiri'
+  $has_nokogiri = true
 rescue LoadError
-  $stderr.puts( "Warning: the hpricot extension is not available. I'll not be able" )
+  $stderr.puts( "Warning: the nokogiri extension is not available. I'll not be able" )
   $stderr.puts( "\tto create hidden text layer from hOCR files." )
-  $has_hpricot = false
+  $has_nokogiri = false
+end
+begin
+  require 'pdf/reader'
+  $has_pdfreader = true
+rescue LoadError
+  $has_pdfreader = false
 end
 unless ''.respond_to? :ord
@@ -50,6 +56,11 @@ unless ''.respond_to? :ord
   require 'jcode'
 end
+# Require iconv only when String#encode is unavailable (Ruby older than 1.9)
+unless ''.respond_to? :encode
+  require 'iconv'
+end
 class String
   # Protect strings which are supposed to be treated as a raw sequence of bytes.
   # This is important for Ruby 1.9. For earlier versions the method just

metadata CHANGED Viewed

@@ -1,82 +1,113 @@
---- !ruby/object:Gem::Specification
+--- !ruby/object:Gem::Specification
 name: pdfbeads
-version: !ruby/object:Gem::Version
-  hash: 25
-  prerelease:
-  segments:
-  - 1
-  - 0
-  - 7
-  version: 1.0.7
+version: !ruby/object:Gem::Version
+  version: 1.1.3
 platform: ruby
-authors:
+authors:
 - Alexey Kryukov
-autorequire:
+autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-02-10 00:00:00 +04:00
-default_executable: pdfbeads
-dependencies: []
-description: "    PDFBeads is a small utility written in Ruby which takes scanned\n    page images and converts them into a single PDF file. Unlike other\n    PDF creation tools, PDFBeads attempts to implement the approach\n    typically used for DjVu books. Its key feature is separating scanned\n    text (typically black, but indexed images with a small number of\n    colors are also accepted) from halftone pictures. Each type of\n    graphical data is encoded into its own layer with a specific\n    compression method and resolution.\n"
+date: 2021-11-24 00:00:00.000000000 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rmagick
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 3.2.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 3.2.0
+- !ruby/object:Gem::Dependency
+  name: nokogiri
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.5.10
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.5.10
+- !ruby/object:Gem::Dependency
+  name: pdf-reader
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.0.0
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: 1.0.0
+description: |2
+      PDFBeads is a small utility written in Ruby which takes scanned
+      page images and converts them into a single PDF file. Unlike other
+      PDF creation tools, PDFBeads attempts to implement the approach
+      typically used for DjVu books. Its key feature is separating scanned
+      text (typically black, but indexed images with a small number of
+      colors are also accepted) from halftone pictures. Each type of
+      graphical data is encoded into its own layer with a specific
+      compression method and resolution.
 email: amkryukov@gmail.com
-executables:
+executables:
 - pdfbeads
 extensions: []
-extra_rdoc_files:
+extra_rdoc_files:
 - README
 - COPYING
 - ChangeLog
-files:
+files:
+- COPYING
+- ChangeLog
+- README
+- bin/pdfbeads
+- doc/pdfbeads.en.html
+- doc/pdfbeads.ru.html
+- lib/imageinspector.rb
+- lib/pdfbeads.rb
 - lib/pdfbeads/pdfbuilder.rb
-- lib/pdfbeads/pdfpage.rb
-- lib/pdfbeads/pdftoc.rb
+- lib/pdfbeads/pdfdoc.rb
 - lib/pdfbeads/pdffont.rb
 - lib/pdfbeads/pdflabels.rb
-- lib/pdfbeads/pdfdoc.rb
-- lib/pdfbeads.rb
-- lib/imageinspector.rb
-- bin/pdfbeads
-- doc/pdfbeads.ru.html
-- README
-- COPYING
-- ChangeLog
-has_rdoc: true
+- lib/pdfbeads/pdfpage.rb
+- lib/pdfbeads/pdftoc.rb
 homepage: http://pdfbeads.rubyforge.org
-licenses: []
-post_install_message:
+licenses:
+- GPL-3.0+
+metadata: {}
+post_install_message:
 rdoc_options: []
-require_paths:
+require_paths:
 - lib
-required_ruby_version: !ruby/object:Gem::Requirement
-  none: false
-  requirements:
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
   - - ">="
-    - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
-      version: "0"
-required_rubygems_version: !ruby/object:Gem::Requirement
-  none: false
-  requirements:
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
   - - ">="
-    - !ruby/object:Gem::Version
-      hash: 3
-      segments:
-      - 0
-      version: "0"
-requirements:
-- RMagick, v2.13.0 or greater
-- Hpricot, v0.8.3 or greater
-rubyforge_project: PDFBeads
-rubygems_version: 1.5.0
-signing_key:
-specification_version: 3
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements:
+- RMagick, v3.2.0 or greater
+- nokogiri, v1.5.10 or greater
+- PDF::Reader, v1.0.0 or greater
+rubygems_version: 3.2.29
+signing_key:
+specification_version: 4
 summary: PDFBeads -- convert scanned images to a single PDF file.
 test_files: []