RubyGems - pdfbeads - Versions diffs - 1.0.3 → 1.0.5 - Mend

pdfbeads 1.0.3 → 1.0.5

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (8)

data/ChangeLog CHANGED

@@ -18,3 +18,11 @@
     * TIFF files with an embedded EXIF block were incorrectly processed
       by ImageInspector.
+2012 February 4 (Alexey Kryukov) Version 1.0.5
+    * If JPEG2000 compression was requested but not available,
+      JPEG files were still written with the 'JP2' extension.
+    * Some tweaks to minimize the effect of page labels being inconsistently handled
+      in various PDF viewers (prefer ISO-8859-1 strings if possible).

data/README CHANGED

@@ -51,3 +51,8 @@ Note that PDFBeads is intended for creating PDF files from previously
 processed images, and so it can't done some operations (e. g. converting
 color or grayscale scans to B&W) which should be typically performed with
 a special scan processing application, such as ScanTailor.
+PDFBeads requires RMagick (the Ruby bindings for the popular Magick++ image
+processing library). The hpricot extension is not required, but highly
+recommended, as without it PDFBeads would not be able to read data from hOCR
+files.

data/bin/pdfbeads CHANGED

@@ -1,4 +1,4 @@
-#!/usr/bin/env ruby1.9.1
+#!/usr/bin/env ruby
 # encoding: UTF-8
 ######################################################################
@@ -9,7 +9,7 @@
 # Unlike other PDF creation tools, this utility attempts to implement
 # the approach typically used for DjVu books. Its key feature is
 # separating scanned text (typically black, but indexed images with
-# a small number of colors are also accepted) from halftone images
+# a small number of colors are also accepted) from halftone images
 # placed into a background layer.
 #
 # Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).

data/lib/pdfbeads/pdfbuilder.rb CHANGED

@@ -145,10 +145,25 @@ class PDFBeads::PDFBuilder
     if labels != nil and labels.length > 0
       nTree = "<</Nums[\n"
       labels.each do |rng|
-        ltitl = Iconv.iconv( "utf-16be", "utf-8", rng[:prefix] ).first.to_text
         nTree << "#{rng[:first]} << "
-        nTree << "/P (\xFE\xFF#{ltitl.to_text}) " if rng.has_key? :prefix
+        if rng.has_key? :prefix
+          begin
+            # If possible, use iso8859-1 (aka PDFDocEncoding) for page labels:
+            # it is at least guaranteed to be safe
+            ltitl = Iconv.iconv( "iso8859-1", "utf-8", rng[:prefix] ).first
+            nTree << "/P (#{ltitl.to_text}) "
+          rescue Iconv::InvalidCharacter, Iconv::IllegalSequence
+            ltitl = Iconv.iconv( "utf-16be", "utf-8", rng[:prefix] ).first
+            # If there is no number (just prefix) then put a zero character after the prefix:
+            # this makes acroread happy, but prevents displaying the number in evince
+            unless rng.has_key? :style
+              nTree << "/P (\xFE\xFF#{ltitl.to_text}\x00\x00) "
+            # Otherwise put a formally correct Unicode string, which, however, may stumble acroread
+            else
+              nTree << "/P (\xFE\xFF#{ltitl.to_text}) "
+            end
+          end
+        end
         nTree << "/S /#{rng[:style]} " if rng.has_key? :style
         nTree << "/St #{rng[:start]}" if rng.has_key? :start
         nTree << ">>\n"
@@ -342,7 +357,7 @@ class PDFBeads::PDFBuilder
       fin.each do |fl|
         next if /^\#/.match( fl )
-        if /^\/?([A-Za-z]+)[ \t]*:[ \t]+\"(.*)\"/.match( fl )
+        if /^\/?([A-Za-z]+)[         ]*:[         ]+\"(.*)\"/.match( fl )
           key = $1
           if keys.include? key
             begin

data/lib/pdfbeads/pdffont.rb CHANGED

@@ -396,9 +396,6 @@ class PDFBeads::PDFBuilder::FontDataProvider
       0xFB01 => ["/fi", 556],
       0xFB02 => ["/fl", 556],
     ]
-    @chardata.default = proc do |fd, uni|
-      fd[uni] = [ sprintf( "/uni%04X",uni ), 500 ]
-    end
     @encodings = Array.new()
     @wlists = Array.new()
@@ -410,7 +407,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
     w = 0.0
     line.each_char do |uc|
       begin
-        w += @chardata[uc.ord][1] * size / 1000.0
+        w += chardata( uc.ord )[1] * size / 1000.0
       rescue
         rawbytes = uc.unpack( 'C*' )
         bs = ''
@@ -427,7 +424,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
   def getEncoding( enc )
     ret = Array.new()
     enc.each do |char|
-      ret << @chardata[char.ord][0]
+      ret << chardata( char.ord )[0]
     end
     ret
   end
@@ -437,7 +434,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
   def getWidths( enc )
     ret = Array.new()
     enc.each do |char|
-      ret << @chardata[char.ord][1]
+      ret << chardata( char.ord )[1]
     end
     ret
   end
@@ -530,4 +527,10 @@ class PDFBeads::PDFBuilder::FontDataProvider
     ], Zlib::Deflate.deflate( cmap,9 ) )
     toUnicode
   end
+  def chardata( uni )
+    @chardata.fetch( uni ) do |u|
+      [ sprintf( "/uni%04X",uni ), 500 ]
+    end
+  end
 end

data/lib/pdfbeads/pdflabels.rb CHANGED

@@ -107,7 +107,7 @@ class PDFBeads::PDFBuilder::PDFLabels < Array
     ]
     res = ''
-    numerals.keys.sort{ |a,b| b <=> a }.reverse.each do |val|
+    numerals.keys.sort{ |a,b| b <=> a }.each do |val|
       while num >= val
         res << numerals[val]
         num -= val

data/lib/pdfbeads/pdfpage.rb CHANGED

@@ -259,6 +259,7 @@ class PDFBeads::PageDataProvider < Array
           $stderr.puts( "This version of ImageMagick doesn't support JPEG2000 compression." )
           $stderr.puts( "\tI'll use JPEG compression instead." )
           bgf = 'JPG'
+          bgpath = "#{@basename}.bg." << bgf.downcase
         end
         writeImage( img,bgpath,bgf )

metadata CHANGED

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: pdfbeads
 version: !ruby/object:Gem::Version
-  hash: 17
-  prerelease: false
+  hash: 29
+  prerelease:
   segments:
   - 1
   - 0
-  - 3
-  version: 1.0.3
+  - 5
+  version: 1.0.5
 platform: ruby
 authors:
 - Alexey Kryukov
@@ -15,7 +15,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-11-16 00:00:00 +03:00
+date: 2012-02-04 00:00:00 +04:00
 default_executable: pdfbeads
 dependencies: []
@@ -74,7 +74,7 @@ requirements:
 - RMagick, v2.13.0 or greater
 - Hpricot, v0.8.3 or greater
 rubyforge_project: PDFBeads
-rubygems_version: 1.3.7
+rubygems_version: 1.5.0
 signing_key:
 specification_version: 3
 summary: PDFBeads -- convert scanned images to a single PDF file.