RubyGems - pdfbeads - Versions diffs - 1.0.7 → 1.1.3 - Mend

pdfbeads 1.0.7 → 1.1.3

Files changed (16) hide show

checksums.yaml +7 -0
data/COPYING +0 -0
data/ChangeLog +59 -0
data/README +0 -0
data/bin/pdfbeads +33 -4
data/doc/pdfbeads.en.html +548 -0
data/doc/pdfbeads.ru.html +74 -34
data/lib/imageinspector.rb +24 -21
data/lib/pdfbeads/pdfbuilder.rb +308 -87
data/lib/pdfbeads/pdfdoc.rb +0 -0
data/lib/pdfbeads/pdffont.rb +0 -0
data/lib/pdfbeads/pdflabels.rb +0 -0
data/lib/pdfbeads/pdfpage.rb +45 -32
data/lib/pdfbeads/pdftoc.rb +7 -3
data/lib/pdfbeads.rb +18 -7
metadata +92 -61

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA256:
+  metadata.gz: 198ab9ffc035604ce4cfb3528dbebfb7746d746de1a429089028102003e35480
+  data.tar.gz: e32b5a1de30aeb1bb715f0ea6417478f16f9ba9f229268a55ad3fd4ae9bc67ab
+SHA512:
+  metadata.gz: 485725e99d06c9216e238626b35ec73d7ea2c64513f5112c24448191b6a09ccef40ef58a24bac291a975c8881779a6e62450e629ebee4c89ec34be9d976b4ccb
+  data.tar.gz: f0f53d06628a9433684d77e175cf95446abeebaa54e14e59bb2a637666d06eaa559734a2bac5e43b3d69182d6afb8ef6627d356bcd6c5b3991200a5cd9e8e858

data/COPYING CHANGED Viewed

File without changes

data/ChangeLog CHANGED Viewed

@@ -39,3 +39,62 @@
     + An attempt to achieve better positioning of the hidden text layer, taking into
       account not just lines, but also individual words. This should work with hOCR
       files produced with Cuneiform or Tesseract.
+2012 March 5 (Alexey Kryukov) Version 1.0.8
+    * Palette colors in PNG images were treated as signed chars and this could cause
+      indexed images to be incorrectly displayed in the resulting PDF.
+2012 April 22 (Alexey Kryukov) Version 1.0.9
+    + Add an option allowing to delete image files produced as an intermediate stage
+      during the PDF creation process.
+    * Processing indexed images with a small number of colors was broken.
+    * Don't attempt to use 'ocrx_word' elements which contain no bounding box
+      data (this should fix the problem with the hOCR output produced by some
+      tesseract versions).
+2013 Mar 20 (Alexey Kryukov) Version 1.1.0
+    + It is now possible to take the text layer from another PDF document (normally
+      this would be a file produced by passing the same set of images to an
+      OCR application) and embed it into the pdfbeads output. Warning: this feature
+      has been tested so far only with files produced with ABBYY FineReader. It may or
+      may not work with PDF files generated by other OCR programs.
+    * The default PDF page layout is now "OneColumn".
+    + Make it possible to specify that the preferred reading direction for the
+      PDF document is right-to-left.
+    + In order to simplify debugging of resulting files I have added a special
+      flag allowing to make the hidden text layer visible and to disable
+      compression in page streams.
+2014 Jan 26 (Alexey Kryukov) Version 1.1.1
+    * hpricot is no longer developed, so switch to Nokogiri for hOCR processing.
+    * use String#encode instead of Iconv, when available
+    * Got tired of deps not being automatically resolved, so add them to
+      the gemspec (the preferred method to install on Debian-based distributions
+      is now converting pdfbeads*.gem to a deb package with gem2deb).
+    + English HTML documentation added.
+2016 Dec 20 (Alexey Kryukov) Version 1.1.2
+    * Add a missing String#encode call
+2020 Jan 05 (Alexey Kryukov) Version 1.1.2
+    * Respect the photometric interpretation of Group4-encoded tiff images
+    * Some JPEG images were erroneously treated as LZW-encoded
+2020 Jan 24 (Alexey Kryukov) Version 1.1.2
+    * Fixed the /BaseState field in the optional content dictionary, which caused files to be rejected by some viewers
+2021 Nov 24 (Alexey Kryukov) Version 1.1.3
+    * Fixed some errors/warnings produced by newer rmagick versions
+    * Bumped the required rmagick version up to 3.2.0

data/README CHANGED Viewed

File without changes

data/bin/pdfbeads CHANGED Viewed

@@ -32,7 +32,6 @@
 #######################################################################
 require 'optparse'
-require 'iconv'
 require 'time'
 require 'pdfbeads'
@@ -41,8 +40,12 @@ include PDFBeads
 pdfargs  = Hash[
   :labels         => nil,
   :toc            => nil,
-  :pagelayout     => 'TwoPageRight',
-  :meta           => nil
+  :pagelayout     => 'OneColumn',
+  :meta           => nil,
+  :textpdf        => nil,
+  :delfiles       => false,
+  :debug          => false,
+  :rtl            => false
 ]
 pageargs = Hash[
   :threshold       => 1,
@@ -86,6 +89,23 @@ OptionParser.new() do |opts|
     pdfargs[:pagelayout] = pagelayout
   end
+  opts.on("-R", "--right-to-left",
+                "Set the flag indicating that the preferred reading",
+                "direction for the resulting PDF file is right to left") do |rtl|
+    pdfargs[:rtl] = rtl
+  end
+  opts.on("-T", "--text-pdf PDFFILE",
+                "Specify a PDF file produced by passing the same set",
+		"of files to an OCR program. Pdfbeads will use that file",
+		"to generate the hidden text layer for its PDF output.") do |pdffile|
+    if $has_pdfreader
+      pdfargs[:textpdf] = pdffile
+    else
+      $stderr.puts( "Warning: the pdf/reader extension is not available." )
+      $stderr.puts( "\tthe -T/--text-pdf option is ignored." )
+    end
+  end
   opts.separator "\n"
   opts.separator "Image encoding and compression options:\n"
@@ -146,7 +166,7 @@ OptionParser.new() do |opts|
                 "Compression method for background images. Acceptable",
                 "values are JP2|JPX|JPEG2000, JPG|JPEG or PNG|LOSSLESS.",
                 "JP2 is used by default, unless this format is not",
-                "supported by the available version of ImageMagick" ) do |format|
+                "supported by the available ImageMagick version" ) do |format|
     case format.upcase
     when 'JP2', 'JPX', 'J2K', 'JPEG2000'
       pageargs[:bg_format] = 'JP2'
@@ -169,10 +189,19 @@ OptionParser.new() do |opts|
   opts.separator "\n"
   opts.separator "General options:\n"
+  opts.on("-d", "--delete",
+                "Delete intermediate image files used to create PDF") do |d|
+    pdfargs[:delfiles] = d
+  end
   opts.on("-o", "--output FILE",
                 "Print output to a file instead of STDERR") do |f|
     outpath = f
   end
+  opts.on("-D", "--debug",
+                "Simplify debugging the PDF output by making the hidden",
+                "text layer visible and using uncompressed page streams") do |dbg|
+    pdfargs[:debug] = dbg
+  end
   opts.on_tail("-h", "--help", "Show this message") do
     puts opts
     exit