pdfbeads 1.0.7 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 198ab9ffc035604ce4cfb3528dbebfb7746d746de1a429089028102003e35480
4
+ data.tar.gz: e32b5a1de30aeb1bb715f0ea6417478f16f9ba9f229268a55ad3fd4ae9bc67ab
5
+ SHA512:
6
+ metadata.gz: 485725e99d06c9216e238626b35ec73d7ea2c64513f5112c24448191b6a09ccef40ef58a24bac291a975c8881779a6e62450e629ebee4c89ec34be9d976b4ccb
7
+ data.tar.gz: f0f53d06628a9433684d77e175cf95446abeebaa54e14e59bb2a637666d06eaa559734a2bac5e43b3d69182d6afb8ef6627d356bcd6c5b3991200a5cd9e8e858
data/COPYING CHANGED
File without changes
data/ChangeLog CHANGED
@@ -39,3 +39,62 @@
39
39
  + An attempt to achieve better positioning of the hidden text layer, taking into
40
40
  account not just lines, but also individual words. This should work with hOCR
41
41
  files produced with Cuneiform or Tesseract.
42
+
43
+ 2012 March 5 (Alexey Kryukov) Version 1.0.8
44
+
45
+ * Palette colors in PNG images were treated as signed chars and this could cause
46
+ indexed images to be incorrectly displayed in the resulting PDF.
47
+
48
+ 2012 April 22 (Alexey Kryukov) Version 1.0.9
49
+
50
+ + Add an option allowing to delete image files produced as an intermediate stage
51
+ during the PDF creation process.
52
+
53
+ * Processing indexed images with a small number of colors was broken.
54
+
55
+ * Don't attempt to use 'ocrx_word' elements which contain no bounding box
56
+ data (this should fix the problem with the hOCR output produced by some
57
+ tesseract versions).
58
+
59
+ 2013 Mar 20 (Alexey Kryukov) Version 1.1.0
60
+
61
+ + It is now possible to take the text layer from another PDF document (normally
62
+ this would be a file produced by passing the same set of images to an
63
+ OCR application) and embed it into the pdfbeads output. Warning: this feature
64
+ has been tested so far only with files produced with ABBYY FineReader. It may or
65
+ may not work with PDF files generated by other OCR programs.
66
+
67
+ * The default PDF page layout is now "OneColumn".
68
+
69
+ + Make it possible to specify that the preferred reading direction for the
70
+ PDF document is right-to-left.
71
+
72
+ + In order to simplify debugging of resulting files I have added a special
73
+ flag allowing to make the hidden text layer visible and to disable
74
+ compression in page streams.
75
+
76
+ 2014 Jan 26 (Alexey Kryukov) Version 1.1.1
77
+
78
+ * hpricot is no longer developed, so switch to Nokogiri for hOCR processing.
79
+
80
+ * use String#encode instead of Iconv, when available
81
+
82
+ * Got tired of deps not being automatically resolved, so add them to
83
+ the gemspec (the preferred method to install on Debian-based distributions
84
+ is now converting pdfbeads*.gem to a deb package with gem2deb).
85
+
86
+ + English HTML documentation added.
87
+
88
+ 2016 Dec 20 (Alexey Kryukov) Version 1.1.2
89
+ * Add a missing String#encode call
90
+
91
+ 2020 Jan 05 (Alexey Kryukov) Version 1.1.2
92
+ * Respect the photometric interpretation of Group4-encoded tiff images
93
+ * Some JPEG images were erroneously treated as LZW-encoded
94
+
95
+ 2020 Jan 24 (Alexey Kryukov) Version 1.1.2
96
+ * Fixed the /BaseState field in the optional content dictionary, which caused files to be rejected by some viewers
97
+
98
+ 2021 Nov 24 (Alexey Kryukov) Version 1.1.3
99
+ * Fixed some errors/warnings produced by newer rmagick versions
100
+ * Bumped the required rmagick version up to 3.2.0
data/README CHANGED
File without changes
data/bin/pdfbeads CHANGED
@@ -32,7 +32,6 @@
32
32
  #######################################################################
33
33
 
34
34
  require 'optparse'
35
- require 'iconv'
36
35
  require 'time'
37
36
 
38
37
  require 'pdfbeads'
@@ -41,8 +40,12 @@ include PDFBeads
41
40
  pdfargs = Hash[
42
41
  :labels => nil,
43
42
  :toc => nil,
44
- :pagelayout => 'TwoPageRight',
45
- :meta => nil
43
+ :pagelayout => 'OneColumn',
44
+ :meta => nil,
45
+ :textpdf => nil,
46
+ :delfiles => false,
47
+ :debug => false,
48
+ :rtl => false
46
49
  ]
47
50
  pageargs = Hash[
48
51
  :threshold => 1,
@@ -86,6 +89,23 @@ OptionParser.new() do |opts|
86
89
 
87
90
  pdfargs[:pagelayout] = pagelayout
88
91
  end
92
+ opts.on("-R", "--right-to-left",
93
+ "Set the flag indicating that the preferred reading",
94
+ "direction for the resulting PDF file is right to left") do |rtl|
95
+ pdfargs[:rtl] = rtl
96
+ end
97
+ opts.on("-T", "--text-pdf PDFFILE",
98
+ "Specify a PDF file produced by passing the same set",
99
+ "of files to an OCR program. Pdfbeads will use that file",
100
+ "to generate the hidden text layer for its PDF output.") do |pdffile|
101
+
102
+ if $has_pdfreader
103
+ pdfargs[:textpdf] = pdffile
104
+ else
105
+ $stderr.puts( "Warning: the pdf/reader extension is not available." )
106
+ $stderr.puts( "\tthe -T/--text-pdf option is ignored." )
107
+ end
108
+ end
89
109
 
90
110
  opts.separator "\n"
91
111
  opts.separator "Image encoding and compression options:\n"
@@ -146,7 +166,7 @@ OptionParser.new() do |opts|
146
166
  "Compression method for background images. Acceptable",
147
167
  "values are JP2|JPX|JPEG2000, JPG|JPEG or PNG|LOSSLESS.",
148
168
  "JP2 is used by default, unless this format is not",
149
- "supported by the available version of ImageMagick" ) do |format|
169
+ "supported by the available ImageMagick version" ) do |format|
150
170
  case format.upcase
151
171
  when 'JP2', 'JPX', 'J2K', 'JPEG2000'
152
172
  pageargs[:bg_format] = 'JP2'
@@ -169,10 +189,19 @@ OptionParser.new() do |opts|
169
189
  opts.separator "\n"
170
190
  opts.separator "General options:\n"
171
191
 
192
+ opts.on("-d", "--delete",
193
+ "Delete intermediate image files used to create PDF") do |d|
194
+ pdfargs[:delfiles] = d
195
+ end
172
196
  opts.on("-o", "--output FILE",
173
197
  "Print output to a file instead of STDERR") do |f|
174
198
  outpath = f
175
199
  end
200
+ opts.on("-D", "--debug",
201
+ "Simplify debugging the PDF output by making the hidden",
202
+ "text layer visible and using uncompressed page streams") do |dbg|
203
+ pdfargs[:debug] = dbg
204
+ end
176
205
  opts.on_tail("-h", "--help", "Show this message") do
177
206
  puts opts
178
207
  exit