pdfbeads 1.0.7 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/COPYING +0 -0
- data/ChangeLog +59 -0
- data/README +0 -0
- data/bin/pdfbeads +33 -4
- data/doc/pdfbeads.en.html +548 -0
- data/doc/pdfbeads.ru.html +74 -34
- data/lib/imageinspector.rb +24 -21
- data/lib/pdfbeads/pdfbuilder.rb +308 -87
- data/lib/pdfbeads/pdfdoc.rb +0 -0
- data/lib/pdfbeads/pdffont.rb +0 -0
- data/lib/pdfbeads/pdflabels.rb +0 -0
- data/lib/pdfbeads/pdfpage.rb +45 -32
- data/lib/pdfbeads/pdftoc.rb +7 -3
- data/lib/pdfbeads.rb +18 -7
- metadata +92 -61
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 198ab9ffc035604ce4cfb3528dbebfb7746d746de1a429089028102003e35480
|
4
|
+
data.tar.gz: e32b5a1de30aeb1bb715f0ea6417478f16f9ba9f229268a55ad3fd4ae9bc67ab
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 485725e99d06c9216e238626b35ec73d7ea2c64513f5112c24448191b6a09ccef40ef58a24bac291a975c8881779a6e62450e629ebee4c89ec34be9d976b4ccb
|
7
|
+
data.tar.gz: f0f53d06628a9433684d77e175cf95446abeebaa54e14e59bb2a637666d06eaa559734a2bac5e43b3d69182d6afb8ef6627d356bcd6c5b3991200a5cd9e8e858
|
data/COPYING
CHANGED
File without changes
|
data/ChangeLog
CHANGED
@@ -39,3 +39,62 @@
|
|
39
39
|
+ An attempt to achieve better positioning of the hidden text layer, taking into
|
40
40
|
account not just lines, but also individual words. This should work with hOCR
|
41
41
|
files produced with Cuneiform or Tesseract.
|
42
|
+
|
43
|
+
2012 March 5 (Alexey Kryukov) Version 1.0.8
|
44
|
+
|
45
|
+
* Palette colors in PNG images were treated as signed chars and this could cause
|
46
|
+
indexed images to be incorrectly displayed in the resulting PDF.
|
47
|
+
|
48
|
+
2012 April 22 (Alexey Kryukov) Version 1.0.9
|
49
|
+
|
50
|
+
+ Add an option allowing to delete image files produced as an intermediate stage
|
51
|
+
during the PDF creation process.
|
52
|
+
|
53
|
+
* Processing indexed images with a small number of colors was broken.
|
54
|
+
|
55
|
+
* Don't attempt to use 'ocrx_word' elements which contain no bounding box
|
56
|
+
data (this should fix the problem with the hOCR output produced by some
|
57
|
+
tesseract versions).
|
58
|
+
|
59
|
+
2013 Mar 20 (Alexey Kryukov) Version 1.1.0
|
60
|
+
|
61
|
+
+ It is now possible to take the text layer from another PDF document (normally
|
62
|
+
this would be a file produced by passing the same set of images to an
|
63
|
+
OCR application) and embed it into the pdfbeads output. Warning: this feature
|
64
|
+
has been tested so far only with files produced with ABBYY FineReader. It may or
|
65
|
+
may not work with PDF files generated by other OCR programs.
|
66
|
+
|
67
|
+
* The default PDF page layout is now "OneColumn".
|
68
|
+
|
69
|
+
+ Make it possible to specify that the preferred reading direction for the
|
70
|
+
PDF document is right-to-left.
|
71
|
+
|
72
|
+
+ In order to simplify debugging of resulting files I have added a special
|
73
|
+
flag allowing to make the hidden text layer visible and to disable
|
74
|
+
compression in page streams.
|
75
|
+
|
76
|
+
2014 Jan 26 (Alexey Kryukov) Version 1.1.1
|
77
|
+
|
78
|
+
* hpricot is no longer developed, so switch to Nokogiri for hOCR processing.
|
79
|
+
|
80
|
+
* use String#encode instead of Iconv, when available
|
81
|
+
|
82
|
+
* Got tired of deps not being automatically resolved, so add them to
|
83
|
+
the gemspec (the preferred method to install on Debian-based distributions
|
84
|
+
is now converting pdfbeads*.gem to a deb package with gem2deb).
|
85
|
+
|
86
|
+
+ English HTML documentation added.
|
87
|
+
|
88
|
+
2016 Dec 20 (Alexey Kryukov) Version 1.1.2
|
89
|
+
* Add a missing String#encode call
|
90
|
+
|
91
|
+
2020 Jan 05 (Alexey Kryukov) Version 1.1.2
|
92
|
+
* Respect the photometric interpretation of Group4-encoded tiff images
|
93
|
+
* Some JPEG images were erroneously treated as LZW-encoded
|
94
|
+
|
95
|
+
2020 Jan 24 (Alexey Kryukov) Version 1.1.2
|
96
|
+
* Fixed the /BaseState field in the optional content dictionary, which caused files to be rejected by some viewers
|
97
|
+
|
98
|
+
2021 Nov 24 (Alexey Kryukov) Version 1.1.3
|
99
|
+
* Fixed some errors/warnings produced by newer rmagick versions
|
100
|
+
* Bumped the required rmagick version up to 3.2.0
|
data/README
CHANGED
File without changes
|
data/bin/pdfbeads
CHANGED
@@ -32,7 +32,6 @@
|
|
32
32
|
#######################################################################
|
33
33
|
|
34
34
|
require 'optparse'
|
35
|
-
require 'iconv'
|
36
35
|
require 'time'
|
37
36
|
|
38
37
|
require 'pdfbeads'
|
@@ -41,8 +40,12 @@ include PDFBeads
|
|
41
40
|
pdfargs = Hash[
|
42
41
|
:labels => nil,
|
43
42
|
:toc => nil,
|
44
|
-
:pagelayout => '
|
45
|
-
:meta => nil
|
43
|
+
:pagelayout => 'OneColumn',
|
44
|
+
:meta => nil,
|
45
|
+
:textpdf => nil,
|
46
|
+
:delfiles => false,
|
47
|
+
:debug => false,
|
48
|
+
:rtl => false
|
46
49
|
]
|
47
50
|
pageargs = Hash[
|
48
51
|
:threshold => 1,
|
@@ -86,6 +89,23 @@ OptionParser.new() do |opts|
|
|
86
89
|
|
87
90
|
pdfargs[:pagelayout] = pagelayout
|
88
91
|
end
|
92
|
+
opts.on("-R", "--right-to-left",
|
93
|
+
"Set the flag indicating that the preferred reading",
|
94
|
+
"direction for the resulting PDF file is right to left") do |rtl|
|
95
|
+
pdfargs[:rtl] = rtl
|
96
|
+
end
|
97
|
+
opts.on("-T", "--text-pdf PDFFILE",
|
98
|
+
"Specify a PDF file produced by passing the same set",
|
99
|
+
"of files to an OCR program. Pdfbeads will use that file",
|
100
|
+
"to generate the hidden text layer for its PDF output.") do |pdffile|
|
101
|
+
|
102
|
+
if $has_pdfreader
|
103
|
+
pdfargs[:textpdf] = pdffile
|
104
|
+
else
|
105
|
+
$stderr.puts( "Warning: the pdf/reader extension is not available." )
|
106
|
+
$stderr.puts( "\tthe -T/--text-pdf option is ignored." )
|
107
|
+
end
|
108
|
+
end
|
89
109
|
|
90
110
|
opts.separator "\n"
|
91
111
|
opts.separator "Image encoding and compression options:\n"
|
@@ -146,7 +166,7 @@ OptionParser.new() do |opts|
|
|
146
166
|
"Compression method for background images. Acceptable",
|
147
167
|
"values are JP2|JPX|JPEG2000, JPG|JPEG or PNG|LOSSLESS.",
|
148
168
|
"JP2 is used by default, unless this format is not",
|
149
|
-
"supported by the available version
|
169
|
+
"supported by the available ImageMagick version" ) do |format|
|
150
170
|
case format.upcase
|
151
171
|
when 'JP2', 'JPX', 'J2K', 'JPEG2000'
|
152
172
|
pageargs[:bg_format] = 'JP2'
|
@@ -169,10 +189,19 @@ OptionParser.new() do |opts|
|
|
169
189
|
opts.separator "\n"
|
170
190
|
opts.separator "General options:\n"
|
171
191
|
|
192
|
+
opts.on("-d", "--delete",
|
193
|
+
"Delete intermediate image files used to create PDF") do |d|
|
194
|
+
pdfargs[:delfiles] = d
|
195
|
+
end
|
172
196
|
opts.on("-o", "--output FILE",
|
173
197
|
"Print output to a file instead of STDERR") do |f|
|
174
198
|
outpath = f
|
175
199
|
end
|
200
|
+
opts.on("-D", "--debug",
|
201
|
+
"Simplify debugging the PDF output by making the hidden",
|
202
|
+
"text layer visible and using uncompressed page streams") do |dbg|
|
203
|
+
pdfargs[:debug] = dbg
|
204
|
+
end
|
176
205
|
opts.on_tail("-h", "--help", "Show this message") do
|
177
206
|
puts opts
|
178
207
|
exit
|