pdfbeads 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog +8 -0
- data/README +5 -0
- data/bin/pdfbeads +2 -2
- data/lib/pdfbeads/pdfbuilder.rb +19 -4
- data/lib/pdfbeads/pdffont.rb +9 -6
- data/lib/pdfbeads/pdflabels.rb +1 -1
- data/lib/pdfbeads/pdfpage.rb +1 -0
- metadata +6 -6
data/ChangeLog
CHANGED
@@ -18,3 +18,11 @@
|
|
18
18
|
|
19
19
|
* TIFF files with an embedded EXIF block were incorrectly processed
|
20
20
|
by ImageInspector.
|
21
|
+
|
22
|
+
2012 February 4 (Alexey Kryukov) Version 1.0.5
|
23
|
+
|
24
|
+
* If JPEG2000 compression was requested but not available,
|
25
|
+
JPEG files were still written with the 'JP2' extension.
|
26
|
+
|
27
|
+
* Some tweaks to minimize the effect of page labels being inconsistently handled
|
28
|
+
in various PDF viewers (prefer ISO-8859-1 strings if possible).
|
data/README
CHANGED
@@ -51,3 +51,8 @@ Note that PDFBeads is intended for creating PDF files from previously
|
|
51
51
|
processed images, and so it can't do some operations (e. g. converting
|
52
52
|
color or grayscale scans to B&W) which should be typically performed with
|
53
53
|
a special scan processing application, such as ScanTailor.
|
54
|
+
|
55
|
+
PDFBeads requires RMagick (the Ruby bindings for the popular Magick++ image
|
56
|
+
processing library). The hpricot extension is not required, but highly
|
57
|
+
recommended, as without it PDFBeads would not be able to read data from hOCR
|
58
|
+
files.
|
data/bin/pdfbeads
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# encoding: UTF-8
|
3
3
|
|
4
4
|
######################################################################
|
@@ -9,7 +9,7 @@
|
|
9
9
|
# Unlike other PDF creation tools, this utility attempts to implement
|
10
10
|
# the approach typically used for DjVu books. Its key feature is
|
11
11
|
# separating scanned text (typically black, but indexed images with
|
12
|
-
# a small number of colors are also accepted) from halftone images
|
12
|
+
# a small number of colors are also accepted) from halftone images
|
13
13
|
# placed into a background layer.
|
14
14
|
#
|
15
15
|
# Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
|
data/lib/pdfbeads/pdfbuilder.rb
CHANGED
@@ -145,10 +145,25 @@ class PDFBeads::PDFBuilder
|
|
145
145
|
if labels != nil and labels.length > 0
|
146
146
|
nTree = "<</Nums[\n"
|
147
147
|
labels.each do |rng|
|
148
|
-
ltitl = Iconv.iconv( "utf-16be", "utf-8", rng[:prefix] ).first.to_text
|
149
|
-
|
150
148
|
nTree << "#{rng[:first]} << "
|
151
|
-
|
149
|
+
if rng.has_key? :prefix
|
150
|
+
begin
|
151
|
+
# If possible, use iso8859-1 (aka PDFDocEncoding) for page labels:
|
152
|
+
# it is at least guaranteed to be safe
|
153
|
+
ltitl = Iconv.iconv( "iso8859-1", "utf-8", rng[:prefix] ).first
|
154
|
+
nTree << "/P (#{ltitl.to_text}) "
|
155
|
+
rescue Iconv::InvalidCharacter, Iconv::IllegalSequence
|
156
|
+
ltitl = Iconv.iconv( "utf-16be", "utf-8", rng[:prefix] ).first
|
157
|
+
# If there is no number (just prefix) then put a zero character after the prefix:
|
158
|
+
# this makes acroread happy, but prevents displaying the number in evince
|
159
|
+
unless rng.has_key? :style
|
160
|
+
nTree << "/P (\xFE\xFF#{ltitl.to_text}\x00\x00) "
|
161
|
+
# Otherwise put a formally correct Unicode string, which, however, may stumble acroread
|
162
|
+
else
|
163
|
+
nTree << "/P (\xFE\xFF#{ltitl.to_text}) "
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
152
167
|
nTree << "/S /#{rng[:style]} " if rng.has_key? :style
|
153
168
|
nTree << "/St #{rng[:start]}" if rng.has_key? :start
|
154
169
|
nTree << ">>\n"
|
@@ -342,7 +357,7 @@ class PDFBeads::PDFBuilder
|
|
342
357
|
fin.each do |fl|
|
343
358
|
next if /^\#/.match( fl )
|
344
359
|
|
345
|
-
if /^\/?([A-Za-z]+)[
|
360
|
+
if /^\/?([A-Za-z]+)[ ]*:[ ]+\"(.*)\"/.match( fl )
|
346
361
|
key = $1
|
347
362
|
if keys.include? key
|
348
363
|
begin
|
data/lib/pdfbeads/pdffont.rb
CHANGED
@@ -396,9 +396,6 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
396
396
|
0xFB01 => ["/fi", 556],
|
397
397
|
0xFB02 => ["/fl", 556],
|
398
398
|
]
|
399
|
-
@chardata.default = proc do |fd, uni|
|
400
|
-
fd[uni] = [ sprintf( "/uni%04X",uni ), 500 ]
|
401
|
-
end
|
402
399
|
|
403
400
|
@encodings = Array.new()
|
404
401
|
@wlists = Array.new()
|
@@ -410,7 +407,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
410
407
|
w = 0.0
|
411
408
|
line.each_char do |uc|
|
412
409
|
begin
|
413
|
-
w +=
|
410
|
+
w += chardata( uc.ord )[1] * size / 1000.0
|
414
411
|
rescue
|
415
412
|
rawbytes = uc.unpack( 'C*' )
|
416
413
|
bs = ''
|
@@ -427,7 +424,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
427
424
|
def getEncoding( enc )
|
428
425
|
ret = Array.new()
|
429
426
|
enc.each do |char|
|
430
|
-
ret <<
|
427
|
+
ret << chardata( char.ord )[0]
|
431
428
|
end
|
432
429
|
ret
|
433
430
|
end
|
@@ -437,7 +434,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
437
434
|
def getWidths( enc )
|
438
435
|
ret = Array.new()
|
439
436
|
enc.each do |char|
|
440
|
-
ret <<
|
437
|
+
ret << chardata( char.ord )[1]
|
441
438
|
end
|
442
439
|
ret
|
443
440
|
end
|
@@ -530,4 +527,10 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
530
527
|
], Zlib::Deflate.deflate( cmap,9 ) )
|
531
528
|
toUnicode
|
532
529
|
end
|
530
|
+
|
531
|
+
def chardata( uni )
|
532
|
+
@chardata.fetch( uni ) do |u|
|
533
|
+
[ sprintf( "/uni%04X",uni ), 500 ]
|
534
|
+
end
|
535
|
+
end
|
533
536
|
end
|
data/lib/pdfbeads/pdflabels.rb
CHANGED
data/lib/pdfbeads/pdfpage.rb
CHANGED
@@ -259,6 +259,7 @@ class PDFBeads::PageDataProvider < Array
|
|
259
259
|
$stderr.puts( "This version of ImageMagick doesn't support JPEG2000 compression." )
|
260
260
|
$stderr.puts( "\tI'll use JPEG compression instead." )
|
261
261
|
bgf = 'JPG'
|
262
|
+
bgpath = "#{@basename}.bg." << bgf.downcase
|
262
263
|
end
|
263
264
|
|
264
265
|
writeImage( img,bgpath,bgf )
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfbeads
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
- 3
|
10
|
-
version: 1.0.3
|
9
|
+
- 5
|
10
|
+
version: 1.0.5
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alexey Kryukov
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2012-02-04 00:00:00 +04:00
|
19
19
|
default_executable: pdfbeads
|
20
20
|
dependencies: []
|
21
21
|
|
@@ -74,7 +74,7 @@ requirements:
|
|
74
74
|
- RMagick, v2.13.0 or greater
|
75
75
|
- Hpricot, v0.8.3 or greater
|
76
76
|
rubyforge_project: PDFBeads
|
77
|
-
rubygems_version: 1.
|
77
|
+
rubygems_version: 1.5.0
|
78
78
|
signing_key:
|
79
79
|
specification_version: 3
|
80
80
|
summary: PDFBeads -- convert scanned images to a single PDF file.
|