pdfbeads 1.0.3 → 1.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +8 -0
- data/README +5 -0
- data/bin/pdfbeads +2 -2
- data/lib/pdfbeads/pdfbuilder.rb +19 -4
- data/lib/pdfbeads/pdffont.rb +9 -6
- data/lib/pdfbeads/pdflabels.rb +1 -1
- data/lib/pdfbeads/pdfpage.rb +1 -0
- metadata +6 -6
data/ChangeLog
CHANGED
@@ -18,3 +18,11 @@
|
|
18
18
|
|
19
19
|
* TIFF files with an embedded EXIF block were incorrectly processed
|
20
20
|
by ImageInspector.
|
21
|
+
|
22
|
+
2012 February 4 (Alexey Kryukov) Version 1.0.5
|
23
|
+
|
24
|
+
* If JPEG2000 compression was requested but not available,
|
25
|
+
JPEG files were still written with the 'JP2' extension.
|
26
|
+
|
27
|
+
* Some tweaks to minimize the effect of page labels being inconsistently handled
|
28
|
+
in various PDF viewers (prefer ISO-8859-1 strings if possible).
|
data/README
CHANGED
@@ -51,3 +51,8 @@ Note that PDFBeads is intended for creating PDF files from previously
|
|
51
51
|
processed images, and so it can't do some operations (e. g. converting
|
52
52
|
color or grayscale scans to B&W) which should be typically performed with
|
53
53
|
a special scan processing application, such as ScanTailor.
|
54
|
+
|
55
|
+
PDFBeads requires RMagick (the Ruby bindings for the popular ImageMagick image
|
56
|
+
processing library). The hpricot extension is not required, but highly
|
57
|
+
recommended, as without it PDFBeads would not be able to read data from hOCR
|
58
|
+
files.
|
data/bin/pdfbeads
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#!/usr/bin/env
|
1
|
+
#!/usr/bin/env ruby
|
2
2
|
# encoding: UTF-8
|
3
3
|
|
4
4
|
######################################################################
|
@@ -9,7 +9,7 @@
|
|
9
9
|
# Unlike other PDF creation tools, this utility attempts to implement
|
10
10
|
# the approach typically used for DjVu books. Its key feature is
|
11
11
|
# separating scanned text (typically black, but indexed images with
|
12
|
-
# a small number of colors are also accepted) from halftone images
|
12
|
+
# a small number of colors are also accepted) from halftone images
|
13
13
|
# placed into a background layer.
|
14
14
|
#
|
15
15
|
# Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
|
data/lib/pdfbeads/pdfbuilder.rb
CHANGED
@@ -145,10 +145,25 @@ class PDFBeads::PDFBuilder
|
|
145
145
|
if labels != nil and labels.length > 0
|
146
146
|
nTree = "<</Nums[\n"
|
147
147
|
labels.each do |rng|
|
148
|
-
ltitl = Iconv.iconv( "utf-16be", "utf-8", rng[:prefix] ).first.to_text
|
149
|
-
|
150
148
|
nTree << "#{rng[:first]} << "
|
151
|
-
|
149
|
+
if rng.has_key? :prefix
|
150
|
+
begin
|
151
|
+
# If possible, use iso8859-1 (aka PDFDocEncoding) for page labels:
|
152
|
+
# it is at least guaranteed to be safe
|
153
|
+
ltitl = Iconv.iconv( "iso8859-1", "utf-8", rng[:prefix] ).first
|
154
|
+
nTree << "/P (#{ltitl.to_text}) "
|
155
|
+
rescue Iconv::InvalidCharacter, Iconv::IllegalSequence
|
156
|
+
ltitl = Iconv.iconv( "utf-16be", "utf-8", rng[:prefix] ).first
|
157
|
+
# If there is no number (just prefix) then put a zero character after the prefix:
|
158
|
+
# this makes acroread happy, but prevents displaying the number in evince
|
159
|
+
unless rng.has_key? :style
|
160
|
+
nTree << "/P (\xFE\xFF#{ltitl.to_text}\x00\x00) "
|
161
|
+
# Otherwise emit a formally correct Unicode string, which, however, may trip up acroread
|
162
|
+
else
|
163
|
+
nTree << "/P (\xFE\xFF#{ltitl.to_text}) "
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
152
167
|
nTree << "/S /#{rng[:style]} " if rng.has_key? :style
|
153
168
|
nTree << "/St #{rng[:start]}" if rng.has_key? :start
|
154
169
|
nTree << ">>\n"
|
@@ -342,7 +357,7 @@ class PDFBeads::PDFBuilder
|
|
342
357
|
fin.each do |fl|
|
343
358
|
next if /^\#/.match( fl )
|
344
359
|
|
345
|
-
if /^\/?([A-Za-z]+)[
|
360
|
+
if /^\/?([A-Za-z]+)[ ]*:[ ]+\"(.*)\"/.match( fl )
|
346
361
|
key = $1
|
347
362
|
if keys.include? key
|
348
363
|
begin
|
data/lib/pdfbeads/pdffont.rb
CHANGED
@@ -396,9 +396,6 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
396
396
|
0xFB01 => ["/fi", 556],
|
397
397
|
0xFB02 => ["/fl", 556],
|
398
398
|
]
|
399
|
-
@chardata.default = proc do |fd, uni|
|
400
|
-
fd[uni] = [ sprintf( "/uni%04X",uni ), 500 ]
|
401
|
-
end
|
402
399
|
|
403
400
|
@encodings = Array.new()
|
404
401
|
@wlists = Array.new()
|
@@ -410,7 +407,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
410
407
|
w = 0.0
|
411
408
|
line.each_char do |uc|
|
412
409
|
begin
|
413
|
-
w +=
|
410
|
+
w += chardata( uc.ord )[1] * size / 1000.0
|
414
411
|
rescue
|
415
412
|
rawbytes = uc.unpack( 'C*' )
|
416
413
|
bs = ''
|
@@ -427,7 +424,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
427
424
|
def getEncoding( enc )
|
428
425
|
ret = Array.new()
|
429
426
|
enc.each do |char|
|
430
|
-
ret <<
|
427
|
+
ret << chardata( char.ord )[0]
|
431
428
|
end
|
432
429
|
ret
|
433
430
|
end
|
@@ -437,7 +434,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
437
434
|
def getWidths( enc )
|
438
435
|
ret = Array.new()
|
439
436
|
enc.each do |char|
|
440
|
-
ret <<
|
437
|
+
ret << chardata( char.ord )[1]
|
441
438
|
end
|
442
439
|
ret
|
443
440
|
end
|
@@ -530,4 +527,10 @@ class PDFBeads::PDFBuilder::FontDataProvider
|
|
530
527
|
], Zlib::Deflate.deflate( cmap,9 ) )
|
531
528
|
toUnicode
|
532
529
|
end
|
530
|
+
|
531
|
+
def chardata( uni )
|
532
|
+
@chardata.fetch( uni ) do |u|
|
533
|
+
[ sprintf( "/uni%04X",uni ), 500 ]
|
534
|
+
end
|
535
|
+
end
|
533
536
|
end
|
data/lib/pdfbeads/pdflabels.rb
CHANGED
data/lib/pdfbeads/pdfpage.rb
CHANGED
@@ -259,6 +259,7 @@ class PDFBeads::PageDataProvider < Array
|
|
259
259
|
$stderr.puts( "This version of ImageMagick doesn't support JPEG2000 compression." )
|
260
260
|
$stderr.puts( "\tI'll use JPEG compression instead." )
|
261
261
|
bgf = 'JPG'
|
262
|
+
bgpath = "#{@basename}.bg." << bgf.downcase
|
262
263
|
end
|
263
264
|
|
264
265
|
writeImage( img,bgpath,bgf )
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfbeads
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
5
|
-
prerelease:
|
4
|
+
hash: 29
|
5
|
+
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 1.0.
|
9
|
+
- 5
|
10
|
+
version: 1.0.5
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Alexey Kryukov
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2012-02-04 00:00:00 +04:00
|
19
19
|
default_executable: pdfbeads
|
20
20
|
dependencies: []
|
21
21
|
|
@@ -74,7 +74,7 @@ requirements:
|
|
74
74
|
- RMagick, v2.13.0 or greater
|
75
75
|
- Hpricot, v0.8.3 or greater
|
76
76
|
rubyforge_project: PDFBeads
|
77
|
-
rubygems_version: 1.
|
77
|
+
rubygems_version: 1.5.0
|
78
78
|
signing_key:
|
79
79
|
specification_version: 3
|
80
80
|
summary: PDFBeads -- convert scanned images to a single PDF file.
|