pdfbeads 1.0.3 → 1.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/ChangeLog CHANGED
@@ -18,3 +18,11 @@
18
18
 
19
19
  * TIFF files with an embedded EXIF block were incorrectly processed
20
20
  by ImageInspector.
21
+
22
+ 2012 February 4 (Alexey Kryukov) Version 1.0.5
23
+
24
+ * If JPEG2000 compression was requested but not available,
25
+ JPEG files were still written with the 'JP2' extension.
26
+
27
+ * Some tweaks to minimize the effect of page labels being inconsistently handled
28
+ in various PDF viewers (prefer ISO-8859-1 strings if possible).
data/README CHANGED
@@ -51,3 +51,8 @@ Note that PDFBeads is intended for creating PDF files from previously
51
51
  processed images, and so it can't do some operations (e.g. converting
52
52
  color or grayscale scans to B&W) which should be typically performed with
53
53
  a special scan processing application, such as ScanTailor.
54
+
55
+ PDFBeads requires RMagick (the Ruby bindings for the popular Magick++ image
56
+ processing library). The hpricot extension is not required, but highly
57
+ recommended, as without it PDFBeads would not be able to read data from hOCR
58
+ files.
@@ -1,4 +1,4 @@
1
- #!/usr/bin/env ruby1.9.1
1
+ #!/usr/bin/env ruby
2
2
  # encoding: UTF-8
3
3
 
4
4
  ######################################################################
@@ -9,7 +9,7 @@
9
9
  # Unlike other PDF creation tools, this utility attempts to implement
10
10
  # the approach typically used for DjVu books. Its key feature is
11
11
  # separating scanned text (typically black, but indexed images with
12
- # a small number of colors are also accepted) from halftone images
12
+ # a small number of colors are also accepted) from halftone images
13
13
  # placed into a background layer.
14
14
  #
15
15
  # Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
@@ -145,10 +145,25 @@ class PDFBeads::PDFBuilder
145
145
  if labels != nil and labels.length > 0
146
146
  nTree = "<</Nums[\n"
147
147
  labels.each do |rng|
148
- ltitl = Iconv.iconv( "utf-16be", "utf-8", rng[:prefix] ).first.to_text
149
-
150
148
  nTree << "#{rng[:first]} << "
151
- nTree << "/P (\xFE\xFF#{ltitl.to_text}) " if rng.has_key? :prefix
149
+ if rng.has_key? :prefix
150
+ begin
151
+ # If possible, use iso8859-1 (aka PDFDocEncoding) for page labels:
152
+ # it is at least guaranteed to be safe
153
+ ltitl = Iconv.iconv( "iso8859-1", "utf-8", rng[:prefix] ).first
154
+ nTree << "/P (#{ltitl.to_text}) "
155
+ rescue Iconv::InvalidCharacter, Iconv::IllegalSequence
156
+ ltitl = Iconv.iconv( "utf-16be", "utf-8", rng[:prefix] ).first
157
+ # If there is no number (just prefix) then put a zero character after the prefix:
158
+ # this makes acroread happy, but prevents displaying the number in evince
159
+ unless rng.has_key? :style
160
+ nTree << "/P (\xFE\xFF#{ltitl.to_text}\x00\x00) "
161
+ # Otherwise put a formally correct Unicode string, which, however, may confuse acroread
162
+ else
163
+ nTree << "/P (\xFE\xFF#{ltitl.to_text}) "
164
+ end
165
+ end
166
+ end
152
167
  nTree << "/S /#{rng[:style]} " if rng.has_key? :style
153
168
  nTree << "/St #{rng[:start]}" if rng.has_key? :start
154
169
  nTree << ">>\n"
@@ -342,7 +357,7 @@ class PDFBeads::PDFBuilder
342
357
  fin.each do |fl|
343
358
  next if /^\#/.match( fl )
344
359
 
345
- if /^\/?([A-Za-z]+)[ \t]*:[ \t]+\"(.*)\"/.match( fl )
360
+ if /^\/?([A-Za-z]+)[ ]*:[ ]+\"(.*)\"/.match( fl )
346
361
  key = $1
347
362
  if keys.include? key
348
363
  begin
@@ -396,9 +396,6 @@ class PDFBeads::PDFBuilder::FontDataProvider
396
396
  0xFB01 => ["/fi", 556],
397
397
  0xFB02 => ["/fl", 556],
398
398
  ]
399
- @chardata.default = proc do |fd, uni|
400
- fd[uni] = [ sprintf( "/uni%04X",uni ), 500 ]
401
- end
402
399
 
403
400
  @encodings = Array.new()
404
401
  @wlists = Array.new()
@@ -410,7 +407,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
410
407
  w = 0.0
411
408
  line.each_char do |uc|
412
409
  begin
413
- w += @chardata[uc.ord][1] * size / 1000.0
410
+ w += chardata( uc.ord )[1] * size / 1000.0
414
411
  rescue
415
412
  rawbytes = uc.unpack( 'C*' )
416
413
  bs = ''
@@ -427,7 +424,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
427
424
  def getEncoding( enc )
428
425
  ret = Array.new()
429
426
  enc.each do |char|
430
- ret << @chardata[char.ord][0]
427
+ ret << chardata( char.ord )[0]
431
428
  end
432
429
  ret
433
430
  end
@@ -437,7 +434,7 @@ class PDFBeads::PDFBuilder::FontDataProvider
437
434
  def getWidths( enc )
438
435
  ret = Array.new()
439
436
  enc.each do |char|
440
- ret << @chardata[char.ord][1]
437
+ ret << chardata( char.ord )[1]
441
438
  end
442
439
  ret
443
440
  end
@@ -530,4 +527,10 @@ class PDFBeads::PDFBuilder::FontDataProvider
530
527
  ], Zlib::Deflate.deflate( cmap,9 ) )
531
528
  toUnicode
532
529
  end
530
+
531
+ def chardata( uni )
532
+ @chardata.fetch( uni ) do |u|
533
+ [ sprintf( "/uni%04X",uni ), 500 ]
534
+ end
535
+ end
533
536
  end
@@ -107,7 +107,7 @@ class PDFBeads::PDFBuilder::PDFLabels < Array
107
107
  ]
108
108
  res = ''
109
109
 
110
- numerals.keys.sort{ |a,b| b <=> a }.reverse.each do |val|
110
+ numerals.keys.sort{ |a,b| b <=> a }.each do |val|
111
111
  while num >= val
112
112
  res << numerals[val]
113
113
  num -= val
@@ -259,6 +259,7 @@ class PDFBeads::PageDataProvider < Array
259
259
  $stderr.puts( "This version of ImageMagick doesn't support JPEG2000 compression." )
260
260
  $stderr.puts( "\tI'll use JPEG compression instead." )
261
261
  bgf = 'JPG'
262
+ bgpath = "#{@basename}.bg." << bgf.downcase
262
263
  end
263
264
 
264
265
  writeImage( img,bgpath,bgf )
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdfbeads
3
3
  version: !ruby/object:Gem::Version
4
- hash: 17
5
- prerelease: false
4
+ hash: 29
5
+ prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 3
10
- version: 1.0.3
9
+ - 5
10
+ version: 1.0.5
11
11
  platform: ruby
12
12
  authors:
13
13
  - Alexey Kryukov
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-11-16 00:00:00 +03:00
18
+ date: 2012-02-04 00:00:00 +04:00
19
19
  default_executable: pdfbeads
20
20
  dependencies: []
21
21
 
@@ -74,7 +74,7 @@ requirements:
74
74
  - RMagick, v2.13.0 or greater
75
75
  - Hpricot, v0.8.3 or greater
76
76
  rubyforge_project: PDFBeads
77
- rubygems_version: 1.3.7
77
+ rubygems_version: 1.5.0
78
78
  signing_key:
79
79
  specification_version: 3
80
80
  summary: PDFBeads -- convert scanned images to a single PDF file.