pdfbeads 1.0.9 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/ChangeLog +23 -0
- data/bin/pdfbeads +28 -3
- data/doc/pdfbeads.en.html +552 -0
- data/doc/pdfbeads.ru.html +74 -34
- data/lib/pdfbeads.rb +17 -6
- data/lib/pdfbeads/pdfbuilder.rb +254 -74
- data/lib/pdfbeads/pdfpage.rb +8 -8
- data/lib/pdfbeads/pdftoc.rb +7 -3
- metadata +80 -48
data/lib/pdfbeads/pdfpage.rb
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
# Unlike other PDF creation tools, this utility attempts to implement
|
9
9
|
# the approach typically used for DjVu books. Its key feature is
|
10
10
|
# separating scanned text (typically black, but indexed images with
|
11
|
-
# a small number of colors are also accepted) from halftone images
|
11
|
+
# a small number of colors are also accepted) from halftone images
|
12
12
|
# placed into a background layer.
|
13
13
|
#
|
14
14
|
# Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
|
@@ -30,7 +30,7 @@
|
|
30
30
|
#
|
31
31
|
#######################################################################
|
32
32
|
|
33
|
-
# Represents a set of page images
|
33
|
+
# Represents a set of page images accompanied by auxiliary files
|
34
34
|
# needed to build a PDF document.
|
35
35
|
class PDFBeads::PageDataProvider < Array
|
36
36
|
|
@@ -96,7 +96,7 @@ class PDFBeads::PageDataProvider < Array
|
|
96
96
|
|
97
97
|
$stderr.puts( "Prepared data for processing #{@name}\n" )
|
98
98
|
if insp.nextImage
|
99
|
-
$stderr.puts( "Warning: #{@name} contains multiple images, but only the first one")
|
99
|
+
$stderr.puts( "Warning: #{@name} contains multiple images, but only the first one")
|
100
100
|
$stderr.puts( "\tis going to be used\n" )
|
101
101
|
end
|
102
102
|
ret
|
@@ -117,8 +117,8 @@ class PDFBeads::PageDataProvider < Array
|
|
117
117
|
@bg_layer = bgpath unless bgpath.nil?
|
118
118
|
|
119
119
|
# If updating auxiliary files is requested and the base image is
|
120
|
-
# either
|
121
|
-
# contain any elements which should be
|
120
|
+
# either bitonal or indexed with just a few colors (i. e. doesn't
|
121
|
+
# contain any elements which should be placed into the background layer),
|
122
122
|
# then the *.color.* image (if present) takes priority over any existing
|
123
123
|
# *.bg.* and *.fg.* images. So we should regenerate them.
|
124
124
|
if bgpath.nil? or ( force and not @s_type.eql? 'c' )
|
@@ -139,7 +139,7 @@ class PDFBeads::PageDataProvider < Array
|
|
139
139
|
@fg_layer = fgpath unless fgpath.nil?
|
140
140
|
end
|
141
141
|
|
142
|
-
if $
|
142
|
+
if $has_nokogiri and not @pageargs[:pages_per_dict].nil?
|
143
143
|
@hocr_path = Dir.entries('.').detect do |f|
|
144
144
|
/\A#{@basename}.(HOCR|HTML?)/i.match(f)
|
145
145
|
end
|
@@ -353,7 +353,7 @@ class PDFBeads::PageDataProvider < Array
|
|
353
353
|
# to achieve the desired color diffusion. The idea is inspired by
|
354
354
|
# Anthony Thyssen's http://www.imagemagick.org/Usage/scripts/hole_fill_shepards
|
355
355
|
# script, which is intended just for this purpose (i. e. removing undesired
|
356
|
-
# areas from the image). However our approach is a bit
|
356
|
+
# areas from the image). However our approach is a bit more crude (but still
|
357
357
|
# effective).
|
358
358
|
fg.resize!( width=imw/100,height=imh/100,filter=GaussianFilter )
|
359
359
|
fg.resize!( width=imw,height=imh,filter=GaussianFilter )
|
@@ -471,7 +471,7 @@ class PDFBeads::PageDataProvider < Array
|
|
471
471
|
end
|
472
472
|
|
473
473
|
if pidx == per_dict or i == length - 1
|
474
|
-
# The jbig2 encoder processes a bunch of files at once, producing
|
474
|
+
# The jbig2 encoder processes a bunch of files at once, producing
|
475
475
|
# pages which depend on a shared dictionary. Thus we can skip this
|
476
476
|
# stage only if both the dictionary and each of the individual pages
|
477
477
|
# are already found on the disk
|
data/lib/pdfbeads/pdftoc.rb
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
# Unlike other PDF creation tools, this utility attempts to implement
|
9
9
|
# the approach typically used for DjVu books. Its key feature is
|
10
10
|
# separating scanned text (typically black, but indexed images with
|
11
|
-
# a small number of colors are also accepted) from halftone images
|
11
|
+
# a small number of colors are also accepted) from halftone images
|
12
12
|
# placed into a background layer.
|
13
13
|
#
|
14
14
|
# Copyright (C) 2010 Alexey Kryukov (amkryukov@gmail.com).
|
@@ -38,7 +38,7 @@
|
|
38
38
|
# <indent>"Title" "Page Number" [0|-|1|+]
|
39
39
|
#
|
40
40
|
# The indent is used to determine the level of this outline item: it may
|
41
|
-
# consist either of spaces or of tabs, but it is not allowed to
|
41
|
+
# consist either of spaces or of tabs, but it is not allowed to
|
42
42
|
# mix both characters in the same file. The title and page number are
|
43
43
|
# separated with an arbitrary number of whitespace characters and are
|
44
44
|
# normally enclosed in double quotes. The third, optional argument
|
@@ -101,7 +101,11 @@ class PDFBeads::PDFBuilder::PDFTOC < Array
|
|
101
101
|
title = parts[0].gsub(/\A"/m,"").gsub(/"\Z/m, "")
|
102
102
|
ref = parts[1].gsub(/\A"/m,"").gsub(/"\Z/m, "")
|
103
103
|
begin
|
104
|
-
title
|
104
|
+
if title.respond_to? :encode
|
105
|
+
title.encode!( "utf-16be", "utf-8" )
|
106
|
+
else
|
107
|
+
title = Iconv.iconv( "utf-16be", "utf-8", title ).first
|
108
|
+
end
|
105
109
|
rescue
|
106
110
|
$stderr.puts("Error: TOC should be specified in utf-8")
|
107
111
|
return
|
metadata
CHANGED
@@ -1,35 +1,75 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdfbeads
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 1
|
8
|
-
- 0
|
9
|
-
- 9
|
10
|
-
version: 1.0.9
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.1.1
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Alexey Kryukov
|
14
8
|
autorequire:
|
15
9
|
bindir: bin
|
16
10
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
11
|
+
date: 2014-01-30 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rmagick
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - '>='
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: 2.13.0
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - '>='
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: 2.13.0
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: nokogiri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - '>='
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 1.5.10
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - '>='
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 1.5.10
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: pdf-reader
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - '>='
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 1.0.0
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 1.0.0
|
55
|
+
description: |2
|
56
|
+
PDFBeads is a small utility written in Ruby which takes scanned
|
57
|
+
page images and converts them into a single PDF file. Unlike other
|
58
|
+
PDF creation tools, PDFBeads attempts to implement the approach
|
59
|
+
typically used for DjVu books. Its key feature is separating scanned
|
60
|
+
text (typically black, but indexed images with a small number of
|
61
|
+
colors are also accepted) from halftone pictures. Each type of
|
62
|
+
graphical data is encoded into its own layer with a specific
|
63
|
+
compression method and resolution.
|
23
64
|
email: amkryukov@gmail.com
|
24
|
-
executables:
|
65
|
+
executables:
|
25
66
|
- pdfbeads
|
26
67
|
extensions: []
|
27
|
-
|
28
|
-
extra_rdoc_files:
|
68
|
+
extra_rdoc_files:
|
29
69
|
- README
|
30
70
|
- COPYING
|
31
71
|
- ChangeLog
|
32
|
-
files:
|
72
|
+
files:
|
33
73
|
- lib/pdfbeads/pdfbuilder.rb
|
34
74
|
- lib/pdfbeads/pdfpage.rb
|
35
75
|
- lib/pdfbeads/pdftoc.rb
|
@@ -40,43 +80,35 @@ files:
|
|
40
80
|
- lib/imageinspector.rb
|
41
81
|
- bin/pdfbeads
|
42
82
|
- doc/pdfbeads.ru.html
|
83
|
+
- doc/pdfbeads.en.html
|
43
84
|
- README
|
44
85
|
- COPYING
|
45
86
|
- ChangeLog
|
46
|
-
has_rdoc: true
|
47
87
|
homepage: http://pdfbeads.rubyforge.org
|
48
|
-
licenses:
|
49
|
-
|
88
|
+
licenses:
|
89
|
+
- GPL
|
90
|
+
metadata: {}
|
50
91
|
post_install_message:
|
51
92
|
rdoc_options: []
|
52
|
-
|
53
|
-
require_paths:
|
93
|
+
require_paths:
|
54
94
|
- lib
|
55
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
requirements:
|
67
|
-
- - ">="
|
68
|
-
- !ruby/object:Gem::Version
|
69
|
-
hash: 3
|
70
|
-
segments:
|
71
|
-
- 0
|
72
|
-
version: "0"
|
73
|
-
requirements:
|
95
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
requirements:
|
97
|
+
- - '>='
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: '0'
|
100
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
101
|
+
requirements:
|
102
|
+
- - '>='
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements:
|
74
106
|
- RMagick, v2.13.0 or greater
|
75
|
-
-
|
107
|
+
- nokogiri, v1.5.10 or greater
|
108
|
+
- PDF::Reader, v1.0.0 or greater
|
76
109
|
rubyforge_project: PDFBeads
|
77
|
-
rubygems_version:
|
110
|
+
rubygems_version: 2.0.3
|
78
111
|
signing_key:
|
79
|
-
specification_version:
|
112
|
+
specification_version: 4
|
80
113
|
summary: PDFBeads -- convert scanned images to a single PDF file.
|
81
114
|
test_files: []
|
82
|
-
|