hexapdf 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -1
- data/CONTRIBUTERS +1 -1
- data/LICENSE +1 -1
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/hexapdf.rb +1 -1
- data/lib/hexapdf/cli.rb +19 -52
- data/lib/hexapdf/cli/command.rb +251 -0
- data/lib/hexapdf/cli/{extract.rb → files.rb} +19 -23
- data/lib/hexapdf/cli/images.rb +147 -0
- data/lib/hexapdf/cli/info.rb +5 -5
- data/lib/hexapdf/cli/inspect.rb +13 -12
- data/lib/hexapdf/cli/merge.rb +200 -0
- data/lib/hexapdf/cli/modify.rb +39 -242
- data/lib/hexapdf/cli/optimize.rb +104 -0
- data/lib/hexapdf/configuration.rb +1 -1
- data/lib/hexapdf/content.rb +1 -1
- data/lib/hexapdf/content/canvas.rb +1 -1
- data/lib/hexapdf/content/color_space.rb +1 -1
- data/lib/hexapdf/content/graphic_object.rb +1 -1
- data/lib/hexapdf/content/graphic_object/arc.rb +1 -1
- data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +1 -1
- data/lib/hexapdf/content/graphic_object/solid_arc.rb +1 -1
- data/lib/hexapdf/content/graphics_state.rb +1 -1
- data/lib/hexapdf/content/operator.rb +1 -1
- data/lib/hexapdf/content/parser.rb +16 -15
- data/lib/hexapdf/content/processor.rb +1 -1
- data/lib/hexapdf/content/transformation_matrix.rb +1 -1
- data/lib/hexapdf/data_dir.rb +1 -1
- data/lib/hexapdf/dictionary.rb +1 -1
- data/lib/hexapdf/dictionary_fields.rb +1 -1
- data/lib/hexapdf/document.rb +1 -1
- data/lib/hexapdf/document/files.rb +1 -1
- data/lib/hexapdf/document/fonts.rb +1 -1
- data/lib/hexapdf/document/images.rb +1 -1
- data/lib/hexapdf/document/pages.rb +1 -1
- data/lib/hexapdf/encryption.rb +1 -1
- data/lib/hexapdf/encryption/aes.rb +1 -1
- data/lib/hexapdf/encryption/arc4.rb +1 -1
- data/lib/hexapdf/encryption/fast_aes.rb +1 -1
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/identity.rb +1 -1
- data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
- data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +1 -1
- data/lib/hexapdf/encryption/standard_security_handler.rb +1 -1
- data/lib/hexapdf/error.rb +1 -1
- data/lib/hexapdf/filter.rb +1 -1
- data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
- data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
- data/lib/hexapdf/filter/dct_decode.rb +1 -1
- data/lib/hexapdf/filter/encryption.rb +1 -1
- data/lib/hexapdf/filter/flate_decode.rb +1 -1
- data/lib/hexapdf/filter/jpx_decode.rb +1 -1
- data/lib/hexapdf/filter/lzw_decode.rb +2 -3
- data/lib/hexapdf/filter/predictor.rb +11 -11
- data/lib/hexapdf/filter/run_length_decode.rb +1 -1
- data/lib/hexapdf/font/cmap.rb +1 -1
- data/lib/hexapdf/font/cmap/parser.rb +1 -1
- data/lib/hexapdf/font/cmap/writer.rb +1 -1
- data/lib/hexapdf/font/encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/base.rb +1 -1
- data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/glyph_list.rb +1 -1
- data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
- data/lib/hexapdf/font/true_type.rb +2 -1
- data/lib/hexapdf/font/true_type/font.rb +1 -1
- data/lib/hexapdf/font/true_type/subsetter.rb +186 -0
- data/lib/hexapdf/font/true_type/table.rb +8 -4
- data/lib/hexapdf/font/true_type/table/cmap.rb +1 -1
- data/lib/hexapdf/font/true_type/table/cmap_subtable.rb +1 -1
- data/lib/hexapdf/font/true_type/table/directory.rb +1 -1
- data/lib/hexapdf/font/true_type/table/glyf.rb +6 -2
- data/lib/hexapdf/font/true_type/table/head.rb +2 -2
- data/lib/hexapdf/font/true_type/table/hhea.rb +1 -1
- data/lib/hexapdf/font/true_type/table/hmtx.rb +1 -1
- data/lib/hexapdf/font/true_type/table/loca.rb +1 -1
- data/lib/hexapdf/font/true_type/table/maxp.rb +1 -1
- data/lib/hexapdf/font/true_type/table/name.rb +1 -1
- data/lib/hexapdf/font/true_type/table/os2.rb +1 -1
- data/lib/hexapdf/font/true_type/table/post.rb +1 -1
- data/lib/hexapdf/font/true_type_wrapper.rb +56 -8
- data/lib/hexapdf/font/type1.rb +1 -1
- data/lib/hexapdf/font/type1/afm_parser.rb +1 -1
- data/lib/hexapdf/font/type1/character_metrics.rb +1 -1
- data/lib/hexapdf/font/type1/font.rb +1 -1
- data/lib/hexapdf/font/type1/font_metrics.rb +1 -1
- data/lib/hexapdf/font/type1/pfb_parser.rb +1 -1
- data/lib/hexapdf/font/type1_wrapper.rb +1 -1
- data/lib/hexapdf/font_loader.rb +1 -1
- data/lib/hexapdf/font_loader/from_configuration.rb +6 -3
- data/lib/hexapdf/font_loader/standard14.rb +1 -1
- data/lib/hexapdf/image_loader.rb +1 -1
- data/lib/hexapdf/image_loader/jpeg.rb +1 -1
- data/lib/hexapdf/image_loader/pdf.rb +1 -1
- data/lib/hexapdf/image_loader/png.rb +1 -1
- data/lib/hexapdf/importer.rb +1 -1
- data/lib/hexapdf/name_tree_node.rb +1 -1
- data/lib/hexapdf/number_tree_node.rb +1 -1
- data/lib/hexapdf/object.rb +1 -1
- data/lib/hexapdf/parser.rb +1 -1
- data/lib/hexapdf/rectangle.rb +1 -1
- data/lib/hexapdf/reference.rb +1 -1
- data/lib/hexapdf/revision.rb +1 -1
- data/lib/hexapdf/revisions.rb +13 -15
- data/lib/hexapdf/serializer.rb +7 -3
- data/lib/hexapdf/stream.rb +1 -1
- data/lib/hexapdf/task.rb +1 -1
- data/lib/hexapdf/task/dereference.rb +1 -1
- data/lib/hexapdf/task/optimize.rb +1 -1
- data/lib/hexapdf/tokenizer.rb +12 -12
- data/lib/hexapdf/type.rb +1 -1
- data/lib/hexapdf/type/catalog.rb +1 -1
- data/lib/hexapdf/type/embedded_file.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +1 -1
- data/lib/hexapdf/type/font.rb +1 -1
- data/lib/hexapdf/type/font_descriptor.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +1 -1
- data/lib/hexapdf/type/font_true_type.rb +1 -1
- data/lib/hexapdf/type/font_type1.rb +1 -1
- data/lib/hexapdf/type/form.rb +1 -1
- data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
- data/lib/hexapdf/type/image.rb +187 -1
- data/lib/hexapdf/type/info.rb +1 -1
- data/lib/hexapdf/type/names.rb +1 -1
- data/lib/hexapdf/type/object_stream.rb +1 -1
- data/lib/hexapdf/type/page.rb +1 -1
- data/lib/hexapdf/type/page_tree_node.rb +6 -1
- data/lib/hexapdf/type/resources.rb +1 -1
- data/lib/hexapdf/type/trailer.rb +2 -2
- data/lib/hexapdf/type/viewer_preferences.rb +1 -1
- data/lib/hexapdf/type/xref_stream.rb +22 -18
- data/lib/hexapdf/utils/bit_field.rb +1 -1
- data/lib/hexapdf/utils/bit_stream.rb +16 -32
- data/lib/hexapdf/utils/lru_cache.rb +1 -1
- data/lib/hexapdf/utils/math_helpers.rb +1 -1
- data/lib/hexapdf/utils/object_hash.rb +1 -1
- data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
- data/lib/hexapdf/utils/sorted_tree_node.rb +1 -1
- data/lib/hexapdf/version.rb +2 -2
- data/lib/hexapdf/writer.rb +2 -1
- data/lib/hexapdf/xref_section.rb +6 -1
- data/man/man1/hexapdf.1 +194 -115
- data/test/data/images/greyscale-1bit.png +0 -0
- data/test/data/images/greyscale-2bit.png +0 -0
- data/test/data/images/greyscale-8bit.png +0 -0
- data/test/data/images/indexed-alpha-4bit.png +0 -0
- data/test/data/images/truecolour-8bit.png +0 -0
- data/test/hexapdf/content/test_operator.rb +8 -8
- data/test/hexapdf/content/test_processor.rb +1 -1
- data/test/hexapdf/encryption/test_security_handler.rb +1 -1
- data/test/hexapdf/font/test_true_type_wrapper.rb +89 -48
- data/test/hexapdf/font/true_type/table/test_glyf.rb +1 -0
- data/test/hexapdf/font/true_type/test_subsetter.rb +70 -0
- data/test/hexapdf/font/true_type/test_table.rb +16 -0
- data/test/hexapdf/font_loader/test_from_configuration.rb +7 -0
- data/test/hexapdf/test_document.rb +1 -1
- data/test/hexapdf/test_object.rb +1 -1
- data/test/hexapdf/test_revisions.rb +34 -8
- data/test/hexapdf/test_serializer.rb +3 -0
- data/test/hexapdf/test_writer.rb +11 -2
- data/test/hexapdf/test_xref_section.rb +15 -0
- data/test/hexapdf/type/test_image.rb +234 -0
- data/test/hexapdf/type/test_object_stream.rb +2 -2
- data/test/hexapdf/type/test_trailer.rb +4 -0
- data/test/hexapdf/utils/test_bit_stream.rb +69 -0
- metadata +14 -6
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This file is part of HexaPDF.
|
|
5
5
|
#
|
|
6
6
|
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
-
# Copyright (C)
|
|
7
|
+
# Copyright (C) 2014-2017 Thomas Leitner
|
|
8
8
|
#
|
|
9
9
|
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
10
|
# under the terms of the GNU Affero General Public License version 3 as
|
|
@@ -31,28 +31,28 @@
|
|
|
31
31
|
# is created or manipulated using HexaPDF.
|
|
32
32
|
#++
|
|
33
33
|
|
|
34
|
-
require 'hexapdf/cli'
|
|
34
|
+
require 'hexapdf/cli/command'
|
|
35
35
|
|
|
36
36
|
module HexaPDF
|
|
37
37
|
module CLI
|
|
38
38
|
|
|
39
|
-
#
|
|
39
|
+
# Lists or extracts embedded files from a PDF file.
|
|
40
40
|
#
|
|
41
41
|
# See: HexaPDF::Type::EmbeddedFile
|
|
42
|
-
class
|
|
42
|
+
class Files < Command
|
|
43
43
|
|
|
44
44
|
def initialize #:nodoc:
|
|
45
|
-
super('
|
|
46
|
-
short_desc("
|
|
45
|
+
super('files', takes_commands: false)
|
|
46
|
+
short_desc("List or extract embedded files from a PDF file")
|
|
47
47
|
long_desc(<<-EOF.gsub!(/^ */, ''))
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
be used to extract one or more files.
|
|
48
|
+
If the option --extract is not given, the available files are listed with their names and
|
|
49
|
+
indices. The --extract option can then be used to extract one or more files.
|
|
51
50
|
EOF
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
"
|
|
55
|
-
|
|
51
|
+
|
|
52
|
+
options.on("--extract [a,b,c,...]", "-e [a,b,c,...]", Array,
|
|
53
|
+
"The indices of the files that should be extracted. Use 0 or no argument to " \
|
|
54
|
+
"extract all files.") do |indices|
|
|
55
|
+
@indices = (indices ? indices.map(&:to_i) : [0])
|
|
56
56
|
end
|
|
57
57
|
options.on("--[no-]search", "-s", "Search the whole PDF instead of the " \
|
|
58
58
|
"standard locations (default: false)") do |search|
|
|
@@ -60,24 +60,22 @@ module HexaPDF
|
|
|
60
60
|
end
|
|
61
61
|
options.on("--password PASSWORD", "-p", String,
|
|
62
62
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
63
|
-
@password = (pwd == '-' ?
|
|
63
|
+
@password = (pwd == '-' ? read_password : pwd)
|
|
64
64
|
end
|
|
65
|
+
|
|
65
66
|
@indices = []
|
|
66
|
-
@password =
|
|
67
|
+
@password = nil
|
|
67
68
|
@search = false
|
|
68
69
|
end
|
|
69
70
|
|
|
70
|
-
def execute(
|
|
71
|
-
HexaPDF::Document.open(
|
|
71
|
+
def execute(pdf) #:nodoc:
|
|
72
|
+
HexaPDF::Document.open(pdf, decryption_opts: {password: @password}) do |doc|
|
|
72
73
|
if @indices.empty?
|
|
73
74
|
list_files(doc)
|
|
74
75
|
else
|
|
75
76
|
extract_files(doc)
|
|
76
77
|
end
|
|
77
78
|
end
|
|
78
|
-
rescue HexaPDF::Error => e
|
|
79
|
-
$stderr.puts "Error while processing the PDF file: #{e.message}"
|
|
80
|
-
exit(1)
|
|
81
79
|
end
|
|
82
80
|
|
|
83
81
|
private
|
|
@@ -104,9 +102,7 @@ module HexaPDF
|
|
|
104
102
|
def extract_files(doc)
|
|
105
103
|
each_file(doc) do |obj, index|
|
|
106
104
|
next unless @indices.include?(index + 1) || @indices.include?(0)
|
|
107
|
-
|
|
108
|
-
raise HexaPDF::Error, "Output file #{obj.path} already exists, not overwriting"
|
|
109
|
-
end
|
|
105
|
+
maybe_raise_on_existing_file(obj.path)
|
|
110
106
|
puts "Extracting #{obj.path}..."
|
|
111
107
|
File.open(obj.path, 'wb') do |file|
|
|
112
108
|
fiber = obj.embedded_file_stream.stream_decoder
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
#--
|
|
4
|
+
# This file is part of HexaPDF.
|
|
5
|
+
#
|
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
+
# Copyright (C) 2014-2017 Thomas Leitner
|
|
8
|
+
#
|
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
|
16
|
+
#
|
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
|
20
|
+
# License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
|
24
|
+
#
|
|
25
|
+
# The interactive user interfaces in modified source and object code
|
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
|
28
|
+
#
|
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
|
31
|
+
# is created or manipulated using HexaPDF.
|
|
32
|
+
#++
|
|
33
|
+
|
|
34
|
+
require 'hexapdf/cli/command'
|
|
35
|
+
|
|
36
|
+
module HexaPDF
|
|
37
|
+
module CLI
|
|
38
|
+
|
|
39
|
+
# Lists or extracts images from a PDF file.
|
|
40
|
+
#
|
|
41
|
+
# See: HexaPDF::Type::Image
|
|
42
|
+
class Images < Command
|
|
43
|
+
|
|
44
|
+
def initialize #:nodoc:
  super('images', takes_commands: false)

  # Defaults; the option handlers registered below overwrite them when the
  # corresponding option is given on the command line.
  @indices = []
  @prefix = 'image'
  @password = nil
  @search = false

  short_desc("List or extract images from a PDF file")
  long_desc(<<-EOF.gsub!(/^ */, ''))
    If the option --extract is not given, the available images are listed with their index and
    additional information, sorted by page number. The --extract option can then be used to
    extract one or more images, saving them to files called `prefix-n.ext` where the prefix
    can be set via --prefix, n is the index and ext is either png, jpg or jpx.
  EOF

  # --extract without an argument yields nil, which is mapped to [0] ("all images").
  options.on("--extract [A,B,C,...]", "-e [A,B,C,...]", Array,
             "The indices of the images that should be extracted. Use 0 or no argument to " \
             "extract all images.") do |list|
    @indices = if list
                 list.map(&:to_i)
               else
                 [0]
               end
  end
  options.on("--prefix PREFIX", String,
             "The prefix to use when saving images. May include directories. Default: " \
             "image.") do |value|
    @prefix = value
  end
  options.on("--[no-]search", "-s", "Search the whole PDF instead of the " \
             "standard locations (default: false)") do |flag|
    @search = flag
  end
  options.on("--password PASSWORD", "-p", String,
             "The password for decryption. Use - for reading from standard input.") do |pwd|
    # A single dash means that the password is read via read_password instead.
    @password = if pwd == '-'
                  read_password
                else
                  pwd
                end
  end
end
|
|
78
|
+
|
|
79
|
+
def execute(pdf) #:nodoc:
  # Without any requested indices the images are only listed, otherwise the
  # requested ones are extracted.
  HexaPDF::Document.open(pdf, decryption_opts: {password: @password}) do |doc|
    @indices.empty? ? list_images(doc) : extract_images(doc)
  end
end
|
|
88
|
+
|
|
89
|
+
private
|
|
90
|
+
|
|
91
|
+
# Outputs a table with the images of the PDF document.
|
|
92
|
+
def list_images(doc)
  # Header line followed by a separator as wide as the formatted columns.
  titles = ["index", "page", "oid", "width", "height", "color", "comp", "bpc", "type",
            "writable"]
  printf("%5s %5s %9s %6s %6s %5s %4s %3s %5s %8s\n", *titles)
  puts("-" * 65)

  # One row per yielded image; a missing page index is shown as '-'.
  each_image(doc) do |image, index, pindex|
    info = image.info
    row = [
      index, pindex || '-', "#{image.oid},#{image.gen}", info.width, info.height,
      info.color_space, info.components, info.bits_per_component, info.type,
      info.writable
    ]
    printf("%5i %5s %9s %6i %6i %5s %4i %3i %5s %8s\n", *row)
  end
end
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
# Extracts the images with the given indices.
|
|
108
|
+
def extract_images(doc)
  # #each_image yields an image again (with the index it was first given) for
  # every further page that uses it. Each index must only be written once,
  # otherwise the second pass would trip over the file that was just created.
  extracted = {}
  each_image(doc) do |image, index, _|
    next unless @indices.include?(index) || @indices.include?(0)
    next if extracted.key?(index)
    extracted[index] = true

    path = "#{@prefix}-#{index}.#{image.info.extension}"
    maybe_raise_on_existing_file(path)
    puts "Extracting #{path}..."
    image.write(path)
  end
end
|
|
117
|
+
|
|
118
|
+
# Iterates over all images.
|
|
119
|
+
def each_image(doc) # :yields: obj, index, page_index
  next_index = 1
  assigned = {} # image object => index it was first yielded with

  doc.pages.each_with_index do |page, page_pos|
    xobjects = page.resources[:XObject]
    next if xobjects.nil?

    xobjects.each do |_name, xobject|
      known_index = assigned[xobject]
      if known_index
        # Already seen on an earlier page (or under another name): reuse its index.
        yield(xobject, known_index, page_pos + 1)
      elsif xobject[:Subtype] == :Image && !xobject[:ImageMask]
        yield(xobject, next_index, page_pos + 1)
        assigned[xobject] = next_index
        next_index += 1
      end
    end
  end

  return unless @search

  # With --search, images not found via page resources are yielded without a page index.
  doc.images.each do |image|
    next if assigned[image]
    yield(image, next_index, nil)
    next_index += 1
  end
end
|
|
143
|
+
|
|
144
|
+
end
|
|
145
|
+
|
|
146
|
+
end
|
|
147
|
+
end
|
data/lib/hexapdf/cli/info.rb
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This file is part of HexaPDF.
|
|
5
5
|
#
|
|
6
6
|
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
-
# Copyright (C)
|
|
7
|
+
# Copyright (C) 2014-2017 Thomas Leitner
|
|
8
8
|
#
|
|
9
9
|
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
10
|
# under the terms of the GNU Affero General Public License version 3 as
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
# is created or manipulated using HexaPDF.
|
|
32
32
|
#++
|
|
33
33
|
|
|
34
|
-
require 'hexapdf/cli'
|
|
34
|
+
require 'hexapdf/cli/command'
|
|
35
35
|
|
|
36
36
|
module HexaPDF
|
|
37
37
|
module CLI
|
|
@@ -44,7 +44,7 @@ module HexaPDF
|
|
|
44
44
|
# * The used PDF version
|
|
45
45
|
#
|
|
46
46
|
# See: HexaPDF::Type::Info, HexaPDF::Encryption::SecurityHandler
|
|
47
|
-
class Info <
|
|
47
|
+
class Info < Command
|
|
48
48
|
|
|
49
49
|
def initialize #:nodoc:
|
|
50
50
|
super('info', takes_commands: false)
|
|
@@ -55,9 +55,9 @@ module HexaPDF
|
|
|
55
55
|
EOF
|
|
56
56
|
options.on("--password PASSWORD", "-p", String,
|
|
57
57
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
58
|
-
@password = (pwd == '-' ?
|
|
58
|
+
@password = (pwd == '-' ? read_password : pwd)
|
|
59
59
|
end
|
|
60
|
-
@password =
|
|
60
|
+
@password = nil
|
|
61
61
|
@auto_decrypt = true
|
|
62
62
|
end
|
|
63
63
|
|
data/lib/hexapdf/cli/inspect.rb
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This file is part of HexaPDF.
|
|
5
5
|
#
|
|
6
6
|
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
-
# Copyright (C)
|
|
7
|
+
# Copyright (C) 2014-2017 Thomas Leitner
|
|
8
8
|
#
|
|
9
9
|
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
10
|
# under the terms of the GNU Affero General Public License version 3 as
|
|
@@ -31,13 +31,13 @@
|
|
|
31
31
|
# is created or manipulated using HexaPDF.
|
|
32
32
|
#++
|
|
33
33
|
|
|
34
|
-
require 'hexapdf/cli'
|
|
34
|
+
require 'hexapdf/cli/command'
|
|
35
35
|
|
|
36
36
|
module HexaPDF
|
|
37
37
|
module CLI
|
|
38
38
|
|
|
39
39
|
# Shows the internal structure of a PDF file.
|
|
40
|
-
class Inspect <
|
|
40
|
+
class Inspect < Command
|
|
41
41
|
|
|
42
42
|
def initialize #:nodoc:
|
|
43
43
|
super('inspect', takes_commands: false)
|
|
@@ -47,13 +47,13 @@ module HexaPDF
|
|
|
47
47
|
needs to inspect the internal object structure or a stream of a PDF file. A PDF object is
|
|
48
48
|
always shown in the PDF syntax.
|
|
49
49
|
|
|
50
|
-
If no option is given, the
|
|
51
|
-
|
|
52
|
-
|
|
50
|
+
If no option is given, the PDF trailer is shown. Otherwise the various, mutually exclusive
|
|
51
|
+
display options define the shown content. If multiple such options are specified only the
|
|
52
|
+
last is respected.
|
|
53
53
|
EOF
|
|
54
54
|
|
|
55
|
-
options.on("
|
|
56
|
-
@exec = :
|
|
55
|
+
options.on("--catalog", "Show the PDF catalog dictionary.") do
|
|
56
|
+
@exec = :catalog
|
|
57
57
|
end
|
|
58
58
|
options.on("-c", "--page-count", "Print the number of pages.") do
|
|
59
59
|
@exec = :page_count
|
|
@@ -84,11 +84,11 @@ module HexaPDF
|
|
|
84
84
|
options.separator("")
|
|
85
85
|
options.on("--password PASSWORD", "-p", String,
|
|
86
86
|
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
|
87
|
-
@password = (pwd == '-' ?
|
|
87
|
+
@password = (pwd == '-' ? read_password : pwd)
|
|
88
88
|
end
|
|
89
89
|
|
|
90
90
|
@password = nil
|
|
91
|
-
@exec = :
|
|
91
|
+
@exec = :trailer
|
|
92
92
|
@param = nil
|
|
93
93
|
@raw = nil
|
|
94
94
|
end
|
|
@@ -117,9 +117,10 @@ module HexaPDF
|
|
|
117
117
|
end
|
|
118
118
|
|
|
119
119
|
def do_pages(doc) #:nodoc:
|
|
120
|
-
pages =
|
|
120
|
+
pages = parse_pages_specification(@param, doc.pages.count)
|
|
121
|
+
page_list = doc.pages.to_a
|
|
121
122
|
pages.each do |index, _|
|
|
122
|
-
page =
|
|
123
|
+
page = page_list[index]
|
|
123
124
|
str = "page #{index + 1} (#{page.oid},#{page.gen}): "
|
|
124
125
|
str << Array(page[:Contents]).map {|c| "#{c.oid},#{c.gen}"}.join(" ")
|
|
125
126
|
puts str
|
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
|
2
|
+
#
|
|
3
|
+
#--
|
|
4
|
+
# This file is part of HexaPDF.
|
|
5
|
+
#
|
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
|
7
|
+
# Copyright (C) 2014-2017 Thomas Leitner
|
|
8
|
+
#
|
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
|
16
|
+
#
|
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
|
20
|
+
# License for more details.
|
|
21
|
+
#
|
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
|
24
|
+
#
|
|
25
|
+
# The interactive user interfaces in modified source and object code
|
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
|
28
|
+
#
|
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
|
31
|
+
# is created or manipulated using HexaPDF.
|
|
32
|
+
#++
|
|
33
|
+
|
|
34
|
+
require 'hexapdf/cli/command'
|
|
35
|
+
|
|
36
|
+
module HexaPDF
|
|
37
|
+
module CLI
|
|
38
|
+
|
|
39
|
+
# Merges pages from multiple PDF files.
|
|
40
|
+
class Merge < Command
|
|
41
|
+
|
|
42
|
+
InputSpec = Struct.new(:file, :pages, :password) #:nodoc:
|
|
43
|
+
|
|
44
|
+
def initialize #:nodoc:
  super('merge', takes_commands: false)
  short_desc("Merge multiple PDF files")
  long_desc(<<-EOF.gsub!(/^ */, ''))
    This command merges pages from multiple PDFs into one output file which can optionally be
    encrypted/decrypted and optimized in various ways.

    The first input file is the primary input file from which meta data like file information,
    outlines, etc. are taken. Alternatively, it is possible to start with an empty PDF
    file by using --empty. The order of the files is important as they are used in that order.

    Also note that the --password and --pages options apply to the last preceding input file.
  EOF

  # Catch-all pattern: each matching argument is recorded as an input spec with
  # the default page range '1-e'. The last recorded spec is later taken as the
  # output file by #execute.
  options.on(/.*/, "Input file, can be specified multiple times") do |file|
    @files << InputSpec.new(file, '1-e')
    throw :prune
  end
  # --password and --pages modify the most recently recorded input spec and are
  # therefore rejected when no file has been given yet.
  options.on("-p", "--password PASSWORD", String, "The password for decrypting the last " \
             "specified input file (use - for reading from standard input)") do |pwd|
    raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
    pwd = (pwd == '-' ? read_password("#{@files.last.file} password") : pwd)
    @files.last.password = pwd
  end
  options.on("-i", "--pages PAGES", "The pages of the last specified input file that " \
             "should be used (default: 1-e)") do |pages|
    raise OptionParser::InvalidArgument, "(No prior input file specified)" if @files.empty?
    @files.last.pages = pages
  end
  options.on("-e", "--empty", "Use an empty file as the first input file") do
    @initial_empty = true
  end
  options.on("--[no-]interleave", "Interleave the pages from the input files (default: " \
             "false)") do |c|
    @interleave = c
  end

  options.separator("")
  options.separator("Output related options")
  define_optimization_options
  define_encryption_options

  @files = []
  @initial_empty = false
  @interleave = false
end
|
|
90
|
+
|
|
91
|
+
def execute #:nodoc:
  # The last recorded file is the output file, so at least one file is needed
  # with --empty and at least two files (input + output) without it.
  if !@initial_empty && @files.empty?
    error = OptionParser::ParseError.new("At least one FILE or --empty is needed")
    error.reason = "Missing argument"
    raise error
  elsif (@initial_empty && @files.empty?) || (!@initial_empty && @files.length < 2)
    error = OptionParser::ParseError.new("Output file is needed")
    error.reason = "Missing argument"
    raise error
  end

  output_file = @files.pop.file
  maybe_raise_on_existing_file(output_file)

  # Create PDF documents for each input file. A file that is specified multiple
  # times is only opened once; PDF data is binary, so the file has to be opened
  # in binary mode.
  cache = {}
  @files.each do |spec|
    cache[spec.file] ||= HexaPDF::Document.new(io: File.open(spec.file, 'rb'),
                                               decryption_opts: {password: spec.password})
    spec.file = cache[spec.file]
  end

  # Assemble pages in a fresh page tree that then replaces the one of the target
  target = (@initial_empty ? HexaPDF::Document.new : @files.first.file)
  page_tree = target.add(Type: :Pages)
  import_pages(page_tree)
  target.catalog[:Pages] = page_tree

  # Remove potentially imported but unused pages and page tree nodes
  retained = target.pages.each_with_object({}) {|page, h| h[page.data] = true}
  retained[target.pages.root.data] = true
  target.each(current: false) do |obj|
    next unless obj.kind_of?(HexaPDF::Dictionary)
    if (obj.type == :Pages || obj.type == :Page) && !retained.key?(obj.data)
      target.delete(obj)
    end
  end

  apply_encryption_options(target)
  apply_optimization_options(target)

  target.write(output_file)
rescue HexaPDF::Error => e
  $stderr.puts "Processing error : #{e.message}"
  exit(1)
end
|
|
137
|
+
|
|
138
|
+
def usage #:nodoc:
  # Custom usage line since the files are collected via the option parser.
  program = command_parser.main_options.program_name
  "Usage: #{program} merge [options] {FILE | --empty} [FILE]... OUT_FILE"
end
|
|
142
|
+
|
|
143
|
+
private
|
|
144
|
+
|
|
145
|
+
# Imports the pages of the document as specified with the --pages option to the given page
|
|
146
|
+
# tree.
|
|
147
|
+
def import_pages(page_tree)
  # Resolve the textual page specification of every input into entries of the
  # form [page object, rotation]. A missing rotation falls back to the page's
  # own /Rotate value or :none.
  @files.each do |spec|
    page_list = spec.file.pages.to_a
    spec.pages = parse_pages_specification(spec.pages, spec.file.pages.count)
    spec.pages.each do |entry|
      entry[0] = page_list[entry[0]]
      entry[1] ||= entry[0].value[:Rotate] || :none
    end
  end

  if @interleave
    # Round-robin: first page of every input, then second page of every input,
    # and so on; inputs that have run out of pages are skipped. Works without
    # any input (e.g. only --empty was given) and without any selected page.
    rounds = @files.map {|spec| spec.pages.size}.max || 0
    rounds.times do |round|
      @files.each_with_index do |spec, findex|
        entry = spec.pages[round]
        next unless entry
        page, rotation = entry
        import_page(page_tree, findex, page, rotation)
      end
    end
  else
    @files.each_with_index do |spec, findex|
      spec.pages.each {|page, rotation| import_page(page_tree, findex, page, rotation)}
    end
  end
end
|
|
179
|
+
|
|
180
|
+
# Import the page with the given +rotation+ into the page tree.
|
|
181
|
+
def import_page(page_tree, source_index, page, rotation)
  if page_tree.document != page.document
    # Page of another document: import it into the target and work on a copy.
    page = page_tree.document.import(page).deep_copy
  else
    # Page already belongs to the target document: store the inherited values
    # directly in the page and copy it unless it comes from the first input.
    page.value.update(page.copy_inherited_values)
    page = page.deep_copy if source_index != 0
  end

  # :none removes any rotation, an integer is added to the existing rotation;
  # every other value leaves the page untouched.
  case rotation
  when :none
    page.delete(:Rotate)
  when Integer
    current = page[:Rotate] || 0
    page[:Rotate] = (current + rotation) % 360
  end

  page_tree.document.add(page)
  page_tree.add_page(page)
end
|
|
196
|
+
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
end
|
|
200
|
+
end
|