hexapdf 0.41.0 → 0.43.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +55 -0
- data/Rakefile +1 -1
- data/examples/031-acro_form_java_script.rb +36 -24
- data/lib/hexapdf/cli/command.rb +14 -11
- data/lib/hexapdf/cli/files.rb +31 -7
- data/lib/hexapdf/cli/form.rb +10 -31
- data/lib/hexapdf/cli/inspect.rb +1 -1
- data/lib/hexapdf/cli/usage.rb +215 -0
- data/lib/hexapdf/cli.rb +2 -0
- data/lib/hexapdf/configuration.rb +1 -1
- data/lib/hexapdf/dictionary.rb +3 -3
- data/lib/hexapdf/document.rb +14 -1
- data/lib/hexapdf/encryption.rb +17 -0
- data/lib/hexapdf/layout/box.rb +1 -0
- data/lib/hexapdf/layout/box_fitter.rb +3 -3
- data/lib/hexapdf/layout/column_box.rb +2 -2
- data/lib/hexapdf/layout/container_box.rb +1 -1
- data/lib/hexapdf/layout/line.rb +4 -0
- data/lib/hexapdf/layout/list_box.rb +2 -2
- data/lib/hexapdf/layout/table_box.rb +1 -1
- data/lib/hexapdf/layout/text_box.rb +16 -2
- data/lib/hexapdf/parser.rb +20 -17
- data/lib/hexapdf/type/acro_form/button_field.rb +7 -5
- data/lib/hexapdf/type/acro_form/form.rb +123 -27
- data/lib/hexapdf/type/acro_form/java_script_actions.rb +165 -14
- data/lib/hexapdf/type/acro_form/text_field.rb +13 -1
- data/lib/hexapdf/type/resources.rb +2 -1
- data/lib/hexapdf/utils.rb +19 -0
- data/lib/hexapdf/version.rb +1 -1
- data/test/hexapdf/layout/test_box_fitter.rb +3 -3
- data/test/hexapdf/layout/test_text_box.rb +27 -1
- data/test/hexapdf/test_dictionary.rb +6 -4
- data/test/hexapdf/test_parser.rb +12 -0
- data/test/hexapdf/test_utils.rb +16 -0
- data/test/hexapdf/type/acro_form/test_button_field.rb +5 -0
- data/test/hexapdf/type/acro_form/test_form.rb +110 -2
- data/test/hexapdf/type/acro_form/test_java_script_actions.rb +102 -1
- data/test/hexapdf/type/acro_form/test_text_field.rb +22 -4
- data/test/hexapdf/type/test_resources.rb +5 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 24f6839e903fd945678915625b1e2ef6a12221f29a82cf1c7a6d8bcea38af288
|
4
|
+
data.tar.gz: 2f8309e2ef2406dd279e00643bf7fbf73ded63a1076c0067684a356fea8ee89e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8c6ddd8ec6f19d39daa9a6422fdb3595d255b528fb93ca7907ef12dab0eca23c74de9fcfc27d02c15b3a9d3c24d47c7f7db6ea472f4c1ed34c2b7c06d4a8a351
|
7
|
+
data.tar.gz: 1d77c2da70d9048cbef6935afacde40fb6d4041414fce571a331a4bee33b0e3ee2ff59bb28eee02e515798c3c156a57f42f139356e946f64e878ea962756a1d8
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,58 @@
|
|
1
|
+
## 0.43.0 - 2024-05-26
|
2
|
+
|
3
|
+
### Added
|
4
|
+
|
5
|
+
* [HexaPDF::Type::AcroForm::Form#create_namespace_field] for creating a pure
|
6
|
+
namespace field
|
7
|
+
* [HexaPDF::Type::AcroForm::Form#delete_field] for deleting fields
|
8
|
+
|
9
|
+
### Changed
|
10
|
+
|
11
|
+
* Minimum Ruby version to be 3.0
|
12
|
+
* **Breaking change**: Renamed `HexaPDF::Layout::BoxFitter#fit_successful?` to
|
13
|
+
[HexaPDF::Layout::BoxFitter#success?]
|
14
|
+
* **Breaking change**: Removed `HexaPDF::Dictionary#to_h`
|
15
|
+
* Form field creation methods of [HexaPDF::Type::AcroForm::Form] to
|
16
|
+
automatically create parent fields as namespace fields
|
17
|
+
|
18
|
+
### Fixed
|
19
|
+
|
20
|
+
* [HexaPDF::Layout::TextBox#fit] to correctly calculate width in case of flowing
|
21
|
+
text around other boxes
|
22
|
+
* [HexaPDF::Layout::TextBox#draw] to correctly draw border, background... on
|
23
|
+
boxes using position 'flow'
|
24
|
+
* Comparison of Hash with [HexaPDF::Dictionary] objects by implementing
|
25
|
+
`#to_hash`
|
26
|
+
* Parsing of invalid files having multiple end-of-file markers with the last one
|
27
|
+
being invalid
|
28
|
+
|
29
|
+
|
30
|
+
## 0.42.0 - 2024-05-12
|
31
|
+
|
32
|
+
### Added
|
33
|
+
|
34
|
+
* Support for the `AFPercent_Format` JavaScript method
|
35
|
+
* Support for the `AFTime_Format` JavaScript method
|
36
|
+
* [HexaPDF::Type::AcroForm::Form#fill] for easily filling out form fields
|
37
|
+
* CLI command `hexapdf usage` for showing space usage information
|
38
|
+
* Support for attaching files via `hexapdf files` CLI command
|
39
|
+
* Refinement on [HexaPDF::Utils] to support conversion of Numeric values to
|
40
|
+
points (e.g. `5.mm`, `5.cm`, `5.inch`)
|
41
|
+
|
42
|
+
### Changed
|
43
|
+
|
44
|
+
* [HexaPDF::Type::AcroForm::ButtonField#field_value=] to always allow using
|
45
|
+
`true` for check boxes
|
46
|
+
* CLI commands to prompt whether an existing output file should be overwritten
|
47
|
+
|
48
|
+
### Fixed
|
49
|
+
|
50
|
+
* [HexaPDF::Type::Resources#font] to always return a correctly wrapped font
|
51
|
+
object
|
52
|
+
* [HexaPDF::Type::AcroForm::TextField#field_value=] to actually use the value
|
53
|
+
returned by the call to the config option 'acro_form.on_invalid_value'
|
54
|
+
|
55
|
+
|
1
56
|
## 0.41.0 - 2024-05-05
|
2
57
|
|
3
58
|
### Added
|
data/Rakefile
CHANGED
@@ -47,7 +47,7 @@ namespace :dev do
|
|
47
47
|
end
|
48
48
|
|
49
49
|
task :test_all do
|
50
|
-
versions = `rbenv versions --bare | grep -i ^
|
50
|
+
versions = `rbenv versions --bare | grep -i ^3.`.split("\n")
|
51
51
|
versions.each do |version|
|
52
52
|
sh "eval \"$(rbenv init -)\"; rbenv shell #{version} && ruby -v && rake test"
|
53
53
|
end
|
@@ -31,70 +31,82 @@ tx.set_format_action(:number, decimals: 2, separator_style: :comma)
|
|
31
31
|
widget = tx.create_widget(page, Rect: [200, 615, 500, 635])
|
32
32
|
tx.field_value = "1234567.898"
|
33
33
|
|
34
|
-
canvas.text("
|
34
|
+
canvas.text("Percent format", at: [70, 590])
|
35
|
+
tx = form.create_text_field("Percent_Format", font_size: 16)
|
36
|
+
tx.set_format_action(:percent, decimals: 2, separator_style: :comma)
|
37
|
+
widget = tx.create_widget(page, Rect: [200, 585, 500, 605])
|
38
|
+
tx.field_value = "12,45678"
|
35
39
|
|
36
|
-
canvas.text("
|
37
|
-
|
40
|
+
canvas.text("Time format", at: [70, 560])
|
41
|
+
tx = form.create_text_field("Time_Format", font_size: 16)
|
42
|
+
tx.set_format_action(:time, format: :hh_mm_ss)
|
43
|
+
widget = tx.create_widget(page, Rect: [200, 555, 500, 575])
|
44
|
+
tx.field_value = "3:15:20 pm"
|
45
|
+
|
46
|
+
canvas.text("Calculate actions", at: [50, 510])
|
47
|
+
|
48
|
+
canvas.text("Source fields", at: [70, 480])
|
49
|
+
canvas.text("a:", at: [200, 480])
|
38
50
|
tx = form.create_text_field("a", font_size: 16)
|
39
51
|
tx.set_format_action(:number, decimals: 2)
|
40
|
-
widget = tx.create_widget(page, Rect: [220,
|
52
|
+
widget = tx.create_widget(page, Rect: [220, 475, 280, 495])
|
41
53
|
tx.field_value = "10,50"
|
42
|
-
canvas.text("b:", at: [310,
|
54
|
+
canvas.text("b:", at: [310, 480])
|
43
55
|
tx = form.create_text_field("b", font_size: 16)
|
44
56
|
tx.set_format_action(:number, decimals: 2)
|
45
|
-
widget = tx.create_widget(page, Rect: [330,
|
57
|
+
widget = tx.create_widget(page, Rect: [330, 475, 390, 495])
|
46
58
|
tx.field_value = "20,60"
|
47
|
-
canvas.text("c:", at: [420,
|
59
|
+
canvas.text("c:", at: [420, 480])
|
48
60
|
tx = form.create_text_field("c", font_size: 16)
|
49
61
|
tx.set_format_action(:number, decimals: 2)
|
50
|
-
widget = tx.create_widget(page, Rect: [440,
|
62
|
+
widget = tx.create_widget(page, Rect: [440, 475, 500, 495])
|
51
63
|
tx.field_value = "30,70"
|
52
64
|
|
53
|
-
canvas.text("Predefined", at: [70,
|
54
|
-
canvas.text("Sum", at: [90,
|
65
|
+
canvas.text("Predefined", at: [70, 450])
|
66
|
+
canvas.text("Sum", at: [90, 420])
|
55
67
|
tx = form.create_text_field("sum", font_size: 16)
|
56
68
|
tx.set_format_action(:number, decimals: 2)
|
57
69
|
tx.set_calculate_action(:sum, fields: ['a', 'b', 'c'])
|
58
70
|
tx.flag(:read_only)
|
59
|
-
widget = tx.create_widget(page, Rect: [310,
|
60
|
-
canvas.text("Average", at: [90,
|
71
|
+
widget = tx.create_widget(page, Rect: [310, 415, 500, 435])
|
72
|
+
canvas.text("Average", at: [90, 390])
|
61
73
|
tx = form.create_text_field("average", font_size: 16)
|
62
74
|
tx.set_format_action(:number, decimals: 2)
|
63
75
|
tx.set_calculate_action(:average, fields: ['a', 'b', 'c'])
|
64
76
|
tx.flag(:read_only)
|
65
|
-
widget = tx.create_widget(page, Rect: [310,
|
66
|
-
canvas.text("Product", at: [90,
|
77
|
+
widget = tx.create_widget(page, Rect: [310, 385, 500, 405])
|
78
|
+
canvas.text("Product", at: [90, 360])
|
67
79
|
tx = form.create_text_field("product", font_size: 16)
|
68
80
|
tx.set_format_action(:number, decimals: 2)
|
69
81
|
tx.set_calculate_action(:product, fields: ['a', 'b', 'c'])
|
70
82
|
tx.flag(:read_only)
|
71
|
-
widget = tx.create_widget(page, Rect: [310,
|
72
|
-
canvas.text("Minimum", at: [90,
|
83
|
+
widget = tx.create_widget(page, Rect: [310, 355, 500, 375])
|
84
|
+
canvas.text("Minimum", at: [90, 330])
|
73
85
|
tx = form.create_text_field("min", font_size: 16)
|
74
86
|
tx.set_format_action(:number, decimals: 2)
|
75
87
|
tx.set_calculate_action(:min, fields: ['a', 'b', 'c'])
|
76
88
|
tx.flag(:read_only)
|
77
|
-
widget = tx.create_widget(page, Rect: [310,
|
78
|
-
canvas.text("Maximum", at: [90,
|
89
|
+
widget = tx.create_widget(page, Rect: [310, 325, 500, 345])
|
90
|
+
canvas.text("Maximum", at: [90, 300])
|
79
91
|
tx = form.create_text_field("max", font_size: 16)
|
80
92
|
tx.set_format_action(:number, decimals: 2)
|
81
93
|
tx.set_calculate_action(:max, fields: ['a', 'b', 'c'])
|
82
94
|
tx.flag(:read_only)
|
83
|
-
widget = tx.create_widget(page, Rect: [310,
|
95
|
+
widget = tx.create_widget(page, Rect: [310, 295, 500, 315])
|
84
96
|
|
85
|
-
canvas.text("Simplified Field Notation", at: [70,
|
86
|
-
canvas.text("a + b + c", at: [90,
|
97
|
+
canvas.text("Simplified Field Notation", at: [70, 270])
|
98
|
+
canvas.text("a + b + c", at: [90, 240])
|
87
99
|
tx = form.create_text_field("sfn1", font_size: 16)
|
88
100
|
tx.set_format_action(:number, decimals: 2)
|
89
101
|
tx.set_calculate_action(:sfn, fields: "a + b + c")
|
90
102
|
tx.flag(:read_only)
|
91
|
-
widget = tx.create_widget(page, Rect: [310,
|
92
|
-
canvas.text("(a + b)*(c - a) / b + 3.14", at: [90,
|
103
|
+
widget = tx.create_widget(page, Rect: [310, 235, 500, 255])
|
104
|
+
canvas.text("(a + b)*(c - a) / b + 3.14", at: [90, 210])
|
93
105
|
tx = form.create_text_field("sfn2", font_size: 16)
|
94
106
|
tx.set_format_action(:number, decimals: 2)
|
95
107
|
tx.set_calculate_action(:sfn, fields: "(a + b)*(c - a) / b + 3.14")
|
96
108
|
tx.flag(:read_only)
|
97
|
-
widget = tx.create_widget(page, Rect: [310,
|
109
|
+
widget = tx.create_widget(page, Rect: [310, 205, 500, 225])
|
98
110
|
|
99
111
|
form.recalculate_fields
|
100
112
|
|
data/lib/hexapdf/cli/command.rb
CHANGED
@@ -53,7 +53,7 @@ module HexaPDF
|
|
53
53
|
module Extensions #:nodoc:
|
54
54
|
def help_banner #:nodoc:
|
55
55
|
"hexapdf #{HexaPDF::VERSION} - Versatile PDF Manipulation Tool\n" \
|
56
|
-
"Copyright (c) 2014-
|
56
|
+
"Copyright (c) 2014-2024 Thomas Leitner; licensed under the AGPLv3\n\n" \
|
57
57
|
"#{format(usage, indent: 7)}\n\n"
|
58
58
|
end
|
59
59
|
|
@@ -167,12 +167,12 @@ module HexaPDF
|
|
167
167
|
end
|
168
168
|
end
|
169
169
|
|
170
|
-
# Checks whether the given output file exists and
|
171
|
-
# HexaPDF::CLI#force is
|
170
|
+
# Checks whether the given output file exists and asks whether to overwrite the output file if
|
171
|
+
# it does. If HexaPDF::CLI#force is set, a possibly existing output file is always overwritten.
|
172
172
|
def maybe_raise_on_existing_file(filename)
|
173
173
|
if !command_parser.force && File.exist?(filename)
|
174
|
-
|
175
|
-
|
174
|
+
response = read_from_console("Output file '#{filename}' already exists - overwrite? (y/n)")
|
175
|
+
exit(1) unless response =~ /y/i
|
176
176
|
end
|
177
177
|
end
|
178
178
|
|
@@ -377,9 +377,9 @@ module HexaPDF
|
|
377
377
|
# console.
|
378
378
|
def read_password(prompt = "Password")
|
379
379
|
if $stdin.tty?
|
380
|
-
read_from_console(prompt)
|
380
|
+
read_from_console(prompt, noecho: true)
|
381
381
|
else
|
382
|
-
($stdin.gets || read_from_console(prompt)).chomp
|
382
|
+
($stdin.gets || read_from_console(prompt, noecho: true)).chomp
|
383
383
|
end
|
384
384
|
end
|
385
385
|
|
@@ -407,11 +407,14 @@ module HexaPDF
|
|
407
407
|
private
|
408
408
|
|
409
409
|
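# Displays the given prompt, reads a line from the console (without echo if +noecho+ is +true+) and returns the read string. NOTE(review): the +noecho+ branch ends with +puts+, so it appears to return +nil+ instead of the string - confirm.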
|
410
|
-
def read_from_console(prompt)
|
410
|
+
def read_from_console(prompt, noecho: false)
|
411
411
|
IO.console.write("#{prompt}: ")
|
412
|
-
|
413
|
-
|
414
|
-
|
412
|
+
if noecho
|
413
|
+
IO.console.noecho {|io| io.gets.chomp }
|
414
|
+
puts
|
415
|
+
else
|
416
|
+
IO.console.gets.chomp
|
417
|
+
end
|
415
418
|
end
|
416
419
|
|
417
420
|
end
|
data/lib/hexapdf/cli/files.rb
CHANGED
@@ -39,19 +39,29 @@ require 'hexapdf/cli/command'
|
|
39
39
|
module HexaPDF
|
40
40
|
module CLI
|
41
41
|
|
42
|
-
# Lists or extracts embedded files from a PDF file.
|
42
|
+
# Lists or extracts embedded files from a PDF file, or attaches files to it.
|
43
43
|
#
|
44
44
|
# See: HexaPDF::Type::EmbeddedFile
|
45
45
|
class Files < Command
|
46
46
|
|
47
47
|
def initialize #:nodoc:
|
48
48
|
super('files', takes_commands: false)
|
49
|
-
short_desc("List
|
49
|
+
short_desc("List and extract embedded files from a PDF or attach files")
|
50
50
|
long_desc(<<~EOF)
|
51
|
-
If the option --extract is
|
52
|
-
indices. The --extract option can then be
|
51
|
+
If neither the option --attach nor the option --extract is given, the available
|
52
|
+
files are listed with their names and indices. The --extract option can then be
|
53
|
+
used to extract one or more files. Or the --attach option can be used to attach
|
54
|
+
files to the PDF.
|
53
55
|
EOF
|
54
56
|
|
57
|
+
options.on("--attach FILE", "-a FILE", String,
|
58
|
+
"The file that should be attached. Can be used multiple times.") do |file|
|
59
|
+
@attach_files << [file, nil]
|
60
|
+
end
|
61
|
+
options.on("--description DESC", "-d DESC", String,
|
62
|
+
"Adds a description to the last file to be attached.") do |description|
|
63
|
+
@attach_files[-1][1] = description
|
64
|
+
end
|
55
65
|
options.on("--extract [a,b,c,...]", "-e [a,b,c,...]", Array,
|
56
66
|
"The indices of the files that should be extracted. Use 0 or no argument to " \
|
57
67
|
"extract all files.") do |indices|
|
@@ -66,15 +76,24 @@ module HexaPDF
|
|
66
76
|
@password = (pwd == '-' ? read_password : pwd)
|
67
77
|
end
|
68
78
|
|
79
|
+
@attach_files = []
|
69
80
|
@indices = []
|
70
81
|
@password = nil
|
71
82
|
@search = false
|
72
83
|
end
|
73
84
|
|
74
|
-
def execute(pdf) #:nodoc:
|
75
|
-
|
76
|
-
|
85
|
+
def execute(pdf, output = nil) #:nodoc:
|
86
|
+
if @indices.empty? && !@attach_files.empty?
|
87
|
+
raise Error, "Missing output file" unless output
|
88
|
+
maybe_raise_on_existing_file(output)
|
89
|
+
end
|
90
|
+
with_document(pdf, password: @password, out_file: output) do |doc|
|
91
|
+
if @indices.empty? && @attach_files.empty?
|
77
92
|
list_files(doc)
|
93
|
+
elsif !@indices.empty? && !@attach_files.empty?
|
94
|
+
raise Error, "Use either --attach or --extract but not both"
|
95
|
+
elsif !@attach_files.empty?
|
96
|
+
attach_files(doc)
|
78
97
|
else
|
79
98
|
extract_files(doc)
|
80
99
|
end
|
@@ -116,6 +135,11 @@ module HexaPDF
|
|
116
135
|
end
|
117
136
|
end
|
118
137
|
|
138
|
+
# Attaches the files given on the CLI to the document.
|
139
|
+
def attach_files(doc)
|
140
|
+
@attach_files.each {|file, desc| doc.files.add(file, description: desc) }
|
141
|
+
end
|
142
|
+
|
119
143
|
# Iterates over all embedded files.
|
120
144
|
def each_file(doc, &block) # :yields: obj, index
|
121
145
|
doc.files.each(search: @search).select(&:embedded_file?).each_with_index(&block)
|
data/lib/hexapdf/cli/form.rb
CHANGED
@@ -195,6 +195,7 @@ module HexaPDF
|
|
195
195
|
# Fills out the form by interactively asking the user for field values.
|
196
196
|
def fill_form(doc)
|
197
197
|
current_page_index = -1
|
198
|
+
form = doc.acro_form
|
198
199
|
each_field(doc) do |_page, page_index, field, _widget|
|
199
200
|
next if field.flagged?(:read_only) && !@fill_read_only_fields
|
200
201
|
if current_page_index != page_index
|
@@ -224,9 +225,9 @@ module HexaPDF
|
|
224
225
|
print " └─ New value: "
|
225
226
|
value = $stdin.readline.chomp
|
226
227
|
next if value.empty?
|
227
|
-
|
228
|
+
form.fill(field.full_field_name => value)
|
228
229
|
rescue HexaPDF::Error => e
|
229
|
-
puts " ⚠
|
230
|
+
puts " ⚠ Error while setting '#{field.full_field_name}': #{e.message}"
|
230
231
|
retry
|
231
232
|
end
|
232
233
|
end
|
@@ -234,18 +235,20 @@ module HexaPDF
|
|
234
235
|
|
235
236
|
# Fills out the form using the data from the provided template file.
|
236
237
|
def fill_form_with_template(doc)
|
237
|
-
data = parse_template
|
238
238
|
form = doc.acro_form
|
239
|
-
data
|
239
|
+
data = parse_template
|
240
|
+
data.reject! do |name, _value|
|
240
241
|
field = form.field_by_name(name)
|
241
242
|
raise Error, "Field '#{name}' not found in input PDF" unless field
|
242
243
|
if field.flagged?(:read_only) && !@fill_read_only_fields
|
243
244
|
puts "Ignoring field '#{name}' because it is read only and --fill-read-only-fields " \
|
244
|
-
"is
|
245
|
-
|
245
|
+
"is not set"
|
246
|
+
true
|
247
|
+
else
|
248
|
+
false
|
246
249
|
end
|
247
|
-
apply_field_value(field, value)
|
248
250
|
end
|
251
|
+
form.fill(data)
|
249
252
|
end
|
250
253
|
|
251
254
|
# Parses the data from the given template file.
|
@@ -273,30 +276,6 @@ module HexaPDF
|
|
273
276
|
data
|
274
277
|
end
|
275
278
|
|
276
|
-
# Applies the given value to the field.
|
277
|
-
def apply_field_value(field, value)
|
278
|
-
case field.concrete_field_type
|
279
|
-
when :single_line_text_field, :multiline_text_field, :comb_text_field, :file_select_field,
|
280
|
-
:combo_box, :list_box, :editable_combo_box
|
281
|
-
field.field_value = value
|
282
|
-
when :check_box
|
283
|
-
field.field_value = case value
|
284
|
-
when /y(es)?|t(rue)?/
|
285
|
-
true
|
286
|
-
when /n(o)?|f(alse)?/
|
287
|
-
false
|
288
|
-
else
|
289
|
-
value
|
290
|
-
end
|
291
|
-
when :radio_button
|
292
|
-
field.field_value = value.to_sym
|
293
|
-
else
|
294
|
-
raise Error, "Field type #{field.concrete_field_type} not yet supported"
|
295
|
-
end
|
296
|
-
rescue StandardError
|
297
|
-
raise Error, "Error while setting '#{field.full_field_name}': #{$!.message}"
|
298
|
-
end
|
299
|
-
|
300
279
|
# Iterates over all non-push button fields in page order. If a field appears on multiple
|
301
280
|
# pages, it is only yielded on the first page if +with_seen+ is +false+.
|
302
281
|
def each_field(doc, with_seen: false) # :yields: page, page_index, field
|
data/lib/hexapdf/cli/inspect.rb
CHANGED
@@ -270,7 +270,7 @@ module HexaPDF
|
|
270
270
|
if (rev_index = data.shift)
|
271
271
|
rev_index = rev_index.to_i - 1
|
272
272
|
if rev_index < 0 || rev_index >= @doc.revisions.count
|
273
|
-
$stderr.puts("Error: Invalid revision
|
273
|
+
$stderr.puts("Error: Invalid revision number specified")
|
274
274
|
next
|
275
275
|
end
|
276
276
|
length = 0
|
@@ -0,0 +1,215 @@
|
|
1
|
+
# -*- encoding: utf-8; frozen_string_literal: true -*-
|
2
|
+
#
|
3
|
+
#--
|
4
|
+
# This file is part of HexaPDF.
|
5
|
+
#
|
6
|
+
# HexaPDF - A Versatile PDF Creation and Manipulation Library For Ruby
|
7
|
+
# Copyright (C) 2014-2024 Thomas Leitner
|
8
|
+
#
|
9
|
+
# HexaPDF is free software: you can redistribute it and/or modify it
|
10
|
+
# under the terms of the GNU Affero General Public License version 3 as
|
11
|
+
# published by the Free Software Foundation with the addition of the
|
12
|
+
# following permission added to Section 15 as permitted in Section 7(a):
|
13
|
+
# FOR ANY PART OF THE COVERED WORK IN WHICH THE COPYRIGHT IS OWNED BY
|
14
|
+
# THOMAS LEITNER, THOMAS LEITNER DISCLAIMS THE WARRANTY OF NON
|
15
|
+
# INFRINGEMENT OF THIRD PARTY RIGHTS.
|
16
|
+
#
|
17
|
+
# HexaPDF is distributed in the hope that it will be useful, but WITHOUT
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
20
|
+
# License for more details.
|
21
|
+
#
|
22
|
+
# You should have received a copy of the GNU Affero General Public License
|
23
|
+
# along with HexaPDF. If not, see <http://www.gnu.org/licenses/>.
|
24
|
+
#
|
25
|
+
# The interactive user interfaces in modified source and object code
|
26
|
+
# versions of HexaPDF must display Appropriate Legal Notices, as required
|
27
|
+
# under Section 5 of the GNU Affero General Public License version 3.
|
28
|
+
#
|
29
|
+
# In accordance with Section 7(b) of the GNU Affero General Public
|
30
|
+
# License, a covered work must retain the producer line in every PDF that
|
31
|
+
# is created or manipulated using HexaPDF.
|
32
|
+
#
|
33
|
+
# If the GNU Affero General Public License doesn't fit your need,
|
34
|
+
# commercial licenses are available at <https://gettalong.at/hexapdf/>.
|
35
|
+
#++
|
36
|
+
|
37
|
+
require 'hexapdf/cli/command'
|
38
|
+
|
39
|
+
module HexaPDF
|
40
|
+
module CLI
|
41
|
+
|
42
|
+
# Shows the space usage of various parts of a PDF file.
|
43
|
+
class Usage < Command
|
44
|
+
|
45
|
+
# Modifies the HexaPDF::PDFData class to store the size information
|
46
|
+
module PDFDataExtension
|
47
|
+
|
48
|
+
# Used to store the size of the indirect object.
|
49
|
+
attr_accessor :size
|
50
|
+
|
51
|
+
# Used to store the size of the object inside the object stream.
|
52
|
+
attr_accessor :size_in_object_stream
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
# Modifies HexaPDF::Parser to retrieve space used by indirect objects.
|
57
|
+
module ParserExtension
|
58
|
+
|
59
|
+
# :nodoc:
|
60
|
+
def initialize(*)
|
61
|
+
super
|
62
|
+
@last_size = nil
|
63
|
+
end
|
64
|
+
|
65
|
+
# :nodoc:
|
66
|
+
def load_object(xref_entry)
|
67
|
+
super.tap do |obj|
|
68
|
+
if xref_entry.type == :compressed
|
69
|
+
obj.data.size_in_object_stream = @last_size
|
70
|
+
elsif xref_entry.type == :in_use
|
71
|
+
obj.data.size = @last_size
|
72
|
+
end
|
73
|
+
@last_size = nil
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
77
|
+
# :nodoc:
|
78
|
+
def parse_indirect_object(offset = nil)
|
79
|
+
real_offset = (offset ? @header_offset + offset : @tokenizer.pos)
|
80
|
+
result = super
|
81
|
+
@last_size = @tokenizer.pos - real_offset
|
82
|
+
result
|
83
|
+
end
|
84
|
+
|
85
|
+
# :nodoc:
|
86
|
+
def load_compressed_object(xref_entry)
|
87
|
+
result = super
|
88
|
+
offsets = @object_stream_data[xref_entry.objstm].instance_variable_get(:@offsets)
|
89
|
+
@last_size = if xref_entry.pos == offsets.size - 1
|
90
|
+
@object_stream_data[xref_entry.objstm].instance_variable_get(:@tokenizer).
|
91
|
+
io.size - offsets[xref_entry.pos]
|
92
|
+
else
|
93
|
+
offsets[xref_entry.pos + 1] - offsets[xref_entry.pos]
|
94
|
+
end
|
95
|
+
result
|
96
|
+
end
|
97
|
+
|
98
|
+
end
|
99
|
+
|
100
|
+
def initialize #:nodoc:
|
101
|
+
super('usage', takes_commands: false)
|
102
|
+
short_desc("Show space usage of various parts of a PDF file")
|
103
|
+
long_desc(<<~EOF)
|
104
|
+
This command displays some usage statistics of the PDF file, i.e. which parts take which
|
105
|
+
approximate space in the file.
|
106
|
+
|
107
|
+
Each statistic line shows the space used followed by the number of indirect objects in
|
108
|
+
parentheses. If some of those objects are in object streams, that number is displayed
|
109
|
+
after a slash.
|
110
|
+
EOF
|
111
|
+
|
112
|
+
options.on("--password PASSWORD", "-p", String,
|
113
|
+
"The password for decryption. Use - for reading from standard input.") do |pwd|
|
114
|
+
@password = (pwd == '-' ? read_password : pwd)
|
115
|
+
end
|
116
|
+
|
117
|
+
@password = nil
|
118
|
+
end
|
119
|
+
|
120
|
+
def execute(file) #:nodoc:
|
121
|
+
HexaPDF::Parser.prepend(ParserExtension)
|
122
|
+
HexaPDF::PDFData.prepend(PDFDataExtension)
|
123
|
+
|
124
|
+
with_document(file, password: @password) do |doc|
|
125
|
+
# Prepare cache of outline items
|
126
|
+
outline_item_cache = {}
|
127
|
+
if doc.catalog.key?(:Outlines)
|
128
|
+
doc.outline.each_item {|item| outline_item_cache[item] = true }
|
129
|
+
outline_item_cache[doc.outline] = true
|
130
|
+
end
|
131
|
+
|
132
|
+
doc.revisions.each.with_index do |rev, index|
|
133
|
+
sum = count = 0
|
134
|
+
categories = {
|
135
|
+
Content: [],
|
136
|
+
Files: [],
|
137
|
+
Fonts: [],
|
138
|
+
Images: [],
|
139
|
+
Metadata: [],
|
140
|
+
ObjectStreams: [],
|
141
|
+
Outline: [],
|
142
|
+
XObjects: [],
|
143
|
+
}
|
144
|
+
puts if index > 0
|
145
|
+
puts "Usage information for revision #{index + 1}" if doc.revisions.count > 1
|
146
|
+
rev.each do |obj|
|
147
|
+
if command_parser.verbosity_info?
|
148
|
+
print "(#{obj.oid},#{obj.gen}): #{obj.data.size.to_i}"
|
149
|
+
print " (#{obj.data.size_in_object_stream})" if obj.data.size.nil?
|
150
|
+
puts
|
151
|
+
end
|
152
|
+
next unless obj.kind_of?(HexaPDF::Dictionary)
|
153
|
+
|
154
|
+
case obj.type
|
155
|
+
when :Page
|
156
|
+
Array(obj[:Contents]).each do |content|
|
157
|
+
categories[:Content] << content if object_in_rev?(content, rev)
|
158
|
+
end
|
159
|
+
when :Font
|
160
|
+
categories[:Fonts] << obj
|
161
|
+
when :FontDescriptor
|
162
|
+
categories[:Fonts] << obj
|
163
|
+
[:FontFile, :FontFile2, :FontFile3].each do |name|
|
164
|
+
categories[:Fonts] << obj[name] if object_in_rev?(obj[name], rev)
|
165
|
+
end
|
166
|
+
when :Metadata
|
167
|
+
categories[:Metadata] << obj
|
168
|
+
when :Filespec
|
169
|
+
categories[:Files] << obj
|
170
|
+
categories[:Files] << obj.embedded_file_stream if obj.embedded_file?
|
171
|
+
when :ObjStm
|
172
|
+
categories[:ObjectStreams] << obj
|
173
|
+
else
|
174
|
+
if obj[:Subtype] == :Image
|
175
|
+
categories[:Images] << obj
|
176
|
+
elsif obj[:Subtype] == :Form
|
177
|
+
categories[:XObjects] << obj
|
178
|
+
end
|
179
|
+
end
|
180
|
+
sum += obj.data.size if obj.data.size
|
181
|
+
count += 1
|
182
|
+
end
|
183
|
+
|
184
|
+
# Populate Outline category
|
185
|
+
outline_item_cache.reject! do |obj, _val|
|
186
|
+
object_in_rev?(obj, rev) && categories[:Outline] << obj
|
187
|
+
end
|
188
|
+
|
189
|
+
categories.each do |name, data|
|
190
|
+
next if data.empty?
|
191
|
+
object_stream_count = 0
|
192
|
+
category_sum = data.sum do |o|
|
193
|
+
object_stream_count += 1 unless o.data.size
|
194
|
+
o.data.size.to_i
|
195
|
+
end
|
196
|
+
object_stream_count = object_stream_count > 0 ? "/#{object_stream_count}" : ''
|
197
|
+
size = human_readable_file_size(category_sum)
|
198
|
+
puts "#{name.to_s.ljust(15)} #{size.rjust(8)} (#{data.count}#{object_stream_count})"
|
199
|
+
end
|
200
|
+
puts "#{'Total'.ljust(15)} #{human_readable_file_size(sum).rjust(8)} (#{count})"
|
201
|
+
end
|
202
|
+
end
|
203
|
+
end
|
204
|
+
|
205
|
+
private
|
206
|
+
|
207
|
+
# Returns +true+ if the +obj+ is in the given +rev+.
|
208
|
+
def object_in_rev?(obj, rev)
|
209
|
+
obj && rev.object(obj) == obj
|
210
|
+
end
|
211
|
+
|
212
|
+
end
|
213
|
+
|
214
|
+
end
|
215
|
+
end
|
data/lib/hexapdf/cli.rb
CHANGED
@@ -48,6 +48,7 @@ require 'hexapdf/cli/watermark'
|
|
48
48
|
require 'hexapdf/cli/image2pdf'
|
49
49
|
require 'hexapdf/cli/form'
|
50
50
|
require 'hexapdf/cli/fonts'
|
51
|
+
require 'hexapdf/cli/usage'
|
51
52
|
require 'hexapdf/version'
|
52
53
|
require 'hexapdf/document'
|
53
54
|
|
@@ -107,6 +108,7 @@ module HexaPDF
|
|
107
108
|
add_command(HexaPDF::CLI::Image2PDF.new)
|
108
109
|
add_command(HexaPDF::CLI::Form.new)
|
109
110
|
add_command(HexaPDF::CLI::Fonts.new)
|
111
|
+
add_command(HexaPDF::CLI::Usage.new)
|
110
112
|
add_command(CmdParse::HelpCommand.new)
|
111
113
|
version_command = CmdParse::VersionCommand.new(add_switches: false)
|
112
114
|
add_command(version_command)
|
@@ -481,7 +481,7 @@ module HexaPDF
|
|
481
481
|
'acro_form.fallback_font' => 'Helvetica',
|
482
482
|
'acro_form.on_invalid_value' => proc do |field, value|
|
483
483
|
raise HexaPDF::Error, "Invalid value #{value.inspect} for " \
|
484
|
-
"#{field.concrete_field_type} field #{field.full_field_name}"
|
484
|
+
"#{field.concrete_field_type} field named '#{field.full_field_name}'"
|
485
485
|
end,
|
486
486
|
'acro_form.text_field.default_width' => 100,
|
487
487
|
'debug' => false,
|
data/lib/hexapdf/dictionary.rb
CHANGED
@@ -228,9 +228,9 @@ module HexaPDF
|
|
228
228
|
value.empty?
|
229
229
|
end
|
230
230
|
|
231
|
-
# Returns a
|
232
|
-
def
|
233
|
-
value.
|
231
|
+
# Returns a hash containing the preprocessed values (like in #[]).
|
232
|
+
def to_hash
|
233
|
+
value.each_with_object({}) {|(k, _), h| h[k] = self[k] }
|
234
234
|
end
|
235
235
|
|
236
236
|
private
|