hexapdf 0.32.2 → 0.33.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +63 -1
- data/README.md +9 -0
- data/examples/002-graphics.rb +15 -17
- data/examples/003-arcs.rb +9 -9
- data/examples/009-text_layouter_alignment.rb +1 -1
- data/examples/010-text_layouter_inline_boxes.rb +2 -2
- data/examples/011-text_layouter_line_wrapping.rb +1 -1
- data/examples/012-text_layouter_styling.rb +7 -7
- data/examples/013-text_layouter_shapes.rb +1 -1
- data/examples/014-text_in_polygon.rb +1 -1
- data/examples/015-boxes.rb +8 -7
- data/examples/016-frame_automatic_box_placement.rb +2 -2
- data/examples/017-frame_text_flow.rb +2 -1
- data/examples/018-composer.rb +1 -1
- data/examples/020-column_box.rb +2 -1
- data/examples/025-table_box.rb +46 -0
- data/lib/hexapdf/cli/command.rb +5 -2
- data/lib/hexapdf/cli/form.rb +5 -5
- data/lib/hexapdf/cli/inspect.rb +3 -3
- data/lib/hexapdf/composer.rb +104 -52
- data/lib/hexapdf/configuration.rb +44 -39
- data/lib/hexapdf/content/canvas.rb +393 -267
- data/lib/hexapdf/content/color_space.rb +72 -25
- data/lib/hexapdf/content/graphic_object/arc.rb +57 -24
- data/lib/hexapdf/content/graphic_object/endpoint_arc.rb +66 -23
- data/lib/hexapdf/content/graphic_object/geom2d.rb +47 -6
- data/lib/hexapdf/content/graphic_object/solid_arc.rb +58 -36
- data/lib/hexapdf/content/graphic_object.rb +6 -7
- data/lib/hexapdf/content/graphics_state.rb +54 -45
- data/lib/hexapdf/content/operator.rb +52 -54
- data/lib/hexapdf/content/parser.rb +2 -2
- data/lib/hexapdf/content/processor.rb +15 -15
- data/lib/hexapdf/content/transformation_matrix.rb +1 -1
- data/lib/hexapdf/content.rb +5 -0
- data/lib/hexapdf/dictionary.rb +6 -5
- data/lib/hexapdf/dictionary_fields.rb +42 -14
- data/lib/hexapdf/digital_signature/cms_handler.rb +2 -2
- data/lib/hexapdf/digital_signature/handler.rb +1 -1
- data/lib/hexapdf/digital_signature/pkcs1_handler.rb +2 -3
- data/lib/hexapdf/digital_signature/signature.rb +6 -6
- data/lib/hexapdf/digital_signature/signatures.rb +13 -12
- data/lib/hexapdf/digital_signature/signing/default_handler.rb +14 -5
- data/lib/hexapdf/digital_signature/signing/signed_data_creator.rb +2 -4
- data/lib/hexapdf/digital_signature/signing/timestamp_handler.rb +4 -4
- data/lib/hexapdf/digital_signature/signing.rb +4 -0
- data/lib/hexapdf/digital_signature/verification_result.rb +2 -2
- data/lib/hexapdf/digital_signature.rb +7 -2
- data/lib/hexapdf/document/destinations.rb +12 -11
- data/lib/hexapdf/document/files.rb +1 -1
- data/lib/hexapdf/document/fonts.rb +1 -1
- data/lib/hexapdf/document/layout.rb +167 -39
- data/lib/hexapdf/document/pages.rb +3 -2
- data/lib/hexapdf/document.rb +89 -55
- data/lib/hexapdf/encryption/aes.rb +5 -5
- data/lib/hexapdf/encryption/arc4.rb +1 -1
- data/lib/hexapdf/encryption/fast_aes.rb +2 -2
- data/lib/hexapdf/encryption/fast_arc4.rb +1 -1
- data/lib/hexapdf/encryption/identity.rb +1 -1
- data/lib/hexapdf/encryption/ruby_aes.rb +1 -1
- data/lib/hexapdf/encryption/ruby_arc4.rb +1 -1
- data/lib/hexapdf/encryption/security_handler.rb +31 -24
- data/lib/hexapdf/encryption/standard_security_handler.rb +45 -36
- data/lib/hexapdf/encryption.rb +7 -2
- data/lib/hexapdf/error.rb +18 -0
- data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
- data/lib/hexapdf/filter/ascii_hex_decode.rb +1 -1
- data/lib/hexapdf/filter/flate_decode.rb +1 -1
- data/lib/hexapdf/filter/lzw_decode.rb +1 -1
- data/lib/hexapdf/filter/pass_through.rb +1 -1
- data/lib/hexapdf/filter/predictor.rb +1 -1
- data/lib/hexapdf/filter/run_length_decode.rb +1 -1
- data/lib/hexapdf/filter.rb +55 -6
- data/lib/hexapdf/font/cmap/parser.rb +2 -2
- data/lib/hexapdf/font/cmap.rb +1 -1
- data/lib/hexapdf/font/encoding/difference_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/mac_expert_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/mac_roman_encoding.rb +2 -2
- data/lib/hexapdf/font/encoding/standard_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/symbol_encoding.rb +1 -1
- data/lib/hexapdf/font/encoding/win_ansi_encoding.rb +3 -3
- data/lib/hexapdf/font/encoding/zapf_dingbats_encoding.rb +1 -1
- data/lib/hexapdf/font/invalid_glyph.rb +3 -0
- data/lib/hexapdf/font/true_type_wrapper.rb +17 -4
- data/lib/hexapdf/font/type1_wrapper.rb +19 -4
- data/lib/hexapdf/font_loader/from_configuration.rb +5 -2
- data/lib/hexapdf/font_loader/from_file.rb +5 -5
- data/lib/hexapdf/font_loader/standard14.rb +3 -3
- data/lib/hexapdf/font_loader.rb +3 -0
- data/lib/hexapdf/image_loader/jpeg.rb +2 -2
- data/lib/hexapdf/image_loader/pdf.rb +1 -1
- data/lib/hexapdf/image_loader/png.rb +2 -2
- data/lib/hexapdf/image_loader.rb +1 -1
- data/lib/hexapdf/importer.rb +13 -0
- data/lib/hexapdf/layout/box.rb +9 -2
- data/lib/hexapdf/layout/box_fitter.rb +2 -2
- data/lib/hexapdf/layout/column_box.rb +18 -4
- data/lib/hexapdf/layout/frame.rb +30 -12
- data/lib/hexapdf/layout/image_box.rb +5 -0
- data/lib/hexapdf/layout/inline_box.rb +1 -0
- data/lib/hexapdf/layout/list_box.rb +17 -1
- data/lib/hexapdf/layout/page_style.rb +4 -4
- data/lib/hexapdf/layout/style.rb +18 -3
- data/lib/hexapdf/layout/table_box.rb +682 -0
- data/lib/hexapdf/layout/text_box.rb +5 -3
- data/lib/hexapdf/layout/text_fragment.rb +1 -1
- data/lib/hexapdf/layout/text_layouter.rb +12 -4
- data/lib/hexapdf/layout.rb +1 -0
- data/lib/hexapdf/name_tree_node.rb +1 -1
- data/lib/hexapdf/number_tree_node.rb +1 -1
- data/lib/hexapdf/object.rb +18 -7
- data/lib/hexapdf/parser.rb +7 -7
- data/lib/hexapdf/pdf_array.rb +1 -1
- data/lib/hexapdf/rectangle.rb +1 -1
- data/lib/hexapdf/reference.rb +1 -1
- data/lib/hexapdf/revision.rb +1 -1
- data/lib/hexapdf/revisions.rb +3 -3
- data/lib/hexapdf/serializer.rb +15 -15
- data/lib/hexapdf/stream.rb +4 -2
- data/lib/hexapdf/tokenizer.rb +14 -14
- data/lib/hexapdf/type/acro_form/appearance_generator.rb +22 -22
- data/lib/hexapdf/type/acro_form/button_field.rb +1 -1
- data/lib/hexapdf/type/acro_form/choice_field.rb +1 -1
- data/lib/hexapdf/type/acro_form/field.rb +2 -2
- data/lib/hexapdf/type/acro_form/form.rb +1 -1
- data/lib/hexapdf/type/acro_form/signature_field.rb +4 -4
- data/lib/hexapdf/type/acro_form/text_field.rb +1 -1
- data/lib/hexapdf/type/acro_form/variable_text_field.rb +1 -1
- data/lib/hexapdf/type/acro_form.rb +1 -1
- data/lib/hexapdf/type/action.rb +1 -1
- data/lib/hexapdf/type/actions/go_to.rb +1 -1
- data/lib/hexapdf/type/actions/go_to_r.rb +1 -1
- data/lib/hexapdf/type/actions/launch.rb +1 -1
- data/lib/hexapdf/type/actions/uri.rb +1 -1
- data/lib/hexapdf/type/actions.rb +1 -1
- data/lib/hexapdf/type/annotation.rb +3 -3
- data/lib/hexapdf/type/annotations/link.rb +1 -1
- data/lib/hexapdf/type/annotations/markup_annotation.rb +1 -1
- data/lib/hexapdf/type/annotations/text.rb +1 -1
- data/lib/hexapdf/type/annotations/widget.rb +2 -2
- data/lib/hexapdf/type/annotations.rb +1 -1
- data/lib/hexapdf/type/catalog.rb +1 -1
- data/lib/hexapdf/type/cid_font.rb +3 -3
- data/lib/hexapdf/type/embedded_file.rb +1 -1
- data/lib/hexapdf/type/file_specification.rb +2 -2
- data/lib/hexapdf/type/font_descriptor.rb +1 -1
- data/lib/hexapdf/type/font_simple.rb +2 -2
- data/lib/hexapdf/type/font_type0.rb +3 -3
- data/lib/hexapdf/type/font_type3.rb +1 -1
- data/lib/hexapdf/type/form.rb +1 -1
- data/lib/hexapdf/type/graphics_state_parameter.rb +1 -1
- data/lib/hexapdf/type/icon_fit.rb +1 -1
- data/lib/hexapdf/type/image.rb +1 -1
- data/lib/hexapdf/type/info.rb +1 -1
- data/lib/hexapdf/type/mark_information.rb +1 -1
- data/lib/hexapdf/type/names.rb +2 -2
- data/lib/hexapdf/type/object_stream.rb +2 -1
- data/lib/hexapdf/type/outline.rb +1 -1
- data/lib/hexapdf/type/outline_item.rb +1 -1
- data/lib/hexapdf/type/page.rb +19 -10
- data/lib/hexapdf/type/page_label.rb +1 -1
- data/lib/hexapdf/type/page_tree_node.rb +1 -1
- data/lib/hexapdf/type/resources.rb +1 -1
- data/lib/hexapdf/type/trailer.rb +2 -2
- data/lib/hexapdf/type/viewer_preferences.rb +1 -1
- data/lib/hexapdf/type/xref_stream.rb +2 -2
- data/lib/hexapdf/utils/pdf_doc_encoding.rb +1 -1
- data/lib/hexapdf/version.rb +1 -1
- data/lib/hexapdf/writer.rb +4 -4
- data/lib/hexapdf/xref_section.rb +2 -2
- data/test/hexapdf/content/graphic_object/test_endpoint_arc.rb +11 -1
- data/test/hexapdf/content/graphic_object/test_geom2d.rb +7 -0
- data/test/hexapdf/content/test_canvas.rb +0 -1
- data/test/hexapdf/digital_signature/test_signatures.rb +22 -0
- data/test/hexapdf/document/test_files.rb +2 -2
- data/test/hexapdf/document/test_layout.rb +98 -0
- data/test/hexapdf/encryption/test_security_handler.rb +12 -11
- data/test/hexapdf/encryption/test_standard_security_handler.rb +35 -23
- data/test/hexapdf/font/test_true_type_wrapper.rb +18 -1
- data/test/hexapdf/font/test_type1_wrapper.rb +15 -1
- data/test/hexapdf/layout/test_box.rb +1 -1
- data/test/hexapdf/layout/test_column_box.rb +65 -21
- data/test/hexapdf/layout/test_frame.rb +14 -14
- data/test/hexapdf/layout/test_image_box.rb +4 -0
- data/test/hexapdf/layout/test_inline_box.rb +5 -0
- data/test/hexapdf/layout/test_list_box.rb +40 -6
- data/test/hexapdf/layout/test_page_style.rb +3 -2
- data/test/hexapdf/layout/test_style.rb +50 -0
- data/test/hexapdf/layout/test_table_box.rb +722 -0
- data/test/hexapdf/layout/test_text_box.rb +18 -0
- data/test/hexapdf/layout/test_text_layouter.rb +4 -0
- data/test/hexapdf/test_dictionary_fields.rb +4 -1
- data/test/hexapdf/test_document.rb +1 -0
- data/test/hexapdf/test_filter.rb +8 -0
- data/test/hexapdf/test_importer.rb +9 -0
- data/test/hexapdf/test_object.rb +16 -5
- data/test/hexapdf/test_stream.rb +7 -0
- data/test/hexapdf/test_writer.rb +3 -3
- data/test/hexapdf/type/acro_form/test_appearance_generator.rb +13 -5
- data/test/hexapdf/type/acro_form/test_form.rb +4 -3
- data/test/hexapdf/type/test_page.rb +18 -4
- metadata +17 -8
|
@@ -74,13 +74,14 @@ module HexaPDF
|
|
|
74
74
|
|
|
75
75
|
@width = @height = 0
|
|
76
76
|
@result = if style.position == :flow
|
|
77
|
-
@tl.fit(@items, frame.width_specification, frame.shape.bbox.height)
|
|
77
|
+
@tl.fit(@items, frame.width_specification, frame.shape.bbox.height,
|
|
78
|
+
apply_first_text_indent: !split_box?)
|
|
78
79
|
else
|
|
79
80
|
@width = reserved_width
|
|
80
81
|
@height = reserved_height
|
|
81
82
|
width = (@initial_width > 0 ? @initial_width : available_width) - @width
|
|
82
83
|
height = (@initial_height > 0 ? @initial_height : available_height) - @height
|
|
83
|
-
@tl.fit(@items, width, height)
|
|
84
|
+
@tl.fit(@items, width, height, apply_first_text_indent: !split_box?)
|
|
84
85
|
end
|
|
85
86
|
@width += if @initial_width > 0 || style.align == :center || style.align == :right
|
|
86
87
|
width
|
|
@@ -103,7 +104,8 @@ module HexaPDF
|
|
|
103
104
|
def split(available_width, available_height, frame)
|
|
104
105
|
fit(available_width, available_height, frame) unless @result
|
|
105
106
|
|
|
106
|
-
if style.position != :flow && (@width > available_width || @height > available_height)
|
|
107
|
+
if style.position != :flow && (float_compare(@width, available_width) > 0 ||
|
|
108
|
+
float_compare(@height, available_height) > 0)
|
|
107
109
|
[nil, self]
|
|
108
110
|
elsif @result.remaining_items.empty?
|
|
109
111
|
[self]
|
|
@@ -259,7 +259,7 @@ module HexaPDF
|
|
|
259
259
|
# The width of the text fragment.
|
|
260
260
|
#
|
|
261
261
|
# It is the sum of the widths of its items and is calculated by using the algorithm presented
|
|
262
|
-
# in
|
|
262
|
+
# in PDF2.0 s9.4.4. By using kerning values as the first and/or last items, the text contained
|
|
263
263
|
# in the fragment may spill over the left and/or right boundary.
|
|
264
264
|
def width
|
|
265
265
|
@width ||= @items.sum {|item| style.scaled_item_width(item) }
|
|
@@ -51,7 +51,8 @@ module HexaPDF
|
|
|
51
51
|
# * Existing line breaking characters inside of TextFragment objects are respected when fitting
|
|
52
52
|
# text. If this is not wanted, they have to be removed beforehand.
|
|
53
53
|
#
|
|
54
|
-
# * The first line may be indented by setting Style#text_indent which may also
|
|
54
|
+
# * The first line of each paragraph may be indented by setting Style#text_indent which may also
|
|
55
|
+
# be negative.
|
|
55
56
|
#
|
|
56
57
|
# * Text can be fitted into arbitrarily shaped areas, even containing holes.
|
|
57
58
|
#
|
|
@@ -658,7 +659,7 @@ module HexaPDF
|
|
|
658
659
|
end
|
|
659
660
|
|
|
660
661
|
# :call-seq:
|
|
661
|
-
# text_layouter.fit(items, width, height) -> result
|
|
662
|
+
# text_layouter.fit(items, width, height, apply_first_text_indent: true) -> result
|
|
662
663
|
#
|
|
663
664
|
# Fits the items into the given area and returns a Result object with all the information.
|
|
664
665
|
#
|
|
@@ -693,7 +694,14 @@ module HexaPDF
|
|
|
693
694
|
# The text segmentation algorithm specified via #style is applied to the items in case they
|
|
694
695
|
# are not already in segmented form. This also means that Result#remaining_items always
|
|
695
696
|
# contains segmented items.
|
|
696
|
-
|
|
697
|
+
#
|
|
698
|
+
# Optional arguments:
|
|
699
|
+
#
|
|
700
|
+
# +apply_first_text_indent+::
|
|
701
|
+
# Specifies whether style.text_indent should be applied to the first line. This should be
|
|
702
|
+
# set to +false+ if the items start with a continuation of a paragraph instead of starting
|
|
703
|
+
# a new paragraph (e.g. after a page break).
|
|
704
|
+
def fit(items, width, height, apply_first_text_indent: true)
|
|
697
705
|
unless items.empty? || items[0].respond_to?(:type)
|
|
698
706
|
items = style.text_segmentation_algorithm.call(items)
|
|
699
707
|
end
|
|
@@ -704,7 +712,7 @@ module HexaPDF
|
|
|
704
712
|
rest = items
|
|
705
713
|
|
|
706
714
|
# processing state variables
|
|
707
|
-
indent = style.text_indent
|
|
715
|
+
indent = apply_first_text_indent ? style.text_indent : 0
|
|
708
716
|
line_fragments = []
|
|
709
717
|
line_height = 0
|
|
710
718
|
previous_line = nil
|
data/lib/hexapdf/layout.rb
CHANGED
|
@@ -44,7 +44,7 @@ module HexaPDF
|
|
|
44
44
|
# Number trees are similar to name trees but use integers as keys instead of strings. See
|
|
45
45
|
# HexaPDF::NameTreeNode for a more detailed explanation.
|
|
46
46
|
#
|
|
47
|
-
# See:
|
|
47
|
+
# See: PDF2.0 s7.9.7, HexaPDF::NameTreeNode
|
|
48
48
|
class NumberTreeNode < Dictionary
|
|
49
49
|
|
|
50
50
|
include Utils::SortedTreeNode
|
data/lib/hexapdf/object.rb
CHANGED
|
@@ -117,7 +117,7 @@ module HexaPDF
|
|
|
117
117
|
#
|
|
118
118
|
# See: HexaPDF::Dictionary, HexaPDF::Stream, HexaPDF::Reference, HexaPDF::Document
|
|
119
119
|
#
|
|
120
|
-
# See:
|
|
120
|
+
# See: PDF2.0 s7.3.10, s7.3.8
|
|
121
121
|
class Object
|
|
122
122
|
|
|
123
123
|
include Comparable
|
|
@@ -143,18 +143,27 @@ module HexaPDF
|
|
|
143
143
|
|
|
144
144
|
# Makes sure that the object itself as well as all nested values are direct objects.
|
|
145
145
|
#
|
|
146
|
+
# The +document+ argument needs to contain the Document instance to which +object+ belongs so
|
|
147
|
+
# that references can be correctly resolved.
|
|
148
|
+
#
|
|
146
149
|
# If an indirect object is found, it is turned into a direct object and the indirect object is
|
|
147
150
|
# deleted from the document.
|
|
148
|
-
def self.make_direct(object)
|
|
151
|
+
def self.make_direct(object, document)
|
|
149
152
|
if object.kind_of?(HexaPDF::Object) && object.indirect?
|
|
153
|
+
raise HexaPDF::Error, "Can't make a stream object a direct object" if object.data.stream
|
|
150
154
|
object_to_delete = object
|
|
151
155
|
object = object.value
|
|
152
156
|
object_to_delete.document.delete(object_to_delete)
|
|
153
157
|
end
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
+
case object
|
|
159
|
+
when HexaPDF::Object
|
|
160
|
+
object.data.value = make_direct(object.data.value, document)
|
|
161
|
+
when Hash
|
|
162
|
+
object.transform_values! {|val| make_direct(val, document) }
|
|
163
|
+
when Array
|
|
164
|
+
object.map! {|val| make_direct(val, document) }
|
|
165
|
+
when Reference
|
|
166
|
+
object = make_direct(document.object(object), document)
|
|
158
167
|
end
|
|
159
168
|
object
|
|
160
169
|
end
|
|
@@ -255,7 +264,7 @@ module HexaPDF
|
|
|
255
264
|
# type.
|
|
256
265
|
#
|
|
257
266
|
# However, the Type and Subtype fields can easily be used for this. Subclasses for PDF objects
|
|
258
|
-
# that don't have such fields may use a unique name that has to begin with XX (see
|
|
267
|
+
# that don't have such fields may use a unique name that has to begin with XX (see PDF2.0 sE.2)
|
|
259
268
|
# and therefore doesn't clash with names defined by the PDF specification.
|
|
260
269
|
#
|
|
261
270
|
# For basic objects this always returns +:Unknown+.
|
|
@@ -297,6 +306,8 @@ module HexaPDF
|
|
|
297
306
|
end
|
|
298
307
|
|
|
299
308
|
# Makes a deep copy of the source PDF object and resets the object identifier.
|
|
309
|
+
#
|
|
310
|
+
# Note that indirect references are *not* copied! If that is also needed, use Importer::copy.
|
|
300
311
|
def deep_copy
|
|
301
312
|
obj = dup
|
|
302
313
|
obj.instance_variable_set(:@data, @data.dup)
|
data/lib/hexapdf/parser.rb
CHANGED
|
@@ -41,11 +41,11 @@ require 'hexapdf/xref_section'
|
|
|
41
41
|
|
|
42
42
|
module HexaPDF
|
|
43
43
|
|
|
44
|
-
# Parses an IO stream according to
|
|
44
|
+
# Parses an IO stream according to PDF2.0 to get at the contained objects.
|
|
45
45
|
#
|
|
46
46
|
# This class also contains higher-level methods for getting indirect objects and revisions.
|
|
47
47
|
#
|
|
48
|
-
# See:
|
|
48
|
+
# See: PDF2.0 s7
|
|
49
49
|
class Parser
|
|
50
50
|
|
|
51
51
|
# The IO stream which is parsed.
|
|
@@ -125,7 +125,7 @@ module HexaPDF
|
|
|
125
125
|
#
|
|
126
126
|
# Returns an array containing [object, oid, gen, stream].
|
|
127
127
|
#
|
|
128
|
-
# See:
|
|
128
|
+
# See: PDF2.0 s7.3.10, s7.3.8
|
|
129
129
|
def parse_indirect_object(offset = nil)
|
|
130
130
|
@tokenizer.pos = offset + @header_offset if offset
|
|
131
131
|
oid = @tokenizer.next_token
|
|
@@ -267,7 +267,7 @@ module HexaPDF
|
|
|
267
267
|
#
|
|
268
268
|
# This method can only parse cross-reference sections, not cross-reference streams!
|
|
269
269
|
#
|
|
270
|
-
# See:
|
|
270
|
+
# See: PDF2.0 s7.5.4, s7.5.5; ADB1.7 sH.3-3.4.3
|
|
271
271
|
def parse_xref_section_and_trailer(offset)
|
|
272
272
|
@tokenizer.pos = offset + @header_offset
|
|
273
273
|
token = @tokenizer.next_token
|
|
@@ -346,7 +346,7 @@ module HexaPDF
|
|
|
346
346
|
#
|
|
347
347
|
# If strict parsing is disabled, the whole file is searched for the offset.
|
|
348
348
|
#
|
|
349
|
-
# See:
|
|
349
|
+
# See: PDF2.0 s7.5.5, ADB1.7 sH.3-3.4.4
|
|
350
350
|
def startxref_offset
|
|
351
351
|
return @startxref_offset if defined?(@startxref_offset)
|
|
352
352
|
|
|
@@ -397,7 +397,7 @@ module HexaPDF
|
|
|
397
397
|
|
|
398
398
|
# Returns the PDF version number that is stored in the file header.
|
|
399
399
|
#
|
|
400
|
-
# See:
|
|
400
|
+
# See: PDF2.0 s7.5.2
|
|
401
401
|
def file_header_version
|
|
402
402
|
unless @header_version
|
|
403
403
|
raise_malformed("PDF file header is missing or corrupt", pos: 0)
|
|
@@ -413,7 +413,7 @@ module HexaPDF
|
|
|
413
413
|
# restriction so that the header may appear in the first 1024 bytes. We follow the Adobe
|
|
414
414
|
# convention.
|
|
415
415
|
#
|
|
416
|
-
# See:
|
|
416
|
+
# See: PDF2.0 s7.5.2, ADB1.7 sH.3-3.4.1
|
|
417
417
|
def retrieve_pdf_header_offset_and_version
|
|
418
418
|
@io.seek(0)
|
|
419
419
|
@header_offset = (@io.read(1024) || '').index(/%PDF-(\d\.\d)/) || 0
|
data/lib/hexapdf/pdf_array.rb
CHANGED
data/lib/hexapdf/rectangle.rb
CHANGED
|
@@ -51,7 +51,7 @@ module HexaPDF
|
|
|
51
51
|
# where +left+ is the bottom left x-coordinate, +bottom+ is the bottom left y-coordinate, +right+
|
|
52
52
|
# is the top right x-coordinate and +top+ is the top right y-coordinate.
|
|
53
53
|
#
|
|
54
|
-
# See:
|
|
54
|
+
# See: PDF2.0 s7.9.5
|
|
55
55
|
class Rectangle < HexaPDF::PDFArray
|
|
56
56
|
|
|
57
57
|
# Returns the x-coordinate of the bottom-left corner.
|
data/lib/hexapdf/reference.rb
CHANGED
|
@@ -50,7 +50,7 @@ module HexaPDF
|
|
|
50
50
|
# keys. Furthermore the implementation is compatible to the one of Object, i.e. the hash of a
|
|
51
51
|
# Reference object is the same as the hash of an indirect Object.
|
|
52
52
|
#
|
|
53
|
-
# See:
|
|
53
|
+
# See: PDF2.0 s7.3.10, Object
|
|
54
54
|
class Reference
|
|
55
55
|
|
|
56
56
|
include Comparable
|
data/lib/hexapdf/revision.rb
CHANGED
data/lib/hexapdf/revisions.rb
CHANGED
|
@@ -55,7 +55,7 @@ module HexaPDF
|
|
|
55
55
|
# this should only be done if one is familiar with the inner workings of HexaPDF. Otherwise it is
|
|
56
56
|
# best to use the convenience methods of this class to create, access or delete indirect objects.
|
|
57
57
|
#
|
|
58
|
-
# See:
|
|
58
|
+
# See: PDF2.0 s7.5.6, HexaPDF::Revision
|
|
59
59
|
class Revisions
|
|
60
60
|
|
|
61
61
|
class << self
|
|
@@ -76,7 +76,7 @@ module HexaPDF
|
|
|
76
76
|
seen_xref_offsets = {}
|
|
77
77
|
|
|
78
78
|
while offset && !seen_xref_offsets.key?(offset)
|
|
79
|
-
#
|
|
79
|
+
# PDF2.0 s7.5.5 states that :Prev needs to be indirect, Adobe's reference 3.4.4 says it
|
|
80
80
|
# should be direct. Adobe's POV is followed here. Same with :XRefStm.
|
|
81
81
|
xref_section, trailer = parser.load_revision(offset)
|
|
82
82
|
seen_xref_offsets[offset] = true
|
|
@@ -167,7 +167,7 @@ module HexaPDF
|
|
|
167
167
|
# For references to unknown objects, +nil+ is returned but free objects are represented by a
|
|
168
168
|
# PDF Null object, not by +nil+!
|
|
169
169
|
#
|
|
170
|
-
# See:
|
|
170
|
+
# See: PDF2.0 s7.3.9
|
|
171
171
|
def object(ref)
|
|
172
172
|
i = @revisions.size - 1
|
|
173
173
|
while i >= 0
|
data/lib/hexapdf/serializer.rb
CHANGED
|
@@ -79,7 +79,7 @@ module HexaPDF
|
|
|
79
79
|
#
|
|
80
80
|
# If no serialization method for a specific class is found, the ancestors classes are tried.
|
|
81
81
|
#
|
|
82
|
-
# See:
|
|
82
|
+
# See: PDF2.0 s7.3
|
|
83
83
|
class Serializer
|
|
84
84
|
|
|
85
85
|
# The encrypter to use for encrypting strings and streams. If +nil+, strings and streams are not
|
|
@@ -163,21 +163,21 @@ module HexaPDF
|
|
|
163
163
|
|
|
164
164
|
# Serializes the +nil+ value.
|
|
165
165
|
#
|
|
166
|
-
# See:
|
|
166
|
+
# See: PDF2.0 s7.3.9
|
|
167
167
|
def serialize_nilclass(_obj)
|
|
168
168
|
"null"
|
|
169
169
|
end
|
|
170
170
|
|
|
171
171
|
# Serializes the +true+ value.
|
|
172
172
|
#
|
|
173
|
-
# See:
|
|
173
|
+
# See: PDF2.0 s7.3.2
|
|
174
174
|
def serialize_trueclass(_obj)
|
|
175
175
|
"true"
|
|
176
176
|
end
|
|
177
177
|
|
|
178
178
|
# Serializes the +false+ value.
|
|
179
179
|
#
|
|
180
|
-
# See:
|
|
180
|
+
# See: PDF2.0 s7.3.2
|
|
181
181
|
def serialize_falseclass(_obj)
|
|
182
182
|
"false"
|
|
183
183
|
end
|
|
@@ -187,21 +187,21 @@ module HexaPDF
|
|
|
187
187
|
# This method should be used for cases where it is known that the object is either an Integer
|
|
188
188
|
# or a Float.
|
|
189
189
|
#
|
|
190
|
-
# See:
|
|
190
|
+
# See: PDF2.0 s7.3.3
|
|
191
191
|
def serialize_numeric(obj)
|
|
192
192
|
obj.kind_of?(Integer) ? obj.to_s : serialize_float(obj)
|
|
193
193
|
end
|
|
194
194
|
|
|
195
195
|
# Serializes an Integer object.
|
|
196
196
|
#
|
|
197
|
-
# See:
|
|
197
|
+
# See: PDF2.0 s7.3.3
|
|
198
198
|
def serialize_integer(obj)
|
|
199
199
|
obj.to_s
|
|
200
200
|
end
|
|
201
201
|
|
|
202
202
|
# Serializes a Float object.
|
|
203
203
|
#
|
|
204
|
-
# See:
|
|
204
|
+
# See: PDF2.0 s7.3.3
|
|
205
205
|
def serialize_float(obj)
|
|
206
206
|
if -0.0001 < obj && obj < 0.0001 && obj != 0
|
|
207
207
|
sprintf("%.6f", obj)
|
|
@@ -215,7 +215,7 @@ module HexaPDF
|
|
|
215
215
|
# The regexp matches all characters that need to be escaped and the substs hash contains the
|
|
216
216
|
# mapping from these characters to their escaped form.
|
|
217
217
|
#
|
|
218
|
-
# See
|
|
218
|
+
# See: PDF2.0 s7.3.5
|
|
219
219
|
NAME_SUBSTS = {} # :nodoc:
|
|
220
220
|
[0..32, 127..255, Tokenizer::DELIMITER.bytes, Tokenizer::WHITESPACE.bytes, [35]].each do |a|
|
|
221
221
|
a.each {|c| NAME_SUBSTS[c.chr] = "##{c.to_s(16).rjust(2, '0')}" }
|
|
@@ -225,7 +225,7 @@ module HexaPDF
|
|
|
225
225
|
|
|
226
226
|
# Serializes a Symbol object (i.e. a PDF name object).
|
|
227
227
|
#
|
|
228
|
-
# See:
|
|
228
|
+
# See: PDF2.0 s7.3.5
|
|
229
229
|
def serialize_symbol(obj)
|
|
230
230
|
NAME_CACHE[obj] ||=
|
|
231
231
|
begin
|
|
@@ -240,7 +240,7 @@ module HexaPDF
|
|
|
240
240
|
|
|
241
241
|
# Serializes an Array object.
|
|
242
242
|
#
|
|
243
|
-
# See:
|
|
243
|
+
# See: PDF2.0 s7.3.6
|
|
244
244
|
def serialize_array(obj)
|
|
245
245
|
str = +"["
|
|
246
246
|
index = 0
|
|
@@ -256,7 +256,7 @@ module HexaPDF
|
|
|
256
256
|
|
|
257
257
|
# Serializes a Hash object (i.e. a PDF dictionary object).
|
|
258
258
|
#
|
|
259
|
-
# See:
|
|
259
|
+
# See: PDF2.0 s7.3.7
|
|
260
260
|
def serialize_hash(obj)
|
|
261
261
|
str = +"<<"
|
|
262
262
|
obj.each do |k, v|
|
|
@@ -274,7 +274,7 @@ module HexaPDF
|
|
|
274
274
|
|
|
275
275
|
# Serializes a String object.
|
|
276
276
|
#
|
|
277
|
-
# See:
|
|
277
|
+
# See: PDF2.0 s7.3.4
|
|
278
278
|
def serialize_string(obj)
|
|
279
279
|
obj = if @encrypter && @object.kind_of?(HexaPDF::Object) && @object.indirect?
|
|
280
280
|
encrypter.encrypt_string(obj, @object)
|
|
@@ -294,7 +294,7 @@ module HexaPDF
|
|
|
294
294
|
# The ISO PDF specification differs in respect to the supported date format. When converting
|
|
295
295
|
# to a date string, a format suitable for both is output.
|
|
296
296
|
#
|
|
297
|
-
# See:
|
|
297
|
+
# See: PDF2.0 s7.9.4, ADB1.7 3.8.3
|
|
298
298
|
def serialize_time(obj)
|
|
299
299
|
zone = obj.strftime("%z'")
|
|
300
300
|
if zone == "+0000'"
|
|
@@ -330,14 +330,14 @@ module HexaPDF
|
|
|
330
330
|
end
|
|
331
331
|
end
|
|
332
332
|
|
|
333
|
-
# See:
|
|
333
|
+
# See: PDF2.0 s7.3.10
|
|
334
334
|
def serialize_hexapdf_reference(obj)
|
|
335
335
|
"#{obj.oid} #{obj.gen} R"
|
|
336
336
|
end
|
|
337
337
|
|
|
338
338
|
# Serializes the streams dictionary and its stream.
|
|
339
339
|
#
|
|
340
|
-
# See:
|
|
340
|
+
# See: PDF2.0 s7.3.8
|
|
341
341
|
def serialize_hexapdf_stream(obj)
|
|
342
342
|
if !obj.indirect?
|
|
343
343
|
raise HexaPDF::Error, "Can't serialize PDF stream without object identifier"
|
data/lib/hexapdf/stream.rb
CHANGED
|
@@ -88,7 +88,9 @@ module HexaPDF
|
|
|
88
88
|
|
|
89
89
|
# Returns a Fiber for getting at the data of the stream represented by this object.
|
|
90
90
|
def fiber(chunk_size = 0)
|
|
91
|
-
if @source.kind_of?(Proc)
|
|
91
|
+
if @source.kind_of?(FiberDoubleForString)
|
|
92
|
+
@source.dup
|
|
93
|
+
elsif @source.kind_of?(Proc)
|
|
92
94
|
FiberWithLength.new(@length, &@source)
|
|
93
95
|
elsif @source.kind_of?(String)
|
|
94
96
|
HexaPDF::Filter.source_from_file(@source, pos: @offset || 0, length: @length || -1,
|
|
@@ -134,7 +136,7 @@ module HexaPDF
|
|
|
134
136
|
#
|
|
135
137
|
# Note that support for external streams (/F, /FFilter, /FDecodeParms) is not yet implemented!
|
|
136
138
|
#
|
|
137
|
-
# See:
|
|
139
|
+
# See: PDF2.0 s7.3.8, Dictionary
|
|
138
140
|
class Stream < Dictionary
|
|
139
141
|
|
|
140
142
|
define_field :Length, type: Integer # not required, will be auto-filled when writing
|
data/lib/hexapdf/tokenizer.rb
CHANGED
|
@@ -42,7 +42,7 @@ module HexaPDF
|
|
|
42
42
|
|
|
43
43
|
# Tokenizes the content of an IO object following the PDF rules.
|
|
44
44
|
#
|
|
45
|
-
# See:
|
|
45
|
+
# See: PDF2.0 s7.2
|
|
46
46
|
class Tokenizer
|
|
47
47
|
|
|
48
48
|
# Represents a keyword in a PDF file.
|
|
@@ -61,12 +61,12 @@ module HexaPDF
|
|
|
61
61
|
|
|
62
62
|
# Characters defined as whitespace.
|
|
63
63
|
#
|
|
64
|
-
# See:
|
|
64
|
+
# See: PDF2.0 s7.2.2
|
|
65
65
|
WHITESPACE = " \n\r\0\t\f"
|
|
66
66
|
|
|
67
67
|
# Characters defined as delimiters.
|
|
68
68
|
#
|
|
69
|
-
# See:
|
|
69
|
+
# See: PDF2.0 s7.2.2
|
|
70
70
|
DELIMITER = "()<>{}/[]%"
|
|
71
71
|
|
|
72
72
|
WHITESPACE_MULTI_RE = /[#{WHITESPACE}]+/ # :nodoc:
|
|
@@ -171,7 +171,7 @@ module HexaPDF
|
|
|
171
171
|
# If the +allow_end_array_token+ argument is +true+, the ']' token is permitted to facilitate
|
|
172
172
|
# the use of this method during array parsing.
|
|
173
173
|
#
|
|
174
|
-
# See:
|
|
174
|
+
# See: PDF2.0 s7.3
|
|
175
175
|
def next_object(allow_end_array_token: false, allow_keyword: false)
|
|
176
176
|
token = next_token
|
|
177
177
|
|
|
@@ -231,7 +231,7 @@ module HexaPDF
|
|
|
231
231
|
# If a problem is detected, yields to caller where the argument +recoverable+ is truthy if the
|
|
232
232
|
# problem is recoverable.
|
|
233
233
|
#
|
|
234
|
-
# See:
|
|
234
|
+
# See: PDF2.0 s7.5.4
|
|
235
235
|
def next_xref_entry #:yield: recoverable
|
|
236
236
|
prepare_string_scanner(20)
|
|
237
237
|
if !@ss.skip(/(\d{10}) (\d{5}) ([nf])(?: \r| \n|\r\n|(\r\r|\r|\n))/) || @ss[4]
|
|
@@ -242,7 +242,7 @@ module HexaPDF
|
|
|
242
242
|
|
|
243
243
|
# Skips all whitespace at the current position.
|
|
244
244
|
#
|
|
245
|
-
# See:
|
|
245
|
+
# See: PDF2.0 s7.2.2
|
|
246
246
|
def skip_whitespace
|
|
247
247
|
prepare_string_scanner
|
|
248
248
|
prepare_string_scanner while @ss.skip(WHITESPACE_MULTI_RE)
|
|
@@ -268,7 +268,7 @@ module HexaPDF
|
|
|
268
268
|
|
|
269
269
|
# Parses the keyword at the current position.
|
|
270
270
|
#
|
|
271
|
-
# See:
|
|
271
|
+
# See: PDF2.0 s7.2
|
|
272
272
|
def parse_keyword
|
|
273
273
|
str = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
|
|
274
274
|
TOKEN_CACHE[str]
|
|
@@ -278,12 +278,12 @@ module HexaPDF
|
|
|
278
278
|
|
|
279
279
|
# Parses the number (integer or real) at the current position.
|
|
280
280
|
#
|
|
281
|
-
# See:
|
|
281
|
+
# See: PDF2.0 s7.3.3
|
|
282
282
|
def parse_number
|
|
283
283
|
val = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
|
|
284
284
|
if val.match?(/\A[+-]?\d++(?!\.)\z/)
|
|
285
285
|
tmp = val.to_i
|
|
286
|
-
# Handle object references, see
|
|
286
|
+
# Handle object references, see PDF2.0 s7.3.10
|
|
287
287
|
prepare_string_scanner(10)
|
|
288
288
|
if @ss.scan(REFERENCE_RE)
|
|
289
289
|
tmp = if tmp > 0
|
|
@@ -315,7 +315,7 @@ module HexaPDF
|
|
|
315
315
|
|
|
316
316
|
# Parses the literal string at the current position.
|
|
317
317
|
#
|
|
318
|
-
# See:
|
|
318
|
+
# See: PDF2.0 s7.3.4.2
|
|
319
319
|
def parse_literal_string
|
|
320
320
|
@ss.pos += 1
|
|
321
321
|
str = "".b
|
|
@@ -358,7 +358,7 @@ module HexaPDF
|
|
|
358
358
|
|
|
359
359
|
# Parses the hex string at the current position.
|
|
360
360
|
#
|
|
361
|
-
# See:
|
|
361
|
+
# See: PDF2.0 s7.3.4.3
|
|
362
362
|
def parse_hex_string
|
|
363
363
|
@ss.pos += 1
|
|
364
364
|
data = scan_until(/(?=>)/)
|
|
@@ -373,7 +373,7 @@ module HexaPDF
|
|
|
373
373
|
|
|
374
374
|
# Parses the name at the current position.
|
|
375
375
|
#
|
|
376
|
-
# See:
|
|
376
|
+
# See: PDF2.0 s7.3.5
|
|
377
377
|
def parse_name
|
|
378
378
|
@ss.pos += 1
|
|
379
379
|
str = scan_until(WHITESPACE_OR_DELIMITER_RE) || @ss.scan(/.*/)
|
|
@@ -389,7 +389,7 @@ module HexaPDF
|
|
|
389
389
|
#
|
|
390
390
|
# It is assumed that the initial '[' has already been scanned.
|
|
391
391
|
#
|
|
392
|
-
# See:
|
|
392
|
+
# See: PDF2.0 s7.3.6
|
|
393
393
|
def parse_array
|
|
394
394
|
result = []
|
|
395
395
|
while true
|
|
@@ -408,7 +408,7 @@ module HexaPDF
|
|
|
408
408
|
#
|
|
409
409
|
# It is assumed that the initial '<<' has already been scanned.
|
|
410
410
|
#
|
|
411
|
-
# See:
|
|
411
|
+
# See: PDF2.0 s7.3.7
|
|
412
412
|
def parse_dictionary
|
|
413
413
|
result = {}
|
|
414
414
|
while true
|
|
@@ -61,7 +61,7 @@ module HexaPDF
|
|
|
61
61
|
# By subclassing and overriding the necessary methods it is possible to define custom
|
|
62
62
|
# appearances.
|
|
63
63
|
#
|
|
64
|
-
# See:
|
|
64
|
+
# See: PDF2.0 s12.5.5, s12.7
|
|
65
65
|
class AppearanceGenerator
|
|
66
66
|
|
|
67
67
|
# Creates a new instance for the given +widget+.
|
|
@@ -200,7 +200,7 @@ module HexaPDF
|
|
|
200
200
|
def create_text_appearances
|
|
201
201
|
default_resources = @document.acro_form.default_resources
|
|
202
202
|
font, font_size, font_color = retrieve_font_information(default_resources)
|
|
203
|
-
style = HexaPDF::Layout::Style.new(font: font, fill_color: font_color)
|
|
203
|
+
style = HexaPDF::Layout::Style.new(font: font, font_size: font_size, fill_color: font_color)
|
|
204
204
|
border_style = @widget.border_style
|
|
205
205
|
padding = [1, border_style.width].max
|
|
206
206
|
|
|
@@ -226,8 +226,6 @@ module HexaPDF
|
|
|
226
226
|
|
|
227
227
|
canvas = form.canvas
|
|
228
228
|
apply_background_and_border(border_style, canvas)
|
|
229
|
-
style.font_size = calculate_font_size(font, font_size, height, border_style)
|
|
230
|
-
style.clear_cache
|
|
231
229
|
|
|
232
230
|
canvas.marked_content_sequence(:Tx) do
|
|
233
231
|
if @field.field_value || @field.concrete_field_type == :list_box
|
|
@@ -362,6 +360,7 @@ module HexaPDF
|
|
|
362
360
|
def draw_single_line_text(canvas, width, height, style, padding)
|
|
363
361
|
value, text_color = apply_javascript_formatting(@field.field_value)
|
|
364
362
|
style.fill_color = text_color if text_color
|
|
363
|
+
calculate_and_apply_font_size(value, style, width, height, padding)
|
|
365
364
|
fragment = HexaPDF::Layout::TextFragment.create(value, style)
|
|
366
365
|
|
|
367
366
|
if @field.concrete_field_type == :comb_text_field
|
|
@@ -431,6 +430,11 @@ module HexaPDF
|
|
|
431
430
|
|
|
432
431
|
# Draws the visible option items of the list box in the widget's rectangle.
|
|
433
432
|
def draw_list_box(canvas, width, height, style, padding)
|
|
433
|
+
if style.font_size == 0
|
|
434
|
+
style.font_size = 12 # Seems to be Adobe's default
|
|
435
|
+
style.clear_cache
|
|
436
|
+
end
|
|
437
|
+
|
|
434
438
|
option_items = @field.option_items
|
|
435
439
|
top_index = @field.list_box_top_index
|
|
436
440
|
items = [Layout::TextFragment.create(option_items[top_index..-1].join("\n"), style)]
|
|
@@ -475,24 +479,20 @@ module HexaPDF
|
|
|
475
479
|
[font, font_size, font_color]
|
|
476
480
|
end
|
|
477
481
|
|
|
478
|
-
# Calculates the font size for text fields based on the font
|
|
479
|
-
# appearance string, the annotation rectangle's height and
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
end
|
|
493
|
-
else
|
|
494
|
-
font_size
|
|
495
|
-
end
|
|
482
|
+
# Calculates the font size for single line text fields using auto-sizing, based on the font
|
|
483
|
+
# and font size of the default appearance string, the annotation rectangle's height and
|
|
484
|
+
# width and the given padding. The font size is then applied to the provided style object.
|
|
485
|
+
def calculate_and_apply_font_size(value, style, width, height, padding)
|
|
486
|
+
return if style.font_size != 0
|
|
487
|
+
|
|
488
|
+
font = style.font
|
|
489
|
+
unit_font_size = (font.wrapped_font.bounding_box[3] - font.wrapped_font.bounding_box[1]) *
|
|
490
|
+
font.scaling_factor / 1000.0
|
|
491
|
+
# The constant factor was found empirically by checking what Adobe Reader etc. do
|
|
492
|
+
style.font_size = (height - 2 * padding) / unit_font_size * 0.85
|
|
493
|
+
fragment = HexaPDF::Layout::TextFragment.create(value, style)
|
|
494
|
+
style.font_size = [style.font_size, style.font_size * (width - 4 * padding) / fragment.width].min
|
|
495
|
+
style.clear_cache
|
|
496
496
|
end
|
|
497
497
|
|
|
498
498
|
# Handles Javascript formatting routines for single-line text fields.
|
|
@@ -81,7 +81,7 @@ module HexaPDF
|
|
|
81
81
|
# :radios_in_unison:: A group of radio buttons with the same value for the on state will turn
|
|
82
82
|
# on or off in unison.
|
|
83
83
|
#
|
|
84
|
-
# See:
|
|
84
|
+
# See: PDF2.0 s12.7.4.2
|
|
85
85
|
class ButtonField < Field
|
|
86
86
|
|
|
87
87
|
define_type :XXAcroFormField
|