hexapdf 0.27.0 → 0.28.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +59 -1
  3. data/examples/019-acro_form.rb +14 -3
  4. data/examples/023-images.rb +30 -0
  5. data/lib/hexapdf/cli/info.rb +5 -1
  6. data/lib/hexapdf/cli/inspect.rb +2 -2
  7. data/lib/hexapdf/cli/split.rb +2 -2
  8. data/lib/hexapdf/configuration.rb +1 -2
  9. data/lib/hexapdf/content/canvas.rb +8 -3
  10. data/lib/hexapdf/dictionary.rb +1 -5
  11. data/lib/hexapdf/document.rb +6 -10
  12. data/lib/hexapdf/filter/ascii85_decode.rb +1 -1
  13. data/lib/hexapdf/importer.rb +32 -27
  14. data/lib/hexapdf/layout/list_box.rb +1 -5
  15. data/lib/hexapdf/object.rb +5 -0
  16. data/lib/hexapdf/parser.rb +13 -0
  17. data/lib/hexapdf/revision.rb +15 -12
  18. data/lib/hexapdf/revisions.rb +4 -0
  19. data/lib/hexapdf/tokenizer.rb +14 -8
  20. data/lib/hexapdf/type/acro_form/appearance_generator.rb +174 -128
  21. data/lib/hexapdf/type/acro_form/button_field.rb +5 -3
  22. data/lib/hexapdf/type/acro_form/choice_field.rb +2 -0
  23. data/lib/hexapdf/type/acro_form/field.rb +11 -5
  24. data/lib/hexapdf/type/acro_form/form.rb +33 -7
  25. data/lib/hexapdf/type/acro_form/signature_field.rb +2 -0
  26. data/lib/hexapdf/type/acro_form/text_field.rb +12 -2
  27. data/lib/hexapdf/type/annotations/widget.rb +3 -0
  28. data/lib/hexapdf/type/font_true_type.rb +14 -0
  29. data/lib/hexapdf/type/object_stream.rb +2 -2
  30. data/lib/hexapdf/type/outline.rb +1 -1
  31. data/lib/hexapdf/type/page.rb +56 -46
  32. data/lib/hexapdf/version.rb +1 -1
  33. data/lib/hexapdf/writer.rb +2 -3
  34. data/test/hexapdf/content/test_canvas.rb +5 -0
  35. data/test/hexapdf/document/test_pages.rb +2 -2
  36. data/test/hexapdf/encryption/test_aes.rb +1 -1
  37. data/test/hexapdf/filter/test_predictor.rb +0 -1
  38. data/test/hexapdf/layout/test_box.rb +2 -1
  39. data/test/hexapdf/layout/test_column_box.rb +1 -1
  40. data/test/hexapdf/layout/test_list_box.rb +1 -1
  41. data/test/hexapdf/test_document.rb +2 -8
  42. data/test/hexapdf/test_importer.rb +13 -6
  43. data/test/hexapdf/test_parser.rb +17 -0
  44. data/test/hexapdf/test_revision.rb +15 -14
  45. data/test/hexapdf/test_revisions.rb +43 -0
  46. data/test/hexapdf/test_stream.rb +1 -1
  47. data/test/hexapdf/test_tokenizer.rb +3 -4
  48. data/test/hexapdf/test_writer.rb +3 -3
  49. data/test/hexapdf/type/acro_form/test_appearance_generator.rb +135 -56
  50. data/test/hexapdf/type/acro_form/test_button_field.rb +6 -1
  51. data/test/hexapdf/type/acro_form/test_choice_field.rb +4 -0
  52. data/test/hexapdf/type/acro_form/test_field.rb +4 -4
  53. data/test/hexapdf/type/acro_form/test_form.rb +18 -0
  54. data/test/hexapdf/type/acro_form/test_signature_field.rb +4 -0
  55. data/test/hexapdf/type/acro_form/test_text_field.rb +13 -0
  56. data/test/hexapdf/type/signature/common.rb +3 -1
  57. data/test/hexapdf/type/test_font_true_type.rb +20 -0
  58. data/test/hexapdf/type/test_object_stream.rb +2 -1
  59. data/test/hexapdf/type/test_outline.rb +3 -0
  60. data/test/hexapdf/type/test_page.rb +67 -30
  61. data/test/hexapdf/type/test_page_tree_node.rb +4 -2
  62. metadata +46 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 57b852a0648f47b5e3443e9b8aa4480a7a8cd187085c0067baf86af14cee7d9a
4
- data.tar.gz: 6e28f748f1d8e089b6585748e2f56c3adf79669cf9b00d0d9fe8d83a74e97063
3
+ metadata.gz: 874e09b094ea4e793d1d123cfbaded6d1cc5ba93af3b57587e9faf402786a30f
4
+ data.tar.gz: c1eed6a778936cd360b4f1878a18abc3e5727c1f36b5eab4838a9a72817dff7b
5
5
  SHA512:
6
- metadata.gz: a1d2cd75344a3fc9cd54f0dafa6a0fd23b9821d67f54581090a6db2820f7e04a0989da01d7b5f9c558e02bb57cf254f4ebaab791b950d4dacc1e02a29c5f3844
7
- data.tar.gz: ea758cdeb96d8282e3c6581785b690b8d79e9bee318a7cf80bfb4201b47fc0f1d721a78b5c703b32f20b8671211b604e78f83dec6768187321fef46e5dac3a81
6
+ metadata.gz: b66a7587a239acbeb9ebbb20f851b2fa7738c5a4c2f8a95ae7ae3d7419b84ae37b5d1fb48a6b7023bff0df3e1728c395b5351c9c42c05707fabd5a1722e2b88a
7
+ data.tar.gz: 62ac7d070bb8ae3426685af497a047096dd32dc16a102baa961512652222b388198561776fc1227be69bdd0713183d91fa25e411262eec34a29227aae9723d5c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,61 @@
1
+ ## 0.28.0 - 2022-12-30
2
+
3
+ ### Added
4
+
5
+ * [HexaPDF::Type::AcroForm::AppearanceGenerator#create_push_button_appearances]
6
+ to allow customizing the behaviour
7
+ * [HexaPDF::Parser#linearized?] for determining whether a document is linearized
8
+ * Information on linearization to `hexapdf info` output
9
+ * Support for `AFNumber_Format` Javascript method to the form field appearance
10
+ generator
11
+ * Support for using fully embedded, simple TrueType fonts for drawing operations
12
+
13
+ ### Changed
14
+
15
+ * **Breaking change**: `HexaPDF::Revision#reset_objects` has been removed
16
+ * **Breaking change**: Method signature of [HexaPDF::Importer::for] has been
17
+ changed
18
+ * **Breaking change**: [HexaPDF::Type::AcroForm::Field#each_widget] now has the
19
+ default value of the argument `direct_only` set to `true` instead of `false`
20
+ * [HexaPDF::Revision#each_modified_object] to allow deleting the modified
21
+ objects from the active objects' container
22
+ * [HexaPDF::Revision#each_modified_object] to allow ignoring added object and
23
+ cross-reference stream objects
24
+ * [HexaPDF::Revisions::from_io] to merge the two revisions of a linearized PDF
25
+ * [HexaPDF::Importer] and [HexaPDF::Document#import] to make working with them
26
+ easier by allowing the import of arbitrary objects
27
+ * `HexaPDF::Type::AcroForm::Form#perform_validation` to combine fields with the
28
+ same name
29
+
30
+ ### Fixed
31
+
32
+ * [HexaPDF::Type::AcroForm::AppearanceGenerator#create_check_box_appearances] to
33
+ correctly handle a field value of `nil`
34
+ * Return value of `#type` method for all AcroForm field classes
35
+ * [HexaPDF::Type::Page#flatten_annotations] to work correctly in case no
36
+ annotations are on the page
37
+ * [HexaPDF::Type::AcroForm::ButtonField#create_appearances] to avoid creating
38
+ appearances in case of as-yet unresolved references to existing appearances
39
+ * [HexaPDF::Type::AcroForm::TextField#create_appearances] to avoid creating
40
+ appearances in case of pre-existing ones
41
+ * `HexaPDF::Tokenizer#parse_number` to treat invalid indirect object references
42
+ with an object number of 0 as null values
43
+ * [HexaPDF::Type::AcroForm::AppearanceGenerator] to handle empty appearance
44
+ characteristics dictionary marker style strings
45
+ * Writing of encrypted files containing two or more revisions
46
+ * Generation of object streams to never allow storing the catalog object to
47
+ avoid problems with certain viewers
48
+ * `HexaPDF::Type::Outline#perform_validation` to not show validation error when
49
+ `/Count` is zero
50
+ * Writing of documents with two or more revisions in non-incremental mode when
51
+ `optimize: true` is used and the original document used cross-reference tables
52
+ * [HexaPDF::Type::AcroForm::AppearanceGenerator] to take a widget's rotation
53
+ value into account
54
+ * [HexaPDF::Type::Page#flatten_annotations] to correctly flatten all
55
+ annotations, including ones with custom rotations
56
+ * [HexaPDF::Type::Page#rotate] to also rotate annotations
57
+
58
+
1
59
  ## 0.27.0 - 2022-11-18
2
60
 
3
61
  ### Added
@@ -236,7 +294,7 @@
236
294
  ### Added
237
295
 
238
296
  - [HexaPDF::Composer#create_stamp] for creating a form Xobject
239
- - [HexaPDF::Revision#reset_objects] for deleting all live loaded and added
297
+ - `HexaPDF::Revision#reset_objects` for deleting all live loaded and added
240
298
  objects
241
299
  - Support for removing or flattening annotations to the `hexapdf modify` command
242
300
  - Option to CLI command `hexapdf form` to allow generation of a template file
@@ -6,6 +6,9 @@
6
6
  # This example show-cases how to create the various form field types and their
7
7
  # possible standard appearances.
8
8
  #
9
+ # Note the 'number format' text field which uses a JavaScript function for
10
+ # formatting a number.
11
+ #
9
12
  # Usage:
10
13
  # : `ruby acro_form.rb`
11
14
  #
@@ -42,13 +45,21 @@ rb = form.create_radio_button("Radio")
42
45
  end
43
46
  rb.field_value = :button0
44
47
 
45
- canvas.text("Text fields", at: [50, 450])
48
+ canvas.text("Text fields", at: [50, 480])
46
49
 
47
- canvas.text("Single line", at: [70, 420])
50
+ canvas.text("Single line", at: [70, 450])
48
51
  tx = form.create_text_field("Single Line", font_size: 16)
49
- widget = tx.create_widget(page, Rect: [200, 415, 500, 435])
52
+ widget = tx.create_widget(page, Rect: [200, 445, 500, 465])
50
53
  tx.field_value = "A sample test string!"
51
54
 
55
+ canvas.text("Number format", at: [70, 420])
56
+ tx = form.create_text_field("Number format", font_size: 16)
57
+ widget = tx.create_widget(page, Rect: [200, 415, 500, 435])
58
+ widget[:AA] = {
59
+ F: {S: :JavaScript, JS: 'AFNumber_Format(2, 2, 0, 0, "EUR ", true);'},
60
+ }
61
+ tx.field_value = "123456,789"
62
+
52
63
  canvas.text("Multiline", at: [70, 390])
53
64
  tx = form.create_multiline_text_field("Multiline", font_size: 0, align: :right)
54
65
  widget = tx.create_widget(page, Rect: [200, 325, 500, 405])
@@ -0,0 +1,30 @@
1
+ # # Images
2
+ #
3
+ # This example shows how to embed images into a PDF document, directly on a
4
+ # page's canvas and through the high-level [HexaPDF::Composer].
5
+ #
6
+ # Usage:
7
+ # : `ruby images.rb`
8
+ #
9
+
10
+ require 'hexapdf'
11
+
12
+ file = File.join(__dir__, 'machupicchu.jpg')
13
+
14
+ doc = HexaPDF::Document.new
15
+ # Image only added to PDF once though used multiple times
16
+ canvas = doc.pages.add.canvas
17
+ canvas.image(file, at: [100, 500]) # auto-size based on image size
18
+ canvas.image(file, at: [100, 300], width: 100) # height based on w/h ratio
19
+ canvas.image(file, at: [300, 300], height: 100) # width based on w/h ratio
20
+ canvas.image(file, at: [100, 100], width: 300, height: 100)
21
+
22
+ HexaPDF::Composer.create('images.pdf') do |composer|
23
+ composer.image(file) # fill current rectangular region
24
+ composer.image(file, width: 100) # height based on w/h ratio
25
+ composer.image(file, height: 100) # width based on w/h ratio
26
+ composer.image(file, width: 300, height: 100)
27
+
28
+ # Add the page created above as second page
29
+ composer.document.pages << composer.document.import(doc.pages[0])
30
+ end
@@ -131,6 +131,10 @@ module HexaPDF
131
131
  output_line("Encrypted", "yes (no or wrong password given)")
132
132
  end
133
133
 
134
+ if doc.revisions.parser.linearized?
135
+ output_line("Linearized", "yes")
136
+ end
137
+
134
138
  signatures = doc.signatures.to_a
135
139
  unless signatures.empty?
136
140
  nr_sigs = signatures.count
@@ -186,7 +190,7 @@ module HexaPDF
186
190
  end
187
191
 
188
192
  def output_line(header, text) #:nodoc:
189
- puts(("#{header}:").ljust(COLUMN_WIDTH) << text.to_s)
193
+ puts("#{header}:".ljust(COLUMN_WIDTH) << text.to_s)
190
194
  end
191
195
 
192
196
  end
@@ -335,9 +335,9 @@ module HexaPDF
335
335
  # - The signature dictionary if this revision was signed
336
336
  # - The byte offset from the start of the file to the end of the revision
337
337
  def revision_information
338
- signatures = @doc.signatures.map do |sig|
338
+ signatures = @doc.signatures.to_h do |sig|
339
339
  [@doc.revisions.find {|rev| rev.object(sig) == sig }, sig]
340
- end.to_h
340
+ end
341
341
  io = @doc.revisions.parser.io
342
342
 
343
343
  startxrefs = @doc.revisions.map {|rev| rev.trailer[:Prev] }
@@ -131,8 +131,8 @@ module HexaPDF
131
131
  @page_name_cache ||= {}
132
132
  return @page_name_cache[box] if @page_name_cache.key?(box)
133
133
 
134
- paper_size = HexaPDF::Type::Page::PAPER_SIZE.find do |_name, box|
135
- box.each_with_index.all? {|entry, index| (entry - box[index]).abs < 5 }
134
+ paper_size = HexaPDF::Type::Page::PAPER_SIZE.find do |_name, paper_box|
135
+ paper_box.each_with_index.all? {|entry, index| (entry - paper_box[index]).abs < 5 }
136
136
  end
137
137
 
138
138
  @page_name_cache[box] =
@@ -422,8 +422,7 @@ module HexaPDF
422
422
  'encryption.filter_map' => {
423
423
  Standard: 'HexaPDF::Encryption::StandardSecurityHandler',
424
424
  },
425
- 'encryption.sub_filter_map' => {
426
- },
425
+ 'encryption.sub_filter_map' => {},
427
426
  'filter.map' => {
428
427
  ASCIIHexDecode: 'HexaPDF::Filter::ASCIIHexDecode',
429
428
  AHx: 'HexaPDF::Filter::ASCIIHexDecode',
@@ -1626,17 +1626,22 @@ module HexaPDF
1626
1626
  end
1627
1627
  return obj if obj.width == 0 || obj.height == 0
1628
1628
 
1629
+ left, bottom = *at
1629
1630
  width, height = calculate_dimensions(obj.width, obj.height,
1630
1631
  rwidth: width, rheight: height)
1631
1632
  if obj[:Subtype] != :Image
1632
1633
  width /= obj.box.width.to_f
1633
1634
  height /= obj.box.height.to_f
1634
- at[0] -= obj.box.left
1635
- at[1] -= obj.box.bottom
1635
+ left -= obj.box.left
1636
+ bottom -= obj.box.bottom
1636
1637
  end
1637
1638
 
1638
- transform(width, 0, 0, height, at[0], at[1]) do
1639
+ if left == 0 && bottom == 0 && width == 1 && height == 1
1639
1640
  invoke1(:Do, resources.add_xobject(obj))
1641
+ else
1642
+ transform(width, 0, 0, height, left, bottom) do
1643
+ invoke1(:Do, resources.add_xobject(obj))
1644
+ end
1640
1645
  end
1641
1646
 
1642
1647
  obj
@@ -108,11 +108,7 @@ module HexaPDF
108
108
  # The ancestor classes are also searched for such a field entry if none is found for the
109
109
  # current class.
110
110
  def self.field(name)
111
- if defined?(@fields) && @fields.key?(name)
112
- @fields[name]
113
- elsif superclass.respond_to?(:field)
114
- superclass.field(name)
115
- end
111
+ @fields&.[](name) || superclass.field(name)
116
112
  end
117
113
 
118
114
  # :call-seq:
@@ -164,6 +164,8 @@ module HexaPDF
164
164
  def initialize(io: nil, decryption_opts: {}, config: {})
165
165
  @config = Configuration.with_defaults(config)
166
166
  @version = '1.2'
167
+ @cache = Hash.new {|h, k| h[k] = {} }
168
+ @listeners = {}
167
169
 
168
170
  @revisions = Revisions.from_io(self, io)
169
171
  @security_handler = if encrypted? && @config['document.auto_decrypt']
@@ -171,9 +173,6 @@ module HexaPDF
171
173
  else
172
174
  nil
173
175
  end
174
-
175
- @listeners = {}
176
- @cache = Hash.new {|h, k| h[k] = {} }
177
176
  end
178
177
 
179
178
  # :call-seq:
@@ -251,19 +250,16 @@ module HexaPDF
251
250
  # :call-seq:
252
251
  # doc.import(obj) -> imported_object
253
252
  #
254
- # Imports the given, with a different document associated PDF object and returns the imported
253
+ # Imports the given object from a different HexaPDF::Document instance and returns the imported
255
254
  # object.
256
255
  #
257
256
  # If the same argument is provided in multiple invocations, the import is done only once and
258
- # the previously imoprted object is returned.
257
+ # the previously imported object is returned.
259
258
  #
260
259
  # See: Importer
261
260
  def import(obj)
262
- if !obj.kind_of?(HexaPDF::Object) || !obj.document? || obj.document == self
263
- raise ArgumentError, "Importing only works for PDF objects associated " \
264
- "with another document"
265
- end
266
- HexaPDF::Importer.for(source: obj.document, destination: self).import(obj)
261
+ source = (obj.kind_of?(HexaPDF::Object) ? obj.document : nil)
262
+ HexaPDF::Importer.for(self).import(obj, source: source)
267
263
  end
268
264
 
269
265
  # Wraps the given object inside a HexaPDF::Object class which allows one to use
@@ -49,7 +49,7 @@ module HexaPDF
49
49
  module ASCII85Decode
50
50
 
51
51
  VALUE_TO_CHAR = {} #:nodoc:
52
- (0..84).each do |i|
52
+ 85.times do |i|
53
53
  VALUE_TO_CHAR[i] = (i + 33).chr
54
54
  end
55
55
 
@@ -60,64 +60,69 @@ module HexaPDF
60
60
 
61
61
  end
62
62
 
63
- # Returns the Importer object for copying objects from the +source+ to the +destination+
64
- # document.
65
- def self.for(source:, destination:)
63
+ # Returns the Importer object for copying objects to the +destination+ document.
64
+ def self.for(destination)
66
65
  @map ||= {}
67
- @map.keep_if {|_, v| v.source.weakref_alive? && v.destination.weakref_alive? }
68
- source = NullableWeakRef.new(source)
66
+ @map.keep_if {|_, v| v.destination.weakref_alive? }
69
67
  destination = NullableWeakRef.new(destination)
70
- @map[[source.hash, destination.hash]] ||= new(source: source, destination: destination)
68
+ @map[destination.hash] ||= new(destination)
71
69
  end
72
70
 
73
71
  private_class_method :new
74
72
 
75
- attr_reader :source, :destination #:nodoc:
73
+ attr_reader :destination #:nodoc:
76
74
 
77
- # Initializes a new importer that can import objects from the +source+ document to the
78
- # +destination+ document.
79
- def initialize(source:, destination:)
80
- @source = source
75
+ # Initializes a new importer that can import objects to the +destination+ document.
76
+ def initialize(destination)
81
77
  @destination = destination
82
78
  @mapper = {}
83
79
  end
84
80
 
85
- # Imports the given +object+ from the source to the destination object and returns the
86
- # imported object.
81
+ SourceWrapper = Struct.new(:source) #:nodoc:
82
+
83
+ # Imports the given +object+ to the destination object and returns the imported object.
87
84
  #
88
85
  # Note: Indirect objects are automatically added to the destination document but direct or
89
86
  # simple objects are not.
90
87
  #
91
- # An error is raised if the object doesn't belong to the +source+ document.
92
- def import(object)
88
+ # The +source+ argument should be +nil+ or set to the source document of the imported object. If
89
+ # it is +nil+, the source document is dynamically identified. If this identification is not
90
+ # possible and the source document would be needed, an error is raised.
91
+ def import(object, source: nil)
92
+ internal_import(object, SourceWrapper.new(source))
93
+ end
94
+
95
+ private
96
+
97
+ # Does the actual importing of the given +object+, using +wrapper+ to store/use the source
98
+ # document.
99
+ def internal_import(object, wrapper)
93
100
  mapped_object = @mapper[object.data]&.__getobj__ if object.kind_of?(HexaPDF::Object)
94
- if object.kind_of?(HexaPDF::Object) && object.document? && @source != object.document
95
- raise HexaPDF::Error, "Import error: Incorrect document object for importer"
96
- elsif mapped_object && !mapped_object.null?
101
+ if mapped_object && !mapped_object.null?
97
102
  if object.class != mapped_object.class
98
103
  mapped_object = @destination.wrap(mapped_object, type: object.class)
99
104
  end
100
105
  mapped_object
101
106
  else
102
- duplicate(object)
107
+ duplicate(object, wrapper)
103
108
  end
104
109
  end
105
110
 
106
- private
107
-
108
111
  # Recursively duplicates the object.
109
112
  #
110
113
  # PDF objects are automatically added to the destination document if they are indirect objects
111
114
  # in the source document.
112
- def duplicate(object)
115
+ def duplicate(object, wrapper)
113
116
  case object
114
117
  when Hash
115
- object.transform_values {|v| duplicate(v) }
118
+ object.transform_values {|v| duplicate(v, wrapper) }
116
119
  when Array
117
- object.map {|v| duplicate(v) }
120
+ object.map {|v| duplicate(v, wrapper) }
118
121
  when HexaPDF::Reference
119
- import(@source.object(object))
122
+ raise HexaPDF::Error, "Import error: No source document specified" unless wrapper.source
123
+ internal_import(wrapper.source.object(object), wrapper)
120
124
  when HexaPDF::Object
125
+ wrapper.source ||= object.document
121
126
  if object.type == :Catalog || object.type == :Pages
122
127
  @mapper[object.data] = nil
123
128
  elsif (mapped_object = @mapper[object.data]&.__getobj__) && !mapped_object.null?
@@ -132,8 +137,8 @@ module HexaPDF
132
137
  @destination.add(obj) if object.indirect?
133
138
 
134
139
  obj.data.stream = obj.data.stream.dup if obj.data.stream.kind_of?(String)
135
- obj.data.value = duplicate(obj.data.value)
136
- obj.data.value.update(duplicate(object.copy_inherited_values)) if object.type == :Page
140
+ obj.data.value = duplicate(obj.data.value, wrapper)
141
+ obj.data.value.update(duplicate(object.copy_inherited_values, wrapper)) if object.type == :Page
137
142
  obj
138
143
  end
139
144
  when String
@@ -207,7 +207,7 @@ module HexaPDF
207
207
  @results = []
208
208
  @results_item_marker_x = []
209
209
 
210
- @children.each_with_index do |child, index|
210
+ @children.each do |child|
211
211
  shape = Geom2D::Polygon([left, top - height],
212
212
  [left + width, top - height],
213
213
  [left + width, top],
@@ -217,11 +217,7 @@ module HexaPDF
217
217
  remove_indent_from_frame_shape(shape) unless shape.polygons.empty?
218
218
  end
219
219
 
220
- #p [:list, left, width, shape]
221
-
222
220
  item_frame = Frame.new(item_frame_left, top - height, item_frame_width, height, shape: shape)
223
-
224
- #p [index, item_frame.x, @results_item_marker_x]
225
221
  @results_item_marker_x << item_frame.x - content_indentation
226
222
 
227
223
  box_fitter = BoxFitter.new([item_frame])
@@ -159,6 +159,11 @@ module HexaPDF
159
159
  object
160
160
  end
161
161
 
162
+ # Returns +nil+ to end the recursion for field searching in Dictionary.field.
163
+ def self.field(_name)
164
+ nil
165
+ end
166
+
162
167
  # The wrapped HexaPDF::PDFData value.
163
168
  #
164
169
  # This attribute is not part of the public API!
@@ -70,6 +70,19 @@ module HexaPDF
70
70
  !@reconstructed_revision.nil?
71
71
  end
72
72
 
73
+ # Returns +true+ if the PDF file is a linearized file.
74
+ def linearized?
75
+ @linearized ||=
76
+ begin
77
+ @tokenizer.pos = @header_offset
78
+ 3.times { @tokenizer.next_token } # parse: oid gen obj
79
+ obj = @tokenizer.next_object
80
+ obj.kind_of?(Hash) && obj.key?(:Linearized)
81
+ rescue MalformedPDFError
82
+ false
83
+ end
84
+ end
85
+
73
86
  # Loads the indirect (potentially compressed) object specified by the given cross-reference
74
87
  # entry.
75
88
  #
@@ -229,16 +229,22 @@ module HexaPDF
229
229
  end
230
230
 
231
231
  # :call-seq:
232
- # revision.each_modified_object {|obj| block } -> revision
233
- # revision.each_modified_object -> Enumerator
232
+ # revision.each_modified_object(delete: false, all: false) {|obj| block } -> revision
233
+ # revision.each_modified_object(delete: false, all: false) -> Enumerator
234
234
  #
235
- # Calls the given block once for each object that has been modified since it was loaded. Deleted
236
- # object and cross-reference streams are ignored.
235
+ # Calls the given block once for each object that has been modified since it was loaded. Added
236
+ # or deleted object and cross-reference streams as well as signature dictionaries are ignored.
237
+ #
238
+ # +delete+:: If the +delete+ argument is set to +true+, each modified object is deleted from the
239
+ # active objects.
240
+ #
241
+ # +all+:: If the +all+ argument is set to +true+, added object and cross-reference streams are
242
+ # also yielded.
237
243
  #
238
244
  # Note that this also means that for revisions without an associated cross-reference section all
239
245
  # loaded objects will be yielded.
240
- def each_modified_object
241
- return to_enum(__method__) unless block_given?
246
+ def each_modified_object(delete: false, all: false)
247
+ return to_enum(__method__, delete: delete, all: all) unless block_given?
242
248
 
243
249
  @objects.each do |oid, gen, obj|
244
250
  if @xref_section.entry?(oid, gen)
@@ -259,20 +265,17 @@ module HexaPDF
259
265
  end
260
266
  next if values_unchanged && streams_are_same
261
267
  end
268
+ elsif !all && (obj.type == :XRef || obj.type == :ObjStm)
269
+ next
262
270
  end
263
271
 
264
272
  yield(obj)
273
+ @objects.delete(oid) if delete
265
274
  end
266
275
 
267
276
  self
268
277
  end
269
278
 
270
- # Resets the revision by deleting all loaded and added objects from it.
271
- def reset_objects
272
- @objects = HexaPDF::Utils::ObjectHash.new
273
- @all_objects_loaded = false
274
- end
275
-
276
279
  private
277
280
 
278
281
  # Loads a single object from the associated cross-reference section.
@@ -93,6 +93,10 @@ module HexaPDF
93
93
  seen_xref_offsets[stm] = true
94
94
  end
95
95
 
96
+ if parser.linearized? && !trailer.key?(:Prev)
97
+ merge_revision = offset
98
+ end
99
+
96
100
  if merge_revision == offset
97
101
  xref_section.merge!(revisions.first.xref_section)
98
102
  offset = trailer[:Prev] # Get possible next offset before overwriting trailer
@@ -285,7 +285,14 @@ module HexaPDF
285
285
  tmp = val.to_i
286
286
  # Handle object references, see PDF1.7 s7.3.10
287
287
  prepare_string_scanner(10)
288
- tmp = Reference.new(tmp, @ss[1].to_i) if tmp > 0 && @ss.scan(REFERENCE_RE)
288
+ if @ss.scan(REFERENCE_RE)
289
+ tmp = if tmp > 0
290
+ Reference.new(tmp, @ss[1].to_i)
291
+ else
292
+ maybe_raise("Invalid indirect object reference (#{tmp},#{@ss[1].to_i})")
293
+ nil
294
+ end
295
+ end
289
296
  tmp
290
297
  elsif val.match?(/\A[+-]?(?:\d+\.\d*|\.\d+)\z/)
291
298
  val << '0' if val.getbyte(-1) == 46 # dot '.'
@@ -315,21 +322,20 @@ module HexaPDF
315
322
  parentheses = 1
316
323
 
317
324
  while parentheses != 0
318
- data = scan_until(/([()\\\r])/)
319
- char = @ss[1]
325
+ data = scan_until(/[()\\\r]/)
320
326
  unless data
321
327
  raise HexaPDF::MalformedPDFError.new("Unclosed literal string found", pos: pos)
322
328
  end
323
329
 
324
330
  str << data
325
331
  prepare_string_scanner if @ss.eos?
326
- case char
327
- when '(' then parentheses += 1
328
- when ')' then parentheses -= 1
329
- when "\r"
332
+ case @ss.string.getbyte(@ss.pos - 1)
333
+ when 41 then parentheses -= 1 # )
334
+ when 40 then parentheses += 1 # (
335
+ when 13 # \r
330
336
  str[-1] = "\n"
331
337
  @ss.pos += 1 if @ss.peek(1) == "\n"
332
- when '\\'
338
+ when 92 # \\
333
339
  str.chop!
334
340
  byte = @ss.get_byte
335
341
  if (data = LITERAL_STRING_ESCAPE_MAP[byte])