combine_pdf 1.0.3 → 1.0.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cb506a3a76c6eb84531ff5069cd3286147dc8fac
4
- data.tar.gz: afb908cfd43b301bfabb3bc53bceccce2e82c4ac
3
+ metadata.gz: a262d8592dbe90e0a4930aebadda8866b731a586
4
+ data.tar.gz: fdf9a2877028b673f55d23741f8c8f59af647a4f
5
5
  SHA512:
6
- metadata.gz: 59d1e37087daed2e4f84ecef8bc2d817ddf14a25ec06f47748739d83d1732a206d6ede05ea3531ea444668d28295d891bd43bc6575413ad00217b76ed610ff47
7
- data.tar.gz: 10a6f92d6c37d966ac5d6df0886961237227e1264726bb609fdb33083d1b6a816672f43996123e3e747ac42e227a1a8b211fae42f86dcb90e4b1e7405fb72d5d
6
+ metadata.gz: 763aa425d24ef58b83717123f4ef7c962d2ecd083a39275df136950872e286af49d0efbc8f8d2a2f4cf0967439848c151e587073497a8ff1a7fd9b0f9ef42d7f
7
+ data.tar.gz: 1c86313e09d88a07e4a2ee43fd53ce445b784770140a6892854d8f50d9dff9140c5fd993ae4d50b54ffa12f0cea6fb706a1026e18304185ccdedae3ceee0fbcb
@@ -52,6 +52,7 @@ module CombinePDF
52
52
  @forms_object = {}.dup
53
53
  @metadata = nil
54
54
  @strings_dictionary = {}.dup # all strings are one string
55
+ @resolution_hash = {}.dup
55
56
  @version = nil
56
57
  @scanner = nil
57
58
  @allow_optional_content = options[:allow_optional_content]
@@ -95,18 +96,20 @@ module CombinePDF
95
96
  # do we really need to apply to @parsed? No, there is no need.
96
97
  end
97
98
 
98
- ## search for objects streams
99
- object_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :ObjStm }
100
- unless object_streams.empty?
101
- warn 'PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects.'
102
-
103
- object_streams.each do |o|
99
+ # search for object streams and replace them "in-place"
100
+ # the in-place resolution prevents versioning errors
101
+ while (true)
102
+ found_object_streams = false
103
+ @parsed.length.times do |i|
104
+ o = @parsed[i]
105
+ next unless o.is_a?(Hash) && o[:Type] == :ObjStm
104
106
  ## un-encode (using the correct filter) the object streams
105
107
  PDFFilter.inflate_object o
106
- ## extract objects from stream to top level arry @parsed
108
+ ## extract objects from stream
107
109
  @scanner = StringScanner.new o[:raw_stream_content]
108
110
  stream_data = _parse_
109
111
  id_array = []
112
+ collection = [nil]
110
113
  while stream_data[0].is_a? (Numeric)
111
114
  id_array << stream_data.shift
112
115
  stream_data.shift
@@ -115,11 +118,42 @@ module CombinePDF
115
118
  stream_data[0] = { indirect_without_dictionary: stream_data[0] } unless stream_data[0].is_a?(Hash)
116
119
  stream_data[0][:indirect_reference_id] = id_array.shift
117
120
  stream_data[0][:indirect_generation_number] = 0
118
- @parsed << stream_data.shift
121
+ collection << (stream_data.shift)
119
122
  end
123
+ # place new objects right after this one (removing this one as well)
124
+ @parsed[i] = collection
125
+ found_object_streams = true
120
126
  end
127
+ break unless found_object_streams
128
+ @parsed.flatten!
129
+ @parsed.compact!
121
130
  end
122
131
 
132
+ #
133
+ # object_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :ObjStm }
134
+ # unless object_streams.empty?
135
+ # warn 'PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects.'
136
+ #
137
+ # object_streams.each do |o|
138
+ # ## un-encode (using the correct filter) the object streams
139
+ # PDFFilter.inflate_object o
140
+ # ## extract objects from stream to top level arry @parsed
141
+ # @scanner = StringScanner.new o[:raw_stream_content]
142
+ # stream_data = _parse_
143
+ # id_array = []
144
+ # while stream_data[0].is_a? (Numeric)
145
+ # id_array << stream_data.shift
146
+ # stream_data.shift
147
+ # end
148
+ # while id_array[0] && stream_data[0]
149
+ # stream_data[0] = { indirect_without_dictionary: stream_data[0] } unless stream_data[0].is_a?(Hash)
150
+ # stream_data[0][:indirect_reference_id] = id_array.shift
151
+ # stream_data[0][:indirect_generation_number] = 0
152
+ # @parsed << stream_data.shift
153
+ # end
154
+ # end
155
+ # end
156
+
123
157
  # serialize_objects_and_references.catalog_pages
124
158
 
125
159
  # Benchmark.bm do |bm|
@@ -149,6 +183,9 @@ module CombinePDF
149
183
  else
150
184
  @info_object = {}
151
185
  end
186
+
187
+ # we can clear the resolution hash now
188
+ @resolution_hash.clear if @resolution_hash
152
189
  # # # ## remove object streams - if they exist
153
190
  # @parsed.reject! {|obj| object_streams << obj if obj.is_a?(Hash) && obj[:Type] == :ObjStm}
154
191
  # # # ## remove XREF dictionaries - if they exist
@@ -377,7 +414,7 @@ module CombinePDF
377
414
  if @scanner.matched[-1] == 'r'
378
415
  if @scanner.skip_until(/<</)
379
416
  data = _parse_
380
- @root_object ||= {}
417
+ (@root_object ||= {}).clear
381
418
  @root_object[data.shift] = data.shift while data[0]
382
419
  end
383
420
  ##########
@@ -514,39 +551,6 @@ module CombinePDF
514
551
  self
515
552
  end
516
553
 
517
- def get_refernced_object(reference_hash = {})
518
- @parsed.each do |stored_object|
519
- return stored_object if stored_object.is_a?(Hash) &&
520
- reference_hash[:indirect_reference_id] == stored_object[:indirect_reference_id] &&
521
- reference_hash[:indirect_generation_number] == stored_object[:indirect_generation_number]
522
- # return (stored_object[:indirect_without_dictionary] || stored_object) if stored_object.is_a?(Hash) &&
523
- # reference_hash[:indirect_reference_id] == stored_object[:indirect_reference_id] &&
524
- # reference_hash[:indirect_generation_number] == stored_object[:indirect_generation_number]
525
- end
526
- warn "didn't find reference #{reference_hash}"
527
- nil
528
- end
529
-
530
- # # @private
531
- # # connects references and objects, according to their reference id's.
532
- # #
533
- # # should be moved to the parser's workflow.
534
- # #
535
- # def serialize_objects_and_references_old
536
- # obj_dir = {}
537
- # # create a dictionary for referenced objects (no value resolution at this point)
538
- # @parsed.each { |o| obj_dir[[o.delete(:indirect_reference_id), o.delete(:indirect_generation_number)]] = o }
539
- # # @parsed.each {|o| obj_dir[ [ o.[](:indirect_reference_id), o.[](:indirect_generation_number) ] ] = o }
540
- # @references.each do |obj|
541
- # obj[:referenced_object] = obj_dir[[obj[:indirect_reference_id], obj[:indirect_generation_number]]]
542
- # warn "couldn't connect a reference!!! could be a null or removed (empty) object, Silent error!!!\n Object raising issue: #{obj}" unless obj[:referenced_object]
543
- # obj.delete(:indirect_reference_id); obj.delete(:indirect_generation_number)
544
- # end
545
- # obj_dir.clear
546
- # @references.clear
547
- # self
548
- # end
549
-
550
554
  # @private
551
555
  # connects references and objects, according to their reference id's.
552
556
  #
@@ -556,9 +560,23 @@ module CombinePDF
556
560
  #
557
561
  def serialize_objects_and_references
558
562
  obj_dir = {}
563
+ objid_cache = {}
559
564
  # create a dictionary for referenced objects (no value resolution at this point)
560
- # @parsed.each { |o| obj_dir[[o.delete(:indirect_reference_id), o.delete(:indirect_generation_number)]] = o }
561
- @parsed.each { |o| obj_dir[[o[:indirect_reference_id], o[:indirect_generation_number]]] = o }
565
+ # at the same time, delete duplicates and old versions when objects have multiple versions
566
+ @parsed.uniq!
567
+ @parsed.length.times do |i|
568
+ o = @parsed[i]
569
+ objid_cache[o.object_id] = i
570
+ tmp_key = [o[:indirect_reference_id], o[:indirect_generation_number]]
571
+ if tmp_found = obj_dir[tmp_key]
572
+ tmp_found.clear
573
+ @parsed[objid_cache[tmp_found.object_id]] = nil
574
+ end
575
+ obj_dir[tmp_key] = o
576
+ end
577
+ @parsed.compact!
578
+ objid_cache.clear
579
+
562
580
  should_resolve = [@parsed, @root_object]
563
581
  while should_resolve.count > 0
564
582
  obj = should_resolve.pop
@@ -1,3 +1,3 @@
1
1
  module CombinePDF
2
- VERSION = '1.0.3'.freeze
2
+ VERSION = '1.0.4'.freeze
3
3
  end
data/test/automated CHANGED
@@ -19,6 +19,10 @@ pdf << CombinePDF.load("./Ruby/test\ pdfs/empty_form.pdf")
19
19
  pdf << CombinePDF.load("./Ruby/test\ pdfs/filled_form.pdf")
20
20
  pdf.save '02_check_form_unification_middle_is_empty.pdf'
21
21
 
22
+ pdf = CombinePDF.load "./Ruby/test\ pdfs/check_form_data__objstreams_w_versions.pdf"
23
+ pdf.save '02_01_check_form_data_ordering_issue.pdf'
24
+
25
+
22
26
  pdf = CombinePDF.load './Ruby/test pdfs/share-font-background.pdf'
23
27
  pdf2 = CombinePDF.load './Ruby/test pdfs/share-font-foreground.pdf'
24
28
  i = 0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-13 00:00:00.000000000 Z
11
+ date: 2017-07-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-rc4