combine_pdf 1.0.3 → 1.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cb506a3a76c6eb84531ff5069cd3286147dc8fac
4
- data.tar.gz: afb908cfd43b301bfabb3bc53bceccce2e82c4ac
3
+ metadata.gz: a262d8592dbe90e0a4930aebadda8866b731a586
4
+ data.tar.gz: fdf9a2877028b673f55d23741f8c8f59af647a4f
5
5
  SHA512:
6
- metadata.gz: 59d1e37087daed2e4f84ecef8bc2d817ddf14a25ec06f47748739d83d1732a206d6ede05ea3531ea444668d28295d891bd43bc6575413ad00217b76ed610ff47
7
- data.tar.gz: 10a6f92d6c37d966ac5d6df0886961237227e1264726bb609fdb33083d1b6a816672f43996123e3e747ac42e227a1a8b211fae42f86dcb90e4b1e7405fb72d5d
6
+ metadata.gz: 763aa425d24ef58b83717123f4ef7c962d2ecd083a39275df136950872e286af49d0efbc8f8d2a2f4cf0967439848c151e587073497a8ff1a7fd9b0f9ef42d7f
7
+ data.tar.gz: 1c86313e09d88a07e4a2ee43fd53ce445b784770140a6892854d8f50d9dff9140c5fd993ae4d50b54ffa12f0cea6fb706a1026e18304185ccdedae3ceee0fbcb
@@ -52,6 +52,7 @@ module CombinePDF
52
52
  @forms_object = {}.dup
53
53
  @metadata = nil
54
54
  @strings_dictionary = {}.dup # all strings are one string
55
+ @resolution_hash = {}.dup
55
56
  @version = nil
56
57
  @scanner = nil
57
58
  @allow_optional_content = options[:allow_optional_content]
@@ -95,18 +96,20 @@ module CombinePDF
95
96
  # do we really need to apply to @parsed? No, there is no need.
96
97
  end
97
98
 
98
- ## search for objects streams
99
- object_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :ObjStm }
100
- unless object_streams.empty?
101
- warn 'PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects.'
102
-
103
- object_streams.each do |o|
99
+ # search for objects streams and replace them "in-place"
100
+ # the inplace resolution prevents versioning errors
101
+ while (true)
102
+ found_object_streams = false
103
+ @parsed.length.times do |i|
104
+ o = @parsed[i]
105
+ next unless o.is_a?(Hash) && o[:Type] == :ObjStm
104
106
  ## un-encode (using the correct filter) the object streams
105
107
  PDFFilter.inflate_object o
106
- ## extract objects from stream to top level arry @parsed
108
+ ## extract objects from stream
107
109
  @scanner = StringScanner.new o[:raw_stream_content]
108
110
  stream_data = _parse_
109
111
  id_array = []
112
+ collection = [nil]
110
113
  while stream_data[0].is_a? (Numeric)
111
114
  id_array << stream_data.shift
112
115
  stream_data.shift
@@ -115,11 +118,42 @@ module CombinePDF
115
118
  stream_data[0] = { indirect_without_dictionary: stream_data[0] } unless stream_data[0].is_a?(Hash)
116
119
  stream_data[0][:indirect_reference_id] = id_array.shift
117
120
  stream_data[0][:indirect_generation_number] = 0
118
- @parsed << stream_data.shift
121
+ collection << (stream_data.shift)
119
122
  end
123
+ # place new objects right after this one (removing this one as well)
124
+ @parsed[i] = collection
125
+ found_object_streams = true
120
126
  end
127
+ break unless found_object_streams
128
+ @parsed.flatten!
129
+ @parsed.compact!
121
130
  end
122
131
 
132
+ #
133
+ # object_streams = @parsed.select { |obj| obj.is_a?(Hash) && obj[:Type] == :ObjStm }
134
+ # unless object_streams.empty?
135
+ # warn 'PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects.'
136
+ #
137
+ # object_streams.each do |o|
138
+ # ## un-encode (using the correct filter) the object streams
139
+ # PDFFilter.inflate_object o
140
+ # ## extract objects from stream to top level arry @parsed
141
+ # @scanner = StringScanner.new o[:raw_stream_content]
142
+ # stream_data = _parse_
143
+ # id_array = []
144
+ # while stream_data[0].is_a? (Numeric)
145
+ # id_array << stream_data.shift
146
+ # stream_data.shift
147
+ # end
148
+ # while id_array[0] && stream_data[0]
149
+ # stream_data[0] = { indirect_without_dictionary: stream_data[0] } unless stream_data[0].is_a?(Hash)
150
+ # stream_data[0][:indirect_reference_id] = id_array.shift
151
+ # stream_data[0][:indirect_generation_number] = 0
152
+ # @parsed << stream_data.shift
153
+ # end
154
+ # end
155
+ # end
156
+
123
157
  # serialize_objects_and_references.catalog_pages
124
158
 
125
159
  # Benchmark.bm do |bm|
@@ -149,6 +183,9 @@ module CombinePDF
149
183
  else
150
184
  @info_object = {}
151
185
  end
186
+
187
+ # we can clear the resolution hash now
188
+ @resolution_hash.clear if @resolution_hash
152
189
  # # # ## remove object streams - if they exist
153
190
  # @parsed.reject! {|obj| object_streams << obj if obj.is_a?(Hash) && obj[:Type] == :ObjStm}
154
191
  # # # ## remove XREF dictionaries - if they exist
@@ -377,7 +414,7 @@ module CombinePDF
377
414
  if @scanner.matched[-1] == 'r'
378
415
  if @scanner.skip_until(/<</)
379
416
  data = _parse_
380
- @root_object ||= {}
417
+ (@root_object ||= {}).clear
381
418
  @root_object[data.shift] = data.shift while data[0]
382
419
  end
383
420
  ##########
@@ -514,39 +551,6 @@ module CombinePDF
514
551
  self
515
552
  end
516
553
 
517
- def get_refernced_object(reference_hash = {})
518
- @parsed.each do |stored_object|
519
- return stored_object if stored_object.is_a?(Hash) &&
520
- reference_hash[:indirect_reference_id] == stored_object[:indirect_reference_id] &&
521
- reference_hash[:indirect_generation_number] == stored_object[:indirect_generation_number]
522
- # return (stored_object[:indirect_without_dictionary] || stored_object) if stored_object.is_a?(Hash) &&
523
- # reference_hash[:indirect_reference_id] == stored_object[:indirect_reference_id] &&
524
- # reference_hash[:indirect_generation_number] == stored_object[:indirect_generation_number]
525
- end
526
- warn "didn't find reference #{reference_hash}"
527
- nil
528
- end
529
-
530
- # # @private
531
- # # connects references and objects, according to their reference id's.
532
- # #
533
- # # should be moved to the parser's workflow.
534
- # #
535
- # def serialize_objects_and_references_old
536
- # obj_dir = {}
537
- # # create a dictionary for referenced objects (no value resolution at this point)
538
- # @parsed.each { |o| obj_dir[[o.delete(:indirect_reference_id), o.delete(:indirect_generation_number)]] = o }
539
- # # @parsed.each {|o| obj_dir[ [ o.[](:indirect_reference_id), o.[](:indirect_generation_number) ] ] = o }
540
- # @references.each do |obj|
541
- # obj[:referenced_object] = obj_dir[[obj[:indirect_reference_id], obj[:indirect_generation_number]]]
542
- # warn "couldn't connect a reference!!! could be a null or removed (empty) object, Silent error!!!\n Object raising issue: #{obj}" unless obj[:referenced_object]
543
- # obj.delete(:indirect_reference_id); obj.delete(:indirect_generation_number)
544
- # end
545
- # obj_dir.clear
546
- # @references.clear
547
- # self
548
- # end
549
-
550
554
  # @private
551
555
  # connects references and objects, according to their reference id's.
552
556
  #
@@ -556,9 +560,23 @@ module CombinePDF
556
560
  #
557
561
  def serialize_objects_and_references
558
562
  obj_dir = {}
563
+ objid_cache = {}
559
564
  # create a dictionary for referenced objects (no value resolution at this point)
560
- # @parsed.each { |o| obj_dir[[o.delete(:indirect_reference_id), o.delete(:indirect_generation_number)]] = o }
561
- @parsed.each { |o| obj_dir[[o[:indirect_reference_id], o[:indirect_generation_number]]] = o }
565
+ # at the same time, delete duplicates and old versions when objects have multiple versions
566
+ @parsed.uniq!
567
+ @parsed.length.times do |i|
568
+ o = @parsed[i]
569
+ objid_cache[o.object_id] = i
570
+ tmp_key = [o[:indirect_reference_id], o[:indirect_generation_number]]
571
+ if tmp_found = obj_dir[tmp_key]
572
+ tmp_found.clear
573
+ @parsed[objid_cache[tmp_found.object_id]] = nil
574
+ end
575
+ obj_dir[tmp_key] = o
576
+ end
577
+ @parsed.compact!
578
+ objid_cache.clear
579
+
562
580
  should_resolve = [@parsed, @root_object]
563
581
  while should_resolve.count > 0
564
582
  obj = should_resolve.pop
@@ -1,3 +1,3 @@
1
1
  module CombinePDF
2
- VERSION = '1.0.3'.freeze
2
+ VERSION = '1.0.4'.freeze
3
3
  end
data/test/automated CHANGED
@@ -19,6 +19,10 @@ pdf << CombinePDF.load("./Ruby/test\ pdfs/empty_form.pdf")
19
19
  pdf << CombinePDF.load("./Ruby/test\ pdfs/filled_form.pdf")
20
20
  pdf.save '02_check_form_unification_middle_is_empty.pdf'
21
21
 
22
+ pdf = CombinePDF.load "./Ruby/test\ pdfs/check_form_data__objstreams_w_versions.pdf"
23
+ pdf.save '02_01_check_form_data_ordering_issue.pdf'
24
+
25
+
22
26
  pdf = CombinePDF.load './Ruby/test pdfs/share-font-background.pdf'
23
27
  pdf2 = CombinePDF.load './Ruby/test pdfs/share-font-foreground.pdf'
24
28
  i = 0
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.3
4
+ version: 1.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-13 00:00:00.000000000 Z
11
+ date: 2017-07-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-rc4