combine_pdf 0.2.30 → 0.2.31

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f81f9412da41045468ecaa6e10104fc9062eee8d
4
- data.tar.gz: 23410127dcabe19c6b9ddee352752f9b7bd0abb6
3
+ metadata.gz: 63b0c324e1bf003b0c0fc963eb2071b6c4672c18
4
+ data.tar.gz: 34c200edda06074773888c098b9d4f9a6479d752
5
5
  SHA512:
6
- metadata.gz: 03fdcce50faf9045930e435cbdf4d31e4ed96419f6e594796df89ab7c6e0891568467d5c1b632338c5e33fe17f3e68fe14c1c7c6c199e6d6d8a33ec47e438a46
7
- data.tar.gz: 24442ecac5ee2ed427de851eb5de4081ab8bb4a8625cdcaa2c9d4a84da86c79feaf3b02928ebee22462b2963a94ea9683fde5a47a3e6a35400d9ace2f3b13489
6
+ metadata.gz: ebf1cd2a7c1077f71d6f41037f0ad341c06e6bbb305bfa030833956feb0bb80576e2eaa5fd7324000ff95834f8c125cafc40a782ac97c16c7e3c7e7c02166794
7
+ data.tar.gz: a4cc257441939fbc0dd59dffe2d3faf11ade63b89fa2637bee8b54df7ea9e31a2ec5612d366ec9208856d750deac8fe10f1ddcaab6f9bf21996e6aa52c775e90
@@ -2,9 +2,21 @@
2
2
 
3
3
  ***
4
4
 
5
+ Change log v.0.2.31
6
+
7
+ **Broke**: Broke the fix for issue #65 so that Radio buttons data might be lost... working on a fix.
8
+
9
+ **Fix**: Fixed issue #82 (reintroduction of issue #19 due to core engine rewrite) related to a workaround for an issue with AcrobatReader. Credit to @gyuchang for testing and helping with the fix.
10
+
11
+ **Merge**: Merged pull request #80, fixing an issue with byte decoding. Credit to @gyuchang for the PR.
12
+
13
+ **Performance**: Improved performance for the reference and duplicate object resolution. Credit to @gyuchang for pointing out some optimization options.
14
+
15
+ ***
16
+
5
17
  Change log v.0.2.30
6
18
 
7
- **Fix**: Fixed an issue where HTTP artifacts before the beginning of a PDF file / string would prevent the PDF from being parsed. This fixes issue #78 reported by @robvitaro.
19
+ **Fix**: Fixed an issue where HTTP artifacts before the beginning of a PDF file / string would prevent the PDF from being parsed. This should fix issue #78 reported by @robvitaro.
8
20
 
9
21
  ***
10
22
 
@@ -200,7 +200,7 @@ module CombinePDF
200
200
  # instead, a non-strict RegExp is used:
201
201
  str = @scanner.scan_until(/endstream/)
202
202
  # raise error if the stream doesn't end.
203
- raise "Parsing Error: PDF file error - a stream object wasn't properly colsed using 'endstream'!" unless str
203
+ raise "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!" unless str
204
204
  # need to remove end of stream
205
205
  if out.last.is_a? Hash
206
206
  # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
@@ -19,84 +19,42 @@ module CombinePDF
19
19
  # this function adds the references contained in `@objects`.
20
20
  #
21
21
  # this is used for internal operations, such as injecting data using the << operator.
22
- def add_referenced
22
+ def add_referenced(should_resolve = [])
23
23
  # add references but not root
24
- should_resolve = @objects.dup
25
24
  dup_pages = nil
26
- resolved = [].to_set
25
+ # an existing object map
26
+ resolved = {}.dup
27
+ existing = {}.dup
28
+ @objects.each { |obj| existing[obj] = obj }
29
+ # loop until should_resolve is empty
27
30
  while should_resolve.any?
28
31
  obj = should_resolve.pop
32
+ next if resolved[obj.object_id] # the object exists
29
33
  if obj.is_a?(Hash)
30
- next if resolved.include? obj.object_id
31
- resolved << obj.object_id
32
- if obj[:referenced_object]
33
- tmp = @objects.find_index(obj[:referenced_object])
34
+ referenced = obj[:referenced_object]
35
+ if referenced && referenced.any?
36
+ tmp = resolved[referenced.object_id] || existing[referenced]
34
37
  if tmp
35
- tmp = @objects[tmp]
36
38
  obj[:referenced_object] = tmp
37
39
  else
38
- tmp = obj[:referenced_object]
39
- should_resolve << tmp
40
- @objects << tmp
40
+ resolved[obj.object_id] = referenced
41
+ existing[referenced] = referenced
42
+ should_resolve << referenced
43
+ @objects << referenced
41
44
  end
42
45
  else
43
- obj.keys.each { |k| should_resolve << obj[k] unless k == :Parent || resolved.include?(obj[k].object_id) || !obj[k].is_a?(Enumerable) }
46
+ resolved[obj.object_id] = obj
47
+ obj.keys.each { |k| should_resolve << obj[k] unless !obj[k].is_a?(Enumerable) || resolved[obj[k].object_id] }
44
48
  end
45
49
  elsif obj.is_a?(Array)
46
- next if resolved.include? obj.object_id
47
- resolved << obj.object_id
50
+ resolved[obj.object_id] = obj
48
51
  should_resolve.concat obj
49
52
  end
50
53
  end
51
54
  resolved.clear
55
+ existing.clear
52
56
  end
53
57
 
54
- # # @private
55
- # # Some PDF objects contain references to other PDF objects.
56
- # #
57
- # # this function adds the references contained in "object", but DOESN'T add the object itself.
58
- # #
59
- # # this is used for internal operations, such as injectng data using the << operator.
60
- # def add_referenced(object, dup_pages = true)
61
- # # add references but not root
62
- # if object.is_a?(Array)
63
- # object.each { |it| add_referenced(it, dup_pages) }
64
- # return true
65
- # elsif object.is_a?(Hash)
66
- # # first if statement is actually a workaround for a bug in Acrobat Reader, regarding duplicate pages.
67
- # if dup_pages && object[:is_reference_only] && object[:referenced_object] && object[:referenced_object].is_a?(Hash) && object[:referenced_object][:Type] == :Page
68
- # if @objects.find_index object[:referenced_object]
69
- # @objects << (object[:referenced_object] = object[:referenced_object].dup)
70
- # else
71
- # @objects << object[:referenced_object]
72
- # end
73
- # elsif object[:is_reference_only] && object[:referenced_object]
74
- # found_at = @objects.find_index object[:referenced_object]
75
- # if found_at
76
- # # if the objects are equal, they might still be different objects!
77
- # # so, we need to make sure they are the same object for the pointers to effect id numbering
78
- # # and formatting operations.
79
- # object[:referenced_object] = @objects[found_at]
80
- # # stop this path, there is no need to run over the Hash's keys and values
81
- # return true
82
- # else
83
- # # stop if page propegation is false
84
- # return true if !dup_pages && object[:referenced_object][:Type] == :Page
85
- # # @objects.include? object[:referenced_object] is bound to be false
86
- # # the object wasn't found - add it to the @objects array
87
- # @objects << object[:referenced_object]
88
- # end
89
- #
90
- # end
91
- # object.each do |k, v|
92
- # add_referenced(v, dup_pages) unless RECORSIVE_PROTECTION[k]
93
- # end
94
- # else
95
- # return false
96
- # end
97
- # true
98
- # end
99
-
100
58
  # @private
101
59
  def rebuild_catalog(*with_pages)
102
60
  # # build page list v.1 Slow but WORKS
@@ -113,38 +71,62 @@ module CombinePDF
113
71
  # add pages to catalog, if requested
114
72
  page_list.concat(with_pages) unless with_pages.empty?
115
73
 
74
+ # duplicate any non-unique pages - This is a special case to resolve Adobe Acrobat Reader issues (see issues #19 and #81)
75
+ uniqueness = {}.dup
76
+ page_list.each { |page| page = page.dup if uniqueness[page.object_id]; uniqueness[page.object_id] = page }
77
+ page_list.clear
78
+ page_list = uniqueness.values
79
+ uniqueness.clear
80
+
116
81
  # build new Pages object
117
- pages_object = { Type: :Pages, Count: page_list.length, Kids: page_list.map { |p| { referenced_object: p, is_reference_only: true } } }
82
+ page_object_kids = [].dup
83
+ pages_object = { Type: :Pages, Count: page_list.length, Kids: page_object_kids }
84
+ pages_object_reference = { referenced_object: pages_object, is_reference_only: true }
85
+ page_list.each { |pg| pg[:Parent] = pages_object_reference; page_object_kids << ({ referenced_object: pg, is_reference_only: true }) }
118
86
 
119
87
  # rebuild/rename the names dictionary
120
88
  rebuild_names
121
89
  # build new Catalog object
122
90
  catalog_object = { Type: :Catalog,
123
- Pages: { referenced_object: pages_object, is_reference_only: true },
124
- Names: { referenced_object: @names, is_reference_only: true },
125
- Outlines: { referenced_object: @outlines, is_reference_only: true } }
91
+ Pages: { referenced_object: pages_object, is_reference_only: true } }
92
+ # pages_object[:Parent] = { referenced_object: catalog_object, is_reference_only: true } # causes AcrobatReader to fail
126
93
  catalog_object[:ViewerPreferences] = @viewer_preferences unless @viewer_preferences.empty?
127
94
 
128
- # rebuild/rename the forms dictionary
129
- if @forms_data.nil? || @forms_data.empty?
130
- @forms_data = nil
131
- else
132
- @forms_data = { referenced_object: (@forms_data[:referenced_object] || @forms_data), is_reference_only: true }
133
- catalog_object[:AcroForm] = @forms_data
134
- end
135
-
136
95
  # point old Pages pointers to new Pages object
137
96
  ## first point known pages objects - enough?
138
97
  pages.each { |p| p[:Parent] = { referenced_object: pages_object, is_reference_only: true } }
139
98
  ## or should we, go over structure? (fails)
140
99
  # each_object {|obj| obj[:Parent][:referenced_object] = pages_object if obj.is_a?(Hash) && obj[:Parent].is_a?(Hash) && obj[:Parent][:referenced_object] && obj[:Parent][:referenced_object][:Type] == :Pages}
141
100
 
142
- # remove old catalog and pages objects
143
- @objects.reject! { |obj| obj.is_a?(Hash) && (obj[:Type] == :Catalog || obj[:Type] == :Pages) }
101
+ # # remove old catalog and pages objects
102
+ # @objects.reject! { |obj| obj.is_a?(Hash) && (obj[:Type] == :Catalog || obj[:Type] == :Pages) }
103
+ # remove old objects list and trees
104
+ @objects.clear
144
105
 
145
106
  # inject new catalog and pages objects
146
- @objects << pages_object
107
+ @objects << @info if @info
147
108
  @objects << catalog_object
109
+ @objects << pages_object
110
+
111
+ # rebuild/rename the forms dictionary
112
+ if @forms_data.nil? || @forms_data.empty?
113
+ @forms_data = nil
114
+ else
115
+ @forms_data = { referenced_object: (@forms_data[:referenced_object] || @forms_data), is_reference_only: true }
116
+ catalog_object[:AcroForm] = @forms_data
117
+ @objects << @forms_data[:referenced_object]
118
+ end
119
+
120
+ # add the names dictionary
121
+ if @names && @names.length > 1
122
+ @objects << @names
123
+ catalog_object[:Names] = { referenced_object: @names, is_reference_only: true }
124
+ end
125
+ # add the outlines dictionary
126
+ if @outlines && @outlines.any?
127
+ @objects << @outlines
128
+ catalog_object[:Outlines] = { referenced_object: @outlines, is_reference_only: true }
129
+ end
148
130
 
149
131
  catalog_object
150
132
  end
@@ -166,26 +148,9 @@ module CombinePDF
166
148
  # there is no point is calling the method before preparing the output.
167
149
  def rebuild_catalog_and_objects
168
150
  catalog = rebuild_catalog
169
- @objects.clear
170
- @objects << @info
171
- @objects << catalog
172
- # fix Acrobat Reader issue with page reference uniqueness (must be unique or older Acrobat Reader fails)
173
- catalog[:Pages][:referenced_object][:Kids].each do |page|
174
- tmp = page[:referenced_object]
175
- tmp = page[:referenced_object] = tmp.dup if @objects.include? tmp
176
- @objects << tmp
177
- end
151
+ page_objects = catalog[:Pages][:referenced_object][:Kids].map { |e| @objects << e[:referenced_object]; e[:referenced_object] }
178
152
  # adds every referenced object to the @objects (root), addition is performed as pointers rather than copies
179
- # puts (Benchmark.measure do
180
- add_referenced
181
- # end)
182
- # @objects << @info
183
- # add_referenced @info
184
- # add_referenced catalog
185
- # add_referenced catalog[:Pages]
186
- # add_referenced catalog[:Names], false
187
- # add_referenced catalog[:Outlines], false
188
- # add_referenced catalog[:AcroForm], false
153
+ add_referenced([page_objects, @forms_data, @names, @outlines, @info])
189
154
  catalog
190
155
  end
191
156
 
@@ -304,9 +269,9 @@ module CombinePDF
304
269
  # parent - the outline base node of the resulting merged outline
305
270
  # FIXME implement the possibility to insert somewhere in the middle of the outline
306
271
  prev = nil
307
- pos = first = actual_object(((position != 0) ? old_data : new_data)[:First])
308
- last = actual_object(((position != 0) ? new_data : old_data)[:Last])
309
- median = { is_reference_only: true, referenced_object: actual_object(((position != 0) ? new_data : old_data)[:First]) }
272
+ pos = first = actual_object((position.nonzero? ? old_data : new_data)[:First])
273
+ last = actual_object((position.nonzero? ? new_data : old_data)[:Last])
274
+ median = { is_reference_only: true, referenced_object: actual_object((position.nonzero? ? new_data : old_data)[:First]) }
310
275
  old_data[:First] = { is_reference_only: true, referenced_object: first }
311
276
  old_data[:Last] = { is_reference_only: true, referenced_object: last }
312
277
  parent = { is_reference_only: true, referenced_object: old_data }
@@ -21,7 +21,7 @@ module CombinePDF
21
21
  elsif object.is_a?(Array)
22
22
  return format_array_to_pdf object
23
23
  elsif object.is_a?(Fixnum) || object.is_a?(Float) || object.is_a?(TrueClass) || object.is_a?(FalseClass)
24
- return object.to_s + ' '
24
+ return object.to_s
25
25
  elsif object.is_a?(Hash)
26
26
  return format_hash_to_pdf object
27
27
  else
@@ -33,12 +33,12 @@ module CombinePDF
33
33
  "\x0D" => '\\r',
34
34
  "\x09" => '\\t',
35
35
  "\x08" => '\\b',
36
- "\xFF" => '\\f',
36
+ "\x0C" => '\\f', # form-feed (\f) == 0x0C
37
37
  "\x28" => '\\(',
38
38
  "\x29" => '\\)',
39
39
  "\x5C" => '\\\\' }.dup
40
40
  32.times { |i| STRING_REPLACEMENT_HASH[i.chr] ||= "\\#{i}" }
41
- (256 - 128).times { |i| STRING_REPLACEMENT_HASH[(i + 127).chr] ||= "\\#{i + 127}" }
41
+ (256 - 127).times { |i| STRING_REPLACEMENT_HASH[(i + 127).chr] ||= "\\#{i + 127}" }
42
42
 
43
43
  def format_string_to_pdf(object)
44
44
  # object.force_encoding(Encoding::ASCII_8BIT)
@@ -1,3 +1,3 @@
1
1
  module CombinePDF
2
- VERSION = '0.2.30'.freeze
2
+ VERSION = '0.2.31'.freeze
3
3
  end
@@ -28,15 +28,16 @@ pdf = CombinePDF.load './Ruby/test pdfs/names_go_haywire_0.pdf'
28
28
  pdf << CombinePDF.load('./Ruby/test pdfs/names_go_haywire_1.pdf')
29
29
  pdf.save '04_check_view_and_names_reference.pdf'
30
30
 
31
- str = IO.binread './Ruby/test pdfs/outlines/self_merge_err.pdf'
31
+ pdf = CombinePDF.load('./Ruby/test pdfs/outlines/self_merge_err.pdf')
32
+ pdf.save '05_x1_scribus_test.pdf'
32
33
  pdf = CombinePDF.load('./Ruby/test pdfs/outlines/self_merge_err.pdf')
33
34
  pdf << CombinePDF.load('./Ruby/test pdfs/outlines/self_merge_err.pdf')
34
- pdf.save '05_scribus_test.pdf'
35
+ pdf.save '05_x2_scribus_test.pdf'
35
36
  # pdf = CombinePDF.load "./Ruby/test pdfs/named_dest.pdf";nil
36
37
  # pdf.save '05_check_named_dest_links.pdf' # this will take a while
37
38
  # pdf = CombinePDF.load "./Ruby/test pdfs/named_dest.pdf";nil
38
- # pdf << CombinePDF.load("./Ruby/test pdfs/named_dest.pdf");nil
39
- # pdf.save '05_1_check_named_dest_links.pdf' # never ends... :-(
39
+ pdf << CombinePDF.load('./Ruby/test pdfs/named_dest.pdf'); nil
40
+ pdf.save '05_1_timeless_check_named_dest_links.pdf' # never ends... :-(
40
41
 
41
42
  pdf = CombinePDF.load './Ruby/test pdfs/outline_small.pdf'
42
43
  pdf << CombinePDF.load('./Ruby/test pdfs/outline_small.pdf')
@@ -55,6 +56,17 @@ CombinePDF.load("./Ruby/test\ pdfs/Scribus-unknown_err2.pdf").save '08_2-unknown
55
56
  CombinePDF.load("./Ruby/test\ pdfs/Scribus-unknown_err3.pdf").save '08_3-unknown-err-empty-str.pdf'
56
57
 
57
58
  CombinePDF.load("/Users/2Be/Ruby/test\ pdfs/nil_object.pdf").save('09_nil_in_parsed_array.pdf')
59
+
60
+ require 'prawn'
61
+ IO.binwrite '10_prawn.pdf', (Prawn::Document.new { text 'Hello World!' }).render
62
+ page = CombinePDF.parse((Prawn::Document.new { text 'Hello World!' }).render)
63
+ pdf = CombinePDF.new
64
+ pdf << page
65
+ pdf.save '10_parsed_from_prawn.pdf'
66
+ pdf = CombinePDF.new
67
+ pdf << page << page
68
+ pdf.save('10_AcrobatReader_is_unique_page.pdf')
69
+
58
70
  # unify = [
59
71
  # "./Ruby/test\ pdfs/AESv2\ encrypted.pdf",
60
72
  # "./Ruby/test\ pdfs/data-in-comment.pdf",
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.30
4
+ version: 0.2.31
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-27 00:00:00.000000000 Z
11
+ date: 2016-08-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-rc4
@@ -111,4 +111,3 @@ test_files:
111
111
  - test/automated
112
112
  - test/console
113
113
  - test/named_dest
114
- has_rdoc: