combine_pdf 0.2.37 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cac27b28f3653156374b1ea4a429676625ba0c9f
4
- data.tar.gz: 8ce9f60a9bdcbd763a72461703c51845dbab0f2c
3
+ metadata.gz: 3663c5f5602eeed30aba5405fc0503ab9a865432
4
+ data.tar.gz: f6e07e2fbb180065146c32a440f29348fb2a2808
5
5
  SHA512:
6
- metadata.gz: 78aa47281a6f9fa5723a99ed9ce666479999348b69a19778e02eb2144e1c507d4a62ac23a07e4b46079193d14fecf77cbb9dac06591ac2354e92046ba0ba5d20
7
- data.tar.gz: 2b92948efba5ab031a46865416b13b1aecb892a1763c0d45190598313e6e0139907ece00c129349060ff030c7b38531d1d848f9696168d5fec499a4c65121db8
6
+ metadata.gz: 835236c99911009df5112cc92c3a042e6fe2dcd634e9b189f9977aef9f9f42ae33ac9865e94d08e8d26b6ff0328d7bf84ccc0dc858897c2c0ceae10c3c80c944
7
+ data.tar.gz: f11df3aa2c055a17be86c83766ea85f536afc5dcf0b299cd0e5be48344724909619ea54c8b52b7a9997907ec9d5c585d21f9ba81eeb17a74a77234aba2a79994
@@ -2,7 +2,19 @@
2
2
 
3
3
  ***
4
4
 
5
- #### Change log v.0.2.37 (Release Candidate)
5
+ #### Change log v.1.0.0
6
+
7
+ **Fix**: Fixed a possible issue with string corruption... it might have only existed in the development version, I'm not sure, but it's fixed anyway.
8
+
9
+ **Fix** (degrade): Fixed an issue related to deeply nested objects causing unreasonable slowdowns. The issue was resolved by degrading the PDF optimization process to review only objects with `stream` data instead of reviewing every object. This means more duplicate objects might be observed when similar PDF files are merged.
10
+
11
+ **Fix**: Fixed an issue related to form data where font information was lost during the PDF optimization process.
12
+
13
+ **Fix**: Fixed issue #108 by adding support for PDFs that have spaces and missing zeros in their hex encoded strings. Credit to @emmanuelmillionaer.
14
+
15
+ ***
16
+
17
+ #### Change log v.0.2.37
6
18
 
7
19
  **Fix**: Fixed `Page_Methods#textbox` default `:x`,`:y` to allow for non-zero/cropped page origin. Credit to @donnguyen for exposing the issue.
8
20
 
@@ -138,7 +138,7 @@ end
138
138
  # arrays are Array
139
139
  # strings are String
140
140
  # names are Symbols (String.to_sym)
141
- # numbers are Fixnum or Float
141
+ # numbers are Integer or Float (Numeric)
142
142
  # boolean are TrueClass or FalseClass
143
143
 
144
144
  ## test performance with:
@@ -107,7 +107,7 @@ module CombinePDF
107
107
  @scanner = StringScanner.new o[:raw_stream_content]
108
108
  stream_data = _parse_
109
109
  id_array = []
110
- while stream_data[0].is_a? (Integer)
110
+ while stream_data[0].is_a? (Numeric)
111
111
  id_array << stream_data.shift
112
112
  stream_data.shift
113
113
  end
@@ -180,57 +180,34 @@ module CombinePDF
180
180
  if @scanner.scan(/\[/)
181
181
  out << _parse_
182
182
  ##########################################
183
- ## parse a Dictionary
184
- ##########################################
185
- elsif @scanner.scan(/<</)
186
- data = _parse_
187
- obj = {}
188
- obj[data.shift] = data.shift while data[0]
189
- out << obj
190
- ##########################################
191
- ## return content of array or dictionary
192
- ##########################################
193
- elsif @scanner.scan(/\]/) || @scanner.scan(/>>/)
194
- return out
195
- ##########################################
196
- ## parse a Stream
183
+ ## Parse a Name
197
184
  ##########################################
198
- elsif @scanner.scan(/stream[\r\n]/)
199
- @scanner.pos += 1 if @scanner.peek(1) == "\n".freeze && @scanner.matched[-1] != "\n".freeze
200
- # the following was dicarded because some PDF files didn't have an EOL marker as required
201
- # str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
202
- # instead, a non-strict RegExp is used:
203
- str = @scanner.scan_until(/endstream/)
204
- # raise error if the stream doesn't end.
205
- raise "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!" unless str
206
- # need to remove end of stream
207
- if out.last.is_a? Hash
208
- # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
209
- out.last[:raw_stream_content] = unify_string str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
210
- else
211
- warn 'Stream not attached to dictionary!'
212
- out << str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
213
- end
185
+ # old, probably working version: when str = @scanner.scan(/\/[\#\w\d\.\+\-\\\?\,]+/)
186
+ # I don't know how to write the /[\x21-\x7e___subtract_certain_hex_values_here____]+/
187
+ # all allowed regular characters between ! and ~ : /[\x21-\x24\x26\x27\x2a-\x2e\x30-\x3b\x3d\x3f-\x5a\x5c\x5e-\x7a\x7c\x7e]+
188
+ # all characters that aren't white space or special: /[^\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+
189
+ elsif str = @scanner.scan(/\/[^\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]*/)
190
+ out << (str[1..-1].gsub(/\#[0-9a-fA-F]{2}/) { |a| a[1..2].hex.chr }).to_sym
214
191
  ##########################################
215
- ## parse an Object after finished
192
+ ## Parse a Number
216
193
  ##########################################
217
- elsif str = @scanner.scan(/endobj/)
218
- # what to do when this is an object?
219
- if out.last.is_a? Hash
220
- out << out.pop.merge(indirect_generation_number: out.pop, indirect_reference_id: out.pop)
221
- else
222
- out << { indirect_without_dictionary: out.pop, indirect_generation_number: out.pop, indirect_reference_id: out.pop }
223
- end
224
- fresh = true
225
- # fix wkhtmltopdf use of PDF 1.1 Dest using symbols instead of strings
226
- out.last[:Dest] = unify_string(out.last[:Dest].to_s) if out.last[:Dest] && out.last[:Dest].is_a?(Symbol)
227
- # puts "!!!!!!!!! Error with :indirect_reference_id\n\nObject #{out.last} :indirect_reference_id = #{out.last[:indirect_reference_id]}" unless out.last[:indirect_reference_id].is_a?(Integer)
194
+ elsif str = @scanner.scan(/[\+\-\.\d]+/)
195
+ str =~ /\./ ? (out << str.to_f) : (out << str.to_i)
228
196
  ##########################################
229
197
  ## parse a Hex String
230
198
  ##########################################
231
- elsif str = @scanner.scan(/<[0-9a-fA-F]*>/)
199
+ elsif str = @scanner.scan(/\<[0-9a-fA-F]*\>/)
232
200
  # warn "Found a hex string"
233
- out << unify_string([str[1..-2]].pack('H*').force_encoding(Encoding::ASCII_8BIT))
201
+ str = str.slice(1..-2).force_encoding(Encoding::ASCII_8BIT)
202
+ # str = "0#{str}" if str.length.odd?
203
+ out << unify_string([str].pack('H*').force_encoding(Encoding::ASCII_8BIT))
204
+ ##########################################
205
+ ## parse a space delimited Hex String
206
+ ##########################################
207
+ elsif str = @scanner.scan(/\<[0-9a-fA-F\s]*\>/)
208
+ # warn "Found a space separated hex string"
209
+ str = str.force_encoding(Encoding::ASCII_8BIT).split(/\s/).map! {|b| b.length.odd? ? "0#{b}" : b}
210
+ out << unify_string(str.pack('H*' * str.length).force_encoding(Encoding::ASCII_8BIT))
234
211
  ##########################################
235
212
  ## parse a Literal String
236
213
  ##########################################
@@ -315,6 +292,52 @@ module CombinePDF
315
292
  end
316
293
  out << unify_string(str.pack('C*').force_encoding(Encoding::ASCII_8BIT))
317
294
  ##########################################
295
+ ## parse a Dictionary
296
+ ##########################################
297
+ elsif @scanner.scan(/<</)
298
+ data = _parse_
299
+ obj = {}
300
+ obj[data.shift] = data.shift while data[0]
301
+ out << obj
302
+ ##########################################
303
+ ## return content of array or dictionary
304
+ ##########################################
305
+ elsif @scanner.scan(/\]/) || @scanner.scan(/>>/)
306
+ return out
307
+ ##########################################
308
+ ## parse a Stream
309
+ ##########################################
310
+ elsif @scanner.scan(/stream[\r\n]/)
311
+ @scanner.pos += 1 if @scanner.peek(1) == "\n".freeze && @scanner.matched[-1] != "\n".freeze
312
+ # the following was discarded because some PDF files didn't have an EOL marker as required
313
+ # str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
314
+ # instead, a non-strict RegExp is used:
315
+ str = @scanner.scan_until(/endstream/)
316
+ # raise error if the stream doesn't end.
317
+ raise "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!" unless str
318
+ # need to remove end of stream
319
+ if out.last.is_a? Hash
320
+ # out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
321
+ out.last[:raw_stream_content] = unify_string str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
322
+ else
323
+ warn 'Stream not attached to dictionary!'
324
+ out << str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
325
+ end
326
+ ##########################################
327
+ ## parse an Object after finished
328
+ ##########################################
329
+ elsif str = @scanner.scan(/endobj/)
330
+ # what to do when this is an object?
331
+ if out.last.is_a? Hash
332
+ out << out.pop.merge(indirect_generation_number: out.pop, indirect_reference_id: out.pop)
333
+ else
334
+ out << { indirect_without_dictionary: out.pop, indirect_generation_number: out.pop, indirect_reference_id: out.pop }
335
+ end
336
+ fresh = true
337
+ # fix wkhtmltopdf use of PDF 1.1 Dest using symbols instead of strings
338
+ out.last[:Dest] = unify_string(out.last[:Dest].to_s) if out.last[:Dest] && out.last[:Dest].is_a?(Symbol)
339
+ # puts "!!!!!!!!! Error with :indirect_reference_id\n\nObject #{out.last} :indirect_reference_id = #{out.last[:indirect_reference_id]}" unless out.last[:indirect_reference_id].is_a?(Numeric)
340
+ ##########################################
318
341
  ## Parse a comment
319
342
  ##########################################
320
343
  elsif str = @scanner.scan(/\%/)
@@ -326,20 +349,6 @@ module CombinePDF
326
349
  end
327
350
  # puts "AFTER COMMENT: #{@scanner.peek 8}"
328
351
  ##########################################
329
- ## Parse a Name
330
- ##########################################
331
- # old, probably working version: when str = @scanner.scan(/\/[\#\w\d\.\+\-\\\?\,]+/)
332
- # I don't know how to write the /[\x21-\x7e___subtract_certain_hex_values_here____]+/
333
- # all allowed regular caracters between ! and ~ : /[\x21-\x24\x26\x27\x2a-\x2e\x30-\x3b\x3d\x3f-\x5a\x5c\x5e-\x7a\x7c\x7e]+
334
- # all characters that aren't white space or special: /[^\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+
335
- elsif str = @scanner.scan(/\/[^\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]*/)
336
- out << (str[1..-1].gsub(/\#[0-9a-fA-F]{2}/) { |a| a[1..2].hex.chr }).to_sym
337
- ##########################################
338
- ## Parse a Number
339
- ##########################################
340
- elsif str = @scanner.scan(/[\+\-\.\d]+/)
341
- str =~ /\./ ? (out << str.to_f) : (out << str.to_i)
342
- ##########################################
343
352
  ## Parse an Object Reference
344
353
  ##########################################
345
354
  elsif @scanner.scan(/R/)
@@ -562,7 +571,7 @@ module CombinePDF
562
571
  o = nil
563
572
  else
564
573
  o[:referenced_object] = obj_dir[[o[:indirect_reference_id], o[:indirect_generation_number]]]
565
- warn "Couldn't connect reference for #{o}" if o[:referenced_object].nil?
574
+ warn "Couldn't connect reference for #{o}" if o[:referenced_object].nil? && (o[:indirect_reference_id] + o[:indirect_generation_number] != 0)
566
575
  o.delete :indirect_reference_id
567
576
  o.delete :indirect_generation_number
568
577
  o = (o[:referenced_object] && o[:referenced_object][:indirect_without_dictionary]) || o
@@ -33,12 +33,14 @@ module CombinePDF
33
33
  if obj.is_a?(Hash)
34
34
  referenced = obj[:referenced_object]
35
35
  if referenced && referenced.any?
36
- tmp = resolved[referenced.object_id] || existing[referenced]
36
+ # tmp = resolved[referenced.object_id] || existing[referenced]
37
+ tmp = resolved[referenced.object_id] || (referenced[:raw_stream_content] && existing[referenced[:raw_stream_content]])
37
38
  if tmp
38
39
  obj[:referenced_object] = tmp
39
40
  else
40
41
  resolved[obj.object_id] = referenced
41
- existing[referenced] = referenced
42
+ # existing[referenced] = referenced
43
+ existing[referenced[:raw_stream_content]] = referenced
42
44
  should_resolve << referenced
43
45
  @objects << referenced
44
46
  end
@@ -150,7 +152,8 @@ module CombinePDF
150
152
  catalog = rebuild_catalog
151
153
  page_objects = catalog[:Pages][:referenced_object][:Kids].map { |e| @objects << e[:referenced_object]; e[:referenced_object] }
152
154
  # adds every referenced object to the @objects (root), addition is performed as pointers rather than copies
153
- add_referenced([page_objects, @forms_data, @names, @outlines, @info])
155
+ # add_referenced([page_objects, @forms_data, @names, @outlines, @info])
156
+ add_referenced(@objects.dup)
154
157
  catalog
155
158
  end
156
159
 
@@ -163,7 +166,7 @@ module CombinePDF
163
166
  def renumber_object_ids(start = nil)
164
167
  @set_start_id = start || @set_start_id
165
168
  start = @set_start_id
166
- history = {}
169
+ # history = {}
167
170
  @objects.each do |obj|
168
171
  obj[:indirect_reference_id] = start
169
172
  start += 1
@@ -191,7 +194,7 @@ module CombinePDF
191
194
  if pos[0].is_a? String
192
195
  (pos.length / 2).times do |i|
193
196
  dic << (pos[i * 2].clear << base.next!)
194
- pos[(i * 2) + 1][0] = {is_reference_only: true, referenced_object: pages[pos[(i * 2) + 1][0]]} if(pos[(i * 2) + 1].is_a?(Array) && pos[(i * 2) + 1][0].is_a?(Integer))
197
+ pos[(i * 2) + 1][0] = {is_reference_only: true, referenced_object: pages[pos[(i * 2) + 1][0]]} if(pos[(i * 2) + 1].is_a?(Array) && pos[(i * 2) + 1][0].is_a?(Numeric))
195
198
  dic << (pos[(i * 2) + 1].is_a?(Array) ? { is_reference_only: true, referenced_object: { indirect_without_dictionary: pos[(i * 2) + 1] } } : pos[(i * 2) + 1])
196
199
  # dic << pos[(i * 2) + 1]
197
200
  end
@@ -225,12 +228,13 @@ module CombinePDF
225
228
  # preffering the new over the old.
226
229
  def self.hash_merge_new_no_page(_key, old_data, new_data)
227
230
  return old_data unless new_data
231
+ return new_data unless old_data
228
232
  if old_data.is_a?(Hash) && new_data.is_a?(Hash)
229
233
  return old_data if (old_data[:Type] == :Page)
230
234
  old_data.merge(new_data, &(@hash_merge_new_no_page_proc ||= method(:hash_merge_new_no_page)))
231
235
  elsif old_data.is_a? Array
232
- new_data = [new_data] unless new_data.is_a? Array
233
- old_data + new_data
236
+ return old_data + new_data if new_data.is_a?(Array)
237
+ return old_data.dup << new_data
234
238
  elsif new_data.is_a? Array
235
239
  new_data + [old_data]
236
240
  else
@@ -100,7 +100,7 @@ module CombinePDF
100
100
  @names = parser.names_object || {}
101
101
  @forms_data = parser.forms_object || {}
102
102
  @outlines = parser.outlines_object || {}
103
- # rebuild the catalo, to fix wkhtmltopdf's use of static page numbers
103
+ # rebuild the catalog, to fix wkhtmltopdf's use of static page numbers
104
104
  rebuild_catalog
105
105
 
106
106
  # general globals
@@ -20,7 +20,7 @@ module CombinePDF
20
20
  return format_name_to_pdf object
21
21
  elsif object.is_a?(Array)
22
22
  return format_array_to_pdf object
23
- elsif object.is_a?(Integer) || object.is_a?(Float) || object.is_a?(TrueClass) || object.is_a?(FalseClass)
23
+ elsif object.is_a?(Numeric) || object.is_a?(TrueClass) || object.is_a?(FalseClass)
24
24
  return object.to_s
25
25
  elsif object.is_a?(Hash)
26
26
  return format_hash_to_pdf object
@@ -1,3 +1,3 @@
1
1
  module CombinePDF
2
- VERSION = '0.2.37'.freeze
2
+ VERSION = '1.0.0'.freeze
3
3
  end
@@ -14,6 +14,7 @@ require 'bundler/setup'
14
14
 
15
15
  pdf = CombinePDF.load "./Ruby/test\ pdfs/filled_form.pdf"
16
16
  pdf.save '01_check_radio_buttuns.pdf'
17
+ pdf = CombinePDF.load "./Ruby/test\ pdfs/filled_form.pdf"
17
18
  pdf << CombinePDF.load("./Ruby/test\ pdfs/empty_form.pdf")
18
19
  pdf << CombinePDF.load("./Ruby/test\ pdfs/filled_form.pdf")
19
20
  pdf.save '02_check_form_unification_middle_is_empty.pdf'
@@ -51,7 +52,16 @@ pdf.save '06_check_links_to_second_copy.pdf'
51
52
  lists = %w(./Ruby/test\ pdfs/outlines/self_merge_err.pdf ./Ruby/test\ pdfs/outlines/big_toc.pdf ./Ruby/test\ pdfs/outlines/bigger_toc.pdf ./Ruby/test\ pdfs/outlines/named_dest_no_toc.pdf ./Ruby/test\ pdfs/outlines/named_dest_no_toc2.pdf ./Ruby/test\ pdfs/outlines/named_dest.pdf ./Ruby/test\ pdfs/outlines/named_dest2.pdf)
52
53
 
53
54
  i = 0
54
- lists.each { |n| CombinePDF.load(n).save("07_#{(i += 1)}_#{n.split('/')[-1]}"); (CombinePDF.load(n) << CombinePDF.load(n)).save("07_#{i}x2_#{n.split('/')[-1]}") }
55
+ lists.each do |n|
56
+ # puts "loading #{n}"
57
+ pdf = CombinePDF.load(n)
58
+ # puts "saving 07_#{(i += 1)}_#{n.split('/')[-1]}"
59
+ pdf.save("07_#{(i += 1)}_#{n.split('/')[-1]}")
60
+ # puts "loading #{n}X2"
61
+ pdf = CombinePDF.load(n) << CombinePDF.load(n)
62
+ # puts "saving 07_#{i}x2_#{n.split('/')[-1]}"
63
+ pdf.save("07_#{i}x2_#{n.split('/')[-1]}")
64
+ end
55
65
  pdf = CombinePDF.new
56
66
  lists.each { |n| pdf << CombinePDF.load(n) }
57
67
  pdf.save('07_named destinations.pdf')
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.37
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-10 00:00:00.000000000 Z
11
+ date: 2017-05-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-rc4
@@ -104,7 +104,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
104
104
  version: '0'
105
105
  requirements: []
106
106
  rubyforge_project:
107
- rubygems_version: 2.6.8
107
+ rubygems_version: 2.6.11
108
108
  signing_key:
109
109
  specification_version: 4
110
110
  summary: Combine, stamp and watermark PDF files in pure Ruby.