combine_pdf 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b732c13238534158bddb82ff89b5c0d14fdccdd2
4
- data.tar.gz: 45ef6b52039c289ba1f71cce445c245d42c01bc1
3
+ metadata.gz: 7d910d296f84bd5493a258317eace2f9cabf7d4b
4
+ data.tar.gz: 67efe73d182be27f9ad597e48cff83c3147ab37d
5
5
  SHA512:
6
- metadata.gz: 0ac937aea55ebc4f5be4cb9a2a7c9fadd45cc4e3dd01ff86749486541e1056787e89795fd514f031036e4156d4c7807101e82d0b88c64d00b813966211f97b15
7
- data.tar.gz: 1b89918e8c4e79e005dcd1961f8f89fcb89565ed95b57e429a7fb82a1cbabe009665d3396a54db0c6e38ced2b9dc72c720157348404b4a791d374bfae5669353
6
+ metadata.gz: 8924f5bb53e5f8e00c224f360e343688b9902d1c2e7b59c319e07cc66ce763b08651619be64217d1af4a7e1029344091dcb106ffbc7ac41a3e496c5a0e0f7bbc
7
+ data.tar.gz: d2162982205d6c89a6ace341d8d2a3e123caff712d7c8dd3e9220ec4ce4dc67dfa2759e4429a07bc2c28d66600be00c5b6221d26c11bb34501d4fdb42092aba5
@@ -5,6 +5,10 @@ require 'zlib'
5
5
  require 'securerandom'
6
6
  require 'strscan'
7
7
 
8
+ #require the RC4 Gem
9
+ require 'rc4'
10
+
11
+
8
12
  load "combine_pdf/combine_pdf_operations.rb"
9
13
  load "combine_pdf/combine_pdf_basic_writer.rb"
10
14
  load "combine_pdf/combine_pdf_decrypt.rb"
@@ -13,26 +17,6 @@ load "combine_pdf/combine_pdf_filter.rb"
13
17
  load "combine_pdf/combine_pdf_parser.rb"
14
18
  load "combine_pdf/combine_pdf_pdf.rb"
15
19
 
16
- # # will be removed one font support and font library is completed.
17
- # require "combine_pdf/font_metrics/courier-bold_metrics.rb"
18
- # require "combine_pdf/font_metrics/courier-boldoblique_metrics.rb"
19
- # require "combine_pdf/font_metrics/courier-oblique_metrics.rb"
20
- # require "combine_pdf/font_metrics/courier_metrics.rb"
21
- # require "combine_pdf/font_metrics/helvetica-bold_metrics.rb"
22
- # require "combine_pdf/font_metrics/helvetica-boldoblique_metrics.rb"
23
- # require "combine_pdf/font_metrics/helvetica-oblique_metrics.rb"
24
- # require "combine_pdf/font_metrics/helvetica_metrics.rb"
25
- # require "combine_pdf/font_metrics/symbol_metrics.rb"
26
- # require "combine_pdf/font_metrics/times-bold_metrics.rb"
27
- # require "combine_pdf/font_metrics/times-bolditalic_metrics.rb"
28
- # require "combine_pdf/font_metrics/times-italic_metrics.rb"
29
- # require "combine_pdf/font_metrics/times-roman_metrics.rb"
30
- # require "combine_pdf/font_metrics/zapfdingbats_metrics.rb"
31
- # require "combine_pdf/font_metrics/metrics_dictionary.rb"
32
-
33
-
34
-
35
-
36
20
 
37
21
  # This is a pure ruby library to combine/merge, stamp/overlay and number PDF files - as well as to create tables (meant for indexing combined files).
38
22
  #
@@ -314,9 +298,9 @@ end
314
298
  # numbers are Fixnum or Float
315
299
  # boolean are TrueClass or FalseClass
316
300
 
317
- ## You can test performance with:
318
- ## puts Benchmark.measure { pdf = CombinePDF.new(file_name); pdf.save "test.pdf" } # PDFEditor.new_pdf
319
- ## demo: file_name = "/Users/2Be/Ruby/pdfs/encrypted.pdf"; pdf=0; puts Benchmark.measure { pdf = CombinePDF.new(file_name); pdf.save "test.pdf" }
301
+ ## test performance with:
302
+ ## puts Benchmark.measure { pdf = CombinePDF.new(file); pdf.save "test.pdf" } # PDFEditor.new_pdf
303
+ ## demo: file_name = "~/Ruby/pdfs/encrypted.pdf"; pdf=0; puts Benchmark.measure { pdf = CombinePDF.new(file_name); pdf.save "test.pdf" }
320
304
  ## at the moment... my code is terribly slow for larger files... :(
321
305
  ## The file saving is solved (I hope)... but file loading is an issue.
322
306
  ## pdf.each_object {|obj| puts "Stream length: #{obj[:raw_stream_content].length} was registered as #{obj[:Length].is_a?(Hash)? obj[:Length][:referenced_object][:indirect_without_dictionary] : obj[:Length]}" if obj[:raw_stream_content] }
@@ -324,5 +308,30 @@ end
324
308
  ## puts Benchmark.measure { 1000.times { (CombinePDF::PDFOperations.get_refernced_object pdf.objects, {indirect_reference_id: 100, indirect_generation_number:0}).object_id } }
325
309
  ## puts Benchmark.measure { 1000.times { (pdf.objects.select {|o| o[:indirect_reference_id]== 100 && o[:indirect_generation_number] == 0})[0].object_id } }
326
310
  ## puts Benchmark.measure { {}.tap {|out| pdf.objects.each {|o| out[ [o[:indirect_reference_id], o[:indirect_generation_number] ] ] = o }} }
327
-
311
+ ##
312
+ #### local test for CombinePDF
313
+ ## file = "/Users/2Be/Ruby/pdfs/encrypted.pdf"
314
+ ## puts Benchmark.measure { 1000.times { pdf = CombinePDF.new(file); pdf.save "test.pdf" } }
315
+ ### gives : 2.540000 0.140000 2.680000 ( 2.696524)
316
+ ## puts Benchmark.measure { pdf = CombinePDF.new() ; 1000.times { pdf << CombinePDF.new(file) } ; pdf.save "test.pdf" }
317
+ ### gives: 11.770000 0.090000 11.860000 ( 11.879411) #why the difference? NOT the object reference rebuilding...
318
+ ### file size: 7Kb success
319
+ ###### gives: 7.440000 0.100000 7.540000 ( 7.536460) (!!!) with draft file size 8kb
320
+ ##
321
+ #### local test by pdftk
322
+ ## pdftk_path = '/Users/2Be/Ruby/pdfs/pdftk_lib/bin/pdftk'
323
+ ## file_array = []
324
+ ## 1000.times { file_array << file }
325
+ ## puts Benchmark.measure { system ( pdftk_path + " '" + file_array.join("' '") + "' input_pw '' output 'test.pdf'" ) }
326
+ ### gives: 0.000000 0.000000 3.250000 ( 3.244724)
327
+ ### FAILS with no output, unwilling to decrypt.
328
+ ###### gives: 0.000000 0.000000 2.640000 ( 2.661801) with draft file size 1.3MB (!!)
329
+ #### local test by python
330
+ ## pyton_path = '/Users/2Be/Ruby/pdfs/pdftk_lib/join.py'
331
+ ## file_array = []
332
+ ## 1000.times { file_array << file }
333
+ ## puts Benchmark.measure { system ( pyton_path + " -o 'test.pdf' '#{file_array.join "' '"}' " ) }
334
+ ### gives 0.000000 0.000000 1.010000 ( 1.147135)
335
+ ### file merge FAILS with 1,000 empty pages (undecrypted)
336
+ ####### gives: 0.000000 0.000000 1.770000 ( 1.775513) with draft. file size 4.9MB (!!!)
328
337
 
@@ -133,7 +133,7 @@ module CombinePDF
133
133
  y = options[:y]
134
134
 
135
135
  # set graphic state for the box
136
- box_stream << "q\nq\nq\n"
136
+ box_stream << "q\n"
137
137
  box_graphic_state = { ca: options[:opacity], CA: options[:opacity], LW: options[:border_width], LC: 0, LJ: 0, LD: 0 }
138
138
  if options[:box_radius] != 0 # if the text box has rounded corners
139
139
  box_graphic_state[:LC], box_graphic_state[:LJ] = 2, 1
@@ -191,7 +191,7 @@ module CombinePDF
191
191
  end
192
192
 
193
193
  # exit graphic state for the box
194
- box_stream << "Q\nQ\nQ\n"
194
+ box_stream << "Q\n"
195
195
  end
196
196
  contents << box_stream
197
197
 
@@ -227,7 +227,7 @@ module CombinePDF
227
227
  end
228
228
 
229
229
  # set graphic state for text
230
- text_stream << "q\nq\nq\n"
230
+ text_stream << "q\n"
231
231
  text_graphic_state = graphic_state({ca: options[:opacity], CA: options[:opacity], LW: options[:stroke_width].to_f, LC: 2, LJ: 1, LD: 0})
232
232
  text_stream << "#{PDFOperations._object_to_pdf text_graphic_state} gs\n"
233
233
  text_stream << "DeviceRGB CS\nDeviceRGB cs\n"
@@ -261,7 +261,7 @@ module CombinePDF
261
261
  y -= encoded[3]/1000*font_size #update text starting point
262
262
  end
263
263
  # exit graphic state for text
264
- text_stream << "Q\nQ\nQ\n"
264
+ text_stream << "Q\n"
265
265
  end
266
266
  contents << text_stream
267
267
 
@@ -43,7 +43,6 @@ module CombinePDF
43
43
  @key = set_general_key
44
44
  case @encryption_dictionary[:V]
45
45
  when 1,2
46
- warn "trying to decrypt with RC4."
47
46
  # raise_encrypted_error
48
47
  _perform_decrypt_proc_ @objects, self.method(:decrypt_RC4)
49
48
  else
@@ -109,7 +108,7 @@ module CombinePDF
109
108
  # (0..2).each { |e| object_key << (encrypted_id >> e*8 & 0xFF ) }
110
109
  # (0..1).each { |e| object_key << (encrypted_generation >> e*8 & 0xFF ) }
111
110
  key_length = object_key.length < 16 ? object_key.length : 16
112
- rc4 = RC4.new( Digest::MD5.digest(object_key)[(0...key_length)] )
111
+ rc4 = ::RC4.new( Digest::MD5.digest(object_key)[(0...key_length)] )
113
112
  rc4.decrypt(encrypted)
114
113
  end
115
114
  def decrypt_AES(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
@@ -37,6 +37,7 @@ module CombinePDF
37
37
 
38
38
  # following the reference chain and assigning a pointer to the correct Resources object.
39
39
  # (assignments of Strings, Arrays and Hashes are pointers in Ruby, unless the .dup method is called)
40
+ page[:Resources] ||= {}
40
41
  original_resources = page[:Resources]
41
42
  if original_resources[:is_reference_only]
42
43
  original_resources = original_resources[:referenced_object]
@@ -45,6 +46,7 @@ module CombinePDF
45
46
  original_contents = page[:Contents]
46
47
  original_contents = [original_contents] unless original_contents.is_a? Array
47
48
 
49
+ stream[:Resources] ||= {}
48
50
  stream_resources = stream[:Resources]
49
51
  if stream_resources[:is_reference_only]
50
52
  stream_resources = stream_resources[:referenced_object]
@@ -65,7 +65,6 @@ module CombinePDF
65
65
  @version = @scanner.matched.scan(/[\d\.]+/)[0].to_f
66
66
  end
67
67
 
68
- warn "Starting to parse PDF data."
69
68
  @parsed = _parse_
70
69
 
71
70
  if @root_object == {}
@@ -75,7 +74,6 @@ module CombinePDF
75
74
  end
76
75
  end
77
76
  raise "root is unknown - cannot determine if file is Encrypted" if @root_object == {}
78
- warn "Injecting actual values into root object: #{@root_object}."
79
77
  PDFOperations.change_references_to_actual_values @parsed, @root_object
80
78
 
81
79
  if @root_object[:Encrypt]
@@ -91,7 +89,6 @@ module CombinePDF
91
89
  warn "PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects."
92
90
 
93
91
  object_streams.each do |o|
94
- warn "Attempting #{o.select {|k,v| k != :raw_stream_content}}"
95
92
  ## un-encode (using the correct filter) the object streams
96
93
  PDFFilter.inflate_object o
97
94
  ## extract objects from stream to top level array @parsed
@@ -123,7 +120,6 @@ module CombinePDF
123
120
  else
124
121
  @info_object = {}
125
122
  end
126
- warn "setting parsed collection and returning collection."
127
123
  @parsed
128
124
  end
129
125
 
@@ -133,7 +129,6 @@ module CombinePDF
133
129
  def _parse_
134
130
  out = []
135
131
  str = ''
136
- # warn "Scaning for objects, starting at #{@scanner.pos}: #{@scanner.peek(10)}"
137
132
  while @scanner.rest? do
138
133
  case
139
134
  ##########################################
@@ -171,7 +166,6 @@ module CombinePDF
171
166
  ## parse an Object after finished
172
167
  ##########################################
173
168
  when str = @scanner.scan(/endobj/)
174
- # warn "Proccessing Object"
175
169
  #what to do when this is an object?
176
170
  if out.last.is_a? Hash
177
171
  out << out.pop.merge({indirect_generation_number: out.pop, indirect_reference_id: out.pop})
@@ -115,12 +115,10 @@ module CombinePDF
115
115
  end
116
116
  # general globals
117
117
  @string_output = :literal
118
- @need_to_rebuild_resources = false
119
118
  @set_start_id = 1
120
119
  @info[:Producer] = "Ruby CombinePDF Library by Boaz Segev"
121
120
  @info.delete :CreationDate
122
121
  @info.delete :ModDate
123
- warn "finished to initialize PDF object."
124
122
  end
125
123
 
126
124
  # Formats the data to PDF formats and returns a binary string that represents the PDF file content.
@@ -133,17 +131,11 @@ module CombinePDF
133
131
  @version = 1.5 if @version.to_f == 0.0
134
132
  #set creation date for merged file
135
133
  @info[:CreationDate] = Time.now.strftime "D:%Y%m%d%H%M%S%:::z'00"
136
- #rebuild resources if needed
137
- if @need_to_rebuild_resources
138
- rebuild_resources
139
- end
140
134
  #rebuild_catalog
141
135
  catalog = rebuild_catalog_and_objects
142
136
  # add ID and generation numbers to objects
143
137
  renumber_object_ids
144
138
 
145
- warn "Formatting PDF output"
146
-
147
139
  out = []
148
140
  xref = []
149
141
  indirect_object_count = 1 #the first object is the null object
@@ -159,7 +151,6 @@ module CombinePDF
159
151
  out << PDFOperations._object_to_pdf(o)
160
152
  loc += out.last.length + 1
161
153
  end
162
- warn "Building XREF"
163
154
  xref_location = 0
164
155
  out.each { |line| xref_location += line.bytes.length + 1}
165
156
  out << "xref\n\r0 #{(indirect_object_count).to_s}\n\r0000000000 65535 f \n\r"
@@ -275,7 +266,7 @@ module CombinePDF
275
266
  fonts_array
276
267
  end
277
268
 
278
- # add the pages (or file) to the PDF (combine/merge) and return the new pages array.
269
+ # add the pages (or file) to the PDF (combine/merge) and RETURNS SELF, for nesting.
279
270
  # for example:
280
271
  #
281
272
  # pdf = CombinePDF.new "first_file.pdf"
@@ -290,17 +281,14 @@ module CombinePDF
290
281
  ## and how to handles imported pages?
291
282
  if data.is_a?(PDF)
292
283
  @version = [@version, data.version].max
293
-
294
- @need_to_rebuild_resources = true
295
-
296
284
  @objects.push(*data.objects)
297
285
  # rebuild_catalog
298
- return rebuild_catalog[:Pages][:referenced_object][:Kids]
286
+ return self
299
287
  end
300
288
  insert -1, data
301
289
  end
302
290
 
303
- # add the pages (or file) to the BEGINNING of the PDF (combine/merge) and return the new pages array.
291
+ # add the pages (or file) to the BEGINNING of the PDF (combine/merge) and RETURNS SELF for nesting operators.
304
292
  # for example:
305
293
  #
306
294
  # pdf = CombinePDF.new "second_file.pdf"
@@ -311,6 +299,7 @@ module CombinePDF
311
299
  # data:: is PDF page (Hash), and Array of PDF pages or a parsed PDF object to be added.
312
300
  def >> (data)
313
301
  insert 0, data
302
+ self
314
303
  end
315
304
 
316
305
  # add PDF pages (or PDF files) into a specific location.
@@ -519,8 +508,6 @@ module CombinePDF
519
508
  end
520
509
  # @private
521
510
  def serialize_objects_and_references(object = nil)
522
- warn "connecting objects with their references (serialize_objects_and_references)."
523
-
524
511
  # # Version 3.5 injects indirect objects if they arn't dictionaries.
525
512
  # # benchmark 1000.times was 3.568246 sec for pdf = CombinePDF.new "/Users/2Be/Desktop/מוצגים/20121002\ הודעת\ הערעור.pdf" }
526
513
  # # puts Benchmark.measure { 1000.times {pdf.serialize_objects_and_references} }
@@ -653,49 +640,6 @@ module CombinePDF
653
640
  catalog
654
641
  end
655
642
 
656
- # @private
657
- # disabled, don't use. simpley returns true.
658
- def rebuild_resources
659
-
660
- warn "Resources re-building disabled as it isn't worth the price in peformance as of yet."
661
-
662
- return true
663
-
664
- warn "Re-Building Resources"
665
- @need_to_rebuild_resources = false
666
- # what are resources?
667
- # anything at the top level of the file exept catalogs, page lists (Pages) and pages...
668
- not_resources = [:Catalog, :Pages, :Page]
669
- # get old resources list
670
- old_resources = @objects.select {|obj| obj.is_a?(Hash) && !not_resources.include?(obj[:Type])}
671
- # collect all unique resources while ignoring double values and resetting references
672
- # also ignore inner values (canot use PRIVATE_HASH_KEYS because of stream and other issues)
673
- ignore_keys = [:indirect_reference_id, :indirect_generation_number, :is_reference_only, :referenced_object]
674
- new_resources = []
675
- all_references = references
676
- old_resources.each do |old_r|
677
- add = true
678
- new_resources.each do |new_r|
679
- # ## v.1.0 - slower
680
- # if (old_r.reject {|k,v| ignore_keys.include?(k) }) == (new_r.reject {|k,v| ignore_keys.include?(k)})
681
- # all_references.each {|ref| ref[:referenced_object] = new_r if ref[:referenced_object].object_id == old_r.object_id } # fails, but doesn't assume all references are connected: compare_reference_values(old_r, ref) }
682
- # add = false
683
- # end
684
- ## v.1.1 - faster, doesn't build two hashes (but iterates one)
685
- if ( [].tap {|out| old_r.each {|k,v| out << true unless ((!ignore_keys.include?(k)) && new_r[k] == v) } } .empty?)
686
- all_references.each {|ref| ref[:referenced_object] = new_r if ref[:referenced_object].object_id == old_r.object_id } # fails, but doesn't assume all references are connected: compare_reference_values(old_r, ref) }
687
- add = false
688
- end
689
- end
690
- new_resources << old_r if add
691
- end
692
- # remove old resources
693
- @objects.reject! {|obj| old_resources.include?(obj)}
694
- # insert new resources
695
- @objects.push *new_resources
696
- # rebuild stream lengths?
697
- end
698
-
699
643
  # @private
700
644
  # the function returns true if the reference belongs to the object
701
645
  def compare_reference_values(obj, ref)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-09-19 00:00:00.000000000 Z
12
+ date: 2014-09-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ruby-rc4
@@ -26,8 +26,8 @@ dependencies:
26
26
  - !ruby/object:Gem::Version
27
27
  version: 0.1.5
28
28
  description: A nifty gem, in pure Ruby, to parse PDF files and combine (merge) them
29
- with other PDF files, number the pages, watermark them or stamp them (all using
30
- the PDF file format).
29
+ with other PDF files, number the pages, watermark them or stamp them, create tables
30
+ or basic text objects etc. (all using the PDF file format).
31
31
  email: bsegev@gmail.com
32
32
  executables: []
33
33
  extensions: []