combine_pdf 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b732c13238534158bddb82ff89b5c0d14fdccdd2
4
- data.tar.gz: 45ef6b52039c289ba1f71cce445c245d42c01bc1
3
+ metadata.gz: 7d910d296f84bd5493a258317eace2f9cabf7d4b
4
+ data.tar.gz: 67efe73d182be27f9ad597e48cff83c3147ab37d
5
5
  SHA512:
6
- metadata.gz: 0ac937aea55ebc4f5be4cb9a2a7c9fadd45cc4e3dd01ff86749486541e1056787e89795fd514f031036e4156d4c7807101e82d0b88c64d00b813966211f97b15
7
- data.tar.gz: 1b89918e8c4e79e005dcd1961f8f89fcb89565ed95b57e429a7fb82a1cbabe009665d3396a54db0c6e38ced2b9dc72c720157348404b4a791d374bfae5669353
6
+ metadata.gz: 8924f5bb53e5f8e00c224f360e343688b9902d1c2e7b59c319e07cc66ce763b08651619be64217d1af4a7e1029344091dcb106ffbc7ac41a3e496c5a0e0f7bbc
7
+ data.tar.gz: d2162982205d6c89a6ace341d8d2a3e123caff712d7c8dd3e9220ec4ce4dc67dfa2759e4429a07bc2c28d66600be00c5b6221d26c11bb34501d4fdb42092aba5
@@ -5,6 +5,10 @@ require 'zlib'
5
5
  require 'securerandom'
6
6
  require 'strscan'
7
7
 
8
+ #require the RC4 Gem
9
+ require 'rc4'
10
+
11
+
8
12
  load "combine_pdf/combine_pdf_operations.rb"
9
13
  load "combine_pdf/combine_pdf_basic_writer.rb"
10
14
  load "combine_pdf/combine_pdf_decrypt.rb"
@@ -13,26 +17,6 @@ load "combine_pdf/combine_pdf_filter.rb"
13
17
  load "combine_pdf/combine_pdf_parser.rb"
14
18
  load "combine_pdf/combine_pdf_pdf.rb"
15
19
 
16
- # # will be removed one font support and font library is completed.
17
- # require "combine_pdf/font_metrics/courier-bold_metrics.rb"
18
- # require "combine_pdf/font_metrics/courier-boldoblique_metrics.rb"
19
- # require "combine_pdf/font_metrics/courier-oblique_metrics.rb"
20
- # require "combine_pdf/font_metrics/courier_metrics.rb"
21
- # require "combine_pdf/font_metrics/helvetica-bold_metrics.rb"
22
- # require "combine_pdf/font_metrics/helvetica-boldoblique_metrics.rb"
23
- # require "combine_pdf/font_metrics/helvetica-oblique_metrics.rb"
24
- # require "combine_pdf/font_metrics/helvetica_metrics.rb"
25
- # require "combine_pdf/font_metrics/symbol_metrics.rb"
26
- # require "combine_pdf/font_metrics/times-bold_metrics.rb"
27
- # require "combine_pdf/font_metrics/times-bolditalic_metrics.rb"
28
- # require "combine_pdf/font_metrics/times-italic_metrics.rb"
29
- # require "combine_pdf/font_metrics/times-roman_metrics.rb"
30
- # require "combine_pdf/font_metrics/zapfdingbats_metrics.rb"
31
- # require "combine_pdf/font_metrics/metrics_dictionary.rb"
32
-
33
-
34
-
35
-
36
20
 
37
21
  # This is a pure ruby library to combine/merge, stamp/overlay and number PDF files - as well as to create tables (meant for indexing combined files).
38
22
  #
@@ -314,9 +298,9 @@ end
314
298
  # numbers are Fixnum or Float
315
299
  # boolean are TrueClass or FalseClass
316
300
 
317
- ## You can test performance with:
318
- ## puts Benchmark.measure { pdf = CombinePDF.new(file_name); pdf.save "test.pdf" } # PDFEditor.new_pdf
319
- ## demo: file_name = "/Users/2Be/Ruby/pdfs/encrypted.pdf"; pdf=0; puts Benchmark.measure { pdf = CombinePDF.new(file_name); pdf.save "test.pdf" }
301
+ ## test performance with:
302
+ ## puts Benchmark.measure { pdf = CombinePDF.new(file); pdf.save "test.pdf" } # PDFEditor.new_pdf
303
+ ## demo: file_name = "~/Ruby/pdfs/encrypted.pdf"; pdf=0; puts Benchmark.measure { pdf = CombinePDF.new(file_name); pdf.save "test.pdf" }
320
304
  ## at the moment... my code is terribly slow for larger files... :(
321
305
  ## The file saving is solved (I hope)... but file loading is an issue.
322
306
  ## pdf.each_object {|obj| puts "Stream length: #{obj[:raw_stream_content].length} was registered as #{obj[:Length].is_a?(Hash)? obj[:Length][:referenced_object][:indirect_without_dictionary] : obj[:Length]}" if obj[:raw_stream_content] }
@@ -324,5 +308,30 @@ end
324
308
  ## puts Benchmark.measure { 1000.times { (CombinePDF::PDFOperations.get_refernced_object pdf.objects, {indirect_reference_id: 100, indirect_generation_number:0}).object_id } }
325
309
  ## puts Benchmark.measure { 1000.times { (pdf.objects.select {|o| o[:indirect_reference_id]== 100 && o[:indirect_generation_number] == 0})[0].object_id } }
326
310
  ## puts Benchmark.measure { {}.tap {|out| pdf.objects.each {|o| out[ [o[:indirect_reference_id], o[:indirect_generation_number] ] ] = o }} }
327
-
311
+ ##
312
+ #### local test for CombinePDF
313
+ ## file = "/Users/2Be/Ruby/pdfs/encrypted.pdf"
314
+ ## puts Benchmark.measure { 1000.times { pdf = CombinePDF.new(file); pdf.save "test.pdf" } }
315
+ ### gives : 2.540000 0.140000 2.680000 ( 2.696524)
316
+ ## puts Benchmark.measure { pdf = CombinePDF.new() ; 1000.times { pdf << CombinePDF.new(file) } ; pdf.save "test.pdf" }
317
+ ### gives: 11.770000 0.090000 11.860000 ( 11.879411) #why the difference? NOT the object reference rebuilding...
318
+ ### file size: 7Kb success
319
+ ###### gives: 7.440000 0.100000 7.540000 ( 7.536460) (!!!) with draft file size 8kb
320
+ ##
321
+ #### local test by pdftk
322
+ ## pdftk_path = '/Users/2Be/Ruby/pdfs/pdftk_lib/bin/pdftk'
323
+ ## file_array = []
324
+ ## 1000.times { file_array << file }
325
+ ## puts Benchmark.measure { system ( pdftk_path + " '" + file_array.join("' '") + "' input_pw '' output 'test.pdf'" ) }
326
+ ### gives: 0.000000 0.000000 3.250000 ( 3.244724)
327
+ ### FAILS with no output, unwilling to decrypt.
328
+ ###### gives: 0.000000 0.000000 2.640000 ( 2.661801) with draft file size 1.3MB (!!)
329
+ #### local test by python
330
+ ## pyton_path = '/Users/2Be/Ruby/pdfs/pdftk_lib/join.py'
331
+ ## file_array = []
332
+ ## 1000.times { file_array << file }
333
+ ## puts Benchmark.measure { system ( pyton_path + " -o 'test.pdf' '#{file_array.join "' '"}' " ) }
334
+ ### gives 0.000000 0.000000 1.010000 ( 1.147135)
335
+ ### file merge FAILS with 1,000 empty pages (undecrypted)
336
+ ####### gives: 0.000000 0.000000 1.770000 ( 1.775513) with draft. file size 4.9MB (!!!)
328
337
 
@@ -133,7 +133,7 @@ module CombinePDF
133
133
  y = options[:y]
134
134
 
135
135
  # set graphic state for the box
136
- box_stream << "q\nq\nq\n"
136
+ box_stream << "q\n"
137
137
  box_graphic_state = { ca: options[:opacity], CA: options[:opacity], LW: options[:border_width], LC: 0, LJ: 0, LD: 0 }
138
138
  if options[:box_radius] != 0 # if the text box has rounded corners
139
139
  box_graphic_state[:LC], box_graphic_state[:LJ] = 2, 1
@@ -191,7 +191,7 @@ module CombinePDF
191
191
  end
192
192
 
193
193
  # exit graphic state for the box
194
- box_stream << "Q\nQ\nQ\n"
194
+ box_stream << "Q\n"
195
195
  end
196
196
  contents << box_stream
197
197
 
@@ -227,7 +227,7 @@ module CombinePDF
227
227
  end
228
228
 
229
229
  # set graphic state for text
230
- text_stream << "q\nq\nq\n"
230
+ text_stream << "q\n"
231
231
  text_graphic_state = graphic_state({ca: options[:opacity], CA: options[:opacity], LW: options[:stroke_width].to_f, LC: 2, LJ: 1, LD: 0})
232
232
  text_stream << "#{PDFOperations._object_to_pdf text_graphic_state} gs\n"
233
233
  text_stream << "DeviceRGB CS\nDeviceRGB cs\n"
@@ -261,7 +261,7 @@ module CombinePDF
261
261
  y -= encoded[3]/1000*font_size #update text starting point
262
262
  end
263
263
  # exit graphic state for text
264
- text_stream << "Q\nQ\nQ\n"
264
+ text_stream << "Q\n"
265
265
  end
266
266
  contents << text_stream
267
267
 
@@ -43,7 +43,6 @@ module CombinePDF
43
43
  @key = set_general_key
44
44
  case @encryption_dictionary[:V]
45
45
  when 1,2
46
- warn "trying to decrypt with RC4."
47
46
  # raise_encrypted_error
48
47
  _perform_decrypt_proc_ @objects, self.method(:decrypt_RC4)
49
48
  else
@@ -109,7 +108,7 @@ module CombinePDF
109
108
  # (0..2).each { |e| object_key << (encrypted_id >> e*8 & 0xFF ) }
110
109
  # (0..1).each { |e| object_key << (encrypted_generation >> e*8 & 0xFF ) }
111
110
  key_length = object_key.length < 16 ? object_key.length : 16
112
- rc4 = RC4.new( Digest::MD5.digest(object_key)[(0...key_length)] )
111
+ rc4 = ::RC4.new( Digest::MD5.digest(object_key)[(0...key_length)] )
113
112
  rc4.decrypt(encrypted)
114
113
  end
115
114
  def decrypt_AES(encrypted, encrypted_id, encrypted_generation, encrypted_filter)
@@ -37,6 +37,7 @@ module CombinePDF
37
37
 
38
38
  # following the reference chain and assigning a pointer to the correct Resources object.
39
39
  # (assignments of Strings, Arrays and Hashes are pointers in Ruby, unless the .dup method is called)
40
+ page[:Resources] ||= {}
40
41
  original_resources = page[:Resources]
41
42
  if original_resources[:is_reference_only]
42
43
  original_resources = original_resources[:referenced_object]
@@ -45,6 +46,7 @@ module CombinePDF
45
46
  original_contents = page[:Contents]
46
47
  original_contents = [original_contents] unless original_contents.is_a? Array
47
48
 
49
+ stream[:Resources] ||= {}
48
50
  stream_resources = stream[:Resources]
49
51
  if stream_resources[:is_reference_only]
50
52
  stream_resources = stream_resources[:referenced_object]
@@ -65,7 +65,6 @@ module CombinePDF
65
65
  @version = @scanner.matched.scan(/[\d\.]+/)[0].to_f
66
66
  end
67
67
 
68
- warn "Starting to parse PDF data."
69
68
  @parsed = _parse_
70
69
 
71
70
  if @root_object == {}
@@ -75,7 +74,6 @@ module CombinePDF
75
74
  end
76
75
  end
77
76
  raise "root is unknown - cannot determine if file is Encrypted" if @root_object == {}
78
- warn "Injecting actual values into root object: #{@root_object}."
79
77
  PDFOperations.change_references_to_actual_values @parsed, @root_object
80
78
 
81
79
  if @root_object[:Encrypt]
@@ -91,7 +89,6 @@ module CombinePDF
91
89
  warn "PDF 1.5 Object streams found - they are not fully supported! attempting to extract objects."
92
90
 
93
91
  object_streams.each do |o|
94
- warn "Attempting #{o.select {|k,v| k != :raw_stream_content}}"
95
92
  ## un-encode (using the correct filter) the object streams
96
93
  PDFFilter.inflate_object o
97
94
  ## extract objects from stream to top level array @parsed
@@ -123,7 +120,6 @@ module CombinePDF
123
120
  else
124
121
  @info_object = {}
125
122
  end
126
- warn "setting parsed collection and returning collection."
127
123
  @parsed
128
124
  end
129
125
 
@@ -133,7 +129,6 @@ module CombinePDF
133
129
  def _parse_
134
130
  out = []
135
131
  str = ''
136
- # warn "Scaning for objects, starting at #{@scanner.pos}: #{@scanner.peek(10)}"
137
132
  while @scanner.rest? do
138
133
  case
139
134
  ##########################################
@@ -171,7 +166,6 @@ module CombinePDF
171
166
  ## parse an Object after finished
172
167
  ##########################################
173
168
  when str = @scanner.scan(/endobj/)
174
- # warn "Proccessing Object"
175
169
  #what to do when this is an object?
176
170
  if out.last.is_a? Hash
177
171
  out << out.pop.merge({indirect_generation_number: out.pop, indirect_reference_id: out.pop})
@@ -115,12 +115,10 @@ module CombinePDF
115
115
  end
116
116
  # general globals
117
117
  @string_output = :literal
118
- @need_to_rebuild_resources = false
119
118
  @set_start_id = 1
120
119
  @info[:Producer] = "Ruby CombinePDF Library by Boaz Segev"
121
120
  @info.delete :CreationDate
122
121
  @info.delete :ModDate
123
- warn "finished to initialize PDF object."
124
122
  end
125
123
 
126
124
  # Formats the data to PDF formats and returns a binary string that represents the PDF file content.
@@ -133,17 +131,11 @@ module CombinePDF
133
131
  @version = 1.5 if @version.to_f == 0.0
134
132
  #set creation date for merged file
135
133
  @info[:CreationDate] = Time.now.strftime "D:%Y%m%d%H%M%S%:::z'00"
136
- #rebuild resources if needed
137
- if @need_to_rebuild_resources
138
- rebuild_resources
139
- end
140
134
  #rebuild_catalog
141
135
  catalog = rebuild_catalog_and_objects
142
136
  # add ID and generation numbers to objects
143
137
  renumber_object_ids
144
138
 
145
- warn "Formatting PDF output"
146
-
147
139
  out = []
148
140
  xref = []
149
141
  indirect_object_count = 1 #the first object is the null object
@@ -159,7 +151,6 @@ module CombinePDF
159
151
  out << PDFOperations._object_to_pdf(o)
160
152
  loc += out.last.length + 1
161
153
  end
162
- warn "Building XREF"
163
154
  xref_location = 0
164
155
  out.each { |line| xref_location += line.bytes.length + 1}
165
156
  out << "xref\n\r0 #{(indirect_object_count).to_s}\n\r0000000000 65535 f \n\r"
@@ -275,7 +266,7 @@ module CombinePDF
275
266
  fonts_array
276
267
  end
277
268
 
278
- # add the pages (or file) to the PDF (combine/merge) and return the new pages array.
269
+ # add the pages (or file) to the PDF (combine/merge) and RETURNS SELF, for nesting.
279
270
  # for example:
280
271
  #
281
272
  # pdf = CombinePDF.new "first_file.pdf"
@@ -290,17 +281,14 @@ module CombinePDF
290
281
  ## and how to handles imported pages?
291
282
  if data.is_a?(PDF)
292
283
  @version = [@version, data.version].max
293
-
294
- @need_to_rebuild_resources = true
295
-
296
284
  @objects.push(*data.objects)
297
285
  # rebuild_catalog
298
- return rebuild_catalog[:Pages][:referenced_object][:Kids]
286
+ return self
299
287
  end
300
288
  insert -1, data
301
289
  end
302
290
 
303
- # add the pages (or file) to the BEGINNING of the PDF (combine/merge) and return the new pages array.
291
+ # add the pages (or file) to the BEGINNING of the PDF (combine/merge) and RETURNS SELF for nesting operators.
304
292
  # for example:
305
293
  #
306
294
  # pdf = CombinePDF.new "second_file.pdf"
@@ -311,6 +299,7 @@ module CombinePDF
311
299
  # data:: is PDF page (Hash), and Array of PDF pages or a parsed PDF object to be added.
312
300
  def >> (data)
313
301
  insert 0, data
302
+ self
314
303
  end
315
304
 
316
305
  # add PDF pages (or PDF files) into a specific location.
@@ -519,8 +508,6 @@ module CombinePDF
519
508
  end
520
509
  # @private
521
510
  def serialize_objects_and_references(object = nil)
522
- warn "connecting objects with their references (serialize_objects_and_references)."
523
-
524
511
  # # Version 3.5 injects indirect objects if they aren't dictionaries.
525
512
  # # benchmark 1000.times was 3.568246 sec for pdf = CombinePDF.new "/Users/2Be/Desktop/מוצגים/20121002\ הודעת\ הערעור.pdf" }
526
513
  # # puts Benchmark.measure { 1000.times {pdf.serialize_objects_and_references} }
@@ -653,49 +640,6 @@ module CombinePDF
653
640
  catalog
654
641
  end
655
642
 
656
- # @private
657
- # disabled, don't use. simpley returns true.
658
- def rebuild_resources
659
-
660
- warn "Resources re-building disabled as it isn't worth the price in peformance as of yet."
661
-
662
- return true
663
-
664
- warn "Re-Building Resources"
665
- @need_to_rebuild_resources = false
666
- # what are resources?
667
- # anything at the top level of the file exept catalogs, page lists (Pages) and pages...
668
- not_resources = [:Catalog, :Pages, :Page]
669
- # get old resources list
670
- old_resources = @objects.select {|obj| obj.is_a?(Hash) && !not_resources.include?(obj[:Type])}
671
- # collect all unique resources while ignoring double values and resetting references
672
- # also ignore inner values (canot use PRIVATE_HASH_KEYS because of stream and other issues)
673
- ignore_keys = [:indirect_reference_id, :indirect_generation_number, :is_reference_only, :referenced_object]
674
- new_resources = []
675
- all_references = references
676
- old_resources.each do |old_r|
677
- add = true
678
- new_resources.each do |new_r|
679
- # ## v.1.0 - slower
680
- # if (old_r.reject {|k,v| ignore_keys.include?(k) }) == (new_r.reject {|k,v| ignore_keys.include?(k)})
681
- # all_references.each {|ref| ref[:referenced_object] = new_r if ref[:referenced_object].object_id == old_r.object_id } # fails, but doesn't assume all references are connected: compare_reference_values(old_r, ref) }
682
- # add = false
683
- # end
684
- ## v.1.1 - faster, doesn't build two hashes (but iterates one)
685
- if ( [].tap {|out| old_r.each {|k,v| out << true unless ((!ignore_keys.include?(k)) && new_r[k] == v) } } .empty?)
686
- all_references.each {|ref| ref[:referenced_object] = new_r if ref[:referenced_object].object_id == old_r.object_id } # fails, but doesn't assume all references are connected: compare_reference_values(old_r, ref) }
687
- add = false
688
- end
689
- end
690
- new_resources << old_r if add
691
- end
692
- # remove old resources
693
- @objects.reject! {|obj| old_resources.include?(obj)}
694
- # insert new resources
695
- @objects.push *new_resources
696
- # rebuild stream lengths?
697
- end
698
-
699
643
  # @private
700
644
  # the function returns true if the reference belongs to the object
701
645
  def compare_reference_values(obj, ref)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: combine_pdf
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Boaz Segev
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-09-19 00:00:00.000000000 Z
12
+ date: 2014-09-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: ruby-rc4
@@ -26,8 +26,8 @@ dependencies:
26
26
  - !ruby/object:Gem::Version
27
27
  version: 0.1.5
28
28
  description: A nifty gem, in pure Ruby, to parse PDF files and combine (merge) them
29
- with other PDF files, number the pages, watermark them or stamp them (all using
30
- the PDF file format).
29
+ with other PDF files, number the pages, watermark them or stamp them, create tables
30
+ or basic text objects etc. (all using the PDF file format).
31
31
  email: bsegev@gmail.com
32
32
  executables: []
33
33
  extensions: []