combine_pdf 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/combine_pdf.rb +20 -17
- data/lib/combine_pdf/combine_pdf_basic_writer.rb +10 -1
- data/lib/combine_pdf/combine_pdf_decrypt.rb +5 -1
- data/lib/combine_pdf/combine_pdf_filter.rb +5 -1
- data/lib/combine_pdf/combine_pdf_parser.rb +7 -1
- data/lib/combine_pdf/combine_pdf_pdf.rb +34 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c17b1bcd778d60ecce5f1941ad98801e02ef1934
|
4
|
+
data.tar.gz: 74759b9ad7a766a4e50d891e9ad4629cf11b1f50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 549eb5a4caaa6b07bd522ea497537568932a67e79427a3e32bd28827e72dcfb8e15f4602e63337dae1f63ca49de7c0f08a19afc108b75a506276d4e719f1b7a1
|
7
|
+
data.tar.gz: a48b950943f4cc36712f96815aab68f310b8bc0f9debc0d2497ee3b5d7d3e3fd0a88f2957c7c6dc9ccdfbe2a2a498762494d5ed72e45686203d2de9633d9cec2
|
data/lib/combine_pdf.rb
CHANGED
@@ -1,19 +1,4 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
-
|
3
|
-
# this file is part of the CombinePDF library and the code
|
4
|
-
# is subject to the same license (GPLv3).
|
5
|
-
#########################################################
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
# PDF object types cross reference:
|
10
|
-
# Indirect objects, references, dictionaries and streams are Hash
|
11
|
-
# arrays are Array
|
12
|
-
# strings are String
|
13
|
-
# names are Symbols (String.to_sym)
|
14
|
-
# numbers are Fixnum or Float
|
15
|
-
# boolean are TrueClass or FalseClass
|
16
|
-
|
17
2
|
require 'zlib'
|
18
3
|
require 'strscan'
|
19
4
|
require 'combine_pdf/combine_pdf_pdf'
|
@@ -21,6 +6,7 @@ require 'combine_pdf/combine_pdf_decrypt'
|
|
21
6
|
require 'combine_pdf/combine_pdf_filter'
|
22
7
|
require 'combine_pdf/combine_pdf_parser'
|
23
8
|
|
9
|
+
|
24
10
|
# This is a pure ruby library to merge PDF files.
|
25
11
|
# In the future, this library will also allow stamping and watermarking PDFs (it allows this now, only with some issues).
|
26
12
|
#
|
@@ -88,12 +74,16 @@ module CombinePDF
|
|
88
74
|
end
|
89
75
|
end
|
90
76
|
|
91
|
-
module CombinePDF
|
77
|
+
module CombinePDF
|
78
|
+
|
79
|
+
#:nodoc: all
|
92
80
|
################################################################
|
93
81
|
## These are common functions, used within the different classes
|
94
82
|
## These functions aren't open to the public.
|
95
83
|
################################################################
|
84
|
+
#@private
|
96
85
|
PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary]
|
86
|
+
#@private
|
97
87
|
LITERAL_STRING_REPLACEMENT_HASH = {
|
98
88
|
110 => 10, # "\\n".bytes = [92, 110] "\n".ord = 10
|
99
89
|
114 => 13, #r
|
@@ -104,7 +94,9 @@ module CombinePDF #:nodoc: all
|
|
104
94
|
41 => 41, #)
|
105
95
|
92 => 92 #\
|
106
96
|
}
|
107
|
-
|
97
|
+
#@private
|
98
|
+
#:nodoc: all
|
99
|
+
module PDFOperations
|
108
100
|
module_function
|
109
101
|
def inject_to_page page = {Type: :Page, MediaBox: [0,0,612.0,792.0], Resources: {}, Contents: []}, stream = nil, top = true
|
110
102
|
# make sure both the page receiving the new data and the injected page are of the correct data type.
|
@@ -455,6 +447,17 @@ module CombinePDF #:nodoc: all
|
|
455
447
|
end
|
456
448
|
end
|
457
449
|
|
450
|
+
#########################################################
|
451
|
+
# this file is part of the CombinePDF library and the code
|
452
|
+
# is subject to the same license (GPLv3).
|
453
|
+
#########################################################
|
454
|
+
# PDF object types cross reference:
|
455
|
+
# Indirect objects, references, dictionaries and streams are Hash
|
456
|
+
# arrays are Array
|
457
|
+
# strings are String
|
458
|
+
# names are Symbols (String.to_sym)
|
459
|
+
# numbers are Fixnum or Float
|
460
|
+
# boolean are TrueClass or FalseClass
|
458
461
|
|
459
462
|
## You can test performance with:
|
460
463
|
## puts Benchmark.measure { pdf = CombinePDF.new(file_name); pdf.save "test.pdf" } # PDFEditor.new_pdf
|
@@ -5,8 +5,17 @@
|
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
7
|
|
8
|
-
module CombinePDF #:nodoc: all
|
9
8
|
|
9
|
+
|
10
|
+
module CombinePDF
|
11
|
+
|
12
|
+
#@private
|
13
|
+
#:nodoc: all
|
14
|
+
# This doesn't work yet!
|
15
|
+
# in the future I wish to make a simple PDF page writer that has only one function - the text box.
|
16
|
+
# Once the simple writer is ready (creates a text box in a self contained Page element),
|
17
|
+
# I could add it to the << operators and add it as either a self contained page or as an overlay.
|
18
|
+
# if all goes well, maybe I will also create an add_image function.
|
10
19
|
class PDFWriter
|
11
20
|
|
12
21
|
def initialize(media_box = [0.0, 0.0, 612.0, 792.0])
|
@@ -5,7 +5,11 @@
|
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
7
|
|
8
|
-
|
8
|
+
|
9
|
+
|
10
|
+
module CombinePDF
|
11
|
+
#@private
|
12
|
+
#:nodoc: all
|
9
13
|
class PDFDecrypt
|
10
14
|
|
11
15
|
def initialize objects=[], root_doctionary = {}
|
@@ -4,7 +4,11 @@
|
|
4
4
|
## this file is part of the CombinePDF library and the code
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
|
-
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
module CombinePDF
|
8
12
|
|
9
13
|
########################################################
|
10
14
|
## This is the Parser class.
|
@@ -15,6 +19,8 @@ module CombinePDF #:nodoc: all
|
|
15
19
|
## file version.
|
16
20
|
########################################################
|
17
21
|
|
22
|
+
#@private
|
23
|
+
#:nodoc: all
|
18
24
|
class PDFParser
|
19
25
|
# LITERAL_STRING_REPLACEMENT_HASH = {
|
20
26
|
# 110 => 10, # "\\n".bytes = [92, 110] "\n".ord = 10
|
@@ -4,6 +4,11 @@
|
|
4
4
|
## this file is part of the CombinePDF library and the code
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
7
12
|
module CombinePDF
|
8
13
|
#######################################################
|
9
14
|
# PDF class is the PDF object that can save itself to
|
@@ -26,8 +31,19 @@ module CombinePDF
|
|
26
31
|
# pdf.pages.each {|page| page << stamp_page} # notice the << operator is on a page and not a PDF object.
|
27
32
|
#######################################################
|
28
33
|
class PDF
|
29
|
-
|
34
|
+
# the objects attribute is an Array containing all the PDF sub-objects for the class.
|
35
|
+
attr_reader :objects
|
36
|
+
# the info attribute is a Hash that sets the Info data for the PDF.
|
37
|
+
# use, for example:
|
38
|
+
# pdf.info[:Title] = "title"
|
39
|
+
attr_reader :info
|
40
|
+
# sets the string output format (PDF files store strings in two types of formats).
|
41
|
+
#
|
42
|
+
# Accepts:
|
43
|
+
# - :literal
|
44
|
+
# - :hex
|
30
45
|
attr_accessor :string_output
|
46
|
+
# A Float attribute, setting and returning the PDF version of the file (1.1-1.7).
|
31
47
|
attr_accessor :version
|
32
48
|
def initialize (*args)
|
33
49
|
# default before setting
|
@@ -167,7 +183,8 @@ module CombinePDF
|
|
167
183
|
#
|
168
184
|
# pdf << CombinePDF.new "second_file.pdf"
|
169
185
|
#
|
170
|
-
# pdf.save "both_files_merged.pdf"
|
186
|
+
# pdf.save "both_files_merged.pdf"
|
187
|
+
# @param obj [Hash, PDF, Array] a Hash, PDF or Array of parsed PDF data.
|
171
188
|
def << (obj)
|
172
189
|
#########
|
173
190
|
## how should we add data to PDF?
|
@@ -201,6 +218,8 @@ module CombinePDF
|
|
201
218
|
end
|
202
219
|
end
|
203
220
|
class PDF #:nodoc: all
|
221
|
+
|
222
|
+
# @private
|
204
223
|
# this function returns all the Page objects - regardless of order and even if not cataloged
|
205
224
|
# could be used for finding "lost" pages... but actually rather useless.
|
206
225
|
def all_pages
|
@@ -209,6 +228,7 @@ module CombinePDF
|
|
209
228
|
## referenced items and be reached through the connections.
|
210
229
|
[].tap {|out| each_object {|obj| out << obj if obj.is_a?(Hash) && obj[:Type] == :Page } }
|
211
230
|
end
|
231
|
+
# @private
|
212
232
|
def serialize_objects_and_references(object = nil)
|
213
233
|
warn "connecting objects with their references (serialize_objects_and_references)."
|
214
234
|
|
@@ -250,6 +270,7 @@ module CombinePDF
|
|
250
270
|
# end
|
251
271
|
|
252
272
|
end
|
273
|
+
# @private
|
253
274
|
def renumber_object_ids(start = nil)
|
254
275
|
warn "Resetting Object Reference IDs"
|
255
276
|
@set_start_id ||= start
|
@@ -262,6 +283,7 @@ module CombinePDF
|
|
262
283
|
warn "Finished serializing IDs"
|
263
284
|
end
|
264
285
|
|
286
|
+
# @private
|
265
287
|
def references(indirect_reference_id = nil, indirect_generation_number = nil)
|
266
288
|
ref = {indirect_reference_id: indirect_reference_id, indirect_generation_number: indirect_generation_number}
|
267
289
|
out = []
|
@@ -276,9 +298,11 @@ module CombinePDF
|
|
276
298
|
end
|
277
299
|
out
|
278
300
|
end
|
301
|
+
# @private
|
279
302
|
def all_indirect_object
|
280
303
|
[].tap {|out| @objects.each {|obj| out << obj if (obj.is_a?(Hash) && obj[:is_reference_only].nil?) } }
|
281
304
|
end
|
305
|
+
# @private
|
282
306
|
def sort_objects_by_id
|
283
307
|
@objects.sort! do |a,b|
|
284
308
|
if a.is_a?(Hash) && a[:indirect_reference_id] && a[:is_reference_only].nil? && b.is_a?(Hash) && b[:indirect_reference_id] && b[:is_reference_only].nil?
|
@@ -288,6 +312,7 @@ module CombinePDF
|
|
288
312
|
end
|
289
313
|
end
|
290
314
|
|
315
|
+
# @private
|
291
316
|
def add_referenced(object)
|
292
317
|
# add references but not root
|
293
318
|
case
|
@@ -308,6 +333,7 @@ module CombinePDF
|
|
308
333
|
end
|
309
334
|
end
|
310
335
|
end
|
336
|
+
# @private
|
311
337
|
def rebuild_catalog(*with_pages)
|
312
338
|
##########################
|
313
339
|
## Test-Run - How is that done?
|
@@ -348,6 +374,8 @@ module CombinePDF
|
|
348
374
|
|
349
375
|
catalog_object
|
350
376
|
end
|
377
|
+
|
378
|
+
# @private
|
351
379
|
# this is an alternative to the rebuild_catalog catalog method
|
352
380
|
# this method is used by the to_pdf method, for streamlining the PDF output.
|
353
381
|
# there is no point in calling the method before preparing the output.
|
@@ -360,6 +388,7 @@ module CombinePDF
|
|
360
388
|
catalog
|
361
389
|
end
|
362
390
|
|
391
|
+
# @private
|
363
392
|
# disabled, don't use. simply returns true.
|
364
393
|
def rebuild_resources
|
365
394
|
|
@@ -402,10 +431,13 @@ module CombinePDF
|
|
402
431
|
# rebuild stream lengths?
|
403
432
|
end
|
404
433
|
|
434
|
+
# @private
|
405
435
|
# run block of code on every object (Hash)
|
406
436
|
def each_object(&block)
|
407
437
|
PDFOperations._each_object(@objects, &block)
|
408
438
|
end
|
439
|
+
|
440
|
+
# @private
|
409
441
|
# the function returns true if the reference belongs to the object
|
410
442
|
def compare_reference_values(obj, ref)
|
411
443
|
if obj[:referenced_object] && ref[:referenced_object]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
@@ -17,14 +17,14 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: 0.1.5
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: 0.1.5
|
28
28
|
description: A nifty gem, in pure Ruby, to parse PDF files and combine (merge) them
|
29
29
|
with other PDF files, watermark them or stamp them (all using the PDF file format).
|
30
30
|
email: bsegev@gmail.com
|