combine_pdf 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/combine_pdf.rb +20 -17
- data/lib/combine_pdf/combine_pdf_basic_writer.rb +10 -1
- data/lib/combine_pdf/combine_pdf_decrypt.rb +5 -1
- data/lib/combine_pdf/combine_pdf_filter.rb +5 -1
- data/lib/combine_pdf/combine_pdf_parser.rb +7 -1
- data/lib/combine_pdf/combine_pdf_pdf.rb +34 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c17b1bcd778d60ecce5f1941ad98801e02ef1934
|
4
|
+
data.tar.gz: 74759b9ad7a766a4e50d891e9ad4629cf11b1f50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 549eb5a4caaa6b07bd522ea497537568932a67e79427a3e32bd28827e72dcfb8e15f4602e63337dae1f63ca49de7c0f08a19afc108b75a506276d4e719f1b7a1
|
7
|
+
data.tar.gz: a48b950943f4cc36712f96815aab68f310b8bc0f9debc0d2497ee3b5d7d3e3fd0a88f2957c7c6dc9ccdfbe2a2a498762494d5ed72e45686203d2de9633d9cec2
|
data/lib/combine_pdf.rb
CHANGED
@@ -1,19 +1,4 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
-
|
3
|
-
# this file is part of the CombinePDF library and the code
|
4
|
-
# is subject to the same license (GPLv3).
|
5
|
-
#########################################################
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
# PDF object types cross reference:
|
10
|
-
# Indirect objects, references, dictionaries and streams are Hash
|
11
|
-
# arrays are Array
|
12
|
-
# strings are String
|
13
|
-
# names are Symbols (String.to_sym)
|
14
|
-
# numbers are Fixnum or Float
|
15
|
-
# boolean are TrueClass or FalseClass
|
16
|
-
|
17
2
|
require 'zlib'
|
18
3
|
require 'strscan'
|
19
4
|
require 'combine_pdf/combine_pdf_pdf'
|
@@ -21,6 +6,7 @@ require 'combine_pdf/combine_pdf_decrypt'
|
|
21
6
|
require 'combine_pdf/combine_pdf_filter'
|
22
7
|
require 'combine_pdf/combine_pdf_parser'
|
23
8
|
|
9
|
+
|
24
10
|
# This is a pure ruby library to merge PDF files.
|
25
11
|
# In the future, this library will also allow stamping and watermarking PDFs (it allows this now, only with some issues).
|
26
12
|
#
|
@@ -88,12 +74,16 @@ module CombinePDF
|
|
88
74
|
end
|
89
75
|
end
|
90
76
|
|
91
|
-
module CombinePDF
|
77
|
+
module CombinePDF
|
78
|
+
|
79
|
+
#:nodoc: all
|
92
80
|
################################################################
|
93
81
|
## These are common functions, used within the different classes
|
94
82
|
## These functions aren't open to the public.
|
95
83
|
################################################################
|
84
|
+
#@private
|
96
85
|
PRIVATE_HASH_KEYS = [:indirect_reference_id, :indirect_generation_number, :raw_stream_content, :is_reference_only, :referenced_object, :indirect_without_dictionary]
|
86
|
+
#@private
|
97
87
|
LITERAL_STRING_REPLACEMENT_HASH = {
|
98
88
|
110 => 10, # "\\n".bytes = [92, 110] "\n".ord = 10
|
99
89
|
114 => 13, #r
|
@@ -104,7 +94,9 @@ module CombinePDF #:nodoc: all
|
|
104
94
|
41 => 41, #)
|
105
95
|
92 => 92 #\
|
106
96
|
}
|
107
|
-
|
97
|
+
#@private
|
98
|
+
#:nodoc: all
|
99
|
+
module PDFOperations
|
108
100
|
module_function
|
109
101
|
def inject_to_page page = {Type: :Page, MediaBox: [0,0,612.0,792.0], Resources: {}, Contents: []}, stream = nil, top = true
|
110
102
|
# make sure both the page receiving the new data and the injected page are of the correct data type.
|
@@ -455,6 +447,17 @@ module CombinePDF #:nodoc: all
|
|
455
447
|
end
|
456
448
|
end
|
457
449
|
|
450
|
+
#########################################################
|
451
|
+
# this file is part of the CombinePDF library and the code
|
452
|
+
# is subject to the same license (GPLv3).
|
453
|
+
#########################################################
|
454
|
+
# PDF object types cross reference:
|
455
|
+
# Indirect objects, references, dictionaries and streams are Hash
|
456
|
+
# arrays are Array
|
457
|
+
# strings are String
|
458
|
+
# names are Symbols (String.to_sym)
|
459
|
+
# numbers are Fixnum or Float
|
460
|
+
# boolean are TrueClass or FalseClass
|
458
461
|
|
459
462
|
## You can test performance with:
|
460
463
|
## puts Benchmark.measure { pdf = CombinePDF.new(file_name); pdf.save "test.pdf" } # PDFEditor.new_pdf
|
@@ -5,8 +5,17 @@
|
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
7
|
|
8
|
-
module CombinePDF #:nodoc: all
|
9
8
|
|
9
|
+
|
10
|
+
module CombinePDF
|
11
|
+
|
12
|
+
#@private
|
13
|
+
#:nodoc: all
|
14
|
+
# This doesn't work yet!
|
15
|
+
# in the future I wish to make a simple PDF page writer, that has only one function - the text box.
|
16
|
+
# Once the simple writer is ready (creates a text box in a self contained Page element),
|
17
|
+
# I could add it to the << operators and add it as either a self contained page or as an overlay.
|
18
|
+
# if all goes well, maybe I will also create an add_image function.
|
10
19
|
class PDFWriter
|
11
20
|
|
12
21
|
def initialize(media_box = [0.0, 0.0, 612.0, 792.0])
|
@@ -5,7 +5,11 @@
|
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
7
|
|
8
|
-
|
8
|
+
|
9
|
+
|
10
|
+
module CombinePDF
|
11
|
+
#@private
|
12
|
+
#:nodoc: all
|
9
13
|
class PDFDecrypt
|
10
14
|
|
11
15
|
def initialize objects=[], root_doctionary = {}
|
@@ -4,7 +4,11 @@
|
|
4
4
|
## this file is part of the CombinePDF library and the code
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
|
-
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
module CombinePDF
|
8
12
|
|
9
13
|
########################################################
|
10
14
|
## This is the Parser class.
|
@@ -15,6 +19,8 @@ module CombinePDF #:nodoc: all
|
|
15
19
|
## file version.
|
16
20
|
########################################################
|
17
21
|
|
22
|
+
#@private
|
23
|
+
#:nodoc: all
|
18
24
|
class PDFParser
|
19
25
|
# LITERAL_STRING_REPLACEMENT_HASH = {
|
20
26
|
# 110 => 10, # "\\n".bytes = [92, 110] "\n".ord = 10
|
@@ -4,6 +4,11 @@
|
|
4
4
|
## this file is part of the CombinePDF library and the code
|
5
5
|
## is subject to the same license.
|
6
6
|
########################################################
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
7
12
|
module CombinePDF
|
8
13
|
#######################################################
|
9
14
|
# PDF class is the PDF object that can save itself to
|
@@ -26,8 +31,19 @@ module CombinePDF
|
|
26
31
|
# pdf.pages.each {|page| page << stamp_page} # notice the << operator is on a page and not a PDF object.
|
27
32
|
#######################################################
|
28
33
|
class PDF
|
29
|
-
|
34
|
+
# the objects attribute is an Array containing all the PDF sub-objects for the class.
|
35
|
+
attr_reader :objects
|
36
|
+
# the info attribute is a Hash that sets the Info data for the PDF.
|
37
|
+
# use, for example:
|
38
|
+
# pdf.info[:Title] = "title"
|
39
|
+
attr_reader :info
|
40
|
+
# sets the string output format (PDF files store strings in two types of formats).
|
41
|
+
#
|
42
|
+
# Accepts:
|
43
|
+
# - :literal
|
44
|
+
# - :hex
|
30
45
|
attr_accessor :string_output
|
46
|
+
# A Float attribute, setting and returning the PDF version of the file (1.1-1.7).
|
31
47
|
attr_accessor :version
|
32
48
|
def initialize (*args)
|
33
49
|
# default before setting
|
@@ -167,7 +183,8 @@ module CombinePDF
|
|
167
183
|
#
|
168
184
|
# pdf << CombinePDF.new "second_file.pdf"
|
169
185
|
#
|
170
|
-
# pdf.save "both_files_merged.pdf"
|
186
|
+
# pdf.save "both_files_merged.pdf"
|
187
|
+
# @params obj is Hash, PDF or Array of parsed PDF data.
|
171
188
|
def << (obj)
|
172
189
|
#########
|
173
190
|
## how should we add data to PDF?
|
@@ -201,6 +218,8 @@ module CombinePDF
|
|
201
218
|
end
|
202
219
|
end
|
203
220
|
class PDF #:nodoc: all
|
221
|
+
|
222
|
+
# @private
|
204
223
|
# this function returns all the Page objects - regardless of order and even if not cataloged
|
205
224
|
# could be used for finding "lost" pages... but actually rather useless.
|
206
225
|
def all_pages
|
@@ -209,6 +228,7 @@ module CombinePDF
|
|
209
228
|
## referenced items and be reached through the connections.
|
210
229
|
[].tap {|out| each_object {|obj| out << obj if obj.is_a?(Hash) && obj[:Type] == :Page } }
|
211
230
|
end
|
231
|
+
# @private
|
212
232
|
def serialize_objects_and_references(object = nil)
|
213
233
|
warn "connecting objects with their references (serialize_objects_and_references)."
|
214
234
|
|
@@ -250,6 +270,7 @@ module CombinePDF
|
|
250
270
|
# end
|
251
271
|
|
252
272
|
end
|
273
|
+
# @private
|
253
274
|
def renumber_object_ids(start = nil)
|
254
275
|
warn "Resetting Object Reference IDs"
|
255
276
|
@set_start_id ||= start
|
@@ -262,6 +283,7 @@ module CombinePDF
|
|
262
283
|
warn "Finished serializing IDs"
|
263
284
|
end
|
264
285
|
|
286
|
+
# @private
|
265
287
|
def references(indirect_reference_id = nil, indirect_generation_number = nil)
|
266
288
|
ref = {indirect_reference_id: indirect_reference_id, indirect_generation_number: indirect_generation_number}
|
267
289
|
out = []
|
@@ -276,9 +298,11 @@ module CombinePDF
|
|
276
298
|
end
|
277
299
|
out
|
278
300
|
end
|
301
|
+
# @private
|
279
302
|
def all_indirect_object
|
280
303
|
[].tap {|out| @objects.each {|obj| out << obj if (obj.is_a?(Hash) && obj[:is_reference_only].nil?) } }
|
281
304
|
end
|
305
|
+
# @private
|
282
306
|
def sort_objects_by_id
|
283
307
|
@objects.sort! do |a,b|
|
284
308
|
if a.is_a?(Hash) && a[:indirect_reference_id] && a[:is_reference_only].nil? && b.is_a?(Hash) && b[:indirect_reference_id] && b[:is_reference_only].nil?
|
@@ -288,6 +312,7 @@ module CombinePDF
|
|
288
312
|
end
|
289
313
|
end
|
290
314
|
|
315
|
+
# @private
|
291
316
|
def add_referenced(object)
|
292
317
|
# add references but not root
|
293
318
|
case
|
@@ -308,6 +333,7 @@ module CombinePDF
|
|
308
333
|
end
|
309
334
|
end
|
310
335
|
end
|
336
|
+
# @private
|
311
337
|
def rebuild_catalog(*with_pages)
|
312
338
|
##########################
|
313
339
|
## Test-Run - How is that done?
|
@@ -348,6 +374,8 @@ module CombinePDF
|
|
348
374
|
|
349
375
|
catalog_object
|
350
376
|
end
|
377
|
+
|
378
|
+
# @private
|
351
379
|
# this is an alternative to the rebuild_catalog catalog method
|
352
380
|
# this method is used by the to_pdf method, for streamlining the PDF output.
|
353
381
|
# there is no point is calling the method before preparing the output.
|
@@ -360,6 +388,7 @@ module CombinePDF
|
|
360
388
|
catalog
|
361
389
|
end
|
362
390
|
|
391
|
+
# @private
|
363
392
|
# disabled, don't use. simply returns true.
|
364
393
|
def rebuild_resources
|
365
394
|
|
@@ -402,10 +431,13 @@ module CombinePDF
|
|
402
431
|
# rebuild stream lengths?
|
403
432
|
end
|
404
433
|
|
434
|
+
# @private
|
405
435
|
# run block of code on every object (Hash)
|
406
436
|
def each_object(&block)
|
407
437
|
PDFOperations._each_object(@objects, &block)
|
408
438
|
end
|
439
|
+
|
440
|
+
# @private
|
409
441
|
# the function returns true if the reference belongs to the object
|
410
442
|
def compare_reference_values(obj, ref)
|
411
443
|
if obj[:referenced_object] && ref[:referenced_object]
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
@@ -17,14 +17,14 @@ dependencies:
|
|
17
17
|
requirements:
|
18
18
|
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: 0.1.5
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: 0.1.5
|
28
28
|
description: A nifty gem, in pure Ruby, to parse PDF files and combine (merge) them
|
29
29
|
with other PDF files, watermark them or stamp them (all using the PDF file format).
|
30
30
|
email: bsegev@gmail.com
|