combine_pdf 0.2.30 → 0.2.31
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -1
- data/lib/combine_pdf/parser.rb +1 -1
- data/lib/combine_pdf/pdf_protected.rb +62 -97
- data/lib/combine_pdf/renderer.rb +3 -3
- data/lib/combine_pdf/version.rb +1 -1
- data/test/automated +16 -4
- metadata +2 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63b0c324e1bf003b0c0fc963eb2071b6c4672c18
|
4
|
+
data.tar.gz: 34c200edda06074773888c098b9d4f9a6479d752
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ebf1cd2a7c1077f71d6f41037f0ad341c06e6bbb305bfa030833956feb0bb80576e2eaa5fd7324000ff95834f8c125cafc40a782ac97c16c7e3c7e7c02166794
|
7
|
+
data.tar.gz: a4cc257441939fbc0dd59dffe2d3faf11ade63b89fa2637bee8b54df7ea9e31a2ec5612d366ec9208856d750deac8fe10f1ddcaab6f9bf21996e6aa52c775e90
|
data/CHANGELOG.md
CHANGED
@@ -2,9 +2,21 @@
|
|
2
2
|
|
3
3
|
***
|
4
4
|
|
5
|
+
Change log v.0.2.31
|
6
|
+
|
7
|
+
**Broke**: Broke the fix for issue #65, so radio button data might be lost... working on a fix.
|
8
|
+
|
9
|
+
**Fix**: Fixed issue #82 (reintroduction of issue #19 due to core engine rewrite) related to a workaround for an issue with AcrobatReader. Credit to @gyuchang for testing and helping with the fix.
|
10
|
+
|
11
|
+
**Merge**: Merged pull request #80, fixing an issue with byte decoding. Credit to @gyuchang for the PR.
|
12
|
+
|
13
|
+
**Performance**: Improved performance for the reference and duplicate object resolution. Credit to @gyuchang for pointing out some optimization options.
|
14
|
+
|
15
|
+
***
|
16
|
+
|
5
17
|
Change log v.0.2.30
|
6
18
|
|
7
|
-
**Fix**: Fixed an issue where HTTP artifacts before the beginning of a PDF file / string would prevent the PDF from being parsed. This
|
19
|
+
**Fix**: Fixed an issue where HTTP artifacts before the beginning of a PDF file / string would prevent the PDF from being parsed. This should fix issue #78 reported by @robvitaro.
|
8
20
|
|
9
21
|
***
|
10
22
|
|
data/lib/combine_pdf/parser.rb
CHANGED
@@ -200,7 +200,7 @@ module CombinePDF
|
|
200
200
|
# instead, a non-strict RegExp is used:
|
201
201
|
str = @scanner.scan_until(/endstream/)
|
202
202
|
# raise error if the stream doesn't end.
|
203
|
-
raise "Parsing Error: PDF file error - a stream object wasn't properly
|
203
|
+
raise "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!" unless str
|
204
204
|
# need to remove end of stream
|
205
205
|
if out.last.is_a? Hash
|
206
206
|
# out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
|
@@ -19,84 +19,42 @@ module CombinePDF
|
|
19
19
|
# this function adds the references contained in `@objects`.
|
20
20
|
#
|
21
21
|
# this is used for internal operations, such as injecting data using the << operator.
|
22
|
-
def add_referenced
|
22
|
+
def add_referenced(should_resolve = [])
|
23
23
|
# add references but not root
|
24
|
-
should_resolve = @objects.dup
|
25
24
|
dup_pages = nil
|
26
|
-
|
25
|
+
# an existing object map
|
26
|
+
resolved = {}.dup
|
27
|
+
existing = {}.dup
|
28
|
+
@objects.each { |obj| existing[obj] = obj }
|
29
|
+
# loop until should_resolve is empty
|
27
30
|
while should_resolve.any?
|
28
31
|
obj = should_resolve.pop
|
32
|
+
next if resolved[obj.object_id] # the object exists
|
29
33
|
if obj.is_a?(Hash)
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
tmp = @objects.find_index(obj[:referenced_object])
|
34
|
+
referenced = obj[:referenced_object]
|
35
|
+
if referenced && referenced.any?
|
36
|
+
tmp = resolved[referenced.object_id] || existing[referenced]
|
34
37
|
if tmp
|
35
|
-
tmp = @objects[tmp]
|
36
38
|
obj[:referenced_object] = tmp
|
37
39
|
else
|
38
|
-
|
39
|
-
|
40
|
-
|
40
|
+
resolved[obj.object_id] = referenced
|
41
|
+
existing[referenced] = referenced
|
42
|
+
should_resolve << referenced
|
43
|
+
@objects << referenced
|
41
44
|
end
|
42
45
|
else
|
43
|
-
|
46
|
+
resolved[obj.object_id] = obj
|
47
|
+
obj.keys.each { |k| should_resolve << obj[k] unless !obj[k].is_a?(Enumerable) || resolved[obj[k].object_id] }
|
44
48
|
end
|
45
49
|
elsif obj.is_a?(Array)
|
46
|
-
|
47
|
-
resolved << obj.object_id
|
50
|
+
resolved[obj.object_id] = obj
|
48
51
|
should_resolve.concat obj
|
49
52
|
end
|
50
53
|
end
|
51
54
|
resolved.clear
|
55
|
+
existing.clear
|
52
56
|
end
|
53
57
|
|
54
|
-
# # @private
|
55
|
-
# # Some PDF objects contain references to other PDF objects.
|
56
|
-
# #
|
57
|
-
# # this function adds the references contained in "object", but DOESN'T add the object itself.
|
58
|
-
# #
|
59
|
-
# # this is used for internal operations, such as injectng data using the << operator.
|
60
|
-
# def add_referenced(object, dup_pages = true)
|
61
|
-
# # add references but not root
|
62
|
-
# if object.is_a?(Array)
|
63
|
-
# object.each { |it| add_referenced(it, dup_pages) }
|
64
|
-
# return true
|
65
|
-
# elsif object.is_a?(Hash)
|
66
|
-
# # first if statement is actually a workaround for a bug in Acrobat Reader, regarding duplicate pages.
|
67
|
-
# if dup_pages && object[:is_reference_only] && object[:referenced_object] && object[:referenced_object].is_a?(Hash) && object[:referenced_object][:Type] == :Page
|
68
|
-
# if @objects.find_index object[:referenced_object]
|
69
|
-
# @objects << (object[:referenced_object] = object[:referenced_object].dup)
|
70
|
-
# else
|
71
|
-
# @objects << object[:referenced_object]
|
72
|
-
# end
|
73
|
-
# elsif object[:is_reference_only] && object[:referenced_object]
|
74
|
-
# found_at = @objects.find_index object[:referenced_object]
|
75
|
-
# if found_at
|
76
|
-
# # if the objects are equal, they might still be different objects!
|
77
|
-
# # so, we need to make sure they are the same object for the pointers to effect id numbering
|
78
|
-
# # and formatting operations.
|
79
|
-
# object[:referenced_object] = @objects[found_at]
|
80
|
-
# # stop this path, there is no need to run over the Hash's keys and values
|
81
|
-
# return true
|
82
|
-
# else
|
83
|
-
# # stop if page propegation is false
|
84
|
-
# return true if !dup_pages && object[:referenced_object][:Type] == :Page
|
85
|
-
# # @objects.include? object[:referenced_object] is bound to be false
|
86
|
-
# # the object wasn't found - add it to the @objects array
|
87
|
-
# @objects << object[:referenced_object]
|
88
|
-
# end
|
89
|
-
#
|
90
|
-
# end
|
91
|
-
# object.each do |k, v|
|
92
|
-
# add_referenced(v, dup_pages) unless RECORSIVE_PROTECTION[k]
|
93
|
-
# end
|
94
|
-
# else
|
95
|
-
# return false
|
96
|
-
# end
|
97
|
-
# true
|
98
|
-
# end
|
99
|
-
|
100
58
|
# @private
|
101
59
|
def rebuild_catalog(*with_pages)
|
102
60
|
# # build page list v.1 Slow but WORKS
|
@@ -113,38 +71,62 @@ module CombinePDF
|
|
113
71
|
# add pages to catalog, if requested
|
114
72
|
page_list.concat(with_pages) unless with_pages.empty?
|
115
73
|
|
74
|
+
# duplicate any non-unique pages - This is a special case to resolve Adobe Acrobat Reader issues (see issues #19 and #81)
|
75
|
+
uniqueness = {}.dup
|
76
|
+
page_list.each { |page| page = page.dup if uniqueness[page.object_id]; uniqueness[page.object_id] = page }
|
77
|
+
page_list.clear
|
78
|
+
page_list = uniqueness.values
|
79
|
+
uniqueness.clear
|
80
|
+
|
116
81
|
# build new Pages object
|
117
|
-
|
82
|
+
page_object_kids = [].dup
|
83
|
+
pages_object = { Type: :Pages, Count: page_list.length, Kids: page_object_kids }
|
84
|
+
pages_object_reference = { referenced_object: pages_object, is_reference_only: true }
|
85
|
+
page_list.each { |pg| pg[:Parent] = pages_object_reference; page_object_kids << ({ referenced_object: pg, is_reference_only: true }) }
|
118
86
|
|
119
87
|
# rebuild/rename the names dictionary
|
120
88
|
rebuild_names
|
121
89
|
# build new Catalog object
|
122
90
|
catalog_object = { Type: :Catalog,
|
123
|
-
Pages: { referenced_object: pages_object, is_reference_only: true }
|
124
|
-
|
125
|
-
Outlines: { referenced_object: @outlines, is_reference_only: true } }
|
91
|
+
Pages: { referenced_object: pages_object, is_reference_only: true } }
|
92
|
+
# pages_object[:Parent] = { referenced_object: catalog_object, is_reference_only: true } # causes AcrobatReader to fail
|
126
93
|
catalog_object[:ViewerPreferences] = @viewer_preferences unless @viewer_preferences.empty?
|
127
94
|
|
128
|
-
# rebuild/rename the forms dictionary
|
129
|
-
if @forms_data.nil? || @forms_data.empty?
|
130
|
-
@forms_data = nil
|
131
|
-
else
|
132
|
-
@forms_data = { referenced_object: (@forms_data[:referenced_object] || @forms_data), is_reference_only: true }
|
133
|
-
catalog_object[:AcroForm] = @forms_data
|
134
|
-
end
|
135
|
-
|
136
95
|
# point old Pages pointers to new Pages object
|
137
96
|
## first point known pages objects - enough?
|
138
97
|
pages.each { |p| p[:Parent] = { referenced_object: pages_object, is_reference_only: true } }
|
139
98
|
## or should we, go over structure? (fails)
|
140
99
|
# each_object {|obj| obj[:Parent][:referenced_object] = pages_object if obj.is_a?(Hash) && obj[:Parent].is_a?(Hash) && obj[:Parent][:referenced_object] && obj[:Parent][:referenced_object][:Type] == :Pages}
|
141
100
|
|
142
|
-
# remove old catalog and pages objects
|
143
|
-
@objects.reject! { |obj| obj.is_a?(Hash) && (obj[:Type] == :Catalog || obj[:Type] == :Pages) }
|
101
|
+
# # remove old catalog and pages objects
|
102
|
+
# @objects.reject! { |obj| obj.is_a?(Hash) && (obj[:Type] == :Catalog || obj[:Type] == :Pages) }
|
103
|
+
# remove old objects list and trees
|
104
|
+
@objects.clear
|
144
105
|
|
145
106
|
# inject new catalog and pages objects
|
146
|
-
@objects <<
|
107
|
+
@objects << @info if @info
|
147
108
|
@objects << catalog_object
|
109
|
+
@objects << pages_object
|
110
|
+
|
111
|
+
# rebuild/rename the forms dictionary
|
112
|
+
if @forms_data.nil? || @forms_data.empty?
|
113
|
+
@forms_data = nil
|
114
|
+
else
|
115
|
+
@forms_data = { referenced_object: (@forms_data[:referenced_object] || @forms_data), is_reference_only: true }
|
116
|
+
catalog_object[:AcroForm] = @forms_data
|
117
|
+
@objects << @forms_data[:referenced_object]
|
118
|
+
end
|
119
|
+
|
120
|
+
# add the names dictionary
|
121
|
+
if @names && @names.length > 1
|
122
|
+
@objects << @names
|
123
|
+
catalog_object[:Names] = { referenced_object: @names, is_reference_only: true }
|
124
|
+
end
|
125
|
+
# add the outlines dictionary
|
126
|
+
if @outlines && @outlines.any?
|
127
|
+
@objects << @outlines
|
128
|
+
catalog_object[:Outlines] = { referenced_object: @outlines, is_reference_only: true }
|
129
|
+
end
|
148
130
|
|
149
131
|
catalog_object
|
150
132
|
end
|
@@ -166,26 +148,9 @@ module CombinePDF
|
|
166
148
|
# there is no point in calling the method before preparing the output.
|
167
149
|
def rebuild_catalog_and_objects
|
168
150
|
catalog = rebuild_catalog
|
169
|
-
@objects
|
170
|
-
@objects << @info
|
171
|
-
@objects << catalog
|
172
|
-
# fix Acrobat Reader issue with page reference uniqueness (must be unique or older Acrobat Reader fails)
|
173
|
-
catalog[:Pages][:referenced_object][:Kids].each do |page|
|
174
|
-
tmp = page[:referenced_object]
|
175
|
-
tmp = page[:referenced_object] = tmp.dup if @objects.include? tmp
|
176
|
-
@objects << tmp
|
177
|
-
end
|
151
|
+
page_objects = catalog[:Pages][:referenced_object][:Kids].map { |e| @objects << e[:referenced_object]; e[:referenced_object] }
|
178
152
|
# adds every referenced object to the @objects (root), addition is performed as pointers rather than copies
|
179
|
-
|
180
|
-
add_referenced
|
181
|
-
# end)
|
182
|
-
# @objects << @info
|
183
|
-
# add_referenced @info
|
184
|
-
# add_referenced catalog
|
185
|
-
# add_referenced catalog[:Pages]
|
186
|
-
# add_referenced catalog[:Names], false
|
187
|
-
# add_referenced catalog[:Outlines], false
|
188
|
-
# add_referenced catalog[:AcroForm], false
|
153
|
+
add_referenced([page_objects, @forms_data, @names, @outlines, @info])
|
189
154
|
catalog
|
190
155
|
end
|
191
156
|
|
@@ -304,9 +269,9 @@ module CombinePDF
|
|
304
269
|
# parent - the outline base node of the resulting merged outline
|
305
270
|
# FIXME implement the possibility to insert somewhere in the middle of the outline
|
306
271
|
prev = nil
|
307
|
-
pos = first = actual_object((
|
308
|
-
last = actual_object((
|
309
|
-
median = { is_reference_only: true, referenced_object: actual_object((
|
272
|
+
pos = first = actual_object((position.nonzero? ? old_data : new_data)[:First])
|
273
|
+
last = actual_object((position.nonzero? ? new_data : old_data)[:Last])
|
274
|
+
median = { is_reference_only: true, referenced_object: actual_object((position.nonzero? ? new_data : old_data)[:First]) }
|
310
275
|
old_data[:First] = { is_reference_only: true, referenced_object: first }
|
311
276
|
old_data[:Last] = { is_reference_only: true, referenced_object: last }
|
312
277
|
parent = { is_reference_only: true, referenced_object: old_data }
|
data/lib/combine_pdf/renderer.rb
CHANGED
@@ -21,7 +21,7 @@ module CombinePDF
|
|
21
21
|
elsif object.is_a?(Array)
|
22
22
|
return format_array_to_pdf object
|
23
23
|
elsif object.is_a?(Fixnum) || object.is_a?(Float) || object.is_a?(TrueClass) || object.is_a?(FalseClass)
|
24
|
-
return object.to_s
|
24
|
+
return object.to_s
|
25
25
|
elsif object.is_a?(Hash)
|
26
26
|
return format_hash_to_pdf object
|
27
27
|
else
|
@@ -33,12 +33,12 @@ module CombinePDF
|
|
33
33
|
"\x0D" => '\\r',
|
34
34
|
"\x09" => '\\t',
|
35
35
|
"\x08" => '\\b',
|
36
|
-
"\
|
36
|
+
"\x0C" => '\\f', # form-feed (\f) == 0x0C
|
37
37
|
"\x28" => '\\(',
|
38
38
|
"\x29" => '\\)',
|
39
39
|
"\x5C" => '\\\\' }.dup
|
40
40
|
32.times { |i| STRING_REPLACEMENT_HASH[i.chr] ||= "\\#{i}" }
|
41
|
-
(256 -
|
41
|
+
(256 - 127).times { |i| STRING_REPLACEMENT_HASH[(i + 127).chr] ||= "\\#{i + 127}" }
|
42
42
|
|
43
43
|
def format_string_to_pdf(object)
|
44
44
|
# object.force_encoding(Encoding::ASCII_8BIT)
|
data/lib/combine_pdf/version.rb
CHANGED
data/test/automated
CHANGED
@@ -28,15 +28,16 @@ pdf = CombinePDF.load './Ruby/test pdfs/names_go_haywire_0.pdf'
|
|
28
28
|
pdf << CombinePDF.load('./Ruby/test pdfs/names_go_haywire_1.pdf')
|
29
29
|
pdf.save '04_check_view_and_names_reference.pdf'
|
30
30
|
|
31
|
-
|
31
|
+
pdf = CombinePDF.load('./Ruby/test pdfs/outlines/self_merge_err.pdf')
|
32
|
+
pdf.save '05_x1_scribus_test.pdf'
|
32
33
|
pdf = CombinePDF.load('./Ruby/test pdfs/outlines/self_merge_err.pdf')
|
33
34
|
pdf << CombinePDF.load('./Ruby/test pdfs/outlines/self_merge_err.pdf')
|
34
|
-
pdf.save '
|
35
|
+
pdf.save '05_x2_scribus_test.pdf'
|
35
36
|
# pdf = CombinePDF.load "./Ruby/test pdfs/named_dest.pdf";nil
|
36
37
|
# pdf.save '05_check_named_dest_links.pdf' # this will take a while
|
37
38
|
# pdf = CombinePDF.load "./Ruby/test pdfs/named_dest.pdf";nil
|
38
|
-
|
39
|
-
|
39
|
+
pdf << CombinePDF.load('./Ruby/test pdfs/named_dest.pdf'); nil
|
40
|
+
pdf.save '05_1_timeless_check_named_dest_links.pdf' # never ends... :-(
|
40
41
|
|
41
42
|
pdf = CombinePDF.load './Ruby/test pdfs/outline_small.pdf'
|
42
43
|
pdf << CombinePDF.load('./Ruby/test pdfs/outline_small.pdf')
|
@@ -55,6 +56,17 @@ CombinePDF.load("./Ruby/test\ pdfs/Scribus-unknown_err2.pdf").save '08_2-unknown
|
|
55
56
|
CombinePDF.load("./Ruby/test\ pdfs/Scribus-unknown_err3.pdf").save '08_3-unknown-err-empty-str.pdf'
|
56
57
|
|
57
58
|
CombinePDF.load("/Users/2Be/Ruby/test\ pdfs/nil_object.pdf").save('09_nil_in_parsed_array.pdf')
|
59
|
+
|
60
|
+
require 'prawn'
|
61
|
+
IO.binwrite '10_prawn.pdf', (Prawn::Document.new { text 'Hello World!' }).render
|
62
|
+
page = CombinePDF.parse((Prawn::Document.new { text 'Hello World!' }).render)
|
63
|
+
pdf = CombinePDF.new
|
64
|
+
pdf << page
|
65
|
+
pdf.save '10_parsed_from_prawn.pdf'
|
66
|
+
pdf = CombinePDF.new
|
67
|
+
pdf << page << page
|
68
|
+
pdf.save('10_AcrobatReader_is_unique_page.pdf')
|
69
|
+
|
58
70
|
# unify = [
|
59
71
|
# "./Ruby/test\ pdfs/AESv2\ encrypted.pdf",
|
60
72
|
# "./Ruby/test\ pdfs/data-in-comment.pdf",
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.31
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-rc4
|
@@ -111,4 +111,3 @@ test_files:
|
|
111
111
|
- test/automated
|
112
112
|
- test/console
|
113
113
|
- test/named_dest
|
114
|
-
has_rdoc:
|