combine_pdf 0.2.30 → 0.2.31
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +13 -1
- data/lib/combine_pdf/parser.rb +1 -1
- data/lib/combine_pdf/pdf_protected.rb +62 -97
- data/lib/combine_pdf/renderer.rb +3 -3
- data/lib/combine_pdf/version.rb +1 -1
- data/test/automated +16 -4
- metadata +2 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 63b0c324e1bf003b0c0fc963eb2071b6c4672c18
|
4
|
+
data.tar.gz: 34c200edda06074773888c098b9d4f9a6479d752
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ebf1cd2a7c1077f71d6f41037f0ad341c06e6bbb305bfa030833956feb0bb80576e2eaa5fd7324000ff95834f8c125cafc40a782ac97c16c7e3c7e7c02166794
|
7
|
+
data.tar.gz: a4cc257441939fbc0dd59dffe2d3faf11ade63b89fa2637bee8b54df7ea9e31a2ec5612d366ec9208856d750deac8fe10f1ddcaab6f9bf21996e6aa52c775e90
|
data/CHANGELOG.md
CHANGED
@@ -2,9 +2,21 @@
|
|
2
2
|
|
3
3
|
***
|
4
4
|
|
5
|
+
Change log v.0.2.31
|
6
|
+
|
7
|
+
**Broke**: This release breaks the fix for issue #65, so Radio button data might be lost... a fix is in progress.
|
8
|
+
|
9
|
+
**Fix**: Fixed issue #82 (reintroduction of issue #19 due to core engine rewrite) related to a workaround for an issue with AcrobatReader. Credit to @gyuchang for testing and helping with the fix.
|
10
|
+
|
11
|
+
**Merge**: Merged pull request #80, fixing an issue with byte decoding. Credit to @gyuchang for the PR.
|
12
|
+
|
13
|
+
**Performance**: Improved performance for the reference and duplicate object resolution. Credit to @gyuchang for pointing out some optimization options.
|
14
|
+
|
15
|
+
***
|
16
|
+
|
5
17
|
Change log v.0.2.30
|
6
18
|
|
7
|
-
**Fix**: Fixed an issue where HTTP artifacts before the beginning of a PDF file / string would prevent the PDF from being parsed. This
|
19
|
+
**Fix**: Fixed an issue where HTTP artifacts before the beginning of a PDF file / string would prevent the PDF from being parsed. This should fix issue #78 reported by @robvitaro.
|
8
20
|
|
9
21
|
***
|
10
22
|
|
data/lib/combine_pdf/parser.rb
CHANGED
@@ -200,7 +200,7 @@ module CombinePDF
|
|
200
200
|
# instead, a non-strict RegExp is used:
|
201
201
|
str = @scanner.scan_until(/endstream/)
|
202
202
|
# raise error if the stream doesn't end.
|
203
|
-
raise "Parsing Error: PDF file error - a stream object wasn't properly
|
203
|
+
raise "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!" unless str
|
204
204
|
# need to remove end of stream
|
205
205
|
if out.last.is_a? Hash
|
206
206
|
# out.last[:raw_stream_content] = str[0...-10] #cuts only one EON char (\n or \r)
|
@@ -19,84 +19,42 @@ module CombinePDF
|
|
19
19
|
# this function adds the references contained in `@objects`.
|
20
20
|
#
|
21
21
|
# this is used for internal operations, such as injecting data using the << operator.
|
22
|
-
def add_referenced
|
22
|
+
def add_referenced(should_resolve = [])
|
23
23
|
# add references but not root
|
24
|
-
should_resolve = @objects.dup
|
25
24
|
dup_pages = nil
|
26
|
-
|
25
|
+
# an existing object map
|
26
|
+
resolved = {}.dup
|
27
|
+
existing = {}.dup
|
28
|
+
@objects.each { |obj| existing[obj] = obj }
|
29
|
+
# loop until should_resolve is empty
|
27
30
|
while should_resolve.any?
|
28
31
|
obj = should_resolve.pop
|
32
|
+
next if resolved[obj.object_id] # the object exists
|
29
33
|
if obj.is_a?(Hash)
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
tmp = @objects.find_index(obj[:referenced_object])
|
34
|
+
referenced = obj[:referenced_object]
|
35
|
+
if referenced && referenced.any?
|
36
|
+
tmp = resolved[referenced.object_id] || existing[referenced]
|
34
37
|
if tmp
|
35
|
-
tmp = @objects[tmp]
|
36
38
|
obj[:referenced_object] = tmp
|
37
39
|
else
|
38
|
-
|
39
|
-
|
40
|
-
|
40
|
+
resolved[obj.object_id] = referenced
|
41
|
+
existing[referenced] = referenced
|
42
|
+
should_resolve << referenced
|
43
|
+
@objects << referenced
|
41
44
|
end
|
42
45
|
else
|
43
|
-
|
46
|
+
resolved[obj.object_id] = obj
|
47
|
+
obj.keys.each { |k| should_resolve << obj[k] unless !obj[k].is_a?(Enumerable) || resolved[obj[k].object_id] }
|
44
48
|
end
|
45
49
|
elsif obj.is_a?(Array)
|
46
|
-
|
47
|
-
resolved << obj.object_id
|
50
|
+
resolved[obj.object_id] = obj
|
48
51
|
should_resolve.concat obj
|
49
52
|
end
|
50
53
|
end
|
51
54
|
resolved.clear
|
55
|
+
existing.clear
|
52
56
|
end
|
53
57
|
|
54
|
-
# # @private
|
55
|
-
# # Some PDF objects contain references to other PDF objects.
|
56
|
-
# #
|
57
|
-
# # this function adds the references contained in "object", but DOESN'T add the object itself.
|
58
|
-
# #
|
59
|
-
# # this is used for internal operations, such as injectng data using the << operator.
|
60
|
-
# def add_referenced(object, dup_pages = true)
|
61
|
-
# # add references but not root
|
62
|
-
# if object.is_a?(Array)
|
63
|
-
# object.each { |it| add_referenced(it, dup_pages) }
|
64
|
-
# return true
|
65
|
-
# elsif object.is_a?(Hash)
|
66
|
-
# # first if statement is actually a workaround for a bug in Acrobat Reader, regarding duplicate pages.
|
67
|
-
# if dup_pages && object[:is_reference_only] && object[:referenced_object] && object[:referenced_object].is_a?(Hash) && object[:referenced_object][:Type] == :Page
|
68
|
-
# if @objects.find_index object[:referenced_object]
|
69
|
-
# @objects << (object[:referenced_object] = object[:referenced_object].dup)
|
70
|
-
# else
|
71
|
-
# @objects << object[:referenced_object]
|
72
|
-
# end
|
73
|
-
# elsif object[:is_reference_only] && object[:referenced_object]
|
74
|
-
# found_at = @objects.find_index object[:referenced_object]
|
75
|
-
# if found_at
|
76
|
-
# # if the objects are equal, they might still be different objects!
|
77
|
-
# # so, we need to make sure they are the same object for the pointers to effect id numbering
|
78
|
-
# # and formatting operations.
|
79
|
-
# object[:referenced_object] = @objects[found_at]
|
80
|
-
# # stop this path, there is no need to run over the Hash's keys and values
|
81
|
-
# return true
|
82
|
-
# else
|
83
|
-
# # stop if page propegation is false
|
84
|
-
# return true if !dup_pages && object[:referenced_object][:Type] == :Page
|
85
|
-
# # @objects.include? object[:referenced_object] is bound to be false
|
86
|
-
# # the object wasn't found - add it to the @objects array
|
87
|
-
# @objects << object[:referenced_object]
|
88
|
-
# end
|
89
|
-
#
|
90
|
-
# end
|
91
|
-
# object.each do |k, v|
|
92
|
-
# add_referenced(v, dup_pages) unless RECORSIVE_PROTECTION[k]
|
93
|
-
# end
|
94
|
-
# else
|
95
|
-
# return false
|
96
|
-
# end
|
97
|
-
# true
|
98
|
-
# end
|
99
|
-
|
100
58
|
# @private
|
101
59
|
def rebuild_catalog(*with_pages)
|
102
60
|
# # build page list v.1 Slow but WORKS
|
@@ -113,38 +71,62 @@ module CombinePDF
|
|
113
71
|
# add pages to catalog, if requested
|
114
72
|
page_list.concat(with_pages) unless with_pages.empty?
|
115
73
|
|
74
|
+
# duplicate any non-unique pages - This is a special case to resolve Adobe Acrobat Reader issues (see issues #19 and #81)
|
75
|
+
uniqueness = {}.dup
|
76
|
+
page_list.each { |page| page = page.dup if uniqueness[page.object_id]; uniqueness[page.object_id] = page }
|
77
|
+
page_list.clear
|
78
|
+
page_list = uniqueness.values
|
79
|
+
uniqueness.clear
|
80
|
+
|
116
81
|
# build new Pages object
|
117
|
-
|
82
|
+
page_object_kids = [].dup
|
83
|
+
pages_object = { Type: :Pages, Count: page_list.length, Kids: page_object_kids }
|
84
|
+
pages_object_reference = { referenced_object: pages_object, is_reference_only: true }
|
85
|
+
page_list.each { |pg| pg[:Parent] = pages_object_reference; page_object_kids << ({ referenced_object: pg, is_reference_only: true }) }
|
118
86
|
|
119
87
|
# rebuild/rename the names dictionary
|
120
88
|
rebuild_names
|
121
89
|
# build new Catalog object
|
122
90
|
catalog_object = { Type: :Catalog,
|
123
|
-
Pages: { referenced_object: pages_object, is_reference_only: true }
|
124
|
-
|
125
|
-
Outlines: { referenced_object: @outlines, is_reference_only: true } }
|
91
|
+
Pages: { referenced_object: pages_object, is_reference_only: true } }
|
92
|
+
# pages_object[:Parent] = { referenced_object: catalog_object, is_reference_only: true } # causes AcrobatReader to fail
|
126
93
|
catalog_object[:ViewerPreferences] = @viewer_preferences unless @viewer_preferences.empty?
|
127
94
|
|
128
|
-
# rebuild/rename the forms dictionary
|
129
|
-
if @forms_data.nil? || @forms_data.empty?
|
130
|
-
@forms_data = nil
|
131
|
-
else
|
132
|
-
@forms_data = { referenced_object: (@forms_data[:referenced_object] || @forms_data), is_reference_only: true }
|
133
|
-
catalog_object[:AcroForm] = @forms_data
|
134
|
-
end
|
135
|
-
|
136
95
|
# point old Pages pointers to new Pages object
|
137
96
|
## first point known pages objects - enough?
|
138
97
|
pages.each { |p| p[:Parent] = { referenced_object: pages_object, is_reference_only: true } }
|
139
98
|
## or should we, go over structure? (fails)
|
140
99
|
# each_object {|obj| obj[:Parent][:referenced_object] = pages_object if obj.is_a?(Hash) && obj[:Parent].is_a?(Hash) && obj[:Parent][:referenced_object] && obj[:Parent][:referenced_object][:Type] == :Pages}
|
141
100
|
|
142
|
-
# remove old catalog and pages objects
|
143
|
-
@objects.reject! { |obj| obj.is_a?(Hash) && (obj[:Type] == :Catalog || obj[:Type] == :Pages) }
|
101
|
+
# # remove old catalog and pages objects
|
102
|
+
# @objects.reject! { |obj| obj.is_a?(Hash) && (obj[:Type] == :Catalog || obj[:Type] == :Pages) }
|
103
|
+
# remove old objects list and trees
|
104
|
+
@objects.clear
|
144
105
|
|
145
106
|
# inject new catalog and pages objects
|
146
|
-
@objects <<
|
107
|
+
@objects << @info if @info
|
147
108
|
@objects << catalog_object
|
109
|
+
@objects << pages_object
|
110
|
+
|
111
|
+
# rebuild/rename the forms dictionary
|
112
|
+
if @forms_data.nil? || @forms_data.empty?
|
113
|
+
@forms_data = nil
|
114
|
+
else
|
115
|
+
@forms_data = { referenced_object: (@forms_data[:referenced_object] || @forms_data), is_reference_only: true }
|
116
|
+
catalog_object[:AcroForm] = @forms_data
|
117
|
+
@objects << @forms_data[:referenced_object]
|
118
|
+
end
|
119
|
+
|
120
|
+
# add the names dictionary
|
121
|
+
if @names && @names.length > 1
|
122
|
+
@objects << @names
|
123
|
+
catalog_object[:Names] = { referenced_object: @names, is_reference_only: true }
|
124
|
+
end
|
125
|
+
# add the outlines dictionary
|
126
|
+
if @outlines && @outlines.any?
|
127
|
+
@objects << @outlines
|
128
|
+
catalog_object[:Outlines] = { referenced_object: @outlines, is_reference_only: true }
|
129
|
+
end
|
148
130
|
|
149
131
|
catalog_object
|
150
132
|
end
|
@@ -166,26 +148,9 @@ module CombinePDF
|
|
166
148
|
# there is no point in calling the method before preparing the output.
|
167
149
|
def rebuild_catalog_and_objects
|
168
150
|
catalog = rebuild_catalog
|
169
|
-
@objects
|
170
|
-
@objects << @info
|
171
|
-
@objects << catalog
|
172
|
-
# fix Acrobat Reader issue with page reference uniqueness (must be unique or older Acrobat Reader fails)
|
173
|
-
catalog[:Pages][:referenced_object][:Kids].each do |page|
|
174
|
-
tmp = page[:referenced_object]
|
175
|
-
tmp = page[:referenced_object] = tmp.dup if @objects.include? tmp
|
176
|
-
@objects << tmp
|
177
|
-
end
|
151
|
+
page_objects = catalog[:Pages][:referenced_object][:Kids].map { |e| @objects << e[:referenced_object]; e[:referenced_object] }
|
178
152
|
# adds every referenced object to the @objects (root), addition is performed as pointers rather than copies
|
179
|
-
|
180
|
-
add_referenced
|
181
|
-
# end)
|
182
|
-
# @objects << @info
|
183
|
-
# add_referenced @info
|
184
|
-
# add_referenced catalog
|
185
|
-
# add_referenced catalog[:Pages]
|
186
|
-
# add_referenced catalog[:Names], false
|
187
|
-
# add_referenced catalog[:Outlines], false
|
188
|
-
# add_referenced catalog[:AcroForm], false
|
153
|
+
add_referenced([page_objects, @forms_data, @names, @outlines, @info])
|
189
154
|
catalog
|
190
155
|
end
|
191
156
|
|
@@ -304,9 +269,9 @@ module CombinePDF
|
|
304
269
|
# parent - the outline base node of the resulting merged outline
|
305
270
|
# FIXME implement the possibility to insert somewhere in the middle of the outline
|
306
271
|
prev = nil
|
307
|
-
pos = first = actual_object((
|
308
|
-
last = actual_object((
|
309
|
-
median = { is_reference_only: true, referenced_object: actual_object((
|
272
|
+
pos = first = actual_object((position.nonzero? ? old_data : new_data)[:First])
|
273
|
+
last = actual_object((position.nonzero? ? new_data : old_data)[:Last])
|
274
|
+
median = { is_reference_only: true, referenced_object: actual_object((position.nonzero? ? new_data : old_data)[:First]) }
|
310
275
|
old_data[:First] = { is_reference_only: true, referenced_object: first }
|
311
276
|
old_data[:Last] = { is_reference_only: true, referenced_object: last }
|
312
277
|
parent = { is_reference_only: true, referenced_object: old_data }
|
data/lib/combine_pdf/renderer.rb
CHANGED
@@ -21,7 +21,7 @@ module CombinePDF
|
|
21
21
|
elsif object.is_a?(Array)
|
22
22
|
return format_array_to_pdf object
|
23
23
|
elsif object.is_a?(Fixnum) || object.is_a?(Float) || object.is_a?(TrueClass) || object.is_a?(FalseClass)
|
24
|
-
return object.to_s
|
24
|
+
return object.to_s
|
25
25
|
elsif object.is_a?(Hash)
|
26
26
|
return format_hash_to_pdf object
|
27
27
|
else
|
@@ -33,12 +33,12 @@ module CombinePDF
|
|
33
33
|
"\x0D" => '\\r',
|
34
34
|
"\x09" => '\\t',
|
35
35
|
"\x08" => '\\b',
|
36
|
-
"\
|
36
|
+
"\x0C" => '\\f', # form-feed (\f) == 0x0C
|
37
37
|
"\x28" => '\\(',
|
38
38
|
"\x29" => '\\)',
|
39
39
|
"\x5C" => '\\\\' }.dup
|
40
40
|
32.times { |i| STRING_REPLACEMENT_HASH[i.chr] ||= "\\#{i}" }
|
41
|
-
(256 -
|
41
|
+
(256 - 127).times { |i| STRING_REPLACEMENT_HASH[(i + 127).chr] ||= "\\#{i + 127}" }
|
42
42
|
|
43
43
|
def format_string_to_pdf(object)
|
44
44
|
# object.force_encoding(Encoding::ASCII_8BIT)
|
data/lib/combine_pdf/version.rb
CHANGED
data/test/automated
CHANGED
@@ -28,15 +28,16 @@ pdf = CombinePDF.load './Ruby/test pdfs/names_go_haywire_0.pdf'
|
|
28
28
|
pdf << CombinePDF.load('./Ruby/test pdfs/names_go_haywire_1.pdf')
|
29
29
|
pdf.save '04_check_view_and_names_reference.pdf'
|
30
30
|
|
31
|
-
|
31
|
+
pdf = CombinePDF.load('./Ruby/test pdfs/outlines/self_merge_err.pdf')
|
32
|
+
pdf.save '05_x1_scribus_test.pdf'
|
32
33
|
pdf = CombinePDF.load('./Ruby/test pdfs/outlines/self_merge_err.pdf')
|
33
34
|
pdf << CombinePDF.load('./Ruby/test pdfs/outlines/self_merge_err.pdf')
|
34
|
-
pdf.save '
|
35
|
+
pdf.save '05_x2_scribus_test.pdf'
|
35
36
|
# pdf = CombinePDF.load "./Ruby/test pdfs/named_dest.pdf";nil
|
36
37
|
# pdf.save '05_check_named_dest_links.pdf' # this will take a while
|
37
38
|
# pdf = CombinePDF.load "./Ruby/test pdfs/named_dest.pdf";nil
|
38
|
-
|
39
|
-
|
39
|
+
pdf << CombinePDF.load('./Ruby/test pdfs/named_dest.pdf'); nil
|
40
|
+
pdf.save '05_1_timeless_check_named_dest_links.pdf' # never ends... :-(
|
40
41
|
|
41
42
|
pdf = CombinePDF.load './Ruby/test pdfs/outline_small.pdf'
|
42
43
|
pdf << CombinePDF.load('./Ruby/test pdfs/outline_small.pdf')
|
@@ -55,6 +56,17 @@ CombinePDF.load("./Ruby/test\ pdfs/Scribus-unknown_err2.pdf").save '08_2-unknown
|
|
55
56
|
CombinePDF.load("./Ruby/test\ pdfs/Scribus-unknown_err3.pdf").save '08_3-unknown-err-empty-str.pdf'
|
56
57
|
|
57
58
|
CombinePDF.load("/Users/2Be/Ruby/test\ pdfs/nil_object.pdf").save('09_nil_in_parsed_array.pdf')
|
59
|
+
|
60
|
+
require 'prawn'
|
61
|
+
IO.binwrite '10_prawn.pdf', (Prawn::Document.new { text 'Hello World!' }).render
|
62
|
+
page = CombinePDF.parse((Prawn::Document.new { text 'Hello World!' }).render)
|
63
|
+
pdf = CombinePDF.new
|
64
|
+
pdf << page
|
65
|
+
pdf.save '10_parsed_from_prawn.pdf'
|
66
|
+
pdf = CombinePDF.new
|
67
|
+
pdf << page << page
|
68
|
+
pdf.save('10_AcrobatReader_is_unique_page.pdf')
|
69
|
+
|
58
70
|
# unify = [
|
59
71
|
# "./Ruby/test\ pdfs/AESv2\ encrypted.pdf",
|
60
72
|
# "./Ruby/test\ pdfs/data-in-comment.pdf",
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.31
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-08-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-rc4
|
@@ -111,4 +111,3 @@ test_files:
|
|
111
111
|
- test/automated
|
112
112
|
- test/console
|
113
113
|
- test/named_dest
|
114
|
-
has_rdoc:
|