combine_pdf 0.2.7 → 0.2.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +1 -0
- data/lib/combine_pdf/page_methods.rb +9 -128
- data/lib/combine_pdf/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b5162b002d09ae2f68f9d9075cdaf3d426145dcf
|
4
|
+
data.tar.gz: 931f19e236ec1bdad706132fa54a916eca003d4b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 21eb86cceba04206594dcc87051eabb8af42b3217bb07484686837e4c024c81073864da715af5a8aa5a3b1095bbdc521b0ad02b8f87b6d4d579cde8d4037f614
|
7
|
+
data.tar.gz: 76e795a4209d9176c068ee0f158cc54150b6100a832daf4e53a676b686e20dfc350e909e4f0e08b5d289ed36c38ec0ab93b00d7b2289f95e0626c31a7c22544b
|
data/CHANGELOG.md
CHANGED
@@ -2,6 +2,20 @@
|
|
2
2
|
|
3
3
|
***
|
4
4
|
|
5
|
+
Change log v.0.2.8
|
6
|
+
|
7
|
+
* **Fix/Feature**:
|
8
|
+
|
9
|
+
Experience shows that it's very difficult to know when to use `page.copy` vs. `page.copy(true)` before stamping one PDF page on top of (or under) another... So...
|
10
|
+
|
11
|
+
Now there is no longer any need for the guesswork. The process is automated for you.
|
12
|
+
|
13
|
+
The moment CombinePDF recognizes a resource name conflict between two pages (such as both pages using one font name to reference two different fonts), CombinePDF will intrusively rename the incoming page's resources.
|
14
|
+
|
15
|
+
It is true that the intrusive resource renaming is somewhat risky and might require the inflation of some compressed page data (resulting in bigger file sizes), but this is the only way to attempt to prevent PDF data corruption.
|
16
|
+
|
17
|
+
***
|
18
|
+
|
5
19
|
Change log v.0.2.7
|
6
20
|
|
7
21
|
**Fix**: Fixed an issue where a malformed PDF String could cause the parser to hang.
|
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# CombinePDF - the ruby way for merging PDF files
|
2
2
|
[![Gem Version](https://badge.fury.io/rb/combine_pdf.svg)](http://badge.fury.io/rb/combine_pdf)
|
3
|
+
[![GitHub](https://img.shields.io/badge/GitHub-Open%20Source-blue.svg)](https://github.com/boazsegev/combine_pdf)
|
3
4
|
|
4
5
|
CombinePDF is a nifty model, written in pure Ruby, to parse PDF files and combine (merge) them with other PDF files, watermark them or stamp them (all using the PDF file format and pure Ruby code).
|
5
6
|
|
@@ -55,7 +55,7 @@ module CombinePDF
|
|
55
55
|
|
56
56
|
raise TypeError, "couldn't inject data, expecting a PDF page (Hash type)" unless obj.is_a?(Page_Methods)
|
57
57
|
|
58
|
-
obj = obj.copy #obj.copy(secure_injection)
|
58
|
+
obj = obj.copy( should_secure?(obj) ) #obj.copy(secure_injection)
|
59
59
|
|
60
60
|
# following the reference chain and assigning a pointer to the correct Resouces object.
|
61
61
|
# (assignments of Strings, Arrays and Hashes are pointers in Ruby, unless the .dup method is called)
|
@@ -821,133 +821,14 @@ module CombinePDF
|
|
821
821
|
self
|
822
822
|
end
|
823
823
|
|
824
|
-
|
825
|
-
|
826
|
-
#
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
# # following the reference chain and assigning a pointer to the correct Resouces object.
|
834
|
-
# # (assignments of Strings, Arrays and Hashes are pointers in Ruby, unless the .dup method is called)
|
835
|
-
# page[:Resources] ||= {}
|
836
|
-
# original_resources = page[:Resources]
|
837
|
-
# if original_resources[:is_reference_only]
|
838
|
-
# original_resources = original_resources[:referenced_object]
|
839
|
-
# raise "Couldn't tap into resources dictionary, as it is a reference and isn't linked." unless original_resources
|
840
|
-
# end
|
841
|
-
# original_contents = page[:Contents]
|
842
|
-
# original_contents = [original_contents] unless original_contents.is_a? Array
|
843
|
-
|
844
|
-
# stream[:Resources] ||= {}
|
845
|
-
# stream_resources = stream[:Resources]
|
846
|
-
# if stream_resources[:is_reference_only]
|
847
|
-
# stream_resources = stream_resources[:referenced_object]
|
848
|
-
# raise "Couldn't tap into resources dictionary, as it is a reference and isn't linked." unless stream_resources
|
849
|
-
# end
|
850
|
-
# stream_contents = stream[:Contents]
|
851
|
-
# stream_contents = [stream_contents] unless stream_contents.is_a? Array
|
852
|
-
|
853
|
-
# # collect keys as objects - this is to make sure that
|
854
|
-
# # we are working on the actual resource data, rather then references
|
855
|
-
# flatten_resources_dictionaries stream_resources
|
856
|
-
# flatten_resources_dictionaries original_resources
|
857
|
-
|
858
|
-
# # injecting each of the values in the injected Page
|
859
|
-
# stream_resources.each do |key, new_val|
|
860
|
-
# unless PRIVATE_HASH_KEYS.include? key # keep CombinePDF structual data intact.
|
861
|
-
# if original_resources[key].nil?
|
862
|
-
# original_resources[key] = new_val
|
863
|
-
# elsif original_resources[key].is_a?(Hash) && new_val.is_a?(Hash)
|
864
|
-
# new_val.update original_resources[key] # make sure the old values are respected
|
865
|
-
# original_resources[key].update new_val # transfer old and new values to the injected page
|
866
|
-
# end #Do nothing if array - ot is the PROC array, which is an issue
|
867
|
-
# end
|
868
|
-
# end
|
869
|
-
# original_resources[:ProcSet] = [:PDF, :Text, :ImageB, :ImageC, :ImageI] # this was recommended by the ISO. 32000-1:2008
|
870
|
-
|
871
|
-
# if top # if this is a stamp (overlay)
|
872
|
-
# page[:Contents] = original_contents
|
873
|
-
# page[:Contents].unshift create_deep_copy(CONTENT_CONTAINER_START)
|
874
|
-
# page[:Contents].push create_deep_copy(CONTENT_CONTAINER_MIDDLE)
|
875
|
-
# page[:Contents].push *stream_contents
|
876
|
-
# page[:Contents].push create_deep_copy(CONTENT_CONTAINER_END)
|
877
|
-
# else #if this was a watermark (underlay? would be lost if the page was scanned, as white might not be transparent)
|
878
|
-
# page[:Contents] = stream_contents
|
879
|
-
# page[:Contents].unshift create_deep_copy(CONTENT_CONTAINER_START)
|
880
|
-
# page[:Contents].push create_deep_copy(CONTENT_CONTAINER_MIDDLE)
|
881
|
-
# page[:Contents].push *original_contents
|
882
|
-
# page[:Contents].push create_deep_copy(CONTENT_CONTAINER_END)
|
883
|
-
# end
|
884
|
-
|
885
|
-
# page
|
886
|
-
# end
|
887
|
-
# # copy_and_secure_for_injection(page)
|
888
|
-
# # - page is a page in the pages array, i.e.
|
889
|
-
# # pdf.pages[0]
|
890
|
-
# # takes a page object and:
|
891
|
-
# #
|
892
|
-
# # makes a deep copy of the page (Ruby defaults to pointers, so this will copy the memory).
|
893
|
-
# #
|
894
|
-
# # then it will rewrite the content stream with renamed resources, so as to avoid name conflicts.
|
895
|
-
# def copy_and_secure_for_injection(page)
|
896
|
-
# # copy page
|
897
|
-
# new_page = create_deep_copy page
|
898
|
-
|
899
|
-
# # initiate dictionary from old names to new names
|
900
|
-
# names_dictionary = {}
|
901
|
-
|
902
|
-
# # itirate through all keys that are name objects and give them new names (add to dic)
|
903
|
-
# # this should be done for every dictionary in :Resources
|
904
|
-
# # this is a few steps stage:
|
905
|
-
|
906
|
-
# # 1. get resources object
|
907
|
-
# resources = new_page[:Resources]
|
908
|
-
# if resources[:is_reference_only]
|
909
|
-
# resources = resources[:referenced_object]
|
910
|
-
# raise "Couldn't tap into resources dictionary, as it is a reference and isn't linked." unless resources
|
911
|
-
# end
|
912
|
-
|
913
|
-
# # 2. establich direct access to dictionaries and remove reference values
|
914
|
-
# flatten_resources_dictionaries resources
|
915
|
-
|
916
|
-
# # 3. travel every dictionary to pick up names (keys), change them and add them to the dictionary
|
917
|
-
# resources.each do |k,v|
|
918
|
-
# if v.is_a?(Hash)
|
919
|
-
# new_dictionary = {}
|
920
|
-
# new_name = "Combine" + SecureRandom.hex(7) + "PDF"
|
921
|
-
# i = 1
|
922
|
-
# v.each do |old_key, value|
|
923
|
-
# new_key = (new_name + i.to_s).to_sym
|
924
|
-
# names_dictionary[old_key] = new_key
|
925
|
-
# new_dictionary[new_key] = value
|
926
|
-
# i += 1
|
927
|
-
# end
|
928
|
-
# resources[k] = new_dictionary
|
929
|
-
# end
|
930
|
-
# end
|
931
|
-
|
932
|
-
# # now that we have replaced the names in the resources dictionaries,
|
933
|
-
# # it is time to replace the names inside the stream
|
934
|
-
# # we will need to make sure we have access to the stream injected
|
935
|
-
# # we will user PDFFilter.inflate_object
|
936
|
-
# (new_page[:Contents].is_a?(Array) ? new_page[:Contents] : [new_page[:Contents] ]).each do |c|
|
937
|
-
# stream = c[:referenced_object]
|
938
|
-
# PDFFilter.inflate_object stream
|
939
|
-
# names_dictionary.each do |old_key, new_key|
|
940
|
-
# stream[:raw_stream_content].gsub! _object_to_pdf(old_key), _object_to_pdf(new_key) ##### PRAY(!) that the parsed datawill be correctly reproduced!
|
941
|
-
# end
|
942
|
-
# # patch back to PDF defaults, for OCRed PDF files.
|
943
|
-
# # stream[:raw_stream_content] = "q\nq\nq\nDeviceRGB CS\nDeviceRGB cs\n0 0 0 rg\n0 0 0 RG\n0 Tr\n%s\nQ\nQ\nQ\n" % stream[:raw_stream_content]
|
944
|
-
# # the following was removed for Acrobat Reader compatability: DeviceRGB CS\nDeviceRGB cs\n
|
945
|
-
# stream[:raw_stream_content] = "q\nq\nq\n0 0 0 rg\n0 0 0 RG\n0 Tr\n1 0 0 1 0 0 cm\n%s\nQ\nQ\nQ\n" % stream[:raw_stream_content]
|
946
|
-
# end
|
947
|
-
|
948
|
-
# new_page
|
949
|
-
# end
|
950
|
-
|
824
|
+
# Checks whether +page+ names any resource differently from this page.
#
# Walks every Hash-valued entry of this page's resources and compares each
# name with the entry stored under the same category and name in
# +page.resources+. A conflict is a name present in both whose values differ.
# Nothing is modified; the method only inspects.
#
# @param page [#resources] the incoming page about to be injected.
#   NOTE(review): presumably a Page_Methods Hash, as at the call site -- confirm.
# @return [true, false] true if there are two different resources sharing the same named reference.
def should_secure?(page)
  foreign_res = page.resources
  resources.each do |category, names|
    next unless names.is_a?(Hash)

    # The incoming page may lack this resource category entirely (or hold a
    # non-dictionary value there); nothing can conflict in that case.
    foreign_names = foreign_res[category]
    next unless foreign_names.is_a?(Hash)

    names.each do |name, value|
      foreign_value = foreign_names[name]
      return true if foreign_value && foreign_value != value
    end
  end
  false
end
|
951
832
|
|
952
833
|
end
|
953
834
|
|
data/lib/combine_pdf/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: combine_pdf
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Boaz Segev
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-10-
|
11
|
+
date: 2015-10-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-rc4
|