combine_pdf 1.0.16 → 1.0.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +32 -0
- data/.travis.yml +1 -0
- data/CHANGELOG.md +61 -1
- data/README.md +51 -12
- data/Rakefile +8 -1
- data/combine_pdf.gemspec +4 -2
- data/lib/combine_pdf/api.rb +6 -5
- data/lib/combine_pdf/basic_writer.rb +2 -1
- data/lib/combine_pdf/decrypt.rb +2 -1
- data/lib/combine_pdf/exceptions.rb +2 -0
- data/lib/combine_pdf/filter.rb +1 -0
- data/lib/combine_pdf/fonts.rb +13 -3
- data/lib/combine_pdf/page_methods.rb +7 -6
- data/lib/combine_pdf/parser.rb +54 -16
- data/lib/combine_pdf/pdf_protected.rb +27 -22
- data/lib/combine_pdf/pdf_public.rb +25 -13
- data/lib/combine_pdf/renderer.rb +3 -2
- data/lib/combine_pdf/version.rb +3 -1
- data/lib/combine_pdf.rb +15 -13
- data/test/automated +65 -64
- data/test/combine_pdf/load_test.rb +48 -0
- data/test/combine_pdf/renderer_test.rb +1 -3
- data/test/fixtures/files/sample_encrypted_pdf.pdf +0 -0
- data/test/fixtures/files/sample_pdf.pdf +0 -0
- metadata +47 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 773cc37e05ad8fb02f148c12e3b7e1e24be8fc71ec946e4bca674bad107e1398
|
4
|
+
data.tar.gz: ecf125185a04ecdbced0856db01514d90036f93b315ff384b3b55cc794018032
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8c2636e089c11b71f4a07d29a9eea6f7df8ca1f5466c2a447e8afb2598467d2fe3663d4e382c45a04dfd6ead9f19a6afe9ae7e01fae22b8c678db08ba6d1fe8
|
7
|
+
data.tar.gz: efc8160687a50fbf0df43cc5b0b85c013fc5fda3f2a053bc37716b403de18ad429caa4872832047a1ea1e2167797c0d8a5a773d8c341128752e38db9a87990ad
|
@@ -0,0 +1,32 @@
|
|
1
|
+
name: Main
|
2
|
+
on:
|
3
|
+
push:
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
tests:
|
7
|
+
name: Tests
|
8
|
+
runs-on: ubuntu-latest
|
9
|
+
strategy:
|
10
|
+
fail-fast: false
|
11
|
+
matrix:
|
12
|
+
ruby: ["2.7", "3.0", "3.1", "3.2", "3.3"]
|
13
|
+
rubyopt: [""]
|
14
|
+
include:
|
15
|
+
- ruby: "3.3"
|
16
|
+
rubyopt: "--enable-frozen-string-literal --debug-frozen-string-literal"
|
17
|
+
|
18
|
+
steps:
|
19
|
+
- name: Checkout code
|
20
|
+
uses: actions/checkout@v3
|
21
|
+
|
22
|
+
- name: Setup Ruby
|
23
|
+
uses: ruby/setup-ruby@v1
|
24
|
+
with:
|
25
|
+
ruby-version: ${{ matrix.ruby }}
|
26
|
+
bundler-cache: true
|
27
|
+
|
28
|
+
- name: Generate lockfile
|
29
|
+
run: bundle lock
|
30
|
+
|
31
|
+
- name: Run tests
|
32
|
+
run: bundle exec rake test RUBYOPT="${{ matrix.rubyopt }}"
|
data/.travis.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
language: ruby
|
data/CHANGELOG.md
CHANGED
@@ -1,6 +1,66 @@
|
|
1
1
|
# Change Log
|
2
2
|
|
3
|
-
|
3
|
+
#### Change log v.1.0.29 (2024-12-07)
|
4
|
+
|
5
|
+
**Fix**: frozen string literal support fix. Credit to @francescob (Francesco) for PR #245.
|
6
|
+
|
7
|
+
#### Change log v.1.0.28 (2024-11-12)
|
8
|
+
|
9
|
+
**Fix**: use `require` to load code (instead of `load`). Credit to @casperisfine (Jean byroot Boussier) for PR #216.
|
10
|
+
|
11
|
+
#### Change log v.1.0.27 (2024-11-10)
|
12
|
+
|
13
|
+
**Performance**: fix performance issues with `object_id` usage in Ruby 3+. Credit to @amomchilov (Alexander Momchilov) for PR #241.
|
14
|
+
|
15
|
+
**Performance**: use frozen string literals. Credit to @casperisfine (Jean byroot Boussier) for PR #239.
|
16
|
+
|
17
|
+
#### Change log v.1.0.26 (2023-12-22)
|
18
|
+
|
19
|
+
**Performance**: possible performance bump. Credit to @denislavski (Denislav Naydenov) for opening PR #235.
|
20
|
+
|
21
|
+
#### Change log v.1.0.25 (2023-12-19)
|
22
|
+
|
23
|
+
**Fix**: possibly improved memory usage. Credit to @denislavski (Denislav Naydenov) for opening PR #233 and suggesting this change.
|
24
|
+
|
25
|
+
#### Change log v.1.0.24 (2023-10-19)
|
26
|
+
|
27
|
+
**Fix**: possible `nil` in loop. Credit to @jkowens for PR #231 and adding a quick fix using a simple guard.
|
28
|
+
|
29
|
+
**Fix**: preserve file creation date metadata where relevant.
|
30
|
+
|
31
|
+
#### Change log v.1.0.23 (2023-04-04)
|
32
|
+
|
33
|
+
**Feature**: merged PR #177 for the `raise_on_encrypted: true` option support. Credit to @leviwilson and @kimyu92 for the PR.
|
34
|
+
|
35
|
+
#### Change log v.1.0.22
|
36
|
+
|
37
|
+
**Fix**: fix `fonts` dereferencing issue (#203), credit to @MarcWeber (Marc Weber) for identifying the issue.
|
38
|
+
|
39
|
+
**Fix**: fix `metrix` dependency, credit to @casperisfine (Jean byroot Boussier) for PR #195.
|
40
|
+
|
41
|
+
#### Change log v.1.0.21
|
42
|
+
|
43
|
+
**Fix**: possible fix for issue #184, where nested PDF files within an object stream could break the parser. Credit to Greg Sparrow (@hazelsparrow) for exposing the issue.
|
44
|
+
|
45
|
+
#### Change log v.1.0.20
|
46
|
+
|
47
|
+
**Fix**: merges PR #180, `TypeError: can't dup NilClass`. Credit to Adam Trepanier (@adam-e-trepanier) for the merge.
|
48
|
+
|
49
|
+
#### Change log v.1.0.19
|
50
|
+
|
51
|
+
**Fix**: fixes font height and width detection issue. Issue #179. Credit to @5anchezzz for opening the issue.
|
52
|
+
|
53
|
+
**Fix**: fixes an indentation warning. Issue #173. Credit to @rubyFeedback for exposing this issue.
|
54
|
+
|
55
|
+
#### Change log v.1.0.18
|
56
|
+
|
57
|
+
**Fix**: fixed issue with the 1.0.17 release where `ProcSet` PDF Arrays should have been expected but were ignored and a PDF Object was assumed instead (issue #171) - credit to @chuchiperriman (Jesús Barbero Rodríguez).
|
58
|
+
|
59
|
+
#### Change log v.1.0.17
|
60
|
+
|
61
|
+
NB: yanked from RubyGems.org.
|
62
|
+
|
63
|
+
**Fix**: fixed issue where nested structure equality tests might provide false positives, resulting in lost data (issue #166) - credit to @cschilbe (Conrad Schilbe).
|
4
64
|
|
5
65
|
#### Change log v.1.0.16
|
6
66
|
|
data/README.md
CHANGED
@@ -1,27 +1,29 @@
|
|
1
1
|
# CombinePDF - the ruby way for merging PDF files
|
2
2
|
[](http://badge.fury.io/rb/combine_pdf)
|
3
3
|
[](https://github.com/boazsegev/combine_pdf)
|
4
|
+
[](https://www.rubydoc.info/github/boazsegev/combine_pdf)
|
4
5
|
[](https://github.com/pickhardt/maintainers-wanted)
|
5
6
|
|
7
|
+
|
6
8
|
CombinePDF is a nifty model, written in pure Ruby, to parse PDF files and combine (merge) them with other PDF files, watermark them or stamp them (all using the PDF file format and pure Ruby code).
|
7
9
|
|
8
|
-
##
|
10
|
+
## Unmaintained - Help Wanted(!)
|
9
11
|
|
10
|
-
|
12
|
+
I decided to stop maintaining this gem and hope someone could take over the PR reviews and maintenance of this gem (or simply open a successful fork).
|
11
13
|
|
12
|
-
|
13
|
-
gem install combine_pdf
|
14
|
-
```
|
14
|
+
I wrote this gem because I needed to solve an issue with bates-numbering existing PDF documents.
|
15
15
|
|
16
|
-
|
16
|
+
However, since 2014 I have been maintaining the gem for free and for no reason at all, except that I enjoyed sharing it with the community.
|
17
17
|
|
18
|
-
I
|
18
|
+
I love this gem, but I cannot keep maintaining it as I have my own projects to focus on and I need both the time and (more importantly) the mindspace.
|
19
19
|
|
20
|
-
|
20
|
+
## Install
|
21
21
|
|
22
|
-
|
22
|
+
Install with ruby gems:
|
23
23
|
|
24
|
-
|
24
|
+
```ruby
|
25
|
+
gem install combine_pdf
|
26
|
+
```
|
25
27
|
|
26
28
|
## Known Limitations
|
27
29
|
|
@@ -37,10 +39,12 @@ Quick rundown:
|
|
37
39
|
|
38
40
|
Some links will be lost when ripping pages out of PDF files and merging them with another PDF.
|
39
41
|
|
40
|
-
* Some encrypted PDF files (usually the ones you can't view without a password) will fail quietly instead of noisily.
|
42
|
+
* Some encrypted PDF files (usually the ones you can't view without a password) will fail quietly instead of noisily. If you prefer to choose the noisy route, you can specify the `raise_on_encrypted` option using `CombinePDF.load(pdf_file, raise_on_encrypted: true)` which will raise a `CombinePDF::EncryptionError`.
|
41
43
|
|
42
44
|
* Sometimes the CombinePDF will raise an exception even if the PDF could be parsed (i.e., when PDF optional content exists)... I find it better to err on the side of caution, although for optional content PDFs an exception is avoidable using `CombinePDF.load(pdf_file, allow_optional_content: true)`.
|
43
45
|
|
46
|
+
* The CombinePDF gem runs recursive code to both parse and format the PDF files. Hence, PDF files that have heavily nested objects, as well as those that were combined in a way that results in cyclic nesting, might explode the stack - resulting in an exception or program failure.
|
47
|
+
|
44
48
|
CombinePDF is written natively in Ruby and should (presumably) work on all Ruby platforms that follow Ruby 2.0 compatibility.
|
45
49
|
|
46
50
|
However, PDF files are quite complex creatures and no guaranty is provided.
|
@@ -112,7 +116,42 @@ pdf.number_pages
|
|
112
116
|
pdf.save "file_with_numbering.pdf"
|
113
117
|
```
|
114
118
|
|
115
|
-
Numbering can be done with many different options, with different formating, with or without a box object, and even with opacity values - see documentation.
|
119
|
+
Numbering can be done with many different options, with different formatting, with or without a box object, and even with opacity values - [see documentation](https://www.rubydoc.info/github/boazsegev/combine_pdf/CombinePDF/PDF#number_pages-instance_method).
|
120
|
+
|
121
|
+
For example, should you prefer to place the page number on the bottom right side of all PDF pages, do:
|
122
|
+
|
123
|
+
```ruby
|
124
|
+
pdf.number_pages(location: [:bottom_right])
|
125
|
+
```
|
126
|
+
|
127
|
+
As another example, the dashes around the number are removed and a box is placed around it. The numbering is semi-transparent and the first 3 pages are numbered using letters (a,b,c) rather than numbers:
|
128
|
+
|
129
|
+
|
130
|
+
```ruby
|
131
|
+
# number first 3 pages as "a", "b", "c"
|
132
|
+
pdf.number_pages(number_format: " %s ",
|
133
|
+
location: [:top, :bottom, :top_left, :top_right, :bottom_left, :bottom_right],
|
134
|
+
start_at: "a",
|
135
|
+
page_range: (0..2),
|
136
|
+
box_color: [0.8,0.8,0.8],
|
137
|
+
border_color: [0.4, 0.4, 0.4],
|
138
|
+
border_width: 1,
|
139
|
+
box_radius: 6,
|
140
|
+
opacity: 0.75)
|
141
|
+
# number the rest of the pages as 4, 5, ... etc'
|
142
|
+
pdf.number_pages(number_format: " %s ",
|
143
|
+
location: [:top, :bottom, :top_left, :top_right, :bottom_left, :bottom_right],
|
144
|
+
start_at: 4,
|
145
|
+
page_range: (3..-1),
|
146
|
+
box_color: [0.8,0.8,0.8],
|
147
|
+
border_color: [0.4, 0.4, 0.4],
|
148
|
+
border_width: 1,
|
149
|
+
box_radius: 6,
|
150
|
+
opacity: 0.75)
|
151
|
+
```
|
152
|
+
|
153
|
+
pdf.number_pages(number_format: " %s ", location: :bottom_right, font_size: 44)
|
154
|
+
|
116
155
|
|
117
156
|
## Loading and Parsing PDF data
|
118
157
|
|
data/Rakefile
CHANGED
data/combine_pdf.gemspec
CHANGED
@@ -7,7 +7,7 @@ Gem::Specification.new do |spec|
|
|
7
7
|
spec.name = "combine_pdf"
|
8
8
|
spec.version = CombinePDF::VERSION
|
9
9
|
spec.authors = ["Boaz Segev"]
|
10
|
-
spec.email = ["
|
10
|
+
spec.email = ["bo@bowild.com"]
|
11
11
|
spec.summary = %q{Combine, stamp and watermark PDF files in pure Ruby.}
|
12
12
|
spec.description = %q{A nifty gem, in pure Ruby, to parse PDF files and combine (merge) them with other PDF files, number the pages, watermark them or stamp them, create tables, add basic text objects etc` (all using the PDF file format).}
|
13
13
|
spec.homepage = "https://github.com/boazsegev/combine_pdf"
|
@@ -19,8 +19,10 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
21
|
spec.add_runtime_dependency 'ruby-rc4', '>= 0.1.5'
|
22
|
+
spec.add_runtime_dependency 'matrix'
|
22
23
|
|
23
24
|
# spec.add_development_dependency "bundler", ">= 1.7"
|
24
|
-
spec.add_development_dependency "rake", "
|
25
|
+
spec.add_development_dependency "rake", ">= 12.3.3"
|
25
26
|
spec.add_development_dependency "minitest"
|
27
|
+
spec.add_development_dependency "minitest-around"
|
26
28
|
end
|
data/lib/combine_pdf/api.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module CombinePDF
|
4
5
|
module_function
|
@@ -24,11 +25,11 @@ module CombinePDF
|
|
24
25
|
raise TypeError, "couldn't create PDF object, expecting type String" unless string.is_a?(String) || string.is_a?(Pathname)
|
25
26
|
begin
|
26
27
|
(begin
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
28
|
+
File.file? string
|
29
|
+
rescue
|
30
|
+
false
|
31
|
+
end) ? load(string) : parse(string)
|
32
|
+
rescue => _e
|
32
33
|
raise 'General PDF error - Use CombinePDF.load or CombinePDF.parse for a non-general error message (the requested file was not found OR the string received is not a valid PDF stream OR the file was found but not valid).'
|
33
34
|
end
|
34
35
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
########################################################
|
3
4
|
## Thoughts from reading the ISO 32000-1:2008
|
4
5
|
## this file is part of the CombinePDF library and the code
|
@@ -35,7 +36,7 @@ module CombinePDF
|
|
35
36
|
# mediabox:: the PDF page size in PDF points. defaults to [0, 0, 612.0, 792.0] (US Letter)
|
36
37
|
def initialize(mediabox = [0, 0, 612.0, 792.0])
|
37
38
|
# indirect_reference_id, :indirect_generation_number
|
38
|
-
@contents =
|
39
|
+
@contents = String.new
|
39
40
|
@base_font_name = 'Writer' + SecureRandom.hex(7) + 'PDF'
|
40
41
|
self[:Type] = :Page
|
41
42
|
self[:indirect_reference_id] = 0
|
data/lib/combine_pdf/decrypt.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
########################################################
|
3
4
|
## Thoughts from reading the ISO 32000-1:2008
|
4
5
|
## this file is part of the CombinePDF library and the code
|
@@ -137,7 +138,7 @@ module CombinePDF
|
|
137
138
|
object_key = @key.dup
|
138
139
|
object_key << [encrypted_id].pack('i')[0..2]
|
139
140
|
object_key << [encrypted_generation].pack('i')[0..1]
|
140
|
-
object_key << 'sAlT'.
|
141
|
+
object_key << 'sAlT'.b
|
141
142
|
key_length = object_key.length < 16 ? object_key.length : 16
|
142
143
|
|
143
144
|
begin
|
data/lib/combine_pdf/filter.rb
CHANGED
data/lib/combine_pdf/fonts.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
########################################################
|
3
4
|
## Thoughts from reading the ISO 32000-1:2008
|
4
5
|
## this file is part of the CombinePDF library and the code
|
@@ -138,12 +139,21 @@ module CombinePDF
|
|
138
139
|
text.each_char do |c|
|
139
140
|
metrics_array << (merged_metrics[c] || { wx: 0, boundingbox: [0, 0, 0, 0] })
|
140
141
|
end
|
141
|
-
|
142
|
-
|
142
|
+
metrics_array_mapped_top = [].dup
|
143
|
+
metrics_array_mapped_bottom = [].dup
|
143
144
|
width = 0.0
|
144
145
|
metrics_array.each do |m|
|
145
|
-
|
146
|
+
if (m && m[:boundingbox])
|
147
|
+
metrics_array_mapped_top << m[:boundingbox][3]
|
148
|
+
metrics_array_mapped_bottom << m[:boundingbox][1]
|
149
|
+
else
|
150
|
+
metrics_array_mapped_top << 0
|
151
|
+
metrics_array_mapped_bottom << 0
|
152
|
+
end
|
153
|
+
width += (m[:wx] || m[:wy] || 0) if m
|
146
154
|
end
|
155
|
+
height = metrics_array_mapped_top.max
|
156
|
+
height -=metrics_array_mapped_bottom.min
|
147
157
|
return [height.to_f / 1000 * size, width.to_f / 1000 * size] if metrics_array[0][:wy]
|
148
158
|
[width.to_f / 1000 * size, height.to_f / 1000 * size]
|
149
159
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# -*- encoding : utf-8 -*-
|
2
|
+
# frozen_string_literal: true
|
2
3
|
########################################################
|
3
4
|
## Thoughts from reading the ISO 32000-1:2008
|
4
5
|
## this file is part of the CombinePDF library and the code
|
@@ -94,7 +95,7 @@ module CombinePDF
|
|
94
95
|
# end
|
95
96
|
|
96
97
|
# set ProcSet to recommended value
|
97
|
-
resources[:ProcSet]
|
98
|
+
resources[:ProcSet] ||= [:PDF, :Text, :ImageB, :ImageC, :ImageI] # this was recommended by the ISO. 32000-1:2008
|
98
99
|
|
99
100
|
if top # if this is a stamp (overlay)
|
100
101
|
insert_content CONTENT_CONTAINER_START, 0
|
@@ -147,7 +148,7 @@ module CombinePDF
|
|
147
148
|
|
148
149
|
# This method adds a simple text box to the Page represented by the PDFWriter class.
|
149
150
|
# This function takes two values:
|
150
|
-
# text:: the text to
|
151
|
+
# text:: the text to write in the box.
|
151
152
|
# properties:: a Hash of box properties.
|
152
153
|
# the symbols and values in the properties Hash could be any or all of the following:
|
153
154
|
# x:: the left position of the box.
|
@@ -214,7 +215,7 @@ module CombinePDF
|
|
214
215
|
options[:text_padding] = 0 if options[:text_padding].to_f >= 1
|
215
216
|
|
216
217
|
# create box stream
|
217
|
-
box_stream = ''
|
218
|
+
box_stream = +''
|
218
219
|
# set graphic state for box
|
219
220
|
if options[:box_color] || (options[:border_width].to_i > 0 && options[:border_color])
|
220
221
|
# compute x and y position for text
|
@@ -290,7 +291,7 @@ module CombinePDF
|
|
290
291
|
# reset x,y by text alignment - x,y are calculated from the bottom left
|
291
292
|
# each unit (1) is 1/72 Inch
|
292
293
|
# create text stream
|
293
|
-
text_stream = ''
|
294
|
+
text_stream = +''
|
294
295
|
if !text.to_s.empty? && options[:font_size] != 0 && (options[:font_color] || options[:stroke_color])
|
295
296
|
# compute x and y position for text
|
296
297
|
x = options[:x] + (options[:width] * options[:text_padding])
|
@@ -679,7 +680,7 @@ module CombinePDF
|
|
679
680
|
insert_content 'Q'
|
680
681
|
|
681
682
|
# Prep content
|
682
|
-
@contents = ''
|
683
|
+
@contents = +''
|
683
684
|
insert_content @contents
|
684
685
|
@contents
|
685
686
|
end
|
@@ -788,7 +789,7 @@ module CombinePDF
|
|
788
789
|
# add to array
|
789
790
|
if out.last.nil? || out.last[0] != fonts[i]
|
790
791
|
out.last[1] << '>' unless out.last.nil?
|
791
|
-
out << [fonts[i], '<', 0, 0]
|
792
|
+
out << [fonts[i], (+'<'), 0, 0]
|
792
793
|
end
|
793
794
|
out.last[1] << (fonts_array[i].cmap.nil? ? (c.unpack('H*')[0]) : fonts_array[i].cmap[c])
|
794
795
|
if fonts_array[i].metrics[c]
|
data/lib/combine_pdf/parser.rb
CHANGED
@@ -33,7 +33,7 @@ module CombinePDF
|
|
33
33
|
# they are mainly to used to know if the file is (was) encrypted and to get more details.
|
34
34
|
attr_reader :info_object, :root_object, :names_object, :forms_object, :outlines_object, :metadata
|
35
35
|
|
36
|
-
attr_reader :allow_optional_content
|
36
|
+
attr_reader :allow_optional_content, :raise_on_encrypted
|
37
37
|
# when creating a parser, it is important to set the data (String) we wish to parse.
|
38
38
|
#
|
39
39
|
# <b>the data is required and it is not possible to set the data at a later stage</b>
|
@@ -58,6 +58,7 @@ module CombinePDF
|
|
58
58
|
@version = nil
|
59
59
|
@scanner = nil
|
60
60
|
@allow_optional_content = options[:allow_optional_content]
|
61
|
+
@raise_on_encrypted = options[:raise_on_encrypted]
|
61
62
|
end
|
62
63
|
|
63
64
|
# parse the data in the new parser (the data already set through the initialize / new method)
|
@@ -96,6 +97,7 @@ module CombinePDF
|
|
96
97
|
end
|
97
98
|
|
98
99
|
if @root_object[:Encrypt]
|
100
|
+
raise EncryptionError, 'the file is encrypted' if @raise_on_encrypted
|
99
101
|
# change_references_to_actual_values @root_object
|
100
102
|
warn 'PDF is Encrypted! Attempting to decrypt - not yet fully supported.'
|
101
103
|
decryptor = PDFDecrypt.new @parsed, @root_object
|
@@ -233,16 +235,18 @@ module CombinePDF
|
|
233
235
|
# all characters that aren't white space or special: /[^\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+
|
234
236
|
elsif str = @scanner.scan(/\/[^\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]*/)
|
235
237
|
out << (str[1..-1].gsub(/\#[0-9a-fA-F]{2}/) { |a| a[1..2].hex.chr }).to_sym
|
238
|
+
# warn "CombinePDF detected name: #{out.last.to_s}"
|
236
239
|
##########################################
|
237
240
|
## Parse a Number
|
238
241
|
##########################################
|
239
242
|
elsif str = @scanner.scan(/[\+\-\.\d]+/)
|
240
243
|
str =~ /\./ ? (out << str.to_f) : (out << str.to_i)
|
244
|
+
# warn "CombinePDF detected number: #{out.last.to_s}"
|
241
245
|
##########################################
|
242
246
|
## parse a Hex String
|
243
247
|
##########################################
|
244
248
|
elsif str = @scanner.scan(/\<[0-9a-fA-F]*\>/)
|
245
|
-
# warn "Found a hex string"
|
249
|
+
# warn "Found a hex string #{str}"
|
246
250
|
str = str.slice(1..-2).force_encoding(Encoding::ASCII_8BIT)
|
247
251
|
# str = "0#{str}" if str.length.odd?
|
248
252
|
out << unify_string([str].pack('H*').force_encoding(Encoding::ASCII_8BIT))
|
@@ -258,7 +262,7 @@ module CombinePDF
|
|
258
262
|
##########################################
|
259
263
|
elsif @scanner.scan(/\(/)
|
260
264
|
# warn "Found a literal string"
|
261
|
-
str = ''.
|
265
|
+
str = ''.b
|
262
266
|
count = 1
|
263
267
|
while count > 0 && @scanner.rest?
|
264
268
|
scn = @scanner.scan_until(/[\(\)]/)
|
@@ -319,8 +323,8 @@ module CombinePDF
|
|
319
323
|
str << 12
|
320
324
|
when 48..57 # octal notation for byte?
|
321
325
|
rep -= 48
|
322
|
-
rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0]
|
323
|
-
rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0]
|
326
|
+
rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0]&.between?(48, 57)
|
327
|
+
rep = (rep << 3) + (str_bytes.shift-48) if str_bytes[0]&.between?(48, 57) && (((rep << 3) + (str_bytes[0] - 48)) <= 255)
|
324
328
|
str << rep
|
325
329
|
when 10 # new line, ignore
|
326
330
|
str_bytes.shift if str_bytes[0] == 13
|
@@ -336,6 +340,7 @@ module CombinePDF
|
|
336
340
|
end
|
337
341
|
end
|
338
342
|
out << unify_string(str.pack('C*').force_encoding(Encoding::ASCII_8BIT))
|
343
|
+
# warn "Found Literal String: #{out.last}"
|
339
344
|
##########################################
|
340
345
|
## parse a Dictionary
|
341
346
|
##########################################
|
@@ -348,29 +353,42 @@ module CombinePDF
|
|
348
353
|
## return content of array or dictionary
|
349
354
|
##########################################
|
350
355
|
elsif @scanner.scan(/\]/) || @scanner.scan(/>>/)
|
356
|
+
# warn "Dictionary / Array ended with #{@scanner.peek(5)}"
|
351
357
|
return out
|
352
358
|
##########################################
|
353
359
|
## parse a Stream
|
354
360
|
##########################################
|
355
361
|
elsif @scanner.scan(/stream[ \t]*[\r\n]/)
|
356
362
|
@scanner.pos += 1 if @scanner.peek(1) == "\n".freeze && @scanner.matched[-1] != "\n".freeze
|
363
|
+
# advance by the published stream length (if any)
|
364
|
+
old_pos = @scanner.pos
|
365
|
+
if(out.last.is_a?(Hash) && out.last[:Length].is_a?(Integer) && out.last[:Length] > 2)
|
366
|
+
@scanner.pos += out.last[:Length] - 2
|
367
|
+
end
|
368
|
+
|
357
369
|
# the following was discarded because some PDF files didn't have an EOL marker as required
|
358
370
|
# str = @scanner.scan_until(/(\r\n|\r|\n)endstream/)
|
359
371
|
# instead, a non-strict RegExp is used:
|
360
|
-
|
372
|
+
|
361
373
|
|
362
374
|
# raise error if the stream doesn't end.
|
363
|
-
unless
|
375
|
+
unless @scanner.skip_until(/endstream/)
|
364
376
|
raise ParsingError, "Parsing Error: PDF file error - a stream object wasn't properly closed using 'endstream'!"
|
365
377
|
end
|
378
|
+
length = @scanner.pos - (old_pos + 9)
|
379
|
+
length = 0 if(length < 0)
|
380
|
+
length -= 1 if(@scanner.string[old_pos + length - 1] == "\n")
|
381
|
+
length -= 1 if(@scanner.string[old_pos + length - 1] == "\r")
|
382
|
+
str = (length > 0) ? @scanner.string.slice(old_pos, length) : +''
|
383
|
+
|
384
|
+
# warn "CombinePDF parser: detected Stream #{str.length} bytes long #{str[0..3]}...#{str[-4..-1]}"
|
366
385
|
|
367
386
|
# need to remove end of stream
|
368
387
|
if out.last.is_a? Hash
|
369
|
-
|
370
|
-
out.last[:raw_stream_content] = unify_string str.sub(/(\r\n|\n|\r)?endstream\z/, '').force_encoding(Encoding::ASCII_8BIT)
|
388
|
+
out.last[:raw_stream_content] = unify_string str.force_encoding(Encoding::ASCII_8BIT)
|
371
389
|
else
|
372
390
|
warn 'Stream not attached to dictionary!'
|
373
|
-
out << str.
|
391
|
+
out << str.force_encoding(Encoding::ASCII_8BIT)
|
374
392
|
end
|
375
393
|
##########################################
|
376
394
|
## parse an Object after finished
|
@@ -528,6 +546,14 @@ module CombinePDF
|
|
528
546
|
inheritance_hash[:Resources] ||= { referenced_object: {}, is_reference_only: true }.dup
|
529
547
|
(inheritance_hash[:Resources][:referenced_object] || inheritance_hash[:Resources]).update((catalogs[:Resources][:referenced_object] || catalogs[:Resources]), &HASH_UPDATE_PROC_FOR_OLD)
|
530
548
|
end
|
549
|
+
if catalogs[:ProcSet].is_a?(Array)
|
550
|
+
if(inheritance_hash[:ProcSet])
|
551
|
+
inheritance_hash[:ProcSet][:referenced_object].concat(catalogs[:ProcSet])
|
552
|
+
inheritance_hash[:ProcSet][:referenced_object].uniq!
|
553
|
+
else
|
554
|
+
inheritance_hash[:ProcSet] ||= { referenced_object: catalogs[:ProcSet], is_reference_only: true }.dup
|
555
|
+
end
|
556
|
+
end
|
531
557
|
if catalogs[:ColorSpace]
|
532
558
|
inheritance_hash[:ColorSpace] ||= { referenced_object: {}, is_reference_only: true }.dup
|
533
559
|
(inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]).update((catalogs[:ColorSpace][:referenced_object] || catalogs[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
|
@@ -556,6 +582,18 @@ module CombinePDF
|
|
556
582
|
catalogs[:ColorSpace] = { referenced_object: catalogs[:ColorSpace], is_reference_only: true } unless catalogs[:ColorSpace][:referenced_object]
|
557
583
|
catalogs[:ColorSpace][:referenced_object].update((inheritance_hash[:ColorSpace][:referenced_object] || inheritance_hash[:ColorSpace]), &HASH_UPDATE_PROC_FOR_OLD)
|
558
584
|
end
|
585
|
+
if inheritance_hash[:ProcSet]
|
586
|
+
if(catalogs[:ProcSet])
|
587
|
+
if catalogs[:ProcSet].is_a?(Array)
|
588
|
+
catalogs[:ProcSet] = { referenced_object: catalogs[:ProcSet], is_reference_only: true }
|
589
|
+
end
|
590
|
+
catalogs[:ProcSet][:referenced_object].concat(inheritance_hash[:ProcSet][:referenced_object])
|
591
|
+
catalogs[:ProcSet][:referenced_object].uniq!
|
592
|
+
else
|
593
|
+
catalogs[:ProcSet] = { is_reference_only: true }.dup
|
594
|
+
catalogs[:ProcSet][:referenced_object] = []
|
595
|
+
end
|
596
|
+
end
|
559
597
|
# (catalogs[:ColorSpace] ||= {}).update(inheritance_hash[:ColorSpace], &HASH_UPDATE_PROC_FOR_OLD) if inheritance_hash[:ColorSpace]
|
560
598
|
# catalogs[:Order] ||= inheritance_hash[:Order] if inheritance_hash[:Order]
|
561
599
|
# catalogs[:AS] ||= inheritance_hash[:AS] if inheritance_hash[:AS]
|
@@ -594,17 +632,17 @@ module CombinePDF
|
|
594
632
|
#
|
595
633
|
def serialize_objects_and_references
|
596
634
|
obj_dir = {}
|
597
|
-
objid_cache = {}
|
635
|
+
objid_cache = {}.compare_by_identity
|
598
636
|
# create a dictionary for referenced objects (no value resolution at this point)
|
599
637
|
# at the same time, delete duplicates and old versions when objects have multiple versions
|
600
638
|
@parsed.uniq!
|
601
639
|
@parsed.length.times do |i|
|
602
640
|
o = @parsed[i]
|
603
|
-
objid_cache[o
|
641
|
+
objid_cache[o] = i
|
604
642
|
tmp_key = [o[:indirect_reference_id], o[:indirect_generation_number]]
|
605
643
|
if tmp_found = obj_dir[tmp_key]
|
606
644
|
tmp_found.clear
|
607
|
-
@parsed[objid_cache[tmp_found
|
645
|
+
@parsed[objid_cache[tmp_found]] = nil
|
608
646
|
end
|
609
647
|
obj_dir[tmp_key] = o
|
610
648
|
end
|
@@ -727,9 +765,9 @@ module CombinePDF
|
|
727
765
|
# end
|
728
766
|
|
729
767
|
# # run block of code on every PDF object (PDF objects are class Hash)
|
730
|
-
# def each_object(object, limit_references = true, already_visited = {}, &block)
|
768
|
+
# def each_object(object, limit_references = true, already_visited = {}.compare_by_identity, &block)
|
731
769
|
# unless limit_references
|
732
|
-
# already_visited[object
|
770
|
+
# already_visited[object] = true
|
733
771
|
# end
|
734
772
|
# case
|
735
773
|
# when object.is_a?(Array)
|
@@ -738,7 +776,7 @@ module CombinePDF
|
|
738
776
|
# yield(object)
|
739
777
|
# unless limit_references && object[:is_reference_only]
|
740
778
|
# object.each do |k,v|
|
741
|
-
# each_object(v, limit_references, already_visited, &block) unless already_visited[v
|
779
|
+
# each_object(v, limit_references, already_visited, &block) unless already_visited[v]
|
742
780
|
# end
|
743
781
|
# end
|
744
782
|
# end
|