pdf-reader 1.1.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (82)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG +87 -2
  3. data/{README.rdoc → README.md} +43 -31
  4. data/Rakefile +21 -16
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -3
  8. data/examples/callbacks.rb +2 -1
  9. data/examples/extract_images.rb +11 -6
  10. data/examples/fuzzy_paragraphs.rb +24 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
  14. data/lib/pdf/reader/afm/Courier.afm +342 -0
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -0
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
  26. data/lib/pdf/reader/buffer.rb +90 -63
  27. data/lib/pdf/reader/cid_widths.rb +63 -0
  28. data/lib/pdf/reader/cmap.rb +69 -38
  29. data/lib/pdf/reader/encoding.rb +74 -48
  30. data/lib/pdf/reader/error.rb +24 -4
  31. data/lib/pdf/reader/filter/ascii85.rb +28 -0
  32. data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
  33. data/lib/pdf/reader/filter/depredict.rb +141 -0
  34. data/lib/pdf/reader/filter/flate.rb +53 -0
  35. data/lib/pdf/reader/filter/lzw.rb +21 -0
  36. data/lib/pdf/reader/filter/null.rb +18 -0
  37. data/lib/pdf/reader/filter/run_length.rb +45 -0
  38. data/lib/pdf/reader/filter.rb +15 -234
  39. data/lib/pdf/reader/font.rb +107 -43
  40. data/lib/pdf/reader/font_descriptor.rb +80 -0
  41. data/lib/pdf/reader/form_xobject.rb +26 -4
  42. data/lib/pdf/reader/glyph_hash.rb +56 -18
  43. data/lib/pdf/reader/lzw.rb +6 -4
  44. data/lib/pdf/reader/null_security_handler.rb +17 -0
  45. data/lib/pdf/reader/object_cache.rb +40 -16
  46. data/lib/pdf/reader/object_hash.rb +94 -40
  47. data/lib/pdf/reader/object_stream.rb +1 -0
  48. data/lib/pdf/reader/orientation_detector.rb +34 -0
  49. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  50. data/lib/pdf/reader/page.rb +48 -3
  51. data/lib/pdf/reader/page_layout.rb +125 -0
  52. data/lib/pdf/reader/page_state.rb +185 -70
  53. data/lib/pdf/reader/page_text_receiver.rb +70 -20
  54. data/lib/pdf/reader/pages_strategy.rb +4 -293
  55. data/lib/pdf/reader/parser.rb +37 -61
  56. data/lib/pdf/reader/print_receiver.rb +6 -0
  57. data/lib/pdf/reader/reference.rb +4 -1
  58. data/lib/pdf/reader/register_receiver.rb +17 -31
  59. data/lib/pdf/reader/resource_methods.rb +1 -0
  60. data/lib/pdf/reader/standard_security_handler.rb +82 -42
  61. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  62. data/lib/pdf/reader/stream.rb +5 -2
  63. data/lib/pdf/reader/synchronized_cache.rb +33 -0
  64. data/lib/pdf/reader/text_run.rb +99 -0
  65. data/lib/pdf/reader/token.rb +4 -1
  66. data/lib/pdf/reader/transformation_matrix.rb +195 -0
  67. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  68. data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
  69. data/lib/pdf/reader/width_calculator/composite.rb +28 -0
  70. data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
  71. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
  72. data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
  73. data/lib/pdf/reader/width_calculator.rb +12 -0
  74. data/lib/pdf/reader/xref.rb +41 -9
  75. data/lib/pdf/reader.rb +45 -104
  76. data/lib/pdf-reader.rb +4 -1
  77. metadata +220 -101
  78. data/bin/pdf_list_callbacks +0 -17
  79. data/lib/pdf/hash.rb +0 -15
  80. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  81. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  82. data/lib/pdf/reader/text_receiver.rb +0 -264
data/lib/pdf-reader.rb CHANGED
@@ -1 +1,4 @@
1
- require "pdf/reader"
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "pdf/reader"
metadata CHANGED
@@ -1,188 +1,307 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
5
- prerelease:
4
+ version: 2.5.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - James Healy
9
- autorequire:
8
+ autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-05-09 00:00:00.000000000 Z
11
+ date: 2021-06-06 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rake
16
- requirement: &35841860 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
- - - ! '>='
17
+ - - "<"
20
18
  - !ruby/object:Gem::Version
21
- version: '0'
19
+ version: '13.0'
22
20
  type: :development
23
21
  prerelease: false
24
- version_requirements: *35841860
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "<"
25
+ - !ruby/object:Gem::Version
26
+ version: '13.0'
25
27
  - !ruby/object:Gem::Dependency
26
- name: roodi
27
- requirement: &35841400 !ruby/object:Gem::Requirement
28
- none: false
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
- - - ! '>='
31
+ - - "~>"
31
32
  - !ruby/object:Gem::Version
32
- version: '0'
33
+ version: '3.5'
33
34
  type: :development
34
35
  prerelease: false
35
- version_requirements: *35841400
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.5'
36
41
  - !ruby/object:Gem::Dependency
37
- name: rspec
38
- requirement: &35840900 !ruby/object:Gem::Requirement
39
- none: false
42
+ name: cane
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: morecane
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.2'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
40
72
  requirements:
41
- - - ~>
73
+ - - ">="
42
74
  - !ruby/object:Gem::Version
43
- version: '2.3'
75
+ version: '0'
44
76
  type: :development
45
77
  prerelease: false
46
- version_requirements: *35840900
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
47
83
  - !ruby/object:Gem::Dependency
48
- name: ZenTest
49
- requirement: &35840400 !ruby/object:Gem::Requirement
50
- none: false
84
+ name: rdoc
85
+ requirement: !ruby/object:Gem::Requirement
51
86
  requirements:
52
- - - ~>
87
+ - - ">="
53
88
  - !ruby/object:Gem::Version
54
- version: 4.4.2
89
+ version: '0'
55
90
  type: :development
56
91
  prerelease: false
57
- version_requirements: *35840400
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
58
97
  - !ruby/object:Gem::Dependency
59
98
  name: Ascii85
60
- requirement: &35839940 !ruby/object:Gem::Requirement
61
- none: false
99
+ requirement: !ruby/object:Gem::Requirement
62
100
  requirements:
63
- - - ~>
101
+ - - "~>"
64
102
  - !ruby/object:Gem::Version
65
- version: 1.0.0
103
+ version: '1.0'
66
104
  type: :runtime
67
105
  prerelease: false
68
- version_requirements: *35839940
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.0'
69
111
  - !ruby/object:Gem::Dependency
70
112
  name: ruby-rc4
71
- requirement: &35839520 !ruby/object:Gem::Requirement
72
- none: false
113
+ requirement: !ruby/object:Gem::Requirement
73
114
  requirements:
74
- - - ! '>='
115
+ - - ">="
75
116
  - !ruby/object:Gem::Version
76
117
  version: '0'
77
118
  type: :runtime
78
119
  prerelease: false
79
- version_requirements: *35839520
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: hashery
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '2.0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '2.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: ttfunk
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: afm
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 0.2.1
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 0.2.1
80
167
  description: The PDF::Reader library implements a PDF parser conforming as much as
81
168
  possible to the PDF specification from Adobe
82
169
  email:
83
- - jimmy@deefa.com
170
+ - james@yob.id.au
84
171
  executables:
85
172
  - pdf_object
86
173
  - pdf_text
87
- - pdf_list_callbacks
88
174
  - pdf_callbacks
89
175
  extensions: []
90
176
  extra_rdoc_files:
91
- - README.rdoc
177
+ - README.md
92
178
  - TODO
93
179
  - CHANGELOG
94
180
  - MIT-LICENSE
95
181
  files:
96
- - examples/metadata.rb
97
- - examples/extract_images.rb
98
- - examples/extract_bates.rb
182
+ - CHANGELOG
183
+ - MIT-LICENSE
184
+ - README.md
185
+ - Rakefile
186
+ - TODO
187
+ - bin/pdf_callbacks
188
+ - bin/pdf_object
189
+ - bin/pdf_text
99
190
  - examples/callbacks.rb
100
- - examples/rspec.rb
101
- - examples/hash.rb
102
- - examples/text.rb
191
+ - examples/extract_bates.rb
103
192
  - examples/extract_fonts.rb
193
+ - examples/extract_images.rb
194
+ - examples/fuzzy_paragraphs.rb
195
+ - examples/hash.rb
196
+ - examples/metadata.rb
104
197
  - examples/page_count.rb
198
+ - examples/rspec.rb
199
+ - examples/text.rb
105
200
  - examples/version.rb
201
+ - lib/pdf-reader.rb
106
202
  - lib/pdf/reader.rb
107
- - lib/pdf/hash.rb
108
- - lib/pdf/reader/print_receiver.rb
109
- - lib/pdf/reader/xref.rb
203
+ - lib/pdf/reader/afm/Courier-Bold.afm
204
+ - lib/pdf/reader/afm/Courier-BoldOblique.afm
205
+ - lib/pdf/reader/afm/Courier-Oblique.afm
206
+ - lib/pdf/reader/afm/Courier.afm
207
+ - lib/pdf/reader/afm/Helvetica-Bold.afm
208
+ - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
209
+ - lib/pdf/reader/afm/Helvetica-Oblique.afm
210
+ - lib/pdf/reader/afm/Helvetica.afm
211
+ - lib/pdf/reader/afm/MustRead.html
212
+ - lib/pdf/reader/afm/Symbol.afm
213
+ - lib/pdf/reader/afm/Times-Bold.afm
214
+ - lib/pdf/reader/afm/Times-BoldItalic.afm
215
+ - lib/pdf/reader/afm/Times-Italic.afm
216
+ - lib/pdf/reader/afm/Times-Roman.afm
217
+ - lib/pdf/reader/afm/ZapfDingbats.afm
110
218
  - lib/pdf/reader/buffer.rb
111
- - lib/pdf/reader/font.rb
112
- - lib/pdf/reader/parser.rb
219
+ - lib/pdf/reader/cid_widths.rb
220
+ - lib/pdf/reader/cmap.rb
221
+ - lib/pdf/reader/encoding.rb
222
+ - lib/pdf/reader/encodings/mac_expert.txt
223
+ - lib/pdf/reader/encodings/mac_roman.txt
224
+ - lib/pdf/reader/encodings/pdf_doc.txt
225
+ - lib/pdf/reader/encodings/standard.txt
226
+ - lib/pdf/reader/encodings/symbol.txt
227
+ - lib/pdf/reader/encodings/win_ansi.txt
228
+ - lib/pdf/reader/encodings/zapf_dingbats.txt
113
229
  - lib/pdf/reader/error.rb
114
230
  - lib/pdf/reader/filter.rb
115
- - lib/pdf/reader/object_hash.rb
116
- - lib/pdf/reader/stream.rb
117
- - lib/pdf/reader/page_state.rb
118
- - lib/pdf/reader/standard_security_handler.rb
119
- - lib/pdf/reader/cmap.rb
231
+ - lib/pdf/reader/filter/ascii85.rb
232
+ - lib/pdf/reader/filter/ascii_hex.rb
233
+ - lib/pdf/reader/filter/depredict.rb
234
+ - lib/pdf/reader/filter/flate.rb
235
+ - lib/pdf/reader/filter/lzw.rb
236
+ - lib/pdf/reader/filter/null.rb
237
+ - lib/pdf/reader/filter/run_length.rb
238
+ - lib/pdf/reader/font.rb
239
+ - lib/pdf/reader/font_descriptor.rb
120
240
  - lib/pdf/reader/form_xobject.rb
121
- - lib/pdf/reader/object_cache.rb
122
- - lib/pdf/reader/object_stream.rb
123
- - lib/pdf/reader/encoding.rb
124
- - lib/pdf/reader/page_text_receiver.rb
125
- - lib/pdf/reader/text_receiver.rb
126
241
  - lib/pdf/reader/glyph_hash.rb
127
242
  - lib/pdf/reader/glyphlist.txt
128
243
  - lib/pdf/reader/lzw.rb
129
- - lib/pdf/reader/register_receiver.rb
244
+ - lib/pdf/reader/null_security_handler.rb
245
+ - lib/pdf/reader/object_cache.rb
246
+ - lib/pdf/reader/object_hash.rb
247
+ - lib/pdf/reader/object_stream.rb
248
+ - lib/pdf/reader/orientation_detector.rb
249
+ - lib/pdf/reader/overlapping_runs_filter.rb
130
250
  - lib/pdf/reader/page.rb
131
- - lib/pdf/reader/abstract_strategy.rb
251
+ - lib/pdf/reader/page_layout.rb
252
+ - lib/pdf/reader/page_state.rb
253
+ - lib/pdf/reader/page_text_receiver.rb
132
254
  - lib/pdf/reader/pages_strategy.rb
255
+ - lib/pdf/reader/parser.rb
256
+ - lib/pdf/reader/print_receiver.rb
133
257
  - lib/pdf/reader/reference.rb
134
- - lib/pdf/reader/encodings/standard.txt
135
- - lib/pdf/reader/encodings/mac_roman.txt
136
- - lib/pdf/reader/encodings/symbol.txt
137
- - lib/pdf/reader/encodings/win_ansi.txt
138
- - lib/pdf/reader/encodings/zapf_dingbats.txt
139
- - lib/pdf/reader/encodings/pdf_doc.txt
140
- - lib/pdf/reader/encodings/mac_expert.txt
258
+ - lib/pdf/reader/register_receiver.rb
141
259
  - lib/pdf/reader/resource_methods.rb
142
- - lib/pdf/reader/metadata_strategy.rb
260
+ - lib/pdf/reader/standard_security_handler.rb
261
+ - lib/pdf/reader/standard_security_handler_v5.rb
262
+ - lib/pdf/reader/stream.rb
263
+ - lib/pdf/reader/synchronized_cache.rb
264
+ - lib/pdf/reader/text_run.rb
143
265
  - lib/pdf/reader/token.rb
144
- - lib/pdf-reader.rb
145
- - Rakefile
146
- - README.rdoc
147
- - TODO
148
- - CHANGELOG
149
- - MIT-LICENSE
150
- - bin/pdf_object
151
- - bin/pdf_text
152
- - bin/pdf_list_callbacks
153
- - bin/pdf_callbacks
154
- homepage: http://github.com/yob/pdf-reader
155
- licenses: []
156
- post_install_message: ! "\n ********************************************\n\n v1.0.0
157
- of PDF::Reader introduced a new page-based API. There are extensive\n examples
158
- showing how to use it in the README and examples directory.\n\n For detailed documentation,
159
- check the rdocs for the PDF::Reader,\n PDF::Reader::Page and PDF::Reader::ObjectHash
160
- classes.\n\n The old API is marked as deprecated but will continue to work with
161
- no\n visible warnings for now.\n\n ********************************************\n\n"
266
+ - lib/pdf/reader/transformation_matrix.rb
267
+ - lib/pdf/reader/unimplemented_security_handler.rb
268
+ - lib/pdf/reader/width_calculator.rb
269
+ - lib/pdf/reader/width_calculator/built_in.rb
270
+ - lib/pdf/reader/width_calculator/composite.rb
271
+ - lib/pdf/reader/width_calculator/true_type.rb
272
+ - lib/pdf/reader/width_calculator/type_one_or_three.rb
273
+ - lib/pdf/reader/width_calculator/type_zero.rb
274
+ - lib/pdf/reader/xref.rb
275
+ homepage: https://github.com/yob/pdf-reader
276
+ licenses:
277
+ - MIT
278
+ metadata:
279
+ bug_tracker_uri: https://github.com/yob/pdf-reader/issues
280
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
283
+ post_install_message:
162
284
  rdoc_options:
163
- - --title
285
+ - "--title"
164
286
  - PDF::Reader Documentation
165
- - --main
166
- - README.rdoc
167
- - -q
287
+ - "--main"
288
+ - README.md
289
+ - "-q"
168
290
  require_paths:
169
291
  - lib
170
292
  required_ruby_version: !ruby/object:Gem::Requirement
171
- none: false
172
293
  requirements:
173
- - - ! '>='
294
+ - - ">="
174
295
  - !ruby/object:Gem::Version
175
- version: 1.8.7
296
+ version: '2.0'
176
297
  required_rubygems_version: !ruby/object:Gem::Requirement
177
- none: false
178
298
  requirements:
179
- - - ! '>='
299
+ - - ">="
180
300
  - !ruby/object:Gem::Version
181
301
  version: '0'
182
302
  requirements: []
183
- rubyforge_project:
184
- rubygems_version: 1.8.11
185
- signing_key:
186
- specification_version: 3
303
+ rubygems_version: 3.2.3
304
+ signing_key:
305
+ specification_version: 4
187
306
  summary: A library for accessing the content of PDF files
188
307
  test_files: []
data/bin/pdf_list_callbacks DELETED
@@ -1,17 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # this executable is deprecated, use pdf_callbacks instead
4
-
5
- require 'rubygems'
6
-
7
- $LOAD_PATH.unshift(File.dirname(__FILE__) + "/../lib")
8
-
9
- require 'pdf/reader'
10
-
11
- receiver = PDF::Reader::PrintReceiver.new
12
-
13
- if ARGV.empty?
14
- PDF::Reader.new.parse($stdin, receiver)
15
- else
16
- PDF::Reader.file(ARGV[0], receiver)
17
- end
data/lib/pdf/hash.rb DELETED
@@ -1,15 +0,0 @@
1
- # coding: utf-8
2
-
3
- module PDF
4
- class Hash < ::PDF::Reader::ObjectHash # :nodoc:
5
- def initialize(input)
6
- warn "DEPRECATION NOTICE: PDF::Hash has been deprecated, use PDF::Reader::ObjectHash instead"
7
- super
8
- end
9
-
10
- def version
11
- warn "DEPRECATION NOTICE: PDF::Hash#version has been deprecated, use PDF::Reader::ObjectHash#pdf_version instead"
12
- pdf_version
13
- end
14
- end
15
- end