pdf-reader 1.1.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG +87 -2
  3. data/{README.rdoc → README.md} +43 -31
  4. data/Rakefile +21 -16
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -3
  8. data/examples/callbacks.rb +2 -1
  9. data/examples/extract_images.rb +11 -6
  10. data/examples/fuzzy_paragraphs.rb +24 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
  14. data/lib/pdf/reader/afm/Courier.afm +342 -0
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -0
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
  26. data/lib/pdf/reader/buffer.rb +90 -63
  27. data/lib/pdf/reader/cid_widths.rb +63 -0
  28. data/lib/pdf/reader/cmap.rb +69 -38
  29. data/lib/pdf/reader/encoding.rb +74 -48
  30. data/lib/pdf/reader/error.rb +24 -4
  31. data/lib/pdf/reader/filter/ascii85.rb +28 -0
  32. data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
  33. data/lib/pdf/reader/filter/depredict.rb +141 -0
  34. data/lib/pdf/reader/filter/flate.rb +53 -0
  35. data/lib/pdf/reader/filter/lzw.rb +21 -0
  36. data/lib/pdf/reader/filter/null.rb +18 -0
  37. data/lib/pdf/reader/filter/run_length.rb +45 -0
  38. data/lib/pdf/reader/filter.rb +15 -234
  39. data/lib/pdf/reader/font.rb +107 -43
  40. data/lib/pdf/reader/font_descriptor.rb +80 -0
  41. data/lib/pdf/reader/form_xobject.rb +26 -4
  42. data/lib/pdf/reader/glyph_hash.rb +56 -18
  43. data/lib/pdf/reader/lzw.rb +6 -4
  44. data/lib/pdf/reader/null_security_handler.rb +17 -0
  45. data/lib/pdf/reader/object_cache.rb +40 -16
  46. data/lib/pdf/reader/object_hash.rb +94 -40
  47. data/lib/pdf/reader/object_stream.rb +1 -0
  48. data/lib/pdf/reader/orientation_detector.rb +34 -0
  49. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  50. data/lib/pdf/reader/page.rb +48 -3
  51. data/lib/pdf/reader/page_layout.rb +125 -0
  52. data/lib/pdf/reader/page_state.rb +185 -70
  53. data/lib/pdf/reader/page_text_receiver.rb +70 -20
  54. data/lib/pdf/reader/pages_strategy.rb +4 -293
  55. data/lib/pdf/reader/parser.rb +37 -61
  56. data/lib/pdf/reader/print_receiver.rb +6 -0
  57. data/lib/pdf/reader/reference.rb +4 -1
  58. data/lib/pdf/reader/register_receiver.rb +17 -31
  59. data/lib/pdf/reader/resource_methods.rb +1 -0
  60. data/lib/pdf/reader/standard_security_handler.rb +82 -42
  61. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  62. data/lib/pdf/reader/stream.rb +5 -2
  63. data/lib/pdf/reader/synchronized_cache.rb +33 -0
  64. data/lib/pdf/reader/text_run.rb +99 -0
  65. data/lib/pdf/reader/token.rb +4 -1
  66. data/lib/pdf/reader/transformation_matrix.rb +195 -0
  67. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  68. data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
  69. data/lib/pdf/reader/width_calculator/composite.rb +28 -0
  70. data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
  71. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
  72. data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
  73. data/lib/pdf/reader/width_calculator.rb +12 -0
  74. data/lib/pdf/reader/xref.rb +41 -9
  75. data/lib/pdf/reader.rb +45 -104
  76. data/lib/pdf-reader.rb +4 -1
  77. metadata +220 -101
  78. data/bin/pdf_list_callbacks +0 -17
  79. data/lib/pdf/hash.rb +0 -15
  80. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  81. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  82. data/lib/pdf/reader/text_receiver.rb +0 -264
data/lib/pdf-reader.rb CHANGED
@@ -1 +1,4 @@
1
- require "pdf/reader"
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "pdf/reader"
metadata CHANGED
@@ -1,188 +1,307 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
5
- prerelease:
4
+ version: 2.5.0
6
5
  platform: ruby
7
6
  authors:
8
7
  - James Healy
9
- autorequire:
8
+ autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2012-05-09 00:00:00.000000000 Z
11
+ date: 2021-06-06 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: rake
16
- requirement: &35841860 !ruby/object:Gem::Requirement
17
- none: false
15
+ requirement: !ruby/object:Gem::Requirement
18
16
  requirements:
19
- - - ! '>='
17
+ - - "<"
20
18
  - !ruby/object:Gem::Version
21
- version: '0'
19
+ version: '13.0'
22
20
  type: :development
23
21
  prerelease: false
24
- version_requirements: *35841860
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "<"
25
+ - !ruby/object:Gem::Version
26
+ version: '13.0'
25
27
  - !ruby/object:Gem::Dependency
26
- name: roodi
27
- requirement: &35841400 !ruby/object:Gem::Requirement
28
- none: false
28
+ name: rspec
29
+ requirement: !ruby/object:Gem::Requirement
29
30
  requirements:
30
- - - ! '>='
31
+ - - "~>"
31
32
  - !ruby/object:Gem::Version
32
- version: '0'
33
+ version: '3.5'
33
34
  type: :development
34
35
  prerelease: false
35
- version_requirements: *35841400
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '3.5'
36
41
  - !ruby/object:Gem::Dependency
37
- name: rspec
38
- requirement: &35840900 !ruby/object:Gem::Requirement
39
- none: false
42
+ name: cane
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: morecane
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.2'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.2'
69
+ - !ruby/object:Gem::Dependency
70
+ name: pry
71
+ requirement: !ruby/object:Gem::Requirement
40
72
  requirements:
41
- - - ~>
73
+ - - ">="
42
74
  - !ruby/object:Gem::Version
43
- version: '2.3'
75
+ version: '0'
44
76
  type: :development
45
77
  prerelease: false
46
- version_requirements: *35840900
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
47
83
  - !ruby/object:Gem::Dependency
48
- name: ZenTest
49
- requirement: &35840400 !ruby/object:Gem::Requirement
50
- none: false
84
+ name: rdoc
85
+ requirement: !ruby/object:Gem::Requirement
51
86
  requirements:
52
- - - ~>
87
+ - - ">="
53
88
  - !ruby/object:Gem::Version
54
- version: 4.4.2
89
+ version: '0'
55
90
  type: :development
56
91
  prerelease: false
57
- version_requirements: *35840400
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
58
97
  - !ruby/object:Gem::Dependency
59
98
  name: Ascii85
60
- requirement: &35839940 !ruby/object:Gem::Requirement
61
- none: false
99
+ requirement: !ruby/object:Gem::Requirement
62
100
  requirements:
63
- - - ~>
101
+ - - "~>"
64
102
  - !ruby/object:Gem::Version
65
- version: 1.0.0
103
+ version: '1.0'
66
104
  type: :runtime
67
105
  prerelease: false
68
- version_requirements: *35839940
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: '1.0'
69
111
  - !ruby/object:Gem::Dependency
70
112
  name: ruby-rc4
71
- requirement: &35839520 !ruby/object:Gem::Requirement
72
- none: false
113
+ requirement: !ruby/object:Gem::Requirement
73
114
  requirements:
74
- - - ! '>='
115
+ - - ">="
75
116
  - !ruby/object:Gem::Version
76
117
  version: '0'
77
118
  type: :runtime
78
119
  prerelease: false
79
- version_requirements: *35839520
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: hashery
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: '2.0'
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: '2.0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: ttfunk
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :runtime
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: afm
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 0.2.1
160
+ type: :runtime
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 0.2.1
80
167
  description: The PDF::Reader library implements a PDF parser conforming as much as
81
168
  possible to the PDF specification from Adobe
82
169
  email:
83
- - jimmy@deefa.com
170
+ - james@yob.id.au
84
171
  executables:
85
172
  - pdf_object
86
173
  - pdf_text
87
- - pdf_list_callbacks
88
174
  - pdf_callbacks
89
175
  extensions: []
90
176
  extra_rdoc_files:
91
- - README.rdoc
177
+ - README.md
92
178
  - TODO
93
179
  - CHANGELOG
94
180
  - MIT-LICENSE
95
181
  files:
96
- - examples/metadata.rb
97
- - examples/extract_images.rb
98
- - examples/extract_bates.rb
182
+ - CHANGELOG
183
+ - MIT-LICENSE
184
+ - README.md
185
+ - Rakefile
186
+ - TODO
187
+ - bin/pdf_callbacks
188
+ - bin/pdf_object
189
+ - bin/pdf_text
99
190
  - examples/callbacks.rb
100
- - examples/rspec.rb
101
- - examples/hash.rb
102
- - examples/text.rb
191
+ - examples/extract_bates.rb
103
192
  - examples/extract_fonts.rb
193
+ - examples/extract_images.rb
194
+ - examples/fuzzy_paragraphs.rb
195
+ - examples/hash.rb
196
+ - examples/metadata.rb
104
197
  - examples/page_count.rb
198
+ - examples/rspec.rb
199
+ - examples/text.rb
105
200
  - examples/version.rb
201
+ - lib/pdf-reader.rb
106
202
  - lib/pdf/reader.rb
107
- - lib/pdf/hash.rb
108
- - lib/pdf/reader/print_receiver.rb
109
- - lib/pdf/reader/xref.rb
203
+ - lib/pdf/reader/afm/Courier-Bold.afm
204
+ - lib/pdf/reader/afm/Courier-BoldOblique.afm
205
+ - lib/pdf/reader/afm/Courier-Oblique.afm
206
+ - lib/pdf/reader/afm/Courier.afm
207
+ - lib/pdf/reader/afm/Helvetica-Bold.afm
208
+ - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
209
+ - lib/pdf/reader/afm/Helvetica-Oblique.afm
210
+ - lib/pdf/reader/afm/Helvetica.afm
211
+ - lib/pdf/reader/afm/MustRead.html
212
+ - lib/pdf/reader/afm/Symbol.afm
213
+ - lib/pdf/reader/afm/Times-Bold.afm
214
+ - lib/pdf/reader/afm/Times-BoldItalic.afm
215
+ - lib/pdf/reader/afm/Times-Italic.afm
216
+ - lib/pdf/reader/afm/Times-Roman.afm
217
+ - lib/pdf/reader/afm/ZapfDingbats.afm
110
218
  - lib/pdf/reader/buffer.rb
111
- - lib/pdf/reader/font.rb
112
- - lib/pdf/reader/parser.rb
219
+ - lib/pdf/reader/cid_widths.rb
220
+ - lib/pdf/reader/cmap.rb
221
+ - lib/pdf/reader/encoding.rb
222
+ - lib/pdf/reader/encodings/mac_expert.txt
223
+ - lib/pdf/reader/encodings/mac_roman.txt
224
+ - lib/pdf/reader/encodings/pdf_doc.txt
225
+ - lib/pdf/reader/encodings/standard.txt
226
+ - lib/pdf/reader/encodings/symbol.txt
227
+ - lib/pdf/reader/encodings/win_ansi.txt
228
+ - lib/pdf/reader/encodings/zapf_dingbats.txt
113
229
  - lib/pdf/reader/error.rb
114
230
  - lib/pdf/reader/filter.rb
115
- - lib/pdf/reader/object_hash.rb
116
- - lib/pdf/reader/stream.rb
117
- - lib/pdf/reader/page_state.rb
118
- - lib/pdf/reader/standard_security_handler.rb
119
- - lib/pdf/reader/cmap.rb
231
+ - lib/pdf/reader/filter/ascii85.rb
232
+ - lib/pdf/reader/filter/ascii_hex.rb
233
+ - lib/pdf/reader/filter/depredict.rb
234
+ - lib/pdf/reader/filter/flate.rb
235
+ - lib/pdf/reader/filter/lzw.rb
236
+ - lib/pdf/reader/filter/null.rb
237
+ - lib/pdf/reader/filter/run_length.rb
238
+ - lib/pdf/reader/font.rb
239
+ - lib/pdf/reader/font_descriptor.rb
120
240
  - lib/pdf/reader/form_xobject.rb
121
- - lib/pdf/reader/object_cache.rb
122
- - lib/pdf/reader/object_stream.rb
123
- - lib/pdf/reader/encoding.rb
124
- - lib/pdf/reader/page_text_receiver.rb
125
- - lib/pdf/reader/text_receiver.rb
126
241
  - lib/pdf/reader/glyph_hash.rb
127
242
  - lib/pdf/reader/glyphlist.txt
128
243
  - lib/pdf/reader/lzw.rb
129
- - lib/pdf/reader/register_receiver.rb
244
+ - lib/pdf/reader/null_security_handler.rb
245
+ - lib/pdf/reader/object_cache.rb
246
+ - lib/pdf/reader/object_hash.rb
247
+ - lib/pdf/reader/object_stream.rb
248
+ - lib/pdf/reader/orientation_detector.rb
249
+ - lib/pdf/reader/overlapping_runs_filter.rb
130
250
  - lib/pdf/reader/page.rb
131
- - lib/pdf/reader/abstract_strategy.rb
251
+ - lib/pdf/reader/page_layout.rb
252
+ - lib/pdf/reader/page_state.rb
253
+ - lib/pdf/reader/page_text_receiver.rb
132
254
  - lib/pdf/reader/pages_strategy.rb
255
+ - lib/pdf/reader/parser.rb
256
+ - lib/pdf/reader/print_receiver.rb
133
257
  - lib/pdf/reader/reference.rb
134
- - lib/pdf/reader/encodings/standard.txt
135
- - lib/pdf/reader/encodings/mac_roman.txt
136
- - lib/pdf/reader/encodings/symbol.txt
137
- - lib/pdf/reader/encodings/win_ansi.txt
138
- - lib/pdf/reader/encodings/zapf_dingbats.txt
139
- - lib/pdf/reader/encodings/pdf_doc.txt
140
- - lib/pdf/reader/encodings/mac_expert.txt
258
+ - lib/pdf/reader/register_receiver.rb
141
259
  - lib/pdf/reader/resource_methods.rb
142
- - lib/pdf/reader/metadata_strategy.rb
260
+ - lib/pdf/reader/standard_security_handler.rb
261
+ - lib/pdf/reader/standard_security_handler_v5.rb
262
+ - lib/pdf/reader/stream.rb
263
+ - lib/pdf/reader/synchronized_cache.rb
264
+ - lib/pdf/reader/text_run.rb
143
265
  - lib/pdf/reader/token.rb
144
- - lib/pdf-reader.rb
145
- - Rakefile
146
- - README.rdoc
147
- - TODO
148
- - CHANGELOG
149
- - MIT-LICENSE
150
- - bin/pdf_object
151
- - bin/pdf_text
152
- - bin/pdf_list_callbacks
153
- - bin/pdf_callbacks
154
- homepage: http://github.com/yob/pdf-reader
155
- licenses: []
156
- post_install_message: ! "\n ********************************************\n\n v1.0.0
157
- of PDF::Reader introduced a new page-based API. There are extensive\n examples
158
- showing how to use it in the README and examples directory.\n\n For detailed documentation,
159
- check the rdocs for the PDF::Reader,\n PDF::Reader::Page and PDF::Reader::ObjectHash
160
- classes.\n\n The old API is marked as deprecated but will continue to work with
161
- no\n visible warnings for now.\n\n ********************************************\n\n"
266
+ - lib/pdf/reader/transformation_matrix.rb
267
+ - lib/pdf/reader/unimplemented_security_handler.rb
268
+ - lib/pdf/reader/width_calculator.rb
269
+ - lib/pdf/reader/width_calculator/built_in.rb
270
+ - lib/pdf/reader/width_calculator/composite.rb
271
+ - lib/pdf/reader/width_calculator/true_type.rb
272
+ - lib/pdf/reader/width_calculator/type_one_or_three.rb
273
+ - lib/pdf/reader/width_calculator/type_zero.rb
274
+ - lib/pdf/reader/xref.rb
275
+ homepage: https://github.com/yob/pdf-reader
276
+ licenses:
277
+ - MIT
278
+ metadata:
279
+ bug_tracker_uri: https://github.com/yob/pdf-reader/issues
280
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
283
+ post_install_message:
162
284
  rdoc_options:
163
- - --title
285
+ - "--title"
164
286
  - PDF::Reader Documentation
165
- - --main
166
- - README.rdoc
167
- - -q
287
+ - "--main"
288
+ - README.md
289
+ - "-q"
168
290
  require_paths:
169
291
  - lib
170
292
  required_ruby_version: !ruby/object:Gem::Requirement
171
- none: false
172
293
  requirements:
173
- - - ! '>='
294
+ - - ">="
174
295
  - !ruby/object:Gem::Version
175
- version: 1.8.7
296
+ version: '2.0'
176
297
  required_rubygems_version: !ruby/object:Gem::Requirement
177
- none: false
178
298
  requirements:
179
- - - ! '>='
299
+ - - ">="
180
300
  - !ruby/object:Gem::Version
181
301
  version: '0'
182
302
  requirements: []
183
- rubyforge_project:
184
- rubygems_version: 1.8.11
185
- signing_key:
186
- specification_version: 3
303
+ rubygems_version: 3.2.3
304
+ signing_key:
305
+ specification_version: 4
187
306
  summary: A library for accessing the content of PDF files
188
307
  test_files: []
data/bin/pdf_list_callbacks DELETED
@@ -1,17 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- # this executable is deprecated, use pdf_callbacks instead
4
-
5
- require 'rubygems'
6
-
7
- $LOAD_PATH.unshift(File.dirname(__FILE__) + "/../lib")
8
-
9
- require 'pdf/reader'
10
-
11
- receiver = PDF::Reader::PrintReceiver.new
12
-
13
- if ARGV.empty?
14
- PDF::Reader.new.parse($stdin, receiver)
15
- else
16
- PDF::Reader.file(ARGV[0], receiver)
17
- end
data/lib/pdf/hash.rb DELETED
@@ -1,15 +0,0 @@
1
- # coding: utf-8
2
-
3
- module PDF
4
- class Hash < ::PDF::Reader::ObjectHash # :nodoc:
5
- def initialize(input)
6
- warn "DEPRECATION NOTICE: PDF::Hash has been deprecated, use PDF::Reader::ObjectHash instead"
7
- super
8
- end
9
-
10
- def version
11
- warn "DEPRECATION NOTICE: PDF::Hash#version has been deprecated, use PDF::Reader::ObjectHash#pdf_version instead"
12
- pdf_version
13
- end
14
- end
15
- end