pdf-reader 1.4.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. checksums.yaml +5 -5
  2. data/CHANGELOG +53 -3
  3. data/{README.rdoc → README.md} +40 -23
  4. data/Rakefile +2 -2
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -1
  8. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  9. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  10. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  11. data/lib/pdf/reader/afm/Courier.afm +342 -342
  12. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  13. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  14. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  15. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  16. data/lib/pdf/reader/afm/MustRead.html +19 -0
  17. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  18. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  19. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  20. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  21. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  22. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  23. data/lib/pdf/reader/buffer.rb +14 -12
  24. data/lib/pdf/reader/cid_widths.rb +2 -0
  25. data/lib/pdf/reader/cmap.rb +48 -36
  26. data/lib/pdf/reader/encoding.rb +16 -18
  27. data/lib/pdf/reader/error.rb +5 -0
  28. data/lib/pdf/reader/filter/ascii85.rb +1 -0
  29. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  30. data/lib/pdf/reader/filter/depredict.rb +1 -0
  31. data/lib/pdf/reader/filter/flate.rb +29 -16
  32. data/lib/pdf/reader/filter/lzw.rb +2 -0
  33. data/lib/pdf/reader/filter/null.rb +2 -0
  34. data/lib/pdf/reader/filter/run_length.rb +4 -6
  35. data/lib/pdf/reader/filter.rb +2 -0
  36. data/lib/pdf/reader/font.rb +12 -13
  37. data/lib/pdf/reader/font_descriptor.rb +1 -0
  38. data/lib/pdf/reader/form_xobject.rb +1 -0
  39. data/lib/pdf/reader/glyph_hash.rb +7 -2
  40. data/lib/pdf/reader/lzw.rb +4 -4
  41. data/lib/pdf/reader/null_security_handler.rb +17 -0
  42. data/lib/pdf/reader/object_cache.rb +1 -0
  43. data/lib/pdf/reader/object_hash.rb +91 -37
  44. data/lib/pdf/reader/object_stream.rb +1 -0
  45. data/lib/pdf/reader/orientation_detector.rb +5 -4
  46. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  47. data/lib/pdf/reader/page.rb +30 -1
  48. data/lib/pdf/reader/page_layout.rb +19 -24
  49. data/lib/pdf/reader/page_state.rb +8 -5
  50. data/lib/pdf/reader/page_text_receiver.rb +23 -1
  51. data/lib/pdf/reader/pages_strategy.rb +2 -304
  52. data/lib/pdf/reader/parser.rb +10 -7
  53. data/lib/pdf/reader/print_receiver.rb +1 -0
  54. data/lib/pdf/reader/reference.rb +1 -0
  55. data/lib/pdf/reader/register_receiver.rb +1 -0
  56. data/lib/pdf/reader/resource_methods.rb +1 -0
  57. data/lib/pdf/reader/standard_security_handler.rb +80 -42
  58. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  59. data/lib/pdf/reader/stream.rb +1 -0
  60. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  61. data/lib/pdf/reader/text_run.rb +28 -9
  62. data/lib/pdf/reader/token.rb +1 -0
  63. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  64. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  65. data/lib/pdf/reader/width_calculator/built_in.rb +25 -16
  66. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  67. data/lib/pdf/reader/width_calculator/true_type.rb +2 -2
  68. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  69. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  70. data/lib/pdf/reader/width_calculator.rb +1 -0
  71. data/lib/pdf/reader/xref.rb +11 -5
  72. data/lib/pdf/reader.rb +30 -119
  73. data/lib/pdf-reader.rb +1 -0
  74. metadata +35 -61
  75. data/bin/pdf_list_callbacks +0 -17
  76. data/lib/pdf/hash.rb +0 -19
  77. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  78. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  79. data/lib/pdf/reader/text_receiver.rb +0 -265
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -31,6 +32,17 @@ class PDF::Reader
31
32
  # extracting various useful information.
32
33
  #
33
34
  class CMap # :nodoc:
35
+ CMAP_KEYWORDS = {
36
+ "begincodespacerange" => 1,
37
+ "endcodespacerange" => 1,
38
+ "beginbfchar" => 1,
39
+ "endbfchar" => 1,
40
+ "beginbfrange" => 1,
41
+ "endbfrange" => 1,
42
+ "begin" => 1,
43
+ "begincmap" => 1,
44
+ "def" => 1
45
+ }
34
46
 
35
47
  attr_reader :map
36
48
 
@@ -40,24 +52,25 @@ class PDF::Reader
40
52
  end
41
53
 
42
54
  def process_data(data)
55
+ parser = build_parser(data)
43
56
  mode = nil
44
- instructions = ""
57
+ instructions = []
45
58
 
46
- data.each_line do |l|
47
- if l.include?("beginbfchar")
59
+ while token = parser.parse_token(CMAP_KEYWORDS)
60
+ if token == "beginbfchar"
48
61
  mode = :char
49
- elsif l.include?("endbfchar")
62
+ elsif token == "endbfchar"
50
63
  process_bfchar_instructions(instructions)
51
- instructions = ""
64
+ instructions = []
52
65
  mode = nil
53
- elsif l.include?("beginbfrange")
66
+ elsif token == "beginbfrange"
54
67
  mode = :range
55
- elsif l.include?("endbfrange")
68
+ elsif token == "endbfrange"
56
69
  process_bfrange_instructions(instructions)
57
- instructions = ""
70
+ instructions = []
58
71
  mode = nil
59
72
  elsif mode == :char || mode == :range
60
- instructions << l
73
+ instructions << token
61
74
  end
62
75
  end
63
76
  end
@@ -83,44 +96,46 @@ class PDF::Reader
83
96
  Parser.new(buffer)
84
97
  end
85
98
 
99
+ # The following includes some manual decoding of UTF-16BE strings into unicode codepoints. In
100
+ # theory we could replace all the UTF-16 code with something based on Ruby's encoding support:
101
+ #
102
+ # str.dup.force_encoding("utf-16be").encode!("utf-8").unpack("U*")
103
+ #
104
+ # However, some cmaps contain broken surrogate pairs and the ruby encoding support raises an
105
+ # exception when we try converting broken UTF-16 to UTF-8
106
+ #
86
107
  def str_to_int(str)
87
108
  return nil if str.nil? || str.size == 0
88
- unpacked_string = if str.size == 1 # UTF-8
109
+ unpacked_string = if str.bytesize == 1 # UTF-8
89
110
  str.unpack("C*")
90
111
  else # UTF-16
91
112
  str.unpack("n*")
92
113
  end
93
- if unpacked_string.size == 1
94
- unpacked_string
95
- elsif unpacked_string.size == 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
96
- # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
97
- # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
98
- # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
99
- [(unpacked_string[0] - 0xD800) * 0x400 + (unpacked_string[1] - 0xDC00) + 0x10000]
100
- else
101
- # it is a bad idea to just return the first 16 bits, as this doesn't allow
102
- # for ligatures for example fi (U+0066 U+0069)
103
- unpacked_string
114
+ result = []
115
+ while unpacked_string.any? do
116
+ if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
117
+ # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
118
+ # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
119
+ # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
120
+ points = [unpacked_string.shift, unpacked_string.shift]
121
+ result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
122
+ else
123
+ result << unpacked_string.shift
124
+ end
104
125
  end
126
+ result
105
127
  end
106
128
 
107
129
  def process_bfchar_instructions(instructions)
108
- parser = build_parser(instructions)
109
- find = str_to_int(parser.parse_token)
110
- replace = str_to_int(parser.parse_token)
111
- while find && replace
112
- @map[find[0]] = replace
113
- find = str_to_int(parser.parse_token)
114
- replace = str_to_int(parser.parse_token)
130
+ instructions.each_slice(2) do |one, two|
131
+ find = str_to_int(one)
132
+ replace = str_to_int(two)
133
+ @map[find.first] = replace
115
134
  end
116
135
  end
117
136
 
118
137
  def process_bfrange_instructions(instructions)
119
- parser = build_parser(instructions)
120
- start = parser.parse_token
121
- finish = parser.parse_token
122
- to = parser.parse_token
123
- while start && finish && to
138
+ instructions.each_slice(3) do |start, finish, to|
124
139
  if start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(String)
125
140
  bfrange_type_one(start, finish, to)
126
141
  elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
@@ -128,9 +143,6 @@ class PDF::Reader
128
143
  else
129
144
  raise "invalid bfrange section"
130
145
  end
131
- start = parser.parse_token
132
- finish = parser.parse_token
133
- to = parser.parse_token
134
146
  end
135
147
  end
136
148
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -39,20 +40,22 @@ class PDF::Reader
39
40
  @mapping = default_mapping # maps from character codes to Unicode codepoints
40
41
  @string_cache = {} # maps from character codes to UTF-8 strings.
41
42
 
42
- if enc.kind_of?(Hash)
43
- self.differences = enc[:Differences] if enc[:Differences]
44
- enc = enc[:Encoding] || enc[:BaseEncoding]
45
- elsif enc != nil
46
- enc = enc.to_sym
43
+ @enc_name = if enc.kind_of?(Hash)
44
+ enc[:Encoding] || enc[:BaseEncoding]
45
+ elsif enc && enc.respond_to?(:to_sym)
46
+ enc.to_sym
47
47
  else
48
- enc = nil
48
+ :StandardEncoding
49
49
  end
50
50
 
51
- @enc_name = enc
52
- @unpack = get_unpack(enc)
53
- @map_file = get_mapping_file(enc)
51
+ @unpack = get_unpack(@enc_name)
52
+ @map_file = get_mapping_file(@enc_name)
54
53
 
55
54
  load_mapping(@map_file) if @map_file
55
+
56
+ if enc.is_a?(Hash) && enc[:Differences]
57
+ self.differences = enc[:Differences]
58
+ end
56
59
  end
57
60
 
58
61
  # set the differences table for this encoding. should be an array in the following format:
@@ -147,7 +150,7 @@ class PDF::Reader
147
150
  ret = [
148
151
  @mapping[glyph_code.to_i] || glyph_code.to_i
149
152
  ].pack("U*")
150
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
153
+ ret.force_encoding("UTF-8")
151
154
  ret
152
155
  end
153
156
 
@@ -158,13 +161,13 @@ class PDF::Reader
158
161
  def little_boxes(times)
159
162
  codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
160
163
  ret = codepoints.pack("U*")
161
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
164
+ ret.force_encoding("UTF-8")
162
165
  ret
163
166
  end
164
167
 
165
168
  def convert_to_utf8(str)
166
169
  ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
167
- ret.force_encoding("UTF-8") if ret.respond_to?(:force_encoding)
170
+ ret.force_encoding("UTF-8")
168
171
  ret
169
172
  end
170
173
 
@@ -198,17 +201,12 @@ class PDF::Reader
198
201
  end
199
202
  end
200
203
 
201
- def has_mapping?
202
- @mapping.size > 0
203
- end
204
-
205
204
  def glyphlist
206
205
  @glyphlist ||= PDF::Reader::GlyphHash.new
207
206
  end
208
207
 
209
208
  def load_mapping(file)
210
- RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
211
- File.open(file, mode) do |f|
209
+ File.open(file, "r:BINARY") do |f|
212
210
  f.each do |l|
213
211
  _m, single_byte, unicode = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
214
212
  @mapping["0x#{single_byte}".hex] = "0x#{unicode}".hex if single_byte
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -52,6 +53,10 @@ class PDF::Reader
52
53
  # the PDF spec and cannot be recovered
53
54
  class MalformedPDFError < RuntimeError; end
54
55
 
56
+ ################################################################################
57
+ # an exception that is raised when an invalid page number is used
58
+ class InvalidPageError < ArgumentError; end
59
+
55
60
  ################################################################################
56
61
  # an exception that is raised when a PDF object appears to be invalid
57
62
  class InvalidObjectError < MalformedPDFError; end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'ascii85'
4
5
 
@@ -1,4 +1,6 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
3
+
2
4
  #
3
5
  class PDF::Reader
4
6
  module Filter # :nodoc:
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  module Filter # :nodoc:
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
 
4
5
  require 'zlib'
@@ -7,6 +8,9 @@ class PDF::Reader
7
8
  module Filter # :nodoc:
8
9
  # implementation of the Flate (zlib) stream filter
9
10
  class Flate
11
+ ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
12
+ ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
13
+
10
14
  def initialize(options = {})
11
15
  @options = options
12
16
  end
@@ -14,25 +18,34 @@ class PDF::Reader
14
18
  ################################################################################
15
19
  # Decode the specified data with the Zlib compression algorithm
16
20
  def filter(data)
17
- deflated = nil
21
+ deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
22
+
23
+ if deflated.nil?
24
+ raise MalformedPDFError,
25
+ "Error while inflating a compressed stream (no suitable inflation algorithm found)"
26
+ end
27
+ Depredict.new(@options).filter(deflated)
28
+ end
29
+
30
+ private
31
+
32
+ def zlib_inflate(data)
18
33
  begin
19
- deflated = Zlib::Inflate.new.inflate(data)
20
- rescue Zlib::DataError => e
34
+ return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
35
+ rescue Zlib::DataError
21
36
  # by default, Ruby's Zlib assumes the data it's inflating
22
- # is RFC1951 deflated data, wrapped in a RFC1951 zlib container.
23
- # If that fails, then use an undocumented 'feature' to attempt to inflate
24
- # the data as a raw RFC1951 stream.
25
- #
26
- # See
27
- # - http://blade.nagaokaut.ac.jp/cgi-bin/scat.rb/ruby/ruby-talk/243545
28
- # - http://www.gzip.org/zlib/zlib_faq.html#faq38
29
- deflated = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(data)
37
+ # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
38
+ # fails, swallow the exception and attempt to inflate the data as a raw
39
+ # RFC1951 stream.
30
40
  end
31
- Depredict.new(@options).filter(deflated)
32
- rescue Exception => e
33
- # Oops, there was a problem inflating the stream
34
- raise MalformedPDFError,
35
- "Error occured while inflating a compressed stream (#{e.class.to_s}: #{e.to_s})"
41
+
42
+ begin
43
+ return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
44
+ rescue StandardError
45
+ # swallow this one too, so we can try some other fallback options
46
+ end
47
+
48
+ nil
36
49
  end
37
50
  end
38
51
  end
@@ -1,4 +1,6 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
3
+
2
4
  #
3
5
  class PDF::Reader
4
6
  module Filter # :nodoc:
@@ -1,4 +1,6 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
3
+
2
4
  #
3
5
  class PDF::Reader
4
6
  module Filter # :nodoc:
@@ -1,4 +1,6 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
3
+
2
4
  #
3
5
  class PDF::Reader # :nodoc:
4
6
  module Filter # :nodoc:
@@ -12,14 +14,10 @@ class PDF::Reader # :nodoc:
12
14
  # Decode the specified data with the RunLengthDecode compression algorithm
13
15
  def filter(data)
14
16
  pos = 0
15
- out = ""
17
+ out = "".dup
16
18
 
17
19
  while pos < data.length
18
- if data.respond_to?(:getbyte)
19
- length = data.getbyte(pos)
20
- else
21
- length = data[pos]
22
- end
20
+ length = data.getbyte(pos)
23
21
  pos += 1
24
22
 
25
23
  case
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -46,6 +47,7 @@ class PDF::Reader
46
47
  when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
47
48
  when :DCTDecode then PDF::Reader::Filter::Null.new(options)
48
49
  when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
50
+ when :Fl then PDF::Reader::Filter::Flate.new(options)
49
51
  when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
50
52
  when :JPXDecode then PDF::Reader::Filter::Null.new(options)
51
53
  when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -36,11 +37,7 @@ class PDF::Reader
36
37
  attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
37
38
  :cid_widths, :cid_default_width
38
39
 
39
- def initialize(ohash = nil, obj = nil)
40
- if ohash.nil? || obj.nil?
41
- $stderr.puts "DEPREACTION WARNING - PDF::Reader::Font.new should be called with 2 args"
42
- return
43
- end
40
+ def initialize(ohash, obj)
44
41
  @ohash = ohash
45
42
  @tounicode = nil
46
43
 
@@ -52,12 +49,6 @@ class PDF::Reader
52
49
  @encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
53
50
  end
54
51
 
55
- def basefont=(font)
56
- $stderr.puts "Font#basefont= is deprecated and will be removed in the 2.0 release"
57
- @encoding ||= default_encoding(font)
58
- @basefont = font
59
- end
60
-
61
52
  def to_utf8(params)
62
53
  if @tounicode
63
54
  to_utf8_via_cmap(params)
@@ -106,7 +97,13 @@ class PDF::Reader
106
97
  elsif @subtype == :Type3
107
98
  PDF::Reader::WidthCalculator::TypeOneOrThree.new(self)
108
99
  elsif @subtype == :TrueType
109
- PDF::Reader::WidthCalculator::TrueType.new(self)
100
+ if @font_descriptor
101
+ PDF::Reader::WidthCalculator::TrueType.new(self)
102
+ else
103
+ # A TrueType font that isn't embedded. Most readers look for a version on the
104
+ # local system and fallback to a substitute. For now, we go straight to a substitute
105
+ PDF::Reader::WidthCalculator::BuiltIn.new(self)
106
+ end
110
107
  elsif @subtype == :CIDFontType0 || @subtype == :CIDFontType2
111
108
  PDF::Reader::WidthCalculator::Composite.new(self)
112
109
  else
@@ -134,7 +131,9 @@ class PDF::Reader
134
131
  if obj[:ToUnicode]
135
132
  # ToUnicode is optional for Type1 and Type3
136
133
  stream = @ohash.object(obj[:ToUnicode])
137
- @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
134
+ if stream.is_a?(PDF::Reader::Stream)
135
+ @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
136
+ end
138
137
  end
139
138
  end
140
139
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'ttfunk'
4
5
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'digest/md5'
4
5
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -48,6 +49,9 @@ class PDF::Reader
48
49
  # h.name_to_unicode(:Euro)
49
50
  # => 8364
50
51
  #
52
+ # h.name_to_unicode(:X4A)
53
+ # => 74
54
+ #
51
55
  # h.name_to_unicode(:G30)
52
56
  # => 48
53
57
  #
@@ -62,6 +66,8 @@ class PDF::Reader
62
66
 
63
67
  if @by_name.has_key?(name)
64
68
  @by_name[name]
69
+ elsif str.match(/\AX[0-9a-fA-F]{2,4}\Z/)
70
+ "0x#{str[1,4]}".hex
65
71
  elsif str.match(/\Auni[A-F\d]{4}\Z/)
66
72
  "0x#{str[3,4]}".hex
67
73
  elsif str.match(/\Au[A-F\d]{4,6}\Z/)
@@ -102,8 +108,7 @@ class PDF::Reader
102
108
  keyed_by_name = {}
103
109
  keyed_by_codepoint = {}
104
110
 
105
- RUBY_VERSION >= "1.9" ? mode = "r:BINARY" : mode = "r"
106
- File.open(File.dirname(__FILE__) + "/glyphlist.txt", mode) do |f|
111
+ File.open(File.dirname(__FILE__) + "/glyphlist.txt", "r:BINARY") do |f|
107
112
  f.each do |l|
108
113
  _m, name, code = *l.match(/([0-9A-Za-z]+);([0-9A-F]{4})/)
109
114
  if name && code
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  module PDF
4
5
 
@@ -22,7 +23,7 @@ module PDF
22
23
 
23
24
  def initialize(data, bits_in_chunk)
24
25
  @data = data
25
- @data.force_encoding("BINARY") if @data.respond_to?(:force_encoding)
26
+ @data.force_encoding("BINARY")
26
27
  @bits_in_chunk = bits_in_chunk
27
28
  @current_pos = 0
28
29
  @bits_left_in_byte = 8
@@ -82,7 +83,7 @@ module PDF
82
83
  #
83
84
  def self.decode(data)
84
85
  stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
85
- result = ''
86
+ result = "".dup
86
87
  until (code = stream.read) == CODE_EOD
87
88
  if code == CODE_CLEAR_TABLE
88
89
  stream.set_bits_in_chunk(9)
@@ -116,11 +117,10 @@ module PDF
116
117
  result
117
118
  end
118
119
 
119
- private
120
-
121
120
  def self.create_new_string(string_table,some_code, other_code)
122
121
  string_table[some_code] + string_table[other_code][0].chr
123
122
  end
123
+ private_class_method :create_new_string
124
124
 
125
125
  end
126
126
  end
@@ -0,0 +1,17 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ class PDF::Reader
5
+
6
+ # A null object security handler. Used when a PDF is unencrypted.
7
+ class NullSecurityHandler
8
+
9
+ def self.supports?(encrypt)
10
+ encrypt.nil?
11
+ end
12
+
13
+ def decrypt(buf, _ref)
14
+ buf
15
+ end
16
+ end
17
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  require 'hashery/lru_hash'
4
5