pdf-reader 2.4.2 → 2.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +44 -0
  3. data/README.md +16 -1
  4. data/Rakefile +1 -1
  5. data/examples/extract_fonts.rb +12 -7
  6. data/examples/rspec.rb +1 -0
  7. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  8. data/lib/pdf/reader/buffer.rb +63 -21
  9. data/lib/pdf/reader/cid_widths.rb +1 -0
  10. data/lib/pdf/reader/cmap.rb +5 -3
  11. data/lib/pdf/reader/encoding.rb +3 -2
  12. data/lib/pdf/reader/error.rb +11 -3
  13. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  14. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  15. data/lib/pdf/reader/filter/depredict.rb +10 -8
  16. data/lib/pdf/reader/filter/flate.rb +4 -2
  17. data/lib/pdf/reader/filter/lzw.rb +2 -0
  18. data/lib/pdf/reader/filter/null.rb +1 -0
  19. data/lib/pdf/reader/filter/run_length.rb +19 -13
  20. data/lib/pdf/reader/filter.rb +1 -0
  21. data/lib/pdf/reader/font.rb +44 -0
  22. data/lib/pdf/reader/font_descriptor.rb +1 -0
  23. data/lib/pdf/reader/form_xobject.rb +1 -0
  24. data/lib/pdf/reader/glyph_hash.rb +16 -9
  25. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  26. data/lib/pdf/reader/lzw.rb +4 -2
  27. data/lib/pdf/reader/null_security_handler.rb +1 -0
  28. data/lib/pdf/reader/object_cache.rb +1 -0
  29. data/lib/pdf/reader/object_hash.rb +8 -3
  30. data/lib/pdf/reader/object_stream.rb +1 -0
  31. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  32. data/lib/pdf/reader/page.rb +73 -11
  33. data/lib/pdf/reader/page_layout.rb +37 -37
  34. data/lib/pdf/reader/page_state.rb +18 -23
  35. data/lib/pdf/reader/page_text_receiver.rb +68 -6
  36. data/lib/pdf/reader/pages_strategy.rb +1 -0
  37. data/lib/pdf/reader/parser.rb +15 -7
  38. data/lib/pdf/reader/point.rb +25 -0
  39. data/lib/pdf/reader/print_receiver.rb +1 -0
  40. data/lib/pdf/reader/rectangle.rb +113 -0
  41. data/lib/pdf/reader/reference.rb +1 -0
  42. data/lib/pdf/reader/register_receiver.rb +1 -0
  43. data/lib/pdf/reader/resource_methods.rb +5 -0
  44. data/lib/pdf/reader/standard_security_handler.rb +1 -0
  45. data/lib/pdf/reader/standard_security_handler_v5.rb +1 -0
  46. data/lib/pdf/reader/stream.rb +1 -0
  47. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  48. data/lib/pdf/reader/text_run.rb +14 -6
  49. data/lib/pdf/reader/token.rb +1 -0
  50. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  51. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  52. data/lib/pdf/reader/width_calculator/built_in.rb +8 -15
  53. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  54. data/lib/pdf/reader/width_calculator/true_type.rb +1 -0
  55. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  56. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  57. data/lib/pdf/reader/width_calculator.rb +1 -0
  58. data/lib/pdf/reader/xref.rb +7 -1
  59. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  60. data/lib/pdf/reader.rb +29 -6
  61. data/lib/pdf-reader.rb +1 -0
  62. data/rbi/pdf-reader.rbi +1763 -0
  63. metadata +12 -7
  64. data/lib/pdf/reader/orientation_detector.rb +0 -34
@@ -1,7 +1,9 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'pdf/reader/overlapping_runs_filter'
6
+ require 'pdf/reader/zero_width_runs_filter'
5
7
 
6
8
  class PDF::Reader
7
9
 
@@ -15,14 +17,15 @@ class PDF::Reader
15
17
  DEFAULT_FONT_SIZE = 12
16
18
 
17
19
  def initialize(runs, mediabox)
18
- raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
20
+ # mediabox is a 4-element array for now, but it'd be nice to switch to a
21
+ # PDF::Reader::Rectangle at some point
22
+ PDF::Reader::Error.validate_not_nil(mediabox, "mediabox")
19
23
 
20
- @runs = merge_runs(OverlappingRunsFilter.exclude_redundant_runs(runs))
24
+ @mediabox = process_mediabox(mediabox)
25
+ @runs = runs
21
26
  @mean_font_size = mean(@runs.map(&:font_size)) || DEFAULT_FONT_SIZE
22
27
  @mean_font_size = DEFAULT_FONT_SIZE if @mean_font_size == 0
23
- @mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
24
- @page_width = (mediabox[2] - mediabox[0]).abs
25
- @page_height = (mediabox[3] - mediabox[1]).abs
28
+ @median_glyph_width = median(@runs.map(&:mean_character_width)) || 0
26
29
  @x_offset = @runs.map(&:x).sort.first || 0
27
30
  lowest_y = @runs.map(&:y).sort.first || 0
28
31
  @y_offset = lowest_y > 0 ? 0 : lowest_y
@@ -30,6 +33,7 @@ class PDF::Reader
30
33
 
31
34
  def to_s
32
35
  return "" if @runs.empty?
36
+ return "" if row_count == 0
33
37
 
34
38
  page = row_count.times.map { |i| " " * col_count }
35
39
  @runs.each do |run|
@@ -44,6 +48,14 @@ class PDF::Reader
44
48
 
45
49
  private
46
50
 
51
+ def page_width
52
+ @mediabox.width
53
+ end
54
+
55
+ def page_height
56
+ @mediabox.height
57
+ end
58
+
47
59
  # given an array of strings, return a new array with empty rows from the
48
60
  # beginning and end removed.
49
61
  #
@@ -62,19 +74,19 @@ class PDF::Reader
62
74
  end
63
75
 
64
76
  def row_count
65
- @row_count ||= (@page_height / @mean_font_size).floor
77
+ @row_count ||= (page_height / @mean_font_size).floor
66
78
  end
67
79
 
68
80
  def col_count
69
- @col_count ||= ((@page_width / @mean_glyph_width) * 1.05).floor
81
+ @col_count ||= ((page_width / @median_glyph_width) * 1.05).floor
70
82
  end
71
83
 
72
84
  def row_multiplier
73
- @row_multiplier ||= @page_height.to_f / row_count.to_f
85
+ @row_multiplier ||= page_height.to_f / row_count.to_f
74
86
  end
75
87
 
76
88
  def col_multiplier
77
- @col_multiplier ||= @page_width.to_f / col_count.to_f
89
+ @col_multiplier ||= page_width.to_f / col_count.to_f
78
90
  end
79
91
 
80
92
  def mean(collection)
@@ -85,40 +97,28 @@ class PDF::Reader
85
97
  end
86
98
  end
87
99
 
88
- def each_line(&block)
89
- @runs.sort.group_by { |run|
90
- run.y.to_i
91
- }.map { |y, collection|
92
- yield y, collection
93
- }
100
+ def median(collection)
101
+ if collection.size == 0
102
+ 0
103
+ else
104
+ collection.sort[(collection.size * 0.5).floor]
105
+ end
94
106
  end
95
107
 
96
- # take a collection of TextRun objects and merge any that are in close
97
- # proximity
98
- def merge_runs(runs)
99
- runs.group_by { |char|
100
- char.y.to_i
101
- }.map { |y, chars|
102
- group_chars_into_runs(chars.sort)
103
- }.flatten.sort
108
+ def local_string_insert(haystack, needle, index)
109
+ haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
104
110
  end
105
111
 
106
- def group_chars_into_runs(chars)
107
- runs = []
108
- while head = chars.shift
109
- if runs.empty?
110
- runs << head
111
- elsif runs.last.mergable?(head)
112
- runs[-1] = runs.last + head
113
- else
114
- runs << head
115
- end
112
+ def process_mediabox(mediabox)
113
+ if mediabox.is_a?(Array)
114
+ msg = "Passing the mediabox to PageLayout as an Array is deprecated," +
115
+ " please use a Rectangle instead"
116
+ $stderr.puts msg
117
+ PDF::Reader::Rectangle.from_array(mediabox)
118
+ else
119
+ mediabox
116
120
  end
117
- runs
118
121
  end
119
122
 
120
- def local_string_insert(haystack, needle, index)
121
- haystack[Range.new(index, index + needle.length - 1)] = String.new(needle)
122
- end
123
123
  end
124
124
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'pdf/reader/transformation_matrix'
@@ -30,15 +31,7 @@ class PDF::Reader
30
31
  @xobject_stack = [page.xobjects]
31
32
  @cs_stack = [page.color_spaces]
32
33
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
33
- if page.rotate == 0
34
- state[:ctm] = identity_matrix
35
- else
36
- rotate_cos = Math.cos(page.rotate * (Math::PI/180.0)).round(2)
37
- rotate_sin = Math.sin(page.rotate * (Math::PI/180.0)).round(2)
38
- state[:ctm] = TransformationMatrix.new(rotate_cos, rotate_sin,
39
- rotate_sin * -1, rotate_cos,
40
- 0, 0)
41
- end
34
+ state[:ctm] = identity_matrix
42
35
  end
43
36
 
44
37
  #####################################################
@@ -320,7 +313,7 @@ class PDF::Reader
320
313
  # may need to be added
321
314
  #
322
315
  def process_glyph_displacement(w0, tj, word_boundary)
323
- fs = font_size # font size
316
+ fs = state[:text_font_size]
324
317
  tc = state[:char_spacing]
325
318
  if word_boundary
326
319
  tw = state[:word_spacing]
@@ -330,22 +323,24 @@ class PDF::Reader
330
323
  th = state[:h_scaling]
331
324
  # optimise the common path to reduce Float allocations
332
325
  if th == 1 && tj == 0 && tc == 0 && tw == 0
333
- glyph_width = w0 * fs
334
- tx = glyph_width
326
+ tx = w0 * fs
327
+ elsif tj != 0
328
+ # don't apply spacing to TJ displacement
329
+ tx = (w0 - (tj/1000.0)) * fs * th
335
330
  else
336
- glyph_width = ((w0 - (tj/1000.0)) * fs) * th
337
- tx = glyph_width + ((tc + tw) * th)
331
+ # apply horizontal scaling to spacing values but not font size
332
+ tx = ((w0 * fs) + tc + tw) * th
338
333
  end
339
-
340
- # TODO: I'm pretty sure that tx shouldn't need to be divided by
341
- # ctm[0] here, but this gets my tests green and I'm out of
342
- # ideas for now
343
334
  # TODO: support ty > 0
344
- if ctm.a == 1 || ctm.a == 0
345
- @text_matrix.horizontal_displacement_multiply!(tx)
346
- else
347
- @text_matrix.horizontal_displacement_multiply!(tx/ctm.a)
348
- end
335
+ ty = 0
336
+ temp = TransformationMatrix.new(1, 0,
337
+ 0, 1,
338
+ tx, ty)
339
+ @text_matrix = temp.multiply!(
340
+ @text_matrix.a, @text_matrix.b,
341
+ @text_matrix.c, @text_matrix.d,
342
+ @text_matrix.e, @text_matrix.f
343
+ )
349
344
  @font_size = @text_rendering_matrix = nil # invalidate cached value
350
345
  end
351
346
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'forwardable'
@@ -41,16 +42,37 @@ module PDF
41
42
  # starting a new page
42
43
  def page=(page)
43
44
  @state = PageState.new(page)
45
+ @page = page
44
46
  @content = []
45
47
  @characters = []
46
- @mediabox = page.objects.deref(page.attributes[:MediaBox])
47
- device_bl = @state.ctm_transform(@mediabox[0], @mediabox[1])
48
- device_tr = @state.ctm_transform(@mediabox[2], @mediabox[3])
49
- @device_mediabox = [ device_bl.first, device_bl.last, device_tr.first, device_tr.last]
50
48
  end
51
49
 
50
+ def runs(opts = {})
51
+ runs = @characters
52
+
53
+ if rect = opts.fetch(:rect, @page.rectangles[:CropBox])
54
+ runs = BoundingRectangleRunsFilter.runs_within_rect(runs, rect)
55
+ end
56
+
57
+ if opts.fetch(:skip_zero_width, true)
58
+ runs = ZeroWidthRunsFilter.exclude_zero_width_runs(runs)
59
+ end
60
+
61
+ if opts.fetch(:skip_overlapping, true)
62
+ runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
63
+ end
64
+
65
+ if opts.fetch(:merge, true)
66
+ runs = merge_runs(runs)
67
+ end
68
+
69
+ runs
70
+ end
71
+
72
+ # deprecated
52
73
  def content
53
- PageLayout.new(@characters, @device_mediabox).to_s
74
+ mediabox = @page.rectangles[:MediaBox]
75
+ PageLayout.new(runs, mediabox).to_s
54
76
  end
55
77
 
56
78
  #####################################################
@@ -104,11 +126,13 @@ module PDF
104
126
  glyphs.each_with_index do |glyph_code, index|
105
127
  # paint the current glyph
106
128
  newx, newy = @state.trm_transform(0,0)
129
+ newx, newy = apply_rotation(newx, newy)
130
+
107
131
  utf8_chars = @state.current_font.to_utf8(glyph_code)
108
132
 
109
133
  # apply to glyph displacement for the current glyph so the next
110
134
  # glyph will appear in the correct position
111
- glyph_width = @state.current_font.glyph_width(glyph_code) / 1000.0
135
+ glyph_width = @state.current_font.glyph_width_in_text_space(glyph_code)
112
136
  th = 1
113
137
  scaled_glyph_width = glyph_width * @state.font_size * th
114
138
  unless utf8_chars == SPACE
@@ -118,6 +142,44 @@ module PDF
118
142
  end
119
143
  end
120
144
 
145
+ def apply_rotation(x, y)
146
+ if @page.rotate == 90
147
+ tmp = x
148
+ x = y
149
+ y = tmp * -1
150
+ elsif @page.rotate == 180
151
+ y *= -1
152
+ x *= -1
153
+ elsif @page.rotate == 270
154
+ tmp = y
155
+ y = x
156
+ x = tmp * -1
157
+ end
158
+ return x, y
159
+ end
160
+
161
+ # take a collection of TextRun objects and merge any that are in close
162
+ # proximity
163
+ def merge_runs(runs)
164
+ runs.group_by { |char|
165
+ char.y.to_i
166
+ }.map { |y, chars|
167
+ group_chars_into_runs(chars.sort)
168
+ }.flatten.sort
169
+ end
170
+
171
+ def group_chars_into_runs(chars)
172
+ chars.each_with_object([]) do |char, runs|
173
+ if runs.empty?
174
+ runs << char
175
+ elsif runs.last.mergable?(char)
176
+ runs[-1] = runs.last + char
177
+ else
178
+ runs << char
179
+ end
180
+ end
181
+ end
182
+
121
183
  end
122
184
  end
123
185
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -166,7 +167,9 @@ class PDF::Reader
166
167
 
167
168
  # add a missing digit if required, as required by the spec
168
169
  str << "0" unless str.size % 2 == 0
169
- str.scan(/../).map {|i| i.hex.chr}.join.force_encoding("binary")
170
+ str.chars.each_slice(2).map { |nibbles|
171
+ nibbles.join("").hex.chr
172
+ }.join.force_encoding("binary")
170
173
  end
171
174
  ################################################################################
172
175
  # Reads a PDF String from the buffer and converts it to a Ruby String
@@ -175,15 +178,18 @@ class PDF::Reader
175
178
  return "".dup.force_encoding("binary") if str == ")"
176
179
  Error.assert_equal(parse_token, ")")
177
180
 
178
- str.gsub!(/\\([nrtbf()\\\n]|\d{1,3})?|\r\n?|\n\r/m) do |match|
179
- MAPPING[match] || "".dup
181
+ str.gsub!(/\\(\r\n|[nrtbf()\\\n\r]|([0-7]{1,3}))?|\r\n?/m) do |match|
182
+ if $2.nil? # not octal digits
183
+ MAPPING[match] || "".dup
184
+ else # must be octal digits
185
+ ($2.oct & 0xff).chr # ignore high level overflow
186
+ end
180
187
  end
181
188
  str.force_encoding("binary")
182
189
  end
183
190
 
184
191
  MAPPING = {
185
192
  "\r" => "\n",
186
- "\n\r" => "\n",
187
193
  "\r\n" => "\n",
188
194
  "\\n" => "\n",
189
195
  "\\r" => "\r",
@@ -194,10 +200,9 @@ class PDF::Reader
194
200
  "\\)" => ")",
195
201
  "\\\\" => "\\",
196
202
  "\\\n" => "",
203
+ "\\\r" => "",
204
+ "\\\r\n" => "",
197
205
  }
198
- 0.upto(9) { |n| MAPPING["\\00"+n.to_s] = ("00"+n.to_s).oct.chr }
199
- 0.upto(99) { |n| MAPPING["\\0"+n.to_s] = ("0"+n.to_s).oct.chr }
200
- 0.upto(377) { |n| MAPPING["\\"+n.to_s] = n.to_s.oct.chr }
201
206
 
202
207
  ################################################################################
203
208
  # Decodes the contents of a PDF Stream and returns it as a Ruby String.
@@ -205,6 +210,9 @@ class PDF::Reader
205
210
  raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
206
211
  if @objects
207
212
  length = @objects.deref(dict[:Length])
213
+ if dict[:Filter]
214
+ dict[:Filter] = @objects.deref(dict[:Filter])
215
+ end
208
216
  else
209
217
  length = dict[:Length] || 0
210
218
  end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ # typed: true
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # PDFs are all about positioning content on a page, so there's lots of need to
9
+ # work with a set of X,Y coordinates.
10
+ #
11
+ class Point
12
+
13
+ attr_reader :x, :y
14
+
15
+ def initialize(x, y)
16
+ @x, @y = x, y
17
+ end
18
+
19
+ def ==(other)
20
+ other.respond_to?(:x) && other.respond_to?(:y) && x == other.x && y == other.y
21
+ end
22
+
23
+ end
24
+ end
25
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -0,0 +1,113 @@
1
+ # coding: utf-8
2
+ # typed: true
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # PDFs represent rectangles all over the place. They're 4 element arrays, like this:
9
+ #
10
+ # [A, B, C, D]
11
+ #
12
+ # Four element arrays are yucky to work with though, so here's a class that's better.
13
+ # Initialize it with the 4 elements, and get utility functions (width, height, etc)
14
+ # for free.
15
+ #
16
+ # By convention the first two elements are x1, y1, the co-ords for the bottom left corner
17
+ # of the rectangle. The third and fourth elements are x2, y2, the co-ords for the top right
18
+ # corner of the rectangle. It's valid for the alternative corners to be used though, so
19
+ # we don't assume which is which.
20
+ #
21
+ class Rectangle
22
+
23
+ attr_reader :bottom_left, :bottom_right, :top_left, :top_right
24
+
25
+ def initialize(x1, y1, x2, y2)
26
+ set_corners(x1, y1, x2, y2)
27
+ end
28
+
29
+ def self.from_array(arr)
30
+ if arr.size != 4
31
+ raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
32
+ end
33
+
34
+ PDF::Reader::Rectangle.new(
35
+ arr[0].to_f,
36
+ arr[1].to_f,
37
+ arr[2].to_f,
38
+ arr[3].to_f,
39
+ )
40
+ end
41
+
42
+ def ==(other)
43
+ to_a == other.to_a
44
+ end
45
+
46
+ def height
47
+ top_right.y - bottom_right.y
48
+ end
49
+
50
+ def width
51
+ bottom_right.x - bottom_left.x
52
+ end
53
+
54
+ def contains?(point)
55
+ point.x >= bottom_left.x && point.x <= top_right.x &&
56
+ point.y >= bottom_left.y && point.y <= top_right.y
57
+ end
58
+
59
+ # A pdf-style 4-number array
60
+ def to_a
61
+ [
62
+ bottom_left.x,
63
+ bottom_left.y,
64
+ top_right.x,
65
+ top_right.y,
66
+ ]
67
+ end
68
+
69
+ def apply_rotation(degrees)
70
+ return if degrees != 90 && degrees != 180 && degrees != 270
71
+
72
+ if degrees == 90
73
+ new_x1 = bottom_left.x
74
+ new_y1 = bottom_left.y - width
75
+ new_x2 = bottom_left.x + height
76
+ new_y2 = bottom_left.y
77
+ elsif degrees == 180
78
+ new_x1 = bottom_left.x - width
79
+ new_y1 = bottom_left.y - height
80
+ new_x2 = bottom_left.x
81
+ new_y2 = bottom_left.y
82
+ elsif degrees == 270
83
+ new_x1 = bottom_left.x - height
84
+ new_y1 = bottom_left.y
85
+ new_x2 = bottom_left.x
86
+ new_y2 = bottom_left.y + width
87
+ end
88
+ set_corners(new_x1, new_y1, new_x2, new_y2)
89
+ end
90
+
91
+ private
92
+
93
+ def set_corners(x1, y1, x2, y2)
94
+ @bottom_left = PDF::Reader::Point.new(
95
+ [x1, x2].min,
96
+ [y1, y2].min,
97
+ )
98
+ @bottom_right = PDF::Reader::Point.new(
99
+ [x1, x2].max,
100
+ [y1, y2].min,
101
+ )
102
+ @top_left = PDF::Reader::Point.new(
103
+ [x1, x2].min,
104
+ [y1, y2].max,
105
+ )
106
+ @top_right = PDF::Reader::Point.new(
107
+ [x1, x2].max,
108
+ [y1, y2].max,
109
+ )
110
+ end
111
+ end
112
+ end
113
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  # Copyright (C) 2010 James Healy (jimmy@deefa.com)
@@ -1,12 +1,17 @@
1
1
  # coding: utf-8
2
+ # typed: false
2
3
  # frozen_string_literal: true
3
4
 
5
+ # Setting this file to "typed: true" is difficult because it's a mixin that assumes some things
6
+ # are available from the class, like @objects and resources. Sorbet doesn't know about them.
7
+
4
8
  module PDF
5
9
  class Reader
6
10
 
7
11
  # mixin for common methods in Page and FormXobjects
8
12
  #
9
13
  module ResourceMethods
14
+
10
15
  # Returns a Hash of color spaces that are available to this page
11
16
  #
12
17
  # NOTE: this method de-serialise objects from the underlying PDF
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'digest'
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  # utilities.rb : General-purpose utility classes which don't fit anywhere else
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,15 +7,14 @@ class PDF::Reader
6
7
  class TextRun
7
8
  include Comparable
8
9
 
9
- attr_reader :x, :y, :width, :font_size, :text
10
+ attr_reader :origin, :width, :font_size, :text
10
11
 
11
12
  alias :to_s :text
12
13
 
13
14
  def initialize(x, y, width, font_size, text)
14
- @x = x
15
- @y = y
15
+ @origin = PDF::Reader::Point.new(x, y)
16
16
  @width = width
17
- @font_size = font_size.floor
17
+ @font_size = font_size
18
18
  @text = text
19
19
  end
20
20
 
@@ -34,12 +34,20 @@ class PDF::Reader
34
34
  end
35
35
  end
36
36
 
37
+ def x
38
+ @origin.x
39
+ end
40
+
41
+ def y
42
+ @origin.y
43
+ end
44
+
37
45
  def endx
38
- @endx ||= x + width
46
+ @endx ||= @origin.x + width
39
47
  end
40
48
 
41
49
  def endy
42
- @endy ||= y + font_size
50
+ @endy ||= @origin.y + font_size
43
51
  end
44
52
 
45
53
  def mean_character_width
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader