pdf-reader 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +3 -0
- data/README.rdoc +3 -9
- data/lib/pdf/reader/page.rb +4 -0
- data/lib/pdf/reader/page_layout.rb +4 -2
- data/lib/pdf/reader/page_state.rb +14 -5
- data/lib/pdf/reader/page_text_receiver.rb +8 -9
- data/lib/pdf/reader/width_calculator/built_in.rb +8 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
data/README.rdoc
CHANGED
@@ -77,17 +77,11 @@ of PDF::Reader::Page.
|
|
77
77
|
receiver = RedGreenBlue.new
|
78
78
|
page.walk(receiver)
|
79
79
|
|
80
|
-
For low level access to the objects in a PDF file, use the ObjectHash class
|
81
|
-
|
82
|
-
|
83
|
-
puts PDF::Reader::ObjectHash.new("somefile.pdf")
|
84
|
-
|
85
|
-
or via a PDF::Reader instance:
|
80
|
+
For low level access to the objects in a PDF file, use the ObjectHash class like
|
81
|
+
so:
|
86
82
|
|
87
83
|
reader = PDF::Reader.new("somefile.pdf")
|
88
|
-
puts reader.objects
|
89
|
-
|
90
|
-
The second method is preferred to increase the effectiveness of internal caching.
|
84
|
+
puts reader.objects.inspect
|
91
85
|
|
92
86
|
= Text Encoding
|
93
87
|
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -61,6 +61,10 @@ module PDF
|
|
61
61
|
hash.merge!(@objects.deref(obj))
|
62
62
|
end
|
63
63
|
}
|
64
|
+
# This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
|
65
|
+
# out. Assuming 8.5" x 11" is what Acrobat does, so we do it too.
|
66
|
+
@attributes[:MediaBox] ||= [0,0,612,792]
|
67
|
+
@attributes
|
64
68
|
end
|
65
69
|
|
66
70
|
# returns the plain text content of this page encoded as UTF-8. Any
|
@@ -9,6 +9,8 @@ class PDF::Reader
|
|
9
9
|
# page to be rendered as described by the page's MediaBox attribute
|
10
10
|
class PageLayout
|
11
11
|
def initialize(runs, mediabox)
|
12
|
+
raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
|
13
|
+
|
12
14
|
@runs = merge_runs(runs)
|
13
15
|
@mean_font_size = mean(@runs.map(&:font_size)) || 0
|
14
16
|
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
@@ -58,11 +60,11 @@ class PDF::Reader
|
|
58
60
|
end
|
59
61
|
|
60
62
|
def row_multiplier
|
61
|
-
@row_multiplier ||= @page_height / row_count
|
63
|
+
@row_multiplier ||= @page_height.to_f / row_count.to_f
|
62
64
|
end
|
63
65
|
|
64
66
|
def col_multiplier
|
65
|
-
@col_multiplier ||= @page_width / col_count
|
67
|
+
@col_multiplier ||= @page_width.to_f / col_count.to_f
|
66
68
|
end
|
67
69
|
|
68
70
|
def mean(collection)
|
@@ -63,7 +63,12 @@ class PDF::Reader
|
|
63
63
|
#
|
64
64
|
def concatenate_matrix(a, b, c, d, e, f)
|
65
65
|
if state[:ctm]
|
66
|
-
state[:ctm]
|
66
|
+
ctm = state[:ctm]
|
67
|
+
state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f).multiply!(
|
68
|
+
ctm.a, ctm.b,
|
69
|
+
ctm.c, ctm.d,
|
70
|
+
ctm.e, ctm.f
|
71
|
+
)
|
67
72
|
else
|
68
73
|
state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f)
|
69
74
|
end
|
@@ -102,7 +107,11 @@ class PDF::Reader
|
|
102
107
|
end
|
103
108
|
|
104
109
|
def font_size
|
105
|
-
@font_size ||=
|
110
|
+
@font_size ||= begin
|
111
|
+
_, zero = trm_transform(0,0)
|
112
|
+
_, one = trm_transform(1,1)
|
113
|
+
(zero - one).abs
|
114
|
+
end
|
106
115
|
end
|
107
116
|
|
108
117
|
def set_text_leading(leading)
|
@@ -324,7 +333,7 @@ class PDF::Reader
|
|
324
333
|
# ctm[0] here, but this gets my tests green and I'm out of
|
325
334
|
# ideas for now
|
326
335
|
# TODO: support ty > 0
|
327
|
-
if ctm.a == 1
|
336
|
+
if ctm.a == 1 || ctm.a == 0
|
328
337
|
@text_matrix.horizontal_displacement_multiply!(tx)
|
329
338
|
else
|
330
339
|
@text_matrix.horizontal_displacement_multiply!(tx/ctm.a)
|
@@ -341,8 +350,8 @@ class PDF::Reader
|
|
341
350
|
def text_rendering_matrix
|
342
351
|
@text_rendering_matrix ||= begin
|
343
352
|
state_matrix = TransformationMatrix.new(
|
344
|
-
|
345
|
-
0,
|
353
|
+
state[:text_font_size] * state[:h_scaling], 0,
|
354
|
+
0, state[:text_font_size],
|
346
355
|
0, state[:text_rise]
|
347
356
|
)
|
348
357
|
state_matrix.multiply!(
|
@@ -58,8 +58,12 @@ module PDF
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
|
61
|
-
params.
|
62
|
-
|
61
|
+
params.each do |arg|
|
62
|
+
if arg.is_a?(String)
|
63
|
+
internal_show_text(arg)
|
64
|
+
else
|
65
|
+
@state.process_glyph_displacement(0, arg, false)
|
66
|
+
end
|
63
67
|
end
|
64
68
|
end
|
65
69
|
|
@@ -88,7 +92,7 @@ module PDF
|
|
88
92
|
|
89
93
|
private
|
90
94
|
|
91
|
-
def internal_show_text(string
|
95
|
+
def internal_show_text(string)
|
92
96
|
if @state.current_font.nil?
|
93
97
|
raise PDF::Reader::MalformedPDFError, "current font is invalid"
|
94
98
|
end
|
@@ -102,16 +106,11 @@ module PDF
|
|
102
106
|
# glyph will appear in the correct position
|
103
107
|
glyph_width = @state.current_font.glyph_width(glyph_code) / 1000.0
|
104
108
|
th = 1
|
105
|
-
if kerning != 0 && index == glyphs.size - 1
|
106
|
-
tj = kerning
|
107
|
-
else
|
108
|
-
tj = 0
|
109
|
-
end
|
110
109
|
scaled_glyph_width = glyph_width * @state.font_size * th
|
111
110
|
unless utf8_chars == SPACE
|
112
111
|
@characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars)
|
113
112
|
end
|
114
|
-
@state.process_glyph_displacement(glyph_width,
|
113
|
+
@state.process_glyph_displacement(glyph_width, 0, utf8_chars == SPACE)
|
115
114
|
end
|
116
115
|
end
|
117
116
|
|
@@ -42,7 +42,14 @@ class PDF::Reader
|
|
42
42
|
name = @font.encoding.int_to_name(code_point)
|
43
43
|
m = @metrics.metrics_for_name(name)
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
|
+
if m
|
47
|
+
m[:wx]
|
48
|
+
elsif @font.widths[code_point - 1]
|
49
|
+
@font.widths[code_point - 1]
|
50
|
+
else
|
51
|
+
raise ArgumentError, "Unknown glyph width for #{codepoint}"
|
52
|
+
end
|
46
53
|
end
|
47
54
|
|
48
55
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-02-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|