pdf-reader 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +3 -0
- data/README.rdoc +3 -9
- data/lib/pdf/reader/page.rb +4 -0
- data/lib/pdf/reader/page_layout.rb +4 -2
- data/lib/pdf/reader/page_state.rb +14 -5
- data/lib/pdf/reader/page_text_receiver.rb +8 -9
- data/lib/pdf/reader/width_calculator/built_in.rb +8 -1
- metadata +2 -2
data/CHANGELOG
CHANGED
data/README.rdoc
CHANGED
@@ -77,17 +77,11 @@ of PDF::Reader::Page.
|
|
77
77
|
receiver = RedGreenBlue.new
|
78
78
|
page.walk(receiver)
|
79
79
|
|
80
|
-
For low level access to the objects in a PDF file, use the ObjectHash class
|
81
|
-
|
82
|
-
|
83
|
-
puts PDF::Reader::ObjectHash.new("somefile.pdf")
|
84
|
-
|
85
|
-
or via a PDF::Reader instance:
|
80
|
+
For low level access to the objects in a PDF file, use the ObjectHash class like
|
81
|
+
so:
|
86
82
|
|
87
83
|
reader = PDF::Reader.new("somefile.pdf")
|
88
|
-
puts reader.objects
|
89
|
-
|
90
|
-
The second method is preferred to increase the effectiveness of internal caching.
|
84
|
+
puts reader.objects.inspect
|
91
85
|
|
92
86
|
= Text Encoding
|
93
87
|
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -61,6 +61,10 @@ module PDF
|
|
61
61
|
hash.merge!(@objects.deref(obj))
|
62
62
|
end
|
63
63
|
}
|
64
|
+
# This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
|
65
|
+
# out. Assuming 8.5" x 11" is what Acrobat does, so we do it too.
|
66
|
+
@attributes[:MediaBox] ||= [0,0,612,792]
|
67
|
+
@attributes
|
64
68
|
end
|
65
69
|
|
66
70
|
# returns the plain text content of this page encoded as UTF-8. Any
|
@@ -9,6 +9,8 @@ class PDF::Reader
|
|
9
9
|
# page to be rendered as described by the page's MediaBox attribute
|
10
10
|
class PageLayout
|
11
11
|
def initialize(runs, mediabox)
|
12
|
+
raise ArgumentError, "a mediabox must be provided" if mediabox.nil?
|
13
|
+
|
12
14
|
@runs = merge_runs(runs)
|
13
15
|
@mean_font_size = mean(@runs.map(&:font_size)) || 0
|
14
16
|
@mean_glyph_width = mean(@runs.map(&:mean_character_width)) || 0
|
@@ -58,11 +60,11 @@ class PDF::Reader
|
|
58
60
|
end
|
59
61
|
|
60
62
|
def row_multiplier
|
61
|
-
@row_multiplier ||= @page_height / row_count
|
63
|
+
@row_multiplier ||= @page_height.to_f / row_count.to_f
|
62
64
|
end
|
63
65
|
|
64
66
|
def col_multiplier
|
65
|
-
@col_multiplier ||= @page_width / col_count
|
67
|
+
@col_multiplier ||= @page_width.to_f / col_count.to_f
|
66
68
|
end
|
67
69
|
|
68
70
|
def mean(collection)
|
@@ -63,7 +63,12 @@ class PDF::Reader
|
|
63
63
|
#
|
64
64
|
def concatenate_matrix(a, b, c, d, e, f)
|
65
65
|
if state[:ctm]
|
66
|
-
state[:ctm]
|
66
|
+
ctm = state[:ctm]
|
67
|
+
state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f).multiply!(
|
68
|
+
ctm.a, ctm.b,
|
69
|
+
ctm.c, ctm.d,
|
70
|
+
ctm.e, ctm.f
|
71
|
+
)
|
67
72
|
else
|
68
73
|
state[:ctm] = TransformationMatrix.new(a,b,c,d,e,f)
|
69
74
|
end
|
@@ -102,7 +107,11 @@ class PDF::Reader
|
|
102
107
|
end
|
103
108
|
|
104
109
|
def font_size
|
105
|
-
@font_size ||=
|
110
|
+
@font_size ||= begin
|
111
|
+
_, zero = trm_transform(0,0)
|
112
|
+
_, one = trm_transform(1,1)
|
113
|
+
(zero - one).abs
|
114
|
+
end
|
106
115
|
end
|
107
116
|
|
108
117
|
def set_text_leading(leading)
|
@@ -324,7 +333,7 @@ class PDF::Reader
|
|
324
333
|
# ctm[0] here, but this gets my tests green and I'm out of
|
325
334
|
# ideas for now
|
326
335
|
# TODO: support ty > 0
|
327
|
-
if ctm.a == 1
|
336
|
+
if ctm.a == 1 || ctm.a == 0
|
328
337
|
@text_matrix.horizontal_displacement_multiply!(tx)
|
329
338
|
else
|
330
339
|
@text_matrix.horizontal_displacement_multiply!(tx/ctm.a)
|
@@ -341,8 +350,8 @@ class PDF::Reader
|
|
341
350
|
def text_rendering_matrix
|
342
351
|
@text_rendering_matrix ||= begin
|
343
352
|
state_matrix = TransformationMatrix.new(
|
344
|
-
|
345
|
-
0,
|
353
|
+
state[:text_font_size] * state[:h_scaling], 0,
|
354
|
+
0, state[:text_font_size],
|
346
355
|
0, state[:text_rise]
|
347
356
|
)
|
348
357
|
state_matrix.multiply!(
|
@@ -58,8 +58,12 @@ module PDF
|
|
58
58
|
end
|
59
59
|
|
60
60
|
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
|
61
|
-
params.
|
62
|
-
|
61
|
+
params.each do |arg|
|
62
|
+
if arg.is_a?(String)
|
63
|
+
internal_show_text(arg)
|
64
|
+
else
|
65
|
+
@state.process_glyph_displacement(0, arg, false)
|
66
|
+
end
|
63
67
|
end
|
64
68
|
end
|
65
69
|
|
@@ -88,7 +92,7 @@ module PDF
|
|
88
92
|
|
89
93
|
private
|
90
94
|
|
91
|
-
def internal_show_text(string
|
95
|
+
def internal_show_text(string)
|
92
96
|
if @state.current_font.nil?
|
93
97
|
raise PDF::Reader::MalformedPDFError, "current font is invalid"
|
94
98
|
end
|
@@ -102,16 +106,11 @@ module PDF
|
|
102
106
|
# glyph will appear in the correct position
|
103
107
|
glyph_width = @state.current_font.glyph_width(glyph_code) / 1000.0
|
104
108
|
th = 1
|
105
|
-
if kerning != 0 && index == glyphs.size - 1
|
106
|
-
tj = kerning
|
107
|
-
else
|
108
|
-
tj = 0
|
109
|
-
end
|
110
109
|
scaled_glyph_width = glyph_width * @state.font_size * th
|
111
110
|
unless utf8_chars == SPACE
|
112
111
|
@characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars)
|
113
112
|
end
|
114
|
-
@state.process_glyph_displacement(glyph_width,
|
113
|
+
@state.process_glyph_displacement(glyph_width, 0, utf8_chars == SPACE)
|
115
114
|
end
|
116
115
|
end
|
117
116
|
|
@@ -42,7 +42,14 @@ class PDF::Reader
|
|
42
42
|
name = @font.encoding.int_to_name(code_point)
|
43
43
|
m = @metrics.metrics_for_name(name)
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
|
+
if m
|
47
|
+
m[:wx]
|
48
|
+
elsif @font.widths[code_point - 1]
|
49
|
+
@font.widths[code_point - 1]
|
50
|
+
else
|
51
|
+
raise ArgumentError, "Unknown glyph width for #{codepoint}"
|
52
|
+
end
|
46
53
|
end
|
47
54
|
|
48
55
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.0
|
4
|
+
version: 1.3.1
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-02-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|