pdf-extract 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/analysis/sections.rb +3 -2
- data/lib/font_metrics.rb +8 -2
- data/lib/names.rb +1 -1
- data/lib/references/references.rb +1 -1
- data/lib/spatial.rb +8 -2
- metadata +2 -2
data/lib/analysis/sections.rb
CHANGED
@@ -133,9 +133,10 @@ module PdfExtract
|
|
133
133
|
# Score sections into categories based on their textual attributes.
|
134
134
|
ideals = {
|
135
135
|
:reference => {
|
136
|
-
:name_ratio => [0.2,
|
136
|
+
:name_ratio => [0.2, 2],
|
137
137
|
:letter_ratio => [0.25, 2],
|
138
|
-
:year_ratio => [0.05, 7]
|
138
|
+
:year_ratio => [0.05, 7],
|
139
|
+
:cap_ratio => [0.5, 5]
|
139
140
|
},
|
140
141
|
:body => {
|
141
142
|
:name_ratio => [0.03, 1],
|
data/lib/font_metrics.rb
CHANGED
@@ -26,6 +26,10 @@ module PdfExtract
|
|
26
26
|
attr_accessor :ascent, :descent, :bbox
|
27
27
|
|
28
28
|
def initialize font
|
29
|
+
@ascent = 0
|
30
|
+
@descent = 0
|
31
|
+
@bbox = [0, 0, 0, 0]
|
32
|
+
|
29
33
|
base_font = font.basefont.to_s
|
30
34
|
if @@base_fonts.key? base_font
|
31
35
|
@ascent = @@base_fonts[base_font][:Ascent]
|
@@ -41,8 +45,10 @@ module PdfExtract
|
|
41
45
|
@glyph_width_lookup = proc { |c| font.glyph_width c }
|
42
46
|
end
|
43
47
|
|
44
|
-
|
45
|
-
|
48
|
+
if not @bbox.nil?
|
49
|
+
@ascent = @bbox[3] if @ascent.nil? || @ascent.zero?
|
50
|
+
@descent = @bbox[1] if @descent.nil? || @descent.zero?
|
51
|
+
end
|
46
52
|
end
|
47
53
|
|
48
54
|
def glyph_width c
|
data/lib/names.rb
CHANGED
data/lib/spatial.rb
CHANGED
@@ -165,9 +165,15 @@ module PdfExtract
|
|
165
165
|
items.each do |item|
|
166
166
|
diff = (item[var_name] - ideals[name][var_name][0]).abs
|
167
167
|
if diff.zero?
|
168
|
-
|
168
|
+
scores << 1.0
|
169
|
+
else
|
170
|
+
s = 1.0 / diff
|
171
|
+
if not s.finite?
|
172
|
+
scores << 0.0
|
173
|
+
else
|
174
|
+
scores << s
|
175
|
+
end
|
169
176
|
end
|
170
|
-
scores << 1.0 / diff
|
171
177
|
end
|
172
178
|
|
173
179
|
score_max = scores.max
|