pdf-extract 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -133,9 +133,10 @@ module PdfExtract
133
133
  # Score sections into categories based on their textual attributes.
134
134
  ideals = {
135
135
  :reference => {
136
- :name_ratio => [0.2, 5],
136
+ :name_ratio => [0.2, 2],
137
137
  :letter_ratio => [0.25, 2],
138
- :year_ratio => [0.05, 7]
138
+ :year_ratio => [0.05, 7],
139
+ :cap_ratio => [0.5, 5]
139
140
  },
140
141
  :body => {
141
142
  :name_ratio => [0.03, 1],
data/lib/font_metrics.rb CHANGED
@@ -26,6 +26,10 @@ module PdfExtract
26
26
  attr_accessor :ascent, :descent, :bbox
27
27
 
28
28
  def initialize font
29
+ @ascent = 0
30
+ @descent = 0
31
+ @bbox = [0, 0, 0, 0]
32
+
29
33
  base_font = font.basefont.to_s
30
34
  if @@base_fonts.key? base_font
31
35
  @ascent = @@base_fonts[base_font][:Ascent]
@@ -41,8 +45,10 @@ module PdfExtract
41
45
  @glyph_width_lookup = proc { |c| font.glyph_width c }
42
46
  end
43
47
 
44
- @ascent = @bbox[3] if @ascent.zero?
45
- @descent = @bbox[1] if @descent.zero?
48
+ if not @bbox.nil?
49
+ @ascent = @bbox[3] if @ascent.nil? || @ascent.zero?
50
+ @descent = @bbox[1] if @descent.nil? || @descent.zero?
51
+ end
46
52
  end
47
53
 
48
54
  def glyph_width c
data/lib/names.rb CHANGED
@@ -7,7 +7,7 @@ require_relative "pdf-extract"
7
7
  module PdfExtract::Names
8
8
 
9
9
  class NamesDatabase
10
- @@ambiguous_weighting = 0.1
10
+ @@ambiguous_weighting = 0.0
11
11
  @@unambiguous_weighting = 1.0
12
12
 
13
13
  def self.path_to_data data_filename
@@ -3,7 +3,7 @@ require_relative "../spatial"
3
3
  module PdfExtract
4
4
  module References
5
5
 
6
- Settings.default :min_score, 6.4
6
+ Settings.default :min_score, 8.0
7
7
  Settings.default :min_sequence_count, 3
8
8
  Settings.default :max_reference_order, 1000
9
9
 
data/lib/spatial.rb CHANGED
@@ -165,9 +165,15 @@ module PdfExtract
165
165
  items.each do |item|
166
166
  diff = (item[var_name] - ideals[name][var_name][0]).abs
167
167
  if diff.zero?
168
- diff = Float::MIN
168
+ scores << 1.0
169
+ else
170
+ s = 1.0 / diff
171
+ if not s.finite?
172
+ scores << 0.0
173
+ else
174
+ scores << s
175
+ end
169
176
  end
170
- scores << 1.0 / diff
171
177
  end
172
178
 
173
179
  score_max = scores.max
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 0
7
7
  - 0
8
- - 3
9
- version: 0.0.3
8
+ - 4
9
+ version: 0.0.4
10
10
  platform: ruby
11
11
  authors:
12
12
  - Karl Jonathan Ward