bm25f 0.2.3 → 0.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/bm25f.rb +13 -4
  3. data/test/test_bm25f.rb +1 -1
  4. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b00f60fdde35d0565f169e9486d2935e5ddcef9bbd8084d60d9228364c9ebed7
4
- data.tar.gz: 7249983dda5a101fb1b2ddb646414b58776ae9a29feb4f563707e9a654d45094
3
+ metadata.gz: 7c7af39f073c1fc55d42199f9c882caa5adf672d5c94c715fe379c58cd4a4a19
4
+ data.tar.gz: 1ebb197c89da4b70780d3631658a1bb3d0ce5c244abe8b5875c41ee1632f63a3
5
5
  SHA512:
6
- metadata.gz: 16231fa9ba99f19cff2a6e9e801b734b53effa584da0bc8fcc80000774bbe61c75f1994bfbdc39b3cdac8d427b5049ff56dd1dbcc669eed3b836d910e45f5547
7
- data.tar.gz: eea02e6dc1989ff69baa6097997da274149083a87cbbc7355101e613103f09fa52886718422cf68b9cb9afadb2e4ed9ce54be8117cba536a99647db01acb24f0
6
+ metadata.gz: 816270030afd15716f8b89f4c74d5c11dbc32f169c7a10a1eca931782e21e320017aa95bab66e32c6b500d407922e289edce8db396c956a62c245871744bb79b
7
+ data.tar.gz: 0ef709327eaa9c5b14e1132bafc199c4d66759b509c85d8bf3dcb23ba5b8dbe526b698c0ab1051486741fa3a3d4d21d073f8d4e3d12ced9128ec16fedd6a39e4
data/lib/bm25f.rb CHANGED
@@ -88,7 +88,7 @@ class BM25F
88
88
  def calculate_idf
89
89
  idf = {}
90
90
  @field_weights.each_key do |field|
91
- field_doc_count = @documents.count { |doc| !doc[field].empty? }
91
+ field_doc_count = @documents.count { |doc| !doc[field]&.empty? }
92
92
  idf[field] = Math.log((@total_docs - field_doc_count + 0.5) / (field_doc_count + 0.5) + 1.0)
93
93
  end
94
94
  idf
@@ -114,7 +114,10 @@ class BM25F
114
114
  tf = field_term_frequency(field, term, doc_id)
115
115
  idf = @idf[field]
116
116
  field_length_norm = field_length_norm(field, doc_id)
117
- doc_score += @field_weights[field] * ((tf * (@term_freq_weight + 1)) / (tf + @term_freq_weight * field_length_norm) * idf)
117
+ val = @field_weights[field] * ((tf * (@term_freq_weight + 1)) / (tf + @term_freq_weight * field_length_norm) * idf)
118
+ val = 0 if val.nan?
119
+
120
+ doc_score += val
118
121
  end
119
122
  end
120
123
  doc_score
@@ -127,7 +130,10 @@ class BM25F
127
130
  # @param doc_id [Integer] The document ID.
128
131
  # @return [Integer] The term frequency.
129
132
  def field_term_frequency(field, term, doc_id)
130
- @documents[doc_id][field].scan(term).count
133
+ val = @documents[doc_id][field]
134
+ return 0 if val.nil?
135
+
136
+ val.scan(term).count
131
137
  end
132
138
 
133
139
  # Calculates the field length normalization factor of a document.
@@ -136,6 +142,9 @@ class BM25F
136
142
  # @param doc_id [Integer] The document ID.
137
143
  # @return [Float] The field length normalization factor.
138
144
  def field_length_norm(field, doc_id)
139
- 1.0 - @doc_length_weight + @doc_length_weight * (@doc_lengths[doc_id][field] / @avg_doc_length)
145
+ val = @doc_lengths[doc_id][field]
146
+ return 0 if val.nil?
147
+
148
+ 1.0 - @doc_length_weight + @doc_length_weight * (val / @avg_doc_length)
140
149
  end
141
150
  end
data/test/test_bm25f.rb CHANGED
@@ -6,7 +6,7 @@ class BM25FTest < Minitest::Test
6
6
  @bm25f = BM25F.new
7
7
  @documents = [
8
8
  { title: 'hello world', content: 'foo bar baz' },
9
- { title: 'foo bar', content: 'goodbye, world!' }
9
+ { title: 'foo bar', content: 'goodbye, world!', test: nil }
10
10
  ]
11
11
  end
12
12
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bm25f
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - catflip