bm25f 0.1.0 → 0.2.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/bm25f.rb +2 -2
- data/test/test_bm25f.rb +23 -0
- metadata +20 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bc84b7827c64bd77694f548ac479113906f4a5173b50fab2c479079beec3bc41
|
4
|
+
data.tar.gz: 8adf6005477365e0b827e93b67ae8b4bbc99c85db73e1b274005ce043659d690
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 17f1f1e79d4fa265610c85dbc52ae949de236252d96796beca5a08c80c4035947b56dc04607ca00aa5e9e2eaac41cb9f5a645f78415c66c1b91aaa5a77c25e60
|
7
|
+
data.tar.gz: fbc526d3080a2672b41da06b78c7df363138bbc62df91bc8c065133ec2e807432c19ea108fdc14fb3bb2852973bfefdb18d0a0f98a4d4a7742eac23a1d7afdbb
|
data/lib/bm25f.rb
CHANGED
@@ -66,7 +66,7 @@ class BM25F
|
|
66
66
|
# @param documents [Hash] The documents.
|
67
67
|
# @return [Float] The average document length.
|
68
68
|
def calculate_average_document_length(documents)
|
69
|
-
total_length = documents.sum { |doc| doc.values.map
|
69
|
+
total_length = documents.sum { |doc| doc.values.map { |v| v.nil? ? 0 : v.length }.sum }
|
70
70
|
total_length / documents.length.to_f
|
71
71
|
end
|
72
72
|
|
@@ -77,7 +77,7 @@ class BM25F
|
|
77
77
|
def calculate_document_lengths(documents)
|
78
78
|
doc_lengths = {}
|
79
79
|
documents.each_with_index do |doc, i|
|
80
|
-
doc_lengths[i] = doc.transform_values
|
80
|
+
doc_lengths[i] = doc.transform_values { |v| v.nil? ? 0 : v.length } # nil field values count as length 0
|
81
81
|
end
|
82
82
|
doc_lengths
|
83
83
|
end
|
data/test/test_bm25f.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'minitest/autorun'
|
2
|
+
require 'bm25f'
|
3
|
+
|
4
|
+
class BM25FTest < Minitest::Test
|
5
|
+
def setup
|
6
|
+
@bm25f = BM25F.new
|
7
|
+
@documents = [
|
8
|
+
{ title: 'hello world', content: 'foo bar baz' },
|
9
|
+
{ title: 'foo bar', content: 'goodbye, world!' }
|
10
|
+
]
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_score
|
14
|
+
@bm25f.fit @documents
|
15
|
+
scores = @bm25f.score 'hello world foo bar baz'
|
16
|
+
|
17
|
+
# Sort
|
18
|
+
scores = scores.to_a.sort_by { |_, v| v.to_i }
|
19
|
+
|
20
|
+
# Checks that the best match (last entry after the ascending sort) is the first document (index 0)
|
21
|
+
assert scores.last[0].zero?
|
22
|
+
end
|
23
|
+
end
|
metadata
CHANGED
@@ -1,29 +1,43 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bm25f
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- catflip
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-09-
|
11
|
+
date: 2023-09-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: treat
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: '2.1'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '2.1'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: 10.4.2
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: 10.4.2
|
27
41
|
description: A fast implementation of the BM25F ranking algorithm for information
|
28
42
|
retrieval systems, written in Ruby.
|
29
43
|
email:
|
@@ -32,6 +46,7 @@ extensions: []
|
|
32
46
|
extra_rdoc_files: []
|
33
47
|
files:
|
34
48
|
- lib/bm25f.rb
|
49
|
+
- test/test_bm25f.rb
|
35
50
|
homepage: https://github.com/catflip/bm25f-ruby
|
36
51
|
licenses:
|
37
52
|
- AGPL-3.0
|
@@ -53,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
53
68
|
- !ruby/object:Gem::Version
|
54
69
|
version: '0'
|
55
70
|
requirements: []
|
56
|
-
rubygems_version: 3.
|
71
|
+
rubygems_version: 3.4.19
|
57
72
|
signing_key:
|
58
73
|
specification_version: 4
|
59
74
|
summary: BM25F ranking function in Ruby.
|