bm25f 0.1.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/bm25f.rb +2 -2
  3. data/test/test_bm25f.rb +23 -0
  4. metadata +20 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bc3b5ddbf5d62479a1a0afafccef1d377db2a149ac9db87f8d207b0c16b41550
4
- data.tar.gz: 295be306f71a84ae399cffaf7c1faff3a768ad7e1c00095230144d564cc9af59
3
+ metadata.gz: bc84b7827c64bd77694f548ac479113906f4a5173b50fab2c479079beec3bc41
4
+ data.tar.gz: 8adf6005477365e0b827e93b67ae8b4bbc99c85db73e1b274005ce043659d690
5
5
  SHA512:
6
- metadata.gz: 1c06abf6c6d53a66e151378c610bf5ecfbe92ced01692bbe126d7bfda77bd85a2c2733934460052a6d8129e61881e067a1e35264f7acb197ff51f2336bb31e19
7
- data.tar.gz: 85ad4ba68f49009bbe667ccf993d498671ced59f9aeff4394435778c53369037f7dee346b7111c3cc6679fc72b9c8e62798139d3dce7a56e098795844e59b85e
6
+ metadata.gz: 17f1f1e79d4fa265610c85dbc52ae949de236252d96796beca5a08c80c4035947b56dc04607ca00aa5e9e2eaac41cb9f5a645f78415c66c1b91aaa5a77c25e60
7
+ data.tar.gz: fbc526d3080a2672b41da06b78c7df363138bbc62df91bc8c065133ec2e807432c19ea108fdc14fb3bb2852973bfefdb18d0a0f98a4d4a7742eac23a1d7afdbb
data/lib/bm25f.rb CHANGED
@@ -66,7 +66,7 @@ class BM25F
66
66
  # @param documents [Hash] The documents.
67
67
  # @return [Float] The average document length.
68
68
  def calculate_average_document_length(documents)
69
- total_length = documents.sum { |doc| doc.values.map(&:length).sum }
69
+ total_length = documents.sum { |doc| doc.values.map { |v| v.nil? ? 0 : v.length }.sum }
70
70
  total_length / documents.length.to_f
71
71
  end
72
72
 
@@ -77,7 +77,7 @@ class BM25F
77
77
  def calculate_document_lengths(documents)
78
78
  doc_lengths = {}
79
79
  documents.each_with_index do |doc, i|
80
- doc_lengths[i] = doc.transform_values(&:length)
80
+ doc_lengths[i] = doc.transform_values { |v| v.nil? ? 0 : v.length }
81
81
  end
82
82
  doc_lengths
83
83
  end
data/test/test_bm25f.rb ADDED
@@ -0,0 +1,23 @@
1
+ require 'minitest/autorun'
2
+ require 'bm25f'
3
+
4
+ class BM25FTest < Minitest::Test
5
+ def setup
6
+ @bm25f = BM25F.new
7
+ @documents = [
8
+ { title: 'hello world', content: 'foo bar baz' },
9
+ { title: 'foo bar', content: 'goodbye, world!' }
10
+ ]
11
+ end
12
+
13
+ def test_score
14
+ @bm25f.fit @documents
15
+ scores = @bm25f.score 'hello world foo bar baz'
16
+
17
+ # Sort
18
+ scores = scores.to_a.sort_by { |_, v| v.to_i }
19
+
20
+ # Checks if the most matching element is the first element
21
+ assert scores.last[0].zero?
22
+ end
23
+ end
metadata CHANGED
@@ -1,29 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bm25f
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - catflip
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-09 00:00:00.000000000 Z
11
+ date: 2023-09-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treat
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '2.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 10.4.2
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 10.4.2
27
41
  description: A fast implementation of the BM25F ranking algorithm for information
28
42
  retrieval systems, written in Ruby.
29
43
  email:
@@ -32,6 +46,7 @@ extensions: []
32
46
  extra_rdoc_files: []
33
47
  files:
34
48
  - lib/bm25f.rb
49
+ - test/test_bm25f.rb
35
50
  homepage: https://github.com/catflip/bm25f-ruby
36
51
  licenses:
37
52
  - AGPL-3.0
@@ -53,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
68
  - !ruby/object:Gem::Version
54
69
  version: '0'
55
70
  requirements: []
56
- rubygems_version: 3.3.26
71
+ rubygems_version: 3.4.19
57
72
  signing_key:
58
73
  specification_version: 4
59
74
  summary: BM25F ranking function in Ruby.