bm25f 0.1.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/lib/bm25f.rb +2 -2
  3. data/test/test_bm25f.rb +23 -0
  4. metadata +20 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bc3b5ddbf5d62479a1a0afafccef1d377db2a149ac9db87f8d207b0c16b41550
4
- data.tar.gz: 295be306f71a84ae399cffaf7c1faff3a768ad7e1c00095230144d564cc9af59
3
+ metadata.gz: bc84b7827c64bd77694f548ac479113906f4a5173b50fab2c479079beec3bc41
4
+ data.tar.gz: 8adf6005477365e0b827e93b67ae8b4bbc99c85db73e1b274005ce043659d690
5
5
  SHA512:
6
- metadata.gz: 1c06abf6c6d53a66e151378c610bf5ecfbe92ced01692bbe126d7bfda77bd85a2c2733934460052a6d8129e61881e067a1e35264f7acb197ff51f2336bb31e19
7
- data.tar.gz: 85ad4ba68f49009bbe667ccf993d498671ced59f9aeff4394435778c53369037f7dee346b7111c3cc6679fc72b9c8e62798139d3dce7a56e098795844e59b85e
6
+ metadata.gz: 17f1f1e79d4fa265610c85dbc52ae949de236252d96796beca5a08c80c4035947b56dc04607ca00aa5e9e2eaac41cb9f5a645f78415c66c1b91aaa5a77c25e60
7
+ data.tar.gz: fbc526d3080a2672b41da06b78c7df363138bbc62df91bc8c065133ec2e807432c19ea108fdc14fb3bb2852973bfefdb18d0a0f98a4d4a7742eac23a1d7afdbb
data/lib/bm25f.rb CHANGED
@@ -66,7 +66,7 @@ class BM25F
66
66
  # @param documents [Hash] The documents.
67
67
  # @return [Float] The average document length.
68
68
  def calculate_average_document_length(documents)
69
- total_length = documents.sum { |doc| doc.values.map(&:length).sum }
69
+ total_length = documents.sum { |doc| doc.values.map { |v| v.nil? ? 0 : v.length }.sum }
70
70
  total_length / documents.length.to_f
71
71
  end
72
72
 
@@ -77,7 +77,7 @@ class BM25F
77
77
  def calculate_document_lengths(documents)
78
78
  doc_lengths = {}
79
79
  documents.each_with_index do |doc, i|
80
- doc_lengths[i] = doc.transform_values(&:length)
80
+ doc_lengths[i] = doc.transform_values { |v| v.nil ? 0 : v.length }
81
81
  end
82
82
  doc_lengths
83
83
  end
@@ -0,0 +1,23 @@
1
+ require 'minitest/autorun'
2
+ require 'bm25f'
3
+
4
class BM25FTest < Minitest::Test
  # Builds a fresh ranker and a two-document corpus before each test.
  def setup
    @bm25f = BM25F.new
    @documents = [
      { title: 'hello world', content: 'foo bar baz' },
      { title: 'foo bar', content: 'goodbye, world!' }
    ]
  end

  # The query contains every term of document 0, so document 0 is expected to
  # receive the highest score.
  def test_score
    @bm25f.fit(@documents)
    scores = @bm25f.score('hello world foo bar baz')

    # Compare the raw scores: truncating them with to_i would collapse
    # distinct fractional scores to the same integer and leave the ordering
    # of the entries arbitrary.
    best_index, _best_score = scores.max_by { |_, score| score }

    # Checks that the best-matching element is the first document
    assert_equal 0, best_index
  end
end
metadata CHANGED
@@ -1,29 +1,43 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bm25f
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - catflip
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-09-09 00:00:00.000000000 Z
11
+ date: 2023-09-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: treat
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '2.1'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '2.1'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 10.4.2
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 10.4.2
27
41
  description: A fast implementation of the BM25F ranking algorithm for information
28
42
  retrieval systems, written in Ruby.
29
43
  email:
@@ -32,6 +46,7 @@ extensions: []
32
46
  extra_rdoc_files: []
33
47
  files:
34
48
  - lib/bm25f.rb
49
+ - test/test_bm25f.rb
35
50
  homepage: https://github.com/catflip/bm25f-ruby
36
51
  licenses:
37
52
  - AGPL-3.0
@@ -53,7 +68,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
53
68
  - !ruby/object:Gem::Version
54
69
  version: '0'
55
70
  requirements: []
56
- rubygems_version: 3.3.26
71
+ rubygems_version: 3.4.19
57
72
  signing_key:
58
73
  specification_version: 4
59
74
  summary: BM25F ranking function in Ruby.