classifier 1.4.4 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +77 -0
- data/README.md +274 -0
- data/ext/classifier/classifier_ext.c +25 -0
- data/ext/classifier/extconf.rb +15 -0
- data/ext/classifier/linalg.h +64 -0
- data/ext/classifier/matrix.c +387 -0
- data/ext/classifier/svd.c +208 -0
- data/ext/classifier/vector.c +319 -0
- data/lib/classifier/bayes.rb +294 -60
- data/lib/classifier/errors.rb +16 -0
- data/lib/classifier/extensions/vector.rb +42 -26
- data/lib/classifier/extensions/word_hash.rb +8 -1
- data/lib/classifier/lsi/content_node.rb +30 -9
- data/lib/classifier/lsi/word_list.rb +12 -1
- data/lib/classifier/lsi.rb +479 -125
- data/lib/classifier/storage/base.rb +50 -0
- data/lib/classifier/storage/file.rb +51 -0
- data/lib/classifier/storage/memory.rb +49 -0
- data/lib/classifier/storage.rb +9 -0
- data/lib/classifier.rb +2 -0
- data/sig/vendor/fast_stemmer.rbs +9 -0
- data/sig/vendor/gsl.rbs +27 -0
- data/sig/vendor/json.rbs +4 -0
- data/sig/vendor/matrix.rbs +26 -0
- data/sig/vendor/mutex_m.rbs +16 -0
- data/test/test_helper.rb +13 -1
- metadata +71 -10
- data/lib/classifier/extensions/vector_serialize.rb +0 -18
|
data/lib/classifier/storage/base.rb
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# rbs_inline: enabled
|
|
2
|
+
|
|
3
|
+
# Author:: Lucas Carlson (mailto:lucas@rufy.com)
|
|
4
|
+
# Copyright:: Copyright (c) 2005 Lucas Carlson
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
|
|
7
|
+
module Classifier
|
|
8
|
+
module Storage
|
|
9
|
+
# Abstract base class for storage backends.
|
|
10
|
+
# Implement this protocol to create custom storage (Redis, PostgreSQL, etc.)
|
|
11
|
+
#
|
|
12
|
+
# Example:
|
|
13
|
+
# class RedisStorage < Classifier::Storage::Base
|
|
14
|
+
# def initialize(redis:, key:)
|
|
15
|
+
# @redis, @key = redis, key
|
|
16
|
+
# end
|
|
17
|
+
#
|
|
18
|
+
# def write(data) = @redis.set(@key, data)
|
|
19
|
+
# def read = @redis.get(@key)
|
|
20
|
+
# def delete = @redis.del(@key)
|
|
21
|
+
# def exists? = @redis.exists?(@key)
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
class Base
|
|
25
|
+
# Save classifier data
|
|
26
|
+
# @rbs (String) -> void
|
|
27
|
+
def write(data)
|
|
28
|
+
raise NotImplementedError, "#{self.class}#write must be implemented"
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Load classifier data
|
|
32
|
+
# @rbs () -> String?
|
|
33
|
+
def read
|
|
34
|
+
raise NotImplementedError, "#{self.class}#read must be implemented"
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Delete classifier data
|
|
38
|
+
# @rbs () -> void
|
|
39
|
+
def delete
|
|
40
|
+
raise NotImplementedError, "#{self.class}#delete must be implemented"
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Check if data exists
|
|
44
|
+
# @rbs () -> bool
|
|
45
|
+
def exists?
|
|
46
|
+
raise NotImplementedError, "#{self.class}#exists? must be implemented"
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
data/lib/classifier/storage/file.rb
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# rbs_inline: enabled
|
|
2
|
+
|
|
3
|
+
# Author:: Lucas Carlson (mailto:lucas@rufy.com)
|
|
4
|
+
# Copyright:: Copyright (c) 2005 Lucas Carlson
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
|
|
7
|
+
require_relative 'base'
|
|
8
|
+
|
|
9
|
+
module Classifier
|
|
10
|
+
module Storage
|
|
11
|
+
# File-based storage backend.
|
|
12
|
+
#
|
|
13
|
+
# Example:
|
|
14
|
+
# bayes = Classifier::Bayes.new('Spam', 'Ham')
|
|
15
|
+
# bayes.storage = Classifier::Storage::File.new(path: "/var/models/spam.json")
|
|
16
|
+
# bayes.train_spam("Buy now!")
|
|
17
|
+
# bayes.save
|
|
18
|
+
#
|
|
19
|
+
class File < Base
|
|
20
|
+
# @rbs @path: String
|
|
21
|
+
|
|
22
|
+
attr_reader :path
|
|
23
|
+
|
|
24
|
+
# @rbs (path: String) -> void
|
|
25
|
+
def initialize(path:)
|
|
26
|
+
super()
|
|
27
|
+
@path = path
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# @rbs (String) -> Integer
|
|
31
|
+
def write(data)
|
|
32
|
+
::File.write(@path, data)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# @rbs () -> String?
|
|
36
|
+
def read
|
|
37
|
+
exists? ? ::File.read(@path) : nil
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# @rbs () -> void
|
|
41
|
+
def delete
|
|
42
|
+
::File.delete(@path) if exists?
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# @rbs () -> bool
|
|
46
|
+
def exists?
|
|
47
|
+
::File.exist?(@path)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
data/lib/classifier/storage/memory.rb
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# rbs_inline: enabled
|
|
2
|
+
|
|
3
|
+
# Author:: Lucas Carlson (mailto:lucas@rufy.com)
|
|
4
|
+
# Copyright:: Copyright (c) 2005 Lucas Carlson
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
|
|
7
|
+
require_relative 'base'
|
|
8
|
+
|
|
9
|
+
module Classifier
|
|
10
|
+
module Storage
|
|
11
|
+
# In-memory storage for testing and ephemeral use.
|
|
12
|
+
#
|
|
13
|
+
# Example:
|
|
14
|
+
# bayes = Classifier::Bayes.new('Spam', 'Ham')
|
|
15
|
+
# bayes.storage = Classifier::Storage::Memory.new
|
|
16
|
+
# bayes.train_spam("Buy now!")
|
|
17
|
+
# bayes.save
|
|
18
|
+
#
|
|
19
|
+
class Memory < Base
|
|
20
|
+
# @rbs @data: String?
|
|
21
|
+
|
|
22
|
+
# @rbs () -> void
|
|
23
|
+
def initialize
|
|
24
|
+
super
|
|
25
|
+
@data = nil
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# @rbs (String) -> String
|
|
29
|
+
def write(data)
|
|
30
|
+
@data = data
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# @rbs () -> String?
|
|
34
|
+
def read
|
|
35
|
+
@data
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# @rbs () -> void
|
|
39
|
+
def delete
|
|
40
|
+
@data = nil
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# @rbs () -> bool
|
|
44
|
+
def exists?
|
|
45
|
+
!@data.nil?
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
data/lib/classifier.rb
CHANGED
data/sig/vendor/gsl.rbs
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
# Type stubs for optional GSL gem
|
|
2
|
+
module GSL
|
|
3
|
+
class Vector
|
|
4
|
+
def self.alloc: (untyped) -> Vector
|
|
5
|
+
def to_a: () -> Array[Float]
|
|
6
|
+
def normalize: () -> Vector
|
|
7
|
+
def sum: () -> Float
|
|
8
|
+
def each: () { (Float) -> void } -> void
|
|
9
|
+
def []: (Integer) -> Float
|
|
10
|
+
def []=: (Integer, Float) -> Float
|
|
11
|
+
def size: () -> Integer
|
|
12
|
+
def row: () -> Vector
|
|
13
|
+
def col: () -> Vector
|
|
14
|
+
def *: (untyped) -> untyped
|
|
15
|
+
def collect: () { (Float) -> Float } -> Vector
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
class Matrix
|
|
19
|
+
def self.alloc: (*untyped) -> Matrix
|
|
20
|
+
def self.diag: (untyped) -> Matrix
|
|
21
|
+
def trans: () -> Matrix
|
|
22
|
+
def *: (untyped) -> Matrix
|
|
23
|
+
def size: () -> [Integer, Integer]
|
|
24
|
+
def column: (Integer) -> Vector
|
|
25
|
+
def SV_decomp: () -> [Matrix, Matrix, Vector]
|
|
26
|
+
end
|
|
27
|
+
end
|
data/sig/vendor/json.rbs
ADDED
data/sig/vendor/matrix.rbs
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# Type stubs for matrix gem
|
|
2
|
+
class Vector[T]
|
|
3
|
+
EPSILON: Float
|
|
4
|
+
|
|
5
|
+
def self.[]: [T] (*T) -> Vector[T]
|
|
6
|
+
def size: () -> Integer
|
|
7
|
+
def []: (Integer) -> T
|
|
8
|
+
def magnitude: () -> Float
|
|
9
|
+
def normalize: () -> Vector[T]
|
|
10
|
+
def each: () { (T) -> void } -> void
|
|
11
|
+
def collect: [U] () { (T) -> U } -> Vector[U]
|
|
12
|
+
def to_a: () -> Array[T]
|
|
13
|
+
def *: (untyped) -> untyped
|
|
14
|
+
end
|
|
15
|
+
|
|
16
|
+
class Matrix[T]
|
|
17
|
+
def self.rows: [T] (Array[Array[T]]) -> Matrix[T]
|
|
18
|
+
def self.[]: [T] (*Array[T]) -> Matrix[T]
|
|
19
|
+
def self.diag: (untyped) -> Matrix[untyped]
|
|
20
|
+
def trans: () -> Matrix[T]
|
|
21
|
+
def *: (untyped) -> untyped
|
|
22
|
+
def row_size: () -> Integer
|
|
23
|
+
def column_size: () -> Integer
|
|
24
|
+
def column: (Integer) -> Vector[T]
|
|
25
|
+
def SV_decomp: () -> [Matrix[T], Matrix[T], untyped]
|
|
26
|
+
end
|
|
data/sig/vendor/mutex_m.rbs
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Type stubs for mutex_m gem
|
|
2
|
+
module Mutex_m
|
|
3
|
+
def mu_initialize: () -> void
|
|
4
|
+
def mu_lock: () -> void
|
|
5
|
+
def mu_unlock: () -> void
|
|
6
|
+
def mu_synchronize: [T] () { () -> T } -> T
|
|
7
|
+
def mu_try_lock: () -> bool
|
|
8
|
+
def mu_locked?: () -> bool
|
|
9
|
+
|
|
10
|
+
# Aliases
|
|
11
|
+
alias lock mu_lock
|
|
12
|
+
alias unlock mu_unlock
|
|
13
|
+
alias synchronize mu_synchronize
|
|
14
|
+
alias try_lock mu_try_lock
|
|
15
|
+
alias locked? mu_locked?
|
|
16
|
+
end
|
data/test/test_helper.rb
CHANGED
|
@@ -1,5 +1,17 @@
|
|
|
1
|
-
|
|
1
|
+
require 'simplecov'
|
|
2
|
+
SimpleCov.start do
|
|
3
|
+
add_filter '/test/'
|
|
4
|
+
add_filter '/vendor/'
|
|
5
|
+
add_group 'Bayes', 'lib/classifier/bayes.rb'
|
|
6
|
+
add_group 'LSI', 'lib/classifier/lsi'
|
|
7
|
+
add_group 'Extensions', 'lib/classifier/extensions'
|
|
8
|
+
enable_coverage :branch
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
$LOAD_PATH.unshift("#{File.dirname(__FILE__)}/../lib")
|
|
2
12
|
|
|
3
13
|
require 'minitest'
|
|
4
14
|
require 'minitest/autorun'
|
|
15
|
+
require 'tmpdir'
|
|
16
|
+
require 'json'
|
|
5
17
|
require 'classifier'
|
metadata
CHANGED
|
@@ -1,14 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: classifier
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.4.4
|
|
4
|
+
version: 2.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Lucas Carlson
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies:
|
|
13
12
|
- !ruby/object:Gem::Dependency
|
|
14
13
|
name: fast-stemmer
|
|
@@ -52,6 +51,20 @@ dependencies:
|
|
|
52
51
|
- - ">="
|
|
53
52
|
- !ruby/object:Gem::Version
|
|
54
53
|
version: '0'
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: matrix
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: '0'
|
|
61
|
+
type: :runtime
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: '0'
|
|
55
68
|
- !ruby/object:Gem::Dependency
|
|
56
69
|
name: minitest
|
|
57
70
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -66,6 +79,20 @@ dependencies:
|
|
|
66
79
|
- - ">="
|
|
67
80
|
- !ruby/object:Gem::Version
|
|
68
81
|
version: '0'
|
|
82
|
+
- !ruby/object:Gem::Dependency
|
|
83
|
+
name: rbs-inline
|
|
84
|
+
requirement: !ruby/object:Gem::Requirement
|
|
85
|
+
requirements:
|
|
86
|
+
- - ">="
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '0'
|
|
89
|
+
type: :development
|
|
90
|
+
prerelease: false
|
|
91
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
92
|
+
requirements:
|
|
93
|
+
- - ">="
|
|
94
|
+
- !ruby/object:Gem::Version
|
|
95
|
+
version: '0'
|
|
69
96
|
- !ruby/object:Gem::Dependency
|
|
70
97
|
name: rdoc
|
|
71
98
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -80,31 +107,66 @@ dependencies:
|
|
|
80
107
|
- - ">="
|
|
81
108
|
- !ruby/object:Gem::Version
|
|
82
109
|
version: '0'
|
|
110
|
+
- !ruby/object:Gem::Dependency
|
|
111
|
+
name: rake-compiler
|
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
|
113
|
+
requirements:
|
|
114
|
+
- - ">="
|
|
115
|
+
- !ruby/object:Gem::Version
|
|
116
|
+
version: '0'
|
|
117
|
+
type: :development
|
|
118
|
+
prerelease: false
|
|
119
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
120
|
+
requirements:
|
|
121
|
+
- - ">="
|
|
122
|
+
- !ruby/object:Gem::Version
|
|
123
|
+
version: '0'
|
|
83
124
|
description: A general classifier module to allow Bayesian and other types of classifications.
|
|
84
125
|
email: lucas@rufy.com
|
|
85
126
|
executables: []
|
|
86
|
-
extensions:
|
|
127
|
+
extensions:
|
|
128
|
+
- ext/classifier/extconf.rb
|
|
87
129
|
extra_rdoc_files: []
|
|
88
130
|
files:
|
|
131
|
+
- CLAUDE.md
|
|
89
132
|
- LICENSE
|
|
133
|
+
- README.md
|
|
90
134
|
- bin/bayes.rb
|
|
91
135
|
- bin/summarize.rb
|
|
136
|
+
- ext/classifier/classifier_ext.c
|
|
137
|
+
- ext/classifier/extconf.rb
|
|
138
|
+
- ext/classifier/linalg.h
|
|
139
|
+
- ext/classifier/matrix.c
|
|
140
|
+
- ext/classifier/svd.c
|
|
141
|
+
- ext/classifier/vector.c
|
|
92
142
|
- lib/classifier.rb
|
|
93
143
|
- lib/classifier/bayes.rb
|
|
144
|
+
- lib/classifier/errors.rb
|
|
94
145
|
- lib/classifier/extensions/string.rb
|
|
95
146
|
- lib/classifier/extensions/vector.rb
|
|
96
|
-
- lib/classifier/extensions/vector_serialize.rb
|
|
97
147
|
- lib/classifier/extensions/word_hash.rb
|
|
98
148
|
- lib/classifier/lsi.rb
|
|
99
149
|
- lib/classifier/lsi/content_node.rb
|
|
100
150
|
- lib/classifier/lsi/summary.rb
|
|
101
151
|
- lib/classifier/lsi/word_list.rb
|
|
152
|
+
- lib/classifier/storage.rb
|
|
153
|
+
- lib/classifier/storage/base.rb
|
|
154
|
+
- lib/classifier/storage/file.rb
|
|
155
|
+
- lib/classifier/storage/memory.rb
|
|
156
|
+
- sig/vendor/fast_stemmer.rbs
|
|
157
|
+
- sig/vendor/gsl.rbs
|
|
158
|
+
- sig/vendor/json.rbs
|
|
159
|
+
- sig/vendor/matrix.rbs
|
|
160
|
+
- sig/vendor/mutex_m.rbs
|
|
102
161
|
- test/test_helper.rb
|
|
103
|
-
homepage: https://
|
|
162
|
+
homepage: https://rubyclassifier.com
|
|
104
163
|
licenses:
|
|
105
164
|
- LGPL
|
|
106
|
-
metadata:
|
|
107
|
-
|
|
165
|
+
metadata:
|
|
166
|
+
documentation_uri: https://rubyclassifier.com/docs
|
|
167
|
+
source_code_uri: https://github.com/cardmagic/classifier
|
|
168
|
+
bug_tracker_uri: https://github.com/cardmagic/classifier/issues
|
|
169
|
+
changelog_uri: https://github.com/cardmagic/classifier/releases
|
|
108
170
|
rdoc_options: []
|
|
109
171
|
require_paths:
|
|
110
172
|
- lib
|
|
@@ -119,8 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
119
181
|
- !ruby/object:Gem::Version
|
|
120
182
|
version: '0'
|
|
121
183
|
requirements: []
|
|
122
|
-
rubygems_version:
|
|
123
|
-
signing_key:
|
|
184
|
+
rubygems_version: 4.0.3
|
|
124
185
|
specification_version: 4
|
|
125
186
|
summary: A general classifier module to allow Bayesian and other types of classifications.
|
|
126
187
|
test_files: []
|