classifier 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +23 -13
- data/README.md +82 -67
- data/ext/classifier/classifier_ext.c +25 -0
- data/ext/classifier/extconf.rb +15 -0
- data/ext/classifier/linalg.h +64 -0
- data/ext/classifier/matrix.c +387 -0
- data/ext/classifier/svd.c +208 -0
- data/ext/classifier/vector.c +319 -0
- data/lib/classifier/bayes.rb +253 -33
- data/lib/classifier/errors.rb +16 -0
- data/lib/classifier/extensions/vector.rb +12 -4
- data/lib/classifier/lsi/content_node.rb +5 -5
- data/lib/classifier/lsi.rb +439 -141
- data/lib/classifier/storage/base.rb +50 -0
- data/lib/classifier/storage/file.rb +51 -0
- data/lib/classifier/storage/memory.rb +49 -0
- data/lib/classifier/storage.rb +9 -0
- data/lib/classifier.rb +2 -0
- data/sig/vendor/json.rbs +4 -0
- data/sig/vendor/mutex_m.rbs +16 -0
- data/test/test_helper.rb +2 -0
- metadata +36 -5
- data/lib/classifier/extensions/vector_serialize.rb +0 -18
|
data/lib/classifier/storage/base.rb
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
# rbs_inline: enabled
|
|
2
|
+
|
|
3
|
+
# Author:: Lucas Carlson (mailto:lucas@rufy.com)
|
|
4
|
+
# Copyright:: Copyright (c) 2005 Lucas Carlson
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
|
|
7
|
+
module Classifier
|
|
8
|
+
module Storage
|
|
9
|
+
# Abstract base class for storage backends.
|
|
10
|
+
# Implement this protocol to create custom storage (Redis, PostgreSQL, etc.)
|
|
11
|
+
#
|
|
12
|
+
# Example:
|
|
13
|
+
# class RedisStorage < Classifier::Storage::Base
|
|
14
|
+
# def initialize(redis:, key:)
|
|
15
|
+
# @redis, @key = redis, key
|
|
16
|
+
# end
|
|
17
|
+
#
|
|
18
|
+
# def write(data) = @redis.set(@key, data)
|
|
19
|
+
# def read = @redis.get(@key)
|
|
20
|
+
# def delete = @redis.del(@key)
|
|
21
|
+
# def exists? = @redis.exists?(@key)
|
|
22
|
+
# end
|
|
23
|
+
#
|
|
24
|
+
class Base
|
|
25
|
+
# Save classifier data
|
|
26
|
+
# @rbs (String) -> void
|
|
27
|
+
def write(data)
|
|
28
|
+
raise NotImplementedError, "#{self.class}#write must be implemented"
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Load classifier data
|
|
32
|
+
# @rbs () -> String?
|
|
33
|
+
def read
|
|
34
|
+
raise NotImplementedError, "#{self.class}#read must be implemented"
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Delete classifier data
|
|
38
|
+
# @rbs () -> void
|
|
39
|
+
def delete
|
|
40
|
+
raise NotImplementedError, "#{self.class}#delete must be implemented"
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Check if data exists
|
|
44
|
+
# @rbs () -> bool
|
|
45
|
+
def exists?
|
|
46
|
+
raise NotImplementedError, "#{self.class}#exists? must be implemented"
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
data/lib/classifier/storage/file.rb
ADDED
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
# rbs_inline: enabled
|
|
2
|
+
|
|
3
|
+
# Author:: Lucas Carlson (mailto:lucas@rufy.com)
|
|
4
|
+
# Copyright:: Copyright (c) 2005 Lucas Carlson
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
|
|
7
|
+
require_relative 'base'
|
|
8
|
+
|
|
9
|
+
module Classifier
|
|
10
|
+
module Storage
|
|
11
|
+
# File-based storage backend.
|
|
12
|
+
#
|
|
13
|
+
# Example:
|
|
14
|
+
# bayes = Classifier::Bayes.new('Spam', 'Ham')
|
|
15
|
+
# bayes.storage = Classifier::Storage::File.new(path: "/var/models/spam.json")
|
|
16
|
+
# bayes.train_spam("Buy now!")
|
|
17
|
+
# bayes.save
|
|
18
|
+
#
|
|
19
|
+
class File < Base
|
|
20
|
+
# @rbs @path: String
|
|
21
|
+
|
|
22
|
+
attr_reader :path
|
|
23
|
+
|
|
24
|
+
# @rbs (path: String) -> void
|
|
25
|
+
def initialize(path:)
|
|
26
|
+
super()
|
|
27
|
+
@path = path
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# @rbs (String) -> Integer
|
|
31
|
+
def write(data)
|
|
32
|
+
::File.write(@path, data)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# @rbs () -> String?
|
|
36
|
+
def read
|
|
37
|
+
exists? ? ::File.read(@path) : nil
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# @rbs () -> void
|
|
41
|
+
def delete
|
|
42
|
+
::File.delete(@path) if exists?
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# @rbs () -> bool
|
|
46
|
+
def exists?
|
|
47
|
+
::File.exist?(@path)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
end
|
|
51
|
+
end
|
|
data/lib/classifier/storage/memory.rb
ADDED
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# rbs_inline: enabled
|
|
2
|
+
|
|
3
|
+
# Author:: Lucas Carlson (mailto:lucas@rufy.com)
|
|
4
|
+
# Copyright:: Copyright (c) 2005 Lucas Carlson
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
|
|
7
|
+
require_relative 'base'
|
|
8
|
+
|
|
9
|
+
module Classifier
|
|
10
|
+
module Storage
|
|
11
|
+
# In-memory storage for testing and ephemeral use.
|
|
12
|
+
#
|
|
13
|
+
# Example:
|
|
14
|
+
# bayes = Classifier::Bayes.new('Spam', 'Ham')
|
|
15
|
+
# bayes.storage = Classifier::Storage::Memory.new
|
|
16
|
+
# bayes.train_spam("Buy now!")
|
|
17
|
+
# bayes.save
|
|
18
|
+
#
|
|
19
|
+
class Memory < Base
|
|
20
|
+
# @rbs @data: String?
|
|
21
|
+
|
|
22
|
+
# @rbs () -> void
|
|
23
|
+
def initialize
|
|
24
|
+
super
|
|
25
|
+
@data = nil
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
# @rbs (String) -> String
|
|
29
|
+
def write(data)
|
|
30
|
+
@data = data
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# @rbs () -> String?
|
|
34
|
+
def read
|
|
35
|
+
@data
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# @rbs () -> void
|
|
39
|
+
def delete
|
|
40
|
+
@data = nil
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# @rbs () -> bool
|
|
44
|
+
def exists?
|
|
45
|
+
!@data.nil?
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
end
|
|
49
|
+
end
|
data/lib/classifier.rb
CHANGED
data/sig/vendor/json.rbs
ADDED
data/sig/vendor/mutex_m.rbs
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# Type stubs for mutex_m gem
|
|
2
|
+
module Mutex_m
|
|
3
|
+
def mu_initialize: () -> void
|
|
4
|
+
def mu_lock: () -> void
|
|
5
|
+
def mu_unlock: () -> void
|
|
6
|
+
def mu_synchronize: [T] () { () -> T } -> T
|
|
7
|
+
def mu_try_lock: () -> bool
|
|
8
|
+
def mu_locked?: () -> bool
|
|
9
|
+
|
|
10
|
+
# Aliases
|
|
11
|
+
alias lock mu_lock
|
|
12
|
+
alias unlock mu_unlock
|
|
13
|
+
alias synchronize mu_synchronize
|
|
14
|
+
alias try_lock mu_try_lock
|
|
15
|
+
alias locked? mu_locked?
|
|
16
|
+
end
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: classifier
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 2.0.0
|
|
4
|
+
version: 2.1.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Lucas Carlson
|
|
@@ -107,10 +107,25 @@ dependencies:
|
|
|
107
107
|
- - ">="
|
|
108
108
|
- !ruby/object:Gem::Version
|
|
109
109
|
version: '0'
|
|
110
|
+
- !ruby/object:Gem::Dependency
|
|
111
|
+
name: rake-compiler
|
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
|
113
|
+
requirements:
|
|
114
|
+
- - ">="
|
|
115
|
+
- !ruby/object:Gem::Version
|
|
116
|
+
version: '0'
|
|
117
|
+
type: :development
|
|
118
|
+
prerelease: false
|
|
119
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
120
|
+
requirements:
|
|
121
|
+
- - ">="
|
|
122
|
+
- !ruby/object:Gem::Version
|
|
123
|
+
version: '0'
|
|
110
124
|
description: A general classifier module to allow Bayesian and other types of classifications.
|
|
111
125
|
email: lucas@rufy.com
|
|
112
126
|
executables: []
|
|
113
|
-
extensions: []
|
|
127
|
+
extensions:
|
|
128
|
+
- ext/classifier/extconf.rb
|
|
114
129
|
extra_rdoc_files: []
|
|
115
130
|
files:
|
|
116
131
|
- CLAUDE.md
|
|
@@ -118,24 +133,40 @@ files:
|
|
|
118
133
|
- README.md
|
|
119
134
|
- bin/bayes.rb
|
|
120
135
|
- bin/summarize.rb
|
|
136
|
+
- ext/classifier/classifier_ext.c
|
|
137
|
+
- ext/classifier/extconf.rb
|
|
138
|
+
- ext/classifier/linalg.h
|
|
139
|
+
- ext/classifier/matrix.c
|
|
140
|
+
- ext/classifier/svd.c
|
|
141
|
+
- ext/classifier/vector.c
|
|
121
142
|
- lib/classifier.rb
|
|
122
143
|
- lib/classifier/bayes.rb
|
|
144
|
+
- lib/classifier/errors.rb
|
|
123
145
|
- lib/classifier/extensions/string.rb
|
|
124
146
|
- lib/classifier/extensions/vector.rb
|
|
125
|
-
- lib/classifier/extensions/vector_serialize.rb
|
|
126
147
|
- lib/classifier/extensions/word_hash.rb
|
|
127
148
|
- lib/classifier/lsi.rb
|
|
128
149
|
- lib/classifier/lsi/content_node.rb
|
|
129
150
|
- lib/classifier/lsi/summary.rb
|
|
130
151
|
- lib/classifier/lsi/word_list.rb
|
|
152
|
+
- lib/classifier/storage.rb
|
|
153
|
+
- lib/classifier/storage/base.rb
|
|
154
|
+
- lib/classifier/storage/file.rb
|
|
155
|
+
- lib/classifier/storage/memory.rb
|
|
131
156
|
- sig/vendor/fast_stemmer.rbs
|
|
132
157
|
- sig/vendor/gsl.rbs
|
|
158
|
+
- sig/vendor/json.rbs
|
|
133
159
|
- sig/vendor/matrix.rbs
|
|
160
|
+
- sig/vendor/mutex_m.rbs
|
|
134
161
|
- test/test_helper.rb
|
|
135
|
-
homepage: https://
|
|
162
|
+
homepage: https://rubyclassifier.com
|
|
136
163
|
licenses:
|
|
137
164
|
- LGPL
|
|
138
|
-
metadata: {}
|
|
165
|
+
metadata:
|
|
166
|
+
documentation_uri: https://rubyclassifier.com/docs
|
|
167
|
+
source_code_uri: https://github.com/cardmagic/classifier
|
|
168
|
+
bug_tracker_uri: https://github.com/cardmagic/classifier/issues
|
|
169
|
+
changelog_uri: https://github.com/cardmagic/classifier/releases
|
|
139
170
|
rdoc_options: []
|
|
140
171
|
require_paths:
|
|
141
172
|
- lib
|