classifier 1.4.4 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,50 @@
1
+ # rbs_inline: enabled
2
+
3
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
4
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
5
+ # License:: LGPL
6
+
7
+ module Classifier
8
+ module Storage
9
+ # Abstract base class for storage backends.
10
+ # Implement this protocol to create custom storage (Redis, PostgreSQL, etc.)
11
+ #
12
+ # Example:
13
+ # class RedisStorage < Classifier::Storage::Base
14
+ # def initialize(redis:, key:)
15
+ # @redis, @key = redis, key
16
+ # end
17
+ #
18
+ # def write(data) = @redis.set(@key, data)
19
+ # def read = @redis.get(@key)
20
+ # def delete = @redis.del(@key)
21
+ # def exists? = @redis.exists?(@key)
22
+ # end
23
+ #
24
+ class Base
25
+ # Save classifier data
26
+ # @rbs (String) -> void
27
+ def write(data)
28
+ raise NotImplementedError, "#{self.class}#write must be implemented"
29
+ end
30
+
31
+ # Load classifier data
32
+ # @rbs () -> String?
33
+ def read
34
+ raise NotImplementedError, "#{self.class}#read must be implemented"
35
+ end
36
+
37
+ # Delete classifier data
38
+ # @rbs () -> void
39
+ def delete
40
+ raise NotImplementedError, "#{self.class}#delete must be implemented"
41
+ end
42
+
43
+ # Check if data exists
44
+ # @rbs () -> bool
45
+ def exists?
46
+ raise NotImplementedError, "#{self.class}#exists? must be implemented"
47
+ end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,51 @@
1
+ # rbs_inline: enabled
2
+
3
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
4
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
5
+ # License:: LGPL
6
+
7
+ require_relative 'base'
8
+
9
+ module Classifier
10
+ module Storage
11
+ # File-based storage backend.
12
+ #
13
+ # Example:
14
+ # bayes = Classifier::Bayes.new('Spam', 'Ham')
15
+ # bayes.storage = Classifier::Storage::File.new(path: "/var/models/spam.json")
16
+ # bayes.train_spam("Buy now!")
17
+ # bayes.save
18
+ #
19
+ class File < Base
20
+ # @rbs @path: String
21
+
22
+ attr_reader :path
23
+
24
+ # @rbs (path: String) -> void
25
+ def initialize(path:)
26
+ super()
27
+ @path = path
28
+ end
29
+
30
+ # @rbs (String) -> Integer
31
+ def write(data)
32
+ ::File.write(@path, data)
33
+ end
34
+
35
+ # @rbs () -> String?
36
+ def read
37
+ exists? ? ::File.read(@path) : nil
38
+ end
39
+
40
+ # @rbs () -> void
41
+ def delete
42
+ ::File.delete(@path) if exists?
43
+ end
44
+
45
+ # @rbs () -> bool
46
+ def exists?
47
+ ::File.exist?(@path)
48
+ end
49
+ end
50
+ end
51
+ end
@@ -0,0 +1,49 @@
1
+ # rbs_inline: enabled
2
+
3
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
4
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
5
+ # License:: LGPL
6
+
7
+ require_relative 'base'
8
+
9
+ module Classifier
10
+ module Storage
11
+ # In-memory storage for testing and ephemeral use.
12
+ #
13
+ # Example:
14
+ # bayes = Classifier::Bayes.new('Spam', 'Ham')
15
+ # bayes.storage = Classifier::Storage::Memory.new
16
+ # bayes.train_spam("Buy now!")
17
+ # bayes.save
18
+ #
19
+ class Memory < Base
20
+ # @rbs @data: String?
21
+
22
+ # @rbs () -> void
23
+ def initialize
24
+ super
25
+ @data = nil
26
+ end
27
+
28
+ # @rbs (String) -> String
29
+ def write(data)
30
+ @data = data
31
+ end
32
+
33
+ # @rbs () -> String?
34
+ def read
35
+ @data
36
+ end
37
+
38
+ # @rbs () -> void
39
+ def delete
40
+ @data = nil
41
+ end
42
+
43
+ # @rbs () -> bool
44
+ def exists?
45
+ !@data.nil?
46
+ end
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,9 @@
1
+ # rbs_inline: enabled
2
+
3
+ # Author:: Lucas Carlson (mailto:lucas@rufy.com)
4
+ # Copyright:: Copyright (c) 2005 Lucas Carlson
5
+ # License:: LGPL
6
+
7
+ require_relative 'storage/base'
8
+ require_relative 'storage/memory'
9
+ require_relative 'storage/file'
data/lib/classifier.rb CHANGED
@@ -25,6 +25,8 @@
25
25
  # License:: LGPL
26
26
 
27
27
  require 'rubygems'
28
+ require 'classifier/errors'
29
+ require 'classifier/storage'
28
30
  require 'classifier/extensions/string'
29
31
  require 'classifier/extensions/vector'
30
32
  require 'classifier/bayes'
@@ -0,0 +1,9 @@
1
+ # Type stubs for fast-stemmer gem and classifier extensions
2
+ class String
3
+ def stem: () -> String
4
+ def prepare_category_name: () -> Symbol
5
+ end
6
+
7
+ class Symbol
8
+ def prepare_category_name: () -> Symbol
9
+ end
@@ -0,0 +1,27 @@
1
+ # Type stubs for optional GSL gem
2
+ module GSL
3
+ class Vector
4
+ def self.alloc: (untyped) -> Vector
5
+ def to_a: () -> Array[Float]
6
+ def normalize: () -> Vector
7
+ def sum: () -> Float
8
+ def each: () { (Float) -> void } -> void
9
+ def []: (Integer) -> Float
10
+ def []=: (Integer, Float) -> Float
11
+ def size: () -> Integer
12
+ def row: () -> Vector
13
+ def col: () -> Vector
14
+ def *: (untyped) -> untyped
15
+ def collect: () { (Float) -> Float } -> Vector
16
+ end
17
+
18
+ class Matrix
19
+ def self.alloc: (*untyped) -> Matrix
20
+ def self.diag: (untyped) -> Matrix
21
+ def trans: () -> Matrix
22
+ def *: (untyped) -> Matrix
23
+ def size: () -> [Integer, Integer]
24
+ def column: (Integer) -> Vector
25
+ def SV_decomp: () -> [Matrix, Matrix, Vector]
26
+ end
27
+ end
@@ -0,0 +1,4 @@
1
+ module JSON
2
+ def self.parse: (String source, ?symbolize_names: bool) -> untyped
3
+ def self.generate: (untyped obj) -> String
4
+ end
@@ -0,0 +1,26 @@
1
+ # Type stubs for matrix gem
2
+ class Vector[T]
3
+ EPSILON: Float
4
+
5
+ def self.[]: [T] (*T) -> Vector[T]
6
+ def size: () -> Integer
7
+ def []: (Integer) -> T
8
+ def magnitude: () -> Float
9
+ def normalize: () -> Vector[T]
10
+ def each: () { (T) -> void } -> void
11
+ def collect: [U] () { (T) -> U } -> Vector[U]
12
+ def to_a: () -> Array[T]
13
+ def *: (untyped) -> untyped
14
+ end
15
+
16
+ class Matrix[T]
17
+ def self.rows: [T] (Array[Array[T]]) -> Matrix[T]
18
+ def self.[]: [T] (*Array[T]) -> Matrix[T]
19
+ def self.diag: (untyped) -> Matrix[untyped]
20
+ def trans: () -> Matrix[T]
21
+ def *: (untyped) -> untyped
22
+ def row_size: () -> Integer
23
+ def column_size: () -> Integer
24
+ def column: (Integer) -> Vector[T]
25
+ def SV_decomp: () -> [Matrix[T], Matrix[T], untyped]
26
+ end
@@ -0,0 +1,16 @@
1
+ # Type stubs for mutex_m gem
2
+ module Mutex_m
3
+ def mu_initialize: () -> void
4
+ def mu_lock: () -> void
5
+ def mu_unlock: () -> void
6
+ def mu_synchronize: [T] () { () -> T } -> T
7
+ def mu_try_lock: () -> bool
8
+ def mu_locked?: () -> bool
9
+
10
+ # Aliases
11
+ alias lock mu_lock
12
+ alias unlock mu_unlock
13
+ alias synchronize mu_synchronize
14
+ alias try_lock mu_try_lock
15
+ alias locked? mu_locked?
16
+ end
data/test/test_helper.rb CHANGED
@@ -1,5 +1,17 @@
1
- $:.unshift(File.dirname(__FILE__) + '/../lib')
1
+ require 'simplecov'
2
+ SimpleCov.start do
3
+ add_filter '/test/'
4
+ add_filter '/vendor/'
5
+ add_group 'Bayes', 'lib/classifier/bayes.rb'
6
+ add_group 'LSI', 'lib/classifier/lsi'
7
+ add_group 'Extensions', 'lib/classifier/extensions'
8
+ enable_coverage :branch
9
+ end
10
+
11
+ $LOAD_PATH.unshift("#{File.dirname(__FILE__)}/../lib")
2
12
 
3
13
  require 'minitest'
4
14
  require 'minitest/autorun'
15
+ require 'tmpdir'
16
+ require 'json'
5
17
  require 'classifier'
metadata CHANGED
@@ -1,14 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.4
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lucas Carlson
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-07-31 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies:
13
12
  - !ruby/object:Gem::Dependency
14
13
  name: fast-stemmer
@@ -52,6 +51,20 @@ dependencies:
52
51
  - - ">="
53
52
  - !ruby/object:Gem::Version
54
53
  version: '0'
54
+ - !ruby/object:Gem::Dependency
55
+ name: matrix
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '0'
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: '0'
55
68
  - !ruby/object:Gem::Dependency
56
69
  name: minitest
57
70
  requirement: !ruby/object:Gem::Requirement
@@ -66,6 +79,20 @@ dependencies:
66
79
  - - ">="
67
80
  - !ruby/object:Gem::Version
68
81
  version: '0'
82
+ - !ruby/object:Gem::Dependency
83
+ name: rbs-inline
84
+ requirement: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ">="
87
+ - !ruby/object:Gem::Version
88
+ version: '0'
89
+ type: :development
90
+ prerelease: false
91
+ version_requirements: !ruby/object:Gem::Requirement
92
+ requirements:
93
+ - - ">="
94
+ - !ruby/object:Gem::Version
95
+ version: '0'
69
96
  - !ruby/object:Gem::Dependency
70
97
  name: rdoc
71
98
  requirement: !ruby/object:Gem::Requirement
@@ -80,31 +107,66 @@ dependencies:
80
107
  - - ">="
81
108
  - !ruby/object:Gem::Version
82
109
  version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: rake-compiler
112
+ requirement: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: '0'
117
+ type: :development
118
+ prerelease: false
119
+ version_requirements: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - ">="
122
+ - !ruby/object:Gem::Version
123
+ version: '0'
83
124
  description: A general classifier module to allow Bayesian and other types of classifications.
84
125
  email: lucas@rufy.com
85
126
  executables: []
86
- extensions: []
127
+ extensions:
128
+ - ext/classifier/extconf.rb
87
129
  extra_rdoc_files: []
88
130
  files:
131
+ - CLAUDE.md
89
132
  - LICENSE
133
+ - README.md
90
134
  - bin/bayes.rb
91
135
  - bin/summarize.rb
136
+ - ext/classifier/classifier_ext.c
137
+ - ext/classifier/extconf.rb
138
+ - ext/classifier/linalg.h
139
+ - ext/classifier/matrix.c
140
+ - ext/classifier/svd.c
141
+ - ext/classifier/vector.c
92
142
  - lib/classifier.rb
93
143
  - lib/classifier/bayes.rb
144
+ - lib/classifier/errors.rb
94
145
  - lib/classifier/extensions/string.rb
95
146
  - lib/classifier/extensions/vector.rb
96
- - lib/classifier/extensions/vector_serialize.rb
97
147
  - lib/classifier/extensions/word_hash.rb
98
148
  - lib/classifier/lsi.rb
99
149
  - lib/classifier/lsi/content_node.rb
100
150
  - lib/classifier/lsi/summary.rb
101
151
  - lib/classifier/lsi/word_list.rb
152
+ - lib/classifier/storage.rb
153
+ - lib/classifier/storage/base.rb
154
+ - lib/classifier/storage/file.rb
155
+ - lib/classifier/storage/memory.rb
156
+ - sig/vendor/fast_stemmer.rbs
157
+ - sig/vendor/gsl.rbs
158
+ - sig/vendor/json.rbs
159
+ - sig/vendor/matrix.rbs
160
+ - sig/vendor/mutex_m.rbs
102
161
  - test/test_helper.rb
103
- homepage: https://github.com/cardmagic/classifier
162
+ homepage: https://rubyclassifier.com
104
163
  licenses:
105
164
  - LGPL
106
- metadata: {}
107
- post_install_message:
165
+ metadata:
166
+ documentation_uri: https://rubyclassifier.com/docs
167
+ source_code_uri: https://github.com/cardmagic/classifier
168
+ bug_tracker_uri: https://github.com/cardmagic/classifier/issues
169
+ changelog_uri: https://github.com/cardmagic/classifier/releases
108
170
  rdoc_options: []
109
171
  require_paths:
110
172
  - lib
@@ -119,8 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
119
181
  - !ruby/object:Gem::Version
120
182
  version: '0'
121
183
  requirements: []
122
- rubygems_version: 3.5.9
123
- signing_key:
184
+ rubygems_version: 4.0.3
124
185
  specification_version: 4
125
186
  summary: A general classifier module to allow Bayesian and other types of classifications.
126
187
  test_files: []
@@ -1,18 +0,0 @@
1
- module GSL
2
- class Vector
3
- def _dump(_v)
4
- Marshal.dump(to_a)
5
- end
6
-
7
- def self._load(arr)
8
- arry = Marshal.load(arr)
9
- GSL::Vector.alloc(arry)
10
- end
11
- end
12
-
13
- class Matrix
14
- class << self
15
- alias diag diagonal
16
- end
17
- end
18
- end