RubyGems - classifier - Versions diffs - 2.0.0 → 2.1.0 - Mend

classifier 2.0.0 → 2.1.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (24)

checksums.yaml +4 -4
data/CLAUDE.md +23 -13
data/README.md +82 -67
data/ext/classifier/classifier_ext.c +25 -0
data/ext/classifier/extconf.rb +15 -0
data/ext/classifier/linalg.h +64 -0
data/ext/classifier/matrix.c +387 -0
data/ext/classifier/svd.c +208 -0
data/ext/classifier/vector.c +319 -0
data/lib/classifier/bayes.rb +253 -33
data/lib/classifier/errors.rb +16 -0
data/lib/classifier/extensions/vector.rb +12 -4
data/lib/classifier/lsi/content_node.rb +5 -5
data/lib/classifier/lsi.rb +439 -141
data/lib/classifier/storage/base.rb +50 -0
data/lib/classifier/storage/file.rb +51 -0
data/lib/classifier/storage/memory.rb +49 -0
data/lib/classifier/storage.rb +9 -0
data/lib/classifier.rb +2 -0
data/sig/vendor/json.rbs +4 -0
data/sig/vendor/mutex_m.rbs +16 -0
data/test/test_helper.rb +2 -0
metadata +36 -5
data/lib/classifier/extensions/vector_serialize.rb +0 -18

data/lib/classifier/extensions/vector.rb CHANGED Viewed

@@ -21,12 +21,20 @@ end
 class Vector
   EPSILON = 1e-10
+  # Cache magnitude since Vector is immutable after creation
+  # Note: We undefine the matrix gem's normalize method first, then redefine it
+  # to provide a more robust implementation that handles zero vectors
+  undef_method :normalize if method_defined?(:normalize)
   def magnitude
-    sum_of_squares = 0.to_r
-    size.times do |i|
-      sum_of_squares += self[i]**2.to_r
+    # Cache magnitude since Vector is immutable after creation
+    @magnitude ||= begin
+      sum_of_squares = 0.to_r
+      size.times do |i|
+        sum_of_squares += self[i]**2.to_r
+      end
+      Math.sqrt(sum_of_squares.to_f)
     end
-    Math.sqrt(sum_of_squares.to_f)
   end
   def normalize

data/lib/classifier/lsi/content_node.rb CHANGED Viewed

@@ -50,8 +50,8 @@ module Classifier
     #
     # @rbs (WordList) -> untyped
     def raw_vector_with(word_list)
-      vec = if Classifier::LSI.gsl_available
-              GSL::Vector.alloc(word_list.size)
+      vec = if Classifier::LSI.native_available?
+              Classifier::LSI.vector_class.alloc(word_list.size)
             else
               Array.new(word_list.size, 0)
             end
@@ -61,8 +61,8 @@ module Classifier
       end
       # Perform the scaling transform
-      total_words = Classifier::LSI.gsl_available ? vec.sum : vec.sum_with_identity
-      vec_array = Classifier::LSI.gsl_available ? vec.to_a : vec
+      total_words = Classifier::LSI.native_available? ? vec.sum : vec.sum_with_identity
+      vec_array = Classifier::LSI.native_available? ? vec.to_a : vec
       total_unique_words = vec_array.count { |word| word != 0 }
       # Perform first-order association transform if this vector has more
@@ -84,7 +84,7 @@ module Classifier
         vec = vec.collect { |val| Math.log(val + 1) / divisor }
       end
-      if Classifier::LSI.gsl_available
+      if Classifier::LSI.native_available?
         @raw_norm   = vec.normalize
         @raw_vector = vec
       else