classifier 2.0.0 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CLAUDE.md +23 -13
- data/README.md +72 -190
- data/ext/classifier/classifier_ext.c +26 -0
- data/ext/classifier/extconf.rb +15 -0
- data/ext/classifier/incremental_svd.c +393 -0
- data/ext/classifier/linalg.h +72 -0
- data/ext/classifier/matrix.c +387 -0
- data/ext/classifier/svd.c +208 -0
- data/ext/classifier/vector.c +319 -0
- data/lib/classifier/bayes.rb +398 -54
- data/lib/classifier/errors.rb +19 -0
- data/lib/classifier/extensions/vector.rb +12 -4
- data/lib/classifier/knn.rb +351 -0
- data/lib/classifier/logistic_regression.rb +571 -0
- data/lib/classifier/lsi/content_node.rb +5 -5
- data/lib/classifier/lsi/incremental_svd.rb +166 -0
- data/lib/classifier/lsi/summary.rb +25 -5
- data/lib/classifier/lsi.rb +784 -138
- data/lib/classifier/storage/base.rb +50 -0
- data/lib/classifier/storage/file.rb +51 -0
- data/lib/classifier/storage/memory.rb +49 -0
- data/lib/classifier/storage.rb +9 -0
- data/lib/classifier/streaming/line_reader.rb +99 -0
- data/lib/classifier/streaming/progress.rb +96 -0
- data/lib/classifier/streaming.rb +122 -0
- data/lib/classifier/tfidf.rb +408 -0
- data/lib/classifier.rb +6 -0
- data/sig/vendor/json.rbs +4 -0
- data/sig/vendor/matrix.rbs +25 -14
- data/sig/vendor/mutex_m.rbs +16 -0
- data/sig/vendor/streaming.rbs +14 -0
- data/test/test_helper.rb +2 -0
- metadata +52 -8
- data/lib/classifier/extensions/vector_serialize.rb +0 -18
data/lib/classifier/tfidf.rb
ADDED
@@ -0,0 +1,408 @@
+# rbs_inline: enabled
+
+# Author:: Lucas Carlson (mailto:lucas@rufy.com)
+# Copyright:: Copyright (c) 2024 Lucas Carlson
+# License:: LGPL
+
+require 'json'
+
+module Classifier
+  # TF-IDF vectorizer: transforms text to weighted feature vectors.
+  # Downweights common words, upweights discriminative terms.
+  #
+  # Example:
+  #   tfidf = Classifier::TFIDF.new
+  #   tfidf.fit(["Dogs are great pets", "Cats are independent"])
+  #   tfidf.transform("Dogs are loyal") # => {:dog=>0.7071..., :loyal=>0.7071...}
+  #
+  class TFIDF
+    include Streaming
+
+    # @rbs @min_df: Integer | Float
+    # @rbs @max_df: Integer | Float
+    # @rbs @ngram_range: Array[Integer]
+    # @rbs @sublinear_tf: bool
+    # @rbs @vocabulary: Hash[Symbol, Integer]
+    # @rbs @idf: Hash[Symbol, Float]
+    # @rbs @num_documents: Integer
+    # @rbs @fitted: bool
+    # @rbs @dirty: bool
+    # @rbs @storage: Storage::Base?
+
+    attr_reader :vocabulary, :idf, :num_documents
+    attr_accessor :storage
+
+    # Creates a new TF-IDF vectorizer.
+    # - min_df/max_df: filter terms by document frequency (Integer for count, Float for proportion)
+    # - ngram_range: [1,1] for unigrams, [1,2] for unigrams+bigrams
+    # - sublinear_tf: use 1 + log(tf) instead of raw term frequency
+    #
+    # @rbs (?min_df: Integer | Float, ?max_df: Integer | Float,
+    #       ?ngram_range: Array[Integer], ?sublinear_tf: bool) -> void
+    def initialize(min_df: 1, max_df: 1.0, ngram_range: [1, 1], sublinear_tf: false)
+      validate_df!(min_df, 'min_df')
+      validate_df!(max_df, 'max_df')
+      validate_ngram_range!(ngram_range)
+
+      @min_df = min_df
+      @max_df = max_df
+      @ngram_range = ngram_range
+      @sublinear_tf = sublinear_tf
+      @vocabulary = {}
+      @idf = {}
+      @num_documents = 0
+      @fitted = false
+      @dirty = false
+      @storage = nil
+    end
+
+    # Learns vocabulary and IDF weights from the corpus.
+    # @rbs (Array[String]) -> self
+    def fit(documents)
+      raise ArgumentError, 'documents must be an array' unless documents.is_a?(Array)
+      raise ArgumentError, 'documents cannot be empty' if documents.empty?
+
+      @num_documents = documents.size
+      document_frequencies = Hash.new(0)
+
+      documents.each do |doc|
+        terms = extract_terms(doc)
+        terms.each_key { |term| document_frequencies[term] += 1 }
+      end
+
+      @vocabulary = {}
+      @idf = {}
+      vocab_index = 0
+
+      document_frequencies.each do |term, df|
+        next unless within_df_bounds?(df, @num_documents)
+
+        @vocabulary[term] = vocab_index
+        vocab_index += 1
+
+        # IDF: log((N + 1) / (df + 1)) + 1
+        @idf[term] = Math.log((@num_documents + 1).to_f / (df + 1)) + 1
+      end
+
+      @fitted = true
+      @dirty = true
+      self
+    end
+
+    # Transforms a document into a normalized TF-IDF vector.
+    # @rbs (String) -> Hash[Symbol, Float]
+    def transform(document)
+      raise NotFittedError, 'TFIDF has not been fitted. Call fit first.' unless @fitted
+
+      terms = extract_terms(document)
+      result = {} #: Hash[Symbol, Float]
+
+      terms.each do |term, tf|
+        next unless @vocabulary.key?(term)
+
+        tf_value = @sublinear_tf && tf.positive? ? 1 + Math.log(tf) : tf.to_f
+        result[term] = (tf_value * @idf[term]).to_f
+      end
+
+      normalize_vector(result)
+    end
+
+    # Fits and transforms in one step.
+    # @rbs (Array[String]) -> Array[Hash[Symbol, Float]]
+    def fit_transform(documents)
+      fit(documents)
+      documents.map { |doc| transform(doc) }
+    end
+
+    # Returns vocabulary terms in index order.
+    # @rbs () -> Array[Symbol]
+    def feature_names
+      @vocabulary.keys.sort_by { |term| @vocabulary[term] }
+    end
+
+    # @rbs () -> bool
+    def fitted?
+      @fitted
+    end
+
+    # Returns true if there are unsaved changes.
+    # @rbs () -> bool
+    def dirty?
+      @dirty
+    end
+
+    # Saves the vectorizer to the configured storage.
+    # @rbs () -> void
+    def save
+      raise ArgumentError, 'No storage configured' unless storage
+
+      storage.write(to_json)
+      @dirty = false
+    end
+
+    # Saves the vectorizer state to a file.
+    # @rbs (String) -> Integer
+    def save_to_file(path)
+      result = File.write(path, to_json)
+      @dirty = false
+      result
+    end
+
+    # Loads a vectorizer from the configured storage.
+    # @rbs (storage: Storage::Base) -> TFIDF
+    def self.load(storage:)
+      data = storage.read
+      raise StorageError, 'No saved state found' unless data
+
+      instance = from_json(data)
+      instance.storage = storage
+      instance
+    end
+
+    # Loads a vectorizer from a file.
+    # @rbs (String) -> TFIDF
+    def self.load_from_file(path)
+      from_json(File.read(path))
+    end
+
+    # Reloads the vectorizer from storage, raising if there are unsaved changes.
+    # @rbs () -> self
+    def reload
+      raise ArgumentError, 'No storage configured' unless storage
+      raise UnsavedChangesError, 'Unsaved changes would be lost. Call save first or use reload!' if @dirty
+
+      data = storage.read
+      raise StorageError, 'No saved state found' unless data
+
+      restore_from_json(data)
+      @dirty = false
+      self
+    end
+
+    # Force reloads the vectorizer from storage, discarding any unsaved changes.
+    # @rbs () -> self
+    def reload!
+      raise ArgumentError, 'No storage configured' unless storage
+
+      data = storage.read
+      raise StorageError, 'No saved state found' unless data
+
+      restore_from_json(data)
+      @dirty = false
+      self
+    end
+
+    # @rbs (?untyped) -> Hash[Symbol, untyped]
+    def as_json(_options = nil)
+      {
+        version: 1,
+        type: 'tfidf',
+        min_df: @min_df,
+        max_df: @max_df,
+        ngram_range: @ngram_range,
+        sublinear_tf: @sublinear_tf,
+        vocabulary: @vocabulary,
+        idf: @idf,
+        num_documents: @num_documents,
+        fitted: @fitted
+      }
+    end
+
+    # @rbs (?untyped) -> String
+    def to_json(_options = nil)
+      JSON.generate(as_json)
+    end
+
+    # Loads a vectorizer from JSON.
+    # @rbs (String | Hash[String, untyped]) -> TFIDF
+    def self.from_json(json)
+      data = json.is_a?(String) ? JSON.parse(json) : json
+      raise ArgumentError, "Invalid vectorizer type: #{data['type']}" unless data['type'] == 'tfidf'
+
+      instance = new(
+        min_df: data['min_df'],
+        max_df: data['max_df'],
+        ngram_range: data['ngram_range'],
+        sublinear_tf: data['sublinear_tf']
+      )
+
+      instance.instance_variable_set(:@vocabulary, symbolize_keys(data['vocabulary']))
+      instance.instance_variable_set(:@idf, symbolize_keys(data['idf']))
+      instance.instance_variable_set(:@num_documents, data['num_documents'])
+      instance.instance_variable_set(:@fitted, data['fitted'])
+      instance.instance_variable_set(:@dirty, false)
+      instance.instance_variable_set(:@storage, nil)
+
+      instance
+    end
+
+    # @rbs () -> Array[untyped]
+    def marshal_dump
+      [@min_df, @max_df, @ngram_range, @sublinear_tf, @vocabulary, @idf, @num_documents, @fitted]
+    end
+
+    # @rbs (Array[untyped]) -> void
+    def marshal_load(data)
+      @min_df, @max_df, @ngram_range, @sublinear_tf, @vocabulary, @idf, @num_documents, @fitted = data
+      @dirty = false
+      @storage = nil
+    end
+
+    # Loads a vectorizer from a checkpoint.
+    #
+    # @rbs (storage: Storage::Base, checkpoint_id: String) -> TFIDF
+    def self.load_checkpoint(storage:, checkpoint_id:)
+      raise ArgumentError, 'Storage must be File storage for checkpoints' unless storage.is_a?(Storage::File)
+
+      dir = File.dirname(storage.path)
+      base = File.basename(storage.path, '.*')
+      ext = File.extname(storage.path)
+      checkpoint_path = File.join(dir, "#{base}_checkpoint_#{checkpoint_id}#{ext}")
+
+      checkpoint_storage = Storage::File.new(path: checkpoint_path)
+      instance = load(storage: checkpoint_storage)
+      instance.storage = storage
+      instance
+    end
+
+    # Fits the vectorizer from an IO stream.
+    # Collects all documents from the stream, then fits the model.
+    # Note: All documents must be collected in memory for IDF calculation.
+    #
+    # @example Fit from a file
+    #   tfidf.fit_from_stream(File.open('corpus.txt'))
+    #
+    # @example With progress tracking
+    #   tfidf.fit_from_stream(io, batch_size: 500) do |progress|
+    #     puts "#{progress.completed} documents loaded"
+    #   end
+    #
+    # @rbs (IO, ?batch_size: Integer) { (Streaming::Progress) -> void } -> self
+    def fit_from_stream(io, batch_size: Streaming::DEFAULT_BATCH_SIZE)
+      reader = Streaming::LineReader.new(io, batch_size: batch_size)
+      total = reader.estimate_line_count
+      progress = Streaming::Progress.new(total: total)
+
+      documents = [] #: Array[String]
+
+      reader.each_batch do |batch|
+        documents.concat(batch)
+        progress.completed += batch.size
+        progress.current_batch += 1
+        yield progress if block_given?
+      end
+
+      fit(documents) unless documents.empty?
+      self
+    end
+
+    # TFIDF doesn't support train_from_stream (use fit_from_stream instead).
+    # This method raises NotImplementedError with guidance.
+    #
+    # @rbs (*untyped, **untyped) -> void
+    def train_from_stream(*) # steep:ignore
+      raise NotImplementedError, 'TFIDF uses fit_from_stream instead of train_from_stream'
+    end
+
+    # TFIDF doesn't support train_batch (use fit instead).
+    # This method raises NotImplementedError with guidance.
+    #
+    # @rbs (*untyped, **untyped) -> void
+    def train_batch(*) # steep:ignore
+      raise NotImplementedError, 'TFIDF uses fit instead of train_batch'
+    end
+
+    private
+
+    # Restores vectorizer state from JSON string.
+    # @rbs (String) -> void
+    def restore_from_json(json)
+      data = JSON.parse(json)
+
+      @min_df = data['min_df']
+      @max_df = data['max_df']
+      @ngram_range = data['ngram_range']
+      @sublinear_tf = data['sublinear_tf']
+      @vocabulary = self.class.send(:symbolize_keys, data['vocabulary'])
+      @idf = self.class.send(:symbolize_keys, data['idf'])
+      @num_documents = data['num_documents']
+      @fitted = data['fitted']
+    end
+
+    # @rbs (String) -> Hash[Symbol, Integer]
+    def extract_terms(document)
+      result = Hash.new(0)
+
+      if @ngram_range[0] <= 1
+        word_hash = document.clean_word_hash
+        word_hash.each { |term, count| result[term] += count }
+      end
+
+      return result if @ngram_range[1] <= 1
+
+      tokens = tokenize_for_ngrams(document)
+      (2..@ngram_range[1]).each do |n|
+        next if n < @ngram_range[0]
+
+        generate_ngrams(tokens, n).each { |ngram| result[ngram] += 1 }
+      end
+
+      result
+    end
+
+    # @rbs (String) -> Array[String]
+    def tokenize_for_ngrams(document)
+      document
+        .gsub(/[^\w\s]/, '')
+        .split
+        .map(&:downcase)
+        .reject { |w| w.length <= 2 || String::CORPUS_SKIP_WORDS.include?(w) }
+        .map(&:stem)
+    end
+
+    # @rbs (Array[String], Integer) -> Array[Symbol]
+    def generate_ngrams(tokens, n) # rubocop:disable Naming/MethodParameterName
+      return [] if tokens.size < n
+
+      tokens.each_cons(n).map { |gram| gram.join('_').intern }
+    end
+
+    # @rbs (Integer, Integer) -> bool
+    def within_df_bounds?(doc_freq, num_docs)
+      doc_freq.between?(
+        @min_df.is_a?(Float) ? (@min_df * num_docs).ceil : @min_df,
+        @max_df.is_a?(Float) ? (@max_df * num_docs).floor : @max_df
+      )
+    end
+
+    # @rbs (Hash[Symbol, Float]) -> Hash[Symbol, Float]
+    def normalize_vector(vector)
+      return vector if vector.empty?
+
+      magnitude = Math.sqrt(vector.values.sum { |v| v * v })
+      return vector if magnitude.zero?
+
+      vector.transform_values { |v| v / magnitude }
+    end
+
+    # @rbs (Integer | Float, String) -> void
+    def validate_df!(value, name)
+      raise ArgumentError, "#{name} must be an Integer or Float" unless value.is_a?(Float) || value.is_a?(Integer)
+      raise ArgumentError, "#{name} must be between 0.0 and 1.0" if value.is_a?(Float) && !value.between?(0.0, 1.0)
+      raise ArgumentError, "#{name} must be non-negative" if value.is_a?(Integer) && value.negative?
+    end
+
+    # @rbs (Array[Integer]) -> void
+    def validate_ngram_range!(range)
+      raise ArgumentError, 'ngram_range must be an array of two integers' unless range.is_a?(Array) && range.size == 2
+      raise ArgumentError, 'ngram_range values must be positive integers' unless range.all?(Integer) && range.all?(&:positive?)
+      raise ArgumentError, 'ngram_range[0] must be <= ngram_range[1]' if range[0] > range[1]
+    end
+
+    # @rbs (Hash[String, untyped]) -> Hash[Symbol, untyped]
+    def self.symbolize_keys(hash)
+      hash.transform_keys(&:to_sym)
+    end
+    private_class_method :symbolize_keys
+  end
+end
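Taken together, the new TFIDF class supports fitting, transforming, n-grams, sublinear TF, and JSON persistence. A minimal usage sketch based only on the methods visible in the hunk above (the corpus strings are invented for illustration):

    require 'classifier'

    # Unigrams + bigrams, sublinear term frequency; vocabulary filtering
    # follows the min_df/max_df semantics of within_df_bounds? above.
    tfidf = Classifier::TFIDF.new(ngram_range: [1, 2], sublinear_tf: true)
    tfidf.fit(['Dogs are great pets', 'Cats are independent', 'Dogs are loyal'])

    # Returns an L2-normalized sparse vector keyed by stemmed term symbols.
    tfidf.transform('Dogs are loyal')  # => {:dog=>0.707..., :loyal=>0.707...}

    tfidf.feature_names                # vocabulary terms in index order
    tfidf.fitted?                      # => true

    # File round trip via save_to_file / load_from_file.
    tfidf.save_to_file('tfidf.json')
    restored = Classifier::TFIDF.load_from_file('tfidf.json')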
data/lib/classifier.rb
CHANGED
@@ -25,7 +25,13 @@
 # License:: LGPL
 
 require 'rubygems'
+require 'classifier/errors'
+require 'classifier/storage'
+require 'classifier/streaming'
 require 'classifier/extensions/string'
 require 'classifier/extensions/vector'
 require 'classifier/bayes'
 require 'classifier/lsi'
+require 'classifier/knn'
+require 'classifier/tfidf'
+require 'classifier/logistic_regression'
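A sketch of what the expanded require list exposes; Classifier::TFIDF and the Storage/Streaming modules appear in this diff, while the KNN and LogisticRegression class names are inferred from their file names:

    require 'classifier'

    Classifier::TFIDF               # tfidf.rb (shown above)
    Classifier::Storage::File       # storage/file.rb (used by TFIDF checkpoints)
    Classifier::Streaming           # streaming.rb mixin (included by TFIDF)
    Classifier::KNN                 # knn.rb (class name inferred)
    Classifier::LogisticRegression  # logistic_regression.rb (class name inferred)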
data/sig/vendor/json.rbs
ADDED
data/sig/vendor/matrix.rbs
CHANGED
@@ -1,26 +1,37 @@
 # Type stubs for matrix gem
-
+# Using untyped elements since our usage is primarily with Floats/Numerics
+class Vector
   EPSILON: Float
 
-  def self.[]:
+  def self.[]: (*untyped) -> Vector
   def size: () -> Integer
-  def []: (Integer) ->
+  def []: (Integer) -> untyped
   def magnitude: () -> Float
-  def normalize: () -> Vector
-  def each: () { (
-  def collect:
-  def to_a: () -> Array[
+  def normalize: () -> Vector
+  def each: () { (untyped) -> void } -> void
+  def collect: () { (untyped) -> untyped } -> Vector
+  def to_a: () -> Array[untyped]
   def *: (untyped) -> untyped
+  def -: (Vector) -> Vector
+  def is_a?: (untyped) -> bool
 end
 
-class Matrix
-  def self.rows:
-  def self.[]:
-  def self.diag: (untyped) -> Matrix
-  def
+class Matrix
+  def self.rows: (Array[Array[untyped]]) -> Matrix
+  def self.[]: (*Array[untyped]) -> Matrix
+  def self.diag: (untyped) -> Matrix
+  def self.columns: (Array[Array[untyped]]) -> Matrix
+  def self.empty: (Integer, Integer) -> Matrix
+  def self.zero: (Integer, Integer) -> Matrix
+  def self.vstack: (Matrix, Matrix) -> Matrix
+  def trans: () -> Matrix
+  def transpose: () -> Matrix
   def *: (untyped) -> untyped
   def row_size: () -> Integer
   def column_size: () -> Integer
-  def
-  def
+  def row: (Integer) -> Vector
+  def column: (Integer) -> Vector
+  def SV_decomp: () -> [Matrix, Matrix, untyped]
+  def is_a?: (untyped) -> bool
+  def respond_to?: (Symbol) -> bool
 end
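These stubs largely mirror Ruby's stdlib matrix gem; a small sketch exercising the stubbed methods (trans and SV_decomp come from the gem's GSL-compatible objects rather than stdlib, so they are omitted here):

    require 'matrix'

    v = Vector[3.0, 4.0]
    v.magnitude              # => 5.0
    v.normalize.to_a         # => [0.6, 0.8]
    v.collect { |x| x * 2 }  # => Vector[6.0, 8.0]
    v - Vector[1.0, 1.0]     # => Vector[2.0, 3.0]

    m = Matrix.rows([[1.0, 2.0], [3.0, 4.0]])
    m.transpose              # => Matrix[[1.0, 3.0], [2.0, 4.0]]
    m.row(1)                 # => Vector[3.0, 4.0]
    Matrix.vstack(m, Matrix.zero(1, 2)).row_size  # => 3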
data/sig/vendor/mutex_m.rbs
ADDED
@@ -0,0 +1,16 @@
+# Type stubs for mutex_m gem
+module Mutex_m
+  def mu_initialize: () -> void
+  def mu_lock: () -> void
+  def mu_unlock: () -> void
+  def mu_synchronize: [T] () { () -> T } -> T
+  def mu_try_lock: () -> bool
+  def mu_locked?: () -> bool
+
+  # Aliases
+  alias lock mu_lock
+  alias unlock mu_unlock
+  alias synchronize mu_synchronize
+  alias try_lock mu_try_lock
+  alias locked? mu_locked?
+end
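mutex_m is Ruby's standard per-object lock mixin; the aliases above (lock, synchronize, and so on) map onto the mu_-prefixed methods. A minimal sketch of the pattern:

    require 'mutex_m'

    registry = {}
    registry.extend(Mutex_m)  # the hash now carries its own mutex

    # synchronize (alias of mu_synchronize) serializes concurrent access.
    registry.synchronize do
      registry[:count] = registry.fetch(:count, 0) + 1
    end

    registry.try_lock  # => true (lock acquired)
    registry.locked?   # => true
    registry.unlock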
data/sig/vendor/streaming.rbs
ADDED
@@ -0,0 +1,14 @@
+# Type stubs for Streaming module
+# Defines the interface that including classes must implement
+
+module Classifier
+  # Interface for classes that include Streaming
+  interface _StreamingHost
+    def storage: () -> Storage::Base?
+    def storage=: (Storage::Base?) -> void
+    def save: () -> void
+  end
+
+  module Streaming : _StreamingHost
+  end
+end
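The _StreamingHost interface spells out what a class must provide before including Streaming. A hypothetical minimal host, for illustration only (MyModel and its save body are not part of the gem):

    class MyModel
      include Classifier::Streaming

      # Satisfies storage / storage= from _StreamingHost.
      attr_accessor :storage  # Storage::Base or nil

      # Satisfies save; the serialization format is up to the host class.
      def save
        raise ArgumentError, 'No storage configured' unless storage

        storage.write('{}')  # placeholder payload
      end
    end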
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: classifier
 version: !ruby/object:Gem::Version
-  version: 2.
+  version: 2.2.0
 platform: ruby
 authors:
 - Lucas Carlson
@@ -107,10 +107,28 @@ dependencies:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
-
+- !ruby/object:Gem::Dependency
+  name: rake-compiler
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+description: A Ruby library for text classification featuring Naive Bayes, LSI (Latent
+  Semantic Indexing), Logistic Regression, and k-Nearest Neighbors classifiers. Includes
+  TF-IDF vectorization, streaming/incremental training, pluggable persistence backends,
+  thread safety, and a native C extension for fast LSI operations.
 email: lucas@rufy.com
 executables: []
-extensions:
+extensions:
+- ext/classifier/extconf.rb
 extra_rdoc_files: []
 files:
 - CLAUDE.md
@@ -118,24 +136,49 @@ files:
 - README.md
 - bin/bayes.rb
 - bin/summarize.rb
+- ext/classifier/classifier_ext.c
+- ext/classifier/extconf.rb
+- ext/classifier/incremental_svd.c
+- ext/classifier/linalg.h
+- ext/classifier/matrix.c
+- ext/classifier/svd.c
+- ext/classifier/vector.c
 - lib/classifier.rb
 - lib/classifier/bayes.rb
+- lib/classifier/errors.rb
 - lib/classifier/extensions/string.rb
 - lib/classifier/extensions/vector.rb
-- lib/classifier/extensions/vector_serialize.rb
 - lib/classifier/extensions/word_hash.rb
+- lib/classifier/knn.rb
+- lib/classifier/logistic_regression.rb
 - lib/classifier/lsi.rb
 - lib/classifier/lsi/content_node.rb
+- lib/classifier/lsi/incremental_svd.rb
 - lib/classifier/lsi/summary.rb
 - lib/classifier/lsi/word_list.rb
+- lib/classifier/storage.rb
+- lib/classifier/storage/base.rb
+- lib/classifier/storage/file.rb
+- lib/classifier/storage/memory.rb
+- lib/classifier/streaming.rb
+- lib/classifier/streaming/line_reader.rb
+- lib/classifier/streaming/progress.rb
+- lib/classifier/tfidf.rb
 - sig/vendor/fast_stemmer.rbs
 - sig/vendor/gsl.rbs
+- sig/vendor/json.rbs
 - sig/vendor/matrix.rbs
+- sig/vendor/mutex_m.rbs
+- sig/vendor/streaming.rbs
 - test/test_helper.rb
-homepage: https://
+homepage: https://rubyclassifier.com
 licenses:
 - LGPL
-metadata:
+metadata:
+  documentation_uri: https://rubyclassifier.com/docs
+  source_code_uri: https://github.com/cardmagic/classifier
+  bug_tracker_uri: https://github.com/cardmagic/classifier/issues
+  changelog_uri: https://github.com/cardmagic/classifier/releases
 rdoc_options: []
 require_paths:
 - lib
@@ -143,7 +186,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: '
+      version: '3.1'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
@@ -152,5 +195,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 rubygems_version: 4.0.3
 specification_version: 4
-summary:
+summary: Text classification with Bayesian, LSI, Logistic Regression, kNN, and TF-IDF
+  vectorization.
 test_files: []