valkey-objects 0.3.7 → 0.3.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6796a90c43779a0ba50c73cdd3d4f3240cd2c311b6e26aad64791d82b69cbbe5
4
- data.tar.gz: d10c65e147d7964afb31163deda067f5d27e67ba1427c150286f34ca29ea2ae4
3
+ metadata.gz: 773f8c7d573ecf3a521ccf195d14d669cf627badaf03499fd936670cafe33356
4
+ data.tar.gz: 9aed29f7bf5bf476f6d6a8d18a3dae5c16828656336df706edd80d08ac4a0cd3
5
5
  SHA512:
6
- metadata.gz: d4ddc3a86f521f6591501e0aea1fd84161332a237a2a5500dfdcdd94df0138efef94fe0581313941eb58030050615b3fd3c7a4c104c3447a715dc6550d462858
7
- data.tar.gz: 9f0593942ebd888802b9484c12ce95ee7edc0119b3e37b24b4df4797f8cf180f60a921e3871cc48f0fea268ad9addfe3d068f4457d9897715e7e4cac49b79b12
6
+ metadata.gz: 55c3959e92a716d40bc6813254940023afadf25f6a64bfb26e92f5a73197cf5c75572a7c6eeac08e0e062b5b3a7aab6b9071c818fb35b41f26b9dd8c4ebdd3d3
7
+ data.tar.gz: 382540a624ce59c47149acfe0455dac4ba6ed8328c7808eb6394f072add9a2d95afbf6bc09069bc15401c674a0c5c4c94b26c996339486a7d15e3754b20f87e3
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Valkey
4
4
  module Objects
5
- VERSION = "0.3.7"
5
+ VERSION = "0.3.9"
6
6
  end
7
7
  end
@@ -11,6 +11,7 @@ require 'yaml'
11
11
  require 'classifier-reborn'
12
12
  require 'knn'
13
13
  require 'sentimental'
14
+ require 'tokenizer'
14
15
 
15
16
  module VK
16
17
 
@@ -181,7 +182,23 @@ module VK
181
182
  # @obj.myVector.value { |i,e| ... }
182
183
  # @obj.myvector[0] = "An Entry of Text."
183
184
  define_method(:vector) { |k, h={}| define_method(k.to_sym) { VECTOR.new(%[#{xx}:vector:#{k}:#{@id}], h); } };
184
-
185
+ ##
186
+ # A set of unique strings, each mapped to a numeric index.
187
+ ##
188
+ # corpus :myCorpus
189
+ # @obj.myCorpus
190
+ # @obj.myCorpus.value { |i,e| ... }
191
+ # @obj.myCorpus["An entry"] => 5
192
+ define_method(:corpus) { |k, h={}| define_method(k.to_sym) { CORPUS.new(%[#{xx}:corpus:#{k}:#{@id}], h); } };
193
+ ##
194
+ # A set of unique strings, each mapped to a numeric index, with nearest-neighbour lookup.
195
+ ##
196
+ # book :myBook
197
+ # @obj.myBook
198
+ # @obj.myBook.value { |index,entry| ... }
199
+ # @obj.myBook.near("Some entry") => [...]
200
+ # @obj.myBook["An entry"] => 5
201
+ define_method(:book) { |k, h={}| define_method(k.to_sym) { BOOK.new(%[#{xx}:book:#{k}:#{@id}], h); } };
185
202
  end
186
203
 
187
204
  def id
@@ -614,6 +631,73 @@ module VK
614
631
  end
615
632
  end
616
633
 
634
+ class CORPUS < O
635
+ def _set
636
+ %[#{key}-sset]
637
+ end
638
+ def _index
639
+ %[#{key}-hash]
640
+ end
641
+ def length
642
+ VK.redis.call("SCARD", _set)
643
+ end
644
+ def value &b
645
+ VK.redis.call("HGETALL", _index).each_pair { |e, i| b.call(i, e) }
646
+ end
647
+ def [] i
648
+ if VK.redis.call("SMISMEMBER", _set, i)[0] == 0
649
+ VK.redis.call("SADD", _set, i)
650
+ VK.redis.call("HSET", _index, i, length);
651
+ end
652
+ return VK.redis.call("HGET", _index, i)
653
+ end
654
+ end
655
+
656
+ class BOOK < O
657
+ def _set
658
+ %[#{key}-sset]
659
+ end
660
+ def _index
661
+ %[#{key}-index]
662
+ end
663
+ def _sec
664
+ %[#{key}-sec]
665
+ end
666
+ def _embed
667
+ %[#{key}-embed]
668
+ end
669
+ def length
670
+ VK.redis.call("SCARD", _set)
671
+ end
672
+ def value &b
673
+ VK.redis.call("HGETALL", _index).each_pair { |e, i| b.call(i.to_i, e) }
674
+ end
675
+ def entry
676
+ Hash.new { |h,k| VK.redis.call("HGET", _sec, k) }
677
+ end
678
+ def vectors
679
+ a = []
680
+ value { |i, e| a << [ i, VK.embed(e) ].flatten }
681
+ return a
682
+ end
683
+ def embed
684
+ return KNN.new(vectors, :distance_measure => :tanimoto_coefficient)
685
+ end
686
+ def near g, *n
687
+ return embed.nearest_neighbours(VK.embed(g)).map { |e|
688
+ { index: e[0], distance: e[1], entry: entry[e[0]] }
689
+ }.sort_by { |ee| ee[:distance] }
690
+ end
691
+ def [] i
692
+ if VK.redis.call("SMISMEMBER", _set, i.to_s)[0] == 0
693
+ VK.redis.call("HSET", _index, i.to_s, length.to_s);
694
+ VK.redis.call("HSET", _sec, length.to_s, i.to_s);
695
+ VK.redis.call("SADD", _set, i)
696
+ end
697
+ return VK.redis.call("HGET", _index, i.to_s).to_i;
698
+ end
699
+ end
700
+
617
701
  @@SENTIMENT_THRESHOLD = 0.9
618
702
  ##
619
703
  # Bayesian Classification
@@ -663,19 +747,30 @@ module VK
663
747
  # index[:vector]['Text to classify'] => :index
664
748
  # index[:vector].learn("Text to classify", :index)
665
749
  # index[:vector].search("Similar text.", 5) => ["Similar text?", "other similar text", ...]
666
- def self.cluster
750
+ def self.index
667
751
  @@CRi
668
752
  end
669
- ##
670
- # Vector Engine
671
- #
672
- # Return known string based upon nearest neighbor by vector of number.
673
- ##
674
- # vector([["return val", 0, 1, 2, ...], ...]).classify(["guess", 0, 1, 2, ...]) => "return val"
675
- def self.cluster vectors, size
676
- Knn::Classifier.new(vectors, SquaredEuclideanCalculator)
753
+ @@VI = Hash.new { |h,k| h[k] = [] }
754
+ @@VC = Hash.new { |h,k| h[k] = Knn::Classifier.new(@@VI[k], SquaredEuclideanCalculator) }
755
+ def self.learn fact, index
756
+ @@CRi[index.to_sym].learn fact, index
757
+ @@VI[index] << fact
677
758
  end
678
759
 
760
+ @@TOKENIZER = Tokenizer::WhitespaceTokenizer.new(:en)
761
+ def self.tokenize i
762
+ @@TOKENIZER.tokenize(i)
763
+ end
764
+
765
+ @@WORDLIST = CORPUS.new(%[WORDLIST])
766
+ def self.wordlist
767
+ @@WORDLIST
768
+ end
769
+
770
+ def self.embed i
771
+ m = VK.tokenize(i).map { |e| VK.wordlist[e].to_f }
772
+ return [m, Array.new(2048 - m.length, 0.0)].flatten
773
+ end
679
774
 
680
775
  def self.flushdb!
681
776
  VK.redis.call("FLUSHDB")
@@ -684,4 +779,13 @@ module VK
684
779
  def self.[] k
685
780
  VK.redis.call("KEYS", k)
686
781
  end
782
+
783
+ def self.txt f, &b
784
+ x = File.read(f).gsub(/\n\n+/,"\n\n").gsub(/ +/, " ").split("\n\n").map { |e| e.gsub(/\n/, " ") }
785
+ if block_given?
786
+ x.map { |e| b.call(e) }
787
+ else
788
+ return x
789
+ end
790
+ end
687
791
  end
@@ -38,8 +38,9 @@ Gem::Specification.new do |spec|
38
38
  spec.add_dependency "ruby-duration"
39
39
  spec.add_dependency "classifier-reborn"
40
40
  spec.add_dependency "amatch"
41
- spec.add_dependency "knn"
41
+ spec.add_dependency "naive_bayes"
42
42
  spec.add_dependency "sentimental"
43
+ spec.add_dependency "tokenizer"
43
44
 
44
45
  spec.add_dependency "pry"
45
46
  spec.add_dependency "awesome_print"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: valkey-objects
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.7
4
+ version: 0.3.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erik Olson
@@ -94,7 +94,7 @@ dependencies:
94
94
  - !ruby/object:Gem::Version
95
95
  version: '0'
96
96
  - !ruby/object:Gem::Dependency
97
- name: knn
97
+ name: naive_bayes
98
98
  requirement: !ruby/object:Gem::Requirement
99
99
  requirements:
100
100
  - - ">="
@@ -121,6 +121,20 @@ dependencies:
121
121
  - - ">="
122
122
  - !ruby/object:Gem::Version
123
123
  version: '0'
124
+ - !ruby/object:Gem::Dependency
125
+ name: tokenizer
126
+ requirement: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - ">="
129
+ - !ruby/object:Gem::Version
130
+ version: '0'
131
+ type: :runtime
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
124
138
  - !ruby/object:Gem::Dependency
125
139
  name: pry
126
140
  requirement: !ruby/object:Gem::Requirement