valkey-objects 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/valkey/objects/version.rb +1 -1
- data/lib/valkey/objects.rb +114 -10
- data/valkey-objects.gemspec +2 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 773f8c7d573ecf3a521ccf195d14d669cf627badaf03499fd936670cafe33356
|
4
|
+
data.tar.gz: 9aed29f7bf5bf476f6d6a8d18a3dae5c16828656336df706edd80d08ac4a0cd3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55c3959e92a716d40bc6813254940023afadf25f6a64bfb26e92f5a73197cf5c75572a7c6eeac08e0e062b5b3a7aab6b9071c818fb35b41f26b9dd8c4ebdd3d3
|
7
|
+
data.tar.gz: 382540a624ce59c47149acfe0455dac4ba6ed8328c7808eb6394f072add9a2d95afbf6bc09069bc15401c674a0c5c4c94b26c996339486a7d15e3754b20f87e3
|
data/lib/valkey/objects.rb
CHANGED
@@ -11,6 +11,7 @@ require 'yaml'
|
|
11
11
|
require 'classifier-reborn'
|
12
12
|
require 'knn'
|
13
13
|
require 'sentimental'
|
14
|
+
require 'tokenizer'
|
14
15
|
|
15
16
|
module VK
|
16
17
|
|
@@ -181,7 +182,23 @@ module VK
|
|
181
182
|
# @obj.myVector.value { |i,e| ... }
|
182
183
|
# @obj.myVector[0] = "An Entry of Text."
|
183
184
|
define_method(:vector) { |k, h={}| define_method(k.to_sym) { VECTOR.new(%[#{xx}:vector:#{k}:#{@id}], h); } };
|
184
|
-
|
185
|
+
##
|
186
|
+
# A collection of unique strings, each assigned an index.
|
187
|
+
##
|
188
|
+
# corpus :myCorpus
|
189
|
+
# @obj.myCorpus
|
190
|
+
# @obj.myCorpus.value { |i,e| ... }
|
191
|
+
# @obj.myCorpus["An entry"] => 5
|
192
|
+
define_method(:corpus) { |k, h={}| define_method(k.to_sym) { CORPUS.new(%[#{xx}:corpus:#{k}:#{@id}], h); } };
|
193
|
+
##
|
194
|
+
# An indexed collection of unique strings with nearest-neighbour lookup.
|
195
|
+
##
|
196
|
+
# book :myBook
|
197
|
+
# @obj.myBook
|
198
|
+
# @obj.myBook.value { |index,entry| ... }
|
199
|
+
# @obj.myBook.near("Some entry") => [...]
|
200
|
+
# @obj.myBook["An entry"] => 5
|
201
|
+
define_method(:book) { |k, h={}| define_method(k.to_sym) { BOOK.new(%[#{xx}:book:#{k}:#{@id}], h); } };
|
185
202
|
end
|
186
203
|
|
187
204
|
def id
|
@@ -614,6 +631,73 @@ module VK
|
|
614
631
|
end
|
615
632
|
end
|
616
633
|
|
634
|
+
class CORPUS < O
|
635
|
+
def _set
|
636
|
+
%[#{key}-sset]
|
637
|
+
end
|
638
|
+
def _index
|
639
|
+
%[#{key}-hash]
|
640
|
+
end
|
641
|
+
def length
|
642
|
+
VK.redis.call("SCARD", _set)
|
643
|
+
end
|
644
|
+
def value &b
|
645
|
+
VK.redis.call("HGETALL", _index).each_pair { |e, i| b.call(i, e) }
|
646
|
+
end
|
647
|
+
def [] i
|
648
|
+
if VK.redis.call("SMISMEMBER", _set, i)[0] == 0
|
649
|
+
VK.redis.call("SADD", _set, i)
|
650
|
+
VK.redis.call("HSET", _index, i, length);
|
651
|
+
end
|
652
|
+
return VK.redis.call("HGET", _index, i)
|
653
|
+
end
|
654
|
+
end
|
655
|
+
|
656
|
+
class BOOK < O
|
657
|
+
def _set
|
658
|
+
%[#{key}-sset]
|
659
|
+
end
|
660
|
+
def _index
|
661
|
+
%[#{key}-index]
|
662
|
+
end
|
663
|
+
def _sec
|
664
|
+
%[#{key}-sec]
|
665
|
+
end
|
666
|
+
def _embed
|
667
|
+
%[#{key}-embed]
|
668
|
+
end
|
669
|
+
def length
|
670
|
+
VK.redis.call("SCARD", _set)
|
671
|
+
end
|
672
|
+
def value &b
|
673
|
+
VK.redis.call("HGETALL", _index).each_pair { |e, i| b.call(i.to_i, e) }
|
674
|
+
end
|
675
|
+
def entry
|
676
|
+
Hash.new { |h,k| VK.redis.call("HGET", _sec, k) }
|
677
|
+
end
|
678
|
+
def vectors
|
679
|
+
a = []
|
680
|
+
value { |i, e| a << [ i, VK.embed(e) ].flatten }
|
681
|
+
return a
|
682
|
+
end
|
683
|
+
def embed
|
684
|
+
return KNN.new(vectors, :distance_measure => :tanimoto_coefficient)
|
685
|
+
end
|
686
|
+
def near g, *n
|
687
|
+
return embed.nearest_neighbours(VK.embed(g)).map { |e|
|
688
|
+
{ index: e[0], distance: e[1], entry: entry[e[0]] }
|
689
|
+
}.sort_by { |ee| ee[:distance] }
|
690
|
+
end
|
691
|
+
def [] i
|
692
|
+
if VK.redis.call("SMISMEMBER", _set, i.to_s)[0] == 0
|
693
|
+
VK.redis.call("HSET", _index, i.to_s, length.to_s);
|
694
|
+
VK.redis.call("HSET", _sec, length.to_s, i.to_s);
|
695
|
+
VK.redis.call("SADD", _set, i)
|
696
|
+
end
|
697
|
+
return VK.redis.call("HGET", _index, i.to_s).to_i;
|
698
|
+
end
|
699
|
+
end
|
700
|
+
|
617
701
|
@@SENTIMENT_THRESHOLD = 0.9
|
618
702
|
##
|
619
703
|
# Bayesian Classification
|
@@ -663,19 +747,30 @@ module VK
|
|
663
747
|
# index[:vector]['Text to classify'] => :index
|
664
748
|
# index[:vector].learn("Text to classify", :index)
|
665
749
|
# index[:vector].search("Similar text.", 5) => ["Similar text?", "other similar text", ...]
|
666
|
-
def self.
|
750
|
+
def self.index
|
667
751
|
@@CRi
|
668
752
|
end
|
669
|
-
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
# vector([["return val", 0, 1, 2, ...], ...]).classify(["guess", 0, 1, 2, ...]) => "return val"
|
675
|
-
def self.cluster vectors, size
|
676
|
-
Knn::Classifier.new(vectors, SquaredEuclideanCalculator)
|
753
|
+
@@VI = Hash.new { |h,k| h[k] = [] }
|
754
|
+
@@VC = Hash.new { |h,k| h[k] = Knn::Classifier.new(@@VI[k], SquaredEuclideanCalculator) }
|
755
|
+
def self.learn fact, index
|
756
|
+
@@CRi[index.to_sym].learn fact, index
|
757
|
+
@@VI[index] << fact
|
677
758
|
end
|
678
759
|
|
760
|
+
@@TOKENIZER = Tokenizer::WhitespaceTokenizer.new(:en)
|
761
|
+
def self.tokenize i
|
762
|
+
@@TOKENIZER.tokenize(i)
|
763
|
+
end
|
764
|
+
|
765
|
+
@@WORDLIST = CORPUS.new(%[WORDLIST])
|
766
|
+
def self.wordlist
|
767
|
+
@@WORDLIST
|
768
|
+
end
|
769
|
+
|
770
|
+
def self.embed i
|
771
|
+
m = VK.tokenize(i).map { |e| VK.wordlist[e].to_f }
|
772
|
+
return [m, Array.new(2048 - m.length, 0.0)].flatten
|
773
|
+
end
|
679
774
|
|
680
775
|
def self.flushdb!
|
681
776
|
VK.redis.call("FLUSHDB")
|
@@ -684,4 +779,13 @@ module VK
|
|
684
779
|
def self.[] k
|
685
780
|
VK.redis.call("KEYS", k)
|
686
781
|
end
|
782
|
+
|
783
|
+
def self.txt f, &b
|
784
|
+
x = File.read(f).gsub(/\n\n+/,"\n\n").gsub(/ +/, " ").split("\n\n").map { |e| e.gsub(/\n/, " ") }
|
785
|
+
if block_given?
|
786
|
+
x.map { |e| b.call(e) }
|
787
|
+
else
|
788
|
+
return x
|
789
|
+
end
|
790
|
+
end
|
687
791
|
end
|
data/valkey-objects.gemspec
CHANGED
@@ -38,8 +38,9 @@ Gem::Specification.new do |spec|
|
|
38
38
|
spec.add_dependency "ruby-duration"
|
39
39
|
spec.add_dependency "classifier-reborn"
|
40
40
|
spec.add_dependency "amatch"
|
41
|
-
spec.add_dependency "
|
41
|
+
spec.add_dependency "naive_bayes"
|
42
42
|
spec.add_dependency "sentimental"
|
43
|
+
spec.add_dependency "tokenizer"
|
43
44
|
|
44
45
|
spec.add_dependency "pry"
|
45
46
|
spec.add_dependency "awesome_print"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: valkey-objects
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.7
|
4
|
+
version: 0.3.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erik Olson
|
@@ -94,7 +94,7 @@ dependencies:
|
|
94
94
|
- !ruby/object:Gem::Version
|
95
95
|
version: '0'
|
96
96
|
- !ruby/object:Gem::Dependency
|
97
|
-
name:
|
97
|
+
name: naive_bayes
|
98
98
|
requirement: !ruby/object:Gem::Requirement
|
99
99
|
requirements:
|
100
100
|
- - ">="
|
@@ -121,6 +121,20 @@ dependencies:
|
|
121
121
|
- - ">="
|
122
122
|
- !ruby/object:Gem::Version
|
123
123
|
version: '0'
|
124
|
+
- !ruby/object:Gem::Dependency
|
125
|
+
name: tokenizer
|
126
|
+
requirement: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
131
|
+
type: :runtime
|
132
|
+
prerelease: false
|
133
|
+
version_requirements: !ruby/object:Gem::Requirement
|
134
|
+
requirements:
|
135
|
+
- - ">="
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: '0'
|
124
138
|
- !ruby/object:Gem::Dependency
|
125
139
|
name: pry
|
126
140
|
requirement: !ruby/object:Gem::Requirement
|