lite 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/lite/vmm.rb +51 -28
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 618acdff29331d957b2efd4c4774b7681938296d
|
4
|
+
data.tar.gz: c64f0d41f19ad4427974577d4c16a5baf75c7780
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36d9bdfa99e5eaeed2986141866e9824863f3a34a952395f00a143fc51722122e7c893c0979a70ba8a5a64cc5b126e9a9cbd053933fd9c1babc008bbc782642e
|
7
|
+
data.tar.gz: 86193ff687c02a9c1cb80e849a294b6766fc1d324a2207f36679206daab77853394f7ec6a2102ee8f0451436b47d5913ce3cfd1b2cf723f9f3396261066a92ba
|
data/lib/lite/vmm.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require "
|
1
|
+
require "msgpack"
|
2
2
|
|
3
3
|
module VMM
|
4
4
|
|
@@ -6,50 +6,51 @@ class PPM
|
|
6
6
|
|
7
7
|
def initialize ab,d=5
|
8
8
|
@trie = Trie.new
|
9
|
-
@ab = ab
|
9
|
+
@ab = ABet.new ab
|
10
10
|
@d = d
|
11
11
|
end
|
12
12
|
|
13
13
|
def learn str
|
14
|
-
(str.size - @d-1 ).times{|i| @trie.grow(str[i..i+@d-1],str[i+@d]) }
|
14
|
+
(str.size - @d-1 ).times{|i| @trie.grow(str[i..i+@d-1].chars.map{|sym| @ab.sym_to_i( sym )}, @ab.sym_to_i( str[i+@d] ) ) }
|
15
15
|
end
|
16
16
|
|
17
17
|
def log_eval str
|
18
18
|
(str.size - @d).times.inject(0.0) do |agg,i|
|
19
|
-
agg += path_predict( str[@d], @trie.path( str[i..i+@d-1] ) )
|
19
|
+
agg += path_predict( @ab.sym_to_i(str[@d]) , @trie.path( str[i..i+@d-1].chars.map{|sym| @ab.sym_to_i(sym)} ) )
|
20
20
|
end
|
21
|
-
|
22
21
|
end
|
23
22
|
|
24
23
|
def path_predict sym, path
|
25
24
|
path.reverse.inject( 0.0 ) do |agg, context|
|
26
25
|
agg += Math.log( single_pr( sym, context ), 2.0 )
|
27
|
-
break( agg ) if context===@trie.root or !context[
|
26
|
+
break( agg ) if context===@trie.root or !context[0].has_key?( sym )
|
28
27
|
agg
|
29
28
|
end
|
30
29
|
end
|
31
30
|
|
32
31
|
def single_pr sym, context
|
33
|
-
( context[
|
34
|
-
(context[
|
32
|
+
( context[1].has_key?( sym ) ? context[1][sym] : context[1].size ) /
|
33
|
+
(context[1].values.inject(:+) + context[1].size).to_f
|
35
34
|
end
|
36
35
|
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
def to_file file_path
|
37
|
+
msg = { 'trie' => @trie.root, 'ab' => @ab.sym_arr, 'd' => @d }.to_msgpack
|
38
|
+
out = File.new( file_path, "w" )
|
39
|
+
out.print msg
|
40
|
+
out.close
|
40
41
|
end
|
41
42
|
|
42
|
-
def self.
|
43
|
-
model =
|
44
|
-
ppm = PPM.new( model[
|
45
|
-
ppm.instance_variable_set( :@trie, Trie.new( model[
|
43
|
+
def self.from_file file_path
|
44
|
+
model = MessagePack.unpack( File.new( file_path,"r").readlines.join )
|
45
|
+
ppm = PPM.new( model['ab'],model['d'] )
|
46
|
+
ppm.instance_variable_set( :@trie, Trie.new( model['trie'] ) )
|
46
47
|
ppm
|
47
48
|
end
|
48
49
|
end
|
49
50
|
|
50
51
|
class Trie
|
51
52
|
def initialize(root=nil)
|
52
|
-
@root = root.nil? ? (
|
53
|
+
@root = root.nil? ? ( [{}, {} ]) : root
|
53
54
|
end
|
54
55
|
|
55
56
|
def root
|
@@ -58,29 +59,51 @@ class Trie
|
|
58
59
|
|
59
60
|
def grow(context, symbol)
|
60
61
|
node = @root
|
61
|
-
@root[
|
62
|
-
@root[
|
63
|
-
context.
|
64
|
-
node[
|
65
|
-
node[
|
66
|
-
node[
|
67
|
-
node = node[
|
62
|
+
@root[1][symbol]||=0
|
63
|
+
@root[1][symbol]+=1
|
64
|
+
context.each do |ch|
|
65
|
+
node[0][ch] ||= new_node symbol
|
66
|
+
node[1][symbol]||=0
|
67
|
+
node[1][symbol] += 1
|
68
|
+
node = node[0][ch]
|
68
69
|
end
|
69
70
|
true
|
70
71
|
end
|
71
72
|
|
72
|
-
def path
|
73
|
-
|
74
|
-
next(agg) unless agg.last[
|
75
|
-
agg << agg.last[
|
73
|
+
def path sym_arr
|
74
|
+
sym_arr.inject([@root]) do |agg,ch|
|
75
|
+
next(agg) unless agg.last[0][ch]
|
76
|
+
agg << agg.last[0][ch]
|
76
77
|
agg
|
77
78
|
end
|
78
79
|
end
|
79
80
|
|
80
81
|
def new_node( v )
|
81
|
-
{
|
82
|
+
[{}, { v => 1}]
|
82
83
|
end
|
83
84
|
end
|
84
85
|
|
86
|
+
class ABet
|
87
|
+
def initialize sym_arr
|
88
|
+
@ab = sym_arr
|
89
|
+
@sym_to_i = Hash[ sym_arr.zip( (0..sym_arr.size-1).to_a ) ]
|
90
|
+
end
|
91
|
+
|
92
|
+
def sym_to_i sym
|
93
|
+
@sym_to_i[ sym ] || @ab.size
|
94
|
+
end
|
95
|
+
|
96
|
+
def i_to_sym id
|
97
|
+
@ab[ id ] || @ab[ size ]
|
98
|
+
end
|
99
|
+
|
100
|
+
def sym_arr
|
101
|
+
@ab
|
102
|
+
end
|
103
|
+
|
104
|
+
def size
|
105
|
+
@ab.size + 1
|
106
|
+
end
|
107
|
+
end
|
85
108
|
|
86
109
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ronbee
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'lite machine learning tools: clasifier, annotator, and more'
|
14
14
|
email: void@mailinator.com
|