lite 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/lite/vmm.rb +51 -28
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 618acdff29331d957b2efd4c4774b7681938296d
|
4
|
+
data.tar.gz: c64f0d41f19ad4427974577d4c16a5baf75c7780
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 36d9bdfa99e5eaeed2986141866e9824863f3a34a952395f00a143fc51722122e7c893c0979a70ba8a5a64cc5b126e9a9cbd053933fd9c1babc008bbc782642e
|
7
|
+
data.tar.gz: 86193ff687c02a9c1cb80e849a294b6766fc1d324a2207f36679206daab77853394f7ec6a2102ee8f0451436b47d5913ce3cfd1b2cf723f9f3396261066a92ba
|
data/lib/lite/vmm.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require "
|
1
|
+
require "msgpack"
|
2
2
|
|
3
3
|
module VMM
|
4
4
|
|
@@ -6,50 +6,51 @@ class PPM
|
|
6
6
|
|
7
7
|
# Creates an untrained model: a fresh Trie plus an ABet wrapping +ab+.
#
# @param ab [Array] symbols passed straight to ABet.new
# @param d [Integer] context length stored in @d (defaults to 5)
def initialize(ab, d = 5)
  @trie = Trie.new
  @ab = ABet.new(ab)
  @d = d
end
|
12
12
|
|
13
13
|
# Feeds every (context, next symbol) window of +str+ into the trie.
#
# A window is @d consecutive characters followed by the character right
# after them; both are converted to integer ids through @ab before being
# handed to @trie.grow.
#
# The loop runs str.size - @d times so the final window (whose target is
# the last character of +str+) is learned too; the previous bound of
# str.size - @d - 1 silently dropped it.
#
# @param str [String] training text
# @return [Integer] str.size - @d, the value Integer#times returns
def learn(str)
  (str.size - @d).times do |i|
    # str[i, @d] is the @d-character window starting at i (empty when @d is 0).
    context = str[i, @d].chars.map { |sym| @ab.sym_to_i(sym) }
    @trie.grow(context, @ab.sym_to_i(str[i + @d]))
  end
end
|
16
16
|
|
17
17
|
# Sums path_predict over every window of +str+.
#
# For each start index i the @d characters str[i, @d] form the context and
# str[i + @d] is the symbol being scored. The scored symbol used to be
# str[@d] for every window, i.e. the same character each time.
#
# @param str [String] text to evaluate
# @return [Float] accumulated value of path_predict (0.0 when +str+ has no
#   complete window)
def log_eval(str)
  (str.size - @d).times.inject(0.0) do |total, i|
    context = str[i, @d].chars.map { |sym| @ab.sym_to_i(sym) }
    total + path_predict(@ab.sym_to_i(str[i + @d]), @trie.path(context))
  end
end
|
23
22
|
|
24
23
|
# Accumulates log2(single_pr(sym, context)) over +path+, walking from the
# last node back towards the first.
#
# The walk stops right after adding the term for a node that either equals
# @trie.root or whose first hash (context[0]) has no +sym+ key.
#
# NOTE(review): the stop test looks at context[0], while single_pr reads its
# counts from context[1] — confirm that stopping when the key is *absent*
# from context[0] is the intended back-off rule.
#
# @param sym [Object] symbol id being scored
# @param path [Array<Array>] nodes as returned by Trie#path
# @return [Float] sum of the log2 terms (0.0 for an empty path)
def path_predict(sym, path)
  total = 0.0
  path.reverse_each do |context|
    total += Math.log(single_pr(sym, context), 2.0)
    break if context == @trie.root || !context[0].key?(sym)
  end
  total
end
|
31
30
|
|
32
31
|
# Ratio used as the per-node probability term.
#
# With counts = context[1]:
#   numerator   = counts[sym] when +sym+ is a key, otherwise counts.size
#   denominator = sum of all counts + counts.size
#
# An empty counts hash makes inject(:+) return nil, so the addition raises
# NoMethodError; callers are expected to pass nodes that hold counts.
#
# @param sym [Object] symbol id
# @param context [Array] node whose second element is a symbol => count hash
# @return [Float]
def single_pr(sym, context)
  counts = context[1]
  numerator = counts.fetch(sym) { counts.size }
  numerator / (counts.values.inject(:+) + counts.size).to_f
end
|
36
35
|
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
# Serialises the model (trie root, alphabet symbols, context length) with
# MessagePack and writes it to +file_path+.
#
# The file is opened in binary mode because the payload is raw bytes, and
# the block form of File.open closes the handle even if the write raises.
# IO#write is used instead of #print so a non-nil $\ cannot append bytes
# to the payload.
#
# @param file_path [String] destination path (overwritten if it exists)
# @return [nil]
def to_file(file_path)
  payload = { 'trie' => @trie.root, 'ab' => @ab.sym_arr, 'd' => @d }.to_msgpack
  File.open(file_path, 'wb') { |out| out.write(payload) }
  nil
end
|
41
42
|
|
42
|
-
def self.
|
43
|
-
model =
|
44
|
-
ppm = PPM.new( model[
|
45
|
-
ppm.instance_variable_set( :@trie, Trie.new( model[
|
43
|
+
# Rebuilds a PPM from a file containing a MessagePack map with the keys
# 'ab', 'd' and 'trie'.
#
# File.binread reads the raw bytes and closes the file itself; the earlier
# File.new(...).readlines.join left the handle open and read the binary
# payload in text mode.
#
# @param file_path [String] path of the serialised model
# @return [PPM] model whose @trie wraps the stored root
def self.from_file(file_path)
  model = MessagePack.unpack(File.binread(file_path))
  ppm = PPM.new(model['ab'], model['d'])
  # The constructor always starts with an empty trie, so swap in the stored one.
  ppm.instance_variable_set(:@trie, Trie.new(model['trie']))
  ppm
end
|
48
49
|
end
|
49
50
|
|
50
51
|
class Trie
|
51
52
|
# Wraps an existing root node, or starts from an empty one.
#
# A node is a two-element array; grow, path and new_node index it with
# [0] and [1]. The empty root is a pair of empty hashes.
#
# @param root [Array, nil] previously built root, or nil for a new trie
def initialize(root = nil)
  @root = root.nil? ? [{}, {}] : root
end
|
54
55
|
|
55
56
|
def root
|
@@ -58,29 +59,51 @@ class Trie
|
|
58
59
|
|
59
60
|
# Records one observation of +symbol+ after +context+.
#
# The root and every node reached by following +context+ from the root each
# have their count for +symbol+ (stored in node[1]) raised by exactly one;
# missing nodes are created via new_node, which starts the count at 1.
#
# Earlier the root was incremented twice per call (once before the loop and
# once in its first iteration) and the deepest node was never incremented
# after creation, so a full-length context could not learn any symbol other
# than the one that created it.
#
# @param context [Array] symbol ids, walked from first to last
# @param symbol [Object] symbol id that followed the context
# @return [true]
def grow(context, symbol)
  node = @root
  node[1][symbol] = node[1].fetch(symbol, 0) + 1
  context.each do |ch|
    child = node[0][ch]
    if child
      child[1][symbol] = child[1].fetch(symbol, 0) + 1
    else
      # new_node already carries a count of 1 for symbol.
      child = node[0][ch] = new_node(symbol)
    end
    node = child
  end
  true
end
|
71
72
|
|
72
|
-
def path
|
73
|
-
|
74
|
-
next(agg) unless agg.last[
|
75
|
-
agg << agg.last[
|
73
|
+
# Collects the nodes reached by following +sym_arr+ from the root.
#
# The result always starts with the root. For each symbol, the child of the
# most recently collected node is appended when it exists; a symbol with no
# matching child is skipped and matching continues with the next symbol
# from the same node.
#
# NOTE(review): skipping (rather than stopping at) an unmatched symbol means
# later symbols can still extend the path — confirm this is intended.
#
# @param sym_arr [Array] symbol ids
# @return [Array<Array>] visited nodes, root first
def path(sym_arr)
  sym_arr.each_with_object([@root]) do |ch, nodes|
    child = nodes.last[0][ch]
    nodes << child if child
  end
end
|
79
80
|
|
80
81
|
# Builds a fresh node: an empty hash in slot 0 and, in slot 1, a hash that
# already counts +v+ once.
#
# @param v [Object] symbol id whose count starts at 1
# @return [Array] two-element node
def new_node(v)
  [{}, { v => 1 }]
end
|
83
84
|
end
|
84
85
|
|
86
|
+
# Alphabet: maps symbols to integer ids (their index in the symbol array)
# and back. Symbols that are not in the array all share the id equal to the
# array's length, which is why #size reports one more than the array holds.
class ABet
  # @param sym_arr [Array] known symbols; a symbol's id is its index
  #   (for duplicates the last index wins)
  def initialize(sym_arr)
    @ab = sym_arr
    @sym_to_i = {}
    sym_arr.each_with_index { |sym, idx| @sym_to_i[sym] = idx }
  end

  # @param sym [Object] symbol to look up
  # @return [Integer] index of +sym+, or the array length for unknown symbols
  def sym_to_i(sym)
    @sym_to_i.fetch(sym, @ab.size)
  end

  # Reverse lookup. Ids outside the symbol array — including the shared
  # "unknown" id — have no symbol and yield nil. Negative ids are rejected
  # explicitly so they no longer wrap around to the end of the array; the
  # old `@ab[size]` fallback indexed past the end and was always nil.
  #
  # @param id [Integer] symbol id
  # @return [Object, nil]
  def i_to_sym(id)
    return nil if id.is_a?(Numeric) && id < 0

    @ab[id]
  end

  # @return [Array] the symbol array given to the constructor
  def sym_arr
    @ab
  end

  # @return [Integer] number of known symbols plus one for the unknown id
  def size
    @ab.size + 1
  end
end
|
85
108
|
|
86
109
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lite
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ronbee
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-08-
|
11
|
+
date: 2013-08-28 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: 'lite machine learning tools: clasifier, annotator, and more'
|
14
14
|
email: void@mailinator.com
|