lite 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/lite/vmm.rb +51 -28
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 883e359e761b59f56fa7fd759d9a83b37d61aa4a
4
- data.tar.gz: 8189eebfd8e3aa1ca2b225d6bbcd46cca7c918f3
3
+ metadata.gz: 618acdff29331d957b2efd4c4774b7681938296d
4
+ data.tar.gz: c64f0d41f19ad4427974577d4c16a5baf75c7780
5
5
  SHA512:
6
- metadata.gz: ba14c42f2cbc1fc769de835eb4a6e644a383db5b3edc7dd2bb06ad811851814c266ab359164d08bce90955c0118f245c3c3c2431591a64c2e06e4732923e4a7e
7
- data.tar.gz: 2af2cf2adf7db9f0118cedcf72734e51bf68fe3f37d41d93a2614b7833e403232b77df85b3c066001ece7cba3d60d7f1d131cbb3eb1b881d8cd1ab43ab2703d5
6
+ metadata.gz: 36d9bdfa99e5eaeed2986141866e9824863f3a34a952395f00a143fc51722122e7c893c0979a70ba8a5a64cc5b126e9a9cbd053933fd9c1babc008bbc782642e
7
+ data.tar.gz: 86193ff687c02a9c1cb80e849a294b6766fc1d324a2207f36679206daab77853394f7ec6a2102ee8f0451436b47d5913ce3cfd1b2cf723f9f3396261066a92ba
@@ -1,4 +1,4 @@
1
- require "json"
1
+ require "msgpack"
2
2
 
3
3
  module VMM
4
4
 
@@ -6,50 +6,51 @@ class PPM
6
6
 
7
7
  def initialize ab,d=5
8
8
  @trie = Trie.new
9
- @ab = ab
9
+ @ab = ABet.new ab
10
10
  @d = d
11
11
  end
12
12
 
13
13
  def learn str
14
- (str.size - @d-1 ).times{|i| @trie.grow(str[i..i+@d-1],str[i+@d]) }
14
+ (str.size - @d-1 ).times{|i| @trie.grow(str[i..i+@d-1].chars.map{|sym| @ab.sym_to_i( sym )}, @ab.sym_to_i( str[i+@d] ) ) }
15
15
  end
16
16
 
17
17
  def log_eval str
18
18
  (str.size - @d).times.inject(0.0) do |agg,i|
19
- agg += path_predict( str[@d], @trie.path( str[i..i+@d-1] ) )
19
+ agg += path_predict( @ab.sym_to_i(str[@d]) , @trie.path( str[i..i+@d-1].chars.map{|sym| @ab.sym_to_i(sym)} ) )
20
20
  end
21
-
22
21
  end
23
22
 
24
23
  def path_predict sym, path
25
24
  path.reverse.inject( 0.0 ) do |agg, context|
26
25
  agg += Math.log( single_pr( sym, context ), 2.0 )
27
- break( agg ) if context===@trie.root or !context[:c].has_key?(sym)
26
+ break( agg ) if context===@trie.root or !context[0].has_key?( sym )
28
27
  agg
29
28
  end
30
29
  end
31
30
 
32
31
  def single_pr sym, context
33
- ( context[:v].has_key?( sym ) ? context[:v][sym] : context[:v].size ) /
34
- (context[:v].values.inject(:+) + context[:v].size).to_f
32
+ ( context[1].has_key?( sym ) ? context[1][sym] : context[1].size ) /
33
+ (context[1].values.inject(:+) + context[1].size).to_f
35
34
  end
36
35
 
37
-
38
- def to_json
39
- { :trie => @trie.root, :ab => @ab, :d => @d }.to_json
36
+ def to_file file_path
37
+ msg = { 'trie' => @trie.root, 'ab' => @ab.sym_arr, 'd' => @d }.to_msgpack
38
+ out = File.new( file_path, "w" )
39
+ out.print msg
40
+ out.close
40
41
  end
41
42
 
42
- def self.load json
43
- model = JSON.parse( json, :symbolize_names => true)
44
- ppm = PPM.new( model[:ab],model[:d] )
45
- ppm.instance_variable_set( :@trie, Trie.new( model[:trie] ) )
43
+ def self.from_file file_path
44
+ model = MessagePack.unpack( File.new( file_path,"r").readlines.join )
45
+ ppm = PPM.new( model['ab'],model['d'] )
46
+ ppm.instance_variable_set( :@trie, Trie.new( model['trie'] ) )
46
47
  ppm
47
48
  end
48
49
  end
49
50
 
50
51
  class Trie
51
52
  def initialize(root=nil)
52
- @root = root.nil? ? ({ :c => {}, :v => { } }) : root
53
+ @root = root.nil? ? ( [{}, {} ]) : root
53
54
  end
54
55
 
55
56
  def root
@@ -58,29 +59,51 @@ class Trie
58
59
 
59
60
  def grow(context, symbol)
60
61
  node = @root
61
- @root[:v][symbol]||=0
62
- @root[:v][symbol]+=1
63
- context.each_char do |ch|
64
- node[:c][ch] ||= new_node symbol
65
- node[:v][symbol]||=0
66
- node[:v][symbol] += 1
67
- node = node[:c][ch]
62
+ @root[1][symbol]||=0
63
+ @root[1][symbol]+=1
64
+ context.each do |ch|
65
+ node[0][ch] ||= new_node symbol
66
+ node[1][symbol]||=0
67
+ node[1][symbol] += 1
68
+ node = node[0][ch]
68
69
  end
69
70
  true
70
71
  end
71
72
 
72
- def path str
73
- str.chars.inject([@root]) do |agg,ch|
74
- next(agg) unless agg.last[:c][ch]
75
- agg << agg.last[:c][ch]
73
+ def path sym_arr
74
+ sym_arr.inject([@root]) do |agg,ch|
75
+ next(agg) unless agg.last[0][ch]
76
+ agg << agg.last[0][ch]
76
77
  agg
77
78
  end
78
79
  end
79
80
 
80
81
  def new_node( v )
81
- { :c => {}, :v => { v => 1} }
82
+ [{}, { v => 1}]
82
83
  end
83
84
  end
84
85
 
86
+ class ABet
87
+ def initialize sym_arr
88
+ @ab = sym_arr
89
+ @sym_to_i = Hash[ sym_arr.zip( (0..sym_arr.size-1).to_a ) ]
90
+ end
91
+
92
+ def sym_to_i sym
93
+ @sym_to_i[ sym ] || @ab.size
94
+ end
95
+
96
+ def i_to_sym id
97
+ @ab[ id ] || @ab[ size ]
98
+ end
99
+
100
+ def sym_arr
101
+ @ab
102
+ end
103
+
104
+ def size
105
+ @ab.size + 1
106
+ end
107
+ end
85
108
 
86
109
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lite
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - ronbee
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-08-24 00:00:00.000000000 Z
11
+ date: 2013-08-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: 'lite machine learning tools: clasifier, annotator, and more'
14
14
  email: void@mailinator.com