thomas 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/thomas.rb +34 -10
- metadata +4 -3
data/lib/thomas.rb
CHANGED
@@ -26,11 +26,16 @@ module Adocca
|
|
26
26
|
@word = word
|
27
27
|
@amount = amount
|
28
28
|
end
|
29
|
+
def adjust(n)
|
30
|
+
@amount += n
|
31
|
+
end
|
29
32
|
def execute
|
30
33
|
model = Module.const_get(@model_name)
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
+
if model.connection.update(model.sanitizeSQL(["UPDATE #{model.table_name} SET amount = amount + ? WHERE corpus = ? AND word = ?",
|
35
|
+
@amount, @corpus, @word])) == 0
|
36
|
+
model.create(:corpus => @corpus, :word => @word, :amount => @amount)
|
37
|
+
end
|
38
|
+
model.expire_cached_namespace('CachedSupermodel:' + model.name + ':find_by_corpus_and_word:' + [@corpus, @word].inspect)
|
34
39
|
end
|
35
40
|
end
|
36
41
|
|
@@ -38,7 +43,7 @@ module Adocca
|
|
38
43
|
|
39
44
|
attr_accessor :model
|
40
45
|
|
41
|
-
@@updates =
|
46
|
+
@@updates = {}
|
42
47
|
|
43
48
|
def initialize(model)
|
44
49
|
@model = model
|
@@ -67,14 +72,24 @@ module Adocca
|
|
67
72
|
|
68
73
|
def train(corpus, text, options = {})
|
69
74
|
tokenize(text, options).each do |word|
|
70
|
-
@@updates
|
75
|
+
@@updates[corpus] ||= {}
|
76
|
+
if update = @@updates[corpus][word]
|
77
|
+
update.adjust(1)
|
78
|
+
else
|
79
|
+
@@updates[corpus][word] = Update.new(@model, corpus, word, 1)
|
80
|
+
end
|
71
81
|
flush_updates
|
72
82
|
end
|
73
83
|
end
|
74
84
|
|
75
85
|
def untrain(corpus, text, options = {})
|
76
86
|
tokenize(text, options).each do |word|
|
77
|
-
@@updates
|
87
|
+
@@updates[corpus] ||= {}
|
88
|
+
if update = @@updates[corpus][word]
|
89
|
+
update.adjust(-1)
|
90
|
+
else
|
91
|
+
@@updates[corpus][word] = Update.new(@model, corpus, word, -1)
|
92
|
+
end
|
78
93
|
flush_updates
|
79
94
|
end
|
80
95
|
end
|
@@ -104,14 +119,23 @@ module Adocca
|
|
104
119
|
|
105
120
|
protected
|
106
121
|
|
122
|
+
def updates_needed
|
123
|
+
rval = 0
|
124
|
+
@@updates.each do |corpus, words|
|
125
|
+
rval += words.size
|
126
|
+
end
|
127
|
+
return rval
|
128
|
+
end
|
129
|
+
|
107
130
|
def flush_updates
|
108
|
-
if
|
131
|
+
if (defined?($FLUSH_THOMAS) && $FLUSH_THOMAS) || updates_needed > 200
|
109
132
|
retries = 0
|
110
133
|
begin
|
111
134
|
@model.transaction do
|
112
|
-
|
113
|
-
|
114
|
-
|
135
|
+
@@updates.clone.each do |corpus, words|
|
136
|
+
words.each do |s, update|
|
137
|
+
update.execute
|
138
|
+
end
|
115
139
|
end
|
116
140
|
end
|
117
141
|
rescue ActiveRecord::StatementInvalid => e
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.
|
2
|
+
rubygems_version: 0.9.2
|
3
3
|
specification_version: 1
|
4
4
|
name: thomas
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.4
|
7
|
-
date: 2007-
|
6
|
+
version: 0.1.5
|
7
|
+
date: 2007-04-11 00:00:00 +02:00
|
8
8
|
summary: Bayes classifier using memcached
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
|
|
25
25
|
platform: ruby
|
26
26
|
signing_key:
|
27
27
|
cert_chain:
|
28
|
+
post_install_message:
|
28
29
|
authors:
|
29
30
|
- Adocca Entertainment AB
|
30
31
|
files:
|