thomas 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/thomas.rb +34 -10
  2. metadata +4 -3
data/lib/thomas.rb CHANGED
@@ -26,11 +26,16 @@ module Adocca
26
26
  @word = word
27
27
  @amount = amount
28
28
  end
29
+ def adjust(n)
30
+ @amount += n
31
+ end
29
32
  def execute
30
33
  model = Module.const_get(@model_name)
31
- instance = model.find_by_corpus_and_word(@corpus, @word)
32
- instance = model.create(:corpus => @corpus, :word => @word) unless instance
33
- instance.update_attribute(:amount, (instance.amount || 0) + @amount)
34
+ if model.connection.update(model.sanitizeSQL(["UPDATE #{model.table_name} SET amount = amount + ? WHERE corpus = ? AND word = ?",
35
+ @amount, @corpus, @word])) == 0
36
+ model.create(:corpus => @corpus, :word => @word, :amount => @amount)
37
+ end
38
+ model.expire_cached_namespace('CachedSupermodel:' + model.name + ':find_by_corpus_and_word:' + [@corpus, @word].inspect)
34
39
  end
35
40
  end
36
41
 
@@ -38,7 +43,7 @@ module Adocca
38
43
 
39
44
  attr_accessor :model
40
45
 
41
- @@updates = []
46
+ @@updates = {}
42
47
 
43
48
  def initialize(model)
44
49
  @model = model
@@ -67,14 +72,24 @@ module Adocca
67
72
 
68
73
  def train(corpus, text, options = {})
69
74
  tokenize(text, options).each do |word|
70
- @@updates << Update.new(@model, corpus, word, 1)
75
+ @@updates[corpus] ||= {}
76
+ if update = @@updates[corpus][word]
77
+ update.adjust(1)
78
+ else
79
+ @@updates[corpus][word] = Update.new(@model, corpus, word, 1)
80
+ end
71
81
  flush_updates
72
82
  end
73
83
  end
74
84
 
75
85
  def untrain(corpus, text, options = {})
76
86
  tokenize(text, options).each do |word|
77
- @@updates << Update.new(@model, corpus, word, -1)
87
+ @@updates[corpus] ||= {}
88
+ if update = @@updates[corpus][word]
89
+ update.adjust(-1)
90
+ else
91
+ @@updates[corpus][word] = Update.new(@model, corpus, word, -1)
92
+ end
78
93
  flush_updates
79
94
  end
80
95
  end
@@ -104,14 +119,23 @@ module Adocca
104
119
 
105
120
  protected
106
121
 
122
+ def updates_needed
123
+ rval = 0
124
+ @@updates.each do |corpus, words|
125
+ rval += words.size
126
+ end
127
+ return rval
128
+ end
129
+
107
130
  def flush_updates
108
- if @@updates.size > 200 || (defined?($FLUSH_THOMAS) && $FLUSH_THOMAS)
131
+ if (defined?($FLUSH_THOMAS) && $FLUSH_THOMAS) || updates_needed > 200
109
132
  retries = 0
110
133
  begin
111
134
  @model.transaction do
112
- my_updates = @@updates.clone
113
- until my_updates.empty?
114
- my_updates.pop.execute
135
+ @@updates.clone.each do |corpus, words|
136
+ words.each do |s, update|
137
+ update.execute
138
+ end
115
139
  end
116
140
  end
117
141
  rescue ActiveRecord::StatementInvalid => e
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: thomas
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.4
7
- date: 2007-01-30 00:00:00 +01:00
6
+ version: 0.1.5
7
+ date: 2007-04-11 00:00:00 +02:00
8
8
  summary: Bayes classifier using memcached
9
9
  require_paths:
10
10
  - lib
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
+ post_install_message:
28
29
  authors:
29
30
  - Adocca Entertainment AB
30
31
  files: