thomas 0.1.4 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/thomas.rb +34 -10
  2. metadata +4 -3
data/lib/thomas.rb CHANGED
@@ -26,11 +26,16 @@ module Adocca
26
26
  @word = word
27
27
  @amount = amount
28
28
  end
29
+ def adjust(n)
30
+ @amount += n
31
+ end
29
32
  def execute
30
33
  model = Module.const_get(@model_name)
31
- instance = model.find_by_corpus_and_word(@corpus, @word)
32
- instance = model.create(:corpus => @corpus, :word => @word) unless instance
33
- instance.update_attribute(:amount, (instance.amount || 0) + @amount)
34
+ if model.connection.update(model.sanitizeSQL(["UPDATE #{model.table_name} SET amount = amount + ? WHERE corpus = ? AND word = ?",
35
+ @amount, @corpus, @word])) == 0
36
+ model.create(:corpus => @corpus, :word => @word, :amount => @amount)
37
+ end
38
+ model.expire_cached_namespace('CachedSupermodel:' + model.name + ':find_by_corpus_and_word:' + [@corpus, @word].inspect)
34
39
  end
35
40
  end
36
41
 
@@ -38,7 +43,7 @@ module Adocca
38
43
 
39
44
  attr_accessor :model
40
45
 
41
- @@updates = []
46
+ @@updates = {}
42
47
 
43
48
  def initialize(model)
44
49
  @model = model
@@ -67,14 +72,24 @@ module Adocca
67
72
 
68
73
  def train(corpus, text, options = {})
69
74
  tokenize(text, options).each do |word|
70
- @@updates << Update.new(@model, corpus, word, 1)
75
+ @@updates[corpus] ||= {}
76
+ if update = @@updates[corpus][word]
77
+ update.adjust(1)
78
+ else
79
+ @@updates[corpus][word] = Update.new(@model, corpus, word, 1)
80
+ end
71
81
  flush_updates
72
82
  end
73
83
  end
74
84
 
75
85
  def untrain(corpus, text, options = {})
76
86
  tokenize(text, options).each do |word|
77
- @@updates << Update.new(@model, corpus, word, -1)
87
+ @@updates[corpus] ||= {}
88
+ if update = @@updates[corpus][word]
89
+ update.adjust(-1)
90
+ else
91
+ @@updates[corpus][word] = Update.new(@model, corpus, word, -1)
92
+ end
78
93
  flush_updates
79
94
  end
80
95
  end
@@ -104,14 +119,23 @@ module Adocca
104
119
 
105
120
  protected
106
121
 
122
+ def updates_needed
123
+ rval = 0
124
+ @@updates.each do |corpus, words|
125
+ rval += words.size
126
+ end
127
+ return rval
128
+ end
129
+
107
130
  def flush_updates
108
- if @@updates.size > 200 || (defined?($FLUSH_THOMAS) && $FLUSH_THOMAS)
131
+ if (defined?($FLUSH_THOMAS) && $FLUSH_THOMAS) || updates_needed > 200
109
132
  retries = 0
110
133
  begin
111
134
  @model.transaction do
112
- my_updates = @@updates.clone
113
- until my_updates.empty?
114
- my_updates.pop.execute
135
+ @@updates.clone.each do |corpus, words|
136
+ words.each do |s, update|
137
+ update.execute
138
+ end
115
139
  end
116
140
  end
117
141
  rescue ActiveRecord::StatementInvalid => e
metadata CHANGED
@@ -1,10 +1,10 @@
1
1
  --- !ruby/object:Gem::Specification
2
- rubygems_version: 0.8.11
2
+ rubygems_version: 0.9.2
3
3
  specification_version: 1
4
4
  name: thomas
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.1.4
7
- date: 2007-01-30 00:00:00 +01:00
6
+ version: 0.1.5
7
+ date: 2007-04-11 00:00:00 +02:00
8
8
  summary: Bayes classifier using memcached
9
9
  require_paths:
10
10
  - lib
@@ -25,6 +25,7 @@ required_ruby_version: !ruby/object:Gem::Version::Requirement
25
25
  platform: ruby
26
26
  signing_key:
27
27
  cert_chain:
28
+ post_install_message:
28
29
  authors:
29
30
  - Adocca Entertainment AB
30
31
  files: