categorize 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/categorize/model.rb +35 -33
  2. metadata +1 -1
@@ -1,49 +1,51 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  module Categorize
4
- MIN_WORD_LENGTH = 3
5
- @bag_of_words = Models::BagOfWords.new
4
+ module Model
5
+ MIN_WORD_LENGTH = 3
6
+ @bag_of_words = Models::BagOfWords.new
6
7
 
7
- class << self
8
- #include Bow
9
- # ==== Return
10
- # Hash - category => results
11
- # ==== Parameters
12
- # documents:: a list of documents to be classified
13
- def make_model(query, documents, topic_model = @bag_of_words)
14
- records_to_tokens = lexicalize(documents)
15
- topic_model.model(query.downcase.strip, records_to_tokens)
16
- end
8
+ class << self
9
+ #include Bow
10
+ # ==== Return
11
+ # Hash - category => results
12
+ # ==== Parameters
13
+ # documents:: a list of documents to be classified
14
+ def make_model(query, documents, modeler = @bag_of_words)
15
+ records_to_tokens = lexicalize(documents)
16
+ modeler.model(query.downcase.strip, records_to_tokens)
17
+ end
17
18
 
18
- # ==== Return
19
- # Hash - category => results
20
- # ==== Parameters
21
- # items:: the items to be classified
22
- def make_model_c(strings)
23
- strings.map { |s| preprocess(s) }
24
- #ret = model_bow(array_of_tokens);
25
- count = 0
26
- ret.reduce({}) do |hash, term|
27
- hash[term] ||= []
28
- hash[term] << count += 1
29
- hash
19
+ # ==== Return
20
+ # Hash - category => results
21
+ # ==== Parameters
22
+ # items:: the items to be classified
23
+ def make_model_c(strings)
24
+ strings.map { |s| preprocess(s) }
25
+ #ret = model_bow(array_of_tokens);
26
+ count = 0
27
+ ret.reduce({}) do |hash, term|
28
+ hash[term] ||= []
29
+ hash[term] << count += 1
30
+ hash
31
+ end
30
32
  end
31
- end
32
33
 
33
- private
34
34
  def lexicalize(strings)
35
35
  Hash[
36
36
  (0..(strings.length - 1)).zip(strings.map { |s| preprocess(s) })
37
37
  ]
38
38
  end
39
39
 
40
- def preprocess(string)
41
- split_lower_strings = string.split(
42
- Constants::Words::SPLIT_REGEX).map(&:downcase)
43
- split_lower_strings.delete_if do |word|
44
- word.length < MIN_WORD_LENGTH ||
45
- Constants::Words::COMMON.include?(word)
40
+ private
41
+ def preprocess(string)
42
+ split_lower_strings = string.split(
43
+ Constants::Words::SPLIT_REGEX).map(&:downcase)
44
+ split_lower_strings.delete_if do |word|
45
+ word.length < MIN_WORD_LENGTH ||
46
+ Constants::Words::COMMON.include?(word)
47
+ end
46
48
  end
47
- end
49
+ end
48
50
  end
49
51
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: categorize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: