categorize 0.0.2 → 0.0.3

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/categorize/model.rb +35 -33
  2. metadata +1 -1
@@ -1,49 +1,51 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  module Categorize
4
- MIN_WORD_LENGTH = 3
5
- @bag_of_words = Models::BagOfWords.new
4
+ module Model
5
+ MIN_WORD_LENGTH = 3
6
+ @bag_of_words = Models::BagOfWords.new
6
7
 
7
- class << self
8
- #include Bow
9
- # ==== Return
10
- # Hash - category => results
11
- # ==== Parameters
12
- # documents:: a list of documents to be classified
13
- def make_model(query, documents, topic_model = @bag_of_words)
14
- records_to_tokens = lexicalize(documents)
15
- topic_model.model(query.downcase.strip, records_to_tokens)
16
- end
8
+ class << self
9
+ #include Bow
10
+ # ==== Return
11
+ # Hash - category => results
12
+ # ==== Parameters
13
+ # documents:: a list of documents to be classified
14
+ def make_model(query, documents, modeler = @bag_of_words)
15
+ records_to_tokens = lexicalize(documents)
16
+ modeler.model(query.downcase.strip, records_to_tokens)
17
+ end
17
18
 
18
- # ==== Return
19
- # Hash - category => results
20
- # ==== Parameters
21
- # items:: the items to be classified
22
- def make_model_c(strings)
23
- strings.map { |s| preprocess(s) }
24
- #ret = model_bow(array_of_tokens);
25
- count = 0
26
- ret.reduce({}) do |hash, term|
27
- hash[term] ||= []
28
- hash[term] << count += 1
29
- hash
19
+ # ==== Return
20
+ # Hash - category => results
21
+ # ==== Parameters
22
+ # items:: the items to be classified
23
+ def make_model_c(strings)
24
+ strings.map { |s| preprocess(s) }
25
+ #ret = model_bow(array_of_tokens);
26
+ count = 0
27
+ ret.reduce({}) do |hash, term|
28
+ hash[term] ||= []
29
+ hash[term] << count += 1
30
+ hash
31
+ end
30
32
  end
31
- end
32
33
 
33
- private
34
34
  def lexicalize(strings)
35
35
  Hash[
36
36
  (0..(strings.length - 1)).zip(strings.map { |s| preprocess(s) })
37
37
  ]
38
38
  end
39
39
 
40
- def preprocess(string)
41
- split_lower_strings = string.split(
42
- Constants::Words::SPLIT_REGEX).map(&:downcase)
43
- split_lower_strings.delete_if do |word|
44
- word.length < MIN_WORD_LENGTH ||
45
- Constants::Words::COMMON.include?(word)
40
+ private
41
+ def preprocess(string)
42
+ split_lower_strings = string.split(
43
+ Constants::Words::SPLIT_REGEX).map(&:downcase)
44
+ split_lower_strings.delete_if do |word|
45
+ word.length < MIN_WORD_LENGTH ||
46
+ Constants::Words::COMMON.include?(word)
47
+ end
46
48
  end
47
- end
49
+ end
48
50
  end
49
51
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: categorize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: