categorize 0.0.11 → 0.0.12

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
@@ -12,7 +12,7 @@ VALUE Categorize = Qnil;
12
12
  VALUE CBagOfWords = Qnil;
13
13
  VALUE Models = Qnil;
14
14
 
15
- static VALUE method_make_model(VALUE, VALUE);
15
+ static VALUE method_model(VALUE, VALUE);
16
16
  static int add_or_update_gram_from_index(int, char *);
17
17
 
18
18
  // Store all grams, used in compare_top_grams.
@@ -25,7 +25,7 @@ void Init_categorize()
25
25
  Categorize = rb_define_module("Categorize");
26
26
  Models = rb_define_module_under(Categorize, "Models");
27
27
  CBagOfWords = rb_define_class_under(Models, "CBagOfWords", rb_cObject);
28
- rb_define_method(CBagOfWords, "make_model", method_make_model, 1);
28
+ rb_define_method(CBagOfWords, "model", method_model, 1);
29
29
  }
30
30
 
31
31
  const bool DEBUG = false;
@@ -122,13 +122,13 @@ int compare_top_grams(const void *idx1, const void *idx2)
122
122
  }
123
123
 
124
124
  /*
125
- * make_model(array_of_tokens);
125
+ * model(array_of_tokens);
126
126
  * ==== Return
127
127
  * Top terms
128
128
  * ==== Parameters
129
129
  * array_of_tokens: Tokens to turn into grams and extract phrases from.
130
130
  */
131
- static VALUE method_make_model(VALUE self, VALUE array_of_tokens)
131
+ static VALUE method_model(VALUE self, VALUE array_of_tokens)
132
132
  {
133
133
  int i, j;
134
134
  long array_of_tokens_len = RARRAY_LEN(array_of_tokens);
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  # The C extension is listed first.
4
- require 'categorize/categorize' unless ENV['NO_C_INCLUDE']
4
+ require 'categorize/categorize'
5
5
 
6
6
  require 'categorize/models/abstract_model'
7
7
  require 'categorize/models/bag_of_words'
@@ -4,6 +4,7 @@ module Categorize
4
4
  module Model
5
5
  MIN_WORD_LENGTH = 3
6
6
  @bag_of_words = Models::BagOfWords.new
7
+ @c_bag_of_words = Models::CBagOfWords.new
7
8
 
8
9
  class << self
9
10
  #include Bow
@@ -21,12 +22,13 @@ module Categorize
21
22
  # ==== Parameters
22
23
  # items:: the items to be classified
23
24
  def make_model_c(strings)
24
- strings.map { |s| preprocess(s) }
25
- #ret = model_bow(array_of_tokens);
25
+ array_of_tokens = strings.map { |s| preprocess(s) }
26
+ ret = @c_bag_of_words.model(array_of_tokens);
26
27
  count = 0
27
28
  ret.reduce({}) do |hash, term|
28
29
  hash[term] ||= []
29
- hash[term] << count += 1
30
+ hash[term] << count
31
+ count += 1
30
32
  hash
31
33
  end
32
34
  end
@@ -37,15 +39,14 @@ module Categorize
37
39
  ]
38
40
  end
39
41
 
40
- private
41
- def preprocess(string)
42
- split_lower_strings = string.split(
43
- Constants::Words::SPLIT_REGEX).map(&:downcase)
44
- split_lower_strings.delete_if do |word|
45
- word.length < MIN_WORD_LENGTH ||
46
- Constants::Words::COMMON.include?(word)
47
- end
42
+ def preprocess(string)
43
+ split_lower_strings = string.split(
44
+ Constants::Words::SPLIT_REGEX).map(&:downcase)
45
+ split_lower_strings.delete_if do |word|
46
+ word.length < MIN_WORD_LENGTH ||
47
+ Constants::Words::COMMON.include?(word)
48
48
  end
49
+ end
49
50
  end
50
51
  end
51
52
  end
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  module Categorize
4
- VERSION = '0.0.11'
4
+ VERSION = '0.0.12'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: categorize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -46,9 +46,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
46
46
  - - ! '>='
47
47
  - !ruby/object:Gem::Version
48
48
  version: '0'
49
- segments:
50
- - 0
51
- hash: -3553060524054293255
52
49
  required_rubygems_version: !ruby/object:Gem::Requirement
53
50
  none: false
54
51
  requirements: