categorize 0.0.11 → 0.0.12

Sign up to get free protection for your applications and to get access to all the features.
@@ -12,7 +12,7 @@ VALUE Categorize = Qnil;
12
12
  VALUE CBagOfWords = Qnil;
13
13
  VALUE Models = Qnil;
14
14
 
15
- static VALUE method_make_model(VALUE, VALUE);
15
+ static VALUE method_model(VALUE, VALUE);
16
16
  static int add_or_update_gram_from_index(int, char *);
17
17
 
18
18
  // Store all grams, used in compare_top_grams.
@@ -25,7 +25,7 @@ void Init_categorize()
25
25
  Categorize = rb_define_module("Categorize");
26
26
  Models = rb_define_module_under(Categorize, "Models");
27
27
  CBagOfWords = rb_define_class_under(Models, "CBagOfWords", rb_cObject);
28
- rb_define_method(CBagOfWords, "make_model", method_make_model, 1);
28
+ rb_define_method(CBagOfWords, "model", method_model, 1);
29
29
  }
30
30
 
31
31
  const bool DEBUG = false;
@@ -122,13 +122,13 @@ int compare_top_grams(const void *idx1, const void *idx2)
122
122
  }
123
123
 
124
124
  /*
125
- * make_model(array_of_tokens);
125
+ * model(array_of_tokens);
126
126
  * ==== Return
127
127
  * Top terms
128
128
  * ==== Parameters
129
129
  * array_of_tokens: Tokens to turn into grams and extract phrases from.
130
130
  */
131
- static VALUE method_make_model(VALUE self, VALUE array_of_tokens)
131
+ static VALUE method_model(VALUE self, VALUE array_of_tokens)
132
132
  {
133
133
  int i, j;
134
134
  long array_of_tokens_len = RARRAY_LEN(array_of_tokens);
@@ -1,7 +1,7 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  # The C extension is listed first.
4
- require 'categorize/categorize' unless ENV['NO_C_INCLUDE']
4
+ require 'categorize/categorize'
5
5
 
6
6
  require 'categorize/models/abstract_model'
7
7
  require 'categorize/models/bag_of_words'
@@ -4,6 +4,7 @@ module Categorize
4
4
  module Model
5
5
  MIN_WORD_LENGTH = 3
6
6
  @bag_of_words = Models::BagOfWords.new
7
+ @c_bag_of_words = Models::CBagOfWords.new
7
8
 
8
9
  class << self
9
10
  #include Bow
@@ -21,12 +22,13 @@ module Categorize
21
22
  # ==== Parameters
22
23
  # items:: the items to be classified
23
24
  def make_model_c(strings)
24
- strings.map { |s| preprocess(s) }
25
- #ret = model_bow(array_of_tokens);
25
+ array_of_tokens = strings.map { |s| preprocess(s) }
26
+ ret = @c_bag_of_words.model(array_of_tokens);
26
27
  count = 0
27
28
  ret.reduce({}) do |hash, term|
28
29
  hash[term] ||= []
29
- hash[term] << count += 1
30
+ hash[term] << count
31
+ count += 1
30
32
  hash
31
33
  end
32
34
  end
@@ -37,15 +39,14 @@ module Categorize
37
39
  ]
38
40
  end
39
41
 
40
- private
41
- def preprocess(string)
42
- split_lower_strings = string.split(
43
- Constants::Words::SPLIT_REGEX).map(&:downcase)
44
- split_lower_strings.delete_if do |word|
45
- word.length < MIN_WORD_LENGTH ||
46
- Constants::Words::COMMON.include?(word)
47
- end
42
+ def preprocess(string)
43
+ split_lower_strings = string.split(
44
+ Constants::Words::SPLIT_REGEX).map(&:downcase)
45
+ split_lower_strings.delete_if do |word|
46
+ word.length < MIN_WORD_LENGTH ||
47
+ Constants::Words::COMMON.include?(word)
48
48
  end
49
+ end
49
50
  end
50
51
  end
51
52
  end
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
2
 
3
3
  module Categorize
4
- VERSION = '0.0.11'
4
+ VERSION = '0.0.12'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: categorize
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.11
4
+ version: 0.0.12
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -46,9 +46,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
46
46
  - - ! '>='
47
47
  - !ruby/object:Gem::Version
48
48
  version: '0'
49
- segments:
50
- - 0
51
- hash: -3553060524054293255
52
49
  required_rubygems_version: !ruby/object:Gem::Requirement
53
50
  none: false
54
51
  requirements: