classifier 1.4.2 → 1.4.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8c8010bf5648dd9ca16e4c15e44c627c295978440a15e8ce2bd8314a91a050f7
4
- data.tar.gz: 168a80242021c3153b405a7102e480a76a66916eedb628e3d90a150bbe3f69a3
3
+ metadata.gz: f5bbc7714c3f1b5b6bf2b81484e071eb34c5510455d9520f8bd743ffe25a3bb6
4
+ data.tar.gz: f9236a1e0c086e1bda93645d94ea21f6634b76acbb4c99e62709e1b011311509
5
5
  SHA512:
6
- metadata.gz: a3fb835c78cfa325c5008a88e3202a92df188cc0a870ce7e09a333040f2bd272c7be15747d5c7d055edfbeba6fedf7a5322d05b36e1da3bef53ca05696b12303
7
- data.tar.gz: cd3dd6304aa20a2eb251db1e6d3c5f28cd93860ca463bdc6b65af1dca7d42ee037e561d2c41ad3f8d0203fd2ba2900203e56e6fed8f0bda54f525b62cb72cc4c
6
+ metadata.gz: 79596fac37a6591587859335d4dbb280f038c1504a6fba9a48f7f3d5c83c50bee959887a827588d7418503ff141bff81401125604ddc104af35863dd84842e28
7
+ data.tar.gz: 5cd28357e92c65e10630700a65097350030e401f16d4aacaaf9407e8bd8dd2d200739965bab52d547054a90bfe25ab53935e6544f06ebe3f61d356cc972d8ead
@@ -6,5 +6,7 @@ require 'fast_stemmer'
6
6
  require 'classifier/extensions/word_hash'
7
7
 
8
8
  class Object
9
- def prepare_category_name = to_s.gsub('_', ' ').capitalize.intern
9
+ def prepare_category_name
10
+ to_s.gsub('_', ' ').capitalize.intern
11
+ end
10
12
  end
@@ -2,6 +2,8 @@
2
2
  # Copyright:: Copyright (c) 2005 Lucas Carlson
3
3
  # License:: LGPL
4
4
 
5
+ require 'set'
6
+
5
7
  # These are extensions to the String class to provide convenience
6
8
  # methods for the Classifier package.
7
9
  class String
@@ -45,86 +47,86 @@ class String
45
47
  d
46
48
  end
47
49
 
48
- CORPUS_SKIP_WORDS = Set.new(%w[
49
- a
50
- again
51
- all
52
- along
53
- are
54
- also
55
- an
56
- and
57
- as
58
- at
59
- but
60
- by
61
- came
62
- can
63
- cant
64
- couldnt
65
- did
66
- didn
67
- didnt
68
- do
69
- doesnt
70
- dont
71
- ever
72
- first
73
- from
74
- have
75
- her
76
- here
77
- him
78
- how
79
- i
80
- if
81
- in
82
- into
83
- is
84
- isnt
85
- it
86
- itll
87
- just
88
- last
89
- least
90
- like
91
- most
92
- my
93
- new
94
- no
95
- not
96
- now
97
- of
98
- on
99
- or
100
- should
101
- sinc
102
- so
103
- some
104
- th
105
- than
106
- this
107
- that
108
- the
109
- their
110
- then
111
- those
112
- to
113
- told
114
- too
115
- true
116
- try
117
- until
118
- url
119
- us
120
- were
121
- when
122
- whether
123
- while
124
- with
125
- within
126
- yes
127
- you
128
- youll
129
- ])
50
+ CORPUS_SKIP_WORDS = ::Set.new(%w[
51
+ a
52
+ again
53
+ all
54
+ along
55
+ are
56
+ also
57
+ an
58
+ and
59
+ as
60
+ at
61
+ but
62
+ by
63
+ came
64
+ can
65
+ cant
66
+ couldnt
67
+ did
68
+ didn
69
+ didnt
70
+ do
71
+ doesnt
72
+ dont
73
+ ever
74
+ first
75
+ from
76
+ have
77
+ her
78
+ here
79
+ him
80
+ how
81
+ i
82
+ if
83
+ in
84
+ into
85
+ is
86
+ isnt
87
+ it
88
+ itll
89
+ just
90
+ last
91
+ least
92
+ like
93
+ most
94
+ my
95
+ new
96
+ no
97
+ not
98
+ now
99
+ of
100
+ on
101
+ or
102
+ should
103
+ sinc
104
+ so
105
+ some
106
+ th
107
+ than
108
+ this
109
+ that
110
+ the
111
+ their
112
+ then
113
+ those
114
+ to
115
+ told
116
+ too
117
+ true
118
+ try
119
+ until
120
+ url
121
+ us
122
+ were
123
+ when
124
+ whether
125
+ while
126
+ with
127
+ within
128
+ yes
129
+ you
130
+ youll
131
+ ])
130
132
  end
@@ -45,10 +45,11 @@ module Classifier
45
45
 
46
46
  # Perform the scaling transform
47
47
  total_words = $GSL ? vec.sum : vec.sum_with_identity
48
+ total_unique_words = vec.count { |word| word != 0 }
48
49
 
49
50
  # Perform first-order association transform if this vector has more
50
51
  # than one word in it.
51
- if total_words > 1.0
52
+ if total_words > 1.0 && total_unique_words > 1
52
53
  weighted_total = 0.0
53
54
 
54
55
  vec.each do |term|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.2
4
+ version: 1.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lucas Carlson