classifier 1.4.2 → 1.4.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8c8010bf5648dd9ca16e4c15e44c627c295978440a15e8ce2bd8314a91a050f7
4
- data.tar.gz: 168a80242021c3153b405a7102e480a76a66916eedb628e3d90a150bbe3f69a3
3
+ metadata.gz: f5bbc7714c3f1b5b6bf2b81484e071eb34c5510455d9520f8bd743ffe25a3bb6
4
+ data.tar.gz: f9236a1e0c086e1bda93645d94ea21f6634b76acbb4c99e62709e1b011311509
5
5
  SHA512:
6
- metadata.gz: a3fb835c78cfa325c5008a88e3202a92df188cc0a870ce7e09a333040f2bd272c7be15747d5c7d055edfbeba6fedf7a5322d05b36e1da3bef53ca05696b12303
7
- data.tar.gz: cd3dd6304aa20a2eb251db1e6d3c5f28cd93860ca463bdc6b65af1dca7d42ee037e561d2c41ad3f8d0203fd2ba2900203e56e6fed8f0bda54f525b62cb72cc4c
6
+ metadata.gz: 79596fac37a6591587859335d4dbb280f038c1504a6fba9a48f7f3d5c83c50bee959887a827588d7418503ff141bff81401125604ddc104af35863dd84842e28
7
+ data.tar.gz: 5cd28357e92c65e10630700a65097350030e401f16d4aacaaf9407e8bd8dd2d200739965bab52d547054a90bfe25ab53935e6544f06ebe3f61d356cc972d8ead
@@ -6,5 +6,7 @@ require 'fast_stemmer'
6
6
  require 'classifier/extensions/word_hash'
7
7
 
8
8
  class Object
9
- def prepare_category_name = to_s.gsub('_', ' ').capitalize.intern
9
+ def prepare_category_name
10
+ to_s.gsub('_', ' ').capitalize.intern
11
+ end
10
12
  end
@@ -2,6 +2,8 @@
2
2
  # Copyright:: Copyright (c) 2005 Lucas Carlson
3
3
  # License:: LGPL
4
4
 
5
+ require 'set'
6
+
5
7
  # These are extensions to the String class to provide convenience
6
8
  # methods for the Classifier package.
7
9
  class String
@@ -45,86 +47,86 @@ class String
45
47
  d
46
48
  end
47
49
 
48
- CORPUS_SKIP_WORDS = Set.new(%w[
49
- a
50
- again
51
- all
52
- along
53
- are
54
- also
55
- an
56
- and
57
- as
58
- at
59
- but
60
- by
61
- came
62
- can
63
- cant
64
- couldnt
65
- did
66
- didn
67
- didnt
68
- do
69
- doesnt
70
- dont
71
- ever
72
- first
73
- from
74
- have
75
- her
76
- here
77
- him
78
- how
79
- i
80
- if
81
- in
82
- into
83
- is
84
- isnt
85
- it
86
- itll
87
- just
88
- last
89
- least
90
- like
91
- most
92
- my
93
- new
94
- no
95
- not
96
- now
97
- of
98
- on
99
- or
100
- should
101
- sinc
102
- so
103
- some
104
- th
105
- than
106
- this
107
- that
108
- the
109
- their
110
- then
111
- those
112
- to
113
- told
114
- too
115
- true
116
- try
117
- until
118
- url
119
- us
120
- were
121
- when
122
- whether
123
- while
124
- with
125
- within
126
- yes
127
- you
128
- youll
129
- ])
50
+ CORPUS_SKIP_WORDS = ::Set.new(%w[
51
+ a
52
+ again
53
+ all
54
+ along
55
+ are
56
+ also
57
+ an
58
+ and
59
+ as
60
+ at
61
+ but
62
+ by
63
+ came
64
+ can
65
+ cant
66
+ couldnt
67
+ did
68
+ didn
69
+ didnt
70
+ do
71
+ doesnt
72
+ dont
73
+ ever
74
+ first
75
+ from
76
+ have
77
+ her
78
+ here
79
+ him
80
+ how
81
+ i
82
+ if
83
+ in
84
+ into
85
+ is
86
+ isnt
87
+ it
88
+ itll
89
+ just
90
+ last
91
+ least
92
+ like
93
+ most
94
+ my
95
+ new
96
+ no
97
+ not
98
+ now
99
+ of
100
+ on
101
+ or
102
+ should
103
+ sinc
104
+ so
105
+ some
106
+ th
107
+ than
108
+ this
109
+ that
110
+ the
111
+ their
112
+ then
113
+ those
114
+ to
115
+ told
116
+ too
117
+ true
118
+ try
119
+ until
120
+ url
121
+ us
122
+ were
123
+ when
124
+ whether
125
+ while
126
+ with
127
+ within
128
+ yes
129
+ you
130
+ youll
131
+ ])
130
132
  end
@@ -45,10 +45,11 @@ module Classifier
45
45
 
46
46
  # Perform the scaling transform
47
47
  total_words = $GSL ? vec.sum : vec.sum_with_identity
48
+ total_unique_words = vec.count { |word| word != 0 }
48
49
 
49
50
  # Perform first-order association transform if this vector has more
50
51
  # than one word in it.
51
- if total_words > 1.0
52
+ if total_words > 1.0 && total_unique_words > 1
52
53
  weighted_total = 0.0
53
54
 
54
55
  vec.each do |term|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: classifier
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.2
4
+ version: 1.4.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lucas Carlson