classifier-reborn 2.0.4 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/LICENSE +74 -1
- data/README.markdown +57 -207
- data/data/stopwords/ar +104 -0
- data/data/stopwords/bn +362 -0
- data/data/stopwords/hi +97 -0
- data/data/stopwords/ja +43 -0
- data/data/stopwords/ru +420 -0
- data/data/stopwords/tr +199 -30
- data/data/stopwords/vi +647 -0
- data/data/stopwords/zh +125 -0
- data/lib/classifier-reborn/backends/bayes_memory_backend.rb +77 -0
- data/lib/classifier-reborn/backends/bayes_redis_backend.rb +109 -0
- data/lib/classifier-reborn/backends/no_redis_error.rb +14 -0
- data/lib/classifier-reborn/bayes.rb +141 -65
- data/lib/classifier-reborn/category_namer.rb +6 -4
- data/lib/classifier-reborn/extensions/hasher.rb +22 -39
- data/lib/classifier-reborn/extensions/token_filter/stemmer.rb +24 -0
- data/lib/classifier-reborn/extensions/token_filter/stopword.rb +48 -0
- data/lib/classifier-reborn/extensions/token_filter/symbol.rb +20 -0
- data/lib/classifier-reborn/extensions/tokenizer/token.rb +36 -0
- data/lib/classifier-reborn/extensions/tokenizer/whitespace.rb +28 -0
- data/lib/classifier-reborn/extensions/vector.rb +35 -28
- data/lib/classifier-reborn/extensions/vector_serialize.rb +10 -10
- data/lib/classifier-reborn/extensions/zero_vector.rb +7 -0
- data/lib/classifier-reborn/lsi/cached_content_node.rb +6 -5
- data/lib/classifier-reborn/lsi/content_node.rb +35 -25
- data/lib/classifier-reborn/lsi/summarizer.rb +7 -5
- data/lib/classifier-reborn/lsi/word_list.rb +5 -6
- data/lib/classifier-reborn/lsi.rb +166 -94
- data/lib/classifier-reborn/validators/classifier_validator.rb +170 -0
- data/lib/classifier-reborn/version.rb +3 -1
- data/lib/classifier-reborn.rb +12 -1
- metadata +98 -17
- data/bin/bayes.rb +0 -36
- data/bin/summarize.rb +0 -16
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: classifier-reborn
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Lucas Carlson
|
8
8
|
- Parker Moore
|
9
9
|
- Chase Gilliam
|
10
|
-
autorequire:
|
10
|
+
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date:
|
13
|
+
date: 2022-06-21 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: fast-stemmer
|
@@ -26,6 +26,62 @@ dependencies:
|
|
26
26
|
- - "~>"
|
27
27
|
- !ruby/object:Gem::Version
|
28
28
|
version: '1.0'
|
29
|
+
- !ruby/object:Gem::Dependency
|
30
|
+
name: matrix
|
31
|
+
requirement: !ruby/object:Gem::Requirement
|
32
|
+
requirements:
|
33
|
+
- - "~>"
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: '0.4'
|
36
|
+
type: :runtime
|
37
|
+
prerelease: false
|
38
|
+
version_requirements: !ruby/object:Gem::Requirement
|
39
|
+
requirements:
|
40
|
+
- - "~>"
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: '0.4'
|
43
|
+
- !ruby/object:Gem::Dependency
|
44
|
+
name: minitest
|
45
|
+
requirement: !ruby/object:Gem::Requirement
|
46
|
+
requirements:
|
47
|
+
- - ">="
|
48
|
+
- !ruby/object:Gem::Version
|
49
|
+
version: '0'
|
50
|
+
type: :development
|
51
|
+
prerelease: false
|
52
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: '0'
|
57
|
+
- !ruby/object:Gem::Dependency
|
58
|
+
name: minitest-reporters
|
59
|
+
requirement: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: '0'
|
64
|
+
type: :development
|
65
|
+
prerelease: false
|
66
|
+
version_requirements: !ruby/object:Gem::Requirement
|
67
|
+
requirements:
|
68
|
+
- - ">="
|
69
|
+
- !ruby/object:Gem::Version
|
70
|
+
version: '0'
|
71
|
+
- !ruby/object:Gem::Dependency
|
72
|
+
name: pry
|
73
|
+
requirement: !ruby/object:Gem::Requirement
|
74
|
+
requirements:
|
75
|
+
- - ">="
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
type: :development
|
79
|
+
prerelease: false
|
80
|
+
version_requirements: !ruby/object:Gem::Requirement
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
84
|
+
version: '0'
|
29
85
|
- !ruby/object:Gem::Dependency
|
30
86
|
name: rake
|
31
87
|
requirement: !ruby/object:Gem::Requirement
|
@@ -55,7 +111,21 @@ dependencies:
|
|
55
111
|
- !ruby/object:Gem::Version
|
56
112
|
version: '0'
|
57
113
|
- !ruby/object:Gem::Dependency
|
58
|
-
name:
|
114
|
+
name: redis
|
115
|
+
requirement: !ruby/object:Gem::Requirement
|
116
|
+
requirements:
|
117
|
+
- - ">="
|
118
|
+
- !ruby/object:Gem::Version
|
119
|
+
version: '0'
|
120
|
+
type: :development
|
121
|
+
prerelease: false
|
122
|
+
version_requirements: !ruby/object:Gem::Requirement
|
123
|
+
requirements:
|
124
|
+
- - ">="
|
125
|
+
- !ruby/object:Gem::Version
|
126
|
+
version: '0'
|
127
|
+
- !ruby/object:Gem::Dependency
|
128
|
+
name: rubocop
|
59
129
|
requirement: !ruby/object:Gem::Requirement
|
60
130
|
requirements:
|
61
131
|
- - ">="
|
@@ -68,14 +138,12 @@ dependencies:
|
|
68
138
|
- - ">="
|
69
139
|
- !ruby/object:Gem::Version
|
70
140
|
version: '0'
|
71
|
-
description:
|
141
|
+
description:
|
72
142
|
email:
|
73
143
|
- lucas@rufy.com
|
74
144
|
- parkrmoore@gmail.com
|
75
145
|
- chase.gilliam@gmail.com
|
76
|
-
executables:
|
77
|
-
- bayes.rb
|
78
|
-
- summarize.rb
|
146
|
+
executables: []
|
79
147
|
extensions: []
|
80
148
|
extra_rdoc_files:
|
81
149
|
- README.markdown
|
@@ -83,8 +151,8 @@ extra_rdoc_files:
|
|
83
151
|
files:
|
84
152
|
- LICENSE
|
85
153
|
- README.markdown
|
86
|
-
-
|
87
|
-
-
|
154
|
+
- data/stopwords/ar
|
155
|
+
- data/stopwords/bn
|
88
156
|
- data/stopwords/ca
|
89
157
|
- data/stopwords/cs
|
90
158
|
- data/stopwords/da
|
@@ -93,31 +161,46 @@ files:
|
|
93
161
|
- data/stopwords/es
|
94
162
|
- data/stopwords/fi
|
95
163
|
- data/stopwords/fr
|
164
|
+
- data/stopwords/hi
|
96
165
|
- data/stopwords/hu
|
97
166
|
- data/stopwords/it
|
167
|
+
- data/stopwords/ja
|
98
168
|
- data/stopwords/nl
|
99
169
|
- data/stopwords/no
|
100
170
|
- data/stopwords/pl
|
101
171
|
- data/stopwords/pt
|
172
|
+
- data/stopwords/ru
|
102
173
|
- data/stopwords/se
|
103
174
|
- data/stopwords/tr
|
175
|
+
- data/stopwords/vi
|
176
|
+
- data/stopwords/zh
|
104
177
|
- lib/classifier-reborn.rb
|
178
|
+
- lib/classifier-reborn/backends/bayes_memory_backend.rb
|
179
|
+
- lib/classifier-reborn/backends/bayes_redis_backend.rb
|
180
|
+
- lib/classifier-reborn/backends/no_redis_error.rb
|
105
181
|
- lib/classifier-reborn/bayes.rb
|
106
182
|
- lib/classifier-reborn/category_namer.rb
|
107
183
|
- lib/classifier-reborn/extensions/hasher.rb
|
184
|
+
- lib/classifier-reborn/extensions/token_filter/stemmer.rb
|
185
|
+
- lib/classifier-reborn/extensions/token_filter/stopword.rb
|
186
|
+
- lib/classifier-reborn/extensions/token_filter/symbol.rb
|
187
|
+
- lib/classifier-reborn/extensions/tokenizer/token.rb
|
188
|
+
- lib/classifier-reborn/extensions/tokenizer/whitespace.rb
|
108
189
|
- lib/classifier-reborn/extensions/vector.rb
|
109
190
|
- lib/classifier-reborn/extensions/vector_serialize.rb
|
191
|
+
- lib/classifier-reborn/extensions/zero_vector.rb
|
110
192
|
- lib/classifier-reborn/lsi.rb
|
111
193
|
- lib/classifier-reborn/lsi/cached_content_node.rb
|
112
194
|
- lib/classifier-reborn/lsi/content_node.rb
|
113
195
|
- lib/classifier-reborn/lsi/summarizer.rb
|
114
196
|
- lib/classifier-reborn/lsi/word_list.rb
|
197
|
+
- lib/classifier-reborn/validators/classifier_validator.rb
|
115
198
|
- lib/classifier-reborn/version.rb
|
116
|
-
homepage: https://github.
|
199
|
+
homepage: https://jekyll.github.io/classifier-reborn/
|
117
200
|
licenses:
|
118
201
|
- LGPL
|
119
202
|
metadata: {}
|
120
|
-
post_install_message:
|
203
|
+
post_install_message:
|
121
204
|
rdoc_options:
|
122
205
|
- "--charset=UTF-8"
|
123
206
|
require_paths:
|
@@ -126,17 +209,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
126
209
|
requirements:
|
127
210
|
- - ">="
|
128
211
|
- !ruby/object:Gem::Version
|
129
|
-
version:
|
212
|
+
version: 2.4.0
|
130
213
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
131
214
|
requirements:
|
132
215
|
- - ">="
|
133
216
|
- !ruby/object:Gem::Version
|
134
217
|
version: '0'
|
135
218
|
requirements: []
|
136
|
-
|
137
|
-
|
138
|
-
signing_key:
|
219
|
+
rubygems_version: 3.3.7
|
220
|
+
signing_key:
|
139
221
|
specification_version: 2
|
140
222
|
summary: A general classifier module to allow Bayesian and other types of classifications.
|
141
223
|
test_files: []
|
142
|
-
has_rdoc: true
|
data/bin/bayes.rb
DELETED
@@ -1,36 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
begin
|
4
|
-
require 'rubygems'
|
5
|
-
require 'classifier'
|
6
|
-
rescue
|
7
|
-
require 'classifier'
|
8
|
-
end
|
9
|
-
|
10
|
-
require 'madeleine'
|
11
|
-
|
12
|
-
m = SnapshotMadeleine.new(File.expand_path("~/.bayes_data")) {
|
13
|
-
ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting'
|
14
|
-
}
|
15
|
-
|
16
|
-
case ARGV[0]
|
17
|
-
when "add"
|
18
|
-
case ARGV[1].downcase
|
19
|
-
when "interesting"
|
20
|
-
m.system.train_interesting File.open(ARGV[2]).read
|
21
|
-
puts "#{ARGV[2]} has been classified as interesting"
|
22
|
-
when "uninteresting"
|
23
|
-
m.system.train_uninteresting File.open(ARGV[2]).read
|
24
|
-
puts "#{ARGV[2]} has been classified as uninteresting"
|
25
|
-
else
|
26
|
-
puts "Invalid category: choose between interesting and uninteresting"
|
27
|
-
exit(1)
|
28
|
-
end
|
29
|
-
when "classify"
|
30
|
-
puts m.system.classify(File.open(ARGV[1]).read)
|
31
|
-
else
|
32
|
-
puts "Invalid option: choose add [category] [file] or clasify [file]"
|
33
|
-
exit(-1)
|
34
|
-
end
|
35
|
-
|
36
|
-
m.take_snapshot
|
data/bin/summarize.rb
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
begin
|
4
|
-
require 'rubygems'
|
5
|
-
require 'classifier'
|
6
|
-
rescue
|
7
|
-
require 'classifier'
|
8
|
-
end
|
9
|
-
|
10
|
-
require 'open-uri'
|
11
|
-
|
12
|
-
num = ARGV[1].to_i
|
13
|
-
num = num < 1 ? 10 : num
|
14
|
-
|
15
|
-
text = open(ARGV.first).read
|
16
|
-
puts text.gsub(/<[^>]+>/,"").gsub(/[\s]+/," ").summary(num)
|