classifier-reborn 2.0.4 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. checksums.yaml +5 -5
  2. data/LICENSE +74 -1
  3. data/README.markdown +57 -207
  4. data/data/stopwords/ar +104 -0
  5. data/data/stopwords/bn +362 -0
  6. data/data/stopwords/hi +97 -0
  7. data/data/stopwords/ja +43 -0
  8. data/data/stopwords/ru +420 -0
  9. data/data/stopwords/tr +199 -30
  10. data/data/stopwords/vi +647 -0
  11. data/data/stopwords/zh +125 -0
  12. data/lib/classifier-reborn/backends/bayes_memory_backend.rb +77 -0
  13. data/lib/classifier-reborn/backends/bayes_redis_backend.rb +109 -0
  14. data/lib/classifier-reborn/backends/no_redis_error.rb +14 -0
  15. data/lib/classifier-reborn/bayes.rb +141 -65
  16. data/lib/classifier-reborn/category_namer.rb +6 -4
  17. data/lib/classifier-reborn/extensions/hasher.rb +22 -39
  18. data/lib/classifier-reborn/extensions/token_filter/stemmer.rb +24 -0
  19. data/lib/classifier-reborn/extensions/token_filter/stopword.rb +48 -0
  20. data/lib/classifier-reborn/extensions/token_filter/symbol.rb +20 -0
  21. data/lib/classifier-reborn/extensions/tokenizer/token.rb +36 -0
  22. data/lib/classifier-reborn/extensions/tokenizer/whitespace.rb +28 -0
  23. data/lib/classifier-reborn/extensions/vector.rb +35 -28
  24. data/lib/classifier-reborn/extensions/vector_serialize.rb +10 -10
  25. data/lib/classifier-reborn/extensions/zero_vector.rb +7 -0
  26. data/lib/classifier-reborn/lsi/cached_content_node.rb +6 -5
  27. data/lib/classifier-reborn/lsi/content_node.rb +35 -25
  28. data/lib/classifier-reborn/lsi/summarizer.rb +7 -5
  29. data/lib/classifier-reborn/lsi/word_list.rb +5 -6
  30. data/lib/classifier-reborn/lsi.rb +166 -94
  31. data/lib/classifier-reborn/validators/classifier_validator.rb +170 -0
  32. data/lib/classifier-reborn/version.rb +3 -1
  33. data/lib/classifier-reborn.rb +12 -1
  34. metadata +98 -17
  35. data/bin/bayes.rb +0 -36
  36. data/bin/summarize.rb +0 -16
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: classifier-reborn
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lucas Carlson
8
8
  - Parker Moore
9
9
  - Chase Gilliam
10
- autorequire:
10
+ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2015-10-31 00:00:00.000000000 Z
13
+ date: 2022-06-21 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: fast-stemmer
@@ -26,6 +26,62 @@ dependencies:
26
26
  - - "~>"
27
27
  - !ruby/object:Gem::Version
28
28
  version: '1.0'
29
+ - !ruby/object:Gem::Dependency
30
+ name: matrix
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - "~>"
34
+ - !ruby/object:Gem::Version
35
+ version: '0.4'
36
+ type: :runtime
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - "~>"
41
+ - !ruby/object:Gem::Version
42
+ version: '0.4'
43
+ - !ruby/object:Gem::Dependency
44
+ name: minitest
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ type: :development
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ - !ruby/object:Gem::Dependency
58
+ name: minitest-reporters
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ type: :development
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ - !ruby/object:Gem::Dependency
72
+ name: pry
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
29
85
  - !ruby/object:Gem::Dependency
30
86
  name: rake
31
87
  requirement: !ruby/object:Gem::Requirement
@@ -55,7 +111,21 @@ dependencies:
55
111
  - !ruby/object:Gem::Version
56
112
  version: '0'
57
113
  - !ruby/object:Gem::Dependency
58
- name: test-unit
114
+ name: redis
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ type: :development
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ - !ruby/object:Gem::Dependency
128
+ name: rubocop
59
129
  requirement: !ruby/object:Gem::Requirement
60
130
  requirements:
61
131
  - - ">="
@@ -68,14 +138,12 @@ dependencies:
68
138
  - - ">="
69
139
  - !ruby/object:Gem::Version
70
140
  version: '0'
71
- description:
141
+ description:
72
142
  email:
73
143
  - lucas@rufy.com
74
144
  - parkrmoore@gmail.com
75
145
  - chase.gilliam@gmail.com
76
- executables:
77
- - bayes.rb
78
- - summarize.rb
146
+ executables: []
79
147
  extensions: []
80
148
  extra_rdoc_files:
81
149
  - README.markdown
@@ -83,8 +151,8 @@ extra_rdoc_files:
83
151
  files:
84
152
  - LICENSE
85
153
  - README.markdown
86
- - bin/bayes.rb
87
- - bin/summarize.rb
154
+ - data/stopwords/ar
155
+ - data/stopwords/bn
88
156
  - data/stopwords/ca
89
157
  - data/stopwords/cs
90
158
  - data/stopwords/da
@@ -93,31 +161,46 @@ files:
93
161
  - data/stopwords/es
94
162
  - data/stopwords/fi
95
163
  - data/stopwords/fr
164
+ - data/stopwords/hi
96
165
  - data/stopwords/hu
97
166
  - data/stopwords/it
167
+ - data/stopwords/ja
98
168
  - data/stopwords/nl
99
169
  - data/stopwords/no
100
170
  - data/stopwords/pl
101
171
  - data/stopwords/pt
172
+ - data/stopwords/ru
102
173
  - data/stopwords/se
103
174
  - data/stopwords/tr
175
+ - data/stopwords/vi
176
+ - data/stopwords/zh
104
177
  - lib/classifier-reborn.rb
178
+ - lib/classifier-reborn/backends/bayes_memory_backend.rb
179
+ - lib/classifier-reborn/backends/bayes_redis_backend.rb
180
+ - lib/classifier-reborn/backends/no_redis_error.rb
105
181
  - lib/classifier-reborn/bayes.rb
106
182
  - lib/classifier-reborn/category_namer.rb
107
183
  - lib/classifier-reborn/extensions/hasher.rb
184
+ - lib/classifier-reborn/extensions/token_filter/stemmer.rb
185
+ - lib/classifier-reborn/extensions/token_filter/stopword.rb
186
+ - lib/classifier-reborn/extensions/token_filter/symbol.rb
187
+ - lib/classifier-reborn/extensions/tokenizer/token.rb
188
+ - lib/classifier-reborn/extensions/tokenizer/whitespace.rb
108
189
  - lib/classifier-reborn/extensions/vector.rb
109
190
  - lib/classifier-reborn/extensions/vector_serialize.rb
191
+ - lib/classifier-reborn/extensions/zero_vector.rb
110
192
  - lib/classifier-reborn/lsi.rb
111
193
  - lib/classifier-reborn/lsi/cached_content_node.rb
112
194
  - lib/classifier-reborn/lsi/content_node.rb
113
195
  - lib/classifier-reborn/lsi/summarizer.rb
114
196
  - lib/classifier-reborn/lsi/word_list.rb
197
+ - lib/classifier-reborn/validators/classifier_validator.rb
115
198
  - lib/classifier-reborn/version.rb
116
- homepage: https://github.com/jekyll/classifier-reborn
199
+ homepage: https://jekyll.github.io/classifier-reborn/
117
200
  licenses:
118
201
  - LGPL
119
202
  metadata: {}
120
- post_install_message:
203
+ post_install_message:
121
204
  rdoc_options:
122
205
  - "--charset=UTF-8"
123
206
  require_paths:
@@ -126,17 +209,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
126
209
  requirements:
127
210
  - - ">="
128
211
  - !ruby/object:Gem::Version
129
- version: 1.9.3
212
+ version: 2.4.0
130
213
  required_rubygems_version: !ruby/object:Gem::Requirement
131
214
  requirements:
132
215
  - - ">="
133
216
  - !ruby/object:Gem::Version
134
217
  version: '0'
135
218
  requirements: []
136
- rubyforge_project:
137
- rubygems_version: 2.4.8
138
- signing_key:
219
+ rubygems_version: 3.3.7
220
+ signing_key:
139
221
  specification_version: 2
140
222
  summary: A general classifier module to allow Bayesian and other types of classifications.
141
223
  test_files: []
142
- has_rdoc: true
data/bin/bayes.rb DELETED
@@ -1,36 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- begin
4
- require 'rubygems'
5
- require 'classifier'
6
- rescue
7
- require 'classifier'
8
- end
9
-
10
- require 'madeleine'
11
-
12
- m = SnapshotMadeleine.new(File.expand_path("~/.bayes_data")) {
13
- ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting'
14
- }
15
-
16
- case ARGV[0]
17
- when "add"
18
- case ARGV[1].downcase
19
- when "interesting"
20
- m.system.train_interesting File.open(ARGV[2]).read
21
- puts "#{ARGV[2]} has been classified as interesting"
22
- when "uninteresting"
23
- m.system.train_uninteresting File.open(ARGV[2]).read
24
- puts "#{ARGV[2]} has been classified as uninteresting"
25
- else
26
- puts "Invalid category: choose between interesting and uninteresting"
27
- exit(1)
28
- end
29
- when "classify"
30
- puts m.system.classify(File.open(ARGV[1]).read)
31
- else
32
- puts "Invalid option: choose add [category] [file] or clasify [file]"
33
- exit(-1)
34
- end
35
-
36
- m.take_snapshot
data/bin/summarize.rb DELETED
@@ -1,16 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- begin
4
- require 'rubygems'
5
- require 'classifier'
6
- rescue
7
- require 'classifier'
8
- end
9
-
10
- require 'open-uri'
11
-
12
- num = ARGV[1].to_i
13
- num = num < 1 ? 10 : num
14
-
15
- text = open(ARGV.first).read
16
- puts text.gsub(/<[^>]+>/,"").gsub(/[\s]+/," ").summary(num)