classifier-reborn 2.0.4 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (36) hide show
  1. checksums.yaml +5 -5
  2. data/LICENSE +74 -1
  3. data/README.markdown +57 -207
  4. data/data/stopwords/ar +104 -0
  5. data/data/stopwords/bn +362 -0
  6. data/data/stopwords/hi +97 -0
  7. data/data/stopwords/ja +43 -0
  8. data/data/stopwords/ru +420 -0
  9. data/data/stopwords/tr +199 -30
  10. data/data/stopwords/vi +647 -0
  11. data/data/stopwords/zh +125 -0
  12. data/lib/classifier-reborn/backends/bayes_memory_backend.rb +77 -0
  13. data/lib/classifier-reborn/backends/bayes_redis_backend.rb +109 -0
  14. data/lib/classifier-reborn/backends/no_redis_error.rb +14 -0
  15. data/lib/classifier-reborn/bayes.rb +141 -65
  16. data/lib/classifier-reborn/category_namer.rb +6 -4
  17. data/lib/classifier-reborn/extensions/hasher.rb +22 -39
  18. data/lib/classifier-reborn/extensions/token_filter/stemmer.rb +24 -0
  19. data/lib/classifier-reborn/extensions/token_filter/stopword.rb +48 -0
  20. data/lib/classifier-reborn/extensions/token_filter/symbol.rb +20 -0
  21. data/lib/classifier-reborn/extensions/tokenizer/token.rb +36 -0
  22. data/lib/classifier-reborn/extensions/tokenizer/whitespace.rb +28 -0
  23. data/lib/classifier-reborn/extensions/vector.rb +35 -28
  24. data/lib/classifier-reborn/extensions/vector_serialize.rb +10 -10
  25. data/lib/classifier-reborn/extensions/zero_vector.rb +7 -0
  26. data/lib/classifier-reborn/lsi/cached_content_node.rb +6 -5
  27. data/lib/classifier-reborn/lsi/content_node.rb +35 -25
  28. data/lib/classifier-reborn/lsi/summarizer.rb +7 -5
  29. data/lib/classifier-reborn/lsi/word_list.rb +5 -6
  30. data/lib/classifier-reborn/lsi.rb +166 -94
  31. data/lib/classifier-reborn/validators/classifier_validator.rb +170 -0
  32. data/lib/classifier-reborn/version.rb +3 -1
  33. data/lib/classifier-reborn.rb +12 -1
  34. metadata +98 -17
  35. data/bin/bayes.rb +0 -36
  36. data/bin/summarize.rb +0 -16
metadata CHANGED
@@ -1,16 +1,16 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: classifier-reborn
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.4
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Lucas Carlson
8
8
  - Parker Moore
9
9
  - Chase Gilliam
10
- autorequire:
10
+ autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2015-10-31 00:00:00.000000000 Z
13
+ date: 2022-06-21 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: fast-stemmer
@@ -26,6 +26,62 @@ dependencies:
26
26
  - - "~>"
27
27
  - !ruby/object:Gem::Version
28
28
  version: '1.0'
29
+ - !ruby/object:Gem::Dependency
30
+ name: matrix
31
+ requirement: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - "~>"
34
+ - !ruby/object:Gem::Version
35
+ version: '0.4'
36
+ type: :runtime
37
+ prerelease: false
38
+ version_requirements: !ruby/object:Gem::Requirement
39
+ requirements:
40
+ - - "~>"
41
+ - !ruby/object:Gem::Version
42
+ version: '0.4'
43
+ - !ruby/object:Gem::Dependency
44
+ name: minitest
45
+ requirement: !ruby/object:Gem::Requirement
46
+ requirements:
47
+ - - ">="
48
+ - !ruby/object:Gem::Version
49
+ version: '0'
50
+ type: :development
51
+ prerelease: false
52
+ version_requirements: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: '0'
57
+ - !ruby/object:Gem::Dependency
58
+ name: minitest-reporters
59
+ requirement: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - ">="
62
+ - !ruby/object:Gem::Version
63
+ version: '0'
64
+ type: :development
65
+ prerelease: false
66
+ version_requirements: !ruby/object:Gem::Requirement
67
+ requirements:
68
+ - - ">="
69
+ - !ruby/object:Gem::Version
70
+ version: '0'
71
+ - !ruby/object:Gem::Dependency
72
+ name: pry
73
+ requirement: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ type: :development
79
+ prerelease: false
80
+ version_requirements: !ruby/object:Gem::Requirement
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
84
+ version: '0'
29
85
  - !ruby/object:Gem::Dependency
30
86
  name: rake
31
87
  requirement: !ruby/object:Gem::Requirement
@@ -55,7 +111,21 @@ dependencies:
55
111
  - !ruby/object:Gem::Version
56
112
  version: '0'
57
113
  - !ruby/object:Gem::Dependency
58
- name: test-unit
114
+ name: redis
115
+ requirement: !ruby/object:Gem::Requirement
116
+ requirements:
117
+ - - ">="
118
+ - !ruby/object:Gem::Version
119
+ version: '0'
120
+ type: :development
121
+ prerelease: false
122
+ version_requirements: !ruby/object:Gem::Requirement
123
+ requirements:
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: '0'
127
+ - !ruby/object:Gem::Dependency
128
+ name: rubocop
59
129
  requirement: !ruby/object:Gem::Requirement
60
130
  requirements:
61
131
  - - ">="
@@ -68,14 +138,12 @@ dependencies:
68
138
  - - ">="
69
139
  - !ruby/object:Gem::Version
70
140
  version: '0'
71
- description:
141
+ description:
72
142
  email:
73
143
  - lucas@rufy.com
74
144
  - parkrmoore@gmail.com
75
145
  - chase.gilliam@gmail.com
76
- executables:
77
- - bayes.rb
78
- - summarize.rb
146
+ executables: []
79
147
  extensions: []
80
148
  extra_rdoc_files:
81
149
  - README.markdown
@@ -83,8 +151,8 @@ extra_rdoc_files:
83
151
  files:
84
152
  - LICENSE
85
153
  - README.markdown
86
- - bin/bayes.rb
87
- - bin/summarize.rb
154
+ - data/stopwords/ar
155
+ - data/stopwords/bn
88
156
  - data/stopwords/ca
89
157
  - data/stopwords/cs
90
158
  - data/stopwords/da
@@ -93,31 +161,46 @@ files:
93
161
  - data/stopwords/es
94
162
  - data/stopwords/fi
95
163
  - data/stopwords/fr
164
+ - data/stopwords/hi
96
165
  - data/stopwords/hu
97
166
  - data/stopwords/it
167
+ - data/stopwords/ja
98
168
  - data/stopwords/nl
99
169
  - data/stopwords/no
100
170
  - data/stopwords/pl
101
171
  - data/stopwords/pt
172
+ - data/stopwords/ru
102
173
  - data/stopwords/se
103
174
  - data/stopwords/tr
175
+ - data/stopwords/vi
176
+ - data/stopwords/zh
104
177
  - lib/classifier-reborn.rb
178
+ - lib/classifier-reborn/backends/bayes_memory_backend.rb
179
+ - lib/classifier-reborn/backends/bayes_redis_backend.rb
180
+ - lib/classifier-reborn/backends/no_redis_error.rb
105
181
  - lib/classifier-reborn/bayes.rb
106
182
  - lib/classifier-reborn/category_namer.rb
107
183
  - lib/classifier-reborn/extensions/hasher.rb
184
+ - lib/classifier-reborn/extensions/token_filter/stemmer.rb
185
+ - lib/classifier-reborn/extensions/token_filter/stopword.rb
186
+ - lib/classifier-reborn/extensions/token_filter/symbol.rb
187
+ - lib/classifier-reborn/extensions/tokenizer/token.rb
188
+ - lib/classifier-reborn/extensions/tokenizer/whitespace.rb
108
189
  - lib/classifier-reborn/extensions/vector.rb
109
190
  - lib/classifier-reborn/extensions/vector_serialize.rb
191
+ - lib/classifier-reborn/extensions/zero_vector.rb
110
192
  - lib/classifier-reborn/lsi.rb
111
193
  - lib/classifier-reborn/lsi/cached_content_node.rb
112
194
  - lib/classifier-reborn/lsi/content_node.rb
113
195
  - lib/classifier-reborn/lsi/summarizer.rb
114
196
  - lib/classifier-reborn/lsi/word_list.rb
197
+ - lib/classifier-reborn/validators/classifier_validator.rb
115
198
  - lib/classifier-reborn/version.rb
116
- homepage: https://github.com/jekyll/classifier-reborn
199
+ homepage: https://jekyll.github.io/classifier-reborn/
117
200
  licenses:
118
201
  - LGPL
119
202
  metadata: {}
120
- post_install_message:
203
+ post_install_message:
121
204
  rdoc_options:
122
205
  - "--charset=UTF-8"
123
206
  require_paths:
@@ -126,17 +209,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
126
209
  requirements:
127
210
  - - ">="
128
211
  - !ruby/object:Gem::Version
129
- version: 1.9.3
212
+ version: 2.4.0
130
213
  required_rubygems_version: !ruby/object:Gem::Requirement
131
214
  requirements:
132
215
  - - ">="
133
216
  - !ruby/object:Gem::Version
134
217
  version: '0'
135
218
  requirements: []
136
- rubyforge_project:
137
- rubygems_version: 2.4.8
138
- signing_key:
219
+ rubygems_version: 3.3.7
220
+ signing_key:
139
221
  specification_version: 2
140
222
  summary: A general classifier module to allow Bayesian and other types of classifications.
141
223
  test_files: []
142
- has_rdoc: true
data/bin/bayes.rb DELETED
@@ -1,36 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- begin
4
- require 'rubygems'
5
- require 'classifier'
6
- rescue
7
- require 'classifier'
8
- end
9
-
10
- require 'madeleine'
11
-
12
- m = SnapshotMadeleine.new(File.expand_path("~/.bayes_data")) {
13
- ClassifierReborn::Bayes.new 'Interesting', 'Uninteresting'
14
- }
15
-
16
- case ARGV[0]
17
- when "add"
18
- case ARGV[1].downcase
19
- when "interesting"
20
- m.system.train_interesting File.open(ARGV[2]).read
21
- puts "#{ARGV[2]} has been classified as interesting"
22
- when "uninteresting"
23
- m.system.train_uninteresting File.open(ARGV[2]).read
24
- puts "#{ARGV[2]} has been classified as uninteresting"
25
- else
26
- puts "Invalid category: choose between interesting and uninteresting"
27
- exit(1)
28
- end
29
- when "classify"
30
- puts m.system.classify(File.open(ARGV[1]).read)
31
- else
32
- puts "Invalid option: choose add [category] [file] or clasify [file]"
33
- exit(-1)
34
- end
35
-
36
- m.take_snapshot
data/bin/summarize.rb DELETED
@@ -1,16 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- begin
4
- require 'rubygems'
5
- require 'classifier'
6
- rescue
7
- require 'classifier'
8
- end
9
-
10
- require 'open-uri'
11
-
12
- num = ARGV[1].to_i
13
- num = num < 1 ? 10 : num
14
-
15
- text = open(ARGV.first).read
16
- puts text.gsub(/<[^>]+>/,"").gsub(/[\s]+/," ").summary(num)