tagelizer 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
data/.rspec ADDED
@@ -0,0 +1 @@
1
+ --color
data/Gemfile ADDED
@@ -0,0 +1,19 @@
1
# Fetch gems over HTTPS rather than plain HTTP.
source "https://rubygems.org"

# Runtime dependencies (required to use the gem).
# Example:
#   gem "activesupport", ">= 2.3.5"
gem "raspell", "=1.2"
# The gem is named "ruby-stemmer" but its library is loaded as 'lingua/stemmer'.
gem "ruby-stemmer", ">=0.9.1", :require => 'lingua/stemmer'

# Development dependencies: everything needed to run rake, tests,
# features, etc.
group :development do
  gem "rspec", "~> 2.3.0"
  gem "bundler", "~> 1.0.0"
  gem "jeweler", "~> 1.5.2"
  gem "rcov", ">= 0"
  gem "reek", "~> 1.2.8"
  gem "rb-fsevent", ">=0.3.5"
  gem "guard-rspec"
  gem "growl"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Jonatan Reiners
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,19 @@
1
+ = tagelizer
2
+
3
+ Converts text to an array of words for use as tags, using stemming and a spell checker to produce better results.
4
+
5
+ == Contributing to tagelizer
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it
9
+ * Fork the project
10
+ * Start a feature/bugfix branch
11
+ * Commit and push until you are happy with your contribution
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2011 Jonatan Reiners. See LICENSE.txt for
18
+ further details.
19
+
@@ -0,0 +1,58 @@
1
require 'rubygems'
require 'bundler'

# Activate the gems from the Gemfile's default and development groups before
# anything else is required. If Bundler cannot set them up, print its message
# and exit with the status code it provides.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

# Gem packaging and release tasks.
require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
  gem.name = "tagelizer"
  gem.homepage = "http://github.com/enc/tagelizer"
  gem.license = "MIT"
  gem.summary = %Q{converts text to array of words for tags}
  gem.description = %Q{converts text to an array of words. Uses stemming and spellchecker to produce better results. Build to convert descriptions to tags.}
  gem.email = "git@encc.de"
  gem.authors = ["Jonatan Reiners"]
  # Include your dependencies below. Runtime dependencies are required when using your gem,
  # and development dependencies are only needed for development (ie running rake tasks, tests, etc)
  # NOTE(review): the generated gemspec also lists the Gemfile's dependencies,
  # so these explicit declarations may be duplicates -- confirm and prune.
  gem.add_runtime_dependency 'raspell', '>=1.2'
  # The stemmer is declared as "ruby-stemmer" in the Gemfile; it was previously
  # declared here under the name 'ruby', which is not the gem the library uses.
  gem.add_runtime_dependency 'ruby-stemmer', '>=0.9.1'
  gem.add_development_dependency 'rspec', '>=2.3.0'
end
Jeweler::RubygemsDotOrgTasks.new

# `rake spec` runs every *_spec.rb file under spec/.
require 'rspec/core'
require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec) do |spec|
  spec.pattern = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with the task's rcov flag enabled.
RSpec::Core::RakeTask.new(:rcov) do |spec|
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# Code-smell check over the library sources; configured to fail on error.
require 'reek/rake/task'
Reek::Rake::Task.new do |t|
  t.fail_on_error = true
  t.verbose = false
  t.source_files = 'lib/**/*.rb'
end

task :default => :spec

# RDoc generation; the title carries the contents of the VERSION file when
# that file exists.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  version = File.exist?('VERSION') ? File.read('VERSION') : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "tagelizer #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,66 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ Bundler.require(:default)
4
+
5
# Turns free text into a list of tag candidates.
#
# The text is split on spaces, the first run of word characters is taken
# from every token, short words are dropped, words the speller rejects are
# replaced by its first suggestion, and every remaining word is stemmed.
class Tagelizer

  attr_reader :locale, :options, :dictionary

  # locale - dictionary code requested by the caller. It is kept as given in
  #          #locale; #dictionary falls back to "en" when the code is not
  #          among the dictionaries reported by Aspell.
  def initialize(locale = 'en')
    @locale      = locale
    @dictionary  = (dictionaries.include?(locale) ? locale : "en")
    @minwordsize = 2
    @options     = {'ignore-case' => true}
  end

  # Converts +text+ into an array of corrected, stemmed words.
  #
  # Only words strictly longer than @minwordsize characters are kept.
  # nil is treated like an empty string and yields [].
  def parse( text )
    # token[/\w+/] picks the first run of word characters, so tokens with
    # leading punctuation such as "(word)" are no longer lost; tokens that
    # contain no word characters become "" and are filtered out below.
    words = text.to_s.split(" ").map { |token| token[/\w+/].to_s }
    words = words.select { |word| word.size > @minwordsize }
    words.map { |word| stemmer.stem(corrected_word(word)) }
  end

  # Memoized Aspell instance for the current dictionary.
  def speller
    @speller ||= build_speller
  end

  # Builds an Aspell instance for the current dictionary and applies the
  # suggestion mode and the stringified #options to it.
  def build_speller
    speller = Aspell.new(dictionary)
    speller.suggestion_mode = actual_suggestion_mode
    actual_options.each do |key, value|
      speller.set_option key, value
    end

    speller
  end

  # Memoized stemmer for the current dictionary.
  def stemmer
    @stemmer ||= build_stemmer
  end

  # Builds a Lingua::Stemmer using the dictionary code as its language.
  def build_stemmer
    Lingua::Stemmer.new(:language => dictionary)
  end

  # Switches to another dictionary.
  #
  # Raises ArgumentError when +dict+ is not a known dictionary code.
  def dictionary=(dict)
    unless dictionaries.include?(dict)
      raise ArgumentError, 'unknown dictionary'
    end

    @dictionary = dict
    # The memoized helpers were built for the previous dictionary; drop them
    # so the next use rebuilds them for the new one.
    @speller = nil
    @stemmer = nil
  end

  # Codes of all dictionaries reported by Aspell.list_dicts (memoized).
  def dictionaries
    @dictionaries ||= Aspell.list_dicts.collect { |dict| dict.code }
  end

  # Returns +word+ when the speller accepts it, otherwise the speller's
  # first suggestion. When there is no suggestion the word is returned
  # unchanged instead of nil, so the stemmer always receives a string.
  def corrected_word(word)
    return word if speller.check(word)

    speller.suggest(word).first || word
  end

  # Suggestion mode handed to the speller. This method was referenced by
  # build_speller but never defined, which raised NameError as soon as a
  # speller was built.
  def actual_suggestion_mode
    Aspell::NORMAL
  end

  # #options with every value converted to a String, as passed to
  # Aspell#set_option.
  def actual_options
    options.keys.inject({}) do |hash, key|
      hash[key] = options[key].to_s
      hash
    end
  end

end
@@ -0,0 +1,12 @@
1
# Put lib/ and the spec directory itself at the front of the load path so
# the library and the helpers can be required by name.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
$LOAD_PATH.unshift(spec_dir)
require 'rspec'
require 'tagelizer'

# Load every supporting file (custom matchers, macros, etc.) found in
# ./support/ and its subdirectories.
Dir["#{spec_dir}/support/**/*.rb"].each do |support_file|
  require support_file
end

# No custom RSpec configuration.
RSpec.configure do |config|
end
@@ -0,0 +1,27 @@
1
require File.expand_path('spec_helper', File.dirname(__FILE__))

describe "Tagelizer" do
  # Instance built with the default locale, shared by the parsing examples.
  let(:tagi) { Tagelizer.new }

  # Tagelizer#initialize falls back to "en" when the requested code is not
  # among the Aspell dictionaries, so this example only passes when a "ru"
  # dictionary is available.
  it "has a locale set" do
    Tagelizer.new('ru').dictionary.should == 'ru'
  end

  # "Hi", "my" and "34" are dropped; the trailing "!" is stripped.
  it "splits strings" do
    tagi.parse("Hi my jealous 34 friend!").should == ["jealous", "friend"]
  end

  # NOTE(review): the expected correction is capitalised ("Friend") --
  # presumably the first suggestion Aspell returned for "freind"; confirm.
  it "can fix mistakes" do
    tagi.parse("Hi my jealous 34 freind!").should == ["jealous", "Friend"]
  end

  it "can use basic form of words" do
    tagi.parse("He reads a book.").should == ["read", "book"]
  end
end
metadata ADDED
@@ -0,0 +1,277 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: tagelizer
3
+ version: !ruby/object:Gem::Version
4
+ hash: 27
5
+ prerelease:
6
+ segments:
7
+ - 0
8
+ - 1
9
+ - 0
10
+ version: 0.1.0
11
+ platform: ruby
12
+ authors:
13
+ - Jonatan Reiners
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2011-04-21 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ type: :runtime
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
25
+ requirements:
26
+ - - "="
27
+ - !ruby/object:Gem::Version
28
+ hash: 11
29
+ segments:
30
+ - 1
31
+ - 2
32
+ version: "1.2"
33
+ name: raspell
34
+ version_requirements: *id001
35
+ prerelease: false
36
+ - !ruby/object:Gem::Dependency
37
+ type: :runtime
38
+ requirement: &id002 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ hash: 57
44
+ segments:
45
+ - 0
46
+ - 9
47
+ - 1
48
+ version: 0.9.1
49
+ name: ruby-stemmer
50
+ version_requirements: *id002
51
+ prerelease: false
52
+ - !ruby/object:Gem::Dependency
53
+ type: :development
54
+ requirement: &id003 !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ~>
58
+ - !ruby/object:Gem::Version
59
+ hash: 3
60
+ segments:
61
+ - 2
62
+ - 3
63
+ - 0
64
+ version: 2.3.0
65
+ name: rspec
66
+ version_requirements: *id003
67
+ prerelease: false
68
+ - !ruby/object:Gem::Dependency
69
+ type: :development
70
+ requirement: &id004 !ruby/object:Gem::Requirement
71
+ none: false
72
+ requirements:
73
+ - - ~>
74
+ - !ruby/object:Gem::Version
75
+ hash: 23
76
+ segments:
77
+ - 1
78
+ - 0
79
+ - 0
80
+ version: 1.0.0
81
+ name: bundler
82
+ version_requirements: *id004
83
+ prerelease: false
84
+ - !ruby/object:Gem::Dependency
85
+ type: :development
86
+ requirement: &id005 !ruby/object:Gem::Requirement
87
+ none: false
88
+ requirements:
89
+ - - ~>
90
+ - !ruby/object:Gem::Version
91
+ hash: 7
92
+ segments:
93
+ - 1
94
+ - 5
95
+ - 2
96
+ version: 1.5.2
97
+ name: jeweler
98
+ version_requirements: *id005
99
+ prerelease: false
100
+ - !ruby/object:Gem::Dependency
101
+ type: :development
102
+ requirement: &id006 !ruby/object:Gem::Requirement
103
+ none: false
104
+ requirements:
105
+ - - ">="
106
+ - !ruby/object:Gem::Version
107
+ hash: 3
108
+ segments:
109
+ - 0
110
+ version: "0"
111
+ name: rcov
112
+ version_requirements: *id006
113
+ prerelease: false
114
+ - !ruby/object:Gem::Dependency
115
+ type: :development
116
+ requirement: &id007 !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ~>
120
+ - !ruby/object:Gem::Version
121
+ hash: 15
122
+ segments:
123
+ - 1
124
+ - 2
125
+ - 8
126
+ version: 1.2.8
127
+ name: reek
128
+ version_requirements: *id007
129
+ prerelease: false
130
+ - !ruby/object:Gem::Dependency
131
+ type: :development
132
+ requirement: &id008 !ruby/object:Gem::Requirement
133
+ none: false
134
+ requirements:
135
+ - - ">="
136
+ - !ruby/object:Gem::Version
137
+ hash: 25
138
+ segments:
139
+ - 0
140
+ - 3
141
+ - 5
142
+ version: 0.3.5
143
+ name: rb-fsevent
144
+ version_requirements: *id008
145
+ prerelease: false
146
+ - !ruby/object:Gem::Dependency
147
+ type: :development
148
+ requirement: &id009 !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ">="
152
+ - !ruby/object:Gem::Version
153
+ hash: 3
154
+ segments:
155
+ - 0
156
+ version: "0"
157
+ name: guard-rspec
158
+ version_requirements: *id009
159
+ prerelease: false
160
+ - !ruby/object:Gem::Dependency
161
+ type: :development
162
+ requirement: &id010 !ruby/object:Gem::Requirement
163
+ none: false
164
+ requirements:
165
+ - - ">="
166
+ - !ruby/object:Gem::Version
167
+ hash: 3
168
+ segments:
169
+ - 0
170
+ version: "0"
171
+ name: growl
172
+ version_requirements: *id010
173
+ prerelease: false
174
+ - !ruby/object:Gem::Dependency
175
+ type: :runtime
176
+ requirement: &id011 !ruby/object:Gem::Requirement
177
+ none: false
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ hash: 11
182
+ segments:
183
+ - 1
184
+ - 2
185
+ version: "1.2"
186
+ name: raspell
187
+ version_requirements: *id011
188
+ prerelease: false
189
+ - !ruby/object:Gem::Dependency
190
+ type: :runtime
191
+ requirement: &id012 !ruby/object:Gem::Requirement
192
+ none: false
193
+ requirements:
194
+ - - ">="
195
+ - !ruby/object:Gem::Version
196
+ hash: 57
197
+ segments:
198
+ - 0
199
+ - 9
200
+ - 1
201
+ version: 0.9.1
202
+ name: ruby
203
+ version_requirements: *id012
204
+ prerelease: false
205
+ - !ruby/object:Gem::Dependency
206
+ type: :development
207
+ requirement: &id013 !ruby/object:Gem::Requirement
208
+ none: false
209
+ requirements:
210
+ - - ">="
211
+ - !ruby/object:Gem::Version
212
+ hash: 3
213
+ segments:
214
+ - 2
215
+ - 3
216
+ - 0
217
+ version: 2.3.0
218
+ name: rspec
219
+ version_requirements: *id013
220
+ prerelease: false
221
+ description: converts text to an array of words. Uses stemming and spellchecker to produce better results. Build to convert descriptions to tags.
222
+ email: git@encc.de
223
+ executables: []
224
+
225
+ extensions: []
226
+
227
+ extra_rdoc_files:
228
+ - LICENSE.txt
229
+ - README.rdoc
230
+ files:
231
+ - .document
232
+ - .rspec
233
+ - Gemfile
234
+ - LICENSE.txt
235
+ - README.rdoc
236
+ - Rakefile
237
+ - VERSION
238
+ - lib/tagelizer.rb
239
+ - spec/spec_helper.rb
240
+ - spec/tagelizer_spec.rb
241
+ has_rdoc: true
242
+ homepage: http://github.com/enc/tagelizer
243
+ licenses:
244
+ - MIT
245
+ post_install_message:
246
+ rdoc_options: []
247
+
248
+ require_paths:
249
+ - lib
250
+ required_ruby_version: !ruby/object:Gem::Requirement
251
+ none: false
252
+ requirements:
253
+ - - ">="
254
+ - !ruby/object:Gem::Version
255
+ hash: 3
256
+ segments:
257
+ - 0
258
+ version: "0"
259
+ required_rubygems_version: !ruby/object:Gem::Requirement
260
+ none: false
261
+ requirements:
262
+ - - ">="
263
+ - !ruby/object:Gem::Version
264
+ hash: 3
265
+ segments:
266
+ - 0
267
+ version: "0"
268
+ requirements: []
269
+
270
+ rubyforge_project:
271
+ rubygems_version: 1.6.2
272
+ signing_key:
273
+ specification_version: 3
274
+ summary: converts text to array of words for tags
275
+ test_files:
276
+ - spec/spec_helper.rb
277
+ - spec/tagelizer_spec.rb