vidibus-words 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +46 -0
- data/Rakefile +12 -32
- data/lib/vidibus/words.rb +13 -12
- data/lib/vidibus-words.rb +1 -5
- metadata +98 -38
- data/.bundle/config +0 -2
- data/.document +0 -5
- data/.gitignore +0 -21
- data/.rspec +0 -2
- data/Gemfile +0 -11
- data/Gemfile.lock +0 -101
- data/README.rdoc +0 -24
- data/VERSION +0 -1
- data/spec/spec_helper.rb +0 -15
- data/spec/vidibus/words_spec.rb +0 -170
- data/vidibus-words.gemspec +0 -65
data/README.md
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# Vidibus::Words [![](http://travis-ci.org/vidibus/vidibus-words.png)](http://travis-ci.org/vidibus/vidibus-words)
|
2
|
+
|
3
|
+
This gem provides handling of words. It ships with a list of stop words in English, German, and Spanish and allows extraction of keywords from a string.
|
4
|
+
|
5
|
+
This gem is part of [Vidibus](http://vidibus.org), an open source toolset for building distributed (video) applications.
|
6
|
+
|
7
|
+
|
8
|
+
## Installation
|
9
|
+
|
10
|
+
Add `gem 'vidibus-words'` to your `Gemfile`. Then call `bundle install` on your console.
|
11
|
+
|
12
|
+
|
13
|
+
## Usage
|
14
|
+
|
15
|
+
### Extracting keywords
|
16
|
+
|
17
|
+
To return a list of keywords from a given text, ordered by occurrence, enter:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
input = Vidibus::Words.new('To tell a long story short, it\'s necessary to tell it briefly without fluff!')
|
21
|
+
input.keywords
|
22
|
+
=> ["tell", "long", "story", "short", "necessary", "briefly", "fluff"]
|
23
|
+
```
|
24
|
+
|
25
|
+
To remove only the stopwords of a certain locale, set that locale as a filter:
|
26
|
+
|
27
|
+
```ruby
|
28
|
+
input = Vidibus::Words.new('To tell a long story short, it\'s necessary to tell it briefly without fluff!')
|
29
|
+
input.locale = :de
|
30
|
+
input.keywords
|
31
|
+
=> ["to", "tell", "a", "long", "story", "short", "it's", "necessary", "it", "briefly", "without", "fluff"]
|
32
|
+
```
|
33
|
+
|
34
|
+
### Stopwords lists
|
35
|
+
|
36
|
+
You may obtain stopwords easily:
|
37
|
+
|
38
|
+
```ruby
|
39
|
+
Vidibus::Words.stopwords # => Stopwords for all available locales
|
40
|
+
Vidibus::Words.stopwords(:en) # => English stopwords only
|
41
|
+
```
|
42
|
+
|
43
|
+
|
44
|
+
## Copyright
|
45
|
+
|
46
|
+
© 2010-2012 Andre Pankratz. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -1,37 +1,17 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require "rspec/core/rake_task"
|
1
|
+
require 'bundler'
|
2
|
+
require 'rdoc/task'
|
3
|
+
require 'rspec'
|
4
|
+
require 'rspec/core/rake_task'
|
6
5
|
|
7
|
-
|
8
|
-
require "jeweler"
|
9
|
-
Jeweler::Tasks.new do |gem|
|
10
|
-
gem.name = "vidibus-words"
|
11
|
-
gem.summary = %Q{Tools for handling of words.}
|
12
|
-
gem.description = %Q{Contains stop words lists and methods to extract keywords from strings.}
|
13
|
-
gem.email = "andre@vidibus.com"
|
14
|
-
gem.homepage = "http://github.com/vidibus/vidibus-words"
|
15
|
-
gem.authors = ["Andre Pankratz"]
|
16
|
-
gem.add_dependency "rails", "~> 3.0.0"
|
17
|
-
gem.add_dependency "vidibus-core_extensions"
|
18
|
-
end
|
19
|
-
Jeweler::GemcutterTasks.new
|
20
|
-
rescue LoadError
|
21
|
-
puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
|
22
|
-
end
|
6
|
+
Bundler::GemHelper.install_tasks
|
23
7
|
|
24
|
-
|
25
|
-
|
26
|
-
t.rcov = true
|
27
|
-
t.rcov_opts = ["--exclude", "^spec,/gems/"]
|
28
|
-
end
|
8
|
+
$LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
|
9
|
+
require 'vidibus/words'
|
29
10
|
|
30
11
|
Rake::RDocTask.new do |rdoc|
|
31
|
-
|
32
|
-
rdoc.
|
33
|
-
rdoc.
|
34
|
-
rdoc.rdoc_files.include(
|
35
|
-
rdoc.
|
36
|
-
rdoc.options << "--charset=utf-8"
|
12
|
+
rdoc.rdoc_dir = 'rdoc'
|
13
|
+
rdoc.title = "vidibus-words #{Vidibus::Words::VERSION}"
|
14
|
+
rdoc.rdoc_files.include('README*')
|
15
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
16
|
+
rdoc.options << '--charset=utf-8'
|
37
17
|
end
|
data/lib/vidibus/words.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
module Vidibus
|
3
3
|
class Words
|
4
|
+
VERSION = '0.0.2'
|
4
5
|
|
5
6
|
class MissingLocaleError < StandardError; end
|
6
7
|
|
@@ -42,7 +43,7 @@ module Vidibus
|
|
42
43
|
count = 0
|
43
44
|
_stopwords = Vidibus::Words.stopwords(*locales)
|
44
45
|
for word in sort
|
45
|
-
clean = word.permalink.gsub(
|
46
|
+
clean = word.permalink.gsub('-','')
|
46
47
|
unless _stopwords.include?(clean)
|
47
48
|
list << word
|
48
49
|
count += 1
|
@@ -61,7 +62,7 @@ module Vidibus
|
|
61
62
|
locales = I18n.available_locales if locales.empty?
|
62
63
|
stopwords = []
|
63
64
|
for locale in locales
|
64
|
-
translation = I18n.t(
|
65
|
+
translation = I18n.t('vidibus.stopwords', :locale => locale)
|
65
66
|
next if translation.is_a?(String)
|
66
67
|
stopwords << translation
|
67
68
|
end
|
@@ -70,20 +71,20 @@ module Vidibus
|
|
70
71
|
|
71
72
|
# Returns a list of words from given string.
|
72
73
|
def words(string)
|
73
|
-
allowed = [
|
74
|
-
disallowed = [
|
75
|
-
match = /[^#{allowed.join(
|
74
|
+
allowed = [' ', 'a-z', 'A-Z', '0-9'] + String::LATIN_MAP.values
|
75
|
+
disallowed = ['¿', '¡'] # Add some disallowed chars that cannot be caught. TODO: Improve!
|
76
|
+
match = /[^#{allowed.join('')}]/
|
76
77
|
string.
|
77
|
-
gsub(/\s+/mu,
|
78
|
-
gsub(/[#{disallowed.join}]/u,
|
79
|
-
gsub(/#{match}+ /u,
|
80
|
-
gsub(/ #{match}+/u,
|
81
|
-
gsub(/#{match}+$/u,
|
82
|
-
gsub(/^#{match}+/u,
|
78
|
+
gsub(/\s+/mu, ' ').
|
79
|
+
gsub(/[#{disallowed.join}]/u, '').
|
80
|
+
gsub(/#{match}+ /u, ' ').
|
81
|
+
gsub(/ #{match}+/u, ' ').
|
82
|
+
gsub(/#{match}+$/u, '').
|
83
|
+
gsub(/^#{match}+/u, '').
|
83
84
|
split(/ /)
|
84
85
|
end
|
85
86
|
|
86
|
-
# Returns a list of words ordered by
|
87
|
+
# Returns a list of words ordered by occurrence.
|
87
88
|
# All words will be converted to downcase.
|
88
89
|
def sort_by_occurrence(list)
|
89
90
|
map = {}
|
data/lib/vidibus-words.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vidibus-words
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 2
|
10
|
+
version: 0.0.2
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Andre Pankratz
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date:
|
18
|
+
date: 2012-02-11 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -24,14 +24,12 @@ dependencies:
|
|
24
24
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
25
|
none: false
|
26
26
|
requirements:
|
27
|
-
- -
|
27
|
+
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
hash:
|
29
|
+
hash: 5
|
30
30
|
segments:
|
31
31
|
- 3
|
32
|
-
|
33
|
-
- 0
|
34
|
-
version: 3.0.0
|
32
|
+
version: "3"
|
35
33
|
type: :runtime
|
36
34
|
version_requirements: *id001
|
37
35
|
- !ruby/object:Gem::Dependency
|
@@ -48,41 +46,102 @@ dependencies:
|
|
48
46
|
version: "0"
|
49
47
|
type: :runtime
|
50
48
|
version_requirements: *id002
|
49
|
+
- !ruby/object:Gem::Dependency
|
50
|
+
name: bundler
|
51
|
+
prerelease: false
|
52
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
53
|
+
none: false
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
hash: 23
|
58
|
+
segments:
|
59
|
+
- 1
|
60
|
+
- 0
|
61
|
+
- 0
|
62
|
+
version: 1.0.0
|
63
|
+
type: :development
|
64
|
+
version_requirements: *id003
|
65
|
+
- !ruby/object:Gem::Dependency
|
66
|
+
name: rake
|
67
|
+
prerelease: false
|
68
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
hash: 3
|
74
|
+
segments:
|
75
|
+
- 0
|
76
|
+
version: "0"
|
77
|
+
type: :development
|
78
|
+
version_requirements: *id004
|
79
|
+
- !ruby/object:Gem::Dependency
|
80
|
+
name: rdoc
|
81
|
+
prerelease: false
|
82
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
83
|
+
none: false
|
84
|
+
requirements:
|
85
|
+
- - ">="
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
hash: 3
|
88
|
+
segments:
|
89
|
+
- 0
|
90
|
+
version: "0"
|
91
|
+
type: :development
|
92
|
+
version_requirements: *id005
|
93
|
+
- !ruby/object:Gem::Dependency
|
94
|
+
name: rspec
|
95
|
+
prerelease: false
|
96
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ">="
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
hash: 3
|
102
|
+
segments:
|
103
|
+
- 0
|
104
|
+
version: "0"
|
105
|
+
type: :development
|
106
|
+
version_requirements: *id006
|
107
|
+
- !ruby/object:Gem::Dependency
|
108
|
+
name: rr
|
109
|
+
prerelease: false
|
110
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
111
|
+
none: false
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
hash: 3
|
116
|
+
segments:
|
117
|
+
- 0
|
118
|
+
version: "0"
|
119
|
+
type: :development
|
120
|
+
version_requirements: *id007
|
51
121
|
description: Contains stop words lists and methods to extract keywords from strings.
|
52
122
|
email: andre@vidibus.com
|
53
123
|
executables: []
|
54
124
|
|
55
125
|
extensions: []
|
56
126
|
|
57
|
-
extra_rdoc_files:
|
58
|
-
|
59
|
-
- README.rdoc
|
127
|
+
extra_rdoc_files: []
|
128
|
+
|
60
129
|
files:
|
61
|
-
- .
|
62
|
-
- .
|
63
|
-
- .gitignore
|
64
|
-
- .rspec
|
65
|
-
- Gemfile
|
66
|
-
- Gemfile.lock
|
67
|
-
- LICENSE
|
68
|
-
- README.rdoc
|
69
|
-
- Rakefile
|
70
|
-
- VERSION
|
130
|
+
- lib/vidibus/words.rb
|
131
|
+
- lib/vidibus-words.rb
|
71
132
|
- config/locales/de.yml
|
72
133
|
- config/locales/en.yml
|
73
134
|
- config/locales/es.yml
|
74
|
-
-
|
75
|
-
-
|
76
|
-
-
|
77
|
-
- spec/vidibus/words_spec.rb
|
78
|
-
- vidibus-words.gemspec
|
135
|
+
- LICENSE
|
136
|
+
- README.md
|
137
|
+
- Rakefile
|
79
138
|
has_rdoc: true
|
80
|
-
homepage:
|
139
|
+
homepage: https://github.com/vidibus/vidibus-words
|
81
140
|
licenses: []
|
82
141
|
|
83
142
|
post_install_message:
|
84
|
-
rdoc_options:
|
85
|
-
|
143
|
+
rdoc_options: []
|
144
|
+
|
86
145
|
require_paths:
|
87
146
|
- lib
|
88
147
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -99,17 +158,18 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
158
|
requirements:
|
100
159
|
- - ">="
|
101
160
|
- !ruby/object:Gem::Version
|
102
|
-
hash:
|
161
|
+
hash: 23
|
103
162
|
segments:
|
104
|
-
-
|
105
|
-
|
163
|
+
- 1
|
164
|
+
- 3
|
165
|
+
- 6
|
166
|
+
version: 1.3.6
|
106
167
|
requirements: []
|
107
168
|
|
108
|
-
rubyforge_project:
|
169
|
+
rubyforge_project: vidibus-words
|
109
170
|
rubygems_version: 1.3.7
|
110
171
|
signing_key:
|
111
172
|
specification_version: 3
|
112
|
-
summary: Tools for handling
|
113
|
-
test_files:
|
114
|
-
|
115
|
-
- spec/vidibus/words_spec.rb
|
173
|
+
summary: Tools for handling (stop-) words
|
174
|
+
test_files: []
|
175
|
+
|
data/.bundle/config
DELETED
data/.document
DELETED
data/.gitignore
DELETED
data/.rspec
DELETED
data/Gemfile
DELETED
data/Gemfile.lock
DELETED
@@ -1,101 +0,0 @@
|
|
1
|
-
GEM
|
2
|
-
remote: http://rubygems.org/
|
3
|
-
specs:
|
4
|
-
abstract (1.0.0)
|
5
|
-
actionmailer (3.0.1)
|
6
|
-
actionpack (= 3.0.1)
|
7
|
-
mail (~> 2.2.5)
|
8
|
-
actionpack (3.0.1)
|
9
|
-
activemodel (= 3.0.1)
|
10
|
-
activesupport (= 3.0.1)
|
11
|
-
builder (~> 2.1.2)
|
12
|
-
erubis (~> 2.6.6)
|
13
|
-
i18n (~> 0.4.1)
|
14
|
-
rack (~> 1.2.1)
|
15
|
-
rack-mount (~> 0.6.12)
|
16
|
-
rack-test (~> 0.5.4)
|
17
|
-
tzinfo (~> 0.3.23)
|
18
|
-
activemodel (3.0.1)
|
19
|
-
activesupport (= 3.0.1)
|
20
|
-
builder (~> 2.1.2)
|
21
|
-
i18n (~> 0.4.1)
|
22
|
-
activerecord (3.0.1)
|
23
|
-
activemodel (= 3.0.1)
|
24
|
-
activesupport (= 3.0.1)
|
25
|
-
arel (~> 1.0.0)
|
26
|
-
tzinfo (~> 0.3.23)
|
27
|
-
activeresource (3.0.1)
|
28
|
-
activemodel (= 3.0.1)
|
29
|
-
activesupport (= 3.0.1)
|
30
|
-
activesupport (3.0.1)
|
31
|
-
arel (1.0.1)
|
32
|
-
activesupport (~> 3.0.0)
|
33
|
-
builder (2.1.2)
|
34
|
-
diff-lcs (1.1.2)
|
35
|
-
erubis (2.6.6)
|
36
|
-
abstract (>= 1.0.0)
|
37
|
-
gemcutter (0.6.1)
|
38
|
-
git (1.2.5)
|
39
|
-
i18n (0.4.2)
|
40
|
-
jeweler (1.4.0)
|
41
|
-
gemcutter (>= 0.1.0)
|
42
|
-
git (>= 1.2.5)
|
43
|
-
rubyforge (>= 2.0.0)
|
44
|
-
json_pure (1.4.6)
|
45
|
-
mail (2.2.9)
|
46
|
-
activesupport (>= 2.3.6)
|
47
|
-
i18n (~> 0.4.1)
|
48
|
-
mime-types (~> 1.16)
|
49
|
-
treetop (~> 1.4.8)
|
50
|
-
mime-types (1.16)
|
51
|
-
polyglot (0.3.1)
|
52
|
-
rack (1.2.1)
|
53
|
-
rack-mount (0.6.13)
|
54
|
-
rack (>= 1.0.0)
|
55
|
-
rack-test (0.5.6)
|
56
|
-
rack (>= 1.0)
|
57
|
-
rails (3.0.1)
|
58
|
-
actionmailer (= 3.0.1)
|
59
|
-
actionpack (= 3.0.1)
|
60
|
-
activerecord (= 3.0.1)
|
61
|
-
activeresource (= 3.0.1)
|
62
|
-
activesupport (= 3.0.1)
|
63
|
-
bundler (~> 1.0.0)
|
64
|
-
railties (= 3.0.1)
|
65
|
-
railties (3.0.1)
|
66
|
-
actionpack (= 3.0.1)
|
67
|
-
activesupport (= 3.0.1)
|
68
|
-
rake (>= 0.8.4)
|
69
|
-
thor (~> 0.14.0)
|
70
|
-
rake (0.8.7)
|
71
|
-
relevance-rcov (0.9.2.1)
|
72
|
-
rr (1.0.2)
|
73
|
-
rspec (2.0.1)
|
74
|
-
rspec-core (~> 2.0.1)
|
75
|
-
rspec-expectations (~> 2.0.1)
|
76
|
-
rspec-mocks (~> 2.0.1)
|
77
|
-
rspec-core (2.0.1)
|
78
|
-
rspec-expectations (2.0.1)
|
79
|
-
diff-lcs (>= 1.1.2)
|
80
|
-
rspec-mocks (2.0.1)
|
81
|
-
rspec-core (~> 2.0.1)
|
82
|
-
rspec-expectations (~> 2.0.1)
|
83
|
-
rubyforge (2.0.4)
|
84
|
-
json_pure (>= 1.1.7)
|
85
|
-
thor (0.14.4)
|
86
|
-
treetop (1.4.8)
|
87
|
-
polyglot (>= 0.3.1)
|
88
|
-
tzinfo (0.3.23)
|
89
|
-
vidibus-core_extensions (0.3.11)
|
90
|
-
|
91
|
-
PLATFORMS
|
92
|
-
ruby
|
93
|
-
|
94
|
-
DEPENDENCIES
|
95
|
-
jeweler
|
96
|
-
rails (~> 3.0.0)
|
97
|
-
rake
|
98
|
-
relevance-rcov
|
99
|
-
rr
|
100
|
-
rspec (~> 2.0.0.beta.20)
|
101
|
-
vidibus-core_extensions
|
data/README.rdoc
DELETED
@@ -1,24 +0,0 @@
|
|
1
|
-
= vidibus-stopwords
|
2
|
-
|
3
|
-
This gem provides handling of words. It ships with a list of stop words and allows extraction of keywords from a string.
|
4
|
-
|
5
|
-
This gem is part of the open source SOA framework Vidibus: http://vidibus.org
|
6
|
-
|
7
|
-
|
8
|
-
== Installation
|
9
|
-
|
10
|
-
Add the dependency to the Gemfile of your application:
|
11
|
-
|
12
|
-
gem "vidibus-stopwords"
|
13
|
-
|
14
|
-
Then call bundle install on your console.
|
15
|
-
|
16
|
-
|
17
|
-
= Usage
|
18
|
-
|
19
|
-
TODO: describe
|
20
|
-
|
21
|
-
|
22
|
-
== Copyright
|
23
|
-
|
24
|
-
Copyright (c) 2010 Andre Pankratz. See LICENSE for details.
|
data/VERSION
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
0.0.1
|
data/spec/spec_helper.rb
DELETED
@@ -1,15 +0,0 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), "..", "lib"))
|
3
|
-
|
4
|
-
require "rubygems"
|
5
|
-
require "rspec"
|
6
|
-
require "rr"
|
7
|
-
require "active_support/core_ext"
|
8
|
-
require "vidibus-core_extensions"
|
9
|
-
require "vidibus-words"
|
10
|
-
|
11
|
-
RSpec.configure do |config|
|
12
|
-
config.mock_with :rr
|
13
|
-
end
|
14
|
-
|
15
|
-
I18n.load_path += Dir[File.join('config', 'locales', '**', '*.{rb,yml}')]
|
data/spec/vidibus/words_spec.rb
DELETED
@@ -1,170 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
require "spec_helper"
|
3
|
-
|
4
|
-
describe "Vidibus::Words" do
|
5
|
-
describe "initialization" do
|
6
|
-
it "should require an input string" do
|
7
|
-
expect {Vidibus::Words.new}.to raise_error(ArgumentError)
|
8
|
-
end
|
9
|
-
|
10
|
-
it "should accept an additional argument to set locales" do
|
11
|
-
words = Vidibus::Words.new("hello", :en)
|
12
|
-
words.locales.should eql([:en])
|
13
|
-
end
|
14
|
-
|
15
|
-
it "should accept an additional list of locales" do
|
16
|
-
words = Vidibus::Words.new("hello", [:en, :de])
|
17
|
-
words.locales.should eql([:en, :de])
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
describe "to_a" do
|
22
|
-
it "should call Vidibus::Words.words with input string" do
|
23
|
-
stub(Vidibus::Words.words("Whazzup?"))
|
24
|
-
Vidibus::Words.new("Whazzup?").to_a
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
describe "sort" do
|
29
|
-
it "should call Vidibus::Words.sort_by_occurrence with list" do
|
30
|
-
words = Vidibus::Words.new("Whazzup?")
|
31
|
-
stub(Vidibus::Words.sort_by_occurrence(words.list))
|
32
|
-
words.sort
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
describe "keywords" do
|
37
|
-
let(:input) do
|
38
|
-
"El profesor de ajedrez puso fin a la discusión de sus alumnos:
|
39
|
-
-Hoy, lo más importante, es la concentración. Deberán abstraerse del entorno y sólo prestar atención al juego. ¡No se olviden que la semana que viene es la maratón de ajedrez en Buenos Aires y ustedes representarán al club!
|
40
|
-
-¡Pero, maestro…! No se puede jugar en el bar que está lleno de gente y menos con el loquito de Aníbal al lado.
|
41
|
-
-¡Más respeto, jovencito! Aníbal es el hijo del presidente y fanático del ajedrez. Además, hace tiempo que quiere presenciar una partida, de modo que le prometí a su padre que estaría presente en el entrenamiento. ¡Y a ustedes les vendrá muy bien para ensayar la concentración! Los espero esta tarde a las cinco –dijo dando media vuelta y por finalizados los cuestionamientos.
|
42
|
-
El bar del club El Alfil, a las cinco de la tarde, estaba muy concurrido por los socios que se reponían de las distintas actividades del día. La mesa dispuesta para el partido estaba instalada en una esquina del salón, un poco aislada de las otras, ocupadas por parroquianos que poca atención le prestaban a los novatos ajedrecistas. En una silla contigua, se sentaba un joven de expresión entusiasta que contrastaba con el semblante adusto de los jugadores. El profesor colocó el reloj sobre la mesa y dio por iniciada la partida. Diego, que jugaba con las blancas, abrió con la apertura Ruy López y detuvo su cronómetro. Enseguida se escuchó el grito de Aníbal:
|
43
|
-
-¡Gambito de dama! ¡Gambito de dama!
|
44
|
-
Marcelo le echó una mirada de reojo y respondió con la defensa berlinesa para las negras.
|
45
|
-
-¡Defensa siciliana! ¡Defensa Siciliana! –chilló Aníbal en el colmo de su exaltación.
|
46
|
-
Acodado en la barra, don Antonio observaba la escena y le traducía al Sordo su interpretación de los hechos:
|
47
|
-
-Parece que los pibes están jugando a las damas y el loquito de Aníbal les da instrucciones. Por la cara que tienen no les gustan mucho los consejos, pero siendo un juego de damas parece acertado eso de las gambas. Y las sicilianas… ¡Se las traen!
|
48
|
-
El Sordo, que además de escuchar poco veía menos, asintió con un movimiento de cabeza.
|
49
|
-
En la mesa, los ajedrecistas se esforzaban por no perderse entre los desvaríos de Aníbal y movían sus piezas y detenían sus cronómetros y los volvían a poner en marcha.
|
50
|
-
El profesor afirmaba con la testa convencido de que la ordalía les aseguraría el primer puesto en el torneo.
|
51
|
-
-¡Defensa india de dama! ¡Defensa india de dama! –vociferó el hijo del presidente desde la silla, obedeciendo la orden de su papá de no moverse.
|
52
|
-
-¡Ahora pide que venga una india a defender a la dama! Al fin de cuentas parece que no está tan loquito este Aníbal –le dijo don Antonio en la oreja al Sordo y después se empinó un trago de grapa.
|
53
|
-
Los jugadores, estimulados por el acoso del loco, no tardaban más de quince minutos en mover sus piezas. De un solo movimiento, Diego le comió dos peones a Marcelo.
|
54
|
-
-¡Peones muertos! ¡Peones muertos! –sollozó Aníbal que era muy sensible.
|
55
|
-
-¡Las mujeres son vengativas, Sordo! ¿Qué necesidad de matar a esos pobres laburantes si con despedirlos hubiera protegido a la dama? –Le tironea de la manga de la camisa mientras le dice en voz alta:- ¡Mirá, mirá! ¿No te dije? ¡Es un juego violento!
|
56
|
-
En el salón, los aspirantes a campeones perseguían al loquito entre las mesas bombardeándolo con peones, alfiles, caballos, torres, reyes y reinas. El profesor los seguía levantando las piezas en el camino, mientras Aníbal desgranaba a la carrera sus nociones de ajedrez:
|
57
|
-
-¡Apertura, medio juego, final! ¡Final, Finaaaal!
|
58
|
-
-¿Querés que te diga? Me quedo con el truco, que no mata a nadie –concluyó don Antonio."
|
59
|
-
end
|
60
|
-
|
61
|
-
let(:words) {Vidibus::Words.new(input)}
|
62
|
-
|
63
|
-
it "should return a list of words without stopwords, ordered by occurrence" do
|
64
|
-
words = Vidibus::Words.new("To tell a long story short, it's necessary to tell it briefly without fluff!")
|
65
|
-
words.keywords.should eql(%w[tell long story short necessary briefly fluff])
|
66
|
-
end
|
67
|
-
|
68
|
-
it "should only remove stopwords of given locale" do
|
69
|
-
words = Vidibus::Words.new("To tell a long story short, it's necessary to tell it briefly without fluff!")
|
70
|
-
words.locale = :de
|
71
|
-
words.keywords.should eql(%w[to tell a long story short it's necessary it briefly without fluff])
|
72
|
-
end
|
73
|
-
|
74
|
-
it "should return only 20 keywords by default" do
|
75
|
-
keywords = words.keywords
|
76
|
-
keywords.length.should eql(20)
|
77
|
-
keywords.should eql(%w[no aníbal dama defensa profesor ajedrez juego loquito sordo peones mesa don antonio parece piezas india concentración atención ustedes club])
|
78
|
-
end
|
79
|
-
|
80
|
-
it "should accept an optional length param" do
|
81
|
-
words.keywords(30).length.should eql(30)
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
describe ".stopwords" do
|
86
|
-
it "should return a list of stop words of all languages available" do
|
87
|
-
list = Vidibus::Words.stopwords
|
88
|
-
list.should include("also") # de
|
89
|
-
list.should include("able") # en
|
90
|
-
end
|
91
|
-
|
92
|
-
it "should return a list of stop words for given locale only" do
|
93
|
-
list = Vidibus::Words.stopwords(:de)
|
94
|
-
list.should include("also")
|
95
|
-
list.should_not include("able")
|
96
|
-
end
|
97
|
-
|
98
|
-
it "should accept multiple locales" do
|
99
|
-
list = Vidibus::Words.stopwords(:de, :en)
|
100
|
-
list.should include("also") # de
|
101
|
-
list.should include("able") # en
|
102
|
-
end
|
103
|
-
|
104
|
-
it "should return an empty array if no stop words are available for given locale" do
|
105
|
-
Vidibus::Words.stopwords(:fr).should be_empty
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
describe ".words" do
|
110
|
-
it "should return an array of words from given string" do
|
111
|
-
Vidibus::Words.words("Hello").should eql(%w[Hello])
|
112
|
-
end
|
113
|
-
|
114
|
-
it "should remove dates with slashes" do
|
115
|
-
Vidibus::Words.words("On 01/12/2011 we will party!").should eql(%w[On 01/12/2011 we will party])
|
116
|
-
end
|
117
|
-
|
118
|
-
it "should preserve dates with dashes" do
|
119
|
-
Vidibus::Words.words("On 12-01-2011 we will party!").should eql(%w[On 12-01-2011 we will party])
|
120
|
-
end
|
121
|
-
|
122
|
-
it "should preserve dates with dots" do
|
123
|
-
Vidibus::Words.words("On 12.01.2011 we will party!").should eql(%w[On 12.01.2011 we will party])
|
124
|
-
end
|
125
|
-
|
126
|
-
it "should preserve combined words" do
|
127
|
-
Vidibus::Words.words("sign-on").should eql(%w[sign-on])
|
128
|
-
end
|
129
|
-
|
130
|
-
it "should preserve decimals with dots" do
|
131
|
-
Vidibus::Words.words("10.5").should eql(%w[10.5])
|
132
|
-
end
|
133
|
-
|
134
|
-
it "should preserve decimals with commas" do
|
135
|
-
Vidibus::Words.words("10,5").should eql(%w[10,5])
|
136
|
-
end
|
137
|
-
|
138
|
-
it "should preserve apostrophs" do
|
139
|
-
Vidibus::Words.words("It's on!").should eql(%w[It's on])
|
140
|
-
end
|
141
|
-
|
142
|
-
it "should preserve special chars" do
|
143
|
-
Vidibus::Words.words("Hola señor").should eql(%w[Hola señor])
|
144
|
-
end
|
145
|
-
|
146
|
-
it "should remove non-word chars" do
|
147
|
-
Vidibus::Words.words("¿cómo está?").should eql(%w[cómo está])
|
148
|
-
end
|
149
|
-
|
150
|
-
it "should remove non-word chars within sentences" do
|
151
|
-
Vidibus::Words.words("Hola señor, ¿cómo está?").should eql(%w[Hola señor cómo está])
|
152
|
-
end
|
153
|
-
|
154
|
-
it "should remove double non-word chars" do
|
155
|
-
Vidibus::Words.words("-¡Defensa india de dama!-").should eql(%w[Defensa india de dama])
|
156
|
-
end
|
157
|
-
end
|
158
|
-
|
159
|
-
describe ".sort_by_occurrence" do
|
160
|
-
it "should sort a list of words by occurrence" do
|
161
|
-
words = Vidibus::Words.words("Children's song: Hey, hey Wickie, hey Wickie, hey!")
|
162
|
-
Vidibus::Words.sort_by_occurrence(words).should eql(%w[hey wickie children's song])
|
163
|
-
end
|
164
|
-
|
165
|
-
it "should also weigh the position of words" do
|
166
|
-
words = Vidibus::Words.words("third: first second third")
|
167
|
-
Vidibus::Words.sort_by_occurrence(words).should eql(%w[third first second])
|
168
|
-
end
|
169
|
-
end
|
170
|
-
end
|
data/vidibus-words.gemspec
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
-
# -*- encoding: utf-8 -*-
|
5
|
-
|
6
|
-
Gem::Specification.new do |s|
|
7
|
-
s.name = %q{vidibus-words}
|
8
|
-
s.version = "0.0.1"
|
9
|
-
|
10
|
-
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = ["Andre Pankratz"]
|
12
|
-
s.date = %q{2010-11-16}
|
13
|
-
s.description = %q{Contains stop words lists and methods to extract keywords from strings.}
|
14
|
-
s.email = %q{andre@vidibus.com}
|
15
|
-
s.extra_rdoc_files = [
|
16
|
-
"LICENSE",
|
17
|
-
"README.rdoc"
|
18
|
-
]
|
19
|
-
s.files = [
|
20
|
-
".bundle/config",
|
21
|
-
".document",
|
22
|
-
".gitignore",
|
23
|
-
".rspec",
|
24
|
-
"Gemfile",
|
25
|
-
"Gemfile.lock",
|
26
|
-
"LICENSE",
|
27
|
-
"README.rdoc",
|
28
|
-
"Rakefile",
|
29
|
-
"VERSION",
|
30
|
-
"config/locales/de.yml",
|
31
|
-
"config/locales/en.yml",
|
32
|
-
"config/locales/es.yml",
|
33
|
-
"lib/vidibus-words.rb",
|
34
|
-
"lib/vidibus/words.rb",
|
35
|
-
"spec/spec_helper.rb",
|
36
|
-
"spec/vidibus/words_spec.rb",
|
37
|
-
"vidibus-words.gemspec"
|
38
|
-
]
|
39
|
-
s.homepage = %q{http://github.com/vidibus/vidibus-words}
|
40
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
41
|
-
s.require_paths = ["lib"]
|
42
|
-
s.rubygems_version = %q{1.3.7}
|
43
|
-
s.summary = %q{Tools for handling of words.}
|
44
|
-
s.test_files = [
|
45
|
-
"spec/spec_helper.rb",
|
46
|
-
"spec/vidibus/words_spec.rb"
|
47
|
-
]
|
48
|
-
|
49
|
-
if s.respond_to? :specification_version then
|
50
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
51
|
-
s.specification_version = 3
|
52
|
-
|
53
|
-
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
54
|
-
s.add_runtime_dependency(%q<rails>, ["~> 3.0.0"])
|
55
|
-
s.add_runtime_dependency(%q<vidibus-core_extensions>, [">= 0"])
|
56
|
-
else
|
57
|
-
s.add_dependency(%q<rails>, ["~> 3.0.0"])
|
58
|
-
s.add_dependency(%q<vidibus-core_extensions>, [">= 0"])
|
59
|
-
end
|
60
|
-
else
|
61
|
-
s.add_dependency(%q<rails>, ["~> 3.0.0"])
|
62
|
-
s.add_dependency(%q<vidibus-core_extensions>, [">= 0"])
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|