filtra 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/CHANGELOG.md +6 -0
- data/README.md +59 -0
- data/filtra.gemspec +1 -1
- data/lib/Filtra/text.rb +2 -3
- data/lib/filtra.rb +3 -2
- data/tests/filtra_test.rb +16 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4f9741d13beea3287c5f17a9a2813f3e0551ab8e
|
4
|
+
data.tar.gz: 3cc0f425d04f04e95ad829ec7f40d7a8fb42c01a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7f84c2f04f98e8625e06c5db0efd501b184cb434883b137db786e5025475b311b7585df3d35128143baf18d90e4e181ed2790a9b02c99c48a3942cc9ac5f3d7d
|
7
|
+
data.tar.gz: aef5a827c6f01331f88f3e1c55baaae101d144683868161737d0f047960a44dbbb62da2ca33bff41edc5562827d5954cda5d1c7c679dec20a41b8d74176cd1b7
|
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
data/README.md
CHANGED
@@ -18,3 +18,62 @@ $ gem install filtra
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
+
The simplest usage is with default options:
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
filtro = Filtra.new()
|
25
|
+
words = %w(Running fishes Among the Coast line coast)
|
26
|
+
result = filtro.call(words)
|
27
|
+
puts result.inspect
|
28
|
+
#=> ["running", "fishes", "among", "the", "coast", "line"]
|
29
|
+
```
|
30
|
+
|
31
|
+
With default options, the case was changed and the word **coast** appears only once. Not really exciting, huh?
|
32
|
+
|
33
|
+
If, for some reason, you want to keep the casing, this is what happens:
|
34
|
+
|
35
|
+
```ruby
|
36
|
+
filtro = Filtra.new(keep_case: true)
|
37
|
+
words = %w(Running fishes Among the Coast line coast)
|
38
|
+
result = filtro.call(words)
|
39
|
+
puts result.inspect
|
40
|
+
#=> ["Running", "fishes", "Among", "the", "Coast", "line", "coast"]
|
41
|
+
```
|
42
|
+
|
43
|
+
Now you see the word **coast** appears two times. That's because of the casing.
|
44
|
+
|
45
|
+
Now, let's add some stemming to the mix.
|
46
|
+
|
47
|
+
```ruby
|
48
|
+
filtro = Filtra.new(stem: true)
|
49
|
+
words = %w(Running run fishes Among the Coast line coast)
|
50
|
+
result = filtro.call(words)
|
51
|
+
puts result.inspect
|
52
|
+
#=> ["run", "fish", "among", "the", "coast", "line"]
|
53
|
+
```
|
54
|
+
|
55
|
+
Stemming makes this a bit more interesting if you are thinking of indexing this later, right?
|
56
|
+
|
57
|
+
Now, let's make this *really* fun. Let's add a list of stopwords.
|
58
|
+
|
59
|
+
```ruby
|
60
|
+
filtro = Filtra.new(stopwords: Filtra.stopwords )
|
61
|
+
words = %w(this can be a nice idea that might not work)
|
62
|
+
result = filtro.call(words)
|
63
|
+
puts result.inspect
|
64
|
+
#=> ["can", "nice", "idea", "might", "work"]
|
65
|
+
```
|
66
|
+
|
67
|
+
Bundled with Filtra there's a list of common stopwords, but you can just pass your own.
|
68
|
+
|
69
|
+
```ruby
|
70
|
+
filtro = Filtra.new(stopwords: %w(this that those there))
|
71
|
+
words = %w(this can be a nice idea that might not work)
|
72
|
+
result = filtro.call(words)
|
73
|
+
puts result.inspect
|
74
|
+
#=> ["can", "be", "a", "nice", "idea", "might", "not", "work"]
|
75
|
+
```
|
76
|
+
|
77
|
+
|
78
|
+
And that's pretty much it. The code is simple; go take a look. And drop a line to julian@porta.sh if you have something to say.
|
79
|
+
|
data/filtra.gemspec
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "filtra"
|
5
|
-
s.version = "0.0.1"
|
5
|
+
s.version = "0.0.2"
|
6
6
|
s.summary = "Filtra filters an array of tokens to be indexed"
|
7
7
|
s.description = "Filtra filters an array of tokens or words so they can be indexed by Busca, the simple redis search"
|
8
8
|
s.authors = ["Julián Porta"]
|
data/lib/Filtra/text.rb
CHANGED
@@ -6,13 +6,12 @@ class Filtra
|
|
6
6
|
out = []
|
7
7
|
keep_case = opts[:keep_case] || false
|
8
8
|
stem = opts[:stem] || false
|
9
|
-
|
10
|
-
# stemm
|
9
|
+
stopwords = opts[:stopwords] || []
|
11
10
|
words.each do |word|
|
12
11
|
word.downcase! unless keep_case
|
13
12
|
out.push( stem ? word.stem : word)
|
14
13
|
end
|
15
|
-
out.uniq
|
14
|
+
out.uniq - stopwords
|
16
15
|
end
|
17
16
|
end
|
18
17
|
end
|
data/lib/filtra.rb
CHANGED
@@ -2,8 +2,6 @@ class Filtra
|
|
2
2
|
|
3
3
|
Dir[File.dirname(__FILE__) + '/Filtra/*.rb'].each {|file| require file }
|
4
4
|
|
5
|
-
attr_reader :filtro
|
6
|
-
|
7
5
|
def initialize(filtro = Filtra::Text, opts = {})
|
8
6
|
if filtro.respond_to?(:call)
|
9
7
|
@filtro = filtro
|
@@ -18,5 +16,8 @@ class Filtra
|
|
18
16
|
@filtro.call(words, @opts)
|
19
17
|
end
|
20
18
|
|
19
|
+
def self.stopwords
|
20
|
+
%w(a about above after again against all am an and any are aren't as at be because been before being below between both but by can't cannot could couldn't did didn't do does doesn't doing don't down during each few for from further had hadn't has hasn't have haven't having he he'd he'll he's her here here's hers herself him himself his how how's i i'd i'll i'm i've if in into is isn't it it's its itself let's me more most mustn't my myself no nor not of off on once only or other ought our ours ourselves out over own same shan't she she'd she'll she's should shouldn't so some such than that that's the their theirs them themselves then there there's these they they'd they'll they're they've this those through to too under until up very was wasn't we we'd we'll we're we've were weren't what what's when when's where where's which while who who's whom why why's with won't would wouldn't you you'd you'll you're you've your yours yourself yourselves)
|
21
|
+
end
|
21
22
|
|
22
23
|
end
|
data/tests/filtra_test.rb
CHANGED
@@ -23,3 +23,19 @@ test "Filtra::Text should filter and downcase some words" do
|
|
23
23
|
result = filtro.call(words)
|
24
24
|
assert_equal result, %w(Running fishes Among the Coast line coast)
|
25
25
|
end
|
26
|
+
|
27
|
+
|
28
|
+
test "stopwords should stop words from being indexed" do
|
29
|
+
filtro = Filtra.new(stopwords: %w(this that those there))
|
30
|
+
words = %w(this can be a nice idea that might not work)
|
31
|
+
result = filtro.call(words)
|
32
|
+
assert_equal result, %w(can be a nice idea might not work)
|
33
|
+
end
|
34
|
+
|
35
|
+
test "stopwords from Filtra.stopwords" do
|
36
|
+
filtro = Filtra.new(stopwords: Filtra.stopwords )
|
37
|
+
words = %w(this can be a nice idea that might not work)
|
38
|
+
result = filtro.call(words)
|
39
|
+
assert_equal result, %w(can nice idea might work)
|
40
|
+
end
|
41
|
+
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filtra
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julián Porta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-06-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cutest
|
@@ -47,8 +47,10 @@ extensions: []
|
|
47
47
|
extra_rdoc_files: []
|
48
48
|
files:
|
49
49
|
- ".gitignore"
|
50
|
+
- CHANGELOG.md
|
50
51
|
- LICENSE
|
51
52
|
- README.md
|
53
|
+
- filtra-0.0.1.gem
|
52
54
|
- filtra.gemspec
|
53
55
|
- lib/Filtra/text.rb
|
54
56
|
- lib/filtra.rb
|