fizx-stringset 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.rdoc +7 -0
- data/Rakefile +48 -0
- data/VERSION.yml +4 -0
- data/lib/stringset.rb +50 -0
- data/spec/hamlet.txt +7067 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/stringset_spec.rb +67 -0
- metadata +62 -0
data/spec/stringset_spec.rb
ADDED
@@ -0,0 +1,67 @@
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
# The timing example below uses Benchmark. spec_helper is not visible from
# this file and may already load it; requiring it again is a harmless no-op.
require 'benchmark'

# Behavioural specification for StringSet: construction from a string or an
# array of tokens, n-gram generation, and finding the set's tokens inside a
# larger text (optionally with stemming enabled).
describe "StringSet" do
  describe "#new" do
    it "should accept a string and tokenize it" do
      s = StringSet.new "tokenize me"
      s.strings.should == %w[tokenize me]
    end

    it "should accept an array of tokens" do
      s = StringSet.new %w[tokenized list]
      s.strings.should == %w[tokenized list]
    end

    it "could accept an array of multi-word tokens" do
      # Make the (previously implicit) expectation explicit: building a set
      # from multi-word tokens must not raise.
      lambda { StringSet.new(["foo bar", "bar"]) }.should_not raise_error
    end

    it "should know the max token length of the multiword tokenset" do
      s = StringSet.new ["foo bar", "bar"]
      # "foo bar" is the longest token: two words.
      s.max_token_size.should == 2
    end

    it "should have the option to stem" do
      s = StringSet.new %w[tokenized list], :stem => true
      s.should be_stemming
    end
  end

  describe "#ngramize" do
    it "should make the correct ngrams" do
      s = StringSet.new
      # All 1-grams first, then 2-grams, then 3-grams, each in input order.
      s.ngramize(%w[a b c d], 3).should == ["a", "b", "c", "d", "a b", "b c", "c d", "a b c", "b c d"]
    end
  end

  describe "#substrings_in" do
    it "should return a list of common substrings" do
      s = StringSet.new "tokenize me"
      s.substrings_in("can you please tokenize me?").should == %w[tokenize me]
    end

    it "should handle multiword substrings" do
      s = StringSet.new ["tokenize me"]
      s.substrings_in("can you please tokenize me?").should == ["tokenize me"]
    end

    it "should handle multiword substrings with stemming" do
      s = StringSet.new ["tokenize me"], :stem => true
      # With stemming on, the match is reported in stemmed form.
      s.substrings_in("can you please tokenize me?").should == ["token me"]
    end

    it "should account for stemming" do
      s = StringSet.new "token me", :stem => true
      s.substrings_in("can you please tokenize me?").should == %w[token me]
    end

    it "should be pretty fast" do
      # NOTE: %[...] is a plain String literal (not a %w word array); this
      # relies on the constructor tokenizing a String, as specified in "#new".
      needles = %[love thine soldiers bananas monkeys bachelors masters doctorate]
      hamlet = File.read(File.join(File.dirname(__FILE__), "hamlet.txt"))
      # Building the set and scanning the whole fixture must take less than
      # 0.1 seconds of wall-clock time (Benchmark::Tms#real).
      Benchmark.measure do
        s = StringSet.new(needles)
        s.substrings_in(hamlet)
      end.real.should < 0.1
    end
  end
end
metadata
ADDED
@@ -0,0 +1,62 @@
# Serialized Gem::Specification for fizx-stringset 0.2.0.
# Indentation restored so that the Gem::Version and Gem::Requirement objects
# own their nested keys instead of colliding with top-level ones.
--- !ruby/object:Gem::Specification
name: fizx-stringset
version: !ruby/object:Gem::Version
  version: 0.2.0
platform: ruby
authors:
- Kyle Maxwell
autorequire:
bindir: bin
cert_chain: []

date: 2009-06-04 00:00:00 -07:00
default_executable:
dependencies: []

description:
email: kyle@kylemaxwell.com
executables: []

extensions: []

extra_rdoc_files:
- LICENSE
- README.rdoc
files:
- LICENSE
- README.rdoc
- Rakefile
- VERSION.yml
- lib/stringset.rb
- spec/hamlet.txt
- spec/spec_helper.rb
- spec/stringset_spec.rb
has_rdoc: true
homepage: http://github.com/fizx/stringset
post_install_message:
rdoc_options:
- --charset=UTF-8
require_paths:
- lib
# Any Ruby version is accepted (">= 0").
required_ruby_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: "0"
  version:
# Any RubyGems version is accepted (">= 0").
required_rubygems_version: !ruby/object:Gem::Requirement
  requirements:
  - - ">="
    - !ruby/object:Gem::Version
      version: "0"
  version:
requirements: []

rubyforge_project:
rubygems_version: 1.2.0
signing_key:
specification_version: 2
summary: TODO
test_files:
- spec/spec_helper.rb
- spec/stringset_spec.rb