fizx-stringset 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.rdoc +7 -0
- data/Rakefile +48 -0
- data/VERSION.yml +4 -0
- data/lib/stringset.rb +50 -0
- data/spec/hamlet.txt +7067 -0
- data/spec/spec_helper.rb +11 -0
- data/spec/stringset_spec.rb +67 -0
- metadata +62 -0
data/spec/stringset_spec.rb
ADDED
@@ -0,0 +1,67 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
|
2
|
+
|
3
|
+
describe "StringSet" do
|
4
|
+
describe "#new" do
|
5
|
+
it "should accept a string and tokenize it" do
|
6
|
+
s = StringSet.new "tokenize me"
|
7
|
+
s.strings.should == %w[tokenize me]
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should accept an array of tokens" do
|
11
|
+
s = StringSet.new %w[tokenized list]
|
12
|
+
s.strings.should == %w[tokenized list]
|
13
|
+
end
|
14
|
+
|
15
|
+
it "could accept an array of multi-word tokens" do
|
16
|
+
s = StringSet.new ["foo bar", "bar"]
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should know the max token length of the multiword tokenset" do
|
20
|
+
s = StringSet.new ["foo bar", "bar"]
|
21
|
+
s.max_token_size.should == 2
|
22
|
+
end
|
23
|
+
|
24
|
+
it "should have the option to stem" do
|
25
|
+
s = StringSet.new %w[tokenized list], :stem => true
|
26
|
+
s.should be_stemming
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
describe "#ngramize" do
|
31
|
+
it "should make the correct ngrams" do
|
32
|
+
s = StringSet.new
|
33
|
+
s.ngramize(%w[a b c d], 3).should == ["a", "b", "c", "d", "a b", "b c", "c d", "a b c", "b c d"]
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
describe "#substrings_in" do
|
38
|
+
it "should return a list of common substrings" do
|
39
|
+
s = StringSet.new "tokenize me"
|
40
|
+
s.substrings_in("can you please tokenize me?").should == %w[tokenize me]
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should handle multiword substrings" do
|
44
|
+
s = StringSet.new ["tokenize me"]
|
45
|
+
s.substrings_in("can you please tokenize me?").should == ["tokenize me"]
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should handle multiword substrings with stemming" do
|
49
|
+
s = StringSet.new ["tokenize me"], :stem => true
|
50
|
+
s.substrings_in("can you please tokenize me?").should == ["token me"]
|
51
|
+
end
|
52
|
+
|
53
|
+
it "should account for stemming" do
|
54
|
+
s = StringSet.new "token me", :stem => true
|
55
|
+
s.substrings_in("can you please tokenize me?").should == %w[token me]
|
56
|
+
end
|
57
|
+
|
58
|
+
it "should be pretty fast" do
|
59
|
+
needles = %[love thine soldiers bananas monkeys bachelors masters doctorate]
|
60
|
+
hamlet = File.read(File.join(File.dirname(__FILE__), "hamlet.txt"))
|
61
|
+
Benchmark.measure do
|
62
|
+
s = StringSet.new(needles)
|
63
|
+
s.substrings_in(hamlet)
|
64
|
+
end.real.should < 0.1
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: fizx-stringset
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Kyle Maxwell
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-06-04 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: kyle@kylemaxwell.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files:
|
23
|
+
- LICENSE
|
24
|
+
- README.rdoc
|
25
|
+
files:
|
26
|
+
- LICENSE
|
27
|
+
- README.rdoc
|
28
|
+
- Rakefile
|
29
|
+
- VERSION.yml
|
30
|
+
- lib/stringset.rb
|
31
|
+
- spec/hamlet.txt
|
32
|
+
- spec/spec_helper.rb
|
33
|
+
- spec/stringset_spec.rb
|
34
|
+
has_rdoc: true
|
35
|
+
homepage: http://github.com/fizx/stringset
|
36
|
+
post_install_message:
|
37
|
+
rdoc_options:
|
38
|
+
- --charset=UTF-8
|
39
|
+
require_paths:
|
40
|
+
- lib
|
41
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
version:
|
47
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
48
|
+
requirements:
|
49
|
+
- - ">="
|
50
|
+
- !ruby/object:Gem::Version
|
51
|
+
version: "0"
|
52
|
+
version:
|
53
|
+
requirements: []
|
54
|
+
|
55
|
+
rubyforge_project:
|
56
|
+
rubygems_version: 1.2.0
|
57
|
+
signing_key:
|
58
|
+
specification_version: 2
|
59
|
+
summary: TODO
|
60
|
+
test_files:
|
61
|
+
- spec/spec_helper.rb
|
62
|
+
- spec/stringset_spec.rb
|