fizx-stringset 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
+ require "rubygems"
2
+ require 'spec'
3
+ require "benchmark"
4
+
5
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
6
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
+ require 'stringset'
8
+
9
+ Spec::Runner.configure do |config|
10
+
11
+ end
@@ -0,0 +1,67 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
+ describe "StringSet" do
4
+ describe "#new" do
5
+ it "should accept a string and tokenize it" do
6
+ s = StringSet.new "tokenize me"
7
+ s.strings.should == %w[tokenize me]
8
+ end
9
+
10
+ it "should accept an array of tokens" do
11
+ s = StringSet.new %w[tokenized list]
12
+ s.strings.should == %w[tokenized list]
13
+ end
14
+
15
+ it "could accept an array of multi-word tokens" do
16
+ s = StringSet.new ["foo bar", "bar"]
17
+ end
18
+
19
+ it "should know the max token length of the multiword tokenset" do
20
+ s = StringSet.new ["foo bar", "bar"]
21
+ s.max_token_size.should == 2
22
+ end
23
+
24
+ it "should have the option to stem" do
25
+ s = StringSet.new %w[tokenized list], :stem => true
26
+ s.should be_stemming
27
+ end
28
+ end
29
+
30
+ describe "#ngramize" do
31
+ it "should make the correct ngrams" do
32
+ s = StringSet.new
33
+ s.ngramize(%w[a b c d], 3).should == ["a", "b", "c", "d", "a b", "b c", "c d", "a b c", "b c d"]
34
+ end
35
+ end
36
+
37
+ describe "#substrings_in" do
38
+ it "should return a list of common substrings" do
39
+ s = StringSet.new "tokenize me"
40
+ s.substrings_in("can you please tokenize me?").should == %w[tokenize me]
41
+ end
42
+
43
+ it "should handle multiword substrings" do
44
+ s = StringSet.new ["tokenize me"]
45
+ s.substrings_in("can you please tokenize me?").should == ["tokenize me"]
46
+ end
47
+
48
+ it "should handle multiword substrings with stemming" do
49
+ s = StringSet.new ["tokenize me"], :stem => true
50
+ s.substrings_in("can you please tokenize me?").should == ["token me"]
51
+ end
52
+
53
+ it "should account for stemming" do
54
+ s = StringSet.new "token me", :stem => true
55
+ s.substrings_in("can you please tokenize me?").should == %w[token me]
56
+ end
57
+
58
+ it "should be pretty fast" do
59
+ needles = %[love thine soldiers bananas monkeys bachelors masters doctorate]
60
+ hamlet = File.read(File.join(File.dirname(__FILE__), "hamlet.txt"))
61
+ Benchmark.measure do
62
+ s = StringSet.new(needles)
63
+ s.substrings_in(hamlet)
64
+ end.real.should < 0.1
65
+ end
66
+ end
67
+ end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fizx-stringset
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Kyle Maxwell
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-06-04 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: kyle@kylemaxwell.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - README.rdoc
25
+ files:
26
+ - LICENSE
27
+ - README.rdoc
28
+ - Rakefile
29
+ - VERSION.yml
30
+ - lib/stringset.rb
31
+ - spec/hamlet.txt
32
+ - spec/spec_helper.rb
33
+ - spec/stringset_spec.rb
34
+ has_rdoc: true
35
+ homepage: http://github.com/fizx/stringset
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --charset=UTF-8
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ version:
53
+ requirements: []
54
+
55
+ rubyforge_project:
56
+ rubygems_version: 1.2.0
57
+ signing_key:
58
+ specification_version: 2
59
+ summary: TODO
60
+ test_files:
61
+ - spec/spec_helper.rb
62
+ - spec/stringset_spec.rb