fizx-stringset 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,11 @@
1
+ require "rubygems" # Shared spec setup: loads RSpec, Benchmark and the library under test.
2
+ require 'spec' # RSpec 1.x entry point (supplies the Spec::Runner constant used below)
3
+ require "benchmark"
4
+ # Prepend this spec directory and the sibling lib/ directory to the load path.
5
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
6
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
7
+ require 'stringset'
8
+ # Suite-wide configuration hook; the block body is empty, so nothing is configured here.
9
+ Spec::Runner.configure do |config|
10
+
11
+ end
@@ -0,0 +1,67 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper') # spec_helper.rb in this file's directory
2
+ # Examples for StringSet: construction (#new), n-gram generation (#ngramize) and lookup (#substrings_in).
3
+ describe "StringSet" do
4
+ describe "#new" do
5
+ it "should accept a string and tokenize it" do
6
+ s = StringSet.new "tokenize me"
7
+ s.strings.should == %w[tokenize me]
8
+ end
9
+
10
+ it "should accept an array of tokens" do
11
+ s = StringSet.new %w[tokenized list]
12
+ s.strings.should == %w[tokenized list]
13
+ end
14
+
15
+ it "could accept an array of multi-word tokens" do
16
+ s = StringSet.new ["foo bar", "bar"] # NOTE(review): no expectation follows; this only checks that construction does not raise
17
+ end
18
+
19
+ it "should know the max token length of the multiword tokenset" do
20
+ s = StringSet.new ["foo bar", "bar"]
21
+ s.max_token_size.should == 2 # presumably measured in words ("foo bar" has two) - confirm in lib/stringset.rb
22
+ end
23
+
24
+ it "should have the option to stem" do
25
+ s = StringSet.new %w[tokenized list], :stem => true
26
+ s.should be_stemming # RSpec predicate matcher: passes when s.stemming? is truthy
27
+ end
28
+ end
29
+
30
+ describe "#ngramize" do
31
+ it "should make the correct ngrams" do # expects every 1- to 3-word n-gram, shortest first, in input order
32
+ s = StringSet.new
33
+ s.ngramize(%w[a b c d], 3).should == ["a", "b", "c", "d", "a b", "b c", "c d", "a b c", "b c d"]
34
+ end
35
+ end
36
+
37
+ describe "#substrings_in" do
38
+ it "should return a list of common substrings" do # the haystack ends in "me?", yet "me" is still expected back
39
+ s = StringSet.new "tokenize me"
40
+ s.substrings_in("can you please tokenize me?").should == %w[tokenize me]
41
+ end
42
+
43
+ it "should handle multiword substrings" do # a one-element array keeps "tokenize me" as a single two-word entry
44
+ s = StringSet.new ["tokenize me"]
45
+ s.substrings_in("can you please tokenize me?").should == ["tokenize me"]
46
+ end
47
+
48
+ it "should handle multiword substrings with stemming" do # the expected match is the stemmed form, not the original text
49
+ s = StringSet.new ["tokenize me"], :stem => true
50
+ s.substrings_in("can you please tokenize me?").should == ["token me"]
51
+ end
52
+
53
+ it "should account for stemming" do # "tokenize" in the haystack is expected to match the entry "token"
54
+ s = StringSet.new "token me", :stem => true
55
+ s.substrings_in("can you please tokenize me?").should == %w[token me]
56
+ end
57
+
58
+ it "should be pretty fast" do
59
+ needles = %[love thine soldiers bananas monkeys bachelors masters doctorate] # NOTE(review): %[...] is a plain String literal (not %w[...]), so needles is one space-separated String
60
+ hamlet = File.read(File.join(File.dirname(__FILE__), "hamlet.txt")) # fixture read before timing starts, so file I/O is not measured
61
+ Benchmark.measure do
62
+ s = StringSet.new(needles)
63
+ s.substrings_in(hamlet)
64
+ end.real.should < 0.1 # elapsed wall-clock seconds; NOTE(review): the 0.1 threshold depends on the machine running the specs
65
+ end
66
+ end
67
+ end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fizx-stringset
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Kyle Maxwell
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-06-04 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: kyle@kylemaxwell.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files:
23
+ - LICENSE
24
+ - README.rdoc
25
+ files:
26
+ - LICENSE
27
+ - README.rdoc
28
+ - Rakefile
29
+ - VERSION.yml
30
+ - lib/stringset.rb
31
+ - spec/hamlet.txt
32
+ - spec/spec_helper.rb
33
+ - spec/stringset_spec.rb
34
+ has_rdoc: true
35
+ homepage: http://github.com/fizx/stringset
36
+ post_install_message:
37
+ rdoc_options:
38
+ - --charset=UTF-8
39
+ require_paths:
40
+ - lib
41
+ required_ruby_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ required_rubygems_version: !ruby/object:Gem::Requirement
48
+ requirements:
49
+ - - ">="
50
+ - !ruby/object:Gem::Version
51
+ version: "0"
52
+ version:
53
+ requirements: []
54
+
55
+ rubyforge_project:
56
+ rubygems_version: 1.2.0
57
+ signing_key:
58
+ specification_version: 2
59
+ summary: TODO # NOTE(review): placeholder text shipped as the gem summary; replace with a real one-line description
60
+ test_files:
61
+ - spec/spec_helper.rb
62
+ - spec/stringset_spec.rb