strabo 0.0.0 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.0
1
+ 0.0.2
@@ -61,4 +61,12 @@ Feature: Index a document
61
61
  | z | 3 |
62
62
  | a | 1 |
63
63
  | b | 1 |
64
- | c | 1 |
64
+ | c | 1 |
65
+ Then it should have the following term set:
66
+ | T |
67
+ | x |
68
+ | y |
69
+ | z |
70
+ | a |
71
+ | b |
72
+ | c |
@@ -34,4 +34,10 @@ Then /it should have the following "(.+)" term frequencies:/ do |attribute, tabl
34
34
  @context.keywords[attribute].keys.should include(row['term'])
35
35
  @context.keywords[attribute][row['term']].should eql(row['frequency'].to_i)
36
36
  end
37
- end
37
+ end
38
+
39
+ Then /it should have the following term set:/ do |table|
40
+ table.rows.each do |row|
41
+ @context.keywords(:as => :set).should include(row.first)
42
+ end
43
+ end
@@ -1,4 +1,4 @@
1
- require 'Set'
1
+ require 'set'
2
2
 
3
3
  # Strabo assists full text search indexing by generating term-frequency maps
4
4
  # for an object's attributes. The term-frequency map may be flattened into
@@ -45,16 +45,35 @@ module Strabo
45
45
  # Invokes stemmer on token. If no stemmer has been configured, it will
46
46
  # return the original token.
47
47
  #
48
- # @param [String] token
48
+ # @param [Array<String>] tokens one or more tokens to stem
49
49
  #
50
- # @return [String] result of stemming
50
+ # @return [Array] stemmed tokens
51
51
  #
52
52
  # @see Strabo#stemmer
53
- def self.stem(token)
54
- @stemmer.nil? ? token : @stemmer.call(token)
53
+ def self.stem(*tokens)
54
+ tokens.map do |token|
55
+ @stemmer.nil? ? token : @stemmer.call(token)
56
+ end.flatten
55
57
  end
56
58
  end
57
59
 
60
+ # Defines how a single string is divided into multiple strings.
61
+ module Tokenizer
62
+
63
+ # Break a string into a list of strings.
64
+ #
65
+ # @param [String] value text to convert into a list
66
+ # @param [Regexp] delimiter pattern used to scan the string
67
+ #
68
+ # @return [Array<String>] list of lowercased, unstemmed tokens
69
+ #
70
+ # @private
71
+ def self.tokenize(value, delimiter = /\S+/)
72
+ value.downcase.gsub(/[^a-z0-9\s]/i,'').scan(delimiter)
73
+ end
74
+
75
+ end
76
+
58
77
  module Indexer
59
78
 
60
79
  # Get attribute-term-frequency map. If flattened, a term-frequency map
@@ -66,24 +85,17 @@ module Strabo
66
85
  # { term => frequency } map.
67
86
  def keywords(flatten = false)
68
87
  @term_map = {}
69
- self.each { |key, value| @term_map[key] = frequency(tokenize(value)) }
70
- flatten ? flatten_keyword_map(@term_map) : @term_map
88
+ self.each { |key, value| @term_map[key] = frequency(Stemmer::stem(Tokenizer::tokenize(value))) }
89
+
90
+ case flatten
91
+ when false : @term_map
92
+ when true : flatten_keyword_map(@term_map)
93
+ else flatten_keyword_map(@term_map).keys
94
+ end
71
95
  end
72
96
 
73
97
  private
74
98
 
75
- # Break a string into a list of strings.
76
- #
77
- # @param [String] text to convert into a list
78
- # @param [Regex] delimiter used to scan the string
79
- #
80
- # @return [Array] list of stemmed terms
81
- #
82
- # @private
83
- def tokenize(value, delimiter = /\S+/)
84
- value.downcase.scan(delimiter).map { |token| Strabo::Stemmer.stem(token) }
85
- end
86
-
87
99
  # Tally the number of occurrences of a value in a list.
88
100
  #
89
101
  # @param [Array] list of terms to count
@@ -2,7 +2,7 @@
2
2
 
3
3
  ## About
4
4
 
5
- Strabo makes preparing a Ruby object for full text search by tokenizing an objects attributes.
5
+ Strabo prepares a Ruby object for full text search by tokenizing an object's attributes.
6
6
 
7
7
  class Book < Hash
8
8
  include Strabo::Indexer
@@ -0,0 +1,48 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{strabo}
8
+ s.version = "0.0.2"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Jon Morton"]
12
+ s.date = %q{2010-04-20}
13
+ s.description = %q{Simplified tokenization, stemming, and term-frequency map indexes}
14
+ s.email = %q{jon.morton@gmail.com }
15
+ s.files = [
16
+ ".gitignore",
17
+ "Rakefile",
18
+ "VERSION",
19
+ "examples/book.rb",
20
+ "features/index.feature",
21
+ "features/stemming.feature",
22
+ "features/steps/index_steps.rb",
23
+ "features/steps/stemmer_steps.rb",
24
+ "features/support/env.rb",
25
+ "lib/strabo.rb",
26
+ "readme.markdown",
27
+ "strabo.gemspec"
28
+ ]
29
+ s.homepage = %q{http://github.com/jmorton/strabo}
30
+ s.rdoc_options = ["--charset=UTF-8"]
31
+ s.require_paths = ["lib"]
32
+ s.rubygems_version = %q{1.3.6}
33
+ s.summary = %q{Full text search utilities for Ruby}
34
+ s.test_files = [
35
+ "examples/book.rb"
36
+ ]
37
+
38
+ if s.respond_to? :specification_version then
39
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
40
+ s.specification_version = 3
41
+
42
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
43
+ else
44
+ end
45
+ else
46
+ end
47
+ end
48
+
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: strabo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 2
9
+ version: 0.0.2
5
10
  platform: ruby
6
11
  authors:
7
12
  - Jon Morton
@@ -9,7 +14,7 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2010-03-28 00:00:00 -04:00
17
+ date: 2010-04-20 00:00:00 -04:00
13
18
  default_executable:
14
19
  dependencies: []
15
20
 
@@ -33,6 +38,7 @@ files:
33
38
  - features/support/env.rb
34
39
  - lib/strabo.rb
35
40
  - readme.markdown
41
+ - strabo.gemspec
36
42
  has_rdoc: true
37
43
  homepage: http://github.com/jmorton/strabo
38
44
  licenses: []
@@ -46,18 +52,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
46
52
  requirements:
47
53
  - - ">="
48
54
  - !ruby/object:Gem::Version
55
+ segments:
56
+ - 0
49
57
  version: "0"
50
- version:
51
58
  required_rubygems_version: !ruby/object:Gem::Requirement
52
59
  requirements:
53
60
  - - ">="
54
61
  - !ruby/object:Gem::Version
62
+ segments:
63
+ - 0
55
64
  version: "0"
56
- version:
57
65
  requirements: []
58
66
 
59
67
  rubyforge_project:
60
- rubygems_version: 1.3.5
68
+ rubygems_version: 1.3.6
61
69
  signing_key:
62
70
  specification_version: 3
63
71
  summary: Full text search utilities for Ruby