strabo 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.0
1
+ 0.0.2
@@ -61,4 +61,12 @@ Feature: Index a document
61
61
  | z | 3 |
62
62
  | a | 1 |
63
63
  | b | 1 |
64
- | c | 1 |
64
+ | c | 1 |
65
+ Then it should have the following term set:
66
+ | T |
67
+ | x |
68
+ | y |
69
+ | z |
70
+ | a |
71
+ | b |
72
+ | c |
@@ -34,4 +34,10 @@ Then /it should have the following "(.+)" term frequencies:/ do |attribute, tabl
34
34
  @context.keywords[attribute].keys.should include(row['term'])
35
35
  @context.keywords[attribute][row['term']].should eql(row['frequency'].to_i)
36
36
  end
37
- end
37
+ end
38
+
39
# Step: checks that the first cell of every row in table.rows is included in
# the value returned by @context.keywords(:as => :set).
Then(/it should have the following term set:/) do |table|
  table.rows.map { |row| row.first }.each do |term|
    @context.keywords(:as => :set).should include(term)
  end
end
@@ -1,4 +1,4 @@
1
- require 'Set'
1
+ require 'set'
2
2
 
3
3
  # Strabo assists full text search indexing by generating term-frequency maps
4
4
  # for an object's attributes. The term-frequency map may be flattened into
@@ -45,16 +45,35 @@ module Strabo
45
45
  # Invokes stemmer on token. If no stemmer has been configured, it will
46
46
  # return the original token.
47
47
  #
48
- # @param [String] token
48
+ # @param [Array] one or more tokens
49
49
  #
50
- # @return [String] result of stemming
50
+ # @return [Array] stemmed tokens
51
51
  #
52
52
  # @see Strabo#stemmer
53
# Tokens may be given as separate arguments or as a single (possibly nested)
# array. The list is flattened before stemming so the configured stemmer is
# always invoked with one token at a time, never with a whole array.
def self.stem(*tokens)
  terms = tokens.flatten

  # Without a configured stemmer the tokens pass through unchanged.
  return terms if @stemmer.nil?

  # Flatten again in case the stemmer returns an array for a single token,
  # so callers always receive a flat list.
  terms.map { |term| @stemmer.call(term) }.flatten
end
56
58
  end
57
59
 
60
+ # Defines how a single string is divided into multiple strings.
61
module Tokenizer

  # Break a value into a list of lowercase tokens.
  #
  # The value is converted with to_s (so nil and non-String values do not
  # raise), downcased, stripped of every character other than a-z, 0-9 and
  # whitespace, and finally scanned with the delimiter pattern. Tokens are
  # NOT stemmed here.
  #
  # @param [#to_s] value text to convert into a list; nil yields an empty list
  # @param [Regexp] delimiter pattern used to scan the cleaned string
  #
  # @return [Array] list of tokens
  #
  # @private
  def self.tokenize(value, delimiter = /\S+/)
    value.to_s.downcase.gsub(/[^a-z0-9\s]/i, '').scan(delimiter)
  end

end
76
+
58
77
  module Indexer
59
78
 
60
79
  # Get attribute-term-frequency map. If flattened, a term-frequency map
@@ -66,24 +85,17 @@ module Strabo
66
85
  # { term => frequency } map.
67
86
# Builds @term_map (attribute => result of frequency for that attribute's
# tokenized and stemmed value) and returns it in the requested shape.
#
# @param [Object] flatten selects the shape of the result:
#   false          - the per-attribute map (@term_map)
#   true           - flatten_keyword_map(@term_map)
#   anything else  - only the keys of flatten_keyword_map(@term_map),
#                    e.g. keywords(:as => :set)
def keywords(flatten = false)
  @term_map = {}
  each do |key, value|
    @term_map[key] = frequency(Stemmer.stem(Tokenizer.tokenize(value)))
  end

  # `when x then y` replaces the `when x : y` form, which is a syntax error
  # on Ruby 1.9 and later.
  case flatten
  when false then @term_map
  when true then flatten_keyword_map(@term_map)
  else flatten_keyword_map(@term_map).keys
  end
end
72
96
 
73
97
  private
74
98
 
75
- # Break a string into a list of strings.
76
- #
77
- # @param [String] text to convert into a list
78
- # @param [Regex] delimiter used to scan the string
79
- #
80
- # @return [Array] list of stemmed terms
81
- #
82
- # @private
83
- def tokenize(value, delimiter = /\S+/)
84
- value.downcase.scan(delimiter).map { |token| Strabo::Stemmer.stem(token) }
85
- end
86
-
87
99
  # Tally the number of occurrences of a value in a list.
88
100
  #
89
101
  # @param [Array] list of terms to count
@@ -2,7 +2,7 @@
2
2
 
3
3
  ## About
4
4
 
5
- Strabo makes preparing a Ruby object for full text search by tokenizing an objects attributes.
5
+ Strabo prepares a Ruby object for full text search by tokenizing an object's attributes.
6
6
 
7
7
  class Book < Hash
8
8
  include Strabo::Indexer
@@ -0,0 +1,48 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# NOTE(review): the header of this file says it is generated by jeweler, so
# the changes below should also be made in Jeweler::Tasks in the Rakefile or
# they will be lost when the gemspec is regenerated.
Gem::Specification.new do |s|
  s.name = %q{strabo}
  s.version = "0.0.2"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Jon Morton"]
  s.date = %q{2010-04-20}
  s.description = %q{Simplified tokenization, stemming, and term-frequency map indexes}
  # Trailing space removed from the address.
  s.email = %q{jon.morton@gmail.com}
  s.files = [
    ".gitignore",
    "Rakefile",
    "VERSION",
    "examples/book.rb",
    "features/index.feature",
    "features/stemming.feature",
    "features/steps/index_steps.rb",
    "features/steps/stemmer_steps.rb",
    "features/support/env.rb",
    "lib/strabo.rb",
    "readme.markdown",
    "strabo.gemspec"
  ]
  s.homepage = %q{http://github.com/jmorton/strabo}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{Full text search utilities for Ruby}
  s.test_files = [
    "examples/book.rb"
  ]

  # The generated template also compared Gem::RubyGemsVersion against 1.2.0
  # with two empty branches. That check did nothing, and the constant is not
  # defined by current RubyGems, so it is dropped along with the unused
  # current_version local.
  s.specification_version = 3 if s.respond_to? :specification_version
end
48
+
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: strabo
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ prerelease: false
5
+ segments:
6
+ - 0
7
+ - 0
8
+ - 2
9
+ version: 0.0.2
5
10
  platform: ruby
6
11
  authors:
7
12
  - Jon Morton
@@ -9,7 +14,7 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2010-03-28 00:00:00 -04:00
17
+ date: 2010-04-20 00:00:00 -04:00
13
18
  default_executable:
14
19
  dependencies: []
15
20
 
@@ -33,6 +38,7 @@ files:
33
38
  - features/support/env.rb
34
39
  - lib/strabo.rb
35
40
  - readme.markdown
41
+ - strabo.gemspec
36
42
  has_rdoc: true
37
43
  homepage: http://github.com/jmorton/strabo
38
44
  licenses: []
@@ -46,18 +52,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
46
52
  requirements:
47
53
  - - ">="
48
54
  - !ruby/object:Gem::Version
55
+ segments:
56
+ - 0
49
57
  version: "0"
50
- version:
51
58
  required_rubygems_version: !ruby/object:Gem::Requirement
52
59
  requirements:
53
60
  - - ">="
54
61
  - !ruby/object:Gem::Version
62
+ segments:
63
+ - 0
55
64
  version: "0"
56
- version:
57
65
  requirements: []
58
66
 
59
67
  rubyforge_project:
60
- rubygems_version: 1.3.5
68
+ rubygems_version: 1.3.6
61
69
  signing_key:
62
70
  specification_version: 3
63
71
  summary: Full text search utilities for Ruby