strabo 0.0.0 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/features/index.feature +9 -1
- data/features/steps/index_steps.rb +7 -1
- data/lib/strabo.rb +31 -19
- data/readme.markdown +1 -1
- data/strabo.gemspec +48 -0
- metadata +13 -5
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.0
|
1
|
+
0.0.2
|
data/features/index.feature
CHANGED
data/features/steps/index_steps.rb
CHANGED
@@ -34,4 +34,10 @@ Then /it should have the following "(.+)" term frequencies:/ do |attribute, tabl
|
|
34
34
|
@context.keywords[attribute].keys.should include(row['term'])
|
35
35
|
@context.keywords[attribute][row['term']].should eql(row['frequency'].to_i)
|
36
36
|
end
|
37
|
-
end
|
37
|
+
end
|
38
|
+
|
39
|
+
Then /it should have the following term set:/ do |table|
|
40
|
+
table.rows.each do |row|
|
41
|
+
@context.keywords(:as => :set).should include(row.first)
|
42
|
+
end
|
43
|
+
end
|
data/lib/strabo.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'set'
|
2
2
|
|
3
3
|
# Strabo assists full text search indexing by generating term-frequency maps
|
4
4
|
# for an object's attributes. The term-frequency map may be flattened into
|
@@ -45,16 +45,35 @@ module Strabo
|
|
45
45
|
# Invokes stemmer on token. If no stemmer has been configured, it will
|
46
46
|
# return the original token.
|
47
47
|
#
|
48
|
-
# @param [
|
48
|
+
# @param [Array] one or more tokens
|
49
49
|
#
|
50
|
-
# @return [
|
50
|
+
# @return [Array] stemmed tokens
|
51
51
|
#
|
52
52
|
# @see Strabo#stemmer
|
53
|
-
def self.stem(
|
54
|
-
|
53
|
+
def self.stem(*tokens)
|
54
|
+
tokens.map do |token|
|
55
|
+
@stemmer.nil? ? token : @stemmer.call(token)
|
56
|
+
end.flatten
|
55
57
|
end
|
56
58
|
end
|
57
59
|
|
60
|
+
# Defines how a single string is divided into multiple strings.
|
61
|
+
module Tokenizer
|
62
|
+
|
63
|
+
# Break a string into a list of strings.
|
64
|
+
#
|
65
|
+
# @param [String] text to convert into a list
|
66
|
+
# @param [Regex] delimiter used to scan the string
|
67
|
+
#
|
68
|
+
# @return [Array] list of stemmed terms
|
69
|
+
#
|
70
|
+
# @private
|
71
|
+
def self.tokenize(value, delimiter = /\S+/)
|
72
|
+
value.downcase.gsub(/[^a-z0-9\s]/i,'').scan(delimiter)
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
58
77
|
module Indexer
|
59
78
|
|
60
79
|
# Get attribute-term-frequency map. If flattened, a term-frequency map
|
@@ -66,24 +85,17 @@ module Strabo
|
|
66
85
|
# { term => frequency } map.
|
67
86
|
def keywords(flatten = false)
|
68
87
|
@term_map = {}
|
69
|
-
self.each { |key, value| @term_map[key] = frequency(tokenize(value)) }
|
70
|
-
|
88
|
+
self.each { |key, value| @term_map[key] = frequency(Stemmer::stem(Tokenizer::tokenize(value))) }
|
89
|
+
|
90
|
+
case flatten
|
91
|
+
when false : @term_map
|
92
|
+
when true : flatten_keyword_map(@term_map)
|
93
|
+
else flatten_keyword_map(@term_map).keys
|
94
|
+
end
|
71
95
|
end
|
72
96
|
|
73
97
|
private
|
74
98
|
|
75
|
-
# Break a string into a list of strings.
|
76
|
-
#
|
77
|
-
# @param [String] text to convert into a list
|
78
|
-
# @param [Regex] delimiter used to scan the string
|
79
|
-
#
|
80
|
-
# @return [Array] list of stemmed terms
|
81
|
-
#
|
82
|
-
# @private
|
83
|
-
def tokenize(value, delimiter = /\S+/)
|
84
|
-
value.downcase.scan(delimiter).map { |token| Strabo::Stemmer.stem(token) }
|
85
|
-
end
|
86
|
-
|
87
99
|
# Tally the number of occurrences of a value in a list.
|
88
100
|
#
|
89
101
|
# @param [Array] list of terms to count
|
data/readme.markdown
CHANGED
data/strabo.gemspec
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{strabo}
|
8
|
+
s.version = "0.0.2"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Jon Morton"]
|
12
|
+
s.date = %q{2010-04-20}
|
13
|
+
s.description = %q{Simplified tokenization, stemming, and term-frequency map indexes}
|
14
|
+
s.email = %q{jon.morton@gmail.com }
|
15
|
+
s.files = [
|
16
|
+
".gitignore",
|
17
|
+
"Rakefile",
|
18
|
+
"VERSION",
|
19
|
+
"examples/book.rb",
|
20
|
+
"features/index.feature",
|
21
|
+
"features/stemming.feature",
|
22
|
+
"features/steps/index_steps.rb",
|
23
|
+
"features/steps/stemmer_steps.rb",
|
24
|
+
"features/support/env.rb",
|
25
|
+
"lib/strabo.rb",
|
26
|
+
"readme.markdown",
|
27
|
+
"strabo.gemspec"
|
28
|
+
]
|
29
|
+
s.homepage = %q{http://github.com/jmorton/strabo}
|
30
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
31
|
+
s.require_paths = ["lib"]
|
32
|
+
s.rubygems_version = %q{1.3.6}
|
33
|
+
s.summary = %q{Full text search utilities for Ruby}
|
34
|
+
s.test_files = [
|
35
|
+
"examples/book.rb"
|
36
|
+
]
|
37
|
+
|
38
|
+
if s.respond_to? :specification_version then
|
39
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
40
|
+
s.specification_version = 3
|
41
|
+
|
42
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
43
|
+
else
|
44
|
+
end
|
45
|
+
else
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: strabo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Jon Morton
|
@@ -9,7 +14,7 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date: 2010-
|
17
|
+
date: 2010-04-20 00:00:00 -04:00
|
13
18
|
default_executable:
|
14
19
|
dependencies: []
|
15
20
|
|
@@ -33,6 +38,7 @@ files:
|
|
33
38
|
- features/support/env.rb
|
34
39
|
- lib/strabo.rb
|
35
40
|
- readme.markdown
|
41
|
+
- strabo.gemspec
|
36
42
|
has_rdoc: true
|
37
43
|
homepage: http://github.com/jmorton/strabo
|
38
44
|
licenses: []
|
@@ -46,18 +52,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
46
52
|
requirements:
|
47
53
|
- - ">="
|
48
54
|
- !ruby/object:Gem::Version
|
55
|
+
segments:
|
56
|
+
- 0
|
49
57
|
version: "0"
|
50
|
-
version:
|
51
58
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
59
|
requirements:
|
53
60
|
- - ">="
|
54
61
|
- !ruby/object:Gem::Version
|
62
|
+
segments:
|
63
|
+
- 0
|
55
64
|
version: "0"
|
56
|
-
version:
|
57
65
|
requirements: []
|
58
66
|
|
59
67
|
rubyforge_project:
|
60
|
-
rubygems_version: 1.3.
|
68
|
+
rubygems_version: 1.3.6
|
61
69
|
signing_key:
|
62
70
|
specification_version: 3
|
63
71
|
summary: Full text search utilities for Ruby
|