strabo 0.0.0 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/features/index.feature +9 -1
- data/features/steps/index_steps.rb +7 -1
- data/lib/strabo.rb +31 -19
- data/readme.markdown +1 -1
- data/strabo.gemspec +48 -0
- metadata +13 -5
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.2
|
data/features/index.feature
CHANGED
@@ -34,4 +34,10 @@ Then /it should have the following "(.+)" term frequencies:/ do |attribute, tabl
|
|
34
34
|
@context.keywords[attribute].keys.should include(row['term'])
|
35
35
|
@context.keywords[attribute][row['term']].should eql(row['frequency'].to_i)
|
36
36
|
end
|
37
|
-
end
|
37
|
+
end
|
38
|
+
|
39
|
+
Then /it should have the following term set:/ do |table|
|
40
|
+
table.rows.each do |row|
|
41
|
+
@context.keywords(:as => :set).should include(row.first)
|
42
|
+
end
|
43
|
+
end
|
data/lib/strabo.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require '
|
1
|
+
require 'set'
|
2
2
|
|
3
3
|
# Strabo assists full text search indexing by generating term-frequency maps
|
4
4
|
# for an object's attributes. The term-frequency map may be flattened into
|
@@ -45,16 +45,35 @@ module Strabo
|
|
45
45
|
# Invokes stemmer on token. If no stemmer has been configured, it will
|
46
46
|
# return the original token.
|
47
47
|
#
|
48
|
-
# @param [
|
48
|
+
# @param [Array] one or more tokens
|
49
49
|
#
|
50
|
-
# @return [
|
50
|
+
# @return [Array] stemmed tokens
|
51
51
|
#
|
52
52
|
# @see Strabo#stemmer
|
53
|
-
def self.stem(
|
54
|
-
|
53
|
+
def self.stem(*tokens)
|
54
|
+
tokens.map do |token|
|
55
|
+
@stemmer.nil? ? token : @stemmer.call(token)
|
56
|
+
end.flatten
|
55
57
|
end
|
56
58
|
end
|
57
59
|
|
60
|
+
# Defines how a single string is divided into multiple strings.
|
61
|
+
module Tokenizer
|
62
|
+
|
63
|
+
# Break a string into a list of strings.
|
64
|
+
#
|
65
|
+
# @param [String] text to convert into a list
|
66
|
+
# @param [Regexp] delimiter pattern passed to String#scan; each match becomes a token
|
67
|
+
#
|
68
|
+
# @return [Array] list of lowercased tokens (not stemmed)
|
69
|
+
#
|
70
|
+
# @private
|
71
|
+
def self.tokenize(value, delimiter = /\S+/)
|
72
|
+
value.downcase.gsub(/[^a-z0-9\s]/i,'').scan(delimiter)
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
58
77
|
module Indexer
|
59
78
|
|
60
79
|
# Get attribute-term-frequency map. If flattened, a term-frequency map
|
@@ -66,24 +85,17 @@ module Strabo
|
|
66
85
|
# { term => frequency } map.
|
67
86
|
def keywords(flatten = false)
|
68
87
|
@term_map = {}
|
69
|
-
self.each { |key, value| @term_map[key] = frequency(tokenize(value)) }
|
70
|
-
|
88
|
+
self.each { |key, value| @term_map[key] = frequency(Stemmer::stem(Tokenizer::tokenize(value))) }
|
89
|
+
|
90
|
+
case flatten
|
91
|
+
when false : @term_map
|
92
|
+
when true : flatten_keyword_map(@term_map)
|
93
|
+
else flatten_keyword_map(@term_map).keys
|
94
|
+
end
|
71
95
|
end
|
72
96
|
|
73
97
|
private
|
74
98
|
|
75
|
-
# Break a string into a list of strings.
|
76
|
-
#
|
77
|
-
# @param [String] text to convert into a list
|
78
|
-
# @param [Regex] delimiter used to scan the string
|
79
|
-
#
|
80
|
-
# @return [Array] list of stemmed terms
|
81
|
-
#
|
82
|
-
# @private
|
83
|
-
def tokenize(value, delimiter = /\S+/)
|
84
|
-
value.downcase.scan(delimiter).map { |token| Strabo::Stemmer.stem(token) }
|
85
|
-
end
|
86
|
-
|
87
99
|
# Tally the number of occurrences of a value in a list.
|
88
100
|
#
|
89
101
|
# @param [Array] list of terms to count
|
data/readme.markdown
CHANGED
data/strabo.gemspec
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{strabo}
|
8
|
+
s.version = "0.0.2"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Jon Morton"]
|
12
|
+
s.date = %q{2010-04-20}
|
13
|
+
s.description = %q{Simplified tokenization, stemming, and term-frequency map indexes}
|
14
|
+
s.email = %q{jon.morton@gmail.com }
|
15
|
+
s.files = [
|
16
|
+
".gitignore",
|
17
|
+
"Rakefile",
|
18
|
+
"VERSION",
|
19
|
+
"examples/book.rb",
|
20
|
+
"features/index.feature",
|
21
|
+
"features/stemming.feature",
|
22
|
+
"features/steps/index_steps.rb",
|
23
|
+
"features/steps/stemmer_steps.rb",
|
24
|
+
"features/support/env.rb",
|
25
|
+
"lib/strabo.rb",
|
26
|
+
"readme.markdown",
|
27
|
+
"strabo.gemspec"
|
28
|
+
]
|
29
|
+
s.homepage = %q{http://github.com/jmorton/strabo}
|
30
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
31
|
+
s.require_paths = ["lib"]
|
32
|
+
s.rubygems_version = %q{1.3.6}
|
33
|
+
s.summary = %q{Full text search utilities for Ruby}
|
34
|
+
s.test_files = [
|
35
|
+
"examples/book.rb"
|
36
|
+
]
|
37
|
+
|
38
|
+
if s.respond_to? :specification_version then
|
39
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
40
|
+
s.specification_version = 3
|
41
|
+
|
42
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
43
|
+
else
|
44
|
+
end
|
45
|
+
else
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: strabo
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 2
|
9
|
+
version: 0.0.2
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- Jon Morton
|
@@ -9,7 +14,7 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date: 2010-
|
17
|
+
date: 2010-04-20 00:00:00 -04:00
|
13
18
|
default_executable:
|
14
19
|
dependencies: []
|
15
20
|
|
@@ -33,6 +38,7 @@ files:
|
|
33
38
|
- features/support/env.rb
|
34
39
|
- lib/strabo.rb
|
35
40
|
- readme.markdown
|
41
|
+
- strabo.gemspec
|
36
42
|
has_rdoc: true
|
37
43
|
homepage: http://github.com/jmorton/strabo
|
38
44
|
licenses: []
|
@@ -46,18 +52,20 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
46
52
|
requirements:
|
47
53
|
- - ">="
|
48
54
|
- !ruby/object:Gem::Version
|
55
|
+
segments:
|
56
|
+
- 0
|
49
57
|
version: "0"
|
50
|
-
version:
|
51
58
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
52
59
|
requirements:
|
53
60
|
- - ">="
|
54
61
|
- !ruby/object:Gem::Version
|
62
|
+
segments:
|
63
|
+
- 0
|
55
64
|
version: "0"
|
56
|
-
version:
|
57
65
|
requirements: []
|
58
66
|
|
59
67
|
rubyforge_project:
|
60
|
-
rubygems_version: 1.3.
|
68
|
+
rubygems_version: 1.3.6
|
61
69
|
signing_key:
|
62
70
|
specification_version: 3
|
63
71
|
summary: Full text search utilities for Ruby
|