vss 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- data/.travis.yml +6 -0
- data/README.md +35 -18
- data/Rakefile +9 -3
- data/lib/vss/version.rb +1 -1
- data/test/test.rb +2 -1
- data/vss.gemspec +4 -4
- metadata +46 -78
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,44 +1,61 @@
|
|
1
|
-
# VSS
|
1
|
+
# VSS – Vector Space Search [![Build Status](http://travis-ci.org/mkdynamic/vss.png?branch=master)](http://travis-ci.org/mkdynamic/vss)
|
2
2
|
|
3
|
-
A simple vector space search engine with
|
3
|
+
A simple vector space search engine with tf*idf ranking.
|
4
4
|
|
5
5
|
[More info, and details of how it works.](http://madeofcode.com/posts/69-vss-a-vector-space-search-engine-in-ruby)
|
6
6
|
|
7
|
-
##
|
7
|
+
## Installation
|
8
8
|
|
9
|
-
|
9
|
+
Just install the gem:
|
10
10
|
|
11
|
-
|
11
|
+
```bash
|
12
|
+
gem install vss
|
13
|
+
```
|
12
14
|
|
13
|
-
|
15
|
+
Or add to your Gemfile, if you're using Bundler:
|
14
16
|
|
15
|
-
|
17
|
+
```ruby
|
18
|
+
gem 'vss'
|
19
|
+
```
|
16
20
|
|
17
21
|
## Usage
|
18
22
|
|
19
23
|
To perform a search on a collection of documents:
|
20
24
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
+
```ruby
|
26
|
+
require "vss"
|
27
|
+
docs = ["hello", "goodbye", "hello and goodbye", "hello, hello!"]
|
28
|
+
engine = VSS::Engine.new(docs)
|
29
|
+
engine.search("hello") #=> ["hello", "hello, hello!", "hello and goodbye"]
|
30
|
+
```
|
25
31
|
|
26
32
|
## Rails/ActiveRecord
|
27
33
|
|
28
34
|
If you want to search a collection of `ActiveRecord` objects, you need to pass a **documentizer** `Proc` when initializing `VSS::Engine` which will convert the objects into documents (which are simply strings). For example:
|
29
35
|
|
30
|
-
|
31
|
-
|
32
|
-
|
36
|
+
```ruby
|
37
|
+
class Page < ActiveRecord::Base
|
38
|
+
#attrs: title, content
|
39
|
+
end
|
40
|
+
|
41
|
+
docs = Page.all
|
42
|
+
documentizer = lambda { |record| record.title + " " + record.content }
|
43
|
+
engine = VSS::Engine.new(docs, documentizer)
|
44
|
+
```
|
33
45
|
|
34
|
-
docs = Page.all
|
35
|
-
documentizer = proc { |record| record.title + " " + record.content }
|
36
|
-
engine = VSS::Engine.new(docs, documentizer)
|
37
|
-
|
38
46
|
## Notes
|
39
47
|
|
40
48
|
This isn't designed to be used on huge collections of records. The original use case was for ranking a smallish set of `ActiveRecord` results obtained via a query (using **SearchLogic**). So, essentially, the search consisted of two stages: getting the *corpus* via a SQL query, then running the VSS on that.
|
41
49
|
|
50
|
+
## Ruby
|
51
|
+
|
52
|
+
Tested with the following Ruby versions:
|
53
|
+
|
54
|
+
- MRI 1.9.2
|
55
|
+
- MRI 1.8.7
|
56
|
+
|
57
|
+
Probably works on JRuby ~> 1.6 too, but not actively tested.
|
58
|
+
|
42
59
|
## Credits
|
43
60
|
|
44
61
|
Heavily inspired by [Joseph Wilk's article on building a vector space search engine in Python](http://blog.josephwilk.net/projects/building-a-vector-space-search-engine-in-python.html).
|
data/Rakefile
CHANGED
@@ -1,4 +1,10 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/testtask'
|
3
3
|
|
4
|
-
|
4
|
+
Rake::TestTask.new do |t|
|
5
|
+
t.libs << 'test'
|
6
|
+
t.test_files = Dir['test/*test.rb']
|
7
|
+
t.verbose = true
|
8
|
+
end
|
9
|
+
|
10
|
+
task :default => :test
|
data/lib/vss/version.rb
CHANGED
data/test/test.rb
CHANGED
data/vss.gemspec
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
1
2
|
require File.expand_path('../lib/vss/version', __FILE__)
|
2
3
|
|
3
4
|
Gem::Specification.new do |s|
|
4
5
|
s.name = "vss"
|
5
6
|
s.version = VSS::VERSION
|
6
|
-
s.platform = Gem::Platform::RUBY
|
7
7
|
s.authors = ["Mark Dodwell"]
|
8
8
|
s.email = ["labs@mkdynamic.co.uk"]
|
9
9
|
s.homepage = "https://github.com/mkdynamic/vss"
|
@@ -12,11 +12,11 @@ Gem::Specification.new do |s|
|
|
12
12
|
|
13
13
|
s.required_ruby_version = ">= 1.8.7"
|
14
14
|
s.add_development_dependency "bundler"
|
15
|
-
s.add_development_dependency "rake"
|
16
|
-
s.add_runtime_dependency "stemmer"
|
15
|
+
s.add_development_dependency "rake"
|
16
|
+
s.add_runtime_dependency "stemmer", "~> 1.0.0"
|
17
17
|
|
18
18
|
s.files = `git ls-files`.split("\n")
|
19
19
|
s.test_files = `git ls-files -- test/*`.split("\n")
|
20
|
-
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
21
21
|
s.require_paths = ["lib"]
|
22
22
|
end
|
metadata
CHANGED
@@ -1,78 +1,58 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: vss
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 8
|
10
|
-
version: 0.1.8
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.9
|
5
|
+
prerelease:
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Mark Dodwell
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
version_requirements: &id001 !ruby/object:Gem::Requirement
|
12
|
+
date: 2011-10-16 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: &2155044600 !ruby/object:Gem::Requirement
|
23
17
|
none: false
|
24
|
-
requirements:
|
25
|
-
- -
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
version: "0"
|
31
|
-
requirement: *id001
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :development
|
33
|
-
name: bundler
|
34
23
|
prerelease: false
|
35
|
-
|
36
|
-
|
24
|
+
version_requirements: *2155044600
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rake
|
27
|
+
requirement: &2155043860 !ruby/object:Gem::Requirement
|
37
28
|
none: false
|
38
|
-
requirements:
|
39
|
-
- -
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
|
42
|
-
segments:
|
43
|
-
- 0
|
44
|
-
- 8
|
45
|
-
- 7
|
46
|
-
version: 0.8.7
|
47
|
-
requirement: *id002
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
48
33
|
type: :development
|
49
|
-
name: rake
|
50
34
|
prerelease: false
|
51
|
-
|
52
|
-
|
35
|
+
version_requirements: *2155043860
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: stemmer
|
38
|
+
requirement: &2155043020 !ruby/object:Gem::Requirement
|
53
39
|
none: false
|
54
|
-
requirements:
|
55
|
-
- -
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
|
58
|
-
segments:
|
59
|
-
- 0
|
60
|
-
version: "0"
|
61
|
-
requirement: *id003
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.0.0
|
62
44
|
type: :runtime
|
63
|
-
name: stemmer
|
64
45
|
prerelease: false
|
46
|
+
version_requirements: *2155043020
|
65
47
|
description: A simple vector space search engine with tf*idf ranking.
|
66
|
-
email:
|
48
|
+
email:
|
67
49
|
- labs@mkdynamic.co.uk
|
68
50
|
executables: []
|
69
|
-
|
70
51
|
extensions: []
|
71
|
-
|
72
52
|
extra_rdoc_files: []
|
73
|
-
|
74
|
-
files:
|
53
|
+
files:
|
75
54
|
- .gitignore
|
55
|
+
- .travis.yml
|
76
56
|
- Gemfile
|
77
57
|
- LICENSE
|
78
58
|
- README.md
|
@@ -83,41 +63,29 @@ files:
|
|
83
63
|
- lib/vss/version.rb
|
84
64
|
- test/test.rb
|
85
65
|
- vss.gemspec
|
86
|
-
has_rdoc: true
|
87
66
|
homepage: https://github.com/mkdynamic/vss
|
88
67
|
licenses: []
|
89
|
-
|
90
68
|
post_install_message:
|
91
69
|
rdoc_options: []
|
92
|
-
|
93
|
-
require_paths:
|
70
|
+
require_paths:
|
94
71
|
- lib
|
95
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
73
|
none: false
|
97
|
-
requirements:
|
98
|
-
- -
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
hash: 57
|
101
|
-
segments:
|
102
|
-
- 1
|
103
|
-
- 8
|
104
|
-
- 7
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
105
77
|
version: 1.8.7
|
106
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
79
|
none: false
|
108
|
-
requirements:
|
109
|
-
- -
|
110
|
-
- !ruby/object:Gem::Version
|
111
|
-
|
112
|
-
segments:
|
113
|
-
- 0
|
114
|
-
version: "0"
|
80
|
+
requirements:
|
81
|
+
- - ! '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
115
84
|
requirements: []
|
116
|
-
|
117
85
|
rubyforge_project:
|
118
|
-
rubygems_version: 1.
|
86
|
+
rubygems_version: 1.8.10
|
119
87
|
signing_key:
|
120
88
|
specification_version: 3
|
121
89
|
summary: Vector Space Search
|
122
|
-
test_files:
|
90
|
+
test_files:
|
123
91
|
- test/test.rb
|