vss 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.travis.yml +6 -0
- data/README.md +35 -18
- data/Rakefile +9 -3
- data/lib/vss/version.rb +1 -1
- data/test/test.rb +2 -1
- data/vss.gemspec +4 -4
- metadata +46 -78
data/.travis.yml
ADDED
data/README.md
CHANGED
@@ -1,44 +1,61 @@
|
|
1
|
-
# VSS
|
1
|
+
# VSS – Vector Space Search [Build Status](http://travis-ci.org/mkdynamic/vss)
|
2
2
|
|
3
|
-
A simple vector space search engine with
|
3
|
+
A simple vector space search engine with tf*idf ranking.
|
4
4
|
|
5
5
|
[More info, and details of how it works.](http://madeofcode.com/posts/69-vss-a-vector-space-search-engine-in-ruby)
|
6
6
|
|
7
|
-
##
|
7
|
+
## Installation
|
8
8
|
|
9
|
-
|
9
|
+
Just install the gem:
|
10
10
|
|
11
|
-
|
11
|
+
```bash
|
12
|
+
gem install vss
|
13
|
+
```
|
12
14
|
|
13
|
-
|
15
|
+
Or add to your Gemfile, if you're using Bundler:
|
14
16
|
|
15
|
-
|
17
|
+
```ruby
|
18
|
+
gem 'vss'
|
19
|
+
```
|
16
20
|
|
17
21
|
## Usage
|
18
22
|
|
19
23
|
To perform a search on a collection of documents:
|
20
24
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
+
```ruby
|
26
|
+
require "vss"
|
27
|
+
docs = ["hello", "goodbye", "hello and goodbye", "hello, hello!"]
|
28
|
+
engine = VSS::Engine.new(docs)
|
29
|
+
engine.search("hello") #=> ["hello", "hello, hello!", "hello and goodbye"]
|
30
|
+
```
|
25
31
|
|
26
32
|
## Rails/ActiveRecord
|
27
33
|
|
28
34
|
If you want to search a collection of `ActiveRecord` objects, you need to pass a **documentizer** `Proc` when initializing `VSS::Engine` which will convert the objects into documents (which are simply strings). For example:
|
29
35
|
|
30
|
-
|
31
|
-
|
32
|
-
|
36
|
+
```ruby
|
37
|
+
class Page < ActiveRecord::Base
|
38
|
+
#attrs: title, content
|
39
|
+
end
|
40
|
+
|
41
|
+
docs = Page.all
|
42
|
+
documentizer = lambda { |record| record.title + " " + record.content }
|
43
|
+
engine = VSS::Engine.new(docs, documentizer)
|
44
|
+
```
|
33
45
|
|
34
|
-
docs = Page.all
|
35
|
-
documentizer = proc { |record| record.title + " " + record.content }
|
36
|
-
engine = VSS::Engine.new(docs, documentizer)
|
37
|
-
|
38
46
|
## Notes
|
39
47
|
|
40
48
|
This isn't designed to be used on huge collections of records. The original use case was for ranking a smallish set of `ActiveRecord` results obtained via a query (using **SearchLogic**). So, essentially, the search consisted of 2 stages: getting the *corpus* via a SQL query, then doing the VSS on that.
|
41
49
|
|
50
|
+
## Ruby
|
51
|
+
|
52
|
+
Tested with the following Ruby versions:
|
53
|
+
|
54
|
+
- MRI 1.9.2
|
55
|
+
- MRI 1.8.7
|
56
|
+
|
57
|
+
Probably works on JRuby ~> 1.6 too, but not actively tested.
|
58
|
+
|
42
59
|
## Credits
|
43
60
|
|
44
61
|
Heavily inspired by [Joseph Wilk's article on building a vector space search engine in Python](http://blog.josephwilk.net/projects/building-a-vector-space-search-engine-in-python.html).
|
data/Rakefile
CHANGED
@@ -1,4 +1,10 @@
|
|
1
|
-
require
|
2
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/testtask'
|
3
3
|
|
4
|
-
|
4
|
+
Rake::TestTask.new do |t|
|
5
|
+
t.libs << 'test'
|
6
|
+
t.test_files = Dir['test/*test.rb']
|
7
|
+
t.verbose = true
|
8
|
+
end
|
9
|
+
|
10
|
+
task :default => :test
|
data/lib/vss/version.rb
CHANGED
data/test/test.rb
CHANGED
data/vss.gemspec
CHANGED
@@ -1,9 +1,9 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
1
2
|
require File.expand_path('../lib/vss/version', __FILE__)
|
2
3
|
|
3
4
|
Gem::Specification.new do |s|
|
4
5
|
s.name = "vss"
|
5
6
|
s.version = VSS::VERSION
|
6
|
-
s.platform = Gem::Platform::RUBY
|
7
7
|
s.authors = ["Mark Dodwell"]
|
8
8
|
s.email = ["labs@mkdynamic.co.uk"]
|
9
9
|
s.homepage = "https://github.com/mkdynamic/vss"
|
@@ -12,11 +12,11 @@ Gem::Specification.new do |s|
|
|
12
12
|
|
13
13
|
s.required_ruby_version = ">= 1.8.7"
|
14
14
|
s.add_development_dependency "bundler"
|
15
|
-
s.add_development_dependency "rake"
|
16
|
-
s.add_runtime_dependency "stemmer"
|
15
|
+
s.add_development_dependency "rake"
|
16
|
+
s.add_runtime_dependency "stemmer", "~> 1.0.0"
|
17
17
|
|
18
18
|
s.files = `git ls-files`.split("\n")
|
19
19
|
s.test_files = `git ls-files -- test/*`.split("\n")
|
20
|
-
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
20
|
+
s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
21
21
|
s.require_paths = ["lib"]
|
22
22
|
end
|
metadata
CHANGED
@@ -1,78 +1,58 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: vss
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 8
|
10
|
-
version: 0.1.8
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.9
|
5
|
+
prerelease:
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- Mark Dodwell
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
version_requirements: &id001 !ruby/object:Gem::Requirement
|
12
|
+
date: 2011-10-16 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: bundler
|
16
|
+
requirement: &2155044600 !ruby/object:Gem::Requirement
|
23
17
|
none: false
|
24
|
-
requirements:
|
25
|
-
- -
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
|
28
|
-
segments:
|
29
|
-
- 0
|
30
|
-
version: "0"
|
31
|
-
requirement: *id001
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
32
22
|
type: :development
|
33
|
-
name: bundler
|
34
23
|
prerelease: false
|
35
|
-
|
36
|
-
|
24
|
+
version_requirements: *2155044600
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rake
|
27
|
+
requirement: &2155043860 !ruby/object:Gem::Requirement
|
37
28
|
none: false
|
38
|
-
requirements:
|
39
|
-
- -
|
40
|
-
- !ruby/object:Gem::Version
|
41
|
-
|
42
|
-
segments:
|
43
|
-
- 0
|
44
|
-
- 8
|
45
|
-
- 7
|
46
|
-
version: 0.8.7
|
47
|
-
requirement: *id002
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
48
33
|
type: :development
|
49
|
-
name: rake
|
50
34
|
prerelease: false
|
51
|
-
|
52
|
-
|
35
|
+
version_requirements: *2155043860
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: stemmer
|
38
|
+
requirement: &2155043020 !ruby/object:Gem::Requirement
|
53
39
|
none: false
|
54
|
-
requirements:
|
55
|
-
- -
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
|
58
|
-
segments:
|
59
|
-
- 0
|
60
|
-
version: "0"
|
61
|
-
requirement: *id003
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.0.0
|
62
44
|
type: :runtime
|
63
|
-
name: stemmer
|
64
45
|
prerelease: false
|
46
|
+
version_requirements: *2155043020
|
65
47
|
description: A simple vector space search engine with tf*idf ranking.
|
66
|
-
email:
|
48
|
+
email:
|
67
49
|
- labs@mkdynamic.co.uk
|
68
50
|
executables: []
|
69
|
-
|
70
51
|
extensions: []
|
71
|
-
|
72
52
|
extra_rdoc_files: []
|
73
|
-
|
74
|
-
files:
|
53
|
+
files:
|
75
54
|
- .gitignore
|
55
|
+
- .travis.yml
|
76
56
|
- Gemfile
|
77
57
|
- LICENSE
|
78
58
|
- README.md
|
@@ -83,41 +63,29 @@ files:
|
|
83
63
|
- lib/vss/version.rb
|
84
64
|
- test/test.rb
|
85
65
|
- vss.gemspec
|
86
|
-
has_rdoc: true
|
87
66
|
homepage: https://github.com/mkdynamic/vss
|
88
67
|
licenses: []
|
89
|
-
|
90
68
|
post_install_message:
|
91
69
|
rdoc_options: []
|
92
|
-
|
93
|
-
require_paths:
|
70
|
+
require_paths:
|
94
71
|
- lib
|
95
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
72
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
96
73
|
none: false
|
97
|
-
requirements:
|
98
|
-
- -
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
hash: 57
|
101
|
-
segments:
|
102
|
-
- 1
|
103
|
-
- 8
|
104
|
-
- 7
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
105
77
|
version: 1.8.7
|
106
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
78
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
107
79
|
none: false
|
108
|
-
requirements:
|
109
|
-
- -
|
110
|
-
- !ruby/object:Gem::Version
|
111
|
-
|
112
|
-
segments:
|
113
|
-
- 0
|
114
|
-
version: "0"
|
80
|
+
requirements:
|
81
|
+
- - ! '>='
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: '0'
|
115
84
|
requirements: []
|
116
|
-
|
117
85
|
rubyforge_project:
|
118
|
-
rubygems_version: 1.
|
86
|
+
rubygems_version: 1.8.10
|
119
87
|
signing_key:
|
120
88
|
specification_version: 3
|
121
89
|
summary: Vector Space Search
|
122
|
-
test_files:
|
90
|
+
test_files:
|
123
91
|
- test/test.rb
|