doc_rank 0.0.0.alpha.2 → 0.0.0.alpha.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/doc_rank.gemspec +4 -3
- data/lib/doc_rank.rb +1 -12
- data/lib/doc_rank/document.rb +6 -4
- data/lib/doc_rank/ranker.rb +54 -0
- data/lib/doc_rank/version.rb +1 -1
- metadata +15 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 78aa3f24fc74f25238b5e0888027a4d3debd9b44
|
4
|
+
data.tar.gz: 989694cd61a85196089db8d8a6c3a2977ad14cb7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1536fe620ca05693665e1524863cfcaaa5cd99d61310641ea196b5580ecd6816d707c0d55e0a22b6a47e44cf708066dad0549684135bcb93a64809bdb57aa59
|
7
|
+
data.tar.gz: c0a0a7b0d9307d51c26aef1c1fe74de3c9de7d4231f6249df076cf9d069a3b1b4036377d30b884fbba372953a597d8de98943819c4844ed5c1e5f78e50620d95
|
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,15 @@ For more information about changelogs, check
|
|
6
6
|
[Keep a Changelog](http://keepachangelog.com) and
|
7
7
|
[Vandamme](http://tech-angels.github.io/vandamme).
|
8
8
|
|
9
|
+
## 0.0.0.alpha.3 - 2017/09/15
|
10
|
+
|
11
|
+
* [FEATURE] Add `DocRank::Ranker#base_doc`
|
12
|
+
* [FEATURE] Add `DocRank::Ranker#target_docs`
|
13
|
+
* [FEATURE] Add `DocRank::Ranker#scores`
|
14
|
+
* [FEATURE] Add `DocRank::Ranker#weighted_scores`
|
15
|
+
* [FEATURE] Change `DocRank::Document#keywords` to `DocRank::Document#weighted_keywords`
|
16
|
+
* [FEATURE] Remove `DocRank.compare`
|
17
|
+
|
9
18
|
## 0.0.0.alpha.2 - 2017/09/07
|
10
19
|
|
11
20
|
* [FEATURE] Add `DocRank.compare`
|
data/doc_rank.gemspec
CHANGED
@@ -13,6 +13,8 @@ Gem::Specification.new do |spec|
|
|
13
13
|
spec.homepage = "https://github.com/S1v4/doc_rank"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
16
|
+
spec.required_ruby_version = '>= 2.4'
|
17
|
+
|
16
18
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
17
19
|
f.match(%r{^(test|spec|features)/})
|
18
20
|
end
|
@@ -20,13 +22,12 @@ Gem::Specification.new do |spec|
|
|
20
22
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
21
23
|
spec.require_paths = ["lib"]
|
22
24
|
|
25
|
+
spec.add_runtime_dependency "highscore", "~> 1.2.1"
|
26
|
+
spec.add_runtime_dependency "yomu", "~> 0.2.4"
|
23
27
|
spec.add_development_dependency "yard", "~> 0.9.9"
|
24
|
-
spec.add_development_dependency "highscore", "~> 1.2.1"
|
25
|
-
spec.add_development_dependency "yomu", "~> 0.2.4"
|
26
28
|
spec.add_development_dependency "bundler", "~> 1.15"
|
27
29
|
spec.add_development_dependency "rake", "~> 10.0"
|
28
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
29
31
|
spec.add_development_dependency "coveralls"
|
30
|
-
spec.add_development_dependency "pry"
|
31
32
|
spec.add_development_dependency "pry-nav"
|
32
33
|
end
|
data/lib/doc_rank.rb
CHANGED
@@ -1,17 +1,6 @@
|
|
1
1
|
require "doc_rank/version"
|
2
|
-
require "doc_rank/
|
2
|
+
require "doc_rank/ranker"
|
3
3
|
|
4
4
|
# A gem to rank document similarities.
|
5
5
|
module DocRank
|
6
|
-
# @return [Hash] a hash of document names mapped to their score.
|
7
|
-
# @param [String] base the file by which target documents are scored againsts.
|
8
|
-
# @param [Array<String>] targets a single file or many files used for comparison.
|
9
|
-
def self.compare(base, targets)
|
10
|
-
base_keywords = DocRank::Document.new(base).keywords.map(&:text)
|
11
|
-
Array(targets).map do |target|
|
12
|
-
doc = DocRank::Document.new target
|
13
|
-
matches = base_keywords & doc.keywords.map(&:text)
|
14
|
-
[doc.name, matches.size]
|
15
|
-
end.sort_by{|doc| doc[1]}.to_h
|
16
|
-
end
|
17
6
|
end
|
data/lib/doc_rank/document.rb
CHANGED
@@ -4,21 +4,23 @@ require 'highscore'
|
|
4
4
|
# A gem to rank document similarities.
|
5
5
|
module DocRank
|
6
6
|
# Document holds the contents of a document (.pdf, .docx, .txt) and has methods:
|
7
|
-
# name, text, and
|
7
|
+
# name, text, and weighted_keywords.
|
8
8
|
class Document
|
9
9
|
|
10
10
|
# [String] the name of the document.
|
11
11
|
attr_reader :name
|
12
12
|
# [String] the text content of the document.
|
13
13
|
attr_reader :text
|
14
|
-
# [
|
15
|
-
attr_reader :
|
14
|
+
# [Hash] a collection of keywords mapped to their weight.
|
15
|
+
attr_reader :weighted_keywords
|
16
16
|
|
17
17
|
# @param [String] file_path The path to the file.
|
18
18
|
def initialize(file_path)
|
19
19
|
@text = Yomu.new(file_path).text
|
20
20
|
@name = File.basename file_path
|
21
|
-
@
|
21
|
+
@weighted_keywords = Highscore::Content.new(@text).keywords.map do |kw|
|
22
|
+
[kw.text, kw.weight]
|
23
|
+
end.to_h
|
22
24
|
end
|
23
25
|
end
|
24
26
|
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
require "doc_rank/document"
|
2
|
+
|
3
|
+
# A gem to rank document similarities.
|
4
|
+
module DocRank
|
5
|
+
# Ranker ranks target documents by their keyword matches with a base document and exposes the results through: scores and weighted_scores.
|
6
|
+
class Ranker
|
7
|
+
|
8
|
+
# [DocRank::Document] the base document.
|
9
|
+
attr_reader :base_doc
|
10
|
+
# [Array<DocRank::Document>] the target documents.
|
11
|
+
attr_reader :target_docs
|
12
|
+
|
13
|
+
# @param [String] base the file against which target documents are scored.
|
14
|
+
# @param [String, Array<String>] targets a single file or many files used for comparison.
|
15
|
+
def initialize(base, targets)
|
16
|
+
@base_doc = DocRank::Document.new base
|
17
|
+
@base_kws = @base_doc.weighted_keywords
|
18
|
+
@target_docs = Array(targets).map {|target| DocRank::Document.new target }
|
19
|
+
end
|
20
|
+
|
21
|
+
# Rank the target documents based on any keyword matches with the base
|
22
|
+
# document regardless of weight.
|
23
|
+
# @return [Hash] a hash of document names mapped to their number of keyword matches.
|
24
|
+
def scores
|
25
|
+
@score ||= rank match_keywords
|
26
|
+
end
|
27
|
+
|
28
|
+
# Rank the target documents based on any keyword matches with the base
|
29
|
+
# document by summing the weights of any match.
|
30
|
+
# @return [Hash] a hash of document names mapped to their scores.
|
31
|
+
def weighted_scores
|
32
|
+
@weighted_scores ||= rank match_keywords(weighted: true)
|
33
|
+
end
|
34
|
+
|
35
|
+
private
|
36
|
+
|
37
|
+
def match_keywords(options = {})
|
38
|
+
@target_docs.map do |target|
|
39
|
+
target_kws = target.weighted_keywords
|
40
|
+
matches = @base_kws.keys & target_kws.keys
|
41
|
+
score = options[:weighted] ? sum_weights(matches, target_kws) : matches.count
|
42
|
+
[target.name, score]
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
def sum_weights(matches, target)
|
47
|
+
matches.sum {|match| @base_kws[match] + target[match]}
|
48
|
+
end
|
49
|
+
|
50
|
+
def rank(docs)
|
51
|
+
docs.sort_by{|doc| doc[1]}.to_h
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
data/lib/doc_rank/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doc_rank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0.alpha.
|
4
|
+
version: 0.0.0.alpha.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- s1v4
|
@@ -9,50 +9,50 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-15 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: highscore
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
18
|
- - "~>"
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
21
|
-
type: :
|
20
|
+
version: 1.2.1
|
21
|
+
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - "~>"
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: 1.2.1
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
|
-
name:
|
29
|
+
name: yomu
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
35
|
-
type: :
|
34
|
+
version: 0.2.4
|
35
|
+
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version:
|
41
|
+
version: 0.2.4
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
|
-
name:
|
43
|
+
name: yard
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version: 0.
|
48
|
+
version: 0.9.9
|
49
49
|
type: :development
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version: 0.
|
55
|
+
version: 0.9.9
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
57
|
name: bundler
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
@@ -109,20 +109,6 @@ dependencies:
|
|
109
109
|
- - ">="
|
110
110
|
- !ruby/object:Gem::Version
|
111
111
|
version: '0'
|
112
|
-
- !ruby/object:Gem::Dependency
|
113
|
-
name: pry
|
114
|
-
requirement: !ruby/object:Gem::Requirement
|
115
|
-
requirements:
|
116
|
-
- - ">="
|
117
|
-
- !ruby/object:Gem::Version
|
118
|
-
version: '0'
|
119
|
-
type: :development
|
120
|
-
prerelease: false
|
121
|
-
version_requirements: !ruby/object:Gem::Requirement
|
122
|
-
requirements:
|
123
|
-
- - ">="
|
124
|
-
- !ruby/object:Gem::Version
|
125
|
-
version: '0'
|
126
112
|
- !ruby/object:Gem::Dependency
|
127
113
|
name: pry-nav
|
128
114
|
requirement: !ruby/object:Gem::Requirement
|
@@ -158,6 +144,7 @@ files:
|
|
158
144
|
- doc_rank.gemspec
|
159
145
|
- lib/doc_rank.rb
|
160
146
|
- lib/doc_rank/document.rb
|
147
|
+
- lib/doc_rank/ranker.rb
|
161
148
|
- lib/doc_rank/version.rb
|
162
149
|
homepage: https://github.com/S1v4/doc_rank
|
163
150
|
licenses:
|
@@ -171,7 +158,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
171
158
|
requirements:
|
172
159
|
- - ">="
|
173
160
|
- !ruby/object:Gem::Version
|
174
|
-
version: '
|
161
|
+
version: '2.4'
|
175
162
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
176
163
|
requirements:
|
177
164
|
- - ">"
|