doc_rank 0.0.0.alpha.2 → 0.0.0.alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d561d37c85fb35101c071fb73814f786bac64b0f
4
- data.tar.gz: e22786ab6c7d58ba17a1ec3f17fad2fecc4a7e9a
3
+ metadata.gz: 78aa3f24fc74f25238b5e0888027a4d3debd9b44
4
+ data.tar.gz: 989694cd61a85196089db8d8a6c3a2977ad14cb7
5
5
  SHA512:
6
- metadata.gz: d9a15a757324b53834c47431da9d7c165ef550ad9adfa71cca45c35c62ce03cba9403b69ca59481ad46e620258bbe3e19cb8a35e8cd36db19bac92508c84ed32
7
- data.tar.gz: 0d1fe0b9b4fa900657b2429eb7244221e2b54a7df53f7e3572be8a2b40fdf5fb3b66d3801578915dc5a3a94a1778306ef840e121dffaf7a9e3ad628f987f8ecd
6
+ metadata.gz: e1536fe620ca05693665e1524863cfcaaa5cd99d61310641ea196b5580ecd6816d707c0d55e0a22b6a47e44cf708066dad0549684135bcb93a64809bdb57aa59
7
+ data.tar.gz: c0a0a7b0d9307d51c26aef1c1fe74de3c9de7d4231f6249df076cf9d069a3b1b4036377d30b884fbba372953a597d8de98943819c4844ed5c1e5f78e50620d95
data/CHANGELOG.md CHANGED
@@ -6,6 +6,15 @@ For more information about changelogs, check
6
6
  [Keep a Changelog](http://keepachangelog.com) and
7
7
  [Vandamme](http://tech-angels.github.io/vandamme).
8
8
 
9
+ ## 0.0.0.alpha.3 - 2017
10
+
11
+ * [FEATURE] Add `DocRank::Ranker#base_doc`
12
+ * [FEATURE] Add `DocRank::Ranker#target_docs`
13
+ * [FEATURE] Add `DocRank::Ranker#scores`
14
+ * [FEATURE] Add `DocRank::Ranker#weighted_scores`
15
+ * [FEATURE] Change `DocRank::Document#keywords` to `DocRank::Document#weighted_keywords`
16
+ * [FEATURE] Remove `DocRank.compare`
17
+
9
18
  ## 0.0.0.alpha.2 - 2017/09/07
10
19
 
11
20
  * [FEATURE] Add `DocRank.compare`
data/doc_rank.gemspec CHANGED
@@ -13,6 +13,8 @@ Gem::Specification.new do |spec|
13
13
  spec.homepage = "https://github.com/S1v4/doc_rank"
14
14
  spec.license = "MIT"
15
15
 
16
+ spec.required_ruby_version = '>= 2.4'
17
+
16
18
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
17
19
  f.match(%r{^(test|spec|features)/})
18
20
  end
@@ -20,13 +22,12 @@ Gem::Specification.new do |spec|
20
22
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
23
  spec.require_paths = ["lib"]
22
24
 
25
+ spec.add_runtime_dependency "highscore", "~> 1.2.1"
26
+ spec.add_runtime_dependency "yomu", "~> 0.2.4"
23
27
  spec.add_development_dependency "yard", "~> 0.9.9"
24
- spec.add_development_dependency "highscore", "~> 1.2.1"
25
- spec.add_development_dependency "yomu", "~> 0.2.4"
26
28
  spec.add_development_dependency "bundler", "~> 1.15"
27
29
  spec.add_development_dependency "rake", "~> 10.0"
28
30
  spec.add_development_dependency "rspec", "~> 3.0"
29
31
  spec.add_development_dependency "coveralls"
30
- spec.add_development_dependency "pry"
31
32
  spec.add_development_dependency "pry-nav"
32
33
  end
data/lib/doc_rank.rb CHANGED
@@ -1,17 +1,6 @@
1
1
  require "doc_rank/version"
2
- require "doc_rank/document"
2
+ require "doc_rank/ranker"
3
3
 
4
4
  # A gem to rank document similarities.
5
5
  module DocRank
6
- # @return [Hash] a hash of document names mapped to their score.
7
- # @param [String] base the file by which target documents are scored againsts.
8
- # @param [Array<String>] targets a single file or many files used for comparison.
9
- def self.compare(base, targets)
10
- base_keywords = DocRank::Document.new(base).keywords.map(&:text)
11
- Array(targets).map do |target|
12
- doc = DocRank::Document.new target
13
- matches = base_keywords & doc.keywords.map(&:text)
14
- [doc.name, matches.size]
15
- end.sort_by{|doc| doc[1]}.to_h
16
- end
17
6
  end
@@ -4,21 +4,23 @@ require 'highscore'
4
4
  # A gem to rank document similarities.
5
5
  module DocRank
6
6
  # Document holds the contents of a document (.pdf, .docx, .txt) and has methods:
7
- # name, text, and keywords.
7
+ # name, text, and weighted_keywords.
8
8
  class Document
9
9
 
10
10
  # [String] the name of the document.
11
11
  attr_reader :name
12
12
  # [String] the text content of the document.
13
13
  attr_reader :text
14
- # [Highscore::Keyword] a collection of keyword objects.
15
- attr_reader :keywords
14
+ # [Hash] a collection of keywords mapped to their weight.
15
+ attr_reader :weighted_keywords
16
16
 
17
17
  # @param [String] file_path The path to the file.
18
18
  def initialize(file_path)
19
19
  @text = Yomu.new(file_path).text
20
20
  @name = File.basename file_path
21
- @keywords = Highscore::Content.new(@text).keywords
21
+ @weighted_keywords = Highscore::Content.new(@text).keywords.map do |kw|
22
+ [kw.text, kw.weight]
23
+ end.to_h
22
24
  end
23
25
  end
24
26
  end
@@ -0,0 +1,54 @@
1
+ require "doc_rank/document"
2
+
3
+ # A gem to rank document similarities.
4
+ module DocRank
5
+ # Ranker ranks documents by keywords and stores them in: scores and weighted_scores.
6
+ class Ranker
7
+
8
+ # [DocRank::Document] the base document.
9
+ attr_reader :base_doc
10
+ # [Array<DocRank::Document>] the target documents.
11
+ attr_reader :target_docs
12
+
13
+ # @param [String] base the file by which target documents are scored against.
14
+ # @param [Array<String>] targets a single file or many files used for comparison.
15
+ def initialize(base, targets)
16
+ @base_doc = DocRank::Document.new base
17
+ @base_kws = @base_doc.weighted_keywords
18
+ @target_docs = Array(targets).map {|target| DocRank::Document.new target }
19
+ end
20
+
21
+ # Rank the target documents based on any keyword matches with the base
22
+ # document regardless of weight.
23
+ # @return [Hash] a hash of document names mapped to their matches.
24
+ def scores
25
+ @score ||= rank match_keywords
26
+ end
27
+
28
+ # Rank the target documents based on any keyword matches with the base
29
+ # document by summing the weights of any match.
30
+ # @return [Hash] a hash of document names mapped to their scores.
31
+ def weighted_scores
32
+ @weighted_scores ||= rank match_keywords(weighted: true)
33
+ end
34
+
35
+ private
36
+
37
+ def match_keywords(options = {})
38
+ @target_docs.map do |target|
39
+ target_kws = target.weighted_keywords
40
+ matches = @base_kws.keys & target_kws.keys
41
+ score = options[:weighted] ? sum_weights(matches, target_kws) : matches.count
42
+ [target.name, score]
43
+ end
44
+ end
45
+
46
+ def sum_weights(matches, target)
47
+ matches.sum {|match| @base_kws[match] + target[match]}
48
+ end
49
+
50
+ def rank(docs)
51
+ docs.sort_by{|doc| doc[1]}.to_h
52
+ end
53
+ end
54
+ end
@@ -1,4 +1,4 @@
1
1
  module DocRank
2
2
  # DocRank SemVer-compatible gem version.
3
- VERSION = "0.0.0.alpha.2"
3
+ VERSION = "0.0.0.alpha.3"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc_rank
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0.alpha.2
4
+ version: 0.0.0.alpha.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - s1v4
@@ -9,50 +9,50 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-09-10 00:00:00.000000000 Z
12
+ date: 2017-09-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: yard
15
+ name: highscore
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
18
  - - "~>"
19
19
  - !ruby/object:Gem::Version
20
- version: 0.9.9
21
- type: :development
20
+ version: 1.2.1
21
+ type: :runtime
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
25
  - - "~>"
26
26
  - !ruby/object:Gem::Version
27
- version: 0.9.9
27
+ version: 1.2.1
28
28
  - !ruby/object:Gem::Dependency
29
- name: highscore
29
+ name: yomu
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
32
  - - "~>"
33
33
  - !ruby/object:Gem::Version
34
- version: 1.2.1
35
- type: :development
34
+ version: 0.2.4
35
+ type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
39
  - - "~>"
40
40
  - !ruby/object:Gem::Version
41
- version: 1.2.1
41
+ version: 0.2.4
42
42
  - !ruby/object:Gem::Dependency
43
- name: yomu
43
+ name: yard
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
46
  - - "~>"
47
47
  - !ruby/object:Gem::Version
48
- version: 0.2.4
48
+ version: 0.9.9
49
49
  type: :development
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
53
  - - "~>"
54
54
  - !ruby/object:Gem::Version
55
- version: 0.2.4
55
+ version: 0.9.9
56
56
  - !ruby/object:Gem::Dependency
57
57
  name: bundler
58
58
  requirement: !ruby/object:Gem::Requirement
@@ -109,20 +109,6 @@ dependencies:
109
109
  - - ">="
110
110
  - !ruby/object:Gem::Version
111
111
  version: '0'
112
- - !ruby/object:Gem::Dependency
113
- name: pry
114
- requirement: !ruby/object:Gem::Requirement
115
- requirements:
116
- - - ">="
117
- - !ruby/object:Gem::Version
118
- version: '0'
119
- type: :development
120
- prerelease: false
121
- version_requirements: !ruby/object:Gem::Requirement
122
- requirements:
123
- - - ">="
124
- - !ruby/object:Gem::Version
125
- version: '0'
126
112
  - !ruby/object:Gem::Dependency
127
113
  name: pry-nav
128
114
  requirement: !ruby/object:Gem::Requirement
@@ -158,6 +144,7 @@ files:
158
144
  - doc_rank.gemspec
159
145
  - lib/doc_rank.rb
160
146
  - lib/doc_rank/document.rb
147
+ - lib/doc_rank/ranker.rb
161
148
  - lib/doc_rank/version.rb
162
149
  homepage: https://github.com/S1v4/doc_rank
163
150
  licenses:
@@ -171,7 +158,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
171
158
  requirements:
172
159
  - - ">="
173
160
  - !ruby/object:Gem::Version
174
- version: '0'
161
+ version: '2.4'
175
162
  required_rubygems_version: !ruby/object:Gem::Requirement
176
163
  requirements:
177
164
  - - ">"