doc_rank 0.0.0.alpha.2 → 0.0.0.alpha.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d561d37c85fb35101c071fb73814f786bac64b0f
4
- data.tar.gz: e22786ab6c7d58ba17a1ec3f17fad2fecc4a7e9a
3
+ metadata.gz: 78aa3f24fc74f25238b5e0888027a4d3debd9b44
4
+ data.tar.gz: 989694cd61a85196089db8d8a6c3a2977ad14cb7
5
5
  SHA512:
6
- metadata.gz: d9a15a757324b53834c47431da9d7c165ef550ad9adfa71cca45c35c62ce03cba9403b69ca59481ad46e620258bbe3e19cb8a35e8cd36db19bac92508c84ed32
7
- data.tar.gz: 0d1fe0b9b4fa900657b2429eb7244221e2b54a7df53f7e3572be8a2b40fdf5fb3b66d3801578915dc5a3a94a1778306ef840e121dffaf7a9e3ad628f987f8ecd
6
+ metadata.gz: e1536fe620ca05693665e1524863cfcaaa5cd99d61310641ea196b5580ecd6816d707c0d55e0a22b6a47e44cf708066dad0549684135bcb93a64809bdb57aa59
7
+ data.tar.gz: c0a0a7b0d9307d51c26aef1c1fe74de3c9de7d4231f6249df076cf9d069a3b1b4036377d30b884fbba372953a597d8de98943819c4844ed5c1e5f78e50620d95
data/CHANGELOG.md CHANGED
@@ -6,6 +6,15 @@ For more information about changelogs, check
6
6
  [Keep a Changelog](http://keepachangelog.com) and
7
7
  [Vandamme](http://tech-angels.github.io/vandamme).
8
8
 
9
+ ## 0.0.0.alpha.3 - 2017
10
+
11
+ * [FEATURE] Add `DocRank::Ranker#base_doc`
12
+ * [FEATURE] Add `DocRank::Ranker#target_docs`
13
+ * [FEATURE] Add `DocRank::Ranker#scores`
14
+ * [FEATURE] Add `DocRank::Ranker#weighted_scores`
15
+ * [FEATURE] Change `DocRank::Document#keywords` to `DocRank::Document#weighted_keywords`
16
+ * [FEATURE] Remove `DocRank.compare`
17
+
9
18
  ## 0.0.0.alpha.2 - 2017/09/07
10
19
 
11
20
  * [FEATURE] Add `DocRank.compare`
data/doc_rank.gemspec CHANGED
@@ -13,6 +13,8 @@ Gem::Specification.new do |spec|
13
13
  spec.homepage = "https://github.com/S1v4/doc_rank"
14
14
  spec.license = "MIT"
15
15
 
16
+ spec.required_ruby_version = '>= 2.4'
17
+
16
18
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
17
19
  f.match(%r{^(test|spec|features)/})
18
20
  end
@@ -20,13 +22,12 @@ Gem::Specification.new do |spec|
20
22
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
23
  spec.require_paths = ["lib"]
22
24
 
25
+ spec.add_runtime_dependency "highscore", "~> 1.2.1"
26
+ spec.add_runtime_dependency "yomu", "~> 0.2.4"
23
27
  spec.add_development_dependency "yard", "~> 0.9.9"
24
- spec.add_development_dependency "highscore", "~> 1.2.1"
25
- spec.add_development_dependency "yomu", "~> 0.2.4"
26
28
  spec.add_development_dependency "bundler", "~> 1.15"
27
29
  spec.add_development_dependency "rake", "~> 10.0"
28
30
  spec.add_development_dependency "rspec", "~> 3.0"
29
31
  spec.add_development_dependency "coveralls"
30
- spec.add_development_dependency "pry"
31
32
  spec.add_development_dependency "pry-nav"
32
33
  end
data/lib/doc_rank.rb CHANGED
@@ -1,17 +1,6 @@
1
1
  require "doc_rank/version"
2
- require "doc_rank/document"
2
+ require "doc_rank/ranker"
3
3
 
4
4
  # A gem to rank document similarities.
5
5
  module DocRank
6
- # @return [Hash] a hash of document names mapped to their score.
7
- # @param [String] base the file by which target documents are scored againsts.
8
- # @param [Array<String>] targets a single file or many files used for comparison.
9
- def self.compare(base, targets)
10
- base_keywords = DocRank::Document.new(base).keywords.map(&:text)
11
- Array(targets).map do |target|
12
- doc = DocRank::Document.new target
13
- matches = base_keywords & doc.keywords.map(&:text)
14
- [doc.name, matches.size]
15
- end.sort_by{|doc| doc[1]}.to_h
16
- end
17
6
  end
@@ -4,21 +4,23 @@ require 'highscore'
4
4
  # A gem to rank document similarities.
5
5
  module DocRank
6
6
  # Document holds the contents of a document (.pdf, .docx, .txt) and has methods:
7
- # name, text, and keywords.
7
+ # name, text, and weighted_keywords.
8
8
  class Document
9
9
 
10
10
  # [String] the name of the document.
11
11
  attr_reader :name
12
12
  # [String] the text content of the document.
13
13
  attr_reader :text
14
- # [Highscore::Keyword] a collection of keyword objects.
15
- attr_reader :keywords
14
+ # [Hash] a collection of keywords mapped to their weight.
15
+ attr_reader :weighted_keywords
16
16
 
17
17
  # @param [String] file_path The path to the file.
18
18
  def initialize(file_path)
19
19
  @text = Yomu.new(file_path).text
20
20
  @name = File.basename file_path
21
- @keywords = Highscore::Content.new(@text).keywords
21
+ @weighted_keywords = Highscore::Content.new(@text).keywords.map do |kw|
22
+ [kw.text, kw.weight]
23
+ end.to_h
22
24
  end
23
25
  end
24
26
  end
@@ -0,0 +1,54 @@
1
+ require "doc_rank/document"
2
+
3
+ # A gem to rank document similarities.
4
+ module DocRank
5
+ # Ranker ranks target documents by their keyword matches with a base document and exposes the results through scores and weighted_scores.
6
+ class Ranker
7
+
8
+ # [DocRank::Document] the base document.
9
+ attr_reader :base_doc
10
+ # [Array<DocRank::Document>] the target documents.
11
+ attr_reader :target_docs
12
+
13
+ # @param [String] base the file by which target documents are scored against.
14
+ # @param [Array<String>] targets a single file or many files used for comparison.
15
+ def initialize(base, targets)
16
+ @base_doc = DocRank::Document.new base
17
+ @base_kws = @base_doc.weighted_keywords
18
+ @target_docs = Array(targets).map {|target| DocRank::Document.new target }
19
+ end
20
+
21
+ # Rank the target documents based on any keyword matches with the base
22
+ # document regardless of weight.
23
+ # @return [Hash] a hash of document names mapped to their number of keyword matches.
24
+ def scores
25
+ @score ||= rank match_keywords
26
+ end
27
+
28
+ # Rank the target documents based on any keyword matches with the base
29
+ # document by summing the weights of any match.
30
+ # @return [Hash] a hash of document names mapped to their scores.
31
+ def weighted_scores
32
+ @weighted_scores ||= rank match_keywords(weighted: true)
33
+ end
34
+
35
+ private
36
+
37
+ def match_keywords(options = {})
38
+ @target_docs.map do |target|
39
+ target_kws = target.weighted_keywords
40
+ matches = @base_kws.keys & target_kws.keys
41
+ score = options[:weighted] ? sum_weights(matches, target_kws) : matches.count
42
+ [target.name, score]
43
+ end
44
+ end
45
+
46
+ def sum_weights(matches, target)
47
+ matches.sum {|match| @base_kws[match] + target[match]}
48
+ end
49
+
50
+ def rank(docs)
51
+ docs.sort_by{|doc| doc[1]}.to_h
52
+ end
53
+ end
54
+ end
@@ -1,4 +1,4 @@
1
1
  module DocRank
2
2
  # DocRank SemVer-compatible gem version.
3
- VERSION = "0.0.0.alpha.2"
3
+ VERSION = "0.0.0.alpha.3"
4
4
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: doc_rank
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0.alpha.2
4
+ version: 0.0.0.alpha.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - s1v4
@@ -9,50 +9,50 @@ authors:
9
9
  autorequire:
10
10
  bindir: exe
11
11
  cert_chain: []
12
- date: 2017-09-10 00:00:00.000000000 Z
12
+ date: 2017-09-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: yard
15
+ name: highscore
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
18
  - - "~>"
19
19
  - !ruby/object:Gem::Version
20
- version: 0.9.9
21
- type: :development
20
+ version: 1.2.1
21
+ type: :runtime
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
25
  - - "~>"
26
26
  - !ruby/object:Gem::Version
27
- version: 0.9.9
27
+ version: 1.2.1
28
28
  - !ruby/object:Gem::Dependency
29
- name: highscore
29
+ name: yomu
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
32
  - - "~>"
33
33
  - !ruby/object:Gem::Version
34
- version: 1.2.1
35
- type: :development
34
+ version: 0.2.4
35
+ type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
39
  - - "~>"
40
40
  - !ruby/object:Gem::Version
41
- version: 1.2.1
41
+ version: 0.2.4
42
42
  - !ruby/object:Gem::Dependency
43
- name: yomu
43
+ name: yard
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
46
  - - "~>"
47
47
  - !ruby/object:Gem::Version
48
- version: 0.2.4
48
+ version: 0.9.9
49
49
  type: :development
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
53
  - - "~>"
54
54
  - !ruby/object:Gem::Version
55
- version: 0.2.4
55
+ version: 0.9.9
56
56
  - !ruby/object:Gem::Dependency
57
57
  name: bundler
58
58
  requirement: !ruby/object:Gem::Requirement
@@ -109,20 +109,6 @@ dependencies:
109
109
  - - ">="
110
110
  - !ruby/object:Gem::Version
111
111
  version: '0'
112
- - !ruby/object:Gem::Dependency
113
- name: pry
114
- requirement: !ruby/object:Gem::Requirement
115
- requirements:
116
- - - ">="
117
- - !ruby/object:Gem::Version
118
- version: '0'
119
- type: :development
120
- prerelease: false
121
- version_requirements: !ruby/object:Gem::Requirement
122
- requirements:
123
- - - ">="
124
- - !ruby/object:Gem::Version
125
- version: '0'
126
112
  - !ruby/object:Gem::Dependency
127
113
  name: pry-nav
128
114
  requirement: !ruby/object:Gem::Requirement
@@ -158,6 +144,7 @@ files:
158
144
  - doc_rank.gemspec
159
145
  - lib/doc_rank.rb
160
146
  - lib/doc_rank/document.rb
147
+ - lib/doc_rank/ranker.rb
161
148
  - lib/doc_rank/version.rb
162
149
  homepage: https://github.com/S1v4/doc_rank
163
150
  licenses:
@@ -171,7 +158,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
171
158
  requirements:
172
159
  - - ">="
173
160
  - !ruby/object:Gem::Version
174
- version: '0'
161
+ version: '2.4'
175
162
  required_rubygems_version: !ruby/object:Gem::Requirement
176
163
  requirements:
177
164
  - - ">"