doc_rank 0.0.0.alpha.1 → 0.0.0.alpha.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +3 -0
- data/CHANGELOG.md +14 -0
- data/README.md +7 -2
- data/doc_rank.gemspec +4 -4
- data/lib/doc_rank.rb +13 -1
- data/lib/doc_rank/document.rb +24 -0
- data/lib/doc_rank/version.rb +2 -1
- metadata +31 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d561d37c85fb35101c071fb73814f786bac64b0f
|
4
|
+
data.tar.gz: e22786ab6c7d58ba17a1ec3f17fad2fecc4a7e9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9a15a757324b53834c47431da9d7c165ef550ad9adfa71cca45c35c62ce03cba9403b69ca59481ad46e620258bbe3e19cb8a35e8cd36db19bac92508c84ed32
|
7
|
+
data.tar.gz: 0d1fe0b9b4fa900657b2429eb7244221e2b54a7df53f7e3572be8a2b40fdf5fb3b66d3801578915dc5a3a94a1778306ef840e121dffaf7a9e3ad628f987f8ecd
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
All notable changes to this project will be documented in this file.
|
4
|
+
|
5
|
+
For more information about changelogs, check
|
6
|
+
[Keep a Changelog](http://keepachangelog.com) and
|
7
|
+
[Vandamme](http://tech-angels.github.io/vandamme).
|
8
|
+
|
9
|
+
## 0.0.0.alpha.2 - 2017/09/07
|
10
|
+
|
11
|
+
* [FEATURE] Add `DocRank.compare`
|
12
|
+
* [FEATURE] Add `DocRank::Document#text`
|
13
|
+
* [FEATURE] Add `DocRank::Document#name`
|
14
|
+
* [FEATURE] Add `DocRank::Document#keywords`
|
data/README.md
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
# DocRank
|
2
2
|
|
3
|
-
|
3
|
+
Returns a similarity score between one or many documents against a base document (keyword comparison).
|
4
4
|
|
5
|
-
|
5
|
+
[Build Status](https://travis-ci.org/S1v4/doc_rank)
|
6
|
+
[Code Climate](https://codeclimate.com/github/S1v4/doc_rank)
|
7
|
+
[Documentation](http://www.rubydoc.info/gems/doc_rank/frames)
|
8
|
+
[Gem Version](http://rubygems.org/gems/doc_rank)
|
9
|
+
|
10
|
+
The **source code** is available on [GitHub](https://github.com/S1v4/doc_rank) and the **documentation** on [RubyDoc](http://www.rubydoc.info/gems/doc_rank/frames).
|
6
11
|
|
7
12
|
## Installation
|
8
13
|
|
data/doc_rank.gemspec
CHANGED
@@ -6,7 +6,7 @@ require "doc_rank/version"
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "doc_rank"
|
8
8
|
spec.version = DocRank::VERSION
|
9
|
-
spec.authors = ["s1v4", "
|
9
|
+
spec.authors = ["s1v4", "sergeg"]
|
10
10
|
spec.email = ["hdao61@gmail.com", "sergeyganous@gmail.com"]
|
11
11
|
|
12
12
|
spec.summary = %q{Returns a similarity score between one or many documents against a base document (keyword comparison).}
|
@@ -21,12 +21,12 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.require_paths = ["lib"]
|
22
22
|
|
23
23
|
spec.add_development_dependency "yard", "~> 0.9.9"
|
24
|
-
spec.add_development_dependency "presume", "~> 0.0.4"
|
25
24
|
spec.add_development_dependency "highscore", "~> 1.2.1"
|
26
|
-
spec.add_development_dependency "
|
27
|
-
spec.add_development_dependency "pdf-reader", "~> 2.0.0"
|
25
|
+
spec.add_development_dependency "yomu", "~> 0.2.4"
|
28
26
|
spec.add_development_dependency "bundler", "~> 1.15"
|
29
27
|
spec.add_development_dependency "rake", "~> 10.0"
|
30
28
|
spec.add_development_dependency "rspec", "~> 3.0"
|
31
29
|
spec.add_development_dependency "coveralls"
|
30
|
+
spec.add_development_dependency "pry"
|
31
|
+
spec.add_development_dependency "pry-nav"
|
32
32
|
end
|
data/lib/doc_rank.rb
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
require "doc_rank/version"
|
2
|
+
require "doc_rank/document"
|
2
3
|
|
4
|
+
# A gem to rank document similarities.
|
3
5
|
module DocRank
|
4
|
-
#
|
6
|
+
# @return [Hash] a hash of document names mapped to their score.
|
7
|
+
# @param [String] base the file against which target documents are scored.
|
8
|
+
# @param [Array<String>] targets a single file or many files used for comparison.
|
9
|
+
def self.compare(base, targets)
|
10
|
+
base_keywords = DocRank::Document.new(base).keywords.map(&:text)
|
11
|
+
Array(targets).map do |target|
|
12
|
+
doc = DocRank::Document.new target
|
13
|
+
matches = base_keywords & doc.keywords.map(&:text)
|
14
|
+
[doc.name, matches.size]
|
15
|
+
end.sort_by{|doc| doc[1]}.to_h
|
16
|
+
end
|
5
17
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'yomu'
|
2
|
+
require 'highscore'
|
3
|
+
|
4
|
+
# A gem to rank document similarities.
|
5
|
+
module DocRank
|
6
|
+
# Document holds the contents of a document (.pdf, .docx, .txt) and has methods:
|
7
|
+
# name, text, and keywords.
|
8
|
+
class Document
|
9
|
+
|
10
|
+
# @return [String] the name of the document.
|
11
|
+
attr_reader :name
|
12
|
+
# @return [String] the text content of the document.
|
13
|
+
attr_reader :text
|
14
|
+
# @return [Highscore::Keyword] a collection of keyword objects.
|
15
|
+
attr_reader :keywords
|
16
|
+
|
17
|
+
# @param [String] file_path The path to the file.
|
18
|
+
def initialize(file_path)
|
19
|
+
@text = Yomu.new(file_path).text
|
20
|
+
@name = File.basename file_path
|
21
|
+
@keywords = Highscore::Content.new(@text).keywords
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/doc_rank/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doc_rank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0.alpha.
|
4
|
+
version: 0.0.0.alpha.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- s1v4
|
8
|
-
-
|
8
|
+
- sergeg
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yard
|
@@ -26,105 +26,105 @@ dependencies:
|
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: 0.9.9
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
|
-
name:
|
29
|
+
name: highscore
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
34
|
+
version: 1.2.1
|
35
35
|
type: :development
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version:
|
41
|
+
version: 1.2.1
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
|
-
name:
|
43
|
+
name: yomu
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
48
|
+
version: 0.2.4
|
49
49
|
type: :development
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
55
|
+
version: 0.2.4
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
|
-
name:
|
57
|
+
name: bundler
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version:
|
62
|
+
version: '1.15'
|
63
63
|
type: :development
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
69
|
+
version: '1.15'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
|
-
name:
|
71
|
+
name: rake
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
73
73
|
requirements:
|
74
74
|
- - "~>"
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version:
|
76
|
+
version: '10.0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
81
|
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
|
-
version:
|
83
|
+
version: '10.0'
|
84
84
|
- !ruby/object:Gem::Dependency
|
85
|
-
name:
|
85
|
+
name: rspec
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
87
87
|
requirements:
|
88
88
|
- - "~>"
|
89
89
|
- !ruby/object:Gem::Version
|
90
|
-
version: '
|
90
|
+
version: '3.0'
|
91
91
|
type: :development
|
92
92
|
prerelease: false
|
93
93
|
version_requirements: !ruby/object:Gem::Requirement
|
94
94
|
requirements:
|
95
95
|
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
|
-
version: '
|
97
|
+
version: '3.0'
|
98
98
|
- !ruby/object:Gem::Dependency
|
99
|
-
name:
|
99
|
+
name: coveralls
|
100
100
|
requirement: !ruby/object:Gem::Requirement
|
101
101
|
requirements:
|
102
|
-
- - "
|
102
|
+
- - ">="
|
103
103
|
- !ruby/object:Gem::Version
|
104
|
-
version: '
|
104
|
+
version: '0'
|
105
105
|
type: :development
|
106
106
|
prerelease: false
|
107
107
|
version_requirements: !ruby/object:Gem::Requirement
|
108
108
|
requirements:
|
109
|
-
- - "
|
109
|
+
- - ">="
|
110
110
|
- !ruby/object:Gem::Version
|
111
|
-
version: '
|
111
|
+
version: '0'
|
112
112
|
- !ruby/object:Gem::Dependency
|
113
|
-
name:
|
113
|
+
name: pry
|
114
114
|
requirement: !ruby/object:Gem::Requirement
|
115
115
|
requirements:
|
116
|
-
- - "
|
116
|
+
- - ">="
|
117
117
|
- !ruby/object:Gem::Version
|
118
|
-
version: '
|
118
|
+
version: '0'
|
119
119
|
type: :development
|
120
120
|
prerelease: false
|
121
121
|
version_requirements: !ruby/object:Gem::Requirement
|
122
122
|
requirements:
|
123
|
-
- - "
|
123
|
+
- - ">="
|
124
124
|
- !ruby/object:Gem::Version
|
125
|
-
version: '
|
125
|
+
version: '0'
|
126
126
|
- !ruby/object:Gem::Dependency
|
127
|
-
name:
|
127
|
+
name: pry-nav
|
128
128
|
requirement: !ruby/object:Gem::Requirement
|
129
129
|
requirements:
|
130
130
|
- - ">="
|
@@ -148,6 +148,7 @@ files:
|
|
148
148
|
- ".gitignore"
|
149
149
|
- ".rspec"
|
150
150
|
- ".travis.yml"
|
151
|
+
- CHANGELOG.md
|
151
152
|
- Gemfile
|
152
153
|
- LICENSE.txt
|
153
154
|
- README.md
|
@@ -156,6 +157,7 @@ files:
|
|
156
157
|
- bin/setup
|
157
158
|
- doc_rank.gemspec
|
158
159
|
- lib/doc_rank.rb
|
160
|
+
- lib/doc_rank/document.rb
|
159
161
|
- lib/doc_rank/version.rb
|
160
162
|
homepage: https://github.com/S1v4/doc_rank
|
161
163
|
licenses:
|