doc_rank 0.0.0.alpha.1 → 0.0.0.alpha.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +3 -0
- data/CHANGELOG.md +14 -0
- data/README.md +7 -2
- data/doc_rank.gemspec +4 -4
- data/lib/doc_rank.rb +13 -1
- data/lib/doc_rank/document.rb +24 -0
- data/lib/doc_rank/version.rb +2 -1
- metadata +31 -29
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d561d37c85fb35101c071fb73814f786bac64b0f
|
4
|
+
data.tar.gz: e22786ab6c7d58ba17a1ec3f17fad2fecc4a7e9a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d9a15a757324b53834c47431da9d7c165ef550ad9adfa71cca45c35c62ce03cba9403b69ca59481ad46e620258bbe3e19cb8a35e8cd36db19bac92508c84ed32
|
7
|
+
data.tar.gz: 0d1fe0b9b4fa900657b2429eb7244221e2b54a7df53f7e3572be8a2b40fdf5fb3b66d3801578915dc5a3a94a1778306ef840e121dffaf7a9e3ad628f987f8ecd
|
data/.travis.yml
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
All notable changes to this project will be documented in this file.
|
4
|
+
|
5
|
+
For more information about changelogs, check
|
6
|
+
[Keep a Changelog](http://keepachangelog.com) and
|
7
|
+
[Vandamme](http://tech-angels.github.io/vandamme).
|
8
|
+
|
9
|
+
## 0.0.0.alpha.2 - 2017/09/07
|
10
|
+
|
11
|
+
* [FEATURE] Add `DocRank.compare`
|
12
|
+
* [FEATURE] Add `DocRank::Document#text`
|
13
|
+
* [FEATURE] Add `DocRank::Document#name`
|
14
|
+
* [FEATURE] Add `DocRank::Document#keywords`
|
data/README.md
CHANGED
@@ -1,8 +1,13 @@
|
|
1
1
|
# DocRank
|
2
2
|
|
3
|
-
|
3
|
+
Returns a similarity score between one or many documents against a base document (keyword comparison).
|
4
4
|
|
5
|
-
|
5
|
+
[![Build Status](http://img.shields.io/travis/S1v4/doc_rank/master.svg)](https://travis-ci.org/S1v4/doc_rank)
|
6
|
+
[![Code Climate](http://img.shields.io/codeclimate/github/S1v4/doc_rank.svg)](https://codeclimate.com/github/S1v4/doc_rank)
|
7
|
+
[![Online docs](http://img.shields.io/badge/docs-✓-green.svg)](http://www.rubydoc.info/gems/doc_rank/frames)
|
8
|
+
[![Gem Version](http://img.shields.io/gem/v/doc_rank.svg)](http://rubygems.org/gems/doc_rank)
|
9
|
+
|
10
|
+
The **source code** is available on [GitHub](https://github.com/S1v4/doc_rank) and the **documentation** on [RubyDoc](http://www.rubydoc.info/gems/doc_rank/frames).
|
6
11
|
|
7
12
|
## Installation
|
8
13
|
|
data/doc_rank.gemspec
CHANGED
@@ -6,7 +6,7 @@ require "doc_rank/version"
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
7
|
spec.name = "doc_rank"
|
8
8
|
spec.version = DocRank::VERSION
|
9
|
-
spec.authors = ["s1v4", "
|
9
|
+
spec.authors = ["s1v4", "sergeg"]
|
10
10
|
spec.email = ["hdao61@gmail.com", "sergeyganous@gmail.com"]
|
11
11
|
|
12
12
|
spec.summary = %q{Returns a similarity score between one or many documents against a base document (keyword comparison).}
|
@@ -21,12 +21,12 @@ Gem::Specification.new do |spec|
|
|
21
21
|
spec.require_paths = ["lib"]
|
22
22
|
|
23
23
|
spec.add_development_dependency "yard", "~> 0.9.9"
|
24
|
-
spec.add_development_dependency "presume", "~> 0.0.4"
|
25
24
|
spec.add_development_dependency "highscore", "~> 1.2.1"
|
26
|
-
spec.add_development_dependency "
|
27
|
-
spec.add_development_dependency "pdf-reader", "~> 2.0.0"
|
25
|
+
spec.add_development_dependency "yomu", "~> 0.2.4"
|
28
26
|
spec.add_development_dependency "bundler", "~> 1.15"
|
29
27
|
spec.add_development_dependency "rake", "~> 10.0"
|
30
28
|
spec.add_development_dependency "rspec", "~> 3.0"
|
31
29
|
spec.add_development_dependency "coveralls"
|
30
|
+
spec.add_development_dependency "pry"
|
31
|
+
spec.add_development_dependency "pry-nav"
|
32
32
|
end
|
data/lib/doc_rank.rb
CHANGED
@@ -1,5 +1,17 @@
|
|
1
1
|
require "doc_rank/version"
|
2
|
+
require "doc_rank/document"
|
2
3
|
|
4
|
+
# A gem to rank document similarities.
|
3
5
|
module DocRank
|
4
|
-
#
|
6
|
+
# @return [Hash] a hash of document names mapped to their score.
|
7
|
+
# @param [String] base the file against which target documents are scored.
|
8
|
+
# @param [Array<String>] targets a single file or many files used for comparison.
|
9
|
+
def self.compare(base, targets)
|
10
|
+
base_keywords = DocRank::Document.new(base).keywords.map(&:text)
|
11
|
+
Array(targets).map do |target|
|
12
|
+
doc = DocRank::Document.new target
|
13
|
+
matches = base_keywords & doc.keywords.map(&:text)
|
14
|
+
[doc.name, matches.size]
|
15
|
+
end.sort_by{|doc| doc[1]}.to_h
|
16
|
+
end
|
5
17
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'yomu'
|
2
|
+
require 'highscore'
|
3
|
+
|
4
|
+
# A gem to rank document similarities.
|
5
|
+
module DocRank
|
6
|
+
# Document holds the contents of a document (.pdf, .docx, .txt) and has methods:
|
7
|
+
# name, text, and keywords.
|
8
|
+
class Document
|
9
|
+
|
10
|
+
# [String] the name of the document.
|
11
|
+
attr_reader :name
|
12
|
+
# [String] the text content of the document.
|
13
|
+
attr_reader :text
|
14
|
+
# [Highscore::Keyword] a collection of keyword objects.
|
15
|
+
attr_reader :keywords
|
16
|
+
|
17
|
+
# @param [String] file_path The path to the file.
|
18
|
+
def initialize(file_path)
|
19
|
+
@text = Yomu.new(file_path).text
|
20
|
+
@name = File.basename file_path
|
21
|
+
@keywords = Highscore::Content.new(@text).keywords
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/doc_rank/version.rb
CHANGED
metadata
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: doc_rank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.0.alpha.
|
4
|
+
version: 0.0.0.alpha.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- s1v4
|
8
|
-
-
|
8
|
+
- sergeg
|
9
9
|
autorequire:
|
10
10
|
bindir: exe
|
11
11
|
cert_chain: []
|
12
|
-
date: 2017-09-
|
12
|
+
date: 2017-09-10 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yard
|
@@ -26,105 +26,105 @@ dependencies:
|
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: 0.9.9
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
|
-
name:
|
29
|
+
name: highscore
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
32
|
- - "~>"
|
33
33
|
- !ruby/object:Gem::Version
|
34
|
-
version:
|
34
|
+
version: 1.2.1
|
35
35
|
type: :development
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
39
|
- - "~>"
|
40
40
|
- !ruby/object:Gem::Version
|
41
|
-
version:
|
41
|
+
version: 1.2.1
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
|
-
name:
|
43
|
+
name: yomu
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
46
|
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
|
-
version:
|
48
|
+
version: 0.2.4
|
49
49
|
type: :development
|
50
50
|
prerelease: false
|
51
51
|
version_requirements: !ruby/object:Gem::Requirement
|
52
52
|
requirements:
|
53
53
|
- - "~>"
|
54
54
|
- !ruby/object:Gem::Version
|
55
|
-
version:
|
55
|
+
version: 0.2.4
|
56
56
|
- !ruby/object:Gem::Dependency
|
57
|
-
name:
|
57
|
+
name: bundler
|
58
58
|
requirement: !ruby/object:Gem::Requirement
|
59
59
|
requirements:
|
60
60
|
- - "~>"
|
61
61
|
- !ruby/object:Gem::Version
|
62
|
-
version:
|
62
|
+
version: '1.15'
|
63
63
|
type: :development
|
64
64
|
prerelease: false
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
66
66
|
requirements:
|
67
67
|
- - "~>"
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version:
|
69
|
+
version: '1.15'
|
70
70
|
- !ruby/object:Gem::Dependency
|
71
|
-
name:
|
71
|
+
name: rake
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|
73
73
|
requirements:
|
74
74
|
- - "~>"
|
75
75
|
- !ruby/object:Gem::Version
|
76
|
-
version:
|
76
|
+
version: '10.0'
|
77
77
|
type: :development
|
78
78
|
prerelease: false
|
79
79
|
version_requirements: !ruby/object:Gem::Requirement
|
80
80
|
requirements:
|
81
81
|
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
|
-
version:
|
83
|
+
version: '10.0'
|
84
84
|
- !ruby/object:Gem::Dependency
|
85
|
-
name:
|
85
|
+
name: rspec
|
86
86
|
requirement: !ruby/object:Gem::Requirement
|
87
87
|
requirements:
|
88
88
|
- - "~>"
|
89
89
|
- !ruby/object:Gem::Version
|
90
|
-
version: '
|
90
|
+
version: '3.0'
|
91
91
|
type: :development
|
92
92
|
prerelease: false
|
93
93
|
version_requirements: !ruby/object:Gem::Requirement
|
94
94
|
requirements:
|
95
95
|
- - "~>"
|
96
96
|
- !ruby/object:Gem::Version
|
97
|
-
version: '
|
97
|
+
version: '3.0'
|
98
98
|
- !ruby/object:Gem::Dependency
|
99
|
-
name:
|
99
|
+
name: coveralls
|
100
100
|
requirement: !ruby/object:Gem::Requirement
|
101
101
|
requirements:
|
102
|
-
- - "
|
102
|
+
- - ">="
|
103
103
|
- !ruby/object:Gem::Version
|
104
|
-
version: '
|
104
|
+
version: '0'
|
105
105
|
type: :development
|
106
106
|
prerelease: false
|
107
107
|
version_requirements: !ruby/object:Gem::Requirement
|
108
108
|
requirements:
|
109
|
-
- - "
|
109
|
+
- - ">="
|
110
110
|
- !ruby/object:Gem::Version
|
111
|
-
version: '
|
111
|
+
version: '0'
|
112
112
|
- !ruby/object:Gem::Dependency
|
113
|
-
name:
|
113
|
+
name: pry
|
114
114
|
requirement: !ruby/object:Gem::Requirement
|
115
115
|
requirements:
|
116
|
-
- - "
|
116
|
+
- - ">="
|
117
117
|
- !ruby/object:Gem::Version
|
118
|
-
version: '
|
118
|
+
version: '0'
|
119
119
|
type: :development
|
120
120
|
prerelease: false
|
121
121
|
version_requirements: !ruby/object:Gem::Requirement
|
122
122
|
requirements:
|
123
|
-
- - "
|
123
|
+
- - ">="
|
124
124
|
- !ruby/object:Gem::Version
|
125
|
-
version: '
|
125
|
+
version: '0'
|
126
126
|
- !ruby/object:Gem::Dependency
|
127
|
-
name:
|
127
|
+
name: pry-nav
|
128
128
|
requirement: !ruby/object:Gem::Requirement
|
129
129
|
requirements:
|
130
130
|
- - ">="
|
@@ -148,6 +148,7 @@ files:
|
|
148
148
|
- ".gitignore"
|
149
149
|
- ".rspec"
|
150
150
|
- ".travis.yml"
|
151
|
+
- CHANGELOG.md
|
151
152
|
- Gemfile
|
152
153
|
- LICENSE.txt
|
153
154
|
- README.md
|
@@ -156,6 +157,7 @@ files:
|
|
156
157
|
- bin/setup
|
157
158
|
- doc_rank.gemspec
|
158
159
|
- lib/doc_rank.rb
|
160
|
+
- lib/doc_rank/document.rb
|
159
161
|
- lib/doc_rank/version.rb
|
160
162
|
homepage: https://github.com/S1v4/doc_rank
|
161
163
|
licenses:
|