text_rank 1.1.7 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c7c140fafc459b538cdc9fcc84f639e2155c6fbb
|
4
|
+
data.tar.gz: c2e8b24f80414a113ba9d2d93ca26b6a7e1c38a8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ab7b1875f82d42a51243f74b827f90dc0e23b3ad68479b7dca4a78058423bf63e74d50361c952809e1396ddc70423fbafcee090344bbb47cd5095f94cd09e435
|
7
|
+
data.tar.gz: 7fb37752476eb9f0fefab815af32522eb3a748bb39fdeace39af08446c9a6e544c96fe187bcf541c27947493d6c7aafd69351af1c6a6438f987926396189adde
|
@@ -10,35 +10,35 @@ module TextRank
|
|
10
10
|
# significant keywords. But to prevent less significant keywords from being
|
11
11
|
# completely ignored we apply an inverse log linear transformation to each of the
|
12
12
|
# N prefixes.
|
13
|
-
#
|
13
|
+
#
|
14
14
|
# For example, consider the following comparison:
|
15
|
-
#
|
15
|
+
#
|
16
16
|
# town man empty found
|
17
17
|
# vs.
|
18
18
|
# general empty found jar
|
19
|
-
#
|
19
|
+
#
|
20
20
|
# The first pass considers just the first keywords: town vs. general. As these
|
21
21
|
# are different, they contribute 0.
|
22
|
-
#
|
22
|
+
#
|
23
23
|
# The second pass considers the first two keywords: town man vs general empty.
|
24
24
|
# Again, no overlap, so they contribute 0.
|
25
|
-
#
|
25
|
+
#
|
26
26
|
# The third pass considers the first three keywords: town man empty vs general
|
27
27
|
# empty found. Here we have one overlap: empty. This contributes 1.
|
28
|
-
#
|
28
|
+
#
|
29
29
|
# The fourth pass considers all, and there are two overlaps: empty & found. This
|
30
30
|
# contributes 2.
|
31
|
-
#
|
31
|
+
#
|
32
32
|
# We can represent the overlaps as the vector [0, 0, 1, 2]. Then we will apply
|
33
33
|
# the inverse log linear transformation defined by:
|
34
|
-
#
|
34
|
+
#
|
35
35
|
# f(x_i) = x_i / ln(i + 1)
|
36
36
|
# = [0, 0, 1 / ln(4), 2 / ln(5)]
|
37
37
|
# = [0, 0, 0.7213475204444817, 1.2426698691192237]
|
38
|
-
#
|
38
|
+
#
|
39
39
|
# Finally we take the average of the transformed vector and normalize it (to
|
40
40
|
# ensure a final value between 0.0 and 1.0):
|
41
|
-
#
|
41
|
+
#
|
42
42
|
# norm(avg(SUM f(x_i))) = norm( avg(1.9640173895637054) )
|
43
43
|
# = norm( 0.49100434739092635 )
|
44
44
|
# = 0.49100434739092635 / avg(SUM f(1, 2, 3, 4))
|
@@ -41,7 +41,7 @@ module TextRank
|
|
41
41
|
# @option options [Array<Class, Symbol, #filter!>] :rank_filters A list of filters to be applied to the keyword ranks after keyword extraction
|
42
42
|
def initialize(**options)
|
43
43
|
@page_rank_options = {
|
44
|
-
strategy: options[:strategy] || :
|
44
|
+
strategy: options[:strategy] || :sparse,
|
45
45
|
damping: options[:damping],
|
46
46
|
tolerance: options[:tolerance],
|
47
47
|
}
|
@@ -105,7 +105,8 @@ module TextRank
|
|
105
105
|
# until all of the top N final keywords (single or collapsed) have been
|
106
106
|
# considered.
|
107
107
|
loop do
|
108
|
-
|
108
|
+
regexp_safe_tokens = @tokens.keys.select { |s| Regexp.escape(s) == s }
|
109
|
+
single_tokens_to_consider = regexp_safe_tokens.first(@ranks_to_collapse + @to_remove.size - @to_collapse.size) - @to_remove.to_a
|
109
110
|
scan_text_for_all_permutations_of(single_tokens_to_consider) or break
|
110
111
|
decide_what_to_collapse_and_what_to_remove
|
111
112
|
end
|
data/lib/text_rank/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_rank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.7
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David McCullars
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
183
183
|
version: '0'
|
184
184
|
requirements: []
|
185
185
|
rubyforge_project:
|
186
|
-
rubygems_version: 2.
|
186
|
+
rubygems_version: 2.6.7
|
187
187
|
signing_key:
|
188
188
|
specification_version: 4
|
189
189
|
summary: Implementation of TextRank solution to ranked keyword extraction
|