text_rank 1.1.7 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1625933f78441107094a4306a488cf0d68f45e53
4
- data.tar.gz: 5e605f2ac2210c6e44e21a1c088620c5d491314c
3
+ metadata.gz: c7c140fafc459b538cdc9fcc84f639e2155c6fbb
4
+ data.tar.gz: c2e8b24f80414a113ba9d2d93ca26b6a7e1c38a8
5
5
  SHA512:
6
- metadata.gz: 1703773a7d6d1391ec81f26bac6aa409344d8f036352b24e0acf37511e141da88cc560c0bbc525563f5b2bd277ea528c9d30bb41f9768afb1987ae9acee42af6
7
- data.tar.gz: fc23bc9f6d63a0361d88d804bc64448613de5aa6c22d1f5b223630bee18c485d3d35b7a12876bb5b4c25dda5efa537aad83ed2e686e12a04a4fed33d88e2147d
6
+ metadata.gz: ab7b1875f82d42a51243f74b827f90dc0e23b3ad68479b7dca4a78058423bf63e74d50361c952809e1396ddc70423fbafcee090344bbb47cd5095f94cd09e435
7
+ data.tar.gz: 7fb37752476eb9f0fefab815af32522eb3a748bb39fdeace39af08446c9a6e544c96fe187bcf541c27947493d6c7aafd69351af1c6a6438f987926396189adde
@@ -10,35 +10,35 @@ module TextRank
10
10
  # significant keywords. But to prevent less significant keywords from being
11
11
  # completely ignored we apply an inverse log linear transformation to each of the
12
12
  # N prefixes.
13
- #
13
+ #
14
14
  # For example, consider the following comparison:
15
- #
15
+ #
16
16
  # town man empty found
17
17
  # vs.
18
18
  # general empty found jar
19
- #
19
+ #
20
20
  # The first pass considers just the first keywords: town vs. general. As these
21
21
  # are different, they contribute 0.
22
- #
22
+ #
23
23
  # The second pass considers the first two keywords: town man vs general empty.
24
24
  # Again, no overlap, so they contribute 0.
25
- #
25
+ #
26
26
  # The third pass considers the first three keywords: town man empty vs general
27
27
  # empty found. Here we have one overlap: empty. This contributes 1.
28
- #
28
+ #
29
29
  # The fourth pass considers all, and there are two overlaps: empty & found. This
30
30
  # contributes 2.
31
- #
31
+ #
32
32
  # We can represent the overlaps as the vector [0, 0, 1, 2]. Then we will apply
33
33
  # the inverse log linear transformation defined by:
34
- #
34
+ #
35
35
  # f(x_i) = x_i / ln(i + 1)
36
36
  # = [0, 0, 1 / ln(4), 2 / ln(5)]
37
37
  # = [0, 0, 0.7213475204444817, 1.2426698691192237]
38
- #
38
+ #
39
39
  # Finally we take the average of the transformed vector and normalize it (to
40
40
  # ensure a final value between 0.0 and 1.0):
41
- #
41
+ #
42
42
  # norm(avg(SUM f(x_i))) = norm( avg(1.9640173895637054) )
43
43
  # = norm( 0.49100434739092635 )
44
44
  # = 0.49100434739092635 / avg(SUM f(1, 2, 3, 4))
@@ -41,7 +41,7 @@ module TextRank
41
41
  # @option options [Array<Class, Symbol, #filter!>] :rank_filters A list of filters to be applied to the keyword ranks after keyword extraction
42
42
  def initialize(**options)
43
43
  @page_rank_options = {
44
- strategy: options[:strategy] || :dense,
44
+ strategy: options[:strategy] || :sparse,
45
45
  damping: options[:damping],
46
46
  tolerance: options[:tolerance],
47
47
  }
@@ -105,7 +105,8 @@ module TextRank
105
105
  # until all of the top N final keywords (single or collapsed) have been
106
106
  # considered.
107
107
  loop do
108
- single_tokens_to_consider = @tokens.keys.first(@ranks_to_collapse + @to_remove.size - @to_collapse.size) - @to_remove.to_a
108
+ regexp_safe_tokens = @tokens.keys.select { |s| Regexp.escape(s) == s }
109
+ single_tokens_to_consider = regexp_safe_tokens.first(@ranks_to_collapse + @to_remove.size - @to_collapse.size) - @to_remove.to_a
109
110
  scan_text_for_all_permutations_of(single_tokens_to_consider) or break
110
111
  decide_what_to_collapse_and_what_to_remove
111
112
  end
@@ -1,4 +1,4 @@
1
1
  module TextRank
2
2
  # Current gem version
3
- VERSION = '1.1.7'
3
+ VERSION = '1.2.0'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_rank
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.7
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David McCullars
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-05 00:00:00.000000000 Z
11
+ date: 2016-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
183
  version: '0'
184
184
  requirements: []
185
185
  rubyforge_project:
186
- rubygems_version: 2.5.1
186
+ rubygems_version: 2.6.7
187
187
  signing_key:
188
188
  specification_version: 4
189
189
  summary: Implementation of TextRank solution to ranked keyword extraction