text_rank 1.1.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1625933f78441107094a4306a488cf0d68f45e53
4
- data.tar.gz: 5e605f2ac2210c6e44e21a1c088620c5d491314c
3
+ metadata.gz: c7c140fafc459b538cdc9fcc84f639e2155c6fbb
4
+ data.tar.gz: c2e8b24f80414a113ba9d2d93ca26b6a7e1c38a8
5
5
  SHA512:
6
- metadata.gz: 1703773a7d6d1391ec81f26bac6aa409344d8f036352b24e0acf37511e141da88cc560c0bbc525563f5b2bd277ea528c9d30bb41f9768afb1987ae9acee42af6
7
- data.tar.gz: fc23bc9f6d63a0361d88d804bc64448613de5aa6c22d1f5b223630bee18c485d3d35b7a12876bb5b4c25dda5efa537aad83ed2e686e12a04a4fed33d88e2147d
6
+ metadata.gz: ab7b1875f82d42a51243f74b827f90dc0e23b3ad68479b7dca4a78058423bf63e74d50361c952809e1396ddc70423fbafcee090344bbb47cd5095f94cd09e435
7
+ data.tar.gz: 7fb37752476eb9f0fefab815af32522eb3a748bb39fdeace39af08446c9a6e544c96fe187bcf541c27947493d6c7aafd69351af1c6a6438f987926396189adde
@@ -10,35 +10,35 @@ module TextRank
10
10
  # significant keywords. But to prevent less significant keywords from being
11
11
  # completely ignored we apply an inverse log linear transformation to each of the
12
12
  # N prefixes.
13
- #
13
+ #
14
14
  # For example, consider the following comparison:
15
- #
15
+ #
16
16
  # town man empty found
17
17
  # vs.
18
18
  # general empty found jar
19
- #
19
+ #
20
20
  # The first pass considers just the first keywords: town vs. general. As these
21
21
  # are different, they contribute 0.
22
- #
22
+ #
23
23
  # The second pass considers the first two keywords: town man vs general empty.
24
24
  # Again, no overlap, so they contribute 0.
25
- #
25
+ #
26
26
  # The third pass considers the first three keywords: town man empty vs general
27
27
  # empty found. Here we have one overlap: empty. This contributes 1.
28
- #
28
+ #
29
29
  # The fourth pass considers all, and there are two overlaps: empty & found. This
30
30
  # contributes 2.
31
- #
31
+ #
32
32
  # We can represent the overlaps as the vector [0, 0, 1, 2]. Then we will apply
33
33
  # the inverse log linear transformation defined by:
34
- #
34
+ #
35
35
  # f(x_i) = x_i / ln(i + 1)
36
36
  # = [0, 0, 1 / ln(4), 2 / ln(5)]
37
37
  # = [0, 0, 0.7213475204444817, 1.2426698691192237]
38
- #
38
+ #
39
39
  # Finally we take the average of the transformed vector and normalize it (to
40
40
  # ensure a final value between 0.0 and 1.0):
41
- #
41
+ #
42
42
  # norm(avg(SUM f(x_i))) = norm( avg(1.9640173895637054) )
43
43
  # = norm( 0.49100434739092635 )
44
44
  # = 0.49100434739092635 / avg(SUM f(1, 2, 3, 4))
@@ -41,7 +41,7 @@ module TextRank
41
41
  # @option options [Array<Class, Symbol, #filter!>] :rank_filters A list of filters to be applied to the keyword ranks after keyword extraction
42
42
  def initialize(**options)
43
43
  @page_rank_options = {
44
- strategy: options[:strategy] || :dense,
44
+ strategy: options[:strategy] || :sparse,
45
45
  damping: options[:damping],
46
46
  tolerance: options[:tolerance],
47
47
  }
@@ -105,7 +105,8 @@ module TextRank
105
105
  # until all of the top N final keywords (single or collapsed) have been
106
106
  # considered.
107
107
  loop do
108
- single_tokens_to_consider = @tokens.keys.first(@ranks_to_collapse + @to_remove.size - @to_collapse.size) - @to_remove.to_a
108
+ regexp_safe_tokens = @tokens.keys.select { |s| Regexp.escape(s) == s }
109
+ single_tokens_to_consider = regexp_safe_tokens.first(@ranks_to_collapse + @to_remove.size - @to_collapse.size) - @to_remove.to_a
109
110
  scan_text_for_all_permutations_of(single_tokens_to_consider) or break
110
111
  decide_what_to_collapse_and_what_to_remove
111
112
  end
@@ -1,4 +1,4 @@
1
1
  module TextRank
2
2
  # Current gem version
3
- VERSION = '1.1.7'
3
+ VERSION = '1.2.0'
4
4
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_rank
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.7
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - David McCullars
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-07-05 00:00:00.000000000 Z
11
+ date: 2016-10-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
183
183
  version: '0'
184
184
  requirements: []
185
185
  rubyforge_project:
186
- rubygems_version: 2.5.1
186
+ rubygems_version: 2.6.7
187
187
  signing_key:
188
188
  specification_version: 4
189
189
  summary: Implementation of TextRank solution to ranked keyword extraction