RubyGems - text_rank - Versions diffs - 1.1.7 → 1.2.0 - Mend

text_rank 1.1.7 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml +4 -4
data/lib/text_rank/fingerprint.rb +10 -10
data/lib/text_rank/keyword_extractor.rb +1 -1
data/lib/text_rank/rank_filter/collapse_adjacent.rb +2 -1
data/lib/text_rank/version.rb +1 -1
metadata +3 -3

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 1625933f78441107094a4306a488cf0d68f45e53
-  data.tar.gz: 5e605f2ac2210c6e44e21a1c088620c5d491314c
+  metadata.gz: c7c140fafc459b538cdc9fcc84f639e2155c6fbb
+  data.tar.gz: c2e8b24f80414a113ba9d2d93ca26b6a7e1c38a8
 SHA512:
-  metadata.gz: 1703773a7d6d1391ec81f26bac6aa409344d8f036352b24e0acf37511e141da88cc560c0bbc525563f5b2bd277ea528c9d30bb41f9768afb1987ae9acee42af6
-  data.tar.gz: fc23bc9f6d63a0361d88d804bc64448613de5aa6c22d1f5b223630bee18c485d3d35b7a12876bb5b4c25dda5efa537aad83ed2e686e12a04a4fed33d88e2147d
+  metadata.gz: ab7b1875f82d42a51243f74b827f90dc0e23b3ad68479b7dca4a78058423bf63e74d50361c952809e1396ddc70423fbafcee090344bbb47cd5095f94cd09e435
+  data.tar.gz: 7fb37752476eb9f0fefab815af32522eb3a748bb39fdeace39af08446c9a6e544c96fe187bcf541c27947493d6c7aafd69351af1c6a6438f987926396189adde

data/lib/text_rank/fingerprint.rb CHANGED Viewed

@@ -10,35 +10,35 @@ module TextRank
   # significant keywords.  But to prevent less significant keywords from being
   # completely ignored we apply an inverse log linear transformation to each of the
   # N prefixes.
-  #
+  #
   # For example, consider the following comparison:
-  #
+  #
   #   town man empty found
   #   vs.
   #   general empty found jar
-  #
+  #
   # The first pass considers just the first keywords: town vs. general.  As these
   # are different, they contribute 0.
-  #
+  #
   # The second pass considers the first two keywords: town man vs general empty.
   # Again, no overlap, so they contribute 0.
-  #
+  #
   # The third pass considers the first three keywords: town man empty vs general
   # empty found.  Here we have one overlap: empty. This contributes 1.
-  #
+  #
   # The fourth pass considers all, and there are two overlaps:  empty & found.  This
   # contributes 2.
-  #
+  #
   # We can represent the overlaps as the vector [0, 0, 1, 2].  Then we will apply
   # the inverse log linear transformation defined by:
-  #
+  #
   #   f(x_i) = x_i / ln(i + 1)
   #          = [0, 0, 1 / ln(4), 2 / ln(5)]
   #          = [0, 0, 0.7213475204444817, 1.2426698691192237]
-  #
+  #
   # Finally we take the average of the transformed vector and normalize it (to
   # ensure a final value between 0.0 and 1.0):
-  #
+  #
   #   norm(avg(SUM f(x_i))) = norm( avg(1.9640173895637054) )
   #                         = norm( 0.49100434739092635 )
   #                         = 0.49100434739092635 / avg(SUM f(1, 2, 3, 4))

data/lib/text_rank/keyword_extractor.rb CHANGED Viewed

@@ -41,7 +41,7 @@ module TextRank
     # @option options [Array<Class, Symbol, #filter!>]  :rank_filters A list of filters to be applied to the keyword ranks after keyword extraction
     def initialize(**options)
       @page_rank_options = {
-        strategy: options[:strategy] || :dense,
+        strategy: options[:strategy] || :sparse,
         damping: options[:damping],
         tolerance: options[:tolerance],
       }

data/lib/text_rank/rank_filter/collapse_adjacent.rb CHANGED Viewed

@@ -105,7 +105,8 @@ module TextRank
           # until all of the top N final keywords (single or collapsed) have been
           # considered.
           loop do
-            single_tokens_to_consider = @tokens.keys.first(@ranks_to_collapse + @to_remove.size - @to_collapse.size) - @to_remove.to_a
+            regexp_safe_tokens = @tokens.keys.select { |s| Regexp.escape(s) == s }
+            single_tokens_to_consider = regexp_safe_tokens.first(@ranks_to_collapse + @to_remove.size - @to_collapse.size) - @to_remove.to_a
             scan_text_for_all_permutations_of(single_tokens_to_consider) or break
             decide_what_to_collapse_and_what_to_remove
           end

data/lib/text_rank/version.rb CHANGED Viewed

@@ -1,4 +1,4 @@
 module TextRank
   # Current gem version
-  VERSION = '1.1.7'
+  VERSION = '1.2.0'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: text_rank
 version: !ruby/object:Gem::Version
-  version: 1.1.7
+  version: 1.2.0
 platform: ruby
 authors:
 - David McCullars
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-07-05 00:00:00.000000000 Z
+date: 2016-10-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -183,7 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.5.1
+rubygems_version: 2.6.7
 signing_key:
 specification_version: 4
 summary: Implementation of TextRank solution to ranked keyword extraction