text_rank 1.2.3 → 1.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e1daece496ee8bb075ecc3540d6b8ca64d45c740b1ca34529e094283933d9c97
4
- data.tar.gz: bee8ff77c21cffa95d838e91ae09773b290afd4b9c0224415c2d783f63069b1b
3
+ metadata.gz: 57c44c7be2d14f3fb9d2b76212639c6f5416adedeb32e3c9d39a6b90368680bd
4
+ data.tar.gz: 829bf430e9cc9dd942bdd753f801fc01d94929f42be09c6504691d47bc7c9f04
5
5
  SHA512:
6
- metadata.gz: 0e9df6c07d6c8bb94a782b61c7877e7fe2e4dd064645c87a0e636bae2236ffbf92e848426d5d8cb4daa04328a6afc25c0a58de3c14ba316c84bb07caa45801f8
7
- data.tar.gz: e7a10407dce5651a05aa208db1136a3c0cc1082c70152b676d3867408cc8107dc8543080e20b8855659b8b58c4fe0063321fc5e8b976933372ff02d6b597ca82
6
+ metadata.gz: 7bdb293e07cca83ba7665cbe3861360cd3e4b82d64d5cd2167717e509bff6ccf14948436463464a9ab57f92cd97b8e7a918d27d004f3a7c476feb5c55565191f
7
+ data.tar.gz: 446d28deeca6a972fc9b257b8699a859ec4fc481d6f04f31d02215d79a7251b87cfebeaba7335001e7977e974b72c3e79113c754d1749303dc943b3d59c6d62c
@@ -15,6 +15,10 @@ Layout/EmptyLinesAroundModuleBody:
15
15
  Layout/ExtraSpacing:
16
16
  Enabled: false
17
17
 
18
+ Layout/HashAlignment:
19
+ EnforcedHashRocketStyle: table
20
+ EnforcedColonStyle: table
21
+
18
22
  Layout/LineLength:
19
23
  Max: 120
20
24
  Enabled: false
@@ -89,6 +93,9 @@ Style/GuardClause:
89
93
  Style/HashEachMethods:
90
94
  Enabled: true
91
95
 
96
+ Style/HashSyntax:
97
+ Enabled: true
98
+
92
99
  Style/HashTransformKeys:
93
100
  Enabled: true
94
101
 
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "text_rank"
3
+ require 'bundler/setup'
4
+ require 'text_rank'
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +10,5 @@ require "text_rank"
10
10
  # require "pry"
11
11
  # Pry.start
12
12
 
13
- require "irb"
13
+ require 'irb'
14
14
  IRB.start
@@ -1,3 +1,5 @@
1
+ require 'set'
2
+
1
3
  ##
2
4
  # A module for supporting Ruby implementations of PageRank. Rather than rely on
3
5
  # one single implementation, this module allows for multiple implementations that
@@ -19,8 +19,7 @@ module PageRank
19
19
  # @return [Float]
20
20
  def damping=(damping)
21
21
  @damping = damping || 0.85
22
- raise ArgumentError.new('Invalid damping factor') if @damping <= 0 || @damping > 1
23
- @damping
22
+ raise ArgumentError, 'Invalid damping factor' if @damping <= 0 || @damping > 1
24
23
  end
25
24
 
26
25
  # Set the tolerance value
@@ -28,8 +27,7 @@ module PageRank
28
27
  # @return [Float]
29
28
  def tolerance=(tolerance)
30
29
  @tolerance = tolerance || 0.0001
31
- raise ArgumentError.new('Invalid tolerance factor') if @tolerance < 0 || @tolerance > 1
32
- @tolerance
30
+ raise ArgumentError, 'Invalid tolerance factor' if @tolerance.negative? || @tolerance > 1
33
31
  end
34
32
 
35
33
  # Adds a directed (and optionally weighted) edge to the graph
@@ -46,9 +44,12 @@ module PageRank
46
44
  def calculate(max_iterations: -1, **_)
47
45
  ranks = initial_ranks
48
46
  loop do
49
- break if max_iterations == 0
50
- ranks, prev_ranks = calculate_step(ranks), ranks
47
+ break if max_iterations.zero?
48
+
49
+ prev_ranks = ranks
50
+ ranks = calculate_step(ranks)
51
51
  break if distance(ranks, prev_ranks) < @tolerance
52
+
52
53
  max_iterations -= 1
53
54
  end
54
55
  sort_ranks(ranks)
@@ -77,9 +78,9 @@ module PageRank
77
78
  end
78
79
 
79
80
  # Calculate the Euclidean distance from one ranking to the next iteration
80
- def distance(v1, v2)
81
+ def distance(vector1, vector2)
81
82
  sum_squares = node_count.times.reduce(0.0) do |sum, i|
82
- d = v1[i] - v2[i]
83
+ d = vector1[i] - vector2[i]
83
84
  sum + d * d
84
85
  end
85
86
  Math.sqrt(sum_squares)
@@ -32,6 +32,7 @@ module PageRank
32
32
  # @return (see Base#add)
33
33
  def add(source, dest, weight: 1.0)
34
34
  return if source == dest
35
+
35
36
  source_idx = index(source)
36
37
  dest_idx = index(dest)
37
38
  @out_links[source_idx] ||= []
@@ -72,7 +73,7 @@ module PageRank
72
73
 
73
74
  def to_matrix
74
75
  total_out_weights = @out_links.map do |links|
75
- links.compact.reduce(:+) if links
76
+ links&.compact&.reduce(:+)
76
77
  end
77
78
  Matrix.build(node_count, node_count) do |dest_idx, source_idx|
78
79
  total = total_out_weights[source_idx]
@@ -1,5 +1,3 @@
1
- require 'set'
2
-
3
1
  module PageRank
4
2
  ##
5
3
  # Implementation of PageRank using a sparse matrix representation of the graph
@@ -33,6 +31,7 @@ module PageRank
33
31
  # @return (see Base#add)
34
32
  def add(source, dest, weight: 1.0)
35
33
  return false if source == dest
34
+
36
35
  @graph[dest] ||= Set.new
37
36
  @graph[dest] << source
38
37
  @weights[source] ||= Hash.new(0.0)
@@ -53,8 +52,8 @@ module PageRank
53
52
  def initial_ranks
54
53
  @dangling_nodes = @nodes - @weight_totals.keys
55
54
  @normalized_weights = @weights.each_with_object({}) do |(source, values), h|
56
- h[source] = values.each_with_object({}) do |(dest, w), h2|
57
- h2[dest] = w / @weight_totals[source]
55
+ h[source] = values.transform_values do |w|
56
+ w / @weight_totals[source]
58
57
  end
59
58
  end
60
59
  Hash[@nodes.map { |k| [k, 1.0 / node_count.to_f] }]
@@ -69,7 +68,7 @@ module PageRank
69
68
  @dangling_nodes.each do |source|
70
69
  sum += ranks[source] / node_count.to_f
71
70
  end
72
- new_ranks[dest] = @damping * sum + (1 - @damping)/node_count
71
+ new_ranks[dest] = @damping * sum + (1 - @damping) / node_count
73
72
  end
74
73
  end
75
74
 
@@ -79,8 +78,8 @@ module PageRank
79
78
  Hash[ranks.map { |k, v| [k, v / sum] }.sort_by { |_, v| -v }]
80
79
  end
81
80
 
82
- def distance(v1, v2)
83
- super(v1.values.to_a, v2.values.to_a)
81
+ def distance(vector1, vector2)
82
+ super(vector1.values.to_a, vector2.values.to_a)
84
83
  end
85
84
 
86
85
  end
@@ -1,4 +1,6 @@
1
1
  require 'page_rank'
2
+ require 'set'
3
+ require 'yaml'
2
4
 
3
5
  ##
4
6
  # Provides convenience methods for quickly extracting keywords.
@@ -7,14 +9,15 @@ require 'page_rank'
7
9
  ##
8
10
  module TextRank
9
11
 
10
- autoload :CharFilter, 'text_rank/char_filter'
11
- autoload :Fingerprint, 'text_rank/fingerprint'
12
- autoload :GraphStrategy, 'text_rank/graph_strategy'
13
- autoload :KeywordExtractor, 'text_rank/keyword_extractor'
14
- autoload :RankFilter, 'text_rank/rank_filter'
15
- autoload :TokenFilter, 'text_rank/token_filter'
16
- autoload :Tokenizer, 'text_rank/tokenizer'
17
- autoload :VERSION, 'text_rank/version'
12
+ autoload :CharFilter, 'text_rank/char_filter'
13
+ autoload :Fingerprint, 'text_rank/fingerprint'
14
+ autoload :FingerprintOverlap, 'text_rank/fingerprint_overlap'
15
+ autoload :GraphStrategy, 'text_rank/graph_strategy'
16
+ autoload :KeywordExtractor, 'text_rank/keyword_extractor'
17
+ autoload :RankFilter, 'text_rank/rank_filter'
18
+ autoload :TokenFilter, 'text_rank/token_filter'
19
+ autoload :Tokenizer, 'text_rank/tokenizer'
20
+ autoload :VERSION, 'text_rank/version'
18
21
 
19
22
  # A convenience method for quickly extracting keywords from text with default options
20
23
  # @param text [String] text from which to extract keywords
@@ -7,7 +7,7 @@ module TextRank
7
7
  # converting non-ascii characters to related ascii characters, forcing text to
8
8
  # lower case, stripping out HTML, converting English contractions (e.g. "won't")
9
9
  # to the non-contracted form ("will not"), and more.
10
- #
10
+ #
11
11
  # Character filters are applied as a chain, so care should be taken to use them
12
12
  # in the desired order.
13
13
  ##
@@ -1,13 +1,17 @@
1
- # coding: utf-8
2
1
  module TextRank
3
2
  module CharFilter
4
3
  ##
5
4
  # Character filter to transform non-ASCII (unicode) characters into ASCII-friendly versions.
6
5
  #
6
+ # rubocop:disable Style/AsciiComments
7
+ #
7
8
  # = Example
8
9
  #
9
10
  # AsciiFolding.new.filter!("the Perigordian Abbé then made answer, because a poor beggar of the country of Atrébatie heard some foolish things said")
10
11
  # => "the Perigordian Abbe then made answer, because a poor beggar of the country of Atrebatie heard some foolish things said"
12
+ #
13
+ # rubocop:enable Style/AsciiComments
14
+ #
11
15
  ##
12
16
  class AsciiFolding
13
17
 
@@ -5,7 +5,7 @@ module TextRank
5
5
  #
6
6
  # = Example
7
7
  #
8
- # StripPosessive.new.filter!("to loathe ones very being and yet to hold it fast")
8
+ # StripPossessive.new.filter!("to loathe one's very being and yet to hold it fast")
9
9
  # => "to loathe one very being and yet to hold it fast"
10
10
  ##
11
11
  class StripPossessive
@@ -15,7 +15,7 @@ module TextRank
15
15
  # @return [String]
16
16
  def filter!(text)
17
17
  text.gsub!(/([a-z]+)'s\b/) do
18
- $1
18
+ Regexp.last_match(1)
19
19
  end
20
20
  end
21
21
 
@@ -11,143 +11,7 @@ module TextRank
11
11
  class UndoContractions
12
12
 
13
13
  # List of English contractions to undo
14
- CONTRACTIONS = {
15
- "ain't" => "am not",
16
- "amn't" => "am not",
17
- "aren't" => "are not",
18
- "can't" => "can not",
19
- "could've" => "could have",
20
- "couldn't" => "could not",
21
- "couldn't've" => "could not have",
22
- "didn't" => "did not",
23
- "doesn't" => "does not",
24
- "don't" => "do not",
25
- "gonna" => "going to",
26
- "hadn't" => "had not",
27
- "hadn't've" => "had not have",
28
- "hasn't" => "has not",
29
- "haven't" => "have not",
30
- "he'd" => "he had",
31
- "he'd've" => "he would have",
32
- "he'll" => "he shall",
33
- "he's" => "he has",
34
- "he'sn't" => "he has not",
35
- "how'd" => "how did",
36
- "how'll" => "how will",
37
- "how's" => "how has",
38
- "i'd" => "i had",
39
- "i'd've" => "i would have",
40
- "i'll" => "i shall",
41
- "i'm" => "i am",
42
- "i've" => "i have",
43
- "i'ven't" => "i have not",
44
- "isn't" => "is not",
45
- "it'd" => "it had",
46
- "it'd've" => "it would have",
47
- "it'll" => "it shall",
48
- "it's" => "it has",
49
- "it'sn't" => "it has not",
50
- "let's" => "let us",
51
- "ma'am" => "madam",
52
- "mightn't" => "might not",
53
- "mightn't've" => "might not have",
54
- "might've" => "might have",
55
- "mustn't" => "must not",
56
- "must've" => "must have",
57
- "needn't" => "need not",
58
- "not've" => "not have",
59
- "o'clock" => "of the clock",
60
- "ol'" => "old",
61
- "oughtn't" => "ought not",
62
- "shan't" => "shall not",
63
- "she'd" => "she had",
64
- "she'd've" => "she would have",
65
- "she'll" => "she shall",
66
- "she's" => "she has",
67
- "she'sn't" => "she has not",
68
- "should've" => "should have",
69
- "shouldn't" => "should not",
70
- "shouldn't've" => "should not have",
71
- "somebody'd" => "somebody had",
72
- "somebody'd've" => "somebody would have",
73
- "somebody'dn't've" => "somebody would not have",
74
- "somebody'll" => "somebody shall",
75
- "somebody's" => "somebody has",
76
- "someone'd" => "someone had",
77
- "someone'd've" => "someone would have",
78
- "someone'll" => "someone shall",
79
- "someone's" => "someone has",
80
- "something'd" => "something had",
81
- "something'd've" => "something would have",
82
- "something'll" => "something shall",
83
- "something's" => "something has",
84
- "'sup" => "what's up",
85
- "that'll" => "that will",
86
- "that's" => "that has",
87
- "there'd" => "there had",
88
- "there'd've" => "there would have",
89
- "there're" => "there are",
90
- "there's" => "there has",
91
- "they'd" => "they had",
92
- "they'dn't" => "they would not",
93
- "they'dn't've" => "they would not have",
94
- "they'd've" => "they would have",
95
- "they'd'ven't" => "they would have not",
96
- "they'll" => "they shall",
97
- "they'lln't've" => "they will not have",
98
- "they'll'ven't" => "they will have not",
99
- "they're" => "they are",
100
- "they've" => "they have",
101
- "they'ven't" => "they have not",
102
- "'tis" => "it is",
103
- "'twas" => "it was",
104
- "wanna" => "want to",
105
- "wasn't" => "was not",
106
- "we'd" => "we had",
107
- "we'd've" => "we would have",
108
- "we'dn't've" => "we would not have",
109
- "we'll" => "we will",
110
- "we'lln't've" => "we will not have",
111
- "we're" => "we are",
112
- "we've" => "we have",
113
- "weren't" => "were not",
114
- "what'll" => "what shall",
115
- "what're" => "what are",
116
- "what's" => "what has",
117
- "what've" => "what have",
118
- "when's" => "when has",
119
- "where'd" => "where did",
120
- "where's" => "where has",
121
- "where've" => "where have",
122
- "who'd" => "who would",
123
- "who'd've" => "who would have",
124
- "who'll" => "who shall",
125
- "who're" => "who are",
126
- "who's" => "who has",
127
- "who've" => "who have",
128
- "why'll" => "why will",
129
- "why're" => "why are",
130
- "why's" => "why has",
131
- "won't" => "will not",
132
- "won't've" => "will not have",
133
- "would've" => "would have",
134
- "wouldn't" => "would not",
135
- "wouldn't've" => "would not have",
136
- "y'all" => "you all",
137
- "y'all'd've" => "you all would have",
138
- "y'all'dn't've" => "you all would not have",
139
- "y'all'll" => "you all will",
140
- "y'all'lln't" => "you all will not",
141
- "y'all'll've" => "you all will have",
142
- "y'all'll'ven't" => "you all will have not",
143
- "you'd" => "you had",
144
- "you'd've" => "you would have",
145
- "you'll" => "you shall",
146
- "you're" => "you are",
147
- "you'ren't" => "you are not",
148
- "you've" => "you have",
149
- "you'ven't" => "you have not",
150
- }
14
+ CONTRACTIONS = YAML.load_file(File.expand_path('undo_contractions.yml', __dir__))
151
15
 
152
16
  # Perform the filter
153
17
  # @param text [String]
@@ -0,0 +1,135 @@
1
+ ain't: am not
2
+ amn't: am not
3
+ aren't: are not
4
+ can't: can not
5
+ could've: could have
6
+ couldn't: could not
7
+ couldn't've: could not have
8
+ didn't: did not
9
+ doesn't: does not
10
+ don't: do not
11
+ gonna: going to
12
+ hadn't: had not
13
+ hadn't've: had not have
14
+ hasn't: has not
15
+ haven't: have not
16
+ he'd: he had
17
+ he'd've: he would have
18
+ he'll: he shall
19
+ he's: he has
20
+ he'sn't: he has not
21
+ how'd: how did
22
+ how'll: how will
23
+ how's: how has
24
+ i'd: i had
25
+ i'd've: i would have
26
+ i'll: i shall
27
+ i'm: i am
28
+ i've: i have
29
+ i'ven't: i have not
30
+ isn't: is not
31
+ it'd: it had
32
+ it'd've: it would have
33
+ it'll: it shall
34
+ it's: it has
35
+ it'sn't: it has not
36
+ let's: let us
37
+ ma'am: madam
38
+ mightn't: might not
39
+ mightn't've: might not have
40
+ might've: might have
41
+ mustn't: must not
42
+ must've: must have
43
+ needn't: need not
44
+ not've: not have
45
+ o'clock: of the clock
46
+ ol': old
47
+ oughtn't: ought not
48
+ shan't: shall not
49
+ she'd: she had
50
+ she'd've: she would have
51
+ she'll: she shall
52
+ she's: she has
53
+ she'sn't: she has not
54
+ should've: should have
55
+ shouldn't: should not
56
+ shouldn't've: should not have
57
+ somebody'd: somebody had
58
+ somebody'd've: somebody would have
59
+ somebody'dn't've: somebody would not have
60
+ somebody'll: somebody shall
61
+ somebody's: somebody has
62
+ someone'd: someone had
63
+ someone'd've: someone would have
64
+ someone'll: someone shall
65
+ someone's: someone has
66
+ something'd: something had
67
+ something'd've: something would have
68
+ something'll: something shall
69
+ something's: something has
70
+ "'sup": "what's up"
71
+ that'll: that will
72
+ that's: that has
73
+ there'd: there had
74
+ there'd've: there would have
75
+ there're: there are
76
+ there's: there has
77
+ they'd: they had
78
+ they'dn't: they would not
79
+ they'dn't've: they would not have
80
+ they'd've: they would have
81
+ they'd'ven't: they would have not
82
+ they'll: they shall
83
+ they'lln't've: they will not have
84
+ they'll'ven't: they will have not
85
+ they're: they are
86
+ they've: they have
87
+ they'ven't: they have not
88
+ "'tis": it is
89
+ "'twas": it was
90
+ wanna: want to
91
+ wasn't: was not
92
+ we'd: we had
93
+ we'd've: we would have
94
+ we'dn't've: we would not have
95
+ we'll: we will
96
+ we'lln't've: we will not have
97
+ we're: we are
98
+ we've: we have
99
+ weren't: were not
100
+ what'll: what shall
101
+ what're: what are
102
+ what's: what has
103
+ what've: what have
104
+ when's: when has
105
+ where'd: where did
106
+ where's: where has
107
+ where've: where have
108
+ who'd: who would
109
+ who'd've: who would have
110
+ who'll: who shall
111
+ who're: who are
112
+ who's: who has
113
+ who've: who have
114
+ why'll: why will
115
+ why're: why are
116
+ why's: why has
117
+ won't: will not
118
+ won't've: will not have
119
+ would've: would have
120
+ wouldn't: would not
121
+ wouldn't've: would not have
122
+ y'all: you all
123
+ y'all'd've: you all would have
124
+ y'all'dn't've: you all would not have
125
+ y'all'll: you all will
126
+ y'all'lln't: you all will not
127
+ y'all'll've: you all will have
128
+ y'all'll'ven't: you all will have not
129
+ you'd: you had
130
+ you'd've: you would have
131
+ you'll: you shall
132
+ you're: you are
133
+ you'ren't: you are not
134
+ you've: you have
135
+ you'ven't: you have not