text_rank 1.2.3 → 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e1daece496ee8bb075ecc3540d6b8ca64d45c740b1ca34529e094283933d9c97
4
- data.tar.gz: bee8ff77c21cffa95d838e91ae09773b290afd4b9c0224415c2d783f63069b1b
3
+ metadata.gz: 57c44c7be2d14f3fb9d2b76212639c6f5416adedeb32e3c9d39a6b90368680bd
4
+ data.tar.gz: 829bf430e9cc9dd942bdd753f801fc01d94929f42be09c6504691d47bc7c9f04
5
5
  SHA512:
6
- metadata.gz: 0e9df6c07d6c8bb94a782b61c7877e7fe2e4dd064645c87a0e636bae2236ffbf92e848426d5d8cb4daa04328a6afc25c0a58de3c14ba316c84bb07caa45801f8
7
- data.tar.gz: e7a10407dce5651a05aa208db1136a3c0cc1082c70152b676d3867408cc8107dc8543080e20b8855659b8b58c4fe0063321fc5e8b976933372ff02d6b597ca82
6
+ metadata.gz: 7bdb293e07cca83ba7665cbe3861360cd3e4b82d64d5cd2167717e509bff6ccf14948436463464a9ab57f92cd97b8e7a918d27d004f3a7c476feb5c55565191f
7
+ data.tar.gz: 446d28deeca6a972fc9b257b8699a859ec4fc481d6f04f31d02215d79a7251b87cfebeaba7335001e7977e974b72c3e79113c754d1749303dc943b3d59c6d62c
@@ -15,6 +15,10 @@ Layout/EmptyLinesAroundModuleBody:
15
15
  Layout/ExtraSpacing:
16
16
  Enabled: false
17
17
 
18
+ Layout/HashAlignment:
19
+ EnforcedHashRocketStyle: table
20
+ EnforcedColonStyle: table
21
+
18
22
  Layout/LineLength:
19
23
  Max: 120
20
24
  Enabled: false
@@ -89,6 +93,9 @@ Style/GuardClause:
89
93
  Style/HashEachMethods:
90
94
  Enabled: true
91
95
 
96
+ Style/HashSyntax:
97
+ Enabled: true
98
+
92
99
  Style/HashTransformKeys:
93
100
  Enabled: true
94
101
 
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require "bundler/setup"
4
- require "text_rank"
3
+ require 'bundler/setup'
4
+ require 'text_rank'
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +10,5 @@ require "text_rank"
10
10
  # require "pry"
11
11
  # Pry.start
12
12
 
13
- require "irb"
13
+ require 'irb'
14
14
  IRB.start
@@ -1,3 +1,5 @@
1
+ require 'set'
2
+
1
3
  ##
2
4
  # A module for supporting Ruby implementations of PageRank. Rather than rely on
3
5
  # one single implementation, this module allows for multiple implementations that
@@ -19,8 +19,7 @@ module PageRank
19
19
  # @return [Float]
20
20
  def damping=(damping)
21
21
  @damping = damping || 0.85
22
- raise ArgumentError.new('Invalid damping factor') if @damping <= 0 || @damping > 1
23
- @damping
22
+ raise ArgumentError, 'Invalid damping factor' if @damping <= 0 || @damping > 1
24
23
  end
25
24
 
26
25
  # Set the tolerance value
@@ -28,8 +27,7 @@ module PageRank
28
27
  # @return [Float]
29
28
  def tolerance=(tolerance)
30
29
  @tolerance = tolerance || 0.0001
31
- raise ArgumentError.new('Invalid tolerance factor') if @tolerance < 0 || @tolerance > 1
32
- @tolerance
30
+ raise ArgumentError, 'Invalid tolerance factor' if @tolerance.negative? || @tolerance > 1
33
31
  end
34
32
 
35
33
  # Adds a directed (and optionally weighted) edge to the graph
@@ -46,9 +44,12 @@ module PageRank
46
44
  def calculate(max_iterations: -1, **_)
47
45
  ranks = initial_ranks
48
46
  loop do
49
- break if max_iterations == 0
50
- ranks, prev_ranks = calculate_step(ranks), ranks
47
+ break if max_iterations.zero?
48
+
49
+ prev_ranks = ranks
50
+ ranks = calculate_step(ranks)
51
51
  break if distance(ranks, prev_ranks) < @tolerance
52
+
52
53
  max_iterations -= 1
53
54
  end
54
55
  sort_ranks(ranks)
@@ -77,9 +78,9 @@ module PageRank
77
78
  end
78
79
 
79
80
  # Calculate the Euclidean distance from one ranking to the next iteration
80
- def distance(v1, v2)
81
+ def distance(vector1, vector2)
81
82
  sum_squares = node_count.times.reduce(0.0) do |sum, i|
82
- d = v1[i] - v2[i]
83
+ d = vector1[i] - vector2[i]
83
84
  sum + d * d
84
85
  end
85
86
  Math.sqrt(sum_squares)
@@ -32,6 +32,7 @@ module PageRank
32
32
  # @return (see Base#add)
33
33
  def add(source, dest, weight: 1.0)
34
34
  return if source == dest
35
+
35
36
  source_idx = index(source)
36
37
  dest_idx = index(dest)
37
38
  @out_links[source_idx] ||= []
@@ -72,7 +73,7 @@ module PageRank
72
73
 
73
74
  def to_matrix
74
75
  total_out_weights = @out_links.map do |links|
75
- links.compact.reduce(:+) if links
76
+ links&.compact&.reduce(:+)
76
77
  end
77
78
  Matrix.build(node_count, node_count) do |dest_idx, source_idx|
78
79
  total = total_out_weights[source_idx]
@@ -1,5 +1,3 @@
1
- require 'set'
2
-
3
1
  module PageRank
4
2
  ##
5
3
  # Implementation of PageRank using a sparse matrix representation of the graph
@@ -33,6 +31,7 @@ module PageRank
33
31
  # @return (see Base#add)
34
32
  def add(source, dest, weight: 1.0)
35
33
  return false if source == dest
34
+
36
35
  @graph[dest] ||= Set.new
37
36
  @graph[dest] << source
38
37
  @weights[source] ||= Hash.new(0.0)
@@ -53,8 +52,8 @@ module PageRank
53
52
  def initial_ranks
54
53
  @dangling_nodes = @nodes - @weight_totals.keys
55
54
  @normalized_weights = @weights.each_with_object({}) do |(source, values), h|
56
- h[source] = values.each_with_object({}) do |(dest, w), h2|
57
- h2[dest] = w / @weight_totals[source]
55
+ h[source] = values.transform_values do |w|
56
+ w / @weight_totals[source]
58
57
  end
59
58
  end
60
59
  Hash[@nodes.map { |k| [k, 1.0 / node_count.to_f] }]
@@ -69,7 +68,7 @@ module PageRank
69
68
  @dangling_nodes.each do |source|
70
69
  sum += ranks[source] / node_count.to_f
71
70
  end
72
- new_ranks[dest] = @damping * sum + (1 - @damping)/node_count
71
+ new_ranks[dest] = @damping * sum + (1 - @damping) / node_count
73
72
  end
74
73
  end
75
74
 
@@ -79,8 +78,8 @@ module PageRank
79
78
  Hash[ranks.map { |k, v| [k, v / sum] }.sort_by { |_, v| -v }]
80
79
  end
81
80
 
82
- def distance(v1, v2)
83
- super(v1.values.to_a, v2.values.to_a)
81
+ def distance(vector1, vector2)
82
+ super(vector1.values.to_a, vector2.values.to_a)
84
83
  end
85
84
 
86
85
  end
@@ -1,4 +1,6 @@
1
1
  require 'page_rank'
2
+ require 'set'
3
+ require 'yaml'
2
4
 
3
5
  ##
4
6
  # Provides convenience methods for quickly extracting keywords.
@@ -7,14 +9,15 @@ require 'page_rank'
7
9
  ##
8
10
  module TextRank
9
11
 
10
- autoload :CharFilter, 'text_rank/char_filter'
11
- autoload :Fingerprint, 'text_rank/fingerprint'
12
- autoload :GraphStrategy, 'text_rank/graph_strategy'
13
- autoload :KeywordExtractor, 'text_rank/keyword_extractor'
14
- autoload :RankFilter, 'text_rank/rank_filter'
15
- autoload :TokenFilter, 'text_rank/token_filter'
16
- autoload :Tokenizer, 'text_rank/tokenizer'
17
- autoload :VERSION, 'text_rank/version'
12
+ autoload :CharFilter, 'text_rank/char_filter'
13
+ autoload :Fingerprint, 'text_rank/fingerprint'
14
+ autoload :FingerprintOverlap, 'text_rank/fingerprint_overlap'
15
+ autoload :GraphStrategy, 'text_rank/graph_strategy'
16
+ autoload :KeywordExtractor, 'text_rank/keyword_extractor'
17
+ autoload :RankFilter, 'text_rank/rank_filter'
18
+ autoload :TokenFilter, 'text_rank/token_filter'
19
+ autoload :Tokenizer, 'text_rank/tokenizer'
20
+ autoload :VERSION, 'text_rank/version'
18
21
 
19
22
  # A convenience method for quickly extracting keywords from text with default options
20
23
  # @param text [String] text from which to extract keywords
@@ -7,7 +7,7 @@ module TextRank
7
7
  # converting non-ascii characters to related ascii characters, forcing text to
8
8
  # lower case, stripping out HTML, converting English contractions (e.g. "won't")
9
9
  # to the non-contracted form ("will not"), and more.
10
- #
10
+ #
11
11
  # Character filters are applied as a chain, so care should be taken to use them
12
12
  # in the desired order.
13
13
  ##
@@ -1,13 +1,17 @@
1
- # coding: utf-8
2
1
  module TextRank
3
2
  module CharFilter
4
3
  ##
5
4
  # Character filter to transform non-ASCII (unicode) characters into ASCII-friendly versions.
6
5
  #
6
+ # rubocop:disable Style/AsciiComments
7
+ #
7
8
  # = Example
8
9
  #
9
10
  # AsciiFolding.new.filter!("the Perigordian Abbé then made answer, because a poor beggar of the country of Atrébatie heard some foolish things said")
10
11
  # => "the Perigordian Abbe then made answer, because a poor beggar of the country of Atrebatie heard some foolish things said"
12
+ #
13
+ # rubocop:enable Style/AsciiComments
14
+ #
11
15
  ##
12
16
  class AsciiFolding
13
17
 
@@ -5,7 +5,7 @@ module TextRank
5
5
  #
6
6
  # = Example
7
7
  #
8
- # StripPosessive.new.filter!("to loathe ones very being and yet to hold it fast")
8
+ # StripPossessive.new.filter!("to loathe one's very being and yet to hold it fast")
9
9
  # => "to loathe one very being and yet to hold it fast"
10
10
  ##
11
11
  class StripPossessive
@@ -15,7 +15,7 @@ module TextRank
15
15
  # @return [String]
16
16
  def filter!(text)
17
17
  text.gsub!(/([a-z]+)'s\b/) do
18
- $1
18
+ Regexp.last_match(1)
19
19
  end
20
20
  end
21
21
 
@@ -11,143 +11,7 @@ module TextRank
11
11
  class UndoContractions
12
12
 
13
13
  # List of English contractions to undo
14
- CONTRACTIONS = {
15
- "ain't" => "am not",
16
- "amn't" => "am not",
17
- "aren't" => "are not",
18
- "can't" => "can not",
19
- "could've" => "could have",
20
- "couldn't" => "could not",
21
- "couldn't've" => "could not have",
22
- "didn't" => "did not",
23
- "doesn't" => "does not",
24
- "don't" => "do not",
25
- "gonna" => "going to",
26
- "hadn't" => "had not",
27
- "hadn't've" => "had not have",
28
- "hasn't" => "has not",
29
- "haven't" => "have not",
30
- "he'd" => "he had",
31
- "he'd've" => "he would have",
32
- "he'll" => "he shall",
33
- "he's" => "he has",
34
- "he'sn't" => "he has not",
35
- "how'd" => "how did",
36
- "how'll" => "how will",
37
- "how's" => "how has",
38
- "i'd" => "i had",
39
- "i'd've" => "i would have",
40
- "i'll" => "i shall",
41
- "i'm" => "i am",
42
- "i've" => "i have",
43
- "i'ven't" => "i have not",
44
- "isn't" => "is not",
45
- "it'd" => "it had",
46
- "it'd've" => "it would have",
47
- "it'll" => "it shall",
48
- "it's" => "it has",
49
- "it'sn't" => "it has not",
50
- "let's" => "let us",
51
- "ma'am" => "madam",
52
- "mightn't" => "might not",
53
- "mightn't've" => "might not have",
54
- "might've" => "might have",
55
- "mustn't" => "must not",
56
- "must've" => "must have",
57
- "needn't" => "need not",
58
- "not've" => "not have",
59
- "o'clock" => "of the clock",
60
- "ol'" => "old",
61
- "oughtn't" => "ought not",
62
- "shan't" => "shall not",
63
- "she'd" => "she had",
64
- "she'd've" => "she would have",
65
- "she'll" => "she shall",
66
- "she's" => "she has",
67
- "she'sn't" => "she has not",
68
- "should've" => "should have",
69
- "shouldn't" => "should not",
70
- "shouldn't've" => "should not have",
71
- "somebody'd" => "somebody had",
72
- "somebody'd've" => "somebody would have",
73
- "somebody'dn't've" => "somebody would not have",
74
- "somebody'll" => "somebody shall",
75
- "somebody's" => "somebody has",
76
- "someone'd" => "someone had",
77
- "someone'd've" => "someone would have",
78
- "someone'll" => "someone shall",
79
- "someone's" => "someone has",
80
- "something'd" => "something had",
81
- "something'd've" => "something would have",
82
- "something'll" => "something shall",
83
- "something's" => "something has",
84
- "'sup" => "what's up",
85
- "that'll" => "that will",
86
- "that's" => "that has",
87
- "there'd" => "there had",
88
- "there'd've" => "there would have",
89
- "there're" => "there are",
90
- "there's" => "there has",
91
- "they'd" => "they had",
92
- "they'dn't" => "they would not",
93
- "they'dn't've" => "they would not have",
94
- "they'd've" => "they would have",
95
- "they'd'ven't" => "they would have not",
96
- "they'll" => "they shall",
97
- "they'lln't've" => "they will not have",
98
- "they'll'ven't" => "they will have not",
99
- "they're" => "they are",
100
- "they've" => "they have",
101
- "they'ven't" => "they have not",
102
- "'tis" => "it is",
103
- "'twas" => "it was",
104
- "wanna" => "want to",
105
- "wasn't" => "was not",
106
- "we'd" => "we had",
107
- "we'd've" => "we would have",
108
- "we'dn't've" => "we would not have",
109
- "we'll" => "we will",
110
- "we'lln't've" => "we will not have",
111
- "we're" => "we are",
112
- "we've" => "we have",
113
- "weren't" => "were not",
114
- "what'll" => "what shall",
115
- "what're" => "what are",
116
- "what's" => "what has",
117
- "what've" => "what have",
118
- "when's" => "when has",
119
- "where'd" => "where did",
120
- "where's" => "where has",
121
- "where've" => "where have",
122
- "who'd" => "who would",
123
- "who'd've" => "who would have",
124
- "who'll" => "who shall",
125
- "who're" => "who are",
126
- "who's" => "who has",
127
- "who've" => "who have",
128
- "why'll" => "why will",
129
- "why're" => "why are",
130
- "why's" => "why has",
131
- "won't" => "will not",
132
- "won't've" => "will not have",
133
- "would've" => "would have",
134
- "wouldn't" => "would not",
135
- "wouldn't've" => "would not have",
136
- "y'all" => "you all",
137
- "y'all'd've" => "you all would have",
138
- "y'all'dn't've" => "you all would not have",
139
- "y'all'll" => "you all will",
140
- "y'all'lln't" => "you all will not",
141
- "y'all'll've" => "you all will have",
142
- "y'all'll'ven't" => "you all will have not",
143
- "you'd" => "you had",
144
- "you'd've" => "you would have",
145
- "you'll" => "you shall",
146
- "you're" => "you are",
147
- "you'ren't" => "you are not",
148
- "you've" => "you have",
149
- "you'ven't" => "you have not",
150
- }
14
+ CONTRACTIONS = YAML.load_file(File.expand_path('undo_contractions.yml', __dir__))
151
15
 
152
16
  # Perform the filter
153
17
  # @param text [String]
@@ -0,0 +1,135 @@
1
+ ain't: am not
2
+ amn't: am not
3
+ aren't: are not
4
+ can't: can not
5
+ could've: could have
6
+ couldn't: could not
7
+ couldn't've: could not have
8
+ didn't: did not
9
+ doesn't: does not
10
+ don't: do not
11
+ gonna: going to
12
+ hadn't: had not
13
+ hadn't've: had not have
14
+ hasn't: has not
15
+ haven't: have not
16
+ he'd: he had
17
+ he'd've: he would have
18
+ he'll: he shall
19
+ he's: he has
20
+ he'sn't: he has not
21
+ how'd: how did
22
+ how'll: how will
23
+ how's: how has
24
+ i'd: i had
25
+ i'd've: i would have
26
+ i'll: i shall
27
+ i'm: i am
28
+ i've: i have
29
+ i'ven't: i have not
30
+ isn't: is not
31
+ it'd: it had
32
+ it'd've: it would have
33
+ it'll: it shall
34
+ it's: it has
35
+ it'sn't: it has not
36
+ let's: let us
37
+ ma'am: madam
38
+ mightn't: might not
39
+ mightn't've: might not have
40
+ might've: might have
41
+ mustn't: must not
42
+ must've: must have
43
+ needn't: need not
44
+ not've: not have
45
+ o'clock: of the clock
46
+ ol': old
47
+ oughtn't: ought not
48
+ shan't: shall not
49
+ she'd: she had
50
+ she'd've: she would have
51
+ she'll: she shall
52
+ she's: she has
53
+ she'sn't: she has not
54
+ should've: should have
55
+ shouldn't: should not
56
+ shouldn't've: should not have
57
+ somebody'd: somebody had
58
+ somebody'd've: somebody would have
59
+ somebody'dn't've: somebody would not have
60
+ somebody'll: somebody shall
61
+ somebody's: somebody has
62
+ someone'd: someone had
63
+ someone'd've: someone would have
64
+ someone'll: someone shall
65
+ someone's: someone has
66
+ something'd: something had
67
+ something'd've: something would have
68
+ something'll: something shall
69
+ something's: something has
70
+ "'sup": "what's up"
71
+ that'll: that will
72
+ that's: that has
73
+ there'd: there had
74
+ there'd've: there would have
75
+ there're: there are
76
+ there's: there has
77
+ they'd: they had
78
+ they'dn't: they would not
79
+ they'dn't've: they would not have
80
+ they'd've: they would have
81
+ they'd'ven't: they would have not
82
+ they'll: they shall
83
+ they'lln't've: they will not have
84
+ they'll'ven't: they will have not
85
+ they're: they are
86
+ they've: they have
87
+ they'ven't: they have not
88
+ "'tis": it is
89
+ "'twas": it was
90
+ wanna: want to
91
+ wasn't: was not
92
+ we'd: we had
93
+ we'd've: we would have
94
+ we'dn't've: we would not have
95
+ we'll: we will
96
+ we'lln't've: we will not have
97
+ we're: we are
98
+ we've: we have
99
+ weren't: were not
100
+ what'll: what shall
101
+ what're: what are
102
+ what's: what has
103
+ what've: what have
104
+ when's: when has
105
+ where'd: where did
106
+ where's: where has
107
+ where've: where have
108
+ who'd: who would
109
+ who'd've: who would have
110
+ who'll: who shall
111
+ who're: who are
112
+ who's: who has
113
+ who've: who have
114
+ why'll: why will
115
+ why're: why are
116
+ why's: why has
117
+ won't: will not
118
+ won't've: will not have
119
+ would've: would have
120
+ wouldn't: would not
121
+ wouldn't've: would not have
122
+ y'all: you all
123
+ y'all'd've: you all would have
124
+ y'all'dn't've: you all would not have
125
+ y'all'll: you all will
126
+ y'all'lln't: you all will not
127
+ y'all'll've: you all will have
128
+ y'all'll'ven't: you all will have not
129
+ you'd: you had
130
+ you'd've: you would have
131
+ you'll: you shall
132
+ you're: you are
133
+ you'ren't: you are not
134
+ you've: you have
135
+ you'ven't: you have not