text_rank 1.2.3 → 1.2.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +7 -0
- data/bin/console +3 -3
- data/lib/page_rank.rb +2 -0
- data/lib/page_rank/base.rb +9 -8
- data/lib/page_rank/dense.rb +2 -1
- data/lib/page_rank/sparse.rb +6 -7
- data/lib/text_rank.rb +11 -8
- data/lib/text_rank/char_filter.rb +1 -1
- data/lib/text_rank/char_filter/ascii_folding.rb +5 -1
- data/lib/text_rank/char_filter/strip_possessive.rb +2 -2
- data/lib/text_rank/char_filter/undo_contractions.rb +1 -137
- data/lib/text_rank/char_filter/undo_contractions.yml +135 -0
- data/lib/text_rank/fingerprint.rb +10 -18
- data/lib/text_rank/fingerprint_overlap.rb +55 -0
- data/lib/text_rank/graph_strategy/coocurrence.rb +15 -6
- data/lib/text_rank/keyword_extractor.rb +19 -21
- data/lib/text_rank/rank_filter/collapse_adjacent.rb +48 -25
- data/lib/text_rank/rank_filter/normalize_probability.rb +2 -1
- data/lib/text_rank/rank_filter/normalize_unit_vector.rb +2 -1
- data/lib/text_rank/token_filter/part_of_speech.rb +0 -1
- data/lib/text_rank/token_filter/stopwords.rb +1 -321
- data/lib/text_rank/token_filter/stopwords.yml +317 -0
- data/lib/text_rank/tokenizer.rb +1 -1
- data/lib/text_rank/tokenizer/money.rb +11 -6
- data/lib/text_rank/tokenizer/number.rb +4 -3
- data/lib/text_rank/tokenizer/punctuation.rb +4 -1
- data/lib/text_rank/tokenizer/url.rb +3 -0
- data/lib/text_rank/tokenizer/whitespace.rb +4 -1
- data/lib/text_rank/tokenizer/word.rb +5 -2
- data/lib/text_rank/version.rb +3 -1
- metadata +4 -1
@@ -1,14 +1,17 @@
|
|
1
1
|
module TextRank
|
2
2
|
module Tokenizer
|
3
|
+
|
3
4
|
##
|
4
5
|
# A tokenizer regex that preserves a non-space, non-punctuation "word". It does
|
5
6
|
# allow hyphens and numerals, but the first character must be an A-Z character.
|
6
7
|
##
|
7
|
-
|
8
|
+
# rubocop:disable Naming/ConstantName
|
9
|
+
Word = /
|
8
10
|
(
|
9
11
|
[a-z][a-z0-9-]*
|
10
12
|
)
|
11
|
-
|
13
|
+
/xi
|
14
|
+
# rubocop:enable Naming/ConstantName
|
12
15
|
|
13
16
|
end
|
14
17
|
end
|
data/lib/text_rank/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_rank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.3
|
4
|
+
version: 1.2.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David McCullars
|
@@ -141,7 +141,9 @@ files:
|
|
141
141
|
- lib/text_rank/char_filter/strip_html.rb
|
142
142
|
- lib/text_rank/char_filter/strip_possessive.rb
|
143
143
|
- lib/text_rank/char_filter/undo_contractions.rb
|
144
|
+
- lib/text_rank/char_filter/undo_contractions.yml
|
144
145
|
- lib/text_rank/fingerprint.rb
|
146
|
+
- lib/text_rank/fingerprint_overlap.rb
|
145
147
|
- lib/text_rank/graph_strategy.rb
|
146
148
|
- lib/text_rank/graph_strategy/coocurrence.rb
|
147
149
|
- lib/text_rank/keyword_extractor.rb
|
@@ -154,6 +156,7 @@ files:
|
|
154
156
|
- lib/text_rank/token_filter/min_length.rb
|
155
157
|
- lib/text_rank/token_filter/part_of_speech.rb
|
156
158
|
- lib/text_rank/token_filter/stopwords.rb
|
159
|
+
- lib/text_rank/token_filter/stopwords.yml
|
157
160
|
- lib/text_rank/tokenizer.rb
|
158
161
|
- lib/text_rank/tokenizer/money.rb
|
159
162
|
- lib/text_rank/tokenizer/number.rb
|