text_rank 1.1.6 → 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (38)
  1. checksums.yaml +5 -5
  2. data/.codeclimate.yml +1 -6
  3. data/.rubocop.yml +60 -1075
  4. data/.ruby-version +1 -1
  5. data/.travis.yml +13 -5
  6. data/{LICENSE.txt → LICENSE} +0 -0
  7. data/README.md +2 -1
  8. data/bin/console +3 -3
  9. data/lib/page_rank.rb +2 -0
  10. data/lib/page_rank/base.rb +9 -8
  11. data/lib/page_rank/dense.rb +2 -1
  12. data/lib/page_rank/sparse.rb +6 -7
  13. data/lib/text_rank.rb +15 -7
  14. data/lib/text_rank/char_filter.rb +1 -1
  15. data/lib/text_rank/char_filter/ascii_folding.rb +5 -1
  16. data/lib/text_rank/char_filter/strip_possessive.rb +2 -2
  17. data/lib/text_rank/char_filter/undo_contractions.rb +1 -137
  18. data/lib/text_rank/char_filter/undo_contractions.yml +135 -0
  19. data/lib/text_rank/fingerprint.rb +91 -0
  20. data/lib/text_rank/fingerprint_overlap.rb +55 -0
  21. data/lib/text_rank/graph_strategy/coocurrence.rb +15 -6
  22. data/lib/text_rank/keyword_extractor.rb +19 -21
  23. data/lib/text_rank/rank_filter/collapse_adjacent.rb +53 -25
  24. data/lib/text_rank/rank_filter/normalize_probability.rb +2 -1
  25. data/lib/text_rank/rank_filter/normalize_unit_vector.rb +2 -1
  26. data/lib/text_rank/token_filter/part_of_speech.rb +0 -1
  27. data/lib/text_rank/token_filter/stopwords.rb +1 -321
  28. data/lib/text_rank/token_filter/stopwords.yml +317 -0
  29. data/lib/text_rank/tokenizer.rb +1 -1
  30. data/lib/text_rank/tokenizer/money.rb +11 -6
  31. data/lib/text_rank/tokenizer/number.rb +4 -3
  32. data/lib/text_rank/tokenizer/punctuation.rb +4 -1
  33. data/lib/text_rank/tokenizer/url.rb +3 -0
  34. data/lib/text_rank/tokenizer/whitespace.rb +4 -1
  35. data/lib/text_rank/tokenizer/word.rb +5 -2
  36. data/lib/text_rank/version.rb +3 -1
  37. data/text_rank.gemspec +9 -10
  38. metadata +38 -34
@@ -1,14 +1,17 @@
1
1
  module TextRank
2
2
  module Tokenizer
3
+
3
4
  ##
4
5
  # A tokenizer regex that preserves a non-space, non-punctuation "word". It does
5
6
  # allow hyphens and numerals, but the first character must be an A-Z character.
6
7
  ##
7
- Word = %r{
8
+ # rubocop:disable Naming/ConstantName
9
+ Word = /
8
10
  (
9
11
  [a-z][a-z0-9-]*
10
12
  )
11
- }xi
13
+ /xi
14
+ # rubocop:enable Naming/ConstantName
12
15
 
13
16
  end
14
17
  end
@@ -1,4 +1,6 @@
1
1
  module TextRank
2
+
2
3
  # Current gem version
3
- VERSION = '1.1.6'
4
+ VERSION = '1.2.4'
5
+
4
6
  end
@@ -1,4 +1,3 @@
1
- # coding: utf-8
2
1
  lib = File.expand_path('../lib', __FILE__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require 'text_rank/version'
@@ -9,8 +8,8 @@ Gem::Specification.new do |spec|
9
8
  spec.authors = ['David McCullars']
10
9
  spec.email = ['david.mccullars@gmail.com']
11
10
 
12
- spec.summary = %q{Implementation of TextRank solution to ranked keyword extraction}
13
- spec.description = %q{Implementation of TextRank solution to ranked keyword extraction. See https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf}
11
+ spec.summary = 'Implementation of TextRank solution to ranked keyword extraction'
12
+ spec.description = 'Implementation of TextRank solution to ranked keyword extraction. See https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf'
14
13
  spec.homepage = 'https://github.com/david-mccullars/text_rank'
15
14
  spec.license = 'MIT'
16
15
 
@@ -19,12 +18,12 @@ Gem::Specification.new do |spec|
19
18
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
20
19
  spec.require_paths = ['lib']
21
20
 
22
- spec.add_development_dependency 'bundler', '~> 1.11'
23
- spec.add_development_dependency 'rake', '~> 10.0'
24
- spec.add_development_dependency 'rspec', '~> 3.0'
25
- spec.add_development_dependency 'simplecov', '~> 0.11'
26
- spec.add_development_dependency 'codeclimate-test-reporter'
21
+ spec.add_development_dependency 'bundler'
22
+ spec.add_development_dependency 'rake'
23
+ spec.add_development_dependency 'rspec'
24
+ spec.add_development_dependency 'rubocop'
25
+ spec.add_development_dependency 'simplecov', '~> 0.17.0' # 0.18 not supported by code climate
27
26
 
28
- spec.add_development_dependency 'engtagger', '~> 0.2.0' # Optional runtime dependency but needed for specs
29
- spec.add_development_dependency 'nokogiri', '~> 1.0' # Optional runtime dependency but needed for specs
27
+ spec.add_development_dependency 'engtagger' # Optional runtime dependency but needed for specs
28
+ spec.add_development_dependency 'nokogiri' # Optional runtime dependency but needed for specs
30
29
  end
metadata CHANGED
@@ -1,113 +1,113 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_rank
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.6
4
+ version: 1.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - David McCullars
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-05-24 00:00:00.000000000 Z
11
+ date: 2020-06-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: '1.11'
19
+ version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
- version: '1.11'
26
+ version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: '0'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ">="
46
46
  - !ruby/object:Gem::Version
47
- version: '3.0'
47
+ version: '0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ">="
53
53
  - !ruby/object:Gem::Version
54
- version: '3.0'
54
+ version: '0'
55
55
  - !ruby/object:Gem::Dependency
56
- name: simplecov
56
+ name: rubocop
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
- version: '0.11'
61
+ version: '0'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ">="
67
67
  - !ruby/object:Gem::Version
68
- version: '0.11'
68
+ version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: codeclimate-test-reporter
70
+ name: simplecov
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: '0'
75
+ version: 0.17.0
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: '0'
82
+ version: 0.17.0
83
83
  - !ruby/object:Gem::Dependency
84
84
  name: engtagger
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - "~>"
87
+ - - ">="
88
88
  - !ruby/object:Gem::Version
89
- version: 0.2.0
89
+ version: '0'
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - "~>"
94
+ - - ">="
95
95
  - !ruby/object:Gem::Version
96
- version: 0.2.0
96
+ version: '0'
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: nokogiri
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - "~>"
101
+ - - ">="
102
102
  - !ruby/object:Gem::Version
103
- version: '1.0'
103
+ version: '0'
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - "~>"
108
+ - - ">="
109
109
  - !ruby/object:Gem::Version
110
- version: '1.0'
110
+ version: '0'
111
111
  description: Implementation of TextRank solution to ranked keyword extraction. See
112
112
  https://web.eecs.umich.edu/~mihalcea/papers/mihalcea.emnlp04.pdf
113
113
  email:
@@ -124,7 +124,7 @@ files:
124
124
  - ".travis.yml"
125
125
  - CODE_OF_CONDUCT.md
126
126
  - Gemfile
127
- - LICENSE.txt
127
+ - LICENSE
128
128
  - README.md
129
129
  - Rakefile
130
130
  - bin/console
@@ -141,6 +141,9 @@ files:
141
141
  - lib/text_rank/char_filter/strip_html.rb
142
142
  - lib/text_rank/char_filter/strip_possessive.rb
143
143
  - lib/text_rank/char_filter/undo_contractions.rb
144
+ - lib/text_rank/char_filter/undo_contractions.yml
145
+ - lib/text_rank/fingerprint.rb
146
+ - lib/text_rank/fingerprint_overlap.rb
144
147
  - lib/text_rank/graph_strategy.rb
145
148
  - lib/text_rank/graph_strategy/coocurrence.rb
146
149
  - lib/text_rank/keyword_extractor.rb
@@ -153,6 +156,7 @@ files:
153
156
  - lib/text_rank/token_filter/min_length.rb
154
157
  - lib/text_rank/token_filter/part_of_speech.rb
155
158
  - lib/text_rank/token_filter/stopwords.rb
159
+ - lib/text_rank/token_filter/stopwords.yml
156
160
  - lib/text_rank/tokenizer.rb
157
161
  - lib/text_rank/tokenizer/money.rb
158
162
  - lib/text_rank/tokenizer/number.rb
@@ -182,7 +186,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
186
  version: '0'
183
187
  requirements: []
184
188
  rubyforge_project:
185
- rubygems_version: 2.5.1
189
+ rubygems_version: 2.7.6
186
190
  signing_key:
187
191
  specification_version: 4
188
192
  summary: Implementation of TextRank solution to ranked keyword extraction