text_rank 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.codeclimate.yml +10 -0
- data/.github/workflows/ci.yml +48 -0
- data/.rubocop.yml +105 -45
- data/.yardopts +6 -0
- data/CODE_OF_CONDUCT.md +120 -36
- data/README.md +4 -3
- data/Rakefile +6 -9
- data/ext/text_rank/page_rank_sparse_native.c +10 -17
- data/ext/text_rank/text_rank.c +1 -0
- data/lib/page_rank/base.rb +1 -1
- data/lib/page_rank/dense.rb +1 -1
- data/lib/page_rank/sparse.rb +3 -3
- data/lib/page_rank/sparse_native.rb +1 -1
- data/lib/text_rank/char_filter/ascii_folding.rb +0 -2
- data/lib/text_rank/char_filter/strip_html.rb +1 -0
- data/lib/text_rank/fingerprint.rb +2 -2
- data/lib/text_rank/graph_strategy/coocurrence.rb +6 -6
- data/lib/text_rank/keyword_extractor.rb +0 -1
- data/lib/text_rank/rank_filter/collapse_adjacent.rb +3 -3
- data/lib/text_rank/rank_filter/sort_by_value.rb +1 -1
- data/lib/text_rank/tokenizer/money.rb +2 -4
- data/lib/text_rank/version.rb +1 -1
- data/text_rank.gemspec +10 -2
- metadata +64 -6
- data/.travis.yml +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 249809a9542815b7df91ccbf44c09c71c27056c8ffd433f01c07950aafee4900
|
4
|
+
data.tar.gz: 6d59f46f79abe151ff5b2a35f1965849d773bc4da853c25f5bde8bb5584239b4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bbd0aa099bde48e809246ecb2c08e6491c477fdf69c68556e6171ed6aff6541f56aeef99416d077cd68875e22c33af3b99b454659179aa8e515bf50ddf9636a4
|
7
|
+
data.tar.gz: 109825e24f9c030dd693202bb37f74cf7f6f096b8c53f0aed5978acac11f19d9dd3382adacf6d7dd419844c730f36468c4cf954fbff4a3dad0c22adef57eb627
|
data/.codeclimate.yml
CHANGED
@@ -1,15 +1,25 @@
|
|
1
1
|
---
|
2
2
|
engines:
|
3
|
+
cppcheck:
|
4
|
+
enabled: true
|
5
|
+
check: all
|
6
|
+
inline-suppr: true
|
7
|
+
language: c
|
3
8
|
duplication:
|
4
9
|
enabled: true
|
5
10
|
config:
|
6
11
|
languages:
|
12
|
+
- c
|
7
13
|
- ruby
|
8
14
|
checks:
|
9
15
|
Similar code:
|
10
16
|
enabled: false
|
11
17
|
fixme:
|
12
18
|
enabled: true
|
19
|
+
flog:
|
20
|
+
enabled: true
|
21
|
+
config:
|
22
|
+
score_threshold: 20.0
|
13
23
|
rubocop:
|
14
24
|
enabled: true
|
15
25
|
exclude_fingerprints:
|
@@ -0,0 +1,48 @@
|
|
1
|
+
name: CI
|
2
|
+
|
3
|
+
on:
|
4
|
+
push:
|
5
|
+
branches: [ main ]
|
6
|
+
pull_request:
|
7
|
+
branches: [ main ]
|
8
|
+
|
9
|
+
jobs:
|
10
|
+
test:
|
11
|
+
|
12
|
+
runs-on: ubuntu-latest
|
13
|
+
strategy:
|
14
|
+
matrix:
|
15
|
+
ruby-version: ['3.0']
|
16
|
+
|
17
|
+
steps:
|
18
|
+
- uses: actions/checkout@v2
|
19
|
+
- name: Set up Ruby
|
20
|
+
# To automatically get bug fixes and new Ruby versions for ruby/setup-ruby,
|
21
|
+
# change this to (see https://github.com/ruby/setup-ruby#versioning):
|
22
|
+
# uses: ruby/setup-ruby@v1
|
23
|
+
uses: ruby/setup-ruby@473e4d8fe5dd94ee328fdfca9f8c9c7afc9dae5e
|
24
|
+
with:
|
25
|
+
ruby-version: ${{ matrix.ruby-version }}
|
26
|
+
bundler-cache: true # runs 'bundle install' and caches installed gems automatically
|
27
|
+
- name: Compile native extensions
|
28
|
+
run: bundle exec rake clobber compile
|
29
|
+
- name: Run tests
|
30
|
+
run: bundle exec rake spec
|
31
|
+
- name: Rubocop
|
32
|
+
run: bundle exec rubocop
|
33
|
+
- name: cppcheck
|
34
|
+
uses: deep5050/cppcheck-action@main
|
35
|
+
with:
|
36
|
+
github_token: ${{ secrets.GITHUB_TOKEN}}
|
37
|
+
enable: all
|
38
|
+
exclude_check: ./vendor
|
39
|
+
force_language: c
|
40
|
+
inline_suppression: enable
|
41
|
+
other_options: --suppress=missingIncludeSystem
|
42
|
+
- name: Send coverage to Code Climate
|
43
|
+
uses: paambaati/codeclimate-action@v3.0.0
|
44
|
+
env:
|
45
|
+
CC_TEST_REPORTER_ID: 6ab030bf370ffc2abbf0ba4d70a1c8d9649f6fd1426f48f6d43d5c9eb15f187f
|
46
|
+
with:
|
47
|
+
coverageLocations: ${{github.workspace}}/coverage/coverage.json:simplecov
|
48
|
+
coverageCommand: bundle exec rake coverage
|
data/.rubocop.yml
CHANGED
@@ -1,142 +1,202 @@
|
|
1
1
|
AllCops:
|
2
2
|
Exclude:
|
3
|
-
|
4
|
-
|
5
|
-
|
3
|
+
- ext/**/*
|
4
|
+
- tmp/**/*
|
5
|
+
- vendor/**/*
|
6
|
+
Gemspec/DateAssignment:
|
7
|
+
Enabled: true
|
8
|
+
Gemspec/RequireMFA:
|
9
|
+
Enabled: true
|
6
10
|
Layout/EmptyLinesAroundAttributeAccessor:
|
7
11
|
Enabled: true
|
8
|
-
|
9
12
|
Layout/EmptyLinesAroundClassBody:
|
10
13
|
EnforcedStyle: empty_lines_except_namespace
|
11
|
-
|
12
14
|
Layout/EmptyLinesAroundModuleBody:
|
13
15
|
EnforcedStyle: empty_lines_except_namespace
|
14
|
-
|
15
16
|
Layout/ExtraSpacing:
|
16
17
|
Enabled: false
|
17
|
-
|
18
18
|
Layout/HashAlignment:
|
19
19
|
EnforcedHashRocketStyle: table
|
20
20
|
EnforcedColonStyle: table
|
21
|
-
|
21
|
+
Layout/LineEndStringConcatenationIndentation:
|
22
|
+
Enabled: true
|
22
23
|
Layout/LineLength:
|
23
24
|
Max: 120
|
24
25
|
Enabled: false
|
25
|
-
|
26
26
|
Layout/SpaceAroundMethodCallOperator:
|
27
27
|
Enabled: true
|
28
|
-
|
28
|
+
Layout/SpaceBeforeBrackets:
|
29
|
+
Enabled: true
|
30
|
+
Lint/AmbiguousAssignment:
|
31
|
+
Enabled: true
|
32
|
+
Lint/AmbiguousOperatorPrecedence:
|
33
|
+
Enabled: true
|
34
|
+
Lint/AmbiguousRange:
|
35
|
+
Enabled: true
|
36
|
+
Lint/DeprecatedConstants:
|
37
|
+
Enabled: true
|
29
38
|
Lint/DeprecatedOpenSSLConstant:
|
30
39
|
Enabled: true
|
31
|
-
|
40
|
+
Lint/DuplicateBranch:
|
41
|
+
Enabled: true
|
42
|
+
Lint/DuplicateRegexpCharacterClassElement:
|
43
|
+
Enabled: true
|
44
|
+
Lint/EmptyBlock:
|
45
|
+
Enabled: true
|
46
|
+
Lint/EmptyClass:
|
47
|
+
Enabled: true
|
48
|
+
Lint/EmptyInPattern:
|
49
|
+
Enabled: true
|
50
|
+
Lint/IncompatibleIoSelectWithFiberScheduler:
|
51
|
+
Enabled: true
|
52
|
+
Lint/LambdaWithoutLiteralBlock:
|
53
|
+
Enabled: true
|
32
54
|
Lint/MixedRegexpCaptureTypes:
|
33
55
|
Enabled: true
|
34
|
-
|
56
|
+
Lint/NoReturnInBeginEndBlocks:
|
57
|
+
Enabled: true
|
58
|
+
Lint/NumberedParameterAssignment:
|
59
|
+
Enabled: true
|
60
|
+
Lint/OrAssignmentToConstant:
|
61
|
+
Enabled: true
|
35
62
|
Lint/RaiseException:
|
36
63
|
Enabled: true
|
37
|
-
|
64
|
+
Lint/RedundantDirGlobSort:
|
65
|
+
Enabled: true
|
66
|
+
Lint/RequireRelativeSelfPath:
|
67
|
+
Enabled: true
|
38
68
|
Lint/StructNewOverride:
|
39
69
|
Enabled: true
|
40
|
-
|
70
|
+
Lint/SymbolConversion:
|
71
|
+
Enabled: true
|
72
|
+
Lint/ToEnumArguments:
|
73
|
+
Enabled: true
|
74
|
+
Lint/TripleQuotes:
|
75
|
+
Enabled: true
|
76
|
+
Lint/UnexpectedBlockArity:
|
77
|
+
Enabled: true
|
78
|
+
Lint/UnmodifiedReduceAccumulator:
|
79
|
+
Enabled: true
|
80
|
+
Lint/UselessRuby2Keywords:
|
81
|
+
Enabled: true
|
41
82
|
Metrics/AbcSize:
|
42
83
|
Max: 50
|
43
84
|
Enabled: false
|
44
|
-
|
45
85
|
Metrics/BlockLength:
|
46
86
|
Max: 50
|
47
87
|
Enabled: false
|
48
|
-
|
49
88
|
Metrics/ClassLength:
|
50
89
|
Max: 50
|
51
90
|
Enabled: false
|
52
|
-
|
53
91
|
Metrics/CyclomaticComplexity:
|
54
92
|
Max: 30
|
55
93
|
Enabled: false
|
56
|
-
|
57
94
|
Metrics/MethodLength:
|
58
95
|
Max: 20
|
59
96
|
Enabled: false
|
60
|
-
|
61
97
|
Metrics/ModuleLength:
|
62
98
|
Max: 1000
|
63
99
|
Enabled: false
|
64
|
-
|
65
100
|
Metrics/PerceivedComplexity:
|
66
101
|
Max: 30
|
67
102
|
Enabled: false
|
68
|
-
|
103
|
+
Naming/BlockForwarding:
|
104
|
+
Enabled: true
|
105
|
+
Security/IoMethods:
|
106
|
+
Enabled: true
|
69
107
|
Security/MarshalLoad:
|
70
108
|
Enabled: false
|
71
|
-
|
72
109
|
Style/AndOr:
|
73
110
|
Enabled: false
|
74
|
-
|
111
|
+
Style/ArgumentsForwarding:
|
112
|
+
Enabled: true
|
75
113
|
Style/CaseEquality:
|
76
114
|
Enabled: false
|
77
|
-
|
115
|
+
Style/CollectionCompact:
|
116
|
+
Enabled: true
|
117
|
+
Style/DocumentDynamicEvalDefinition:
|
118
|
+
Enabled: true
|
78
119
|
Style/Documentation:
|
79
120
|
Enabled: false
|
80
|
-
|
81
121
|
Style/DoubleNegation:
|
82
122
|
Enabled: false
|
83
|
-
|
123
|
+
Style/EndlessMethod:
|
124
|
+
Enabled: true
|
84
125
|
Style/ExponentialNotation:
|
85
126
|
Enabled: true
|
86
|
-
|
127
|
+
Style/FileRead:
|
128
|
+
Enabled: true
|
129
|
+
Style/FileWrite:
|
130
|
+
Enabled: true
|
87
131
|
Style/FrozenStringLiteralComment:
|
88
132
|
Enabled: false
|
89
|
-
|
90
133
|
Style/GuardClause:
|
91
134
|
Enabled: false
|
92
|
-
|
135
|
+
Style/HashConversion:
|
136
|
+
Enabled: true
|
93
137
|
Style/HashEachMethods:
|
94
138
|
Enabled: true
|
95
|
-
|
139
|
+
Style/HashExcept:
|
140
|
+
Enabled: true
|
96
141
|
Style/HashSyntax:
|
97
142
|
Enabled: true
|
98
|
-
|
99
143
|
Style/HashTransformKeys:
|
100
144
|
Enabled: true
|
101
|
-
|
102
145
|
Style/HashTransformValues:
|
103
146
|
Enabled: true
|
104
|
-
|
105
147
|
Style/IfUnlessModifier:
|
106
148
|
Enabled: false
|
107
|
-
|
149
|
+
Style/IfWithBooleanLiteralBranches:
|
150
|
+
Enabled: true
|
151
|
+
Style/InPatternThen:
|
152
|
+
Enabled: true
|
153
|
+
Style/MapToHash:
|
154
|
+
Enabled: true
|
108
155
|
Style/MultilineBlockChain:
|
109
156
|
Enabled: false
|
110
|
-
|
111
157
|
Style/MultilineIfModifier:
|
112
158
|
Enabled: false
|
113
|
-
|
159
|
+
Style/MultilineInPatternThen:
|
160
|
+
Enabled: true
|
114
161
|
Style/MutableConstant:
|
115
162
|
Enabled: false
|
116
|
-
|
163
|
+
Style/NegatedIfElseCondition:
|
164
|
+
Enabled: true
|
165
|
+
Style/NilLambda:
|
166
|
+
Enabled: true
|
167
|
+
Style/NumberedParameters:
|
168
|
+
Enabled: true
|
169
|
+
Style/NumberedParametersLimit:
|
170
|
+
Enabled: true
|
171
|
+
Style/OpenStructUse:
|
172
|
+
Enabled: true
|
173
|
+
Style/QuotedSymbols:
|
174
|
+
Enabled: true
|
175
|
+
Style/RedundantArgument:
|
176
|
+
Enabled: true
|
117
177
|
Style/RedundantRegexpCharacterClass:
|
118
178
|
Enabled: true
|
119
|
-
|
120
179
|
Style/RedundantRegexpEscape:
|
121
180
|
Enabled: true
|
122
|
-
|
181
|
+
Style/RedundantSelfAssignmentBranch:
|
182
|
+
Enabled: true
|
123
183
|
Style/RescueModifier:
|
124
184
|
Enabled: false
|
125
|
-
|
126
185
|
Style/RescueStandardError:
|
127
186
|
Enabled: false
|
128
|
-
|
187
|
+
Style/SelectByRegexp:
|
188
|
+
Enabled: true
|
129
189
|
Style/SlicingWithRange:
|
130
190
|
Enabled: true
|
131
|
-
|
191
|
+
Style/StringChars:
|
192
|
+
Enabled: true
|
193
|
+
Style/SwapValues:
|
194
|
+
Enabled: true
|
132
195
|
Style/TrailingCommaInArguments:
|
133
196
|
EnforcedStyleForMultiline: comma
|
134
|
-
|
135
197
|
Style/TrailingCommaInArrayLiteral:
|
136
198
|
EnforcedStyleForMultiline: consistent_comma
|
137
|
-
|
138
199
|
Style/TrailingCommaInHashLiteral:
|
139
200
|
EnforcedStyleForMultiline: consistent_comma
|
140
|
-
|
141
201
|
Style/ZeroLengthPredicate:
|
142
202
|
Enabled: false
|
data/.yardopts
ADDED
data/CODE_OF_CONDUCT.md
CHANGED
@@ -1,49 +1,133 @@
|
|
1
|
-
# Contributor Code of Conduct
|
2
1
|
|
3
|
-
|
4
|
-
fostering an open and welcoming community, we pledge to respect all people who
|
5
|
-
contribute through reporting issues, posting feature requests, updating
|
6
|
-
documentation, submitting pull requests or patches, and other activities.
|
2
|
+
# Contributor Covenant Code of Conduct
|
7
3
|
|
8
|
-
|
9
|
-
experience for everyone, regardless of level of experience, gender, gender
|
10
|
-
identity and expression, sexual orientation, disability, personal appearance,
|
11
|
-
body size, race, ethnicity, age, religion, or nationality.
|
4
|
+
## Our Pledge
|
12
5
|
|
13
|
-
|
6
|
+
We as members, contributors, and leaders pledge to make participation in our
|
7
|
+
community a harassment-free experience for everyone, regardless of age, body
|
8
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
9
|
+
identity and expression, level of experience, education, socio-economic status,
|
10
|
+
nationality, personal appearance, race, caste, color, religion, or sexual
|
11
|
+
identity and orientation.
|
14
12
|
|
15
|
-
|
16
|
-
|
17
|
-
|
13
|
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
14
|
+
diverse, inclusive, and healthy community.
|
15
|
+
|
16
|
+
## Our Standards
|
17
|
+
|
18
|
+
Examples of behavior that contributes to a positive environment for our
|
19
|
+
community include:
|
20
|
+
|
21
|
+
* Demonstrating empathy and kindness toward other people
|
22
|
+
* Being respectful of differing opinions, viewpoints, and experiences
|
23
|
+
* Giving and gracefully accepting constructive feedback
|
24
|
+
* Accepting responsibility and apologizing to those affected by our mistakes,
|
25
|
+
and learning from the experience
|
26
|
+
* Focusing on what is best not just for us as individuals, but for the overall
|
27
|
+
community
|
28
|
+
|
29
|
+
Examples of unacceptable behavior include:
|
30
|
+
|
31
|
+
* The use of sexualized language or imagery, and sexual attention or advances of
|
32
|
+
any kind
|
33
|
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
18
34
|
* Public or private harassment
|
19
|
-
* Publishing
|
20
|
-
|
21
|
-
* Other
|
35
|
+
* Publishing others' private information, such as a physical or email address,
|
36
|
+
without their explicit permission
|
37
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
38
|
+
professional setting
|
39
|
+
|
40
|
+
## Enforcement Responsibilities
|
41
|
+
|
42
|
+
Community leaders are responsible for clarifying and enforcing our standards of
|
43
|
+
acceptable behavior and will take appropriate and fair corrective action in
|
44
|
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
45
|
+
or harmful.
|
22
46
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
threatening, offensive, or harmful.
|
47
|
+
Community leaders have the right and responsibility to remove, edit, or reject
|
48
|
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
49
|
+
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
50
|
+
decisions when appropriate.
|
28
51
|
|
29
|
-
|
30
|
-
fairly and consistently applying these principles to every aspect of managing
|
31
|
-
this project. Project maintainers who do not follow or enforce the Code of
|
32
|
-
Conduct may be permanently removed from the project team.
|
52
|
+
## Scope
|
33
53
|
|
34
|
-
This
|
35
|
-
|
54
|
+
This Code of Conduct applies within all community spaces, and also applies when
|
55
|
+
an individual is officially representing the community in public spaces.
|
56
|
+
Examples of representing our community include using an official e-mail address,
|
57
|
+
posting via an official social media account, or acting as an appointed
|
58
|
+
representative at an online or offline event.
|
59
|
+
|
60
|
+
## Enforcement
|
36
61
|
|
37
62
|
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
38
|
-
reported
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
63
|
+
reported to the community leaders responsible for enforcement at
|
64
|
+
[INSERT CONTACT METHOD].
|
65
|
+
All complaints will be reviewed and investigated promptly and fairly.
|
66
|
+
|
67
|
+
All community leaders are obligated to respect the privacy and security of the
|
68
|
+
reporter of any incident.
|
69
|
+
|
70
|
+
## Enforcement Guidelines
|
71
|
+
|
72
|
+
Community leaders will follow these Community Impact Guidelines in determining
|
73
|
+
the consequences for any action they deem in violation of this Code of Conduct:
|
74
|
+
|
75
|
+
### 1. Correction
|
76
|
+
|
77
|
+
**Community Impact**: Use of inappropriate language or other behavior deemed
|
78
|
+
unprofessional or unwelcome in the community.
|
79
|
+
|
80
|
+
**Consequence**: A private, written warning from community leaders, providing
|
81
|
+
clarity around the nature of the violation and an explanation of why the
|
82
|
+
behavior was inappropriate. A public apology may be requested.
|
83
|
+
|
84
|
+
### 2. Warning
|
85
|
+
|
86
|
+
**Community Impact**: A violation through a single incident or series of
|
87
|
+
actions.
|
88
|
+
|
89
|
+
**Consequence**: A warning with consequences for continued behavior. No
|
90
|
+
interaction with the people involved, including unsolicited interaction with
|
91
|
+
those enforcing the Code of Conduct, for a specified period of time. This
|
92
|
+
includes avoiding interactions in community spaces as well as external channels
|
93
|
+
like social media. Violating these terms may lead to a temporary or permanent
|
94
|
+
ban.
|
95
|
+
|
96
|
+
### 3. Temporary Ban
|
97
|
+
|
98
|
+
**Community Impact**: A serious violation of community standards, including
|
99
|
+
sustained inappropriate behavior.
|
100
|
+
|
101
|
+
**Consequence**: A temporary ban from any sort of interaction or public
|
102
|
+
communication with the community for a specified period of time. No public or
|
103
|
+
private interaction with the people involved, including unsolicited interaction
|
104
|
+
with those enforcing the Code of Conduct, is allowed during this period.
|
105
|
+
Violating these terms may lead to a permanent ban.
|
106
|
+
|
107
|
+
### 4. Permanent Ban
|
108
|
+
|
109
|
+
**Community Impact**: Demonstrating a pattern of violation of community
|
110
|
+
standards, including sustained inappropriate behavior, harassment of an
|
111
|
+
individual, or aggression toward or disparagement of classes of individuals.
|
112
|
+
|
113
|
+
**Consequence**: A permanent ban from any sort of public interaction within the
|
114
|
+
community.
|
115
|
+
|
116
|
+
## Attribution
|
43
117
|
|
44
118
|
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
45
|
-
version 1
|
46
|
-
[
|
119
|
+
version 2.1, available at
|
120
|
+
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
121
|
+
|
122
|
+
Community Impact Guidelines were inspired by
|
123
|
+
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
124
|
+
|
125
|
+
For answers to common questions about this code of conduct, see the FAQ at
|
126
|
+
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
127
|
+
[https://www.contributor-covenant.org/translations][translations].
|
47
128
|
|
48
|
-
[homepage]:
|
49
|
-
[
|
129
|
+
[homepage]: https://www.contributor-covenant.org
|
130
|
+
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
131
|
+
[Mozilla CoC]: https://github.com/mozilla/diversity
|
132
|
+
[FAQ]: https://www.contributor-covenant.org/faq
|
133
|
+
[translations]: https://www.contributor-covenant.org/translations
|
data/README.md
CHANGED
@@ -8,9 +8,10 @@
|
|
8
8
|
## Status
|
9
9
|
|
10
10
|
[![Gem Version](https://badge.fury.io/rb/text_rank.svg)](https://badge.fury.io/rb/text_rank)
|
11
|
-
[![
|
11
|
+
[![Build Status](https://github.com/david-mccullars/text_rank/workflows/CI/badge.svg)](https://github.com/david-mccullars/text_rank/actions?workflow=CI)
|
12
12
|
[![Code Climate](https://codeclimate.com/github/david-mccullars/text_rank/badges/gpa.svg)](https://codeclimate.com/github/david-mccullars/text_rank)
|
13
13
|
[![Test Coverage](https://codeclimate.com/github/david-mccullars/text_rank/badges/coverage.svg)](https://codeclimate.com/github/david-mccullars/text_rank/coverage)
|
14
|
+
[![MIT License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
|
14
15
|
|
15
16
|
|
16
17
|
## Description
|
@@ -45,7 +46,7 @@ gem install text_rank
|
|
45
46
|
|
46
47
|
## Requirements
|
47
48
|
|
48
|
-
* Ruby
|
49
|
+
* Ruby 3.0.0 or higher
|
49
50
|
* [engtagger](https://github.com/yohasebe/engtagger) gem is optional but
|
50
51
|
required for `TextRank::TokenFilter::PartOfSpeech`
|
51
52
|
* [nokogiri](https://github.com/sparklemotion/nokogiri) gem is optional but
|
@@ -82,7 +83,7 @@ extractor = TextRank::KeywordExtractor.new(
|
|
82
83
|
damping: 0.85, # The probability of following the graph vs. randomly choosing a new node
|
83
84
|
tolerance: 0.0001, # The desired accuracy of the results
|
84
85
|
char_filters: [...], # A list of filters to be applied prior to tokenization
|
85
|
-
|
86
|
+
tokenizers: [...], # A list of tokenizers to perform tokenization
|
86
87
|
token_filters: [...], # A list of filters to be applied to each token after tokenization
|
87
88
|
graph_strategy: ..., # A class or strategy instance for producing a graph from tokens
|
88
89
|
rank_filters: [...], # A list of filters to be applied to the keyword ranks after keyword extraction
|
data/Rakefile
CHANGED
@@ -1,16 +1,13 @@
|
|
1
|
-
require
|
2
|
-
require
|
3
|
-
require
|
1
|
+
require 'bundler/gem_tasks'
|
2
|
+
require 'rake/extensiontask'
|
3
|
+
require 'rspec/core/rake_task'
|
4
|
+
require 'yard'
|
4
5
|
|
5
6
|
RSpec::Core::RakeTask.new(:spec)
|
6
7
|
|
7
|
-
task :
|
8
|
+
task default: :spec
|
8
9
|
|
9
|
-
|
10
|
-
RDoc::Task.new do |rdoc|
|
11
|
-
rdoc.main = "README.md"
|
12
|
-
rdoc.rdoc_files.include("README.md", "lib/**/*.rb")
|
13
|
-
end
|
10
|
+
YARD::Rake::YardocTask.new
|
14
11
|
|
15
12
|
Rake::ExtensionTask.new('text_rank') do |ext|
|
16
13
|
ext.lib_dir = 'lib/text_rank'
|
@@ -87,9 +87,8 @@ void free_node(Node n) {
|
|
87
87
|
}
|
88
88
|
|
89
89
|
void free_node_list(NodeList nodes, void (*free_item)(Node)) {
|
90
|
-
NodeList tmp;
|
91
90
|
while (nodes != NULL) {
|
92
|
-
tmp = nodes;
|
91
|
+
NodeList tmp = nodes;
|
93
92
|
nodes = nodes->next;
|
94
93
|
if (free_item) {
|
95
94
|
free_item(tmp->node);
|
@@ -104,9 +103,8 @@ void free_edge(Edge e) {
|
|
104
103
|
}
|
105
104
|
|
106
105
|
void free_edge_list(EdgeList edges, void (*free_item)(Edge)) {
|
107
|
-
EdgeList tmp;
|
108
106
|
while (edges != NULL) {
|
109
|
-
tmp = edges;
|
107
|
+
EdgeList tmp = edges;
|
110
108
|
edges = edges->next;
|
111
109
|
if (free_item) {
|
112
110
|
free_item(tmp->edge);
|
@@ -211,7 +209,6 @@ void calculate_step(Graph g, double damping) {
|
|
211
209
|
Node source, destination;
|
212
210
|
EdgeList edges;
|
213
211
|
Edge e;
|
214
|
-
double sum;
|
215
212
|
|
216
213
|
// Set prev rank to rank for all nodes
|
217
214
|
for (nodes = g->nodes; nodes != NULL; nodes = nodes->next) {
|
@@ -222,7 +219,7 @@ void calculate_step(Graph g, double damping) {
|
|
222
219
|
// Re-destribute the rankings according to weight
|
223
220
|
for (nodes = g->nodes; nodes != NULL; nodes = nodes->next) {
|
224
221
|
destination = nodes->node;
|
225
|
-
sum = 0.0;
|
222
|
+
double sum = 0.0;
|
226
223
|
for (edges = destination->source_edges; edges != NULL; edges = edges->next) {
|
227
224
|
e = edges->edge;
|
228
225
|
source = e->source;
|
@@ -238,13 +235,11 @@ void calculate_step(Graph g, double damping) {
|
|
238
235
|
|
239
236
|
// Calculate the Euclidean distance from prev_rank to rank across all nodes
|
240
237
|
double prev_distance(Graph g) {
|
241
|
-
|
242
|
-
Node n;
|
243
|
-
double rank_diff, sum_squares = 0.0;
|
238
|
+
double sum_squares = 0.0;
|
244
239
|
|
245
|
-
for (nodes = g->nodes; nodes != NULL; nodes = nodes->next) {
|
246
|
-
n = nodes->node;
|
247
|
-
rank_diff = n->prev_rank - n->rank;
|
240
|
+
for (NodeList nodes = g->nodes; nodes != NULL; nodes = nodes->next) {
|
241
|
+
Node n = nodes->node;
|
242
|
+
double rank_diff = n->prev_rank - n->rank;
|
248
243
|
sum_squares += rank_diff * rank_diff;
|
249
244
|
}
|
250
245
|
|
@@ -264,11 +259,9 @@ void calculate(Graph g, int max_iterations, double damping, double tolerance) {
|
|
264
259
|
}
|
265
260
|
|
266
261
|
int node_compare(const void *v1, const void *v2) {
|
267
|
-
double rank1
|
268
|
-
|
269
|
-
|
270
|
-
rank2 = (*(Node *)v2)->rank;
|
271
|
-
cmp = rank2 - rank1; // Decreasing order
|
262
|
+
double rank1 = (*(Node *)v1)->rank;
|
263
|
+
double rank2 = (*(Node *)v2)->rank;
|
264
|
+
double cmp = rank2 - rank1; // Decreasing order
|
272
265
|
if (cmp < 0) return -1;
|
273
266
|
if (cmp > 0) return 1;
|
274
267
|
return 0;
|
data/ext/text_rank/text_rank.c
CHANGED
data/lib/page_rank/base.rb
CHANGED
data/lib/page_rank/dense.rb
CHANGED
@@ -79,7 +79,7 @@ module PageRank
|
|
79
79
|
total = total_out_weights[source_idx]
|
80
80
|
if total
|
81
81
|
w = @out_links[source_idx][dest_idx] || 0.0
|
82
|
-
damping * w / total + (1 - damping) / node_count.to_f
|
82
|
+
(damping * w / total) + ((1 - damping) / node_count.to_f)
|
83
83
|
else
|
84
84
|
1.0 / node_count.to_f
|
85
85
|
end
|
data/lib/page_rank/sparse.rb
CHANGED
@@ -56,7 +56,7 @@ module PageRank
|
|
56
56
|
w / @weight_totals[source]
|
57
57
|
end
|
58
58
|
end
|
59
|
-
|
59
|
+
@nodes.to_h { |k| [k, 1.0 / node_count.to_f] }
|
60
60
|
end
|
61
61
|
|
62
62
|
def calculate_step(ranks)
|
@@ -68,14 +68,14 @@ module PageRank
|
|
68
68
|
@dangling_nodes.each do |source|
|
69
69
|
sum += ranks[source] / node_count.to_f
|
70
70
|
end
|
71
|
-
new_ranks[dest] = damping * sum + (1 - damping) / node_count
|
71
|
+
new_ranks[dest] = (damping * sum) + ((1 - damping) / node_count)
|
72
72
|
end
|
73
73
|
end
|
74
74
|
|
75
75
|
def sort_ranks(ranks)
|
76
76
|
sum = 0.0
|
77
77
|
ranks.each { |_, v| sum += v }
|
78
|
-
|
78
|
+
ranks.map { |k, v| [k, v / sum] }.sort_by { |_, v| -v }.to_h
|
79
79
|
end
|
80
80
|
|
81
81
|
def distance(vector1, vector2)
|
@@ -3,14 +3,12 @@ module TextRank
|
|
3
3
|
##
|
4
4
|
# Characater filter to transform non-ASCII (unicode) characters into ASCII-friendly versions.
|
5
5
|
#
|
6
|
-
# rubocop:disable Style/AsciiComments
|
7
6
|
#
|
8
7
|
# = Example
|
9
8
|
#
|
10
9
|
# AsciiFolding.new.filter!("the Perigordian Abbé then made answer, because a poor beggar of the country of Atrébatie heard some foolish things said")
|
11
10
|
# => "the Perigordian Abbe then made answer, because a poor beggar of the country of Atrebatie heard some foolish things said"
|
12
11
|
#
|
13
|
-
# rubocop:enable Style/AsciiComments
|
14
12
|
#
|
15
13
|
##
|
16
14
|
class AsciiFolding
|
@@ -57,7 +57,7 @@ module TextRank
|
|
57
57
|
end
|
58
58
|
|
59
59
|
# Calculates the "similarity" between this fingerprint and another
|
60
|
-
# @param {Fingerprint} A second fingerprint to compare
|
60
|
+
# @param {Fingerprint} other A second fingerprint to compare
|
61
61
|
# @return [Number] A number between 0.0 (different) and 1.0 (same)
|
62
62
|
def similarity(other)
|
63
63
|
return 1.0 if values == other.values # Short-circuit for efficiency
|
@@ -83,7 +83,7 @@ module TextRank
|
|
83
83
|
|
84
84
|
def norm_factor
|
85
85
|
@norm_factor ||= size.times.reduce(0.0) do |s, i|
|
86
|
-
s + (i + 1) / Math.log(i + 2) / size.to_f
|
86
|
+
s + ((i + 1) / Math.log(i + 2) / size.to_f)
|
87
87
|
end
|
88
88
|
end
|
89
89
|
|
@@ -60,7 +60,7 @@ module TextRank
|
|
60
60
|
# @param graph [PageRank::Base] a PageRank graph into which to add nodes/edges
|
61
61
|
# return [nil]
|
62
62
|
def build_graph(tokens, graph)
|
63
|
-
ngram_window = @ngram_size * 2 + 1
|
63
|
+
ngram_window = (@ngram_size * 2) + 1
|
64
64
|
tokens.size.times do |i|
|
65
65
|
ngram_window.times do |j|
|
66
66
|
consider_ngram_window(tokens, graph, i, j)
|
@@ -71,14 +71,14 @@ module TextRank
|
|
71
71
|
|
72
72
|
private
|
73
73
|
|
74
|
-
def consider_ngram_window(tokens, graph,
|
75
|
-
return if
|
74
|
+
def consider_ngram_window(tokens, graph, idx_i, idx_j)
|
75
|
+
return if idx_j == @ngram_size || idx_i + idx_j < @ngram_size
|
76
76
|
|
77
|
-
token_i = tokens[
|
78
|
-
token_j = tokens[
|
77
|
+
token_i = tokens[idx_i]
|
78
|
+
token_j = tokens[idx_i - @ngram_size + idx_j]
|
79
79
|
|
80
80
|
if token_j
|
81
|
-
graph.add(token_i, token_j, weight: 1.0 / (
|
81
|
+
graph.add(token_i, token_j, weight: 1.0 / (idx_j - @ngram_size).abs)
|
82
82
|
end
|
83
83
|
end
|
84
84
|
|
@@ -151,7 +151,7 @@ module TextRank
|
|
151
151
|
# tokenization (e.g. ASCII folding). That's okay. We're just making the best effort we can
|
152
152
|
# to find what we can.
|
153
153
|
def scan_text_for_all_permutations_of(single_tokens)
|
154
|
-
# NOTE that by reversing the order we craft the regex to prefer larger combinations over
|
154
|
+
# NOTE: that by reversing the order we craft the regex to prefer larger combinations over
|
155
155
|
# smaller combinations (or singletons).
|
156
156
|
perms = (1..@max_tokens_to_combine).to_a.reverse.flat_map do |n|
|
157
157
|
scan_text_for_n_permutations_of(single_tokens, n)
|
@@ -162,8 +162,8 @@ module TextRank
|
|
162
162
|
end unless perms.empty?
|
163
163
|
end
|
164
164
|
|
165
|
-
def scan_text_for_n_permutations_of(single_tokens,
|
166
|
-
single_tokens.permutation(
|
165
|
+
def scan_text_for_n_permutations_of(single_tokens, n_perms)
|
166
|
+
single_tokens.permutation(n_perms).map do |perm|
|
167
167
|
unless @permutations_scanned.key?(perm)
|
168
168
|
@permutations_scanned[perm] = 0
|
169
169
|
perm
|
@@ -14,7 +14,7 @@ module TextRank
|
|
14
14
|
# @param ranks [Hash<String, Float>] the results of the PageRank algorithm
|
15
15
|
# @return [Hash<String, Float>]
|
16
16
|
def filter!(ranks, **_)
|
17
|
-
|
17
|
+
ranks.sort_by { |_, v| @descending ? -v : v }.to_h
|
18
18
|
end
|
19
19
|
|
20
20
|
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module TextRank
|
2
2
|
module Tokenizer
|
3
3
|
|
4
|
-
CURRENCY_SYMBOLS =
|
4
|
+
CURRENCY_SYMBOLS = "[#{[
|
5
5
|
"\u00a4", # Generic Currency Symbol
|
6
6
|
"\u0024", # Dollar Sign
|
7
7
|
"\u00a2", # Cent Sign
|
@@ -26,14 +26,13 @@ module TextRank
|
|
26
26
|
"\u20ab", # Dong Sign
|
27
27
|
"\u0025", # Percent
|
28
28
|
"\u2030", # Per Million
|
29
|
-
].join
|
29
|
+
].join}]"
|
30
30
|
private_constant :CURRENCY_SYMBOLS # Do not expose this to avoid confusion
|
31
31
|
|
32
32
|
##
|
33
33
|
# A tokenizer regex that preserves money or formatted numbers as a single token. This
|
34
34
|
# currently supports 24 different currency symbols:
|
35
35
|
#
|
36
|
-
# rubocop:disable Style/AsciiComments
|
37
36
|
#
|
38
37
|
# * ¤
|
39
38
|
# * $
|
@@ -60,7 +59,6 @@ module TextRank
|
|
60
59
|
# * %
|
61
60
|
# * ‰
|
62
61
|
|
63
|
-
# rubocop:enable Style/AsciiComments
|
64
62
|
#
|
65
63
|
# It also supports two alternative formats for negatives as well as optional three digit comma
|
66
64
|
# separation and optional decimals.
|
data/lib/text_rank/version.rb
CHANGED
data/text_rank.gemspec
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
lib = File.expand_path('
|
1
|
+
lib = File.expand_path('lib', __dir__)
|
2
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
3
3
|
require 'text_rank/version'
|
4
4
|
|
@@ -18,15 +18,23 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
19
19
|
spec.extensions = ['ext/text_rank/extconf.rb']
|
20
20
|
spec.require_paths = ['lib']
|
21
|
+
spec.required_ruby_version = '>= 3.0.0'
|
21
22
|
|
22
23
|
spec.add_development_dependency 'bundler'
|
24
|
+
spec.add_development_dependency 'github-markup'
|
23
25
|
spec.add_development_dependency 'rake'
|
24
26
|
spec.add_development_dependency 'rake-compiler'
|
27
|
+
spec.add_development_dependency 'redcarpet'
|
25
28
|
spec.add_development_dependency 'rspec'
|
26
29
|
spec.add_development_dependency 'rubocop'
|
27
|
-
spec.add_development_dependency '
|
30
|
+
spec.add_development_dependency 'rubocop-rake'
|
31
|
+
spec.add_development_dependency 'rubocop-rspec'
|
32
|
+
spec.add_development_dependency 'simplecov', '~> 0.17.0' # 0.18 not supported by code climate
|
28
33
|
spec.add_development_dependency 'yard'
|
29
34
|
|
30
35
|
spec.add_development_dependency 'engtagger' # Optional runtime dependency but needed for specs
|
31
36
|
spec.add_development_dependency 'nokogiri' # Optional runtime dependency but needed for specs
|
37
|
+
spec.metadata = {
|
38
|
+
'rubygems_mfa_required' => 'true',
|
39
|
+
}
|
32
40
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_rank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.3.
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- David McCullars
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -24,6 +24,20 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: github-markup
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rake
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +66,20 @@ dependencies:
|
|
52
66
|
- - ">="
|
53
67
|
- !ruby/object:Gem::Version
|
54
68
|
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: redcarpet
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: rspec
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -81,7 +109,7 @@ dependencies:
|
|
81
109
|
- !ruby/object:Gem::Version
|
82
110
|
version: '0'
|
83
111
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
112
|
+
name: rubocop-rake
|
85
113
|
requirement: !ruby/object:Gem::Requirement
|
86
114
|
requirements:
|
87
115
|
- - ">="
|
@@ -94,6 +122,34 @@ dependencies:
|
|
94
122
|
- - ">="
|
95
123
|
- !ruby/object:Gem::Version
|
96
124
|
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rubocop-rspec
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ">="
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '0'
|
132
|
+
type: :development
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ">="
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: simplecov
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: 0.17.0
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 0.17.0
|
97
153
|
- !ruby/object:Gem::Dependency
|
98
154
|
name: yard
|
99
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -146,11 +202,12 @@ extensions:
|
|
146
202
|
extra_rdoc_files: []
|
147
203
|
files:
|
148
204
|
- ".codeclimate.yml"
|
205
|
+
- ".github/workflows/ci.yml"
|
149
206
|
- ".gitignore"
|
150
207
|
- ".rspec"
|
151
208
|
- ".rubocop.yml"
|
152
209
|
- ".ruby-version"
|
153
|
-
- ".travis.yml"
|
210
|
+
- ".yardopts"
|
154
211
|
- CODE_OF_CONDUCT.md
|
155
212
|
- Gemfile
|
156
213
|
- LICENSE
|
@@ -203,7 +260,8 @@ files:
|
|
203
260
|
homepage: https://github.com/david-mccullars/text_rank
|
204
261
|
licenses:
|
205
262
|
- MIT
|
206
|
-
metadata:
|
263
|
+
metadata:
|
264
|
+
rubygems_mfa_required: 'true'
|
207
265
|
post_install_message:
|
208
266
|
rdoc_options: []
|
209
267
|
require_paths:
|
@@ -212,7 +270,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
212
270
|
requirements:
|
213
271
|
- - ">="
|
214
272
|
- !ruby/object:Gem::Version
|
215
|
-
version:
|
273
|
+
version: 3.0.0
|
216
274
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
217
275
|
requirements:
|
218
276
|
- - ">="
|
data/.travis.yml
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
env:
|
2
|
-
global:
|
3
|
-
- CC_TEST_REPORTER_ID=6ab030bf370ffc2abbf0ba4d70a1c8d9649f6fd1426f48f6d43d5c9eb15f187f
|
4
|
-
language: ruby
|
5
|
-
rvm:
|
6
|
-
- 2.5.1
|
7
|
-
before_install: gem install bundler -v 1.17.3
|
8
|
-
before_script:
|
9
|
-
- curl -L https://codeclimate.com/downloads/test-reporter/test-reporter-latest-linux-amd64 > ./cc-test-reporter
|
10
|
-
- chmod +x ./cc-test-reporter
|
11
|
-
- ./cc-test-reporter before-build
|
12
|
-
- bundle exec rake compile
|
13
|
-
script:
|
14
|
-
- bundle exec rspec
|
15
|
-
after_script:
|
16
|
-
- ./cc-test-reporter after-build --exit-code $TRAVIS_TEST_RESULT
|