keyword-ruby 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -12
- data/lib/keyword_ruby/configuration.rb +2 -1
- data/lib/keyword_ruby/extractors/base.rb +1 -1
- data/lib/keyword_ruby/extractors/textrank.rb +107 -0
- data/lib/keyword_ruby/stop_words/ar.txt +29 -0
- data/lib/keyword_ruby/stop_words/de.txt +39 -0
- data/lib/keyword_ruby/stop_words/es.txt +37 -0
- data/lib/keyword_ruby/stop_words/fr.txt +44 -0
- data/lib/keyword_ruby/stop_words/ja.txt +40 -0
- data/lib/keyword_ruby/stop_words/ms.txt +29 -0
- data/lib/keyword_ruby/stop_words/nl.txt +29 -0
- data/lib/keyword_ruby/stop_words/pt.txt +36 -0
- data/lib/keyword_ruby/text_processing/stop_words.rb +15 -1
- data/lib/keyword_ruby/version.rb +1 -1
- data/lib/keyword_ruby.rb +26 -10
- metadata +10 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ae61b0d85e3b754d30502e323aac159abbe570b4b30341b8b8c1d86f0e3f65c2
|
|
4
|
+
data.tar.gz: 729dc165b59cb53568032f884ccc9eae34c80958cb5d87698bcceff0eb0f6061
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a8bf751b18fff5031917192ebc67cd30da67705d72ebc30d2b31c4e03b488223f4572f99c36261e8dd81c66491f212e9db2ce3e456e8df89efb3bb642d3e7790
|
|
7
|
+
data.tar.gz: 1a1fa33626b2946b913c93eb984dc9854cb65c65b0b208d5959e4aac9dcb255f80dfbf668b1219cafa803acb227bed075b26031af305fcf15fb263bc5637a748
|
data/README.md
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# keyword-ruby
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Keyword extraction for Ruby using RAKE, YAKE, TF-IDF, and TextRank algorithms. Extract the most relevant terms from any text.
|
|
4
4
|
|
|
5
5
|
## Installation
|
|
6
6
|
|
|
7
7
|
```ruby
|
|
8
|
-
gem "keyword-ruby"
|
|
8
|
+
gem "keyword-ruby"
|
|
9
9
|
```
|
|
10
10
|
|
|
11
11
|
## Usage
|
|
@@ -13,22 +13,31 @@ gem "keyword-ruby", "~> 0.1"
|
|
|
13
13
|
```ruby
|
|
14
14
|
require "keyword_ruby"
|
|
15
15
|
|
|
16
|
-
|
|
17
|
-
keywords = KeywordRuby.extract("Your text here...")
|
|
18
|
-
keywords.each { |kw| puts "#{kw.phrase}: #{kw.score}" }
|
|
16
|
+
text = "Ruby is a dynamic programming language focused on simplicity and productivity."
|
|
19
17
|
|
|
20
|
-
#
|
|
21
|
-
keywords = KeywordRuby.extract(text, algorithm: :
|
|
18
|
+
# RAKE (Rapid Automatic Keyword Extraction)
|
|
19
|
+
keywords = KeywordRuby.extract(text, algorithm: :rake, top_n: 5)
|
|
22
20
|
|
|
23
|
-
#
|
|
21
|
+
# YAKE (Yet Another Keyword Extractor)
|
|
22
|
+
keywords = KeywordRuby.extract(text, algorithm: :yake, top_n: 5)
|
|
23
|
+
|
|
24
|
+
# TF-IDF
|
|
24
25
|
extractor = KeywordRuby::Extractors::Tfidf.new
|
|
25
|
-
extractor.fit(
|
|
26
|
-
keywords = extractor.extract(text)
|
|
26
|
+
extractor.fit(corpus) # optional: fit on a corpus
|
|
27
|
+
keywords = extractor.extract(text, top_n: 5)
|
|
27
28
|
|
|
28
|
-
#
|
|
29
|
-
results = KeywordRuby.extract_batch(documents, algorithm: :rake, top_n: 5)
|
|
29
|
+
keywords.each { |kw| puts "#{kw.phrase}: #{kw.score}" }
|
|
30
30
|
```
|
|
31
31
|
|
|
32
|
+
## Features
|
|
33
|
+
|
|
34
|
+
- RAKE with proper co-occurrence degree calculation
|
|
35
|
+
- YAKE with stop word handling in multi-word phrases
|
|
36
|
+
- TF-IDF with optional corpus fitting (falls back to TF-only)
|
|
37
|
+
- Score normalization to 0.0-1.0 range
|
|
38
|
+
- English contraction expansion (don't → do not)
|
|
39
|
+
- Input validation and language support
|
|
40
|
+
|
|
32
41
|
## License
|
|
33
42
|
|
|
34
43
|
MIT
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module KeywordRuby
|
|
4
4
|
class Configuration
|
|
5
5
|
attr_accessor :default_algorithm, :default_language, :default_top_n,
|
|
6
|
-
:max_phrase_length, :min_word_length
|
|
6
|
+
:max_phrase_length, :min_word_length, :custom_stop_words
|
|
7
7
|
|
|
8
8
|
def initialize
|
|
9
9
|
@default_algorithm = :rake
|
|
@@ -11,6 +11,7 @@ module KeywordRuby
|
|
|
11
11
|
@default_top_n = 10
|
|
12
12
|
@max_phrase_length = 4
|
|
13
13
|
@min_word_length = 2
|
|
14
|
+
@custom_stop_words = []
|
|
14
15
|
end
|
|
15
16
|
end
|
|
16
17
|
end
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
module KeywordRuby
|
|
4
4
|
module Extractors
|
|
5
5
|
class Base
|
|
6
|
-
SUPPORTED_LANGUAGES = %i[en id].freeze
|
|
6
|
+
SUPPORTED_LANGUAGES = %i[en id ms nl fr de es pt ar ja].freeze
|
|
7
7
|
|
|
8
8
|
def initialize(language: nil, top_n: nil, max_length: nil, min_word_length: nil, normalize: true)
|
|
9
9
|
config = KeywordRuby.configuration
|
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module KeywordRuby
|
|
4
|
+
module Extractors
|
|
5
|
+
class TextRank < Base
|
|
6
|
+
DEFAULT_DAMPING = 0.85
|
|
7
|
+
DEFAULT_ITERATIONS = 30
|
|
8
|
+
DEFAULT_CONVERGENCE = 0.0001
|
|
9
|
+
|
|
10
|
+
# Builds a TextRank extractor.
#
# @param damping [Float] damping factor applied in the PageRank iteration
# @param iterations [Integer] upper bound on the number of PageRank iterations
# @param opts [Hash] remaining keyword options, forwarded unchanged to Base#initialize
def initialize(damping: DEFAULT_DAMPING, iterations: DEFAULT_ITERATIONS, **opts)
  super(**opts)
  @damping, @iterations = damping, iterations
end
|
|
15
|
+
|
|
16
|
+
# Extracts keywords from +text+ using a TextRank-style pipeline:
# tokenize, build a co-occurrence graph over the non-stop-word tokens,
# rank the graph nodes with PageRank, then score single words and
# multi-word phrases from those ranks.
#
# @param text [String, nil] the document to analyse (validated by validate_text!)
# @return [Array] up to @top_n keywords, passed through normalize_scores;
#   empty when the text is blank or contains no candidate words
def extract(text)
  validate_text!(text)
  return [] if text.nil? || text.strip.empty?

  # Tokenize once: the full token stream is needed again for phrase
  # generation, so there is no reason to run the tokenizer twice.
  all_words = TextProcessing::Tokenizer.tokenize(text)
  words = all_words.reject { |w| stop_word?(w) || w.length < @min_word_length }
  return [] if words.empty?

  # Co-occurrence graph over candidate words (window size = 4)
  graph = build_graph(words, window: 4)
  return [] if graph.empty?

  scores = pagerank(graph)
  candidates = generate_phrases(all_words, scores)

  # Ordering is delegated to Keyword#<=>
  # NOTE(review): presumably highest score first — confirm in keyword.rb.
  normalize_scores(candidates.sort.first(@top_n))
end
|
|
38
|
+
|
|
39
|
+
private
|
|
40
|
+
|
|
41
|
+
# Builds an undirected, weighted co-occurrence graph.
#
# Two distinct words are linked each time they appear together inside a
# sliding window of +window+ consecutive words; the edge weight counts how
# many windows contain both.
#
# @param words [Array<String>] candidate words in document order
# @param window [Integer] sliding-window size
# @return [Hash{String => Hash{String => Float}}] adjacency map; every input
#   word is present as a key, even when it has no edges
def build_graph(words, window: 4)
  graph = Hash.new { |h, k| h[k] = Hash.new(0.0) }

  # Register every word as a node so that inputs without any edge
  # (a single word, or one word repeated) still yield a rankable graph.
  words.each { |word| graph[word] }

  # each_cons yields nothing when the input is shorter than the window,
  # which used to drop every keyword for short texts. Clamp the window
  # so a short input is treated as one window.
  span = [window, words.size].min
  return graph if span < 2

  words.each_cons(span) do |group|
    group.uniq.combination(2) do |a, b|
      graph[a][b] += 1.0
      graph[b][a] += 1.0
    end
  end

  graph
end
|
|
53
|
+
|
|
54
|
+
# Runs a weighted PageRank iteration over +graph+.
#
# Every node starts at 1/n. On each pass a node receives (1 - damping)/n
# plus, for each neighbour, damping * neighbour_score * edge_weight divided
# by the neighbour's total outgoing weight. Iteration stops after
# @iterations passes, or earlier once the largest per-node change drops
# below DEFAULT_CONVERGENCE.
#
# @param graph [Hash{String => Hash{String => Float}}] adjacency map
# @return [Hash{String => Float}] score per node, in the graph's key order
def pagerank(graph)
  return {} if graph.empty?

  nodes = graph.keys
  n = nodes.size.to_f
  base_rank = (1.0 - @damping) / n

  # Total outgoing weight per node never changes between iterations,
  # so compute it once instead of re-summing it for every edge.
  out_weights = graph.transform_values { |edges| edges.values.sum }

  scores = nodes.map { |node| [node, 1.0 / n] }.to_h

  @iterations.times do
    new_scores = {}
    max_diff = 0.0

    nodes.each do |node|
      rank = base_rank

      graph[node].each do |neighbor, weight|
        out_weight = out_weights.fetch(neighbor, 0.0)
        next unless out_weight > 0

        rank += @damping * (scores[neighbor] || 0.0) * weight / out_weight
      end

      new_scores[node] = rank
      max_diff = [max_diff, (rank - (scores[node] || 0.0)).abs].max
    end

    scores = new_scores
    break if max_diff < DEFAULT_CONVERGENCE
  end

  scores
end
|
|
82
|
+
|
|
83
|
+
# Turns per-word ranks into scored keyword candidates.
#
# Every ranked word becomes a single-word candidate. In addition, each run
# of 2..@max_length consecutive tokens becomes a phrase candidate, provided
# its first and last tokens are neither stop words nor shorter than
# @min_word_length. A phrase is scored as the sum of its words' ranks
# (unranked words contribute 0.0).
#
# @param words [Array<String>] the full token stream, in document order
# @param word_scores [Hash{String => Float}] rank per candidate word
# @return [Array<Keyword>] one Keyword per distinct word or phrase
def generate_phrases(words, word_scores)
  # Seed with the single-word candidates (copied, so word_scores is untouched).
  scored = {}.merge(word_scores)

  (2..@max_length).each do |size|
    words.each_cons(size) do |gram|
      edges = [gram.first, gram.last]
      next if edges.any? { |w| stop_word?(w) }
      next if edges.any? { |w| w.length < @min_word_length }

      text = gram.join(" ")
      total = gram.sum { |w| word_scores[w] || 0.0 }
      previous = scored[text] || 0.0
      scored[text] = [previous, total].max
    end
  end

  scored.map { |text, value| Keyword.new(phrase: text, score: value) }
end
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
der
|
|
2
|
+
die
|
|
3
|
+
das
|
|
4
|
+
und
|
|
5
|
+
ist
|
|
6
|
+
ein
|
|
7
|
+
eine
|
|
8
|
+
in
|
|
9
|
+
den
|
|
10
|
+
von
|
|
11
|
+
zu
|
|
12
|
+
mit
|
|
13
|
+
auf
|
|
14
|
+
für
|
|
15
|
+
nicht
|
|
16
|
+
sich
|
|
17
|
+
des
|
|
18
|
+
dem
|
|
19
|
+
als
|
|
20
|
+
auch
|
|
21
|
+
es
|
|
22
|
+
an
|
|
23
|
+
er
|
|
24
|
+
so
|
|
25
|
+
dass
|
|
26
|
+
aus
|
|
27
|
+
bei
|
|
28
|
+
nach
|
|
29
|
+
wie
|
|
30
|
+
über
|
|
31
|
+
hat
|
|
32
|
+
oder
|
|
33
|
+
noch
|
|
34
|
+
aber
|
|
35
|
+
um
|
|
36
|
+
wenn
|
|
37
|
+
kann
|
|
38
|
+
nur
|
|
39
|
+
werden
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
el
|
|
2
|
+
la
|
|
3
|
+
los
|
|
4
|
+
las
|
|
5
|
+
de
|
|
6
|
+
del
|
|
7
|
+
en
|
|
8
|
+
un
|
|
9
|
+
una
|
|
10
|
+
que
|
|
11
|
+
es
|
|
12
|
+
por
|
|
13
|
+
con
|
|
14
|
+
no
|
|
15
|
+
para
|
|
16
|
+
se
|
|
17
|
+
al
|
|
18
|
+
lo
|
|
19
|
+
su
|
|
20
|
+
como
|
|
21
|
+
más
|
|
22
|
+
pero
|
|
23
|
+
sus
|
|
24
|
+
le
|
|
25
|
+
ya
|
|
26
|
+
fue
|
|
27
|
+
este
|
|
28
|
+
ha
|
|
29
|
+
sí
|
|
30
|
+
porque
|
|
31
|
+
esta
|
|
32
|
+
entre
|
|
33
|
+
cuando
|
|
34
|
+
muy
|
|
35
|
+
sin
|
|
36
|
+
sobre
|
|
37
|
+
también
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
le
|
|
2
|
+
la
|
|
3
|
+
les
|
|
4
|
+
de
|
|
5
|
+
des
|
|
6
|
+
du
|
|
7
|
+
un
|
|
8
|
+
une
|
|
9
|
+
et
|
|
10
|
+
est
|
|
11
|
+
en
|
|
12
|
+
que
|
|
13
|
+
qui
|
|
14
|
+
dans
|
|
15
|
+
ce
|
|
16
|
+
il
|
|
17
|
+
ne
|
|
18
|
+
sur
|
|
19
|
+
se
|
|
20
|
+
pas
|
|
21
|
+
plus
|
|
22
|
+
par
|
|
23
|
+
je
|
|
24
|
+
avec
|
|
25
|
+
tout
|
|
26
|
+
faire
|
|
27
|
+
son
|
|
28
|
+
au
|
|
29
|
+
mais
|
|
30
|
+
nous
|
|
31
|
+
ont
|
|
32
|
+
cette
|
|
33
|
+
ou
|
|
34
|
+
été
|
|
35
|
+
aussi
|
|
36
|
+
leur
|
|
37
|
+
bien
|
|
38
|
+
peut
|
|
39
|
+
même
|
|
40
|
+
ces
|
|
41
|
+
quand
|
|
42
|
+
entre
|
|
43
|
+
notre
|
|
44
|
+
après
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
の
|
|
2
|
+
に
|
|
3
|
+
は
|
|
4
|
+
を
|
|
5
|
+
た
|
|
6
|
+
が
|
|
7
|
+
で
|
|
8
|
+
て
|
|
9
|
+
と
|
|
10
|
+
し
|
|
11
|
+
れ
|
|
12
|
+
さ
|
|
13
|
+
ある
|
|
14
|
+
いる
|
|
15
|
+
も
|
|
16
|
+
する
|
|
17
|
+
から
|
|
18
|
+
な
|
|
19
|
+
こと
|
|
20
|
+
として
|
|
21
|
+
い
|
|
22
|
+
や
|
|
23
|
+
れる
|
|
24
|
+
など
|
|
25
|
+
なっ
|
|
26
|
+
ない
|
|
27
|
+
この
|
|
28
|
+
ため
|
|
29
|
+
その
|
|
30
|
+
あっ
|
|
31
|
+
よう
|
|
32
|
+
また
|
|
33
|
+
もの
|
|
34
|
+
という
|
|
35
|
+
あり
|
|
36
|
+
まで
|
|
37
|
+
られ
|
|
38
|
+
なる
|
|
39
|
+
へ
|
|
40
|
+
か
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
yang
|
|
2
|
+
dan
|
|
3
|
+
di
|
|
4
|
+
ini
|
|
5
|
+
itu
|
|
6
|
+
dengan
|
|
7
|
+
untuk
|
|
8
|
+
dari
|
|
9
|
+
adalah
|
|
10
|
+
pada
|
|
11
|
+
tidak
|
|
12
|
+
dalam
|
|
13
|
+
akan
|
|
14
|
+
telah
|
|
15
|
+
ke
|
|
16
|
+
oleh
|
|
17
|
+
ada
|
|
18
|
+
juga
|
|
19
|
+
saya
|
|
20
|
+
mereka
|
|
21
|
+
sudah
|
|
22
|
+
boleh
|
|
23
|
+
kami
|
|
24
|
+
kita
|
|
25
|
+
semua
|
|
26
|
+
antara
|
|
27
|
+
lebih
|
|
28
|
+
atas
|
|
29
|
+
bagi
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
de
|
|
2
|
+
que
|
|
3
|
+
não
|
|
4
|
+
do
|
|
5
|
+
da
|
|
6
|
+
em
|
|
7
|
+
um
|
|
8
|
+
para
|
|
9
|
+
com
|
|
10
|
+
uma
|
|
11
|
+
os
|
|
12
|
+
no
|
|
13
|
+
se
|
|
14
|
+
na
|
|
15
|
+
por
|
|
16
|
+
mais
|
|
17
|
+
as
|
|
18
|
+
dos
|
|
19
|
+
como
|
|
20
|
+
mas
|
|
21
|
+
ao
|
|
22
|
+
ele
|
|
23
|
+
das
|
|
24
|
+
tem
|
|
25
|
+
seu
|
|
26
|
+
sua
|
|
27
|
+
ou
|
|
28
|
+
quando
|
|
29
|
+
muito
|
|
30
|
+
nos
|
|
31
|
+
já
|
|
32
|
+
também
|
|
33
|
+
só
|
|
34
|
+
pelo
|
|
35
|
+
pela
|
|
36
|
+
até
|
|
@@ -5,9 +5,11 @@ module KeywordRuby
|
|
|
5
5
|
class StopWords
|
|
6
6
|
STOP_WORDS_DIR = File.join(__dir__, "..", "stop_words")
|
|
7
7
|
|
|
8
|
-
def initialize(language: :en)
|
|
8
|
+
# Loads the stop-word set for +language+, then merges in the globally
# configured custom stop words.
#
# @param language [Symbol] language code; selects "<language>.txt" in STOP_WORDS_DIR
# @param strict [Boolean] when true, a missing stop-word file raises
#   instead of yielding an empty set
def initialize(language: :en, strict: false)
  @language, @strict = language, strict
  @words = load_stop_words
  add_custom_stop_words
end
|
|
12
14
|
|
|
13
15
|
def stop_word?(word)
|
|
@@ -18,14 +20,26 @@ module KeywordRuby
|
|
|
18
20
|
words.reject { |w| stop_word?(w) }
|
|
19
21
|
end
|
|
20
22
|
|
|
23
|
+
# Adds extra stop words to this instance.
#
# Accepts a single word or a collection. Entries are converted to
# Strings, stripped and downcased so they match the form used for the
# words loaded from disk; nil and blank entries are ignored.
#
# @param words [Array<String, Symbol>, String, Symbol, nil] word(s) to add
# @return [Array] the list of words that was processed
def add(words)
  # Array() lets callers pass one word (or nil) without wrapping it.
  Array(words).each do |word|
    entry = word.to_s.strip.downcase
    @words.add(entry) unless entry.empty?
  end
end
|
|
26
|
+
|
|
21
27
|
private
|
|
22
28
|
|
|
23
29
|
# Reads the stop-word list for @language from STOP_WORDS_DIR.
#
# The file is expected to hold one word per line. Lines are stripped and
# downcased; blank lines are skipped.
#
# @return [Set<String>] the stop words, or an empty set when the file is
#   missing and strict mode is off
# @raise [KeywordRuby::Error] when the file is missing and @strict is true
def load_stop_words
  path = File.join(STOP_WORDS_DIR, "#{@language}.txt")

  unless File.exist?(path)
    raise KeywordRuby::Error, "Stop word file not found for language: #{@language}" if @strict

    return Set.new
  end

  # The bundled lists contain non-ASCII words, so read them as UTF-8
  # explicitly rather than relying on the process default external encoding.
  lines = File.readlines(path, chomp: true, encoding: "UTF-8")
  Set.new(lines.map { |line| line.strip.downcase }.reject(&:empty?))
end
|
|
38
|
+
|
|
39
|
+
# Merges the globally configured custom stop words
# (KeywordRuby.configuration.custom_stop_words) into this instance's set.
# Does nothing when no custom list is configured.
def add_custom_stop_words
  extras = KeywordRuby.configuration.custom_stop_words
  return unless extras

  add(extras)
end
|
|
29
43
|
end
|
|
30
44
|
end
|
|
31
45
|
end
|
data/lib/keyword_ruby/version.rb
CHANGED
data/lib/keyword_ruby.rb
CHANGED
|
@@ -5,15 +5,17 @@ require_relative "keyword_ruby/configuration"
|
|
|
5
5
|
require_relative "keyword_ruby/keyword"
|
|
6
6
|
require_relative "keyword_ruby/text_processing/stop_words"
|
|
7
7
|
require_relative "keyword_ruby/text_processing/tokenizer"
|
|
8
|
-
require_relative "keyword_ruby/text_processing/sentence_splitter"
|
|
9
8
|
require_relative "keyword_ruby/extractors/base"
|
|
10
9
|
require_relative "keyword_ruby/extractors/rake"
|
|
11
10
|
require_relative "keyword_ruby/extractors/yake"
|
|
12
11
|
require_relative "keyword_ruby/extractors/tfidf"
|
|
12
|
+
require_relative "keyword_ruby/extractors/textrank"
|
|
13
13
|
|
|
14
14
|
module KeywordRuby
|
|
15
15
|
class Error < StandardError; end
|
|
16
16
|
|
|
17
|
+
ALGORITHMS = %i[rake yake tfidf textrank].freeze
|
|
18
|
+
|
|
17
19
|
class << self
|
|
18
20
|
def configuration
|
|
19
21
|
@configuration ||= Configuration.new
|
|
@@ -29,19 +31,33 @@ module KeywordRuby
|
|
|
29
31
|
|
|
30
32
|
# Extracts keywords from a single text.
#
# @param text [String] the document to analyse
# @param algorithm [Symbol, nil] extractor to use; falls back to
#   configuration.default_algorithm when nil
# @param language [Symbol, nil] forwarded to the extractor
# @param top_n [Integer, nil] forwarded to the extractor
# @param normalize [Boolean] forwarded to the extractor
# @return whatever the chosen extractor's #extract returns
# @raise [ArgumentError] when the algorithm is not supported
def extract(text, algorithm: nil, language: nil, top_n: nil, normalize: true)
  chosen = algorithm || configuration.default_algorithm
  extractor = build_extractor(chosen, language: language, top_n: top_n, normalize: normalize)
  extractor.extract(text)
end
|
|
32
36
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
when :yake then Extractors::Yake.new(language: language, top_n: top_n, normalize: normalize)
|
|
36
|
-
when :tfidf then Extractors::Tfidf.new(language: language, top_n: top_n, normalize: normalize)
|
|
37
|
-
else raise ArgumentError, "Unknown algorithm: #{algo}. Supported: :rake, :yake, :tfidf"
|
|
38
|
-
end
|
|
37
|
+
# Extracts keywords from several documents with one shared extractor.
#
# For :tfidf the extractor is first fitted on the whole batch, so every
# document is scored against the same corpus state. Other algorithms are
# applied to each document independently.
#
# @param documents [Array<String>] the texts to analyse
# @param algorithm [Symbol, nil] extractor to use; falls back to
#   configuration.default_algorithm when nil
# @param language [Symbol, nil] forwarded to the extractor
# @param top_n [Integer, nil] forwarded to the extractor
# @param normalize [Boolean] forwarded to the extractor
# @return [Array] one extraction result per document, in input order
# @raise [ArgumentError] when the algorithm is not supported
def extract_batch(documents, algorithm: nil, language: nil, top_n: nil, normalize: true)
  algo = algorithm || configuration.default_algorithm
  extractor = build_extractor(algo, language: language, top_n: top_n, normalize: normalize)

  # TF-IDF benefits from shared corpus state
  extractor.fit(documents) if algo == :tfidf

  documents.map { |doc| extractor.extract(doc) }
end
|
|
42
50
|
|
|
43
|
-
|
|
44
|
-
|
|
51
|
+
private
|
|
52
|
+
|
|
53
|
+
# Instantiates the extractor class for +algo+.
#
# @param algo [Symbol] one of ALGORITHMS
# @param opts [Hash] keyword options forwarded to the extractor's constructor
# @return [Extractors::Rake, Extractors::Yake, Extractors::Tfidf, Extractors::TextRank]
# @raise [ArgumentError] when +algo+ is not a supported algorithm symbol
def build_extractor(algo, **opts)
  case algo
  when :rake then Extractors::Rake.new(**opts)
  when :yake then Extractors::Yake.new(**opts)
  when :tfidf then Extractors::Tfidf.new(**opts)
  when :textrank then Extractors::TextRank.new(**opts)
  else
    # inspect keeps the symbol colons (and quotes around strings), so a
    # caller who passed "rake" instead of :rake can see the mismatch.
    supported = ALGORITHMS.map(&:inspect).join(", ")
    raise ArgumentError, "Unknown algorithm: #{algo.inspect}. Supported: #{supported}"
  end
end
|
|
46
62
|
end
|
|
47
63
|
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: keyword-ruby
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Johannes Dwi Cahyo
|
|
@@ -54,11 +54,20 @@ files:
|
|
|
54
54
|
- lib/keyword_ruby/configuration.rb
|
|
55
55
|
- lib/keyword_ruby/extractors/base.rb
|
|
56
56
|
- lib/keyword_ruby/extractors/rake.rb
|
|
57
|
+
- lib/keyword_ruby/extractors/textrank.rb
|
|
57
58
|
- lib/keyword_ruby/extractors/tfidf.rb
|
|
58
59
|
- lib/keyword_ruby/extractors/yake.rb
|
|
59
60
|
- lib/keyword_ruby/keyword.rb
|
|
61
|
+
- lib/keyword_ruby/stop_words/ar.txt
|
|
62
|
+
- lib/keyword_ruby/stop_words/de.txt
|
|
60
63
|
- lib/keyword_ruby/stop_words/en.txt
|
|
64
|
+
- lib/keyword_ruby/stop_words/es.txt
|
|
65
|
+
- lib/keyword_ruby/stop_words/fr.txt
|
|
61
66
|
- lib/keyword_ruby/stop_words/id.txt
|
|
67
|
+
- lib/keyword_ruby/stop_words/ja.txt
|
|
68
|
+
- lib/keyword_ruby/stop_words/ms.txt
|
|
69
|
+
- lib/keyword_ruby/stop_words/nl.txt
|
|
70
|
+
- lib/keyword_ruby/stop_words/pt.txt
|
|
62
71
|
- lib/keyword_ruby/text_processing/sentence_splitter.rb
|
|
63
72
|
- lib/keyword_ruby/text_processing/stop_words.rb
|
|
64
73
|
- lib/keyword_ruby/text_processing/tokenizer.rb
|