text_nlp 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/Gemfile.lock +6 -0
- data/lib/text_nlp.rb +1 -3
- data/lib/text_nlp/expressions.rb +2 -2
- data/lib/text_nlp/normalizer.rb +1 -1
- data/lib/text_nlp/pattern.rb +14 -109
- data/lib/text_nlp/stop_list.rb +44 -0
- data/lib/text_nlp/stoplists/min_fr.txt +43 -0
- data/lib/text_nlp/string.rb +13 -5
- data/lib/text_nlp/synonyms.rb +5 -4
- data/lib/text_nlp/tokenizer.rb +1 -1
- data/spec/min_en.txt +2 -0
- data/spec/min_fr.txt +3 -0
- data/spec/pattern_spec.rb +25 -5
- data/spec/stop_list_spec.rb +34 -0
- data/spec/stop_list_toto.txt +2 -0
- data/spec/stop_list_tutu.txt +2 -0
- data/spec/string_spec.rb +23 -4
- data/spec/synonyms_spec.rb +10 -8
- data/text_nlp.gemspec +7 -2
- metadata +43 -3
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -2,6 +2,7 @@ GEM
|
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
4
|
diff-lcs (1.1.2)
|
5
|
+
polyglot (0.3.1)
|
5
6
|
rspec (2.6.0)
|
6
7
|
rspec-core (~> 2.6.0)
|
7
8
|
rspec-expectations (~> 2.6.0)
|
@@ -10,9 +11,14 @@ GEM
|
|
10
11
|
rspec-expectations (2.6.0)
|
11
12
|
diff-lcs (~> 1.1.2)
|
12
13
|
rspec-mocks (2.6.0)
|
14
|
+
textquery (0.1.8)
|
15
|
+
treetop
|
16
|
+
treetop (1.4.9)
|
17
|
+
polyglot (>= 0.3.1)
|
13
18
|
|
14
19
|
PLATFORMS
|
15
20
|
ruby
|
16
21
|
|
17
22
|
DEPENDENCIES
|
18
23
|
rspec
|
24
|
+
textquery
|
data/lib/text_nlp.rb
CHANGED
data/lib/text_nlp/expressions.rb
CHANGED
@@ -12,7 +12,7 @@ class TextNlp
|
|
12
12
|
|
13
13
|
def <<(expression)
|
14
14
|
node = @root
|
15
|
-
expression
|
15
|
+
expression.normalize!
|
16
16
|
@values << expression
|
17
17
|
tokens = expression.tokenize
|
18
18
|
tokens_count = tokens.size
|
@@ -40,7 +40,7 @@ class TextNlp
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def find(text)
|
43
|
-
find_expressions(0,text.normalize.tokenize
|
43
|
+
find_expressions(0,text.normalize.tokenize)
|
44
44
|
end
|
45
45
|
|
46
46
|
private
|
data/lib/text_nlp/normalizer.rb
CHANGED
data/lib/text_nlp/pattern.rb
CHANGED
@@ -1,122 +1,27 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
+
require 'textquery'
|
2
3
|
|
3
4
|
class TextNlp
|
4
5
|
class Pattern
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
@
|
11
|
-
else
|
12
|
-
@root = root_or_string
|
6
|
+
|
7
|
+
def initialize(pattern, options = {})
|
8
|
+
options = {:normalize => true}.merge(options)
|
9
|
+
if options[:normalize]
|
10
|
+
normalize_pattern(pattern)
|
11
|
+
@to_normalize = true
|
13
12
|
end
|
13
|
+
@text_query = TextQuery.new(pattern, {:ignorecase => options[:normalize]})
|
14
14
|
end
|
15
|
-
|
16
|
-
def <<(node)
|
17
|
-
@root << node
|
18
|
-
end
|
19
|
-
|
15
|
+
|
20
16
|
def match?(text)
|
21
|
-
@
|
17
|
+
text.normalize! if @to_normalize
|
18
|
+
@text_query.match?(text)
|
22
19
|
end
|
23
20
|
|
24
21
|
private
|
25
|
-
def
|
26
|
-
|
27
|
-
|
28
|
-
expr.chars.each_with_index do |char,i|
|
29
|
-
if (char == '(')
|
30
|
-
opened += 1
|
31
|
-
current_expression << char if ((opened - closed) > 1)
|
32
|
-
elsif (char == ')')
|
33
|
-
closed += 1
|
34
|
-
current_expression << char if ((opened - closed) > 0)
|
35
|
-
elsif ((opened == closed) && (operators.include?(expr[i-1..i])))
|
36
|
-
node = operator_node(expr[i-1..i])
|
37
|
-
node << parse(current_expression[0..-2])
|
38
|
-
node << parse(expr[i+1..-1])
|
39
|
-
break;
|
40
|
-
else
|
41
|
-
current_expression << char
|
42
|
-
end
|
43
|
-
end
|
44
|
-
unless node
|
45
|
-
if (current_expression.match(/\|{2}|&{2}/))
|
46
|
-
node = parse(current_expression)
|
47
|
-
else
|
48
|
-
node = current_expression[0..0] == '!' ? Not.new(current_expression[1..-1]) : Unary.new(current_expression)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
node
|
52
|
-
end
|
53
|
-
|
54
|
-
def operator_node(operator)
|
55
|
-
node = case operator
|
56
|
-
when '||' then Or.new
|
57
|
-
when '&&' then And.new
|
58
|
-
end
|
59
|
-
node
|
60
|
-
end
|
61
|
-
|
62
|
-
class Composite
|
63
|
-
attr_reader :nodes
|
64
|
-
|
65
|
-
def initialize(*nodes)
|
66
|
-
@nodes = nodes || []
|
67
|
-
end
|
68
|
-
|
69
|
-
def <<(node)
|
70
|
-
@nodes << node
|
71
|
-
end
|
72
|
-
|
73
|
-
def values
|
74
|
-
@nodes.map { |node| node.values }.flatten
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
class And < Composite
|
79
|
-
def evaluate(expr)
|
80
|
-
@nodes.each do |node|
|
81
|
-
return false unless node.evaluate(expr)
|
82
|
-
end
|
83
|
-
return true
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
class Or < Composite
|
88
|
-
def evaluate(expr)
|
89
|
-
@nodes.each do |node|
|
90
|
-
return true if node.evaluate(expr)
|
91
|
-
end
|
92
|
-
return false
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
class Unary
|
97
|
-
attr_reader :value
|
98
|
-
|
99
|
-
def initialize(value)
|
100
|
-
@value = value
|
101
|
-
@expressions = Expressions.new([@value])
|
102
|
-
end
|
103
|
-
|
104
|
-
def evaluate(expr)
|
105
|
-
@expressions.any?(expr)
|
106
|
-
end
|
107
|
-
|
108
|
-
def values
|
109
|
-
[value]
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
class Not < Unary
|
114
|
-
def evaluate(expr)
|
115
|
-
!super(expr)
|
116
|
-
end
|
117
|
-
def values
|
118
|
-
[]
|
119
|
-
end
|
22
|
+
def normalize_pattern(pattern)
|
23
|
+
pattern.tr!("éèàçîêô","eeacieo")
|
24
|
+
pattern.tr!("!,;?./\\_|[]{}<>:*$%"," ")
|
120
25
|
end
|
121
26
|
|
122
27
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
class TextNlp
|
4
|
+
class StopList
|
5
|
+
|
6
|
+
class << self
|
7
|
+
attr_accessor :directory
|
8
|
+
StopList.directory = File.join(File.dirname(__FILE__),'stoplists')
|
9
|
+
end
|
10
|
+
|
11
|
+
def initialize(options = {})
|
12
|
+
@cache = {}
|
13
|
+
options = {:expressions => []}.merge(options)
|
14
|
+
expressions = options[:expressions]
|
15
|
+
if (options.key?(:name))
|
16
|
+
File.foreach(File.join(StopList.directory,"#{options[:name]}.txt")) { |e| expressions << e }
|
17
|
+
end
|
18
|
+
if (options.key?(:names))
|
19
|
+
options[:names].each do |name|
|
20
|
+
File.foreach(File.join(StopList.directory,"#{name}.txt")) { |e| expressions << e }
|
21
|
+
end
|
22
|
+
end
|
23
|
+
if (options.key?(:file))
|
24
|
+
File.foreach(options[:file]) { |e| expressions << e }
|
25
|
+
end
|
26
|
+
if (options.key?(:files))
|
27
|
+
options[:files].each do |file|
|
28
|
+
File.foreach(file) { |e| expressions << e }
|
29
|
+
end
|
30
|
+
end
|
31
|
+
expressions.each { |e| @cache[e.normalize] = true }
|
32
|
+
@expressions = TextNlp::Expressions.new(expressions)
|
33
|
+
end
|
34
|
+
|
35
|
+
def transform(text)
|
36
|
+
@expressions.expressionize(text).map { |expr| @cache.key?(expr) ? nil : expr }.compact.join(' ')
|
37
|
+
end
|
38
|
+
|
39
|
+
def size
|
40
|
+
@expressions.values.size
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
a
|
2
|
+
au
|
3
|
+
aussi
|
4
|
+
aux
|
5
|
+
avec
|
6
|
+
c
|
7
|
+
ce
|
8
|
+
cette
|
9
|
+
contre
|
10
|
+
d
|
11
|
+
dans
|
12
|
+
de
|
13
|
+
des
|
14
|
+
du
|
15
|
+
en
|
16
|
+
et
|
17
|
+
j
|
18
|
+
l
|
19
|
+
la
|
20
|
+
le
|
21
|
+
les
|
22
|
+
mais
|
23
|
+
n
|
24
|
+
ou
|
25
|
+
par
|
26
|
+
pas
|
27
|
+
pour
|
28
|
+
qu
|
29
|
+
que
|
30
|
+
quel
|
31
|
+
quelle
|
32
|
+
quelles
|
33
|
+
quels
|
34
|
+
qui
|
35
|
+
sa
|
36
|
+
sans
|
37
|
+
ses
|
38
|
+
son
|
39
|
+
sous
|
40
|
+
sur
|
41
|
+
un
|
42
|
+
une
|
43
|
+
y
|
data/lib/text_nlp/string.rb
CHANGED
@@ -18,23 +18,31 @@ class String
|
|
18
18
|
self
|
19
19
|
end
|
20
20
|
|
21
|
+
def normalize!
|
22
|
+
unless normalized()
|
23
|
+
replace(self.normalize)
|
24
|
+
self.normalized = true
|
25
|
+
end
|
26
|
+
self
|
27
|
+
end
|
28
|
+
|
21
29
|
def tokenize
|
22
30
|
(String.tokenizer || TextNlp::Tokenizer.new).tokenize(self)
|
23
31
|
end
|
24
32
|
|
25
33
|
def similarity(text)
|
26
34
|
score = 0.0
|
27
|
-
tokens1 = self.normalize.tokenize
|
28
|
-
tokens2 = text.normalize.tokenize
|
35
|
+
tokens1, tokens2 = self.normalize.tokenize, text.normalize.tokenize
|
29
36
|
if (tokens1.size > 0 && tokens2.size > 0)
|
30
37
|
intersection = tokens1 & tokens2
|
31
|
-
score = (((intersection.size.to_f / tokens1.size
|
38
|
+
score = (((intersection.size.to_f / tokens1.size) + (intersection.size.to_f / tokens2.size)) / 2)
|
32
39
|
end
|
33
40
|
score
|
34
41
|
end
|
35
42
|
|
36
|
-
def
|
37
|
-
|
43
|
+
def transform(*transformers)
|
44
|
+
transformers = [transformers] unless transformers.respond_to?(:each)
|
45
|
+
transformers.flatten.inject(self) { |text,transformer| transformer.transform(text) }
|
38
46
|
end
|
39
47
|
|
40
48
|
end
|
data/lib/text_nlp/synonyms.rb
CHANGED
@@ -13,15 +13,16 @@ class TextNlp
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def register(name,synonyms)
|
16
|
-
|
16
|
+
name.normalize!
|
17
17
|
synonyms.each do |synonym|
|
18
|
+
synonym.normalize!
|
18
19
|
@expressions << synonym
|
19
|
-
@synonyms[synonym
|
20
|
+
@synonyms[synonym] = name
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
23
|
-
def
|
24
|
-
@expressions.expressionize(text).map { |expr| @synonyms.key?(expr) ? @synonyms[expr] : expr }.join(' ')
|
24
|
+
def transform(text)
|
25
|
+
@expressions.expressionize(text).map { |expr| @synonyms.key?(expr) ? @synonyms[expr] : expr }.compact.join(' ')
|
25
26
|
end
|
26
27
|
|
27
28
|
end
|
data/lib/text_nlp/tokenizer.rb
CHANGED
data/spec/min_en.txt
ADDED
data/spec/min_fr.txt
ADDED
data/spec/pattern_spec.rb
CHANGED
@@ -3,11 +3,31 @@ require "spec_helper"
|
|
3
3
|
|
4
4
|
describe TextNlp::Pattern do
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
context "with normalize option" do
|
7
|
+
|
8
|
+
it "should match or not the pattern" do
|
9
|
+
pattern = TextNlp::Pattern.new("(BD OR 'bande dessinée') AND -samsung")
|
10
|
+
pattern.match?("cette BD est super").should be_true
|
11
|
+
pattern.match?("cette bd est illisible sur samsung NTC").should be_false
|
12
|
+
pattern.match?("cette bande dessinee est illisible sur samsung NTC").should be_false
|
13
|
+
pattern.match?("cette bande dessinee est illisible").should be_true
|
14
|
+
pattern = TextNlp::Pattern.new("'toulouse fc' OR ((toulouse OR tfc) AND (foot OR football OR 'ligue 1' OR 'ligue 2' OR l1 OR l2))")
|
15
|
+
pattern.match?("toulouse est une belle ville").should be_false
|
16
|
+
end
|
17
|
+
|
11
18
|
end
|
12
19
|
|
20
|
+
context "with no normalized option" do
|
21
|
+
|
22
|
+
it "should match or not the pattern" do
|
23
|
+
pattern = TextNlp::Pattern.new("(BD OR 'bande dessinée') AND -samsung", :normalize => false)
|
24
|
+
pattern.match?("cette BD est super").should be_true
|
25
|
+
pattern.match?("cette bd est super").should be_false
|
26
|
+
pattern.match?("cette bande dessinee est illisible").should be_false
|
27
|
+
pattern.match?("cette bande dessinée est illisible").should be_true
|
28
|
+
pattern.match?("cette bande dessinée est illisible sur samsung").should be_false
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
13
33
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
describe TextNlp::StopList do
|
5
|
+
|
6
|
+
it "should remove the words/expressions defined by the stop list" do
|
7
|
+
|
8
|
+
TextNlp::StopList.directory = File.dirname(__FILE__)
|
9
|
+
|
10
|
+
stop_list = TextNlp::StopList.new(:expressions => ['il','a','ecrit par toto'])
|
11
|
+
stop_list.size.should eq 3
|
12
|
+
stop_list.transform("bordel Il fait chaud ici").should eq 'bordel fait chaud ici'
|
13
|
+
stop_list.transform("bordel Il fait chaud ici ecrit par toto").should eq 'bordel fait chaud ici'
|
14
|
+
stop_list.transform("bordel Il fait chaud ici ecrit par titi").should eq 'bordel fait chaud ici ecrit par titi'
|
15
|
+
|
16
|
+
stop_list = TextNlp::StopList.new(:expressions => ['il','a','ecrit par toto'], :file => File.join(File.dirname(__FILE__),"stop_list_toto.txt"))
|
17
|
+
stop_list.size.should eq 5
|
18
|
+
stop_list.transform("bordel Il fait chaud ici").should eq 'fait chaud ici'
|
19
|
+
stop_list.transform("bordel Il fait chaud ici ecrit par toto").should eq 'fait chaud ici'
|
20
|
+
stop_list.transform("bordel Il fait chaud ici ecrit par titi").should eq 'fait chaud ici ecrit par titi'
|
21
|
+
|
22
|
+
stop_list = TextNlp::StopList.new(:name => "min_fr")
|
23
|
+
stop_list.size.should eq 3
|
24
|
+
stop_list.transform("le ballon de zizou").should eq 'ballon zizou'
|
25
|
+
|
26
|
+
stop_list = TextNlp::StopList.new(:names => ["min_fr","min_en"])
|
27
|
+
stop_list.size.should eq 5
|
28
|
+
|
29
|
+
stop_list = TextNlp::StopList.new(
|
30
|
+
:files => [File.join(File.dirname(__FILE__),"stop_list_toto.txt"),File.join(File.dirname(__FILE__),"stop_list_tutu.txt")])
|
31
|
+
stop_list.size.should eq 4
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
data/spec/string_spec.rb
CHANGED
@@ -17,6 +17,16 @@ describe String do
|
|
17
17
|
text.normalize.should eq "TOTO".downcase
|
18
18
|
end
|
19
19
|
|
20
|
+
it "should normalize the receiver string" do
|
21
|
+
text = "TOTO"
|
22
|
+
normalizer = double()
|
23
|
+
String.normalizer = normalizer
|
24
|
+
normalizer.stub(:normalize) { |txt| txt.downcase }
|
25
|
+
text.normalize!
|
26
|
+
text.should eq "TOTO".downcase
|
27
|
+
text.normalized.should be_true
|
28
|
+
end
|
29
|
+
|
20
30
|
it "should call tokenizer" do
|
21
31
|
text = "TOTO"
|
22
32
|
tokenizer = double()
|
@@ -25,11 +35,20 @@ describe String do
|
|
25
35
|
text.tokenize
|
26
36
|
end
|
27
37
|
|
28
|
-
it "should call translator" do
|
38
|
+
it "should call translator / translators" do
|
29
39
|
text = "TOTO"
|
30
|
-
|
31
|
-
|
32
|
-
text.
|
40
|
+
transformer1 = double()
|
41
|
+
transformer1.should_receive(:transform).with(text)
|
42
|
+
text.transform(transformer1)
|
43
|
+
transformer1 = double()
|
44
|
+
transformer1.stub(:transform) { |text| text.tr("T","U") }
|
45
|
+
transformer2 = double()
|
46
|
+
transformer2.stub(:transform) { |text| text.tr("O","A") }
|
47
|
+
transformer1.should_receive(:transform).with("TOTO")
|
48
|
+
transformer2.should_receive(:transform).with("UOUO")
|
49
|
+
text = text.transform(transformer1,transformer2)
|
50
|
+
text.should eq "UAUA"
|
51
|
+
text.transform([transformer1,transformer2])
|
33
52
|
end
|
34
53
|
|
35
54
|
it "should compute similarity" do
|
data/spec/synonyms_spec.rb
CHANGED
@@ -6,18 +6,20 @@ describe TextNlp::Synonyms do
|
|
6
6
|
it "should synonymize the text" do
|
7
7
|
synonyms = TextNlp::Synonyms.new
|
8
8
|
synonyms.register("CAEN",["smc","sm caen","stade malherbe de caen"])
|
9
|
-
synonyms.
|
10
|
-
synonyms.
|
11
|
-
synonyms.
|
12
|
-
synonyms.
|
9
|
+
synonyms.transform("le smc c est de la bombe").should eq "le caen c est de la bombe"
|
10
|
+
synonyms.transform("le truc c est de la bombe").should eq "le truc c est de la bombe"
|
11
|
+
synonyms.transform("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
|
12
|
+
synonyms.transform("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should synonymize the text" do
|
16
16
|
synonyms = TextNlp::Synonyms.new([["CAEN","smc","sm caen","stade malherbe de caen"],["marseille","om"]])
|
17
|
-
synonyms.
|
18
|
-
synonyms.
|
19
|
-
synonyms.
|
20
|
-
synonyms.
|
17
|
+
synonyms.transform("le smc c est de la bombe").should eq "le caen c est de la bombe"
|
18
|
+
synonyms.transform("le truc c est de la bombe").should eq "le truc c est de la bombe"
|
19
|
+
synonyms.transform("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
|
20
|
+
synonyms.transform("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
|
21
|
+
synonyms.transform("le caen c est de la bombe").should eq "le caen c est de la bombe"
|
22
|
+
synonyms.transform("le om c est de la bombe").should eq "le marseille c est de la bombe"
|
21
23
|
end
|
22
24
|
|
23
25
|
end
|
data/text_nlp.gemspec
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'text_nlp'
|
3
|
-
s.version = '0.0.
|
4
|
-
s.date = '2011-07-
|
3
|
+
s.version = '0.0.3'
|
4
|
+
s.date = '2011-07-07'
|
5
5
|
s.summary = "A minimalist NLP library"
|
6
6
|
s.description = s.summary
|
7
|
+
|
8
|
+
s.add_dependency "textquery"
|
9
|
+
s.add_development_dependency "rspec"
|
10
|
+
s.add_development_dependency "rake"
|
11
|
+
|
7
12
|
s.authors = ["fonzo14"]
|
8
13
|
s.require_paths = ["lib"]
|
9
14
|
s.files = `git ls-files`.split("\n")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,8 +9,41 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-07-
|
13
|
-
dependencies:
|
12
|
+
date: 2011-07-07 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: textquery
|
16
|
+
requirement: &86270380 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *86270380
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &86270160 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *86270160
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rake
|
38
|
+
requirement: &86269950 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *86269950
|
14
47
|
description: A minimalist NLP library
|
15
48
|
email:
|
16
49
|
executables: []
|
@@ -29,13 +62,20 @@ files:
|
|
29
62
|
- lib/text_nlp/expressions.rb
|
30
63
|
- lib/text_nlp/normalizer.rb
|
31
64
|
- lib/text_nlp/pattern.rb
|
65
|
+
- lib/text_nlp/stop_list.rb
|
66
|
+
- lib/text_nlp/stoplists/min_fr.txt
|
32
67
|
- lib/text_nlp/string.rb
|
33
68
|
- lib/text_nlp/synonyms.rb
|
34
69
|
- lib/text_nlp/tokenizer.rb
|
35
70
|
- spec/expressions_spec.rb
|
71
|
+
- spec/min_en.txt
|
72
|
+
- spec/min_fr.txt
|
36
73
|
- spec/normalizer_spec.rb
|
37
74
|
- spec/pattern_spec.rb
|
38
75
|
- spec/spec_helper.rb
|
76
|
+
- spec/stop_list_spec.rb
|
77
|
+
- spec/stop_list_toto.txt
|
78
|
+
- spec/stop_list_tutu.txt
|
39
79
|
- spec/string_spec.rb
|
40
80
|
- spec/synonyms_spec.rb
|
41
81
|
- spec/tokenizer_spec.rb
|