text_nlp 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
- data/Gemfile.lock +6 -0
- data/lib/text_nlp.rb +1 -3
- data/lib/text_nlp/expressions.rb +2 -2
- data/lib/text_nlp/normalizer.rb +1 -1
- data/lib/text_nlp/pattern.rb +14 -109
- data/lib/text_nlp/stop_list.rb +44 -0
- data/lib/text_nlp/stoplists/min_fr.txt +43 -0
- data/lib/text_nlp/string.rb +13 -5
- data/lib/text_nlp/synonyms.rb +5 -4
- data/lib/text_nlp/tokenizer.rb +1 -1
- data/spec/min_en.txt +2 -0
- data/spec/min_fr.txt +3 -0
- data/spec/pattern_spec.rb +25 -5
- data/spec/stop_list_spec.rb +34 -0
- data/spec/stop_list_toto.txt +2 -0
- data/spec/stop_list_tutu.txt +2 -0
- data/spec/string_spec.rb +23 -4
- data/spec/synonyms_spec.rb +10 -8
- data/text_nlp.gemspec +7 -2
- metadata +43 -3
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -2,6 +2,7 @@ GEM
|
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
4
|
diff-lcs (1.1.2)
|
5
|
+
polyglot (0.3.1)
|
5
6
|
rspec (2.6.0)
|
6
7
|
rspec-core (~> 2.6.0)
|
7
8
|
rspec-expectations (~> 2.6.0)
|
@@ -10,9 +11,14 @@ GEM
|
|
10
11
|
rspec-expectations (2.6.0)
|
11
12
|
diff-lcs (~> 1.1.2)
|
12
13
|
rspec-mocks (2.6.0)
|
14
|
+
textquery (0.1.8)
|
15
|
+
treetop
|
16
|
+
treetop (1.4.9)
|
17
|
+
polyglot (>= 0.3.1)
|
13
18
|
|
14
19
|
PLATFORMS
|
15
20
|
ruby
|
16
21
|
|
17
22
|
DEPENDENCIES
|
18
23
|
rspec
|
24
|
+
textquery
|
data/lib/text_nlp.rb
CHANGED
data/lib/text_nlp/expressions.rb
CHANGED
@@ -12,7 +12,7 @@ class TextNlp
|
|
12
12
|
|
13
13
|
def <<(expression)
|
14
14
|
node = @root
|
15
|
-
expression
|
15
|
+
expression.normalize!
|
16
16
|
@values << expression
|
17
17
|
tokens = expression.tokenize
|
18
18
|
tokens_count = tokens.size
|
@@ -40,7 +40,7 @@ class TextNlp
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def find(text)
|
43
|
-
find_expressions(0,text.normalize.tokenize
|
43
|
+
find_expressions(0,text.normalize.tokenize)
|
44
44
|
end
|
45
45
|
|
46
46
|
private
|
data/lib/text_nlp/normalizer.rb
CHANGED
data/lib/text_nlp/pattern.rb
CHANGED
@@ -1,122 +1,27 @@
|
|
1
1
|
# encoding: UTF-8
|
2
|
+
require 'textquery'
|
2
3
|
|
3
4
|
class TextNlp
|
4
5
|
class Pattern
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
@
|
11
|
-
else
|
12
|
-
@root = root_or_string
|
6
|
+
|
7
|
+
def initialize(pattern, options = {})
|
8
|
+
options = {:normalize => true}.merge(options)
|
9
|
+
if options[:normalize]
|
10
|
+
normalize_pattern(pattern)
|
11
|
+
@to_normalize = true
|
13
12
|
end
|
13
|
+
@text_query = TextQuery.new(pattern, {:ignorecase => options[:normalize]})
|
14
14
|
end
|
15
|
-
|
16
|
-
def <<(node)
|
17
|
-
@root << node
|
18
|
-
end
|
19
|
-
|
15
|
+
|
20
16
|
def match?(text)
|
21
|
-
@
|
17
|
+
text.normalize! if @to_normalize
|
18
|
+
@text_query.match?(text)
|
22
19
|
end
|
23
20
|
|
24
21
|
private
|
25
|
-
def
|
26
|
-
|
27
|
-
|
28
|
-
expr.chars.each_with_index do |char,i|
|
29
|
-
if (char == '(')
|
30
|
-
opened += 1
|
31
|
-
current_expression << char if ((opened - closed) > 1)
|
32
|
-
elsif (char == ')')
|
33
|
-
closed += 1
|
34
|
-
current_expression << char if ((opened - closed) > 0)
|
35
|
-
elsif ((opened == closed) && (operators.include?(expr[i-1..i])))
|
36
|
-
node = operator_node(expr[i-1..i])
|
37
|
-
node << parse(current_expression[0..-2])
|
38
|
-
node << parse(expr[i+1..-1])
|
39
|
-
break;
|
40
|
-
else
|
41
|
-
current_expression << char
|
42
|
-
end
|
43
|
-
end
|
44
|
-
unless node
|
45
|
-
if (current_expression.match(/\|{2}|&{2}/))
|
46
|
-
node = parse(current_expression)
|
47
|
-
else
|
48
|
-
node = current_expression[0..0] == '!' ? Not.new(current_expression[1..-1]) : Unary.new(current_expression)
|
49
|
-
end
|
50
|
-
end
|
51
|
-
node
|
52
|
-
end
|
53
|
-
|
54
|
-
def operator_node(operator)
|
55
|
-
node = case operator
|
56
|
-
when '||' then Or.new
|
57
|
-
when '&&' then And.new
|
58
|
-
end
|
59
|
-
node
|
60
|
-
end
|
61
|
-
|
62
|
-
class Composite
|
63
|
-
attr_reader :nodes
|
64
|
-
|
65
|
-
def initialize(*nodes)
|
66
|
-
@nodes = nodes || []
|
67
|
-
end
|
68
|
-
|
69
|
-
def <<(node)
|
70
|
-
@nodes << node
|
71
|
-
end
|
72
|
-
|
73
|
-
def values
|
74
|
-
@nodes.map { |node| node.values }.flatten
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
class And < Composite
|
79
|
-
def evaluate(expr)
|
80
|
-
@nodes.each do |node|
|
81
|
-
return false unless node.evaluate(expr)
|
82
|
-
end
|
83
|
-
return true
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
class Or < Composite
|
88
|
-
def evaluate(expr)
|
89
|
-
@nodes.each do |node|
|
90
|
-
return true if node.evaluate(expr)
|
91
|
-
end
|
92
|
-
return false
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
class Unary
|
97
|
-
attr_reader :value
|
98
|
-
|
99
|
-
def initialize(value)
|
100
|
-
@value = value
|
101
|
-
@expressions = Expressions.new([@value])
|
102
|
-
end
|
103
|
-
|
104
|
-
def evaluate(expr)
|
105
|
-
@expressions.any?(expr)
|
106
|
-
end
|
107
|
-
|
108
|
-
def values
|
109
|
-
[value]
|
110
|
-
end
|
111
|
-
end
|
112
|
-
|
113
|
-
class Not < Unary
|
114
|
-
def evaluate(expr)
|
115
|
-
!super(expr)
|
116
|
-
end
|
117
|
-
def values
|
118
|
-
[]
|
119
|
-
end
|
22
|
+
def normalize_pattern(pattern)
|
23
|
+
pattern.tr!("éèàçîêô","eeacieo")
|
24
|
+
pattern.tr!("!,;?./\\_|[]{}<>:*$%"," ")
|
120
25
|
end
|
121
26
|
|
122
27
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
class TextNlp
|
4
|
+
class StopList
|
5
|
+
|
6
|
+
class << self
|
7
|
+
attr_accessor :directory
|
8
|
+
StopList.directory = File.join(File.dirname(__FILE__),'stoplists')
|
9
|
+
end
|
10
|
+
|
11
|
+
def initialize(options = {})
|
12
|
+
@cache = {}
|
13
|
+
options = {:expressions => []}.merge(options)
|
14
|
+
expressions = options[:expressions]
|
15
|
+
if (options.key?(:name))
|
16
|
+
File.foreach(File.join(StopList.directory,"#{options[:name]}.txt")) { |e| expressions << e }
|
17
|
+
end
|
18
|
+
if (options.key?(:names))
|
19
|
+
options[:names].each do |name|
|
20
|
+
File.foreach(File.join(StopList.directory,"#{name}.txt")) { |e| expressions << e }
|
21
|
+
end
|
22
|
+
end
|
23
|
+
if (options.key?(:file))
|
24
|
+
File.foreach(options[:file]) { |e| expressions << e }
|
25
|
+
end
|
26
|
+
if (options.key?(:files))
|
27
|
+
options[:files].each do |file|
|
28
|
+
File.foreach(file) { |e| expressions << e }
|
29
|
+
end
|
30
|
+
end
|
31
|
+
expressions.each { |e| @cache[e.normalize] = true }
|
32
|
+
@expressions = TextNlp::Expressions.new(expressions)
|
33
|
+
end
|
34
|
+
|
35
|
+
def transform(text)
|
36
|
+
@expressions.expressionize(text).map { |expr| @cache.key?(expr) ? nil : expr }.compact.join(' ')
|
37
|
+
end
|
38
|
+
|
39
|
+
def size
|
40
|
+
@expressions.values.size
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
a
|
2
|
+
au
|
3
|
+
aussi
|
4
|
+
aux
|
5
|
+
avec
|
6
|
+
c
|
7
|
+
ce
|
8
|
+
cette
|
9
|
+
contre
|
10
|
+
d
|
11
|
+
dans
|
12
|
+
de
|
13
|
+
des
|
14
|
+
du
|
15
|
+
en
|
16
|
+
et
|
17
|
+
j
|
18
|
+
l
|
19
|
+
la
|
20
|
+
le
|
21
|
+
les
|
22
|
+
mais
|
23
|
+
n
|
24
|
+
ou
|
25
|
+
par
|
26
|
+
pas
|
27
|
+
pour
|
28
|
+
qu
|
29
|
+
que
|
30
|
+
quel
|
31
|
+
quelle
|
32
|
+
quelles
|
33
|
+
quels
|
34
|
+
qui
|
35
|
+
sa
|
36
|
+
sans
|
37
|
+
ses
|
38
|
+
son
|
39
|
+
sous
|
40
|
+
sur
|
41
|
+
un
|
42
|
+
une
|
43
|
+
y
|
data/lib/text_nlp/string.rb
CHANGED
@@ -18,23 +18,31 @@ class String
|
|
18
18
|
self
|
19
19
|
end
|
20
20
|
|
21
|
+
def normalize!
|
22
|
+
unless normalized()
|
23
|
+
replace(self.normalize)
|
24
|
+
self.normalized = true
|
25
|
+
end
|
26
|
+
self
|
27
|
+
end
|
28
|
+
|
21
29
|
def tokenize
|
22
30
|
(String.tokenizer || TextNlp::Tokenizer.new).tokenize(self)
|
23
31
|
end
|
24
32
|
|
25
33
|
def similarity(text)
|
26
34
|
score = 0.0
|
27
|
-
tokens1 = self.normalize.tokenize
|
28
|
-
tokens2 = text.normalize.tokenize
|
35
|
+
tokens1, tokens2 = self.normalize.tokenize, text.normalize.tokenize
|
29
36
|
if (tokens1.size > 0 && tokens2.size > 0)
|
30
37
|
intersection = tokens1 & tokens2
|
31
|
-
score = (((intersection.size.to_f / tokens1.size
|
38
|
+
score = (((intersection.size.to_f / tokens1.size) + (intersection.size.to_f / tokens2.size)) / 2)
|
32
39
|
end
|
33
40
|
score
|
34
41
|
end
|
35
42
|
|
36
|
-
def
|
37
|
-
|
43
|
+
def transform(*transformers)
|
44
|
+
transformers = [transformers] unless transformers.respond_to?(:each)
|
45
|
+
transformers.flatten.inject(self) { |text,transformer| transformer.transform(text) }
|
38
46
|
end
|
39
47
|
|
40
48
|
end
|
data/lib/text_nlp/synonyms.rb
CHANGED
@@ -13,15 +13,16 @@ class TextNlp
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def register(name,synonyms)
|
16
|
-
|
16
|
+
name.normalize!
|
17
17
|
synonyms.each do |synonym|
|
18
|
+
synonym.normalize!
|
18
19
|
@expressions << synonym
|
19
|
-
@synonyms[synonym
|
20
|
+
@synonyms[synonym] = name
|
20
21
|
end
|
21
22
|
end
|
22
23
|
|
23
|
-
def
|
24
|
-
@expressions.expressionize(text).map { |expr| @synonyms.key?(expr) ? @synonyms[expr] : expr }.join(' ')
|
24
|
+
def transform(text)
|
25
|
+
@expressions.expressionize(text).map { |expr| @synonyms.key?(expr) ? @synonyms[expr] : expr }.compact.join(' ')
|
25
26
|
end
|
26
27
|
|
27
28
|
end
|
data/lib/text_nlp/tokenizer.rb
CHANGED
data/spec/min_en.txt
ADDED
data/spec/min_fr.txt
ADDED
data/spec/pattern_spec.rb
CHANGED
@@ -3,11 +3,31 @@ require "spec_helper"
|
|
3
3
|
|
4
4
|
describe TextNlp::Pattern do
|
5
5
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
6
|
+
context "with normalize option" do
|
7
|
+
|
8
|
+
it "should match or not the pattern" do
|
9
|
+
pattern = TextNlp::Pattern.new("(BD OR 'bande dessinée') AND -samsung")
|
10
|
+
pattern.match?("cette BD est super").should be_true
|
11
|
+
pattern.match?("cette bd est illisible sur samsung NTC").should be_false
|
12
|
+
pattern.match?("cette bande dessinee est illisible sur samsung NTC").should be_false
|
13
|
+
pattern.match?("cette bande dessinee est illisible").should be_true
|
14
|
+
pattern = TextNlp::Pattern.new("'toulouse fc' OR ((toulouse OR tfc) AND (foot OR football OR 'ligue 1' OR 'ligue 2' OR l1 OR l2))")
|
15
|
+
pattern.match?("toulouse est une belle ville").should be_false
|
16
|
+
end
|
17
|
+
|
11
18
|
end
|
12
19
|
|
20
|
+
context "with no normalized option" do
|
21
|
+
|
22
|
+
it "should match or not the pattern" do
|
23
|
+
pattern = TextNlp::Pattern.new("(BD OR 'bande dessinée') AND -samsung", :normalize => false)
|
24
|
+
pattern.match?("cette BD est super").should be_true
|
25
|
+
pattern.match?("cette bd est super").should be_false
|
26
|
+
pattern.match?("cette bande dessinee est illisible").should be_false
|
27
|
+
pattern.match?("cette bande dessinée est illisible").should be_true
|
28
|
+
pattern.match?("cette bande dessinée est illisible sur samsung").should be_false
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
13
33
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
describe TextNlp::StopList do
|
5
|
+
|
6
|
+
it "should remove the words/expressions defined by the stop list" do
|
7
|
+
|
8
|
+
TextNlp::StopList.directory = File.dirname(__FILE__)
|
9
|
+
|
10
|
+
stop_list = TextNlp::StopList.new(:expressions => ['il','a','ecrit par toto'])
|
11
|
+
stop_list.size.should eq 3
|
12
|
+
stop_list.transform("bordel Il fait chaud ici").should eq 'bordel fait chaud ici'
|
13
|
+
stop_list.transform("bordel Il fait chaud ici ecrit par toto").should eq 'bordel fait chaud ici'
|
14
|
+
stop_list.transform("bordel Il fait chaud ici ecrit par titi").should eq 'bordel fait chaud ici ecrit par titi'
|
15
|
+
|
16
|
+
stop_list = TextNlp::StopList.new(:expressions => ['il','a','ecrit par toto'], :file => File.join(File.dirname(__FILE__),"stop_list_toto.txt"))
|
17
|
+
stop_list.size.should eq 5
|
18
|
+
stop_list.transform("bordel Il fait chaud ici").should eq 'fait chaud ici'
|
19
|
+
stop_list.transform("bordel Il fait chaud ici ecrit par toto").should eq 'fait chaud ici'
|
20
|
+
stop_list.transform("bordel Il fait chaud ici ecrit par titi").should eq 'fait chaud ici ecrit par titi'
|
21
|
+
|
22
|
+
stop_list = TextNlp::StopList.new(:name => "min_fr")
|
23
|
+
stop_list.size.should eq 3
|
24
|
+
stop_list.transform("le ballon de zizou").should eq 'ballon zizou'
|
25
|
+
|
26
|
+
stop_list = TextNlp::StopList.new(:names => ["min_fr","min_en"])
|
27
|
+
stop_list.size.should eq 5
|
28
|
+
|
29
|
+
stop_list = TextNlp::StopList.new(
|
30
|
+
:files => [File.join(File.dirname(__FILE__),"stop_list_toto.txt"),File.join(File.dirname(__FILE__),"stop_list_tutu.txt")])
|
31
|
+
stop_list.size.should eq 4
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
data/spec/string_spec.rb
CHANGED
@@ -17,6 +17,16 @@ describe String do
|
|
17
17
|
text.normalize.should eq "TOTO".downcase
|
18
18
|
end
|
19
19
|
|
20
|
+
it "should normalize the receiver string" do
|
21
|
+
text = "TOTO"
|
22
|
+
normalizer = double()
|
23
|
+
String.normalizer = normalizer
|
24
|
+
normalizer.stub(:normalize) { |txt| txt.downcase }
|
25
|
+
text.normalize!
|
26
|
+
text.should eq "TOTO".downcase
|
27
|
+
text.normalized.should be_true
|
28
|
+
end
|
29
|
+
|
20
30
|
it "should call tokenizer" do
|
21
31
|
text = "TOTO"
|
22
32
|
tokenizer = double()
|
@@ -25,11 +35,20 @@ describe String do
|
|
25
35
|
text.tokenize
|
26
36
|
end
|
27
37
|
|
28
|
-
it "should call translator" do
|
38
|
+
it "should call translator / translators" do
|
29
39
|
text = "TOTO"
|
30
|
-
|
31
|
-
|
32
|
-
text.
|
40
|
+
transformer1 = double()
|
41
|
+
transformer1.should_receive(:transform).with(text)
|
42
|
+
text.transform(transformer1)
|
43
|
+
transformer1 = double()
|
44
|
+
transformer1.stub(:transform) { |text| text.tr("T","U") }
|
45
|
+
transformer2 = double()
|
46
|
+
transformer2.stub(:transform) { |text| text.tr("O","A") }
|
47
|
+
transformer1.should_receive(:transform).with("TOTO")
|
48
|
+
transformer2.should_receive(:transform).with("UOUO")
|
49
|
+
text = text.transform(transformer1,transformer2)
|
50
|
+
text.should eq "UAUA"
|
51
|
+
text.transform([transformer1,transformer2])
|
33
52
|
end
|
34
53
|
|
35
54
|
it "should compute similarity" do
|
data/spec/synonyms_spec.rb
CHANGED
@@ -6,18 +6,20 @@ describe TextNlp::Synonyms do
|
|
6
6
|
it "should synonymize the text" do
|
7
7
|
synonyms = TextNlp::Synonyms.new
|
8
8
|
synonyms.register("CAEN",["smc","sm caen","stade malherbe de caen"])
|
9
|
-
synonyms.
|
10
|
-
synonyms.
|
11
|
-
synonyms.
|
12
|
-
synonyms.
|
9
|
+
synonyms.transform("le smc c est de la bombe").should eq "le caen c est de la bombe"
|
10
|
+
synonyms.transform("le truc c est de la bombe").should eq "le truc c est de la bombe"
|
11
|
+
synonyms.transform("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
|
12
|
+
synonyms.transform("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should synonymize the text" do
|
16
16
|
synonyms = TextNlp::Synonyms.new([["CAEN","smc","sm caen","stade malherbe de caen"],["marseille","om"]])
|
17
|
-
synonyms.
|
18
|
-
synonyms.
|
19
|
-
synonyms.
|
20
|
-
synonyms.
|
17
|
+
synonyms.transform("le smc c est de la bombe").should eq "le caen c est de la bombe"
|
18
|
+
synonyms.transform("le truc c est de la bombe").should eq "le truc c est de la bombe"
|
19
|
+
synonyms.transform("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
|
20
|
+
synonyms.transform("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
|
21
|
+
synonyms.transform("le caen c est de la bombe").should eq "le caen c est de la bombe"
|
22
|
+
synonyms.transform("le om c est de la bombe").should eq "le marseille c est de la bombe"
|
21
23
|
end
|
22
24
|
|
23
25
|
end
|
data/text_nlp.gemspec
CHANGED
@@ -1,9 +1,14 @@
|
|
1
1
|
Gem::Specification.new do |s|
|
2
2
|
s.name = 'text_nlp'
|
3
|
-
s.version = '0.0.
|
4
|
-
s.date = '2011-07-
|
3
|
+
s.version = '0.0.3'
|
4
|
+
s.date = '2011-07-07'
|
5
5
|
s.summary = "A minimalist NLP library"
|
6
6
|
s.description = s.summary
|
7
|
+
|
8
|
+
s.add_dependency "textquery"
|
9
|
+
s.add_development_dependency "rspec"
|
10
|
+
s.add_development_dependency "rake"
|
11
|
+
|
7
12
|
s.authors = ["fonzo14"]
|
8
13
|
s.require_paths = ["lib"]
|
9
14
|
s.files = `git ls-files`.split("\n")
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.3
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,8 +9,41 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-07-
|
13
|
-
dependencies:
|
12
|
+
date: 2011-07-07 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: textquery
|
16
|
+
requirement: &86270380 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *86270380
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &86270160 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ! '>='
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '0'
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *86270160
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: rake
|
38
|
+
requirement: &86269950 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ! '>='
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *86269950
|
14
47
|
description: A minimalist NLP library
|
15
48
|
email:
|
16
49
|
executables: []
|
@@ -29,13 +62,20 @@ files:
|
|
29
62
|
- lib/text_nlp/expressions.rb
|
30
63
|
- lib/text_nlp/normalizer.rb
|
31
64
|
- lib/text_nlp/pattern.rb
|
65
|
+
- lib/text_nlp/stop_list.rb
|
66
|
+
- lib/text_nlp/stoplists/min_fr.txt
|
32
67
|
- lib/text_nlp/string.rb
|
33
68
|
- lib/text_nlp/synonyms.rb
|
34
69
|
- lib/text_nlp/tokenizer.rb
|
35
70
|
- spec/expressions_spec.rb
|
71
|
+
- spec/min_en.txt
|
72
|
+
- spec/min_fr.txt
|
36
73
|
- spec/normalizer_spec.rb
|
37
74
|
- spec/pattern_spec.rb
|
38
75
|
- spec/spec_helper.rb
|
76
|
+
- spec/stop_list_spec.rb
|
77
|
+
- spec/stop_list_toto.txt
|
78
|
+
- spec/stop_list_tutu.txt
|
39
79
|
- spec/string_spec.rb
|
40
80
|
- spec/synonyms_spec.rb
|
41
81
|
- spec/tokenizer_spec.rb
|