text_nlp 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/text_nlp/string.rb +4 -0
- data/lib/text_nlp/synonyms.rb +28 -0
- data/lib/text_nlp.rb +1 -0
- data/spec/string_spec.rb +7 -0
- data/spec/synonyms_spec.rb +23 -0
- data/text_nlp.gemspec +1 -1
- metadata +3 -1
data/lib/text_nlp/string.rb
CHANGED
data/lib/text_nlp/synonyms.rb
CHANGED
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
class TextNlp
|
4
|
+
class Synonyms
|
5
|
+
|
6
|
+
def initialize(synonyms = [])
|
7
|
+
@synonyms = {}
|
8
|
+
@expressions = Expressions.new
|
9
|
+
synonyms.each do |synos|
|
10
|
+
name = synos.shift
|
11
|
+
register(name,synos)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def register(name,synonyms)
|
16
|
+
normalized_name = name.normalize
|
17
|
+
synonyms.each do |synonym|
|
18
|
+
@expressions << synonym
|
19
|
+
@synonyms[synonym.normalize] = normalized_name
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
def translate(text)
|
24
|
+
@expressions.expressionize(text).map { |expr| @synonyms.key?(expr) ? @synonyms[expr] : expr }.join(' ')
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
end
|
data/lib/text_nlp.rb
CHANGED
data/spec/string_spec.rb
CHANGED
@@ -25,6 +25,13 @@ describe String do
|
|
25
25
|
text.tokenize
|
26
26
|
end
|
27
27
|
|
28
|
+
it "should call translator" do
|
29
|
+
text = "TOTO"
|
30
|
+
translator = double()
|
31
|
+
translator.should_receive(:translate).with(text)
|
32
|
+
text.translate(translator)
|
33
|
+
end
|
34
|
+
|
28
35
|
it "should compute similarity" do
|
29
36
|
"il fait chaud".similarity("il fait chaud").should eq 1.0
|
30
37
|
"il fait chaud".similarity("putin c nul ici").should eq 0.0
|
data/spec/synonyms_spec.rb
CHANGED
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
describe TextNlp::Synonyms do
|
5
|
+
|
6
|
+
it "should synonymize the text" do
|
7
|
+
synonyms = TextNlp::Synonyms.new
|
8
|
+
synonyms.register("CAEN",["smc","sm caen","stade malherbe de caen"])
|
9
|
+
synonyms.translate("le smc c est de la bombe").should eq "le caen c est de la bombe"
|
10
|
+
synonyms.translate("le truc c est de la bombe").should eq "le truc c est de la bombe"
|
11
|
+
synonyms.translate("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
|
12
|
+
synonyms.translate("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
|
13
|
+
end
|
14
|
+
|
15
|
+
it "should synonymize the text" do
|
16
|
+
synonyms = TextNlp::Synonyms.new([["CAEN","smc","sm caen","stade malherbe de caen"],["marseille","om"]])
|
17
|
+
synonyms.translate("le smc c est de la bombe").should eq "le caen c est de la bombe"
|
18
|
+
synonyms.translate("le truc c est de la bombe").should eq "le truc c est de la bombe"
|
19
|
+
synonyms.translate("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
|
20
|
+
synonyms.translate("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
data/text_nlp.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -30,12 +30,14 @@ files:
|
|
30
30
|
- lib/text_nlp/normalizer.rb
|
31
31
|
- lib/text_nlp/pattern.rb
|
32
32
|
- lib/text_nlp/string.rb
|
33
|
+
- lib/text_nlp/synonyms.rb
|
33
34
|
- lib/text_nlp/tokenizer.rb
|
34
35
|
- spec/expressions_spec.rb
|
35
36
|
- spec/normalizer_spec.rb
|
36
37
|
- spec/pattern_spec.rb
|
37
38
|
- spec/spec_helper.rb
|
38
39
|
- spec/string_spec.rb
|
40
|
+
- spec/synonyms_spec.rb
|
39
41
|
- spec/tokenizer_spec.rb
|
40
42
|
- text_nlp.gemspec
|
41
43
|
homepage: http://github.com/fonzo14/text_nlp
|