text_nlp 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/text_nlp/string.rb +4 -0
- data/lib/text_nlp/synonyms.rb +28 -0
- data/lib/text_nlp.rb +1 -0
- data/spec/string_spec.rb +7 -0
- data/spec/synonyms_spec.rb +23 -0
- data/text_nlp.gemspec +1 -1
- metadata +3 -1
data/lib/text_nlp/string.rb
CHANGED
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: UTF-8
|
2
|
+
|
3
|
+
class TextNlp
  # Maps synonym expressions to a canonical name and rewrites text accordingly.
  #
  # Every synonym is pushed to an Expressions instance (via <<) and recorded,
  # in normalized form, in a lookup table pointing at the normalized name.
  class Synonyms

    # @param synonyms [Array<Array<String>>] groups whose first element is the
    #   canonical name and whose remaining elements are its synonyms.
    #   The groups are only read: the caller's arrays are never modified.
    def initialize(synonyms = [])
      @synonyms = {}
      @expressions = Expressions.new
      synonyms.each do |group|
        # Destructure rather than shift: shift would remove the name from the
        # caller's own array (and raise on a frozen one).
        name, *alternatives = group
        register(name, alternatives)
      end
    end

    # Declares each entry of +synonyms+ as an alias of +name+.
    #
    # @param name [String] canonical name; stored as name.normalize
    # @param synonyms [Array<String>] expressions to replace with the name;
    #   each one is looked up later by its normalized form
    def register(name, synonyms)
      normalized_name = name.normalize
      synonyms.each do |synonym|
        @expressions << synonym
        @synonyms[synonym.normalize] = normalized_name
      end
    end

    # Replaces every registered synonym found in +text+ by its name.
    #
    # @param text [String] text to rewrite
    # @return [String] the items returned by Expressions#expressionize, each
    #   swapped for its registered name when one exists, joined with a single
    #   space. NOTE(review): presumably expressionize yields normalized
    #   expressions, since they are compared with normalized keys -- confirm.
    def translate(text)
      @expressions.expressionize(text).map { |expr| @synonyms.fetch(expr, expr) }.join(' ')
    end

  end
end
|
data/lib/text_nlp.rb
CHANGED
data/spec/string_spec.rb
CHANGED
@@ -25,6 +25,13 @@ describe String do
|
|
25
25
|
text.tokenize
|
26
26
|
end
|
27
27
|
|
28
|
+
# String#translate must hand the string itself to the translator's #translate.
it "should call translator" do
  input = "TOTO"
  translator = double
  translator.should_receive(:translate).with(input)
  input.translate(translator)
end
|
34
|
+
|
28
35
|
it "should compute similarity" do
|
29
36
|
"il fait chaud".similarity("il fait chaud").should eq 1.0
|
30
37
|
"il fait chaud".similarity("putin c nul ici").should eq 0.0
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require "spec_helper"
|
3
|
+
|
4
|
+
describe TextNlp::Synonyms do

  # Synonyms declared after construction through #register.
  it "should synonymize the text with registered synonyms" do
    synonyms = TextNlp::Synonyms.new
    synonyms.register("CAEN", ["smc", "sm caen", "stade malherbe de caen"])
    synonyms.translate("le smc c est de la bombe").should eq "le caen c est de la bombe"
    synonyms.translate("le truc c est de la bombe").should eq "le truc c est de la bombe"
    synonyms.translate("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
    synonyms.translate("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
  end

  # Same expectations with the groups handed to the constructor
  # (first element is the name, the rest are its synonyms).
  it "should synonymize the text with synonyms given to the constructor" do
    synonyms = TextNlp::Synonyms.new([["CAEN", "smc", "sm caen", "stade malherbe de caen"], ["marseille", "om"]])
    synonyms.translate("le smc c est de la bombe").should eq "le caen c est de la bombe"
    synonyms.translate("le truc c est de la bombe").should eq "le truc c est de la bombe"
    synonyms.translate("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
    synonyms.translate("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
  end

end
|
data/text_nlp.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: text_nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -30,12 +30,14 @@ files:
|
|
30
30
|
- lib/text_nlp/normalizer.rb
|
31
31
|
- lib/text_nlp/pattern.rb
|
32
32
|
- lib/text_nlp/string.rb
|
33
|
+
- lib/text_nlp/synonyms.rb
|
33
34
|
- lib/text_nlp/tokenizer.rb
|
34
35
|
- spec/expressions_spec.rb
|
35
36
|
- spec/normalizer_spec.rb
|
36
37
|
- spec/pattern_spec.rb
|
37
38
|
- spec/spec_helper.rb
|
38
39
|
- spec/string_spec.rb
|
40
|
+
- spec/synonyms_spec.rb
|
39
41
|
- spec/tokenizer_spec.rb
|
40
42
|
- text_nlp.gemspec
|
41
43
|
homepage: http://github.com/fonzo14/text_nlp
|