text_nlp 0.0.1 → 0.0.2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -33,4 +33,8 @@ class String
33
33
  score
34
34
  end
35
35
 
36
+ def translate(translator)
37
+ translator.translate(self)
38
+ end
39
+
36
40
  end
@@ -0,0 +1,28 @@
1
+ # encoding: UTF-8
2
+
3
+ class TextNlp
4
+ class Synonyms
5
+
6
+ def initialize(synonyms = [])
7
+ @synonyms = {}
8
+ @expressions = Expressions.new
9
+ synonyms.each do |synos|
10
+ name = synos.shift
11
+ register(name,synos)
12
+ end
13
+ end
14
+
15
+ def register(name,synonyms)
16
+ normalized_name = name.normalize
17
+ synonyms.each do |synonym|
18
+ @expressions << synonym
19
+ @synonyms[synonym.normalize] = normalized_name
20
+ end
21
+ end
22
+
23
+ def translate(text)
24
+ @expressions.expressionize(text).map { |expr| @synonyms.key?(expr) ? @synonyms[expr] : expr }.join(' ')
25
+ end
26
+
27
+ end
28
+ end
data/lib/text_nlp.rb CHANGED
@@ -7,6 +7,7 @@ require "text_nlp/tokenizer.rb"
7
7
  require "text_nlp/string.rb"
8
8
  require "text_nlp/expressions.rb"
9
9
  require "text_nlp/pattern.rb"
10
+ require "text_nlp/synonyms.rb"
10
11
 
11
12
  class TextNlp
12
13
  end
data/spec/string_spec.rb CHANGED
@@ -25,6 +25,13 @@ describe String do
25
25
  text.tokenize
26
26
  end
27
27
 
28
+ it "should call translator" do
29
+ text = "TOTO"
30
+ translator = double()
31
+ translator.should_receive(:translate).with(text)
32
+ text.translate(translator)
33
+ end
34
+
28
35
  it "should compute similarity" do
29
36
  "il fait chaud".similarity("il fait chaud").should eq 1.0
30
37
  "il fait chaud".similarity("putin c nul ici").should eq 0.0
@@ -0,0 +1,23 @@
1
+ # encoding: utf-8
2
+ require "spec_helper"
3
+
4
+ describe TextNlp::Synonyms do
5
+
6
+ it "should synonymize the text" do
7
+ synonyms = TextNlp::Synonyms.new
8
+ synonyms.register("CAEN",["smc","sm caen","stade malherbe de caen"])
9
+ synonyms.translate("le smc c est de la bombe").should eq "le caen c est de la bombe"
10
+ synonyms.translate("le truc c est de la bombe").should eq "le truc c est de la bombe"
11
+ synonyms.translate("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
12
+ synonyms.translate("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
13
+ end
14
+
15
+ it "should synonymize the text" do
16
+ synonyms = TextNlp::Synonyms.new([["CAEN","smc","sm caen","stade malherbe de caen"],["marseille","om"]])
17
+ synonyms.translate("le smc c est de la bombe").should eq "le caen c est de la bombe"
18
+ synonyms.translate("le truc c est de la bombe").should eq "le truc c est de la bombe"
19
+ synonyms.translate("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
20
+ synonyms.translate("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
21
+ end
22
+
23
+ end
data/text_nlp.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = 'text_nlp'
3
- s.version = '0.0.1'
3
+ s.version = '0.0.2'
4
4
  s.date = '2011-07-05'
5
5
  s.summary = "A minimalist NLP library"
6
6
  s.description = s.summary
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: text_nlp
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -30,12 +30,14 @@ files:
30
30
  - lib/text_nlp/normalizer.rb
31
31
  - lib/text_nlp/pattern.rb
32
32
  - lib/text_nlp/string.rb
33
+ - lib/text_nlp/synonyms.rb
33
34
  - lib/text_nlp/tokenizer.rb
34
35
  - spec/expressions_spec.rb
35
36
  - spec/normalizer_spec.rb
36
37
  - spec/pattern_spec.rb
37
38
  - spec/spec_helper.rb
38
39
  - spec/string_spec.rb
40
+ - spec/synonyms_spec.rb
39
41
  - spec/tokenizer_spec.rb
40
42
  - text_nlp.gemspec
41
43
  homepage: http://github.com/fonzo14/text_nlp