text_nlp 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -0
 - data/Gemfile.lock +6 -0
 - data/lib/text_nlp.rb +1 -3
 - data/lib/text_nlp/expressions.rb +2 -2
 - data/lib/text_nlp/normalizer.rb +1 -1
 - data/lib/text_nlp/pattern.rb +14 -109
 - data/lib/text_nlp/stop_list.rb +44 -0
 - data/lib/text_nlp/stoplists/min_fr.txt +43 -0
 - data/lib/text_nlp/string.rb +13 -5
 - data/lib/text_nlp/synonyms.rb +5 -4
 - data/lib/text_nlp/tokenizer.rb +1 -1
 - data/spec/min_en.txt +2 -0
 - data/spec/min_fr.txt +3 -0
 - data/spec/pattern_spec.rb +25 -5
 - data/spec/stop_list_spec.rb +34 -0
 - data/spec/stop_list_toto.txt +2 -0
 - data/spec/stop_list_tutu.txt +2 -0
 - data/spec/string_spec.rb +23 -4
 - data/spec/synonyms_spec.rb +10 -8
 - data/text_nlp.gemspec +7 -2
 - metadata +43 -3
 
    
        data/Gemfile
    CHANGED
    
    
    
        data/Gemfile.lock
    CHANGED
    
    | 
         @@ -2,6 +2,7 @@ GEM 
     | 
|
| 
       2 
2 
     | 
    
         
             
              remote: http://rubygems.org/
         
     | 
| 
       3 
3 
     | 
    
         
             
              specs:
         
     | 
| 
       4 
4 
     | 
    
         
             
                diff-lcs (1.1.2)
         
     | 
| 
      
 5 
     | 
    
         
            +
                polyglot (0.3.1)
         
     | 
| 
       5 
6 
     | 
    
         
             
                rspec (2.6.0)
         
     | 
| 
       6 
7 
     | 
    
         
             
                  rspec-core (~> 2.6.0)
         
     | 
| 
       7 
8 
     | 
    
         
             
                  rspec-expectations (~> 2.6.0)
         
     | 
| 
         @@ -10,9 +11,14 @@ GEM 
     | 
|
| 
       10 
11 
     | 
    
         
             
                rspec-expectations (2.6.0)
         
     | 
| 
       11 
12 
     | 
    
         
             
                  diff-lcs (~> 1.1.2)
         
     | 
| 
       12 
13 
     | 
    
         
             
                rspec-mocks (2.6.0)
         
     | 
| 
      
 14 
     | 
    
         
            +
                textquery (0.1.8)
         
     | 
| 
      
 15 
     | 
    
         
            +
                  treetop
         
     | 
| 
      
 16 
     | 
    
         
            +
                treetop (1.4.9)
         
     | 
| 
      
 17 
     | 
    
         
            +
                  polyglot (>= 0.3.1)
         
     | 
| 
       13 
18 
     | 
    
         | 
| 
       14 
19 
     | 
    
         
             
            PLATFORMS
         
     | 
| 
       15 
20 
     | 
    
         
             
              ruby
         
     | 
| 
       16 
21 
     | 
    
         | 
| 
       17 
22 
     | 
    
         
             
            DEPENDENCIES
         
     | 
| 
       18 
23 
     | 
    
         
             
              rspec
         
     | 
| 
      
 24 
     | 
    
         
            +
              textquery
         
     | 
    
        data/lib/text_nlp.rb
    CHANGED
    
    
    
        data/lib/text_nlp/expressions.rb
    CHANGED
    
    | 
         @@ -12,7 +12,7 @@ class TextNlp 
     | 
|
| 
       12 
12 
     | 
    
         | 
| 
       13 
13 
     | 
    
         
             
                def <<(expression)
         
     | 
| 
       14 
14 
     | 
    
         
             
                  node = @root
         
     | 
| 
       15 
     | 
    
         
            -
                  expression 
     | 
| 
      
 15 
     | 
    
         
            +
                  expression.normalize!
         
     | 
| 
       16 
16 
     | 
    
         
             
                  @values << expression
         
     | 
| 
       17 
17 
     | 
    
         
             
                  tokens = expression.tokenize
         
     | 
| 
       18 
18 
     | 
    
         
             
                  tokens_count = tokens.size
         
     | 
| 
         @@ -40,7 +40,7 @@ class TextNlp 
     | 
|
| 
       40 
40 
     | 
    
         
             
                end
         
     | 
| 
       41 
41 
     | 
    
         | 
| 
       42 
42 
     | 
    
         
             
                def find(text)
         
     | 
| 
       43 
     | 
    
         
            -
                  find_expressions(0,text.normalize.tokenize 
     | 
| 
      
 43 
     | 
    
         
            +
                  find_expressions(0,text.normalize.tokenize)
         
     | 
| 
       44 
44 
     | 
    
         
             
                end
         
     | 
| 
       45 
45 
     | 
    
         | 
| 
       46 
46 
     | 
    
         
             
                private
         
     | 
    
        data/lib/text_nlp/normalizer.rb
    CHANGED
    
    
    
        data/lib/text_nlp/pattern.rb
    CHANGED
    
    | 
         @@ -1,122 +1,27 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # encoding: UTF-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'textquery'
         
     | 
| 
       2 
3 
     | 
    
         | 
| 
       3 
4 
     | 
    
         
             
            class TextNlp
         
     | 
| 
       4 
5 
     | 
    
         
             
              class Pattern
         
     | 
| 
       5 
     | 
    
         
            -
             
     | 
| 
       6 
     | 
    
         
            -
                 
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
                    @ 
     | 
| 
       11 
     | 
    
         
            -
                  else
         
     | 
| 
       12 
     | 
    
         
            -
                    @root = root_or_string
         
     | 
| 
      
 6 
     | 
    
         
            +
                    
         
     | 
| 
      
 7 
     | 
    
         
            +
                def initialize(pattern, options = {})
         
     | 
| 
      
 8 
     | 
    
         
            +
                  options = {:normalize => true}.merge(options)
         
     | 
| 
      
 9 
     | 
    
         
            +
                  if options[:normalize]
         
     | 
| 
      
 10 
     | 
    
         
            +
                    normalize_pattern(pattern) 
         
     | 
| 
      
 11 
     | 
    
         
            +
                    @to_normalize = true
         
     | 
| 
       13 
12 
     | 
    
         
             
                  end
         
     | 
| 
      
 13 
     | 
    
         
            +
                  @text_query = TextQuery.new(pattern, {:ignorecase => options[:normalize]})
         
     | 
| 
       14 
14 
     | 
    
         
             
                end
         
     | 
| 
       15 
     | 
    
         
            -
             
     | 
| 
       16 
     | 
    
         
            -
                def <<(node)
         
     | 
| 
       17 
     | 
    
         
            -
                  @root << node
         
     | 
| 
       18 
     | 
    
         
            -
                end
         
     | 
| 
       19 
     | 
    
         
            -
                
         
     | 
| 
      
 15 
     | 
    
         
            +
                  
         
     | 
| 
       20 
16 
     | 
    
         
             
                def match?(text)
         
     | 
| 
       21 
     | 
    
         
            -
                  @ 
     | 
| 
      
 17 
     | 
    
         
            +
                  text.normalize! if @to_normalize
         
     | 
| 
      
 18 
     | 
    
         
            +
                  @text_query.match?(text)
         
     | 
| 
       22 
19 
     | 
    
         
             
                end
         
     | 
| 
       23 
20 
     | 
    
         | 
| 
       24 
21 
     | 
    
         
             
                private
         
     | 
| 
       25 
     | 
    
         
            -
                def  
     | 
| 
       26 
     | 
    
         
            -
                   
     | 
| 
       27 
     | 
    
         
            -
                   
     | 
| 
       28 
     | 
    
         
            -
                  expr.chars.each_with_index do |char,i|
         
     | 
| 
       29 
     | 
    
         
            -
                    if (char == '(')
         
     | 
| 
       30 
     | 
    
         
            -
                      opened += 1
         
     | 
| 
       31 
     | 
    
         
            -
                      current_expression << char if ((opened - closed) > 1)
         
     | 
| 
       32 
     | 
    
         
            -
                    elsif (char == ')')
         
     | 
| 
       33 
     | 
    
         
            -
                      closed += 1
         
     | 
| 
       34 
     | 
    
         
            -
                      current_expression << char if ((opened - closed) > 0)
         
     | 
| 
       35 
     | 
    
         
            -
                    elsif ((opened == closed) && (operators.include?(expr[i-1..i])))          
         
     | 
| 
       36 
     | 
    
         
            -
                      node = operator_node(expr[i-1..i])
         
     | 
| 
       37 
     | 
    
         
            -
                      node << parse(current_expression[0..-2])
         
     | 
| 
       38 
     | 
    
         
            -
                      node << parse(expr[i+1..-1])
         
     | 
| 
       39 
     | 
    
         
            -
                      break;
         
     | 
| 
       40 
     | 
    
         
            -
                    else          
         
     | 
| 
       41 
     | 
    
         
            -
                      current_expression << char          
         
     | 
| 
       42 
     | 
    
         
            -
                    end
         
     | 
| 
       43 
     | 
    
         
            -
                  end
         
     | 
| 
       44 
     | 
    
         
            -
                  unless node
         
     | 
| 
       45 
     | 
    
         
            -
                    if (current_expression.match(/\|{2}|&{2}/))          
         
     | 
| 
       46 
     | 
    
         
            -
                      node = parse(current_expression)
         
     | 
| 
       47 
     | 
    
         
            -
                    else
         
     | 
| 
       48 
     | 
    
         
            -
                      node = current_expression[0..0] == '!' ? Not.new(current_expression[1..-1]) : Unary.new(current_expression)
         
     | 
| 
       49 
     | 
    
         
            -
                    end
         
     | 
| 
       50 
     | 
    
         
            -
                  end
         
     | 
| 
       51 
     | 
    
         
            -
                  node
         
     | 
| 
       52 
     | 
    
         
            -
                end
         
     | 
| 
       53 
     | 
    
         
            -
                
         
     | 
| 
       54 
     | 
    
         
            -
                def operator_node(operator)
         
     | 
| 
       55 
     | 
    
         
            -
                  node = case operator
         
     | 
| 
       56 
     | 
    
         
            -
                    when '||' then Or.new
         
     | 
| 
       57 
     | 
    
         
            -
                    when '&&' then And.new
         
     | 
| 
       58 
     | 
    
         
            -
                  end
         
     | 
| 
       59 
     | 
    
         
            -
                  node
         
     | 
| 
       60 
     | 
    
         
            -
                end
         
     | 
| 
       61 
     | 
    
         
            -
                
         
     | 
| 
       62 
     | 
    
         
            -
                class Composite
         
     | 
| 
       63 
     | 
    
         
            -
                  attr_reader :nodes
         
     | 
| 
       64 
     | 
    
         
            -
                
         
     | 
| 
       65 
     | 
    
         
            -
                  def initialize(*nodes)
         
     | 
| 
       66 
     | 
    
         
            -
                    @nodes = nodes || []
         
     | 
| 
       67 
     | 
    
         
            -
                  end
         
     | 
| 
       68 
     | 
    
         
            -
                
         
     | 
| 
       69 
     | 
    
         
            -
                  def <<(node)
         
     | 
| 
       70 
     | 
    
         
            -
                    @nodes << node
         
     | 
| 
       71 
     | 
    
         
            -
                  end
         
     | 
| 
       72 
     | 
    
         
            -
                
         
     | 
| 
       73 
     | 
    
         
            -
                  def values
         
     | 
| 
       74 
     | 
    
         
            -
                    @nodes.map { |node| node.values }.flatten
         
     | 
| 
       75 
     | 
    
         
            -
                  end
         
     | 
| 
       76 
     | 
    
         
            -
                end
         
     | 
| 
       77 
     | 
    
         
            -
                
         
     | 
| 
       78 
     | 
    
         
            -
                class And < Composite
         
     | 
| 
       79 
     | 
    
         
            -
                  def evaluate(expr)
         
     | 
| 
       80 
     | 
    
         
            -
                    @nodes.each do |node|
         
     | 
| 
       81 
     | 
    
         
            -
                      return false unless node.evaluate(expr)
         
     | 
| 
       82 
     | 
    
         
            -
                    end
         
     | 
| 
       83 
     | 
    
         
            -
                    return true
         
     | 
| 
       84 
     | 
    
         
            -
                  end
         
     | 
| 
       85 
     | 
    
         
            -
                end
         
     | 
| 
       86 
     | 
    
         
            -
                
         
     | 
| 
       87 
     | 
    
         
            -
                class Or < Composite
         
     | 
| 
       88 
     | 
    
         
            -
                  def evaluate(expr)
         
     | 
| 
       89 
     | 
    
         
            -
                    @nodes.each do |node|
         
     | 
| 
       90 
     | 
    
         
            -
                      return true if node.evaluate(expr)
         
     | 
| 
       91 
     | 
    
         
            -
                    end
         
     | 
| 
       92 
     | 
    
         
            -
                    return false
         
     | 
| 
       93 
     | 
    
         
            -
                  end
         
     | 
| 
       94 
     | 
    
         
            -
                end
         
     | 
| 
       95 
     | 
    
         
            -
                
         
     | 
| 
       96 
     | 
    
         
            -
                class Unary
         
     | 
| 
       97 
     | 
    
         
            -
                  attr_reader :value
         
     | 
| 
       98 
     | 
    
         
            -
                
         
     | 
| 
       99 
     | 
    
         
            -
                  def initialize(value)
         
     | 
| 
       100 
     | 
    
         
            -
                    @value = value
         
     | 
| 
       101 
     | 
    
         
            -
                    @expressions = Expressions.new([@value])
         
     | 
| 
       102 
     | 
    
         
            -
                  end
         
     | 
| 
       103 
     | 
    
         
            -
                
         
     | 
| 
       104 
     | 
    
         
            -
                  def evaluate(expr)
         
     | 
| 
       105 
     | 
    
         
            -
                    @expressions.any?(expr)
         
     | 
| 
       106 
     | 
    
         
            -
                  end
         
     | 
| 
       107 
     | 
    
         
            -
                
         
     | 
| 
       108 
     | 
    
         
            -
                  def values
         
     | 
| 
       109 
     | 
    
         
            -
                    [value]
         
     | 
| 
       110 
     | 
    
         
            -
                  end
         
     | 
| 
       111 
     | 
    
         
            -
                end
         
     | 
| 
       112 
     | 
    
         
            -
                
         
     | 
| 
       113 
     | 
    
         
            -
                class Not < Unary
         
     | 
| 
       114 
     | 
    
         
            -
                  def evaluate(expr)
         
     | 
| 
       115 
     | 
    
         
            -
                    !super(expr)
         
     | 
| 
       116 
     | 
    
         
            -
                  end
         
     | 
| 
       117 
     | 
    
         
            -
                  def values
         
     | 
| 
       118 
     | 
    
         
            -
                    []
         
     | 
| 
       119 
     | 
    
         
            -
                  end
         
     | 
| 
      
 22 
     | 
    
         
            +
                def normalize_pattern(pattern)
         
     | 
| 
      
 23 
     | 
    
         
            +
                  pattern.tr!("éèàçîêô","eeacieo")
         
     | 
| 
      
 24 
     | 
    
         
            +
                  pattern.tr!("!,;?./\\_|[]{}<>:*$%"," ")
         
     | 
| 
       120 
25 
     | 
    
         
             
                end
         
     | 
| 
       121 
26 
     | 
    
         | 
| 
       122 
27 
     | 
    
         
             
              end
         
     | 
| 
         @@ -0,0 +1,44 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # encoding: UTF-8
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            class TextNlp
         
     | 
| 
      
 4 
     | 
    
         
            +
              class StopList
         
     | 
| 
      
 5 
     | 
    
         
            +
                
         
     | 
| 
      
 6 
     | 
    
         
            +
                class << self
         
     | 
| 
      
 7 
     | 
    
         
            +
                  attr_accessor :directory 
         
     | 
| 
      
 8 
     | 
    
         
            +
                  StopList.directory = File.join(File.dirname(__FILE__),'stoplists')
         
     | 
| 
      
 9 
     | 
    
         
            +
                end
         
     | 
| 
      
 10 
     | 
    
         
            +
                
         
     | 
| 
      
 11 
     | 
    
         
            +
                def initialize(options = {})
         
     | 
| 
      
 12 
     | 
    
         
            +
                  @cache = {}
         
     | 
| 
      
 13 
     | 
    
         
            +
                  options = {:expressions => []}.merge(options)
         
     | 
| 
      
 14 
     | 
    
         
            +
                  expressions = options[:expressions]
         
     | 
| 
      
 15 
     | 
    
         
            +
                  if (options.key?(:name))
         
     | 
| 
      
 16 
     | 
    
         
            +
                    File.foreach(File.join(StopList.directory,"#{options[:name]}.txt")) { |e| expressions << e }
         
     | 
| 
      
 17 
     | 
    
         
            +
                  end
         
     | 
| 
      
 18 
     | 
    
         
            +
                  if (options.key?(:names))
         
     | 
| 
      
 19 
     | 
    
         
            +
                    options[:names].each do |name|
         
     | 
| 
      
 20 
     | 
    
         
            +
                      File.foreach(File.join(StopList.directory,"#{name}.txt")) { |e| expressions << e }
         
     | 
| 
      
 21 
     | 
    
         
            +
                    end
         
     | 
| 
      
 22 
     | 
    
         
            +
                  end
         
     | 
| 
      
 23 
     | 
    
         
            +
                  if (options.key?(:file))
         
     | 
| 
      
 24 
     | 
    
         
            +
                    File.foreach(options[:file]) { |e| expressions << e }
         
     | 
| 
      
 25 
     | 
    
         
            +
                  end
         
     | 
| 
      
 26 
     | 
    
         
            +
                  if (options.key?(:files))
         
     | 
| 
      
 27 
     | 
    
         
            +
                    options[:files].each do |file|
         
     | 
| 
      
 28 
     | 
    
         
            +
                      File.foreach(file) { |e| expressions << e }
         
     | 
| 
      
 29 
     | 
    
         
            +
                    end
         
     | 
| 
      
 30 
     | 
    
         
            +
                  end
         
     | 
| 
      
 31 
     | 
    
         
            +
                  expressions.each { |e| @cache[e.normalize] = true }
         
     | 
| 
      
 32 
     | 
    
         
            +
                  @expressions = TextNlp::Expressions.new(expressions)
         
     | 
| 
      
 33 
     | 
    
         
            +
                end
         
     | 
| 
      
 34 
     | 
    
         
            +
                
         
     | 
| 
      
 35 
     | 
    
         
            +
                def transform(text)
         
     | 
| 
      
 36 
     | 
    
         
            +
                  @expressions.expressionize(text).map { |expr| @cache.key?(expr) ? nil : expr }.compact.join(' ')
         
     | 
| 
      
 37 
     | 
    
         
            +
                end
         
     | 
| 
      
 38 
     | 
    
         
            +
                
         
     | 
| 
      
 39 
     | 
    
         
            +
                def size
         
     | 
| 
      
 40 
     | 
    
         
            +
                  @expressions.values.size
         
     | 
| 
      
 41 
     | 
    
         
            +
                end
         
     | 
| 
      
 42 
     | 
    
         
            +
             
     | 
| 
      
 43 
     | 
    
         
            +
              end
         
     | 
| 
      
 44 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -0,0 +1,43 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            a
         
     | 
| 
      
 2 
     | 
    
         
            +
            au
         
     | 
| 
      
 3 
     | 
    
         
            +
            aussi
         
     | 
| 
      
 4 
     | 
    
         
            +
            aux
         
     | 
| 
      
 5 
     | 
    
         
            +
            avec
         
     | 
| 
      
 6 
     | 
    
         
            +
            c
         
     | 
| 
      
 7 
     | 
    
         
            +
            ce
         
     | 
| 
      
 8 
     | 
    
         
            +
            cette
         
     | 
| 
      
 9 
     | 
    
         
            +
            contre
         
     | 
| 
      
 10 
     | 
    
         
            +
            d
         
     | 
| 
      
 11 
     | 
    
         
            +
            dans
         
     | 
| 
      
 12 
     | 
    
         
            +
            de
         
     | 
| 
      
 13 
     | 
    
         
            +
            des
         
     | 
| 
      
 14 
     | 
    
         
            +
            du
         
     | 
| 
      
 15 
     | 
    
         
            +
            en
         
     | 
| 
      
 16 
     | 
    
         
            +
            et
         
     | 
| 
      
 17 
     | 
    
         
            +
            j
         
     | 
| 
      
 18 
     | 
    
         
            +
            l
         
     | 
| 
      
 19 
     | 
    
         
            +
            la
         
     | 
| 
      
 20 
     | 
    
         
            +
            le
         
     | 
| 
      
 21 
     | 
    
         
            +
            les
         
     | 
| 
      
 22 
     | 
    
         
            +
            mais
         
     | 
| 
      
 23 
     | 
    
         
            +
            n
         
     | 
| 
      
 24 
     | 
    
         
            +
            ou
         
     | 
| 
      
 25 
     | 
    
         
            +
            par
         
     | 
| 
      
 26 
     | 
    
         
            +
            pas
         
     | 
| 
      
 27 
     | 
    
         
            +
            pour
         
     | 
| 
      
 28 
     | 
    
         
            +
            qu
         
     | 
| 
      
 29 
     | 
    
         
            +
            que
         
     | 
| 
      
 30 
     | 
    
         
            +
            quel
         
     | 
| 
      
 31 
     | 
    
         
            +
            quelle
         
     | 
| 
      
 32 
     | 
    
         
            +
            quelles
         
     | 
| 
      
 33 
     | 
    
         
            +
            quels
         
     | 
| 
      
 34 
     | 
    
         
            +
            qui
         
     | 
| 
      
 35 
     | 
    
         
            +
            sa
         
     | 
| 
      
 36 
     | 
    
         
            +
            sans
         
     | 
| 
      
 37 
     | 
    
         
            +
            ses
         
     | 
| 
      
 38 
     | 
    
         
            +
            son
         
     | 
| 
      
 39 
     | 
    
         
            +
            sous
         
     | 
| 
      
 40 
     | 
    
         
            +
            sur
         
     | 
| 
      
 41 
     | 
    
         
            +
            un
         
     | 
| 
      
 42 
     | 
    
         
            +
            une
         
     | 
| 
      
 43 
     | 
    
         
            +
            y
         
     | 
    
        data/lib/text_nlp/string.rb
    CHANGED
    
    | 
         @@ -18,23 +18,31 @@ class String 
     | 
|
| 
       18 
18 
     | 
    
         
             
                self
         
     | 
| 
       19 
19 
     | 
    
         
             
              end
         
     | 
| 
       20 
20 
     | 
    
         | 
| 
      
 21 
     | 
    
         
            +
              def normalize!
         
     | 
| 
      
 22 
     | 
    
         
            +
                unless normalized()
         
     | 
| 
      
 23 
     | 
    
         
            +
                  replace(self.normalize)
         
     | 
| 
      
 24 
     | 
    
         
            +
                  self.normalized = true
         
     | 
| 
      
 25 
     | 
    
         
            +
                end
         
     | 
| 
      
 26 
     | 
    
         
            +
                self
         
     | 
| 
      
 27 
     | 
    
         
            +
              end
         
     | 
| 
      
 28 
     | 
    
         
            +
              
         
     | 
| 
       21 
29 
     | 
    
         
             
              def tokenize
         
     | 
| 
       22 
30 
     | 
    
         
             
                (String.tokenizer || TextNlp::Tokenizer.new).tokenize(self)
         
     | 
| 
       23 
31 
     | 
    
         
             
              end
         
     | 
| 
       24 
32 
     | 
    
         | 
| 
       25 
33 
     | 
    
         
             
              def similarity(text)
         
     | 
| 
       26 
34 
     | 
    
         
             
                score = 0.0
         
     | 
| 
       27 
     | 
    
         
            -
                tokens1 = self.normalize.tokenize
         
     | 
| 
       28 
     | 
    
         
            -
                tokens2 = text.normalize.tokenize
         
     | 
| 
      
 35 
     | 
    
         
            +
                tokens1, tokens2 = self.normalize.tokenize, text.normalize.tokenize
         
     | 
| 
       29 
36 
     | 
    
         
             
                if (tokens1.size > 0 && tokens2.size > 0)
         
     | 
| 
       30 
37 
     | 
    
         
             
                  intersection = tokens1 & tokens2
         
     | 
| 
       31 
     | 
    
         
            -
                  score = (((intersection.size.to_f / tokens1.size 
     | 
| 
      
 38 
     | 
    
         
            +
                  score = (((intersection.size.to_f / tokens1.size) + (intersection.size.to_f / tokens2.size)) / 2)
         
     | 
| 
       32 
39 
     | 
    
         
             
                end
         
     | 
| 
       33 
40 
     | 
    
         
             
                score
         
     | 
| 
       34 
41 
     | 
    
         
             
              end
         
     | 
| 
       35 
42 
     | 
    
         | 
| 
       36 
     | 
    
         
            -
              def  
     | 
| 
       37 
     | 
    
         
            -
                 
     | 
| 
      
 43 
     | 
    
         
            +
              def transform(*transformers)
         
     | 
| 
      
 44 
     | 
    
         
            +
                transformers = [transformers] unless transformers.respond_to?(:each)
         
     | 
| 
      
 45 
     | 
    
         
            +
                transformers.flatten.inject(self) { |text,transformer| transformer.transform(text) }
         
     | 
| 
       38 
46 
     | 
    
         
             
              end
         
     | 
| 
       39 
47 
     | 
    
         | 
| 
       40 
48 
     | 
    
         
             
            end
         
     | 
    
        data/lib/text_nlp/synonyms.rb
    CHANGED
    
    | 
         @@ -13,15 +13,16 @@ class TextNlp 
     | 
|
| 
       13 
13 
     | 
    
         
             
                end
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
15 
     | 
    
         
             
                def register(name,synonyms)
         
     | 
| 
       16 
     | 
    
         
            -
                   
     | 
| 
      
 16 
     | 
    
         
            +
                  name.normalize!
         
     | 
| 
       17 
17 
     | 
    
         
             
                  synonyms.each do |synonym|
         
     | 
| 
      
 18 
     | 
    
         
            +
                    synonym.normalize!
         
     | 
| 
       18 
19 
     | 
    
         
             
                    @expressions << synonym
         
     | 
| 
       19 
     | 
    
         
            -
                    @synonyms[synonym 
     | 
| 
      
 20 
     | 
    
         
            +
                    @synonyms[synonym] = name
         
     | 
| 
       20 
21 
     | 
    
         
             
                  end
         
     | 
| 
       21 
22 
     | 
    
         
             
                end
         
     | 
| 
       22 
23 
     | 
    
         | 
| 
       23 
     | 
    
         
            -
                def  
     | 
| 
       24 
     | 
    
         
            -
                  @expressions.expressionize(text).map { |expr| @synonyms.key?(expr) ? @synonyms[expr] : expr }.join(' ')
         
     | 
| 
      
 24 
     | 
    
         
            +
                def transform(text)
         
     | 
| 
      
 25 
     | 
    
         
            +
                  @expressions.expressionize(text).map { |expr| @synonyms.key?(expr) ? @synonyms[expr] : expr }.compact.join(' ')
         
     | 
| 
       25 
26 
     | 
    
         
             
                end
         
     | 
| 
       26 
27 
     | 
    
         | 
| 
       27 
28 
     | 
    
         
             
              end
         
     | 
    
        data/lib/text_nlp/tokenizer.rb
    CHANGED
    
    
    
        data/spec/min_en.txt
    ADDED
    
    
    
        data/spec/min_fr.txt
    ADDED
    
    
    
        data/spec/pattern_spec.rb
    CHANGED
    
    | 
         @@ -3,11 +3,31 @@ require "spec_helper" 
     | 
|
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         
             
            describe TextNlp::Pattern do
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
       6 
     | 
    
         
            -
               
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
                 
     | 
| 
       9 
     | 
    
         
            -
             
     | 
| 
       10 
     | 
    
         
            -
             
     | 
| 
      
 6 
     | 
    
         
            +
              context "with normalize option" do
         
     | 
| 
      
 7 
     | 
    
         
            +
              
         
     | 
| 
      
 8 
     | 
    
         
            +
                it "should match or not the pattern" do
         
     | 
| 
      
 9 
     | 
    
         
            +
                  pattern = TextNlp::Pattern.new("(BD OR 'bande dessinée') AND -samsung")
         
     | 
| 
      
 10 
     | 
    
         
            +
                  pattern.match?("cette BD est super").should be_true
         
     | 
| 
      
 11 
     | 
    
         
            +
                  pattern.match?("cette bd est illisible sur samsung NTC").should be_false
         
     | 
| 
      
 12 
     | 
    
         
            +
                  pattern.match?("cette bande dessinee est illisible sur samsung NTC").should be_false
         
     | 
| 
      
 13 
     | 
    
         
            +
                  pattern.match?("cette bande dessinee est illisible").should be_true
         
     | 
| 
      
 14 
     | 
    
         
            +
                  pattern = TextNlp::Pattern.new("'toulouse fc' OR ((toulouse OR tfc) AND (foot OR football OR 'ligue 1' OR 'ligue 2' OR l1 OR l2))")
         
     | 
| 
      
 15 
     | 
    
         
            +
                  pattern.match?("toulouse est une belle ville").should be_false
         
     | 
| 
      
 16 
     | 
    
         
            +
                end
         
     | 
| 
      
 17 
     | 
    
         
            +
              
         
     | 
| 
       11 
18 
     | 
    
         
             
              end
         
     | 
| 
       12 
19 
     | 
    
         | 
| 
      
 20 
     | 
    
         
            +
              context "with no normalized option" do
         
     | 
| 
      
 21 
     | 
    
         
            +
                
         
     | 
| 
      
 22 
     | 
    
         
            +
                it "should match or not the pattern" do
         
     | 
| 
      
 23 
     | 
    
         
            +
                  pattern = TextNlp::Pattern.new("(BD OR 'bande dessinée') AND -samsung", :normalize => false)
         
     | 
| 
      
 24 
     | 
    
         
            +
                  pattern.match?("cette BD est super").should be_true
         
     | 
| 
      
 25 
     | 
    
         
            +
                  pattern.match?("cette bd est super").should be_false
         
     | 
| 
      
 26 
     | 
    
         
            +
                  pattern.match?("cette bande dessinee est illisible").should be_false
         
     | 
| 
      
 27 
     | 
    
         
            +
                  pattern.match?("cette bande dessinée est illisible").should be_true
         
     | 
| 
      
 28 
     | 
    
         
            +
                  pattern.match?("cette bande dessinée est illisible sur samsung").should be_false
         
     | 
| 
      
 29 
     | 
    
         
            +
                end
         
     | 
| 
      
 30 
     | 
    
         
            +
                
         
     | 
| 
      
 31 
     | 
    
         
            +
              end  
         
     | 
| 
      
 32 
     | 
    
         
            +
              
         
     | 
| 
       13 
33 
     | 
    
         
             
            end
         
     | 
| 
         @@ -0,0 +1,34 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # encoding: utf-8
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "spec_helper"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            describe TextNlp::StopList do
         
     | 
| 
      
 5 
     | 
    
         
            +
              
         
     | 
| 
      
 6 
     | 
    
         
            +
              it "should remove the words/expressions defined by the stop list" do
         
     | 
| 
      
 7 
     | 
    
         
            +
                
         
     | 
| 
      
 8 
     | 
    
         
            +
                TextNlp::StopList.directory = File.dirname(__FILE__)
         
     | 
| 
      
 9 
     | 
    
         
            +
                
         
     | 
| 
      
 10 
     | 
    
         
            +
                stop_list = TextNlp::StopList.new(:expressions => ['il','a','ecrit par toto'])
         
     | 
| 
      
 11 
     | 
    
         
            +
                stop_list.size.should eq 3
         
     | 
| 
      
 12 
     | 
    
         
            +
                stop_list.transform("bordel Il fait chaud ici").should eq 'bordel fait chaud ici'
         
     | 
| 
      
 13 
     | 
    
         
            +
                stop_list.transform("bordel Il fait chaud ici ecrit par toto").should eq 'bordel fait chaud ici'
         
     | 
| 
      
 14 
     | 
    
         
            +
                stop_list.transform("bordel Il fait chaud ici ecrit par titi").should eq 'bordel fait chaud ici ecrit par titi'
         
     | 
| 
      
 15 
     | 
    
         
            +
                
         
     | 
| 
      
 16 
     | 
    
         
            +
                stop_list = TextNlp::StopList.new(:expressions => ['il','a','ecrit par toto'], :file => File.join(File.dirname(__FILE__),"stop_list_toto.txt"))
         
     | 
| 
      
 17 
     | 
    
         
            +
                stop_list.size.should eq 5
         
     | 
| 
      
 18 
     | 
    
         
            +
                stop_list.transform("bordel Il fait chaud ici").should eq 'fait chaud ici'
         
     | 
| 
      
 19 
     | 
    
         
            +
                stop_list.transform("bordel Il fait chaud ici ecrit par toto").should eq 'fait chaud ici'
         
     | 
| 
      
 20 
     | 
    
         
            +
                stop_list.transform("bordel Il fait chaud ici ecrit par titi").should eq 'fait chaud ici ecrit par titi'
         
     | 
| 
      
 21 
     | 
    
         
            +
                
         
     | 
| 
      
 22 
     | 
    
         
            +
                stop_list = TextNlp::StopList.new(:name => "min_fr")
         
     | 
| 
      
 23 
     | 
    
         
            +
                stop_list.size.should eq 3
         
     | 
| 
      
 24 
     | 
    
         
            +
                stop_list.transform("le ballon de zizou").should eq 'ballon zizou'
         
     | 
| 
      
 25 
     | 
    
         
            +
                
         
     | 
| 
      
 26 
     | 
    
         
            +
                stop_list = TextNlp::StopList.new(:names => ["min_fr","min_en"])
         
     | 
| 
      
 27 
     | 
    
         
            +
                stop_list.size.should eq 5
         
     | 
| 
      
 28 
     | 
    
         
            +
                
         
     | 
| 
      
 29 
     | 
    
         
            +
                stop_list = TextNlp::StopList.new(
         
     | 
| 
      
 30 
     | 
    
         
            +
                  :files => [File.join(File.dirname(__FILE__),"stop_list_toto.txt"),File.join(File.dirname(__FILE__),"stop_list_tutu.txt")])
         
     | 
| 
      
 31 
     | 
    
         
            +
                stop_list.size.should eq 4
         
     | 
| 
      
 32 
     | 
    
         
            +
              end
         
     | 
| 
      
 33 
     | 
    
         
            +
              
         
     | 
| 
      
 34 
     | 
    
         
            +
            end
         
     | 
    
        data/spec/string_spec.rb
    CHANGED
    
    | 
         @@ -17,6 +17,16 @@ describe String do 
     | 
|
| 
       17 
17 
     | 
    
         
             
                text.normalize.should eq "TOTO".downcase
         
     | 
| 
       18 
18 
     | 
    
         
             
              end
         
     | 
| 
       19 
19 
     | 
    
         | 
| 
      
 20 
     | 
    
         
            +
              it "should normalize the receiver string" do
         
     | 
| 
      
 21 
     | 
    
         
            +
                text = "TOTO"
         
     | 
| 
      
 22 
     | 
    
         
            +
                normalizer = double()
         
     | 
| 
      
 23 
     | 
    
         
            +
                String.normalizer = normalizer
         
     | 
| 
      
 24 
     | 
    
         
            +
                normalizer.stub(:normalize) { |txt| txt.downcase }
         
     | 
| 
      
 25 
     | 
    
         
            +
                text.normalize!
         
     | 
| 
      
 26 
     | 
    
         
            +
                text.should eq "TOTO".downcase
         
     | 
| 
      
 27 
     | 
    
         
            +
                text.normalized.should be_true
         
     | 
| 
      
 28 
     | 
    
         
            +
              end
         
     | 
| 
      
 29 
     | 
    
         
            +
              
         
     | 
| 
       20 
30 
     | 
    
         
             
              it "should call tokenizer" do
         
     | 
| 
       21 
31 
     | 
    
         
             
                text = "TOTO"
         
     | 
| 
       22 
32 
     | 
    
         
             
                tokenizer = double()
         
     | 
| 
         @@ -25,11 +35,20 @@ describe String do 
     | 
|
| 
       25 
35 
     | 
    
         
             
                text.tokenize
         
     | 
| 
       26 
36 
     | 
    
         
             
              end
         
     | 
| 
       27 
37 
     | 
    
         | 
| 
       28 
     | 
    
         
            -
              it "should call translator" do
         
     | 
| 
      
 38 
     | 
    
         
            +
              it "should call translator / translators" do
         
     | 
| 
       29 
39 
     | 
    
         
             
                text = "TOTO"
         
     | 
| 
       30 
     | 
    
         
            -
                 
     | 
| 
       31 
     | 
    
         
            -
                 
     | 
| 
       32 
     | 
    
         
            -
                text. 
     | 
| 
      
 40 
     | 
    
         
            +
                transformer1 = double()
         
     | 
| 
      
 41 
     | 
    
         
            +
                transformer1.should_receive(:transform).with(text)
         
     | 
| 
      
 42 
     | 
    
         
            +
                text.transform(transformer1)
         
     | 
| 
      
 43 
     | 
    
         
            +
                transformer1 = double()
         
     | 
| 
      
 44 
     | 
    
         
            +
                transformer1.stub(:transform) { |text| text.tr("T","U") }
         
     | 
| 
      
 45 
     | 
    
         
            +
                transformer2 = double()
         
     | 
| 
      
 46 
     | 
    
         
            +
                transformer2.stub(:transform) { |text| text.tr("O","A") }
         
     | 
| 
      
 47 
     | 
    
         
            +
                transformer1.should_receive(:transform).with("TOTO")
         
     | 
| 
      
 48 
     | 
    
         
            +
                transformer2.should_receive(:transform).with("UOUO")
         
     | 
| 
      
 49 
     | 
    
         
            +
                text = text.transform(transformer1,transformer2)
         
     | 
| 
      
 50 
     | 
    
         
            +
                text.should eq "UAUA"
         
     | 
| 
      
 51 
     | 
    
         
            +
                text.transform([transformer1,transformer2])
         
     | 
| 
       33 
52 
     | 
    
         
             
              end
         
     | 
| 
       34 
53 
     | 
    
         | 
| 
       35 
54 
     | 
    
         
             
              it "should compute similarity" do
         
     | 
    
        data/spec/synonyms_spec.rb
    CHANGED
    
    | 
         @@ -6,18 +6,20 @@ describe TextNlp::Synonyms do 
     | 
|
| 
       6 
6 
     | 
    
         
             
              it "should synonymize the text" do
         
     | 
| 
       7 
7 
     | 
    
         
             
                synonyms = TextNlp::Synonyms.new
         
     | 
| 
       8 
8 
     | 
    
         
             
                synonyms.register("CAEN",["smc","sm caen","stade malherbe de caen"])
         
     | 
| 
       9 
     | 
    
         
            -
                synonyms. 
     | 
| 
       10 
     | 
    
         
            -
                synonyms. 
     | 
| 
       11 
     | 
    
         
            -
                synonyms. 
     | 
| 
       12 
     | 
    
         
            -
                synonyms. 
     | 
| 
      
 9 
     | 
    
         
            +
                synonyms.transform("le smc c est de la bombe").should eq "le caen c est de la bombe"
         
     | 
| 
      
 10 
     | 
    
         
            +
                synonyms.transform("le truc c est de la bombe").should eq "le truc c est de la bombe"
         
     | 
| 
      
 11 
     | 
    
         
            +
                synonyms.transform("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
         
     | 
| 
      
 12 
     | 
    
         
            +
                synonyms.transform("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
         
     | 
| 
       13 
13 
     | 
    
         
             
              end
         
     | 
| 
       14 
14 
     | 
    
         | 
| 
       15 
15 
     | 
    
         
             
              it "should synonymize the text" do
         
     | 
| 
       16 
16 
     | 
    
         
             
                synonyms = TextNlp::Synonyms.new([["CAEN","smc","sm caen","stade malherbe de caen"],["marseille","om"]])
         
     | 
| 
       17 
     | 
    
         
            -
                synonyms. 
     | 
| 
       18 
     | 
    
         
            -
                synonyms. 
     | 
| 
       19 
     | 
    
         
            -
                synonyms. 
     | 
| 
       20 
     | 
    
         
            -
                synonyms. 
     | 
| 
      
 17 
     | 
    
         
            +
                synonyms.transform("le smc c est de la bombe").should eq "le caen c est de la bombe"
         
     | 
| 
      
 18 
     | 
    
         
            +
                synonyms.transform("le truc c est de la bombe").should eq "le truc c est de la bombe"
         
     | 
| 
      
 19 
     | 
    
         
            +
                synonyms.transform("le sm caen c est de la bombe").should eq "le caen c est de la bombe"
         
     | 
| 
      
 20 
     | 
    
         
            +
                synonyms.transform("le stade malherbe de caen c est de la bombe").should eq "le caen c est de la bombe"
         
     | 
| 
      
 21 
     | 
    
         
            +
                synonyms.transform("le caen c est de la bombe").should eq "le caen c est de la bombe"
         
     | 
| 
      
 22 
     | 
    
         
            +
                synonyms.transform("le om c est de la bombe").should eq "le marseille c est de la bombe"
         
     | 
| 
       21 
23 
     | 
    
         
             
              end
         
     | 
| 
       22 
24 
     | 
    
         | 
| 
       23 
25 
     | 
    
         
             
            end
         
     | 
    
        data/text_nlp.gemspec
    CHANGED
    
    | 
         @@ -1,9 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            Gem::Specification.new do |s|
         
     | 
| 
       2 
2 
     | 
    
         
             
              s.name          = 'text_nlp'
         
     | 
| 
       3 
     | 
    
         
            -
              s.version       = '0.0. 
     | 
| 
       4 
     | 
    
         
            -
              s.date          = '2011-07- 
     | 
| 
      
 3 
     | 
    
         
            +
              s.version       = '0.0.3'
         
     | 
| 
      
 4 
     | 
    
         
            +
              s.date          = '2011-07-07'
         
     | 
| 
       5 
5 
     | 
    
         
             
              s.summary       = "A minimalist NLP library"
         
     | 
| 
       6 
6 
     | 
    
         
             
              s.description   = s.summary
         
     | 
| 
      
 7 
     | 
    
         
            +
              
         
     | 
| 
      
 8 
     | 
    
         
            +
              s.add_dependency "textquery"
         
     | 
| 
      
 9 
     | 
    
         
            +
              s.add_development_dependency "rspec"
         
     | 
| 
      
 10 
     | 
    
         
            +
              s.add_development_dependency "rake"
         
     | 
| 
      
 11 
     | 
    
         
            +
              
         
     | 
| 
       7 
12 
     | 
    
         
             
              s.authors       = ["fonzo14"]
         
     | 
| 
       8 
13 
     | 
    
         
             
              s.require_paths = ["lib"]
         
     | 
| 
       9 
14 
     | 
    
         
             
              s.files         = `git ls-files`.split("\n")
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: text_nlp
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.0. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.0.3
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -9,8 +9,41 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2011-07- 
     | 
| 
       13 
     | 
    
         
            -
            dependencies: 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2011-07-07 00:00:00.000000000Z
         
     | 
| 
      
 13 
     | 
    
         
            +
            dependencies:
         
     | 
| 
      
 14 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 15 
     | 
    
         
            +
              name: textquery
         
     | 
| 
      
 16 
     | 
    
         
            +
              requirement: &86270380 !ruby/object:Gem::Requirement
         
     | 
| 
      
 17 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 18 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 19 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 20 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 21 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 22 
     | 
    
         
            +
              type: :runtime
         
     | 
| 
      
 23 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 24 
     | 
    
         
            +
              version_requirements: *86270380
         
     | 
| 
      
 25 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 26 
     | 
    
         
            +
              name: rspec
         
     | 
| 
      
 27 
     | 
    
         
            +
              requirement: &86270160 !ruby/object:Gem::Requirement
         
     | 
| 
      
 28 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 29 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 30 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 31 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 32 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 33 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 34 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 35 
     | 
    
         
            +
              version_requirements: *86270160
         
     | 
| 
      
 36 
     | 
    
         
            +
            - !ruby/object:Gem::Dependency
         
     | 
| 
      
 37 
     | 
    
         
            +
              name: rake
         
     | 
| 
      
 38 
     | 
    
         
            +
              requirement: &86269950 !ruby/object:Gem::Requirement
         
     | 
| 
      
 39 
     | 
    
         
            +
                none: false
         
     | 
| 
      
 40 
     | 
    
         
            +
                requirements:
         
     | 
| 
      
 41 
     | 
    
         
            +
                - - ! '>='
         
     | 
| 
      
 42 
     | 
    
         
            +
                  - !ruby/object:Gem::Version
         
     | 
| 
      
 43 
     | 
    
         
            +
                    version: '0'
         
     | 
| 
      
 44 
     | 
    
         
            +
              type: :development
         
     | 
| 
      
 45 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 46 
     | 
    
         
            +
              version_requirements: *86269950
         
     | 
| 
       14 
47 
     | 
    
         
             
            description: A minimalist NLP library
         
     | 
| 
       15 
48 
     | 
    
         
             
            email: 
         
     | 
| 
       16 
49 
     | 
    
         
             
            executables: []
         
     | 
| 
         @@ -29,13 +62,20 @@ files: 
     | 
|
| 
       29 
62 
     | 
    
         
             
            - lib/text_nlp/expressions.rb
         
     | 
| 
       30 
63 
     | 
    
         
             
            - lib/text_nlp/normalizer.rb
         
     | 
| 
       31 
64 
     | 
    
         
             
            - lib/text_nlp/pattern.rb
         
     | 
| 
      
 65 
     | 
    
         
            +
            - lib/text_nlp/stop_list.rb
         
     | 
| 
      
 66 
     | 
    
         
            +
            - lib/text_nlp/stoplists/min_fr.txt
         
     | 
| 
       32 
67 
     | 
    
         
             
            - lib/text_nlp/string.rb
         
     | 
| 
       33 
68 
     | 
    
         
             
            - lib/text_nlp/synonyms.rb
         
     | 
| 
       34 
69 
     | 
    
         
             
            - lib/text_nlp/tokenizer.rb
         
     | 
| 
       35 
70 
     | 
    
         
             
            - spec/expressions_spec.rb
         
     | 
| 
      
 71 
     | 
    
         
            +
            - spec/min_en.txt
         
     | 
| 
      
 72 
     | 
    
         
            +
            - spec/min_fr.txt
         
     | 
| 
       36 
73 
     | 
    
         
             
            - spec/normalizer_spec.rb
         
     | 
| 
       37 
74 
     | 
    
         
             
            - spec/pattern_spec.rb
         
     | 
| 
       38 
75 
     | 
    
         
             
            - spec/spec_helper.rb
         
     | 
| 
      
 76 
     | 
    
         
            +
            - spec/stop_list_spec.rb
         
     | 
| 
      
 77 
     | 
    
         
            +
            - spec/stop_list_toto.txt
         
     | 
| 
      
 78 
     | 
    
         
            +
            - spec/stop_list_tutu.txt
         
     | 
| 
       39 
79 
     | 
    
         
             
            - spec/string_spec.rb
         
     | 
| 
       40 
80 
     | 
    
         
             
            - spec/synonyms_spec.rb
         
     | 
| 
       41 
81 
     | 
    
         
             
            - spec/tokenizer_spec.rb
         
     |