nlp 0.2.5 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rid_category.rb CHANGED
@@ -1,18 +1,17 @@
1
1
  module NLP
2
- class RIDCategory < Category
3
-
2
+ class RIDCategory < Category
3
+
4
4
  def primary?
5
5
  root == :PIERWOTNE
6
6
  end
7
-
7
+
8
8
  def secondary?
9
9
  root == :WTORNE
10
10
  end
11
-
11
+
12
12
  def emotions?
13
13
  root == :EMOCJE
14
14
  end
15
15
 
16
-
17
- end
16
+ end
18
17
  end
data/lib/sentence.rb CHANGED
@@ -1,16 +1,24 @@
1
1
  module NLP
2
- class Sentence
3
- attr_reader :tokens
4
- def initialize()
5
- @tokens = []
6
- end
2
+ class Sentence
7
3
 
8
- def << tokens
9
- @tokens.concat tokens
10
- end
4
+ attr_reader :tokens
11
5
 
12
- def words_number
13
- @tokens.size
14
- end
6
+ def initialize()
7
+ @tokens = []
15
8
  end
9
+
10
# Appends to the sentence: an Array argument is concatenated element by
# element, anything else is added as a single token. Returns the sentence
# itself so that calls can be chained.
def <<(tokens)
  case tokens
  when Array then @tokens.concat(tokens)
  else @tokens.push(tokens)
  end
  self
end
18
+
19
# Number of tokens in the sentence for which +interp?+ is not truthy.
def words_number
  @tokens.reject { |token| token.interp? }.size
end
22
+
23
+ end
16
24
  end
data/lib/statistic.rb ADDED
@@ -0,0 +1,55 @@
1
# Accumulates per-category word counts for an analysed text and doubles as a
# small key/value store (via #[] and #[]=) for arbitrary extra results.
class Statistic

  # NOTE: the +hash+ accessor shadows Object#hash. It is kept because it is
  # part of the existing public interface, but it means Statistic instances
  # must not be used as Hash keys or Set members.
  attr_accessor :total_words, :hash
  attr_reader :cwords, :words, :word_count

  def initialize
    @word_count = 0
    @total_words = 0
    @scores = Hash.new(0)   # category object => number of words seen
    @words = []             # every word passed to #add, in order
    @cwords = {}            # category name => words seen for that category
    @hash = {}              # backing store for #[] / #[]=
  end

  # Records one occurrence of +word+ under +category+.
  #
  # +category+ is used as-is as the score key and must respond to +name+,
  # which is used as the key in +cwords+.
  # Returns the list of words recorded so far for that category name.
  def add(word, category)
    @scores[category] += 1
    @word_count += 1
    @words.push word
    (@cwords[category.name] ||= []).push word
  end

  # Stores +value+ under +key+ in the auxiliary store.
  def []=(key, value)
    @hash[key] = value
  end

  # Reads the value stored under +key+ in the auxiliary store.
  def [](key)
    @hash[key]
  end

  # For every entry of +categories+ (e.g. :primary) calls the predicate
  # "<entry>?" on each recorded category object and returns a Hash mapping
  # the entry to the fraction of recorded words whose category answered
  # truthily. All fractions are 0.0 when no word has been added yet.
  def category_participation(categories)
    result = {}
    categories.each do |cat|
      predicate = "#{cat}?"
      result[cat] = percentage_distribution(@scores) { |c| c.send(predicate) }
    end
    result
  end

  private

  # Sums the counts of all (category, count) pairs whose category satisfies
  # the given block and divides by the total number of recorded words.
  def percentage_distribution(scores)
    # Avoid 0.0 / 0 => NaN when nothing has been recorded.
    return 0.0 if @word_count.zero?

    matched = scores.inject(0) do |count, (category, score)|
      yield(category) ? count + score : count
    end
    Float(matched) / @word_count
  end

end
54
+
55
+
@@ -2,5 +2,12 @@ class Array
2
2
  def tail
3
3
  self[1..-1]
4
4
  end
5
+
6
# Arithmetic mean of the elements.
#
# The division uses the elements' own numeric type, so an array of Integers
# yields a truncated Integer (e.g. [1, 2].mean => 1) while Floats yield a
# Float. Returns 0 for an empty array instead of raising ZeroDivisionError.
def mean
  return 0 if empty?

  inject(0) { |sum, v| sum + v } / size
end
11
+
5
12
  end
6
13
 
data/lib/stree.rb CHANGED
@@ -5,53 +5,53 @@ module NLP
5
5
  SYMBOLS = %w{* - : - / ) (}
6
6
  attr_accessor :value
7
7
  attr_accessor :subtrees
8
-
8
+
9
9
  # 0 -> *
10
10
  # 1 -> -
11
11
  # 2 -> a
12
12
  # 33 -> ź
13
13
  def initialize
14
- @subtrees = Array.new( 34, nil )
14
+ @subtrees = Array.new(34, nil)
15
15
  @value = []
16
16
  end
17
-
18
- def insert( s, value )
19
- priv_insert( s.scan(/./), value )
17
+
18
+ def insert(s, value)
19
+ priv_insert(s.scan(/./), value)
20
20
  end
21
-
22
- def find( s )
23
- priv_find( s.scan(/./) )
21
+
22
+ def find(s)
23
+ priv_find(s.scan(/./))
24
24
  end
25
25
 
26
-
27
- protected
26
+
27
+ protected
28
28
  def key( chr )
29
- unless chr
30
- raise ArgumentError, "Argument chr is nil"
31
- end
32
- rval = ALPHABET.index(chr) || -1
33
- if rval > 35
34
- rval = -1 # invalid character
35
- end
36
-
37
- rval
29
+ unless chr
30
+ raise ArgumentError, "Argument chr is nil"
31
+ end
32
+ rval = ALPHABET.index(chr) || -1
33
+ if rval > 35
34
+ rval = -1 # invalid character
35
+ end
36
+
37
+ rval
38
38
  end
39
-
39
+
40
40
  def priv_insert( s, value )
41
41
  if s.empty?
42
42
  @value.push value
43
43
  else
44
44
  index = key( s.first )
45
45
  subtree = if @subtrees[index] == nil
46
- @subtrees[index] = SearchTree.new
47
- else
48
- @subtrees[index]
49
- end
50
-
46
+ @subtrees[index] = SearchTree.new
47
+ else
48
+ @subtrees[index]
49
+ end
50
+
51
51
  subtree.priv_insert( s.tail, value )
52
52
  end
53
53
  end
54
-
54
+
55
55
  def priv_find( search )
56
56
  if @subtrees[0]
57
57
  @subtrees[0].value
@@ -69,17 +69,17 @@ module NLP
69
69
  end
70
70
  end
71
71
 
72
- public
73
- def traverse()
74
- list = []
75
- yield @value
76
- list.concat @subrees if @subtrees != nil
77
- loop do
78
- break if list.empty?
79
- node = list.shift
80
- yield node.value
81
- list.concat node.subtrees if node.subtrees != nil
82
- end
83
- end
84
- end
72
+ public
73
# Walks the tree breadth-first, yielding the +value+ list of this node first
# and then the +value+ list of every descendant node.
#
# Unused child slots (nil entries in +subtrees+) are skipped.
def traverse()
  yield @value
  queue = (@subtrees || []).compact
  until queue.empty?
    node = queue.shift
    yield node.value
    queue.concat node.subtrees.compact if node.subtrees
  end
end
84
+ end
85
85
  end
@@ -2,50 +2,50 @@ require 'rubygems'
2
2
  require 'savon'
3
3
 
4
4
  class TakipiWebService
5
- URL = 'http://nlp.pwr.wroc.pl/clarin/ws/takipi/'
6
- WSDL_URL = URL + 'takipi.wsdl'
7
-
8
- def self.request(text)
9
- client = Savon::Client.new WSDL_URL, :soap_endpoint => URL
10
-
11
- # Call remote service methods
12
- response = client.tag do |soap|
13
- soap.body = "<text>#{text}</text><format>TXT</format><useGuesser>true</useGuesser>"
14
- end
15
-
16
- response = response.to_hash
17
- token = response[:tag_response][:tag_response][:msg]
18
- status = (response[:tag_response][:tag_response][:status]).to_i
19
-
20
- #checking status
21
- timeout = 60
22
- step = 5
23
- count = 0
24
- loop do
25
- break if count > timeout
26
- if status == 1
27
- break
28
- elsif status == 2 or status == 3
29
- count += 5
30
- sleep(1)
31
- r = client.get_status do |soap|
32
- soap.body = "<token>#{token}</token>"
33
- end.to_hash
34
- status = (r[:get_status_response][:status]).to_i
35
-
36
- end
37
- end
38
-
39
- #geting result
40
-
41
- result = client.get_result do |soap|
42
- soap.body="<token>#{token}</token>"
43
- end
44
-
45
- response_document = result.to_hash[:get_result_response][:tag_response][:msg]
46
-
47
- #transforming response to well formed xml string
48
- return "<xml><chunkList>#{response_document}</chunkList></xml>"
49
- end
5
+ URL = 'http://nlp.pwr.wroc.pl/clarin/ws/takipi/'
6
+ WSDL_URL = URL + 'takipi.wsdl'
7
+
8
# Submits +text+ to the tagging web service, polls until the job is reported
# as finished (or the polling budget is used up), fetches the result and
# returns it wrapped in an "<xml><chunkList>…</chunkList></xml>" envelope.
#
# Raises RuntimeError when the service reports a status other than 1, 2 or 3;
# previously such a status made the polling loop spin forever.
#
# NOTE(review): +text+ is interpolated into the SOAP body without XML
# escaping — confirm whether callers or Savon escape it.
def self.request(text)
  client = Savon::Client.new WSDL_URL, :soap_endpoint => URL

  # Submit the text; the reply carries a job token and an initial status.
  response = client.tag do |soap|
    soap.body = "<text>#{text}</text><format>TXT</format><useGuesser>true</useGuesser>"
  end

  response = response.to_hash
  token = response[:tag_response][:tag_response][:msg]
  status = (response[:tag_response][:tag_response][:status]).to_i

  # Poll the job status. Status 1 ends polling; 2 and 3 mean "ask again"
  # (presumably queued / in progress — confirm against the service docs).
  # Each poll sleeps one second but charges +step+ against +timeout+, so at
  # most 13 polls are made; this matches the original pacing.
  timeout = 60
  step = 5
  count = 0
  until count > timeout || status == 1
    unless status == 2 || status == 3
      raise "TaKIPI request failed with unexpected status #{status}"
    end

    count += step
    sleep(1)
    r = client.get_status do |soap|
      soap.body = "<token>#{token}</token>"
    end.to_hash
    status = (r[:get_status_response][:status]).to_i
  end

  # Fetch the result (also attempted after the polling budget runs out,
  # as before).
  result = client.get_result do |soap|
    soap.body = "<token>#{token}</token>"
  end

  response_document = result.to_hash[:get_result_response][:tag_response][:msg]

  # Wrap the response so that it is a well-formed XML string.
  return "<xml><chunkList>#{response_document}</chunkList></xml>"
end
50
50
  end
51
51
 
data/lib/text.rb CHANGED
@@ -1,25 +1,26 @@
1
1
  module NLP
2
- class Text
3
- attr_reader :sentences
2
+ class Text
3
+ attr_reader :sentences
4
4
 
5
- def initialize
6
- @sentences = []
7
- end
5
+ def initialize
6
+ @sentences = []
7
+ end
8
8
 
9
- def << sentence
10
- @sentences.push sentence
11
- end
9
+ def << sentence
10
+ @sentences.push sentence
11
+ end
12
12
 
13
13
 
14
- def words_per_sentence
15
- mean(@sentences.collect{|s| s.words_number})
16
- end
14
# Mean of +words_number+ over all sentences, computed with Array#mean.
def words_per_sentence
  counts = @sentences.map { |sentence| sentence.words_number }
  counts.mean
end
17
17
 
18
- private
19
- def mean(x)
20
- sum=0
21
- x.each{|v| sum+=v }
22
- sum/x.size
23
- end
18
+
19
# Returns a new Array holding the tokens of every sentence, in sentence
# order and then token order.
def flatten
  @sentences.inject([]) do |all, sentence|
    sentence.tokens.each { |token| all << token }
    all
  end
end
24
+
25
+ end
25
26
  end
data/lib/token.rb CHANGED
@@ -1,34 +1,37 @@
1
-
2
1
  module NLP
3
- class Token
4
- attr_reader :orth
5
- attr_reader :tags
6
-
7
-
8
- def initialize(orth,tags)
9
- @orth = orth
10
- @tags = tags
11
- end
2
+ class Token
3
+
4
+ attr_reader :orth
5
+ attr_reader :tags
6
+
7
+ def initialize(orth,tags)
8
+ @orth = orth
9
+ @tags = tags
10
+ end
12
11
 
13
- def interp?
14
- @tags.eql? "interp"
15
- end
12
+ def symbol?
13
+ @tags.eql? "tsym"
14
+ end
16
15
 
17
- def word?
18
- not interp? and not number?
19
- end
16
+ def interp?
17
+ @tags.eql? "interp"
18
+ end
20
19
 
21
- def number?
22
- @tags.include?("tnum")
23
- end
20
+ def word?
21
+ not interp? and not number?
22
+ end
24
23
 
25
- def integer?
26
- @tags.include?("tnum:integer")
27
- end
24
+ def number?
25
+ @tags.include?("tnum")
26
+ end
28
27
 
29
- def float?
30
- @tags.include?("tnum:frac")
31
- end
28
+ def integer?
29
+ @tags.include?("tnum:integer")
30
+ end
32
31
 
32
+ def float?
33
+ @tags.include?("tnum:frac")
33
34
  end
35
+
36
+ end
34
37
  end
data/lib/token_scanner.rb CHANGED
@@ -1,72 +1,60 @@
1
-
2
1
  module NLP
3
- class TokenScanner
4
-
5
- attr_reader :text, :tokens
2
+ class TokenScanner
6
3
 
7
- def initialize(text)
8
- @text = text
9
- @pos = 0
10
- @tokens = flatten_text(@text)
11
- end
4
+ attr_reader :text, :tokens
12
5
 
13
- def next(type)
14
- @pos+=1
6
+ def initialize(text)
7
+ @text = text
8
+ @pos = 0
9
+ @tokens = @text.flatten
10
+ end
15
11
 
16
- case type
17
- when :word
18
- while @pos < @tokens.size and !@tokens[@pos].word?
19
- @pos+= 1
20
- end
12
+ def next(type)
13
+ @pos+=1
21
14
 
22
- when :interp
23
- while @pos < @tokens.size and !@tokens[@pos].interp?
24
- @pos+= 1
25
- end
26
-
27
- when :number
28
- while @pos < @tokens.size and !@tokens[@pos].number?
29
- @pos+= 1
30
- end
31
- when :alphanum
32
- while @pos < @tokens.size and !@tokens[@pos].number? and !@tokens[@pos].word?
33
- @pos+= 1
34
- end
35
- end
15
+ case type
16
+ when :word
17
+ while @pos < @tokens.size and !@tokens[@pos].word?
18
+ @pos+= 1
36
19
  end
37
20
 
38
-
39
- def current
40
- if @pos == @tokens.size
41
- nil
42
- else
43
- @tokens[@pos]
44
- end
45
-
21
+ when :interp
22
+ while @pos < @tokens.size and !@tokens[@pos].interp?
23
+ @pos+= 1
46
24
  end
47
25
 
48
- def rewind
49
- @pos = 0
26
+ when :number
27
+ while @pos < @tokens.size and !@tokens[@pos].number?
28
+ @pos+= 1
50
29
  end
51
-
52
-
53
- def index
54
- @pos
30
+ when :alphanum
31
+ while @pos < @tokens.size and !@tokens[@pos].number? and !@tokens[@pos].word?
32
+ @pos+= 1
55
33
  end
34
+ end
35
+ end
56
36
 
57
37
 
58
- def end?
59
- @pos == tokens.size
60
- end
61
-
38
# Token at the current scanner position, or nil when the position equals
# the number of tokens.
def current
  @pos == @tokens.size ? nil : @tokens[@pos]
end
62
45
 
63
- private
46
+ def rewind
47
+ @pos = 0
48
+ end
64
49
 
65
- def flatten_text(text)
66
- flattened = []
67
- text.sentences.each { |s| s.tokens.each {|t| flattened.push t } }
68
- flattened
69
- end
50
+ def index
51
+ @pos
52
+ end
70
53
 
71
- end
54
# True once the scanner position has reached or passed the last token.
#
# `next` advances the position unconditionally before scanning, so the
# position can move beyond tokens.size; hence >= rather than ==.
def end?
  @pos >= tokens.size
end
57
+
58
+
59
+ end
72
60
  end