nlp 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,10 +0,0 @@
1
module NLP
  # Analyzer variant that is bound to the :rid dictionary.
  class RIDAnalyzer < Analyzer
    # Builds the analyzer with a Dictionary created for :rid.
    #
    # NOTE(review): the parent initializer is deliberately not invoked here,
    # exactly as in the original code; Analyzer is defined elsewhere, so
    # confirm it does not need any additional setup.
    def initialize
      @dictionary = Dictionary.new(:rid)
    end
  end
end
@@ -1,17 +0,0 @@
1
module NLP
  # Category whose top-level group is identified by comparing +root+
  # (provided by Category, defined elsewhere) with a fixed symbol.
  class RIDCategory < Category
    # @return [Boolean] true when +root+ equals :PIERWOTNE
    def primary?
      root == :PIERWOTNE
    end

    # @return [Boolean] true when +root+ equals :WTORNE
    def secondary?
      root == :WTORNE
    end

    # @return [Boolean] true when +root+ equals :EMOCJE
    def emotions?
      root == :EMOCJE
    end
  end
end
@@ -1,24 +0,0 @@
1
module NLP
  # An ordered list of tokens that together form one sentence.
  class Sentence
    # @return [Array] the tokens collected so far, in insertion order
    attr_reader :tokens

    def initialize
      @tokens = []
    end

    # Appends to the sentence.
    #
    # @param tokens a single token, or an Array whose elements are all appended
    # @return [Sentence] self, so that calls can be chained
    def <<(tokens)
      if tokens.is_a?(Array)
        @tokens.concat(tokens)
      else
        @tokens << tokens
      end
      self
    end

    # @return [Integer] number of tokens for which +interp?+ is falsy
    def words_number
      @tokens.count { |token| !token.interp? }
    end
  end
end
@@ -1,55 +0,0 @@
1
# Collects the outcome of an analysis run: every matched word, the category
# it was matched under, and a free-form key/value store for extra results.
class Statistic
  # NOTE(review): the +hash+ accessor shadows Object#hash, so instances
  # cannot be used as Hash keys. It is kept because it is part of the public
  # interface of this class.
  attr_accessor :total_words, :hash
  attr_reader :cwords, :words, :word_count

  def initialize
    @word_count = 0
    @total_words = 0
    @scores = Hash.new(0) # category object => number of matched words
    @words = []
    @cwords = {}          # category name => words matched under it
    @hash = {}            # backing store for #[] and #[]=
  end

  # Records one matched word.
  #
  # @param word the matched word
  # @param category an object responding to +name+; it is also used directly
  #   as the key of the per-category counter
  def add(word, category)
    @scores[category] += 1
    @word_count += 1
    @words.push(word)
    (@cwords[category.name] ||= []).push(word)
  end

  # Stores +value+ under +key+ in the free-form store.
  def []=(key, value)
    @hash[key] = value
  end

  # Reads +key+ from the free-form store.
  def [](key)
    @hash[key]
  end

  # Computes, for every requested group, the fraction of matched words whose
  # category answers truthy to the "<group>?" predicate.
  #
  # @param categories [Array<Symbol, String>] predicate names without the "?"
  # @return [Hash] group => Float fraction of +word_count+ (0.0 when nothing
  #   has been added yet)
  def category_participation(categories)
    categories.each_with_object({}) do |cat, result|
      predicate = "#{cat}?"
      result[cat] = percentage_distribution(@scores) do |category|
        category.public_send(predicate)
      end
    end
  end

  private

  # Sums the counts of the [category, count] pairs whose category is accepted
  # by the block and divides the sum by the number of matched words.
  def percentage_distribution(scores)
    # Guard against 0.0 / 0, which would otherwise yield NaN.
    return 0.0 if @word_count.zero?

    matched = scores.inject(0) do |sum, (category, count)|
      yield(category) ? sum + count : sum
    end
    Float(matched) / @word_count
  end
end
54
-
55
-
@@ -1,19 +0,0 @@
1
# Character-oriented helpers added to the core String class.
class String
  # Keeps the built-in String#[] reachable under another name.
  # (The spelling "memeber" is part of the existing interface.)
  alias old_memeber []

  # Delegates to the built-in String#[].
  def ordinary(index)
    old_memeber(index)
  end

  # Returns the character at +index+, or nil when out of range.
  # Every character counts, including newlines.
  def get(index)
    chars.to_a[index]
  end

  # Replaces the character at +index+ with +value+, modifying the receiver.
  #
  # The string is split with #chars rather than scan(/./): "." does not match
  # a newline, so the regexp variant silently removed every newline from the
  # receiver when it was re-joined.
  #
  # @return +value+
  def set(index, value)
    characters = chars.to_a
    characters[index] = value
    replace(characters.join)
    value
  end
end
19
-
@@ -1,85 +0,0 @@
1
-
2
module NLP
  # A character trie. Every node owns a list of values and one child slot per
  # character of ALPHABET, plus one extra slot shared by all other characters.
  #
  # A child stored under "*" (slot 0) acts as a wildcard: as soon as a lookup
  # reaches a node that has such a child, the values of that child are
  # returned regardless of the remaining characters.
  class SearchTree
    ALPHABET = %w{* - a ą b c ć d e ę f g h i j k l ł m n ń o ó p r s ś t u w y z ź ż}.freeze
    SYMBOLS = %w{* - : - / ) (}.freeze

    # Slot used for every character that is not listed in ALPHABET. It lies
    # past the last alphabet slot so such characters can no longer collide
    # with "ż" (a -1 index addressed the last array element).
    UNKNOWN_KEY = ALPHABET.size

    attr_accessor :value
    attr_accessor :subtrees

    # Slot layout:
    #  0 -> *
    #  1 -> -
    #  2 -> a
    # 33 -> ż
    # 34 -> any character outside ALPHABET
    def initialize
      @subtrees = Array.new(ALPHABET.size + 1)
      @value = []
    end

    # Stores +value+ under the key string +s+.
    def insert(s, value)
      priv_insert(s.scan(/./), value)
    end

    # Looks up +s+.
    #
    # @return [Array, nil] the values stored at the matching node (possibly
    #   empty), or nil when no node matches
    def find(s)
      priv_find(s.scan(/./))
    end

    # Visits the tree breadth-first, yielding the value list of every node,
    # starting with the receiver. Without a block an Enumerator is returned.
    def traverse
      return enum_for(:traverse) unless block_given?

      queue = [self]
      until queue.empty?
        node = queue.shift
        yield node.value
        # Empty child slots hold nil and must not be visited.
        queue.concat(node.subtrees.compact)
      end
    end

    protected

    # Maps a character to its child slot.
    #
    # @raise [ArgumentError] when +chr+ is nil
    def key(chr)
      raise ArgumentError, "Argument chr is nil" unless chr

      ALPHABET.index(chr) || UNKNOWN_KEY
    end

    # Recursive worker for #insert; +s+ is an array of characters.
    def priv_insert(s, value)
      return @value.push(value) if s.empty?

      index = key(s.first)
      subtree = (@subtrees[index] ||= SearchTree.new)
      subtree.priv_insert(s.drop(1), value)
    end

    # Recursive worker for #find; +search+ is an array of characters.
    def priv_find(search)
      wildcard = @subtrees[0]
      return wildcard.value if wildcard
      return value if search.empty?

      subtree = @subtrees[key(search.first)]
      subtree && subtree.priv_find(search.drop(1))
    end
  end
end
@@ -1,51 +0,0 @@
1
- require 'rubygems'
2
- require 'savon'
3
-
4
# Thin client for the TaKIPI tagging web service (SOAP, via Savon).
class TakipiWebService
  URL = 'http://nlp.pwr.wroc.pl/clarin/ws/takipi/'
  WSDL_URL = URL + 'takipi.wsdl'

  # Polling budget and interval, presumably in seconds (they drive +sleep+).
  POLL_TIMEOUT = 60
  POLL_STEP = 5

  # Submits +text+ for tagging, polls until the job is reported finished and
  # returns the result wrapped in an <xml><chunkList> envelope.
  #
  # Status codes, as far as the control flow shows: 1 ends the polling,
  # 2 and 3 mean "ask again". NOTE(review): the exact meaning of each code
  # should be confirmed against the service documentation.
  #
  # When the polling budget runs out the result is requested anyway, as the
  # original code did.
  #
  # @param text [#to_s] text to tag; it is XML-escaped before being embedded
  #   in the request body
  # @return [String] well-formed XML string with the service response
  # @raise [RuntimeError] when the service reports a status other than 1, 2
  #   or 3 (this case used to loop forever)
  def self.request(text)
    client = Savon::Client.new WSDL_URL, :soap_endpoint => URL

    # Escape &, < and > so arbitrary input cannot break the SOAP body.
    escaped = text.to_s.encode(:xml => :text)

    response = client.tag do |soap|
      soap.body = "<text>#{escaped}</text><format>TXT</format><useGuesser>true</useGuesser>"
    end.to_hash

    tag_response = response[:tag_response][:tag_response]
    token = tag_response[:msg]
    status = tag_response[:status].to_i

    waited = 0
    loop do
      break if status == 1 || waited > POLL_TIMEOUT

      unless status == 2 || status == 3
        raise "TaKIPI returned unexpected status #{status}"
      end

      sleep(POLL_STEP)
      waited += POLL_STEP
      status_response = client.get_status do |soap|
        soap.body = "<token>#{token}</token>"
      end.to_hash
      status = status_response[:get_status_response][:status].to_i
    end

    result = client.get_result do |soap|
      soap.body = "<token>#{token}</token>"
    end

    response_document = result.to_hash[:get_result_response][:tag_response][:msg]

    # Wrap the payload so that the caller receives a single-rooted document.
    "<xml><chunkList>#{response_document}</chunkList></xml>"
  end
end
51
-
@@ -1,26 +0,0 @@
1
module NLP
  # An ordered list of sentences.
  class Text
    # @return [Array] the sentences added so far
    attr_reader :sentences

    def initialize
      @sentences = []
    end

    # Appends a sentence.
    #
    # @return [Array] the internal sentence list (the result of Array#push)
    def <<(sentence)
      @sentences.push(sentence)
    end

    # Mean of +words_number+ over all sentences.
    #
    # NOTE(review): Array#mean is not part of core Ruby; it is presumably
    # provided by an extension loaded elsewhere in the project — confirm.
    def words_per_sentence
      @sentences.map { |sentence| sentence.words_number }.mean
    end

    # @return [Array] the tokens of all sentences, in order, as one flat list
    def flatten
      @sentences.flat_map { |sentence| sentence.tokens }
    end
  end
end
@@ -1,37 +0,0 @@
1
module NLP
  # A single token: its surface form and the tag string attached to it.
  class Token
    # @return the surface form passed to the constructor
    attr_reader :orth
    # @return the tags passed to the constructor
    attr_reader :tags

    def initialize(orth, tags)
      @orth = orth
      @tags = tags
    end

    # @return [Boolean] true when the tags are exactly "tsym"
    def symbol?
      @tags.eql?("tsym")
    end

    # @return [Boolean] true when the tags are exactly "interp"
    def interp?
      @tags.eql?("interp")
    end

    # @return [Boolean] true for anything that is neither interp nor a number
    def word?
      !interp? && !number?
    end

    # @return [Boolean] true when the tags include "tnum"
    def number?
      @tags.include?("tnum")
    end

    # @return [Boolean] true when the tags include "tnum:integer"
    def integer?
      @tags.include?("tnum:integer")
    end

    # @return [Boolean] true when the tags include "tnum:frac"
    def float?
      @tags.include?("tnum:frac")
    end
  end
end
@@ -1,60 +0,0 @@
1
module NLP
  # Cursor over the flattened token list of a text.
  class TokenScanner
    # Predicate that a token must satisfy for #next to stop on it.
    MATCHERS = {
      :word     => lambda { |token| token.word? },
      :interp   => lambda { |token| token.interp? },
      :number   => lambda { |token| token.number? },
      :alphanum => lambda { |token| token.number? || token.word? }
    }.freeze

    attr_reader :text, :tokens

    # @param text an object whose +flatten+ returns the list of tokens
    def initialize(text)
      @text = text
      @pos = 0
      @tokens = @text.flatten
    end

    # Moves the cursor forward to the next token of the given +type+
    # (:word, :interp, :number or :alphanum), or to the end when there is
    # none. Any other +type+ advances by exactly one token.
    #
    # The cursor never moves past the end: previously a call made at the end
    # pushed the position beyond +tokens.size+, after which #end? stayed
    # false forever.
    #
    # @return [nil]
    def next(type)
      return if end?

      matcher = MATCHERS[type]
      @pos += 1
      @pos += 1 while matcher && !end? && !matcher.call(@tokens[@pos])
      nil
    end

    # @return the token under the cursor, or nil at the end
    def current
      end? ? nil : @tokens[@pos]
    end

    # Moves the cursor back to the first token.
    def rewind
      @pos = 0
    end

    # @return [Integer] current cursor position
    def index
      @pos
    end

    # @return [Boolean] true once every token has been consumed
    def end?
      @pos >= @tokens.size
    end
  end
end
@@ -1,23 +0,0 @@
1
- require 'inflectable'
2
- require 'meaningable'
3
-
4
module NLP
  # A token that additionally carries its base form and an assignable
  # category. Behaviour from Inflectable and Meaningable (both defined
  # elsewhere) is mixed in.
  class Word < Token
    include Inflectable
    include Meaningable

    # @return the base form passed to the constructor
    attr_reader :lemat
    attr_accessor :category

    # @param word surface form, forwarded to Token as +orth+
    # @param lemat base form of the word
    # @param tags tag string, forwarded to Token
    def initialize(word, lemat, tags)
      super(word, tags)
      @lemat = lemat
    end

    # @return the tags of this word
    def inflection
      @tags
    end
  end
end
@@ -1,25 +0,0 @@
1
- require '../lib/analyzer.rb'
2
-
3
-
4
# Checks that analysing a short lemmatized sample yields a Statistic with
# the expected counters.
class AnalyzerTest < Test::Unit::TestCase
  include NLP

  def setup
    sample = "Ja byłam wtedy bardzo szczęśliwa"
    @text = Lemmatizer.lemmatize(sample, :takipi, :local)
    @scanner = TokenScanner.new(@text)
    @rid_analyzer = Analyzer.new(:rid)
    # Not used by any test below; kept as in the original setup.
    @liwc_analyzer = Analyzer.new(:liwc)
  end

  def test_analyze
    stats = @rid_analyzer.analyze(@scanner)
    assert_kind_of Statistic, stats
    assert_equal 5, stats.total_words
    assert_equal 1, stats.word_count
  end
end
25
-