nlp 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +0,0 @@
- module NLP
-
-   class RIDAnalyzer < Analyzer
-
-     def initialize
-       @dictionary = Dictionary.new(:rid)
-     end
-
-   end
- end
@@ -1,17 +0,0 @@
- module NLP
-   class RIDCategory < Category
-
-     def primary?
-       root == :PIERWOTNE
-     end
-
-     def secondary?
-       root == :WTORNE
-     end
-
-     def emotions?
-       root == :EMOCJE
-     end
-
-   end
- end
@@ -1,24 +0,0 @@
- module NLP
-   class Sentence
-
-     attr_reader :tokens
-
-     def initialize()
-       @tokens = []
-     end
-
-     def << tokens
-       if tokens.is_a? Array
-         @tokens.concat tokens
-       else
-         @tokens << tokens
-       end
-       self
-     end
-
-     def words_number
-       @tokens.count{|t| !t.interp?}
-     end
-
-   end
- end
@@ -1,55 +0,0 @@
- class Statistic
-
-   attr_accessor :total_words, :hash
-   attr_reader :cwords, :words, :total_words, :word_count
-
-   def initialize
-     @word_count = 0
-     @total_words = 0
-     @scores = Hash.new { 0 }
-     @words = []
-     @cwords = Hash.new {nil}
-     @hash
-   end
-
-   def add(word,category)
-
-     @scores[category] += 1
-     @word_count += 1
-     @words.push word
-
-     category = category.name
-     if @cwords[category].nil?
-       @cwords[category] = []
-     end
-     @cwords[category].push word
-
-   end
-
-   def []=(key,value)
-     @hash[key] = value
-   end
-
-   def [](key)
-     @hash[key]
-   end
-
-   def category_participation(categories)
-     sorted_scores = @scores.to_a.sort_by { |result| -result[1] }
-     r = {}
-     categories.each do |cat|
-       r[cat] = percentage_distribution(sorted_scores){|c| c.send(cat.to_s+'?')}
-     end
-     r
-   end
-
-   private
-
-   def percentage_distribution scores, &block
-     sum = scores.select{|result| yield result[0]}.inject(0){|count,result| count + result[1]}
-     Float(sum)/@word_count
-   end
-
- end
-
-
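
For orientation, here is a minimal, hypothetical sketch of how the removed Statistic class was driven. The Struct-based category is only a stand-in for the gem's own Category objects (assumed to respond to #name and to predicate methods such as #primary?); since @hash is never actually initialized in the class, the []/[]= accessors are avoided here.

    # Hypothetical stand-in for the gem's Category objects.
    Category = Struct.new(:name) do
      def primary?;   name == :primary;   end
      def secondary?; name == :secondary; end
    end

    stats = Statistic.new
    stats.add("happy", Category.new(:primary))
    stats.add("sad",   Category.new(:secondary))

    # Share of counted words falling into each category label.
    p stats.category_participation([:primary, :secondary])
    # => {:primary=>0.5, :secondary=>0.5}
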
@@ -1,19 +0,0 @@
- class String
-   alias old_memeber []
-
-   def ordinary (index)
-     self.old_memeber index
-   end
-
-   def get(index)
-     self.scan(/./)[index]
-   end
-
-   def set(index,value)
-     arr = self.scan(/./)
-     arr[index] = value
-     self.replace(arr.join)
-     value
-   end
- end
-
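
A short, hypothetical sketch of the removed String helpers, which provide character-based indexed access (useful on Ruby 1.8, where String#[] was byte-based) by splitting the string with scan(/./):

    s = "żółw"
    p s.get(1)        # => "ó"  (character at index 1)
    p s.set(0, "Ż")   # => "Ż"  (replaces the character in place)
    p s               # => "Żółw"
    p s.ordinary(0)   # delegates to the original String#[] via the old_memeber alias
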
@@ -1,85 +0,0 @@
-
- module NLP
-   class SearchTree
-     ALPHABET = %w{* - a ą b c ć d e ę f g h i j k l ł m n ń o ó p r s ś t u w y z ź ż}
-     SYMBOLS = %w{* - : - / ) (}
-     attr_accessor :value
-     attr_accessor :subtrees
-
-     # 0 -> *
-     # 1 -> -
-     # 2 -> a
-     # 33 -> ź
-     def initialize
-       @subtrees = Array.new(34, nil)
-       @value = []
-     end
-
-     def insert(s, value)
-       priv_insert(s.scan(/./), value)
-     end
-
-     def find(s)
-       priv_find(s.scan(/./))
-     end
-
-
-     protected
-     def key( chr )
-       unless chr
-         raise ArgumentError, "Argument chr is nil"
-       end
-       rval = ALPHABET.index(chr) || -1
-       if rval > 35
-         rval = -1 # invalid character
-       end
-
-       rval
-     end
-
-     def priv_insert( s, value )
-       if s.empty?
-         @value.push value
-       else
-         index = key( s.first )
-         subtree = if @subtrees[index] == nil
-           @subtrees[index] = SearchTree.new
-         else
-           @subtrees[index]
-         end
-
-         subtree.priv_insert( s.tail, value )
-       end
-     end
-
-     def priv_find( search )
-       if @subtrees[0]
-         @subtrees[0].value
-       else
-         if search.empty?
-           value
-         else
-           index = key( search.first )
-           if @subtrees[index]
-             @subtrees[index].priv_find( search.tail )
-           else
-             nil
-           end
-         end
-       end
-     end
-
-     public
-     def traverse()
-       list = []
-       yield @value
-       list.concat @subrees if @subtrees != nil
-       loop do
-         break if list.empty?
-         node = list.shift
-         yield node.value
-         list.concat node.subtrees if node.subtrees != nil
-       end
-     end
-   end
- end
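
For context, a minimal hypothetical sketch of the removed SearchTree, a fixed-alphabet trie over Polish letters. priv_insert and priv_find call Array#tail, which is not defined in this file, so the sketch assumes a small monkey patch supplying it (the gem presumably provides its own elsewhere):

    class Array
      # Assumed helper: everything after the first element.
      def tail
        self[1..-1]
      end
    end

    tree = NLP::SearchTree.new
    tree.insert("kot", :noun)
    tree.insert("kos", :noun)

    p tree.find("kot")   # => [:noun]
    p tree.find("pies")  # => nil (no entry under this prefix)
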
@@ -1,51 +0,0 @@
- require 'rubygems'
- require 'savon'
-
- class TakipiWebService
-   URL = 'http://nlp.pwr.wroc.pl/clarin/ws/takipi/'
-   WSDL_URL = URL + 'takipi.wsdl'
-
-   def self.request(text)
-     client = Savon::Client.new WSDL_URL, :soap_endpoint => URL
-
-     # Call remote service methods
-     response = client.tag do |soap|
-       soap.body = "<text>#{text}</text><format>TXT</format><useGuesser>true</useGuesser>"
-     end
-
-     response = response.to_hash
-     token = response[:tag_response][:tag_response][:msg]
-     status = (response[:tag_response][:tag_response][:status]).to_i
-
-     #checking status
-     timeout = 60
-     step = 5
-     count = 0
-     loop do
-       break if count > timeout
-       if status == 1
-         break
-       elsif status == 2 or status == 3
-         count += 5
-         sleep(1)
-         r = client.get_status do |soap|
-           soap.body = "<token>#{token}</token>"
-         end.to_hash
-         status = (r[:get_status_response][:status]).to_i
-
-       end
-     end
-
-     #geting result
-
-     result = client.get_result do |soap|
-       soap.body="<token>#{token}</token>"
-     end
-
-     response_document = result.to_hash[:get_result_response][:tag_response][:msg]
-
-     #transforming response to well formed xml string
-     return "<xml><chunkList>#{response_document}</chunkList></xml>"
-   end
- end
-
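
Usage of the removed TakipiWebService wrapper is a single blocking call; a hypothetical sketch (it requires network access to the TaKIPI endpoint and the old Savon 0.7-era API the file was written against):

    xml = TakipiWebService.request("Ala ma kota")
    # => "<xml><chunkList>...tagged chunks from the remote tagger...</chunkList></xml>"
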
@@ -1,26 +0,0 @@
- module NLP
-   class Text
-     attr_reader :sentences
-
-     def initialize
-       @sentences = []
-     end
-
-     def << sentence
-       @sentences.push sentence
-     end
-
-
-     def words_per_sentence
-       @sentences.collect{|s| s.words_number}.mean
-     end
-
-
-     def flatten
-       flattened = []
-       @sentences.each{ |s| s.tokens.each{|t| flattened.push t } }
-       flattened
-     end
-
-   end
- end
@@ -1,37 +0,0 @@
- module NLP
-   class Token
-
-     attr_reader :orth
-     attr_reader :tags
-
-     def initialize(orth,tags)
-       @orth = orth
-       @tags = tags
-     end
-
-     def symbol?
-       @tags.eql? "tsym"
-     end
-
-     def interp?
-       @tags.eql? "interp"
-     end
-
-     def word?
-       not interp? and not number?
-     end
-
-     def number?
-       @tags.include?("tnum")
-     end
-
-     def integer?
-       @tags.include?("tnum:integer")
-     end
-
-     def float?
-       @tags.include?("tnum:frac")
-     end
-
-   end
- end
@@ -1,60 +0,0 @@
- module NLP
-   class TokenScanner
-
-     attr_reader :text, :tokens
-
-     def initialize(text)
-       @text = text
-       @pos = 0
-       @tokens = @text.flatten
-     end
-
-     def next(type)
-       @pos+=1
-
-       case type
-       when :word
-         while @pos < @tokens.size and !@tokens[@pos].word?
-           @pos+= 1
-         end
-
-       when :interp
-         while @pos < @tokens.size and !@tokens[@pos].interp?
-           @pos+= 1
-         end
-
-       when :number
-         while @pos < @tokens.size and !@tokens[@pos].number?
-           @pos+= 1
-         end
-       when :alphanum
-         while @pos < @tokens.size and !@tokens[@pos].number? and !@tokens[@pos].word?
-           @pos+= 1
-         end
-       end
-     end
-
-
-     def current
-       if @pos == @tokens.size
-         nil
-       else
-         @tokens[@pos]
-       end
-     end
-
-     def rewind
-       @pos = 0
-     end
-
-     def index
-       @pos
-     end
-
-     def end?
-       @pos == tokens.size
-     end
-
-
-   end
- end
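
A hypothetical sketch tying together the removed Text, Sentence, Token and TokenScanner classes; the tag strings are illustrative only:

    text = NLP::Text.new
    sentence = NLP::Sentence.new
    sentence << [NLP::Token.new("Ala", "subst"), NLP::Token.new(".", "interp")]
    text << sentence

    scanner = NLP::TokenScanner.new(text)
    until scanner.end?
      puts scanner.current.orth   # prints "Ala"; the interp token is skipped
      scanner.next(:word)         # advance to the next word-like token
    end
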
@@ -1,23 +0,0 @@
- require 'inflectable'
- require 'meaningable'
-
- module NLP
-   class Word < Token
-
-     include Inflectable
-     include Meaningable
-
-     attr_reader :lemat
-     attr_accessor :category
-
-     def initialize(word, lemat, tags)
-       super(word,tags)
-       @lemat = lemat
-     end
-
-     def inflection
-       @tags
-     end
-
-   end
- end
@@ -1,25 +0,0 @@
- require '../lib/analyzer.rb'
-
-
- class AnalyzerTest < Test::Unit::TestCase
-
-   include NLP
-
-   def setup
-     sample = "Ja byłam wtedy bardzo szczęśliwa"
-     @text = Lemmatizer.lemmatize(sample,:takipi,:local)
-     @scanner = TokenScanner.new(@text)
-     @rid_analyzer = Analyzer.new(:rid)
-     @liwc_analyzer = Analyzer.new(:liwc)
-   end
-
-   def test_analyze
-     stats = @rid_analyzer.analyze(@scanner)
-     assert_kind_of Statistic, stats
-     assert_equal 5, stats.total_words
-     assert_equal 1, stats.word_count
-
-   end
-
- end
-