nlp 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
data/lib/word.rb CHANGED
@@ -2,22 +2,22 @@ require 'inflectable'
2
2
  require 'meaningable'
3
3
 
4
4
  module NLP
5
- class Word < Token
6
- include Inflectable
7
- include Meaningable
5
+ class Word < Token
6
+
7
+ include Inflectable
8
+ include Meaningable
8
9
 
9
- attr_reader :lemat, :orth
10
- attr_accessor :category
11
-
12
- def initialize(word, lemat, tags)
13
- super(word,tags)
14
- @lemat = lemat
15
- end
16
-
17
- def inflection
18
- @tags
19
- end
10
+ attr_reader :lemat
11
+ attr_accessor :category
20
12
 
13
+ def initialize(word, lemat, tags)
14
+ super(word,tags)
15
+ @lemat = lemat
16
+ end
21
17
 
18
+ def inflection
19
+ @tags
22
20
  end
21
+
22
+ end
23
23
  end
@@ -0,0 +1,25 @@
1
+ require '../lib/analyzer.rb'
2
+
3
+
4
+ class AnalyzerTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ sample = "Ja byłam wtedy bardzo szczęśliwa"
10
+ @text = Lemmatizer.lemmatize(sample,:takipi,:local)
11
+ @scanner = TokenScanner.new(@text)
12
+ @rid_analyzer = Analyzer.new(:rid)
13
+ @liwc_analyzer = Analyzer.new(:liwc)
14
+ end
15
+
16
+ def test_analyze
17
+ stats = @rid_analyzer.analyze(@scanner)
18
+ assert_kind_of Statistic, stats
19
+ assert_equal 5, stats.total_words
20
+ assert_equal 1, stats.word_count
21
+
22
+ end
23
+
24
+ end
25
+
@@ -0,0 +1,73 @@
1
+ require 'helper'
2
+ require '../lib/lemmatizer.rb'
3
+
4
+ class LemmatizerTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ @sample = "Złe czasy już minęły."
10
+
11
+
12
+ @zle_word = Word.new('złe','zły','adj:pl:nom:m3:pos')
13
+ @czasy_word = Word.new('czasy','czas','subst:pl:nom:m3')
14
+ @minely_word = Word.new('minęły','minąć','praet:pl:m3:perf')
15
+ @juz_word = Word.new('już','już','qub')
16
+ @period = Token.new('.','interp')
17
+
18
+ end
19
+
20
+
21
+
22
+ def test_takipi_remote_lemmatizer
23
+ text = Lemmatizer.lemmatize(@sample,:takipi,:remote)
24
+ test_takipi_lemmatizer(text)
25
+ end
26
+
27
+
28
+ def test_takipi_local_lemmatizer
29
+ text = Lemmatizer.lemmatize(@sample,:takipi,:local)
30
+ test_takipi_lemmatizer(text)
31
+ end
32
+
33
+
34
+ def test_morfeusz_leamtizer
35
+ text = Lemmatizer.lemmatize(@sample)
36
+ assert_equal Text, text.class
37
+ assert_equal 1, text.sentences.size
38
+ assert_equal 4, text.sentences[0].words_number
39
+
40
+ tokens = text.sentences[0].tokens
41
+ zle,czasy,juz,minely,period = *tokens
42
+ assert_equal 'zły', zle.lemat
43
+ assert_equal 'czas', czasy.lemat
44
+ assert_equal 'już', juz.lemat
45
+ assert_equal 'minąć', minely.lemat
46
+
47
+ end
48
+
49
+ private
50
+ def test_takipi_lemmatizer(text)
51
+
52
+ assert_equal Text, text.class
53
+ assert_equal 1, text.sentences.size
54
+ assert_equal 4, text.sentences[0].words_number
55
+
56
+ tokens = text.sentences[0].tokens
57
+ zle, czasy, juz, minely, period = *tokens
58
+ assert_equal @zle_word.inflection, zle.inflection
59
+ assert_equal @czasy_word.inflection, czasy.inflection
60
+ assert_equal @juz_word.inflection, juz.inflection
61
+ assert_equal @minely_word.inflection, minely.inflection
62
+ assert_equal @period.tags, period.tags
63
+
64
+ assert_equal 'zły', zle.lemat
65
+ assert_equal 'czas', czasy.lemat
66
+ assert_equal 'już', juz.lemat
67
+ assert_equal 'minąć', minely.lemat
68
+
69
+ end
70
+
71
+
72
+
73
+ end
@@ -0,0 +1,28 @@
1
+ require 'helper'
2
+ require '../lib/word.rb'
3
+
4
+ class MeaningableTest < Test::Unit::TestCase
5
+ include NLP
6
+
7
+ def setup
8
+
9
+ @word_kochamy = Word.new('kochamy','kochać', 'fin:pl:pri:imperf')
10
+ psych_cat = LIWCCategory.new('PROCESY_PSYCHOLOGICZNE')
11
+ emotion_cat = LIWCCategory.new('EMOCJE',psych_cat)
12
+ pos_emotion_cat = LIWCCategory.new('POZYTYWNE_EMOCJE',emotion_cat)
13
+ @word_kochamy.category = pos_emotion_cat
14
+
15
+ end
16
+
17
+ def test_category_recognition
18
+
19
+ assert @word_kochamy.psychological?
20
+ assert @word_kochamy.positive_emotion?
21
+ assert @word_kochamy.emotion?
22
+
23
+ assert !@word_kochamy.negative_emotion?
24
+ end
25
+
26
+
27
+ end
28
+
@@ -0,0 +1,11 @@
1
+ require 'test/unit'
2
+ require 'helper'
3
+ require "lemmatizer_test"
4
+ require "word_test"
5
+ require "token_test"
6
+ require "text_test"
7
+ require "sentence_test"
8
+ require "token_scanner_test"
9
+ require "meaningable_test"
10
+
11
+
@@ -0,0 +1,26 @@
1
+ require 'helper'
2
+ require '../lib/token.rb'
3
+
4
+ class SentenceTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ @sentence = Sentence.new
10
+ @comma = Token.new(',','interp')
11
+ @integer = Token.new('32','tnum:integer')
12
+ @float = Token.new('3,12','tnum:frac')
13
+ @symbol = Token.new('nie_istniejace_slowo','tsym')
14
+ end
15
+
16
+
17
+ def test_sentence_size
18
+ assert_equal 0, @sentence.words_number
19
+ @sentence << @symbol
20
+ assert_equal 1, @sentence.words_number
21
+ @sentence << @integer << @comma << @float
22
+ assert_equal 3, @sentence.words_number
23
+ end
24
+
25
+
26
+ end
data/test/text_test.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'helper'
2
+ require '../lib/token.rb'
3
+
4
+ class TextTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ @s1 = Sentence.new
10
+ @s2 = Sentence.new
11
+ @comma = Token.new(',','interp')
12
+ @integer = Token.new('32','tnum:integer')
13
+ @float = Token.new('3,12','tnum:frac')
14
+ @symbol = Token.new('nie_istniejace_slowo','tsym')
15
+ @s1 << @integer << @comma << @symbol
16
+ @s2 << @integer << @symbol
17
+ @text = Text.new
18
+
19
+ end
20
+
21
+
22
+ def test_text
23
+ @text << @s1
24
+ @text << @s2
25
+ assert_equal 2, @text.words_per_sentence
26
+ end
27
+
28
+
29
+ end
@@ -0,0 +1,28 @@
1
+ require 'helper'
2
+ require '../lib/token_scanner.rb'
3
+
4
+ class TokenScannerTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ sentence = "To, jest zdanie."
10
+ @text = Lemmatizer.lemmatize(sentence,:takipi,:local)
11
+ @scanner = TokenScanner.new(@text)
12
+
13
+ end
14
+
15
+ def test_scanner
16
+ assert_equal "To", @scanner.current.orth
17
+ @scanner.next(:word)
18
+ assert_equal "jest", @scanner.current.orth
19
+ @scanner.next(:interp)
20
+ assert_equal ".", @scanner.current.orth
21
+ @scanner.next(:word)
22
+ assert @scanner.end?
23
+ @scanner.rewind
24
+ assert_equal 0, @scanner.index
25
+ end
26
+
27
+ end
28
+
@@ -0,0 +1,37 @@
1
+ require 'helper'
2
+ require '../lib/token.rb'
3
+
4
+ class TokenTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ @comma = Token.new(',','interp')
10
+ @integer = Token.new('32','tnum:integer')
11
+ @float = Token.new('3,12','tnum:frac')
12
+ @symbol = Token.new('nie_istniejace_slowo','tsym')
13
+ end
14
+
15
+
16
+ def test_recognizing_interpunction
17
+ assert @comma.interp?
18
+ assert !@comma.word?
19
+ end
20
+
21
+ def test_recognizing_numbers
22
+ assert @integer.integer?
23
+ assert !@integer.word?
24
+
25
+ assert @float.float?
26
+ assert @float.number?
27
+
28
+ assert !@float.word?
29
+ assert !@float.integer?
30
+ end
31
+
32
+ def test_symbol
33
+ assert @symbol.symbol?
34
+ end
35
+
36
+
37
+ end
data/test/word_test.rb CHANGED
@@ -1,42 +1,45 @@
1
1
  require 'helper'
2
2
  require '../lib/word.rb'
3
+
3
4
  class WordTest < Test::Unit::TestCase
4
- def setup
5
- @word_kota = NLP::Word.new('kota','kot','subst:sg:gen.acc:m2')
6
- @word_siebie = NLP::Word.new('siebie','się','siebie:gen.acc')
7
- end
8
-
9
- def test_word_lematization
10
- assert_equal 'kot', @word_kota.lemat
11
- assert_equal 'się', @word_siebie.lemat
12
- end
13
-
14
- def test_word_orth
15
- assert_equal 'kota', @word_kota.orth
16
- assert_equal 'siebie', @word_siebie.orth
17
- end
18
-
19
- def test_recognizing_part_of_speech
20
- assert @word_kota.rzeczownik?
21
- assert @word_siebie.zaimek?
22
- end
23
-
24
- def test_recognizing_inflection
25
- assert @word_kota.liczba_pojedyncza?
26
- assert @word_kota.dopelniacz?
27
- assert @word_kota.biernik?
28
- assert @word_kota.meski_zwierzecy?
29
-
30
- assert_equal false, @word_kota.liczba_mnoga?
31
- assert_equal false, @word_kota.mianownik?
32
-
33
- assert @word_siebie.biernik?
34
- assert @word_siebie.dopelniacz?
35
- end
36
-
37
- def test_inflection_string
38
- assert_equal @word_kota.inflection, 'subst:sg:gen.acc:m2'
39
- end
5
+ include NLP
6
+
7
+ def setup
8
+ @word_kota = Word.new('kota','kot','subst:sg:gen.acc:m2')
9
+ @word_siebie = Word.new('siebie','się','siebie:gen.acc')
10
+ end
11
+
12
+ def test_word_lematization
13
+ assert_equal 'kot', @word_kota.lemat
14
+ assert_equal 'się', @word_siebie.lemat
15
+ end
16
+
17
+ def test_word_orth
18
+ assert_equal 'kota', @word_kota.orth
19
+ assert_equal 'siebie', @word_siebie.orth
20
+ end
21
+
22
+ def test_recognizing_part_of_speech
23
+ assert @word_kota.rzeczownik?
24
+ assert @word_siebie.zaimek?
25
+ end
26
+
27
+ def test_recognizing_inflection
28
+ assert @word_kota.liczba_pojedyncza?
29
+ assert @word_kota.dopelniacz?
30
+ assert @word_kota.biernik?
31
+ assert @word_kota.meski_zwierzecy?
32
+
33
+ assert !@word_kota.liczba_mnoga?
34
+ assert !@word_kota.mianownik?
35
+
36
+ assert @word_siebie.biernik?
37
+ assert @word_siebie.dopelniacz?
38
+ end
39
+
40
+ def test_inflection_string
41
+ assert_equal @word_kota.inflection, 'subst:sg:gen.acc:m2'
42
+ end
40
43
 
41
44
 
42
45
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nlp
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 5
10
- version: 0.2.5
9
+ - 6
10
+ version: 0.2.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - knife
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-22 00:00:00 +02:00
18
+ date: 2011-06-21 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies: []
21
21
 
@@ -45,10 +45,10 @@ files:
45
45
  - lib/rid_analyzer.rb
46
46
  - lib/rid_category.rb
47
47
  - lib/sentence.rb
48
+ - lib/statistic.rb
48
49
  - lib/stdlib/ext/array.rb
49
50
  - lib/stdlib/ext/string.rb
50
51
  - lib/stree.rb
51
- - lib/takipi_web_service
52
52
  - lib/takipi_web_service.rb
53
53
  - lib/text.rb
54
54
  - lib/token.rb
@@ -56,9 +56,17 @@ files:
56
56
  - lib/word.rb
57
57
  - LICENSE
58
58
  - README.rdoc
59
+ - test/sentence_test.rb
60
+ - test/analyzer_test.rb
61
+ - test/meaningable_test.rb
62
+ - test/token_scanner_test.rb
59
63
  - test/helper.rb
64
+ - test/nlp_test_suite.rb
60
65
  - test/test_nlp.rb
61
66
  - test/word_test.rb
67
+ - test/lemmatizer_test.rb
68
+ - test/token_test.rb
69
+ - test/text_test.rb
62
70
  has_rdoc: true
63
71
  homepage: http://github.com/knife/nlp
64
72
  licenses: []
@@ -94,6 +102,14 @@ signing_key:
94
102
  specification_version: 3
95
103
  summary: Linguistics tools for processing polish language.
96
104
  test_files:
105
+ - test/sentence_test.rb
106
+ - test/analyzer_test.rb
107
+ - test/meaningable_test.rb
108
+ - test/token_scanner_test.rb
97
109
  - test/helper.rb
110
+ - test/nlp_test_suite.rb
98
111
  - test/test_nlp.rb
99
112
  - test/word_test.rb
113
+ - test/lemmatizer_test.rb
114
+ - test/token_test.rb
115
+ - test/text_test.rb