nlp 0.2.5 → 0.2.6

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/lib/word.rb CHANGED
@@ -2,22 +2,22 @@ require 'inflectable'
2
2
  require 'meaningable'
3
3
 
4
4
  module NLP
5
- class Word < Token
6
- include Inflectable
7
- include Meaningable
5
+ class Word < Token
6
+
7
+ include Inflectable
8
+ include Meaningable
8
9
 
9
- attr_reader :lemat, :orth
10
- attr_accessor :category
11
-
12
- def initialize(word, lemat, tags)
13
- super(word,tags)
14
- @lemat = lemat
15
- end
16
-
17
- def inflection
18
- @tags
19
- end
10
+ attr_reader :lemat
11
+ attr_accessor :category
20
12
 
13
+ def initialize(word, lemat, tags)
14
+ super(word,tags)
15
+ @lemat = lemat
16
+ end
21
17
 
18
+ def inflection
19
+ @tags
22
20
  end
21
+
22
+ end
23
23
  end
@@ -0,0 +1,25 @@
1
+ require '../lib/analyzer.rb'
2
+
3
+
4
+ class AnalyzerTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ sample = "Ja byłam wtedy bardzo szczęśliwa"
10
+ @text = Lemmatizer.lemmatize(sample,:takipi,:local)
11
+ @scanner = TokenScanner.new(@text)
12
+ @rid_analyzer = Analyzer.new(:rid)
13
+ @liwc_analyzer = Analyzer.new(:liwc)
14
+ end
15
+
16
+ def test_analyze
17
+ stats = @rid_analyzer.analyze(@scanner)
18
+ assert_kind_of Statistic, stats
19
+ assert_equal 5, stats.total_words
20
+ assert_equal 1, stats.word_count
21
+
22
+ end
23
+
24
+ end
25
+
@@ -0,0 +1,73 @@
1
+ require 'helper'
2
+ require '../lib/lemmatizer.rb'
3
+
4
+ class LemmatizerTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ @sample = "Złe czasy już minęły."
10
+
11
+
12
+ @zle_word = Word.new('złe','zły','adj:pl:nom:m3:pos')
13
+ @czasy_word = Word.new('czasy','czas','subst:pl:nom:m3')
14
+ @minely_word = Word.new('minęły','minąć','praet:pl:m3:perf')
15
+ @juz_word = Word.new('już','już','qub')
16
+ @period = Token.new('.','interp')
17
+
18
+ end
19
+
20
+
21
+
22
+ def test_takipi_remote_lemmatizer
23
+ text = Lemmatizer.lemmatize(@sample,:takipi,:remote)
24
+ test_takipi_lemmatizer(text)
25
+ end
26
+
27
+
28
+ def test_takipi_local_lemmatizer
29
+ text = Lemmatizer.lemmatize(@sample,:takipi,:local)
30
+ test_takipi_lemmatizer(text)
31
+ end
32
+
33
+
34
+ def test_morfeusz_leamtizer
35
+ text = Lemmatizer.lemmatize(@sample)
36
+ assert_equal Text, text.class
37
+ assert_equal 1, text.sentences.size
38
+ assert_equal 4, text.sentences[0].words_number
39
+
40
+ tokens = text.sentences[0].tokens
41
+ zle,czasy,juz,minely,period = *tokens
42
+ assert_equal 'zły', zle.lemat
43
+ assert_equal 'czas', czasy.lemat
44
+ assert_equal 'już', juz.lemat
45
+ assert_equal 'minąć', minely.lemat
46
+
47
+ end
48
+
49
+ private
50
+ def test_takipi_lemmatizer(text)
51
+
52
+ assert_equal Text, text.class
53
+ assert_equal 1, text.sentences.size
54
+ assert_equal 4, text.sentences[0].words_number
55
+
56
+ tokens = text.sentences[0].tokens
57
+ zle, czasy, juz, minely, period = *tokens
58
+ assert_equal @zle_word.inflection, zle.inflection
59
+ assert_equal @czasy_word.inflection, czasy.inflection
60
+ assert_equal @juz_word.inflection, juz.inflection
61
+ assert_equal @minely_word.inflection, minely.inflection
62
+ assert_equal @period.tags, period.tags
63
+
64
+ assert_equal 'zły', zle.lemat
65
+ assert_equal 'czas', czasy.lemat
66
+ assert_equal 'już', juz.lemat
67
+ assert_equal 'minąć', minely.lemat
68
+
69
+ end
70
+
71
+
72
+
73
+ end
@@ -0,0 +1,28 @@
1
+ require 'helper'
2
+ require '../lib/word.rb'
3
+
4
+ class MeaningableTest < Test::Unit::TestCase
5
+ include NLP
6
+
7
+ def setup
8
+
9
+ @word_kochamy = Word.new('kochamy','kochać', 'fin:pl:pri:imperf')
10
+ psych_cat = LIWCCategory.new('PROCESY_PSYCHOLOGICZNE')
11
+ emotion_cat = LIWCCategory.new('EMOCJE',psych_cat)
12
+ pos_emotion_cat = LIWCCategory.new('POZYTYWNE_EMOCJE',emotion_cat)
13
+ @word_kochamy.category = pos_emotion_cat
14
+
15
+ end
16
+
17
+ def test_category_recognition
18
+
19
+ assert @word_kochamy.psychological?
20
+ assert @word_kochamy.positive_emotion?
21
+ assert @word_kochamy.emotion?
22
+
23
+ assert !@word_kochamy.negative_emotion?
24
+ end
25
+
26
+
27
+ end
28
+
@@ -0,0 +1,11 @@
1
+ require 'test/unit'
2
+ require 'helper'
3
+ require "lemmatizer_test"
4
+ require "word_test"
5
+ require "token_test"
6
+ require "text_test"
7
+ require "sentence_test"
8
+ require "token_scanner_test"
9
+ require "meaningable_test"
10
+
11
+
@@ -0,0 +1,26 @@
1
+ require 'helper'
2
+ require '../lib/token.rb'
3
+
4
+ class SentenceTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ @sentence = Sentence.new
10
+ @comma = Token.new(',','interp')
11
+ @integer = Token.new('32','tnum:integer')
12
+ @float = Token.new('3,12','tnum:frac')
13
+ @symbol = Token.new('nie_istniejace_slowo','tsym')
14
+ end
15
+
16
+
17
+ def test_sentence_size
18
+ assert_equal 0, @sentence.words_number
19
+ @sentence << @symbol
20
+ assert_equal 1, @sentence.words_number
21
+ @sentence << @integer << @comma << @float
22
+ assert_equal 3, @sentence.words_number
23
+ end
24
+
25
+
26
+ end
data/test/text_test.rb ADDED
@@ -0,0 +1,29 @@
1
+ require 'helper'
2
+ require '../lib/token.rb'
3
+
4
+ class TextTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ @s1 = Sentence.new
10
+ @s2 = Sentence.new
11
+ @comma = Token.new(',','interp')
12
+ @integer = Token.new('32','tnum:integer')
13
+ @float = Token.new('3,12','tnum:frac')
14
+ @symbol = Token.new('nie_istniejace_slowo','tsym')
15
+ @s1 << @integer << @comma << @symbol
16
+ @s2 << @integer << @symbol
17
+ @text = Text.new
18
+
19
+ end
20
+
21
+
22
+ def test_text
23
+ @text << @s1
24
+ @text << @s2
25
+ assert_equal 2, @text.words_per_sentence
26
+ end
27
+
28
+
29
+ end
@@ -0,0 +1,28 @@
1
+ require 'helper'
2
+ require '../lib/token_scanner.rb'
3
+
4
+ class TokenScannerTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ sentence = "To, jest zdanie."
10
+ @text = Lemmatizer.lemmatize(sentence,:takipi,:local)
11
+ @scanner = TokenScanner.new(@text)
12
+
13
+ end
14
+
15
+ def test_scanner
16
+ assert_equal "To", @scanner.current.orth
17
+ @scanner.next(:word)
18
+ assert_equal "jest", @scanner.current.orth
19
+ @scanner.next(:interp)
20
+ assert_equal ".", @scanner.current.orth
21
+ @scanner.next(:word)
22
+ assert @scanner.end?
23
+ @scanner.rewind
24
+ assert_equal 0, @scanner.index
25
+ end
26
+
27
+ end
28
+
@@ -0,0 +1,37 @@
1
+ require 'helper'
2
+ require '../lib/token.rb'
3
+
4
+ class TokenTest < Test::Unit::TestCase
5
+
6
+ include NLP
7
+
8
+ def setup
9
+ @comma = Token.new(',','interp')
10
+ @integer = Token.new('32','tnum:integer')
11
+ @float = Token.new('3,12','tnum:frac')
12
+ @symbol = Token.new('nie_istniejace_slowo','tsym')
13
+ end
14
+
15
+
16
+ def test_recognizing_interpunction
17
+ assert @comma.interp?
18
+ assert !@comma.word?
19
+ end
20
+
21
+ def test_recognizing_numbers
22
+ assert @integer.integer?
23
+ assert !@integer.word?
24
+
25
+ assert @float.float?
26
+ assert @float.number?
27
+
28
+ assert !@float.word?
29
+ assert !@float.integer?
30
+ end
31
+
32
+ def test_symbol
33
+ assert @symbol.symbol?
34
+ end
35
+
36
+
37
+ end
data/test/word_test.rb CHANGED
@@ -1,42 +1,45 @@
1
1
  require 'helper'
2
2
  require '../lib/word.rb'
3
+
3
4
  class WordTest < Test::Unit::TestCase
4
- def setup
5
- @word_kota = NLP::Word.new('kota','kot','subst:sg:gen.acc:m2')
6
- @word_siebie = NLP::Word.new('siebie','się','siebie:gen.acc')
7
- end
8
-
9
- def test_word_lematization
10
- assert_equal 'kot', @word_kota.lemat
11
- assert_equal 'się', @word_siebie.lemat
12
- end
13
-
14
- def test_word_orth
15
- assert_equal 'kota', @word_kota.orth
16
- assert_equal 'siebie', @word_siebie.orth
17
- end
18
-
19
- def test_recognizing_part_of_speech
20
- assert @word_kota.rzeczownik?
21
- assert @word_siebie.zaimek?
22
- end
23
-
24
- def test_recognizing_inflection
25
- assert @word_kota.liczba_pojedyncza?
26
- assert @word_kota.dopelniacz?
27
- assert @word_kota.biernik?
28
- assert @word_kota.meski_zwierzecy?
29
-
30
- assert_equal false, @word_kota.liczba_mnoga?
31
- assert_equal false, @word_kota.mianownik?
32
-
33
- assert @word_siebie.biernik?
34
- assert @word_siebie.dopelniacz?
35
- end
36
-
37
- def test_inflection_string
38
- assert_equal @word_kota.inflection, 'subst:sg:gen.acc:m2'
39
- end
5
+ include NLP
6
+
7
+ def setup
8
+ @word_kota = Word.new('kota','kot','subst:sg:gen.acc:m2')
9
+ @word_siebie = Word.new('siebie','się','siebie:gen.acc')
10
+ end
11
+
12
+ def test_word_lematization
13
+ assert_equal 'kot', @word_kota.lemat
14
+ assert_equal 'się', @word_siebie.lemat
15
+ end
16
+
17
+ def test_word_orth
18
+ assert_equal 'kota', @word_kota.orth
19
+ assert_equal 'siebie', @word_siebie.orth
20
+ end
21
+
22
+ def test_recognizing_part_of_speech
23
+ assert @word_kota.rzeczownik?
24
+ assert @word_siebie.zaimek?
25
+ end
26
+
27
+ def test_recognizing_inflection
28
+ assert @word_kota.liczba_pojedyncza?
29
+ assert @word_kota.dopelniacz?
30
+ assert @word_kota.biernik?
31
+ assert @word_kota.meski_zwierzecy?
32
+
33
+ assert !@word_kota.liczba_mnoga?
34
+ assert !@word_kota.mianownik?
35
+
36
+ assert @word_siebie.biernik?
37
+ assert @word_siebie.dopelniacz?
38
+ end
39
+
40
+ def test_inflection_string
41
+ assert_equal @word_kota.inflection, 'subst:sg:gen.acc:m2'
42
+ end
40
43
 
41
44
 
42
45
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nlp
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 5
10
- version: 0.2.5
9
+ - 6
10
+ version: 0.2.6
11
11
  platform: ruby
12
12
  authors:
13
13
  - knife
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-22 00:00:00 +02:00
18
+ date: 2011-06-21 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies: []
21
21
 
@@ -45,10 +45,10 @@ files:
45
45
  - lib/rid_analyzer.rb
46
46
  - lib/rid_category.rb
47
47
  - lib/sentence.rb
48
+ - lib/statistic.rb
48
49
  - lib/stdlib/ext/array.rb
49
50
  - lib/stdlib/ext/string.rb
50
51
  - lib/stree.rb
51
- - lib/takipi_web_service
52
52
  - lib/takipi_web_service.rb
53
53
  - lib/text.rb
54
54
  - lib/token.rb
@@ -56,9 +56,17 @@ files:
56
56
  - lib/word.rb
57
57
  - LICENSE
58
58
  - README.rdoc
59
+ - test/sentence_test.rb
60
+ - test/analyzer_test.rb
61
+ - test/meaningable_test.rb
62
+ - test/token_scanner_test.rb
59
63
  - test/helper.rb
64
+ - test/nlp_test_suite.rb
60
65
  - test/test_nlp.rb
61
66
  - test/word_test.rb
67
+ - test/lemmatizer_test.rb
68
+ - test/token_test.rb
69
+ - test/text_test.rb
62
70
  has_rdoc: true
63
71
  homepage: http://github.com/knife/nlp
64
72
  licenses: []
@@ -94,6 +102,14 @@ signing_key:
94
102
  specification_version: 3
95
103
  summary: Linguistics tools for processing polish language.
96
104
  test_files:
105
+ - test/sentence_test.rb
106
+ - test/analyzer_test.rb
107
+ - test/meaningable_test.rb
108
+ - test/token_scanner_test.rb
97
109
  - test/helper.rb
110
+ - test/nlp_test_suite.rb
98
111
  - test/test_nlp.rb
99
112
  - test/word_test.rb
113
+ - test/lemmatizer_test.rb
114
+ - test/token_test.rb
115
+ - test/text_test.rb