nlp 0.2.7 → 0.2.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,28 @@
1
module NLP

  # Base class for dictionary-backed text analyzers. Subclasses select a
  # concrete dictionary (e.g. :rid, :liwc) and may override #analyze.
  class Analyzer

    # dict - dictionary identifier forwarded to Dictionary.new
    def initialize(dict)
      @dictionary = Dictionary.new(dict)
    end

    # Walks the scanner word by word, looks each lemma up in the
    # dictionary and records every hit. Returns the populated
    # TextStatistics.
    def analyze(scanner)
      stats = TextStatistics.new

      until (token = scanner.current).nil?
        lemma = token.lemat

        found = @dictionary.find(lemma)
        stats.add(lemma, found) unless found.nil?
        stats.total_words += 1
        scanner.next(:word)
      end

      stats
    end
  end
end
@@ -0,0 +1,68 @@
1
module NLP
  # LIWC analyzer: classifies the words of a text into LIWC categories
  # (emotions, social words, pronouns, prepositions, numbers, ...) and
  # gathers the matches into a TextStatistics object.
  class LIWCAnalyzer < Analyzer

    # Always works with the LIWC dictionary.
    def initialize
      @dictionary = Dictionary.new(:liwc)
    end


    # Walks every alphanumeric token of the scanner and fills per-category
    # word lists; returns the populated TextStatistics.
    # Category names are Polish: zaimki = pronouns (1/2/3 = grammatical
    # person), przyimki = prepositions, wulgar = vulgarisms.
    def analyze(scanner)

      results = TextStatistics.new
      results.hash = {
        :long_words => [],
        :zaimki => [],
        :zaimki1 => [],
        :zaimki2 => [],
        :zaimki3 => [],
        :przyimki => [],
        :numbers => [],
        :emotion => [],
        :social => [],
        :personal => [],
        :posemotion => [],
        :negemotion => [],
        :wulgar => [],
        :cognitive => []
      }

      while token = scanner.current
        word = token.lemat
        # look the lemma up with punctuation stripped (hyphens are kept)
        categories = @dictionary.find(word.gsub( /[^\w-]/, "" ))

        unless categories.nil?
          results.add(word,categories)
          # only the first matching category is attached to the token;
          # the token predicates below read it
          token.category = categories.first

          results[:emotion].push token.orth if token.emotion?
          results[:social].push token.orth if token.social?
          results[:personal].push token.orth if token.personal?
          results[:wulgar].push token.orth if token.bad_word?
          results[:cognitive].push token.orth if token.cognitive?

          results[:posemotion].push token.orth if token.positive_emotion?
          results[:negemotion].push token.orth if token.negative_emotion?
        end
        # words longer than 10 characters (jlength is multibyte-aware)
        results[:long_words].push word if word.jlength > 10
        if token.zaimek?
          results[:zaimki].push word

          # NOTE(review): only the lemma 'on' counts as third person —
          # presumably other third-person forms lemmatize to 'on'; verify.
          results[:zaimki1].push token.orth if word === 'ja' or word === 'my'
          results[:zaimki2].push token.orth if word === 'ty' or word === 'wy'
          results[:zaimki3].push token.orth if word === 'on'
        end

        results[:przyimki].push word if token.przyimek?
        results[:numbers].push token.orth if token.number? or token.liczebnik?

        results.total_words += 1
        scanner.next(:alphanum)
      end
      results

    end

  end

end
@@ -0,0 +1,10 @@
1
module NLP

  # Analyzer preconfigured with the RID (Regressive Imagery Dictionary);
  # the analysis loop itself is inherited from Analyzer.
  class RIDAnalyzer < Analyzer

    def initialize
      @dictionary = Dictionary.new(:rid)
    end

  end
end
@@ -0,0 +1,27 @@
1
module NLP
  # A node in a category taxonomy. Each category knows its name (stored
  # as a Symbol) and an optional parent; the root of a category chain
  # identifies the top-level class (e.g. :EMOCJE).
  class Category
    attr_reader :parent, :name

    # name   - category name (String or Symbol)
    # parent - enclosing Category, or nil for a top-level category
    def initialize(name, parent = nil)
      @parent = parent
      @name = name.to_sym
    end

    # Slash-separated path from the root down to this category,
    # e.g. "PROCESY_PSYCHOLOGICZNE/EMOCJE".
    def path
      @parent ? (@parent.path + '/' + name.to_s) : name.to_s
    end

    # Name (Symbol) of the top-most ancestor; self's name when there is
    # no parent.
    def root
      category = self
      category = category.parent while category.parent
      category.name
    end

    # Inspect-style rendering of the full path (kept quoted for
    # backward compatibility; was `"#{path.inspect}"`, a redundant
    # interpolation).
    def to_s
      path.inspect
    end

  end
end
@@ -0,0 +1,76 @@
1
module NLP
  # Word -> category lookup backed by a trie. The parsed trie is cached
  # on disk with Marshal so later runs skip re-parsing the plain-text
  # dictionary files.
  class Dictionary

    attr_accessor :tree

    # category_file - dictionary id (:rid or :liwc); names both the data
    #                 file under dict/ and the cache file.
    # restore       - when true (default), load the Marshal cache if present.
    def initialize(category_file = :rid, restore = true)
      state_file = File.expand_path(DICTIONARY_CACHE_DIR + ".#{category_file.to_s}")
      if restore and File.exist?(state_file)
        # NOTE(review): Marshal data is trusted here — only load cache
        # files this library wrote itself.
        @tree = Dictionary.restore(state_file)
      else
        @tree = PlTrie.new
        load_categories(File.dirname(__FILE__) + "/../../dict/#{category_file.to_s}", category_file)
        store(state_file)
      end
    end

    # Serializes the trie to state_file. Returns self.
    def store(state_file)
      File.open(File.expand_path(state_file), "w") do |file|
        Marshal.dump(self.tree, file)
      end
      self
    end

    # Reads back a trie previously written by #store.
    def self.restore(state_file)
      File.open(File.expand_path(state_file)) do |file|
        Marshal.restore(file)
      end
    end

    # Looks a word up in the trie; returns the category list, or nil when
    # the word is unknown (the trie raises for characters outside its
    # alphabet, which we treat as "not found").
    def find(word)
      @tree.find(word)
    rescue
      nil
    end

    # Parses a tab-indented category file: indentation depth encodes the
    # category level (0 = primary, 1 = secondary, 2 = tertiary when the
    # line is ALL-CAPS); any other line is a word belonging to the most
    # recently declared category.
    # (A no-op `begin ... rescue; raise; end` wrapper around the loop
    # body was removed — it only re-raised.)
    def load_categories(category_file, type)
      category = nil
      primary = nil
      secondary = nil
      tertiary = nil

      cat_class = type == :rid ? NLP.const_get("RIDCategory") : NLP.const_get("LIWCCategory")

      File.open(category_file) do |file|
        while line = file.gets
          line.chomp!
          lead, rest = line.scan(/(\t*)(.*)/).first
          if lead.size == 0
            category = primary = cat_class.new(rest)
            secondary, tertiary = nil
          elsif lead.size == 1
            category = secondary = cat_class.new(rest, primary)
            tertiary = nil
          elsif lead.size == 2 && (cat = line.strip.index(/^[A-ZĄŚĘĆŃŹŻŁÓ_]+$/)) && cat >= 0
            category = tertiary = cat_class.new(rest, secondary)
          else
            # a plain word; "(1)" suffixes in the source data are stripped
            word = rest.downcase.gsub(/\s*\(1\)$/, '')
            @tree.insert(word, category)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,54 @@
1
module NLP
  # LIWC category node. Top-level predicates compare the root name;
  # finer-grained predicates match substrings of the category path.
  class LIWCCategory < Category

    # --- primary (top-level) categories ---

    def linguistic?
      root == :PIERWOTNE
    end

    def psychological?
      root == :PROCESY_PSYCHOLOGICZNE
    end

    def relative?
      # consistency fix: use == like the sibling predicates
      # (Symbol#=== is equivalent here, so behavior is unchanged)
      root == :RELATYWNOSC
    end

    def personal?
      root == :OSOBISTE
    end

    # --- second-level categories (path substring matches) ---

    def emotion?
      path.include? 'EMOCJE'
    end

    def positive_emotion?
      path.include? 'POZYTYWNE_EMOCJE'
    end

    def negative_emotion?
      path.include? 'NEGATYWNE_EMOCJE'
    end

    def cognitive?
      path.include? 'KOGNITYWNE_PROCESY'
    end

    def sense?
      path.include? 'ZMYSLY'
    end

    def social?
      path.include? 'SOCIAL'
    end

    def bad_word?
      path.include? 'WULGAR'
    end

  end
end
@@ -0,0 +1,31 @@
1
require 'ds'

module NLP

  include DS

  # Trie specialised for Polish text: the base alphabet is extended with
  # Polish diacritics, '-' and the space character.
  class PlTrie < Trie

    ALPHABET = %w{- a ą b c ć d e ę f g h i j k l ł m n ń o ó p r s ś t u v w x y z ź ż} << ' '

    # Recursive insert: walks (creating as needed) one child node per
    # element of s and appends value to the data list at the terminal
    # node. (Marked "#private" by the original author but left public.)
    #private
    def priv_insert(s, value)
      if s.empty?
        (@data ||= []) << value
      else
        slot = key(s.first)
        child = (@children[slot] ||= PlTrie.new)
        child.priv_insert(s[1..-1], value)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module NLP
  # RID (Regressive Imagery Dictionary) category node; predicates test
  # which of the three top-level RID branches this category belongs to.
  class RIDCategory < Category

    # The three top-level RID categories.
    def self.top_level
      [new(:PIERWOTNE),new(:WTORNE),new(:EMOCJE)]
    end

    # Primary-process ("pierwotne") branch?
    def primary?
      root == :PIERWOTNE
    end

    # Secondary-process ("wtorne") branch?
    def secondary?
      root == :WTORNE
    end

    # Emotions branch?
    def emotions?
      root == :EMOCJE
    end

  end
end
data/lib/nlp.rb CHANGED
@@ -5,7 +5,6 @@ end
5
5
 
6
6
 
7
7
  require 'stdlib/ext/array'
8
- require 'morfeusz'
9
8
 
10
9
  require "analizators/analyzer"
11
10
  require "analizators/rid_analyzer.rb"
@@ -0,0 +1,13 @@
1
module NLP
  # Token subclass representing an emoticon assembled from several
  # source tokens.
  class Emoticon < Token

    include Meaningable

    # tokens - pieces making up the emoticon; joined into @orth
    # tags   - accepted for signature compatibility with Token but
    #          ignored: @tags is always the literal 'emoticon'.
    #          NOTE(review): Token#initialize is deliberately not called.
    def initialize(tokens,tags)
      @orth = tokens.join("")
      @tags = 'emoticon'
    end

  end
end
@@ -0,0 +1,59 @@
1
# Mixin that turns tagger tag strings into Polish-named predicate
# methods. For each GRAM_CAT entry a method "<polish_name>?" is defined
# that checks whether any colon-separated segment of #inflection contains
# one of the entry's tag abbreviations (substring match, so dotted
# alternative segments such as "nom.acc" also match). Including classes
# must provide #inflection returning the raw tag string.
module Inflectable

  GRAM_CAT = {
    # parts of speech
    :adj => 'przymiotnik',
    [:subst,:depr] => 'rzeczownik',
    :adv => 'przyslowek',
    :num => 'liczebnik',
    [:pron,:siebie] => 'zaimek',
    :prep => 'przyimek',
    # number
    :sg => 'liczba_pojedyncza',
    :pl => 'liczba_mnoga',

    # cases
    :nom => 'mianownik',
    :gen => 'dopelniacz',
    :dat => 'celownik',
    :acc => 'biernik',
    :inst => 'narzednik',
    :loc => 'miejscownik',
    :voc => 'wolacz',

    # genders
    :m1 => 'meski_osobowy',
    :m2 => 'meski_zwierzecy',
    :m3 => 'meski_rzeczowy',
    :f => 'zenski',
    :n1 => 'nijaki_zbiorowy',
    :n2 => 'nijaki zwykly',
    :p1 => 'przymnogi_osobowy',
    :p2 => 'przymnogi_zwykly',
    :p3 => 'przymnogi_opisowy',

    # person
    :pri => "pierwsza_osoba",
    :sec => "druga_osoba",
    :ter => "trzecia_osoba",

    # degree
    :pos => "stopien_rowny",
    :comp => "stopien_wyzszy",
    :sup => "stopien_najwyzszy"
  }

  GRAM_CAT.each do |abbrevs, polish_name|
    # normalise single symbols and symbol arrays to one string list
    matchers = Array(abbrevs).map { |a| a.to_s }

    define_method("#{polish_name}?") do
      inflection.split(":").any? do |segment|
        matchers.any? { |m| segment.include?(m) }
      end
    end
  end

end
@@ -0,0 +1,112 @@
1
require 'rexml/document'

module NLP
  # Turns raw text into a Text of Sentence/Word objects by delegating to
  # the external TaKIPI tagger — either a local binary or the remote web
  # service — and parsing its XML output.
  class Lemmatizer

    include REXML

    # text       - String, or open File with the input (read and closed here)
    # method     - :takipi to pick TaKIPI explicitly with input_type;
    #              anything else falls through to the default path
    # input_type - forwarded to takipi_lemmatize (:local or :remote)
    # Raises ArgumentError when text is neither String nor File.
    def self.lemmatize(text, method = nil, input_type = nil)
      if text.is_a? File
        str = text.read
        text.close
      elsif text.is_a? String
        str = text
      else
        raise ArgumentError, "Argument is not String or File"
      end

      if method === :takipi
        takipi_lemmatize(str, input_type)
      else
        # The default was meant to be Morfeusz, but its bindings were
        # dropped from the gem; fall back to the remote TaKIPI service.
        takipi_lemmatize(str, :remote)
        #morfeusz_lemmatize(str)
      end
    end


    # Runs TaKIPI locally (via shell + temp files) or remotely (via
    # TakipiWebService) and parses the resulting XML document.
    # Raises ArgumentError for an unknown method.
    def self.takipi_lemmatize(text, method)
      if method === :local
        xml_file = TAKIPI_XML_FILE

        # NOTE(review): text is interpolated straight into a shell
        # command — never pass untrusted input through this path.
        worker = Thread.new do
          `echo '#{text}' > /tmp/text.txt; takipi -i /tmp/text.txt -o #{xml_file} -it TXT`
        end
        worker.join

        f = File.open(xml_file, "r")
        doc = Document.new f
      elsif method === :remote
        xml = TakipiWebService.request(text)
        doc = Document.new xml
      else
        raise ArgumentError, 'Argument is not :local or :remote'
      end

      parse_lemmatized_xml(doc)
    end


    # Placeholder for the (disabled) Morfeusz-based tagger; currently
    # returns an empty Text.
    def self.morfeusz_lemmatize(text)
      temp_text = Text.new

      #simple tagger
      #TODO lemmatizer should take TokenScanner object that defines
      #how split string
      # text.split(/\.|!|\?/).each do |s|
      #   sentence = Sentence.new
      #   sentence << s.split(" ").collect{ |t|
      #     if word = Morfeusz::Lexeme.find(t)
      #       if word[0]
      #         Word.new(t,word[0].base_form,"")
      #       else
      #         Word.new(t,"","")
      #       end
      #     else
      #       Word.new(t,"","")
      #     end
      #   }
      #   temp_text << sentence
      # end
      temp_text
    end


    # Builds a Text from TaKIPI XML: each <chunk> becomes a Sentence and
    # each <tok> a Word (orth, lemma and inflection taken from the first
    # <lex> element that carries attributes).
    def self.parse_lemmatized_xml(doc)
      text = Text.new

      doc.elements.each("*/chunkList/chunk") do |chunk|
        sentence = Sentence.new
        tokens = []

        chunk.elements.each("tok") do |tok|
          word = tok.elements[1].text
          # Bug fix: `lemat, inflect = ""` left inflect nil (crashing
          # Inflectable later); both now default to the empty string.
          lemat = inflect = ""

          tok.elements.each("lex") do |lex|
            if lex.has_attributes?
              lemat = lex.elements[1].text
              inflect = lex.elements[2].text
            end
          end

          tokens << Word.new(word, lemat, inflect)
        end

        sentence << tokens
        text << sentence
      end
      text
    end

  end
end
@@ -0,0 +1,63 @@
1
# Mixin for objects that carry a dictionary category (must respond to
# #category). Delegates LIWC-style classification to the category's root
# name and path — mirrors the predicates on LIWCCategory.
module Meaningable

  #LIWC
  # --- primary (top-level) categories ---

  def linguistic?
    category.root == :PIERWOTNE
  end

  def psychological?
    category.root == :PROCESY_PSYCHOLOGICZNE
  end

  def relative?
    # consistency fix: use == like the sibling predicates
    # (Symbol#=== behaves identically here)
    category.root == :RELATYWNOSC
  end

  def personal?
    category.root == :OSOBISTE
  end

  # --- second-level categories (path substring matches) ---

  def emotion?
    category.path.include? 'EMOCJE'
  end

  def positive_emotion?
    category.path.include? 'POZYTYWNE_EMOCJE'
  end

  def negative_emotion?
    category.path.include? 'NEGATYWNE_EMOCJE'
  end

  def cognitive?
    category.path.include? 'KOGNITYWNE_PROCESY'
  end

  def sense?
    category.path.include? 'ZMYSLY'
  end

  def social?
    category.path.include? 'SOCIAL'
  end

  def bad_word?
    category.path.include? 'WULGAR'
  end


  #SEMANTIC
  # Not implemented yet; both return nil.
  def synonym?(other)

  end

  def synonyms

  end

end
@@ -0,0 +1,24 @@
1
module NLP
  # An ordered collection of tokens forming one sentence.
  class Sentence

    attr_reader :tokens

    def initialize()
      @tokens = []
    end

    # Appends one token or an array of tokens; returns self so calls
    # can be chained.
    def << tokens
      tokens.is_a?(Array) ? @tokens.concat(tokens) : @tokens.push(tokens)
      self
    end

    # Number of tokens that are real words (everything except
    # punctuation).
    def words_number
      @tokens.count { |t| !t.interp? }
    end

  end
end
@@ -0,0 +1,51 @@
1
require 'rubygems'
require 'savon'

# SOAP client for the TaKIPI tagger web service (CLARIN @ PWr). Submits
# a text, polls the job status, then fetches the tagged result.
class TakipiWebService
  URL = 'http://nlp.pwr.wroc.pl/clarin/ws/takipi/'
  WSDL_URL = URL + 'takipi.wsdl'

  # Sends text to the tagger and blocks until the service reports the
  # job done (or the polling budget runs out). Returns the chunk list
  # wrapped into a well-formed XML string.
  def self.request(text)
    client = Savon::Client.new WSDL_URL, :soap_endpoint => URL

    # Call remote service methods
    response = client.tag do |soap|
      soap.body = "<text>#{text}</text><format>TXT</format><useGuesser>true</useGuesser>"
    end

    response = response.to_hash
    token = response[:tag_response][:tag_response][:msg]
    status = (response[:tag_response][:tag_response][:status]).to_i

    # Poll until status == 1 (done); 2/3 mean still in progress.
    # NOTE(review): each iteration sleeps 1s but adds 5 to the counter,
    # so the effective budget is ~12 polls rather than 60s, and `step`
    # is never used — confirm the intended timeout. Also: any status
    # other than 1/2/3 never updates `count`, so the loop would spin
    # forever — verify the service's status codes.
    timeout = 60
    step = 5
    count = 0
    loop do
      break if count > timeout
      if status == 1
        break
      elsif status == 2 or status == 3
        count += 5
        sleep(1)
        r = client.get_status do |soap|
          soap.body = "<token>#{token}</token>"
        end.to_hash
        status = (r[:get_status_response][:status]).to_i

      end
    end

    # fetch the finished result
    result = client.get_result do |soap|
      soap.body="<token>#{token}</token>"
    end

    response_document = result.to_hash[:get_result_response][:tag_response][:msg]

    # transform the response into a well-formed XML string
    return "<xml><chunkList>#{response_document}</chunkList></xml>"
  end
end
@@ -0,0 +1,24 @@
1
module NLP
  # A lemmatized text: an ordered list of sentences.
  class Text
    attr_reader :sentences

    def initialize
      @sentences = []
    end

    # Appends a sentence (returns the underlying array, per Array#push).
    def << sentence
      @sentences.push sentence
    end

    # Average word count per sentence (relies on Array#mean from
    # stdlib/ext/array).
    def words_per_sentence
      @sentences.map { |s| s.words_number }.mean
    end

    # All tokens of all sentences, in document order.
    def flatten
      @sentences.each_with_object([]) do |sentence, acc|
        acc.concat(sentence.tokens)
      end
    end

  end
end
@@ -0,0 +1,45 @@
1
module NLP
  # A single lexical unit produced by the tagger. @tags holds the raw
  # morphosyntactic tag string, e.g. "interp" or "tnum:integer".
  class Token

    attr_reader :orth
    attr_reader :tags

    # orth - surface form as it appears in the text
    # tags - tagger tag string
    def initialize(orth,tags)
      @orth = orth
      @tags = tags
    end

    # Symbol token (tag string is exactly "tsym")?
    def symbol?
      @tags.eql? "tsym"
    end

    # Punctuation (tag string is exactly "interp")?
    def interp?
      @tags.eql? "interp"
    end

    # A word is anything that is not punctuation, a number or an
    # agglutinative particle.
    def word?
      !(interp? || number? || agl?)
    end

    def number?
      @tags.include? "tnum"
    end

    def integer?
      @tags.include? "tnum:integer"
    end

    def float?
      @tags.include? "tnum:frac"
    end

    def qublic?
      @tags.include? "qub"
    end

    def agl?
      @tags.include? "agl"
    end

  end
end
@@ -0,0 +1,58 @@
1
module NLP
  # Cursor over the flattened token stream of a Text. #next advances to
  # the following token of a requested kind; #current returns the token
  # under the cursor, or nil once the stream is exhausted.
  class TokenScanner

    attr_reader :text, :tokens

    def initialize(text)
      @text = text
      @pos = 0
      @tokens = @text.flatten
    end

    # Advances at least one position, then keeps advancing until a token
    # matching type (:word, :interp, :number or :alphanum) is found or
    # the stream ends. Any other type just advances by one.
    def next(type)
      @pos += 1

      wanted =
        case type
        when :word     then lambda { |t| t.word? }
        when :interp   then lambda { |t| t.interp? }
        when :number   then lambda { |t| t.number? }
        when :alphanum then lambda { |t| t.number? || t.word? }
        end
      return unless wanted

      @pos += 1 until @pos >= @tokens.size || wanted.call(@tokens[@pos])
    end

    # Token under the cursor, or nil at end of stream.
    def current
      @pos == @tokens.size ? nil : @tokens[@pos]
    end

    # Moves the cursor back to the first token.
    def rewind
      @pos = 0
    end

    # Current cursor position.
    def index
      @pos
    end

    # True once the cursor has run past the last token.
    def end?
      @pos == tokens.size
    end

  end
end
@@ -0,0 +1,20 @@
1
module NLP
  # A word token: the surface form (orth) plus its lemma and tagger tags.
  class Word < Token

    include Inflectable
    include Meaningable

    # Lemma (base form) of the word.
    attr_reader :lemat
    # Dictionary category assigned during analysis (see LIWCAnalyzer).
    attr_accessor :category

    # word  - surface form
    # lemat - base form
    # tags  - morphosyntactic tag string (consumed by Inflectable)
    def initialize(word, lemat, tags)
      super(word,tags)
      @lemat = lemat
    end

    # Inflectable reads the inflection info from the raw tag string.
    def inflection
      @tags
    end

  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nlp
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 7
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 7
10
- version: 0.2.7
9
+ - 8
10
+ version: 0.2.8
11
11
  platform: ruby
12
12
  authors:
13
13
  - knife
@@ -59,9 +59,26 @@ extra_rdoc_files:
59
59
  files:
60
60
  - dict/liwc
61
61
  - dict/rid
62
- - lib/morfeusz.rb
62
+ - lib/analizators/analyzer.rb
63
+ - lib/analizators/liwc_analyzer.rb
64
+ - lib/analizators/rid_analyzer.rb
65
+ - lib/dictionaries/category.rb
66
+ - lib/dictionaries/dictionary.rb
67
+ - lib/dictionaries/liwc_category.rb
68
+ - lib/dictionaries/pl_trie.rb
69
+ - lib/dictionaries/rid_category.rb
63
70
  - lib/nlp.rb
64
71
  - lib/stdlib/ext/array.rb
72
+ - lib/tagger/emoticon.rb
73
+ - lib/tagger/inflectable.rb
74
+ - lib/tagger/lemmatizer.rb
75
+ - lib/tagger/meaningable.rb
76
+ - lib/tagger/sentence.rb
77
+ - lib/tagger/takipi_web_service.rb
78
+ - lib/tagger/text.rb
79
+ - lib/tagger/token.rb
80
+ - lib/tagger/token_scanner.rb
81
+ - lib/tagger/word.rb
65
82
  - lib/text_statistics.rb
66
83
  - LICENSE
67
84
  - README.rdoc
@@ -1,69 +0,0 @@
1
- # Ruby bindings for Morfeusz v. 0.1
2
- # Author: Aleksander Pohl
3
- # apohllo@o2.pl
4
-
5
- require 'rubygems'
6
- require 'inline'
7
- require 'singleton'
8
- require 'iconv'
9
- module NLP
10
- module Morfeusz
11
- MORFOPT_ENCODING = 1
12
- MORFEUSZ_UTF_8 = 8
13
- class Morfeusz
14
- include Singleton
15
-
16
- inline(:C) do |builder|
17
- builder.include '"morfeusz.h"'
18
- builder.add_compile_flags '-lmorfeusz', '-I/home/knife/morf/include/'
19
- builder.c <<-END
20
- void initialize(){
21
- morfeusz_set_option(#{MORFOPT_ENCODING},#{MORFEUSZ_UTF_8});
22
- }
23
- END
24
-
25
- builder.c <<-END
26
- char * about(){
27
- return morfeusz_about();
28
- }
29
- END
30
-
31
- builder.c <<-END
32
- VALUE _base(VALUE str){
33
- char * p;
34
- int index = 0;
35
- VALUE arr = rb_ary_new();
36
- int id_push = rb_intern("push");
37
- p = StringValuePtr(str);
38
- InterpMorf* result = morfeusz_analyse(p);
39
- InterpMorf el;
40
- while((el = result[index++]).k != -1){
41
- if(el.haslo != NULL){
42
- rb_funcall(arr,id_push,1,rb_str_new2(el.haslo));
43
- }
44
- }
45
- return arr;
46
- }
47
- END
48
-
49
- def base(word)
50
- # _base(word)
51
- _base(word).collect{|e| e}
52
- end
53
-
54
- end
55
- end
56
-
57
- class Lexeme
58
- attr_reader :base_form
59
- def initialize(base_form)
60
- @base_form = base_form
61
- end
62
-
63
- def self.find(word)
64
- Morfeusz.instance.base(word).collect{|bf| Lexeme.new(bf)}
65
- end
66
-
67
- end
68
- end
69
- end