nlp 0.2.7 → 0.2.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,28 @@
1
module NLP
  # Base class for dictionary-backed text analyzers: walks a token
  # scanner and tallies dictionary category hits per word lemma.
  class Analyzer
    # dict - dictionary identifier handed straight to Dictionary.new
    #        (e.g. :rid or :liwc).
    def initialize(dict)
      @dictionary = Dictionary.new(dict)
    end

    # Scans every word token, looking each lemma up in the dictionary.
    # Returns a TextStatistics with the matched categories and the
    # total number of words seen.
    def analyze(scanner)
      stats = TextStatistics.new

      until (token = scanner.current).nil?
        lemma = token.lemat

        categories = @dictionary.find(lemma)
        stats.add(lemma, categories) unless categories.nil?

        stats.total_words += 1
        scanner.next(:word)
      end

      stats
    end
  end
end
28
+
@@ -0,0 +1,68 @@
1
module NLP
  # LIWC-style analyzer: while scanning words and numbers it sorts each
  # token into linguistic / psychological buckets (Polish bucket names).
  class LIWCAnalyzer < Analyzer
    # Always uses the LIWC dictionary; takes no arguments.
    def initialize
      @dictionary = Dictionary.new(:liwc)
    end

    # Walks the scanner over words and numbers, recording which buckets
    # each token falls into. Returns a TextStatistics whose hash maps
    # bucket name to the list of matching surface forms.
    def analyze(scanner)
      results = TextStatistics.new
      results.hash = empty_buckets

      until (token = scanner.current).nil?
        word = token.lemat

        # Lookup ignores everything except word characters and dashes.
        categories = @dictionary.find(word.gsub(/[^\w-]/, ""))
        unless categories.nil?
          results.add(word, categories)
          token.category = categories.first
          record_semantic_buckets(results, token)
        end

        record_grammar_buckets(results, token, word)

        results.total_words += 1
        scanner.next(:alphanum)
      end
      results
    end

    private

    # Fresh bucket-name => [] mapping for a single analysis run.
    def empty_buckets
      {
        :long_words => [],
        :zaimki => [],
        :zaimki1 => [],
        :zaimki2 => [],
        :zaimki3 => [],
        :przyimki => [],
        :numbers => [],
        :emotion => [],
        :social => [],
        :personal => [],
        :posemotion => [],
        :negemotion => [],
        :wulgar => [],
        :cognitive => []
      }
    end

    # Buckets driven by the token's dictionary category (Meaningable).
    def record_semantic_buckets(results, token)
      results[:emotion].push(token.orth)    if token.emotion?
      results[:social].push(token.orth)     if token.social?
      results[:personal].push(token.orth)   if token.personal?
      results[:wulgar].push(token.orth)     if token.bad_word?
      results[:cognitive].push(token.orth)  if token.cognitive?

      results[:posemotion].push(token.orth) if token.positive_emotion?
      results[:negemotion].push(token.orth) if token.negative_emotion?
    end

    # Buckets driven by grammatical features of the token itself.
    def record_grammar_buckets(results, token, word)
      # Words longer than 10 characters (jlength: multibyte-aware length
      # on Ruby 1.8 / jcode).
      results[:long_words].push(word) if word.jlength > 10

      if token.zaimek?
        results[:zaimki].push(word)
        # Pronouns split by person: 1st (ja/my), 2nd (ty/wy), 3rd (on).
        results[:zaimki1].push(token.orth) if word == 'ja' || word == 'my'
        results[:zaimki2].push(token.orth) if word == 'ty' || word == 'wy'
        results[:zaimki3].push(token.orth) if word == 'on'
      end

      results[:przyimki].push(word) if token.przyimek?
      results[:numbers].push(token.orth) if token.number? || token.liczebnik?
    end
  end
end
@@ -0,0 +1,10 @@
1
module NLP
  # Analyzer preconfigured with the RID (Regressive Imagery Dictionary).
  # All scanning behaviour is inherited from Analyzer.
  class RIDAnalyzer < Analyzer
    def initialize
      @dictionary = Dictionary.new(:rid)
    end
  end
end
@@ -0,0 +1,27 @@
1
module NLP
  # A node in a category taxonomy. Categories form a tree via +parent+;
  # +name+ is always stored as a Symbol.
  class Category
    attr_reader :parent, :name

    def initialize(name, parent = nil)
      @parent = parent
      @name = name.to_sym
    end

    # Slash-joined path from the root category down to this node,
    # e.g. "EMOCJE/POZYTYWNE_EMOCJE".
    def path
      return name.to_s unless @parent

      "#{@parent.path}/#{name}"
    end

    # Symbol name of the top-most ancestor (self when there is no parent).
    def root
      node = self
      node = node.parent while node.parent
      node.name
    end

    # NOTE: uses String#inspect, so the result is wrapped in quotes.
    def to_s
      path.inspect
    end
  end
end
@@ -0,0 +1,76 @@
1
module NLP
  # Trie-backed word -> categories dictionary. The parsed trie is cached
  # on disk with Marshal and restored on subsequent loads.
  class Dictionary

    attr_accessor :tree

    # category_file - :rid or :liwc; also names the source file in dict/.
    # restore       - when true, load the marshalled cache if present.
    def initialize(category_file = :rid, restore = true)
      state_file = File.expand_path(DICTIONARY_CACHE_DIR + ".#{category_file}")
      if restore && File.exist?(state_file)
        @tree = Dictionary.restore(state_file)
      else
        @tree = PlTrie.new
        load_categories(File.dirname(__FILE__) + "/../../dict/#{category_file}", category_file)
        store(state_file)
      end
    end

    # Marshals the trie to state_file. Returns self.
    def store(state_file)
      File.open(File.expand_path(state_file), "w") do |file|
        Marshal.dump(self.tree, file)
      end
      self
    end

    # Reads a marshalled trie back from disk.
    # NOTE(review): Marshal.restore on an attacker-controlled file can
    # instantiate arbitrary objects — the cache dir is assumed trusted.
    def self.restore(state_file)
      File.open(File.expand_path(state_file)) do |file|
        Marshal.restore(file)
      end
    end

    # Looks a word up in the trie. Any lookup error (e.g. a character
    # outside the trie alphabet) is treated as "not found".
    def find(word)
      @tree.find(word)
    rescue StandardError
      nil
    end

    # Parses an indented category file: tab depth encodes the category
    # hierarchy (0 = primary, 1 = secondary, 2 + ALL-CAPS = tertiary);
    # every other line is a dictionary word filed under the current
    # category.
    def load_categories(category_file, type)
      cat_class = NLP.const_get(type == :rid ? "RIDCategory" : "LIWCCategory")

      category = primary = secondary = tertiary = nil

      File.open(category_file) do |file|
        while line = file.gets
          line.chomp!
          lead, rest = line.scan(/(\t*)(.*)/).first

          case lead.size
          when 0
            category = primary = cat_class.new(rest)
            secondary = tertiary = nil
          when 1
            category = secondary = cat_class.new(rest, primary)
            tertiary = nil
          else
            # A third-level heading is a two-tab ALL-CAPS line (Polish
            # letters allowed); anything else is a dictionary word.
            # (Dropped the original `cat >= 0` — String#index returns
            # nil or a non-negative integer, so it was always true.)
            if lead.size == 2 && line.strip.index(/^[A-ZĄŚĘĆŃŹŻŁÓ_]+$/)
              category = tertiary = cat_class.new(rest, secondary)
            else
              # Strip trailing "(1)" sense markers before inserting.
              word = rest.downcase.gsub(/\s*\(1\)$/, '')
              @tree.insert(word, category)
            end
          end
        end
      end
    end
  end
end
76
+
@@ -0,0 +1,54 @@
1
module NLP
  # Category with LIWC-specific predicates. Root-level tests compare the
  # root symbol; sub-category tests search the whole path string.
  class LIWCCategory < Category

    # --- primary (root) categories ---

    def linguistic?
      root == :PIERWOTNE
    end

    def psychological?
      root == :PROCESY_PSYCHOLOGICZNE
    end

    def relative?
      # Was `===`; `==` is equivalent for symbols and consistent with
      # the sibling predicates.
      root == :RELATYWNOSC
    end

    def personal?
      root == :OSOBISTE
    end

    # --- sub-categories (path substring tests) ---

    def emotion?
      path.include? 'EMOCJE'
    end

    def positive_emotion?
      path.include? 'POZYTYWNE_EMOCJE'
    end

    def negative_emotion?
      path.include? 'NEGATYWNE_EMOCJE'
    end

    def cognitive?
      path.include? 'KOGNITYWNE_PROCESY'
    end

    def sense?
      path.include? 'ZMYSLY'
    end

    def social?
      path.include? 'SOCIAL'
    end

    def bad_word?
      path.include? 'WULGAR'
    end
  end
end
@@ -0,0 +1,31 @@
1
require 'ds'

module NLP

  include DS

  # Trie over the Polish alphabet (plus '-' and space), built on the
  # Trie class from the `ds` gem.
  class PlTrie < Trie

    # Order matters: key() maps a character to its index in this list.
    # Frozen so the shared constant cannot be mutated at runtime.
    ALPHABET = (%w{- a ą b c ć d e ę f g h i j k l ł m n ń o ó p r s ś t u v w x y z ź ż} << ' ').freeze

    # Recursive insert helper: walks/creates a child node per character
    # of +s+ and appends +value+ to the terminal node's data list.
    # (Named "priv_" but intentionally left public — the superclass
    # dispatches to it; see the commented-out `private` in the original.)
    def priv_insert(s, value)
      if s.empty?
        (@data ||= []) << value
      else
        child = (@children[key(s.first)] ||= PlTrie.new)
        child.priv_insert(s[1..-1], value)
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module NLP
  # Category with predicates for the Regressive Imagery Dictionary,
  # whose taxonomy has exactly three root categories.
  class RIDCategory < Category

    # The three RID root categories as fresh instances.
    def self.top_level
      [:PIERWOTNE, :WTORNE, :EMOCJE].map { |name| new(name) }
    end

    def primary?
      root == :PIERWOTNE
    end

    def secondary?
      root == :WTORNE
    end

    def emotions?
      root == :EMOCJE
    end
  end
end
data/lib/nlp.rb CHANGED
@@ -5,7 +5,6 @@ end
5
5
 
6
6
 
7
7
  require 'stdlib/ext/array'
8
- require 'morfeusz'
9
8
 
10
9
  require "analizators/analyzer"
11
10
  require "analizators/rid_analyzer.rb"
@@ -0,0 +1,13 @@
1
module NLP
  # Token subclass representing an emoticon assembled from several
  # source fragments. Carries LIWC predicates via Meaningable.
  class Emoticon < Token

    include Meaningable

    # tokens - array of surface fragments; joined to form the orth.
    # _tags  - accepted for interface compatibility but ignored: an
    #          emoticon's tag is always the literal 'emoticon'.
    # NOTE(review): does not call super; sets the ivars directly.
    def initialize(tokens, _tags)
      @orth = tokens.join("")
      @tags = 'emoticon'
    end
  end
end
13
+
@@ -0,0 +1,59 @@
1
# Mixin generating Polish grammatical predicates (rzeczownik?,
# mianownik?, ...) from a tag-symbol table. Host objects must provide
# #inflection returning a colon-separated tag string.
module Inflectable

  # Maps tagger tag symbols (or groups of symbols) to the Polish
  # grammatical-category name used for the generated predicate.
  GRAM_CAT = {
    # parts of speech
    :adj => 'przymiotnik',
    [:subst, :depr] => 'rzeczownik',
    :adv => 'przyslowek',
    :num => 'liczebnik',
    [:pron, :siebie] => 'zaimek',
    :prep => 'przyimek',

    # grammatical number
    :sg => 'liczba_pojedyncza',
    :pl => 'liczba_mnoga',

    # cases
    :nom => 'mianownik',
    :gen => 'dopelniacz',
    :dat => 'celownik',
    :acc => 'biernik',
    :inst => 'narzednik',
    :loc => 'miejscownik',
    :voc => 'wolacz',

    # genders
    :m1 => 'meski_osobowy',
    :m2 => 'meski_zwierzecy',
    :m3 => 'meski_rzeczowy',
    :f => 'zenski',
    :n1 => 'nijaki_zbiorowy',
    :n2 => 'nijaki zwykly',
    :p1 => 'przymnogi_osobowy',
    :p2 => 'przymnogi_zwykly',
    :p3 => 'przymnogi_opisowy',

    # person
    :pri => "pierwsza_osoba",
    :sec => "druga_osoba",
    :ter => "trzecia_osoba",

    # degree
    :pos => "stopien_rowny",
    :comp => "stopien_wyzszy",
    :sup => "stopien_najwyzszy"
  }

  # Define e.g. rzeczownik? — true when any colon-separated segment of
  # #inflection contains one of the entry's tag symbols. The match is a
  # substring test, so e.g. the "tnum" segment also satisfies :num.
  GRAM_CAT.each do |tags, category_name|
    define_method("#{category_name}?") do
      segments = inflection.split(":")
      Array(tags).any? { |tag| segments.any? { |seg| seg.include?(tag.to_s) } }
    end
  end
end
@@ -0,0 +1,112 @@
1
require 'rexml/document'

module NLP
  # Turns raw text into a lemmatised NLP::Text using the TaKIPI tagger,
  # either as a local binary or through the remote web service.
  class Lemmatizer

    include REXML

    # text       - String, or an open File (read and closed here).
    # method     - :takipi to choose the TaKIPI path explicitly.
    # input_type - :local or :remote (TaKIPI only).
    # Raises ArgumentError for any other input object.
    def self.lemmatize(text, method = nil, input_type = nil)
      str =
        if text.is_a?(File)
          content = text.read
          text.close
          content
        elsif text.is_a?(String)
          text
        else
          raise ArgumentError, "Argument is not String or File"
        end

      if method == :takipi
        takipi_lemmatize(str, input_type)
      else
        # NOTE(review): the intended default is Morfeusz, but the
        # binding is disabled, so we fall back to remote TaKIPI.
        takipi_lemmatize(str, :remote)
        # morfeusz_lemmatize(str)
      end
    end

    # Runs TaKIPI locally (shelling out and reading the XML file it
    # produces) or remotely via TakipiWebService, then parses the XML.
    def self.takipi_lemmatize(text, method)
      doc =
        case method
        when :local
          xml_file = TAKIPI_XML_FILE
          # SECURITY(review): +text+ is interpolated into a shell
          # command — malicious input could inject commands. Sanitize
          # or pass via stdin before exposing to untrusted input.
          worker = Thread.new do
            `echo '#{text}' > /tmp/text.txt; takipi -i /tmp/text.txt -o #{xml_file} -it TXT`
          end
          worker.join
          Document.new(File.open(xml_file, "r"))
        when :remote
          Document.new(TakipiWebService.request(text))
        else
          raise ArgumentError, 'Argument is not :local or :remote'
        end

      parse_lemmatized_xml(doc)
    end

    # Disabled Morfeusz-based tagger; currently returns an empty Text.
    def self.morfeusz_lemmatize(text)
      temp_text = Text.new

      # simple tagger
      # TODO lemmatizer should take a TokenScanner object that defines
      # how to split the string
      # text.split(/\.|!|\?/).each do |s|
      #   sentence = Sentence.new
      #   sentence << s.split(" ").collect{ |t|
      #     if word = Morfeusz::Lexeme.find(t)
      #       if word[0]
      #         Word.new(t,word[0].base_form,"")
      #       else
      #         Word.new(t,"","")
      #       end
      #     else
      #       Word.new(t,"","")
      #     end
      #   }
      #   temp_text << sentence
      # end
      temp_text
    end

    # Builds a Text of Sentences of Words from TaKIPI's chunk/tok XML.
    def self.parse_lemmatized_xml(doc)
      text = Text.new

      doc.elements.each("*/chunkList/chunk") do |chunk|
        sentence = Sentence.new
        tokens = []

        chunk.elements.each("tok") do |tok|
          word = tok.elements[1].text
          # BUGFIX: was `lemat, inflect = ""`, which is a multiple
          # assignment that left inflect nil; both must default to "".
          lemat = inflect = ""

          tok.elements.each("lex") do |lex|
            if lex.has_attributes?
              lemat = lex.elements[1].text
              inflect = lex.elements[2].text
            end
          end

          tokens << Word.new(word, lemat, inflect)
        end

        sentence << tokens
        text << sentence
      end
      text
    end
  end
end
@@ -0,0 +1,63 @@
1
# Mixin for tokens that expose #category (a Category node): provides
# LIWC-style semantic predicates delegating to the category taxonomy.
module Meaningable

  # --- LIWC primary (root) categories ---

  def linguistic?
    category.root == :PIERWOTNE
  end

  def psychological?
    category.root == :PROCESY_PSYCHOLOGICZNE
  end

  def relative?
    # Was `===`; `==` is equivalent for symbols and consistent with the
    # sibling predicates.
    category.root == :RELATYWNOSC
  end

  def personal?
    category.root == :OSOBISTE
  end

  # --- LIWC sub-categories (path substring tests) ---

  def emotion?
    category.path.include? 'EMOCJE'
  end

  def positive_emotion?
    category.path.include? 'POZYTYWNE_EMOCJE'
  end

  def negative_emotion?
    category.path.include? 'NEGATYWNE_EMOCJE'
  end

  def cognitive?
    category.path.include? 'KOGNITYWNE_PROCESY'
  end

  def sense?
    category.path.include? 'ZMYSLY'
  end

  def social?
    category.path.include? 'SOCIAL'
  end

  def bad_word?
    category.path.include? 'WULGAR'
  end

  # --- semantic relations: not implemented yet (return nil) ---

  def synonym?(other)
  end

  def synonyms
  end
end
@@ -0,0 +1,24 @@
1
module NLP
  # An ordered list of tokens belonging to one sentence.
  class Sentence

    attr_reader :tokens

    def initialize
      @tokens = []
    end

    # Appends a single token or a whole array of tokens.
    # Returns self so appends can be chained.
    def <<(tokens)
      case tokens
      when Array then @tokens.concat(tokens)
      else @tokens << tokens
      end
      self
    end

    # Number of non-punctuation tokens.
    def words_number
      @tokens.count { |t| !t.interp? }
    end
  end
end
@@ -0,0 +1,51 @@
1
require 'rubygems'
require 'savon'

# Thin SOAP client for the CLARIN TaKIPI tagger web service: submits a
# tagging job, polls until it finishes, then fetches the result.
class TakipiWebService
  URL = 'http://nlp.pwr.wroc.pl/clarin/ws/takipi/'
  WSDL_URL = URL + 'takipi.wsdl'

  # Tags +text+ remotely. Returns the tagger output wrapped in a
  # well-formed <xml><chunkList>...</chunkList></xml> string for REXML.
  def self.request(text)
    client = Savon::Client.new WSDL_URL, :soap_endpoint => URL

    # Submit the tagging job.
    response = client.tag do |soap|
      soap.body = "<text>#{text}</text><format>TXT</format><useGuesser>true</useGuesser>"
    end

    response = response.to_hash
    token = response[:tag_response][:tag_response][:msg]
    status = (response[:tag_response][:tag_response][:status]).to_i

    # Poll the job status: 1 = done, 2/3 = still processing.
    # NOTE(review): the counter advances by 5 per 1-second sleep, so the
    # effective polling window is ~12s, not 60s — confirm intent.
    timeout = 60
    count = 0
    loop do
      break if count > timeout
      if status == 1
        break
      elsif status == 2 || status == 3
        count += 5
        sleep(1)
        r = client.get_status do |soap|
          soap.body = "<token>#{token}</token>"
        end.to_hash
        status = (r[:get_status_response][:status]).to_i
      else
        # BUGFIX: unknown/error status previously hit neither branch and
        # spun forever without advancing the counter; stop polling.
        break
      end
    end

    # Fetch the finished result.
    result = client.get_result do |soap|
      soap.body = "<token>#{token}</token>"
    end

    response_document = result.to_hash[:get_result_response][:tag_response][:msg]

    # Wrap in a root element so the response parses as one XML document.
    return "<xml><chunkList>#{response_document}</chunkList></xml>"
  end
end
51
+
@@ -0,0 +1,24 @@
1
module NLP
  # A tagged document: an ordered collection of Sentence objects.
  class Text
    attr_reader :sentences

    def initialize
      @sentences = []
    end

    # Appends one sentence.
    def <<(sentence)
      @sentences.push sentence
    end

    # Average sentence length in words (Array#mean comes from the
    # project's stdlib extension).
    def words_per_sentence
      @sentences.map { |s| s.words_number }.mean
    end

    # All tokens of all sentences as one flat array.
    def flatten
      @sentences.each_with_object([]) do |sentence, flat|
        sentence.tokens.each { |t| flat << t }
      end
    end
  end
end
@@ -0,0 +1,45 @@
1
module NLP
  # A single surface token paired with its raw tagger tag string.
  class Token

    # orth - the surface form; tags - raw tag string from the tagger.
    attr_reader :orth, :tags

    def initialize(orth, tags)
      @orth = orth
      @tags = tags
    end

    # Symbol token (tag is exactly "tsym").
    def symbol?
      @tags == "tsym"
    end

    # Punctuation token (tag is exactly "interp").
    def interp?
      @tags == "interp"
    end

    # Anything that is not punctuation, a number or an agglutinant.
    def word?
      !interp? && !number? && !agl?
    end

    def number?
      @tags.include?("tnum")
    end

    def integer?
      @tags.include?("tnum:integer")
    end

    def float?
      @tags.include?("tnum:frac")
    end

    def qublic?
      @tags.include?("qub")
    end

    def agl?
      @tags.include?("agl")
    end
  end
end
@@ -0,0 +1,58 @@
1
module NLP
  # Cursor over the flattened token stream of a Text, with typed
  # skip-ahead (#next) and position bookkeeping.
  class TokenScanner

    attr_reader :text, :tokens

    def initialize(text)
      @text = text
      @pos = 0
      @tokens = @text.flatten
    end

    # Advances at least one token, then keeps advancing until the
    # current token matches +type+ (:word, :interp, :number, :alphanum)
    # or the stream is exhausted. Unknown types advance exactly one.
    def next(type)
      @pos += 1
      @pos += 1 while @pos < @tokens.size && !match?(type, @tokens[@pos])
    end

    # Token under the cursor, or nil when the stream is exhausted.
    # (`>=` guards against @pos over-running via repeated #next calls.)
    def current
      @pos >= @tokens.size ? nil : @tokens[@pos]
    end

    # Resets the cursor to the first token.
    def rewind
      @pos = 0
    end

    # Current cursor position.
    def index
      @pos
    end

    # True when the cursor is at or past the end of the stream.
    # BUGFIX: was `==`, which returned false after #next at the end
    # pushed @pos beyond tokens.size.
    def end?
      @pos >= @tokens.size
    end

    private

    # Does +token+ satisfy the requested skip target?
    def match?(type, token)
      case type
      when :word     then token.word?
      when :interp   then token.interp?
      when :number   then token.number?
      when :alphanum then token.number? || token.word?
      else true
      end
    end
  end
end
@@ -0,0 +1,20 @@
1
module NLP
  # A dictionary word token: carries its lemma and gains grammatical
  # (Inflectable) and semantic (Meaningable) predicates.
  class Word < Token

    include Inflectable
    include Meaningable

    attr_reader :lemat
    attr_accessor :category

    def initialize(word, lemat, tags)
      super(word, tags)
      @lemat = lemat
    end

    # Inflectable reads the grammatical tag string through this method.
    def inflection
      @tags
    end
  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nlp
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 7
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 2
9
- - 7
10
- version: 0.2.7
9
+ - 8
10
+ version: 0.2.8
11
11
  platform: ruby
12
12
  authors:
13
13
  - knife
@@ -59,9 +59,26 @@ extra_rdoc_files:
59
59
  files:
60
60
  - dict/liwc
61
61
  - dict/rid
62
- - lib/morfeusz.rb
62
+ - lib/analizators/analyzer.rb
63
+ - lib/analizators/liwc_analyzer.rb
64
+ - lib/analizators/rid_analyzer.rb
65
+ - lib/dictionaries/category.rb
66
+ - lib/dictionaries/dictionary.rb
67
+ - lib/dictionaries/liwc_category.rb
68
+ - lib/dictionaries/pl_trie.rb
69
+ - lib/dictionaries/rid_category.rb
63
70
  - lib/nlp.rb
64
71
  - lib/stdlib/ext/array.rb
72
+ - lib/tagger/emoticon.rb
73
+ - lib/tagger/inflectable.rb
74
+ - lib/tagger/lemmatizer.rb
75
+ - lib/tagger/meaningable.rb
76
+ - lib/tagger/sentence.rb
77
+ - lib/tagger/takipi_web_service.rb
78
+ - lib/tagger/text.rb
79
+ - lib/tagger/token.rb
80
+ - lib/tagger/token_scanner.rb
81
+ - lib/tagger/word.rb
65
82
  - lib/text_statistics.rb
66
83
  - LICENSE
67
84
  - README.rdoc
@@ -1,69 +0,0 @@
1
- # Ruby bindings for Morfeusz v. 0.1
2
- # Author: Aleksander Pohl
3
- # apohllo@o2.pl
4
-
5
- require 'rubygems'
6
- require 'inline'
7
- require 'singleton'
8
- require 'iconv'
9
- module NLP
10
- module Morfeusz
11
- MORFOPT_ENCODING = 1
12
- MORFEUSZ_UTF_8 = 8
13
- class Morfeusz
14
- include Singleton
15
-
16
- inline(:C) do |builder|
17
- builder.include '"morfeusz.h"'
18
- builder.add_compile_flags '-lmorfeusz', '-I/home/knife/morf/include/'
19
- builder.c <<-END
20
- void initialize(){
21
- morfeusz_set_option(#{MORFOPT_ENCODING},#{MORFEUSZ_UTF_8});
22
- }
23
- END
24
-
25
- builder.c <<-END
26
- char * about(){
27
- return morfeusz_about();
28
- }
29
- END
30
-
31
- builder.c <<-END
32
- VALUE _base(VALUE str){
33
- char * p;
34
- int index = 0;
35
- VALUE arr = rb_ary_new();
36
- int id_push = rb_intern("push");
37
- p = StringValuePtr(str);
38
- InterpMorf* result = morfeusz_analyse(p);
39
- InterpMorf el;
40
- while((el = result[index++]).k != -1){
41
- if(el.haslo != NULL){
42
- rb_funcall(arr,id_push,1,rb_str_new2(el.haslo));
43
- }
44
- }
45
- return arr;
46
- }
47
- END
48
-
49
- def base(word)
50
- # _base(word)
51
- _base(word).collect{|e| e}
52
- end
53
-
54
- end
55
- end
56
-
57
- class Lexeme
58
- attr_reader :base_form
59
- def initialize(base_form)
60
- @base_form = base_form
61
- end
62
-
63
- def self.find(word)
64
- Morfeusz.instance.base(word).collect{|bf| Lexeme.new(bf)}
65
- end
66
-
67
- end
68
- end
69
- end