pils 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/README.md +39 -0
- data/Rakefile +8 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/pils +3 -0
- data/lib/pils.rb +64 -0
- data/lib/pils/de.rb +24 -0
- data/lib/pils/de/skeleton.rb +207 -0
- data/lib/pils/de/small.rb +128 -0
- data/lib/pils/parsing.rb +31 -0
- data/lib/pils/parsing/cat.rb +62 -0
- data/lib/pils/parsing/grammar.rb +47 -0
- data/lib/pils/parsing/lexicon.rb +100 -0
- data/lib/pils/parsing/parser.rb +310 -0
- data/lib/pils/parsing/rule.rb +43 -0
- data/lib/pils/parsing/tree.rb +147 -0
- data/lib/pils/parsing/wordform.rb +44 -0
- data/lib/pils/structures.rb +7 -0
- data/lib/pils/structures/avm.rb +98 -0
- data/lib/pils/tcf.rb +37 -0
- data/lib/pils/tcf/annotation.rb +42 -0
- data/lib/pils/tcf/bounded_element.rb +46 -0
- data/lib/pils/tcf/geo_annotation.rb +29 -0
- data/lib/pils/tcf/named_entity_annotation.rb +31 -0
- data/lib/pils/tcf/sentence.rb +47 -0
- data/lib/pils/tcf/tcf_document.rb +296 -0
- data/lib/pils/tcf/token.rb +52 -0
- data/lib/pils/tcf/transform/transformer.rb +468 -0
- data/lib/pils/version.rb +3 -0
- data/pils-0.1.2.gem +0 -0
- data/pils.gemspec +41 -0
- data/tasks/testing.rake +23 -0
- metadata +128 -0
@@ -0,0 +1,128 @@
|
|
1
|
+
# This file is part of Pils.
|
2
|
+
#
|
3
|
+
# Pils is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Lesser General Public License as
|
5
|
+
# published by the Free Software Foundation, either version 3 of
|
6
|
+
# the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# Pils is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the
|
14
|
+
# GNU Lesser General Public License along with Pils.
|
15
|
+
# If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
|
17
|
+
# This is a language config file.
|
18
|
+
|
19
|
+
module Pils
|
20
|
+
module De
|
21
|
+
module Small
|
22
|
+
|
23
|
+
# Feature value inventories. The grammar and lexicon builders in this module
# interpolate these strings into category names such as "NP_nom_sg_m_3".
# The lists are frozen so the shared constants cannot be modified in place.
CAS = %w(nom gen dat acc).freeze # case values
NUM = %w(sg pl).freeze           # number values
GEN = %w(m f n).freeze           # gender values
PER = %w(1 2 3).freeze           # person values
MOD = %w(imp ind).freeze         # presumably verb mood values; TODO confirm intended use
TMP = %w(pres).freeze            # tense values
|
29
|
+
|
30
|
+
# Registers the six present-tense finite forms of a German verb in +lex+.
#
# lex                - receives one add_wordform call per generated form
# grundform          - stem used for the 1st person singular and all plural forms
# zweitform          - stem used for the 2nd and 3rd person singular
# praetform, partform - accepted for interface compatibility; not used here
# semantic_component - attached to every form as an independent clone
#
# Returns whatever the last add_wordform call returned.
def self.add_german_verb(lex, grundform, zweitform, praetform, partform, semantic_component)
  # Stems ending in a sibilant (s, ß, z, x) take a bare "-t" in the 2nd person
  # singular: "du grunzt", not "du grunzst".
  second_sg_ending = (zweitform.to_s =~ /[sßzx]\z/) ? 't' : 'st'

  paradigm = [
    [:V_fin_sg_1_pres, "#{grundform}e"],
    [:V_fin_sg_2_pres, "#{zweitform}#{second_sg_ending}"],
    [:V_fin_sg_3_pres, "#{zweitform}t"],
    [:V_fin_pl_1_pres, "#{grundform}en"],
    [:V_fin_pl_2_pres, "#{grundform}t"],
    [:V_fin_pl_3_pres, "#{grundform}en"]
  ]

  last_result = nil
  paradigm.each do |category, raw_form|
    wordform = Wordform.new(normalize_forms(raw_form), category, {}, semantic_component.clone)
    last_result = lex.add_wordform(wordform)
  end
  last_result
end
|
38
|
+
|
39
|
+
# Registers the eight case/number forms of a German noun in +lex+.
#
# forms is read positionally: indexes 0-3 are the singular forms in the order
# nom, gen, dat, acc; indexes 4-7 are the plural forms in the same order.
# Every wordform gets its own clone of semantic_component and a category
# symbol of the shape :N_<case>_<number>_<gender>_3.
#
# Returns whatever the last add_wordform call returned.
def self.add_german_noun(lex, semantic_component, gender, forms)
  slots = %w(sg pl).flat_map do |number|
    %w(nom gen dat acc).map { |kase| [kase, number] }
  end

  last_result = nil
  slots.each_with_index do |(kase, number), index|
    category = "N_#{kase}_#{number}_#{gender}_3".to_sym
    last_result = lex.add_wordform(Wordform.new(forms[index], category, {}, semantic_component.clone))
  end
  last_result
end
|
49
|
+
|
50
|
+
# Returns a copy of +form+ in which every "ee" is collapsed to a single "e".
# NOTE(review): the replacement applies anywhere in the word, not only at the
# stem/ending boundary - confirm this is intended for stems containing "ee".
def self.normalize_forms(form)
  form.gsub('ee', 'e')
end
|
54
|
+
|
55
|
+
# Builds the grammar for this language configuration.
#
# The start category is S. Rules are appended in this order:
#   1. for every number/person/tense combination: one "S -> NP VP" rule per
#      gender, followed by the matching "VP -> V" rule;
#   2. for every case/number/gender combination (third person only):
#      "NP -> DETD NPX" followed by "NPX -> N".
#
# NOTE(review): Pils::Parsing is mixed in at call time, so the short names
# Grammar, Cat and Rule resolve only once this method has run.
def self.define_grammar
  include Pils::Parsing

  grammar = Grammar.new
  grammar.starting_cats = [Cat.new(:S)]

  # Sentence and verb phrase rules.
  NUM.product(PER, TMP).each do |num, per, tmp|
    vp_name = "VP_fin_#{num}_#{per}_#{tmp}".to_sym
    GEN.each do |gen|
      np_name = "NP_nom_#{num}_#{gen}_#{per}".to_sym
      grammar.rules << Rule.new(Cat.new(:S), [Cat.new(np_name, :agens), Cat.new(vp_name, :agens)], {})
    end
    verb_name = "V_fin_#{num}_#{per}_#{tmp}".to_sym
    grammar.rules << Rule.new(Cat.new(vp_name, :agens), [Cat.new(verb_name, :agens)], {})
  end

  # Noun phrase rules (an adjective rule "NPX -> ADJA NPX" is not enabled).
  CAS.product(NUM, GEN).each do |cas, num, gen|
    suffix = "#{cas}_#{num}_#{gen}_3"
    grammar.rules << Rule.new(
      Cat.new("NP_#{suffix}".to_sym),
      [Cat.new("DETD_#{suffix}".to_sym), Cat.new("NPX_#{suffix}".to_sym)],
      {}
    )
    grammar.rules << Rule.new(Cat.new("NPX_#{suffix}".to_sym), [Cat.new("N_#{suffix}".to_sym, :agens)], {})
  end

  grammar
end
|
93
|
+
|
94
|
+
# Builds the lexicon for this language configuration: the definite articles
# for every gender/number/case combination, three nouns and four verbs.
def self.define_lexicon
  lexicon = Lexicon.new

  # Definite articles, laid out gender-major (m, f, n), then number (sg, pl),
  # then case (nom, gen, dat, acc) - the same order the product below yields.
  definite_articles = %w(der des dem den die der den die
                         die der der die die der den die
                         das des dem das die der den die)
  GEN.product(NUM, CAS).each_with_index do |(gen, num, cas), index|
    category = "DETD_#{cas}_#{num}_#{gen}_3".to_sym
    lexicon.add_wordform Wordform.new(definite_articles[index], category, {}, {det: :yes})
  end

  # Nouns: semantics, gender, then the eight forms expected by add_german_noun.
  add_german_noun(lexicon, {species: :dog}, :m, %w(Hund Hundes Hund Hund Hunde Hunde Hunden Hunde))
  add_german_noun(lexicon, {species: :cat}, :f, %w(Katze Katze Katze Katze Katzen Katzen Katzen Katzen))
  add_german_noun(lexicon, {species: :pig}, :n, %w(Schwein Schweins Schwein Schwein Schweine Schweine Schweinen Schweine))

  # Verbs: the four stem arguments of add_german_verb plus the sound they denote.
  [
    ["fauch", "fauch", "faucht", "faucht", :hiss],
    ["miau",  "miau",  "miaut",  "miaut",  :meow],
    ["bell",  "bell",  "bellt",  "bellt",  :bark],
    ["grunz", "grunz", "grunzt", "grunzt", :oink]
  ].each do |grundform, zweitform, praetform, partform, sound|
    add_german_verb(lexicon, grundform, zweitform, praetform, partform, {sound: sound})
  end

  lexicon
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
data/lib/pils/parsing.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# This file is part of Pils.
|
2
|
+
#
|
3
|
+
# Pils is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Lesser General Public License as
|
5
|
+
# published by the Free Software Foundation, either version 3 of
|
6
|
+
# the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# Pils is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the
|
14
|
+
# GNU Lesser General Public License along with Pils.
|
15
|
+
# If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
|
17
|
+
# This module encapsulates all classes related to parsing natural
|
18
|
+
# language.
|
19
|
+
module Pils
  # Namespace for the parsing classes; it declares no members of its own here.
  module Parsing; end
end
|
24
|
+
|
25
|
+
require "pils/parsing/cat"
|
26
|
+
require "pils/parsing/rule"
|
27
|
+
require "pils/parsing/grammar"
|
28
|
+
require "pils/parsing/wordform"
|
29
|
+
require "pils/parsing/lexicon"
|
30
|
+
require "pils/parsing/tree"
|
31
|
+
require "pils/parsing/parser"
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# This file is part of Pils.
|
2
|
+
#
|
3
|
+
# Pils is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Lesser General Public License as
|
5
|
+
# published by the Free Software Foundation, either version 3 of
|
6
|
+
# the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# Pils is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the
|
14
|
+
# GNU Lesser General Public License along with Pils.
|
15
|
+
# If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
|
17
|
+
|
18
|
+
module Pils
  module Parsing
    # A category symbol as used on either side of a grammar rule, optionally
    # carrying a descriptor and a feature structure.
    class Cat
      attr_accessor :grammar, :rule, :cat, :feat, :descriptor

      # new_cat  - the category name (e.g. :S)
      # new_desc - optional descriptor stored as-is (defaults to nil)
      # new_feat - key/value pairs copied into a fresh Pils::Structures::Avm
      def initialize(new_cat, new_desc = nil, new_feat = {})
        @cat = new_cat
        @descriptor = new_desc
        @feat = Pils::Structures::Avm.new
        new_feat.each { |key, value| @feat[key] = value }
      end

      # True if this category matches +other+: the category names must be equal
      # and the comparison is then delegated to the feature structures' own <.
      def <(other)
        return false if cat != other.cat

        feat < other.feat
      end

      # Returns the category name as a string. Feature values are not part of
      # the rendered text (the format string only ever had one placeholder, so
      # passing them in merely triggered "too many arguments" diagnostics).
      # Note that this shadows Object#display, which would print to stdout.
      def display
        cat.to_s
      end

      # Same text as #display.
      def to_s
        display
      end
    end
  end
end
|
61
|
+
|
62
|
+
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# This file is part of Pils.
|
2
|
+
#
|
3
|
+
# Pils is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Lesser General Public License as
|
5
|
+
# published by the Free Software Foundation, either version 3 of
|
6
|
+
# the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# Pils is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the
|
14
|
+
# GNU Lesser General Public License along with Pils.
|
15
|
+
# If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
|
17
|
+
|
18
|
+
module Pils
  module Parsing
    # A set of rewrite rules together with the categories parsing may start from.
    class Grammar
      attr_accessor :starting_cats, :rules

      # Starts out with no rules and no starting categories.
      def initialize
        @rules = []
        @starting_cats = []
      end

      # True if at least one rule reports that it can expand +cat+.
      def expandible?(cat)
        @rules.any? { |rule| rule.expandible?(cat) }
      end

      # All rules that report they can expand +cat+, in definition order.
      def expand(cat)
        @rules.select { |rule| rule.expandible?(cat) }
      end

      # Writes the display text of every rule to the Pils log.
      def describe_rules
        @rules.each { |rule| Pils.log rule.display }
      end
    end
  end
end
|
47
|
+
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# This file is part of Pils.
|
2
|
+
#
|
3
|
+
# Pils is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Lesser General Public License as
|
5
|
+
# published by the Free Software Foundation, either version 3 of
|
6
|
+
# the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# Pils is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the
|
14
|
+
# GNU Lesser General Public License along with Pils.
|
15
|
+
# If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
|
17
|
+
|
18
|
+
require "set" # Set is not loaded automatically on older Ruby versions

module Pils
  module Parsing
    # A collection of wordforms plus lookup tables for German articles.
    class Lexicon
      attr_accessor :entries
      attr_accessor :definite_articles
      attr_accessor :indefinite_articles

      def initialize
        @entries = Set.new
        # Both tables hold 24 entries: gender-major (m, f, n), then number
        # (sg, pl), then case (nom, gen, dat, acc).
        @definite_articles = [
          'der', 'des', 'dem', 'den', 'die', 'der', 'den', 'die',
          'die', 'der', 'der', 'die', 'die', 'der', 'den', 'die',
          'das', 'des', 'dem', 'das', 'die', 'der', 'den', 'die'
        ]
        # The plural slots are empty strings.
        @indefinite_articles = [
          'ein', 'eines', 'einem', 'einen', '', '', '', '',
          'eine', 'einer', 'einer', 'eine', '', '', '', '',
          'ein', 'eines', 'einem', 'ein', '', '', '', ''
        ]
      end

      # Definite article for the given case, number and gender. Values may be
      # given as symbols or strings; unrecognised values fall back to
      # nominative / singular / masculine respectively.
      def definite_article(kase=:nom, number=:sg, gender=:m)
        @definite_articles[article_position(kase, number, gender)]
      end

      # Indefinite article for the given case, number and gender; same
      # argument conventions as #definite_article.
      def indefinite_article(kase=:nom, number=:sg, gender=:m)
        @indefinite_articles[article_position(kase, number, gender)]
      end

      # Adds a wordform to the entry set.
      def add_wordform(new_form)
        @entries << new_form
      end

      # Looks up entries. Recognised keys in +params+:
      #   :form    - compared case-insensitively with each entry's form
      #   :cat     - compared with == against each entry's cat
      #   :grammar - wrapped in an Avm and compared with < against each
      #              entry's grammar (applied whenever the key is present)
      # nil values for :form and :cat disable that filter.
      #
      # Returns an empty Hash when the lexicon is empty or the requested form
      # is blank; otherwise the matching entries.
      # NOTE(review): the empty-case return type differs from the normal one;
      # callers only rely on Enumerable behaviour as far as visible here.
      def get(params={})
        matches = @entries
        form = params[:form]
        # \A/\z anchor the whole string, so only a truly blank form counts.
        return {} if matches.empty? || (!form.nil? && form.to_s =~ /\A\s*\z/)

        unless form.nil?
          wanted = form.downcase
          matches = matches.select { |entry| !entry.form.nil? && entry.form.downcase == wanted }
        end

        cat = params[:cat]
        matches = matches.select { |entry| entry.cat == cat } unless cat.nil?

        if params.has_key?(:grammar)
          wanted_grammar = Pils::Structures::Avm.new(params[:grammar])
          matches = matches.select { |entry| wanted_grammar < entry.grammar }
        end

        matches
      end

      # Writes one log line per entry.
      def describe
        @entries.each do |entry|
          Pils.log "// #{entry.display}"
        end
      end

      private

      # Index into the 24-entry article tables for the given features.
      def article_position(kase, number, gender)
        position = { 'f' => 8, 'n' => 16 }.fetch(gender.to_s, 0)
        position += 4 if number.to_s == 'pl'
        position + { 'gen' => 1, 'dat' => 2, 'acc' => 3 }.fetch(kase.to_s, 0)
      end
    end
  end
end
|
@@ -0,0 +1,310 @@
|
|
1
|
+
# This file is part of Pils.
|
2
|
+
#
|
3
|
+
# Pils is free software: you can redistribute it and/or modify
|
4
|
+
# it under the terms of the GNU Lesser General Public License as
|
5
|
+
# published by the Free Software Foundation, either version 3 of
|
6
|
+
# the License, or (at your option) any later version.
|
7
|
+
#
|
8
|
+
# Pils is distributed in the hope that it will be useful,
|
9
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
10
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
11
|
+
# GNU Lesser General Public License for more details.
|
12
|
+
#
|
13
|
+
# You should have received a copy of the
|
14
|
+
# GNU Lesser General Public License along with Pils.
|
15
|
+
# If not, see <http://www.gnu.org/licenses/>.
|
16
|
+
|
17
|
+
module Pils
|
18
|
+
module Parsing
|
19
|
+
class Parser
|
20
|
+
|
21
|
+
# Input tokens; current syntax tree and semantics; read positions in the
# tree's leaves and in the token list; pending alternative states; and the
# grammar and lexicon to parse with.
attr_accessor :tokens,
              :syntax, :semantics,
              :syntax_position, :token_position,
              :stack,
              :grammar, :lexicon
|
33
|
+
|
34
|
+
|
35
|
+
# Splits +str+ into tokens on runs of whitespace.
# Surrounding whitespace is stripped first so that no empty token is produced
# for input that starts with a blank; nil is treated like an empty string.
def tokenize(str)
  str.to_s.strip.split(/\s+/)
end
|
38
|
+
|
39
|
+
# Resets the parser to its start state for the token list +ptokens+.
# The syntax tree starts as a single node for the grammar's first starting
# category; positions are zeroed, semantics and the state stack are emptied.
# @grammar must already be set. Logs the tokens when done.
def init(ptokens)
  # input state
  @tokens, @token_position = ptokens, 0

  # syntactic state: begin with the first start symbol
  start_cat = @grammar.starting_cats.first
  @syntax = Tree.new(start_cat, nil)
  @syntax_position = 0

  # semantic state and pending alternatives
  @semantics = {}
  @stack = []

  Pils.log(@tokens)
end
|
56
|
+
|
57
|
+
# Asks the lexicon for entries that fit the current token and the category
# object of the current head leaf. The result is stored in @lexicon_matches
# and returned.
def word_substitutions
  expected = @head_cat.obj
  query = {
    form: @tokens[@token_position],
    cat: expected.cat,
    grammar: expected.grammar
  }
  @lexicon_matches = @lexicon.get(query)
end
|
71
|
+
|
72
|
+
# Expands the current head leaf using the grammar rules that apply to it.
#
# The first applicable rule is applied to the live syntax tree. Every further
# rule is applied to a copy of the current state, which is queued on @stack
# as an alternative: [syntax, semantics, syntax_position, token_position].
# Does nothing (and returns nil) when no rule applies.
def expand_cat
  first_rule, *alternative_rules = *@grammar.expand(@head_cat.obj)
  return if first_rule.nil?

  # One fresh tree node per right-hand-side category of a rule.
  build_children = lambda { |rule| rule.right.map { |cat| Tree.new(cat) } }

  # Copies nested hashes too, so a queued state cannot be altered by later
  # writes into the semantics of the state that is currently being pursued.
  copy_nested = lambda do |hash|
    hash.each_with_object({}) do |(key, value), copy|
      copy[key] = value.is_a?(Hash) ? copy_nested.call(value) : value
    end
  end

  # The copies must be taken before the live tree is expanded below.
  alternative_rules.each do |rule|
    alt_syntax = @syntax.clone
    alt_semantics = copy_nested.call(@semantics)
    alt_syntax.leaf_at(@syntax_position).set_children(build_children.call(rule), true)
    @stack << [alt_syntax, alt_semantics, @syntax_position, @token_position]
  end

  @head_cat.set_children(build_children.call(first_rule), true)
  sync
end
|
114
|
+
|
115
|
+
# Re-reads the head leaf at the current syntax position. When a leaf exists,
# @head_path is rebuilt from the non-nil descriptors of the leaf's
# ancestors_from_root; when there is none, @head_path is left unchanged.
def sync
  @head_cat = @syntax.leaf_at(@syntax_position)
  return unless @head_cat

  @head_path = @head_cat.ancestors_from_root.map { |node| node.obj.descriptor }.compact
end
|
123
|
+
|
124
|
+
# Runs #parse repeatedly until it yields something other than true/false,
# all tokens are consumed, or max_iterations steps have been taken.
#
# Returns the last result of #parse (true if #parse was never called). When
# that result is a Hash, every token is first written into the result's
# :syntax tree via set_wordform_at, by token index.
# Produces no output of its own.
def parse!(max_iterations=50)
  @result = true
  @iter = 0

  while (@result == true || @result == false) &&
        @token_position < @tokens.size &&
        @iter < max_iterations
    @result = parse
    @iter += 1
  end

  if @result.kind_of?(Hash)
    @tokens.each_with_index do |token, index|
      @result[:syntax].set_wordform_at(index, token)
    end
  end

  @result
end
|
149
|
+
|
150
|
+
# Performs a single parsing step on the current state.
#
# Returns
#   true  - the head leaf was expanded by a rule, or the current token was
#           matched against the lexicon;
#   false - the step failed; the next queued state (if any) was restored;
#   Hash  - {syntax:, semantics:} once there is no open leaf and no token left.
def parse
  sync

  # Dead end: no open leaf although input tokens remain. Backtrack.
  if @head_cat.nil? && !(@token_position > (@tokens.count - 1))
    retrieve_from_stack
    return false
  end

  # A. Category expansion: if the grammar can rewrite the head leaf, do that.
  if @head_cat && @grammar.expandible?(@head_cat.obj)
    expand_cat
    return true
  end

  # B. Word match: does the current token fit the head leaf?
  # Without a head leaf there is nothing to look up.
  success = false
  subs = @head_cat ? word_substitutions : []
  if subs.count > 0
    # Use the first match for the current state; queue one alternative state
    # per remaining match.
    head, *tail = *subs

    # Copies nested hashes too, so queued states do not see the writes made
    # into the live semantics below.
    copy_nested = lambda do |hash|
      hash.each_with_object({}) do |(key, value), copy|
        copy[key] = value.is_a?(Hash) ? copy_nested.call(value) : value
      end
    end
    tail.each do
      @stack << [@syntax.clone, copy_nested.call(@semantics), @syntax_position, @token_position]
    end

    if head
      # Walk (and create where missing) the nested hash addressed by the
      # descriptor path of the head leaf, then store the word's semantics
      # there directly.
      target = @head_path.inject(@semantics) do |node, descriptor|
        node[descriptor] = {} unless node.has_key?(descriptor)
        node[descriptor]
      end
      head.semantics.keys.each do |key|
        target[key] = head.semantics[key]
      end

      # Advance both read positions past the matched word.
      @token_position = @token_position + 1
      @syntax_position = @syntax_position + 1
      sync
    end
    success = true
  end

  # Finished: no open leaf and no token left.
  if @head_cat.nil? && @token_position > (@tokens.count - 1)
    return {syntax: @syntax.clone, semantics: @semantics.clone}
  end
  return true if success

  # Neither expansion nor word match worked: try the next queued state.
  retrieve_from_stack
  success
end
|
283
|
+
|
284
|
+
# gets the next possible state from stack, discards the current one.
|
285
|
+
# Replaces the current state with the oldest queued state (the first element
# of @stack), removes that state from the queue and re-syncs the head leaf.
# Returns nil without changing anything when no state is queued.
def retrieve_from_stack
  return nil if @stack.nil? || @stack.empty?

  next_state = @stack.first
  @stack = @stack.drop(1)
  @syntax, @semantics, @syntax_position, @token_position = next_state[0], next_state[1], next_state[2], next_state[3]
  sync
end
|
300
|
+
|
301
|
+
|
302
|
+
# Renders the token list as one string with "*" marking the read position:
# consumed tokens, the marker, then the tokens still to be read.
# The remainder is taken by position, so words that also occur among the
# consumed tokens are kept.
def display_tokens
  consumed = @tokens.take(@token_position)
  pending = @tokens.drop(@token_position)
  (consumed + ["*"] + pending).join(" ")
end
|
308
|
+
end
|
309
|
+
end
|
310
|
+
end
|