shalmaneser-lib 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/configuration/config_data.rb +457 -0
- data/lib/configuration/config_format_element.rb +210 -0
- data/lib/configuration/configuration_error.rb +15 -0
- data/lib/configuration/external_config_data.rb +56 -0
- data/lib/configuration/frappe_config_data.rb +134 -0
- data/lib/configuration/fred_config_data.rb +199 -0
- data/lib/configuration/rosy_config_data.rb +126 -0
- data/lib/db/db_interface.rb +50 -0
- data/lib/db/db_mysql.rb +141 -0
- data/lib/db/db_sqlite.rb +280 -0
- data/lib/db/db_table.rb +237 -0
- data/lib/db/db_view.rb +416 -0
- data/lib/db/db_wrapper.rb +175 -0
- data/lib/db/select_table_and_columns.rb +10 -0
- data/lib/db/sql_query.rb +243 -0
- data/lib/definitions.rb +19 -0
- data/lib/eval.rb +482 -0
- data/lib/ext/maxent/Classify.class +0 -0
- data/lib/ext/maxent/Train.class +0 -0
- data/lib/external_systems.rb +251 -0
- data/lib/framenet_format/fn_corpus_aset.rb +209 -0
- data/lib/framenet_format/fn_corpus_xml_file.rb +120 -0
- data/lib/framenet_format/fn_corpus_xml_sentence.rb +299 -0
- data/lib/framenet_format/fn_database.rb +143 -0
- data/lib/framenet_format/frame_xml_file.rb +104 -0
- data/lib/framenet_format/frame_xml_sentence.rb +411 -0
- data/lib/logging.rb +25 -0
- data/lib/ml/classifier.rb +189 -0
- data/lib/ml/mallet.rb +236 -0
- data/lib/ml/maxent.rb +229 -0
- data/lib/ml/optimize.rb +195 -0
- data/lib/ml/timbl.rb +140 -0
- data/lib/monkey_patching/array.rb +82 -0
- data/lib/monkey_patching/enumerable_bool.rb +24 -0
- data/lib/monkey_patching/enumerable_distribute.rb +18 -0
- data/lib/monkey_patching/file.rb +131 -0
- data/lib/monkey_patching/subsumed.rb +24 -0
- data/lib/ruby_class_extensions.rb +4 -0
- data/lib/salsa_tiger_xml/corpus.rb +24 -0
- data/lib/salsa_tiger_xml/fe_node.rb +98 -0
- data/lib/salsa_tiger_xml/file_parts_parser.rb +214 -0
- data/lib/salsa_tiger_xml/frame_node.rb +145 -0
- data/lib/salsa_tiger_xml/graph_node.rb +347 -0
- data/lib/salsa_tiger_xml/reg_xml.rb +285 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence.rb +596 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_graph.rb +333 -0
- data/lib/salsa_tiger_xml/salsa_tiger_sentence_sem.rb +438 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_helper.rb +84 -0
- data/lib/salsa_tiger_xml/salsa_tiger_xml_node.rb +161 -0
- data/lib/salsa_tiger_xml/sem_node.rb +58 -0
- data/lib/salsa_tiger_xml/string_terminals_in_right_order.rb +192 -0
- data/lib/salsa_tiger_xml/syn_node.rb +169 -0
- data/lib/salsa_tiger_xml/tree_node.rb +59 -0
- data/lib/salsa_tiger_xml/ts_syn_node.rb +47 -0
- data/lib/salsa_tiger_xml/usp_node.rb +72 -0
- data/lib/salsa_tiger_xml/xml_node.rb +163 -0
- data/lib/shalmaneser/lib.rb +1 -0
- data/lib/tabular_format/fn_tab_format_file.rb +38 -0
- data/lib/tabular_format/fn_tab_frame.rb +67 -0
- data/lib/tabular_format/fn_tab_sentence.rb +169 -0
- data/lib/tabular_format/tab_format_file.rb +91 -0
- data/lib/tabular_format/tab_format_named_args.rb +184 -0
- data/lib/tabular_format/tab_format_sentence.rb +119 -0
- data/lib/value_restriction.rb +49 -0
- metadata +131 -0
@@ -0,0 +1,84 @@
|
|
1
|
+
module STXML
  # sp jul 05 05
  #
  # Static helper methods for SalsaTigerRegXML:
  #
  # - provide header and footer for Salsa/Tiger XML files
  # - escape and unescape HTML entities
  #
  # changed KE nov 05:
  # many methods moved to FrappeHelper
  class SalsaTigerXMLHelper
    # Table of [unescaped, escaped] pairs used by +escape+ and +unescape+.
    #
    # Order matters: "&" must be replaced first when escaping (otherwise the
    # ampersands of freshly inserted entities would be escaped again) and
    # last when unescaping, which is why +unescape+ walks the table backwards.
    #
    # NOTE(review): double quotes and `` are both written as two &apos;
    # entities, so +unescape+ is not an exact inverse of +escape+ for quote
    # characters. The entity spellings were reconstructed from a rendering
    # in which they had been decoded -- confirm against the upstream gem.
    @replacements = [
      # ["''", "&quot;&quot;"], # added by ines (09/03/09), might cause problems for unescape???
      ["&", "&amp;"], # must be first for escaping, last for unescaping
      ["<", "&lt;"],
      [">", "&gt;"],
      ["\"", "&apos;&apos;"],
      # ["\"", "&quot;"],
      # ["''", "&quot;&quot;"],
      # ["``", "&quot;&quot;"],
      ["'", "&apos;"],
      ["``", "&apos;&apos;"]
      # ["''", "&apos;&apos;"]
    ].freeze

    ###
    # get header of SalsaTigerXML files (as string)
    #
    # returns: the XML prolog, the opening <corpus> tag, a <head> section
    # with empty declaration elements, and the opening <body> tag,
    # each on its own line.
    def self.get_header
      <<ENDOFHEADER
<?xml version="1.0" encoding="UTF-8"?>
<corpus corpusname="corpus" target="">
<head>
<meta>
<format>NeGra format, version 3</format>
</meta>
<frames xmlns="http://www.clt-st.de/framenet/frame-database">
</frames>
<wordtags xmlns="http://www.clt-st.de/salsa/wordtags">
</wordtags>
<flags>
</flags>
<annotation>
<edgelabel>
</edgelabel>
<secedgelabel>
</secedgelabel>
</annotation>
</head>
<body>
ENDOFHEADER
    end

    ###
    # get footer of SALSATigerXML files (as string)
    #
    # returns: the closing </body> and </corpus> tags, one per line.
    def self.get_footer
      <<ENDOFFOOTER
</body>
</corpus>
ENDOFFOOTER
    end

    ###
    # Replace XML-critical characters in 'string' by their entities.
    #
    # An unfrozen string is modified in place (and returned), as before.
    # A frozen string cannot be modified, so a modified copy is returned
    # instead of raising FrozenError.
    #
    # string: String
    # returns: the escaped string
    def self.escape(string)
      string = string.dup if string.frozen?

      @replacements.each do |unescaped, escaped|
        string.gsub!(unescaped, escaped)
      end

      string
    end

    ###
    # Replace entities in 'string' by the characters they stand for.
    #
    # Same in-place/frozen handling as +escape+.
    #
    # string: String
    # returns: the unescaped string
    def self.unescape(string)
      string = string.dup if string.frozen?

      # reverse replacements to replace & last
      @replacements.reverse_each do |unescaped, escaped|
        string.gsub!(escaped, unescaped)
      end

      string
    end
  end
end
|
@@ -0,0 +1,161 @@
|
|
1
|
+
require_relative 'xml_node'
|
2
|
+
require_relative 'string_terminals_in_right_order'
|
3
|
+
|
4
|
+
module STXML
  #############
  # class SalsaTigerXmlNode
  #
  # additional methods:
  #
  # is_terminal?     true if this is a Tiger XML terminal node
  #
  # is_nonterminal?  true if this is a Tiger XML nonterminal node
  #
  # is_splitword?    true if this is a splitword part
  #
  # is_syntactic?    true for terminal, nonterminal, splitword
  #
  # is_frame?        true if this is a Salsa/Tiger XML frame
  #
  # is_target?       true if this is a Salsa/Tiger XML frame target
  #
  # is_fe?           true if this is a Salsa/Tiger XML frame element
  #
  # is_outside_sentence?  returns false -- this node is not a placeholder for
  #                  a node that is outside the current sentence
  #                  (but see descendant class TSSynNode)
  #
  # yield_nodes      returns the list of descendants that are leaves of the tree
  #                  NOTE: this overwrites the Graph.yield_nodes method
  #                  since we have to treat splitwords in a special way
  #                  ([self] if this node has no non-splitword children)
  #
  # yield_nodes_ordered  returns those descendants ordered by precedence
  #                  in the sentence, i.e. their node IDs.
  #
  # sid              returns the sentence ID of this node
  #
  # to_s             returns the yield of this node as a string of
  #                  space-separated words, ordered left to right
  #
  class SalsaTigerXmlNode < XMLNode
    include StringTerminalsInRightOrder

    ###
    # extracting the ID from a RegXML element
    # depends on whether it has an ID or an IDref
    #
    # xml_obj: RegXML object
    # returns: a string, the ID, or nil if none was found
    def self.xmlel_id(xml_obj)
      case xml_obj.name
      when "edge", "fenode", "uspitem", "splitword", "other_edge"
        # these elements point to another node: contains ID ref
        xml_obj.attributes["idref"]
      else
        # "part" and anything else: ID is in attribute "id"
        xml_obj.attributes["id"]
      end
    end

    ###
    # xml: RegXML object or text
    #
    # The four positional arguments are handed to the superclass
    # constructor; their meaning is defined by XMLNode, not here.
    def initialize(xml)
      if xml.text?
        # text
        super(xml, nil, nil, true)
      else
        # xml element
        super(xml.name, xml.attributes, SalsaTigerXmlNode.xmlel_id(xml), false)
      end
    end

    ###
    # true if the "name" feature of this node is "t"
    def is_terminal?
      get_f("name") == "t"
    end

    ###
    # true if the "name" feature of this node is "nt"
    def is_nonterminal?
      get_f("name") == "nt"
    end

    ###
    # true if the "name" feature of this node is "part"
    def is_splitword?
      get_f("name") == "part"
    end

    ###
    # true for terminal, nonterminal, splitword
    def is_syntactic?
      is_terminal? || is_nonterminal? || is_splitword?
    end

    ###
    # true if the "name" feature of this node is "frame"
    def is_frame?
      get_f("name") == "frame"
    end

    ###
    # true if the "name" feature of this node is "target"
    def is_target?
      get_f("name") == "target"
    end

    ###
    # true if the "name" feature of this node is "fe"
    def is_fe?
      get_f("name") == "fe"
    end

    ###
    # my node ID starts out with the sentence ID:
    # return everything before the first underscore of a line of the ID,
    # or nil if the ID contains no underscore
    def sid
      Regexp.last_match(1) if id =~ /^(.*?)_/
    end

    ###
    # always false for this class (but see descendant class TSSynNode)
    def is_outside_sentence?
      false
    end

    ###
    # returns: array of the leaves below this node,
    # or [self] if this node has no children other than splitword parts
    def yield_nodes
      # special consideration: splitwords do not count as children!
      proper_children = children.reject(&:is_splitword?)
      return [self] if proper_children.empty?

      proper_children.each_with_object([]) do |child, leaves|
        if child.children.reject(&:is_splitword?).empty?
          # child is itself a leaf
          leaves << child
        else
          leaves.concat(child.yield_nodes)
        end
      end
    end

    ###
    # legacy name
    #
    # returns: the yield nodes, terminals and splitwords first (sorted
    # left to right), followed by any remaining nodes in unsorted order
    def yield_nodes_ordered
      # sort_terminals_and_splitwords_... cannot deal with nonterminals
      # so remove and attach to the end of the chain
      # NOTE(review): 'distribute' is not defined in this file; it is used
      # here as returning [matching, non-matching] -- confirm.
      t, nt = yield_nodes.distribute { |x| x.is_terminal? || x.is_splitword? }
      sort_terminals_and_splitwords_left_to_right(t).concat(nt)
    end

    ###
    # name parallel to the method of SalsaTigerSentence
    def terminals_sorted
      yield_nodes_ordered
    end

    ###
    # returns: the yield of this node as one string
    # (see StringTerminalsInRightOrder#string_for_node)
    def to_s
      string_for_node(self)
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
require_relative 'salsa_tiger_xml_node'
|
2
|
+
|
3
|
+
module STXML
  #############
  # class SemNode
  #
  # common superclass for FrameNode and FeNode,
  # with methods that are the same for both:
  #
  #
  # is_usp?      returns true if the frame/FE is involved in underspecification,
  #              else false
  #
  # flags        returns an array of all the frame/FE flags for this node.
  #              members of the array are strings describing the flags
  #              that have been set to true
  #
  # add_flag     add or remove a frame/FE flag
  # remove_flag
  class SemNode < SalsaTigerXmlNode
    attr_reader :flags

    ###
    # xml: RegXML object or text
    def initialize(xml)
      super(xml)
      # flags: array of flag names (strings), initially empty
      @flags = []
    end

    ###
    # true if the attribute "usp" of this node has the value "yes"
    def is_usp?
      get_attribute("usp") == "yes"
    end

    ###
    # name: string, flag name
    #
    # Appends the name to the flag list; a name added twice is stored twice.
    def add_flag(name)
      @flags << name
    end

    ###
    # name: string, flag name
    #
    # Removes every occurrence of the name from the flag list.
    def remove_flag(name)
      @flags.delete(name)
    end

    #############
    protected

    # The embedded XML of the superclass, followed by one <flag/> element
    # per flag of this node.
    def get_xml_embedded
      super() + get_xml_offlags
    end

    # returns: string with one line "<flag name='...'/>" per flag,
    # the name being passed through xml_secure_val first;
    # empty string if no flags are set
    def get_xml_offlags
      flag_elements = @flags.map do |flagname|
        "<flag name='#{xml_secure_val(flagname)}'/>\n"
      end

      flag_elements.join
    end
  end
end
|
@@ -0,0 +1,192 @@
|
|
1
|
+
module STXML
  #########
  # module StringTerminalsInRightOrder
  #
  # returns the yield of a node, or a list of nodes, as a string
  # of " "-separated words
  #
  # Words are put into the right order, left to right,
  # under the assumption that their node IDs reflect that order
  #
  # Terminal nodes are assumed to have IDs ending in a number,
  # numbered from left to right
  #
  # Splitword nodes are assumed to have IDs ending in N_sM
  # for numbers N and M, where N orders terminals left to right
  # and M orders the splitword parts left to right
  #
  # If the yield of the node/the list of nodes contains all splitwords of a terminal,
  # the whole terminal is taken instead
  #
  # methods:
  #
  # string_for_node    returns the string for the yield of a node
  #   node: a node object
  #
  # string_for_nodes   returns the string for the yield of a list of nodes
  #   nodes: a list of node objects
  #
  # Node objects must respond to id, word, parent, yield_nodes,
  # is_terminal? and is_splitword?; parents of splitwords must
  # respond to each_child.
  module StringTerminalsInRightOrder
    # node: a node object
    # returns: the words of the node's yield, each followed by one space
    def string_for_node(node)
      string_for_nodes([node])
    end

    # nodes: a list of node objects
    # returns: the words of the nodes' yields in left-to-right order,
    # each followed by one space (so a non-empty result ends in " ")
    def string_for_nodes(nodes)
      selected = right_level_terminals_for_nodes(nodes)
      ordered = sort_terminals_and_splitwords_left_to_right(selected)
      node_array_to_string(ordered)
    end

    #####
    private

    # right_level_terminals_for_nodes:
    # - compute the yield for each element of 'nodes'
    # - then consider all splitwords in the yield:
    #   if all splitwords of a terminal are in the yield,
    #   then use the terminal rather than its splitwords
    #
    # Yield nodes that are neither splitwords nor terminals are dropped.
    # returns: array of nodes without duplicates
    def right_level_terminals_for_nodes(nodes)
      yielded = nodes.map(&:yield_nodes).flatten
      selected = []

      yielded.each do |n|
        if n.is_splitword?
          terminal = n.parent

          if terminal.nil?
            # splitword without a parent
            selected << n
          elsif selected.include?(terminal) || yielded.include?(terminal)
            # the splitword's parent is already selected, or is itself
            # part of the yield: nothing to add for this splitword
          elsif all_parts_included?(terminal, yielded)
            # all children of the parent are in the yield:
            # take the terminal rather than the individual splitwords
            selected << terminal
          else
            # some sibling of n is not in the yield
            selected << n
          end
        elsif n.is_terminal?
          selected << n
        end
        # if n is anything but a splitword or a terminal, ignore it
      end

      selected.uniq
    end

    # true if every child of 'terminal' is a member of the array 'pool'
    def all_parts_included?(terminal, pool)
      terminal.each_child do |part|
        return false unless pool.include?(part)
      end

      true
    end

    # sort_terminals_and_splitwords_left_to_right:
    # take an array of nodes that consists of terminals and splitwords
    # and sort them using the following comparison:
    # - when comparing two terminals, use the
    #   last numbers in their respective IDs
    # - when comparing two splitwords, their IDs end in _N_sM
    #   for numbers N and M.
    #   If they coincide in N, compare them by M,
    #   else compare them by N
    # - when comparing a terminal and a splitword,
    #   compare the terminal's last number to the splitword's N
    #
    # returns: a new, sorted array
    def sort_terminals_and_splitwords_left_to_right(nodes)
      nodes.sort do |a, b|
        if a.is_splitword? && b.is_splitword?
          compare_splitwords(a, b)
        elsif a.is_terminal? && b.is_terminal?
          compare_terminals(a, b)
        else
          compare_mixed(a, b)
        end
      end
    end

    # node_array_to_string:
    # 'nodes' is an array of node objects, each of which offers a "word" method
    # string their words together, each word followed by " "
    def node_array_to_string(nodes)
      nodes.each_with_object("".dup) do |n, text|
        text << n.word << " "
      end
    end

    # - when comparing two terminals, use the
    #   last numbers in their respective IDs
    def compare_terminals(a, b)
      last_i(a) <=> last_i(b)
    end

    # - when comparing two splitwords, their IDs end in _N_sM
    #   for numbers N and M.
    #   If they coincide in N, compare them by M,
    #   else compare them by N
    def compare_splitwords(a, b)
      terminal_a = splitword_terminal_i(a)
      terminal_b = splitword_terminal_i(b)

      if terminal_a == terminal_b
        # parts of same terminal: compare parts
        last_i(a) <=> last_i(b)
      else
        # parts of different terminals: compare terminals
        terminal_a <=> terminal_b
      end
    end

    # - when comparing a terminal and a splitword,
    #   compare the terminal's last number to the splitword's N
    #
    # Any other combination (e.g. a nonterminal slipped into the list) is
    # reported on stderr and treated as "equal": a sort block must return
    # an integer, returning nil would make Array#sort raise ArgumentError.
    def compare_mixed(a, b)
      if a.is_splitword? && b.is_terminal?
        splitword_terminal_i(a) <=> last_i(b)
      elsif a.is_terminal? && b.is_splitword?
        last_i(a) <=> splitword_terminal_i(b)
      else
        # not one terminal, one splitword?
        $stderr.print "SalsaTigerSentence, compare_mixed: confused by "
        $stderr.print a.id, ", ", b.id, "\n"
        0
      end
    end

    # return last number of the ID of a node
    #
    # Terminates the program with exit status 1 if the ID has no such number.
    # NOTE(review): exiting from library code is kept for compatibility;
    # consider raising an exception instead.
    def last_i(n)
      # match final string of digits; failing that (shouldn't happen
      # _in principle_, but we might get weird node IDs for splitwords),
      # accept one final letter behind the digits
      matched = (n.id =~ /(\d+)$/) || (n.id =~ /(\d+)\w$/)

      unless matched # this shouldn't ever happen
        $stderr.print "SalsaTigerSentence, last_i: Couldn't extract digits from: "
        $stderr.print n.id, "\n"
        exit 1
      end

      Regexp.last_match(1).to_i # and return it as number
    end

    # assume the ID of the node includes N_sM
    # return N
    #
    # Terminates the program with exit status 1 if the ID has no such part.
    def splitword_terminal_i(n)
      # match string of digits before splitword ID
      unless n.id =~ /(\d+)_s\d*/ # failing here shouldn't ever happen
        $stderr.print "SalsaTigerSentence, splitword_terminal_i: Couldn't extract digits from: "
        $stderr.print n.id, "\n"
        exit 1
      end

      Regexp.last_match(1).to_i # and return it as number
    end
  end
end
|