shalmaneser 1.2.0.rc4 → 1.2.rc5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (115) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +47 -18
  3. data/bin/shalmaneser +8 -2
  4. data/doc/index.md +1 -0
  5. data/lib/shalmaneser/opt_parser.rb +68 -67
  6. metadata +49 -119
  7. data/bin/fred +0 -16
  8. data/bin/frprep +0 -34
  9. data/bin/rosy +0 -17
  10. data/lib/common/AbstractSynInterface.rb +0 -1229
  11. data/lib/common/Counter.rb +0 -18
  12. data/lib/common/EnduserMode.rb +0 -27
  13. data/lib/common/Eval.rb +0 -480
  14. data/lib/common/FixSynSemMapping.rb +0 -196
  15. data/lib/common/Graph.rb +0 -345
  16. data/lib/common/ISO-8859-1.rb +0 -24
  17. data/lib/common/ML.rb +0 -186
  18. data/lib/common/Mallet.rb +0 -236
  19. data/lib/common/Maxent.rb +0 -229
  20. data/lib/common/Optimise.rb +0 -195
  21. data/lib/common/Parser.rb +0 -213
  22. data/lib/common/RegXML.rb +0 -269
  23. data/lib/common/RosyConventions.rb +0 -171
  24. data/lib/common/STXmlTerminalOrder.rb +0 -194
  25. data/lib/common/SalsaTigerRegXML.rb +0 -2347
  26. data/lib/common/SalsaTigerXMLHelper.rb +0 -99
  27. data/lib/common/SynInterfaces.rb +0 -282
  28. data/lib/common/TabFormat.rb +0 -721
  29. data/lib/common/Tiger.rb +0 -1448
  30. data/lib/common/Timbl.rb +0 -144
  31. data/lib/common/Tree.rb +0 -61
  32. data/lib/common/config_data.rb +0 -470
  33. data/lib/common/config_format_element.rb +0 -220
  34. data/lib/common/headz.rb +0 -338
  35. data/lib/common/option_parser.rb +0 -13
  36. data/lib/common/prep_config_data.rb +0 -62
  37. data/lib/common/prep_helper.rb +0 -1330
  38. data/lib/common/ruby_class_extensions.rb +0 -310
  39. data/lib/db/db_interface.rb +0 -48
  40. data/lib/db/db_mysql.rb +0 -145
  41. data/lib/db/db_sqlite.rb +0 -280
  42. data/lib/db/db_table.rb +0 -239
  43. data/lib/db/db_wrapper.rb +0 -176
  44. data/lib/db/sql_query.rb +0 -243
  45. data/lib/ext/maxent/Classify.class +0 -0
  46. data/lib/ext/maxent/Train.class +0 -0
  47. data/lib/fred/Baseline.rb +0 -150
  48. data/lib/fred/FileZipped.rb +0 -31
  49. data/lib/fred/FredBOWContext.rb +0 -877
  50. data/lib/fred/FredConventions.rb +0 -232
  51. data/lib/fred/FredDetermineTargets.rb +0 -319
  52. data/lib/fred/FredEval.rb +0 -312
  53. data/lib/fred/FredFeatureExtractors.rb +0 -322
  54. data/lib/fred/FredFeatures.rb +0 -1061
  55. data/lib/fred/FredFeaturize.rb +0 -602
  56. data/lib/fred/FredNumTrainingSenses.rb +0 -27
  57. data/lib/fred/FredParameters.rb +0 -402
  58. data/lib/fred/FredSplit.rb +0 -84
  59. data/lib/fred/FredSplitPkg.rb +0 -180
  60. data/lib/fred/FredTest.rb +0 -606
  61. data/lib/fred/FredTrain.rb +0 -144
  62. data/lib/fred/PlotAndREval.rb +0 -480
  63. data/lib/fred/fred.rb +0 -47
  64. data/lib/fred/fred_config_data.rb +0 -185
  65. data/lib/fred/md5.rb +0 -23
  66. data/lib/fred/opt_parser.rb +0 -250
  67. data/lib/frprep/Ampersand.rb +0 -39
  68. data/lib/frprep/CollinsInterface.rb +0 -1165
  69. data/lib/frprep/Counter.rb +0 -18
  70. data/lib/frprep/FNCorpusXML.rb +0 -643
  71. data/lib/frprep/FNDatabase.rb +0 -144
  72. data/lib/frprep/FrameXML.rb +0 -513
  73. data/lib/frprep/Graph.rb +0 -345
  74. data/lib/frprep/MiniparInterface.rb +0 -1388
  75. data/lib/frprep/RegXML.rb +0 -269
  76. data/lib/frprep/STXmlTerminalOrder.rb +0 -194
  77. data/lib/frprep/SleepyInterface.rb +0 -384
  78. data/lib/frprep/TntInterface.rb +0 -44
  79. data/lib/frprep/TreetaggerInterface.rb +0 -327
  80. data/lib/frprep/do_parses.rb +0 -143
  81. data/lib/frprep/frprep.rb +0 -693
  82. data/lib/frprep/interfaces/berkeley_interface.rb +0 -372
  83. data/lib/frprep/interfaces/stanford_interface.rb +0 -353
  84. data/lib/frprep/interpreters/berkeley_interpreter.rb +0 -22
  85. data/lib/frprep/interpreters/stanford_interpreter.rb +0 -22
  86. data/lib/frprep/one_parsed_file.rb +0 -28
  87. data/lib/frprep/opt_parser.rb +0 -94
  88. data/lib/frprep/ruby_class_extensions.rb +0 -310
  89. data/lib/rosy/AbstractFeatureAndExternal.rb +0 -242
  90. data/lib/rosy/ExternalConfigData.rb +0 -58
  91. data/lib/rosy/FailedParses.rb +0 -130
  92. data/lib/rosy/FeatureInfo.rb +0 -242
  93. data/lib/rosy/GfInduce.rb +0 -1115
  94. data/lib/rosy/GfInduceFeature.rb +0 -148
  95. data/lib/rosy/InputData.rb +0 -294
  96. data/lib/rosy/RosyConfusability.rb +0 -338
  97. data/lib/rosy/RosyEval.rb +0 -465
  98. data/lib/rosy/RosyFeatureExtractors.rb +0 -1609
  99. data/lib/rosy/RosyFeaturize.rb +0 -281
  100. data/lib/rosy/RosyInspect.rb +0 -336
  101. data/lib/rosy/RosyIterator.rb +0 -478
  102. data/lib/rosy/RosyPhase2FeatureExtractors.rb +0 -230
  103. data/lib/rosy/RosyPruning.rb +0 -165
  104. data/lib/rosy/RosyServices.rb +0 -744
  105. data/lib/rosy/RosySplit.rb +0 -232
  106. data/lib/rosy/RosyTask.rb +0 -19
  107. data/lib/rosy/RosyTest.rb +0 -829
  108. data/lib/rosy/RosyTrain.rb +0 -234
  109. data/lib/rosy/RosyTrainingTestTable.rb +0 -787
  110. data/lib/rosy/TargetsMostFrequentFrame.rb +0 -60
  111. data/lib/rosy/View.rb +0 -418
  112. data/lib/rosy/opt_parser.rb +0 -379
  113. data/lib/rosy/rosy.rb +0 -78
  114. data/lib/rosy/rosy_config_data.rb +0 -121
  115. data/lib/shalmaneser/version.rb +0 -3
@@ -1,195 +0,0 @@
1
- # sp 29 07 04
2
- # "optimise" c4.5 files by replacing all feature values which only
3
- # occur with one label by a new, common value.
4
- #
5
- # two modes of operation:
6
- # optimise <file> -- optimise file and store optimisations in <file>.opts
7
- # optimise <file> <file.opts> -- apply optimisation from file.opts to file
8
-
9
# Collapses feature values of a c4.5-style, comma-separated data file.
#
# Every row of the data file is "feature,feature,...,label". For each feature
# column, every value that only ever occurs together with a single label is
# replaced by a common value "f<column index>_<label without dots>".
#
# The replacement table is either computed from a data file
# (init_from_data) or loaded from a file previously written by store
# (init_from_file); apply then rewrites a data file using that table.
class Optimise
  # Creates an optimiser that has no replacement table yet.
  def initialize
    @ready = false
    @replacements = []
  end

  # Computes a new replacement table from the data file at path +infile+.
  #
  # The table gets one entry per column of the first row, *including* the
  # label column (whose entry stays empty); apply relies on that length.
  #
  # Raises RuntimeError if a row has a different number of columns than
  # the first row (such rows would otherwise misalign values and labels).
  def init_from_data(infile) # find new optimisation
    STDERR.puts "[Optimise] computing new feature optimisation"

    labels = []
    features = nil # for each column: array of feature values from the file
    @replacements = [] # for each column: hash old value -> new value

    File.foreach(infile) do |line|
      row = line.chomp.split(",")

      if features.nil? # first line: initialisation
        features = Array.new(row.length) { [] }
        @replacements = Array.new(row.length) { {} }
      elsif row.length != features.length
        raise "[Optimise] Error: inconsistent number of columns in data file: " \
              "expected #{features.length}, got #{row.length}"
      end

      labels << row.pop
      row.each_with_index { |value, i| features[i] << value }
    end

    # An empty file leaves features nil: nothing to optimise.
    (features || []).each_with_index do |fvalues, findex|
      # fval -> label mapping:
      #   value not seen yet:            nil (no entry)
      #   seen with exactly one label:   <label>
      #   seen with two or more labels:  false
      fval_to_label = {}
      fvalues.each_with_index do |fval, inst_idx|
        label = labels[inst_idx]
        seen_label = fval_to_label[fval]
        if seen_label.nil?
          fval_to_label[fval] = label
        elsif seen_label && seen_label != label
          fval_to_label[fval] = false
        end
      end

      # Record all feature values for which only one label has been seen.
      fval_to_label.each_pair do |fval, label|
        new_fval = label ? "f#{findex}_#{label.delete('.')}" : nil
        # Diagnostic output for the "[U]" value; new_fval is nil (printed as
        # an empty string) when "[U]" occurred with several labels.
        puts "[U]: #{label} #{new_fval}" if fval == "[U]"
        @replacements[findex][fval] = new_fval if label
      end
    end

    @ready = true
  end

  # Loads a replacement table from +optsfile+, a file written by store.
  def init_from_file(optsfile) # use old optimisation
    File.open(optsfile) { |optsinfile| @replacements = read(optsinfile) }
    @ready = true
  end

  # Writes the replacement table to +outfilename+ so that init_from_file can
  # recreate it. Per column i the file contains "<i>", one "old<TAB>new" line
  # per replacement, and "</i>".
  #
  # Raises RuntimeError if no table has been computed or loaded.
  def store(outfilename) # store data necessary to recreate optimisation
    raise "[Optimise] Error: Cannot store un-initialised optimisation" unless @ready

    File.open(outfilename, "w") do |outfile|
      @replacements.each_with_index do |reps, i|
        outfile.puts "<#{i}>"
        reps.each_pair { |old, new| outfile.puts [old, new].join("\t") }
        outfile.puts "</#{i}>"
      end
    end
  end

  # Rewrites the data file +infilename+ into +outfilename+, replacing every
  # feature value that has an entry in the replacement table of its column.
  # The last column (the label) is copied unchanged.
  #
  # Raises RuntimeError if no table is available, or if a row does not have
  # as many columns as the table (the output file may then be incomplete).
  def apply(infilename, outfilename)
    raise "[Optimise] Error: Cannot apply un-initialised optimisation" unless @ready

    STDERR.puts "[Optimise] applying feature optimisation"

    # Block forms close both files even when a row is rejected.
    File.open(infilename) do |infile|
      File.open(outfilename, "w") do |outfile|
        infile.each_line do |line|
          tokens = line.chomp.split(",")

          unless tokens.length == @replacements.length
            raise "[Optimise] Error: trying to optimise incompatible feature file!\nFile has "+tokens.length.to_s+" features, and we know replacements for "+@replacements.length.to_s+" features."
          end

          label = tokens.pop
          tokens.each_with_index do |fval, f_idx|
            tokens[f_idx] = @replacements[f_idx][fval] if @replacements[f_idx].key?(fval)
          end
          tokens.push label
          outfile.puts tokens.join(",")
        end
      end
    end
  end

  # Returns the recommended filename to store optimisation patterns for
  # +basefile+.
  def self.recommended_filename(basefile)
    basefile + ".optimisations"
  end

  private

  # Parses the format written by store from the open IO +infile+ and returns
  # the replacement table (array of hashes). The caller closes the IO.
  #
  # Tag lines are matched against the whole line so that a replacement whose
  # value happens to contain "<3>" is not mistaken for a tag.
  def read(infile)
    replacements = []
    reps = nil
    while (line = infile.gets)
      line.chomp!
      case line
      when /\A<(\d+)>\s*\z/
        reps = {}
      when %r{\A</(\d+)>\s*\z}
        replacements[Regexp.last_match(1).to_i] = reps
      else
        if reps.nil?
          raise "[Optimise] Error: malformed optimisation file, replacement outside of a <n> section"
        end

        old, new = line.split("\t")
        reps[old] = new
      end
    end
    replacements
  end
end
@@ -1,213 +0,0 @@
1
- # Alexander Koller 2003
2
- # extended Katrin Erk June 2003
3
- #
4
- # Classes that return a list of sentence DOMs, from various sources
5
- #
6
- # Each class in this file defines the following methods:
7
- #
8
- # initialize(...) "..." depends on the class
9
- # extractDOMs() return list of all s nodes as DOM objects
10
- # each_s() iterate over s nodes; may take less memory
11
-
12
-
13
- require "rexml/document"
14
-
15
- class FileParser
16
-
17
- include REXML
18
-
19
- def initialize(filename)
20
- @file = File.new(filename)
21
- @doc = nil
22
- end
23
-
24
- # returns an array of DOMs for the sentences
25
- def extractDOMs()
26
- ensureParsedDocument()
27
- @doc.get_elements("/corpus/body/s")
28
- end
29
-
30
- # Iterates over all sentence nodes. This may be more memory
31
- # efficient than using extractDOMs(), but isn't in this case.
32
- def each_s()
33
- extractDOMs().each { |dom| yield(dom) }
34
- end
35
-
36
- # Iterates over all sentence nodes. The block passed to this
37
- # method should return a DOM object as a value. After the iteration
38
- # has been completed, the contents of /corpus/body are then replaced
39
- # by the list of these results.
40
- # At the moment, this changes the FileParser object. This should
41
- # probably change in the future, but I don't want to mess with
42
- # cloning now.
43
- def process_s!()
44
- newBody = Element.new('body')
45
- each_s { |dom| newBody.add_element( yield(dom) ) }
46
-
47
- @doc.delete_element("/corpus/body")
48
- @doc.elements["corpus"].add_element(newBody)
49
-
50
- return @doc
51
- end
52
-
53
-
54
-
55
- private
56
-
57
- def ensureParsedDocument()
58
- if @doc == nil then
59
- @doc = Document.new(@file)
60
- end
61
- end
62
-
63
-
64
- end
65
-
66
-
67
-
68
-
69
- #####################################################################
70
-
71
-
72
-
73
-
74
- class FilePartsParser
75
- # <@file> = File object for the corpus
76
- # <@head> = string up to the first <s> tag
77
- # <@tail> = string after the last </s> tag
78
- # <@rest> = string starting with the latest <s> tag (complete this to
79
- # a <s>...</s> structure by reading up to next </s> tag)
80
- # <@readCompletely> = boolean specifying whether there's still something
81
- # left to read in the file
82
-
83
- attr_reader :head, :tail
84
-
85
- def initialize(filename)
86
- @file = File.new(filename)
87
- @readCompletely = false
88
- # read stuff into @head and initialize @rest
89
- @head = ''
90
- begin
91
- while true do
92
- line = @file.readline()
93
- if line =~ /(.*)(<s\s.*)/ then
94
- @head = @head << $1
95
- @rest = $2
96
- break
97
- elsif line =~ /^(.*)(<\/body[\s>].*)$/
98
- # empty corpus
99
- @head = @head << $1
100
- @tail = $2
101
- while (line = @file.readline())
102
- @tail << "\n" + line
103
- end
104
- @readCompletely = true
105
- break
106
- else
107
- @head = @head << line
108
- end
109
- end
110
- rescue EOFError
111
- @readCompletely = true
112
- end
113
- end
114
-
115
- def close()
116
- @file.close()
117
- end
118
-
119
- def extractDOMs()
120
- allDOMs = Array.new
121
-
122
- process_s!() { |dom|
123
- allDOMs.push(dom)
124
- Element.new("x")
125
- }
126
- return allDOMs
127
- end
128
-
129
- def each_s()
130
- process_s!() { |dom|
131
- yield(dom)
132
- Element.new("x")
133
- }
134
- end
135
-
136
- # This function returns the string for the modified corpus.
137
- # It doesn't change the internal state of the FilePartsParser,
138
- # and is much more memory (and probably time) efficient than
139
- # FileParser#process_s!.
140
- # The block that is called by the method is given an element
141
- # as its argument and is expected to return a changed element.
142
- def process_s!()
143
- if @readCompletely
144
- return
145
- end
146
-
147
- ret = ''
148
- scan_s() { |element|
149
- # Process the <s> ... </s> element
150
- doc = Document.new(element)
151
- elt = doc.root
152
- changedElt = yield(elt)
153
-
154
- changedEltAsString = ''
155
- changedElt.write(changedEltAsString, 0)
156
- ret <<= changedEltAsString
157
- }
158
-
159
- return ret
160
- end
161
-
162
- # KE 12.6.03: scan_s :
163
- # doesn't parse a sentence before yielding it
164
- # doesn't allow for any changes
165
- # but otherwise the same as process_s!
166
- def scan_s()
167
- if @readCompletely
168
- return
169
- end
170
-
171
- begin
172
- while true do
173
- # Invariant: At this point, @rest always starts with an
174
- # unseen <s> tag.
175
-
176
- # First, we continue reading until we find the closing </s>
177
- # No exception should occur in this loop if we're parsing
178
- # a valid XML document.
179
- while @rest !~ /^(.*<\/s>)(.*)/m do
180
- @rest = @rest << @file.readline()
181
- end
182
-
183
- element = $1
184
- @rest = $2
185
-
186
- yield(element) # change HERE: element not parsed!
187
-
188
- # Read on up to the next <s>
189
- while @rest !~ /(.*)(<s\s.*)/m do
190
- @rest = @rest << @file.readline()
191
- end
192
-
193
- @rest = $2
194
- end
195
- rescue EOFError
196
- @tail = @rest
197
- @readCompletely = true
198
- end
199
- end
200
-
201
- # KE 5.11.03: get_rest: read all of the file not processed up to this point
202
- # and return it as a string
203
- def get_rest()
204
- begin
205
- while true do
206
- @rest = @rest << @file.readline()
207
- end
208
- rescue EOFError
209
- @readCompletely = true
210
- end
211
- return @rest
212
- end
213
- end
@@ -1,269 +0,0 @@
1
- # RegXML
2
- #
3
- # Katrin Erk June 2005
4
-
5
- # SalsaTigerRegXML: take control of the data structure, no underlying xml
6
- # representation anymore, re-generation of xml on demand
7
-
8
- class RegXML
9
-
10
- def initialize(string, # string representing a single XML element
11
- i_am_text = false) # boolean: xml element (false) or text (true)
12
-
13
- unless string.class == String
14
- raise "First argument to RegXML.new must be string. I got #{string.class.to_s}"
15
- end
16
- if i_am_text
17
- @s = string
18
- @i_am_text = true
19
- else
20
- @s = string.gsub(/\n/, " ").freeze
21
- @i_am_text = false
22
-
23
- element_test()
24
- dyck_test()
25
- end
26
- end
27
-
28
- def to_s()
29
- return xml_readable(@s)
30
- end
31
-
32
- def text?
33
- return @i_am_text
34
- end
35
-
36
- def name()
37
- if @i_am_text
38
- # text
39
- return nil
40
-
41
- else
42
- # xml element
43
- if @s =~ /^\s*<\s*([\w-]+)[\s\/>]/
44
- return $1
45
- else
46
- raise "Cannot parse:\n#{xml_readable(@s)}"
47
- end
48
- end
49
- end
50
-
51
- def attributes()
52
- if @i_am_text
53
- # text
54
- return {}
55
-
56
- else
57
- # xml element
58
-
59
- # remove <element_name from the beginning of @s,
60
- # place the rest up to the first > into elt_contents:
61
- # this is a string of the form
62
- # - either (name=value)*
63
- # - or (name=value)*/
64
- unless @s =~ /^\s*<\s*#{name()}(.*)$/
65
- raise "Cannot parse:\n #{xml_readable(@s)}"
66
- end
67
-
68
- retv = Hash.new
69
- elt_contents = $1
70
-
71
- # repeat until only > or /> is left
72
- while elt_contents !~ /^\s*\/?>/
73
-
74
- # shave off the next name=value pair
75
- # put the rest into elt_contents
76
- # make sure that if the value is quoted with ',
77
- # we accept " inside the value, and vice versa.
78
- unless elt_contents =~ /^\s*([\w-]+)=(['"])(.*?)\2(.*)$/
79
- raise "Cannot parse:\n #{xml_readable(elt_contents)}"
80
- end
81
- retv[$1] = $3
82
- elt_contents = $4
83
- end
84
-
85
- return retv
86
- end
87
- end
88
-
89
- def children_and_text()
90
- if @i_am_text
91
- return []
92
-
93
- else
94
- if unary_element()
95
- # <bla/>, no children
96
- return []
97
- end
98
-
99
- # @s has the form <bla...> ... </bla>.
100
- # remove <bla ...> from the beginning of @s,
101
- # place the rest up to </bla> into children_s:
102
-
103
- mainname = name()
104
- unless @s =~ /^\s*<\s*#{mainname}(\s+[\w-]+=(["']).*?\2)*\s*>(.*?)<\/\s*#{mainname}\s*>\s*$/
105
- raise "Cannot parse:\n #{xml_readable(@s)}"
106
- end
107
-
108
- retv = Array.new
109
- children_s = $3
110
-
111
- # repeat until only whitespace is left
112
- while children_s !~ /^\s*$/
113
-
114
- # shave off the next bit of text
115
- # put the rest into children_s
116
- unless children_s =~ /^\s*(.*?)(<.*$|$)/
117
- $stderr.puts "Whole was:\n #{xml_readable(@s)}"
118
- $stderr.puts
119
- raise "Cannot parse:\n #{xml_readable(children_s)}"
120
- end
121
- unless $1.strip.empty?
122
- children_s = $2
123
- retv << RegXML.new($1, true)
124
- end
125
-
126
- # anything left after we've parsed text?
127
- if children_s =~ /^s*$/
128
- break
129
- end
130
-
131
- # shave off the next child
132
- # and put the rest into children_s
133
-
134
- # determine the next child's name, and the string index at which
135
- # the element start tag ends with either / or >
136
- unless children_s =~ /^\s*(<\s*([\w-]+)(\s+[\w-]+=(["']).*?\4)*\s*)/
137
- $stderr.puts "Whole was:\n #{xml_readable(@s)}"
138
- $stderr.puts
139
- raise "Cannot parse:\n #{xml_readable(children_s)}"
140
- end
141
- childname = $2
142
- child = $1
143
- endofelt_ix = $&.length()
144
-
145
-
146
- # and remove it
147
- case children_s[endofelt_ix..-1]
148
- when /^\/>(.*)$/
149
- # next child is a unary element
150
- children_s = $1
151
- retv << RegXML.new(child + "/>")
152
-
153
- when /^(>.*?<\s*\/\s*#{childname}\s*>)(.*)$/
154
- children_s = $2
155
- retv << RegXML.new(child + $1)
156
-
157
- else
158
- $stderr.puts "Whole was:\n #{xml_readable(@s)}"
159
- $stderr.puts
160
- raise "Cannot parse:\n#{xml_readable(children_s)}"
161
- end
162
- end
163
-
164
- return retv
165
- end
166
- end
167
-
168
- def RegXML.test()
169
- bla = RegXML.new(" <bla blupp='a\"b'
170
- lalala=\"c\">
171
- <lalala> </lalala>
172
- texttext
173
- <lala blupp='b'/>
174
- nochtext
175
- <la> <l/> </la>
176
- </ bla >
177
- ")
178
- puts "name " + bla.name()
179
- puts
180
- puts bla.to_s()
181
- puts
182
- bla.attributes.each { |attr, val|
183
- puts "attr " + attr + "=" + val
184
- }
185
- puts
186
- bla.children_and_text.each { |child_obj|
187
- if child_obj.text?
188
- puts "da text " + child_obj.to_s
189
- else
190
- puts "da child " + child_obj.to_s
191
- end
192
- }
193
- puts
194
-
195
- puts "NEU"
196
- bla = RegXML.new(" < bla blupp='a\"'/> ")
197
- puts "name " + bla.name()
198
- puts
199
- puts bla.to_s()
200
- puts
201
- bla.attributes.each { |attr, val|
202
- puts "attr " + attr + "=" + val
203
- }
204
- puts
205
- bla.children_and_text.each { |child_obj|
206
- if child_obj.text?
207
- puts "da text " + child_obj.to_s
208
- else
209
- puts "da child " + child_obj.to_s
210
- end
211
- }
212
- puts
213
-
214
- end
215
-
216
- ##############
217
- protected
218
-
219
- def unary_element()
220
- # <bla/>
221
- if @s =~ /^\s*<.*\/>\s*$/
222
- return true
223
- else
224
- return false
225
- end
226
- end
227
-
228
- def element_test()
229
- # make sure we have a single XML element, either <bla/> or
230
- # <bla>...</bla>
231
-
232
- if unary_element()
233
- # <bla/>
234
- elsif @s =~ /^\s*<\s*([\w-]+)\W.*?<\/\s*\1\s*>\s*$/
235
- # <bla > ... </bla>
236
- else
237
- raise "Cannot parse:\n #{xml_readable(@s)}"
238
- end
239
- end
240
-
241
- def dyck_test()
242
- # every prefix of @s must have at least as many < as >
243
- opening = 0
244
- closing = 0
245
- @s.scan(/[<>]/) { |bracket|
246
- case bracket
247
- when "<"
248
- opening += 1
249
- when ">"
250
- closing += 1
251
- if closing > opening
252
- raise "More closing than opening brackets in prefix of:\n #{xml_readable(@s)}"
253
- end
254
- end
255
- }
256
-
257
- # and in total, @s must have equally many < and >
258
- unless @s.count("<") == @s.count(">")
259
- raise "Inequal number of brackets in:\n #{xml_readable(@s)}"
260
- end
261
- end
262
-
263
- def xml_readable(string)
264
- return string.gsub(/>/, ">\n")
265
- end
266
- end
267
-
268
- # RegXML.test()
269
-