shalmaneser 0.0.1.alpha → 1.2.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (76) hide show
  1. checksums.yaml +7 -0
  2. data/.yardopts +2 -2
  3. data/CHANGELOG.md +4 -0
  4. data/LICENSE.md +4 -0
  5. data/README.md +49 -0
  6. data/bin/fred +18 -0
  7. data/bin/frprep +34 -0
  8. data/bin/rosy +17 -0
  9. data/lib/common/AbstractSynInterface.rb +35 -33
  10. data/lib/common/Mallet.rb +236 -0
  11. data/lib/common/Maxent.rb +26 -12
  12. data/lib/common/Parser.rb +5 -5
  13. data/lib/common/SynInterfaces.rb +13 -6
  14. data/lib/common/TabFormat.rb +7 -6
  15. data/lib/common/Tiger.rb +4 -4
  16. data/lib/common/Timbl.rb +144 -0
  17. data/lib/common/{FrprepHelper.rb → frprep_helper.rb} +14 -8
  18. data/lib/common/headz.rb +1 -1
  19. data/lib/common/ruby_class_extensions.rb +3 -3
  20. data/lib/fred/FredBOWContext.rb +14 -2
  21. data/lib/fred/FredDetermineTargets.rb +4 -9
  22. data/lib/fred/FredEval.rb +1 -1
  23. data/lib/fred/FredFeatureExtractors.rb +4 -3
  24. data/lib/fred/FredFeaturize.rb +1 -1
  25. data/lib/frprep/CollinsInterface.rb +6 -6
  26. data/lib/frprep/MiniparInterface.rb +5 -5
  27. data/lib/frprep/SleepyInterface.rb +7 -7
  28. data/lib/frprep/TntInterface.rb +1 -1
  29. data/lib/frprep/TreetaggerInterface.rb +29 -5
  30. data/lib/frprep/do_parses.rb +1 -0
  31. data/lib/frprep/frprep.rb +36 -32
  32. data/lib/{common/BerkeleyInterface.rb → frprep/interfaces/berkeley_interface.rb} +69 -95
  33. data/lib/frprep/interfaces/stanford_interface.rb +353 -0
  34. data/lib/frprep/interpreters/berkeley_interpreter.rb +22 -0
  35. data/lib/frprep/interpreters/stanford_interpreter.rb +22 -0
  36. data/lib/frprep/opt_parser.rb +2 -2
  37. data/lib/rosy/AbstractFeatureAndExternal.rb +5 -3
  38. data/lib/rosy/RosyIterator.rb +11 -10
  39. data/lib/rosy/rosy.rb +1 -0
  40. data/lib/shalmaneser/version.rb +1 -1
  41. data/test/functional/sample_experiment_files/fred_test.salsa.erb +1 -1
  42. data/test/functional/sample_experiment_files/fred_train.salsa.erb +1 -1
  43. data/test/functional/sample_experiment_files/prp_test.salsa.erb +2 -2
  44. data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +2 -2
  45. data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +2 -2
  46. data/test/functional/sample_experiment_files/prp_train.salsa.erb +2 -2
  47. data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +2 -2
  48. data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +2 -2
  49. data/test/functional/sample_experiment_files/rosy_test.salsa.erb +1 -1
  50. data/test/functional/sample_experiment_files/rosy_train.salsa.erb +7 -7
  51. data/test/functional/test_frprep.rb +3 -3
  52. data/test/functional/test_rosy.rb +20 -0
  53. metadata +215 -224
  54. data/CHANGELOG.rdoc +0 -0
  55. data/LICENSE.rdoc +0 -0
  56. data/README.rdoc +0 -0
  57. data/lib/common/CollinsInterface.rb +0 -1165
  58. data/lib/common/MiniparInterface.rb +0 -1388
  59. data/lib/common/SleepyInterface.rb +0 -384
  60. data/lib/common/TntInterface.rb +0 -44
  61. data/lib/common/TreetaggerInterface.rb +0 -303
  62. data/lib/frprep/AbstractSynInterface.rb +0 -1227
  63. data/lib/frprep/BerkeleyInterface.rb +0 -375
  64. data/lib/frprep/ConfigData.rb +0 -694
  65. data/lib/frprep/FixSynSemMapping.rb +0 -196
  66. data/lib/frprep/FrPrepConfigData.rb +0 -66
  67. data/lib/frprep/FrprepHelper.rb +0 -1324
  68. data/lib/frprep/ISO-8859-1.rb +0 -24
  69. data/lib/frprep/Parser.rb +0 -213
  70. data/lib/frprep/SalsaTigerRegXML.rb +0 -2347
  71. data/lib/frprep/SalsaTigerXMLHelper.rb +0 -99
  72. data/lib/frprep/SynInterfaces.rb +0 -275
  73. data/lib/frprep/TabFormat.rb +0 -720
  74. data/lib/frprep/Tiger.rb +0 -1448
  75. data/lib/frprep/Tree.rb +0 -61
  76. data/lib/frprep/headz.rb +0 -338
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 83f5f0ca7cc27a632cb46deef7c093df649c61e1
4
+ data.tar.gz: dbc9a29186421206de7bf9b0138f05f89228fad6
5
+ SHA512:
6
+ metadata.gz: 8a87f1e74b16082cba8d2ab49eb33289e8db23f5bdf3cdd4f294901c8119c8bff1239ec870032871d6d2cf69efbaba500058a47827df92be707aba3ab36ab30a
7
+ data.tar.gz: be1f6b6f3e4aa0b20f26437f30c579faf68f03f7c474cb78e28cb1263ef4ab9397ab4d52fbdffa4ac7ceb50a2d3f44cb4200303a7f14b2bdd0cb06fbfae68f0f
data/.yardopts CHANGED
@@ -4,5 +4,5 @@
4
4
  lib/**/*
5
5
  bin/**/*
6
6
  -
7
- CHANGELOG.rdoc
8
- LICENSE.rdoc
7
+ CHANGELOG.md
8
+ LICENSE.md
data/CHANGELOG.md ADDED
@@ -0,0 +1,4 @@
1
+ # Versions
2
+
3
+ ## Version 1.2.0-rc1
4
+
data/LICENSE.md ADDED
@@ -0,0 +1,4 @@
1
+ # LICENSE
2
+
3
+ This software is written in Ruby and is released under the [GNU General Public License](http://www.gnu.org/licenses/gpl-2.0.html) (GPL v2), and the documentation under the [GNU Free Documentation License](http://www.gnu.org/licenses/old-licenses/fdl-1.2.html) (FDL v1.2).
4
+
data/README.md ADDED
@@ -0,0 +1,49 @@
1
+ # [SHALMANESER - a SHALlow seMANtic parSER](http://www.coli.uni-saarland.de/projects/salsa/shal/)
2
+
3
+
4
+ [RubyGems](http://rubygems.org/gems/shalmaneser) | [RTT Project Page](http://bu.chsta.be/projects/shalmaneser/) |
5
+ [Source Code](https://github.com/arbox/shalmaneser) | [Bug Tracker](https://github.com/arbox/shalmaneser/issues)
6
+
7
+ [<img src="https://badge.fury.io/rb/shalmaneser.png" alt="Gem Version" />](http://badge.fury.io/rb/shalmaneser)
8
+ [<img src="https://travis-ci.org/arbox/shalmaneser.png" alt="Build Status" />](https://travis-ci.org/arbox/shalmaneser)
9
+ [<img src="https://codeclimate.com/github/arbox/shalmaneser.png" alt="Code Climate" />](https://codeclimate.com/github/arbox/shalmaneser)
10
+ [<img alt="Bitdeli Badge" src="https://d2weczhvl823v0.cloudfront.net/arbox/shalmaneser/trend.png" />](https://bitdeli.com/free)
11
+
12
+ ## Description
13
+
14
+ Please be careful, the whole thing is under construction!
15
+
16
+ Shalmaneser is a supervised learning toolbox for shallow semantic parsing, i.e. the automatic assignment of semantic classes and roles to text. The system was developed for Frame Semantics; thus we use Frame Semantics terminology and call the classes frames and the roles frame elements. However, the architecture is reasonably general, and with a certain amount of adaptation, Shalmaneser should be usable for other paradigms (e.g., PropBank roles) as well. Shalmaneser caters both for end users and for researchers.
17
+
18
+ For end users, we provide a simple end-user mode that applies the pre-trained classifiers for English (FrameNet annotation / Collins parser) and German (SALSA Frame annotation / Sleepy parser). For researchers interested in investigating shallow semantic parsing, our system is extensively configurable and extendable.
19
+
20
+ ## Origin
21
+ You can find the original versions of Shalmaneser (up to ``1.1``) on the [SALSA](http://www.coli.uni-saarland.de/projects/salsa/shal/) project page.
22
+
23
+ ## Literature
24
+
25
+ K. Erk and S. Padó: Shalmaneser - a flexible toolbox for semantic role assignment. Proceedings of LREC 2006, Genoa, Italy. [Click here for details](http://www.nlpado.de/~sebastian/pub/papers/lrec06_erk.pdf).
26
+
27
+ ## Documentation
28
+
29
+ The project documentation can be found in our [doc](doc/index.md) folder.
30
+
31
+ ## Development
32
+
33
+ We are currently working with one development branch and two release targets:
34
+
35
+ - ``dev`` - our development branch incorporating the current changes, for now pointing to ``1.2``;
36
+
37
+ - ``1.2`` - intermediate target;
38
+
39
+ - ``2.0`` - final target.
40
+
41
+ ## Installation
42
+
43
+ See the installation instructions in the [doc](doc/index.md#installation) folder.
44
+
45
+ ### Machine Learning Systems
46
+
47
+ - http://sourceforge.net/projects/maxent/files/Maxent/2.4.0/
48
+
49
+
data/bin/fred ADDED
@@ -0,0 +1,18 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- encoding: utf-8 -*-
3
+
4
+ # AB, 2011-11-13
5
+
6
+ # fred
7
+ # Katrin Erk, April 05
8
+ #
9
+ # Frame disambiguation system:
10
+ # frame assignment as word sense disambiguation
11
+
12
+ require 'fred/opt_parser'
13
+ require 'fred/fred'
14
+
15
+ options = Fred::OptParser.parse(ARGV)
16
+
17
+ fred = Fred::Fred.new(options)
18
+ fred.assign
data/bin/frprep ADDED
@@ -0,0 +1,34 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- encoding: utf-8 -*-
3
+
4
+ # AB, 2010-11-25
5
+
6
+ # frprep
7
+ # Katrin Erk July 05
8
+ #
9
+ # Preprocessing for Fred and Rosy:
10
+ # accept input as plain text,
11
+ # FrameNet XML, Salsa-tabular format,
12
+ # or SalsaTigerXML,
13
+ # lemmatize, POS-tag and parse
14
+ # (if asked to do so)
15
+ # and in any case produce output in
16
+ # SalsaTigerXML.
17
+ #
18
+ # Extensions to SalsaTigerXML introduced by frprep:
19
+ #
20
+ # - "lemma": lemma. Attribute of terminals.
21
+ # - "head": head word (not lemma!) of constituent. Attribute of nonterminals.
22
+ # - "fn_gf": FrameNet grammatical function label, attached to the maximal
23
+ # constituents covering the terminals labeled with that label
24
+
25
+
26
+ require 'frprep/frprep'
27
+ require 'frprep/opt_parser'
28
+
29
+
30
+ options = FrPrep::OptParser.parse(ARGV)
31
+
32
+
33
+ preprocessor = FrPrep::FrPrep.new(options)
34
+ preprocessor.transform
data/bin/rosy ADDED
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+ # -*- encoding: utf-8 -*-
3
+
4
+ # AB: 2011-11-14
5
+ # rosy.rb
6
+ # KE, SP April 05
7
+ #
8
+ # Main file of the Rosy role assignment system.
9
+
10
+
11
+ require 'rosy/opt_parser'
12
+ require 'rosy/rosy'
13
+
14
+ options = Rosy::OptParser.parse(ARGV)
15
+
16
+ rosy = Rosy::Rosy.new(options)
17
+ rosy.assign
@@ -25,10 +25,10 @@
25
25
 
26
26
  require "tempfile"
27
27
 
28
- require "common/ruby_class_extensions"
28
+ require 'common/ruby_class_extensions'
29
29
 
30
- require "common/ISO-8859-1"
31
- require "common/Parser"
30
+ require 'common/ISO-8859-1'
31
+ require 'common/Parser'
32
32
  require "common/SalsaTigerRegXML"
33
33
  require "common/TabFormat"
34
34
 
@@ -42,14 +42,14 @@ class SynInterface
42
42
  ###
43
43
  # returns a string: the name of the system
44
44
  # e.g. "Collins" or "TNT"
45
- def SynInterface.system()
45
+ def self.system
46
46
  raise "Overwrite me"
47
47
  end
48
48
 
49
49
  ###
50
50
  # returns a string: the service offered
51
51
  # one of "lemmatizer", "parser", "pos tagger"
52
- def SynInterface.service()
52
+ def self.service
53
53
  raise "Overwrite me"
54
54
  end
55
55
 
@@ -73,10 +73,10 @@ class SynInterface
73
73
  def process_dir(in_dir, # string: name of input directory
74
74
  out_dir) # string: name of output directory
75
75
 
76
- Dir[in_dir+"*#{@insuffix}"].each {|infilename|
77
- outfilename = out_dir + File.basename(infilename, @insuffix) + @outsuffix
78
- process_file(infilename,outfilename)
79
- }
76
+ Dir["#{in_dir}*#{@insuffix}"].each do |infilename|
77
+ outfilename = "#{out_dir}#{File.basename(infilename, @insuffix)}#{@outsuffix}"
78
+ process_file(infilename, outfilename)
79
+ end
80
80
  end
81
81
 
82
82
  ###
@@ -91,13 +91,13 @@ class SynInterface
91
91
  ######
92
92
  protected
93
93
 
94
- def SynInterface.announce_me()
94
+ def self.announce_me
95
95
  if defined?(SynInterfaces)
96
96
  # yup, we have a class to which we can announce ourselves
97
- SynInterfaces.add_interface(eval(self.name()))
97
+ SynInterfaces.add_interface(eval(self.name))
98
98
  else
99
99
  # no interface collector class
100
- $stderr.puts "Interface #{self.name()} not announced: no SynInterfaces."
100
+ STDERR.puts "Interface #{self.name} not announced: no SynInterfaces."
101
101
  end
102
102
  end
103
103
  end
@@ -124,14 +124,13 @@ class SynInterfaceSTXML < SynInterface
124
124
  def to_stxml_dir(in_dir, # string: name of dir with parse files
125
125
  out_dir) # string: name of output dir
126
126
 
127
- Dir[in_dir+"*#{@outsuffix}"].each { |parsefilename|
128
- stxmlfilename = out_dir + File.basename(parsefilename, @outsuffix) + @stsuffix
127
+ Dir["#{in_dir}*#{@outsuffix}"].each do |parsefilename|
128
+ stxmlfilename = "#{out_dir}#{File.basename(parsefilename, @outsuffix)}#{@stsuffix}"
129
129
  to_stxml_file(parsefilename, stxmlfilename)
130
- }
130
+ end
131
131
  end
132
132
 
133
- def to_stxml_file(infilename,
134
- outfilename)
133
+ def to_stxml_file(infilename, outfilename)
135
134
  raise "Overwrite me"
136
135
  end
137
136
 
@@ -142,22 +141,25 @@ class SynInterfaceSTXML < SynInterface
142
141
  # SalsaTigerSentence nodes returned by each_sentence():
143
142
  # map the n-th word of the tab sentence to the n-th terminal of
144
143
  # the SalsaTigerSentence
145
- def SynInterfaceSTXML.standard_mapping(sent, tabsent)
146
- retv = Hash.new
144
+ def self.standard_mapping(sent, tabsent)
145
+ retv = {}
146
+
147
147
  if sent.nil?
148
- return nil
149
- end
150
- terminals = sent.terminals_sorted()
151
- if tabsent
152
- tabsent.each_line_parsed { |l|
153
- if (t = terminals[l.get("lineno")])
154
- retv[l.get("lineno")] = [t]
155
- else
156
- retv[l.get("lineno")] = []
148
+ retv = nil
149
+ else
150
+ terminals = sent.terminals_sorted
151
+ if tabsent
152
+ tabsent.each_line_parsed do |l|
153
+ if (t = terminals[l.get("lineno")])
154
+ retv[l.get("lineno")] = [t]
155
+ else
156
+ retv[l.get("lineno")] = []
157
+ end
157
158
  end
158
- }
159
+ end
159
160
  end
160
- return retv
161
+
162
+ retv
161
163
  end
162
164
 
163
165
 
@@ -185,13 +187,13 @@ class SynInterfaceSTXML < SynInterface
185
187
 
186
188
  # write Salsa/Tiger XML to tempfile
187
189
  tf = Tempfile.new("SynInterface")
188
- tf.close()
190
+ tf.close
189
191
  to_stxml_file(infilename, tf.path)
190
- tf.flush()
192
+ tf.flush
191
193
 
192
194
  # get matching tab file, read
193
195
  tab_reader = get_tab_reader(infilename)
194
- tab_sentences = Array.new
196
+ tab_sentences = []
195
197
  tab_reader.each_sentence { |s| tab_sentences << s }
196
198
 
197
199
  # read Salsa/Tiger sentences and yield them
@@ -0,0 +1,236 @@
1
+ # wrapper script for the Mallet toolkit Maxent classifier
2
+
3
+ # Problem with Winnow: cannot be serialised (written to file). Support dropped.
4
+
5
+ # sp 27 10 04
6
+
7
+
8
+ require "tempfile"
9
+ require "ftools"
10
+
11
+ class Mallet
12
+
13
+ ###
14
+ def initialize(program_path,parameters)
15
+
16
+ if parameters.empty?
17
+ puts "Error: Mallet needs two paths (first the location of mallet itself and then the location of the interface, usually program/tools/mallet)."
18
+ puts "I got only the program path."
19
+ Kernel.exit
20
+ end
21
+
22
+ @malletpath = program_path
23
+ @interface_path = parameters.first
24
+ unless @malletpath =~ /\/$/
25
+ @malletpath = @malletpath + "/"
26
+ end
27
+
28
+ @learner = "MaxEnt,gaussianPriorVariance=1.0"
29
+
30
+ # classpath for mallet
31
+
32
+ @cp = "#{ENV["CLASSPATH"]}:#{@malletpath}class:#{@malletpath}lib/bsh.jar"
33
+
34
+ end
35
+
36
+ ###
37
+ def train(infilename,classifier_location)
38
+ csvfile = Tempfile.new(File.basename(infilename)+".csvtrain")
39
+ infile = File.new(infilename)
40
+ c45_to_csv(infile,csvfile) # training data in csv format
41
+ infile.close
42
+ csvfile.close
43
+ @mallet_train_vectors = infilename+".trainvectors" # training data in mallet format
44
+ if classifier_location
45
+ @classifier_mallet_path = classifier_location
46
+ else
47
+ @classifier_mallet_path = infilename+".classifier"
48
+ end
49
+
50
+ command1 = [@malletpath+"bin/csv2vectors ",
51
+ " --input ",csvfile.path,
52
+ " --output ",@mallet_train_vectors].join("")
53
+
54
+ command2 = ["cd #{@interface_path}; ",
55
+ "java -cp #{@cp} -Xmx1000m Train ",
56
+ " --train ",@mallet_train_vectors,
57
+ " --out ",@classifier_mallet_path,
58
+ " --trainer ",@learner].join("")
59
+ # STDERR.puts "[train 1] "+command1
60
+ successfully_run(command1) # encode
61
+ # STDERR.puts "[train 2] "+command2
62
+ successfully_run(command2) # train
63
+ csvfile.close(true)
64
+ end
65
+
66
+ def write(classifier_file)
67
+ if @classifier_mallet_path
68
+ %x{cp #{@classifier_mallet_path} #{classifier_file}.classifier} # store classifier
69
+ # File.chmod(0664,classifier_file+".classifier")
70
+ end
71
+ if @mallet_train_vectors
72
+ %x{cp #{@mallet_train_vectors} #{classifier_file}.trainvectors} # store train vectors to recreate pipe for testing data
73
+ # File.chmod(0664,classifier_file+".trainvectors")
74
+ end
75
+ end
76
+
77
+ ###
78
+ def exists?(classifier_file)
79
+ return (FileTest.exists?(classifier_file+".trainvectors") and
80
+ FileTest.exists?(classifier_file+".classifier"))
81
+ end
82
+
83
+ ###
84
+ # returns true iff both classifier files (.trainvectors and .classifier) exist
85
+ def read(classifier_file)
86
+ @mallet_train_vectors = classifier_file+".trainvectors" # training data in mallet format
87
+ @classifier_mallet_path = classifier_file+".classifier"
88
+ unless FileTest.exists?(@mallet_train_vectors)
89
+ $stderr.puts "No classifier file "+@mallet_train_vectors
90
+ return false
91
+ end
92
+ unless FileTest.exists?(@classifier_mallet_path)
93
+ $stderr.puts "No classifier file "+@classifier_mallet_path
94
+ return false
95
+ end
96
+ return true
97
+ end
98
+
99
+ ###
100
+ def apply(infilename,outfilename)
101
+ unless @classifier_mallet_path and @mallet_train_vectors
102
+ return false
103
+ end
104
+
105
+ # STDERR.puts "Testing on "+infilename
106
+ csvfile = Tempfile.new(File.basename(infilename)+".csvtest")
107
+
108
+ infile = File.new(infilename)
109
+ c45_to_csv(infile,csvfile) # test data in csv format
110
+ infile.close
111
+ csvfile.close
112
+
113
+ test_mallet_path = infilename+".test.vectors" # test data in mallet format
114
+
115
+ # $stderr.puts "test file in " + infilename
116
+ # $stderr.puts "using training vectors from " + @mallet_train_vectors
117
+
118
+ # copy train vectors to temp file.
119
+ # reason: mallet in std edition reads _and writes_ this file
120
+ # if rosy is interrupted, corrupted (ie incomplete) train vector files
121
+ # result
122
+
123
+ tempfile = Tempfile.new("mallet")
124
+ tempfilename = tempfile.path
125
+ unless File.copy(@mallet_train_vectors,tempfilename)
126
+ return false
127
+ end
128
+
129
+ command1 = [@malletpath+"bin/csv2vectors", # encode testing data
130
+ " --input ",csvfile.path,
131
+ " --output ",test_mallet_path,
132
+ " --use-pipe-from ",tempfilename].join("")
133
+
134
+ # $stderr.puts "Mallet encode: " + command1
135
+ unless successfully_run(command1) # encode
136
+ return false
137
+ end
138
+
139
+ File.safe_unlink(tempfilename)
140
+
141
+ # some error in encoding?
142
+ unless FileTest.exists?(test_mallet_path)
143
+ return false
144
+ end
145
+
146
+ command2 = ["cd #{@interface_path}; ",
147
+ "java -cp #{@cp} -Xmx1000m Classify ",
148
+ @classifier_mallet_path," ",
149
+ test_mallet_path," ",
150
+ "> ",outfilename].join("")
151
+
152
+ # classify
153
+ # $stderr.puts "Mallet classify: " + command2
154
+ unless successfully_run(command2)
155
+ return false
156
+ end
157
+
158
+ # some error in classification
159
+ unless FileTest.exists?(outfilename)
160
+ return false
161
+ end
162
+
163
+ # no errors = success
164
+ csvfile.close(true)
165
+ return true
166
+ end
167
+
168
+ #####
169
+ # format of Mallet result file:
170
+ # <best label> <confidence> \t <secondbest_label> <confidence>....
171
+ def read_resultfile(filename)
172
+ begin
173
+ f = File.new(filename)
174
+ rescue
175
+ $stderr.puts "Mallet error: cannot read Mallet result file #{filemame}."
176
+ return nil
177
+ end
178
+
179
+ retv = Array.new()
180
+
181
+ f.each { |line|
182
+ line_results = Array.new()
183
+ pieces = line.split()
184
+
185
+ while not(pieces.empty?)
186
+ label = pieces.shift()
187
+
188
+ begin
189
+ confidence = pieces.shift().to_f()
190
+ rescue
191
+ $stderr.puts "Error reading mallet output: invalid line: #{line}"
192
+ confidence = 0
193
+ end
194
+
195
+ line_results << [label, confidence]
196
+ end
197
+ retv << line_results
198
+ }
199
+
200
+ return retv
201
+ end
202
+
203
+
204
+ ###################################
205
+ private
206
+
207
+ ###
208
+ # mallet needs "comma separated values"-file
209
+ # input: features separated by comma
210
+ # output:
211
+ # line_number classlabel features_joined_by_spaces
212
+ def c45_to_csv(inpipe,outpipe)
213
+ idx = 0
214
+ while (line = inpipe.gets)
215
+ line.chomp!
216
+ idx += 1
217
+ la = line.split(",")
218
+ label = la.pop
219
+ if label[-1,1] == "."
220
+ label.chop!
221
+ end
222
+ outpipe.puts [idx,label].join(" ")+" "+la.join(" ")
223
+ end
224
+ end
225
+
226
+ ###
227
+ def successfully_run(command)
228
+ retv = Kernel.system(command)
229
+ unless retv
230
+ $stderr.puts "Error running classifier. Continuing."
231
+ $stderr.puts "Offending command: "+command
232
+ # exit 1
233
+ end
234
+ return retv
235
+ end
236
+ end