RubyGems - shalmaneser-rosy - Versions diffs - 1.2.0.rc4 → 1.2.rc5 - Mend

shalmaneser-rosy 1.2.0.rc4 → 1.2.rc5

Files changed (41) hide show

checksums.yaml +4 -4
data/README.md +47 -18
data/bin/rosy +14 -7
data/lib/rosy/FailedParses.rb +22 -20
data/lib/rosy/FeatureInfo.rb +35 -31
data/lib/rosy/GfInduce.rb +132 -130
data/lib/rosy/GfInduceFeature.rb +86 -68
data/lib/rosy/InputData.rb +59 -55
data/lib/rosy/RosyConfusability.rb +47 -40
data/lib/rosy/RosyEval.rb +55 -55
data/lib/rosy/RosyFeatureExtractors.rb +295 -290
data/lib/rosy/RosyFeaturize.rb +54 -67
data/lib/rosy/RosyInspect.rb +52 -50
data/lib/rosy/RosyIterator.rb +73 -67
data/lib/rosy/RosyPhase2FeatureExtractors.rb +48 -48
data/lib/rosy/RosyPruning.rb +39 -31
data/lib/rosy/RosyServices.rb +116 -115
data/lib/rosy/RosySplit.rb +55 -53
data/lib/rosy/RosyTask.rb +7 -3
data/lib/rosy/RosyTest.rb +174 -191
data/lib/rosy/RosyTrain.rb +46 -50
data/lib/rosy/RosyTrainingTestTable.rb +101 -99
data/lib/rosy/TargetsMostFrequentFrame.rb +13 -9
data/lib/rosy/{AbstractFeatureAndExternal.rb → abstract_feature_extractor.rb} +22 -97
data/lib/rosy/abstract_single_feature_extractor.rb +52 -0
data/lib/rosy/external_feature_extractor.rb +35 -0
data/lib/rosy/opt_parser.rb +231 -201
data/lib/rosy/rosy.rb +63 -64
data/lib/rosy/rosy_conventions.rb +66 -0
data/lib/rosy/rosy_error.rb +15 -0
data/lib/rosy/var_var_restriction.rb +16 -0
data/lib/shalmaneser/rosy.rb +1 -0
metadata +26 -19
data/lib/rosy/ExternalConfigData.rb +0 -58
data/lib/rosy/View.rb +0 -418
data/lib/rosy/rosy_config_data.rb +0 -121
data/test/frprep/test_opt_parser.rb +0 -94
data/test/functional/functional_test_helper.rb +0 -58
data/test/functional/test_fred.rb +0 -47
data/test/functional/test_frprep.rb +0 -99
data/test/functional/test_rosy.rb +0 -40

data/lib/rosy/GfInduce.rb CHANGED Viewed

@@ -7,13 +7,13 @@
 # induce a mapping from parse tree paths to grammatical functions from this information
 # and apply it to new sentences
-require "common/AbstractSynInterface"
-require "common/ruby_class_extensions"
+require "ruby_class_extensions"
 #####################################################################
 # Management of mapping from GFs to paths
 #####################################################################
+module Shalmaneser
+module Rosy
 class GfiGfPathMapping
   #########################################
@@ -26,18 +26,18 @@ class GfiGfPathMapping
     @interpreter = interpreter_class
     # hash: POS(string) -> hash gf(string) -> hash: path_string -> frequency(int)
-    @gf_to_paths = Hash.new
+    @gf_to_paths = {}
-    # hash: POS(string)-> hash: gf(string) -> hash: one edge of a path ->
+    # hash: POS(string)-> hash: gf(string) -> hash: one edge of a path ->
     #  frequency(int) | hash: one edge of a path -> ...
-    @gf_to_edgelabel = Hash.new
+    @gf_to_edgelabel = {}
     # hash: word(string) -> array: [gf, prep, head_category]
-    @word_to_gflist = Hash.new
+    @word_to_gflist = {}
     # hash: path as string(string) -> array of steps
     # where a step is a tuple of strings [{U, D}, edgelabel, nodelabel]
-    @pathstring_to_path = Hash.new
+    @pathstring_to_path = {}
   end
   #########################################
@@ -67,7 +67,7 @@ class GfiGfPathMapping
     # remember the path as an array of triples [direction, edgelabel, nodelabel]
     # as hash value of the path-as-string
     unless @pathstring_to_path[path_s]
-      @pathstring_to_path[path_s] = Array.new
+      @pathstring_to_path[path_s] = []
       path.each_step { |direction, edgelabel, nodelabel, node|
         @pathstring_to_path[path_s] << [direction, edgelabel, nodelabel]
       }
@@ -76,7 +76,7 @@ class GfiGfPathMapping
     # store the mapping in the
     # gf -> path hash
     unless @gf_to_paths[pos]
-      @gf_to_paths[pos] = Hash.new
+      @gf_to_paths[pos] = {}
     end
     unless @gf_to_paths[pos][gf]
       @gf_to_paths[pos][gf] = Hash.new(0)
@@ -86,7 +86,7 @@ class GfiGfPathMapping
     # remember this gf/pt tuple as possible GF of the current lemma
     unless @word_to_gflist[lemmapos]
-      @word_to_gflist[lemmapos] = Array.new
+      @word_to_gflist[lemmapos] = []
     end
     unless @word_to_gflist[lemmapos].include? [gf, prep, headcat]
       @word_to_gflist[lemmapos] << [gf, prep, headcat]
@@ -97,13 +97,13 @@ class GfiGfPathMapping
   # finish up inducing:
   #  reencode information in a fashion
   #  that makes apply() faster
-  def finish_inducing()
+  def finish_inducing
     # make sure gf_to_edgelabel is empty at the start
-    @gf_to_edgelabel.clear()
+    @gf_to_edgelabel.clear
     @gf_to_paths.each_pair { |pos, gf_to_paths_to_freq|
       unless @gf_to_edgelabel[pos]
-        @gf_to_edgelabel[pos] = Hash.new()
+        @gf_to_edgelabel[pos] = {}
       end
       gf_to_paths_to_freq.each_pair { |gf, paths_to_freq|
@@ -115,16 +115,16 @@ class GfiGfPathMapping
             $stderr.puts "found empty path for #{gf}, frequency #{freq}. Skipping."
             next
           end
           if freq >= 5 or
               gf =~ /Head|Appositive|Quant|Protagonist/
             # path frequent enough: list it
             unless @gf_to_edgelabel[pos][gf]
-              @gf_to_edgelabel[pos][gf] = Hash.new()
+              @gf_to_edgelabel[pos][gf] = {}
             end
-            enter_path(@gf_to_edgelabel[pos][gf], steps.clone(), freq)
+            enter_path(@gf_to_edgelabel[pos][gf], steps.clone, freq)
           end
         }
       }
@@ -137,7 +137,7 @@ class GfiGfPathMapping
   ###
   # test output
-  def test_output()
+  def test_output
     # gf_to_paths:
     # sum frequencies, compare frequency against average path length
     puts "============================="
@@ -148,26 +148,26 @@ class GfiGfPathMapping
 #         puts "================"
 #         puts "POS #{pos} GF #{gf}:"
 #         @gf_to_paths[pos][gf].each_pair { |path_s, freq|
-#           puts "#{path_s} freq:#{freq} len:#{@pathstring_to_path[path_s].length()}"
+#           puts "#{path_s} freq:#{freq} len:#{@pathstring_to_path[path_s].length}"
 #         }
 #       }
-#     }
+#     }
     @gf_to_paths.each_key { |pos|
       @gf_to_paths[pos].each_key { |gf|
         puts "================"
         puts "POS #{pos} GF #{gf}:"
         @gf_to_paths[pos][gf].values.uniq.sort { |a, b| b <=> a}.each { |frequency|
           sum = 0
           count = 0
           @gf_to_paths[pos][gf].each_pair { |path_s, otherfreq|
             if otherfreq == frequency
               count += 1
-              sum += @pathstring_to_path[path_s].length()
+              sum += @pathstring_to_path[path_s].length
             end
           }
           avg_pathlen = sum.to_f / count.to_f
           puts "     Frequency #{frequency}: #{count} path(s)"
           puts "                  #{avg_pathlen} avg. path len"
         }
@@ -193,7 +193,7 @@ class GfiGfPathMapping
   end
   #########################################
-  # Restricting induced mappings
+  # Restricting induced mappings
   # to achieve better mappings
   #########################################
@@ -202,7 +202,7 @@ class GfiGfPathMapping
   # exclude all paths that include an Up edge
   #
   # changes @gf_to_edgelabel, not reversible
-  def restrict_to_downpaths()
+  def restrict_to_downpaths
     @gf_to_edgelabel.each_value { |pos_specific|
       pos_specific.each_value { |hash_or_val|
         restrict_pathhash_to_downpaths(hash_or_val)
@@ -249,7 +249,7 @@ class GfiGfPathMapping
   # by comparing paths in the parse tree
   # against the GF/path mappings stored in @gf_to_edgelabel
   #
-  # returns:
+  # returns:
   # hash: SynNode -> tuple [GF(string), preposition(string), frequency(integer)]
   def potential_gfs_of_node(start_node,  # SynNode
                             lemma,       # string: lemma for start_node
@@ -257,10 +257,10 @@ class GfiGfPathMapping
     # determine possible GFs of a SynNode:
-    #
+    #
     # hash: SynNode(some node in this sentence) -> list of tuples [gf label, prep, headcat, hash of steps]
     # initialize with just the entry for the start node
-    potential_gfs = Hash.new
+    potential_gfs = {}
     potential_gfs[start_node] = potential_gfs_of_lemma(lemma, pos)
 #     $stderr.puts "HIER #{lemma} " + potential_gfs_of_lemma(lemma, pos).map { |gf, prep, hc, hash|
 #       "#{gf}:#{prep}:#{hc} "
@@ -274,7 +274,7 @@ class GfiGfPathMapping
     agenda = [start_node]
     # been_there: list of SynNode objects
     #  that have been considered already and needn't be visited again
-    been_there = Hash.new
+    been_there = {}
     been_there[start_node] = true
     # hash: SynNode -> tuple [GF(string), preposition(string), frequency(integer)]
@@ -282,17 +282,17 @@ class GfiGfPathMapping
     #      frequency: frequency with which the path from verb to GF has
     #                 been seen in the FN data (such that we can keep
     #                 the best path and discard others)
-    node_to_label_and_freq = Hash.new()
+    node_to_label_and_freq = {}
     while not(agenda.empty?)
-      prev_node = agenda.shift()
+      prev_node = agenda.shift
       unless potential_gfs[prev_node]
         # no further GFs to be reached from prev_node:
         # shouldn't be here, but never mind, just ignore
         next
       end
       # surrounding_nodes returns a list of pairs [SynNode, Path object]
       @interpreter.surrounding_nodes(prev_node, true).each { |node, path|
         myprep = @interpreter.preposition(node)
@@ -313,10 +313,10 @@ class GfiGfPathMapping
         been_there[node] = true
         unless potential_gfs[node]
-          potential_gfs[node] = Array.new
+          potential_gfs[node] = []
         end
-        path.each_step() { |step|
+        path.each_step { |step|
           # each edge from prev_node to node:
           # see whether we can walk this edge to reach some of the GFs
           # still to be reached
@@ -326,16 +326,16 @@ class GfiGfPathMapping
           potential_gfs[prev_node].each { |gf, prep, headcat, hash|
             if hash[step_s]
-              # yes, there is still a possibility of reaching gf
+              # yes, there is still a possibility of reaching gf
               # from our current node
-              if hash[step_s].kind_of? Integer
+              if hash[step_s].is_a? Integer
                 # actually, we have reached gf,
                 # and hash[last_edge] is the frequency with which
                 # this path has led to this GF in the FN data
                 freq = hash[step_s]
                 # check whether node has the right preposition
                 # and the right head category
                 if myprep != prep or
@@ -351,13 +351,13 @@ class GfiGfPathMapping
                   # or the old frequency was lower than the current one:
                   # keep the new entry
                   node_to_label_and_freq[node] = [gf, prep, freq]
                 else
-                  # this node has been assigned a GF before, and the
+                  # this node has been assigned a GF before, and the
                   # other frequency was higher:
                   # discard the current entry
                 end
               else
                 # we have not yet reached gf, but we still might
                 # at the next node we meet from here
@@ -401,7 +401,7 @@ class GfiGfPathMapping
   #      direction, edgelabel, nodelabel
   #
   # returns: string, the key
-  def string_step(step)
+  def string_step(step)
     direction = step[0]
     edgelabel = step[1]
     nodelabel = step[2]
@@ -420,28 +420,28 @@ class GfiGfPathMapping
                  chainlinks, # array: string*string*string
                  frequency)  # integer: frequency of this mapping
     # take off first chain link
-    key = string_step(chainlinks.shift())
+    key = string_step(chainlinks.shift)
     if chainlinks.empty?
       # that was the last link, actually
       hash[key] = frequency
     else
       # more links available
       unless hash[key]
-        hash[key] = Hash.new()
+        hash[key] = {}
       end
-      if hash[key].kind_of? Integer
-        # there is a shorter path for the same GF,
+      if hash[key].is_a? Integer
+        # there is a shorter path for the same GF,
         # ending at the point where we are now.
         # which frequency is higher?
         if frequency > hash[key]
-          hash[key] = Hash.new()
+          hash[key] = {}
         else
           return
         end
       end
       enter_path(hash[key], chainlinks, frequency)
     end
   end
@@ -457,8 +457,8 @@ class GfiGfPathMapping
   def print_entries(hash, num_spaces)
     hash.each_pair { |first_link, rest|
       print " "*num_spaces, first_link
-      if rest.kind_of? Integer
+      if rest.is_a? Integer
         puts "  #{rest}"
       else
         puts
@@ -468,7 +468,7 @@ class GfiGfPathMapping
   end
   #########################################
-  # Restricting induced mappings
+  # Restricting induced mappings
   # to achieve better mappings
   #########################################
@@ -478,7 +478,7 @@ class GfiGfPathMapping
   # kill all entries whose keys describe an Up step in the path,
   # go into recursion for remaining entries
   def restrict_pathhash_to_downpaths(hash_or_val) # path hash or integer freq
-    if hash_or_val.kind_of? Integer
+    if hash_or_val.is_a? Integer
       return
     end
@@ -498,12 +498,12 @@ class GfiGfPathMapping
   ###
   # recursive function:
-  # if the argument is a hash and
+  # if the argument is a hash and
   # the remaining path length is 0, kill all entries
   # else go into recursion for all entries with reduced path length
   def restrict_pathhash_len(hash_or_val,  # path hash or integer freq
-			    n)            # restrict paths from what length?
-    if hash_or_val.kind_of? Integer
+                            n)            # restrict paths from what length?
+    if hash_or_val.is_a? Integer
       return
     end
@@ -513,7 +513,7 @@ class GfiGfPathMapping
       hash_or_val.keys.each { |k| hash_or_val.delete(k) }
     else
       hash_or_val.each_value { |next_hash|
-	restrict_pathhash_len(next_hash, n-1)
+        restrict_pathhash_len(next_hash, n-1)
       }
     end
   end
@@ -525,9 +525,9 @@ class GfiGfPathMapping
   ###
   # given a lemma,
   # look in its list of all GFs that we have ever found for that lemma
-  #
+  #
   # returns: array of pairs [gf label, point in gf_to_edgelabel hash]
-  #   all the labels of GFs of this word,
+  #   all the labels of GFs of this word,
   #   and for each GF, the matching GF-to-path hash
   def potential_gfs_of_lemma(lemma, pos)
@@ -566,7 +566,7 @@ class GfiSubcatFrames
   def initialize(include_sem) # boolean
     # hash: word(string) -> array:[frame(string), subcatframe]
     #  with subcatframe an array of tuples [gf, prep, fe, multiplicity]
-    @word_to_subcatframes = Hash.new
+    @word_to_subcatframes = {}
     # hash: <subcatframe encoded as string> -> frequency
     @subcat_to_freq = Hash.new(0)
@@ -591,9 +591,9 @@ class GfiSubcatFrames
     unless @include_sem
       frame = nil
     end
     unless @word_to_subcatframes[lemmapos]
-      @word_to_subcatframes[lemmapos] = Array.new
+      @word_to_subcatframes[lemmapos] = []
     end
     # reencode subcat frame:
@@ -601,27 +601,27 @@ class GfiSubcatFrames
     #
     # multiplicity is either "one" or "many", depending on
     # the number of times the same gf/prep pair occurred.
-    # If the same gf/prep pair occurred with different FEs, they
+    # If the same gf/prep pair occurred with different FEs, they
     # will be concatenated into a space-separated string
     # with a single subcat entry
     count_gfprep = Hash.new(0)
-    gfprep_to_fe = Hash.new
+    gfprep_to_fe = {}
     scf.each { |gf, prep, fe|
       count_gfprep[[gf, prep]] += 1
-	unless gfprep_to_fe[[gf, prep]]
-	  gfprep_to_fe[[gf, prep]] = Array.new
-	end
-	unless gfprep_to_fe[[gf, prep]].include?(fe)
-	  gfprep_to_fe[[gf, prep]] << fe
-	end
+        unless gfprep_to_fe[[gf, prep]]
+          gfprep_to_fe[[gf, prep]] = []
+        end
+        unless gfprep_to_fe[[gf, prep]].include?(fe)
+          gfprep_to_fe[[gf, prep]] << fe
+        end
     }
     subcatframe = count_gfprep.to_a.map { |gfprep, count|
       gf, prep = gfprep
       if @include_sem
-	fe = gfprep_to_fe[[gf, prep]].join(" ")
+        fe = gfprep_to_fe[[gf, prep]].join(" ")
       else
-	fe = nil
+        fe = nil
       end
       if count == 1
         [gf, prep, fe, "one"]
@@ -632,7 +632,7 @@ class GfiSubcatFrames
       if a[0] != b[0]
         # compare GF
         a[0] <=> b[0]
-      else
+      else
         # compare prep
         a[1].to_s <=> b[1].to_s
       end
@@ -652,13 +652,13 @@ class GfiSubcatFrames
   #########################################
   ###
-  def test_output()
+  def test_output
     puts "WORD_TO_SUBCATFRAMES"
     @word_to_subcatframes.each_pair { |word, frames_and_mappings|
       puts word
       frames_and_mappings.each { |frame, subcatframe|
         puts "\t#{frame} "+ subcatframe.to_a.map { |gf, prep, fe, freq| "[#{gf}]:#{prep}:#{fe}:#{freq}" }.join(" ")
-	puts "\t\tfreq #{@subcat_to_freq[string_subcatframe(subcatframe)]}"
+        puts "\t\tfreq #{@subcat_to_freq[string_subcatframe(subcatframe)]}"
       }
     }
     puts
@@ -686,11 +686,11 @@ class GfiSubcatFrames
   # hash: SynNode -> tuple [GF(string), preposition(string), frequency(integer)]
   #
   # strict: boolean. If true, return only those subcat frames that exactly match
-  #   all GFs listed in node_to_gf. If false, also return subcat frames that
+  #   all GFs listed in node_to_gf. If false, also return subcat frames that
   #   match a subset of the GFs mentioned in node_to_gf.
-  #
-  # returns: list of tuples [frame(string), subcat frame, frequency(integer)],
-  # where a subcat frame is an array of tuples
+  #
+  # returns: list of tuples [frame(string), subcat frame, frequency(integer)],
+  # where a subcat frame is an array of tuples
   # [gf (string), prep(string or nil), fe(string), synnodes(array:SynNode)]
   #    and the syn_nodes are sorted by confidence, best first
   def match(start_node, # SynNode
@@ -703,10 +703,10 @@ class GfiSubcatFrames
       return []
     end
-#     $stderr.puts "HIER4 GFs found: " + node_to_gf.values.map { |gf, prep, freq|
-#       "#{gf}:#{prep}"
+#     $stderr.puts "HIER4 GFs found: " + node_to_gf.values.map { |gf, prep, freq|
+#       "#{gf}:#{prep}"
 #     }.join(" ")
-#     $stderr.puts "HIER5 GF possible: (#{@word_to_subcatframes[string_lemmapos(lemma, pos)].length()})"
+#     $stderr.puts "HIER5 GF possible: (#{@word_to_subcatframes[string_lemmapos(lemma, pos)].length})"
 #     @word_to_subcatframes[string_lemmapos(lemma, pos)].each { |frame, scf|
 #       scf.each { |gf, prep, fe, mult|
 #         $stderr.print "#{gf}:#{prep} "
@@ -714,12 +714,12 @@ class GfiSubcatFrames
 #       $stderr.puts
 #     }
-    # word_to_subcatframes:
+    # word_to_subcatframes:
     # hash: lemma(string) -> array:[frame(string), subcatframe]
     #  with subcatframe: array of tuples [gf, prep, fe, multiplicity]
     scf_list = @word_to_subcatframes[string_lemmapos(lemma, pos)].map { |frame, subcatframe|
       [
-        frame,
+        frame,
         # returns: array of tuples [gf, prep, fe, syn_nodes]
         match_subcat(subcatframe, node_to_gf, strict),
         @subcat_to_freq[string_subcatframe(subcatframe)]
@@ -730,7 +730,7 @@ class GfiSubcatFrames
     # multiplicity "one", and the "many" has only been filled by one
     #
     # so sort by frequency, then discard duplicates using a "seen" hash
-    seen = Hash.new
+    seen = {}
     return scf_list.sort { |a, b| b.last <=> a.last }.select { |frame, subcatframe, frequency|
       sc_string = string_subcatframe_withnodes(subcatframe)
       if seen[sc_string]
@@ -745,7 +745,7 @@ class GfiSubcatFrames
   ###
   # given a subcat frame and a hash mapping each node to a gf/prep pair,
   # check whether the node/gf mapping matches the subcat frame.
-  # Match:
+  # Match:
   # * for each node/gf mapping, the GF/prep occurs in the subcat frame
   #   (But if there are many nodes for the same GF/prep and
   #    multiplicity is "one", nodes may be discarded.)
@@ -756,7 +756,7 @@ class GfiSubcatFrames
   # node_to_gf:
   #   hash: SynNode -> tuple [GF(string), preposition(string), frequency(integer)]
   #
-  # returns:
+  # returns:
   #  nil on mismatch.
   #  match: copy of the subcat frame, each entry minus multiplicity but plus matching syn nodes
   def match_subcat(subcatframe,  # array of tuples as described above
@@ -764,11 +764,11 @@ class GfiSubcatFrames
                    strict)       # boolean: strict match, or subseteq match?
     # each node of the node -> gf hash:
-    # check whether the GF of the node->gf mapping
+    # check whether the GF of the node->gf mapping
     # occurs in the subcat frame
     # if it does, remember it in entry_to_nodes
     # if it does not, regard the match as failed
-    entry_to_nodes = Hash.new
+    entry_to_nodes = {}
     node_to_gf.each_key {|node|
       gf, prep, frequency = node_to_gf[node]
@@ -779,7 +779,7 @@ class GfiSubcatFrames
         if other_gf == gf and other_prep == prep
           # match
           unless entry_to_nodes[[gf, prep]]
-            entry_to_nodes[[gf, prep]] = Array.new
+            entry_to_nodes[[gf, prep]] = []
           end
           entry_to_nodes[[gf, prep]] << node
           match_found = true
@@ -795,23 +795,23 @@ class GfiSubcatFrames
     subcatframe.each { |gf, prep, fe, multiplicity|
       # opposite direction:
       # see if all slots of the subcat frame have been matched against at least one SynNode,
       # otherwise discard
       unless entry_to_nodes[[gf, prep]]
         return nil
       end
       # only one node to be returned for this slot:
       # use the one with the highest frequency for its gf->path mapping
-      if multiplicity == "one" and entry_to_nodes[[gf, prep]].length() > 1
-	# sort nodes by the frequency
-	# entries in node_to_gf,
-	# then keep only the <multiplicity> first ones
-	entry_to_nodes[[gf, prep]] = entry_to_nodes[[gf, prep]].sort { |node1, node2|
-	  node_to_gf[node2].last <=> node_to_gf[node1].last
-	}.slice(0, 1)
+      if multiplicity == "one" and entry_to_nodes[[gf, prep]].length > 1
+        # sort nodes by the frequency
+        # entries in node_to_gf,
+        # then keep only the <multiplicity> first ones
+        entry_to_nodes[[gf, prep]] = entry_to_nodes[[gf, prep]].sort { |node1, node2|
+          node_to_gf[node2].last <=> node_to_gf[node1].last
+        }.slice(0, 1)
       end
     }
@@ -819,7 +819,7 @@ class GfiSubcatFrames
     return subcatframe.map { |gf, prep, fe, multiplicity|
       # sort "many" nodes by the frequency of their gf->path mapping
       [
-        gf, prep, fe,
+        gf, prep, fe,
         entry_to_nodes[[gf, prep]].sort { |node1, node2|
           node_to_gf[node2].last <=> node_to_gf[node1].last
         }
@@ -850,7 +850,7 @@ class GfiSubcatFrames
     return subcatframe.map { |gf, prep, fes, count| "#{gf} #{prep} #{count}" }.sort.join(", ")
   end
   # subcatframe to string
   #
   # here: we have a list of SynNodes instead of the multiplicity
@@ -879,7 +879,7 @@ class GfInduce
   # include_sem: if true, keep frame name and FE name
   # as part of the subcat frame. if false, don't keep them
   def initialize(interpreter_class, # SynInterpreter class
-		 include_sem = false)# boolean
+                 include_sem = false)# boolean
     @interpreter = interpreter_class
     @gf_path_map = GfiGfPathMapping.new(interpreter_class)
@@ -901,7 +901,7 @@ class GfInduce
     end
     file.puts Marshal.dump(self)
-    file.close()
+    file.close
   end
   ###
@@ -917,7 +917,7 @@ class GfInduce
     end
     gfi_obj =  Marshal.load(file)
-    file.close()
+    file.close
     return gfi_obj
   end
@@ -927,7 +927,7 @@ class GfInduce
   ###
   # induce path -> gf mapping from the given SalsaTigerSentence object
-  #
+  #
   # Assumption: sent contains semantic annotation: FrameNet frames
   # and the FEs of the frames have information on grammatical function (gf)
   # and phrase type (pt) of the phrase that the FE points to
@@ -938,20 +938,20 @@ class GfInduce
     # induce GFs from each frame of the sentence
     sent.each_frame { |frame|
       unless frame.target
-        # frame without a target:
+        # frame without a target:
         # nothing I can do
         next
       end
       # main target node, lemma
-      maintarget, targetlemma, targetpos = mainnode_and_lemma(frame.target.children())
+      maintarget, targetlemma, targetpos = mainnode_and_lemma(frame.target.children)
       if not(maintarget) or not(targetlemma)
         # cannot count this one
         next
       end
       # array of tuples [gfpt, prep, fe]
-      subcatframe = Array.new
+      subcatframe = []
       # each FE (but not the target itself):
       frame.each_child { |fe|
@@ -975,21 +975,21 @@ class GfInduce
           # store the mapping
           @gf_path_map.store_mapping(gfpt, path, syn_node, targetlemma, targetpos)
           # preposition?
           prep = @interpreter.preposition(syn_node)
           if prep
             prep.downcase!
           end
-          # remember combination gfpt/prep/fe
+          # remember combination gfpt/prep/fe
           # as part of the subcat frame
-          subcatframe << [gfpt, prep, fe.name()]
+          subcatframe << [gfpt, prep, fe.name]
         } # each syn node that the FE points to
       } # each FE of the frame
       # store the subcat frame
-      @subcat_frames.store_subcatframe(subcatframe, frame.name(), targetlemma, targetpos)
+      @subcat_frames.store_subcatframe(subcatframe, frame.name, targetlemma, targetpos)
     } # each frame
   end
@@ -997,8 +997,8 @@ class GfInduce
   # finish up inducing:
   #  reencode information in a fashion
   #  that makes apply() faster
-  def compute_mapping()
-    @gf_path_map.finish_inducing()
+  def compute_mapping
+    @gf_path_map.finish_inducing
   end
   #########################################
@@ -1006,21 +1006,21 @@ class GfInduce
   #########################################
   ###
-  def test_output()
-    @gf_path_map.test_output()
-    @subcat_frames.test_output()
+  def test_output
+    @gf_path_map.test_output
+    @subcat_frames.test_output
   end
   #########################################
-  # Restricting induced mappings
+  # Restricting induced mappings
   # to achieve better mappings
   #########################################
   ####
   # restrict gf -> path mappings:
   # exclude all paths that include an Up edge
-  def restrict_to_downpaths()
-    @gf_path_map.restrict_to_downpaths()
+  def restrict_to_downpaths
+    @gf_path_map.restrict_to_downpaths
   end
   ####
@@ -1046,18 +1046,18 @@ class GfInduce
   ###
   # given a list of nodes (idea: they form a MWE together;
   #  may of course be a single node),
-  # determine all subcat frames, i.e. all consistent sets of grammatical functions,
+  # determine all subcat frames, i.e. all consistent sets of grammatical functions,
   # for the main node among the nodelist.
   # For each subcat frame, potential FN frames and FE labels
   # are returned as well
   #
   # strict: boolean. If true, return only those subcat frames that exactly match
-  #   all GFs listed in node_to_gf. If false, also return subcat frames that
+  #   all GFs listed in node_to_gf. If false, also return subcat frames that
   #   match a subset of the GFs mentioned in node_to_gf.
-  #
   #
-  # returns: list of tuples [frame(string), subcat frame, frequency(integer)],
-  # where a subcat frame is an array of tuples
+  #
+  # returns: list of tuples [frame(string), subcat frame, frequency(integer)],
+  # where a subcat frame is an array of tuples
   # [gf (string), prep(string or nil), fe(string), synnodes(array:SynNode)]
   def apply(nodelist, # array:SynNode
             strict = false) # match: strict or subseteq?
@@ -1082,7 +1082,7 @@ class GfInduce
     return @subcat_frames.match(mainnode, lemma, pos, node_to_gf, strict)
   end
   #########################################
   #########################################
@@ -1108,8 +1108,10 @@ class GfInduce
     # verb? then add the voice to the POS
     if (voice = @interpreter.voice(mainnode))
       pos = pos + "-" + voice
-    end
+    end
     return [mainnode, lemma, pos]
   end
 end
+end
+end