RubyGems - shalmaneser - Versions diffs - 0.0.1.alpha → 1.2.0.rc1 - Mend

shalmaneser 0.0.1.alpha → 1.2.0.rc1

Files changed (76) hide show

checksums.yaml +7 -0
data/.yardopts +2 -2
data/CHANGELOG.md +4 -0
data/LICENSE.md +4 -0
data/README.md +49 -0
data/bin/fred +18 -0
data/bin/frprep +34 -0
data/bin/rosy +17 -0
data/lib/common/AbstractSynInterface.rb +35 -33
data/lib/common/Mallet.rb +236 -0
data/lib/common/Maxent.rb +26 -12
data/lib/common/Parser.rb +5 -5
data/lib/common/SynInterfaces.rb +13 -6
data/lib/common/TabFormat.rb +7 -6
data/lib/common/Tiger.rb +4 -4
data/lib/common/Timbl.rb +144 -0
data/lib/common/{FrprepHelper.rb → frprep_helper.rb} +14 -8
data/lib/common/headz.rb +1 -1
data/lib/common/ruby_class_extensions.rb +3 -3
data/lib/fred/FredBOWContext.rb +14 -2
data/lib/fred/FredDetermineTargets.rb +4 -9
data/lib/fred/FredEval.rb +1 -1
data/lib/fred/FredFeatureExtractors.rb +4 -3
data/lib/fred/FredFeaturize.rb +1 -1
data/lib/frprep/CollinsInterface.rb +6 -6
data/lib/frprep/MiniparInterface.rb +5 -5
data/lib/frprep/SleepyInterface.rb +7 -7
data/lib/frprep/TntInterface.rb +1 -1
data/lib/frprep/TreetaggerInterface.rb +29 -5
data/lib/frprep/do_parses.rb +1 -0
data/lib/frprep/frprep.rb +36 -32
data/lib/{common/BerkeleyInterface.rb → frprep/interfaces/berkeley_interface.rb} +69 -95
data/lib/frprep/interfaces/stanford_interface.rb +353 -0
data/lib/frprep/interpreters/berkeley_interpreter.rb +22 -0
data/lib/frprep/interpreters/stanford_interpreter.rb +22 -0
data/lib/frprep/opt_parser.rb +2 -2
data/lib/rosy/AbstractFeatureAndExternal.rb +5 -3
data/lib/rosy/RosyIterator.rb +11 -10
data/lib/rosy/rosy.rb +1 -0
data/lib/shalmaneser/version.rb +1 -1
data/test/functional/sample_experiment_files/fred_test.salsa.erb +1 -1
data/test/functional/sample_experiment_files/fred_train.salsa.erb +1 -1
data/test/functional/sample_experiment_files/prp_test.salsa.erb +2 -2
data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +2 -2
data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +2 -2
data/test/functional/sample_experiment_files/prp_train.salsa.erb +2 -2
data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +2 -2
data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +2 -2
data/test/functional/sample_experiment_files/rosy_test.salsa.erb +1 -1
data/test/functional/sample_experiment_files/rosy_train.salsa.erb +7 -7
data/test/functional/test_frprep.rb +3 -3
data/test/functional/test_rosy.rb +20 -0
metadata +215 -224
data/CHANGELOG.rdoc +0 -0
data/LICENSE.rdoc +0 -0
data/README.rdoc +0 -0
data/lib/common/CollinsInterface.rb +0 -1165
data/lib/common/MiniparInterface.rb +0 -1388
data/lib/common/SleepyInterface.rb +0 -384
data/lib/common/TntInterface.rb +0 -44
data/lib/common/TreetaggerInterface.rb +0 -303
data/lib/frprep/AbstractSynInterface.rb +0 -1227
data/lib/frprep/BerkeleyInterface.rb +0 -375
data/lib/frprep/ConfigData.rb +0 -694
data/lib/frprep/FixSynSemMapping.rb +0 -196
data/lib/frprep/FrPrepConfigData.rb +0 -66
data/lib/frprep/FrprepHelper.rb +0 -1324
data/lib/frprep/ISO-8859-1.rb +0 -24
data/lib/frprep/Parser.rb +0 -213
data/lib/frprep/SalsaTigerRegXML.rb +0 -2347
data/lib/frprep/SalsaTigerXMLHelper.rb +0 -99
data/lib/frprep/SynInterfaces.rb +0 -275
data/lib/frprep/TabFormat.rb +0 -720
data/lib/frprep/Tiger.rb +0 -1448
data/lib/frprep/Tree.rb +0 -61
data/lib/frprep/headz.rb +0 -338

data/lib/frprep/BerkeleyInterface.rb DELETED Viewed

@@ -1,375 +0,0 @@
-# -*- coding: utf-8 -*-
-####
-# sp 21 07 05
-#
-# modified ke 30 10 05: adapted to fit into SynInterface
-#
-# represents a file containing Berkeley parses
-#
-# underlying data structure for individual sentences: SalsaTigerSentence
-require "tempfile"
-require "frprep/SalsaTigerRegXML"
-require "frprep/SalsaTigerXMLHelper"
-require "frprep/TabFormat"
-require "frprep/Counter"
-require "frprep/AbstractSynInterface"
-require "frprep/Tiger.rb"
-################################################
-# Interface class
-class BerkeleyInterface < SynInterfaceSTXML
-  $stderr.puts 'Announcing Berkeley Interface' if $DEBUG
-  BerkeleyInterface.announce_me()
-  ###
-  def BerkeleyInterface.system()
-    return "berkeley"
-  end
-  ###
-  def BerkeleyInterface.service()
-    return "parser"
-  end
-  ###
-  # initialize to set values for all subsequent processing
-  def initialize(program_path, # string: path to system
-		 insuffix,      # string: suffix of tab files
-		 outsuffix,     # string: suffix for parsed files
-		 stsuffix,      # string: suffix for Salsa/TIGER XML files
-		 var_hash = {}) # optional arguments in a hash
-    super(program_path, insuffix, outsuffix, stsuffix, var_hash)
-    unless @program_path =~ /\/$/
-      @program_path = @program_path + "/"
-    end
-    # new: evaluate var hash
-    @pos_suffix = var_hash["pos_suffix"]
-    @lemma_suffix = var_hash["lemma_suffix"]
-    @tab_dir = var_hash["tab_dir"]
-  end
-  ####
-  # parse a directory with TabFormat files and write the parse trees to outputdir
-  # I assume that the files in inputdir are smaller than
-  # the maximum number of sentences that
-  # Berkeley can parse in one go (i.e. that they are split)
-  def process_dir(in_dir,  # string: input directory name
-		  out_dir) # string: output directory name
-# not using x64 arch, adjusting for 32 bit
-#    berkeley_prog = "java -d64 -Xmx10000m -jar #{@program_path}berkeley-parser.jar -gr #{@program_path}gerNegra.01.utf8 "
-    berkeley_prog = "java -Xmx2000m -jar #{@program_path}berkeleyParser.jar -gr #{@program_path}ger_sm5.gr"
-    berkeley_prog = "java -jar #{@program_path}berkeley-parser.jar -gr #{@program_path}gerNegra.01.utf8 "
-    Dir[in_dir + "*" + @insuffix].each {|inputfilename|
-      STDERR.puts "*** Parsing #{inputfilename} with Berkeley"
-      corpusfilename = File.basename(inputfilename, @insuffix)
-      parsefilename = out_dir + corpusfilename + @outsuffix
-      tempfile = Tempfile.new(corpusfilename)
-      # we need neither lemmata nor POS tags; berkeley can do with the words
-      corpusfile = FNTabFormatFile.new(inputfilename,nil, nil)
-      corpusfile.each_sentence {|sentence|
-        #puts sentence.to_s
-        tempfile.puts sentence.to_s
-      }
-      tempfile.close
-      # parse and remove comments in the parser output
-      STDERR.puts "#{berkeley_prog} < #{tempfile.path} > #{parsefilename}"
-      # AB: for testing we leave this step out, it takes too much time.
-      # Please keep the <parsefile> intact!!!
-#      Kernel.system("#{berkeley_prog} < #{tempfile.path} > #{parsefilename}")
-      FileUtils.cp tempfile.path, '/home/arbox/input.txt'
-    }
-  end
-  ###
-  # for a given parsed file:
-  # yield each sentence as a pair
-  #  [SalsaTigerSentence object, FNTabFormatSentence object]
-  # of the sentence in SalsaTigerXML and the matching tab format sentence
-  #
-  # If a parse has failed, returns
-  #  [failed_sentence (flat SalsaTigerSentence), FNTabFormatSentence]
-  # to allow more detailed accounting for failed parses
-  # (basically just a flat structure with a failed=true attribute
-  # at the sentence node)
-  def each_sentence(parsefilename)
-    # sanity checks
-    unless @tab_dir
-      raise "Need to set tab directory on initialization"
-    end
-    # get matching tab file for this parser output file
-    parsefile = File.new(parsefilename)
-    tabfilename = @tab_dir+File.basename(parsefilename, @outsuffix)+ @insuffix
-    tabfile = FNTabFormatFile.new(tabfilename, @postag_suffix, @lemma_suffix)
-    sentid = 0
-    tabfile.each_sentence {|tab_sent| # iterate over corpus sentences
-      sentence_str = ""
-      status = true # error encountered?
-      # assemble next sentence in Berkeley file by reading lines from parsefile
-      # for berkeley:
-      while true
-        line = parsefile.gets
-        # search for the next "relevant" line or the end of the file
-	if line.nil? or line=~/^\( *\(TOP/ or line=~/^\(\(\)/
-          break
-	end
-        sentid +=1
-      end
-      if line.nil? # while we search a parse, the parse file is over...
-        raise "Error: premature end of parser file!"
-      end
-      # berkeley parser output: remove brackets /(.*)/
-      line.sub!(/^\( */, '')
-      line.sub!(/ *\) *$/, '')
-      line.gsub!(/\)\)/, ') )')
-      line.gsub!(/\)\)/, ') )')
-      line.gsub!(/(\([A-Z]+)_/, '\1-')
-      sentence_str = line.chomp!
-      # if we are here, we have a sentence_str to work on
-      # hopefully, our status is OK
-      case status
-      when true
-        if tab_sent.get_sent_id() and tab_sent.get_sent_id() != "--"
-          my_sent_id = tab_sent.get_sent_id()
-        else
-          my_sent_id = File.basename(parsefilename, @outsuffix) + "_" + sentid.to_s
-        end
-        st_sent = build_salsatiger(" " + sentence_str + " ", 0,
-                                   Array.new, Counter.new(0),
-                                   Counter.new(500),
-                                   SalsaTigerSentence.empty_sentence(my_sent_id.to_s))
-	if st_sent.nil?
-	  next
-	end
-        yield [st_sent, tab_sent, BerkeleyInterface.standard_mapping(st_sent, tab_sent)]
-      else # i.e. when "failed"
-        #raise "Hunh? This is a failed parse, but still we have a parse tree? Look again."
-      end
-    }
-      # we don't have a sentence: hopefully, this is because parsing has failed
-    # all TabFile sentences are consumed:
-    # now we may just encounter comments, garbage, empty lines etc.
-    while not parsefile.eof?
-      case parsefile.gets
-      when nil, /^%/, /^\s*$/ # empty lines, comments, end of input indicate end of current parse
-      else
-        raise "Error: premature end of tab file!"
-      end
-    end
-  end
-  ###
-  # write Salsa/TIGER XML output to file
-  def to_stxml_file(infilename,  # string: name of parse file
-		    outfilename) # string: name of output stxml file
-    outfile = File.new(outfilename, "w")
-    outfile.puts SalsaTigerXMLHelper.get_header()
-    each_sentence(infilename) { |st_sent, tabsent|
-      outfile.puts st_sent.get()
-    }
-    outfile.puts SalsaTigerXMLHelper.get_footer()
-    outfile.close()
-  end
-  ########################
-  private
-  ###
-  # Recursive function for parsing a Berkeley parse tree and
-  # building a SalsaTigerSentence recursively
-  #
-  # Algorithm: manage stack which contains, for the current constituent,
-  # child constituents (if a nonterminal), and the category label.
-  # When the end of a constituent is reached, a new SynNode (TigerSalsa node) is created.
-  # All children and the category label are popped from the stack and integrated into the
-  # TigerSalsa data structure. The new node is re-pushed onto the stack.
-  def build_salsatiger(sentence, # string
-                    pos,      # position in string (index): integer
-                    stack,    # stack with incomplete nodes: Array
-                    termc,    # terminal counter
-                    nontc,    # nonterminal counter
-                    sent_obj) # SalsaTigerSentence
-    if sentence =~ /\(\)/
-      return nil
-    end
-   # main case distinction: match the beginning of our string
-   # (i.e. what follows our current position in the string)
-    case sentence[pos..-1]
-    when /^ *$/ # nothing -> whole sentence parsed
-      if stack.length == 1
-	# sleepy always delivers one "top" node; if we don't get just one
-        # node, something has gone wrong
-        node = stack.pop
-        node.del_attribute("gf")
-        return sent_obj
-      else
-        raise "Error: more than one root node (stack length #{stack.length}). Full sentence: \n#{sentence}"
-      end
-    when /^\s*\(([^ )]+) /
-      # match the beginning of a new constituent
-      # (opening bracket + category + space, may not contain closing bracket)
-      cat = $1
-      if cat.nil? or cat == ""
-        raise "Error: found category nil in sentence #{sentence[pos,10]}, full sentence\n#{sentence}"
-      end
-#          STDERR.puts "new const #{cat}"
-      stack.push cat # throw the category label on the stack
-      return build_salsatiger(sentence,pos+$&.length,stack,termc,nontc,sent_obj)
-    when /^\s*(\S+)\) /
-      # match the end of a terminal constituent (something before a closing bracket + space)
-      word = $1
-      comb_cat = stack.pop
-      if comb_cat.to_s == ""
-        raise "Empty cat at position #{sentence[pos,10]}, full sentence\n#{sentence}"
-      end
-      cat,gf = split_cat(comb_cat)
-      node = sent_obj.add_syn("t",
-                              nil,  # cat (doesn't matter here)
-                              SalsaTigerXMLHelper.escape(word), # word
-                              cat,  # pos
-                              termc.next.to_s)
-      node.set_attribute("gf",gf)
-#          STDERR.puts "completed terminal #{cat}, #{word}"
-      stack.push node
-      return build_salsatiger(sentence,pos+$&.length,stack,termc,nontc,sent_obj)
-    when /^\s*\)/ # match the end of a nonterminal (nothing before a closing bracket)
-      # now collect children:
-      # pop items from the stack until you find the category
-      children = Array.new
-      while true
-        if stack.empty?
-          raise "Error: stack empty; cannot find more children"
-        end
-        item = stack.pop
-        case item.class.to_s
-        when "SynNode" # this is a child
-          children.push item
-        when "String" # this is the category label
-          if item.to_s == ""
-            raise "Empty cat at position #{sentence[pos,10]}, full sentence\n#{sentence}"
-          end
-          cat,gf = split_cat(item)
-          break
-        else
-          raise "Error: unknown item class #{item.class.to_s}"
-        end
-      end
-      # now add a nonterminal node to the sentence object and
-      # register the children nodes
-      node = sent_obj.add_syn("nt",
-                              cat, # cat
-                              nil, # word (doesn't matter)
-                              nil, # pos (doesn't matter)
-                              nontc.next.to_s)
-      children.each {|child|
-        child_gf = child.get_attribute("gf")
-        child.del_attribute("gf")
-        node.add_child(child,child_gf)
-        child.add_parent(node, child_gf)
-       }
-      node.set_attribute("gf",gf)
-#          STDERR.puts "Completed nonterm #{cat}, #{children.length} children."
-      stack.push node
-      return build_salsatiger(sentence,pos+$&.length, stack,termc,nontc,sent_obj)
-    else
-      raise "Error: cannot analyse sentence at pos #{pos}: #{sentence[pos..-1]}. Complete sentence: \n#{sentence}"
-    end
-  end
-  ###
-  # Berkeley delivers node labels as "phrase type"-"grammatical function"
-  # but the GF may not be present.
-  def split_cat(cat)
-    cat =~ /^([^-]*)(-([^-]*))?$/
-    unless $1
-      raise "Error: could not identify category in #{cat}"
-    end
-    proper_cat = $1
-    if $3
-      gf = $3
-    else
-      gf = ""
-    end
-    return [proper_cat,gf]
-  end
-end
-################################################
-# Interpreter class
-class BerkeleyInterpreter < Tiger
-  BerkeleyInterpreter.announce_me()
-  ###
-  # names of the systems interpreted by this class:
-  # returns a hash service(string) -> system name (string),
-  # e.g.
-  # { "parser" => "collins", "lemmatizer" => "treetagger" }
-  def BerkeleyInterpreter.systems()
-    return {
-	"parser" => "berkeley"
-    }
-  end
-  ###
-  # names of additional systems that may be interpreted by this class
-  # returns a hash service(string) -> system name(string)
-  # same as names()
-  def BerkeleyInterpreter.optional_systems()
-    return {
-      "lemmatizer" => "treetagger"
-    }
-  end
-end

data/lib/frprep/ConfigData.rb DELETED Viewed

@@ -1,694 +0,0 @@
-# class ConfigData:
-#
-# reads config data file,
-# matches it against feature declarations given in its new() method,
-# offers access methods for different kinds of features
-#
-# In the config file, all feature specifications have the form
-#
-#       feature_name = feature_value
-#
-# where feature_name is a string without spaces. feature_value
-# may include spaces, depending on the feature type (see below).
-#
-# To include a comment in a config file, start the comment line with
-# '#'.
-#
-# Features are typed. The following types are supported:
-#
-# - normal types:
-#   "bool", "float", "integer", "string"
-#   For the get() function with which features in the ConfigData object
-#   are accessed, the values are transformed from the strings in the
-#   config file to the appropriate class: Boolean, Float, Integer, String
-#
-# - other types:
-#   pattern:  This is a feature that may include variables in
-#             <> brackets. When this feature is accessed,
-#             values for these variables are given, i.e. this
-#             pattern has to be instantiated.
-#             For example, given a feature
-#
-#               fileformat = features.<type>.train
-#
-#             and method call
-#               instantiate("fileformat", "type" => "path")
-#
-#             what is returned is a string "features.path.train"
-#
-#             Variables used in a pattern have to be declared to
-#             the new() method.
-#
-#   list:    This is the only feature type where more than one
-#            feature specification with the same feature_name is allowed.
-#            The right-hand sides of a list feature are stored in an array.
-#
-#            Given a 'list' feature 'bla', if the config file contains
-#
-#                bla = blupp 1 2
-#                bla = la di da
-#
-#            the list feature 'bla' is represented as follows:
-#            @features['bla'] = [['blupp', 1,2], ['la', 'di', 'da']]
-#
-#            For comfortable access to a list feature, arbitrary
-#            access functions for list features can be defined.
-#
-#
-require 'frprep/ruby_class_extensions'
-#####################################################
-####################################################
-# ConfigData is the main class in this package.
-# It manages config files.
-#
-# To use it, inherit from it and just make a new new() method
-# that only takes as input the name of the config file
-# and that declares all the feature types and variable names
-# needed for the given application.
-class ConfigData
-  ###########
-  # new()
-  #
-  # reads the config file
-  #
-  # Input parameters: the name of the config file, a hash declaring all
-  # features by mapping feature names to their types,
-  # and an array of all variables that may occur in pattern type features
-  #
-  def initialize(filename, # string: name of config file
-		 feature_types, # hash: feature_name => feature_type
-		 variables) # array of strings: list of variables used in pattern features
-    @test_print = false
-    @variables = variables
-    @original_filename = filename
-    ##
-    # open config file
-    begin
-      file = File.new(filename)
-    rescue
-      $stderr.puts "Error: I could not open the experiment file " + filename
-      exit 1
-    end
-    # feature_types: hash: feature_name => feature_type
-    # features: hash: feature_name => value
-    @feature_types = feature_types
-    @features = Hash.new
-    # @list_feature_access: hash feature_name => Proc
-    # access method for list features
-    @list_feature_access = Hash.new
-    # pre-initialize list features to an empty array
-    @feature_types.each_pair { |feature_name, feature_type|
-      if feature_type == "list"
-	@features[feature_name] = Array.new
-      end
-    }
-    ##
-    # examine the config file contents
-    while (line = file.gets())
-      line = line.chomp().strip()
-      if line =~ /^#/   # comment
-	next
-      end
-      if line.empty? # nothing to be seen here
-	next
-      end
-      feature_name, rhs = extract_def(line)
-      set_entry(feature_name, rhs)
-    end
-  end
-  #####
-  # set_entry
-  #
-  # set an entry in the experiment file, either an existing or a new one
-  # but it must conform to the feature types declared in the new() method
-  def set_entry(feature_name, rhs)
-    unless @feature_types[feature_name]
-      $stderr.puts "Error in experiment file:"
-      $stderr.puts "Unknown parameter #{feature_name} in #{@original_filename}."
-      $stderr.puts "Expected features for this type of experiment file:"
-      $stderr.puts @feature_types.keys().join(", ")
-      exit 1
-    end
-    case @feature_types[feature_name]
-    when "pattern"
-      # file format specification
-      @features[feature_name] = ConfigFormatElement.new(rhs, @variables)
-    when "list"
-      # rhs is a string of space-separated words
-      # the first of them is the key, the rest is the value, to be
-      # stored as an array of words
-      # split rhs into words
-      if rhs.empty?
-        $stderr.puts "WARNING: I got an empty value for list feature #{feature_name}."
-        $stderr.puts "I'll ignore it."
-      else
-        unless @features[feature_name].include? rhs.split()
-          @features[feature_name] << rhs.split()
-        end
-      end
-    when "bool"
-      # boolean value
-      unless ["true", "false"].include? rhs
-        $stderr.puts "Error in experiment file:"
-        $stderr.puts "Value for #{feature_name} must be either 'true' or 'false'."
-        $stderr.puts "I got: "+ rhs.to_s
-        exit 1
-      end
-      @features[feature_name] = (rhs == "true")
-    when "float"
-      # float value
-      @features[feature_name] = rhs.to_f
-    when "integer"
-      # integer value
-      @features[feature_name] = rhs.to_i
-    when "string"
-      # string value
-      @features[feature_name] = rhs
-    else
-      raise "Unknown feature type for feature #{feature_name}: #{@feature_types[feature_name]}"
-    end
-  end
-  ####
-  # remove list entry in this config data structure:
-  # the lhs argument is the list feature name
-  # the rhs argument can be a string or a regexp.
-  # - string: each entry exactly matching the string is removed
-  # - regexp: each entry matching the regexp is removed
-  def unset_list_entry(lhs, #string: feature name
-                       rhs) # string/regexp: righthand side
-    unless @feature_types[lhs] == "list"
-      $stderr.puts "Error in experiment file: "
-      $stderr.puts "Feature #{lhs} unknown or not of type list."
-      exit 1
-    end
-    case rhs.class.to_s
-    when "String"
-      rhs_match = Regexp.new("^" + Regexp.escape(rhs) + "$")
-    when "Regexp"
-      rhs_match = rhs
-    else
-      raise "Shouldn't be here: " + rhs.class.to_s
-    end
-    to_delete = @features[lhs].select { |entry| entry.join(" ") =~ rhs_match }
-    to_delete.each { |entry| @features[lhs].delete(entry) }
-  end
-  #####
-  # adjoin
-  #
-  # adds the information from a second ConfigData object
-  # to this one.
-  # Disjointness of feature names is assumed.
-  def adjoin(config_obj)  # ConfigData object
-    ##
-    # sanity checks:
-    # the other object must be a ConfigData object
-    unless config_obj.kind_of? ConfigData
-      raise "I can only adjoin another ConfigData object"
-    end
-    # if feature name sets are not disjoint,
-    # ignore the feature names that I already have
-    other_features, other_feature_types, other_list_feature_access = config_obj.get_contents()
-    unless (@feature_types.keys & other_feature_types.keys).empty?
-      other_features = other_features.clone()
-      other_feature_types = other_feature_types.clone()
-      other_list_feature_access = other_list_feature_access.clone()
-      (@feature_types.keys() & other_feature_types.keys()).each { |overlap_feature|
-        other_features.delete(overlap_feature)
-        other_feature_types.delete(overlap_feature)
-        other_list_feature_access.delete(overlap_feature)
-      }
-    end
-    # now adjoin the contents of the other config objects to mine
-    @features.update(other_features)
-    @feature_types.update(other_feature_types)
-    @list_feature_access.update(other_list_feature_access)
-  end
-  #####
-  # get()
-  #
-  # returns the value of a given feature
-  # raises an error if no feature of this name
-  # has been declared to the new() method
-  #
-  # returns: a feature value. the type of the return value
-  #    depends on the type of the feature.
-  #    returns nil if the feature has not been set in the config file.
-  def get(name) # string: name of the feature to access
-    if @feature_types[name].nil?
-      raise "Unknown feature " + name
-    end
-    # may return nil if something has not been set
-    return @features[name]
-  end
-  ####
-  # get_type
-  #
-  # returns the type of a given feature,
-  # or nil if it is undefined
-  def get_type(feature_name)
-    return @feature_types[feature_name]
-  end
-  #####
-  # is_defined
-  #
-  # returns: true if a feature by this name has been set in the config file,
-  #   false else
-  def is_defined(feature) # string: name of the feature
-    if @features[feature]
-      return true
-    else
-      return false
-    end
-  end
-  #####
-  # instantiate
-  #
-  # given a pattern type feature, and a hash
-  # mapping all variables occurring in the pattern to
-  # values, instantiate the pattern
-  #
-  # returns: string, the pattern with all variables
-  #  instantiated with their values
-  def instantiate(key,  # string: feature name
-		  var_hash={}) # hash: variable name(string) => value(string)
-    unless @feature_types[key] == "pattern"
-      raise "Nothing known about pattern " + key
-    end
-    unless @features[key]
-      raise "Please define pattern in configuration file: " + key
-    end
-    # piece together the file name
-    # expand in case it is a filename/directory
-    return @features[key].instantiate(var_hash)
-  end
-  #####
-  # get_filename:
-  #
-  # synonym for instantiate()
-  def get_filename(key, var_hash={})
-    return instantiate(key, var_hash)
-  end
-  #####
-  # set_test_print
-  #
-  # set test output to on (true) or off (false)
-  def set_test_print(tf) # boolean
-    unless [true, false].include? tf
-      raise "Shouldn't be here"
-    end
-    @test_print = tf
-  end
-  #####
-  # get_all_filenames
-  #
-  # given a directory, a pattern type feature,
-  # and a hash mapping some of the pattern's variables
-  # to values, return all filenames in the given directory
-  # that match the partially instantiated pattern
-  #
-  # returns: an array of pairs [filename(string), matches(hash)]
-  # where the matches hash maps all variables of the pattern to
-  # their values as instantiated in the given filename
-  # The filename doesn't include the directory.
-  def get_all_filenames(dir, #string: directory name
-			key, # string: name of pattern type feature
-			var_hash={}) # hash: variable name(string) => value(string)
-    unless @feature_types[key] == "pattern"
-      raise "Nothing known about file format " + key
-    end
-    # array of pairs [filename(string), matches(hash)]
-    filenames = Array.new
-    # iterate through all files of this directory
-    Dir.foreach(dir) { |filename|
-      # does the filename match the pattern of the feature "key"?
-      if (matches = @features[key].match(filename, var_hash))
-	# do the variable values for this filename conform
-	# to the variable values given in var_hash?
-	if @test_print
-	  $stderr.puts "got " + filename
-	end
-	if var_hash.keys.select { |var|
-	    matches[var] != var_hash[var]
-	  }.empty?
-	  filenames << [filename, matches]
-	else
-	  # mismatch for given variables
-	  if @test_print
-	    var_hash.keys.each { |var|
-	      if matches[var] != var_hash[var]
-		$stderr.puts "Mismatch for " + var + ": " +
-		  matches[var].to_s + " vs. " + var_hash[var]
-	      end
-	    }
-	  end
-	end
-      end
-    }
-    return filenames
-  end
-  #####
-  # set list feature access:
-  #
-  # for a given list type feature, set a method that should
-  # be used for accessing the feature.
-  #
-  # method signature: first parameter is an array of tuples of strings.
-  # for each experiment file entry
-  #   feature = rhs
-  # there will be a tuple rhs.split() in the list.
-  #
-  # The other parameters are not checked by ConfigData, there
-  # may be arbitrarily many
-  def set_list_feature_access(feature_name, # string: name of the feature
-			      proc) # proc: access method for list feature
-    unless @feature_types[feature_name] == 'list'
-      raise "Cannot set list feature access to non-list feature #{feature_name}"
-    end
-    @list_feature_access[feature_name] = proc
-  end
-  #####
-  # get_lf
-  #
-  # access a list type feature for which an access function
-  # has been set using set_list_feature_access
-  #
-  # returns: whatever the access function returns
-  def get_lf(feature_name, # string: name of list feature
-	     *parameters)  # parameters for access function, collapsed into an array here
-    unless @list_feature_access[feature_name]
-      raise "I have no list feature access method for #{feature_name}."
-    end
-    # call access function, re-exploding the collapsed parameters and
-    # adding the list of values for the list feature as first parameter
-    return @list_feature_access[feature_name].call(@features[feature_name], *parameters)
-  end
-  protected
-  #####
-  # extract_def
-  #
-  # given a line of the config file,
-  # it is assumed that it has the structure
-  #  [white space] string [white space] = [white space] stuff
-  #  'stuff' may include further white space, 'string' may not.
-  #
-  # returns: a pair of strings, the left-hand side and the right-hand side
-  #  of the =, minus the [white space] in the places shown above
-  def extract_def(line) # string: line from config file
-    unless line =~ /^\s*(\w+)\s*=\s*([^\s].*)$/
-      $stderr.puts "Error in experiment file: "
-      $stderr.puts "I couldn't analyze the following line: "
-      $stderr.puts line
-      exit 1
-    end
-    return [$1, $2]
-  end
-  ####
-  # access to the object variables
-  def get_contents()
-    return [@features, @feature_types, @list_feature_access]
-  end
-end
-##############################
-# ConfigFormatElement is an auxiliary class
-# of ConfigData.
-# It keeps track of feature patterns with variables in them
-# that can be instantiated.
-class ConfigFormatElement
-  # new()
-  #
-  # given a pattern and a list of variable names,
-  # analyze the pattern and remember the variable names
-  #
-  def initialize(string, # string: feature name, may include names of variables.
-		         # they are included in <>
-		 variables) # list of variable names that can occur
-    @variables = variables
-    # pattern: this is what the 'string' is split into,
-    # an array of elements that are either fixed parts or variables.
-    # fixed part: pair [item:string, "string"]
-    # variable: pair [variable_name:string, "variable"]
-    @pattern = Array.new
-    state = "out"
-    item = ""
-    # analyze string,
-    # split into variables and fixed parts
-    string.split(//).each { |char|
-      case state
-      when "in"
-	case char
-	when "<"
-	  raise "Duplicate < in " + string
-	when ">"
-	  unless @variables.include? item
-	    raise "Unknown variable " + item
-	  end
-	  @pattern << [item, "variable"]
-	  item = ""
-	  state = "out"
-	else
-	  item << char
-	  state = "in"
-	end
-      when "out"
-	case char
-	when "<"
-	  unless item.empty?
-	    @pattern << [item, "string"]
-	    item = ""
-	  end
-	  state = "in"
-	when ">"
-	  raise "Unexpected > in " + string
-	else
-	  item << char
-	  state = "out"
-	end
-      else
-	raise "Shouldn't be here"
-      end
-    }
-    # read through the whole of "string"
-    # end state has to be "out"
-    unless state == "out"
-      raise "Unclosed < in " + string
-    end
-    # last bit still to be recorded?
-    unless item.empty?
-      @pattern << [item, "string"]
-    end
-    # make regexp for matching this pattern
-    @regexp = make_regexp(@pattern)
-  end
-  # instantiate: given pairs of variable names and variable values,
-  # instantiate @pattern to a string in which var names are replaced
-  # by their values
-  #
-  # returns: string
-  def instantiate(var_hash) # hash variable name(string) => variable value(string)
-    # instantiate the pattern
-    return @pattern.map { |item, string_or_var|
-      case string_or_var
-      when "string"
-	item
-      when "variable"
-	if var_hash[item].nil?
-	  raise "Missing variable instantiation: " + item
-	end
-	var_hash[item]
-      else
-	raise "Shouldn't be here"
-      end
-    }.join
-  end
-  # match()
-  #
-  # given a string, try to match it against the @pattern
-  # while setting the variables given in 'fillers' to
-  # the values given in that hash.
-  #
-  # returns: if the string matches, a hash variable name => value
-  #   that includes the fillers given as a parameter as well as
-  #   values for all other variables mentioned in @pattern,
-  #   or false if no match.
-  def match(string,   # a string
-	    fillers = nil) # hash variable name(string) => value(string)
-    # have we been given partial info about variables?
-    if fillers
-      match = make_regexp(@pattern, fillers).match(string)
-#      $stderr.print "matching " + make_regexp(@pattern, fillers).source +
-#	" against " + string + " "
-#      if match.nil?
-#	$stderr.puts "no"
-#      else
-#	$stderr.puts "yes"
-#      end
-    else
-      match = @regexp.match(string)
-    end
-    if match.nil?
-      # no match via the regular expression
-      return false
-    end
-    # regular expression matched.
-    # construct return value in hash
-    # retv: variable name(string) => value(string)
-    retv = Hash.new()
-    if fillers
-      # include given fillers in retv hash
-      fillers.each_pair { |name, val| retv[name] = val }
-    end
-    # now put values for other variables in @pattern into retv
-    index = 1
-    @pattern.to_a.select { |item, string_or_var|
-      string_or_var == "variable"
-    }.select { |item, string_or_var|
-      fillers.nil? or
-	fillers[item].nil?
-    }.each { |item, string_or_var|
-      # for all items on the pattern list
-      # that are variables and
-      # haven't been filled by the "fillers" list already:
-      # fill from matches
-      if match[index].nil?
-	raise "Match, but not enough matched elements? Strange."
-      end
-      if retv[item].nil?
-	retv[item] = match[index]
-      else
-	unless retv[item] == match[index]
-	  return false
-	end
-      end
-      index += 1
-    }
-    return retv
-  end
-  # used_variables
-  #
-  # returns: an array of variable names used in @pattern
-  def used_variables()
-    return @pattern.select { |item, string_or_var|
-      string_or_var == "variable"
-    }.map { |item, string_or_var| item}
-  end
-  ####################
-  private
-  # make_regexp:
-  # make regular expression from a pattern
-  # together with some variable fillers
-  #
-  # returns: Regexp object
-  def make_regexp(pattern,  # array of pairs [string, "string"] or [string, "variable"]
-		  fillers = nil) # hash variable name(string) => value(string)
-    return (Regexp.new "^" +
-      pattern.map { |item, string_or_var|
-      case string_or_var
-      when "variable"
-	if fillers and
-	    fillers[item]
-	  Regexp.escape(fillers[item])
-	else
-	  "(.+)"
-	end
-      when "string"
-	Regexp.escape(item)
-      else
-	raise "Shouldn't be here"
-      end
-    }.join + "$")
-  end
-end