RubyGems - shalmaneser - Versions diffs - 0.0.1.alpha → 1.2.0.rc1 - Mend

shalmaneser 0.0.1.alpha → 1.2.0.rc1

Files changed (76) hide show

checksums.yaml +7 -0
data/.yardopts +2 -2
data/CHANGELOG.md +4 -0
data/LICENSE.md +4 -0
data/README.md +49 -0
data/bin/fred +18 -0
data/bin/frprep +34 -0
data/bin/rosy +17 -0
data/lib/common/AbstractSynInterface.rb +35 -33
data/lib/common/Mallet.rb +236 -0
data/lib/common/Maxent.rb +26 -12
data/lib/common/Parser.rb +5 -5
data/lib/common/SynInterfaces.rb +13 -6
data/lib/common/TabFormat.rb +7 -6
data/lib/common/Tiger.rb +4 -4
data/lib/common/Timbl.rb +144 -0
data/lib/common/{FrprepHelper.rb → frprep_helper.rb} +14 -8
data/lib/common/headz.rb +1 -1
data/lib/common/ruby_class_extensions.rb +3 -3
data/lib/fred/FredBOWContext.rb +14 -2
data/lib/fred/FredDetermineTargets.rb +4 -9
data/lib/fred/FredEval.rb +1 -1
data/lib/fred/FredFeatureExtractors.rb +4 -3
data/lib/fred/FredFeaturize.rb +1 -1
data/lib/frprep/CollinsInterface.rb +6 -6
data/lib/frprep/MiniparInterface.rb +5 -5
data/lib/frprep/SleepyInterface.rb +7 -7
data/lib/frprep/TntInterface.rb +1 -1
data/lib/frprep/TreetaggerInterface.rb +29 -5
data/lib/frprep/do_parses.rb +1 -0
data/lib/frprep/frprep.rb +36 -32
data/lib/{common/BerkeleyInterface.rb → frprep/interfaces/berkeley_interface.rb} +69 -95
data/lib/frprep/interfaces/stanford_interface.rb +353 -0
data/lib/frprep/interpreters/berkeley_interpreter.rb +22 -0
data/lib/frprep/interpreters/stanford_interpreter.rb +22 -0
data/lib/frprep/opt_parser.rb +2 -2
data/lib/rosy/AbstractFeatureAndExternal.rb +5 -3
data/lib/rosy/RosyIterator.rb +11 -10
data/lib/rosy/rosy.rb +1 -0
data/lib/shalmaneser/version.rb +1 -1
data/test/functional/sample_experiment_files/fred_test.salsa.erb +1 -1
data/test/functional/sample_experiment_files/fred_train.salsa.erb +1 -1
data/test/functional/sample_experiment_files/prp_test.salsa.erb +2 -2
data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +2 -2
data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +2 -2
data/test/functional/sample_experiment_files/prp_train.salsa.erb +2 -2
data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +2 -2
data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +2 -2
data/test/functional/sample_experiment_files/rosy_test.salsa.erb +1 -1
data/test/functional/sample_experiment_files/rosy_train.salsa.erb +7 -7
data/test/functional/test_frprep.rb +3 -3
data/test/functional/test_rosy.rb +20 -0
metadata +215 -224
data/CHANGELOG.rdoc +0 -0
data/LICENSE.rdoc +0 -0
data/README.rdoc +0 -0
data/lib/common/CollinsInterface.rb +0 -1165
data/lib/common/MiniparInterface.rb +0 -1388
data/lib/common/SleepyInterface.rb +0 -384
data/lib/common/TntInterface.rb +0 -44
data/lib/common/TreetaggerInterface.rb +0 -303
data/lib/frprep/AbstractSynInterface.rb +0 -1227
data/lib/frprep/BerkeleyInterface.rb +0 -375
data/lib/frprep/ConfigData.rb +0 -694
data/lib/frprep/FixSynSemMapping.rb +0 -196
data/lib/frprep/FrPrepConfigData.rb +0 -66
data/lib/frprep/FrprepHelper.rb +0 -1324
data/lib/frprep/ISO-8859-1.rb +0 -24
data/lib/frprep/Parser.rb +0 -213
data/lib/frprep/SalsaTigerRegXML.rb +0 -2347
data/lib/frprep/SalsaTigerXMLHelper.rb +0 -99
data/lib/frprep/SynInterfaces.rb +0 -275
data/lib/frprep/TabFormat.rb +0 -720
data/lib/frprep/Tiger.rb +0 -1448
data/lib/frprep/Tree.rb +0 -61
data/lib/frprep/headz.rb +0 -338

data/lib/frprep/SalsaTigerXMLHelper.rb DELETED Viewed

@@ -1,99 +0,0 @@
-# sp jul 05 05
-#
-# Static helper methods for SalsaTigerRegXML:
-# - provide header and footer for Salsa/Tiger XML files
-# - escape and unescape HTML entities
-#
-# changed KE nov 05:
-# many methods moved to FrprepHelper
-require "frprep/SalsaTigerRegXML"
-require "frprep/headz"
-require "frprep/Parser"
-require "tempfile"
-class SalsaTigerXMLHelper
-  ###
-  # get header of SalsaTigerXML files (as string)
-  def SalsaTigerXMLHelper.get_header
-    header = <<ENDOFHEADER
-<?xml version="1.0" encoding="UTF-8"?>
-  <corpus corpusname="corpus" target="">
-	<head>
-		<meta>
-			<format>
-			NeGra format, version 3</format>
-		</meta>
-		<frames xmlns="http://www.clt-st.de/framenet/frame-database">
-		</frames>
-		<wordtags xmlns="http://www.clt-st.de/salsa/wordtags">
-		</wordtags>
-		<flags>
-		</flags>
-		<annotation>
-			<edgelabel>
-			</edgelabel>
-			<secedgelabel>
-			</secedgelabel>
-		</annotation>
-	</head>
-	<body>
-ENDOFHEADER
-    return header
-  end
-  ###
-  # get footer of SALSATigerXML files (as string)
-  def SalsaTigerXMLHelper.get_footer
-    footer = <<ENDOFFOOTER
-	</body>
-</corpus>
-ENDOFFOOTER
-    return footer
-  end
-# escape and unescape strings for representation in XML
-  @@replacements = [
-#  ["&apos;&apos;","&quot;"], # added by ines (09/03/09), might cause problems for unescape???
-  ["&","&amp;"], # must be first for escaping, last for unescaping
-  ["<","&lt;"],
-  [">", "&gt;"],
-  ["\"","&apos;&apos;"],
-#  ["\"","&quot;"],
-#  ["\'\'","&quot;"],
-#  ["\`\`","&quot;"],
-  ["\'","&apos;"],
-  ["\`\`","&apos;&apos;"],
-#  ["''","&apos;&apos;"]
-  ]
-def SalsaTigerXMLHelper.escape(string)
-  @@replacements.each {|unescaped,escaped|
-    string.gsub!(unescaped,escaped)
-  }
-  return string
-end
-def SalsaTigerXMLHelper.unescape(string)
-  # reverse replacements to replace &amp last
-  @@replacements.reverse.each {|unescaped,escaped|
-    string.gsub!(escaped,unescaped)
-  }
-  return string
-end
-end

data/lib/frprep/SynInterfaces.rb DELETED Viewed

@@ -1,275 +0,0 @@
-# SynInterfaces.rb
-#
-# ke oct/nov 2005
-#
-# Store all known interfaces to
-# systems that do syntactic analysis
-#
-# Given the name of a system and the service that the
-# system performs, return the appropriate interface
-#
-# There are two types of interfaces to syntactic analysis systems:
-# - interfaces:
-#   offer methods for syntactic analysis,
-#   and the transformation to Salsa/Tiger XML and SalsaTigerSentence objects
-# - interpreters:
-#   interpret the resulting Salsa/Tiger XML (represented as
-#   SalsaTigerSentence and SynNode objects), e.g.
-#   generalize over part of speech;
-#   describe the path between a pair of nodes both as a path
-#   and (potentially) as a grammatical function of one of the nodes;
-#   determine whether a node describes a verb, and in which voice;
-#   determine the head of a constituent
-#
-# Abstract classes for both interfaces and interpreters
-# are in AbstractSynInterface.rb
-require "frprep/ruby_class_extensions"
-# Reopen the core Array class to mix in EnumerableBool.
-# NOTE(review): EnumerableBool is not defined in this view; presumably it
-# comes from frprep/ruby_class_extensions and supplies the big_and
-# method that get_interpreter calls on arrays -- confirm.
-class Array
-  include EnumerableBool
-end
-# The list of available interface packages
-# is at the end of this file.
-# Please enter additional interfaces there.
-# Registry of the known interface and interpreter classes for
-# syntactic analysis systems, with lookup by service and system name.
-class SynInterfaces
-  ###
-  # class variable:
-  # list of all known interface classes
-  # add to it using add_interface()
-  @@interfaces = []
-  ###
-  # class variable:
-  # list of all known interpreter classes
-  # add to it using add_interpreter()
-  @@interpreters = []
-  ###
-  # add interface/interpreter
-  #
-  # class_name: the class object to register; it is appended as is
-  def SynInterfaces.add_interface(class_name)
-    $stderr.puts "Initializing interface #{class_name}" if $DEBUG
-    @@interfaces << class_name
-  end
-  def SynInterfaces.add_interpreter(class_name)
-    $stderr.puts "Initializing interpreter #{class_name}" if $DEBUG
-    @@interpreters << class_name
-  end
-  # AB: fake method to preview the interfaces table.
-  def SynInterfaces.explore
-    $stderr.puts "Exploring..."
-    $stderr.puts @@interfaces
-    $stderr.puts @@interpreters
-  end
-  ###
-  # check_interfaces_abort_if_missing:
-  #
-  # Given an experiment file, use some_system_missing? to
-  # determine whether the system can be run with the requested
-  # syntactic processing, exit with an error message if that is not possible
-  #
-  # returns: nil if nothing is missing; otherwise does not return (exit 1)
-  def SynInterfaces.check_interfaces_abort_if_missing(exp) #FrPrepConfigData object
-    missing = SynInterfaces.some_system_missing?(exp)
-    return unless missing
-
-    interwhat, services = missing
-    $stderr.puts
-    $stderr.puts "ERROR: I am missing an #{interwhat} for "
-    services.each_pair { |service, system_name|
-      $stderr.puts "\tservice #{service}, system #{system_name}"
-    }
-    $stderr.puts
-    $stderr.puts "I have the following interfaces:"
-    @@interfaces.each { |interface_class|
-      $stderr.puts "\tservice #{interface_class.service}, system #{interface_class.system}"
-    }
-    $stderr.puts "I have the following interpreters:"
-    @@interpreters.each { |interpreter_class|
-      $stderr.print "\t"
-      $stderr.print interpreter_class.systems.to_a.map { |service, system_name|
-        "service #{service}, system #{system_name}"
-      }.join("; ")
-      unless interpreter_class.optional_systems.empty?
-        $stderr.print ", optional: "
-        $stderr.print interpreter_class.optional_systems.to_a.map { |service, system_name|
-          "service #{service}, system #{system_name}"
-        }.join("; ")
-      end
-      $stderr.puts
-    }
-    $stderr.puts
-    $stderr.puts "Please adapt your experiment file."
-    exit 1
-  end
-  ###
-  # some_system_missing?
-  # returns nil if I have interfaces and interpreters
-  # for all services requested in the given experiment file
-  # else:
-  # returns pair [interface or interpreter, info]
-  #  where the 1st element is either 'interface' or 'interpreter',
-  #  and the 2nd element is a hash mapping services to system names:
-  #  the services that could not be provided
-  #  (for a missing interface: only the first service that failed)
-  def SynInterfaces.some_system_missing?(exp) # FrPrepConfigData object
-    services = SynInterfaces.requested_services(exp)
-    # check interfaces
-    services.each_pair { |service, system_name|
-      unless SynInterfaces.get_interface(service, system_name)
-        return ["interface", {service => system_name} ]
-      end
-    }
-    # check interpreter
-    unless SynInterfaces.get_interpreter_according_to_exp(exp)
-      return ["interpreter", services]
-    end
-    # everything okay
-    return nil
-  end
-  ###
-  # given the name of a system and the service that it
-  # performs, find the matching interface class
-  #
-  # system: string: name of system, e.g. collins
-  # service: string: service, e.g. parser
-  #
-  # returns: SynInterface class, or nil if no registered class matches
-  def SynInterfaces.get_interface(service,
-                                  system)
-    # first registered class with the given name and service, else nil
-    @@interfaces.find { |interface_class|
-      interface_class.system == system &&
-        interface_class.service == service
-    }
-  end
-  ###
-  # helper for get_interpreter:
-  def SynInterfaces.get_interpreter_according_to_exp(exp)
-    return SynInterfaces.get_interpreter(SynInterfaces.requested_services(exp))
-  end
-  ###
-  # given the names and services of a set of systems,
-  # find the matching interpreter class
-  #
-  # an interpreter class has both obligatory systems
-  # (they need to be present for this class to apply)
-  # and optional systems (they may or may not be present
-  # for the class to apply, but no other system performing
-  # the same service may)
-  #
-  # systems:
-  # hash: service(string) -> system name(string)
-  #
-  # returns: SynInterpreter class, or nil if no registered class matches
-  def SynInterfaces.get_interpreter(systems)
-    # first registered interpreter class for which all three conditions hold
-    @@interpreters.find { |interpreter_class|
-      interpreter_class.systems.to_a.big_and { |service, system|
-        # all obligatory entries of interpreter_class
-        # are in systems
-        systems[service] == system
-      } &&
-        interpreter_class.optional_systems.to_a.big_and { |service, system|
-          # all optional entries of interpreter_class are
-          # either in systems, or the service isn't in systems at all
-          systems[service].nil? || systems[service] == system
-        } &&
-        systems.to_a.big_and { |service, system|
-          # all entries in systems are in either
-          # the obligatory or optional set for interpreter_class
-          interpreter_class.systems[service] == system ||
-            interpreter_class.optional_systems[service] == system
-        }
-    }
-  end
-  ################
-  # Note: a bare "protected" only affects instance methods defined after it;
-  # requested_services is defined on the class object itself and therefore
-  # stays publicly callable.
-  protected
-  ###
-  # knows about possible services that can be set in
-  # the experiment file, and where the names of
-  # the matching systems will be found in the experiment file data structure
-  #
-  # WARNING: adapt this when you introduce new services!
-  #
-  # returns: a hash
-  #  <service> => system_name
-  #
-  #  such that for each service/system name pair:
-  #  the service with the given name has been requested in
-  #  the experiment file, and the names of the systems to be used
-  #  for performing the service
-  def SynInterfaces.requested_services(exp)
-    [
-      { "flag" => "do_postag", "service"=> "pos_tagger"},
-      { "flag" => "do_lemmatize", "service"=> "lemmatizer"},
-      { "flag" => "do_parse", "service" => "parser" }
-    ].each_with_object({}) { |hash, retv|
-      if exp.get(hash["flag"])  # yes, perform this service
-        retv[hash["service"]] = exp.get(hash["service"])
-      end
-    }
-  end
-end
-require "frprep/CollinsInterface"
-require "frprep/BerkeleyInterface"
-require "frprep/SleepyInterface"
-require "frprep/MiniparInterface"
-require "frprep/TntInterface"
-require "frprep/TreetaggerInterface"
-class EmptyInterpreter < SynInterpreter
-  EmptyInterpreter.announce_me()
-  ###
-  # systems interpreted by this class:
-  # returns a hash service(string) -> system name (string),
-  # e.g.
-  # { "parser" => "collins", "lemmatizer" => "treetagger" }
-  def EmptyInterpreter.systems()
-    return {}
-  end
-  ###
-  # names of additional systems that may be interpreted by this class
-  # returns a hash service(string) -> system name(string)
-  # same as names()
-  def SynInterpreter.optional_systems()
-    return {}
-  end
-end

data/lib/frprep/TabFormat.rb DELETED Viewed

@@ -1,720 +0,0 @@
-# TabFormat.rb
-# Katrin Erk, Jan 2004
-#
-# classes to be used with tabular format text files.
-# originally CoNLL2.rb
-# Original: Katrin Erk, Jan 2004 for CoNLL '04 data
-# Rewrite: Sebastian Pado, Mar 2004 for Gemmas FrameNet data (no NEs etc.)
-# Extensions SP Jun/Jul 04
-# renamed GemmaCorpus to FNTabFormat
-# partial rewrite SP 250804: made things cleaner & leaner: no RawFormat, for example
-# sp 04/05: add a "frame" column to FNTabFormat
-#
-# Substantial changes KE 12/06:
-# variable number of columns to accommodate more than one frame per sentence
-#################################################
-# class for reading a file
-# containing data in tabular format
-require "tempfile"
-require "frprep/ISO-8859-1"
-require "frprep/ruby_class_extensions"
-#######################
-# This function takes a variable number of arguments and
-# returns them as an array
-# Idea: make formulation of tab format entries easier to read,
-# enclose variable arguments in a repeat() call,
-# which immediately gets transformed into a list
-def repeat(*args)
-  return args
-end
-#######################
-# Reads one or more parallel tabular text files line by line and
-# yields them sentence by sentence.
-class TabFormatFile
-  #######
-  # initialize:
-  # open files for reading.
-  #
-  # fp is a flat list alternating filename and format:
-  #   [filename1, format1, filename2, format2, ...]
-  # where format is a list of strings that will be used
-  # to address columns of the file, the 1st string for the 1st column
-  #
-  # format may contain _one_ entry that is an array (or a call to repeat())
-  # e.g.:
-  # ["word", "pos", "lemma", repeat("frame", "target", "gf", "pt")]
-  #
-  # raises RuntimeError if one of the files cannot be opened
-  def initialize(fp)
-    # open files
-    @files = []
-    @patterns = []
-    @no_of_read_lines = 0
-    # set to true by each_sentence once EOF has been reached
-    @read_completely = false
-    fp.each_slice(2) { |pair|
-      filename = pair[0]
-      begin
-        @files << File.new(filename)
-      rescue
-        raise 'Sorry, could not read input file ' + filename + "\n"
-      end
-      # a trailing filename without a format contributes no column names
-      @patterns += pair[1] if pair.length > 1
-    }
-    @my_sentence_class = TabFormatSentence
-  end
-  ########
-  # each_sentence:
-  # yield each sentence of the files in turn.
-  # sentences are expected to be separated
-  # by a line containing nothing but whitespace.
-  # the last sentence may or may not be followed by
-  # an empty line.
-  # each_sentence ends when EOF is encountered on any of the files;
-  # it expects all files to be the same length
-  # (in terms of number of lines).
-  # each sentence is yielded as one object of the sentence class
-  # (TabFormatSentence unless a subclass sets another one);
-  # the lines of the parallel files are joined by tab.
-  #
-  # A second call does nothing: the files have been read completely.
-  # If an empty line in one file meets a non-empty line in another,
-  # an error is printed and the process exits with status 1.
-  def each_sentence
-    # without any file there is nothing to read
-    # (and no EOF that would ever end the loop below)
-    return if @files.empty?
-    unless @read_completely
-      sentence = @my_sentence_class.new(@patterns)
-      begin
-        while true do
-          # one line from each file; readline raises EOFError at end of file
-          linearray = @files.map { |f| f.readline().chomp() }
-          @no_of_read_lines += 1
-          if linearray.any? { |x| x.strip == "" }
-            if linearray.any? { |x| x.strip != "" }
-              STDERR.puts "Error: Mismatching empty lines!"
-              exit(1)
-            else
-              # sentence finished. yield it and start a new one
-              unless sentence.empty?
-                yield sentence
-              end
-              sentence = @my_sentence_class.new(@patterns)
-            end
-          else
-            # sentence not yet finished.
-            # add this line to it
-            sentence.add_line(linearray.join("\t"))
-          end
-        end
-      rescue EOFError
-        unless sentence.empty?
-          # maybe we haven't yielded the last sentence yet.
-          yield sentence
-        end
-        @read_completely = true
-      end
-    end
-  end
-end
-#################################################
-# class for keeping one line,
-# parsed.
-# The line is kept as follows:
-# - normal features: in a hash @f mapping feature names to values
-# - features of the repeated group: in an array @r of
-#   TabFormatNamedArgs objects, one per group
-#
-# each feature of the line is available by name
-# via the method "get".
-# Additional features (from other input files) can be
-# added to the TabFormatNamedArgs object via the method
-# add_feature
-#
-# methods:
-#
-# new: initialize.
-#    values: array of strings
-#    features:  how to access the strings by name
-#              'features' is an array of strings
-#              later the i-th feature will be used to access
-#              the i-th value,
-#              except for repeated groups
-#
-# get: returns one feature by its name
-#    name: a string
-#
-# add_feature: add another feature to this object,
-#              which can be accessed via "get"
-#    name: name for the new feature, should be distinct
-#          from the ones already used in new()
-#    feature: a string, the value of the feature
-##
-class TabFormatNamedArgs
-  ############
-  # values: array of strings, the columns of one line
-  # features: array of column names; at most one entry may itself be an
-  #   array, naming the columns of a group that may repeat any number of times
-  # group: number of the group that get() falls back to, or nil
-  def initialize(values, features, group = nil)
-    @f = {}
-    @r = []
-    @group = group
-    # remember the column names; the (single) group entry is replaced
-    # by the placeholder "GROUP" and its column names are kept separately
-    @group_feature_names = nil
-    @feature_names = features.map do |feature|
-      next feature unless feature.instance_of? Array
-      @group_feature_names = feature
-      "GROUP"
-    end
-    if @feature_names.count("GROUP") > 1
-      $stderr.puts "More than one group in feature set:" + features.join(" ")
-      raise "Cannot handle this."
-    end
-    # position of the group among the features;
-    # without a group: one past the last feature
-    group_index = @feature_names.index("GROUP") || @feature_names.length()
-    trailing_features = [0, @feature_names.length() - 1 - group_index].max()
-    # index of the first value that belongs to a feature after the group
-    index_after_groups = values.length() - trailing_features
-    # values before the group go into the @f hash by name
-    group_index.times do |i|
-      @f[features[i]] = values[i]
-    end
-    # each repetition of the group becomes one object in the @r array
-    if @group_feature_names
-      group_width = @group_feature_names.length()
-      group_no = 0
-      group_index.step(index_after_groups - 1, group_width) do |group_start|
-        @r << TabFormatNamedArgs.new(values.slice(group_start, group_width),
-                                     @group_feature_names,
-                                     group_no)
-        group_no += 1
-      end
-    end
-    # values after the group go into the @f hash by name
-    (index_after_groups...values.length()).each_with_index do |value_index, offset|
-      @f[features[group_index + 1 + offset]] = values[value_index]
-    end
-  end
-  ############
-  # Formats a feature -> value hash as one tab-separated line,
-  # in the order of the 'features' list (as for new()).
-  # A feature without a value in the hash is written as "-".
-  # A group entry in the feature list is left out,
-  # i.e. zero repetitions of the group are assumed.
-  # Keys of the hash that are not in the feature list are
-  # reported on stderr and ignored.
-  # returns: string; "" if features is nil
-  def TabFormatNamedArgs.format_str(hash,     # hash: feature -> value
-                                    features) # feature list, as for new()
-    return "" if features.nil?
-    unknown = hash.keys().reject { |f| features.include? f }
-    unknown.each do |bad_feature|
-      $stderr.puts "Error: unknown feature #{bad_feature} in format_str: ignoring."
-    end
-    plain_features = features.reject { |f| f.instance_of? Array }
-    return plain_features.map { |feature| hash[feature] || "-" }.join("\t")
-  end
-  #############
-  # add a new named value; raises if the name is already in use
-  def add_feature(name, feature)
-    raise "Trying to add a feature twice: "+name if @f.has_key? name
-    @f[name] = feature
-  end
-  #############
-  # value of the named feature: looked up among the non-group features
-  # first, then in the group whose number was given to new()
-  # return: feature value, or nil
-  def get(name)
-    return get_nongroup(name) || get_from_group(name, @group)
-  end
-  #############
-  # set or overwrite a non-group feature
-  def set(name, feature)
-    @f[name] = feature
-  end
-  #############
-  # number of repetitions of the group found in the line
-  def num_groups()
-    return @r.length()
-  end
-  #############
-  # the line as a string again: entries joined by tab,
-  # in the order of the feature list, groups expanded in place
-  def to_s()
-    entries = @feature_names.map do |feature|
-      if "GROUP" == feature
-        @r.map(&:to_s).join("\t")
-      else
-        @f[feature]
-      end
-    end
-    return entries.join("\t")
-  end
-  protected
-  # value of a feature outside the group
-  # return: feature value, or nil
-  def get_nongroup(feature)
-    return @f[feature]
-  end
-  # value of a feature inside the group with the given number
-  # return: feature value, or nil if there is no such group
-  def get_from_group(name, group_no)
-    return nil if !group_no || group_no >= @r.length()
-    return @r[group_no].get_nongroup(name)
-  end
-end
-#################################################
-# class for keeping and yielding one sentence
-# in tabular format
-class TabFormatSentence
-  ############
-  # initialize:
-  # the sentence will be stored one word (plus additional info
-  # for that word) per line. Each line will be stored in a cell of
-  # the array @lines. the 'initialize' method starts with an empty
-  # array of lines.
-  def initialize(pattern)
-    @lines = Array.new
-    @pattern = pattern
-    # this is just for inheritance; FNTabFormatSentence will need this
-    @group_no = nil
-  end
-  #####
-  # length: number of words in the sentence
-  def length
-    return @lines.length
-  end
-  ################3
-  # add_line:
-  # add one entry to the @lines array, i.e. information for one word
-  # of the sentence.
-  def add_line(line)
-    @lines << line
-  end
-  ###################
-  # empty?:
-  # returns true if there are currently no lines stored in this
-  # TabFormatSentence object
-  # else false
-  def empty?
-    return @lines.empty?
-  end
-  ######################
-  # empty!:
-  # discards all entries to the @lines array,
-  # i.e. empties this TabFormatSentence object of all
-  # data
-  def empty!
-    @lines.clear
-  end
-  #####################
-  # each_line:
-  # yields each line of the sentence
-  # as a string
-  def each_line
-    @lines.each { |l| yield l }
-  end
-  ######################
-  # each_line_parsed:
-  # yields each line of the sentence
-  # broken up as follows:
-  # the line is expected to contain 6 or more pieces of
-  # information, separated by whitespace.
-  # - the word
-  # - the part of speech info for the word
-  # - syntax for roles (not to be used)
-  # - target (or -)
-  # - gramm. function for roles (not to be used)
-  # - one column with role annotation
-  #
-  # All pieces are yielded as strings, except for the argument columns, which
-  # are yielded as an array of strings.
-  def each_line_parsed
-    lineno = 0
-    f = nil
-    @lines.each { |l|
-      f = TabFormatNamedArgs.new(l.split("\t"), @pattern, @group_no)
-      f.add_feature("lineno", lineno)
-      yield f
-      lineno += 1
-    }
-  end
-  ###
-  # read_one_line:
-  # return a line of the sentence specified by its number
-  def read_one_line(number)
-    return(@lines[number])
-  end
-  ###
-  # read_one_line_parsed:
-  # like get_line, but the features in the line are returned
-  # separately,
-  # as in each_line_parsed
-  def read_one_line_parsed(number)
-    if @lines[number].nil?
-      return nil
-    else
-      f = TabFormatNamedArgs.new(@lines[number].split("\t"), @pattern, @group_no)
-      f.add_feature("lineno", number)
-      return f
-    end
-  end
-  # set line no of first line of present sentence
-  def set_starting_line(n)
-    raise "Deprecated"
-  end
-  # returns line no of first line of present sentence
-  def get_starting_line()
-    raise "Deprecated"
-  end
-end
-########################################################
-# TabFormat files containing everything that's in the FN lexunit files
-#
-# one target per sentence
-class FNTabFormatFile < TabFormatFile
-  def initialize(filename,tag_suffix=nil,lemma_suffix=nil)
-    corpusname = File.dirname(filename)+"/"+File.basename(filename,".tab")
-    filename_label_pairs = [filename,FNTabFormatFile.fntab_format()]
-    if lemma_suffix # raise exception if lemmatisation does not esist
-      filename_label_pairs.concat [corpusname+lemma_suffix,["lemma"]]
-    end
-    if tag_suffix # raise exception if tagging does not exist
-      filename_label_pairs.concat [corpusname+tag_suffix,["pos"]]
-    end
-    super(filename_label_pairs)
-    @my_sentence_class = FNTabSentence
-  end
-  def FNTabFormatFile.fntab_format()
-#    return ["word", "pt", "gf", "role", "target", "frame", "lu_sent_ids"]
-    return [
-      "word",
-      FNTabFormatFile.frametab_format(),
-      "ne", "sent_id"
-    ]
-  end
-  def FNTabFormatFile.frametab_format()
-    return ["pt", "gf", "role", "target", "frame", "stuff"]
-  end
-  ##########
-  # given a hash mapping features to values,
-  # format according to fntab_format
-  def FNTabFormatFile.format_str(hash)
-    return TabFormatNamedArgs.format_str(hash, FNTabFormatFile.fntab_format())
-  end
-end
-############################################
-# One sentence of a FN tab format file. Offers access to the sentence ID,
-# to the frames of the sentence, and to the spans labeled in a column.
-class FNTabSentence < TabFormatSentence
-  ####
-  # overwrite this to get a feature from
-  # a group rather than from the main feature list
-  def get_this(l, feature_name)
-    return l.get(feature_name)
-  end
-  ####
-  # raises an exception if the first line has no "sent_id" entry;
-  # later lines are not inspected
-  def sanity_check()
-    each_line_parsed {|l|
-      if l.get("sent_id").nil?
-        raise "Error: corpus file does not conform to FN format."
-      else
-        return
-      end
-    }
-  end
-  ####
-  # returns the sentence ID, a string, as set by FrameNet
-  # (taken from the first line of the sentence)
-  def get_sent_id()
-    sanity_check
-    each_line_parsed {|l|
-      return l.get("sent_id")
-    }
-  end
-  ####
-  # iterator, yields each frame of the sentence as a FNTabFrame
-  # object. They contain the complete sentence, but provide
-  # access to exactly one frame of that sentence.
-  #
-  # A sentence without lines has no frames: nothing is yielded.
-  def each_frame()
-    # how many frames? assume that each line has the same
-    # number of frames
-    first_line = read_one_line_parsed(0)
-    return if first_line.nil?
-    num_frames = first_line.num_groups()
-    0.upto(num_frames - 1) { |frame_no|
-      frame_obj = FNTabFrame.new(@pattern, frame_no)
-      each_line { |l| frame_obj.add_line(l) }
-      yield frame_obj
-    }
-  end
-  ####
-  # computes a mapping from word indices to labels on these words
-  #
-  # returns a hash: index_list(array:integer) -> label(string)
-  # An entry il->label means that all the lines whose line
-  # numbers are listed in il are labeled with label.
-  #
-  # Line numbers correspond to words of the sentence. Counting starts at 0.
-  #
-  # By default, "markables" looks for role labels, i.e. labels in the
-  # column "role", but it can also look in another column.
-  # To change the default, give the column name as a parameter.
-  #
-  # Expected entries of the column:
-  #   - or --        no label starts or ends on this word
-  #   B-label        a span with this label starts here
-  #   E-label        the span with this label ends here
-  #   B-label:E-label  a span of one word
-  # Anything else is reported on stderr and skipped.
-  def markables(use_this_column="role")
-    # returns hash of {index list} -> {markup label}
-    sanity_check()
-    idlist_to_annotation_list = Hash.new
-    # add entry for the target word
-    # idlist_to_annotation_list[get_target_indices()] = "target"
-    # determine span of each frame element
-    # if we find overlapping FEs, we write a warning to STDERR
-    # ignore the 2nd label and attempt to "close" the 1st label
-    ids = Array.new
-    label = nil
-    each_line_parsed { |l|
-      this_id = get_this(l, "lineno")
-      # start of FE?
-      this_col = get_this(l, use_this_column)
-      unless this_col
-        $stderr.puts "nil entry #{use_this_column} in line #{this_id} of sent #{get_sent_id()}. Skipping."
-        next
-      end
-      this_fe_ann = this_col.split(":")
-      case this_fe_ann.length
-      when 1 # nothing at all, or a single begin or end
-        markup = this_fe_ann.first
-        if markup == "-"  or markup == "--" # no change
-          if label
-            ids << this_id
-          end
-        elsif markup =~ /^B-(\S+)$/
-          if label # are we within a markable right now?
-            $stderr.puts "[TabFormat] Warning: Markable "+$1.to_s+" starts while within markable "+label.to_s
-            $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
-          else
-            label = $1
-            ids << this_id
-          end
-        elsif markup =~ /^E-(\S+)$/
-          if label == $1 # we close the markable we've opened before
-            ids << this_id
-            # store information
-            idlist_to_annotation_list[ids] = label
-            # reset memory
-            label = nil
-            ids = Array.new
-          else
-            $stderr.puts "[TabFormat] Warning: Markable "+$1.to_s+" closes while within markable "+ label.to_s
-            $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
-          end
-        else
-          $stderr.puts "[TabFormat] Warning: cannot analyse markup "+markup
-          $stderr.puts "Debug data: Sentence id #{get_sent_id()}"
-        end
-      when 2 # this should be a one-word markable
-        b_markup = this_fe_ann[0]
-        e_markup = this_fe_ann[1]
-        if label
-          $stderr.puts "[TabFormat] Warning: Finding new markable at word #{this_id} while within markable "+label.to_s
-          $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
-        else
-          if b_markup =~ /^B-(\S+)$/
-            b_label = $1
-            if e_markup =~ /^E-(\S+)$/
-              e_label = $1
-              if b_label == e_label
-                idlist_to_annotation_list[[this_id]] = b_label
-              else
-                $stderr.puts "[TabFormat] Warning: Starting markable "+b_label+", closing markable "+e_label
-                $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
-              end
-            else
-              $stderr.puts "[TabFormat] Warning: Unknown end markup "+e_markup
-              $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
-            end
-          else
-            $stderr.puts "[TabFormat] Warning: Unknown start markup "+b_markup
-            $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
-          end
-        end
-      else
-        # also reached for an empty entry, which splits into zero parts
-        $stderr.puts "Warning: cannot analyse markup with more than two colon-separated parts like "+this_fe_ann.join(":")
-        $stderr.puts "Debug data: Sentence id #{get_sent_id()}"
-      end
-    }
-    unless label.nil?
-      $stderr.puts "[TabFormat] Warning: Markable "+label.to_s+" did not end in sentence."
-      $stderr.puts "Debug data: Sentence id #{get_sent_id()}, current ID list #{ids.join(" ")}"
-    end
-    return idlist_to_annotation_list
-  end
-  #######
-  # the words of the sentence, separated by single spaces
-  def to_s
-    sanity_check
-    array = Array.new
-    each_line_parsed {|l|
-      array << l.get("word")
-    }
-    return array.join(" ")
-  end
-end
-class FNTabFrame < FNTabSentence
-  ############
-  # initialize:
-  # as parent, except that we also get a frame number
-  # such that we can access the features of ``our'' frame
-  def initialize(pattern, frameno)
-    # by setting @group_no to frameno,
-    # we are initializing each TabFormatNamedArgs object
-    # in each_line_parsed() or read_one_line_parsed()
-    # with the right group number,
-    # such that all calls to TabFormatNamedArgs.get()
-    # will access the right group.
-    super(pattern)
-    @group_no = frameno
-  end
-  # returns the frame introduced by the target word(s)
-  # of this frame group, a string
-  def get_frame()
-    sanity_check()
-    each_line_parsed {|l|
-      return l.get("frame")
-    }
-  end
-  ####
-  # returns an array of integers: the indices of the target of
-  # the frame
-  # These are the line numbers, which start counting at 0
-  #
-  # a target may span more than one word
-  def get_target_indices()
-    sanity_check
-    idx = Array.new
-    each_line_parsed {|l|
-      unless l.get("target") == "-"
-        idx << l.get("lineno")
-      end
-    }
-    return idx
-  end
-  ####
-  # returns a string: the target
-  # in the case of multiword targets,
-  # we find the complete target at all
-  # indices, i.e. we can just take the first one we find
-  def get_target()
-    each_line_parsed {|l|
-      t = l.get("target")
-      unless t == "-"
-	return t
-      end
-    }
-  end
-  ####
-  # get the target POS, according to FrameNet
-  def get_target_fn_pos()
-    get_target() =~ /^[^\.]+\.(\w+)$/
-    return $1
-  end
-end