frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
 - data/CHANGELOG.rdoc +0 -0
 - data/LICENSE.rdoc +0 -0
 - data/README.rdoc +0 -0
 - data/lib/common/AbstractSynInterface.rb +1227 -0
 - data/lib/common/BerkeleyInterface.rb +375 -0
 - data/lib/common/CollinsInterface.rb +1165 -0
 - data/lib/common/ConfigData.rb +694 -0
 - data/lib/common/Counter.rb +18 -0
 - data/lib/common/DBInterface.rb +48 -0
 - data/lib/common/EnduserMode.rb +27 -0
 - data/lib/common/Eval.rb +480 -0
 - data/lib/common/FixSynSemMapping.rb +196 -0
 - data/lib/common/FrPrepConfigData.rb +66 -0
 - data/lib/common/FrprepHelper.rb +1324 -0
 - data/lib/common/Graph.rb +345 -0
 - data/lib/common/ISO-8859-1.rb +24 -0
 - data/lib/common/ML.rb +186 -0
 - data/lib/common/Maxent.rb +215 -0
 - data/lib/common/MiniparInterface.rb +1388 -0
 - data/lib/common/Optimise.rb +195 -0
 - data/lib/common/Parser.rb +213 -0
 - data/lib/common/RegXML.rb +269 -0
 - data/lib/common/RosyConventions.rb +171 -0
 - data/lib/common/SQLQuery.rb +243 -0
 - data/lib/common/STXmlTerminalOrder.rb +194 -0
 - data/lib/common/SalsaTigerRegXML.rb +2347 -0
 - data/lib/common/SalsaTigerXMLHelper.rb +99 -0
 - data/lib/common/SleepyInterface.rb +384 -0
 - data/lib/common/SynInterfaces.rb +275 -0
 - data/lib/common/TabFormat.rb +720 -0
 - data/lib/common/Tiger.rb +1448 -0
 - data/lib/common/TntInterface.rb +44 -0
 - data/lib/common/Tree.rb +61 -0
 - data/lib/common/TreetaggerInterface.rb +303 -0
 - data/lib/common/headz.rb +338 -0
 - data/lib/common/option_parser.rb +13 -0
 - data/lib/common/ruby_class_extensions.rb +310 -0
 - data/lib/fred/Baseline.rb +150 -0
 - data/lib/fred/FileZipped.rb +31 -0
 - data/lib/fred/FredBOWContext.rb +863 -0
 - data/lib/fred/FredConfigData.rb +182 -0
 - data/lib/fred/FredConventions.rb +232 -0
 - data/lib/fred/FredDetermineTargets.rb +324 -0
 - data/lib/fred/FredEval.rb +312 -0
 - data/lib/fred/FredFeatureExtractors.rb +321 -0
 - data/lib/fred/FredFeatures.rb +1061 -0
 - data/lib/fred/FredFeaturize.rb +596 -0
 - data/lib/fred/FredNumTrainingSenses.rb +27 -0
 - data/lib/fred/FredParameters.rb +402 -0
 - data/lib/fred/FredSplit.rb +84 -0
 - data/lib/fred/FredSplitPkg.rb +180 -0
 - data/lib/fred/FredTest.rb +607 -0
 - data/lib/fred/FredTrain.rb +144 -0
 - data/lib/fred/PlotAndREval.rb +480 -0
 - data/lib/fred/fred.rb +45 -0
 - data/lib/fred/md5.rb +23 -0
 - data/lib/fred/opt_parser.rb +250 -0
 - data/lib/frprep/AbstractSynInterface.rb +1227 -0
 - data/lib/frprep/Ampersand.rb +37 -0
 - data/lib/frprep/BerkeleyInterface.rb +375 -0
 - data/lib/frprep/CollinsInterface.rb +1165 -0
 - data/lib/frprep/ConfigData.rb +694 -0
 - data/lib/frprep/Counter.rb +18 -0
 - data/lib/frprep/FNCorpusXML.rb +643 -0
 - data/lib/frprep/FNDatabase.rb +144 -0
 - data/lib/frprep/FixSynSemMapping.rb +196 -0
 - data/lib/frprep/FrPrepConfigData.rb +66 -0
 - data/lib/frprep/FrameXML.rb +513 -0
 - data/lib/frprep/FrprepHelper.rb +1324 -0
 - data/lib/frprep/Graph.rb +345 -0
 - data/lib/frprep/ISO-8859-1.rb +24 -0
 - data/lib/frprep/MiniparInterface.rb +1388 -0
 - data/lib/frprep/Parser.rb +213 -0
 - data/lib/frprep/RegXML.rb +269 -0
 - data/lib/frprep/STXmlTerminalOrder.rb +194 -0
 - data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
 - data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
 - data/lib/frprep/SleepyInterface.rb +384 -0
 - data/lib/frprep/SynInterfaces.rb +275 -0
 - data/lib/frprep/TabFormat.rb +720 -0
 - data/lib/frprep/Tiger.rb +1448 -0
 - data/lib/frprep/TntInterface.rb +44 -0
 - data/lib/frprep/Tree.rb +61 -0
 - data/lib/frprep/TreetaggerInterface.rb +303 -0
 - data/lib/frprep/do_parses.rb +142 -0
 - data/lib/frprep/frprep.rb +686 -0
 - data/lib/frprep/headz.rb +338 -0
 - data/lib/frprep/one_parsed_file.rb +28 -0
 - data/lib/frprep/opt_parser.rb +94 -0
 - data/lib/frprep/ruby_class_extensions.rb +310 -0
 - data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
 - data/lib/rosy/DBMySQL.rb +146 -0
 - data/lib/rosy/DBSQLite.rb +280 -0
 - data/lib/rosy/DBTable.rb +239 -0
 - data/lib/rosy/DBWrapper.rb +176 -0
 - data/lib/rosy/ExternalConfigData.rb +58 -0
 - data/lib/rosy/FailedParses.rb +130 -0
 - data/lib/rosy/FeatureInfo.rb +242 -0
 - data/lib/rosy/GfInduce.rb +1115 -0
 - data/lib/rosy/GfInduceFeature.rb +148 -0
 - data/lib/rosy/InputData.rb +294 -0
 - data/lib/rosy/RosyConfigData.rb +115 -0
 - data/lib/rosy/RosyConfusability.rb +338 -0
 - data/lib/rosy/RosyEval.rb +465 -0
 - data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
 - data/lib/rosy/RosyFeaturize.rb +280 -0
 - data/lib/rosy/RosyInspect.rb +336 -0
 - data/lib/rosy/RosyIterator.rb +477 -0
 - data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
 - data/lib/rosy/RosyPruning.rb +165 -0
 - data/lib/rosy/RosyServices.rb +744 -0
 - data/lib/rosy/RosySplit.rb +232 -0
 - data/lib/rosy/RosyTask.rb +19 -0
 - data/lib/rosy/RosyTest.rb +826 -0
 - data/lib/rosy/RosyTrain.rb +232 -0
 - data/lib/rosy/RosyTrainingTestTable.rb +786 -0
 - data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
 - data/lib/rosy/View.rb +418 -0
 - data/lib/rosy/opt_parser.rb +379 -0
 - data/lib/rosy/rosy.rb +77 -0
 - data/lib/shalmaneser/version.rb +3 -0
 - data/test/frprep/test_opt_parser.rb +94 -0
 - data/test/functional/functional_test_helper.rb +40 -0
 - data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
 - data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
 - data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
 - data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
 - data/test/functional/test_fred.rb +47 -0
 - data/test/functional/test_frprep.rb +52 -0
 - data/test/functional/test_rosy.rb +20 -0
 - metadata +270 -0
 
| 
         @@ -0,0 +1,44 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "tempfile"
         
     | 
| 
      
 2 
     | 
    
         
            +
            require "common/AbstractSynInterface"
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            ################################################
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Interface class
         
     | 
| 
      
 6 
     | 
    
         
            +
            # SynInterface wrapper around the TnT tagger.
            # Calls announce_me when the class body is evaluated (announce_me is
            # inherited and not defined here -- presumably it registers this
            # interface with the SynInterface system; confirm in SynInterfaceTab).
            class TntInterface < SynInterfaceTab
              TntInterface.announce_me()

              # Returns the name of the wrapped system: "tnt".
              def TntInterface.system()
                return "tnt"
              end

              # Returns the name of the service offered: "pos_tagger".
              def TntInterface.service()
                return "pos_tagger"
              end

              # Tags the words of infilename and writes the tagger result to
              # outfilename.
              #
              # Steps:
              # 1. fntab_words_to_file writes the input words into a temp file
              # 2. the tagger (@program_path) is run on that temp file
              # 3. grep removes comment lines (those starting with "%%")
              # 4. sed reduces each "<non-spaces><spaces><non-spaces>" line start
              #    to the second field (the tag) and the result is redirected
              #    to outfilename
              #
              # This assumes that the experiment file entry for pos_tagger_path
              # has the form
              #   pos_tagger_path = <program_name> <model>
              # which is why @program_path is passed to the shell unescaped.
              #
              # Raises RuntimeError when input and output differ in their number
              # of lines. Returns nil otherwise.
              def process_file(infilename,   # string: name of input file
                               outfilename)  # string: name of output file

                tempfile = Tempfile.new("Tnt")
                begin
                  TntInterface.fntab_words_to_file(infilename, tempfile)
                  tempfile.close

                  # Kernel.system is named explicitly because TntInterface.system
                  # is redefined above.
                  Kernel.system(@program_path + " " + tempfile.path +
                                ' | grep -v -E "^%%" |  sed -e\'s/^[^ ]\{1,\}[[:space:]]\{1,\}\([^ ]\{1,\}\)/\1/\' > ' + outfilename)
                ensure
                  # delete the tempfile even when one of the steps above raised
                  tempfile.close(true)
                end

                # Count lines in-process instead of shelling out to "cat | wc -l":
                # the file names are never interpolated into a shell command here,
                # so names containing spaces or shell metacharacters are safe.
                unless count_newlines_in(infilename) == count_newlines_in(outfilename)
                  raise "Error: tagged file has different line number from corpus file!"
                end
              end

              private

              # Number of newline-terminated lines in the file at path, i.e. the
              # value "wc -l" reports. A missing file counts as 0 lines, which is
              # what the former "cat <file> | wc -l" pipeline produced.
              def count_newlines_in(path)
                return 0 unless File.file?(path)

                File.open(path, "rb") do |file|
                  file.each_line.count { |line| line.end_with?("\n") }
                end
              end
            end
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
    
        data/lib/common/Tree.rb
    ADDED
    
    | 
         @@ -0,0 +1,61 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'common/Graph'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            # GraphNode variant for trees, where a node has only one parent.
            # The plural accessors used below (parents, parent_labels,
            # parents_with_edgelabel) as well as each_parent_with_edgelabel,
            # remove_parent and @parents are not defined in this class.
            class TreeNode < GraphNode

              def initialize(id)
                super(id)
              end

              # Singular counterparts of the ancestor-related methods:
              # each one hands back the first entry of the plural result,
              # or nil when the plural result is nil.

              def parent()
                all_parents = parents()
                all_parents.first unless all_parents.nil?
              end

              def parent_label()
                all_labels = parent_labels()
                all_labels.first unless all_labels.nil?
              end

              def parent_with_edgelabel()
                all_pairs = parents_with_edgelabel()
                all_pairs.first unless all_pairs.nil?
              end

              # Same as set_parent.
              def add_parent(parent, edgelabel, varhash={})
                set_parent(parent, edgelabel, varhash)
              end

              # Makes +parent+ the parent of this node, reached via +edgelabel+.
              # varhash is forwarded to remove_parent; its "pointer_insteadof_edge"
              # entry suppresses the reverse (parent -> child) link.
              def set_parent(parent, edgelabel, varhash={})
                # detach from whatever parent(s) are currently registered
                each_parent_with_edgelabel do |old_label, old_parent|
                  remove_parent(old_parent, old_label, varhash)
                end

                # register the new parent
                @parents << [edgelabel, parent]

                # and vice versa: add self as child to parent,
                # unless that is switched off or the link already exists
                return if varhash["pointer_insteadof_edge"]
                return if parent.children_with_edgelabel().include?([edgelabel, self])

                parent.add_child(self, edgelabel)
              end
            end
         
     | 
| 
         @@ -0,0 +1,303 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # sp 30 11 06
         
     | 
| 
      
 2 
     | 
    
         
            +
            # extended by TreeTaggerPOSInterface
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            require "tempfile"
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            require "common/AbstractSynInterface"
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            ###########
         
     | 
| 
      
 9 
     | 
    
         
            +
            # KE dec 7, 06
         
     | 
| 
      
 10 
     | 
    
         
            +
            # common mixin for both Treetagger modules, doing the actual processing
         
     | 
| 
      
 11 
     | 
    
         
            +
            module TreetaggerModule
              ###
              # Treetagger does both lemmatization and POS-tagging.
              # However, the way the SynInterface system is set up in Shalmaneser,
              # each SynInterface can offer only _one_ service.
              # This means that we cannot do a SynInterface that writes
              # both a POS file and a lemma file.
              # Instead, both will include this module, which does the
              # actual TreeTagger call and then stores the result in a file
              # of its own, similar to the 'outfilename' given to
              # TreetaggerInterface.process_file but with a separate extension.
              # really_process_file checks for existence of this file because,
              # if the TreeTagger lemmatization and POS-tagging classes are called
              # separately, one of them will go first, and the 2nd one will not
              # need to do the TreeTagger call anymore.
              #
              # Returns the name of the file containing the TreeTagger output
              # with both POS tags and lemma information:
              #   <directory of outfilename>/<outfilename minus last suffix>.TreeTagger
              #
              # Raises RuntimeError when the TreeTagger output differs from the
              # input by anything other than one missing final line.
              #
              # The including object must provide @program_path (the command that
              # is run on the temp file).
              def really_process_file(infilename, # string: name of input file
                                      outfilename,# string: name of file that the caller is to produce
                                      make_new_outfile_anyway = false) # Boolean: run TreeTagger in any case?

                # fabricate the filename in which the actual TreeTagger output
                # will be placed. File.extname only looks at the last path
                # component and yields "" when there is no suffix, so names
                # without a dot (or with a dot only in the directory) are handled.
                my_outfilename = File.join(
                  File.dirname(outfilename),
                  File.basename(outfilename, File.extname(outfilename)) + ".TreeTagger"
                )

                ##
                # does it exist? then just return it
                # (File.exist? -- File.exists? is gone as of Ruby 3.2)
                if !make_new_outfile_anyway && File.exist?(my_outfilename)
                  return my_outfilename
                end

                ##
                # else construct it, then return it
                tempfile = Tempfile.new("Treetagger")
                begin
                  TreetaggerInterface.fntab_words_to_file(infilename, tempfile, "<EOS>", "iso")
                  tempfile.close

                  # call TreeTagger
                  Kernel.system(@program_path + " " + tempfile.path +
                                " > " + my_outfilename)
                ensure
                  # delete the tempfile even when one of the steps above raised
                  tempfile.close(true)
                end

                # external problem: sometimes, the treetagger keeps the last <EOS>
                # for itself, resulting in a .tagged file missing the last (blank) line
                original_length = treetagger_newline_count(infilename)
                # NOTE(review): prints the input file name on stdout; looks like
                # leftover debug/progress output -- kept so the visible output
                # of the method does not change.
                puts infilename
                lemmatised_length = treetagger_newline_count(my_outfilename)

                case original_length - lemmatised_length
                when 0
                  # everything ok, don't do anything
                when 1
                  # add one more newline to the .tagged file
                  File.open(my_outfilename, "a") { |file| file.puts }
                else
                  # this is a "real" error
                  problem = "Error: lemmatiser/tagger output for #{File.basename(infilename)} " \
                            "has different line number from corpus file!"
                  $stderr.puts "Original length: #{original_length}\tLemmatised length: #{lemmatised_length}"
                  $stderr.puts problem
                  raise problem
                end

                return my_outfilename
              end

              private

              # Number of newline-terminated lines in the file at path, i.e. the
              # value "wc -l" reports. A missing file counts as 0 lines, which is
              # what the former "cat <file> | wc -l" pipeline produced.
              def treetagger_newline_count(path)
                return 0 unless File.file?(path)

                File.open(path, "rb") do |file|
                  file.each_line.count { |line| line.end_with?("\n") }
                end
              end
            end
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
      
 89 
     | 
    
         
            +
            #######################################
         
     | 
| 
      
 90 
     | 
    
         
            +
            class TreetaggerInterface < SynInterfaceTab
         
     | 
| 
      
 91 
     | 
    
         
            +
              TreetaggerInterface.announce_me()
         
     | 
| 
      
 92 
     | 
    
         
            +
              
         
     | 
| 
      
 93 
     | 
    
         
            +
              include TreetaggerModule
         
     | 
| 
      
 94 
     | 
    
         
            +
             
     | 
| 
      
 95 
     | 
    
         
            +
              ###
         
     | 
| 
      
 96 
     | 
    
         
            +
              # Class-level accessor: name of the wrapped system.
              def self.system
                "treetagger"
              end
         
     | 
| 
      
 99 
     | 
    
         
            +
             
     | 
| 
      
 100 
     | 
    
         
            +
              ###
         
     | 
| 
      
 101 
     | 
    
         
            +
              # Class-level accessor: name of the service this interface offers.
              def self.service
                "lemmatizer"
              end
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
      
 105 
     | 
    
         
            +
              ###
         
     | 
| 
      
 106 
     | 
    
         
            +
              # convert TreeTagger's penn tagset into Collins' penn tagset *argh*
         
     | 
| 
      
 107 
     | 
    
         
            +
              
         
     | 
| 
      
 108 
     | 
    
         
            +
              # Returns a copy of +line+ in which "(" becomes "-LRB-", ")" becomes
              # "-RRB-", and both '' and `` become a double quote character.
              # Note: the trailing line terminator is stripped from +line+ itself
              # (in place) before the copy is made.
              def convert_to_berkeley(line)
                line.chomp!
                substitutions = {
                  "("  => "-LRB-",
                  ")"  => "-RRB-",
                  "''" => "\"",
                  "``" => "\""
                }
                # the four patterns cannot overlap, so one pass is equivalent
                # to replacing them one after the other
                line.gsub(/[()]|''|``/, substitutions)
              end
         
     | 
| 
      
 112 
     | 
    
         
            +
              
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
              ###
         
     | 
| 
      
 115 
     | 
    
         
            +
              def process_file(infilename,  # string: name of input file
         
     | 
| 
      
 116 
     | 
    
         
            +
                               outfilename) # string: name of output file
         
     | 
| 
      
 117 
     | 
    
         
            +
             
     | 
| 
      
 118 
     | 
    
         
            +
                # KE change here
         
     | 
| 
      
 119 
     | 
    
         
            +
                ttfilename = really_process_file(infilename, outfilename)
         
     | 
| 
      
 120 
     | 
    
         
            +
             
     | 
| 
      
 121 
     | 
    
         
            +
                # write all output to tempfile2 first, then 
         
     | 
| 
      
 122 
     | 
    
         
            +
                # change ISO to UTF-8 into outputfile
         
     | 
| 
      
 123 
     | 
    
         
            +
                tempfile2 = Tempfile.new("treetagger")
         
     | 
| 
      
 124 
     | 
    
         
            +
                tempfile2.close()
         
     | 
| 
      
 125 
     | 
    
         
            +
                
         
     | 
| 
      
 126 
     | 
    
         
            +
                # 2. use cut to get the actual lemmatisation
         
     | 
| 
      
 127 
     | 
    
         
            +
                
         
     | 
| 
      
 128 
     | 
    
         
            +
                Kernel.system("cat " + ttfilename + 
         
     | 
| 
      
 129 
     | 
    
         
            +
            		  ' | sed -e\'s/<EOS>//\' | cut -f3 > '+tempfile2.path()) 
         
     | 
| 
      
 130 
     | 
    
         
            +
                
         
     | 
| 
      
 131 
     | 
    
         
            +
                # transform ISO-8859-1 back to UTF-8, 
         
     | 
| 
      
 132 
     | 
    
         
            +
                # write to 'outfilename'    
         
     | 
| 
      
 133 
     | 
    
         
            +
                begin
         
     | 
| 
      
 134 
     | 
    
         
            +
                  outfile = File.new(outfilename, "w")
         
     | 
| 
      
 135 
     | 
    
         
            +
                rescue
         
     | 
| 
      
 136 
     | 
    
         
            +
                  raise "Could not write to #{outfilename}"
         
     | 
| 
      
 137 
     | 
    
         
            +
                end
         
     | 
| 
      
 138 
     | 
    
         
            +
                tempfile2.open
         
     | 
| 
      
 139 
     | 
    
         
            +
                # AB: Internally all the flow is an utf-8 encoded stream.
         
     | 
| 
      
 140 
     | 
    
         
            +
                # TreeTagger consumes one byte encodings (but we should provide a
         
     | 
| 
      
 141 
     | 
    
         
            +
                # utf-8 model for German). So we convert utf-8 to latin1, then
         
     | 
| 
      
 142 
     | 
    
         
            +
                # process the text and convert it back to utf-8.
         
     | 
| 
      
 143 
     | 
    
         
            +
                #
         
     | 
| 
      
 144 
     | 
    
         
            +
                while line = tempfile2.gets
         
     | 
| 
      
 145 
     | 
    
         
            +
            	#outfile.puts UtfIso.from_iso_8859_1(line)
         
     | 
| 
      
 146 
     | 
    
         
            +
                  utf8line = UtfIso.from_iso_8859_1(line)
         
     | 
| 
      
 147 
     | 
    
         
            +
                  outfile.puts convert_to_berkeley(utf8line)
         
     | 
| 
      
 148 
     | 
    
         
            +
                end
         
     | 
| 
      
 149 
     | 
    
         
            +
                
         
     | 
| 
      
 150 
     | 
    
         
            +
                # remove second tempfile, finalize output file
         
     | 
| 
      
 151 
     | 
    
         
            +
                tempfile2.close(true)
         
     | 
| 
      
 152 
     | 
    
         
            +
                outfile.close()
         
     | 
| 
      
 153 
     | 
    
         
            +
             
     | 
| 
      
 154 
     | 
    
         
            +
              end
         
     | 
| 
      
 155 
     | 
    
         
            +
            end
         
     | 
| 
      
 156 
     | 
    
         
            +
             
     | 
| 
      
 157 
     | 
    
         
            +
             
     | 
| 
      
 158 
     | 
    
         
            +
            # sp 30 11 06
         
     | 
| 
      
 159 
     | 
    
         
            +
            #
         
     | 
| 
      
 160 
     | 
    
         
            +
            # using TreeTagger for POS tagging of English text
         
     | 
| 
      
 161 
     | 
    
         
            +
            #
         
     | 
| 
      
 162 
     | 
    
         
            +
            # copy-and-paste from lemmatisation
         
     | 
| 
      
 163 
     | 
    
         
            +
            #
         
     | 
| 
      
 164 
     | 
    
         
            +
            # differences: 
         
     | 
| 
      
 165 
     | 
    
         
            +
            # 1. use field 2 and not 3 from the output
         
     | 
| 
      
 166 
     | 
    
         
            +
            # 2. convert tags from what Treetagger thinks is the Penn Tagset to what TnT and Collins think is the Penn Tagset
         
     | 
| 
      
 167 
     | 
    
         
            +
            # 
         
     | 
| 
      
 168 
     | 
    
         
            +
            # KE 7 12 06
         
     | 
| 
      
 169 
     | 
    
         
            +
            # change interface such that TreeTagger is called only once
         
     | 
| 
      
 170 
     | 
    
         
            +
            # and both POS tags and lemma are read from the same files,
         
     | 
| 
      
 171 
     | 
    
         
            +
            # rather than calling the tagger twice
         
     | 
| 
      
 172 
     | 
    
         
            +
            class TreetaggerPOSInterface < SynInterfaceTab
              TreetaggerPOSInterface.announce_me()
              include TreetaggerModule

              ###
              # name of the system wrapped by this interface
              def TreetaggerPOSInterface.system()
                return "treetagger"
              end

              ###
              # name of the service this interface provides
              def TreetaggerPOSInterface.service()
                return "pos_tagger"
              end

              ###
              # convert TreeTagger's penn tagset into Collins' penn tagset *argh*
              #
              # line: string, one tag (a trailing newline is ignored)
              # returns: new string with the tag prefixes PP, NP, VV, VH and
              #          SENT rewritten to PRP, NNP, VB, VB and "."
              #
              # The argument itself is left untouched.
              def convert_to_collins(line)
                # chomp (not chomp!) so the caller's string is not modified
                return line.chomp.gsub(/^PP/,"PRP").gsub(/^NP/,"NNP").gsub(/^VV/,"VB").gsub(/^VH/,"VB").gsub(/^SENT/,".")
              end

              ###
              # Write the POS tag column of the tagger output for 'infilename'
              # to 'outfilename', one tag per line.
              #
              # The tagger output file is obtained from really_process_file().
              # From each of its lines the first "<EOS>" marker is removed and
              # the second tab-separated field is kept; each tag is mapped
              # with convert_to_collins() and converted from ISO-8859-1 to UTF-8.
              #
              # Raises RuntimeError if 'outfilename' cannot be opened for writing.
              def process_file(infilename,  # string: name of input file
                               outfilename) # string: name of output file
                # needed to quote file names for the shell pipeline below
                require "shellwords"

                # KE change here
                tt_filename = really_process_file(infilename, outfilename, true)

                # write all output to tempfile2 first, then
                # change ISO to UTF-8 into outputfile
                tempfile2 = Tempfile.new("treetagger")
                tempfile2.close()

                begin
                  # 2. use cut to get the actual POS tags (field 2).
                  # Kernel.system is called explicitly because this class
                  # defines its own class method named 'system'.
                  # Both paths are shell-escaped so that file names containing
                  # blanks or shell metacharacters cannot break the pipeline.
                  Kernel.system("cat " + Shellwords.escape(tt_filename) +
                                ' | sed -e\'s/<EOS>//\' | cut -f2 > ' +
                                Shellwords.escape(tempfile2.path()))

                  # transform ISO-8859-1 back to UTF-8,
                  # write to 'outfilename'
                  begin
                    outfile = File.new(outfilename, "w")
                  rescue
                    raise "Could not write to #{outfilename}"
                  end

                  begin
                    tempfile2.open()
                    while (line = tempfile2.gets())
                      outfile.puts UtfIso.from_iso_8859_1(convert_to_collins(line))
                    end
                  ensure
                    # finalize output file even if the conversion raised
                    outfile.close()
                  end
                ensure
                  # remove second tempfile in every case
                  tempfile2.close(true)
                end
              end
            end
         
     | 
| 
      
 228 
     | 
    
         
            +
             
     | 
| 
      
 229 
     | 
    
         
            +
            ###############
         
     | 
| 
      
 230 
     | 
    
         
            +
            # an interpreter that only has Treetagger, no parser
         
     | 
| 
      
 231 
     | 
    
         
            +
            class TreetaggerInterpreter < SynInterpreter
              TreetaggerInterpreter.announce_me()

              ###
              # names of the systems interpreted by this class:
              # returns a hash service(string) -> system name (string),
              # e.g.
              # { "parser" => "collins", "lemmatizer" => "treetagger" }
              def TreetaggerInterpreter.systems()
                return {
                  "pos_tagger" => "treetagger",
                }
              end

              ###
              # names of additional systems that may be interpreted by this class
              # returns a hash service(string) -> system name(string)
              # same format as systems()
              def TreetaggerInterpreter.optional_systems()
                return {
                  "lemmatizer" => "treetagger"
                }
              end

              ###
              # generalize over POS tags.
              #
              # returns one of:
              #
              # adj:   adjective (phrase)
              # adv:   adverb (phrase)
              # card:  numbers, quantity phrases
              # con:   conjunction
              # det:   determiner, including possessive/demonstrative pronouns etc.
              # for:   foreign material
              # noun:  noun (phrase), including personal pronouns, proper names, expletives
              # part:  particles, truncated words (German compound parts)
              # prep:  preposition (phrase)
              # pun:   punctuation, brackets, etc.
              # sent:  sentence
              # top:   top node of a sentence
              # trace: trace node
              # verb:  verb (phrase)
              # nil:   something went wrong
              #
              # returns: string, or nil
              def TreetaggerInterpreter.category(node) # SynNode
                pt = TreetaggerInterpreter.pt(node)
                if pt.nil?
                  # phrase type could not be determined
                  return nil
                end

                label = pt.to_s.strip()

                # Only the part of the label before the first dash is
                # classified (e.g. "NP-SBJ" -> "NP"). Labels that themselves
                # start with a dash (e.g. "-LRB-", "-RRB-") are kept whole:
                # cutting them at the first dash would leave the empty
                # string, and the bracket patterns below could never match.
                base = (label =~ /^-/) ? label : label[/^[^-]*/]

                case base
                when /^JJ/, /(WH)?ADJP/, /^PDT/
                  return "adj"
                when /^RB/, /(WH)?ADVP/, /^UH/
                  return "adv"
                when /^CD/, /^QP/
                  return "card"
                when /^CC/, /^WRB/, /^CONJP/
                  return "con"
                when /^DT/, /^POS/
                  return "det"
                when /^FW/, /^SYM/
                  return "for"
                when /^N/, "WHAD", "WDT", /^PRP/, /^WHNP/, /^EX/, /^WP/
                  return "noun"
                when /^IN/, /^TO/, /(WH)?PP/, "RP", /^PR(T|N)/
                  return "prep"
                when /^PUNC/, /LRB/, /RRB/, /[,'".:;!?\(\)]/
                  return "pun"
                when /^S(s|bar|BAR|G|Q|BARQ|INV)?$/, /^UCP/, /^FRAG/, /^X/, /^INTJ/
                  return "sent"
                when /^TOP/
                  return "top"
                when /^TRACE/
                  return "trace"
                when /^V/, /^MD/
                  return "verb"
                else
                  # unknown category/POS: deliberately silent
                  return nil
                end
              end
            end
         
     | 
    
        data/lib/common/headz.rb
    ADDED
    
    | 
         @@ -0,0 +1,338 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # name: Module Headz
         
     | 
| 
      
 2 
     | 
    
         
            +
            # auth: albu@coli.uni-sb.de
         
     | 
| 
      
 3 
     | 
    
         
            +
            # 
         
     | 
| 
      
 4 
     | 
    
         
            +
            # modified KE Sept 04:
         
     | 
| 
      
 5 
     | 
    
         
            +
            # changed from old Sentence pkg to new SalsaTigerSentence pkg
         
     | 
| 
      
 6 
     | 
    
         
            +
            #
         
     | 
| 
      
 7 
     | 
    
         
            +
            # modified KE April 05:
         
     | 
| 
      
 8 
     | 
    
         
            +
            # suppress the flood of warnings
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
      
 10 
     | 
    
         
            +
            # modified SP June 05: added some more cases; change to SalsaTigerRegXML
         
     | 
| 
      
 11 
     | 
    
         
            +
            # 
         
     | 
| 
      
 12 
     | 
    
         
            +
            # 
         
     | 
| 
      
 13 
     | 
    
         
            +
            # INIT: REXML TIGER sentence, 
         
     | 
| 
      
 14 
     | 
    
         
            +
            # FUNC: syn_nodes(term/non_term) -> heads   
         
     | 
| 
      
 15 
     | 
    
         
            +
            #  
         
     | 
| 
      
 16 
     | 
    
         
            +
            # 
         
     | 
| 
      
 17 
     | 
    
         
            +
            # usage:
         
     | 
| 
      
 18 
     | 
    
         
            +
            # 
         
     | 
| 
      
 19 
     | 
    
         
            +
            # h = Headz.new() 
         
     | 
| 
      
 20 
     | 
    
         
            +
            #
         
     | 
| 
      
 21 
     | 
    
         
            +
            # hash = h.get_sem_head(node) # node is a SalsaTigerXmlNode obj
         
     | 
| 
      
 22 
     | 
    
         
            +
            # 
         
     | 
| 
      
 23 
     | 
    
         
            +
            # head = hash["head"]
         
     | 
| 
      
 24 
     | 
    
         
            +
            # prep = hash["prep"]
         
     | 
| 
      
 25 
     | 
    
         
            +
            # 
         
     | 
| 
      
 26 
     | 
    
         
            +
            # if h.complex(head)
         
     | 
| 
      
 27 
     | 
    
         
            +
            #   print "preposition or conjunction involved"
         
     | 
| 
      
 28 
     | 
    
         
            +
            # end
         
     | 
| 
      
 29 
     | 
    
         
            +
             
     | 
| 
      
 30 
     | 
    
         
            +
            require "common/SalsaTigerRegXML"
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            class Headz 
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
              # Create the helper object used by gsh and switch diagnostic
              # messages off.
              def initialize
                # KE 13.4.05: please not that many messages!
                @Verbose = false
                @Helpers = HeadzHelpers.new
              end
         
     | 
| 
      
 38 
     | 
    
         
            +
              
         
     | 
| 
      
 39 
     | 
    
         
            +
              # head of one node
         
     | 
| 
      
 40 
     | 
    
         
            +
              # Public entry point: hands 'node' to gsh and returns
              # whatever gsh returns.
              def get_sem_head(node)
                return gsh(node)
              end
         
     | 
| 
      
 43 
     | 
    
         
            +
              
         
     | 
| 
      
 44 
     | 
    
         
            +
              # all headz of top-nodes covering fe
         
     | 
| 
      
 45 
     | 
    
         
            +
              # Collect the result of get_sem_head for every child of 'fe'.
              #
              # fe: object responding to children()
              # returns: array with one get_sem_head result per child, in
              #          child order; an empty array (plus a message on
              #          $stderr) if children() returns nil or false
              def get_fe_heads(fe)
                const = fe.children()
                unless const
                  # report under this method's own name, so the message
                  # points at the right place
                  $stderr.puts "Headz.get_fe_heads: no children for FE #{fe}"
                  return []
                end

                const.map { |node| get_sem_head(node) }
              end
         
     | 
| 
      
 55 
     | 
    
         
            +
              
         
     | 
| 
      
 56 
     | 
    
         
            +
              def gsh (node)
         
     | 
| 
      
 57 
     | 
    
         
            +
                if !node then 
         
     | 
| 
      
 58 
     | 
    
         
            +
                  if @Verbose then $stderr.puts "Headz.gsh: no input node" end
         
     | 
| 
      
 59 
     | 
    
         
            +
                  return {}
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
                elsif node.is_terminal? then return Hash['head'=>node]
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
                else
         
     | 
| 
      
 64 
     | 
    
         
            +
                  case node.category
         
     | 
| 
      
 65 
     | 
    
         
            +
                  when 'AP'
         
     | 
| 
      
 66 
     | 
    
         
            +
            	return gsh(@Helpers.get_dtr(node,'HD'))
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
      
 68 
     | 
    
         
            +
                  when 'AVP'
         
     | 
| 
      
 69 
     | 
    
         
            +
            	return gsh(@Helpers.get_dtr(node,'HD'))
         
     | 
| 
      
 70 
     | 
    
         
            +
                  when 'CAP', 'CAVP', 'CNP', 'CPP', 'CS', 'CVP'
         
     | 
| 
      
 71 
     | 
    
         
            +
            	conjs = @Helpers.get_conjuncts(node)
         
     | 
| 
      
 72 
     | 
    
         
            +
            	head = gsh(conjs.shift)
         
     | 
| 
      
 73 
     | 
    
         
            +
                    if head
         
     | 
| 
      
 74 
     | 
    
         
            +
                      head.update(Hash["conj"=>gsh_conjs(conjs)])
         
     | 
| 
      
 75 
     | 
    
         
            +
                    end
         
     | 
| 
      
 76 
     | 
    
         
            +
                    return head
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
      
 78 
     | 
    
         
            +
                  when 'NM'
         
     | 
| 
      
 79 
     | 
    
         
            +
            	return gsh(@Helpers.get_rightmost_dtr(node,'NMC'))
         
     | 
| 
      
 80 
     | 
    
         
            +
                  when 'NP'
         
     | 
| 
      
 81 
     | 
    
         
            +
                    nk = @Helpers.get_rightmost_dtr(node,'NK')
         
     | 
| 
      
 82 
     | 
    
         
            +
                    if nk
         
     | 
| 
      
 83 
     | 
    
         
            +
                      return gsh(nk)
         
     | 
| 
      
 84 
     | 
    
         
            +
                    else
         
     | 
| 
      
 85 
     | 
    
         
            +
                      return gsh(@Helpers.get_rightmost_dtr(node, "NN"))
         
     | 
| 
      
 86 
     | 
    
         
            +
                    end
         
     | 
| 
      
 87 
     | 
    
         
            +
             
     | 
| 
      
 88 
     | 
    
         
            +
                  when 'PN'
         
     | 
| 
      
 89 
     | 
    
         
            +
            	pncs = @Helpers.get_dtrs(node,'PNC')
         
     | 
| 
      
 90 
     | 
    
         
            +
            	head = gsh(pncs.last)
         
     | 
| 
      
 91 
     | 
    
         
            +
                    if head
         
     | 
| 
      
 92 
     | 
    
         
            +
                      head.update(Hash["pncs"=>pncs])
         
     | 
| 
      
 93 
     | 
    
         
            +
                    end
         
     | 
| 
      
 94 
     | 
    
         
            +
                    return head
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                  when 'PP'
         
     | 
| 
      
 97 
     | 
    
         
            +
            	return pp(node)
         
     | 
| 
      
 98 
     | 
    
         
            +
             
     | 
| 
      
 99 
     | 
    
         
            +
                  when 'S'
         
     | 
| 
      
 100 
     | 
    
         
            +
            	return s(node)
         
     | 
| 
      
 101 
     | 
    
         
            +
                  when 'VROOT'
         
     | 
| 
      
 102 
     | 
    
         
            +
                    dtrs = @Helpers.get_dtrs(node,'--')
         
     | 
| 
      
 103 
     | 
    
         
            +
             
     | 
| 
      
 104 
     | 
    
         
            +
                    # discourse level node with sentence nodes below?
         
     | 
| 
      
 105 
     | 
    
         
            +
                    # or conjunction with sentence nodes below?
         
     | 
| 
      
 106 
     | 
    
         
            +
                    discourselevel_dtr = dtrs.detect { |n| n.category == "DL"}
         
     | 
| 
      
 107 
     | 
    
         
            +
                    co_dtr = dtrs.detect { |n| n.category == "CO" }
         
     | 
| 
      
 108 
     | 
    
         
            +
                    if discourselevel_dtr
         
     | 
| 
      
 109 
     | 
    
         
            +
                      dtrs = discourselevel_dtr.children()
         
     | 
| 
      
 110 
     | 
    
         
            +
                    elsif co_dtr
         
     | 
| 
      
 111 
     | 
    
         
            +
                      dtrs = co_dtr.children()
         
     | 
| 
      
 112 
     | 
    
         
            +
                    end
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
             
     | 
| 
      
 115 
     | 
    
         
            +
                    # take first sentence node
         
     | 
| 
      
 116 
     | 
    
         
            +
                    sent_dtr = dtrs.detect {|n| n.category =~ /^C?S/}
         
     | 
| 
      
 117 
     | 
    
         
            +
                    if sent_dtr
         
     | 
| 
      
 118 
     | 
    
         
            +
                      return gsh(sent_dtr)
         
     | 
| 
      
 119 
     | 
    
         
            +
                    else          
         
     | 
| 
      
 120 
     | 
    
         
            +
            #          $stderr.puts "headz Warning: no sentence found below VROOT! Node #{node.id()}"
         
     | 
| 
      
 121 
     | 
    
         
            +
                      return nil
         
     | 
| 
      
 122 
     | 
    
         
            +
                    end
         
     | 
| 
      
 123 
     | 
    
         
            +
             
     | 
| 
      
 124 
     | 
    
         
            +
                  when 'VP'
         
     | 
| 
      
 125 
     | 
    
         
            +
            	return vp(node)
         
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
                  when 'MTA'
         
     | 
| 
      
 128 
     | 
    
         
            +
                    return gsh(@Helpers.get_rightmost_dtr(node,'ADC'))
         
     | 
| 
      
 129 
     | 
    
         
            +
             
     | 
| 
      
 130 
     | 
    
         
            +
                  when 'VZ'
         
     | 
| 
      
 131 
     | 
    
         
            +
            	return gsh(@Helpers.get_dtr(node,'HD'))
         
     | 
| 
      
 132 
     | 
    
         
            +
                  else
         
     | 
| 
      
 133 
     | 
    
         
            +
            	if @Verbose 
         
     | 
| 
      
 134 
     | 
    
         
            +
            	  $stderr.puts " Headz.gsh: no rule for #{node.category}" 
         
     | 
| 
      
 135 
     | 
    
         
            +
            	end
         
     | 
| 
      
 136 
     | 
    
         
            +
            	{}
         
     | 
| 
      
 137 
     | 
    
         
            +
                  end
         
     | 
| 
      
 138 
     | 
    
         
            +
                end
         
     | 
| 
      
 139 
     | 
    
         
            +
              end
         
     | 
| 
      
 140 
     | 
    
         
            +
              
         
     | 
| 
      
 141 
     | 
    
         
            +
              # flatten the processed conjs to a list of (head) Hashes 
         
     | 
| 
      
 142 
     | 
    
         
            +
              # containing no conj features themselves 
         
     | 
| 
      
 143 
     | 
    
         
            +
              # Runs gsh on every conjunct and collects the results in one flat list.
              #
              # conjs: a collection of conjunct nodes responding to #each.
              #
              # Each gsh result is handed to @Helpers.descend together with the
              # accumulator list; descend is responsible for appending to it.
              # Returns the accumulator list.
              def gsh_conjs(conjs)
                flattened = []

                conjs.each do |conjunct|
                  @Helpers.descend(gsh(conjunct), flattened)
                end

                flattened
              end
         
     | 
| 
      
 153 
     | 
    
         
            +
             
         
     | 
| 
      
 154 
     | 
    
         
            +
              #####################################
         
     | 
| 
      
 155 
     | 
    
         
            +
              # Head computation for a PP-like node.
              #
              # node: must respond to #terminals_sorted; its terminals must respond
              #       to #part_of_speech.
              #
              # The first terminal whose part of speech matches /^APPR/, /^PWAV/ or
              # /^C?PP/ is taken as the preposition. The head hash is computed by
              # gsh from the rightmost 'NK' daughter or, failing that, the 'RE'
              # daughter. When both a head hash and a preposition exist, the
              # preposition is stored under the key 'prep'.
              #
              # Returns the head hash, or nil when neither daughter exists.
              def pp(node)
                preposition = node.terminals_sorted.detect do |terminal|
                  pos = terminal.part_of_speech
                  pos && (pos =~ /^APPR/ || pos =~ /^PWAV/ || pos =~ /^C?PP/)
                end

                # Prefer the rightmost NK daughter; only look for RE if there is none.
                complement = @Helpers.get_rightmost_dtr(node, 'NK') ||
                             @Helpers.get_dtr(node, 'RE')

                unless complement
                  $stderr.puts " pp: no rule for #{node}" if @Verbose
                  return nil
                end

                result = gsh(complement)
                result.update('prep' => preposition) if result && preposition
                result
              end
         
     | 
| 
      
 182 
     | 
    
         
            +
              
         
     | 
| 
      
 183 
     | 
    
         
            +
              ################
         
     | 
| 
      
 184 
     | 
    
         
            +
              # Head computation for a clause-like node.
              #
              # node: a node whose daughters are looked up by edge label through
              #       @Helpers.get_dtr.
              #
              # Returns
              # * an empty Hash when the node has no 'HD' daughter,
              # * gsh(head) when the head has no outgoing edges,
              # * for head category 'VVFIN': gsh(head), extended with 'svp' and
              #   'oc' entries when an 'SVP' daughter exists,
              # * for head category 'VAFIN': the head hash of the 'OC' daughter's
              #   own 'HD' daughter (extended with 'oc'), or else the head hash of
              #   the 'PD' daughter's 'HD' daughter,
              # * nil when no rule applies (a message goes to $stderr if @Verbose).
              def s(node)
                head = @Helpers.get_dtr(node, 'HD')
                return {} unless head
                return gsh(head) if head.outdeg() == 0

                oc = @Helpers.get_dtr(node, 'OC')

                case head.category
                when 'VVFIN'
                  svp = @Helpers.get_dtr(node, 'SVP')
                  return gsh(head) unless svp

                  h = gsh(head)
                  # A missing head hash is passed through unchanged.
                  h && h.update('svp' => gsh(svp), 'oc' => gsh(oc))

                when 'VAFIN'
                  if oc && (oc_head = @Helpers.get_dtr(oc, 'HD'))
                    h = gsh(oc_head)
                    h && h.update('oc' => gsh(oc))

                  # Both assignments are parenthesised on purpose: without the
                  # parentheses `pd = a && pd_head = b` binds as
                  # `pd = (a && (pd_head = b))`, so `pd` would still be nil when
                  # its 'HD' daughter is looked up.
                  elsif (pd = @Helpers.get_dtr(node, 'PD')) &&
                        (pd_head = @Helpers.get_dtr(pd, 'HD'))
                    gsh(pd_head)

                  else
                    $stderr.puts " s: no rule for #{node}" if @Verbose
                    nil
                  end

                else
                  $stderr.puts " s: no rule for #{node}" if @Verbose
                  nil
                end
              end
         
     | 
| 
      
 228 
     | 
    
         
            +
              
         
     | 
| 
      
 229 
     | 
    
         
            +
              ################
         
     | 
| 
      
 230 
     | 
    
         
            +
              # Head computation for a VP-like node.
              #
              # node: a node whose daughters are looked up by edge label through
              #       @Helpers.get_dtr.
              #
              # Computes gsh of the 'HD' daughter, then gsh of the 'DA' and 'OA'
              # daughters (stored under the keys 'da' and 'oa' when present).
              # @Verbose is switched off while the DA/OA daughters are processed
              # and restored afterwards, even if gsh raises.
              #
              # Returns the head hash merged with the 'da'/'oa' entries, or just
              # those entries when there is no head hash.
              def vp(node)
                verb = gsh(@Helpers.get_dtr(node, 'HD'))
                objects = {}

                saved_verbose = @Verbose
                @Verbose = false
                begin
                  %w[da oa].each do |type|
                    dtr = @Helpers.get_dtr(node, type.upcase)
                    objects[type] = gsh(dtr) if dtr
                  end
                ensure
                  # Always restore the caller's verbosity setting.
                  @Verbose = saved_verbose
                end

                verb ? verb.update(objects) : objects
              end
         
     | 
| 
      
 247 
     | 
    
         
            +
              
         
     | 
| 
      
 248 
     | 
    
         
            +
              ################
         
     | 
| 
      
 249 
     | 
    
         
            +
              # Access
         
     | 
| 
      
 250 
     | 
    
         
            +
              # Reads the value stored under the key 'head' in h.
              def head(h)
                h['head']
              end
         
     | 
| 
      
 253 
     | 
    
         
            +
             
     | 
| 
      
 254 
     | 
    
         
            +
              # Returns the 'prep' entry of h if it is truthy, otherwise the
              # 'conj' entry (both read through the prep and conj accessors).
              def complex(h)
                prep(h) || conj(h)
              end
         
     | 
| 
      
 257 
     | 
    
         
            +
             
     | 
| 
      
 258 
     | 
    
         
            +
              # Reads the value stored under the key 'prep' in h.
              def prep(h)
                h['prep']
              end
         
     | 
| 
      
 261 
     | 
    
         
            +
             
     | 
| 
      
 262 
     | 
    
         
            +
              # Reads the value stored under the key 'conj' in h.
              def conj(h)
                h['conj']
              end
         
     | 
| 
      
 265 
     | 
    
         
            +
             
     | 
| 
      
 266 
     | 
    
         
            +
             
     | 
| 
      
 267 
     | 
    
         
            +
              
         
     | 
| 
      
 268 
     | 
    
         
            +
            end # Class Headz
         
     | 
| 
      
 269 
     | 
    
         
            +
             
     | 
| 
      
 270 
     | 
    
         
            +
             
     | 
| 
      
 271 
     | 
    
         
            +
            # Helper routines for looking up daughters of a node by edge label
            # and for flattening nested conjunction hashes.
            #
            # Nodes passed in must respond to #children_by_edgelabels(labels).
            class HeadzHelpers
              # verbose: when true, the lookup methods report missing daughters
              #          on $stderr. Defaults to off. A flag assigned in the class
              #          body would be a class-level instance variable that
              #          instances never see, so it is set per instance here.
              def initialize(verbose = false)
                @Verbose = verbose
              end

              # Conjunction

              # Returns the daughters of node reached via a 'CJ' edge
              # (see get_dtrs for the return value).
              def get_conjuncts(node)
                get_dtrs(node, 'CJ')
              end

              # flatten

              # Appends current to flat with its 'conj' entry removed, then does
              # the same recursively for every item of that 'conj' entry.
              #
              # current: a Hash (modified in place: 'conj' is deleted) or nil.
              # flat:    the Array collecting the results.
              #
              # Returns flat.
              def descend(current, flat)
                return flat if current.nil?

                nested = current.delete("conj")
                flat.push(current)
                nested.each { |item| descend(item, flat) } if nested
                flat
              end

              # Access

              # Returns the first daughter of node reached via an edge labelled
              # label, or nil if there is none.
              def get_dtr(node, label)
                dtrs = node.children_by_edgelabels([label])
                dtr = dtrs ? dtrs.first : nil
                report_missing(label, 'dtr', node) if dtr.nil?
                dtr
              end

              # Returns whatever node.children_by_edgelabels yields for label
              # (the list of matching daughters, possibly empty).
              def get_dtrs(node, label)
                dtrs = node.children_by_edgelabels([label])
                # An empty list is truthy in Ruby, so emptiness is checked
                # explicitly.
                report_missing(label, 'dtr', node) if dtrs.nil? || dtrs.empty?
                dtrs
              end

              # Returns the last daughter of node reached via an edge labelled
              # label, or nil if there is none.
              def get_rightmost_dtr(node, label)
                dtrs = node.children_by_edgelabels([label])
                rightmost = dtrs ? dtrs.last : nil
                return rightmost if rightmost

                report_missing(label, 'dtrs', node)
                nil
              end

            #   def l2h(list)
            #     h = Hash.new
            #     while (list.length > 1) do
            #       h[list.shift] = list.shift
            #     end
            #     if list.length == 1 then
            #       $stderr.puts "l2h: odd number of elems: " + list.join(" / ")
            #     end
            #     h
            #   end

              private

              # Writes the "no daughter found" diagnostic when verbose output
              # is enabled.
              def report_missing(label, noun, node)
                $stderr.puts " SelectHeadDtr: no #{label} #{noun} for #{node}" if @Verbose
              end

            end # Class HeadzHelpers
         
     | 
| 
      
 336 
     | 
    
         
            +
             
     | 
| 
      
 337 
     | 
    
         
            +
             
     | 
| 
      
 338 
     | 
    
         
            +
             
     |