frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
 - data/CHANGELOG.rdoc +0 -0
 - data/LICENSE.rdoc +0 -0
 - data/README.rdoc +0 -0
 - data/lib/common/AbstractSynInterface.rb +1227 -0
 - data/lib/common/BerkeleyInterface.rb +375 -0
 - data/lib/common/CollinsInterface.rb +1165 -0
 - data/lib/common/ConfigData.rb +694 -0
 - data/lib/common/Counter.rb +18 -0
 - data/lib/common/DBInterface.rb +48 -0
 - data/lib/common/EnduserMode.rb +27 -0
 - data/lib/common/Eval.rb +480 -0
 - data/lib/common/FixSynSemMapping.rb +196 -0
 - data/lib/common/FrPrepConfigData.rb +66 -0
 - data/lib/common/FrprepHelper.rb +1324 -0
 - data/lib/common/Graph.rb +345 -0
 - data/lib/common/ISO-8859-1.rb +24 -0
 - data/lib/common/ML.rb +186 -0
 - data/lib/common/Maxent.rb +215 -0
 - data/lib/common/MiniparInterface.rb +1388 -0
 - data/lib/common/Optimise.rb +195 -0
 - data/lib/common/Parser.rb +213 -0
 - data/lib/common/RegXML.rb +269 -0
 - data/lib/common/RosyConventions.rb +171 -0
 - data/lib/common/SQLQuery.rb +243 -0
 - data/lib/common/STXmlTerminalOrder.rb +194 -0
 - data/lib/common/SalsaTigerRegXML.rb +2347 -0
 - data/lib/common/SalsaTigerXMLHelper.rb +99 -0
 - data/lib/common/SleepyInterface.rb +384 -0
 - data/lib/common/SynInterfaces.rb +275 -0
 - data/lib/common/TabFormat.rb +720 -0
 - data/lib/common/Tiger.rb +1448 -0
 - data/lib/common/TntInterface.rb +44 -0
 - data/lib/common/Tree.rb +61 -0
 - data/lib/common/TreetaggerInterface.rb +303 -0
 - data/lib/common/headz.rb +338 -0
 - data/lib/common/option_parser.rb +13 -0
 - data/lib/common/ruby_class_extensions.rb +310 -0
 - data/lib/fred/Baseline.rb +150 -0
 - data/lib/fred/FileZipped.rb +31 -0
 - data/lib/fred/FredBOWContext.rb +863 -0
 - data/lib/fred/FredConfigData.rb +182 -0
 - data/lib/fred/FredConventions.rb +232 -0
 - data/lib/fred/FredDetermineTargets.rb +324 -0
 - data/lib/fred/FredEval.rb +312 -0
 - data/lib/fred/FredFeatureExtractors.rb +321 -0
 - data/lib/fred/FredFeatures.rb +1061 -0
 - data/lib/fred/FredFeaturize.rb +596 -0
 - data/lib/fred/FredNumTrainingSenses.rb +27 -0
 - data/lib/fred/FredParameters.rb +402 -0
 - data/lib/fred/FredSplit.rb +84 -0
 - data/lib/fred/FredSplitPkg.rb +180 -0
 - data/lib/fred/FredTest.rb +607 -0
 - data/lib/fred/FredTrain.rb +144 -0
 - data/lib/fred/PlotAndREval.rb +480 -0
 - data/lib/fred/fred.rb +45 -0
 - data/lib/fred/md5.rb +23 -0
 - data/lib/fred/opt_parser.rb +250 -0
 - data/lib/frprep/AbstractSynInterface.rb +1227 -0
 - data/lib/frprep/Ampersand.rb +37 -0
 - data/lib/frprep/BerkeleyInterface.rb +375 -0
 - data/lib/frprep/CollinsInterface.rb +1165 -0
 - data/lib/frprep/ConfigData.rb +694 -0
 - data/lib/frprep/Counter.rb +18 -0
 - data/lib/frprep/FNCorpusXML.rb +643 -0
 - data/lib/frprep/FNDatabase.rb +144 -0
 - data/lib/frprep/FixSynSemMapping.rb +196 -0
 - data/lib/frprep/FrPrepConfigData.rb +66 -0
 - data/lib/frprep/FrameXML.rb +513 -0
 - data/lib/frprep/FrprepHelper.rb +1324 -0
 - data/lib/frprep/Graph.rb +345 -0
 - data/lib/frprep/ISO-8859-1.rb +24 -0
 - data/lib/frprep/MiniparInterface.rb +1388 -0
 - data/lib/frprep/Parser.rb +213 -0
 - data/lib/frprep/RegXML.rb +269 -0
 - data/lib/frprep/STXmlTerminalOrder.rb +194 -0
 - data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
 - data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
 - data/lib/frprep/SleepyInterface.rb +384 -0
 - data/lib/frprep/SynInterfaces.rb +275 -0
 - data/lib/frprep/TabFormat.rb +720 -0
 - data/lib/frprep/Tiger.rb +1448 -0
 - data/lib/frprep/TntInterface.rb +44 -0
 - data/lib/frprep/Tree.rb +61 -0
 - data/lib/frprep/TreetaggerInterface.rb +303 -0
 - data/lib/frprep/do_parses.rb +142 -0
 - data/lib/frprep/frprep.rb +686 -0
 - data/lib/frprep/headz.rb +338 -0
 - data/lib/frprep/one_parsed_file.rb +28 -0
 - data/lib/frprep/opt_parser.rb +94 -0
 - data/lib/frprep/ruby_class_extensions.rb +310 -0
 - data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
 - data/lib/rosy/DBMySQL.rb +146 -0
 - data/lib/rosy/DBSQLite.rb +280 -0
 - data/lib/rosy/DBTable.rb +239 -0
 - data/lib/rosy/DBWrapper.rb +176 -0
 - data/lib/rosy/ExternalConfigData.rb +58 -0
 - data/lib/rosy/FailedParses.rb +130 -0
 - data/lib/rosy/FeatureInfo.rb +242 -0
 - data/lib/rosy/GfInduce.rb +1115 -0
 - data/lib/rosy/GfInduceFeature.rb +148 -0
 - data/lib/rosy/InputData.rb +294 -0
 - data/lib/rosy/RosyConfigData.rb +115 -0
 - data/lib/rosy/RosyConfusability.rb +338 -0
 - data/lib/rosy/RosyEval.rb +465 -0
 - data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
 - data/lib/rosy/RosyFeaturize.rb +280 -0
 - data/lib/rosy/RosyInspect.rb +336 -0
 - data/lib/rosy/RosyIterator.rb +477 -0
 - data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
 - data/lib/rosy/RosyPruning.rb +165 -0
 - data/lib/rosy/RosyServices.rb +744 -0
 - data/lib/rosy/RosySplit.rb +232 -0
 - data/lib/rosy/RosyTask.rb +19 -0
 - data/lib/rosy/RosyTest.rb +826 -0
 - data/lib/rosy/RosyTrain.rb +232 -0
 - data/lib/rosy/RosyTrainingTestTable.rb +786 -0
 - data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
 - data/lib/rosy/View.rb +418 -0
 - data/lib/rosy/opt_parser.rb +379 -0
 - data/lib/rosy/rosy.rb +77 -0
 - data/lib/shalmaneser/version.rb +3 -0
 - data/test/frprep/test_opt_parser.rb +94 -0
 - data/test/functional/functional_test_helper.rb +40 -0
 - data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
 - data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
 - data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
 - data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
 - data/test/functional/test_fred.rb +47 -0
 - data/test/functional/test_frprep.rb +52 -0
 - data/test/functional/test_rosy.rb +20 -0
 - metadata +270 -0
 
| 
         @@ -0,0 +1,182 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # FredConfigData
         
     | 
| 
      
 2 
     | 
    
         
            +
            # Katrin Erk April 05
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # Frame disambiguation system: 
         
     | 
| 
      
 5 
     | 
    
         
            +
            # access to a configuration and experiment description file
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            require "common/ConfigData"
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            ##############################
         
     | 
| 
      
 10 
     | 
    
         
            +
            # Class FredConfigData
         
     | 
| 
      
 11 
     | 
    
         
            +
            #
         
     | 
| 
      
 12 
     | 
    
         
            +
            # inherits from ConfigData,
         
     | 
| 
      
 13 
     | 
    
         
            +
            # sets variable names appropriate to WSD task
         
     | 
| 
      
 14 
     | 
    
         
            +
             
     | 
| 
      
 15 
     | 
    
         
            +
            class FredConfigData < ConfigData
         
     | 
| 
      
 16 
     | 
    
         
            +
              # Builds the configuration object for the frame disambiguation
              # (WSD) system from an experiment file.
              #
              # filename - path of the config/experiment file; handed unchanged
              #            to ConfigData#initialize together with the table of
              #            known entries and the list of variable names.
              def initialize(filename)
                # Known config entries, mapped to the value type declared for each.
                entry_types = {
                  # general experiment settings
                  "experiment_ID" => "string",            # experiment ID
                  "enduser_mode" => "bool",               # enduser mode? (disallows many things)

                  # input and output locations
                  "preproc_descr_file_train" => "string", # path to preprocessing files
                  "preproc_descr_file_test" => "string",
                  "directory_output" => "string",         # Salsa/Tiger XML output directory

                  "verbose" => "bool",                    # print diagnostic messages?
                  "apply_to_all_known_targets" => "bool", # all known targets, not only those with a frame?

                  "fred_directory" => "string",           # directory for internal info
                  "classifier_dir" => "string",           # classifiers are written here

                  "classifier" => "list",                 # classifiers

                  "dbtype" => "string",                   # "mysql" or "sqlite"

                  # DB access. NOTE(review): the original comment read "sqlite only";
                  # host/user/passwd look like server-database settings -- TODO confirm.
                  "host" => "string",
                  "user" => "string",
                  "passwd" => "string",
                  "dbname" => "string",

                  # featurization
                  "feature" => "list",                    # features used by the classifier
                  "binary_classifiers" => "bool",         # binary rather than n-ary classifiers?
                  "negsense" => "string",                 # binary classifier: name of the negative sense
                  "numerical_features" => "string",       # treatment of numerical features

                  # Items with multiple senses:
                  #   'binarize': binary classifiers; positive if the sense is
                  #               among the gold senses
                  #   'join'    : make one joint sense
                  #   'repeat'  : one occurrence of the item per sense
                  #   'keep'    : keep as separate labels
                  # Multilabel: consider as assigned all labels above a certain
                  # confidence threshold?
                  "handle_multilabel" => "string",
                  "assignment_confidence_threshold" => "float",

                  # single-sentence context?
                  "single_sent_context" => "bool",

                  # noncontiguous input? then access to a larger corpus is needed
                  "noncontiguous_input" => "bool",
                  "larger_corpus_dir" => "string",
                  "larger_corpus_format" => "string",
                  "larger_corpus_encoding" => "string"
                }

                # Names passed to ConfigData as "variables".
                variable_names = ["train", "exp_ID"]

                super(filename, entry_types, variable_names)

                # Route reads of the two list entries through the accessors below.
                set_list_feature_access("classifier", method("access_classifier"))
                set_list_feature_access("feature", method("access_feature"))
              end
         
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
      
 83 
     | 
    
         
            +
              ###
         
     | 
| 
      
 84 
     | 
    
         
            +
              # protected
         
     | 
| 
      
 85 
     | 
    
         
            +
             
     | 
| 
      
 86 
     | 
    
         
            +
              #####
         
     | 
| 
      
 87 
     | 
    
         
            +
              # access_feature
         
     | 
| 
      
 88 
     | 
    
         
            +
              # 
         
     | 
| 
      
 89 
     | 
    
         
            +
              # access function for feature 'feature'
         
     | 
| 
      
 90 
     | 
    
         
            +
              #
         
     | 
| 
      
 91 
     | 
    
         
            +
              # assumed format:
         
     | 
| 
      
 92 
     | 
    
         
            +
              #
         
     | 
| 
      
 93 
     | 
    
         
            +
              #   feature = context 50
         
     | 
| 
      
 94 
     | 
    
         
            +
              #   feature = context 2
         
     | 
| 
      
 95 
     | 
    
         
            +
              #   feature = syn
         
     | 
| 
      
 96 
     | 
    
         
            +
              #
         
     | 
| 
      
 97 
     | 
    
         
            +
              # i.e. first the name of the feature type to use, then
         
     | 
| 
      
 98 
     | 
    
         
            +
              # optionally a parameter,
         
     | 
| 
      
 99 
     | 
    
         
            +
              # and the same feature can occur more than once (which makes sense
         
     | 
| 
      
 100 
     | 
    
         
            +
              # only in case of parameters)
         
     | 
| 
      
 101 
     | 
    
         
            +
              #
         
     | 
| 
      
 102 
     | 
    
         
            +
              #
         
     | 
| 
      
 103 
     | 
    
         
            +
              # returns: 
         
     | 
| 
      
 104 
     | 
    
         
            +
              #  - If a feature is given as a parameter, 
         
     | 
| 
      
 105 
     | 
    
         
            +
              #    - If the feature is not set in the experiment file, nil
         
     | 
| 
      
 106 
     | 
    
         
            +
              #    - If the feature is set and has a parameter, the list of 
         
     | 
| 
      
 107 
     | 
    
         
            +
              #      parameter values set for it. It is assumed that the parameters
         
     | 
| 
      
 108 
     | 
    
         
            +
              #      are integers, and they are returned as integers
         
     | 
| 
      
 109 
     | 
    
         
            +
              #    - If the feature is set and has no parameter, true
         
     | 
| 
      
 110 
     | 
    
         
            +
              # - If no feature is given as parameter:
         
     | 
| 
      
 111 
     | 
    
         
            +
              #   a list of all features that have been set in the experiment file
         
     | 
| 
      
 112 
     | 
    
         
            +
              #   Each feature is given as a tuple: the first element is the feature (a string),
         
     | 
| 
      
 113 
     | 
    
         
            +
              #   all further elements are options (integers)
         
     | 
| 
      
 114 
     | 
    
         
            +
              # Access function for the list entry 'feature'.
              #
              # val_list - array of string tuples as defined in the config file
              #            for 'feature': first element is the feature type name,
              #            an optional second element is its parameter.
              #            May be nil when no 'feature' line was given at all;
              #            that is treated like an empty list, in line with
              #            access_classifier.
              # feature  - string: feature type name, or nil to list everything.
              #
              # Returns
              #  - with a feature name:
              #      nil   if that feature is not set,
              #      true  if it is set without any parameter,
              #      otherwise the array of its parameters converted with to_i
              #      (one per occurrence of the feature);
              #  - without a feature name: an array of tuples
              #      [feature_name, option_as_integer, ...], one per config line.
              def access_feature(val_list, feature = nil)
                # A missing list must not blow up with NoMethodError on nil.
                tuples = val_list || []

                unless feature
                  # report every feature that has been set
                  return tuples.map { |feature_name, *options|
                    [feature_name] + options.map { |o| o.to_i }
                  }
                end

                # parameters (possibly nil) of all tuples naming this feature
                params = tuples.select { |entries| entries.first == feature }
                               .map { |entries| entries[1] }

                # feature not defined
                return nil if params.empty?
                # feature defined, but never with a parameter
                return true if params.compact.empty?

                # feature defined, and has values
                params.map { |par| par.to_i }
              end
         
     | 
| 
      
 148 
     | 
    
         
            +
             
     | 
| 
      
 149 
     | 
    
         
            +
              #####
         
     | 
| 
      
 150 
     | 
    
         
            +
              # access_classifier
         
     | 
| 
      
 151 
     | 
    
         
            +
              #
         
     | 
| 
      
 152 
     | 
    
         
            +
              # access function for feature 'classifier'
         
     | 
| 
      
 153 
     | 
    
         
            +
              #
         
     | 
| 
      
 154 
     | 
    
         
            +
              # assumed format in the config file:
         
     | 
| 
      
 155 
     | 
    
         
            +
              #
         
     | 
| 
      
 156 
     | 
    
         
            +
              #   classifier = path [option]*
         
     | 
| 
      
 157 
     | 
    
         
            +
              #
         
     | 
| 
      
 158 
     | 
    
         
            +
              # i.e. first the name of the feature type to use, then
         
     | 
| 
      
 159 
     | 
    
         
            +
              # optionally options associated with that feature,
         
     | 
| 
      
 160 
     | 
    
         
            +
              # e.g. 'argrec': use that feature only when computing argrec
         
     | 
| 
      
 161 
     | 
    
         
            +
              #
         
     | 
| 
      
 162 
     | 
    
         
            +
              # the access function is called with parameter val_list, an array of
         
     | 
| 
      
 163 
     | 
    
         
            +
              # string tuples, one string tuple for each feature defined.
         
     | 
| 
      
 164 
     | 
    
         
            +
              # the first string in the tuple is the feature name, the rest are the options
         
     | 
| 
      
 165 
     | 
    
         
            +
              #
         
     | 
| 
      
 166 
     | 
    
         
            +
              # returns: a list of pairs [feature_name(string), options(array:string)]
         
     | 
| 
      
 167 
     | 
    
         
            +
              # of defined features
         
     | 
| 
      
 168 
     | 
    
         
            +
              # Access function for the list entry 'classifier'.
              #
              # val_list - array of string tuples defined in the config file for
              #            'classifier', or nil.
              #
              # Returns an array of pairs [first_element, remaining_elements],
              # one per tuple; an empty array when val_list is nil.
              def access_classifier(val_list)
                return [] if val_list.nil?

                val_list.map do |tuple|
                  # slice(1..-1) keeps the original semantics (nil for an empty tuple)
                  [tuple[0], tuple.slice(1..-1)]
                end
              end
         
     | 
| 
      
 178 
     | 
    
         
            +
             
     | 
| 
      
 179 
     | 
    
         
            +
            end
         
     | 
| 
      
 180 
     | 
    
         
            +
             
     | 
| 
      
 181 
     | 
    
         
            +
             
     | 
| 
      
 182 
     | 
    
         
            +
             
         
     | 
| 
         @@ -0,0 +1,232 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # FredConventions
         
     | 
| 
      
 2 
     | 
    
         
            +
            # Katrin Erk June 05
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # several small things that should be uniform
         
     | 
| 
      
 5 
     | 
    
         
            +
            # throughout the system
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            require "common/ruby_class_extensions"
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            require "common/EnduserMode"
         
     | 
| 
      
 10 
     | 
    
         
            +
            class Object
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            ###
         
     | 
| 
      
 13 
     | 
    
         
            +
            # joining and breaking up senses
         
     | 
| 
      
 14 
     | 
    
         
            +
            # Combines several sense labels into one joint label: the labels
            # are sorted and then concatenated with "++" as separator.
            #
            # senses - collection of sense labels (must respond to sort)
            #
            # Returns a string.
            def fred_join_senses(senses)
              ordered = senses.sort
              ordered.join("++")
            end
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            # Breaks a joint sense label up at every "++" separator.
            #
            # joined_senses - string of sense labels separated by "++"
            #
            # Returns an array of strings.
            def fred_split_sense(joined_senses)
              joined_senses.split("++")
            end
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            ###
         
     | 
| 
      
 23 
     | 
    
         
            +
            # fred_dirname
         
     | 
| 
      
 24 
     | 
    
         
            +
            #
         
     | 
| 
      
 25 
     | 
    
         
            +
            # constructs a directory name:
         
     | 
| 
      
 26 
     | 
    
         
            +
            # fred data directory / experiment ID / maindir / subdir
         
     | 
| 
      
 27 
     | 
    
         
            +
            #
         
     | 
| 
      
 28 
     | 
    
         
            +
            # if is_existing == existing, the directory is checked for existence,
         
     | 
| 
      
 29 
     | 
    
         
            +
            # if is_existing == new, it is created if necessary
         
     | 
| 
      
 30 
     | 
    
         
            +
            #
         
     | 
| 
      
 31 
     | 
    
         
            +
            # returns: a string
         
     | 
| 
      
 32 
     | 
    
         
            +
            # Constructs a directory name of the form
            #   fred data directory / experiment ID / maindir / subdir
            # by delegating to File.existing_dir or File.new_dir.
            #
            # exp         - FredConfigData object; read via
            #               get("fred_directory") and get("experiment_ID")
            # maindir     - string: main part of the directory name
            # subdir      - string: subpart of the directory name
            # is_existing - string: "existing" (default) selects
            #               File.existing_dir, "new" selects File.new_dir
            #
            # Returns whatever the selected File helper returns.
            # Raises a RuntimeError for any other value of is_existing.
            def fred_dirname(exp, maindir, subdir, is_existing = "existing")
              # Pick the helper first; the config is only read for valid modes.
              dir_helper =
                case is_existing
                when "existing" then :existing_dir
                when "new" then :new_dir
                else raise "Shouldn't be here: #{is_existing}"
                end

              File.public_send(dir_helper,
                               exp.get("fred_directory"),
                               exp.get("experiment_ID"),
                               maindir,
                               subdir)
            end
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
            ####
         
     | 
| 
      
 54 
     | 
    
         
            +
            # filenames for feature files
         
     | 
| 
      
 55 
     | 
    
         
            +
            def fred_feature_filename(lemma, sense = nil,
                                      do_binary = false)
              # Non-binary feature files are named after the lemma alone.
              return "fred.features.#{lemma}" unless do_binary

              # Binary feature files additionally carry the sense.
              "fred.features.#{lemma}.SENSE.#{sense}"
            end
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
            ####
         
     | 
| 
      
 65 
     | 
    
         
            +
            # filenames for split files
         
     | 
| 
      
 66 
     | 
    
         
            +
            def fred_split_filename(lemma)
              # fixed "fred.split." prefix followed by the lemma
              "fred.split.#{lemma}"
            end
         
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
      
 70 
     | 
    
         
            +
            ###
         
     | 
| 
      
 71 
     | 
    
         
            +
            # deconstruct split filename
         
     | 
| 
      
 72 
     | 
    
         
            +
            # returns: lemma
         
     | 
| 
      
 73 
     | 
    
         
            +
            def deconstruct_fred_split_filename(filename)
              # Only the last path component is inspected. String#[] with a
              # capture index yields the first group, or nil when the name
              # does not have the "fred.split.<lemma>" shape.
              File.basename(filename)[/^fred\.split\.(.*)/, 1]
            end
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
            ###
         
     | 
| 
      
 83 
     | 
    
         
            +
            # deconstruct feature file name
         
     | 
| 
      
 84 
     | 
    
         
            +
            # returns: hash with keys
         
     | 
| 
      
 85 
     | 
    
         
            +
            # "lemma"
         
     | 
| 
      
 86 
     | 
    
         
            +
            # "sense"
         
     | 
| 
      
 87 
     | 
    
         
            +
            def deconstruct_fred_feature_filename(filename)
              name = File.basename(filename)

              if (binary = /^fred\.features\.(.*)\.SENSE\.(.*)$/.match(name))
                # binary: fred.features.#{lemma}.SENSE.#{sense}
                { "lemma" => binary[1], "sense" => binary[2] }
              elsif (plain = /^fred\.features\.(.*)/.match(name))
                # fred.features.#{lemma}
                { "lemma" => plain[1] }
              else
                # complete mismatch
                nil
              end
            end
         
     | 
| 
      
 107 
     | 
    
         
            +
             
     | 
| 
      
 108 
     | 
    
         
            +
            ####
         
     | 
| 
      
 109 
     | 
    
         
            +
            # filename for answer key files
         
     | 
| 
      
 110 
     | 
    
         
            +
            def fred_answerkey_filename(lemma)
              # fixed "fred.answerkey." prefix followed by the lemma
              "fred.answerkey.#{lemma}"
            end
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
            ###
         
     | 
| 
      
 115 
     | 
    
         
            +
            # classifier directory
         
     | 
| 
      
 116 
     | 
    
         
            +
            def fred_classifier_directory(exp,     # FredConfigData object
                                          splitID = nil) # string or nil
              unless exp.get("classifier_dir")
                # my classifier directory: below "classifiers", in a
                # subdirectory named after the split ID, or "all" without one
                return fred_dirname(exp, "classifiers", splitID ? splitID : "all", "new")
              end

              # user-specified classifier directory, extended by the split ID if given
              dir_parts = [exp.get("classifier_dir")]
              dir_parts << splitID if splitID
              File.new_dir(*dir_parts)
            end
         
     | 
| 
      
 137 
     | 
    
         
            +
             
     | 
| 
      
 138 
     | 
    
         
            +
            ###
         
     | 
| 
      
 139 
     | 
    
         
            +
            # classifier file
         
     | 
| 
      
 140 
     | 
    
         
            +
            def fred_classifier_filename(classifier, lemma, sense=nil)
              # without a sense the name stops after the lemma
              return "fred.classif.#{classifier}.LEMMA.#{lemma}" unless sense

              "fred.classif.#{classifier}.LEMMA.#{lemma}.SENSE.#{sense}"
            end
         
     | 
| 
      
 147 
     | 
    
         
            +
             
     | 
| 
      
 148 
     | 
    
         
            +
            ###
            # deconstruct classifier file name
            # (fred.classif.#{classifier}.LEMMA.#{lemma}[.SENSE.#{sense}])
            #
            # filename: string, either a bare file name or a path; only the
            #           last path component is inspected
            #
            # returns: hash with keys
            # "lemma"
            # "sense" (only if the name has a SENSE part)
            # The hash is empty if the name is not a classifier file name.
            def deconstruct_fred_classifier_filename(filename)
              # Strip leading directories, as the split/feature deconstructors
              # do. Without this the ^-anchored patterns below never match a
              # name that carries a directory part. to_s keeps a nil argument
              # yielding an empty hash, as before.
              basename = File.basename(filename.to_s)

              retv = Hash.new()
              if basename =~ /^fred\.classif\.(.*)\.LEMMA\.(.*)\.SENSE\.(.*)$/
                # $1 is the classifier name, which is not reported
                retv["lemma"] = $2
                retv["sense"] = $3
              elsif basename =~ /^fred\.classif\.(.*)\.LEMMA\.(.*)$/
                retv["lemma"] = $2
              end
              return retv
            end
         
     | 
| 
      
 158 
     | 
    
         
            +
             
     | 
| 
      
 159 
     | 
    
         
            +
            ###
         
     | 
| 
      
 160 
     | 
    
         
            +
            # result file
         
     | 
| 
      
 161 
     | 
    
         
            +
            def fred_result_filename(lemma)
              # every dot in the lemma is replaced by an underscore
              dotless_lemma = lemma.gsub(/\./, "_")
              "fred.result.#{dotless_lemma}"
            end
         
     | 
| 
      
 164 
     | 
    
         
            +
             
     | 
| 
      
 165 
     | 
    
         
            +
            ##########
         
     | 
| 
      
 166 
     | 
    
         
            +
            # lemma and POS: combine into string separated by 
         
     | 
| 
      
 167 
     | 
    
         
            +
            # a separator character
         
     | 
| 
      
 168 
     | 
    
         
            +
            #
         
     | 
| 
      
 169 
     | 
    
         
            +
            # fred_lemmapos_combine: take two strings, return combined string
         
     | 
| 
      
 170 
     | 
    
         
            +
            #      if POS is nil, returns lemma<separator character>
         
     | 
| 
      
 171 
     | 
    
         
            +
            # fred_lemmapos_separate: take one string, return two strings
         
     | 
| 
      
 172 
     | 
    
         
            +
            #      if no POS could be retrieved, returns nil as POS and the whole string as lemma
         
     | 
| 
      
 173 
     | 
    
         
            +
            def fred_lemmapos_combine(lemma, # string
                                      pos)   # string
              # "." is the separator, so dots inside the POS are spelled out as "DOT"
              dotless_pos = pos.to_s.gsub(/\./, "DOT")
              "#{lemma}.#{dotless_pos}"
            end
         
     | 
| 
      
 177 
     | 
    
         
            +
             
     | 
| 
      
 178 
     | 
    
         
            +
            ###
         
     | 
| 
      
 179 
     | 
    
         
            +
            def fred_lemmapos_separate(lemmapos)  # string
              # the last dot-separated piece is the POS, everything before it the lemma
              *lemma_pieces, pos = lemmapos.split(".")

              # no POS found, treat all of lemmapos as lemma
              return [ lemmapos, nil ] if lemma_pieces.empty?

              [ lemma_pieces.join("."), pos ]
            end
         
     | 
| 
      
 188 
     | 
    
         
            +
            end
         
     | 
| 
      
 189 
     | 
    
         
            +
             
     | 
| 
      
 190 
     | 
    
         
            +
            ########################################
         
     | 
| 
      
 191 
     | 
    
         
            +
            # given a SynNode object representing a terminal,
         
     | 
| 
      
 192 
     | 
    
         
            +
            # return:
         
     | 
| 
      
 193 
     | 
    
         
            +
            # - the word
         
     | 
| 
      
 194 
     | 
    
         
            +
            # - the lemma
         
     | 
| 
      
 195 
     | 
    
         
            +
            # - the part of speech
         
     | 
| 
      
 196 
     | 
    
         
            +
            # - the named entity (if any)
         
     | 
| 
      
 197 
     | 
    
         
            +
            #
         
     | 
| 
      
 198 
     | 
    
         
            +
            # as a tuple
         
     | 
| 
      
 199 
     | 
    
         
            +
            #
         
     | 
| 
      
 200 
     | 
    
         
            +
            # WARNING: word and lemma are turned to lowercase
         
     | 
| 
      
 201 
     | 
    
         
            +
            module WordLemmaPosNe
              # Describe a terminal node as the tuple [word, lemma, pos, ne].
              #
              # A non-terminal is reported on stderr and answered with four nils.
              # Word and lemma are lowercased; a lemma that unescapes to
              # "<unknown>" is reported as nil. The named entity is read from the
              # "ne" attribute, falling back to "headof_ne".
              #
              # NOTE(review): downcase! changes the strings handed out by
              # syn_obj.word() and i.lemma_backoff() in place -- confirm that
              # those objects are not shared with the node itself.
              def word_lemma_pos_ne(syn_obj, # SynNode object
                                    i)       # SynInterpreter class
                unless syn_obj.is_terminal?
                  $stderr.puts "Featurization warning: unexpectedly received non-terminal"
                  return [ nil, nil, nil, nil ]
                end

                word = syn_obj.word()
                word.downcase! if word

                lemma = i.lemma_backoff(syn_obj)
                lemma = nil if lemma and SalsaTigerXMLHelper.unescape(lemma) == "<unknown>"
                lemma.downcase! if lemma

                pos = syn_obj.part_of_speech()

                ne = syn_obj.get_attribute("ne") || syn_obj.get_attribute("headof_ne")

                [word, lemma, pos, ne]
              end
            end
         
     | 
| 
      
 232 
     | 
    
         
            +
             
     | 
| 
         @@ -0,0 +1,324 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require "fred/FileZipped"
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require "fred/FredConfigData"
         
     | 
| 
      
 4 
     | 
    
         
            +
            require "common/SynInterfaces"
         
     | 
| 
      
 5 
     | 
    
         
            +
            require "fred/FredConventions"
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            ########################################
         
     | 
| 
      
 9 
     | 
    
         
            +
            # target determination classes:
         
     | 
| 
      
 10 
     | 
    
         
            +
            # either determine targets from existing annotation
         
     | 
| 
      
 11 
     | 
    
         
            +
            # with frames,
         
     | 
| 
      
 12 
     | 
    
         
            +
            # or use all known targets.
         
     | 
| 
      
 13 
     | 
    
         
            +
            class Targets
         
     | 
| 
      
 14 
     | 
    
         
            +
              attr_reader :targets_okay
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
              ###
         
     | 
| 
      
 17 
     | 
    
         
            +
              # Set up the target store and, for modes "a" and "r", load the
              # previously recorded targets from targets.txt.gz.
              #
              # After construction, targets_okay is false if that file could
              # not be opened in mode "a" or "r", and true otherwise.
              # An unknown mode prints an error and exits the program.
              def initialize(exp,                 # experiment file object
                             interpreter_class,   # SynInterpreter class, or nil
                             mode)                # string: "r", "w", "a", as in files
                @exp = exp
                @interpreter_class = interpreter_class

                # keep recorded targets here: lemmapos string -> list of sense strings
                @targets = Hash.new()

                # write target info in the classifier directory.
                # This is _not_ dependent on a potential split ID
                @dir = File.new_dir(fred_classifier_directory(@exp), "targets")

                @targets_okay = true
                case mode
                when "w"
                  # start from scratch, no list of targets
                when "a", "r"
                  # read existing file containing targets
                  begin
                    file = FileZipped.new(@dir + "targets.txt.gz")
                  rescue
                    # no target file present: signal this
                    # NOTE(review): this early return also leaves @record_targets
                    # unset, even in mode "a" -- confirm that is intended.
                    @targets_okay = false
                    return
                  end

                  begin
                    file.each { |line|
                      line.chomp!
                      if line =~ /^LEMMA (.+) SENSES (.+)$/
                        lemmapos = $1
                        # read $2 before gsub! below, which resets the match variables
                        senses = $2.split()
                        lemmapos.gsub!(/ /, '_')
                        @targets[lemmapos] = senses
                      end
                    }
                  ensure
                    # release the handle once the list is read (it used to be left open)
                    file.close()
                  end

                else
                  $stderr.puts "Error: shouldn't be here."
                  exit 1
                end

                # targets are recorded only when writing or appending
                @record_targets = ["w", "a"].include?(mode)
              end
         
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
      
 67 
     | 
    
         
            +
              ###
         
     | 
| 
      
 68 
     | 
    
         
            +
              # determine_targets:
         
     | 
| 
      
 69 
     | 
    
         
            +
              # for a given SalsaTigerSentence,
         
     | 
| 
      
 70 
     | 
    
         
            +
              # determine all targets,
         
     | 
| 
      
 71 
     | 
    
         
            +
              # each as a _single_ main terminal node
         
     | 
| 
      
 72 
     | 
    
         
            +
              #
         
     | 
| 
      
 73 
     | 
    
         
            +
              # We need a single terminal node in order
         
     | 
| 
      
 74 
     | 
    
         
            +
              # to compute the context window
         
     | 
| 
      
 75 
     | 
    
         
            +
              #
         
     | 
| 
      
 76 
     | 
    
         
            +
              # returns:
         
     | 
| 
      
 77 
     | 
    
         
            +
              #  hash: target_IDs -> list of senses
         
     | 
| 
      
 78 
     | 
    
         
            +
              #   where target_IDs is a pair [list of terminal IDs, main terminal ID]
         
     | 
| 
      
 79 
     | 
    
         
            +
              #  
         
     | 
| 
      
 80 
     | 
    
         
            +
              #  where a sense is represented as a hash:
         
     | 
| 
      
 81 
     | 
    
         
            +
              #  "sense": sense, a string
         
     | 
| 
      
 82 
     | 
    
         
            +
              #  "obj":   FrameNode object
         
     | 
| 
      
 83 
     | 
    
         
            +
              #  "all_targets": list of node IDs, may comprise more than a single node
         
     | 
| 
      
 84 
     | 
    
         
            +
              #  "lex":   lemma, or multiword expression in canonical form
         
     | 
| 
      
 85 
     | 
    
         
            +
              #  "sid": sentence ID
         
     | 
| 
      
 86 
     | 
    
         
            +
              def determine_targets(sent)
                # placeholder: always raises
                raise RuntimeError, "overwrite me"
              end
         
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
              ##
         
     | 
| 
      
 91 
     | 
    
         
            +
              # returns a list of lemma-pos combined strings
         
     | 
| 
      
 92 
     | 
    
         
            +
              def get_lemmas()
                # the keys of @targets are the combined lemma-pos strings
                @targets.keys
              end
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
              ##
         
     | 
| 
      
 97 
     | 
    
         
            +
              # access to lemmas and POS, returns a list of pairs [lemma, pos] (string*string)
         
     | 
| 
      
 98 
     | 
    
         
            +
              def get_lemma_pos()
                # split every recorded lemma-pos string back into its [lemma, pos] pair
                pairs = []
                @targets.each_key { |lemmapos| pairs << fred_lemmapos_separate(lemmapos) }
                pairs
              end
         
     | 
| 
      
 102 
     | 
    
         
            +
             
     | 
| 
      
 103 
     | 
    
         
            +
              ##
         
     | 
| 
      
 104 
     | 
    
         
            +
              # access to senses
         
     | 
| 
      
 105 
     | 
    
         
            +
              def get_senses(lemmapos) # string, result of fred_lemmapos_combine
                # unknown lemma-pos strings have no senses
                @targets[lemmapos] || []
              end
         
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
      
 114 
     | 
    
         
            +
              ##
         
     | 
| 
      
 115 
     | 
    
         
            +
              # write file
         
     | 
| 
      
 116 
     | 
    
         
            +
              # Write all recorded targets to targets.txt.gz in @dir, one
              # "LEMMA <lemmapos> SENSES <sense> <sense> ..." line per entry.
              # If the file cannot be opened for writing, an error is printed
              # and the program exits.
              def done_reading_targets()
                begin
                  file = FileZipped.new(@dir + "targets.txt.gz", "w")
                rescue
                  $stderr.puts "Error: Could not write file #{@dir}targets.txt.gz"
                  exit 1
                end

                begin
                  @targets.each_pair { |lemma, senses|
                    file.puts "LEMMA #{lemma} SENSES "+ senses.join(" ")
                  }
                ensure
                  # close the handle even if writing one of the lines fails
                  file.close()
                end
              end
         
     | 
| 
      
 130 
     | 
    
         
            +
             
     | 
| 
      
 131 
     | 
    
         
            +
              ###############################
         
     | 
| 
      
 132 
     | 
    
         
            +
              protected
         
     | 
| 
      
 133 
     | 
    
         
            +
              
         
     | 
| 
      
 134 
     | 
    
         
            +
              ##
         
     | 
| 
      
 135 
     | 
    
         
            +
              # record: record occurrence of a lemma/sense pair
         
     | 
| 
      
 136 
     | 
    
         
            +
              # in the @targets data structure
         
     | 
| 
      
 137 
     | 
    
         
            +
              # target_info: hash; the entries "lex", "pos" and "sense" are read
              #
              # The key into @targets is fred_lemmapos_combine(lex, pos). A sense
              # list is created for the key on first use, and the sense is
              # appended only if the list does not contain it yet.
              def record(target_info)
                combined = fred_lemmapos_combine(target_info["lex"], target_info["pos"])
                sense_list = (@targets[combined] ||= Array.new)

                sense = target_info["sense"]
                sense_list << sense unless sense_list.include?(sense)
              end
         
     | 
| 
      
 147 
     | 
    
         
            +
            end
         
     | 
| 
      
 148 
     | 
    
         
            +
             
     | 
| 
      
 149 
     | 
    
         
            +
            ########################################
         
     | 
| 
      
 150 
     | 
    
         
            +
            class FindTargetsFromFrames < Targets
              ###
              # determine_targets:
              # use the frames already present on a sentence to find targets
              #
              # st_sent: SalsaTigerSentence object
              #
              # returns:
              #  hash: target_IDs -> list of senses
              #   where target_IDs is a pair [list of terminal IDs, main terminal ID]
              #
              #  where a sense is represented as a hash:
              #  "sense": sense, a string (the frame name)
              #  "obj":   FrameNode object
              #  "all_targets": list of node IDs, may comprise more than a single node
              #  "lex":   lemma, or multiword expression in canonical form
              #           (the "lemma" attribute of the frame target)
              #  "pos":   part of speech: gold "pos" attribute of the frame target,
              #           with single-letter V/N/A tags spelled out, or the
              #           interpreter's category of the main node if there is
              #           no gold POS
              #  "sid": sentence ID
              #
              # Frames without a target, and frames whose main node is not a
              # terminal, contribute nothing to the result.
              # If @record_targets is set, each sense is also handed to record().
              def determine_targets(st_sent) #SalsaTigerSentence object
                senses_by_target = Hash.new()

                st_sent.each_frame do |frame|
                  target_node = frame.target

                  # no target, nothing to record
                  next if target_node.nil? || target_node.children.empty?

                  # instance-specific computation:
                  # target and target positions
                  # WARNING: at this moment, we are
                  # not considering true multiword targets for German.
                  # Remove the "no_mwe" parameter in main_node_of_expr
                  # to change this
                  members = target_node.children()
                  main_node =
                    if @exp.get("language") == "de"
                      @interpreter_class.main_node_of_expr(members, "no_mwe")
                    else
                      # for all other languages: try to figure out
                      # the head target word anyway
                      @interpreter_class.main_node_of_expr(members)
                    end

                  # don't use parts of a word as main node
                  main_node = main_node.parent() if main_node && main_node.is_splitword?

                  next unless main_node && main_node.is_terminal?

                  key = [members.map { |member| member.id() }, main_node.id()]

                  # gold POS available, may be in wrong form,
                  # i.e. not the same strings that @interpreter_class.category()
                  # would return
                  gold_pos = target_node.get_attribute("pos")
                  pos = case gold_pos
                        when /^[Vv]$/ then "verb"
                        when /^[Nn]$/ then "noun"
                        when /^[Aa]$/ then "adj"
                        when nil      then @interpreter_class.category(main_node)
                        else gold_pos
                        end

                  sense_info = {
                    "sense" => frame.name(),
                    "obj" => frame,
                    "all_targets" => target_node.children().map { |child| child.id() },
                    "lex" => target_node.get_attribute("lemma"),
                    "pos" => pos,
                    "sid" => st_sent.id()
                  }

                  (senses_by_target[key] ||= Array.new()) << sense_info
                  record(sense_info) if @record_targets
                end

                return senses_by_target
              end
            end
         
     | 
| 
      
 235 
     | 
    
         
            +
             
     | 
| 
      
 236 
     | 
    
         
            +
            ########################################
         
     | 
| 
      
 237 
     | 
    
         
            +
            class FindAllTargets < Targets
              ###
              # initialize:
              # load the targets seen in training and set up the stopword list
              #
              # exp, interpreter_class: passed on unchanged to the superclass
              # constructor, together with the mode "r"
              #
              # After construction
              #  @training_lemmapos_pairs holds the result of get_lemma_pos(),
              #  @stoplemmas holds the lemmas that are never proposed as targets
              def initialize(exp,
                             interpreter_class)
                # read target info from file
                super(exp, interpreter_class, "r")
                @training_lemmapos_pairs = get_lemma_pos()

                # Note: an earlier version called
                # get_senses(@training_lemmapos_pairs) here. get_senses
                # expects a single combined lemma/POS string, not a list of
                # pairs, and the result was thrown away, so the call has
                # been dropped.

                # list of words to exclude from assignment, for now
                @stoplemmas = [
                               "have",
                               "do",
                               "be"
                               #      "make"
                              ]
              end

              ####
              # determine_targets:
              # use all known lemmas, minus stopwords
              #
              # sent: SalsaTigerSentence object
              #
              # returns:
              #  hash: target_IDs -> list of senses
              #   where target_IDs is a pair [list of terminal IDs, main terminal ID]
              #
              #  where a sense is represented as a hash:
              #  "sense": always nil here
              #  "obj":   always nil here
              #  "all_targets": list of node IDs, here always the single terminal
              #  "lex":   lemma, as returned by lemma_backoff
              #  "pos":   category of the terminal
              #  "sid": sentence ID
              def determine_targets(sent) #SalsaTigerSentence object
                # map target IDs to list of senses, in our case always
                # a single entry with sense nil,
                # because we assume that the senses of the targets we point out
                # are unknown
                retv = Hash.new()

                # iterate through terminals of the sentence, check for inclusion
                # of their [lemma, pos] pair in @training_lemmapos_pairs
                sent.each_terminal { |node|
                  ### modified by ines, 17.10.2008
                  lemma = @interpreter_class.lemma_backoff(node)
                  pos = @interpreter_class.category(node)

                  # (disabled experiment, kept for reference: append ".V", ".N"
                  # or ".A" to the lemma according to pos, unless the lemma
                  # already matches /\.[ANV]/)

                  # we know this lemma from the training data,
                  next unless @training_lemmapos_pairs.include?([lemma, pos])
                  # and it is not an auxiliary,
                  next if @interpreter_class.auxiliary?(node)
                  # and it is not in the stopword list
                  next if @stoplemmas.include?(lemma)
                  # and the node does not represent a preposition
                  next if pos == "prep"

                  # take this as a target.
                  key = [ [ node.id() ], node.id() ]
                  retv[ key ] = [
                                 {
                                   "sense" => nil,
                                   "obj" => nil,
                                   "all_targets" => [ node.id() ],
                                   "lex" => lemma,
                                   "pos" => pos,
                                   "sid" => sent.id()
                                 } ]
                  # no recording of target info,
                  # since we haven't determined anything new
                }

                return retv
              end
            end
         
     | 
| 
      
 324 
     | 
    
         
            +
             
     |