frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
 - data/CHANGELOG.rdoc +0 -0
 - data/LICENSE.rdoc +0 -0
 - data/README.rdoc +0 -0
 - data/lib/common/AbstractSynInterface.rb +1227 -0
 - data/lib/common/BerkeleyInterface.rb +375 -0
 - data/lib/common/CollinsInterface.rb +1165 -0
 - data/lib/common/ConfigData.rb +694 -0
 - data/lib/common/Counter.rb +18 -0
 - data/lib/common/DBInterface.rb +48 -0
 - data/lib/common/EnduserMode.rb +27 -0
 - data/lib/common/Eval.rb +480 -0
 - data/lib/common/FixSynSemMapping.rb +196 -0
 - data/lib/common/FrPrepConfigData.rb +66 -0
 - data/lib/common/FrprepHelper.rb +1324 -0
 - data/lib/common/Graph.rb +345 -0
 - data/lib/common/ISO-8859-1.rb +24 -0
 - data/lib/common/ML.rb +186 -0
 - data/lib/common/Maxent.rb +215 -0
 - data/lib/common/MiniparInterface.rb +1388 -0
 - data/lib/common/Optimise.rb +195 -0
 - data/lib/common/Parser.rb +213 -0
 - data/lib/common/RegXML.rb +269 -0
 - data/lib/common/RosyConventions.rb +171 -0
 - data/lib/common/SQLQuery.rb +243 -0
 - data/lib/common/STXmlTerminalOrder.rb +194 -0
 - data/lib/common/SalsaTigerRegXML.rb +2347 -0
 - data/lib/common/SalsaTigerXMLHelper.rb +99 -0
 - data/lib/common/SleepyInterface.rb +384 -0
 - data/lib/common/SynInterfaces.rb +275 -0
 - data/lib/common/TabFormat.rb +720 -0
 - data/lib/common/Tiger.rb +1448 -0
 - data/lib/common/TntInterface.rb +44 -0
 - data/lib/common/Tree.rb +61 -0
 - data/lib/common/TreetaggerInterface.rb +303 -0
 - data/lib/common/headz.rb +338 -0
 - data/lib/common/option_parser.rb +13 -0
 - data/lib/common/ruby_class_extensions.rb +310 -0
 - data/lib/fred/Baseline.rb +150 -0
 - data/lib/fred/FileZipped.rb +31 -0
 - data/lib/fred/FredBOWContext.rb +863 -0
 - data/lib/fred/FredConfigData.rb +182 -0
 - data/lib/fred/FredConventions.rb +232 -0
 - data/lib/fred/FredDetermineTargets.rb +324 -0
 - data/lib/fred/FredEval.rb +312 -0
 - data/lib/fred/FredFeatureExtractors.rb +321 -0
 - data/lib/fred/FredFeatures.rb +1061 -0
 - data/lib/fred/FredFeaturize.rb +596 -0
 - data/lib/fred/FredNumTrainingSenses.rb +27 -0
 - data/lib/fred/FredParameters.rb +402 -0
 - data/lib/fred/FredSplit.rb +84 -0
 - data/lib/fred/FredSplitPkg.rb +180 -0
 - data/lib/fred/FredTest.rb +607 -0
 - data/lib/fred/FredTrain.rb +144 -0
 - data/lib/fred/PlotAndREval.rb +480 -0
 - data/lib/fred/fred.rb +45 -0
 - data/lib/fred/md5.rb +23 -0
 - data/lib/fred/opt_parser.rb +250 -0
 - data/lib/frprep/AbstractSynInterface.rb +1227 -0
 - data/lib/frprep/Ampersand.rb +37 -0
 - data/lib/frprep/BerkeleyInterface.rb +375 -0
 - data/lib/frprep/CollinsInterface.rb +1165 -0
 - data/lib/frprep/ConfigData.rb +694 -0
 - data/lib/frprep/Counter.rb +18 -0
 - data/lib/frprep/FNCorpusXML.rb +643 -0
 - data/lib/frprep/FNDatabase.rb +144 -0
 - data/lib/frprep/FixSynSemMapping.rb +196 -0
 - data/lib/frprep/FrPrepConfigData.rb +66 -0
 - data/lib/frprep/FrameXML.rb +513 -0
 - data/lib/frprep/FrprepHelper.rb +1324 -0
 - data/lib/frprep/Graph.rb +345 -0
 - data/lib/frprep/ISO-8859-1.rb +24 -0
 - data/lib/frprep/MiniparInterface.rb +1388 -0
 - data/lib/frprep/Parser.rb +213 -0
 - data/lib/frprep/RegXML.rb +269 -0
 - data/lib/frprep/STXmlTerminalOrder.rb +194 -0
 - data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
 - data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
 - data/lib/frprep/SleepyInterface.rb +384 -0
 - data/lib/frprep/SynInterfaces.rb +275 -0
 - data/lib/frprep/TabFormat.rb +720 -0
 - data/lib/frprep/Tiger.rb +1448 -0
 - data/lib/frprep/TntInterface.rb +44 -0
 - data/lib/frprep/Tree.rb +61 -0
 - data/lib/frprep/TreetaggerInterface.rb +303 -0
 - data/lib/frprep/do_parses.rb +142 -0
 - data/lib/frprep/frprep.rb +686 -0
 - data/lib/frprep/headz.rb +338 -0
 - data/lib/frprep/one_parsed_file.rb +28 -0
 - data/lib/frprep/opt_parser.rb +94 -0
 - data/lib/frprep/ruby_class_extensions.rb +310 -0
 - data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
 - data/lib/rosy/DBMySQL.rb +146 -0
 - data/lib/rosy/DBSQLite.rb +280 -0
 - data/lib/rosy/DBTable.rb +239 -0
 - data/lib/rosy/DBWrapper.rb +176 -0
 - data/lib/rosy/ExternalConfigData.rb +58 -0
 - data/lib/rosy/FailedParses.rb +130 -0
 - data/lib/rosy/FeatureInfo.rb +242 -0
 - data/lib/rosy/GfInduce.rb +1115 -0
 - data/lib/rosy/GfInduceFeature.rb +148 -0
 - data/lib/rosy/InputData.rb +294 -0
 - data/lib/rosy/RosyConfigData.rb +115 -0
 - data/lib/rosy/RosyConfusability.rb +338 -0
 - data/lib/rosy/RosyEval.rb +465 -0
 - data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
 - data/lib/rosy/RosyFeaturize.rb +280 -0
 - data/lib/rosy/RosyInspect.rb +336 -0
 - data/lib/rosy/RosyIterator.rb +477 -0
 - data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
 - data/lib/rosy/RosyPruning.rb +165 -0
 - data/lib/rosy/RosyServices.rb +744 -0
 - data/lib/rosy/RosySplit.rb +232 -0
 - data/lib/rosy/RosyTask.rb +19 -0
 - data/lib/rosy/RosyTest.rb +826 -0
 - data/lib/rosy/RosyTrain.rb +232 -0
 - data/lib/rosy/RosyTrainingTestTable.rb +786 -0
 - data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
 - data/lib/rosy/View.rb +418 -0
 - data/lib/rosy/opt_parser.rb +379 -0
 - data/lib/rosy/rosy.rb +77 -0
 - data/lib/shalmaneser/version.rb +3 -0
 - data/test/frprep/test_opt_parser.rb +94 -0
 - data/test/functional/functional_test_helper.rb +40 -0
 - data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
 - data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
 - data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
 - data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
 - data/test/functional/test_fred.rb +47 -0
 - data/test/functional/test_frprep.rb +52 -0
 - data/test/functional/test_rosy.rb +20 -0
 - metadata +270 -0
 
| 
         @@ -0,0 +1,176 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            ###########################
         
     | 
| 
      
 2 
     | 
    
         
            +
            # DBWrapper:
         
     | 
| 
      
 3 
     | 
    
         
            +
            # abstract class wrapping database interfaces,
         
     | 
| 
      
 4 
     | 
    
         
            +
            # so we can have both an interface to an SQL server
         
     | 
| 
      
 5 
     | 
    
         
            +
            # and an interface to SQLite in Shalmaneser
         
     | 
| 
      
 6 
     | 
    
         
            +
            class DBWrapper
              # name of the default table this object accesses;
              # nil unless the object was produced by make_temp_table()
              attr_reader :table_name

              # per-process serial number used to keep temp table names distinct;
              # a class variable so that all subclasses share one counter
              @@temp_table_serial = 0

              ###
              # remembers the experiment file object; does not open a database
              def initialize(exp)  # RosyConfigData experiment file object
                # remember experiment file
                @exp = exp

                # open the database:
                # please set to some other value in subclass initialization
                @database = nil

                # name of default table to access: none
                @table_name = nil
              end

              ###
              # close DB access
              #
              # does nothing if no database handle has been set
              # (the base class constructor leaves @database at nil)
              def close()
                @database.close() if @database
              end

              ####
              # querying the database:
              # returns a DBResult object
              #
              # no default: raises RuntimeError unless overwritten in a subclass
              def query(query)
                raise "Overwrite me"
              end

              ####
              # querying the database:
              # no result value
              #
              # no default: raises RuntimeError unless overwritten in a subclass
              def query_noretv(query)
                raise "Overwrite me"
              end

              ###
              # list all tables in the database:
              # no default here
              #
              # returns: list of strings
              def list_tables()
                raise "Overwrite me"
              end

              ###
              # make a table
              # no default: raises RuntimeError unless overwritten in a subclass
              #
              # returns: nothing
              def create_table(table_name, # string
                               column_formats, # array: array: string*string [column_name,column_format]
                               index_column_names, # array: string: column_name
                               indexname)  # string: name of automatically created index column
                raise "overwrite me"
              end

              ###
              # remove a table
              #
              # the statement is sent through query_noretv(), so this only works
              # in subclasses that overwrite query_noretv().
              # table_name is pasted into the SQL text as is: pass trusted names only
              def drop_table(table_name)
                query_noretv("DROP TABLE " + table_name)
              end

              ###
              # list all column names of a table
              # (relies on list_column_formats(), which has no default)
              #
              # returns: array of strings
              def list_column_names(table_name)
                return list_column_formats(table_name).map { |col_name, _col_format| col_name }
              end

              #####
              # list_column_formats
              #
              # list column names and column types of this table
              # no default: raises RuntimeError unless overwritten in a subclass
              #
              # returns: array:string*string, list of pairs [column name, column format]
              def list_column_formats(table_name)
                raise "Overwrite me"
              end

              ####
              # num_rows
              #
              # determine the number of rows in a table
              # no default: raises RuntimeError unless overwritten in a subclass
              # returns: integer
              def num_rows(table_name)
                raise "Overwrite me"
              end

              ####
              # make a temporary table: basically just make a table
              #
              # works on a clone of this object, so this object's own
              # @table_name is left untouched
              #
              # returns: DBWrapper object (or object of current subclass)
              # that has the @table_name attribute set to the name of a temporary DB
              def make_temp_table(column_formats, # array: string*string [column_name,column_format]
                                  index_column_names, # array: string: column_name
                                  indexname)  # string: name of autoincrement primary index

                temp_obj = self.clone()
                temp_obj.initialize_temp_table(column_formats, index_column_names, indexname)
                return temp_obj
              end

              ###
              # remove the temp table of this object
              #
              # raises RuntimeError if this object has no table name set,
              # i.e. if it was not produced by make_temp_table()
              def drop_temp_table()
                unless @table_name
                  raise "can only do drop_temp_table() for objects that have a temp table"
                end
                drop_table(@table_name)
              end

              ##############################
              protected

              ###
              # choose a fresh name for the temp table, remember it in @table_name,
              # and create the table via create_table()
              #
              # the name is "t" + timestamp digits + "p" + process id + "n" + serial number:
              # the timestamp alone can repeat when two tables are made in quick
              # succession or by two processes using the same database
              def initialize_temp_table(column_formats, index_column_names, indexname)
                @@temp_table_serial += 1
                timestamp = Time.new().to_f().to_s().gsub(/\./, "")
                @table_name = "t#{timestamp}p#{Process.pid}n#{@@temp_table_serial}"
                create_table(@table_name, column_formats, index_column_names, indexname)
              end
            end
         
     | 
| 
      
 124 
     | 
    
         
            +
             
     | 
| 
      
 125 
     | 
    
         
            +
             
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
      
 127 
     | 
    
         
            +
             
     | 
| 
      
 128 
     | 
    
         
            +
            ######################################################################
         
     | 
| 
      
 129 
     | 
    
         
            +
            # DBResult:
         
     | 
| 
      
 130 
     | 
    
         
            +
            # abstract class keeping query results
         
     | 
| 
      
 131 
     | 
    
         
            +
            #
         
     | 
| 
      
 132 
     | 
    
         
            +
            # instantiate for the DB package used
         
     | 
| 
      
 133 
     | 
    
         
            +
            class DBResult
              ###
              # keep hold of the raw query result handed in;
              # all other methods forward to this object
              def initialize(value)
                @result = value
              end

              # names of the result columns:
              # NO DEFAULT, always raises unless overwritten in a subclass
              def list_column_names
                raise "Overwrite me"
              end

              # number of rows, as reported by the wrapped result object
              def num_rows
                @result.num_rows
              end

              # iterate over the wrapped result's each(),
              # passing every row on to the caller's block
              def each
                @result.each do |values|
                  yield values
                end
              end

              # iterate over the wrapped result's each_hash(),
              # passing every row hash (column name => column value) on to the caller's block
              def each_hash
                @result.each_hash do |name_to_value|
                  yield name_to_value
                end
              end

              # rewind the object so that each() can be run again
              # THE DEFAULT IS EMPTY, PLEASE OVERWRITE
              def reset
              end

              # release the wrapped result object
              def free
                @result.free
              end

              # fetch one row from the wrapped result object
              # and return whatever its fetch_row() returns
              def fetch_row
                @result.fetch_row
              end

            end
         
     | 
| 
      
 176 
     | 
    
         
            +
             
     | 
| 
         @@ -0,0 +1,58 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # ExternalConfigData
         
     | 
| 
      
 2 
     | 
    
         
            +
            # Katrin Erk January 2006
         
     | 
| 
      
 3 
     | 
    
         
            +
            #
         
     | 
| 
      
 4 
     | 
    
         
            +
            # All scripts that compute additional external knowledge sources
         
     | 
| 
      
 5 
     | 
    
         
            +
            # for Fred and Rosy:
         
     | 
| 
      
 6 
     | 
    
         
            +
            # access to configuration and experiment description file
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            require 'common/ConfigData'
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
            ##############################
         
     | 
| 
      
 11 
     | 
    
         
            +
            # Class ExternalConfigData
         
     | 
| 
      
 12 
     | 
    
         
            +
            #
         
     | 
| 
      
 13 
     | 
    
         
            +
            # inherits from ConfigData,
         
     | 
| 
      
 14 
     | 
    
         
            +
            # sets variable names appropriate to tasks of external knowledge sources
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
            class ExternalConfigData < ConfigData
              ###
              # set up the config data object for the given config file,
              # declaring the features that external knowledge source scripts use
              def initialize(filename)
                # feature name => feature type
                feature_types = {
                  "directory" => "string",
                  "experiment_id" => "string",
                  "gfmap_restrict_to_downpath" => "bool",
                  "gfmap_restrict_pathlen" => "integer",
                  "gfmap_remove_gf" => "list"
                }

                # config file, features, variables (none)
                super(filename, feature_types, [])

                # the one list feature is read out via access_as_stringlist()
                set_list_feature_access("gfmap_remove_gf", method(:access_as_stringlist))
              end

              ###
              protected

              #####
              # access_as_stringlist
              #
              # assumed format of a list feature entry in the config file:
              #
              #   lhs = rhs1 rhs2 ... rhsN
              #
              # val_list holds one string tuple [rhs1,...,rhsN] per entry
              #
              # each tuple is joined by single spaces;
              # returns: array of strings, one "rhs1 rhs2 ... rhsN" per tuple
              def access_as_stringlist(val_list) # array:array:string
                val_list.map { |parts| parts.join(" ") }
              end
            end
         
     | 
| 
      
 56 
     | 
    
         
            +
             
     | 
| 
      
 57 
     | 
    
         
            +
             
     | 
| 
      
 58 
     | 
    
         
            +
             
         
     | 
| 
         @@ -0,0 +1,130 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # Failed Parses
         
     | 
| 
      
 2 
     | 
    
         
            +
            #
         
     | 
| 
      
 3 
     | 
    
         
            +
            # SP May 05
         
     | 
| 
      
 4 
     | 
    
         
            +
            #
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Administration of information about failed parses; 
         
     | 
| 
      
 6 
     | 
    
         
            +
            # - sentence ID
         
     | 
| 
      
 7 
     | 
    
         
            +
            # - frame
         
     | 
| 
      
 8 
     | 
    
         
            +
            # - missed FE markables
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
      
 10 
     | 
    
         
            +
            # this class is pretty much a glorified hash table with methods to
         
     | 
| 
      
 11 
     | 
    
         
            +
            # - read FailedParses from a file and to write them to a file
         
     | 
| 
      
 12 
     | 
    
         
            +
            # - access info in a frame-specific way
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            class FailedParses

              ###
              # initialize
              #
              # start out with an empty list of failed parses
              def initialize()
                # each entry is a tuple [sent_id, frame, target, target_pos, fe_list]
                @failed_parses = Array.new
              end

              ###
              # register
              #
              # register new failed parse by specifying
              # - its sentence id (any object)
              # - its frame (String)
              # - its target and target POS
              # - its FE list (String Array)
              #
              # Registering the same sentence id more than once is allowed:
              # every call adds one more entry.
              def register(sent_id, # object
                           frame,   # string: frame name
                           target,  # string?
                           target_pos, # string: target POS
                           fe_list) # array:string
                @failed_parses << [sent_id, frame, target, target_pos, fe_list]
              end

              ###
              # make_split
              #
              # produce a "split" of the failed parses into a train and a test section
              # parameter: train_percentage, Integer between 0 and 100
              #
              # returns an Array with two FailedParses objects, the first for the
              # train data, the second for the test data
              #
              # raises a RuntimeError if train_percentage is not an Integer in 0..100
              def make_split(train_percentage)
                # is_a? works both with the old Fixnum/Bignum classes and with the
                # unified Integer class (Ruby >= 2.4), where "klass < Integer" is false
                unless train_percentage.is_a?(Integer) && train_percentage.between?(0, 100)
                  raise "Need Integer between 0 and 100 as training percentage."
                end

                train_failed = FailedParses.new()
                test_failed = FailedParses.new()
                @failed_parses.each { |sent_id, frame, target, target_pos, fe_list|
                  # rand(100) is uniform over 0..99, so ">=" sends exactly
                  # train_percentage out of 100 values to the train section
                  # (0 => everything is test data, 100 => everything is train data)
                  if rand(100) >= train_percentage
                    test_failed.register(sent_id, frame, target, target_pos, fe_list)
                  else
                    train_failed.register(sent_id, frame, target, target_pos, fe_list)
                  end
                }
                return [train_failed, test_failed]
              end

              ###
              # Access information
              #
              # failed_sent: number of failed sentences
              # failed_fes:  Hash that maps FE names [String] onto numbers of failed FEs [Int]
              #
              # optional parameters: frame, target, target_pos : if not specified or nil, marginal
              #                      frequencies are counted (sum over all values)

              def failed_sent(frame_spec=nil,target_spec=nil,target_pos_spec=nil)
                return matching_entries(frame_spec, target_spec, target_pos_spec).length
              end

              def failed_fes(frame_spec=nil,target_spec=nil,target_pos_spec=nil)
                # unseen FE labels count as 0
                fe_hash = Hash.new(0)
                matching_entries(frame_spec, target_spec, target_pos_spec).each { |entry|
                  # last tuple component is the FE list
                  entry[4].each { |fe_label|
                    fe_hash[fe_label] += 1
                  }
                }
                return fe_hash
              end

              ###
              # Marshalling:
              #
              # save - save info about failed parses to file
              # load - load info about failed parses from file

              def save(filename)
                # Marshal data is binary; the block form closes the file even
                # if dumping raises
                File.open(filename, "wb") { |io_obj|
                  Marshal.dump(@failed_parses, io_obj)
                }
                return nil
              end

              def load(filename)
                begin
                  # NOTE: Marshal.load can instantiate arbitrary objects;
                  # only load files that were written by save().
                  @failed_parses = File.open(filename, "rb") { |io_obj|
                    Marshal.load(io_obj)
                  }
                rescue
                  # best effort: keep the current (usually empty) list
                  $stderr.puts "WARNING: couldn't read failed parses file #{filename}."
                  $stderr.puts "I'll assume that there are no failed parses."
                end
                return nil
              end

              ###
              private

              # all registered tuples that agree with the given specifications;
              # a nil specification matches every value
              def matching_entries(frame_spec, target_spec, target_pos_spec)
                return @failed_parses.select { |sent_id, frame, target, target_pos, fe_list|
                  (frame_spec.nil? or frame_spec == frame) and
                    (target_spec.nil? or target_spec == target) and
                    (target_pos_spec.nil? or target_pos_spec == target_pos)
                }
              end
            end
         
     | 
| 
         @@ -0,0 +1,242 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'common/ruby_class_extensions'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            class RosyFeatureInfo
         
     | 
| 
      
 4 
     | 
    
         
            +
              ###
         
     | 
| 
      
 5 
     | 
    
         
            +
              # class variable:
         
     | 
| 
      
 6 
     | 
    
         
            +
              # list of all known extractors
         
     | 
| 
      
 7 
     | 
    
         
            +
              # add to it using add_feature()
         
     | 
| 
      
 8 
     | 
    
         
            +
              @@extractors = Array.new
         
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
      
 10 
     | 
    
         
            +
              # boolean. set to true after warning messages have been given once
         
     | 
| 
      
 11 
     | 
    
         
            +
              @@warned = false
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
              ###
         
     | 
| 
      
 14 
     | 
    
         
            +
              # add interface/interpreter
         
     | 
| 
      
 15 
     | 
    
         
            +
              # Append an extractor to the class-wide list of known extractors.
              #
              # class_name: Class object of the extractor
              def RosyFeatureInfo.add_feature(class_name) # Class object
                @@extractors.push(class_name)
              end
         
     | 
| 
      
 18 
     | 
    
         
            +
              
         
     | 
| 
      
 19 
     | 
    
         
            +
              ###
         
     | 
| 
      
 20 
     | 
    
         
            +
              # Build the list of active extractors and the list of feature
              # descriptions for the given experiment object.
              #
              # exp: experiment object; get_lf("feature") is iterated as pairs
              #      [feature group designator(string), options(array:string)]
              #
              # Terminates the program (exit 1) if a feature carries more than one
              # of the options dontuse, argrec, arglab, onestep.
              def initialize(exp)

                ##
                # descriptors (hashes with keys "extractor" and "step") of all
                # extractors that are either required by the user
                # or needed by the system
                @current_extractors = Array.new
                @exp = exp

                # user-chosen extractors
                exp.get_lf("feature").each do |extractor_name, options|
                  extractor = @@extractors.find { |candidate| candidate.designator() == extractor_name }
                  if !extractor
                    # no registered extractor carries this designator
                    if !@@warned
                      $stderr.puts "Warning: Could not find a feature extractor for #{extractor_name}: skipping."
                    end
                    next
                  end

                  # read and check options: at most one step option per feature
                  step = nil
                  options.each do |option|
                    if ["dontuse", "argrec", "arglab", "onestep"].include?(option)
                      if step
                        # a step option was already seen for this feature
                        $stderr.puts "ERROR in feature #{extractor_name}: Please set only one of the options dontuse, argrec, arglab, onestep"
                        exit 1
                      end
                      step = option
                    elsif !@@warned
                      $stderr.puts "Warning: Unknown option for feature #{extractor_name}: #{option}. Skipping"
                    end
                  end

                  @current_extractors.push({ "extractor" => extractor, "step" => step })
                end

                # extractors needed by the system: admin features and gold feature
                system_extractors = @@extractors.find_all do |candidate|
                  ["admin", "gold"].include?(candidate.feature_type())
                end
                system_extractors.each do |extractor|
                  # if the user has already listed that extractor, drop the user's
                  # entry and add it with our own options
                  @current_extractors.delete_if { |descr| descr["extractor"].designator() == extractor.designator() }
                  @current_extractors.push({ "extractor" => extractor, "step" => "dontuse" })
                end

                # make sure that all extractors are computable in the current model
                # (i.e. check dependencies)

                # designators of all extractors without a step restriction
                # plus those restricted to the given step
                designators_for = lambda do |step_name|
                  @current_extractors.select { |descr|
                    descr["step"].nil? or descr["step"] == step_name
                  }.map { |descr| descr["extractor"].designator() }
                end

                # the lists are computed once, before anything is removed below
                available = {
                  nil       => designators_for.call(nil),
                  "argrec"  => designators_for.call("argrec"),
                  "arglab"  => designators_for.call("arglab"),
                  "onestep" => designators_for.call("onestep")
                }

                @current_extractors.delete_if do |descr|
                  step_name = descr["step"]
                  computable =
                    if step_name == "dontuse"
                      # either an admin feature or a user feature not to be used this time
                      true
                    elsif available.has_key?(step_name)
                      descr["extractor"].is_computable(available[step_name])
                    else
                      nil
                    end

                  if !computable and !@@warned
                    $stderr.puts "Warning: Feature extractor #{descr["extractor"].designator()} cannot be computed: skipping."
                  end

                  # delete exactly the extractors that are not computable
                  !computable
                end

                # make list of all features as hashes
                # "feature_name" -> string,
                # "sql_type" -> string,
                # "is_index" -> boolean,
                # "step" -> string: argrec, arglab, onestep, or nil
                # "type" -> string
                # "phase" -> string: phase 1 or phase 2
                @features = Array.new
                @current_extractors.each do |descr|
                  extractor = descr["extractor"]
                  extractor.feature_names.each do |feature_name|
                    @features.push({
                      "feature_name" => feature_name,
                      "sql_type"     => extractor.sql_type(),
                      "is_index"     => extractor.info().include?("index"),
                      "step"         => descr["step"],
                      "type"         => extractor.feature_type(),
                      "phase"        => extractor.phase()
                    })
                  end
                end

                # do not print warnings again if another RosyFeatureInfo object is made
                @@warned = true
              end
         
     | 
| 
      
 147 
     | 
    
         
            +
             
     | 
| 
      
 148 
     | 
    
         
            +
              ###
         
     | 
| 
      
 149 
     | 
    
         
            +
              # get_column_formats
         
     | 
| 
      
 150 
     | 
    
         
            +
              #
         
     | 
| 
      
 151 
     | 
    
         
            +
              # returns a list of pairs [feature_name(string), sql_column_format(string)]:
         
     | 
| 
      
 152 
     | 
    
         
            +
              # all features to be computed, with their SQL column formats
         
     | 
| 
      
 153 
     | 
    
         
            +
              # List the features to be computed together with their SQL column formats.
              #
              # phase: string (phase 1 or phase 2); if nil, features of all phases are listed
              # returns: list of pairs [feature_name(string), sql_column_format(string)]
              def get_column_formats(phase = nil) # string: phase 1 or phase 2
                wanted = @features
                unless phase.nil?
                  wanted = @features.find_all { |descr| descr["phase"] == phase }
                end
                return wanted.collect { |descr| [descr["feature_name"], descr["sql_type"]] }
              end
         
     | 
| 
      
 161 
     | 
    
         
            +
             
     | 
| 
      
 162 
     | 
    
         
            +
              ###
         
     | 
| 
      
 163 
     | 
    
         
            +
              # get_column_names
         
     | 
| 
      
 164 
     | 
    
         
            +
              #
         
     | 
| 
      
 165 
     | 
    
         
            +
              # returns a list of feature names (strings)
         
     | 
| 
      
 166 
     | 
    
         
            +
              # all features to be computed
         
     | 
| 
      
 167 
     | 
    
         
            +
              # List the names of the features to be computed.
              #
              # phase: string (phase 1 or phase 2); if nil, features of all phases are listed
              # returns: list of feature names (strings)
              def get_column_names(phase = nil)  # string: phase 1 or phase 2
                names = []
                @features.each do |descr|
                  if phase.nil? || descr["phase"] == phase
                    names << descr["feature_name"]
                  end
                end
                return names
              end
         
     | 
| 
      
 175 
     | 
    
         
            +
             
     | 
| 
      
 176 
     | 
    
         
            +
              ###
         
     | 
| 
      
 177 
     | 
    
         
            +
              # get_index_columns
         
     | 
| 
      
 178 
     | 
    
         
            +
              #
         
     | 
| 
      
 179 
     | 
    
         
            +
              # returns a list of feature (column) names as Strings
         
     | 
| 
      
 180 
     | 
    
         
            +
              # consisting of all features that have been requested as index features
         
     | 
| 
      
 181 
     | 
    
         
            +
              # in the experiment file or in the list of @@all_features_we_have above
         
     | 
| 
      
 182 
     | 
    
         
            +
              def get_index_columns()
                # Features whose "is_index" entry is truthy (neither nil nor false).
                index_features = @features.select do |descr|
                  descr["is_index"]
                end

                # Only the column names are returned.
                index_features.map { |descr| descr["feature_name"] }
              end
         
     | 
| 
      
 189 
     | 
    
         
            +
             
     | 
| 
      
 190 
     | 
    
         
            +
              ###
         
     | 
| 
      
 191 
     | 
    
         
            +
              # get_model_features
         
     | 
| 
      
 192 
     | 
    
         
            +
              #
         
     | 
| 
      
 193 
     | 
    
         
            +
              # returns a list of feature (column) names as strings
         
     | 
| 
      
 194 
     | 
    
         
            +
              # consisting of all the features to be used for the modeling
         
     | 
| 
      
 195 
     | 
    
         
            +
              #
         
     | 
| 
      
 196 
     | 
    
         
            +
              # step: argrec, arglab, onestep
         
     | 
| 
      
 197 
     | 
    
         
            +
              def get_model_features(step)
                # Feature types that never take part in modeling.
                excluded_types = ["admin", "gold"]

                usable = @features.select do |descr|
                  # descr["step"] is argrec, arglab, onestep, dontuse, or nil:
                  # nil matches every step, while 'dontuse' equals none of the
                  # step names and so never passes this test.
                  descr_step = descr["step"]
                  step_matches = descr_step.nil? || descr_step == step

                  # Drop admin features and the gold label.
                  step_matches && !excluded_types.include?(descr["type"])
                end

                # Only the feature names are of interest to the caller.
                usable.map { |descr| descr["feature_name"] }
              end
         
     | 
| 
      
 214 
     | 
    
         
            +
             
     | 
| 
      
 215 
     | 
    
         
            +
              ###
         
     | 
| 
      
 216 
     | 
    
         
            +
              # get_extractor_objects
         
     | 
| 
      
 217 
     | 
    
         
            +
              #
         
     | 
| 
      
 218 
     | 
    
         
            +
              # returns two lists of feature extractor objects, 
         
     | 
| 
      
 219 
     | 
    
         
            +
              # covering all features of the given phase:
         
     | 
| 
      
 220 
     | 
    
         
            +
              # the first list contains RosyFeatureExtractor extractors,
         
     | 
| 
      
 221 
     | 
    
         
            +
              # the second list contains the others.
         
     | 
| 
      
 222 
     | 
    
         
            +
              def get_extractor_objects(phase, # string: "phase 1" or "phase 2"
                                        interpreter_class) # SynInterpreter class
                # Only the two known phase labels are accepted.
                unless ["phase 1", "phase 2"].include? phase
                  # Build the message without String#+ on a non-String value:
                  # a nil (or otherwise non-String) phase previously raised a
                  # TypeError here instead of this error. String input yields
                  # the same message as before.
                  shown_phase = phase.is_a?(String) ? phase : phase.inspect
                  raise "Shouldn't be here: " + shown_phase
                end

                return @current_extractors.select { |descr|
                  # select extractors of the right phase
                  descr["extractor"].phase() == phase
                }.map { |descr|
                  # make objects from extractor classes
                  descr["extractor"].new(@exp, interpreter_class)
                }.distribute { |extractor_obj|
                  # distribute extractors in two bins:
                  # first, rosy extractors
                  # second, others
                  # (distribute is not defined in this part of the file;
                  # presumably a project Array extension -- confirm)
                  extractor_obj.class.info().include? "rosy"
                }
              end
         
     | 
| 
      
 242 
     | 
    
         
            +
            end
         
     |