frprep 0.0.1.prealpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +8 -0
 - data/CHANGELOG.rdoc +0 -0
 - data/LICENSE.rdoc +0 -0
 - data/README.rdoc +0 -0
 - data/lib/common/AbstractSynInterface.rb +1227 -0
 - data/lib/common/BerkeleyInterface.rb +375 -0
 - data/lib/common/CollinsInterface.rb +1165 -0
 - data/lib/common/ConfigData.rb +694 -0
 - data/lib/common/Counter.rb +18 -0
 - data/lib/common/DBInterface.rb +48 -0
 - data/lib/common/EnduserMode.rb +27 -0
 - data/lib/common/Eval.rb +480 -0
 - data/lib/common/FixSynSemMapping.rb +196 -0
 - data/lib/common/FrPrepConfigData.rb +66 -0
 - data/lib/common/FrprepHelper.rb +1324 -0
 - data/lib/common/Graph.rb +345 -0
 - data/lib/common/ISO-8859-1.rb +24 -0
 - data/lib/common/ML.rb +186 -0
 - data/lib/common/Maxent.rb +215 -0
 - data/lib/common/MiniparInterface.rb +1388 -0
 - data/lib/common/Optimise.rb +195 -0
 - data/lib/common/Parser.rb +213 -0
 - data/lib/common/RegXML.rb +269 -0
 - data/lib/common/RosyConventions.rb +171 -0
 - data/lib/common/SQLQuery.rb +243 -0
 - data/lib/common/STXmlTerminalOrder.rb +194 -0
 - data/lib/common/SalsaTigerRegXML.rb +2347 -0
 - data/lib/common/SalsaTigerXMLHelper.rb +99 -0
 - data/lib/common/SleepyInterface.rb +384 -0
 - data/lib/common/SynInterfaces.rb +275 -0
 - data/lib/common/TabFormat.rb +720 -0
 - data/lib/common/Tiger.rb +1448 -0
 - data/lib/common/TntInterface.rb +44 -0
 - data/lib/common/Tree.rb +61 -0
 - data/lib/common/TreetaggerInterface.rb +303 -0
 - data/lib/common/headz.rb +338 -0
 - data/lib/common/option_parser.rb +13 -0
 - data/lib/common/ruby_class_extensions.rb +310 -0
 - data/lib/fred/Baseline.rb +150 -0
 - data/lib/fred/FileZipped.rb +31 -0
 - data/lib/fred/FredBOWContext.rb +863 -0
 - data/lib/fred/FredConfigData.rb +182 -0
 - data/lib/fred/FredConventions.rb +232 -0
 - data/lib/fred/FredDetermineTargets.rb +324 -0
 - data/lib/fred/FredEval.rb +312 -0
 - data/lib/fred/FredFeatureExtractors.rb +321 -0
 - data/lib/fred/FredFeatures.rb +1061 -0
 - data/lib/fred/FredFeaturize.rb +596 -0
 - data/lib/fred/FredNumTrainingSenses.rb +27 -0
 - data/lib/fred/FredParameters.rb +402 -0
 - data/lib/fred/FredSplit.rb +84 -0
 - data/lib/fred/FredSplitPkg.rb +180 -0
 - data/lib/fred/FredTest.rb +607 -0
 - data/lib/fred/FredTrain.rb +144 -0
 - data/lib/fred/PlotAndREval.rb +480 -0
 - data/lib/fred/fred.rb +45 -0
 - data/lib/fred/md5.rb +23 -0
 - data/lib/fred/opt_parser.rb +250 -0
 - data/lib/frprep/AbstractSynInterface.rb +1227 -0
 - data/lib/frprep/Ampersand.rb +37 -0
 - data/lib/frprep/BerkeleyInterface.rb +375 -0
 - data/lib/frprep/CollinsInterface.rb +1165 -0
 - data/lib/frprep/ConfigData.rb +694 -0
 - data/lib/frprep/Counter.rb +18 -0
 - data/lib/frprep/FNCorpusXML.rb +643 -0
 - data/lib/frprep/FNDatabase.rb +144 -0
 - data/lib/frprep/FixSynSemMapping.rb +196 -0
 - data/lib/frprep/FrPrepConfigData.rb +66 -0
 - data/lib/frprep/FrameXML.rb +513 -0
 - data/lib/frprep/FrprepHelper.rb +1324 -0
 - data/lib/frprep/Graph.rb +345 -0
 - data/lib/frprep/ISO-8859-1.rb +24 -0
 - data/lib/frprep/MiniparInterface.rb +1388 -0
 - data/lib/frprep/Parser.rb +213 -0
 - data/lib/frprep/RegXML.rb +269 -0
 - data/lib/frprep/STXmlTerminalOrder.rb +194 -0
 - data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
 - data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
 - data/lib/frprep/SleepyInterface.rb +384 -0
 - data/lib/frprep/SynInterfaces.rb +275 -0
 - data/lib/frprep/TabFormat.rb +720 -0
 - data/lib/frprep/Tiger.rb +1448 -0
 - data/lib/frprep/TntInterface.rb +44 -0
 - data/lib/frprep/Tree.rb +61 -0
 - data/lib/frprep/TreetaggerInterface.rb +303 -0
 - data/lib/frprep/do_parses.rb +142 -0
 - data/lib/frprep/frprep.rb +686 -0
 - data/lib/frprep/headz.rb +338 -0
 - data/lib/frprep/one_parsed_file.rb +28 -0
 - data/lib/frprep/opt_parser.rb +94 -0
 - data/lib/frprep/ruby_class_extensions.rb +310 -0
 - data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
 - data/lib/rosy/DBMySQL.rb +146 -0
 - data/lib/rosy/DBSQLite.rb +280 -0
 - data/lib/rosy/DBTable.rb +239 -0
 - data/lib/rosy/DBWrapper.rb +176 -0
 - data/lib/rosy/ExternalConfigData.rb +58 -0
 - data/lib/rosy/FailedParses.rb +130 -0
 - data/lib/rosy/FeatureInfo.rb +242 -0
 - data/lib/rosy/GfInduce.rb +1115 -0
 - data/lib/rosy/GfInduceFeature.rb +148 -0
 - data/lib/rosy/InputData.rb +294 -0
 - data/lib/rosy/RosyConfigData.rb +115 -0
 - data/lib/rosy/RosyConfusability.rb +338 -0
 - data/lib/rosy/RosyEval.rb +465 -0
 - data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
 - data/lib/rosy/RosyFeaturize.rb +280 -0
 - data/lib/rosy/RosyInspect.rb +336 -0
 - data/lib/rosy/RosyIterator.rb +477 -0
 - data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
 - data/lib/rosy/RosyPruning.rb +165 -0
 - data/lib/rosy/RosyServices.rb +744 -0
 - data/lib/rosy/RosySplit.rb +232 -0
 - data/lib/rosy/RosyTask.rb +19 -0
 - data/lib/rosy/RosyTest.rb +826 -0
 - data/lib/rosy/RosyTrain.rb +232 -0
 - data/lib/rosy/RosyTrainingTestTable.rb +786 -0
 - data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
 - data/lib/rosy/View.rb +418 -0
 - data/lib/rosy/opt_parser.rb +379 -0
 - data/lib/rosy/rosy.rb +77 -0
 - data/lib/shalmaneser/version.rb +3 -0
 - data/test/frprep/test_opt_parser.rb +94 -0
 - data/test/functional/functional_test_helper.rb +40 -0
 - data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
 - data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
 - data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
 - data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
 - data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
 - data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
 - data/test/functional/test_fred.rb +47 -0
 - data/test/functional/test_frprep.rb +52 -0
 - data/test/functional/test_rosy.rb +20 -0
 - metadata +270 -0
 
| 
         @@ -0,0 +1,2347 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            # SalsaTigerRegXML.rb
         
     | 
| 
      
 2 
     | 
    
         
            +
            #
         
     | 
| 
      
 3 
     | 
    
         
            +
            # Katrin Erk, June 2005
         
     | 
| 
      
 4 
     | 
    
         
            +
            #
         
     | 
| 
      
 5 
     | 
    
         
            +
            # Classes for accessing and managing 
         
     | 
| 
      
 6 
     | 
    
         
            +
            # SalsaTigerXML sentences
         
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            # The interface of the classes in this package
         
     | 
| 
      
 9 
     | 
    
         
            +
            # is similar to that of SalsaTigerXML.rb
         
     | 
| 
      
 10 
     | 
    
         
            +
            # but the package is based solely on regular expressions
         
     | 
| 
      
 11 
     | 
    
         
            +
            # and not on REXML.
         
     | 
| 
      
 12 
     | 
    
         
            +
            #
         
     | 
| 
      
 13 
     | 
    
         
            +
            # Main class here: SalsaTigerSentence, keeps a complete sentence
         
     | 
| 
      
 14 
     | 
    
         
            +
            #
         
     | 
| 
      
 15 
     | 
    
         
            +
            # Nodes of the syntactic tree, frames and frame elements are all
         
     | 
| 
      
 16 
     | 
    
         
            +
            # handed around as XMLNode objects, or more specifically 
         
     | 
| 
      
 17 
     | 
    
         
            +
            # SynNode, FrameNode and FeNode objects, respectively. 
         
     | 
| 
      
 18 
     | 
    
         
            +
            #
         
     | 
| 
      
 19 
     | 
    
         
            +
            # Inheritance between classes in here:
         
     | 
| 
      
 20 
     | 
    
         
            +
            #
         
     | 
| 
      
 21 
     | 
    
         
            +
            #                  GraphNode
         
     | 
| 
      
 22 
     | 
    
         
            +
            #                    |
         
     | 
| 
      
 23 
     | 
    
         
            +
            #                  XMLNode
         
     | 
| 
      
 24 
     | 
    
         
            +
            #                    |
         
     | 
| 
      
 25 
     | 
    
         
            +
            #                SalsaTigerXmlNode
         
     | 
| 
      
 26 
     | 
    
         
            +
            #                /                 \
         
     | 
| 
      
 27 
     | 
    
         
            +
            #              SynNode            SemNode
         
     | 
| 
      
 28 
     | 
    
         
            +
            #               |                 /     \
         
     | 
| 
      
 29 
     | 
    
         
            +
            #            TSSynNode      FrameNode   FeNode
         
     | 
| 
      
 30 
     | 
    
         
            +
            #
         
     | 
| 
      
 31 
     | 
    
         
            +
            # 
         
     | 
| 
      
 32 
     | 
    
         
            +
            # SalsaTigerSentence uses the other classes, but is separate
         
     | 
| 
      
 33 
     | 
    
         
            +
            #
         
     | 
| 
      
 34 
     | 
    
         
            +
            # SalsaTigerSentence does _not_ yield a faithful image of the SalsaTiger XML structure of 
         
     | 
| 
      
 35 
     | 
    
         
            +
            # a sentence. With the SalsaTiger XML structure you need to follow "idref" attributes
         
     | 
| 
      
 36 
     | 
    
         
            +
            # to the elements with matching "id" attributes in other parts of the structure.
         
     | 
| 
      
 37 
     | 
    
         
            +
            # With the classes in this package, you don't. 
         
     | 
| 
      
 38 
     | 
    
         
            +
            # Wherever in SalsaTiger XML you have an idref, you will have _direct access to the 
         
     | 
| 
      
 39 
     | 
    
         
            +
            # object_ here. 
         
     | 
| 
      
 40 
     | 
    
         
            +
            #
         
     | 
| 
      
 41 
     | 
    
         
            +
            # Suppose that in the XML structure you have a nonterminal element X with <edge> elements
         
     | 
| 
      
 42 
     | 
    
         
            +
            # pointing to other (terminal or nonterminal) elements X1,.., Xn. Then you'll have 
         
     | 
| 
      
 43 
     | 
    
         
            +
            # a SynNode object N that contains X as its XML object, and the children N1,..,Nn of N 
         
     | 
| 
      
 44 
     | 
    
         
            +
            # will be SynNode objects that contain X1,..,Xn as their XML objects.
         
     | 
| 
      
 45 
     | 
    
         
            +
            #
         
     | 
| 
      
 46 
     | 
    
         
            +
            # A SynNode that is a terminal may have children too: its splitword parts (if any).
         
     | 
| 
      
 47 
     | 
    
         
            +
            #
         
     | 
| 
      
 48 
     | 
    
         
            +
            # So: a syntactic node is a SynNode object, its children are SynNode objects. The edges
         
     | 
| 
      
 49 
     | 
    
         
            +
            # to its children are labeled the same way as in the XML structure. If the children
         
     | 
| 
      
 50 
     | 
    
         
            +
            # are splitword parts, the edges are unlabeled.
         
     | 
| 
      
 51 
     | 
    
         
            +
            #
         
     | 
| 
      
 52 
     | 
    
         
            +
            # A frame is a FrameNode object, its children are FeNode objects. The edges to its children
         
     | 
| 
      
 53 
     | 
    
         
            +
            # are labeled with the FE name or with "target".
         
     | 
| 
      
 54 
     | 
    
         
            +
            #
         
     | 
| 
      
 55 
     | 
    
         
            +
            # A frame element is an FeNode object, its children are SynNode objects. The edges to its
         
     | 
| 
      
 56 
     | 
    
         
            +
            # children are unlabeled.
         
     | 
| 
      
 57 
     | 
    
         
            +
            #
         
     | 
| 
      
 58 
     | 
    
         
            +
            # A frame underspecification is an UspNode object, its children are FrameNode objects.
         
     | 
| 
      
 59 
     | 
    
         
            +
            # The edges to its children are unlabeled.
         
     | 
| 
      
 60 
     | 
    
         
            +
            #
         
     | 
| 
      
 61 
     | 
    
         
            +
            # A frame element underspecification is an UspNode object, its children are
         
     | 
| 
      
 62 
     | 
    
         
            +
            # FeNode objects. The edges to its children are unlabeled.
         
     | 
| 
      
 63 
     | 
    
         
            +
             
     | 
| 
      
 64 
     | 
    
         
            +
            require "common/Tree"
         
     | 
| 
      
 65 
     | 
    
         
            +
            require "common/STXmlTerminalOrder"
         
     | 
| 
      
 66 
     | 
    
         
            +
            require "common/RegXML"
         
     | 
| 
      
 67 
     | 
    
         
            +
            require "common/ruby_class_extensions"
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
            #############
         
     | 
| 
      
 70 
     | 
    
         
            +
            # class XMLNode
         
     | 
| 
      
 71 
     | 
    
         
            +
            # 
         
     | 
| 
      
 72 
     | 
    
         
            +
            # node with entries pointing to its children
         
     | 
| 
      
 73 
     | 
    
         
            +
            # as well as its parent. 
         
     | 
| 
      
 74 
     | 
    
         
            +
            # all edges may be labeled.
         
     | 
| 
      
 75 
     | 
    
         
            +
            # each node has a unique ID.
         
     | 
| 
      
 76 
     | 
    
         
            +
            # 
         
     | 
| 
      
 77 
     | 
    
         
            +
            # indexes a string with XML data representing the same node, 
         
     | 
| 
      
 78 
     | 
    
         
            +
            # but does not look into it, just keeps it
         
     | 
| 
      
 79 
     | 
    
         
            +
            # 
         
     | 
| 
      
 80 
     | 
    
         
            +
            # methods:
         
     | 
| 
      
 81 
     | 
    
         
            +
            # This class inherits from TreeNode and GraphNode. 
         
     | 
| 
      
 82 
     | 
    
         
            +
            # See Tree.rb and Graph.rb for the methods they offer.
         
     | 
| 
      
 83 
     | 
    
         
            +
            #
         
     | 
| 
      
 84 
     | 
    
         
            +
            # new        initializes the object
         
     | 
| 
      
 85 
     | 
    
         
            +
            #
         
     | 
| 
      
 86 
     | 
    
         
            +
            # get        returns the XML object representing
         
     | 
| 
      
 87 
     | 
    
         
            +
            #            the same node as this node object
         
     | 
| 
      
 88 
     | 
    
         
            +
            #
         
     | 
| 
      
 89 
     | 
    
         
            +
             
     | 
| 
      
 90 
     | 
    
         
            +
            class XMLNode < TreeNode

              ###
              # Create a node that stands either for an XML element or for a piece of text.
              #
              # name:       string: element name; or, for text, the whole text
              # attribute:  hash: attr_name(string) -> attr_value(string), or nil
              # id:         string: node ID; if nil, an ID is made from the system time
              # i_am_text:  boolean: set to anything but false or nil
              #             to represent not an xml element but text
              #
              # Raises a RuntimeError if a text node is given attributes.
              def initialize(name, attribute, id, i_am_text = false)
                # I wasn't given any ID: take the system time instead.
                # to_f includes fractions of seconds, so that several nodes made
                # in the same second still get different IDs.
                # NOTE(review): two nodes made within the resolution of the clock
                # would still share an ID -- confirm whether that can matter.
                id = Time.new.to_f.to_s if id.nil?

                super(id)

                # remember values for this element
                set_f("name", name)
                set_f("attributes", attribute)
                set_f("i_am_text", i_am_text)

                # sanity check on the "attribute" parameter
                # ("attributes" is not a name defined in this class)
                if i_am_text && attribute
                  raise "A text element cannot have attributes"
                end

                # additional XML material added via add_kith,
                # written out after the children by get_xml_embedded
                @kith = []
              end

              ###
              # Add a child, with a sanity check:
              # if this is text rather than an xml element,
              # it cannot have children (RuntimeError).
              # The arguments are handed on unchanged to the superclass method.
              def add_child(child, edgelabel, varhash={})
                raise "A text element cannot have children" if get_f("i_am_text")

                super(child, edgelabel, varhash)
              end

              ###
              # Remember an additional piece of XML for this node.
              # On output it is rendered with to_s, after the children.
              def add_kith(xml) # RegXML object
                @kith << xml
              end

              ###
              # set attribute
              #
              # name:   attribute name
              # value:  string: attribute value; anything else raises a RuntimeError
              #
              # Creates the attribute hash first if this node has none yet.
              def set_attribute(name, value)
                unless value.is_a?(String)
                  raise "I can only set attribute values to strings. Got: #{value.class.to_s}"
                end

                set_f("attributes", {}) if get_f("attributes").nil?
                get_f("attributes")[name] = value
              end

              ###
              # Return the value stored for the attribute with the given name,
              # or nil if this node has no attribute hash.
              def get_attribute(name)
                attrs = get_f("attributes")
                attrs ? attrs[name] : nil
              end

              ###
              # delete attribute
              #
              # Does nothing if this node has no attribute hash.
              def del_attribute(name)
                attrs = get_f("attributes")
                attrs.delete(name) if attrs
              end

              ###
              # return XML as string:
              # If this is a text, just return the text
              # which is stored in "name"
              # If this is an XML element,
              # make a tag from its name and attributes,
              # then add tags for all its children,
              # then add an end tag.
              def get()
                # text rather than XML element
                return get_f("name") if get_f("i_am_text")

                # XML element, not text
                string = "<" + get_f("name")
                attrs = get_f("attributes")
                if attrs
                  # attribute values are written between single quotes,
                  # which is why xml_secure_val has to escape apostrophes
                  attrs.to_a.each { |attr_name, attr_value|
                    string << " " + attr_name + "='" + xml_secure_val(attr_value) + "'"
                  }
                end
                string << ">\n"
                string << get_xml_embedded()
                string << "</#{get_f("name")}>\n"
                string
              end

              #############
              protected

              ###
              # XML for everything embedded in this element:
              # first the children, then the kith.
              def get_xml_embedded()
                get_xml_ofchildren() + get_xml_ofkith()
              end

              ###
              # Concatenated XML (as returned by get) of all children.
              def get_xml_ofchildren()
                children.map { |child| child.get() }.join
              end

              ###
              # Concatenated to_s output of all kith objects, one per line.
              def get_xml_ofkith()
                @kith.map { |thing| thing.to_s + "\n" }.join
              end

              ###
              # Print a warning to stderr about XML material that is being ignored.
              #
              # where:     description of the place where the material was found
              # xml_node:  the ignored material, printed via to_s
              def warn_child_ignored(where, xml_node)
                $stderr.puts "WARNING: additional material found in #{where}, will be ignored:"
                $stderr.puts "\t" + xml_node.to_s
              end

              ###
              # Make an attribute value safe for output between single quotes:
              # an apostrophe is written as the entity &apos; since a literal one
              # would end the attribute value early. A double quote is turned into
              # two apostrophes (also written as entities).
              # All other characters are passed through unchanged.
              # NOTE(review): "&" and "<" are not escaped here -- presumably values
              # may already contain entities; confirm before changing this.
              def xml_secure_val(value) # string: value of an attribute
                value.gsub(/'/, "&apos;").gsub(/"/, "&apos;&apos;")
              end
            end
         
     | 
| 
      
 229 
     | 
    
         
            +
             
     | 
| 
      
 230 
     | 
    
         
            +
            #############
         
     | 
| 
      
 231 
     | 
    
         
            +
            # class SalsaTigerXmlNode
         
     | 
| 
      
 232 
     | 
    
         
            +
            #
         
     | 
| 
      
 233 
     | 
    
         
            +
            # additional methods:
         
     | 
| 
      
 234 
     | 
    
         
            +
            #
         
     | 
| 
      
 235 
     | 
    
         
            +
            # is_terminal?    true if this is a Tiger XML terminal node
         
     | 
| 
      
 236 
     | 
    
         
            +
            #
         
     | 
| 
      
 237 
     | 
    
         
            +
            # is_nonterminal? true if this is a Tiger XML nonterminal node
         
     | 
| 
      
 238 
     | 
    
         
            +
            #
         
     | 
| 
      
 239 
     | 
    
         
            +
            # is_splitword?   true if this is a splitword part
         
     | 
| 
      
 240 
     | 
    
         
            +
            #
         
     | 
| 
      
 241 
     | 
    
         
            +
            # is_syntactic?   true for terminal, nonterminal, splitword
         
     | 
| 
      
 242 
     | 
    
         
            +
            #
         
     | 
| 
      
 243 
     | 
    
         
            +
            # is_frame?       true if this is a Salsa/Tiger XML frame
         
     | 
| 
      
 244 
     | 
    
         
            +
            #
         
     | 
| 
      
 245 
     | 
    
         
            +
            # is_target?      true if this is a Salsa/Tiger XML frame target
         
     | 
| 
      
 246 
     | 
    
         
            +
            #
         
     | 
| 
      
 247 
     | 
    
         
            +
            # is_fe?          true if this is a Salsa/Tiger XML frame element
         
     | 
| 
      
 248 
     | 
    
         
            +
            #
         
     | 
| 
      
 249 
     | 
    
         
            +
            # is_outside_sentence? returns false -- this node is not a placeholder for
         
     | 
| 
      
 250 
     | 
    
         
            +
            #                 a node that is outside the current sentence
         
     | 
| 
      
 251 
     | 
    
         
            +
            #                 (but see descendant class TSSynNode)
         
     | 
| 
      
 252 
     | 
    
         
            +
            #
         
     | 
| 
      
 253 
     | 
    
         
            +
            # yield_nodes     returns the list of descendants that are leaves of the tree
         
     | 
| 
      
 254 
     | 
    
         
            +
            #                 NOTE: this overwrites the Graph.yield_nodes method
         
     | 
| 
      
 255 
     | 
    
         
            +
            #                 since we have to treat splitwords in a special way
         
     | 
| 
      
 256 
     | 
    
         
            +
            #                 empty array if no yield nodes are present
         
     | 
| 
      
 257 
     | 
    
         
            +
            #
         
     | 
| 
      
 258 
     | 
    
         
            +
            # yield_nodes_ordered returns those descendants ordered by precedence 
         
     | 
| 
      
 259 
     | 
    
         
            +
            #                 in the sentence, i.e. their node IDs.
         
     | 
| 
      
 260 
     | 
    
         
            +
            # 
         
     | 
| 
      
 261 
     | 
    
         
            +
            # sid             returns the sentence ID of this node
         
     | 
| 
      
 262 
     | 
    
         
            +
            #
         
     | 
| 
      
 263 
     | 
    
         
            +
            # to_s            returns the yield of this node as a string of space-separated words
         
     | 
| 
      
 264 
     | 
    
         
            +
            #                 words ordered left to right
         
     | 
| 
      
 265 
     | 
    
         
            +
            # 
         
     | 
| 
      
 266 
     | 
    
         
            +
            class SalsaTigerXmlNode < XMLNode
              include StringTerminalsInRightOrder

              ###
              # Read the ID out of a RegXML element.
              # Elements named "edge", "fenode", "uspitem", "splitword" or
              # "other_edge" refer to another node, so their ID is taken from
              # the "idref" attribute; all other elements (including "part")
              # keep it in the "id" attribute.
              #
              # returns: the value stored under that attribute
              def self.xmlel_id(xml_obj) # RegXML object
                referring_elements = ["edge", "fenode", "uspitem", "splitword", "other_edge"]
                id_key = referring_elements.include?(xml_obj.name) ? "idref" : "id"
                xml_obj.attributes[id_key]
              end

              ###
              # Build a node from a RegXML object or a piece of text.
              # Text is handed to the superclass unchanged, with no attributes,
              # no ID and a final argument of true; an element is handed over
              # as name, attributes and extracted ID, with a final argument of
              # false.
              def initialize(xml) # RegXML object or text
                return super(xml, nil, nil, true) if xml.text?

                super(xml.name, xml.attributes, SalsaTigerXmlNode.xmlel_id(xml), false)
              end

              ###
              # true if the "name" feature of this node is "t"
              def is_terminal?
                get_f("name") == "t"
              end

              ###
              # true if the "name" feature of this node is "nt"
              def is_nonterminal?
                get_f("name") == "nt"
              end

              ###
              # true if the "name" feature of this node is "part"
              def is_splitword?
                get_f("name") == "part"
              end

              ###
              # true for terminals, nonterminals and splitword parts,
              # false for everything else
              def is_syntactic?
                return true if is_terminal? || is_nonterminal? || is_splitword?

                false
              end

              ###
              # true if the "name" feature of this node is "frame"
              def is_frame?
                get_f("name") == "frame"
              end

              ###
              # true if the "name" feature of this node is "target"
              def is_target?
                get_f("name") == "target"
              end

              ###
              # true if the "name" feature of this node is "fe"
              def is_fe?
                get_f("name") == "fe"
              end

              ###
              # Sentence ID of this node: the part of the node ID in front of
              # the first underscore, or nil if the ID does not match.
              def sid
                prefix_match = /^(.*?)_/.match(id)
                prefix_match ? prefix_match[1] : nil
              end

              ###
              # Always false for this class.
              def is_outside_sentence?
                false
              end

              ###
              # Leaves below this node, where splitword children are not
              # counted as children. A node without any non-splitword child
              # is its own yield.
              #
              # returns: array of nodes
              def yield_nodes
                proper_children = children.reject { |child| child.is_splitword? }
                return [self] if proper_children.empty?

                proper_children.each_with_object([]) do |child, leaves|
                  if child.children.all? { |grandchild| grandchild.is_splitword? }
                    # nothing but splitwords (or nothing at all) below: a leaf
                    leaves << child
                  else
                    leaves.concat(child.yield_nodes)
                  end
                end
              end

              ###
              # Yield nodes in sentence order (legacy name).
              # The sorting helper is only given terminals and splitwords;
              # all remaining yield nodes are appended after the sorted ones.
              def yield_nodes_ordered
                word_nodes, other_nodes = yield_nodes.distribute { |node| node.is_terminal? || node.is_splitword? }
                sort_terminals_and_splitwords_left_to_right(word_nodes).concat(other_nodes)
              end

              ###
              # Same result as yield_nodes_ordered; the name parallels the
              # method of SalsaTigerSentence.
              def terminals_sorted
                yield_nodes_ordered
              end

              ###
              # String form of this node, as produced by string_for_node.
              def to_s
                string_for_node(self)
              end
            end
         
     | 
| 
      
 387 
     | 
    
         
            +
             
     | 
| 
      
 388 
     | 
    
         
            +
            #############
         
     | 
| 
      
 389 
     | 
    
         
            +
            # class SynNode
         
     | 
| 
      
 390 
     | 
    
         
            +
            #
         
     | 
| 
      
 391 
     | 
    
         
            +
            # inherits from SalsaTigerXmlNode,
         
     | 
| 
      
 392 
     | 
    
         
            +
            # adds to it methods specific to nodes
         
     | 
| 
      
 393 
     | 
    
         
            +
            # that describe the syntactic structure
         
     | 
| 
      
 394 
     | 
    
         
            +
            #
         
     | 
| 
      
 395 
     | 
    
         
            +
            # additional/changed methods:
         
     | 
| 
      
 396 
     | 
    
         
            +
            #
         
     | 
| 
      
 397 
     | 
    
         
            +
            # part_of_speech  part_of_speech information as a string,
         
     | 
| 
      
 398 
     | 
    
         
            +
            #         nil for anything but terminal nodes
         
     | 
| 
      
 399 
     | 
    
         
            +
            #
         
     | 
| 
      
 400 
     | 
    
         
            +
            # word    word information for this node as a string,
         
     | 
| 
      
 401 
     | 
    
         
            +
            #         nil for anything but terminal nodes
         
     | 
| 
      
 402 
     | 
    
         
            +
            #
         
     | 
| 
      
 403 
     | 
    
         
            +
            # category category information for this node as a string,
         
     | 
| 
      
 404 
     | 
    
         
            +
            #         nil for anything but nonterminal nodes
         
     | 
| 
      
 405 
     | 
    
         
            +
            #
         
     | 
| 
      
 406 
     | 
    
         
            +
            # is_punct?       true if this is a terminal node and it is a punctuation sign 
         
     | 
| 
      
 407 
     | 
    
         
            +
            #
         
     | 
| 
      
 408 
     | 
    
         
            +
            # get_sem  returns a copy of the list of semantic nodes (FeNodes) linked to this syntactic node
         
     | 
| 
      
 409 
     | 
    
         
            +
            #         Idea: this is basically the inverse of the edge pointing from
         
     | 
| 
      
 410 
     | 
    
         
            +
            #         the FeNode to this SynNode, so you can fetch a node's semantics directly
         
     | 
| 
      
 411 
     | 
    
         
            +
            #
         
     | 
| 
      
 412 
     | 
    
         
            +
            # add_sem add non-tree edge from this syntactic node to a FeNode
         
     | 
| 
      
 413 
     | 
    
         
            +
             
     | 
| 
      
 414 
     | 
    
         
            +
            class SynNode < SalsaTigerXmlNode

              ###
              # Build the node via the superclass and start with no attached
              # semantic nodes and no additional (non-tree) links.
              def initialize(xml)
                super(xml)

                @sem = []
                @other_links = []
              end

              ###
              # Record an additional, non-tree link from this node.
              #
              # other_node: SynNode the link points to
              # link_label: string, the edge label
              # attributes: hash string => string, further attribute-value
              #             pairs for the edge
              #
              # returns: the internal list of links
              def add_link(other_node, link_label, attributes = {})
                @other_links.push([link_label, other_node, attributes])
              end

              ###
              # Additional links of this node as [label, node, attributes]
              # triples.
              #
              # label: string or nil; if given, only links with exactly this
              #        label are returned (as a new array), otherwise the
              #        internal list itself is returned
              def get_linked(label = nil)
                return @other_links unless label

                @other_links.select { |link_label, _node, _attrs| link_label == label }
              end

              ###
              # Value of the "pos" attribute without surrounding whitespace,
              # nil if the attribute is not set.
              def part_of_speech
                pos = get_attribute("pos")
                pos ? pos.strip : nil
              end

              ###
              # Value of the "cat" attribute without surrounding whitespace,
              # nil if the attribute is not set.
              def category
                cat = get_attribute("cat")
                cat ? cat.strip : nil
              end

              ###
              # Value of the "word" attribute without surrounding whitespace,
              # nil if the attribute is not set.
              def word
                form = get_attribute("word")
                form ? form.strip : nil
              end

              ###
              # Heuristic punctuation test.
              # Nonterminals never count as punctuation. Otherwise the part of
              # speech is checked first (the '$' tags work at least for TIGER
              # corpus annotation, plus any tag starting with PUNC), and
              # finally the word itself is compared with a list of known
              # punctuation signs.
              #
              # returns: true or false
              def is_punct?
                return false if is_nonterminal?

                pos = part_of_speech
                return true if ['$.', '$,', '$('].include?(pos)
                return true if pos =~ /^PUNC/

                # no luck with the part of speech: fall back to the word form
                punctuation_signs = [
                  ".", ";", ",", ":", "?", "!",
                  "(", ")", "[", "]", "{", "}",
                  "-", "''", "``", "\"", "'"
                ]
                punctuation_signs.include?(word)
              end

              ###
              # A terminal is rendered as its word; every other node uses the
              # inherited string form.
              def to_s
                return word if is_terminal?

                super()
              end

              ###
              # returns: a copy of the list of semantic nodes attached to this
              # node, so callers cannot change the internal list
              def get_sem
                @sem.clone
              end

              ###
              # Attach a semantic node to this syntactic node.
              # Raises a RuntimeError unless the class of fe_node is exactly
              # FeNode.
              #
              # returns: the internal list of semantic nodes
              def add_sem(fe_node)
                raise "Unexpected class of semantic node: was expecting an FeNode" unless fe_node.class == FeNode

                @sem.push(fe_node)
              end

              #############
              protected

              # XML for the outgoing edges of this node: one <edge> element
              # per child that is not a splitword (splitwords are handled
              # separately in the "sem" part of the sentence), followed by one
              # <other_edge> element per additional link. A missing label is
              # written as "-".
              #
              # returns: string, one element per line
              def get_xml_ofchildren
                xml = ""

                each_child_with_edgelabel do |label, child|
                  next if child.is_splitword?

                  shown_label = label ? xml_secure_val(label) : "-"
                  xml << "<edge label='#{shown_label}' idref='#{xml_secure_val(child.id)}'/>\n"
                end

                @other_links.each do |label, node, edge_attrs|
                  shown_label = label ? xml_secure_val(label) : "-"
                  xml << "<other_edge label='#{shown_label}' idref='#{xml_secure_val(node.id)}'"
                  if edge_attrs
                    pairs = edge_attrs.to_a.map { |attr, val| "#{xml_secure_val(attr)}='#{xml_secure_val(val)}'" }
                    xml << " " + pairs.join(" ")
                  end
                  xml << "/>\n"
                end

                xml
              end
            end
         
     | 
| 
      
 554 
     | 
    
         
            +
             
     | 
| 
      
 555 
     | 
    
         
            +
            #############
            # class TSSynNode
            #
            # inherits from SynNode
            #
            # placeholder for a syntactic node that isn't really there:
            # a reference to a node in another sentence
            #
            # it carries that node's ID inside an otherwise empty
            # <OTHER_SENTENCE> RegXML element, its word is "<unknown>",
            # and you cannot add a child to it
            #
            # new or changed methods:
            #-----------------------
            #
            # is_outside_sentence? always returns true
            #
            # word                 always returns "<unknown>"
            #
            # add_child            always raises an error

            class TSSynNode < SynNode

              ###
              # id_string: string, ID of the node this placeholder refers to
              def initialize(id_string)
                placeholder_xml = "<OTHER_SENTENCE id='" + id_string + "'/>"
                super(RegXML.new(placeholder_xml))
              end

              ###
              # a TSSynNode always stands for a node outside this sentence
              def is_outside_sentence?
                true
              end

              ###
              # word of this node: <unknown>
              def word
                "<unknown>"
              end

              ###
              # a placeholder node cannot get children: always raises
              def add_child(arg1, arg2)
                raise "Not implemented for this class"
              end
            end
         
     | 
| 
      
 598 
     | 
    
         
            +
             
     | 
| 
      
 599 
     | 
    
         
            +
            #############
            # class SemNode
            #
            # common superclass for FrameNode and FeNode,
            # with methods that are the same for both:
            #
            #
            # is_usp?      returns true if the node's "usp" attribute is "yes"
            #              (the frame/FE is involved in underspecification),
            #              else false
            #
            # flags        returns an array of all the frame/FE flags for this node.
            #              members of the array are strings: the names of the flags
            #              that have been set
            #
            # add_flag     sets a frame/FE flag; setting a flag that is
            #              already set has no effect
            # remove_flag  unsets a frame/FE flag

            class SemNode < SalsaTigerXmlNode
              attr_reader :flags

              ###
              def initialize(xml) # RegXML object or text
                super(xml)
                # flags: array of flag names (strings), each name at most once
                @flags = Array.new()
              end

              ###
              # returns: true if the "usp" attribute of this node is "yes"
              def is_usp?
                return get_attribute("usp") == "yes"
              end

              ###
              # Set a flag on this node.
              #
              # A flag that is already set is not recorded a second time,
              # so that it is written out as a single <flag> element and
              # add_flag/remove_flag stay symmetric.
              #
              # returns: the array of flag names
              def add_flag(name) # string: flag name
                @flags << name unless @flags.include?(name)
                return @flags
              end

              ###
              # Unset a flag on this node.
              #
              # returns: the removed flag name, or nil if it was not set
              def remove_flag(name) # string: flag name
                @flags.delete(name)
              end

              #############
              protected

              # XML for everything embedded in this node:
              # whatever the superclass produces, followed by the flags
              def get_xml_embedded()
                return super() + get_xml_offlags()
              end

              # returns: string, one <flag> element per line
              # for each flag that has been set
              def get_xml_offlags()
                return @flags.map { |flagname|
                  "<flag name=\'#{xml_secure_val(flagname)}\'/>\n"
                }.join
              end
            end
         
     | 
| 
      
 654 
     | 
    
         
            +
             
     | 
| 
      
 655 
     | 
    
         
            +
             
     | 
| 
      
 656 
     | 
    
         
            +
             
     | 
| 
      
 657 
     | 
    
         
            +
            #############
            # class FrameNode
            #
            # inherits from SemNode
            # adds to it methods specific to nodes
            # that describe a frame
            #
            # additional/changed methods:
            #
            # name      returns the name of the frame (its "name" attribute)
            # set_name  changes the name of the frame to a new name
            # target    returns the child with edge label "target",
            #           or nil (after a warning) if there is none
            #
            # each_child() iterates through FEs, children() returns all FEs;
            # each_fe() and fes() are synonyms for them
            #
            # each_fe_by_name A frame node may have several FE children with the same
            #           frame element label. While each_child returns them separately,
            #           each_fe_by_name lumps FE children with the same frame element label
            #           into one FeNode. The child with label "target" is skipped.
            #           Warnings:
            #           - if a label occurs on more than one FE child, the FeNode that
            #             is yielded is a newly created one (ID: frame ID + "_" + label)
            #             that gets the children of all FE children with that label.
            #             It is not itself added to the frame.
            #           - Underspecification is ignored! If you have the same FE twice,
            #             and there is underspecification regarding the extent of the FE,
            #             the two FE children will be lumped together anyway.
            #             If you don't want that, use each_child instead.
            #
            #
            # add_fe CAUTION: please do not call this method directly externally,
            #           use SalsaTigerSentence.add_fe, otherwise the node and its ID
            #           will not be recorded in the node list and the node cannot be retrieved
            #           via its ID

            class FrameNode <  SemNode

              ###
              # returns: the single child with edge label "target";
              # nil, after a warning on stderr, if there is no such child.
              # raises an error if there is more than one.
              def target()
                targets = children_by_edgelabels(["target"])

                if targets.empty?
                  $stderr.puts "SalsaTigerRegXML warning: Frame #{id()}: No target, but I got: \n" + child_labels().join(", ")
                  return nil
                end

                raise "target: more than one target to frame "+id() if targets.length != 1

                targets.first
              end

              ###
              # returns: the value of the "name" attribute of this frame
              def name
                get_attribute("name")
              end

              ###
              # new_name: new value for the "name" attribute of this frame
              def set_name(new_name)
                set_attribute("name", new_name)
              end

              ###
              # each_fe: synonym for each_child
              def each_fe()
                each_child { |fe| yield fe }
              end

              ###
              # fes: synonym for children
              def fes()
                return children()
              end

              ###
              # yields one FeNode per distinct child label other than "target":
              # the child itself if its label is unique, otherwise a new
              # FeNode that collects the children of all FEs with that label
              def each_fe_by_name()
                child_labels.uniq.each do |fe_name|
                  next if fe_name == "target"

                  same_label = children_by_edgelabels([fe_name])

                  if same_label.length == 1
                    # one frame element with that name
                    yield same_label.first
                  else
                    # several frame elements with that name:
                    # combine them
                    merged = FeNode.new(fe_name, id() + "_" + fe_name)
                    same_label.each do |fe|
                      fe.each_child() { |child| merged.add_child(child) }
                    end
                    yield merged
                  end
                end
              end

              ###
              # fe_node: FeNode to attach; its name is used as the edge label.
              # raises an error if fe_node is a target and this frame
              # already has one
              def add_child(fe_node)
                refuse_second_target(fe_node.name)

                super(fe_node, fe_node.name)
              end

              ###
              # fe_node: FeNode to detach; its name is used as the edge label
              def remove_child(fe_node)
                super(fe_node, fe_node.name)
              end

              ###
              # makes a new FeNode, adds it as a child of this frame
              # and lets it point to the given syntactic nodes
              #
              # returns: the new FeNode
              def add_fe(fe_name,   # string: name of FE to add
                         syn_nodes, # array:SynNode, syntactic nodes that this FE should point to
                         fe_id = nil) # string: ID for the new FE

                refuse_second_target(fe_name)

                # no FE ID given: make one from this frame's ID and the current time
                fe_id ||= id() + "_fe" + Time.new().to_f.to_s

                # make FE node and list as this frame's child
                new_fe = FeNode.new(fe_name, fe_id)
                add_child(new_fe)

                # add syn nodes
                syn_nodes.each { |syn_node| new_fe.add_child(syn_node) }

                new_fe
              end

              #############
              protected

              # reports on stderr and raises an error if fe_name is "target"
              # and this frame already has a child with edge label "target"
              def refuse_second_target(fe_name)
                return unless fe_name == "target"

                existing = children_by_edgelabels(["target"])
                return if existing.empty?

                $stderr.puts "Adding second target to frame #{id()}"
                $stderr.puts "I already have: " + existing.map { |t| t.id() }.join(",")
                raise "More than one target."
              end
            end
         
     | 
| 
      
 799 
     | 
    
         
            +
             
     | 
| 
      
 800 
     | 
    
         
            +
            #############
         
     | 
| 
      
 801 
     | 
    
         
            +
            # class FeNode
         
     | 
| 
      
 802 
     | 
    
         
            +
            #
         
     | 
| 
      
 803 
     | 
    
         
            +
            # inherits from SemNode,
         
     | 
| 
      
 804 
     | 
    
         
            +
            # adds to it methods specific to nodes
         
     | 
| 
      
 805 
     | 
    
         
            +
            # that describe a frame element or target
         
     | 
| 
      
 806 
     | 
    
         
            +
            #
         
     | 
| 
      
 807 
     | 
    
         
            +
            # additional/changed methods:
         
     | 
| 
      
 808 
     | 
    
         
            +
            #----------------------------
         
     | 
| 
      
 809 
     | 
    
         
            +
            #
         
     | 
| 
      
 810 
     | 
    
         
            +
            # name      returns the name of the frame element, or "target"
         
     | 
| 
      
 811 
     | 
    
         
            +
            #
         
     | 
| 
      
 812 
     | 
    
         
            +
            # add_child, remove_child
         
     | 
| 
      
 813 
     | 
    
         
            +
             
     | 
| 
      
 814 
     | 
    
         
            +
            class FeNode <  SemNode
         
     | 
| 
      
 815 
     | 
    
         
            +
             
     | 
| 
      
 816 
     | 
    
         
            +
              ###
         
     | 
| 
      
 817 
     | 
    
         
            +
              # Make a frame element (or target) node.
              #
              # name_or_xml: either a RegXML object, or the name of the FE
              #              as a string. The node is a target if the string
              #              is "target" or the RegXML element is named "target".
              # id_if_name:  string: ID to use if we just got the name of the FE
              #
              # raises an error if name_or_xml is neither a String
              # nor a RegXML object
              def initialize(name_or_xml, # either RegXML object or the name of the FE as a string
                             id_if_name = nil) # string: ID to use if we just got the name of the FE

                # match on the class itself rather than on its name,
                # so that instances of subclasses are accepted too
                case name_or_xml
                when String
                  is_target = (name_or_xml == "target")
                  if is_target
                    super(RegXML.new("<target id=\'#{xml_secure_val(id_if_name.to_s)}\'/>"))
                  else
                    super(RegXML.new("<fe name=\'#{xml_secure_val(name_or_xml)}\' id=\'#{xml_secure_val(id_if_name.to_s)}\'/>"))
                  end
                  # set only after the superclass has been initialised, as before
                  @i_am_target = is_target

                when RegXML
                  super(name_or_xml)
                  @i_am_target = (name_or_xml.name() == "target")

                else
                  raise "Shouldn't be here: " + name_or_xml.class.to_s
                end

                # child_attr: keep additional attributes of <fenode> elements,
                # if there are any
                # child_attr: hash syn_node_id(string) -> attributes(hash)
                @child_attr = Hash.new()
              end
         
     | 
| 
      
 847 
     | 
    
         
            +
              
         
     | 
| 
      
 848 
     | 
    
         
            +
              ###
         
     | 
| 
      
 849 
     | 
    
         
            +
              # Returns the name of this node: the literal "target" when the node
              # was flagged as a target (@i_am_target), otherwise the value of its
              # "name" attribute.
              def name
                return "target" if @i_am_target

                get_attribute("name")
              end
         
     | 
| 
      
 856 
     | 
    
         
            +
             
     | 
| 
      
 857 
     | 
    
         
            +
              ###
         
     | 
| 
      
 858 
     | 
    
         
            +
              # Adds syn_node as a child of this node (as a pointer, not an edge).
              #
              # syn_node: the node to attach; its id is used as the key for stored attributes
              # xml_obj:  optional <fenode> XML element. If given, its attributes other than
              #           "idref" are remembered in @child_attr under syn_node.id
              #           (the idref itself is taken from the child's ID directly).
              #
              # Returns whatever the superclass add_child returns.
              def add_child(syn_node, xml_obj = nil)
                if xml_obj
                  extra_attr = xml_obj.attributes
                  extra_attr.delete("idref")
                  # only keep an entry when something besides the idref is left
                  @child_attr[syn_node.id] = extra_attr unless extra_attr.empty?
                end

                super(syn_node, nil, "pointer_insteadof_edge" => true)
              end
         
     | 
| 
      
 874 
     | 
    
         
            +
             
     | 
| 
      
 875 
     | 
    
         
            +
              ###
         
     | 
| 
      
 876 
     | 
    
         
            +
              # Removes syn_node from the children of this node (pointer, not edge).
              #
              # syn_node: the child node to detach
              # varhash:  accepted for signature compatibility; not used here
              #
              # Also drops any <fenode> attributes that add_child stored for this child
              # in @child_attr, so that they cannot resurface if the same node is later
              # re-added without an XML element.
              #
              # Returns whatever the superclass remove_child returns.
              def remove_child(syn_node, varhash={})
                result = super(syn_node, nil, "pointer_insteadof_edge" => true)
                @child_attr.delete(syn_node.id)
                result
              end
         
     | 
| 
      
 879 
     | 
    
         
            +
             
     | 
| 
      
 880 
     | 
    
         
            +
              #############
         
     | 
| 
      
 881 
     | 
    
         
            +
              protected
         
     | 
| 
      
 882 
     | 
    
         
            +
             
     | 
| 
      
 883 
     | 
    
         
            +
              # Renders the children of this node as a string of <fenode> elements,
              # one per line. Each element carries the child's ID as "idref"; if
              # additional attributes were stored for the child in @child_attr,
              # they are appended. All values go through xml_secure_val.
              def get_xml_ofchildren()
                fenodes = children.map do |child|
                  extra_attr = @child_attr[child.id()]
                  open_tag = "<fenode idref=\'#{xml_secure_val(child.id())}\'"

                  if extra_attr
                    attr_string = extra_attr.to_a.map { |attr, val|
                      " #{attr}=\'#{xml_secure_val(val)}\'"
                    }.join()
                    open_tag + attr_string + "/>\n"
                  else
                    open_tag + "/>\n"
                  end
                end

                return fenodes.join()
              end
         
     | 
| 
      
 897 
     | 
    
         
            +
            end
         
     | 
| 
      
 898 
     | 
    
         
            +
             
     | 
| 
      
 899 
     | 
    
         
            +
            #############
         
     | 
| 
      
 900 
     | 
    
         
            +
            # class UspNode
         
     | 
| 
      
 901 
     | 
    
         
            +
            #
         
     | 
| 
      
 902 
     | 
    
         
            +
            # inherits from SalsaTigerXmlNode,
         
     | 
| 
      
 903 
     | 
    
         
            +
            # adds to it methods specific to nodes
         
     | 
| 
      
 904 
     | 
    
         
            +
            # that describe a frame underspecification or frame element underspecification
         
     | 
| 
      
 905 
     | 
    
         
            +
            #
         
     | 
| 
      
 906 
     | 
    
         
            +
            # additional/changed methods:
         
     | 
| 
      
 907 
     | 
    
         
            +
            #----------------------------
         
     | 
| 
      
 908 
     | 
    
         
            +
            #
         
     | 
| 
      
 909 
     | 
    
         
            +
            # new             initializes the object
         
     | 
| 
      
 910 
     | 
    
         
            +
            #    xml_obj:      underlying RegXML object for this node
         
     | 
| 
      
 911 
     | 
    
         
            +
            #    frame_or_fe:  string, either "frame" for frame underspecification
         
     | 
| 
      
 912 
     | 
    
         
            +
            #                  or "fe" for frame element underspecification
         
     | 
| 
      
 913 
     | 
    
         
            +
            #
         
     | 
| 
      
 914 
     | 
    
         
            +
            # add_child, remove_child   add, remove underspecification entry
         
     | 
| 
      
 915 
     | 
    
         
            +
             
     | 
| 
      
 916 
     | 
    
         
            +
            class UspNode < SalsaTigerXmlNode

              # string "frame" or "fe": the kind of underspecification this node describes
              attr_reader :i_am

              ###
              # xml_obj:     RegXML object underlying this node
              # frame_or_fe: string, "frame" for frame underspecification,
              #              "fe" for frame element underspecification;
              #              anything else raises a RuntimeError
              def initialize(xml_obj,      # RegXML object
                             frame_or_fe)  # string "frame" or "fe"

                super(xml_obj)

                @i_am = case frame_or_fe
                        when "frame" then "frame"
                        when "fe"    then "fe"
                        else
                          raise "new: neither frame nor fe??"
                        end
              end

              ###
              # Adds an underspecification entry pointing to the given node
              # and marks that node with the attribute usp="yes".
              # Raises a RuntimeError when node is nil (or false).
              def add_child(node, varhash={})
                raise "Got nil for a node." unless node

                super(node, nil, "pointer_insteadof_edge" => true)

                # set usp. attribute on child
                node.set_attribute("usp", "yes")
              end

              ###
              # Removes the underspecification entry for the given node
              # and deletes the node's "usp" attribute.
              #
              # Deleting the attribute will be wrong if the child is involved in
              # more than one instance of underspecification, hence the warning
              # written to stderr on every call.
              def remove_child(node, varhash={})
                super(node, nil, "pointer_insteadof_edge" => true)

                $stderr.puts "Warning: unsafe removal of attribute 'usp'"
                node.del_attribute("usp")
              end

              #############
              protected

              # Renders the children as a string of <uspitem> elements, one per line,
              # each referring to a child by its ID.
              def get_xml_ofchildren()
                uspitems = children.map do |child|
                  "<uspitem idref=\'#{xml_secure_val(child.id)}\'/>\n"
                end
                return uspitems.join()
              end

            end
         
     | 
| 
      
 969 
     | 
    
         
            +
             
     | 
| 
      
 970 
     | 
    
         
            +
            #############
         
     | 
| 
      
 971 
     | 
    
         
            +
            class SalsaTigerSentenceGraph < XMLNode
         
     | 
| 
      
 972 
     | 
    
         
            +
              include StringTerminalsInRightOrder
         
     | 
| 
      
 973 
     | 
    
         
            +
             
     | 
| 
      
 974 
     | 
    
         
            +
              attr_reader :node
         
     | 
| 
      
 975 
     | 
    
         
            +
             
     | 
| 
      
 976 
     | 
    
         
            +
              # Sets up the syntactic graph of one sentence.
              #
              # xml_obj:     RegXML object for the graph element, or nil/false if the
              #              sentence has no syntactic information
              # sentence_id: string, ID of this sentence; this object's own ID is
              #              sentence_id + "_graph"
              def initialize(xml_obj,     # RegXML object
                             sentence_id) # string: ID of this sentence

                # global data:
                # node: hash node_id -> XMLNode object
                #       maps node IDs to the nodes with that ID
                @node = Hash.new
                @sentence_id = sentence_id

                unless xml_obj
                  # we have no syntactic information:
                  # record an empty "graph" element anyway
                  super("graph", {}, sentence_id + "_graph", false)
                  return
                end

                # we actually have syntactic information: read it.
                # Initialize this object as an XML node, i.e. remember the
                # outermost element's name, attributes, and ID, and specify
                # that it's not a text but an XML object
                super(xml_obj.name, xml_obj.attributes, sentence_id + "_graph", false)

                # initialize nodes, remember their IDs
                xml_obj.children_and_text.each do |section|
                  case section.name
                  when "terminals"
                    make_nodes(section, "t", "s/graph/terminals", "all_children_kith")
                  when "nonterminals"
                    make_nodes(section, "nt", "s/graph/nonterminals")
                  else
                    # additional info that we don't need for now:
                    # keep for output
                    add_kith(section)
                  end
                end

                # add edges between nodes
                nt_section = xml_obj.children_and_text.detect { |child| child.name == "nonterminals" }
                return unless nt_section

                nt_section.children_and_text.each do |nt|
                  # skip anything that is not an <nt> element;
                  # we've already done the warning bit in make_nodes
                  next unless nt.name == "nt"

                  syn_add_children(@node[SalsaTigerXmlNode.xmlel_id(nt)], nt)
                end
              end
         
     | 
| 
      
 1032 
     | 
    
         
            +
             
     | 
| 
      
 1033 
     | 
    
         
            +
             
     | 
| 
      
 1034 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1035 
     | 
    
         
            +
              # Integrates split word information into the graph.
              #
              # xml_obj: RegXML object for an element named "splitwords" whose children
              #          named "splitword" each describe a split for one of the terminals
              #          we already know; nil means there is nothing to do.
              #
              # Children with any other name are reported via warn_child_ignored
              # and skipped.
              def add_splitwords(xml_obj)  # RegXML object
                return if xml_obj.nil?

                xml_obj.children_and_text.each do |splitword|
                  if splitword.name() != "splitword"
                    warn_child_ignored("s/sem/splitwords/", splitword)
                    next
                  end

                  # make nodes for the splitword parts
                  make_nodes(splitword, "part", "s/sem/splitwords/splitword", "all_children_kith")

                  # this is the terminal that is being split:
                  # add links to its new children
                  split_terminal = @node[SalsaTigerXmlNode.xmlel_id(splitword)]
                  syn_add_children(split_terminal, splitword)
                end
              end
         
     | 
| 
      
 1055 
     | 
    
         
            +
             
     | 
| 
      
 1056 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1057 
     | 
    
         
            +
              # String rendering of the graph: string_for_nodes applied to
              # the syntactic roots of this graph.
              def to_s
                roots = syn_roots()
                string_for_nodes(roots)
              end
         
     | 
| 
      
 1060 
     | 
    
         
            +
             
     | 
| 
      
 1061 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1062 
     | 
    
         
            +
              # Returns the superclass's get() result for this graph.
              #
              # Before doing so, makes sure that the graph element has a 'root'
              # attribute, since the Salsa tool needs this: it is set to the ID of
              # the first syntactic root. A graph without any nodes (e.g. a sentence
              # without syntactic information) has no root; in that case the
              # attribute is left alone instead of failing on nil.
              def get()
                first_root = syn_roots().first
                set_attribute("root", first_root.id()) if first_root
                super()
              end
         
     | 
| 
      
 1068 
     | 
    
         
            +
             
     | 
| 
      
 1069 
     | 
    
         
            +
              #####
         
     | 
| 
      
 1070 
     | 
    
         
            +
              # access methods
         
     | 
| 
      
 1071 
     | 
    
         
            +
             
     | 
| 
      
 1072 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1073 
     | 
    
         
            +
              # Yields each node of the graph (the values of @node) to the block.
              # Without a block, returns an Enumerator over the same nodes
              # instead of failing with a LocalJumpError.
              def each_node
                return to_enum(:each_node) unless block_given?

                @node.each_value { |n| yield n }
              end
         
     | 
| 
      
 1078 
     | 
    
         
            +
             
     | 
| 
      
 1079 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1080 
     | 
    
         
            +
              # Returns an array of all nodes of the graph (the values of @node).
              def nodes
                all_nodes = @node.values
                all_nodes
              end
         
     | 
| 
      
 1083 
     | 
    
         
            +
             
     | 
| 
      
 1084 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1085 
     | 
    
         
            +
              # Yields each node of the graph for which is_terminal? holds.
              # Without a block, returns an Enumerator over the same nodes
              # instead of failing with a LocalJumpError.
              def each_terminal
                return to_enum(:each_terminal) unless block_given?

                @node.each_value { |node| yield node if node.is_terminal? }
              end
         
     | 
| 
      
 1092 
     | 
    
         
            +
             
     | 
| 
      
 1093 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1094 
     | 
    
         
            +
              # Yields the terminals of the graph in the order produced by
              # sort_terminals_and_splitwords_left_to_right.
              # Without a block, returns an Enumerator over the same sequence
              # instead of failing with a LocalJumpError.
              def each_terminal_sorted
                return to_enum(:each_terminal_sorted) unless block_given?

                sort_terminals_and_splitwords_left_to_right(terminals).each { |node_obj|
                  yield node_obj
                }
              end
         
     | 
| 
      
 1099 
     | 
    
         
            +
             
     | 
| 
      
 1100 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1101 
     | 
    
         
            +
              # Returns an array of all nodes of the graph for which is_terminal? holds.
              def terminals
                @node.each_value.select { |candidate| candidate.is_terminal? }
              end
         
     | 
| 
      
 1104 
     | 
    
         
            +
             
     | 
| 
      
 1105 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1106 
     | 
    
         
            +
              # Returns the terminals of the graph in the order produced by
              # sort_terminals_and_splitwords_left_to_right.
              def terminals_sorted
                unsorted = terminals
                sort_terminals_and_splitwords_left_to_right(unsorted)
              end
         
     | 
| 
      
 1109 
     | 
    
         
            +
             
     | 
| 
      
 1110 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1111 
     | 
    
         
            +
              # Yields each node of the graph for which is_nonterminal? holds.
              # Without a block, returns an Enumerator over the same nodes
              # instead of failing with a LocalJumpError.
              def each_nonterminal
                return to_enum(:each_nonterminal) unless block_given?

                @node.each_value { |node| yield node if node.is_nonterminal? }
              end
         
     | 
| 
      
 1118 
     | 
    
         
            +
             
     | 
| 
      
 1119 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1120 
     | 
    
         
            +
              # Returns an array of all nodes of the graph for which is_nonterminal? holds.
              def nonterminals
                @node.each_value.select { |candidate| candidate.is_nonterminal? }
              end
         
     | 
| 
      
 1123 
     | 
    
         
            +
             
     | 
| 
      
 1124 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1125 
     | 
    
         
            +
              # Returns an array of the syntactic roots of the graph,
              # i.e. all nodes whose parent() is nil.
              def syn_roots
                @node.each_value.select { |candidate| candidate.parent().nil? }
              end
         
     | 
| 
      
 1130 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1131 
     | 
    
         
            +
             
     | 
| 
      
 1132 
     | 
    
         
            +
              ######################
         
     | 
| 
      
 1133 
     | 
    
         
            +
              # adding nodes
         
     | 
| 
      
 1134 
     | 
    
         
            +
             
     | 
| 
      
 1135 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1136 
     | 
    
         
            +
              # Not available on the sentence graph:
              # always raises a RuntimeError.
              def add_child(arg1, arg2, varhash={})
                raise RuntimeError, "Not implemented for this class"
              end
         
     | 
| 
      
 1139 
     | 
    
         
            +
             
     | 
| 
      
 1140 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1141 
     | 
    
         
            +
              # Not available on the sentence graph:
              # always raises a RuntimeError.
              def remove_child(arg1, arg2, varhash={})
                raise RuntimeError, "Not implemented for this class"
              end
         
     | 
| 
      
 1144 
     | 
    
         
            +
             
     | 
| 
      
 1145 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1146 
     | 
    
         
            +
              # Creates a new syntactic node from a freshly built XML element,
              # registers it in @node under its ID and returns it.
              #
              # The element looks like <LABEL id="..." cat="..." word="..." pos="..."/>;
              # an attribute is left out when its value is nil (or false).
              # Attribute values are passed through xml_secure_val.
              #
              # Raises RuntimeError when label is neither "t" nor "nt".
              def add_node(sentid,    # string: sentence ID
                           label,     # string: t or nt
                           cat = nil, # string: category
                           word = nil,# string: word
                           pos = nil, # string: part of speech
                           syn_id = nil)   # string: ID for the new node

                unless ["t", "nt"].include? label
                  raise "Unknown node label #{label} for new syntactic node. Must be either t or nt."
                end

                # make node ID: sentence ID plus either the given ID
                # or an ID generated from the system time
                if syn_id
                  new_id = sentid + "_" + syn_id
                else
                  stamp = Time.new.to_f.to_s
                  new_id = sentid + "_" + stamp
                  # The clock need not advance between two calls. Rather than
                  # silently replacing an existing node that got the same
                  # timestamp, append a counter until the ID is unused.
                  # NOTE(review): assumes SynNode#id hands back the "id"
                  # attribute unchanged, so @node is keyed by new_id - confirm.
                  serial = 0
                  while @node.key?(new_id)
                    serial += 1
                    new_id = sentid + "_" + stamp + "_" + serial.to_s
                  end
                end

                elt = "<#{label}"
                # block parameter deliberately not called 'label', so that it
                # does not shadow (or, in old Rubies, overwrite) the method parameter
                [["id", new_id], ["cat", cat], ["word", word], ["pos", pos]].each { |attr_name, content|
                  if content
                    elt << " #{attr_name}=\"#{xml_secure_val(content)}\""
                  end
                }
                elt << "/>"
                n = SynNode.new(RegXML.new(elt))
                @node[n.id] = n

                return n
              end
         
     | 
| 
      
 1176 
     | 
    
         
            +
             
     | 
| 
      
 1177 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1178 
     | 
    
         
            +
              # Removes a node (SynNode) from the sentence and splices its
              # children onto its former parent.
              #
              # Steps, in this order:
              # 1. drop the node from the @node index
              # 2. if the node has a parent, remove the node from that parent
              #    (the label of this incoming edge is discarded)
              # 3. tell every child to remove the node as its parent
              # 4. if the node had a parent, add every child to that parent,
              #    keeping the edge label that linked the child to the removed node
              def remove_node(node) # SynNode
                @node.delete(node.id)

                # [edgelabel, parent] of the incoming edge; falsy if the node has no parent
                incoming = node.parent_with_edgelabel

                # cut the incoming edge
                if incoming
                  incoming_label, parent = incoming
                  parent.remove_child(node, incoming_label)
                end

                # cut the outgoing edges
                node.each_child_with_edgelabel do |edge_label, child|
                  child.remove_parent(node, edge_label)
                end

                # without a parent there is nothing to glue the children to
                return nil unless incoming

                node.each_child_with_edgelabel do |edge_label, child|
                  parent.add_child(child, edge_label)
                end
              end
         
     | 
| 
      
 1212 
     | 
    
         
            +
             
     | 
| 
      
 1213 
     | 
    
         
            +
              ######################
         
     | 
| 
      
 1214 
     | 
    
         
            +
              protected
         
     | 
| 
      
 1215 
     | 
    
         
            +
                
         
     | 
| 
      
 1216 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1217 
     | 
    
         
            +
              # Returns the XML text for the children of this element: a
              # <terminals> section holding get() of every terminal, in the
              # order produced by each_terminal_sorted, followed by a
              # <nonterminals> section holding get() of every nonterminal.
              def get_xml_ofchildren
                xml = ""

                xml << "<terminals>\n"
                each_terminal_sorted { |terminal| xml << terminal.get }
                xml << "</terminals>\n"

                xml << "<nonterminals>\n"
                each_nonterminal { |nonterminal| xml << nonterminal.get }
                xml << "</nonterminals>\n"

                xml
              end
         
     | 
| 
      
 1235 
     | 
    
         
            +
             
     | 
| 
      
 1236 
     | 
    
         
            +
              # Creates a SynNode for every child of xml_obj whose name equals
              # expected_obj_name and registers it in @node under its ID.
              # Children with any other name are reported through
              # warn_child_ignored(where, child) and otherwise skipped.
              #
              # xml_obj::           RegXML object whose children are inspected
              # expected_obj_name:: string: element name that yields a node
              # where::             string: location passed on to warn_child_ignored
              # all_children_kith:: object: if non-nil (truthy), all children of
              #                     each matching element are added to the new
              #                     node as kith
              def make_nodes(xml_obj, expected_obj_name, where, all_children_kith = nil)
                xml_obj.children_and_text.each do |elt|
                  unless elt.name == expected_obj_name
                    # not the kind of child we were expecting to see
                    warn_child_ignored(where, elt)
                    next
                  end

                  syn_node = SynNode.new(elt)
                  @node[syn_node.id] = syn_node

                  next unless all_children_kith
                  elt.children_and_text.each { |kith| syn_node.add_kith(kith) }
                end
              end
         
     | 
| 
      
 1261 
     | 
    
         
            +
              
         
     | 
| 
      
 1262 
     | 
    
         
            +
              # Attaches to node everything listed below xml_obj:
              # - "edge" and "part" children become child edges: the target is
              #   looked up in @node by SalsaTigerXmlNode.xmlel_id and added
              #   via add_child with the element's "label" attribute
              # - "other_edge" children become links via add_link; the "label"
              #   entry is deleted from the element's attribute hash and the
              #   remaining attributes are passed along
              # - anything else is kept as kith of the node, for output
              #
              # Raises RuntimeError if node is nil/false or if a referenced
              # node cannot be found in @node.
              def syn_add_children(node, xml_obj)
                raise "Shouldn't be here" unless node

                xml_obj.children_and_text.each do |edge|
                  case edge.name
                  when "edge", "part"
                    target = @node[SalsaTigerXmlNode.xmlel_id(edge)]
                    unless target
                      raise "Sentence #{@sentence_id}: I cannot find a node for " + edge.to_s
                    end

                    node.add_child(target, edge.attributes["label"])

                  when "other_edge"
                    target_id = SalsaTigerXmlNode.xmlel_id(edge)
                    target = @node[target_id]
                    unless target
                      raise "Sentence #{@sentence_id}: I cannot find a node for other_edge #{target_id} : " + edge.to_s
                    end

                    attributes = edge.attributes
                    edgelabel = attributes ? attributes.delete("label") : nil
                    node.add_link(target, edgelabel, attributes)

                  else
                    # something other than an edge: keep for output
                    node.add_kith(edge)
                  end
                end
              end
         
     | 
| 
      
 1305 
     | 
    
         
            +
            end
         
     | 
| 
      
 1306 
     | 
    
         
            +
             
     | 
| 
      
 1307 
     | 
    
         
            +
            #############
         
     | 
| 
      
 1308 
     | 
    
         
            +
            class SalsaTigerSentenceSem < XMLNode
         
     | 
| 
      
 1309 
     | 
    
         
            +
             
     | 
| 
      
 1310 
     | 
    
         
            +
              attr_reader :node
         
     | 
| 
      
 1311 
     | 
    
         
            +
             
     | 
| 
      
 1312 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1313 
     | 
    
         
            +
              # Class method: returns the first child of xml_obj that is named
              # "splitwords", or nil if there is no such child.
              def self.get_splitwords(xml_obj)
                xml_obj.children_and_text.find { |child| child.name == "splitwords" }
              end
         
     | 
| 
      
 1318 
     | 
    
         
            +
             
     | 
| 
      
 1319 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1320 
     | 
    
         
            +
              # Builds the semantic part of one sentence.
              #
              # xml_obj::     RegXML object holding the semantic annotation,
              #               or nil/false if the sentence has none
              # sentence_id:: string: sentence ID; this node's ID is
              #               sentence_id + "_sem"
              # id_to_node::  hash: syn_node_id(string) -> SynNode object,
              #               handed on to frame_add_children
              #
              # Instance data set up here:
              # @node:: hash node_id -> node object, maps node IDs to the
              #         nodes with that ID
              # @frame_id, @uspframe_id, @uspfe_id:: arrays of node IDs, listing
              #         all frame nodes, frame underspecification nodes and
              #         FE underspecification nodes respectively
              # @globals:: array of RegXML objects, each representing one
              #         sentence flag
              def initialize(xml_obj, sentence_id, id_to_node)
                @node = {}
                @frame_id = []
                @uspframe_id = []
                @uspfe_id = []
                @globals = []

                if xml_obj
                  # we actually have semantic information: read it
                  super(xml_obj.name, xml_obj.attributes, sentence_id + "_sem", false)

                  # sort the children into the three known sections;
                  # everything else is kept as kith
                  globals_section = nil
                  frames_section = nil
                  usp_section = nil
                  xml_obj.children_and_text.each do |section|
                    case section.name
                    when "globals" then globals_section = section
                    when "frames" then frames_section = section
                    when "usp" then usp_section = section
                    else add_kith(section)
                    end
                  end

                  # handle globals
                  if globals_section
                    globals_section.children_and_text.each { |flag| @globals << flag }
                  end

                  # index frames
                  if frames_section
                    frames_section.children_and_text.each do |frame|
                      if frame.name == "frame"
                        # make a node for the frame, then add its FEs
                        frame_node = FrameNode.new(frame)
                        semnode_add_flags(frame_node, frame)
                        @node[frame_node.id] = frame_node
                        @frame_id << frame_node.id
                        frame_add_children(frame_node, frame, id_to_node)
                      else
                        warn_child_ignored("s/sem/frames/", frame)
                      end
                    end
                  end

                  # index underspecification
                  if usp_section
                    usp_section.children_and_text.each do |usp_block|
                      case usp_block.name
                      when "uspframes" then initialize_usp(usp_block, "frame")
                      when "uspfes" then initialize_usp(usp_block, "fe")
                      else warn_child_ignored("s/sem/usp/", usp_block)
                      end
                    end
                  end

                else
                  # we have no semantic information: record it anyway
                  super("sem", {}, sentence_id + "_sem", false)
                end
              end
         
     | 
| 
      
 1405 
     | 
    
         
            +
              
         
     | 
| 
      
 1406 
     | 
    
         
            +
              ################################################
         
     | 
| 
      
 1407 
     | 
    
         
            +
              # access methods
         
     | 
| 
      
 1408 
     | 
    
         
            +
             
     | 
| 
      
 1409 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1410 
     | 
    
         
            +
              # Yields each frame node, in the order of the IDs in @frame_id.
              def each_frame
                @frame_id.each do |frame_id|
                  yield @node[frame_id]
                end
              end
         
     | 
| 
      
 1415 
     | 
    
         
            +
             
     | 
| 
      
 1416 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1417 
     | 
    
         
            +
              # Returns an array of all frame nodes, in the order of the IDs
              # in @frame_id.
              def frames
                @node.values_at(*@frame_id)
              end
         
     | 
| 
      
 1420 
     | 
    
         
            +
             
     | 
| 
      
 1421 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1422 
     | 
    
         
            +
              # Yields each frame underspecification node, in the order of the
              # IDs in @uspframe_id.
              def each_usp_frameblock
                @uspframe_id.each do |block_id|
                  yield @node[block_id]
                end
              end
         
     | 
| 
      
 1427 
     | 
    
         
            +
             
     | 
| 
      
 1428 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1429 
     | 
    
         
            +
              # Returns an array of all frame underspecification nodes, in the
              # order of the IDs in @uspframe_id.
              def usp_frameblocks
                @node.values_at(*@uspframe_id)
              end
         
     | 
| 
      
 1432 
     | 
    
         
            +
             
     | 
| 
      
 1433 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1434 
     | 
    
         
            +
              # Yields each FE underspecification node, in the order of the
              # IDs in @uspfe_id.
              def each_usp_feblock
                @uspfe_id.each do |block_id|
                  yield @node[block_id]
                end
              end
         
     | 
| 
      
 1439 
     | 
    
         
            +
             
     | 
| 
      
 1440 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1441 
     | 
    
         
            +
              # Returns an array with one entry per ID stored in @uspfe_id,
              # in stored order: the node that @node holds for that ID.
              def usp_feblocks
                @uspfe_id.map { |block_key| @node[block_key] }
              end
         
     | 
| 
      
 1444 
     | 
    
         
            +
             
     | 
| 
      
 1445 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1446 
     | 
    
         
            +
              # Describes every entry of @globals as a hash with the string
              # keys "type" and "param" (taken from the entry's attributes)
              # and "text" (the string forms of the entry's children and
              # text pieces, concatenated).
              # Returns the array of these hashes, in @globals order.
              def flags
                @globals.map do |global|
                  attrs = global.attributes
                  content = global.children_and_text.map(&:to_s).join
                  { "type" => attrs["type"], "param" => attrs["param"], "text" => content }
                end
              end
         
     | 
| 
      
 1454 
     | 
    
         
            +
             
     | 
| 
      
 1455 
     | 
    
         
            +
              ################################################3
         
     | 
| 
      
 1456 
     | 
    
         
            +
              # adding and removing things
         
     | 
| 
      
 1457 
     | 
    
         
            +
             
     | 
| 
      
 1458 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1459 
     | 
    
         
            +
              # Creates a new frame node and registers it.
              #
              # sentid: string, sentence ID (only used to build a default node ID)
              # name:   string, name of the frame
              # sem_id: string, ID for the new node (optional)
              #
              # Without sem_id the node ID is sentid + "_f" + the current
              # time as a float string. The node is stored in @node under
              # its ID and the ID is appended to @frame_id.
              # Returns the new FrameNode.
              def add_frame(sentid, name, sem_id = nil)
                frameid = sem_id || (sentid + "_f" + Time.new.to_f.to_s)

                frame_xml = RegXML.new("<frame id=\"#{frameid}\" name=\"#{name}\"/>")
                frame = FrameNode.new(frame_xml)

                @node[frame.id] = frame
                @frame_id << frame.id
                frame
              end
         
     | 
| 
      
 1475 
     | 
    
         
            +
             
     | 
| 
      
 1476 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1477 
     | 
    
         
            +
              # Unregisters a frame node: deletes its ID from @node and then
              # from @frame_id. Returns the result of the @frame_id deletion.
              def remove_frame(frame_node)
                @node.delete(frame_node.id)

                frame_key = frame_node.id
                @frame_id.delete(frame_key)
              end
         
     | 
| 
      
 1481 
     | 
    
         
            +
             
     | 
| 
      
 1482 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1483 
     | 
    
         
            +
              # Adds a frame element to a frame and registers it.
              #
              # frame_node:  FrameNode that receives the new FE
              # fe_name:     string, name of the new FE
              # fe_children: array of SynNode, children of the new FE
              # sem_id:      optional ID of the new FE
              #
              # The FE itself is created by frame_node.add_fe; the result is
              # stored in @node under its ID. Returns the new FE node.
              def add_fe(frame_node, fe_name, fe_children, sem_id = nil)
                frame_node.add_fe(fe_name, fe_children, sem_id).tap do |created|
                  @node[created.id] = created
                end
              end
         
     | 
| 
      
 1493 
     | 
    
         
            +
             
     | 
| 
      
 1494 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1495 
     | 
    
         
            +
              # Unregisters a frame element node: deletes its ID from @node,
              # then asks the node's parent to remove it as a child.
              # Returns whatever the parent's remove_child returns.
              def remove_fe(fe_node)
                @node.delete(fe_node.id)

                owner = fe_node.parent
                owner.remove_child(fe_node)
              end
         
     | 
| 
      
 1499 
     | 
    
         
            +
             
     | 
| 
      
 1500 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1501 
     | 
    
         
            +
              # Creates an empty underspecification block node and registers it.
              #
              # frame_or_fe: string, "frame" or "fe"
              #
              # The node is stored in @node under its ID, and the ID is
              # appended to @uspframe_id ("frame") or @uspfe_id ("fe").
              # Any other value raises "Shouldn't be here"; at that point the
              # node has already been stored in @node.
              # Returns the new UspNode.
              def add_usp(frame_or_fe)
                block = UspNode.new(RegXML.new("<uspblock/>"), frame_or_fe)
                @node[block.id] = block

                if frame_or_fe == "frame"
                  @uspframe_id << block.id
                elsif frame_or_fe == "fe"
                  @uspfe_id << block.id
                else
                  raise "Shouldn't be here"
                end

                block
              end
         
     | 
| 
      
 1516 
     | 
    
         
            +
             
     | 
| 
      
 1517 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1518 
     | 
    
         
            +
              # Removes an underspecification block node.
              #
              # Every child reported by usp_node.children is removed from the
              # block, the block's ID is deleted from @node, and then from
              # @uspframe_id or @uspfe_id depending on usp_node.i_am
              # ("frame" or "fe"). Any other i_am value raises
              # "Shouldn't be here" after the deletion from @node.
              def remove_usp(usp_node)
                usp_node.children.each do |member|
                  usp_node.remove_child(member)
                end
                @node.delete(usp_node.id)

                kind = usp_node.i_am
                if kind == "frame"
                  @uspframe_id.delete(usp_node.id)
                elsif kind == "fe"
                  @uspfe_id.delete(usp_node.id)
                else
                  raise "Shouldn't be here"
                end
              end
         
     | 
| 
      
 1532 
     | 
    
         
            +
             
     | 
| 
      
 1533 
     | 
    
         
            +
             
     | 
| 
      
 1534 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1535 
     | 
    
         
            +
              # Deliberately unsupported here: always raises a RuntimeError
              # with the message "Not implemented for this class".
              def add_child(arg1, arg2)
                raise RuntimeError, "Not implemented for this class"
              end
         
     | 
| 
      
 1538 
     | 
    
         
            +
             
     | 
| 
      
 1539 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1540 
     | 
    
         
            +
              # Deliberately unsupported here: always raises a RuntimeError
              # with the message "Not implemented for this class".
              def remove_child(arg1, arg2)
                raise RuntimeError, "Not implemented for this class"
              end
         
     | 
| 
      
 1543 
     | 
    
         
            +
             
     | 
| 
      
 1544 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1545 
     | 
    
         
            +
              # Builds a <global> element and appends it to @globals.
              #
              # type:  value of the "type" attribute (passed through xml_secure_val)
              # param: optional value of the "param" attribute (passed through
              #        xml_secure_val)
              # text:  optional element content; inserted as given, with one
              #        blank on either side
              #
              # Without text the element is self-closing.
              # Returns the new RegXML object.
              #
              # A check of type against a fixed list is currently disabled:
              #    unless ["REEXAMINE", "WRONGSUBCORPUS", "INTERESTING", "LATER"].include? type
              #      raise "add_flag: unknown type "+type
              #    end
              def add_flag(type, param=nil, text=nil)
                markup = "<global type=\'#{xml_secure_val(type)}\'"
                markup << " param=\'#{xml_secure_val(param)}\'" if param
                markup << (text ? "> #{text} </global>" : "/>")

                flag_xml = RegXML.new(markup)
                @globals << flag_xml
                flag_xml
              end
         
     | 
| 
      
 1564 
     | 
    
         
            +
             
     | 
| 
      
 1565 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1566 
     | 
    
         
            +
              # Removes the first entry of @globals that matches the given
              # description.
              #
              # type:  required value of the entry's "type" attribute
              # param: if given, required value of the entry's "param" attribute
              # text:  if given, required content of the entry (string forms of
              #        its children and text pieces, concatenated)
              #
              # Returns the removed entry, or nil if no entry matches.
              def remove_flag(type, param=nil, text=nil)
                remove_ix = @globals.index { |glob|
                  # attributes is read as a hash here, the same way flags()
                  # and semnode_add_flags() read it.
                  attrs = glob.attributes

                  attrs["type"] == type &&
                    (param.nil? || attrs["param"] == param) &&
                    (text.nil? || flag_text_matches?(glob, text))
                }

                remove_ix ? @globals.delete_at(remove_ix) : nil
              end

              # True if the content of glob equals text, ignoring leading and
              # trailing whitespace on both sides. add_flag() pads the text it
              # stores with one blank on either side, so an exact comparison
              # would miss a flag when given the text it was created with.
              def flag_text_matches?(glob, text)
                content = glob.children_and_text.map { |c| c.to_s }.join
                content.strip == text.to_s.strip
              end
         
     | 
| 
      
 1587 
     | 
    
         
            +
             
     | 
| 
      
 1588 
     | 
    
         
            +
              ############################3
         
     | 
| 
      
 1589 
     | 
    
         
            +
              protected
         
     | 
| 
      
 1590 
     | 
    
         
            +
             
     | 
| 
      
 1591 
     | 
    
         
            +
              # Assembles the XML text for this node's content and returns it
              # as one string, in three sections:
              # - <globals>: the string form of each @globals entry, one per line
              # - <frames>: the get() output of each node yielded by each_frame
              # - <usp>: <uspframes> and <uspfes>, holding the get() output of
              #   the blocks yielded by each_usp_frameblock / each_usp_feblock
              def get_xml_ofchildren
                xml = ""

                # globals
                xml << "<globals>\n"
                @globals.each do |glob|
                  xml << (glob.to_s + "\n")
                end
                xml << "</globals>\n"

                # frames
                xml << "<frames>\n"
                each_frame do |frame_node|
                  xml << frame_node.get
                end
                xml << "</frames>\n"

                # underspecification
                xml << "<usp>\n"

                xml << "<uspframes>\n"
                each_usp_frameblock do |usp_block|
                  xml << usp_block.get
                end
                xml << "</uspframes>\n"

                xml << "<uspfes>\n"
                each_usp_feblock do |usp_block|
                  xml << usp_block.get
                end
                xml << "</uspfes>\n"

                xml << "</usp>\n"

                xml
              end
         
     | 
| 
      
 1624 
     | 
    
         
            +
             
     | 
| 
      
 1625 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1626 
     | 
    
         
            +
              # Copies the flags found in an XML element onto a semantic node.
              #
              # sem_node: SemNode object that receives the flags
              # xml_obj:  RegXML object whose children are scanned
              #
              # For every child named "flag", the value of its "name"
              # attribute is passed to sem_node.add_flag. A flag without a
              # name only produces a warning on $stderr.
              def semnode_add_flags(sem_node, xml_obj)
                xml_obj.children_and_text.each do |piece|
                  next unless piece.name == "flag"

                  # found a flag, record it
                  flag_name = piece.attributes["name"]
                  unless flag_name
                    $stderr.puts "Warning: flag without a name"
                    next
                  end
                  sem_node.add_flag(flag_name)
                end
              end
         
     | 
| 
      
 1641 
     | 
    
         
            +
             
     | 
| 
      
 1642 
     | 
    
         
            +
              # Builds the frame element nodes below a frame from its XML.
              #
              # frame_node: FrameNode object that receives the children
              # xml_obj:    RegXML object of the frame
              # id_to_node: hash: syn_node_id(string) -> SynNode object
              #
              # Each child named "fe" or "target" becomes an FeNode that is
              # stored in @node and added as a child of frame_node. Children
              # named "flag" are skipped here. Every other child is handed to
              # frame_node.add_kith so that it is kept for output.
              def frame_add_children(frame_node, xml_obj, id_to_node)
                xml_obj.children_and_text.each do |fe|
                  tag = fe.name

                  # nothing to do for flags at this level
                  next if tag == "flag"

                  unless tag == "fe" || tag == "target"
                    # keep for output
                    frame_node.add_kith(fe)
                    next
                  end

                  # make a node for this,
                  # and add it as child of this frame node.
                  fe_node = FeNode.new(fe)
                  @node[fe_node.id] = fe_node
                  frame_node.add_child(fe_node)

                  semnode_add_flags(fe_node, fe)

                  # add the FE's children
                  fe.children_and_text.each do |fechild|
                    child_tag = fechild.name

                    if child_tag == "fenode"
                      syn_id = SalsaTigerXmlNode.xmlel_id(fechild)
                      syn_node = id_to_node[syn_id]

                      if syn_node
                        # normal syntactic node, which the id_to_node mapping knows
                        fe_node.add_child(syn_node, fechild)
                        syn_node.add_sem(fe_node)
                      else
                        # must be a node in a different sentence:
                        # make a dummy graph node for it
                        fe_node.add_child(TSSynNode.new(syn_id), fechild)
                      end
                    elsif child_tag != "flag"
                      # flags were recorded by semnode_add_flags above;
                      # anything else is kept as kith
                      fe_node.add_kith(fechild)
                    end
                  end
                end
              end
         
     | 
| 
      
 1692 
     | 
    
         
            +
             
     | 
| 
      
 1693 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1694 
     | 
    
         
            +
              # Reads underspecification blocks from XML and registers them.
              #
              # xml_obj:     RegXML object whose children are the blocks
              # frame_or_fe: string: "frame" or "fe"
              #
              # Each child named "uspblock" becomes a UspNode that is stored
              # in @node, with its ID appended to @uspframe_id or @uspfe_id.
              # Each "uspitem" inside it is resolved to an already known node
              # in @node and added as a child of the block. Children with
              # other names are reported through warn_child_ignored and
              # skipped. An item whose node is unknown is reported on $stderr
              # and skipped.
              def initialize_usp(xml_obj, frame_or_fe)
                xml_obj.children_and_text.each do |uspblock|
                  if uspblock.name != "uspblock"
                    warn_child_ignored("s/sem/usp/uspframe|uspfe", uspblock)
                    next
                  end

                  # node for this underspecified block
                  block = UspNode.new(uspblock, frame_or_fe)
                  @node[block.id] = block

                  if frame_or_fe == "frame"
                    @uspframe_id << block.id
                  elsif frame_or_fe == "fe"
                    @uspfe_id << block.id
                  else
                    raise "Shouldn't be here"
                  end

                  # add its children
                  uspblock.children_and_text.each do |uspitem|
                    if uspitem.name != "uspitem"
                      warn_child_ignored("s/sem/usp/uspframe|uspfe/uspblock", uspitem)
                      next
                    end

                    # everything up to and including the last "_s" in the
                    # referenced ID is replaced by "s"
                    usp_id = SalsaTigerXmlNode.xmlel_id(uspitem).gsub(/.*_s/, "s")

                    member = @node[usp_id]
                    unless member
                      $stderr.puts "Error: Underspecification: could not find node with ID #{usp_id}. Skipping."
                      next
                    end
                    block.add_child(member)
                  end
                end
              end
         
     | 
| 
      
 1734 
     | 
    
         
            +
            end
         
     | 
| 
      
 1735 
     | 
    
         
            +
             
     | 
| 
      
 1736 
     | 
    
         
            +
             
     | 
| 
      
 1737 
     | 
    
         
            +
            #############
         
     | 
| 
      
 1738 
     | 
    
         
            +
            # class SalsaTigerSentence
         
     | 
| 
      
 1739 
     | 
    
         
            +
            # 
         
     | 
| 
      
 1740 
     | 
    
         
            +
            # offers access methods to a SalsaTigerXML sentence
         
     | 
| 
      
 1741 
     | 
    
         
            +
            # given as a string
         
     | 
| 
      
 1742 
     | 
    
         
            +
            #
         
     | 
| 
      
 1743 
     | 
    
         
            +
            # Nodes of syntactic structure as well as frames and
         
     | 
| 
      
 1744 
     | 
    
         
            +
            # frame elements are kept (and returned) as XMLNode objects, 
         
     | 
| 
      
 1745 
     | 
    
         
            +
            # or more specifically as SynNode, FrameNode and FeNode objects.
         
     | 
| 
      
 1746 
     | 
    
         
            +
            #
         
     | 
| 
      
 1747 
     | 
    
         
            +
            # methods:
         
     | 
| 
      
 1748 
     | 
    
         
            +
            #
         
     | 
| 
      
 1749 
     | 
    
         
            +
            # new      initializes the object
         
     | 
| 
      
 1750 
     | 
    
         
            +
            #
         
     | 
| 
      
 1751 
     | 
    
         
            +
            # id       returns the sentence ID
         
     | 
| 
      
 1752 
     | 
    
         
            +
            # 
         
     | 
| 
      
 1753 
     | 
    
         
            +
            # get      returns the REXML object describing the same sentence
         
     | 
| 
      
 1754 
     | 
    
         
            +
            #          as this object
         
     | 
| 
      
 1755 
     | 
    
         
            +
            #
         
     | 
| 
      
 1756 
     | 
    
         
            +
            # each_terminal  yields each terminal of the sentence in turn.
         
     | 
| 
      
 1757 
     | 
    
         
            +
            #          they are returned as SynNode objects
         
     | 
| 
      
 1758 
     | 
    
         
            +
            #
         
     | 
| 
      
 1759 
     | 
    
         
            +
            # terminals returns all terminal node objects in an array
         
     | 
| 
      
 1760 
     | 
    
         
            +
            #
         
     | 
| 
      
 1761 
     | 
    
         
            +
            # each_terminal_sorted  yields each terminal of the sentence in turn,
         
     | 
| 
      
 1762 
     | 
    
         
            +
            #          making sure the terminal with the lowest ID is returned first.
         
     | 
| 
      
 1763 
     | 
    
         
            +
            #          use this if you need the terminal words in the right order!
         
     | 
| 
      
 1764 
     | 
    
         
            +
            #          nodes are returned as SynNode objects
         
     | 
| 
      
 1765 
     | 
    
         
            +
            #
         
     | 
| 
      
 1766 
     | 
    
         
            +
            # each_nonterminal yields each nonterminal of the sentence in turn.
         
     | 
| 
      
 1767 
     | 
    
         
            +
            #           nodes are returned as SynNode objects
         
     | 
| 
      
 1768 
     | 
    
         
            +
            #
         
     | 
| 
      
 1769 
     | 
    
         
            +
            # each_frame yields each frame of the sentence in turn.
         
     | 
| 
      
 1770 
     | 
    
         
            +
            #           nodes are returned as FrameNode objects
         
     | 
| 
      
 1771 
     | 
    
         
            +
            #
         
     | 
| 
      
 1772 
     | 
    
         
            +
            # frames returns all frame objects in an array
         
     | 
| 
      
 1773 
     | 
    
         
            +
            #
         
     | 
| 
      
 1774 
     | 
    
         
            +
            # each_usp_frameblock 
         
     | 
| 
      
 1775 
     | 
    
         
            +
            #          yields each group of underspecified frames of the sentence
         
     | 
| 
      
 1776 
     | 
    
         
            +
            #          in turn, as an UspNode object. To see the frames involved
         
     | 
| 
      
 1777 
     | 
    
         
            +
            #          in this underspecification, use each_child on the UspNode object
         
     | 
| 
      
 1778 
     | 
    
         
            +
            #
         
     | 
| 
      
 1779 
     | 
    
         
            +
            #
         
     | 
| 
      
 1780 
     | 
    
         
            +
            # usp_frameblocks  returns all groups of underspecified frames as an array
         
     | 
| 
      
 1781 
     | 
    
         
            +
            #          of UspNode objects
         
     | 
| 
      
 1782 
     | 
    
         
            +
            #
         
     | 
| 
      
 1783 
     | 
    
         
            +
            # each_usp_feblock 
         
     | 
| 
      
 1784 
     | 
    
         
            +
            #          yields each group of underspecified frame elements 
         
     | 
| 
      
 1785 
     | 
    
         
            +
            #          of the sentence in turn, as an UspNode object. 
         
     | 
| 
      
 1786 
     | 
    
         
            +
            #          To see the frame elements involved
         
     | 
| 
      
 1787 
     | 
    
         
            +
            #          in this underspecification, use each_child on the UspNode object
         
     | 
| 
      
 1788 
     | 
    
         
            +
            #
         
     | 
| 
      
 1789 
     | 
    
         
            +
            # usp_feblocks  returns all groups of underspecified frame elements 
         
     | 
| 
      
 1790 
     | 
    
         
            +
            #          as an array of UspNode objects
         
     | 
| 
      
 1791 
     | 
    
         
            +
            #
         
     | 
| 
      
 1792 
     | 
    
         
            +
            #
         
     | 
| 
      
 1793 
     | 
    
         
            +
            # flags     returns a list of the sentence flags, as hashes.
         
     | 
| 
      
 1794 
     | 
    
         
            +
            #           key "type": a string, either REEXAMINE or WRONGSUBCORPUS
         
     | 
| 
      
 1795 
     | 
    
         
            +
            #                       or INTERESTING or LATER
         
     | 
| 
      
 1796 
     | 
    
         
            +
            #           key "param": a string, the parameter. important for 
         
     | 
| 
      
 1797 
     | 
    
         
            +
            #                        REEXAMINE
         
     | 
| 
      
 1798 
     | 
    
         
            +
            #           key "text": a string, the text of this flag. Will be
         
     | 
| 
      
 1799 
     | 
    
         
            +
            #                       nonempty only for INTERESTING cases
         
     | 
| 
      
 1800 
     | 
    
         
            +
            #
         
     | 
| 
      
 1801 
     | 
    
         
            +
            # syn_roots returns a list of all the roots of the syntactic trees
         
     | 
| 
      
 1802 
     | 
    
         
            +
            #           in this sentence, as node objects. There may be more than
         
     | 
| 
      
 1803 
     | 
    
         
            +
            #           one, unfortunately.
         
     | 
| 
      
 1804 
     | 
    
         
            +
            #
         
     | 
| 
      
 1805 
     | 
    
         
            +
            # add_syn  add a new syntactic node with the given category, word, POS,
         
     | 
| 
      
 1806 
     | 
    
         
            +
            #          returns the new node
         
     | 
| 
      
 1807 
     | 
    
         
            +
            #
         
     | 
| 
      
 1808 
     | 
    
         
            +
            # add_frame add a frame with a given name, returns the new frame node
         
     | 
| 
      
 1809 
     | 
    
         
            +
            #
         
     | 
| 
      
 1810 
     | 
    
         
            +
            # add_usp  add a new underspecification block, either for frames or FEs
         
     | 
| 
      
 1811 
     | 
    
         
            +
            #
         
     | 
| 
      
 1812 
     | 
    
         
            +
            # add_flag  adds a sentence flag to this sentence. 
         
     | 
| 
      
 1813 
     | 
    
         
            +
            #   type: a string, must be REEXAMINE, INTERESTING, WRONGSUBCORPUS,
         
     | 
| 
      
 1814 
     | 
    
         
            +
            #         or LATER
         
     | 
| 
      
 1815 
     | 
    
         
            +
            #   param: optional parameter, a string, describes type of Reexamine
         
     | 
| 
      
 1816 
     | 
    
         
            +
            #          for REEXAMINE-type flags
         
     | 
| 
      
 1817 
     | 
    
         
            +
            #   text:  optional parameter, a string, arbitrary text commenting
         
     | 
| 
      
 1818 
     | 
    
         
            +
            #          on the flag, used mainly with INTERESTING
         
     | 
| 
      
 1819 
     | 
    
         
            +
            #
         
     | 
| 
      
 1820 
     | 
    
         
            +
            # remove_flag removes a sentence flag from this sentence
         
     | 
| 
      
 1821 
     | 
    
         
            +
            #          only removes flag in case of exact match of type, param, and text
         
     | 
| 
      
 1822 
     | 
    
         
            +
            #   type: a string, either REEXAMINE, INTERESTING, WRONGSUBCORPUS,
         
     | 
| 
      
 1823 
     | 
    
         
            +
            #         or LATER
         
     | 
| 
      
 1824 
     | 
    
         
            +
            #   param: optional parameter, a string, describes type of Reexamine
         
     | 
| 
      
 1825 
     | 
    
         
            +
            #          for REEXAMINE-type flags
         
     | 
| 
      
 1826 
     | 
    
         
            +
            #   text:  optional parameter, a string, arbitrary text commenting
         
     | 
| 
      
 1827 
     | 
    
         
            +
            #          on the flag, used mainly with INTERESTING
         
     | 
| 
      
 1828 
     | 
    
         
            +
             
     | 
| 
      
 1829 
     | 
    
         
            +
            class SalsaTigerSentence < XMLNode
         
     | 
| 
      
 1830 
     | 
    
         
            +
             
     | 
| 
      
 1831 
     | 
    
         
            +
              # Builds the sentence object from a SalsaTigerXML <s> element
              # given as a string.
              #
              # string: the XML text of the sentence
              def initialize(string)
                # turn the string into an XML element object
                parsed = RegXML.new(string)

                # set this object up as an XML node: keep the outermost
                # element's name, attributes and ID; the final 'false'
                # marks it as an XML object rather than a text
                super(parsed.name, parsed.attributes, SalsaTigerXmlNode.xmlel_id(parsed), false)

                # syntactic info: the <graph> child of <s>;
                # if the sentence has none, substitute an empty one
                graph_el = parsed.children_and_text().detect { |el| el.name == "graph" } ||
                           RegXML.new("<graph/>")

                @syn = SalsaTigerSentenceGraph.new(graph_el, id)

                # semantic info: the <sem> child of <s>;
                # if the sentence has none, substitute an empty one
                sem_el = parsed.children_and_text().detect { |el| el.name == "sem" } ||
                         RegXML.new("<sem/>")

                # the splitword info from <sem> is handed to the syntax part
                # before the semantics object is built on the syntax nodes
                @syn.add_splitwords(SalsaTigerSentenceSem.get_splitwords(sem_el))

                @sem = SalsaTigerSentenceSem.new(sem_el, id, @syn.node)

                # keep every remaining child of <s> (everything except
                # <graph> and <sem>, which are handled above) for later output
                parsed.children_and_text.each do |child_or_text|
                  next if ["graph", "sem"].include?(child_or_text.name)

                  add_kith(child_or_text)
                end
              end
         
     | 
| 
      
 1884 
     | 
    
         
            +
             
     | 
| 
      
 1885 
     | 
    
         
            +
              #############
         
     | 
| 
      
 1886 
     | 
    
         
            +
              # Creates a sentence object with the given ID that contains
              # only an empty <graph/> and an empty <sem/> element.
              #
              # sentence_id: string, the ID for the new sentence
              #
              # returns: a new SalsaTigerSentence object
              def SalsaTigerSentence.empty_sentence(sentence_id)  # string
                # The ID is embedded in a single-quoted XML attribute below,
                # so a literal apostrophe would terminate the attribute early.
                # Escape it as the predefined XML entity.
                sentence_id = sentence_id.gsub(/'/, "&apos;")
                sent_string = "<s id=\'#{sentence_id}\'>\n" +
                              "<graph/>\n" +
                              "<sem/>\n" +
                              "</s>"
                return SalsaTigerSentence.new(sent_string)
              end
         
     | 
| 
      
 1894 
     | 
    
         
            +
             
     | 
| 
      
 1895 
     | 
    
         
            +
              #####
         
     | 
| 
      
 1896 
     | 
    
         
            +
             
     | 
| 
      
 1897 
     | 
    
         
            +
             
     | 
| 
      
 1898 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1899 
     | 
    
         
            +
              # String form of the sentence: whatever the syntactic
              # part (@syn) renders for to_s.
              def to_s
                @syn.to_s
              end
         
     | 
| 
      
 1902 
     | 
    
         
            +
             
     | 
| 
      
 1903 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1904 
     | 
    
         
            +
              # Yields each terminal handed out by @syn.each_terminal
              # to the caller's block, one at a time.
              def each_terminal
                @syn.each_terminal do |terminal|
                  yield terminal
                end
              end
         
     | 
| 
      
 1907 
     | 
    
         
            +
             
     | 
| 
      
 1908 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1909 
     | 
    
         
            +
              # Yields each terminal handed out by @syn.each_terminal_sorted
              # to the caller's block, in the order @syn provides them.
              def each_terminal_sorted
                @syn.each_terminal_sorted do |terminal|
                  yield terminal
                end
              end
         
     | 
| 
      
 1912 
     | 
    
         
            +
             
     | 
| 
      
 1913 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1914 
     | 
    
         
            +
              # Returns the terminal nodes as reported by @syn.terminals.
              def terminals
                @syn.terminals
              end
         
     | 
| 
      
 1917 
     | 
    
         
            +
             
     | 
| 
      
 1918 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1919 
     | 
    
         
            +
              # Returns the terminal nodes as reported by @syn.terminals_sorted.
              def terminals_sorted
                @syn.terminals_sorted
              end
         
     | 
| 
      
 1922 
     | 
    
         
            +
             
     | 
| 
      
 1923 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1924 
     | 
    
         
            +
              # Yields each nonterminal handed out by @syn.each_nonterminal
              # to the caller's block, one at a time.
              def each_nonterminal
                @syn.each_nonterminal do |nonterminal|
                  yield nonterminal
                end
              end
         
     | 
| 
      
 1927 
     | 
    
         
            +
             
     | 
| 
      
 1928 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1929 
     | 
    
         
            +
              # Returns the nonterminal nodes as reported by @syn.nonterminals.
              def nonterminals
                @syn.nonterminals
              end
         
     | 
| 
      
 1932 
     | 
    
         
            +
             
     | 
| 
      
 1933 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1934 
     | 
    
         
            +
              # Yields each node handed out by @syn.each_node
              # to the caller's block, one at a time.
              def each_syn_node
                @syn.each_node { |syn_node| yield syn_node }
              end
         
     | 
| 
      
 1939 
     | 
    
         
            +
             
     | 
| 
      
 1940 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1941 
     | 
    
         
            +
              # Returns the syntactic nodes as reported by @syn.nodes.
              def syn_nodes
                @syn.nodes
              end
         
     | 
| 
      
 1944 
     | 
    
         
            +
             
     | 
| 
      
 1945 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1946 
     | 
    
         
            +
              # Returns the roots of the syntactic trees as reported
              # by @syn.syn_roots.
              def syn_roots
                @syn.syn_roots
              end
         
     | 
| 
      
 1949 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1950 
     | 
    
         
            +
             
     | 
| 
      
 1951 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1952 
     | 
    
         
            +
              # Looks up syn_id in the node index of the syntactic part
              # (@syn.node) and returns whatever is stored under it.
              def syn_node_with_id(syn_id)
                syn_index = @syn.node
                syn_index[syn_id]
              end
         
     | 
| 
      
 1955 
     | 
    
         
            +
             
     | 
| 
      
 1956 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1957 
     | 
    
         
            +
              # Looks up sem_id in the node index of the semantic part
              # (@sem.node) and returns whatever is stored under it.
              def sem_node_with_id(sem_id)
                sem_index = @sem.node
                sem_index[sem_id]
              end
         
     | 
| 
      
 1960 
     | 
    
         
            +
             
     | 
| 
      
 1961 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1962 
     | 
    
         
            +
              # Yields each frame handed out by @sem.each_frame
              # to the caller's block, one at a time.
              def each_frame
                @sem.each_frame do |frame|
                  yield frame
                end
              end
         
     | 
| 
      
 1965 
     | 
    
         
            +
             
     | 
| 
      
 1966 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1967 
     | 
    
         
            +
              # Returns the frames as reported by @sem.frames.
              def frames
                @sem.frames
              end
         
     | 
| 
      
 1970 
     | 
    
         
            +
             
     | 
| 
      
 1971 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1972 
     | 
    
         
            +
              # Yields each underspecification block handed out by
              # @sem.each_usp_frameblock to the caller's block.
              def each_usp_frameblock
                @sem.each_usp_frameblock do |usp_block|
                  yield usp_block
                end
              end
         
     | 
| 
      
 1975 
     | 
    
         
            +
             
     | 
| 
      
 1976 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1977 
     | 
    
         
            +
              # Returns the frame underspecification blocks as reported
              # by @sem.usp_frameblocks.
              def usp_frameblocks
                @sem.usp_frameblocks
              end
         
     | 
| 
      
 1980 
     | 
    
         
            +
             
     | 
| 
      
 1981 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1982 
     | 
    
         
            +
              # Yields each underspecification block handed out by
              # @sem.each_usp_feblock to the caller's block.
              def each_usp_feblock
                @sem.each_usp_feblock do |usp_block|
                  yield usp_block
                end
              end
         
     | 
| 
      
 1985 
     | 
    
         
            +
             
     | 
| 
      
 1986 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1987 
     | 
    
         
            +
              # Returns the frame element underspecification blocks
              # as reported by @sem.usp_feblocks.
              def usp_feblocks
                @sem.usp_feblocks
              end
         
     | 
| 
      
 1990 
     | 
    
         
            +
             
     | 
| 
      
 1991 
     | 
    
         
            +
              ###
         
     | 
| 
      
 1992 
     | 
    
         
            +
              # Returns the sentence flags as reported by @sem.flags.
              def flags
                @sem.flags
              end
         
     | 
| 
      
 1995 
     | 
    
         
            +
             
     | 
| 
      
 1996 
     | 
    
         
            +
              ###################################
         
     | 
| 
      
 1997 
     | 
    
         
            +
              # adding and removing things
         
     | 
| 
      
 1998 
     | 
    
         
            +
             
     | 
| 
      
 1999 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2000 
     | 
    
         
            +
              # add syntactic node, specified as terminal(t) or nonterminal(nt)
         
     | 
| 
      
 2001 
     | 
    
         
            +
              # 
         
     | 
| 
      
 2002 
     | 
    
         
            +
              # returns the new node
         
     | 
| 
      
 2003 
     | 
    
         
            +
              # Adds a syntactic node by passing this sentence's ID together
              # with the given arguments on to @syn.add_node.
              #
              # label:  string: t or nt
              # cat:    string: category
              # word:   string: word
              # pos:    string: part of speech
              # syn_id: string: ID for the new node
              #
              # returns: the result of @syn.add_node
              def add_syn(label, cat = nil, word = nil, pos = nil, syn_id = nil)
                sentence_id = id
                @syn.add_node(sentence_id, label, cat, word, pos, syn_id)
              end
         
     | 
| 
      
 2010 
     | 
    
         
            +
             
     | 
| 
      
 2011 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2012 
     | 
    
         
            +
              # Removes a syntactic node by handing it to @syn.remove_node.
              #
              # node: the node to remove
              def remove_syn(node)
                syn_part = @syn
                syn_part.remove_node(node)
              end
         
     | 
| 
      
 2015 
     | 
    
         
            +
             
     | 
| 
      
 2016 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2017 
     | 
    
         
            +
              # Adds a frame by passing this sentence's ID together with
              # the given arguments on to @sem.add_frame.
              #
              # name:   string: name of the frame
              # sem_id: string: ID for the new node
              #
              # returns: the result of @sem.add_frame
              def add_frame(name, sem_id = nil)
                sentence_id = id
                @sem.add_frame(sentence_id, name, sem_id)
              end
         
     | 
| 
      
 2021 
     | 
    
         
            +
             
     | 
| 
      
 2022 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2023 
     | 
    
         
            +
              # Removes a frame by handing it to @sem.remove_frame.
              #
              # frame_node: FrameNode object
              def remove_frame(frame_node)
                sem_part = @sem
                sem_part.remove_frame(frame_node)
              end
         
     | 
| 
      
 2026 
     | 
    
         
            +
             
     | 
| 
      
 2027 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2028 
     | 
    
         
            +
              # Adds a frame element by passing all arguments unchanged
              # on to @sem.add_fe.
              #
              # returns: the result of @sem.add_fe
              def add_fe(frame_obj, name, fe_children, sem_id = nil)
                @sem.add_fe(frame_obj, name, fe_children, sem_id)
              end
         
     | 
| 
      
 2034 
     | 
    
         
            +
             
     | 
| 
      
 2035 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2036 
     | 
    
         
            +
              # Remove a frame element by delegating to @sem.
              #
              # fe_node: the frame element node to remove
              #
              # returns: the result of @sem.remove_fe
              def remove_fe(fe_node)
                sem = @sem
                sem.remove_fe(fe_node)
              end
         
     | 
| 
      
 2039 
     | 
    
         
            +
             
     | 
| 
      
 2040 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2041 
     | 
    
         
            +
              # Add an underspecification entry by delegating to @sem.
              #
              # frame_or_fe: passed through unchanged to @sem.add_usp
              #
              # returns: the result of @sem.add_usp
              def add_usp(frame_or_fe)
                @sem.add_usp(frame_or_fe)
              end
         
     | 
| 
      
 2044 
     | 
    
         
            +
             
     | 
| 
      
 2045 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2046 
     | 
    
         
            +
              # Remove an underspecification entry by delegating to @sem.
              #
              # usp_node: UspNode object
              #
              # returns: the result of @sem.remove_usp
              def remove_usp(usp_node)
                sem = @sem
                sem.remove_usp(usp_node)
              end
         
     | 
| 
      
 2049 
     | 
    
         
            +
             
     | 
| 
      
 2050 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2051 
     | 
    
         
            +
              # Add a flag by delegating to @sem.
              # param and text are optional (nil by default); all three
              # arguments are passed through unchanged.
              #
              # returns: the result of @sem.add_flag
              def add_flag(type, param = nil, text = nil)
                sem = @sem
                sem.add_flag(type, param, text)
              end
         
     | 
| 
      
 2054 
     | 
    
         
            +
             
     | 
| 
      
 2055 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2056 
     | 
    
         
            +
              # Remove a flag by delegating to @sem.
              # param and text are optional (nil by default); all three
              # arguments are passed through unchanged.
              #
              # returns: the result of @sem.remove_flag
              def remove_flag(type, param = nil, text = nil)
                sem = @sem
                sem.remove_flag(type, param, text)
              end
         
     | 
| 
      
 2059 
     | 
    
         
            +
             
     | 
| 
      
 2060 
     | 
    
         
            +
              ###
         
     | 
| 
      
 2061 
     | 
    
         
            +
              # Discard the semantics of this sentence: @sem is replaced by a
              # fresh SalsaTigerSentenceSem built from an empty <sem/> element,
              # this object's id() and @syn.node.
              #
              # returns: the new @sem object
              def remove_semantics
                @sem = SalsaTigerSentenceSem.new(RegXML.new("<sem/>"), id, @syn.node)
              end
         
     | 
| 
      
 2065 
     | 
    
         
            +
             
     | 
| 
      
 2066 
     | 
    
         
            +
              #################33
         
     | 
| 
      
 2067 
     | 
    
         
            +
              # output
         
     | 
| 
      
 2068 
     | 
    
         
            +
              # Output of the syntactic part only.
              #
              # returns: whatever @syn.get returns
              def get_syn
                @syn.get
              end
         
     | 
| 
      
 2071 
     | 
    
         
            +
             
     | 
| 
      
 2072 
     | 
    
         
            +
              ############################3
         
     | 
| 
      
 2073 
     | 
    
         
            +
              protected
         
     | 
| 
      
 2074 
     | 
    
         
            +
             
     | 
| 
      
 2075 
     | 
    
         
            +
              # Output of both parts of the sentence: the result of @syn.get
              # followed by the result of @sem.get, combined with +.
              def get_xml_ofchildren
                syn_part = @syn.get
                sem_part = @sem.get
                syn_part + sem_part
              end
         
     | 
| 
      
 2078 
     | 
    
         
            +
            end
         
     | 
| 
      
 2079 
     | 
    
         
            +
             
     | 
| 
      
 2080 
     | 
    
         
            +
            #######
         
     | 
| 
      
 2081 
     | 
    
         
            +
            # identify the set of maximal constituents covering a set of nodes
         
     | 
| 
      
 2082 
     | 
    
         
            +
            #
         
     | 
| 
      
 2083 
     | 
    
         
            +
            module MaxConst
              # Methods for finding the maximal constituents that cover a set of nodes.
              #
              # The methods call syn_roots() on the object they are mixed into
              # (the original comment names SalsaTigerSentence as the host) and expect
              # the nodes to answer is_splitword?, is_punct?, is_terminal?, word,
              # children and yield_nodes.

              ###
              # Determine the maximal constituents covering the input nodes by walking
              # the trees top-down, breadth-first, starting at the syntactic roots.
              #
              # Punctuation terminals never count towards a node's yield. If
              # ignore_empty_terminals is set, terminals whose word is nil or empty
              # are disregarded as well when checking whether a node is covered.
              #
              # returns: array:SynNode, list of maximal constituents covering
              # the input nodes, in this order: constituents found in the trees,
              # leftover words that were not found below any root, splitwords
              def max_constituents_for_nodes(node_list, # array: SynNode
                                             ignore_empty_terminals = false) # boolean: ignore empty terminals?

                # 'words': non-punctuation yield nodes still waiting to be covered
                # 'splitwords': splitword nodes, returned as they are
                words, splitwords = max_constituents_split_input(node_list)

                # 'constituents' contains found constituents,
                # 'nodes_to_check' contains nodes for which we still need constituents.
                constituents = []
                # Work on a copy: the queue is consumed with shift() below, and the
                # array handed out by syn_roots() must not be emptied as a side effect.
                nodes_to_check = syn_roots().dup # (there may be more than one root)

                while true
                  node = nodes_to_check.shift
                  # have we checked all nodes already? or are we done with all words? then stop.
                  break if node.nil? || words.empty?

                  # only match nonempty non-punctuation nodes
                  node_yield = node.yield_nodes.reject { |n| n.is_punct? }
                  if ignore_empty_terminals
                    node_yield = node_yield.reject { |n|
                      n.is_terminal? && (n.word.nil? || n.word.empty?)
                    }
                  end
                  # this node has no yield, or only punctuation sign yield: skip it.
                  next if node_yield.empty?

                  rest = node_yield - words
                  if rest.empty?
                    # whole yield of node consists of words we are looking for
                    constituents << node
                    words -= node_yield
                  elsif rest.size < node_yield.size
                    # at least some of the words appear below this node:
                    # check this node's children too
                    node.children.each { |child| nodes_to_check << child }
                  end
                end

                # any leftover words that may not be from that sentence? just keep them.
                constituents.concat(words)
                # splitwords stay what they are
                constituents.concat(splitwords)

                constituents
              end

              ###
              # Determine maximum constituents covering the nodes in node_list.
              # Punctuation terminals (and optionally empty terminals) are ignored.
              #
              # If include_single_missing_children is set to true, then a node that has
              # exactly one child whose yield is not in node_list and more than one
              # child whose yield is in node_list is considered as having its whole
              # yield in node_list. (The check in max_constituents_aux requires at
              # least two covered children.)
              #
              # Optionally, a procedure accept_anyway_proc can be given. Like
              # include_single_missing_children, it can cause a node to be treated as
              # fully covered even though not all of its yield nodes are yield nodes
              # of node_list. It is called with three arguments:
              #   accept_anyway_proc(node, ch_in, ch_out)
              # node is a SynNode that would not normally count as fully covered,
              # ch_in is the list of its children that are fully covered,
              # ch_out is the list of its children that are not.
              # If the procedure returns true, node is treated as fully covered.
              #
              # returns: an array of SynNodes: the splitwords of node_list, followed by
              #    the maximal constituents found, followed by any words of node_list
              #    not covered by those
              def max_constituents_smc(node_list, # array: SynNode
                                       include_single_missing_children, # boolean
                                       ignore_empty_terminals = false, # boolean: ignore empty terminals?
                                       accept_anyway_proc = nil) # proc: SynNode, array:SynNode, array:SynNode => boolean

                # the result list starts out as the splitwords of the input
                words, constituents = max_constituents_split_input(node_list)

                syn_roots().each { |root|
                  root_included, descendants_included =
                    max_constituents_aux(root, words,
                                         include_single_missing_children,
                                         ignore_empty_terminals,
                                         accept_anyway_proc)

                  if root_included == "true"
                    constituents << root
                  else
                    constituents.concat(descendants_included)
                  end
                }

                # which words remain to be added?
                constituents.each { |c| words -= c.yield_nodes }
                constituents.concat(words)

                constituents
              end

              ##########
              private

              ###
              # Split node_list into the material both public methods work on.
              #
              # returns: pair [words, splitwords]
              #   words: the non-punctuation yield nodes of all non-splitword input nodes
              #   splitwords: the input nodes for which is_splitword? holds
              def max_constituents_split_input(node_list) # array: SynNode
                words = []
                splitwords = []

                node_list.each { |node|
                  if node.is_splitword?
                    splitwords << node
                  else
                    words.concat(node.yield_nodes.reject { |t| t.is_punct? })
                  end
                }

                [words, splitwords]
              end

              ###
              # Recursively determine maximum constituents covering the nodes in
              # 'nodelist', starting at 'node'.
              # Punctuation terminals (and optionally empty terminals) are ignored.
              #
              # See max_constituents_smc for the meaning of
              # include_single_missing_children and accept_anyway_proc.
              #
              # returns: pair [mybool, included_descendants]
              #  where mybool is a string, "true", "false" or "ignoreme" (for ignored
              #          punctuation and empty terminals):
              #          does the yield of this node consist entirely of nodes from nodelist?
              #  and included_descendants is a list of SynNodes: if mybool is "false",
              #          this is a list of descendants of this node whose yield does consist
              #          entirely of nodes from nodelist; otherwise it is empty
              def max_constituents_aux(node,    # SynNode
                                       nodelist, # array:SynNode
                                       include_single_missing_children = false, # boolean
                                       ignore_empty_terminals = false, # boolean: ignore empty terminals?
                                       accept_anyway_proc = nil) # proc: SynNode, array:SynNode, array:SynNode => Boolean

                if node.is_terminal? && nodelist.include?(node)
                  # node is terminal and included in nodelist
                  return ["true", []]
                elsif node.is_punct?
                  # punctuation: ignore
                  return ["ignoreme", []]
                elsif ignore_empty_terminals && node.is_terminal? &&
                      (node.word.nil? || node.word.empty?)
                  # empty terminal: possibly ignore
                  return ["ignoreme", []]
                elsif node.is_terminal?
                  # terminal, but not included in nodelist
                  return ["false", []]
                end

                # nonterminal: the verdict depends on the verdicts for the children
                children_results = node.children.map { |ch|
                  status, descendants = max_constituents_aux(ch, nodelist,
                                                             include_single_missing_children,
                                                             ignore_empty_terminals,
                                                             accept_anyway_proc)
                  [ch, status, descendants]
                }

                res_true  = children_results.select { |_ch, status, _desc| status == "true" }
                res_false = children_results.select { |_ch, status, _desc| status == "false" }

                if res_false.empty?
                  # no uncovered child: "true" if anything is covered at all,
                  # "ignoreme" if every child was ignored
                  return [res_true.empty? ? "ignoreme" : "true", []]
                end

                if include_single_missing_children &&
                   res_false.length == 1 && res_true.length > 1
                  # one child not covered, more than one child covered:
                  # consider the single missing child as covered, too
                  return ["true", []]
                end

                if accept_anyway_proc &&
                   accept_anyway_proc.call(node,
                                           res_true.map { |ch, _status, _desc| ch },
                                           res_false.map { |ch, _status, _desc| ch })
                  # some external source tells us that
                  # we are to consider the missing children as covered, too
                  return ["true", []]
                end

                # not all children covered: hand up the covered children themselves
                # and whatever the other children found among their descendants
                included = children_results.map { |ch, status, descendants|
                  status == "true" ? [ch] : descendants
                }.flatten

                ["false", included]
              end
            end
         
     | 
| 
      
 2322 
     | 
    
         
            +
             
     | 
| 
      
 2323 
     | 
    
         
            +
            # Mixin that widens a set of nodes to a contiguous span of terminals.
            #
            # The including class must provide +terminals_sorted+ and
            # +max_constituents_for_nodes+ (the latter presumably comes from the
            # MaxConst mixin -- verify against the including class).
            module ConvexComp

              # Complement +node_set+ so that its yield has no gaps.
              #
              # Collects the terminals yielded by the nodes in +node_set+ (each node
              # must respond to +yield_nodes_ordered+), finds the leftmost and
              # rightmost of them in +terminals_sorted+, takes every terminal in
              # between, and hands that span to +max_constituents_for_nodes+.
              #
              # node_set:: array of nodes
              #
              # Returns whatever +max_constituents_for_nodes+ returns for the
              # contiguous span. If the yield is empty, or if any yielded terminal
              # is not found in +terminals_sorted+, a warning is written to STDERR
              # and +node_set+ is returned unchanged.
              def convex_complemented(node_set)
                terminals = terminals_sorted()

                yield_nodes = node_set.map { |node| node.yield_nodes_ordered }.flatten

                # Look every terminal up exactly once. A nil entry marks a terminal
                # that is missing from the sorted terminal list.
                positions = yield_nodes.map { |t| terminals.index(t) }

                # Bail out before min/max: comparing nil with an Integer would raise
                # instead of reaching the warning below.
                if positions.empty? || positions.include?(nil)
                  STDERR.puts "Warning: could not complement projected node set #{yield_nodes.map {|t| t.id}}; terminals not found in sorted set of sentence terminals!?"
                  return node_set
                end

                leftmost, rightmost = positions.minmax

                STDERR.puts "Replacing "+yield_nodes.join(" ")
                new_node_set = terminals[leftmost..rightmost]
                STDERR.puts "By        "+new_node_set.join(" ")
                return max_constituents_for_nodes(new_node_set)
              end
            end
         
     | 
| 
      
 2343 
     | 
    
         
            +
             
     | 
| 
      
 2344 
     | 
    
         
            +
            # Reopen SalsaTigerSentence to mix in the MaxConst and ConvexComp
            # modules.
            class SalsaTigerSentence
              # A multi-argument include appends its arguments in reverse order,
              # so listing ConvexComp first yields the same ancestor chain as
              # including MaxConst and then ConvexComp in separate statements:
              # ConvexComp ends up ahead of MaxConst in method lookup.
              include ConvexComp, MaxConst
            end
         
     |