shalmaneser-frappe 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.yardopts +10 -0
- data/CHANGELOG.md +4 -0
- data/LICENSE.md +4 -0
- data/README.md +122 -0
- data/lib/frappe/Ampersand.rb +41 -0
- data/lib/frappe/file_parser.rb +126 -0
- data/lib/frappe/fix_syn_sem_mapping.rb +196 -0
- data/lib/frappe/frappe.rb +217 -0
- data/lib/frappe/frappe_flat_syntax.rb +89 -0
- data/lib/frappe/frappe_read_stxml.rb +48 -0
- data/lib/frappe/interfaces/berkeley_interface.rb +380 -0
- data/lib/frappe/interfaces/collins_interface.rb +340 -0
- data/lib/frappe/interfaces/counter.rb +19 -0
- data/lib/frappe/interfaces/stanford_interface.rb +353 -0
- data/lib/frappe/interfaces/treetagger_interface.rb +74 -0
- data/lib/frappe/interfaces/treetagger_module.rb +111 -0
- data/lib/frappe/interfaces/treetagger_pos_interface.rb +80 -0
- data/lib/frappe/interpreters/berkeley_interpreter.rb +27 -0
- data/lib/frappe/interpreters/collins_tnt_interpreter.rb +807 -0
- data/lib/frappe/interpreters/collins_treetagger_interpreter.rb +16 -0
- data/lib/frappe/interpreters/empty_interpreter.rb +26 -0
- data/lib/frappe/interpreters/headz.rb +265 -0
- data/lib/frappe/interpreters/headz_helpers.rb +54 -0
- data/lib/frappe/interpreters/stanford_interpreter.rb +28 -0
- data/lib/frappe/interpreters/syn_interpreter.rb +727 -0
- data/lib/frappe/interpreters/tiger_interpreter.rb +1846 -0
- data/lib/frappe/interpreters/treetagger_interpreter.rb +89 -0
- data/lib/frappe/one_parsed_file.rb +31 -0
- data/lib/frappe/opt_parser.rb +92 -0
- data/lib/frappe/path.rb +199 -0
- data/lib/frappe/plain_converter.rb +59 -0
- data/lib/frappe/salsa_tab_converter.rb +154 -0
- data/lib/frappe/salsa_tab_with_pos_converter.rb +531 -0
- data/lib/frappe/stxml_converter.rb +666 -0
- data/lib/frappe/syn_interface.rb +76 -0
- data/lib/frappe/syn_interface_stxml.rb +173 -0
- data/lib/frappe/syn_interface_tab.rb +39 -0
- data/lib/frappe/utf_iso.rb +27 -0
- data/lib/shalmaneser/frappe.rb +1 -0
- metadata +130 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 0b7ec35085dc7311add3094d750959ecc910a154
|
4
|
+
data.tar.gz: d30a7dde0b8954ca89d1000723178477aea27cde
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0fdc58e5ef35e89a639db4ec2074545f6acaa65f17871547cabc07a4efbdc1dc5f010d44d10288c9346378a53a760edd62ef065241f75f642aedb270d400b8c8
|
7
|
+
data.tar.gz: 93dc75206689d12f72e9bdecaf1e089b548af7d87503a4ae1f5e5b4d291cf78447b2759e8ea9d3d2c037af2ece4b1446ff048dd3c9f8389920f605ae8e44a99e
|
data/.yardopts
ADDED
data/CHANGELOG.md
ADDED
data/LICENSE.md
ADDED
data/README.md
ADDED
@@ -0,0 +1,122 @@
|
|
1
|
+
# SHALMANESER
|
2
|
+
|
3
|
+
[RubyGems](http://rubygems.org/gems/shalmaneser) |
|
4
|
+
[Shalmaneser's Project Page](http://bu.chsta.be/projects/shalmaneser/) |
|
5
|
+
[Source Code](https://github.com/arbox/shalmaneser) |
|
6
|
+
[Bug Tracker](https://github.com/arbox/shalmaneser/issues)
|
7
|
+
|
8
|
+
|
9
|
+
[![Gem Version](https://img.shields.io/gem/v/shalmaneser.svg)](https://rubygems.org/gems/shalmaneser)
|
10
|
+
[![Gem Version](https://img.shields.io/gem/v/frprep.svg)](https://rubygems.org/gems/shalmaneser-prep)
|
11
|
+
[![Gem Version](https://img.shields.io/gem/v/fred.svg)](https://rubygems.org/gems/shalmaneser-fred)
|
12
|
+
[![Gem Version](https://img.shields.io/gem/v/rosy.svg)](https://rubygems.org/gems/shalmaneser-rosy)
|
13
|
+
|
14
|
+
|
15
|
+
[![License GPL 2](http://img.shields.io/badge/License-GPL%202-green.svg)](http://www.gnu.org/licenses/gpl-2.0.txt)
|
16
|
+
[![Build Status](https://img.shields.io/travis/arbox/shalmaneser.svg?branch=1.2)](https://travis-ci.org/arbox/shalmaneser)
|
17
|
+
[![Code Climate](https://img.shields.io/codeclimate/github/arbox/shalmaneser.svg)](https://codeclimate.com/github/arbox/shalmaneser)
|
18
|
+
[![Dependency Status](https://img.shields.io/gemnasium/arbox/shalmaneser.svg)](https://gemnasium.com/arbox/shalmaneser)
|
19
|
+
|
20
|
+
[SHALMANESER](http://www.coli.uni-saarland.de/projects/salsa/shal/) is a SHALlow seMANtic parSER.
|
21
|
+
|
22
|
+
The name Shalmaneser is borrowed from John Brunner. He describes in his novel
"Stand on Zanzibar" an all-knowing supercomputer baptized Shalmaneser.
|
23
|
+
"Stand on Zanzibar" an all knowing supercomputer baptized Shalmaneser.
|
24
|
+
|
25
|
+
Shalmaneser also has other origins like the king [Shalmaneser III](https://en.wikipedia.org/wiki/Shalmaneser_III).
|
26
|
+
|
27
|
+
> "SCANALYZER is the one single, the ONLY study of the news in depth
|
28
|
+
> that’s processed by General Technics’ famed computer Shalmaneser,
|
29
|
+
> who sees all, hears all, knows all save only that which YOU, Mr. and Mrs.
|
30
|
+
> Everywhere, wish to keep to yourselves." <br/>
|
31
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
32
|
+
|
33
|
+
> But Shalmaneser is a Micryogenic® computer bathed in liquid helium and it’s cold in his vault. <br/>
|
34
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
35
|
+
|
36
|
+
> “Of course not. Shalmaneser’s main task is to achieve the impossible again, a routine undertaking here at GT.” <br/>
|
37
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
38
|
+
|
39
|
+
> “They programmed Shalmaneser with the formula for this stiffener, see, and…” <br/>
|
40
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
41
|
+
|
42
|
+
> What am I going to do now? <br/>
|
43
|
+
> “All right, Shalmaneser!” <br/>
|
44
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
45
|
+
|
46
|
+
> Shalmaneser is a Micryogenic® computer bathed in liquid helium and there’s no sign of Teresa. <br/>
|
47
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
48
|
+
|
49
|
+
> Bathed in his currents of liquid helium, self-contained, immobile, vastly well informed by every mechanical sense: Shalmaneser. <br/>
|
50
|
+
> John Brunner (1968) "Stand on Zanzibar"
|
51
|
+
|
52
|
+
## Description
|
53
|
+
|
54
|
+
Please be careful, the whole thing is under construction! For now Shalmaneser is not intended to run on Windows systems since it heavily uses system calls for external invocations.
|
55
|
+
Current versions of Shalmaneser have been tested on Linux only (other *NIX testers are welcome!).
|
56
|
+
|
57
|
+
Shalmaneser is a supervised learning toolbox for shallow semantic parsing, i.e. the automatic assignment of semantic classes and roles to text. This technique is often called [SRL](https://en.wikipedia.org/wiki/Semantic_role_labeling) (Semantic Role Labelling). The system was developed for Frame Semantics; thus we use Frame Semantics terminology and call the classes frames and the roles frame elements. However, the architecture is reasonably general, and with a certain amount of adaption, Shalmaneser should be usable for other paradigms (e.g., PropBank roles) as well. Shalmaneser caters both for end users, and for researchers.
|
58
|
+
|
59
|
+
For end users, we provide a simple end user mode which can simply apply the pre-trained classifiers
|
60
|
+
for [English](http://www.coli.uni-saarland.de/projects/salsa/shal/index.php?nav=download) (FrameNet 1.3 annotation / Collins parser)
|
61
|
+
and [German](http://www.coli.uni-saarland.de/projects/salsa/shal/index.php?nav=download) (SALSA 1.0 annotation / Sleepy parser).
|
62
|
+
|
63
|
+
We'll try to provide newer pretrained models for English, German, and possibly other languages as soon as possible.
|
64
|
+
|
65
|
+
For researchers interested in investigating shallow semantic parsing, our system is extensively configurable and extendable.
|
66
|
+
|
67
|
+
## Origin
|
68
|
+
|
69
|
+
The original version of Shalmaneser was written by Sebastian Padó, Katrin Erk, Alexander Koller, Ines Rehbein, Aljoscha Burchardt and others during their work in the SALSA Project.
|
70
|
+
|
71
|
+
You can find original versions of Shalmaneser up to ``1.1`` on the [SALSA](http://www.coli.uni-saarland.de/projects/salsa/shal/) project page.
|
72
|
+
|
73
|
+
## Publications on Shalmaneser
|
74
|
+
|
75
|
+
- K. Erk and S. Padó: Shalmaneser - a flexible toolbox for semantic role assignment. Proceedings of LREC 2006, Genoa, Italy. [Click here for details](http://www.nlpado.de/~sebastian/pub/papers/lrec06_erk.pdf).
|
76
|
+
|
77
|
+
- TODO: add other works
|
78
|
+
|
79
|
+
## Documentation
|
80
|
+
|
81
|
+
The project documentation can be found in our [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md) folder.
|
82
|
+
|
83
|
+
## Development
|
84
|
+
|
85
|
+
We are working now only on the `master` branch. For different intermediate versions see corresponding tags.
|
86
|
+
|
87
|
+
## Installation
|
88
|
+
|
89
|
+
See the installation instructions in the [doc](https://github.com/arbox/shalmaneser/blob/master/doc/index.md#installation) folder.
|
90
|
+
|
91
|
+
### Tokenizers
|
92
|
+
|
93
|
+
- [Ucto](http://ilk.uvt.nl/ucto/)
|
94
|
+
|
95
|
+
### POS Taggers
|
96
|
+
|
97
|
+
- [TreeTagger](http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/)
|
98
|
+
|
99
|
+
### Lemmatizers
|
100
|
+
|
101
|
+
- [TreeTagger](http://www.cis.uni-muenchen.de/~schmid/tools/TreeTagger/)
|
102
|
+
|
103
|
+
### Parsers
|
104
|
+
|
105
|
+
- [BerkeleyParser](https://github.com/slavpetrov/berkeleyparser)
|
106
|
+
- [Stanford Parser](http://nlp.stanford.edu/software/lex-parser.shtml)
|
107
|
+
- [Collins Parser](http://www.cs.columbia.edu/~mcollins/code.html)
|
108
|
+
|
109
|
+
### Machine Learning Systems
|
110
|
+
|
111
|
+
- [OpenNLP MaxEnt](http://sourceforge.net/projects/maxent/files/Maxent/2.4.0/)
|
112
|
+
- [Mallet](http://mallet.cs.umass.edu/index.php)
|
113
|
+
|
114
|
+
## License
|
115
|
+
|
116
|
+
Shalmaneser is released under the `GPL v. 2.0` license as of the initial authors.
|
117
|
+
|
118
|
+
For a local copy of the full license text see the [LICENSE](LICENSE.md) file.
|
119
|
+
|
120
|
+
## Contributing
|
121
|
+
|
122
|
+
Feel free to contact me via Github. Open an issue if you see problems or need help.
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# @note AB: This whole thing should be obsolete on Ruby 1.9
|
2
|
+
# @note #unpack seems to work on 1.8 and 1.9 equally
|
3
|
+
require_relative 'utf_iso'
|
4
|
+
|
5
|
+
####################3
|
6
|
+
# Reformatting to and from
|
7
|
+
# a hex format for special characters
|
8
|
+
# Conversion between plain characters and a hexadecimal
# entity notation (&#x....;) for non-ASCII characters.
module Shalmaneser
  module Frappe
    module Ampersand
      # Decode hexadecimal character entities (&#x..;) in +str+
      # into the characters they represent.
      # Any other &...; entity is left untouched.
      def self.hex_to_iso(str)
        str.gsub(/&.+?;/) do |entity|
          hex_match = entity.match(/&#x(.+);/)
          hex_match ? hex_match[1].hex.chr : entity
        end
      end

      # Encode an ISO-8859-1 string into the hex-entity notation,
      # going through UTF-8 first.
      def self.iso_to_hex(str)
        utf8_to_hex(UtfIso.from_iso_8859_1(str))
      end

      # Encode every non-ASCII codepoint of a UTF-8 string as a
      # four-digit hexadecimal entity; ASCII characters pass through.
      def self.utf8_to_hex(str)
        result = ""
        str.unpack('U*').each do |codepoint|
          result << if codepoint < 0x80
                      codepoint.chr
                    else
                      format("&\#x%04x;", codepoint)
                    end
        end
        result
      end
    end
  end
end
|
@@ -0,0 +1,126 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
require_relative 'one_parsed_file'
|
4
|
+
require_relative 'frappe_read_stxml'
|
5
|
+
require_relative 'frappe_flat_syntax'
|
6
|
+
require 'external_systems'
|
7
|
+
require 'logger'
|
8
|
+
|
9
|
+
module Shalmaneser
  module Frappe
    ##############################
    # Class for managing parses.
    #
    # Given either a directory with tab format files or
    # a directory with SalsaTigerXML files (or both) and
    # a directory for putting parse files:
    # - parse, unless no parsing set in the experiment file
    # - for each parsed file: yield one OneParsedFile object
    class FileParser
      # @param [FrappeConfigData] exp
      # @param [Hash<String, String>] file_suffixes Hash: file type(string) -> suffix(string)
      # @param [String] parse_dir string: name of directory to put parses
      # @param [Hash] dirs further directories
      def initialize(exp, file_suffixes, parse_dir, dirs = {})
        @exp = exp
        @file_suffixes = file_suffixes
        @parse_dir = parse_dir
        @tab_dir = dirs["tab_dir"]
        @stxml_dir = dirs["stxml_dir"]
        # Directory with pre-parsed data, if any was configured.
        @parsed_files = @exp.get("directory_parserout")
      end

      ###
      # Yields one OneParsedFile object per (pseudo-)parsed file.
      def each_parsed_file
        pos_sfx = @exp.get("do_postag") ? @file_suffixes["pos"] : nil
        lemma_sfx = @exp.get("do_lemmatize") ? @file_suffixes["lemma"] : nil

        unless @exp.get("do_parse")
          # No parsing requested: provide a pseudo-parse tree instead.
          if @stxml_dir
            # Use existing SalsaTigerXML files.
            Dir[@stxml_dir + "*.xml"].each do |stxml_file|
              core = File.basename(stxml_file, ".xml")
              # Pass the matching tab file along when we know the tab directory.
              tab_file = @tab_dir ? @tab_dir + core + @file_suffixes["tab"] : nil
              reader = FrappeReadStxml.new(stxml_file, tab_file,
                                           pos_sfx, lemma_sfx)
              yield OneParsedFile.new(core, stxml_file, reader)
            end
          else
            # Construct SalsaTigerXML from the tab files.
            Dir[@tab_dir + "*" + @file_suffixes["tab"]].each do |tab_file|
              reader = FrappeFlatSyntax.new(tab_file, pos_sfx, lemma_sfx)
              core = File.basename(tab_file, @file_suffixes["tab"])
              yield OneParsedFile.new(core, tab_file, reader)
            end
          end
          return
        end

        # Parsing requested: obtain the parser interface class.
        sys_class = ExternalSystems.get_interface("parser", @exp.get("parser"))

        # This suffix is used as extension for parsed files.
        parse_suffix = ".#{sys_class.name.split('::').last}"

        sys = sys_class.new(@exp.get("parser_path"),
                            @file_suffixes["tab"],
                            parse_suffix,
                            @file_suffixes["stxml"],
                            "pos_suffix" => pos_sfx,
                            "lemma_suffix" => lemma_sfx,
                            "tab_dir" => @tab_dir)

        if @parsed_files
          # Reuse pre-computed parses.
          LOGGER.info "#{PROGRAM_NAME}: Using pre-computed parses in #{@parsed_files}.\n"\
                      "#{PROGRAM_NAME} Postprocessing SalsaTigerXML data."

          Dir[@parsed_files + "*"].each do |parse_file|
            # Skip anything that is not a regular file.
            next unless File.stat(parse_file).ftype == "file"

            # Core filename: basename without the last extension.
            core = File.basename(parse_file, ".*")

            # Use iterator to read each parsed file.
            yield OneParsedFile.new(core, parse_file, sys)
          end
        else
          # Run the parser now.
          LOGGER.info "#{PROGRAM_NAME}: Syntactic analysis with #{sys.class.name.split('::').last}."

          raise "Cannot parse without tab files" unless @tab_dir

          # @note AB: NOTE This is the position where a parser is invoked.
          sys.process_dir(@tab_dir, @parse_dir)

          LOGGER.info "#{PROGRAM_NAME}: Postprocessing SalsaTigerXML data."

          Dir[@parse_dir + "*" + parse_suffix].each do |parse_file|
            core = File.basename(parse_file, parse_suffix)

            # Use iterator to read each parsed file.
            yield OneParsedFile.new(core, parse_file, sys)
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,196 @@
|
|
1
|
+
###
|
2
|
+
# FixSynSemMapping:
|
3
|
+
# Given a SalsaTigerRegXML sentence with semantic role annotation,
|
4
|
+
# simplify the mapping of semantic roles to syntactic constituents
|
5
|
+
#
|
6
|
+
# The following is lifted from the LREC06 paper on Shalmaneser:
|
7
|
+
# During preprocessing, the span of semantic roles in the training corpora is
|
8
|
+
# projected onto the output of the syntactic parser by assigning each
|
9
|
+
# role to the set of maximal constituents covering its word span.
|
10
|
+
# If the word span of a role does not coincide
|
11
|
+
# with parse tree constituents, e.g. due to misparses,
|
12
|
+
# the role is ``spread out'' across several constituents. This leads to
|
13
|
+
# idiosyncratic paths between predicate and semantic role in the parse
|
14
|
+
# tree.
|
15
|
+
#
|
16
|
+
# [The following span standardization algorithm is used to make the
|
17
|
+
# syntax-semantics mapping more uniform:]
|
18
|
+
# Given a role r that has been assigned, let N be the set of
|
19
|
+
# terminal nodes of the syntactic structure that are covered by r.
|
20
|
+
#
|
21
|
+
# Iteratively compute the maximal projection of N in the syntactic
|
22
|
+
# structure:
|
23
|
+
# 1) If n is a node such that all of n's children are in N,
|
24
|
+
# then remove n's children from N and add n instead.
|
25
|
+
# 2) If n is a node with 3 or more children, and all of n's
|
26
|
+
# children except one are in N, then remove n's children from N
|
27
|
+
# and add n instead.
|
28
|
+
# 3) If n is an NP with 2 children, and one of them, another NP,
|
29
|
+
# is in N, and the other, a relative clause, is not, then remove
|
30
|
+
# n's children from N and add n instead.
|
31
|
+
#
|
32
|
+
# If none of the rules is applicable to N anymore, assign r to the
|
33
|
+
# nodes in N.
|
34
|
+
#
|
35
|
+
# Rule 1 implements normal maximal projection. Rule 2 ``repairs'' parser
|
36
|
+
# errors where all children of a node but one have been assigned the
|
37
|
+
# same role. Rule 3 addresses a problem of the FrameNet data, where
|
38
|
+
# relative clauses have been omitted from roles assigned to NPs.
|
39
|
+
|
40
|
+
# KE Feb 08: rule 3 currently out of commission!
|
41
|
+
|
42
|
+
# require "SalsaTigerRegXML"
|
43
|
+
|
44
|
+
module FixSynSemMapping
  ##
  # Simplify the mapping of semantic roles (FEs) to syntactic
  # constituents in the given sentence, in place.
  #
  # Relevant settings in the experiment file:
  #
  # fe_syn_repair:
  # If there is a node that would be a max. constituent for the
  # words covered by the given FE, except that it has one child
  # whose words are not in the FE, use the node as max constituent anyway.
  # This is to repair cases where the parser has made an attachment choice
  # that differs from the one in the gold annotation.
  #
  # fe_rel_repair:
  # If there is an NP such that all of its children except one have been
  # assigned the same FE, and that missing child is a relative clause
  # depending on one of the other children, then take the complete NP as
  # that FE.
  #
  # Returns early (doing nothing) when neither repair setting is on
  # or when no sentence is given.
  def FixSynSemMapping.fixit(sent, # SalsaTigerSentence object
                             exp, # experiment file object
                             interpreter_class) # SynInterpreter class


    unless exp.get("fe_syn_repair") or exp.get("fe_rel_repair")
      return
    end

    if sent.nil?
      return
    end

    # "repair" FEs:
    sent.each_frame { |frame|

      frame.each_child { |fe_or_target|

        # repair only if the FE currently
        # points to more than one syn node
        if fe_or_target.children.length < 2
          next
        end

        if exp.get("fe_rel_repair")
          # Relative-clause repair: if the last syn node of the FE is a
          # relative pronoun/adverb (WDT, WP, WP$, WRB by simplified POS),
          # collapse the FE onto that single node.
          lastfe = fe_or_target.children.last
          if lastfe and interpreter_class.simplified_pt(lastfe) =~ /^(WDT)|(WP\$?)|(WRB)/

            # remove syn nodes that the FE points to
            old_fe_syn = fe_or_target.children
            old_fe_syn.each { |child|
              fe_or_target.remove_child(child)
            }

            # set it to point only to the last previous node, the relative pronoun
            fe_or_target.add_child(lastfe)
          end
        end

        if exp.get("fe_syn_repair")
          # Maximal-projection repair: recompute the maximal constituents
          # covering the FE's terminal span.
          # remove syn nodes that the FE points to
          old_fe_syn = fe_or_target.children
          old_fe_syn.each { |child|
            fe_or_target.remove_child(child)
          }

          # and recompute from the terminal (yield) nodes of the old span;
          # the truthy "fe_syn_repair" value also switches on the
          # one-child-missing heuristic inside max_constituents.
          new_fe_syn = interpreter_class.max_constituents(old_fe_syn.map { |t|
                                                            t.yield_nodes
                                                          }.flatten.uniq,
                                                          sent,
                                                          exp.get("fe_syn_repair"))

          # make the FE point to the new nodes
          new_fe_syn.each { |syn_node|
            fe_or_target.add_child(syn_node)
          }
        end
      } # each FE
    } # each frame
  end # def fixit
end # module
|
124
|
+
|
125
|
+
|
126
|
+
#########3
|
127
|
+
# old code
|
128
|
+
|
129
|
+
# if exp.get("fe_rel_repair")
|
130
|
+
# # repair relative clauses:
|
131
|
+
# # then make a procedure to pass on to max constituents
|
132
|
+
# # that will recognize the relevant cases
|
133
|
+
|
134
|
+
# accept_anyway_proc = Proc.new { |node, children_in, children_out|
|
135
|
+
|
136
|
+
# # node: SynNode
|
137
|
+
# # children_in, children_out: array:SynNode. children_in are the children
|
138
|
+
# # that are already covered by the FE, children_out the ones that aren't
|
139
|
+
|
140
|
+
# # if node is an NP,
|
141
|
+
# # and only one of its children is out,
|
142
|
+
# # and one node in children_in is an NP, and the missing child is an SBAR
|
143
|
+
# # with a child that is a relative pronoun, then consider the child in children_out as covered
|
144
|
+
# if interpreter_class.category(node) == "noun" and
|
145
|
+
# children_out.length() == 1 and
|
146
|
+
# children_in.select { |n| interpreter_class.category(n) == "noun" } and
|
147
|
+
# interpreter_class.category(children_out.first) == "sent" and
|
148
|
+
# (ch = children_out.first.children) and
|
149
|
+
# ch.select { |n| interpreter_class.relative_pronoun?(n) }
|
150
|
+
# true
|
151
|
+
# else
|
152
|
+
# false
|
153
|
+
# end
|
154
|
+
# }
|
155
|
+
|
156
|
+
# else
|
157
|
+
# accept_anyway_proc = nil
|
158
|
+
# end
|
159
|
+
|
160
|
+
|
161
|
+
# # "repair" FEs:
|
162
|
+
# sent.each_frame { |frame|
|
163
|
+
|
164
|
+
# frame.each_child { |fe_or_target|
|
165
|
+
|
166
|
+
# # repair only if the FE currently
|
167
|
+
# # points to more than one syn node, or
|
168
|
+
# # if it is a noun with a non-covered sentence sister
|
169
|
+
# if fe_or_target.children.length() > 1 or
|
170
|
+
# (exp.get("fe_rel_repair") and (curr_marked = fe_or_target.children.first()) and
|
171
|
+
# interpreter_class.category(curr_marked) == "noun" and
|
172
|
+
# (p = curr_marked.parent) and
|
173
|
+
# p.children.select { |n| n != curr_marked and interpreter_class.category(n) == "sent" } )
|
174
|
+
|
175
|
+
# # remember nodes covered by the FE
|
176
|
+
# old_fe_syn = fe_or_target.children()
|
177
|
+
|
178
|
+
# # remove syn nodes that the FE points to
|
179
|
+
# old_fe_syn.each { |child|
|
180
|
+
# fe_or_target.remove_child(child)
|
181
|
+
# }
|
182
|
+
|
183
|
+
# # and recompute
|
184
|
+
# new_fe_syn = interpreter_class.max_constituents(old_fe_syn.map { |t| t.yield_nodes}.flatten.uniq,
|
185
|
+
# sent,
|
186
|
+
# exp.get("fe_syn_repair"),
|
187
|
+
# accept_anyway_proc)
|
188
|
+
|
189
|
+
# # make the FE point to the new nodes
|
190
|
+
# new_fe_syn.each { |syn_node|
|
191
|
+
# fe_or_target.add_child(syn_node)
|
192
|
+
# }
|
193
|
+
|
194
|
+
# end # if FE points to more than one syn node
|
195
|
+
# } # each FE
|
196
|
+
# } # each frame
|