shalmaneser-fred 1.2.0.rc4 → 1.2.rc5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +47 -18
- data/bin/fred +8 -3
- data/lib/fred/FredConventions.rb +190 -189
- data/lib/fred/abstract_context_provider.rb +246 -0
- data/lib/fred/abstract_fred_feature_access.rb +43 -0
- data/lib/fred/answer_key_access.rb +130 -0
- data/lib/fred/aux_keep_writers.rb +94 -0
- data/lib/fred/baseline.rb +153 -0
- data/lib/fred/context_provider.rb +55 -0
- data/lib/fred/feature_extractors/fred_context_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_context_pos_feature_extractor.rb +48 -0
- data/lib/fred/feature_extractors/fred_feature_extractor.rb +50 -0
- data/lib/fred/feature_extractors/fred_ngram_feature_extractor.rb +65 -0
- data/lib/fred/feature_extractors/fred_syn_feature_extractor.rb +33 -0
- data/lib/fred/feature_extractors/fred_synsem_feature_extractor.rb +32 -0
- data/lib/fred/feature_extractors.rb +5 -0
- data/lib/fred/file_zipped.rb +43 -0
- data/lib/fred/find_all_targets.rb +94 -0
- data/lib/fred/find_targets_from_frames.rb +92 -0
- data/lib/fred/fred.rb +43 -40
- data/lib/fred/fred_error.rb +15 -0
- data/lib/fred/fred_eval.rb +311 -0
- data/lib/fred/fred_feature_access.rb +420 -0
- data/lib/fred/fred_feature_info.rb +56 -0
- data/lib/fred/fred_featurize.rb +525 -0
- data/lib/fred/fred_parameters.rb +190 -0
- data/lib/fred/fred_split.rb +86 -0
- data/lib/fred/fred_split_pkg.rb +189 -0
- data/lib/fred/fred_test.rb +571 -0
- data/lib/fred/fred_train.rb +125 -0
- data/lib/fred/grammatical_function_access.rb +63 -0
- data/lib/fred/md5.rb +6 -0
- data/lib/fred/meta_feature_access.rb +185 -0
- data/lib/fred/non_contiguous_context_provider.rb +532 -0
- data/lib/fred/opt_parser.rb +182 -161
- data/lib/fred/plot_and_r_eval.rb +486 -0
- data/lib/fred/single_sent_context_provider.rb +76 -0
- data/lib/fred/slide_var.rb +148 -0
- data/lib/fred/targets.rb +136 -0
- data/lib/fred/toggle_var.rb +61 -0
- data/lib/fred/word_lemma_pos_ne.rb +51 -0
- data/lib/fred/write_features_binary.rb +95 -0
- data/lib/fred/write_features_nary.rb +51 -0
- data/lib/fred/write_features_nary_or_binary.rb +51 -0
- data/lib/shalmaneser/fred.rb +1 -0
- metadata +57 -30
- data/lib/fred/Baseline.rb +0 -150
- data/lib/fred/FileZipped.rb +0 -31
- data/lib/fred/FredBOWContext.rb +0 -877
- data/lib/fred/FredDetermineTargets.rb +0 -319
- data/lib/fred/FredEval.rb +0 -312
- data/lib/fred/FredFeatureExtractors.rb +0 -322
- data/lib/fred/FredFeatures.rb +0 -1061
- data/lib/fred/FredFeaturize.rb +0 -602
- data/lib/fred/FredNumTrainingSenses.rb +0 -27
- data/lib/fred/FredParameters.rb +0 -402
- data/lib/fred/FredSplit.rb +0 -84
- data/lib/fred/FredSplitPkg.rb +0 -180
- data/lib/fred/FredTest.rb +0 -606
- data/lib/fred/FredTrain.rb +0 -144
- data/lib/fred/PlotAndREval.rb +0 -480
- data/lib/fred/fred_config_data.rb +0 -185
- data/test/frprep/test_opt_parser.rb +0 -94
- data/test/functional/functional_test_helper.rb +0 -58
- data/test/functional/test_fred.rb +0 -47
- data/test/functional/test_frprep.rb +0 -99
- data/test/functional/test_rosy.rb +0 -40
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'fred/abstract_context_provider'
|
2
|
+
require 'salsa_tiger_xml/salsa_tiger_sentence'
|
3
|
+
require 'salsa_tiger_xml/file_parts_parser'
|
4
|
+
|
5
|
+
module Shalmaneser
  module Fred
    # Context provider for contiguous input text.
    #
    # Subclass of AbstractContextProvider that assumes the input is one
    # running text and computes context windows accordingly.
    class ContextProvider < AbstractContextProvider
      # Iterate over all Salsa/Tiger XML files of a directory and yield each
      # target word as soon as its context window is filled, or, for the
      # remaining targets, once the last file has been read.
      #
      # Files are visited in the numeric order of their basenames, since
      # that is how frprep mostly names its output files.
      #
      # @param dir [String] directory containing Salsa/Tiger XML data. It is
      #   concatenated directly with "*.xml", so it presumably has to end
      #   with a path separator -- TODO confirm against callers.
      # @yield [result] every result produced by each_window_for_file and
      #   each_remaining_target
      def each_window(dir)
        xml_files = Dir[dir + "*.xml"].sort do |left, right|
          File.basename(left, ".xml").to_i <=> File.basename(right, ".xml").to_i
        end

        xml_files.each do |path|
          # progress report, only in verbose mode
          $stderr.puts "Featurizing #{File.basename(path)}" if @exp.get("verbose")

          parser = STXML::FilePartsParser.new(path)
          each_window_for_file(parser) { |window| yield window }
        end

        # no more input: hand out whatever is still waiting in the context
        each_remaining_target { |window| yield window }
      end

      protected

      # Same as each_window, but restricted to a single file
      # (called from each_window).
      #
      # @param fpp [STXML::FilePartsParser] parser over one Salsa/Tiger XML file
      # @yield [result] every result produced by each_window_for_sent
      def each_window_for_file(fpp)
        fpp.scan_s do |sentence_xml|
          sentence = STXML::SalsaTigerSentence.new(sentence_xml)
          each_window_for_sent(sentence) { |window| yield window }
        end
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require_relative 'fred_feature_extractor'
|
2
|
+
|
3
|
+
module Shalmaneser
  module Fred
    # Extractor for the 'context' feature: one feature per context entry,
    # built from the metafeature label and the entry's lemma field.
    class FredContextFeatureExtractor < FredFeatureExtractor
      FredContextFeatureExtractor.announce_me

      # @return [String] name by which this feature is chosen in the
      #   experiment file
      def self.feature_name
        'context'
      end

      # @param exp [Object] Fred experiment file object; must answer
      #   get_lf("feature", "context") with the chosen context sizes
      def initialize(exp)
        super(exp)

        # Metafeature labels ("CX" + size) of the context sizes chosen as
        # features, kept as hash keys for fast lookup.
        @cxsizes = {}
        @exp.get_lf("feature", "context").each do |size|
          @cxsizes["CX#{size}"] = true
        end
      end

      # Compute context features from metafeatures.
      #
      # @param feature_hash [Hash{String => Array<String>}] metafeature
      #   label -> metafeatures
      # @yield [String] label followed by the lemma field of each entry
      def each_feature(feature_hash)
        # Entries are '#'-separated; the original note gives the layout as
        # grf#word#lemma#pos#ne, which puts the lemma at index 2.
        # NOTE(review): other extractors describe CX entries as
        # word#lemma#pos#ne (lemma at index 1) -- confirm which layout holds.
        lemma_field = 2

        feature_hash.each do |label, entries|
          # only context metafeatures of a size the user asked for
          next unless @cxsizes[label]

          entries.each do |entry|
            # skip entries containing a run of five separators
            next if entry =~ /#####/

            yield label + entry.split("#")[lemma_field]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require_relative 'fred_feature_extractor'
|
2
|
+
|
3
|
+
module Shalmaneser
  module Fred
    # Extractor for the 'context_pos' feature: part of speech of context
    # entries, computed separately and for small contexts (size <= 10) only.
    class FredContextPOSFeatureExtractor < FredFeatureExtractor
      FredContextPOSFeatureExtractor.announce_me

      # @return [String] name by which this feature is chosen in the
      #   experiment file
      def self.feature_name
        'context_pos'
      end

      # @param exp [Object] Fred experiment file object; must answer
      #   get_lf("feature", "context") with the chosen context sizes
      def initialize(exp)
        super(exp)

        # Metafeature labels ("CX" + size) of the chosen context sizes that
        # are small enough, kept as hash keys for fast lookup.
        @cxsizes = {}
        @exp.get_lf("feature", "context").each do |size|
          @cxsizes["CX#{size}"] = true if size <= 10
        end

        return unless @cxsizes.empty?

        # nothing qualifies: tell the user the feature stays empty
        $stderr.puts "context_pos feature warning: will not be computed",
                     "as there is no context of size <= 10"
      end

      # Compute POS context features from metafeatures.
      #
      # @param feature_hash [Hash{String => Array<String>}] metafeature
      #   label -> metafeatures
      # @yield [String] "POS" + label + POS field of each entry
      def each_feature(feature_hash)
        # entry layout per the original note: word#lemma#pos#ne
        pos_field = 2

        feature_hash.each do |label, entries|
          # only context metafeatures of a size selected in initialize
          next unless @cxsizes[label]

          entries.each do |entry|
            yield "POS" + label + entry.split("#")[pos_field]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require 'fred/fred_feature_info'
|
2
|
+
|
3
|
+
module Shalmaneser
  module Fred
    # Abstract base class of all Fred feature extractors.
    #
    # Subclasses override feature_name and each_feature, and call
    # announce_me in their class body to register with FredFeatureInfo.
    class FredFeatureExtractor
      # Name by which this feature is chosen in the experiment file.
      #
      # @return [String]
      # @raise [RuntimeError] always, unless overridden by the subclass
      def self.feature_name
        raise "Overwrite me."
      end

      # Register the calling class with FredFeatureInfo, if that collector
      # class is loaded; otherwise do nothing.
      #
      # This is a singleton method, so visibility keywords such as
      # `protected` do not apply to it; it is public.
      #
      # @return [Object, nil] whatever FredFeatureInfo.add_feature returns,
      #   or nil when FredFeatureInfo is not available
      def self.announce_me
        FredFeatureInfo.add_feature(self) if defined?(FredFeatureInfo)
      end

      # @param exp [Object] Fred experiment file object
      def initialize(exp)
        @exp = exp
      end

      # Compute features from meta-features.
      #
      # @param feature_hash [Hash{String => Array<String>}] metafeature
      #   label -> metafeatures
      # @yield [String] each feature as a string (in subclasses)
      # @raise [RuntimeError] always, unless overridden by the subclass
      def each_feature(feature_hash)
        raise "overwrite me"
      end
    end
  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require_relative 'fred_feature_extractor'
|
2
|
+
|
3
|
+
module Shalmaneser
  module Fred
    # Extractor for the 'ngram' feature: lemma and POS bigrams/trigrams
    # taken from the entries around the target in one context window.
    class FredNgramFeatureExtractor < FredFeatureExtractor
      FredNgramFeatureExtractor.announce_me

      # @return [String] name by which this feature is chosen in the
      #   experiment file
      def self.feature_name
        'ngram'
      end

      # @param exp [Object] Fred experiment file object; must answer
      #   get_lf("feature", "context") with the chosen context sizes
      def initialize(exp)
        super(exp)

        # Context size the ngrams are computed from: the first chosen
        # context size that is at least 2 (nil if there is none).
        @cxsize = @exp.get_lf("feature", "context").detect { |size| size >= 2 }
        return if @cxsize

        $stderr.puts "Warning: no context of size >= 2, so"
        $stderr.puts "no ngram feature computed."
      end

      # Compute ngram features from metafeatures.
      #
      # @param feature_hash [Hash{String => Array<String>}] metafeature
      #   label -> metafeatures
      # @yield [String] ngram label (BLEM, BPOS, TLEM, TPOS) followed by the
      #   concatenated lemma or POS fields
      def each_feature(feature_hash)
        # entry layout per the original note: word#lemma#pos#ne
        lemma_index = 1
        pos_index = 2

        # [offsets relative to @cxsize, feature label, field to read]
        #
        # The context holds 2*cxsize entries, i.e. cxsize is the length of
        # a one-sided window. The bigram around the target uses entries
        # cxsize-1 and cxsize; the trigram (two before, one after) uses
        # cxsize-2, cxsize-1 and cxsize.
        ngram_specs = [
          [[-1, 0], "BLEM", lemma_index],     # bigram of lemmas
          [[-1, 0], "BPOS", pos_index],       # bigram of POSs
          [[-2, -1, 0], "TLEM", lemma_index], # trigram of lemmas
          [[-2, -1, 0], "TPOS", pos_index]    # trigram of POSs
        ]

        feature_hash.each do |ftype, fvalues|
          # only the context window chosen in initialize is used
          next unless ftype == "CX" + @cxsize.to_s

          ngram_specs.each do |offsets, label, field|
            parts = offsets.map { |offset| fvalues[@cxsize + offset] }.compact
            # emit the ngram only if every position had an entry
            next unless parts.length == offsets.length

            yield label + parts.map { |part| part.split("#")[field] }.join
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require_relative 'fred_feature_extractor'
|
2
|
+
|
3
|
+
module Shalmaneser
  module Fred
    # Extractor for the 'syntax' feature: the grammatical-function field of
    # the CH, PA and SI metafeatures.
    class FredSynFeatureExtractor < FredFeatureExtractor
      FredSynFeatureExtractor.announce_me

      # @return [String] name by which this feature is chosen in the
      #   experiment file
      def self.feature_name
        'syntax'
      end

      # Compute syntax features from metafeatures.
      #
      # @param feature_hash [Hash{String => Array<String>}] metafeature
      #   label -> metafeatures
      # @yield [String] label followed by the grammatical-function field
      def each_feature(feature_hash)
        feature_hash.each do |ftype, fvalues|
          # position of the grammatical function inside an entry
          grf_index =
            case ftype
            when "CH", "PA" then 0
            when "SI" then 1 # layout: parentlemma#grf#word#lemma#pos#ne
            end

          # anything else is not a syntactic metafeature
          next if grf_index.nil?

          fvalues.each { |entry| yield ftype + entry.split("#")[grf_index] }
        end
      end
    end
  end
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require_relative 'fred_feature_extractor'
|
2
|
+
|
3
|
+
module Shalmaneser
  module Fred
    # Extractor for the 'synsem' feature: syntax plus headword, built from
    # the CH, PA and SI metafeatures.
    class FredSynsemFeatureExtractor < FredFeatureExtractor
      FredSynsemFeatureExtractor.announce_me

      # @return [String] name by which this feature is chosen in the
      #   experiment file
      def self.feature_name
        'synsem'
      end

      # Compute syntax-plus-headword features from metafeatures.
      #
      # @param feature_hash [Hash{String => Array<String>}] metafeature
      #   label -> metafeatures
      # @yield [String] label + "SEM" + the entry (for SI entries without
      #   their leading parent-lemma field)
      def each_feature(feature_hash)
        feature_hash.each do |ftype, fvalues|
          if %w[CH PA].include?(ftype)
            # layout: grf#word#lemma#pos#ne -- used as is
            fvalues.each { |entry| yield ftype + "SEM" + entry }
          elsif "SI" == ftype
            # layout: parentlemma#grf#word#lemma#pos#ne -- drop the parent lemma
            fvalues.each do |entry|
              yield ftype + "SEM" + entry.split("#")[1..-1].join("#")
            end
          end
          # any other label is not a syntax metafeature and is ignored
        end
      end
    end
  end
end
|
@@ -0,0 +1,5 @@
|
|
1
|
+
require 'fred/feature_extractors/fred_context_feature_extractor'
|
2
|
+
require 'fred/feature_extractors/fred_context_pos_feature_extractor'
|
3
|
+
require 'fred/feature_extractors/fred_ngram_feature_extractor'
|
4
|
+
require 'fred/feature_extractors/fred_syn_feature_extractor'
|
5
|
+
require 'fred/feature_extractors/fred_synsem_feature_extractor'
|
@@ -0,0 +1,43 @@
|
|
1
|
+
require 'fred/fred_error'
|
2
|
+
require 'logging'
|
3
|
+
|
4
|
+
module Shalmaneser
  module Fred
    # Opens gzip-compressed files by piping through the external
    # gzip/gunzip programs.
    class FileZipped
      # Open a gzipped file for reading, writing or appending.
      #
      # The external programs are started with an argument vector and
      # output redirection, i.e. without a shell, so the file name needs no
      # escaping and names containing spaces or shell metacharacters work.
      #
      # @todo Rewrite this class using stdlib.
      # @return [IO] pipe to the gunzip process (mode r) or to the gzip
      #   process (modes w and a)
      # @param filename [String]
      # @param mode [String] one of "r", "w", "a"
      # @raise [FredError] if the mode is unsupported, the file to read does
      #   not exist, or starting the external program failed
      def self.new(filename, mode = 'r')
        unless %w[r w a].include?(mode)
          LOGGER.fatal "FileZipped error: only modes r, w, a are implemented. "\
                       "I got: #{mode}."
          raise FredError
        end

        # Check the real file name, before anything is started.
        if mode == "r" && !File.exist?(filename)
          raise FredError, 'File does not exist!'
        end

        begin
          case mode
          when "r"
            # "--" keeps a leading "-" in the name from being read as an option
            IO.popen(["gunzip", "-c", "--", filename])
          when "w"
            flags = File::WRONLY | File::CREAT | File::TRUNC
            IO.popen(["gzip", "-c"], "w", out: [filename, flags, 0o666])
          when "a"
            flags = File::WRONLY | File::CREAT | File::APPEND
            IO.popen(["gzip", "-c"], "w", out: [filename, flags, 0o666])
          end
        rescue StandardError => e
          # Pass the cause to FredError as its nested exception; the third
          # argument of Kernel#raise would be interpreted as a backtrace.
          raise FredError.new("Error opening file #{filename}.", e)
        end
      end
    end
  end
end
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'fred/targets'
|
2
|
+
|
3
|
+
module Shalmaneser
  module Fred
    # Target finder that treats every terminal with a lemma/POS pair known
    # from the training data as a target, minus stopwords, auxiliaries and
    # prepositions.
    class FindAllTargets < Targets
      # @param exp [Object] Fred experiment file object
      # @param interpreter_class [Class] interpreter used for lemma, category
      #   and auxiliary lookups on nodes
      def initialize(exp, interpreter_class)
        # read target info from file
        super(exp, interpreter_class, "r")

        @training_lemmapos_pairs = get_lemma_pos
        get_senses(@training_lemmapos_pairs)

        # Lemmas excluded from assignment, for now ("make" is currently
        # not on the list).
        @stoplemmas = %w[have do be]
      end

      # Point out all targets of a sentence. The senses are unknown here,
      # so every target maps to a single sense record with a nil sense.
      #
      # @param sent [SalsaTigerSentence]
      # @return [Hash] target_IDs -> list of senses, where target_IDs is a
      #   pair [list of terminal IDs, main terminal ID] and a sense is a
      #   hash with the keys
      #   "sense" (nil here), "obj" (nil here),
      #   "all_targets" (list of node IDs), "lex" (lemma),
      #   "pos" (category of the node) and "sid" (sentence ID)
      def determine_targets(sent)
        retv = {}

        sent.each_terminal do |node|
          lemma = @interpreter_class.lemma_backoff(node)
          pos = @interpreter_class.category(node)

          # keep the terminal only if its lemma/POS pair is known from the
          # training data, ...
          next unless @training_lemmapos_pairs.include?([lemma, pos])
          # ... it is not an auxiliary, ...
          next if @interpreter_class.auxiliary?(node)
          # ... its lemma is not a stopword, ...
          next if @stoplemmas.include?(lemma)
          # ... and it is not a preposition.
          next if pos == "prep"

          # Take this as a target. Nothing is recorded as target info,
          # since nothing new has been determined.
          retv[[[node.id], node.id]] = [
            {
              "sense" => nil,
              "obj" => nil,
              "all_targets" => [node.id],
              "lex" => lemma,
              "pos" => pos,
              "sid" => sent.id
            }
          ]
        end

        retv
      end
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'fred/targets'
|
2
|
+
|
3
|
+
module Shalmaneser
  module Fred
    # Target finder that uses the frames already annotated on a sentence.
    class FindTargetsFromFrames < Targets
      # Use existing frames to find targets.
      #
      # @param st_sent [SalsaTigerSentence]
      # @return [Hash] target_IDs -> list of senses, where target_IDs is a
      #   pair [list of terminal IDs, main terminal ID] and a sense is a
      #   hash with the keys
      #   "sense" (frame name), "obj" (the frame object),
      #   "all_targets" (list of node IDs, may comprise several nodes),
      #   "lex" (the target's "lemma" attribute),
      #   "pos" (normalised part of speech) and "sid" (sentence ID)
      def determine_targets(st_sent)
        retv = {}

        st_sent.each_frame do |frame_obj|
          # instance-specific computation: target and target positions
          term = nil
          all_targets = nil

          has_target = !(frame_obj.target.nil? || frame_obj.target.children.empty?)
          if has_target
            # WARNING: at this moment true multiword targets are not
            # considered for German. Remove the "no_mwe" parameter of
            # main_node_of_expr to change this. For all other languages we
            # try to figure out the head target word anyway.
            german = @exp.get("language") == "de"
            all_targets = frame_obj.target.children
            term =
              if german
                @interpreter_class.main_node_of_expr(all_targets, "no_mwe")
              else
                @interpreter_class.main_node_of_expr(all_targets)
              end
          end

          # don't use parts of a word as main node
          term = term.parent if term && term.is_splitword?

          # without a terminal main node there is nothing to record
          next unless term && term.is_terminal?

          key = [all_targets.map(&:id), term.id]
          retv[key] ||= []

          # Gold POS may be available but in a different form than the
          # strings @interpreter_class.category would return; map the
          # one-letter forms and fall back to the interpreter if absent.
          gold_pos = frame_obj.target.get_attribute("pos")
          pos =
            case gold_pos
            when /^[Vv]$/ then "verb"
            when /^[Nn]$/ then "noun"
            when /^[Aa]$/ then "adj"
            when nil then @interpreter_class.category(term)
            else gold_pos
            end

          target_info = {
            "sense" => frame_obj.name,
            "obj" => frame_obj,
            "all_targets" => frame_obj.target.children.map(&:id),
            "lex" => frame_obj.target.get_attribute("lemma"),
            "pos" => pos,
            "sid" => st_sent.id
          }

          retv[key] << target_info
          record(target_info) if @record_targets
        end

        retv
      end
    end
  end
end
|
data/lib/fred/fred.rb
CHANGED
@@ -1,47 +1,50 @@
|
|
1
1
|
# AB: 2011-11-13
|
2
2
|
# Initial import done, need to reimplement the whole interface.
|
3
|
-
require 'fred/
|
4
|
-
require 'fred/
|
5
|
-
require 'fred/
|
6
|
-
require 'fred/
|
7
|
-
require 'fred/
|
3
|
+
require 'fred/fred_featurize'
|
4
|
+
require 'fred/fred_split'
|
5
|
+
require 'fred/fred_train'
|
6
|
+
require 'fred/fred_test'
|
7
|
+
require 'fred/fred_eval'
|
8
|
+
# Reintroduce this task.
|
9
|
+
# require 'fred/fred_parameters'
|
8
10
|
|
9
|
-
|
10
|
-
|
11
|
+
require 'logging'
|
12
|
+
require 'definitions'
|
11
13
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
module Shalmaneser
  module Fred
    # Dispatcher of the Fred program: turns the '--task' option into the
    # matching task object and runs it.
    class Fred
      # @param options [Array] pair of the experiment object and the parsed
      #   command-line options; the options must answer ['--task']
      def initialize(options)
        @exp, @opts = options
        @task = @opts['--task']
      end

      # Perform the task given on the command line.
      #
      # @raise [ArgumentError] if the task name is unknown
      def assign
        # initialize task object
        task = case @task
               when "featurize"
                 FredFeaturize.new(@exp, @opts)
               when "refeaturize"
                 FredFeaturize.new(@exp, @opts, "refeaturize" => true)
               when "split"
                 FredSplit.new(@exp, @opts)
               when "train"
                 FredTrain.new(@exp, @opts)
               when "test"
                 FredTest.new(@exp, @opts)
               when "eval"
                 FredEval.new(@exp, @opts)
               else
                 # @todo AB: this <else> condition should be impossible.
                 # Do it in OptionParser
                 raise ArgumentError, "Wrong task for #{PROGRAM_NAME}: #{@task}!"
               end

        task.compute
        # report the task by its name, not by the task object
        LOGGER.info "#{PROGRAM_NAME} finished performing the task: #{@task}!"
      end
    end # class Fred
  end # module Fred
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Shalmaneser
  module Fred
    # Error class of Fred. It can wrap an external exception, whose class
    # and message are then put in front of the custom message.
    class FredError < StandardError
      # @param [String] msg A custom message for this exception.
      # @param [Exception] nested_exception An external exception
      #   which is reused to provide more information.
      def initialize(msg = nil, nested_exception = nil)
        # plain case: behave like StandardError
        return super(msg) unless nested_exception

        cause_line = "#{nested_exception.class}: #{nested_exception.message}"
        super("#{cause_line}\n#{msg}")
      end
    end
  end
end
|