shalmaneser 1.2.0.rc3 → 1.2.0.rc4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +26 -7
- data/bin/fred +2 -4
- data/doc/exp_files.md +6 -5
- data/lib/common/{ConfigData.rb → config_data.rb} +46 -270
- data/lib/common/config_format_element.rb +220 -0
- data/lib/common/prep_config_data.rb +62 -0
- data/lib/common/{frprep_helper.rb → prep_helper.rb} +0 -0
- data/lib/{common/DBInterface.rb → db/db_interface.rb} +2 -2
- data/lib/{rosy/DBMySQL.rb → db/db_mysql.rb} +1 -2
- data/lib/{rosy/DBSQLite.rb → db/db_sqlite.rb} +1 -1
- data/lib/{rosy/DBTable.rb → db/db_table.rb} +1 -1
- data/lib/{rosy/DBWrapper.rb → db/db_wrapper.rb} +0 -0
- data/lib/{common/SQLQuery.rb → db/sql_query.rb} +0 -0
- data/lib/fred/FredBOWContext.rb +8 -6
- data/lib/fred/FredDetermineTargets.rb +1 -1
- data/lib/fred/FredEval.rb +1 -1
- data/lib/fred/FredFeaturize.rb +22 -16
- data/lib/fred/FredTest.rb +0 -1
- data/lib/fred/fred.rb +2 -0
- data/lib/fred/{FredConfigData.rb → fred_config_data.rb} +70 -67
- data/lib/fred/opt_parser.rb +1 -1
- data/lib/frprep/frprep.rb +1 -1
- data/lib/frprep/interfaces/berkeley_interface.rb +7 -9
- data/lib/frprep/opt_parser.rb +1 -1
- data/lib/rosy/ExternalConfigData.rb +1 -1
- data/lib/rosy/RosyEval.rb +1 -1
- data/lib/rosy/RosyFeaturize.rb +21 -20
- data/lib/rosy/RosyInspect.rb +1 -1
- data/lib/rosy/RosyPruning.rb +1 -1
- data/lib/rosy/RosyServices.rb +1 -1
- data/lib/rosy/RosySplit.rb +1 -1
- data/lib/rosy/RosyTest.rb +23 -20
- data/lib/rosy/RosyTrain.rb +15 -13
- data/lib/rosy/RosyTrainingTestTable.rb +2 -1
- data/lib/rosy/View.rb +1 -1
- data/lib/rosy/opt_parser.rb +1 -1
- data/lib/rosy/rosy.rb +1 -1
- data/lib/rosy/rosy_config_data.rb +121 -0
- data/lib/shalmaneser/opt_parser.rb +32 -2
- data/lib/shalmaneser/version.rb +1 -1
- metadata +23 -114
- checksums.yaml.gz.sig +0 -0
- data.tar.gz.sig +0 -0
- data/lib/common/FrPrepConfigData.rb +0 -66
- data/lib/rosy/RosyConfigData.rb +0 -115
- metadata.gz.sig +0 -0
checksums.yaml.gz.sig
DELETED
Binary file
|
data.tar.gz.sig
DELETED
Binary file
|
@@ -1,66 +0,0 @@
|
|
1
|
-
# FPrepConfigData
|
2
|
-
# Katrin Erk July 05
|
3
|
-
#
|
4
|
-
# Preprocessing for Fred and Rosy:
|
5
|
-
# access to a configuration and experiment description file
|
6
|
-
|
7
|
-
require "common/ConfigData"
|
8
|
-
|
9
|
-
##############################
|
10
|
-
# Class FrPrepConfigData
|
11
|
-
#
|
12
|
-
# inherits from ConfigData,
|
13
|
-
# sets variable names appropriate to preprocessing task
|
14
|
-
|
15
|
-
class FrPrepConfigData < ConfigData
|
16
|
-
def initialize(filename)
|
17
|
-
|
18
|
-
# initialize config data object
|
19
|
-
super(filename, # config file
|
20
|
-
{ "prep_experiment_ID" => "string", # experiment identifier
|
21
|
-
|
22
|
-
"frprep_directory" => "string", # dir for frprep internal data
|
23
|
-
|
24
|
-
# information about the dataset
|
25
|
-
"language" => "string", # en, de
|
26
|
-
"origin"=> "string", # FrameNet, Salsa, or nothing
|
27
|
-
"format" => "string", # Plain, SalsaTab, FNXml, FNCorpusXml, SalsaTigerXML
|
28
|
-
"encoding" => "string", # utf8, iso, hex, or nothing
|
29
|
-
|
30
|
-
|
31
|
-
# directories
|
32
|
-
"directory_input" => "string", # dir with input data
|
33
|
-
"directory_preprocessed" => "string", # dir with output Salsa/Tiger XML data
|
34
|
-
"directory_parserout" => "string", # dir with parser output for the parser named below
|
35
|
-
|
36
|
-
# syntactic processing
|
37
|
-
"pos_tagger" => "string", # name of POS tagger
|
38
|
-
"lemmatizer" => "string", # name of lemmatizer
|
39
|
-
"parser" => "string", # name of parser
|
40
|
-
"pos_tagger_path" => "string", # path to POS tagger
|
41
|
-
"lemmatizer_path" => "string", # path to lemmatizer
|
42
|
-
"parser_path" => "string", # path to parser
|
43
|
-
"parser_max_sent_num" => "integer", # max number of sentences per parser input file
|
44
|
-
"parser_max_sent_len" => "integer", # max sentence length the parser handles
|
45
|
-
|
46
|
-
"do_parse" => "bool", # use parser?
|
47
|
-
"do_lemmatize" => "bool",# use lemmatizer?
|
48
|
-
"do_postag" => "bool", # use POS tagger?
|
49
|
-
|
50
|
-
# output format: if tabformat_output == true,
|
51
|
-
# output in Tab format rather than Salsa/Tiger XML
|
52
|
-
# (this will not work if do_parse == true)
|
53
|
-
"tabformat_output" => "bool",
|
54
|
-
|
55
|
-
# syntactic repairs, dependent on existing semantic role annotation
|
56
|
-
"fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
|
57
|
-
"fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
|
58
|
-
},
|
59
|
-
[ ] # variables
|
60
|
-
)
|
61
|
-
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
|
66
|
-
|
data/lib/rosy/RosyConfigData.rb
DELETED
@@ -1,115 +0,0 @@
|
|
1
|
-
require 'common/ConfigData'
|
2
|
-
|
3
|
-
##############################
|
4
|
-
# Class RosyConfigData
|
5
|
-
#
|
6
|
-
# inherits from ConfigData,
|
7
|
-
# sets features for ROSY
|
8
|
-
|
9
|
-
class RosyConfigData < ConfigData
|
10
|
-
def initialize(filename)
|
11
|
-
super(filename, # config file
|
12
|
-
{ # features
|
13
|
-
"feature" => "list",
|
14
|
-
"classifier" => "list",
|
15
|
-
|
16
|
-
"verbose" => "bool" ,
|
17
|
-
"enduser_mode" => "bool",
|
18
|
-
|
19
|
-
"experiment_ID" => "string",
|
20
|
-
|
21
|
-
"directory_input_train" => "string",
|
22
|
-
"directory_input_test" => "string",
|
23
|
-
"directory_output" => "string",
|
24
|
-
|
25
|
-
"preproc_descr_file_train" => "string",
|
26
|
-
"preproc_descr_file_test" => "string",
|
27
|
-
"external_descr_file" => "string",
|
28
|
-
|
29
|
-
"dbtype" => "string", # "mysql" or "sqlite"
|
30
|
-
|
31
|
-
"host" => "string", # DB access: sqlite only
|
32
|
-
"user" => "string",
|
33
|
-
"passwd" => "string",
|
34
|
-
"dbname" => "string",
|
35
|
-
|
36
|
-
"data_dir" => "string", # for external use
|
37
|
-
"rosy_dir" => "pattern", # for internal use only, set by rosy.rb
|
38
|
-
|
39
|
-
"classifier_dir" => "string", # if present, special directory for classifiers
|
40
|
-
|
41
|
-
"classif_column_name" => "string",
|
42
|
-
"main_table_name" => "pattern",
|
43
|
-
"test_table_name" => "pattern",
|
44
|
-
|
45
|
-
"eval_file" => "pattern",
|
46
|
-
"log_file" => "pattern",
|
47
|
-
"failed_file" => "pattern",
|
48
|
-
"classifier_file" => "pattern",
|
49
|
-
"classifier_output_file" => "pattern",
|
50
|
-
"noval" => "string",
|
51
|
-
|
52
|
-
|
53
|
-
"split_nones" => "bool",
|
54
|
-
"print_eval_log" => "bool",
|
55
|
-
"assume_argrec_perfect" => "bool",
|
56
|
-
"xwise_argrec" => "string",
|
57
|
-
"xwise_arglab" => "string",
|
58
|
-
"xwise_onestep" => "string",
|
59
|
-
|
60
|
-
"fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
|
61
|
-
"fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
|
62
|
-
|
63
|
-
"prune" => "string", # pruning prior to argrec?
|
64
|
-
|
65
|
-
},
|
66
|
-
["exp_ID", "test_ID", "split_ID", "feature_name", "classif", "step",
|
67
|
-
"group", "dataset","mode"] # variables
|
68
|
-
)
|
69
|
-
|
70
|
-
# set access functions for list features
|
71
|
-
set_list_feature_access("feature",
|
72
|
-
method("access_feature"))
|
73
|
-
|
74
|
-
# set access functions for list features
|
75
|
-
set_list_feature_access("classifier",
|
76
|
-
method("access_feature"))
|
77
|
-
|
78
|
-
end
|
79
|
-
|
80
|
-
###
|
81
|
-
# protected
|
82
|
-
|
83
|
-
#####
|
84
|
-
# access_feature
|
85
|
-
#
|
86
|
-
# access function for feature 'feature'
|
87
|
-
#
|
88
|
-
# assumed format in the config file:
|
89
|
-
#
|
90
|
-
# feature = path [option]*
|
91
|
-
#
|
92
|
-
# i.e. first the name of the feature type to use, then
|
93
|
-
# optionally options associated with that feature,
|
94
|
-
# e.g. 'argrec': use that feature only when computing argrec
|
95
|
-
#
|
96
|
-
# the access function is called with parameter val_list, an array of
|
97
|
-
# string tuples, one string tuple for each feature defined.
|
98
|
-
# the first string in the tuple is the feature name, the rest are the options
|
99
|
-
#
|
100
|
-
# returns: a list of pairs [feature_name(string), options(array:string)]
|
101
|
-
# of defined features
|
102
|
-
def access_feature(val_list) # array:array:string: list of tuples defined in config file
|
103
|
-
# for feature 'feature'
|
104
|
-
if val_list.nil?
|
105
|
-
return []
|
106
|
-
else
|
107
|
-
return val_list.map { |feature_descr_tuple|
|
108
|
-
[feature_descr_tuple.first, feature_descr_tuple[1..-1]]
|
109
|
-
}
|
110
|
-
end
|
111
|
-
end
|
112
|
-
end
|
113
|
-
|
114
|
-
|
115
|
-
|
metadata.gz.sig
DELETED
Binary file
|