shalmaneser 1.2.0.rc3 → 1.2.0.rc4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/README.md +26 -7
  3. data/bin/fred +2 -4
  4. data/doc/exp_files.md +6 -5
  5. data/lib/common/{ConfigData.rb → config_data.rb} +46 -270
  6. data/lib/common/config_format_element.rb +220 -0
  7. data/lib/common/prep_config_data.rb +62 -0
  8. data/lib/common/{frprep_helper.rb → prep_helper.rb} +0 -0
  9. data/lib/{common/DBInterface.rb → db/db_interface.rb} +2 -2
  10. data/lib/{rosy/DBMySQL.rb → db/db_mysql.rb} +1 -2
  11. data/lib/{rosy/DBSQLite.rb → db/db_sqlite.rb} +1 -1
  12. data/lib/{rosy/DBTable.rb → db/db_table.rb} +1 -1
  13. data/lib/{rosy/DBWrapper.rb → db/db_wrapper.rb} +0 -0
  14. data/lib/{common/SQLQuery.rb → db/sql_query.rb} +0 -0
  15. data/lib/fred/FredBOWContext.rb +8 -6
  16. data/lib/fred/FredDetermineTargets.rb +1 -1
  17. data/lib/fred/FredEval.rb +1 -1
  18. data/lib/fred/FredFeaturize.rb +22 -16
  19. data/lib/fred/FredTest.rb +0 -1
  20. data/lib/fred/fred.rb +2 -0
  21. data/lib/fred/{FredConfigData.rb → fred_config_data.rb} +70 -67
  22. data/lib/fred/opt_parser.rb +1 -1
  23. data/lib/frprep/frprep.rb +1 -1
  24. data/lib/frprep/interfaces/berkeley_interface.rb +7 -9
  25. data/lib/frprep/opt_parser.rb +1 -1
  26. data/lib/rosy/ExternalConfigData.rb +1 -1
  27. data/lib/rosy/RosyEval.rb +1 -1
  28. data/lib/rosy/RosyFeaturize.rb +21 -20
  29. data/lib/rosy/RosyInspect.rb +1 -1
  30. data/lib/rosy/RosyPruning.rb +1 -1
  31. data/lib/rosy/RosyServices.rb +1 -1
  32. data/lib/rosy/RosySplit.rb +1 -1
  33. data/lib/rosy/RosyTest.rb +23 -20
  34. data/lib/rosy/RosyTrain.rb +15 -13
  35. data/lib/rosy/RosyTrainingTestTable.rb +2 -1
  36. data/lib/rosy/View.rb +1 -1
  37. data/lib/rosy/opt_parser.rb +1 -1
  38. data/lib/rosy/rosy.rb +1 -1
  39. data/lib/rosy/rosy_config_data.rb +121 -0
  40. data/lib/shalmaneser/opt_parser.rb +32 -2
  41. data/lib/shalmaneser/version.rb +1 -1
  42. metadata +23 -114
  43. checksums.yaml.gz.sig +0 -0
  44. data.tar.gz.sig +0 -0
  45. data/lib/common/FrPrepConfigData.rb +0 -66
  46. data/lib/rosy/RosyConfigData.rb +0 -115
  47. metadata.gz.sig +0 -0
checksums.yaml.gz.sig DELETED
Binary file
data.tar.gz.sig DELETED
Binary file
@@ -1,66 +0,0 @@
1
- # FPrepConfigData
2
- # Katrin Erk July 05
3
- #
4
- # Preprocessing for Fred and Rosy:
5
- # access to a configuration and experiment description file
6
-
7
- require "common/ConfigData"
8
-
9
- ##############################
10
- # Class FrPrepConfigData
11
- #
12
- # inherits from ConfigData,
13
- # sets variable names appropriate to preprocessing task
14
-
15
- class FrPrepConfigData < ConfigData
16
- def initialize(filename)
17
-
18
- # initialize config data object
19
- super(filename, # config file
20
- { "prep_experiment_ID" => "string", # experiment identifier
21
-
22
- "frprep_directory" => "string", # dir for frprep internal data
23
-
24
- # information about the dataset
25
- "language" => "string", # en, de
26
- "origin"=> "string", # FrameNet, Salsa, or nothing
27
- "format" => "string", # Plain, SalsaTab, FNXml, FNCorpusXml, SalsaTigerXML
28
- "encoding" => "string", # utf8, iso, hex, or nothing
29
-
30
-
31
- # directories
32
- "directory_input" => "string", # dir with input data
33
- "directory_preprocessed" => "string", # dir with output Salsa/Tiger XML data
34
- "directory_parserout" => "string", # dir with parser output for the parser named below
35
-
36
- # syntactic processing
37
- "pos_tagger" => "string", # name of POS tagger
38
- "lemmatizer" => "string", # name of lemmatizer
39
- "parser" => "string", # name of parser
40
- "pos_tagger_path" => "string", # path to POS tagger
41
- "lemmatizer_path" => "string", # path to lemmatizer
42
- "parser_path" => "string", # path to parser
43
- "parser_max_sent_num" => "integer", # max number of sentences per parser input file
44
- "parser_max_sent_len" => "integer", # max sentence length the parser handles
45
-
46
- "do_parse" => "bool", # use parser?
47
- "do_lemmatize" => "bool",# use lemmatizer?
48
- "do_postag" => "bool", # use POS tagger?
49
-
50
- # output format: if tabformat_output == true,
51
- # output in Tab format rather than Salsa/Tiger XML
52
- # (this will not work if do_parse == true)
53
- "tabformat_output" => "bool",
54
-
55
- # syntactic repairs, dependent on existing semantic role annotation
56
- "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
57
- "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
58
- },
59
- [ ] # variables
60
- )
61
-
62
- end
63
- end
64
-
65
-
66
-
@@ -1,115 +0,0 @@
1
- require 'common/ConfigData'
2
-
3
- ##############################
4
- # Class RosyConfigData
5
- #
6
- # inherits from ConfigData,
7
- # sets features for ROSY
8
-
9
- class RosyConfigData < ConfigData
10
- def initialize(filename)
11
- super(filename, # config file
12
- { # features
13
- "feature" => "list",
14
- "classifier" => "list",
15
-
16
- "verbose" => "bool" ,
17
- "enduser_mode" => "bool",
18
-
19
- "experiment_ID" => "string",
20
-
21
- "directory_input_train" => "string",
22
- "directory_input_test" => "string",
23
- "directory_output" => "string",
24
-
25
- "preproc_descr_file_train" => "string",
26
- "preproc_descr_file_test" => "string",
27
- "external_descr_file" => "string",
28
-
29
- "dbtype" => "string", # "mysql" or "sqlite"
30
-
31
- "host" => "string", # DB access: sqlite only
32
- "user" => "string",
33
- "passwd" => "string",
34
- "dbname" => "string",
35
-
36
- "data_dir" => "string", # for external use
37
- "rosy_dir" => "pattern", # for internal use only, set by rosy.rb
38
-
39
- "classifier_dir" => "string", # if present, special directory for classifiers
40
-
41
- "classif_column_name" => "string",
42
- "main_table_name" => "pattern",
43
- "test_table_name" => "pattern",
44
-
45
- "eval_file" => "pattern",
46
- "log_file" => "pattern",
47
- "failed_file" => "pattern",
48
- "classifier_file" => "pattern",
49
- "classifier_output_file" => "pattern",
50
- "noval" => "string",
51
-
52
-
53
- "split_nones" => "bool",
54
- "print_eval_log" => "bool",
55
- "assume_argrec_perfect" => "bool",
56
- "xwise_argrec" => "string",
57
- "xwise_arglab" => "string",
58
- "xwise_onestep" => "string",
59
-
60
- "fe_syn_repair" => "bool", # map words to constituents for FEs: idealize?
61
- "fe_rel_repair" => "bool", # FEs: include non-included relative clauses into FEs
62
-
63
- "prune" => "string", # pruning prior to argrec?
64
-
65
- },
66
- ["exp_ID", "test_ID", "split_ID", "feature_name", "classif", "step",
67
- "group", "dataset","mode"] # variables
68
- )
69
-
70
- # set access functions for list features
71
- set_list_feature_access("feature",
72
- method("access_feature"))
73
-
74
- # set access functions for list features
75
- set_list_feature_access("classifier",
76
- method("access_feature"))
77
-
78
- end
79
-
80
- ###
81
- # protected
82
-
83
- #####
84
- # access_feature
85
- #
86
- # access function for feature 'feature'
87
- #
88
- # assumed format in the config file:
89
- #
90
- # feature = path [option]*
91
- #
92
- # i.e. first the name of the feature type to use, then
93
- # optionally options associated with that feature,
94
- # e.g. 'argrec': use that feature only when computing argrec
95
- #
96
- # the access function is called with parameter val_list, an array of
97
- # string tuples, one string tuple for each feature defined.
98
- # the first string in the tuple is the feature name, the rest are the options
99
- #
100
- # returns: a list of pairs [feature_name(string), options(array:string)]
101
- # of defined features
102
- def access_feature(val_list) # array:array:string: list of tuples defined in config file
103
- # for feature 'feature'
104
- if val_list.nil?
105
- return []
106
- else
107
- return val_list.map { |feature_descr_tuple|
108
- [feature_descr_tuple.first, feature_descr_tuple[1..-1]]
109
- }
110
- end
111
- end
112
- end
113
-
114
-
115
-
metadata.gz.sig DELETED
Binary file