RubyGems - shalmaneser - Versions diffs - 0.0.1.alpha - Mend

shalmaneser 0.0.1.alpha

Files changed (138) hide show

data/.yardopts +8 -0
data/CHANGELOG.rdoc +0 -0
data/LICENSE.rdoc +0 -0
data/README.rdoc +0 -0
data/lib/common/AbstractSynInterface.rb +1227 -0
data/lib/common/BerkeleyInterface.rb +375 -0
data/lib/common/CollinsInterface.rb +1165 -0
data/lib/common/ConfigData.rb +694 -0
data/lib/common/Counter.rb +18 -0
data/lib/common/DBInterface.rb +48 -0
data/lib/common/EnduserMode.rb +27 -0
data/lib/common/Eval.rb +480 -0
data/lib/common/FixSynSemMapping.rb +196 -0
data/lib/common/FrPrepConfigData.rb +66 -0
data/lib/common/FrprepHelper.rb +1324 -0
data/lib/common/Graph.rb +345 -0
data/lib/common/ISO-8859-1.rb +24 -0
data/lib/common/ML.rb +186 -0
data/lib/common/Maxent.rb +215 -0
data/lib/common/MiniparInterface.rb +1388 -0
data/lib/common/Optimise.rb +195 -0
data/lib/common/Parser.rb +213 -0
data/lib/common/RegXML.rb +269 -0
data/lib/common/RosyConventions.rb +171 -0
data/lib/common/SQLQuery.rb +243 -0
data/lib/common/STXmlTerminalOrder.rb +194 -0
data/lib/common/SalsaTigerRegXML.rb +2347 -0
data/lib/common/SalsaTigerXMLHelper.rb +99 -0
data/lib/common/SleepyInterface.rb +384 -0
data/lib/common/SynInterfaces.rb +275 -0
data/lib/common/TabFormat.rb +720 -0
data/lib/common/Tiger.rb +1448 -0
data/lib/common/TntInterface.rb +44 -0
data/lib/common/Tree.rb +61 -0
data/lib/common/TreetaggerInterface.rb +303 -0
data/lib/common/headz.rb +338 -0
data/lib/common/option_parser.rb +13 -0
data/lib/common/ruby_class_extensions.rb +310 -0
data/lib/fred/Baseline.rb +150 -0
data/lib/fred/FileZipped.rb +31 -0
data/lib/fred/FredBOWContext.rb +863 -0
data/lib/fred/FredConfigData.rb +182 -0
data/lib/fred/FredConventions.rb +232 -0
data/lib/fred/FredDetermineTargets.rb +324 -0
data/lib/fred/FredEval.rb +312 -0
data/lib/fred/FredFeatureExtractors.rb +321 -0
data/lib/fred/FredFeatures.rb +1061 -0
data/lib/fred/FredFeaturize.rb +596 -0
data/lib/fred/FredNumTrainingSenses.rb +27 -0
data/lib/fred/FredParameters.rb +402 -0
data/lib/fred/FredSplit.rb +84 -0
data/lib/fred/FredSplitPkg.rb +180 -0
data/lib/fred/FredTest.rb +607 -0
data/lib/fred/FredTrain.rb +144 -0
data/lib/fred/PlotAndREval.rb +480 -0
data/lib/fred/fred.rb +45 -0
data/lib/fred/md5.rb +23 -0
data/lib/fred/opt_parser.rb +250 -0
data/lib/frprep/AbstractSynInterface.rb +1227 -0
data/lib/frprep/Ampersand.rb +37 -0
data/lib/frprep/BerkeleyInterface.rb +375 -0
data/lib/frprep/CollinsInterface.rb +1165 -0
data/lib/frprep/ConfigData.rb +694 -0
data/lib/frprep/Counter.rb +18 -0
data/lib/frprep/FNCorpusXML.rb +643 -0
data/lib/frprep/FNDatabase.rb +144 -0
data/lib/frprep/FixSynSemMapping.rb +196 -0
data/lib/frprep/FrPrepConfigData.rb +66 -0
data/lib/frprep/FrameXML.rb +513 -0
data/lib/frprep/FrprepHelper.rb +1324 -0
data/lib/frprep/Graph.rb +345 -0
data/lib/frprep/ISO-8859-1.rb +24 -0
data/lib/frprep/MiniparInterface.rb +1388 -0
data/lib/frprep/Parser.rb +213 -0
data/lib/frprep/RegXML.rb +269 -0
data/lib/frprep/STXmlTerminalOrder.rb +194 -0
data/lib/frprep/SalsaTigerRegXML.rb +2347 -0
data/lib/frprep/SalsaTigerXMLHelper.rb +99 -0
data/lib/frprep/SleepyInterface.rb +384 -0
data/lib/frprep/SynInterfaces.rb +275 -0
data/lib/frprep/TabFormat.rb +720 -0
data/lib/frprep/Tiger.rb +1448 -0
data/lib/frprep/TntInterface.rb +44 -0
data/lib/frprep/Tree.rb +61 -0
data/lib/frprep/TreetaggerInterface.rb +303 -0
data/lib/frprep/do_parses.rb +142 -0
data/lib/frprep/frprep.rb +686 -0
data/lib/frprep/headz.rb +338 -0
data/lib/frprep/one_parsed_file.rb +28 -0
data/lib/frprep/opt_parser.rb +94 -0
data/lib/frprep/ruby_class_extensions.rb +310 -0
data/lib/rosy/AbstractFeatureAndExternal.rb +240 -0
data/lib/rosy/DBMySQL.rb +146 -0
data/lib/rosy/DBSQLite.rb +280 -0
data/lib/rosy/DBTable.rb +239 -0
data/lib/rosy/DBWrapper.rb +176 -0
data/lib/rosy/ExternalConfigData.rb +58 -0
data/lib/rosy/FailedParses.rb +130 -0
data/lib/rosy/FeatureInfo.rb +242 -0
data/lib/rosy/GfInduce.rb +1115 -0
data/lib/rosy/GfInduceFeature.rb +148 -0
data/lib/rosy/InputData.rb +294 -0
data/lib/rosy/RosyConfigData.rb +115 -0
data/lib/rosy/RosyConfusability.rb +338 -0
data/lib/rosy/RosyEval.rb +465 -0
data/lib/rosy/RosyFeatureExtractors.rb +1609 -0
data/lib/rosy/RosyFeaturize.rb +280 -0
data/lib/rosy/RosyInspect.rb +336 -0
data/lib/rosy/RosyIterator.rb +477 -0
data/lib/rosy/RosyPhase2FeatureExtractors.rb +230 -0
data/lib/rosy/RosyPruning.rb +165 -0
data/lib/rosy/RosyServices.rb +744 -0
data/lib/rosy/RosySplit.rb +232 -0
data/lib/rosy/RosyTask.rb +19 -0
data/lib/rosy/RosyTest.rb +826 -0
data/lib/rosy/RosyTrain.rb +232 -0
data/lib/rosy/RosyTrainingTestTable.rb +786 -0
data/lib/rosy/TargetsMostFrequentFrame.rb +60 -0
data/lib/rosy/View.rb +418 -0
data/lib/rosy/opt_parser.rb +379 -0
data/lib/rosy/rosy.rb +77 -0
data/lib/shalmaneser/version.rb +3 -0
data/test/frprep/test_opt_parser.rb +94 -0
data/test/functional/functional_test_helper.rb +40 -0
data/test/functional/sample_experiment_files/fred_test.salsa.erb +122 -0
data/test/functional/sample_experiment_files/fred_train.salsa.erb +135 -0
data/test/functional/sample_experiment_files/prp_test.salsa.erb +138 -0
data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb +120 -0
data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb +120 -0
data/test/functional/sample_experiment_files/prp_train.salsa.erb +138 -0
data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb +138 -0
data/test/functional/sample_experiment_files/prp_train.salsa.rosy.standalone.erb +138 -0
data/test/functional/sample_experiment_files/rosy_test.salsa.erb +257 -0
data/test/functional/sample_experiment_files/rosy_train.salsa.erb +259 -0
data/test/functional/test_fred.rb +47 -0
data/test/functional/test_frprep.rb +52 -0
data/test/functional/test_rosy.rb +20 -0
metadata +284 -0

data/test/functional/sample_experiment_files/prp_test.salsa.fred.standalone.erb ADDED Viewed

@@ -0,0 +1,120 @@
+# ID identifying this experiment and all its data
+# please do not use spaces inside the experiment ID
+prep_experiment_ID = prp_test
+# YOUR INPUT DATA:
+# frprep accepts an input directory rather than an input file.
+# It will process all files in the directory directory_input
+# and write the results to directory_preprocessed.
+#
+# For input formats see the discussion of "format" below.
+#directory_input = <%= File.expand_path('test/functional/input/frprep/test.salsa') %>
+directory_preprocessed = <%= File.expand_path('test/functional/input/fred/frprep/test.salsa') %>
+##
+# Experimental data is described by the following parameters:
+#
+# - language: en / de
+#    en for English or de for German
+#
+# - format:  SalsaTigerXML / FNXml / FNCorpusXML / SalsaTab / BNC / Plain
+#
+#    Format of the input data, training/test set
+#    SalsaTigerXML:  Parsed data, English or German
+#    FNXml:          FrameNet Lexical Unit files in FrameNet XML format
+#    FNCorpusXML:    FrameNet files in the FrameNet corpus XML format
+#    SalsaTab:       tabular format (internal)
+#    BNC:            BNC XML format, alternating words and POS tags
+#    Plain:          Plain text, ONE SENTENCE PER LINE.
+#
+#    Preprocessing transforms all data to SalsaTigerXML.
+#
+# - origin:  SalsaTiger / FrameNet / <not specified>
+#    This is the origin of the training/test data.
+#    SalsaTiger: data from the Tiger corpus, possibly semantically
+#                annotated by Salsa
+#    FrameNet: data from the FrameNet project
+#
+#    Don't set 'origin' if none of these origins apply
+#
+# - encoding: utf8 / iso / hex / <not specified>
+#                 Default: iso
+language = de
+#origin =
+format = Plain
+encoding = iso
+#############################
+# Which preprocessing steps to take?
+#
+# Data can be parsed, lemmatized and POS-tagged,
+# but this happens only if it is specified in the
+# experiment file.
+#
+# Set these booleans to true to trigger the respective
+# type of preprocessing. The default value is false.
+do_lemmatize = true
+do_postag = false
+do_parse = true
+#############################
+# directory where frprep puts its internal data
+#
+#frprep_directory = <%= File.expand_path('test/functional/input/fred/frprep') %>
+#############################
+# Syntax/semantics interface repair:
+# FrameNet annotated data has some annotation choices
+# that may make it harder to learn the mapping from
+# syntactic structure to semantic roles.
+#
+# If you are using FrameNet data for training a
+# semantic role labeler, set the following two settings
+# to true (default is false) to 'repair' semantic role labels
+# so that they match the syntactic structure more closely
+fe_syn_repair = true
+fe_rel_repair = false
+#################
+# Location of tools and resources used by Fred
+# currently known to the system:
+# (Saarbruecken paths given)
+#
+# - POS tagging:
+#   - pos_tagger = treetagger
+#     pos_tagger_path = /proj/llx/Software/treetagger/cmd/tree-tagger-english-notokenisation
+#
+# - Lemmatization:
+#   - lemmatizer = treetagger
+#     lemmatizer_path = /proj/llx/Software/treetagger/cmd/tree-tagger-english-notokenisation
+#     lemmatizer_path = /proj/llx/Software/treetagger/cmd/tree-tagger-german-notokenisation
+#
+# - Parser:
+#   - parser = collins  (English)
+#     parser_path = /proj/llx/Software/Parsers/COLLINS-PARSER/
+#   - parser = sleepy   (German)
+#     parser_path = /proj/corpora/sleepy3/
+#   - parser = minipar (English)
+#     parser_path = /proj/llx/Software/Parsers/minipar-linux/
+#
+pos_tagger = treetagger
+pos_tagger_path = <%= File.expand_path('tools/treetagger/shal-ger') %>
+lemmatizer = treetagger
+lemmatizer_path = <%= File.expand_path('tools/treetagger/shal-ger') %>
+parser = berkeley
+parser_path = <%= File.expand_path('tools/berkeleyParser') %>
+# parser:
+# maximum no. of sentences in a parse file,
+# maximum sentence length to be parsed
+parser_max_sent_num = 2000
+parser_max_sent_len = 80

data/test/functional/sample_experiment_files/prp_test.salsa.rosy.standalone.erb ADDED Viewed

@@ -0,0 +1,120 @@
+# ID identifying this experiment and all its data
+# please do not use spaces inside the experiment ID
+prep_experiment_ID = prp_test
+# YOUR INPUT DATA:
+# frprep accepts an input directory rather than an input file.
+# It will process all files in the directory directory_input
+# and write the results to directory_preprocessed.
+#
+# For input formats see the discussion of "format" below.
+#directory_input = <%= File.expand_path('test/functional/input/frprep/test.salsa') %>
+directory_preprocessed = <%= File.expand_path('test/functional/input/rosy/frprep/test.salsa') %>
+##
+# Experimental data is described by the following parameters:
+#
+# - language: en / de
+#    en for English or de for German
+#
+# - format:  SalsaTigerXML / FNXml / FNCorpusXML / SalsaTab / BNC / Plain
+#
+#    Format of the input data, training/test set
+#    SalsaTigerXML:  Parsed data, English or German
+#    FNXml:          FrameNet Lexical Unit files in FrameNet XML format
+#    FNCorpusXML:    FrameNet files in the FrameNet corpus XML format
+#    SalsaTab:       tabular format (internal)
+#    BNC:            BNC XML format, alternating words and POS tags
+#    Plain:          Plain text, ONE SENTENCE PER LINE.
+#
+#    Preprocessing transforms all data to SalsaTigerXML.
+#
+# - origin:  SalsaTiger / FrameNet / <not specified>
+#    This is the origin of the training/test data.
+#    SalsaTiger: data from the Tiger corpus, possibly semantically
+#                annotated by Salsa
+#    FrameNet: data from the FrameNet project
+#
+#    Don't set 'origin' if none of these origins apply
+#
+# - encoding: utf8 / iso / hex / <not specified>
+#                 Default: iso
+language = de
+#origin =
+format = Plain
+encoding = iso
+#############################
+# Which preprocessing steps to take?
+#
+# Data can be parsed, lemmatized and POS-tagged,
+# but this happens only if it is specified in the
+# experiment file.
+#
+# Set these booleans to true to trigger the respective
+# type of preprocessing. The default value is false.
+do_lemmatize = true
+do_postag = false
+do_parse = true
+#############################
+# directory where frprep puts its internal data
+#
+#frprep_directory = <%= File.expand_path('test/functional/input/rosy/frprep') %>
+#############################
+# Syntax/semantics interface repair:
+# FrameNet annotated data has some annotation choices
+# that may make it harder to learn the mapping from
+# syntactic structure to semantic roles.
+#
+# If you are using FrameNet data for training a
+# semantic role labeler, set the following two settings
+# to true (default is false) to 'repair' semantic role labels
+# so that they match the syntactic structure more closely
+fe_syn_repair = true
+fe_rel_repair = false
+#################
+# Location of tools and resources used by Fred
+# currently known to the system:
+# (Saarbruecken paths given)
+#
+# - POS tagging:
+#   - pos_tagger = treetagger
+#     pos_tagger_path = /proj/llx/Software/treetagger/cmd/tree-tagger-english-notokenisation
+#
+# - Lemmatization:
+#   - lemmatizer = treetagger
+#     lemmatizer_path = /proj/llx/Software/treetagger/cmd/tree-tagger-english-notokenisation
+#     lemmatizer_path = /proj/llx/Software/treetagger/cmd/tree-tagger-german-notokenisation
+#
+# - Parser:
+#   - parser = collins  (English)
+#     parser_path = /proj/llx/Software/Parsers/COLLINS-PARSER/
+#   - parser = sleepy   (German)
+#     parser_path = /proj/corpora/sleepy3/
+#   - parser = minipar (English)
+#     parser_path = /proj/llx/Software/Parsers/minipar-linux/
+#
+pos_tagger = treetagger
+pos_tagger_path = <%= File.expand_path('tools/treetagger/shal-ger') %>
+lemmatizer = treetagger
+lemmatizer_path = <%= File.expand_path('tools/treetagger/shal-ger') %>
+parser = berkeley
+parser_path = <%= File.expand_path('tools/berkeleyParser') %>
+# parser:
+# maximum no. of sentences in a parse file,
+# maximum sentence length to be parsed
+parser_max_sent_num = 2000
+parser_max_sent_len = 80

data/test/functional/sample_experiment_files/prp_train.salsa.erb ADDED Viewed

@@ -0,0 +1,138 @@
+#################################################
+# This is a sample experiment file
+# with explanations of all features
+# that can be set for the frprep preprocessing system for Fred and Rosy.
+#
+# To start your own experiment,
+# replace all occurrences of
+# %...% by values of your choice.
+#
+# Boolean features may be omitted and are false by default.
+#
+# Experiment file lines that start with '#'
+# are comments and are ignored. Empty lines are ignored as well.
+########################
+# Experiment description
+#
+# ID identifying this experiment and all its data
+# please do not use spaces inside the experiment ID
+prep_experiment_ID = prp_train
+# YOUR INPUT DATA:
+# frprep accepts an input directory rather than an input file.
+# It will process all files in the directory directory_input
+# and write the results to directory_preprocessed.
+#
+# For input formats see the discussion of "format" below.
+directory_input = <%= File.expand_path('test/functional/input/frprep/train.salsa') %>
+directory_preprocessed = <%= File.expand_path('test/functional/output/frprep/train.salsa') %>
+##
+# Experimental data is described by the following parameters:
+#
+# - language: en / de
+#    en for English or de for German
+#
+# - format:  SalsaTigerXML / FNXml / FNCorpusXML / SalsaTab / BNC / Plain
+#
+#    Format of the input data, training/test set
+#    SalsaTigerXML:  Parsed data, English or German
+#    FNXml:          FrameNet Lexical Unit files in FrameNet XML format
+#    FNCorpusXML:    FrameNet files in the FrameNet corpus XML format
+#    SalsaTab:       tabular format (internal)
+#    BNC:            BNC XML format, alternating words and POS tags
+#    Plain:          Plain text, ONE SENTENCE PER LINE.
+#
+#    Preprocessing transforms all data to SalsaTigerXML.
+#
+# - origin:  SalsaTiger / FrameNet / <not specified>
+#    This is the origin of the training/test data.
+#    SalsaTiger: data from the Tiger corpus, possibly semantically
+#                annotated by Salsa
+#    FrameNet: data from the FrameNet project
+#
+#    Don't set 'origin' if none of these origins apply
+#
+# - encoding: utf8 / iso / hex / <not specified>
+#                 Default: iso
+language = de
+#origin =
+format = SalsaTigerXML
+encoding = utf8
+#############################
+# Which preprocessing steps to take?
+#
+# Data can be parsed, lemmatized and POS-tagged,
+# but this happens only if it is specified in the
+# experiment file.
+#
+# Set these booleans to true to trigger the respective
+# type of preprocessing. The default value is false.
+do_lemmatize = true
+do_postag = false
+do_parse = true
+#############################
+# directory where frprep puts its internal data
+#
+frprep_directory = <%= File.expand_path('test/functional/output/') %>
+#############################
+# Syntax/semantics interface repair:
+# FrameNet annotated data has some annotation choices
+# that may make it harder to learn the mapping from
+# syntactic structure to semantic roles.
+#
+# If you are using FrameNet data for training a
+# semantic role labeler, set the following two settings
+# to true (default is false) to 'repair' semantic role labels
+# so that they match the syntactic structure more closely
+fe_syn_repair = true
+fe_rel_repair = false
+#################
+# Location of tools and resources used by Fred
+# currently known to the system:
+# (Saarbruecken paths given)
+#
+# - POS tagging:
+#   - pos_tagger = treetagger
+#     pos_tagger_path = /proj/llx/Software/treetagger/cmd/tree-tagger-english-notokenisation
+#
+# - Lemmatization:
+#   - lemmatizer = treetagger
+#     lemmatizer_path = /proj/llx/Software/treetagger/cmd/tree-tagger-english-notokenisation
+#     lemmatizer_path = /proj/llx/Software/treetagger/cmd/tree-tagger-german-notokenisation
+#
+# - Parser:
+#   - parser = collins  (English)
+#     parser_path = /proj/llx/Software/Parsers/COLLINS-PARSER/
+#   - parser = sleepy   (German)
+#     parser_path = /proj/corpora/sleepy3/
+#   - parser = minipar (English)
+#     parser_path = /proj/llx/Software/Parsers/minipar-linux/
+#
+pos_tagger = treetagger
+pos_tagger_path = <%= File.expand_path('tools/treetagger/shal-ger') %>
+lemmatizer = treetagger
+lemmatizer_path = <%= File.expand_path('tools/treetagger/shal-ger') %>
+parser = berkeley
+parser_path = <%= File.expand_path('tools/berkeleyParser') %>
+# parser:
+# maximum no. of sentences in a parse file,
+# maximum sentence length to be parsed
+parser_max_sent_num = 2000
+parser_max_sent_len = 80

data/test/functional/sample_experiment_files/prp_train.salsa.fred.standalone.erb ADDED Viewed

@@ -0,0 +1,138 @@
+#################################################
+# This is a sample experiment file
+# with explanations of all features
+# that can be set for the frprep preprocessing system for Fred and Rosy.
+#
+# To start your own experiment,
+# replace all occurrences of
+# %...% by values of your choice.
+#
+# Boolean features may be omitted and are false by default.
+#
+# Experiment file lines that start with '#'
+# are comments and are ignored. Empty lines are ignored as well.
+########################
+# Experiment description
+#
+# ID identifying this experiment and all its data
+# please do not use spaces inside the experiment ID
+prep_experiment_ID = prp_train
+# YOUR INPUT DATA:
+# frprep accepts an input directory rather than an input file.
+# It will process all files in the directory directory_input
+# and write the results to directory_preprocessed.
+#
+# For input formats see the discussion of "format" below.
+#directory_input = <%= File.expand_path('test/functional/input/frprep/train.salsa') %>
+directory_preprocessed = <%= File.expand_path('test/functional/input/fred/frprep/train.salsa') %>
+##
+# Experimental data is described by the following parameters:
+#
+# - language: en / de
+#    en for English or de for German
+#
+# - format:  SalsaTigerXML / FNXml / FNCorpusXML / SalsaTab / BNC / Plain
+#
+#    Format of the input data, training/test set
+#    SalsaTigerXML:  Parsed data, English or German
+#    FNXml:          FrameNet Lexical Unit files in FrameNet XML format
+#    FNCorpusXML:    FrameNet files in the FrameNet corpus XML format
+#    SalsaTab:       tabular format (internal)
+#    BNC:            BNC XML format, alternating words and POS tags
+#    Plain:          Plain text, ONE SENTENCE PER LINE.
+#
+#    Preprocessing transforms all data to SalsaTigerXML.
+#
+# - origin:  SalsaTiger / FrameNet / <not specified>
+#    This is the origin of the training/test data.
+#    SalsaTiger: data from the Tiger corpus, possibly semantically
+#                annotated by Salsa
+#    FrameNet: data from the FrameNet project
+#
+#    Don't set 'origin' if none of these origins apply
+#
+# - encoding: utf8 / iso / hex / <not specified>
+#                 Default: iso
+language = de
+#origin =
+format = SalsaTigerXML
+encoding = utf8
+#############################
+# Which preprocessing steps to take?
+#
+# Data can be parsed, lemmatized and POS-tagged,
+# but this happens only if it is specified in the
+# experiment file.
+#
+# Set these booleans to true to trigger the respective
+# type of preprocessing. The default value is false.
+do_lemmatize = true
+do_postag = false
+do_parse = true
+#############################
+# directory where frprep puts its internal data
+#
+#frprep_directory = <%= File.expand_path('test/functional/input/fred/') %>
+#############################
+# Syntax/semantics interface repair:
+# FrameNet annotated data has some annotation choices
+# that may make it harder to learn the mapping from
+# syntactic structure to semantic roles.
+#
+# If you are using FrameNet data for training a
+# semantic role labeler, set the following two settings
+# to true (default is false) to 'repair' semantic role labels
+# so that they match the syntactic structure more closely
+fe_syn_repair = true
+fe_rel_repair = false
+#################
+# Location of tools and resources used by Fred
+# currently known to the system:
+# (Saarbruecken paths given)
+#
+# - POS tagging:
+#   - pos_tagger = treetagger
+#     pos_tagger_path = /proj/llx/Software/treetagger/cmd/tree-tagger-english-notokenisation
+#
+# - Lemmatization:
+#   - lemmatizer = treetagger
+#     lemmatizer_path = /proj/llx/Software/treetagger/cmd/tree-tagger-english-notokenisation
+#     lemmatizer_path = /proj/llx/Software/treetagger/cmd/tree-tagger-german-notokenisation
+#
+# - Parser:
+#   - parser = collins  (English)
+#     parser_path = /proj/llx/Software/Parsers/COLLINS-PARSER/
+#   - parser = sleepy   (German)
+#     parser_path = /proj/corpora/sleepy3/
+#   - parser = minipar (English)
+#     parser_path = /proj/llx/Software/Parsers/minipar-linux/
+#
+pos_tagger = treetagger
+pos_tagger_path = <%= File.expand_path('tools/treetagger/shal-ger') %>
+lemmatizer = treetagger
+lemmatizer_path = <%= File.expand_path('tools/treetagger/shal-ger') %>
+parser = berkeley
+parser_path = <%= File.expand_path('tools/berkeleyParser') %>
+# parser:
+# maximum no. of sentences in a parse file,
+# maximum sentence length to be parsed
+parser_max_sent_num = 2000
+parser_max_sent_len = 80