nltkor 1.2.14__cp311-cp311-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. nltkor/Kor_char.py +193 -0
  2. nltkor/__init__.py +16 -0
  3. nltkor/alignment/__init__.py +1315 -0
  4. nltkor/cider/__init__.py +2 -0
  5. nltkor/cider/cider.py +55 -0
  6. nltkor/cider/cider_scorer.py +207 -0
  7. nltkor/distance/__init__.py +441 -0
  8. nltkor/distance/wasserstein.py +126 -0
  9. nltkor/etc.py +22 -0
  10. nltkor/lazyimport.py +144 -0
  11. nltkor/make_requirement.py +11 -0
  12. nltkor/metrics/__init__.py +63 -0
  13. nltkor/metrics/bartscore.py +301 -0
  14. nltkor/metrics/bertscore.py +331 -0
  15. nltkor/metrics/bleu_tensor.py +20 -0
  16. nltkor/metrics/classical.py +847 -0
  17. nltkor/metrics/entment.py +24 -0
  18. nltkor/metrics/eval.py +517 -0
  19. nltkor/metrics/mauve.py +273 -0
  20. nltkor/metrics/mauve_utils.py +131 -0
  21. nltkor/misc/__init__.py +11 -0
  22. nltkor/misc/string2string_basic_functions.py +59 -0
  23. nltkor/misc/string2string_default_tokenizer.py +83 -0
  24. nltkor/misc/string2string_hash_functions.py +159 -0
  25. nltkor/misc/string2string_word_embeddings.py +503 -0
  26. nltkor/search/__init__.py +10 -0
  27. nltkor/search/classical.py +569 -0
  28. nltkor/search/faiss_search.py +787 -0
  29. nltkor/search/kobert_tokenizer.py +181 -0
  30. nltkor/sejong/__init__.py +3 -0
  31. nltkor/sejong/__pycache__/__init__.cpython-38.pyc +0 -0
  32. nltkor/sejong/__pycache__/__init__.cpython-39.pyc +0 -0
  33. nltkor/sejong/__pycache__/sejong_download.cpython-38.pyc +0 -0
  34. nltkor/sejong/__pycache__/sejong_download.cpython-39.pyc +0 -0
  35. nltkor/sejong/__pycache__/ssem.cpython-38.pyc +0 -0
  36. nltkor/sejong/__pycache__/ssem.cpython-39.pyc +0 -0
  37. nltkor/sejong/ch.py +12 -0
  38. nltkor/sejong/dict_semClassNum.txt +491 -0
  39. nltkor/sejong/layer.txt +630 -0
  40. nltkor/sejong/sejong_download.py +87 -0
  41. nltkor/sejong/ssem.py +684 -0
  42. nltkor/similarity/__init__.py +3 -0
  43. nltkor/similarity/bartscore____.py +337 -0
  44. nltkor/similarity/bertscore____.py +339 -0
  45. nltkor/similarity/classical.py +245 -0
  46. nltkor/similarity/cosine_similarity.py +175 -0
  47. nltkor/tag/__init__.py +71 -0
  48. nltkor/tag/__pycache__/__init__.cpython-38.pyc +0 -0
  49. nltkor/tag/__pycache__/__init__.cpython-39.pyc +0 -0
  50. nltkor/tag/__pycache__/espresso_tag.cpython-38.pyc +0 -0
  51. nltkor/tag/__pycache__/espresso_tag.cpython-39.pyc +0 -0
  52. nltkor/tag/espresso_tag.py +220 -0
  53. nltkor/tag/libs/__init__.py +10 -0
  54. nltkor/tag/libs/__pycache__/__init__.cpython-38.pyc +0 -0
  55. nltkor/tag/libs/__pycache__/__init__.cpython-39.pyc +0 -0
  56. nltkor/tag/libs/__pycache__/attributes.cpython-38.pyc +0 -0
  57. nltkor/tag/libs/__pycache__/attributes.cpython-39.pyc +0 -0
  58. nltkor/tag/libs/__pycache__/config.cpython-38.pyc +0 -0
  59. nltkor/tag/libs/__pycache__/config.cpython-39.pyc +0 -0
  60. nltkor/tag/libs/__pycache__/metadata.cpython-38.pyc +0 -0
  61. nltkor/tag/libs/__pycache__/metadata.cpython-39.pyc +0 -0
  62. nltkor/tag/libs/__pycache__/reader.cpython-38.pyc +0 -0
  63. nltkor/tag/libs/__pycache__/reader.cpython-39.pyc +0 -0
  64. nltkor/tag/libs/__pycache__/taggers.cpython-38.pyc +0 -0
  65. nltkor/tag/libs/__pycache__/taggers.cpython-39.pyc +0 -0
  66. nltkor/tag/libs/__pycache__/utils.cpython-38.pyc +0 -0
  67. nltkor/tag/libs/__pycache__/utils.cpython-39.pyc +0 -0
  68. nltkor/tag/libs/__pycache__/word_dictionary.cpython-38.pyc +0 -0
  69. nltkor/tag/libs/__pycache__/word_dictionary.cpython-39.pyc +0 -0
  70. nltkor/tag/libs/arguments.py +280 -0
  71. nltkor/tag/libs/attributes.py +231 -0
  72. nltkor/tag/libs/config.py +159 -0
  73. nltkor/tag/libs/metadata.py +129 -0
  74. nltkor/tag/libs/ner/__init__.py +2 -0
  75. nltkor/tag/libs/ner/__pycache__/__init__.cpython-38.pyc +0 -0
  76. nltkor/tag/libs/ner/__pycache__/__init__.cpython-39.pyc +0 -0
  77. nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-38.pyc +0 -0
  78. nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-39.pyc +0 -0
  79. nltkor/tag/libs/ner/macmorphoreader.py +7 -0
  80. nltkor/tag/libs/ner/ner_reader.py +92 -0
  81. nltkor/tag/libs/network.c +72325 -0
  82. nltkor/tag/libs/network.cpython-311-darwin.so +0 -0
  83. nltkor/tag/libs/network.pyx +878 -0
  84. nltkor/tag/libs/networkconv.pyx +1028 -0
  85. nltkor/tag/libs/networkdependencyconv.pyx +451 -0
  86. nltkor/tag/libs/parse/__init__.py +1 -0
  87. nltkor/tag/libs/parse/__pycache__/__init__.cpython-38.pyc +0 -0
  88. nltkor/tag/libs/parse/__pycache__/__init__.cpython-39.pyc +0 -0
  89. nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-38.pyc +0 -0
  90. nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-39.pyc +0 -0
  91. nltkor/tag/libs/parse/parse_reader.py +283 -0
  92. nltkor/tag/libs/pos/__init__.py +2 -0
  93. nltkor/tag/libs/pos/__pycache__/__init__.cpython-38.pyc +0 -0
  94. nltkor/tag/libs/pos/__pycache__/__init__.cpython-39.pyc +0 -0
  95. nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-38.pyc +0 -0
  96. nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-39.pyc +0 -0
  97. nltkor/tag/libs/pos/macmorphoreader.py +7 -0
  98. nltkor/tag/libs/pos/pos_reader.py +97 -0
  99. nltkor/tag/libs/reader.py +485 -0
  100. nltkor/tag/libs/srl/__init__.py +3 -0
  101. nltkor/tag/libs/srl/__pycache__/__init__.cpython-38.pyc +0 -0
  102. nltkor/tag/libs/srl/__pycache__/__init__.cpython-39.pyc +0 -0
  103. nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-38.pyc +0 -0
  104. nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-39.pyc +0 -0
  105. nltkor/tag/libs/srl/__pycache__/train_srl.cpython-38.pyc +0 -0
  106. nltkor/tag/libs/srl/__pycache__/train_srl.cpython-39.pyc +0 -0
  107. nltkor/tag/libs/srl/__srl_reader_.py +535 -0
  108. nltkor/tag/libs/srl/srl_reader.py +436 -0
  109. nltkor/tag/libs/srl/train_srl.py +87 -0
  110. nltkor/tag/libs/taggers.py +926 -0
  111. nltkor/tag/libs/utils.py +384 -0
  112. nltkor/tag/libs/word_dictionary.py +239 -0
  113. nltkor/tag/libs/wsd/__init__.py +2 -0
  114. nltkor/tag/libs/wsd/__pycache__/__init__.cpython-38.pyc +0 -0
  115. nltkor/tag/libs/wsd/__pycache__/__init__.cpython-39.pyc +0 -0
  116. nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-38.pyc +0 -0
  117. nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-39.pyc +0 -0
  118. nltkor/tag/libs/wsd/macmorphoreader.py +7 -0
  119. nltkor/tag/libs/wsd/wsd_reader.py +93 -0
  120. nltkor/tokenize/__init__.py +62 -0
  121. nltkor/tokenize/ko_tokenize.py +115 -0
  122. nltkor/trans.py +121 -0
  123. nltkor-1.2.14.dist-info/LICENSE.txt +1093 -0
  124. nltkor-1.2.14.dist-info/METADATA +41 -0
  125. nltkor-1.2.14.dist-info/RECORD +127 -0
  126. nltkor-1.2.14.dist-info/WHEEL +5 -0
  127. nltkor-1.2.14.dist-info/top_level.txt +1 -0
nltkor/tag/espresso_tag.py
@@ -0,0 +1,220 @@
+ import os
+ import shutil  # needed by _download_model to clear a stale data directory
+
+ # import sys
+ # sys.path.append("/Users/dowon/nltk_ko/nltk/tag")
+ # from libs import *
+
+ """
+ This script will run a POS or SRL tagger on the input data and print the results
+ to stdout.
+ """
+
+ import argparse
+ import logging
+
+ if __package__ is None:  # when run as a plain script
+     import sys
+     from os import path
+     #print(path.dirname(path.dirname(path.abspath(__file__))))
+     sys.path.append(path.dirname(path.dirname(path.abspath(__file__))))
+     from libs import *
+ else:
+     from .libs import *
+     #from .libs import *
+ import requests
+ import zipfile
+
+ logging.disable(logging.INFO)  # disable INFO and DEBUG logging everywhere
+
+
+ class EspressoTagger:
+     def __init__(self, data_dir=None, task="pos"):
+         self.data_dir = data_dir
+         if data_dir is None:
+             path = os.path.dirname(__file__)
+             path = path + '/data'
+             self.data_dir = path
+             #print(path)
+         self.path = ""
+         self.tagger = None
+
+         self.task = task.lower()
+         if not self._check_model():
+             self._download_model()
+
+         set_data_dir(self.data_dir)
+
+         if self.task == 'pos':
+             self.tagger = taggers.POSTagger(data_dir=self.data_dir)
+         elif self.task == 'ner':
+             self.tagger = taggers.NERTagger(data_dir=self.data_dir)
+         elif self.task == 'wsd':
+             self.tagger = taggers.WSDTagger(data_dir=self.data_dir)
+         elif self.task == 'srl':
+             self.tagger = taggers.SRLTagger(data_dir=self.data_dir)
+         elif self.task == 'dependency':
+             self.tagger = taggers.DependencyParser(data_dir=self.data_dir)
+         else:
+             raise ValueError('Unknown task: %s' % self.task)
+
+     def tag(self, text, use_sent_tokenizer=True, lemma=True):
+         """
+         Tags the given text for the task selected at construction time and
+         returns the result.
+
+         :param text: the input text
+         :param use_sent_tokenizer: whether to use the built-in sentence tokenizer
+         :param lemma: if True, tag in 'standard' (lemmatized) mode; otherwise
+             in 'eumjeol' (syllable) mode
+         """
+
+         #use_sent_tokenizer = not use_sent_tokenizer
+         mode = 'standard' if lemma else 'eumjeol'
+         result = self.tagger.tag(text, use_sent_tokenizer, mode)
+         '''
+         else:
+             tokens = text.split()
+             if self.task != 'dependency':
+                 result = [self.tagger.tag_tokens(tokens, True)]
+             else:
+                 result = [self.tagger.tag_tokens(tokens)]
+         '''
+
+         return self._result_tagged(result, self.task)
+
+     def _result_tagged(self, tagged_sents, task):
+         """
+         Returns the tagged sentences in the output format of the given task.
+
+         :param tagged_sents: sentences tagged by any of the Espresso taggers.
+         :param task: the tagging task ('pos', 'ner', 'wsd', 'srl' or 'dependency')
+         """
+
+         ##TODO: change the remaining print statements into returns
+         if task == 'pos':
+             return self._return_tagged_pos(tagged_sents)
+         elif task == 'ner':
+             return self._return_tagged_ner(tagged_sents)
+         elif task == 'wsd':
+             return self._return_tagged_wsd(tagged_sents)
+         elif task == 'srl':
+             return self._return_tagged_srl(tagged_sents)
+         elif task == 'dependency':
+             return self._return_parsed_dependency(tagged_sents)
+         else:
+             raise ValueError('Unknown task: %s' % task)
+
+
+     def _return_parsed_dependency(self, parsed_sents):
+         """Returns, per sentence, one tuple per token with its head information"""
+         result = []
+         temp_list = []
+         temp_list2 = []
+         for sent in parsed_sents:
+             temp_list = sent.to_conll().split('\t')
+             temp_list = temp_list[1:]
+             for ele in temp_list:
+                 if '\n' in ele:
+                     ele = ele[:ele.find('\n')]
+                 temp_list2.append(ele)
+             result.append(self._dependency_after(temp_list2)[:])
+             temp_list2 = []
+
+         return result
+
+     def _return_tagged_pos(self, tagged_sents):
+         """Returns a flat list of tagged tokens"""
+         result = []
+         for sent in tagged_sents:
+             result = result + list(sent)
+         return result
+
+     def _return_tagged_srl(self, tagged_sents):
+         """Returns, per sentence, a dict mapping each predicate to its
+         {label: argument} structure"""
+         result = []
+         for sent in tagged_sents:
+             temp_dict1 = {}
+             for predicate, arg_structure in sent.arg_structures:
+                 temp_dict2 = {}
+                 for label in arg_structure:
+                     argument = ' '.join(arg_structure[label])
+                     temp_dict2[label] = argument
+                 temp_dict1[predicate] = temp_dict2
+
+             result.append(temp_dict1)
+
+         return result
+
+     def _return_tagged_ner(self, tagged_sents):
+         """Returns a flat list of tagged tokens"""
+         result = []
+         for sent in tagged_sents:
+             for item in sent:
+                 #s = '_'.join(item)
+                 result.append(item)
+
+         return result
+
+     def _return_tagged_wsd(self, tagged_sents):
+         """Returns one token_tag string per token"""
+         result = []
+         for sent in tagged_sents:
+             for item in sent:
+                 s = '_'.join(item)
+                 result.append(s)
+
+         return result
+
+     def _download_model(self):
+         """Downloads the model data from the server"""
+         temp_path = os.path.dirname(__file__) + '/data.zip'
+         url = "https://air.changwon.ac.kr/~airdemo/storage/espresso_data_1/data.zip"
+         print("Downloading Espresso5 model...")
+         with requests.get(url, stream=True) as r:
+             r.raise_for_status()
+             with open(temp_path, 'wb') as f:
+                 for chunk in r.iter_content(chunk_size=8192):
+                     # For a chunk-encoded response, set chunk_size to None
+                     # and uncomment the guard below.
+                     #if chunk:
+                     f.write(chunk)
+
+         if os.path.exists(self.data_dir):
+             # os.rmdir() fails on non-empty directories; shutil.rmtree
+             # clears any stale data directory before extraction.
+             shutil.rmtree(self.data_dir)
+
+         with zipfile.ZipFile(temp_path, "r") as zip_ref:
+             zip_ref.extractall(os.path.dirname(__file__))
+
+     def _check_model(self):
+         """Checks whether the model data directory exists"""
+         return os.path.exists(self.data_dir)
+
+     def _dependency_after(self, fields):
+         """Groups the flat CoNLL field list into (index, field, field, field) tuples"""
+         temp_list = []
+         repeat = len(fields) // 3
+         for i in range(repeat):
+             index = i * 3
+             tup = (i + 1,) + tuple(fields[index:index + 3])
+             temp_list.append(tup)
+
+         return temp_list
+
+ if __name__ == '__main__':
+     tagger = EspressoTagger(task='pos')
+     print(tagger.tag("나는 아름다운 강산에 살고있다."))
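
A minimal usage sketch (assuming nltkor is installed, that nltkor/tag/__init__.py re-exports EspressoTagger, and that the model data can be downloaded on first use):

    from nltkor.tag import EspressoTagger  # assumed re-export; see nltkor/tag/__init__.py

    # POS tagging, as in the __main__ block above
    tagger = EspressoTagger(task='pos')
    print(tagger.tag("나는 아름다운 강산에 살고있다."))

    # syllable ('eumjeol') mode instead of the lemmatized 'standard' mode
    print(tagger.tag("나는 아름다운 강산에 살고있다.", lemma=False))

    # the constructor also accepts 'ner', 'wsd', 'srl' and 'dependency'
    srl_tagger = EspressoTagger(task='srl')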
nltkor/tag/libs/__init__.py
@@ -0,0 +1,10 @@
+ # imports to provide easier access for nlpnet users
+ from .config import set_data_dir
+ from . import taggers
+ from . import utils
+
+ from .taggers import POSTagger, NERTagger, WSDTagger, SRLTagger, DependencyParser
+ from .utils import tokenize
+ from .utils import PickleConverter
+
+ __version__ = '1.2.0'
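
A small sketch of the surface this __init__.py exposes (the data path is illustrative):

    from nltkor.tag.libs import set_data_dir, taggers

    set_data_dir('/path/to/espresso/data')  # illustrative path
    pos_tagger = taggers.POSTagger(data_dir='/path/to/espresso/data')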
nltkor/tag/libs/arguments.py
@@ -0,0 +1,280 @@
+ # -*- coding: utf-8 -*-
+
+ """
+ Code for argument parsing and a few verifications.
+ These arguments are used by the training script.
+ """
+
+ import argparse
+
+
+ def fill_defaults(args, defaults_per_task):
+     """
+     This function fills arguments not explicitly set (left as None)
+     with default values according to the chosen task.
+
+     We can't rely on argparse to do it because using subparsers with
+     set_defaults and a parent parser overwrites the defaults.
+     """
+     task = args.task
+     defaults = defaults_per_task[task]
+     for arg in args.__dict__:
+         if getattr(args, arg) is None and arg in defaults:
+             setattr(args, arg, defaults[arg])
+
+
+ def get_args():
+     parser = argparse.ArgumentParser(description="Train Espresso "
+                                      "for a given NLP task.")
+     subparsers = parser.add_subparsers(title='Tasks',
+                                        dest='task',
+                                        description='Task to train Espresso for. '
+                                        'Type %(prog)s [TASK] -h to get task-specific help.')
+
+     defaults = {}
+
+     # base parser with arguments not related to any model
+     base_parser = argparse.ArgumentParser(add_help=False)
+     base_parser.add_argument('-f', '--num_features', type=int,
+                              help='Number of features per word '
+                              '(used to generate random vectors)',
+                              default=50, dest='num_features')
+     base_parser.add_argument('--load_network', action='store_true',
+                              help='Load previously saved network')
+     base_parser.add_argument('--load_features', action='store_true',
+                              help="Load previously saved word type features "
+                              "(overrides -f and must also load a vocabulary file)",
+                              dest='load_types')
+     base_parser.add_argument('-v', '--verbose', help='Verbose mode',
+                              action="store_true")
+     base_parser.add_argument('--gold', help='File with annotated data for training.',
+                              type=str, required=True)
+     base_parser.add_argument('--data', help='Directory to save new models and load '
+                              'partially trained ones (default: ./data/)', type=str,
+                              default='./data/')
+     base_parser.add_argument('--dev', help='Development (validation) data. If not given, '
+                              'training data will be used to evaluate performance.',
+                              default=None)
+
+     # parser with network arguments shared among most tasks
+     # each task-specific parser may define defaults
+     network_parser = argparse.ArgumentParser(add_help=False, parents=[base_parser])
+
+     network_parser.add_argument('-w', '--window', type=int,
+                                 help='Size of the word window',
+                                 dest='window')  # must be an odd number
+     network_parser.add_argument('-e', '--epochs', type=int, dest='iterations',
+                                 help='Number of training epochs')
+     network_parser.add_argument('-l', '--learning_rate', type=float,
+                                 help='Learning rate for network connections',
+                                 dest='learning_rate')
+     network_parser.add_argument('--lf', type=float,
+                                 help='Learning rate for features',
+                                 dest='learning_rate_features')
+     network_parser.add_argument('--lt', type=float,
+                                 help='Learning rate for tag transitions',
+                                 dest='learning_rate_transitions')
+     network_parser.add_argument('--decay', type=float, const=1, nargs='?', default=None,
+                                 help='Use learning rate decay. Optionally, '
+                                 'supply the decay factor (default 1)')
+     network_parser.add_argument('-a', '--accuracy', type=float,
+                                 help='Desired accuracy per token.',
+                                 default=0, dest='accuracy')
+     network_parser.add_argument('-n', '--hidden', type=int,
+                                 help='Number of hidden neurons',
+                                 dest='hidden')
+     network_parser.add_argument('--caps', const=5, nargs='?', type=int, default=None,
+                                 help='Include capitalization features. '
+                                 'Optionally, supply the number of features (default 5)')
+
+     # parser with arguments shared among convolution-based tasks
+     conv_parser = argparse.ArgumentParser(add_help=False)
+     conv_parser.add_argument('-c', '--convolution', type=int,
+                              help='Number of convolution neurons',
+                              dest='convolution')
+     conv_parser.add_argument('--pos', const=5, nargs='?', type=int, default=None,
+                              help='Include part-of-speech features. '
+                              'Optionally, supply the number of features (default 5)')
+     conv_parser.add_argument('--max_dist', type=int, default=10,
+                              help='Maximum distance to have its own feature vector')
+     conv_parser.add_argument('--target_features', type=int, default=5,
+                              help='Number of features for distance to target word')
+     conv_parser.add_argument('--pred_features', type=int, default=5,
+                              help='Number of features for distance to predicate')
+
+
+     # POS argument parser
+     parser_pos = subparsers.add_parser('pos', help='POS tagging',
+                                        parents=[network_parser])
+     parser_pos.add_argument('--suffix', const=2, nargs='?', type=int, default=None,
+                             help='Include suffix features. Optionally, '
+                             'supply the number of features (default 2)')
+     parser_pos.add_argument('--suffix_size', type=int, default=5,
+                             help='Use suffixes up to this size (in characters, default 5). '
+                             'Only used if --suffix is supplied')
+     parser_pos.add_argument('--prefix', const=2, nargs='?', type=int, default=None,
+                             help='Include prefix features. Optionally, '
+                             'supply the number of features (default 2)')
+     parser_pos.add_argument('--prefix_size', type=int, default=5,
+                             help='Use prefixes up to this size (in characters, default 5). '
+                             'Only used if --prefix is supplied')
+     defaults['pos'] = dict(window=9, hidden=300, iterations=20,
+                            learning_rate=0.001, learning_rate_features=0.001,
+                            learning_rate_transitions=0.001)
+
+     # NER argument parser
+     parser_ner = subparsers.add_parser('ner', help='Named Entity Tagging',
+                                        parents=[network_parser])
+     parser_ner.add_argument('--suffix', const=2, nargs='?', type=int, default=None,
+                             help='Include suffix features. Optionally, '
+                             'supply the number of features (default 2)')
+     parser_ner.add_argument('--suffix_size', type=int, default=5,
+                             help='Use suffixes up to this size (in characters, default 5). '
+                             'Only used if --suffix is supplied')
+     parser_ner.add_argument('--prefix', const=2, nargs='?', type=int, default=None,
+                             help='Include prefix features. Optionally, '
+                             'supply the number of features (default 2)')
+     parser_ner.add_argument('--prefix_size', type=int, default=5,
+                             help='Use prefixes up to this size (in characters, default 5). '
+                             'Only used if --prefix is supplied')
+     defaults['ner'] = dict(window=5, hidden=300, iterations=20,
+                            learning_rate=0.001, learning_rate_features=0.001,
+                            learning_rate_transitions=0.001)
+
+     # WSD argument parser
+     parser_wsd = subparsers.add_parser('wsd', help='Word Sense Disambiguation',
+                                        parents=[network_parser])
+     parser_wsd.add_argument('--suffix', const=2, nargs='?', type=int, default=None,
+                             help='Include suffix features. Optionally, '
+                             'supply the number of features (default 2)')
+     parser_wsd.add_argument('--suffix_size', type=int, default=5,
+                             help='Use suffixes up to this size (in characters, default 5). '
+                             'Only used if --suffix is supplied')
+     parser_wsd.add_argument('--prefix', const=2, nargs='?', type=int, default=None,
+                             help='Include prefix features. Optionally, '
+                             'supply the number of features (default 2)')
+     parser_wsd.add_argument('--prefix_size', type=int, default=5,
+                             help='Use prefixes up to this size (in characters, default 5). '
+                             'Only used if --prefix is supplied')
+     defaults['wsd'] = dict(window=5, hidden=300, iterations=20,
+                            learning_rate=0.001, learning_rate_features=0.001,
+                            learning_rate_transitions=0.001)
+
+     # Chunk argument parser (commented-out copy of the NER parser)
+     #parser_ner = subparsers.add_parser('ner', help='Named Entity Tagging',
+     #                                   parents=[network_parser])
+     #parser_ner.add_argument('--suffix', const=2, nargs='?', type=int, default=None,
+     #                        help='Include suffix features. Optionally, '
+     #                        'supply the number of features (default 2)')
+     #parser_ner.add_argument('--suffix_size', type=int, default=5,
+     #                        help='Use suffixes up to this size (in characters, default 5). '
+     #                        'Only used if --suffix is supplied')
+     #parser_ner.add_argument('--prefix', const=2, nargs='?', type=int, default=None,
+     #                        help='Include prefix features. Optionally, '
+     #                        'supply the number of features (default 2)')
+     #parser_ner.add_argument('--prefix_size', type=int, default=5,
+     #                        help='Use prefixes up to this size (in characters, default 5). '
+     #                        'Only used if --prefix is supplied')
+     #defaults['ner'] = dict(window=7, hidden=300, iterations=20,
+     #                       learning_rate=0.001, learning_rate_features=0.001,
+     #                       learning_rate_transitions=0.001)
+
+
+     # dependency
+     parser_dep = subparsers.add_parser('dependency', help='Dependency parsing')
+     dep_subparsers = parser_dep.add_subparsers(title='Dependency parsing training steps',
+                                                dest='subtask',
+                                                description='Which step of the dependency training '
+                                                '(detecting edges or labeling them)')
+
+     dep_subparsers.add_parser('labeled', help='Labeling dependency edges',
+                               parents=[network_parser, conv_parser])
+     dep_subparsers.add_parser('unlabeled', help='Dependency edge detection',
+                               parents=[network_parser, conv_parser])
+
+     defaults['dependency_filter'] = dict()
+     defaults['labeled_dependency'] = dict(window=9, hidden=200, iterations=20,
+                                           learning_rate=0.001, learning_rate_features=0.001,
+                                           convolution=300, pos=50,
+                                           learning_rate_transitions=0.001)
+     defaults['unlabeled_dependency'] = dict(window=9, hidden=200, iterations=20,
+                                             learning_rate=0.001, learning_rate_features=0.001,
+                                             convolution=300, pos=50,
+                                             learning_rate_transitions=0.001)
+
+
+     # SRL argument parser
+     # There is another level of subparsers for predicate detection /
+     # argument boundary identification / argument classification /
+     # (id + class) in one step
+     parser_srl = subparsers.add_parser('srl', help='Semantic Role Labeling',
+                                        parents=[network_parser, conv_parser],
+                                        formatter_class=argparse.RawDescriptionHelpFormatter)
+     #parser_srl.set_defaults(identify=False, predicates=False, classify=False)
+
+     desc = '''SRL has 3 steps: predicate detection, argument identification and
+     argument classification. Each one depends on the one before.
+
+     You need one model trained for each subtask (or one for predicate
+     detection and another with the other 2 steps) in order to perform
+     full SRL.
+
+     Type %(prog)s [SUBTASK] -h to get subtask-specific help.'''
+
+     '''
+     srl_subparsers = parser_srl.add_subparsers(title='SRL subtasks',
+                                                dest='subtask',
+                                                description=desc)
+     srl_subparsers.add_parser('pred', help='Predicate identification',
+                               parents=[network_parser])
+     defaults['srl_predicates'] = dict(window=5, hidden=50, iterations=1,
+                                       learning_rate=0.01, learning_rate_features=0.01,
+                                       learning_rate_transitions=0.01,
+                                       predicates=True)
+
+     srl_subparsers.add_parser('id', help='Argument identification',
+                               parents=[network_parser, conv_parser])
+     defaults['srl_boundary'] = dict(window=3, hidden=150, convolution=150,
+                                     identify=True, iterations=15,
+                                     learning_rate=0.001, learning_rate_features=0.001,
+                                     learning_rate_transitions=0.001)
+
+     srl_subparsers.add_parser('class', help='Argument classification',
+                               parents=[network_parser, conv_parser])
+     defaults['srl_classify'] = dict(window=3, hidden=0, convolution=100,
+                                     classify=True, iterations=3,
+                                     learning_rate=0.01, learning_rate_features=0.01,
+                                     learning_rate_transitions=0.01)
+     srl_subparsers.add_parser('1step', parents=[network_parser, conv_parser],
+                               help='Argument identification and '
+                               'classification together')
+     '''
+     defaults['srl'] = dict(window=3, hidden=150, convolution=200, iterations=8,
+                            pos=5,
+                            learning_rate=0.001, learning_rate_features=0.001,
+                            learning_rate_transitions=0.001)
+
+
+     args = parser.parse_args()
+
+     #if args.task == 'srl':
+     #    if args.subtask == 'class':
+     #        args.task = 'srl_classify'
+     #        args.classify = True
+     #    elif args.subtask == 'id':
+     #        args.task = 'srl_boundary'
+     #        args.identify = True
+     #    elif args.subtask == 'pred':
+     #        args.task = 'srl_predicates'
+     #        args.predicates = True
+     if args.task == 'dependency':
+         if args.subtask == 'labeled':
+             args.task = 'labeled_dependency'
+             args.labeled = True
+         elif args.subtask == 'unlabeled':
+             args.task = 'unlabeled_dependency'
+             args.labeled = False
+
+     fill_defaults(args, defaults)
+     return args
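
A sketch of how get_args and fill_defaults interact, assuming the module is importable as nltkor.tag.libs.arguments (per the file list above); the argv values are illustrative:

    import sys
    from nltkor.tag.libs.arguments import get_args  # import path assumed from the file layout

    # simulate: train.py pos --gold train.conll -w 9 -e 20
    sys.argv = ['train.py', 'pos', '--gold', 'train.conll', '-w', '9', '-e', '20']
    args = get_args()

    # explicitly passed values survive; unset ones are filled from defaults['pos']
    print(args.window, args.iterations, args.learning_rate)  # 9 20 0.001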