nltkor 1.2.14__cp311-cp311-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. nltkor/Kor_char.py +193 -0
  2. nltkor/__init__.py +16 -0
  3. nltkor/alignment/__init__.py +1315 -0
  4. nltkor/cider/__init__.py +2 -0
  5. nltkor/cider/cider.py +55 -0
  6. nltkor/cider/cider_scorer.py +207 -0
  7. nltkor/distance/__init__.py +441 -0
  8. nltkor/distance/wasserstein.py +126 -0
  9. nltkor/etc.py +22 -0
  10. nltkor/lazyimport.py +144 -0
  11. nltkor/make_requirement.py +11 -0
  12. nltkor/metrics/__init__.py +63 -0
  13. nltkor/metrics/bartscore.py +301 -0
  14. nltkor/metrics/bertscore.py +331 -0
  15. nltkor/metrics/bleu_tensor.py +20 -0
  16. nltkor/metrics/classical.py +847 -0
  17. nltkor/metrics/entment.py +24 -0
  18. nltkor/metrics/eval.py +517 -0
  19. nltkor/metrics/mauve.py +273 -0
  20. nltkor/metrics/mauve_utils.py +131 -0
  21. nltkor/misc/__init__.py +11 -0
  22. nltkor/misc/string2string_basic_functions.py +59 -0
  23. nltkor/misc/string2string_default_tokenizer.py +83 -0
  24. nltkor/misc/string2string_hash_functions.py +159 -0
  25. nltkor/misc/string2string_word_embeddings.py +503 -0
  26. nltkor/search/__init__.py +10 -0
  27. nltkor/search/classical.py +569 -0
  28. nltkor/search/faiss_search.py +787 -0
  29. nltkor/search/kobert_tokenizer.py +181 -0
  30. nltkor/sejong/__init__.py +3 -0
  31. nltkor/sejong/__pycache__/__init__.cpython-38.pyc +0 -0
  32. nltkor/sejong/__pycache__/__init__.cpython-39.pyc +0 -0
  33. nltkor/sejong/__pycache__/sejong_download.cpython-38.pyc +0 -0
  34. nltkor/sejong/__pycache__/sejong_download.cpython-39.pyc +0 -0
  35. nltkor/sejong/__pycache__/ssem.cpython-38.pyc +0 -0
  36. nltkor/sejong/__pycache__/ssem.cpython-39.pyc +0 -0
  37. nltkor/sejong/ch.py +12 -0
  38. nltkor/sejong/dict_semClassNum.txt +491 -0
  39. nltkor/sejong/layer.txt +630 -0
  40. nltkor/sejong/sejong_download.py +87 -0
  41. nltkor/sejong/ssem.py +684 -0
  42. nltkor/similarity/__init__.py +3 -0
  43. nltkor/similarity/bartscore____.py +337 -0
  44. nltkor/similarity/bertscore____.py +339 -0
  45. nltkor/similarity/classical.py +245 -0
  46. nltkor/similarity/cosine_similarity.py +175 -0
  47. nltkor/tag/__init__.py +71 -0
  48. nltkor/tag/__pycache__/__init__.cpython-38.pyc +0 -0
  49. nltkor/tag/__pycache__/__init__.cpython-39.pyc +0 -0
  50. nltkor/tag/__pycache__/espresso_tag.cpython-38.pyc +0 -0
  51. nltkor/tag/__pycache__/espresso_tag.cpython-39.pyc +0 -0
  52. nltkor/tag/espresso_tag.py +220 -0
  53. nltkor/tag/libs/__init__.py +10 -0
  54. nltkor/tag/libs/__pycache__/__init__.cpython-38.pyc +0 -0
  55. nltkor/tag/libs/__pycache__/__init__.cpython-39.pyc +0 -0
  56. nltkor/tag/libs/__pycache__/attributes.cpython-38.pyc +0 -0
  57. nltkor/tag/libs/__pycache__/attributes.cpython-39.pyc +0 -0
  58. nltkor/tag/libs/__pycache__/config.cpython-38.pyc +0 -0
  59. nltkor/tag/libs/__pycache__/config.cpython-39.pyc +0 -0
  60. nltkor/tag/libs/__pycache__/metadata.cpython-38.pyc +0 -0
  61. nltkor/tag/libs/__pycache__/metadata.cpython-39.pyc +0 -0
  62. nltkor/tag/libs/__pycache__/reader.cpython-38.pyc +0 -0
  63. nltkor/tag/libs/__pycache__/reader.cpython-39.pyc +0 -0
  64. nltkor/tag/libs/__pycache__/taggers.cpython-38.pyc +0 -0
  65. nltkor/tag/libs/__pycache__/taggers.cpython-39.pyc +0 -0
  66. nltkor/tag/libs/__pycache__/utils.cpython-38.pyc +0 -0
  67. nltkor/tag/libs/__pycache__/utils.cpython-39.pyc +0 -0
  68. nltkor/tag/libs/__pycache__/word_dictionary.cpython-38.pyc +0 -0
  69. nltkor/tag/libs/__pycache__/word_dictionary.cpython-39.pyc +0 -0
  70. nltkor/tag/libs/arguments.py +280 -0
  71. nltkor/tag/libs/attributes.py +231 -0
  72. nltkor/tag/libs/config.py +159 -0
  73. nltkor/tag/libs/metadata.py +129 -0
  74. nltkor/tag/libs/ner/__init__.py +2 -0
  75. nltkor/tag/libs/ner/__pycache__/__init__.cpython-38.pyc +0 -0
  76. nltkor/tag/libs/ner/__pycache__/__init__.cpython-39.pyc +0 -0
  77. nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-38.pyc +0 -0
  78. nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-39.pyc +0 -0
  79. nltkor/tag/libs/ner/macmorphoreader.py +7 -0
  80. nltkor/tag/libs/ner/ner_reader.py +92 -0
  81. nltkor/tag/libs/network.c +72325 -0
  82. nltkor/tag/libs/network.cpython-311-darwin.so +0 -0
  83. nltkor/tag/libs/network.pyx +878 -0
  84. nltkor/tag/libs/networkconv.pyx +1028 -0
  85. nltkor/tag/libs/networkdependencyconv.pyx +451 -0
  86. nltkor/tag/libs/parse/__init__.py +1 -0
  87. nltkor/tag/libs/parse/__pycache__/__init__.cpython-38.pyc +0 -0
  88. nltkor/tag/libs/parse/__pycache__/__init__.cpython-39.pyc +0 -0
  89. nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-38.pyc +0 -0
  90. nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-39.pyc +0 -0
  91. nltkor/tag/libs/parse/parse_reader.py +283 -0
  92. nltkor/tag/libs/pos/__init__.py +2 -0
  93. nltkor/tag/libs/pos/__pycache__/__init__.cpython-38.pyc +0 -0
  94. nltkor/tag/libs/pos/__pycache__/__init__.cpython-39.pyc +0 -0
  95. nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-38.pyc +0 -0
  96. nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-39.pyc +0 -0
  97. nltkor/tag/libs/pos/macmorphoreader.py +7 -0
  98. nltkor/tag/libs/pos/pos_reader.py +97 -0
  99. nltkor/tag/libs/reader.py +485 -0
  100. nltkor/tag/libs/srl/__init__.py +3 -0
  101. nltkor/tag/libs/srl/__pycache__/__init__.cpython-38.pyc +0 -0
  102. nltkor/tag/libs/srl/__pycache__/__init__.cpython-39.pyc +0 -0
  103. nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-38.pyc +0 -0
  104. nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-39.pyc +0 -0
  105. nltkor/tag/libs/srl/__pycache__/train_srl.cpython-38.pyc +0 -0
  106. nltkor/tag/libs/srl/__pycache__/train_srl.cpython-39.pyc +0 -0
  107. nltkor/tag/libs/srl/__srl_reader_.py +535 -0
  108. nltkor/tag/libs/srl/srl_reader.py +436 -0
  109. nltkor/tag/libs/srl/train_srl.py +87 -0
  110. nltkor/tag/libs/taggers.py +926 -0
  111. nltkor/tag/libs/utils.py +384 -0
  112. nltkor/tag/libs/word_dictionary.py +239 -0
  113. nltkor/tag/libs/wsd/__init__.py +2 -0
  114. nltkor/tag/libs/wsd/__pycache__/__init__.cpython-38.pyc +0 -0
  115. nltkor/tag/libs/wsd/__pycache__/__init__.cpython-39.pyc +0 -0
  116. nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-38.pyc +0 -0
  117. nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-39.pyc +0 -0
  118. nltkor/tag/libs/wsd/macmorphoreader.py +7 -0
  119. nltkor/tag/libs/wsd/wsd_reader.py +93 -0
  120. nltkor/tokenize/__init__.py +62 -0
  121. nltkor/tokenize/ko_tokenize.py +115 -0
  122. nltkor/trans.py +121 -0
  123. nltkor-1.2.14.dist-info/LICENSE.txt +1093 -0
  124. nltkor-1.2.14.dist-info/METADATA +41 -0
  125. nltkor-1.2.14.dist-info/RECORD +127 -0
  126. nltkor-1.2.14.dist-info/WHEEL +5 -0
  127. nltkor-1.2.14.dist-info/top_level.txt +1 -0
nltkor/sejong/ssem.py ADDED
@@ -0,0 +1,684 @@
1
+ from xml.etree.ElementTree import parse
2
+ import os, re
3
+ from operator import eq
4
+ import time
5
+ import nltkor
6
+ from nltkor.sejong.sejong_download import SejongDir
7
+
8
# Directory containing the bundled Sejong data files (this package's directory).
common_path=os.path.dirname(nltkor.sejong.__file__)
10
class Entry():
    """One dictionary entry of the Sejong electronic dictionary.

    Wraps an <entry> XML element together with its dotted name
    (e.g. "word.pos.n") and part-of-speech tag.
    """

    def __init__(self, name, en, pos):
        self.name = name
        self.entry = en
        self.pos = pos
        self.SejongDir = SejongDir()

    def __repr__(self):
        return "%s('%s')" % (type(self).__name__, self.name)

    # Sense objects of this entry
    def senses(self):
        """Return a Sense object for every <sense> child of this entry."""
        result = []
        for node in self.entry.findall("sense"):
            try:
                label = str(self.name + "." + node.attrib['n'])
            except KeyError:
                # some entries carry no sense number
                label = str(self.name)
            result.append(Sense(label, node, self.pos))
        return result

    def _collect(self, group_tag, item_tag):
        """Texts of <item_tag> children under <group_tag>, '~' expanded.

        Mirrors the shared idiom of idm/comp/der: a None text aborts the
        scan and returns whatever was collected so far; '~' is replaced
        (in the tree itself) by the headword part of self.name.
        """
        result = []
        try:
            items = self.entry.find(group_tag).findall(item_tag)
        except AttributeError:
            # group element absent -> find() returned None
            return result

        headword = self.name.split('.')[0]
        for node in items:
            if node.text is None:
                return result
            if '~' in node.text:
                node.text = node.text.replace('~', headword)
            result.append(node.text)

        return result

    # idioms
    def idm(self):
        """Return the idiom phrases of this entry."""
        return self._collect("idm_grp", "idm")

    # compound words
    def comp(self):
        """Return the compound words of this entry."""
        return self._collect("morph_grp", "comp")

    # derived words
    def der(self):
        """Return the derived words of this entry."""
        return self._collect("morph_grp", "der")
98
+
99
+
100
class Sense():
    """One <sense> of a Sejong dictionary entry.

    Exposes the lexical relations (synonyms, antonyms, hyper/hyponyms, ...),
    examples, translations and selectional-restriction frames of the sense.

    Fixes over the previous revision:
      * hypo()/holo()/example() no longer raise TypeError when an element's
        text is None (they now follow the same "any None -> []" convention
        as syn()/ant()/coord()/...);
      * wup_similarity() no longer leaks its file handle;
      * the parsed XML tree is no longer mutated when '~' is expanded;
      * builtins `str`/`list` are no longer shadowed.
    """

    def __init__(self, name, se, pos):
        self.name = name      # dotted sense name, e.g. "word.pos.n.m"
        self.sense = se       # the <sense> XML element
        self.pos = pos        # part-of-speech tag of the parent entry

    def __repr__(self):
        return "%s('%s')" % (type(self).__name__, self.name)

    # shared lookup of the lexical-relation group
    def common_lr(self, sense):
        """Return the <lr> node under <sem_grp> (may be None).

        Raises AttributeError when <sem_grp> itself is absent — preserved
        from the original, since callers rely on it surfacing bad data.
        """
        return sense.find("sem_grp").find("lr")

    def _expand(self, text):
        """Replace '~' in *text* with the headword part of self.name."""
        if text is not None and '~' in text:
            text = text.replace('~', self.name.split('.')[0])
        return text

    def _lr_texts(self, tag, expand=False):
        """Texts of <tag> children of <lr>; [] if <lr> is missing or any text is None."""
        lr = self.common_lr(self.sense)
        try:
            nodes = lr.findall(tag)
        except AttributeError:
            return []
        texts = [self._expand(n.text) if expand else n.text for n in nodes]
        return [] if None in texts else texts

    # semantic class
    def sem(self):
        """Return the semantic class of this sense as a one-element list."""
        node = self.sense.find("sem_grp").find("sem_class")
        try:
            return [node.text]
        except AttributeError:
            # no <sem_class> element
            return []

    # synonyms
    def syn(self):
        return self._lr_texts("syn")

    # antonyms
    def ant(self):
        return self._lr_texts("ant")

    # coordinate terms
    def coord(self):
        return self._lr_texts("coord")

    # meronyms
    def mero(self):
        return self._lr_texts("mero")

    # hypernyms
    def hyper(self):
        return self._lr_texts("hyper")

    # hyponyms ('~' expanded to the headword)
    def hypo(self):
        return self._lr_texts("hypo", expand=True)

    # holonyms ('~' expanded to the headword)
    def holo(self):
        return self._lr_texts("holo", expand=True)

    # related words
    def rel(self):
        return self._lr_texts("rel")

    # usage examples (common nouns only)
    def example(self):
        """Return the <eg> example sentences; only defined for pos 'nng_s'."""
        if self.pos != 'nng_s':
            return []
        texts = [self._expand(n.text)
                 for n in self.sense.find("sem_grp").findall("eg")]
        return [] if None in texts else texts

    # English translations
    def trans(self):
        texts = [n.text for n in self.sense.find("sem_grp").findall("trans")]
        return [] if None in texts else texts

    def _syn_grp_texts(self, tag):
        """Texts of <tag> under <syn_grp>, '~' expanded; partial list on None text."""
        try:
            nodes = self.sense.find("syn_grp").findall(tag)
        except AttributeError:
            return []
        texts = []
        for node in nodes:
            if node.text is None:
                return texts
            texts.append(self._expand(node.text))
        return texts

    # adjective combinations
    def comb_aj(self):
        return self._syn_grp_texts("comb_aj")

    # noun combinations
    def comb_n(self):
        return self._syn_grp_texts("comb_n")

    # verb combinations (text lives in a nested <form> element)
    def comb_v(self):
        try:
            nodes = self.sense.find("syn_grp").findall("comb_v")
        except AttributeError:
            return []
        texts = []
        for node in nodes:
            form = node.find("form").text
            if form is None:
                return texts
            texts.append(self._expand(form))
        return texts

    # selectional-restriction frames
    def sel_rst(self):
        """Map each frame string to [arg-description, [examples]] pairs.

        Returns [] for common nouns (pos 'nng_s'), which carry no frames.
        """
        if self.pos == 'nng_s':
            return []

        frames = {}
        for grp in self.sense.findall("frame_grp"):
            sub_list = []
            for subsense in grp.findall('subsense'):
                parts = []
                for node in subsense.findall('sel_rst'):
                    text = ""
                    for key, value in node.attrib.items():
                        if key == 'arg':
                            text += "<" + key + "=" + value + " "
                        if key == 'tht':
                            text += key + "=" + value + ">"
                    # a missing text becomes a single space, as before
                    text += node.text if node.text is not None else ' '
                    parts.append(text)
                sub_list.append(', '.join(parts))
                sub_list.append([eg.text for eg in subsense.findall('eg')])
            frames[grp.find('frame').text] = sub_list

        return frames

    # path from the semantic-class root down to this sense's class
    def sem_path(self):
        cur_sem = self.sem()[0]
        if cur_sem is None:
            return []

        filename = common_path + '/dict_semClassNum.txt'
        with open(filename, 'r', encoding="cp949") as file_object:
            raw = file_object.read()

        # split on newlines/tabs into "number_label" tokens
        sem_list = []
        token = ""
        for ch in raw:
            if ch != '\n' and ch != '\t':
                token += ch
            elif token != '':
                sem_list.append(token)
                token = ''

        # locate the full token for this sense's semantic class
        pattern = re.compile(r"_" + cur_sem + '$')
        for cand in sem_list:
            if pattern.search(cand):
                cur_sem = cand
        path_list = [cur_sem]

        # walk up by chopping the numeric suffix one level at a time
        # NOTE(review): assumes every level has a parent token in the file;
        # a missing parent would loop forever, as in the original.
        while len(cur_sem.split('_')[0]) > 1:
            prefix = cur_sem.split('_')[0]
            if prefix[-2] == '.':
                tmp = prefix[:-2] + '_'
            else:
                tmp = prefix[:-3] + '_'
            pattern = re.compile(r"^" + tmp)
            for cand in sem_list:
                if pattern.search(cand):
                    cur_sem = cand
                    path_list.append(cand)

        return list(reversed(path_list))

    # Wu-Palmer-style similarity between two senses
    def wup_similarity(self, target):
        """Similarity in [0, 1] based on shared layer.txt path prefixes.

        NOTE(review): assumes both semantic classes occur in layer.txt;
        fewer than two matches raises IndexError, as in the original.
        """
        sem1 = self.sense.find("sem_grp").find("sem_class").text
        sem2 = target.sense.find("sem_grp").find("sem_class").text

        matches = []
        with open(common_path + "/layer.txt", 'r') as fp:
            for line in fp:
                if '_' + sem1 + '\n' in line:
                    matches.append(line)
                if '_' + sem2 + '\n' in line:
                    matches.append(line)

        codes = [line.split("_")[0] for line in matches]
        word1 = codes[0].split('.')
        word2 = codes[1].split('.')

        # 2 points per shared leading path component
        same = 0
        for i in range(min(len(word1), len(word2))):
            if word1[i] == word2[i]:
                same += 2
            else:
                break
        if self.name == target.name:
            same += 2

        return same / (len(word1) + len(word2) + 2)
514
+
515
+
516
+
517
# direct access to a single sense by its full dotted name
def sense(input):
    """Return the Sense named *input* ("word.pos.n.m"), or None if absent."""
    parts = input.split('.')
    target = entry(parts[0] + '.' + parts[1] + '.' + parts[2])

    for node in target.entry.findall("sense"):
        if input == str(target.name + '.' + node.attrib['n']):
            return Sense(input, node, target.pos)
531
+
532
# direct access to a single entry by its dotted name
def entry(input):
    """Load the Entry named *input* ("word.pos[.n]") from the Sejong XML data.

    Returns None when the POS tag is unrecognised or no entry matches.
    """
    parts = input.split('.')

    base = common_path
    if 'nn' in parts[1]:
        base += "/01. 체언_상세"
    elif parts[1] == 'vv':
        base += "/02. 용언_상세//vv"
    elif parts[1] == 'va':
        base += "/02. 용언_상세//va"
    else:
        return

    xml_path = base + "//" + parts[0] + ".xml"
    root = parse(xml_path).getroot()

    for en in root.findall("entry"):
        try:
            label = str(parts[0] + "." + en.attrib['pos'] + "." + en.attrib['n'])
        except KeyError:
            # entry without a sense number
            label = str(parts[0] + "." + en.attrib['pos'])
        if input == label:
            return Entry(label, en, str(en.attrib['pos']))
559
+
560
# Entry objects for every homograph of a word
def entrys(word):
    """Return an Entry for every <entry> in each Sejong XML file for *word*."""
    result = []

    for file_path in filecheck(word):
        root = parse(file_path).getroot()
        for en in root.findall("entry"):
            try:
                label = str(word + "." + en.attrib['pos'] + "." + en.attrib['n'])
            except KeyError:
                # entry without a sense number
                label = str(word + "." + en.attrib['pos'])
            result.append(Entry(label, en, str(en.attrib['pos'])))

    return result
580
+
581
+
582
def _syn(word):
    """Flatten the synonym lists of every sense of every entry for *word*."""
    synonyms = []
    for ent in entrys(word):
        for sns in ent.senses():
            synonyms.extend(sns.syn())
    return synonyms
592
+
593
+ '''
594
+ def entry_error():
595
+
596
+ path="./02. 용언_상세//va"
597
+ abs_dir=os.path.join(os.getcwd(),path)
598
+ file_names=os.listdir(abs_dir)
599
+
600
+ #print(file_names)
601
+ #print(len(file_names))
602
+ error_list=[]
603
+
604
+
605
+ for word in file_names:
606
+
607
+ fpath=path+"//"+word
608
+ #print(fpath)
609
+ tree=parse(fpath)
610
+ root=tree.getroot()
611
+ #print(root.findtext('orth'))
612
+ allentry=root.findall("entry")
613
+
614
+ for en in allentry:
615
+ try:
616
+ en.attrib['n']
617
+
618
+ except:
619
+
620
+ error_list.append(word)
621
+ break;
622
+
623
+ print(error_list)
624
+ print(len(error_list))
625
+ print(len(file_names))
626
+
627
+
628
+ return error_list
629
+
630
+
631
+
632
+ def sense_error():
633
+
634
+ path="./02. 용언_상세//va"
635
+ abs_dir=os.path.join(os.getcwd(),path)
636
+ file_names=os.listdir(abs_dir)
637
+
638
+ error_list=[]
639
+
640
+
641
+ for word in file_names:
642
+
643
+ fpath=path+"//"+word
644
+ tree=parse(fpath)
645
+ root=tree.getroot()
646
+ allentry=root.findall("entry")
647
+
648
+ for en in allentry:
649
+ allsense=en.findall("sense")
650
+ for se in allsense:
651
+ try:
652
+ se.attrib['n']
653
+ except:
654
+
655
+ if word not in error_list:
656
+ error_list.append(word)
657
+ break;
658
+
659
+ print(error_list)
660
+ print(len(error_list))
661
+ print(len(file_names))
662
+
663
+
664
+ return error_list
665
+
666
+ '''
667
+
668
# locate the XML data files for a word
def filecheck(word):
    """Return the paths of every Sejong XML file named '<word>.xml'."""
    search_dirs = [
        common_path + "/01. 체언_상세",
        common_path + "/02. 용언_상세/vv",
        common_path + "/02. 용언_상세/va",
    ]
    target = word + ".xml"

    found = []
    for directory in search_dirs:
        if target in os.listdir(directory):
            found.append(directory + "/" + target)

    return found
@@ -0,0 +1,3 @@
1
+ # The following trick allows us to import the classes directly from the similarity module:
2
+ from .cosine_similarity import CosineSimilarity
3
+ from .classical import LCSubstringSimilarity, LCSubsequenceSimilarity, JaroSimilarity