nltkor 1.2.0__tar.gz → 1.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {nltkor-1.2.0 → nltkor-1.2.3}/PKG-INFO +2 -2
  2. {nltkor-1.2.0 → nltkor-1.2.3}/README.md +3 -16
  3. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor.egg-info/PKG-INFO +2 -2
  4. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor.egg-info/SOURCES.txt +3 -1
  5. {nltkor-1.2.0 → nltkor-1.2.3}/setup.py +2 -2
  6. nltkor-1.2.3/test/test.py +282 -0
  7. nltkor-1.2.3/test/testespresso.py +19 -0
  8. {nltkor-1.2.0 → nltkor-1.2.3}/LICENSE.txt +0 -0
  9. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/Kor_char.py +0 -0
  10. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/__init__.py +1 -1
  11. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/alignment/__init__.py +0 -0
  12. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/cider/__init__.py +0 -0
  13. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/cider/cider.py +0 -0
  14. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/cider/cider_scorer.py +0 -0
  15. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/distance/__init__.py +0 -0
  16. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/distance/wasserstein.py +0 -0
  17. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/etc.py +0 -0
  18. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/lazyimport.py +0 -0
  19. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/make_requirement.py +0 -0
  20. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/metrics/__init__.py +0 -0
  21. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/metrics/bartscore.py +0 -0
  22. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/metrics/bertscore.py +0 -0
  23. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/metrics/bleu_tensor.py +0 -0
  24. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/metrics/classical.py +0 -0
  25. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/metrics/entment.py +0 -0
  26. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/metrics/eval.py +0 -0
  27. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/metrics/mauve.py +0 -0
  28. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/metrics/mauve_utils.py +0 -0
  29. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/misc/__init__.py +0 -0
  30. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/misc/string2string_basic_functions.py +0 -0
  31. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/misc/string2string_default_tokenizer.py +0 -0
  32. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/misc/string2string_hash_functions.py +0 -0
  33. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/misc/string2string_word_embeddings.py +0 -0
  34. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/search/__init__.py +0 -0
  35. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/search/classical.py +0 -0
  36. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/search/faiss_search.py +0 -0
  37. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/search/kobert_tokenizer.py +0 -0
  38. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/sejong/__init__.py +0 -0
  39. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/sejong/ch.py +0 -0
  40. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/sejong/dict_semClassNum.txt +0 -0
  41. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/sejong/layer.txt +0 -0
  42. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/sejong/sejong_download.py +0 -0
  43. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/sejong/ssem.py +0 -0
  44. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/similarity/__init__.py +0 -0
  45. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/similarity/bartscore____.py +0 -0
  46. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/similarity/bertscore____.py +0 -0
  47. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/similarity/classical.py +0 -0
  48. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/similarity/cosine_similarity.py +0 -0
  49. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/__init__.py +0 -0
  50. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/espresso_tag.py +0 -0
  51. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/__init__.py +0 -0
  52. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/arguments.py +0 -0
  53. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/attributes.py +0 -0
  54. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/config.py +0 -0
  55. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/metadata.py +0 -0
  56. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/ner/__init__.py +0 -0
  57. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/ner/macmorphoreader.py +0 -0
  58. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/ner/ner_reader.py +0 -0
  59. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/network.c +0 -0
  60. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/parse/__init__.py +0 -0
  61. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/parse/parse_reader.py +0 -0
  62. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/pos/__init__.py +0 -0
  63. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/pos/macmorphoreader.py +0 -0
  64. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/pos/pos_reader.py +0 -0
  65. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/reader.py +0 -0
  66. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/srl/__init__.py +0 -0
  67. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/srl/__srl_reader_.py +0 -0
  68. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/srl/srl_reader.py +0 -0
  69. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/srl/train_srl.py +0 -0
  70. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/taggers.py +0 -0
  71. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/utils.py +0 -0
  72. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/word_dictionary.py +0 -0
  73. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/wsd/__init__.py +0 -0
  74. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/wsd/macmorphoreader.py +0 -0
  75. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tag/libs/wsd/wsd_reader.py +0 -0
  76. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tokenize/__init__.py +0 -0
  77. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/tokenize/ko_tokenize.py +0 -0
  78. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor/trans.py +0 -0
  79. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor.egg-info/dependency_links.txt +0 -0
  80. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor.egg-info/requires.txt +0 -0
  81. {nltkor-1.2.0 → nltkor-1.2.3}/nltkor.egg-info/top_level.txt +0 -0
  82. {nltkor-1.2.0 → nltkor-1.2.3}/setup.cfg +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nltkor
3
- Version: 1.2.0
4
- Home-page: https://github.com/cwnu-airlab/NLTKo.git
3
+ Version: 1.2.3
4
+ Home-page: https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git
5
5
  Keywords: string matching,pattern matching,edit distance,string to string correction,string to string matching,Levenshtein edit distance,Hamming distance,Damerau-Levenshtein distance,Jaro-Winkler distance,longest common subsequence,longest common substring,dynamic programming,approximate string matching,semantic similarity,natural language processing,NLP,information retrieval,rouge,sacrebleu,bertscore,bartscore,fasttext,glove,cosine similarity,Smith-Waterman,Needleman-Wunsch,Hirschberg,Karp-Rabin,Knuth-Morris-Pratt,Boyer-Moore
6
6
  Classifier: Programming Language :: Python :: 3.7
7
7
  Classifier: Programming Language :: Python :: 3.8
@@ -119,24 +119,11 @@ NLTKor는 한국어를 위한 NLTK이며 기존의 영어에서 사용하는 Wor
119
119
 
120
120
  ### 2.1 라이브러리 설치
121
121
 
122
- 해당 라이브러리를 설치하기 위해서 아래와 동일하게 명령어 라인에서 입력하여 다운로드 받을 때, 사용자의 2가지 정보가 필요하다. 'modi.changwon.ac.kr' 내 사용하는 **사용자의 ID와 PW를 입력**해주어야만 다운로드가 가능하다.
122
+ 해당 라이브러리를 설치하기 위해서 아래와 동일하게 명령어 라인에서 입력한다.
123
123
 
124
124
  ```h
125
- $ git config --global http.sslVerify false
126
- $ pip install git+https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko
127
-
128
- Collecting git+https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git
129
- Cloning https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git to /tmp/pip-req-build-1dychja8
130
- Username for 'https://modi.changwon.ac.kr': seongtae (gitlab의 사용자 ID)
131
- Password for 'https://seongtae@modi.changwon.ac.kr': (gitlab의 사용자 PW)
132
- Requirement already satisfied: regex==2020.7.14 in ./mac/lib/python3.6/site-packages (from nltk==1.0.1) (2020.7.14)
133
- Building wheels for collected packages: nltk
134
- Building wheel for nltk (setup.py) ... done
135
- Created wheel for nltk: filename=nltk-1.0.1-py3-none-any.whl size=47028553 sha256=bd14c5ee5672a27557ac92965b6c2a639fe4e595b1777dadf4aee35b3384a947
136
- Stored in directory: /tmp/pip-ephem-wheel-cache-4otqpoux/wheels/9e/cc/cc/96c8c07aaef3aee9a5168e352fdc588a7d120da6236501ef61
137
- Successfully built nltk
138
- Installing collected packages: nltk
139
- Successfully installed nltk-1.0.1
125
+ $ pip install nltkor
126
+
140
127
  ```
141
128
 
142
129
  ##### 2.1.1. 설치 도중 오류 발생시 해결 방법
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: nltkor
3
- Version: 1.2.0
4
- Home-page: https://github.com/cwnu-airlab/NLTKo.git
3
+ Version: 1.2.3
4
+ Home-page: https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git
5
5
  Keywords: string matching,pattern matching,edit distance,string to string correction,string to string matching,Levenshtein edit distance,Hamming distance,Damerau-Levenshtein distance,Jaro-Winkler distance,longest common subsequence,longest common substring,dynamic programming,approximate string matching,semantic similarity,natural language processing,NLP,information retrieval,rouge,sacrebleu,bertscore,bartscore,fasttext,glove,cosine similarity,Smith-Waterman,Needleman-Wunsch,Hirschberg,Karp-Rabin,Knuth-Morris-Pratt,Boyer-Moore
6
6
  Classifier: Programming Language :: Python :: 3.7
7
7
  Classifier: Programming Language :: Python :: 3.8
@@ -75,4 +75,6 @@ nltkor/tag/libs/wsd/__init__.py
75
75
  nltkor/tag/libs/wsd/macmorphoreader.py
76
76
  nltkor/tag/libs/wsd/wsd_reader.py
77
77
  nltkor/tokenize/__init__.py
78
- nltkor/tokenize/ko_tokenize.py
78
+ nltkor/tokenize/ko_tokenize.py
79
+ test/test.py
80
+ test/testespresso.py
@@ -23,8 +23,8 @@ module1 = Extension("nltkor.tag.libs.network",
23
23
 
24
24
  setup(
25
25
  name='nltkor',
26
- version='1.2.0',
27
- url='https://github.com/cwnu-airlab/NLTKo.git',
26
+ version='1.2.3',
27
+ url='https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git',
28
28
  packages=find_packages(exclude=[]),
29
29
  python_requires='>=3.7',
30
30
  install_requires=[
@@ -0,0 +1,282 @@
1
+ from nltk.alignment import NeedlemanWunsch, SmithWaterman, Hirschberg, LongestCommonSubsequence, LongestCommonSubstring, DTW
2
+ from nltk.distance import LevenshteinEditDistance, HammingDistance, DamerauLevenshteinDistance, WassersteinDistance
3
+ from nltk.similarity import CosineSimilarity, LCSubstringSimilarity, LCSubsequenceSimilarity, JaroSimilarity
4
+ from nltk.tokenize import sent_tokenize, word_tokenize, syllable_tokenize
5
+ from nltk.search import NaiveSearch, RabinKarpSearch, KMPSearch, BoyerMooreSearch, FaissSearch
6
+ from nltk.metrics import BERTScore, BARTScore, DefaultMetric
7
+ from nltk import pos_tag, nouns, word_segmentor, pos_tag_with_verb_form
8
+ import numpy as np
9
+ from typing import List
10
+ import torch
11
+
12
def demo():
    """Align two Korean phrases with ``SmithWaterman`` and print both rows.

    The original script also carried disabled calls to
    ``NeedlemanWunsch().get_alignment``, ``Hirschberg().get_alignment``,
    ``DTW().get_alignment_path``, ``LongestCommonSubsequence().compute`` and
    ``LongestCommonSubstring().compute`` on the same pair of strings.
    """
    first_text, second_text = '기존에 제품이 장기간 사용으로 손상', '장기간 사용으로 제품이 손상'

    # get_alignment is unpacked into exactly two values, one per input.
    first_row, second_row = SmithWaterman().get_alignment(first_text, second_text)
    print(f"{first_row}\n{second_row}")
36
+
37
def demo2():
    """Run ``BARTScore`` on one Korean sentence pair and print the result."""
    first_sentence, second_sentence = '나는 학생이다.', '그는 선생님이다.'

    # compute() receives each sentence wrapped in a single-element list.
    score = BARTScore().compute([first_sentence], [second_sentence])

    for line in ("-------BARTScore-------", score, "-----------------------"):
        print(line)
    print()
46
+
47
def demo3():
    """Run ``BERTScore`` on one Korean sentence pair and print the result."""
    model_name = 'bert-base-uncased'
    first_sentence, second_sentence = '나는 학생이다.', '그는 선생님이다.'

    scorer = BERTScore(model_name_or_path=model_name, lang='kor', num_layers=12)
    score = scorer.compute([first_sentence], [second_sentence])

    print("model name: ", model_name)
    for line in ("-------BERTScore-------", score, "-----------------------"):
        print(line)
    print()
58
+
59
def demo4():
    """Print the word tokens and POS tags of a sample Korean sentence."""
    sentences = ['제가 나와 있는 곳은 경남 거제시 옥포동 덕포 해수욕장에 나와 있습니다.']
    for sentence in sentences:
        tokens = word_tokenize(sentence, "korean")
        print(tokens)
        tagged = pos_tag(sentence, lang='kor')
        print(tagged)
64
+
65
def demo5():
    """Print the ``DamerauLevenshteinDistance`` result for two Korean sentences.

    The original script also carried disabled ``LevenshteinEditDistance`` and
    ``HammingDistance`` calls on the same pair.
    """
    first_sentence, second_sentence = '나는 학생이다.', '그는 선생님이다.'

    distance = DamerauLevenshteinDistance().compute(first_sentence, second_sentence)

    report = (
        "-------DamerauLevenshteinDistance-------",
        distance,
        "----------------------------------------",
    )
    for line in report:
        print(line)
    print()
80
+
81
def demo6():
    """Print the ``CosineSimilarity`` result for two fixed integer vectors."""
    left = np.array([1, 2, 3, 4, 5])
    right = np.array([3, 7, 8, 3, 1])

    similarity = CosineSimilarity().compute(left, right)

    for line in ("-------CosineSimilarity-------", similarity, "------------------------------"):
        print(line)
    print()
91
+
92
def demo7():
    """Print three string-similarity results for one Korean sentence pair.

    Runs ``LCSubstringSimilarity``, ``LCSubsequenceSimilarity`` and
    ``JaroSimilarity`` in that order, each framed by its banner and ruler.
    """
    first_sentence, second_sentence = '나는 학생이다.', '그는 선생님이다.'

    # (metric class, banner, closing ruler) -- the strings are kept verbatim.
    sections = (
        (
            LCSubstringSimilarity,
            "-------LCSubstringSimilarity-------",
            "-----------------------------------",
        ),
        (
            LCSubsequenceSimilarity,
            "-------LCSubsequenceSimilarity-------",
            "--------------------------------------",
        ),
        (
            JaroSimilarity,
            "-------JaroSimilarity-------",
            "----------------------------",
        ),
    )

    for metric_cls, banner, ruler in sections:
        score = metric_cls().compute(first_sentence, second_sentence)
        print(banner)
        print(score)
        print(ruler)
        print()
116
+
117
+
118
def demo8():
    """Search one pattern in one sentence with four searchers, printing each result."""
    pattern, text = "학생", '나는 학생이다.'

    # Same order as the original: naive, Rabin-Karp, KMP, Boyer-Moore.
    for searcher_cls in (NaiveSearch, RabinKarpSearch, KMPSearch, BoyerMooreSearch):
        print(searcher_cls().search(pattern, text))
133
+
134
def demo9(save_path='/Users/dowon/Test/test.json'):
    """Build a ``FaissSearch`` corpus of Korean sentences and run one query.

    Prints the value returned by ``initialize_corpus`` and then the result of
    searching the corpus for a single query sentence.

    Args:
        save_path: Forwarded as ``save_path`` to ``initialize_corpus``.
            Defaults to the path the original script hard-coded.
    """
    faiss = FaissSearch(model_name_or_path = 'skt/kobert-base-v1', tokenizer_name_or_path = 'skt/kobert-base-v1')
    corpus = {
        'text': [
            "오늘은 날씨가 매우 덥습니다.",
            "저는 음악을 듣는 것을 좋아합니다.",
            "한국 음식 중에서 떡볶이가 제일 맛있습니다.",
            "도서관에서 책을 읽는 건 좋은 취미입니다.",
            "내일은 친구와 영화를 보러 갈 거예요.",
            "여름 휴가 때 해변에 가서 수영하고 싶어요.",
            "한국의 문화는 다양하고 흥미로워요.",
            "피아노 연주는 나를 편안하게 해줍니다.",
            "공원에서 산책하면 스트레스가 풀립니다.",
            "요즘 드라마를 많이 시청하고 있어요.",
            "커피가 일상에서 필수입니다.",
            "새로운 언어를 배우는 것은 어려운 일이에요.",
            "가을에 단풍 구경을 가고 싶어요.",
            "요리를 만들면 집안이 좋아보입니다.",
            "휴대폰 없이 하루를 보내는 것이 쉽지 않아요.",
            "스포츠를 하면 건강에 좋습니다.",
            # The comma after the next entry was missing, which made Python
            # concatenate it with the following sentence into one corpus item.
            "고양이와 개 중에 어떤 동물을 좋아하세요?",
            "천천히 걸어가면서 풍경을 감상하는 것이 좋아요.",
            "일주일에 한 번은 가족과 모임을 가요.",
            "공부할 때 집중력을 높이는 방법이 있을까요?",
            "봄에 꽃들이 피어날 때가 기대되요.",
            "여행 가방을 챙기고 싶어서 설레여요.",
            "사진 찍는 걸 좋아하는데, 카메라가 필요해요.",
            "다음 주에 시험이 있어서 공부해야 해요.",
            "운동을 하면 몸이 가벼워집니다.",
            "좋은 책을 읽으면 마음이 풍요로워져요.",
            "새로운 음악을 발견하면 기분이 좋아져요.",
            "미술 전시회에 가면 예술을 감상할 수 있어요.",
            "친구들과 함께 시간을 보내는 건 즐거워요.",
            "자전거 타면 바람을 맞으면서 즐거워집니다.",
        ],
    }
    print(faiss.initialize_corpus(corpus=corpus, section='text', embedding_type='mean_pooling', save_path=save_path))
    query = "오늘은 날씨가 매우 춥다."
    top_k = 5
    result = faiss.search(query, top_k)
    print(result)
175
+
176
def faiss_test():
    """Print the first three lines of a text file and a probe embedding for each.

    Each line is printed as read and again with newlines removed.
    """
    faiss = FaissSearch(model_name_or_path = 'klue/bert-base')
    lines = TextReader("/Users/dowon/Test/sentence1.txt").read()

    # Only the first three lines are processed.
    for raw_line in lines[:3]:
        print(raw_line)
        print(raw_line.replace('\n', ''))
        # NOTE(review): the embedding is computed for this fixed probe string,
        # not for the line just read -- confirm whether that is intended.
        probe = "i am test"
        embedding = faiss.get_embeddings(text=probe, num_workers=10)
        print(embedding.detach().cpu().numpy())
190
+
191
def faiss_save_test():
    """Load a saved dataset into ``FaissSearch``, index it, and run one query.

    The original also carried a disabled ``load_faiss_index`` call
    (index_name='embeddings', file_path='/Users/dowon/Test/test_index.json')
    ahead of ``add_faiss_index``.
    """
    model_dir = '/Users/dowon/test_model/trained_model/'
    faiss = FaissSearch(model_name_or_path = model_dir, tokenizer_name_or_path = model_dir)

    faiss.load_dataset_from_json('/Users/dowon/Test/test.json')
    faiss.embedding_type = 'mean_pooling'
    faiss.add_faiss_index(column_name='embeddings')

    hits = faiss.search("오늘은 날시가 매우 춥다.", 5)
    print(hits)
201
+
202
+
203
def demo10():
    """Print ``DefaultMetric.precision_score`` for two small label lists.

    The score is requested with the ``"macro"`` option. Two string locals that
    the original assigned but never used have been removed.
    """
    metric = DefaultMetric()
    y_true = [1, 3, 3, 5, 5, 1]
    y_pred = [1, 2, 3, 4, 5, 2]
    print(metric.precision_score(y_true, y_pred, "macro"))
210
+
211
def demo11():
    """Print three ``WassersteinDistance`` measures from P to Q1 and to Q2.

    For each target the Kullback, Wasserstein and "jesson_shannon" methods are
    called on a fresh ``WassersteinDistance`` instance, as in the original.
    All six values are computed before any of them is printed.
    """
    print("\nBegin Wasserstein distance demo ")

    # Build the arrays with numpy first, then hand them to torch.
    p = torch.from_numpy(np.array([0.6, 0.1, 0.1, 0.1, 0.1]))
    q1 = torch.from_numpy(np.array([0.1, 0.1, 0.6, 0.1, 0.1]))
    q2 = torch.from_numpy(np.array([0.1, 0.1, 0.1, 0.1, 0.6]))
    targets = (q1, q2)

    kullback = [WassersteinDistance().compute_kullback(p, q) for q in targets]
    wasserstein = [WassersteinDistance().compute_wasserstein(p, q) for q in targets]
    jesson = [WassersteinDistance().compute_jesson_shannon(p, q) for q in targets]

    sections = (
        ("\nKullback-Leibler distances: ", kullback),
        ("\nWasserstein distances: ", wasserstein),
        ("\nJesson-Shannon distances: ", jesson),
    )
    for title, (to_q1, to_q2) in sections:
        print(title)
        print("P to Q1 : %0.4f " % to_q1)
        print("P to Q2 : %0.4f " % to_q2)

    print("\nEnd demo ")
244
+
245
def demo12():
    """Print three ``DefaultMetric`` "at k" results for fixed sample data.

    Calls ``precision_at_k`` and ``recall_at_k`` with k=3 on the flat lists,
    then ``hit_rate_at_k`` with k=1 on the nested lists.
    """
    metric = DefaultMetric()

    y_true = [1, 3, 6, 7, 1, 5]
    y_pred = [5, 2, 4, 1, 3, 2, 5, 6, 7]
    print(metric.precision_at_k(y_true, y_pred, 3))
    print(metric.recall_at_k(y_true, y_pred, 3))

    user = [[5, 3, 2], [9, 1, 2], [3, 5, 6], [7, 2, 1]]
    h_pred = [[15, 6, 21, 3], [15, 77, 23, 14], [51, 23, 21, 2], [53, 2, 1, 5]]
    print(metric.hit_rate_at_k(user, h_pred, 1))
256
+
257
+
258
+
259
class TextReader:
    """Read a text file as a list of lines.

    Args:
        path: Location of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
            that non-ASCII text (such as Korean) is read the same way on every
            platform, rather than depending on the locale's default encoding.
    """

    def __init__(self, path: str, encoding: str = 'utf-8'):
        self.path = path
        self.encoding = encoding

    def read(self) -> List[str]:
        """Return every line of the file, each keeping its trailing newline.

        Raises:
            OSError: If the file cannot be opened.
            UnicodeDecodeError: If the content is not valid in ``encoding``.
        """
        with open(self.path, 'r', encoding=self.encoding) as f:
            return f.readlines()
266
+
267
+
268
if __name__ == "__main__":
    # Only the Wasserstein demo runs when the file is executed directly.
    # The other entry points (demo .. demo10, demo12, faiss_test,
    # faiss_save_test) are left disabled, as in the original.
    demo11()
@@ -0,0 +1,19 @@
1
# Smoke script: run EspressoTagger on a short Korean text and print the
# 'pos', 'dependency', 'ner' and 'wsd' results.
#
# The tagger is imported from ``nltkor.tag`` because this distribution ships
# it under the ``nltkor`` package (nltkor/tag/espresso_tag.py), not ``nltk``.
from nltkor.tag import EspressoTagger

if __name__ == '__main__':
    sent = "나는 배가 고프다. 나는 아름다운 강산에 살고있다."
    tagger = EspressoTagger()
    print()
    print(tagger.tag('pos', sent))
    print("dependency :")
    print(tagger.tag('dependency', sent))
    print('ner :')
    ner = tagger.tag('ner', sent)
    print(ner)
    print()
    print()
    print('wsd :')
    print(tagger.tag('wsd', sent))
    print()
    # The 'srl' task was left disabled in the original script.
File without changes
File without changes
@@ -1,10 +1,10 @@
1
1
  from nltkor import alignment
2
2
  from nltkor import cider
3
3
  from nltkor import distance
4
+ from nltkor import sejong
4
5
  from nltkor import metrics
5
6
  from nltkor import misc
6
7
  from nltkor import search
7
- from nltkor import sejong
8
8
  from nltkor import similarity
9
9
  from nltkor import tag
10
10
  from nltkor import tokenize
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes