nltkor 1.2.17.tar.gz → 1.2.19.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. {nltkor-1.2.17 → nltkor-1.2.19}/PKG-INFO +8 -2
  2. {nltkor-1.2.17 → nltkor-1.2.19}/README.md +42 -7
  3. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/__init__.py +1 -1
  4. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/metrics/__init__.py +0 -1
  5. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/search/__init__.py +2 -1
  6. nltkor-1.2.19/nltkor/search/test.py +25 -0
  7. nltkor-1.2.19/nltkor/search/trie_search.py +95 -0
  8. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/network.c +24404 -27780
  9. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor.egg-info/PKG-INFO +8 -2
  10. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor.egg-info/SOURCES.txt +2 -1
  11. {nltkor-1.2.17 → nltkor-1.2.19}/setup.py +1 -1
  12. nltkor-1.2.17/nltkor/metrics/bleu_tensor.py +0 -20
  13. {nltkor-1.2.17 → nltkor-1.2.19}/LICENSE.txt +0 -0
  14. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/Kor_char.py +0 -0
  15. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/alignment/__init__.py +0 -0
  16. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/cider/__init__.py +0 -0
  17. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/cider/cider.py +0 -0
  18. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/cider/cider_scorer.py +0 -0
  19. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/distance/__init__.py +0 -0
  20. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/distance/wasserstein.py +0 -0
  21. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/etc.py +0 -0
  22. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/lazyimport.py +0 -0
  23. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/make_requirement.py +0 -0
  24. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/metrics/bartscore.py +0 -0
  25. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/metrics/bertscore.py +0 -0
  26. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/metrics/classical.py +0 -0
  27. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/metrics/entment.py +0 -0
  28. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/metrics/eval.py +0 -0
  29. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/metrics/mauve.py +0 -0
  30. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/metrics/mauve_utils.py +0 -0
  31. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/misc/__init__.py +0 -0
  32. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/misc/string2string_basic_functions.py +0 -0
  33. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/misc/string2string_default_tokenizer.py +0 -0
  34. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/misc/string2string_hash_functions.py +0 -0
  35. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/misc/string2string_word_embeddings.py +0 -0
  36. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/search/classical.py +0 -0
  37. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/search/faiss_search.py +0 -0
  38. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/search/kobert_tokenizer.py +0 -0
  39. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/__init__.py +0 -0
  40. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/__pycache__/__init__.cpython-38.pyc +0 -0
  41. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/__pycache__/__init__.cpython-39.pyc +0 -0
  42. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/__pycache__/sejong_download.cpython-38.pyc +0 -0
  43. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/__pycache__/sejong_download.cpython-39.pyc +0 -0
  44. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/__pycache__/ssem.cpython-38.pyc +0 -0
  45. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/__pycache__/ssem.cpython-39.pyc +0 -0
  46. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/ch.py +0 -0
  47. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/dict_semClassNum.txt +0 -0
  48. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/layer.txt +0 -0
  49. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/sejong_download.py +0 -0
  50. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/sejong/ssem.py +0 -0
  51. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/similarity/__init__.py +0 -0
  52. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/similarity/bartscore____.py +0 -0
  53. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/similarity/bertscore____.py +0 -0
  54. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/similarity/classical.py +0 -0
  55. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/similarity/cosine_similarity.py +0 -0
  56. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/__init__.py +0 -0
  57. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/__pycache__/__init__.cpython-38.pyc +0 -0
  58. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/__pycache__/__init__.cpython-39.pyc +0 -0
  59. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/__pycache__/espresso_tag.cpython-38.pyc +0 -0
  60. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/__pycache__/espresso_tag.cpython-39.pyc +0 -0
  61. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/espresso_tag.py +0 -0
  62. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__init__.py +0 -0
  63. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/__init__.cpython-38.pyc +0 -0
  64. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/__init__.cpython-39.pyc +0 -0
  65. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/attributes.cpython-38.pyc +0 -0
  66. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/attributes.cpython-39.pyc +0 -0
  67. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/config.cpython-38.pyc +0 -0
  68. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/config.cpython-39.pyc +0 -0
  69. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/metadata.cpython-38.pyc +0 -0
  70. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/metadata.cpython-39.pyc +0 -0
  71. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/reader.cpython-38.pyc +0 -0
  72. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/reader.cpython-39.pyc +0 -0
  73. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/taggers.cpython-38.pyc +0 -0
  74. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/taggers.cpython-39.pyc +0 -0
  75. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/utils.cpython-38.pyc +0 -0
  76. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/utils.cpython-39.pyc +0 -0
  77. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/word_dictionary.cpython-38.pyc +0 -0
  78. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/word_dictionary.cpython-39.pyc +0 -0
  79. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/arguments.py +0 -0
  80. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/attributes.py +0 -0
  81. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/config.py +0 -0
  82. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/metadata.py +0 -0
  83. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/ner/__init__.py +0 -0
  84. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/ner/__pycache__/__init__.cpython-38.pyc +0 -0
  85. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/ner/__pycache__/__init__.cpython-39.pyc +0 -0
  86. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-38.pyc +0 -0
  87. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-39.pyc +0 -0
  88. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/ner/macmorphoreader.py +0 -0
  89. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/ner/ner_reader.py +0 -0
  90. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/network.pyx +0 -0
  91. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/networkconv.pyx +0 -0
  92. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/networkdependencyconv.pyx +0 -0
  93. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/parse/__init__.py +0 -0
  94. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/parse/__pycache__/__init__.cpython-38.pyc +0 -0
  95. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/parse/__pycache__/__init__.cpython-39.pyc +0 -0
  96. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-38.pyc +0 -0
  97. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-39.pyc +0 -0
  98. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/parse/parse_reader.py +0 -0
  99. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/pos/__init__.py +0 -0
  100. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/pos/__pycache__/__init__.cpython-38.pyc +0 -0
  101. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/pos/__pycache__/__init__.cpython-39.pyc +0 -0
  102. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-38.pyc +0 -0
  103. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-39.pyc +0 -0
  104. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/pos/macmorphoreader.py +0 -0
  105. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/pos/pos_reader.py +0 -0
  106. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/reader.py +0 -0
  107. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/__init__.py +0 -0
  108. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/__init__.cpython-38.pyc +0 -0
  109. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/__init__.cpython-39.pyc +0 -0
  110. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-38.pyc +0 -0
  111. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-39.pyc +0 -0
  112. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/train_srl.cpython-38.pyc +0 -0
  113. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/train_srl.cpython-39.pyc +0 -0
  114. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/__srl_reader_.py +0 -0
  115. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/srl_reader.py +0 -0
  116. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/srl/train_srl.py +0 -0
  117. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/taggers.py +0 -0
  118. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/utils.py +0 -0
  119. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/word_dictionary.py +0 -0
  120. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__init__.py +0 -0
  121. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__pycache__/__init__.cpython-38.pyc +0 -0
  122. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__pycache__/__init__.cpython-39.pyc +0 -0
  123. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-38.pyc +0 -0
  124. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-39.pyc +0 -0
  125. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/wsd/macmorphoreader.py +0 -0
  126. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tag/libs/wsd/wsd_reader.py +0 -0
  127. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tokenize/__init__.py +0 -0
  128. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/tokenize/ko_tokenize.py +0 -0
  129. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor/trans.py +0 -0
  130. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor.egg-info/dependency_links.txt +0 -0
  131. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor.egg-info/requires.txt +0 -0
  132. {nltkor-1.2.17 → nltkor-1.2.19}/nltkor.egg-info/top_level.txt +0 -0
  133. {nltkor-1.2.17 → nltkor-1.2.19}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
- Metadata-Version: 2.1
+ Metadata-Version: 2.4
  Name: nltkor
- Version: 1.2.17
+ Version: 1.2.19
  Home-page: https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git
  Keywords: string matching,pattern matching,edit distance,string to string correction,string to string matching,Levenshtein edit distance,Hamming distance,Damerau-Levenshtein distance,Jaro-Winkler distance,longest common subsequence,longest common substring,dynamic programming,approximate string matching,semantic similarity,natural language processing,NLP,information retrieval,rouge,sacrebleu,bertscore,bartscore,fasttext,glove,cosine similarity,Smith-Waterman,Needleman-Wunsch,Hirschberg,Karp-Rabin,Knuth-Morris-Pratt,Boyer-Moore
  Classifier: Programming Language :: Python :: 3.7
@@ -34,3 +34,9 @@ Requires-Dist: bert_score
  Requires-Dist: chardet
  Requires-Dist: GPUtil
  Requires-Dist: fasttext
+ Dynamic: classifier
+ Dynamic: home-page
+ Dynamic: keywords
+ Dynamic: license-file
+ Dynamic: requires-dist
+ Dynamic: requires-python
@@ -8,6 +8,8 @@
  | 2 | 2024.5.22 | 차정원 | NLTKo 1.1.0 released |
  | 3 | 2025.2.5 | 이예나 | NLTKor 1.2.0 released<br> added bleu tensor, entment, and accuracy norm |
  | 4 | 2025.4.3 | 이예나 | NLTKor 1.2.10 update<br> espresso bug fixes |
+ | 5 | 2025.5.21 | 정찬혁 | NLTKor 1.2.18 update<br> added TRIE search |
+

@@ -89,6 +91,7 @@
  - [12.3 KMP Search Algorithm](#123-kmp-검색)
  - [12.4 Boyer-Moore Search Algorithm](#124-boyer-moore-검색)
  - [12.5 Faiss-Semantic Search](#125-faiss-semantic-검색)
+ - [12.6 TRIE Search](#126-trie-검색)
  - [13. Sejong Electronic Dictionary (ssem)](#13-세종전자사전-ssem)
  - [13.1 How to check objects](#131-객체-확인-방법)
  - [13.2 entry access method](#132-entry-접근법)
@@ -107,18 +110,13 @@ NLTKor는 한국어를 위한 NLTK이며 기존의 영어에서 사용하는 Wor

  ## 2. Environment

- - OS: ubuntu 18.04, ubuntu 22.04, MacOS
+ - OS: ubuntu 18.04, ubuntu 22.04, MacOS, Windows
  - Language: `python3.8`, `python3.9`, `python3.10`, `python3.11`
  - Libraries: nltk>=1.1.3, numpy==1.23, faiss-cpu=1.7.3 **※ NLTKor requires the English NLTK to be installed separately.**

  **Notes**

- - The environments in which Espresso5's EspressoTagger can be used are as follows.
-
- | OS | python | Architecture |
- | ------ | ----------------------------------------- | ------------- |
- | Mac | python3.8 | arm64 |
- | ubuntu | python3.8 python3.9 python3.10 python3.11 | arm64, x86_64 |
+ - On Windows with python 3.9~3.11, the fasttext library is not supported; use the fasttext-wheel library instead (pip install fasttext-wheel).

  ### 2.1 Installing the library

@@ -609,6 +607,7 @@ accuracy = correct / len(examples)
  print(f"Accuracy: {accuracy * 100:.2f}%")
  print(f"Time: {sum(inference_times)/len(inference_times)}, memory: {sum(memory_usages)/len(memory_usages)}")
  ```
+ **Result**
  ```
  Accuracy: 20.00
  Time: 0.05374705195426941, memory: 1409.9
@@ -1665,6 +1664,42 @@ Adding FAISS index...
  4 피아노 연주는 나를 편안하게 해줍니다. [-0.242319867, 0.6492734551, -1.4172941446, 0.... 34.069862
  ```

+ #### 12.6 TRIE Search
+ - Checks whether a word is contained in a dictionary text file.
+
+ ```python
+ root = {}
+ dict_file = '<text file path>'
+ sc = TRIESearch(root)
+ with open(dict_file, 'r') as f:
+     for line in f:
+         if ';;' in line[:2]: continue
+         k, v = line.strip().split('\t')
+         sc.build_trie_search(k, v)
+ # print(root)
+ word = '고용 노동부'
+ values, value_data = sc.trie_search(word, True)
+ print(values, value_data)
+
+ word = '2시뉴스외전'
+ values, value_data = sc.trie_search(word, True)
+ print(values, value_data)
+ word = '2시 뉴스외전'
+ values, value_data = sc.trie_search(word, True)
+ print(values, value_data)
+
+ word = 'gbc'
+ values, value_data = sc.trie_search(word, True)
+ print(values, value_data)
+ ```
+ **Result**
+ ```
+ ['고용 노동부'] ['NN']
+ ['2시뉴스외전'] ['NN']
+ ['2시 뉴스외전'] ['NN']
+ ['bc'] ['ND']
+ ```
+

  ### 13. Sejong Electronic Dictionary (ssem)

  Before using this feature, the argument format is explained first. The arguments are used by the **entrys, entry, sense** functions. To illustrate the argument format, the examples use the noun '눈' and the verb '감다'.
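The TRIE example in the README hunk above reads its lexicon from a tab-separated text file but never shows what that file looks like. Below is a minimal, self-contained sketch of the assumed format: one `surface form<TAB>tag` entry per line, with `;;` marking comment lines. The import path (`nltkor.search`) follows the export added to `nltkor/search/__init__.py` in this release; the lexicon entries and tags are illustrative, chosen to reproduce the printed results.

```python
import tempfile
from nltkor.search import TRIESearch  # exported from nltkor.search as of this release

# Hypothetical lexicon content, consistent with the results shown above.
lexicon = ";; comment lines are skipped\n고용 노동부\tNN\n2시뉴스외전\tNN\nbc\tND\n"
with tempfile.NamedTemporaryFile('w', suffix='.txt', delete=False, encoding='utf-8') as f:
    f.write(lexicon)
    dict_file = f.name

sc = TRIESearch({})
with open(dict_file, 'r', encoding='utf-8') as f:
    for line in f:
        if ';;' in line[:2]:
            continue
        k, v = line.strip().split('\t')  # k = dictionary word, v = its tag/data
        sc.build_trie_search(k, v)

print(sc.trie_search('고용 노동부', True))  # expected: (['고용 노동부'], ['NN'])
print(sc.trie_search('gbc', True))          # expected: (['bc'], ['ND'])
```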
@@ -13,4 +13,4 @@ from nltkor import trans
  from nltkor import Kor_char
  from nltkor import etc

- __version__ = '1.2.17'
+ __version__ = '1.2.19'
@@ -53,7 +53,6 @@ from nltkor.metrics.eval import StringMetric
  """
  from nltkor.metrics.classical import DefaultMetric
  from nltkor.metrics.entment import EMR
- from nltkor.metrics.bleu_tensor import *
  #DefaultMetric = lazy_import.lazy_callable("nltkor.metrics.classical.DefaultMetric")
  #Mauve = lazy_import.lazy_callable("nltkor.metrics.mauve.Mauve")
  from nltkor.metrics.mauve import Mauve
@@ -7,4 +7,5 @@ from .classical import (
      BoyerMooreSearch,
  )
  from .faiss_search import FaissSearch
- from .kobert_tokenizer import KoBERTTokenizer
+ from .kobert_tokenizer import KoBERTTokenizer
+ from .trie_search import TRIESearch
@@ -0,0 +1,25 @@
+ from trie_search import TRIESearch
+
+ root = {}
+ dict_file = '/Users/chanhyeok/Downloads/lexicon.txt'
+ sc = TRIESearch(root)
+ with open(dict_file, 'r') as f:
+     for line in f:
+         if ';;' in line[:2]: continue
+         k, v = line.strip().split('\t')
+         sc.build_trie_search(k, v)
+ # print(root)
+ word = '고용 노동부'
+ values, value_data = sc.trie_search(word, True)
+ print(values, value_data)
+
+ word = '2시뉴스외전'
+ values, value_data = sc.trie_search(word, True)
+ print(values, value_data)
+ word = '2시 뉴스외전'
+ values, value_data = sc.trie_search(word, True)
+ print(values, value_data)
+
+ word = 'gbc'
+ values, value_data = sc.trie_search(word, True)
+ print(values, value_data)
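test.py above exercises the example against a lexicon file at a hardcoded local path. A minimal sketch of the same calls with entries built in memory instead (the entries are illustrative; the import path assumes the installed package rather than the local module):

```python
from nltkor.search import TRIESearch

sc = TRIESearch({})  # the trie root is a plain nested dict
for word, tag in [('고용 노동부', 'NN'), ('2시뉴스외전', 'NN')]:
    sc.build_trie_search(word, tag)

# With space_flag=True, '2시 뉴스외전' (written with a space) still matches the
# spaceless dictionary entry '2시뉴스외전'.
print(sc.trie_search('2시 뉴스외전', True))  # expected: (['2시 뉴스외전'], ['NN'])
```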
@@ -0,0 +1,95 @@
+ import re, os, sys, pickle   # pickle is required by save_dict/load_dict below
+ import pandas as pd
+ import numpy as np
+ import json
+ import argparse
+
+ class TRIESearch:
+     def __init__(self, root):
+         self.root = root
+
+     def build_trie_search(self, word, data) -> dict:
+         current_dict = self.root
+         _end_word_ = '$$'
+         for letter in word:
+             current_dict = current_dict.setdefault(letter, {})
+         current_dict = current_dict.setdefault(_end_word_, data)
+
+     def trie_search(self, word, space_flag=False):
+         '''
+         TRIE search
+         space_flag: if True, spaces are taken into account; otherwise they are not
+         '''
+         values = list()
+         value_data = list()
+         if not word: return self.root.keys()
+
+         current_dict = self.root
+         _end_word_ = '$$'
+         SPACE = ' '
+         s = 0
+         for i, letter in enumerate(word):
+             #print(i, s, '>', letter, values, value_data, current_dict)
+             if letter in current_dict:
+                 #print('\t', letter, values, value_data, current_dict)
+                 current_dict = current_dict[letter]
+                 if _end_word_ in current_dict:
+                     values.append(word[s:i+1])
+                     value_data.append(current_dict[_end_word_])
+             elif space_flag and letter != SPACE and SPACE in current_dict:
+                 look_ahead_dict = current_dict[SPACE]
+                 # print('\t==', i, letter, values, look_ahead_dict)
+                 if letter in look_ahead_dict:
+                     current_dict = look_ahead_dict[letter]
+             elif space_flag and letter == SPACE:
+                 # print('\t##', i, letter, word[i+1], values)
+                 continue
+             else:
+                 # print('\t@@', i, letter, values)
+                 s = i+1
+                 current_dict = self.root
+         else:
+             if values: return values, value_data
+             else: return list(word), value_data
+
+     def save_dict(self, file_path):
+         # save the root dictionary to a pickle file
+         with open(file_path, 'wb') as f:
+             pickle.dump(self.root, f)
+
+     def load_dict(self, file_path) -> dict:
+         # read the pickle file back in
+         with open(file_path, 'rb') as f:
+             return pickle.load(f)
+
+ if __name__ == "__main__":
+     root = {}
+     dict_file = '<text file path>'
+     sc = TRIESearch(root)
+     with open(dict_file, 'r') as f:
+         for line in f:
+             if ';;' in line[:2]: continue
+             k, v = line.strip().split('\t')
+             sc.build_trie_search(k, v)
+     # print(root)
+     word = '고용 노동부'
+     values, value_data = sc.trie_search(word, True)
+     print(values, value_data)
+
+     word = '2시뉴스외전'
+     values, value_data = sc.trie_search(word, True)
+     print(values, value_data)
+     word = '2시 뉴스외전'
+     values, value_data = sc.trie_search(word, True)
+     print(values, value_data)
+
+     word = 'gbc'
+     values, value_data = sc.trie_search(word, True)
+     print(values, value_data)
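For reference, `build_trie_search` stores entries in a plain nested dict: one level per character, with the `'$$'` key marking the end of a complete entry and holding its data. A standalone sketch of that structure, re-implementing the same `setdefault` logic with made-up entries:

```python
root = {}

def insert(word, data):
    # one dict level per character; '$$' marks a complete entry and stores its data
    node = root
    for ch in word:
        node = node.setdefault(ch, {})
    node.setdefault('$$', data)

insert('bc', 'ND')
insert('b', 'XX')  # 'XX' is an illustrative tag, not taken from any real lexicon
print(root)        # {'b': {'c': {'$$': 'ND'}, '$$': 'XX'}}
```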