nltkor 1.2.18__tar.gz → 1.2.19__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {nltkor-1.2.18 → nltkor-1.2.19}/PKG-INFO +30 -8
- {nltkor-1.2.18 → nltkor-1.2.19}/README.md +9 -14
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/__init__.py +1 -1
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/search/__init__.py +1 -1
- nltkor-1.2.19/nltkor/search/test.py +25 -0
- nltkor-1.2.18/nltkor/search/search_dict.py → nltkor-1.2.19/nltkor/search/trie_search.py +10 -10
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/network.c +125 -125
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor.egg-info/PKG-INFO +30 -8
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor.egg-info/SOURCES.txt +2 -1
- {nltkor-1.2.18 → nltkor-1.2.19}/setup.py +1 -1
- {nltkor-1.2.18 → nltkor-1.2.19}/LICENSE.txt +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/Kor_char.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/alignment/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/cider/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/cider/cider.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/cider/cider_scorer.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/distance/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/distance/wasserstein.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/etc.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/lazyimport.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/make_requirement.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/metrics/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/metrics/bartscore.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/metrics/bertscore.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/metrics/classical.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/metrics/entment.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/metrics/eval.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/metrics/mauve.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/metrics/mauve_utils.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/misc/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/misc/string2string_basic_functions.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/misc/string2string_default_tokenizer.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/misc/string2string_hash_functions.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/misc/string2string_word_embeddings.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/search/classical.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/search/faiss_search.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/search/kobert_tokenizer.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/__pycache__/__init__.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/__pycache__/__init__.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/__pycache__/sejong_download.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/__pycache__/sejong_download.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/__pycache__/ssem.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/__pycache__/ssem.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/ch.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/dict_semClassNum.txt +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/layer.txt +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/sejong_download.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/sejong/ssem.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/similarity/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/similarity/bartscore____.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/similarity/bertscore____.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/similarity/classical.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/similarity/cosine_similarity.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/__pycache__/__init__.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/__pycache__/__init__.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/__pycache__/espresso_tag.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/__pycache__/espresso_tag.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/espresso_tag.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/__init__.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/__init__.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/attributes.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/attributes.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/config.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/config.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/metadata.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/metadata.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/reader.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/reader.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/taggers.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/taggers.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/utils.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/utils.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/word_dictionary.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/__pycache__/word_dictionary.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/arguments.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/attributes.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/config.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/metadata.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/ner/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/ner/__pycache__/__init__.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/ner/__pycache__/__init__.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/ner/macmorphoreader.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/ner/ner_reader.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/network.pyx +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/networkconv.pyx +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/networkdependencyconv.pyx +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/parse/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/parse/__pycache__/__init__.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/parse/__pycache__/__init__.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/parse/parse_reader.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/pos/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/pos/__pycache__/__init__.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/pos/__pycache__/__init__.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/pos/macmorphoreader.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/pos/pos_reader.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/reader.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/__init__.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/__init__.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/train_srl.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/__pycache__/train_srl.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/__srl_reader_.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/srl_reader.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/srl/train_srl.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/taggers.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/utils.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/word_dictionary.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__pycache__/__init__.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__pycache__/__init__.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-38.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-39.pyc +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/wsd/macmorphoreader.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tag/libs/wsd/wsd_reader.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tokenize/__init__.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/tokenize/ko_tokenize.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor/trans.py +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor.egg-info/dependency_links.txt +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor.egg-info/requires.txt +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/nltkor.egg-info/top_level.txt +0 -0
- {nltkor-1.2.18 → nltkor-1.2.19}/setup.cfg +0 -0
@@ -1,11 +1,8 @@
|
|
1
|
-
Metadata-Version: 2.
|
1
|
+
Metadata-Version: 2.4
|
2
2
|
Name: nltkor
|
3
|
-
Version: 1.2.
|
4
|
-
Summary: UNKNOWN
|
3
|
+
Version: 1.2.19
|
5
4
|
Home-page: https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git
|
6
|
-
License: UNKNOWN
|
7
5
|
Keywords: string matching,pattern matching,edit distance,string to string correction,string to string matching,Levenshtein edit distance,Hamming distance,Damerau-Levenshtein distance,Jaro-Winkler distance,longest common subsequence,longest common substring,dynamic programming,approximate string matching,semantic similarity,natural language processing,NLP,information retrieval,rouge,sacrebleu,bertscore,bartscore,fasttext,glove,cosine similarity,Smith-Waterman,Needleman-Wunsch,Hirschberg,Karp-Rabin,Knuth-Morris-Pratt,Boyer-Moore
|
8
|
-
Platform: UNKNOWN
|
9
6
|
Classifier: Programming Language :: Python :: 3.7
|
10
7
|
Classifier: Programming Language :: Python :: 3.8
|
11
8
|
Classifier: Programming Language :: Python :: 3.9
|
@@ -15,6 +12,31 @@ Classifier: Operating System :: OS Independent
|
|
15
12
|
Classifier: Typing :: Typed
|
16
13
|
Requires-Python: >=3.7
|
17
14
|
License-File: LICENSE.txt
|
18
|
-
|
19
|
-
|
20
|
-
|
15
|
+
Requires-Dist: Cython
|
16
|
+
Requires-Dist: numpy<=1.26.4,>=1.23.5
|
17
|
+
Requires-Dist: regex
|
18
|
+
Requires-Dist: tqdm>=4.40.0
|
19
|
+
Requires-Dist: joblib
|
20
|
+
Requires-Dist: requests
|
21
|
+
Requires-Dist: nltk>3.0
|
22
|
+
Requires-Dist: pyarrow
|
23
|
+
Requires-Dist: beautifulSoup4
|
24
|
+
Requires-Dist: faiss-cpu==1.7.3
|
25
|
+
Requires-Dist: datasets
|
26
|
+
Requires-Dist: torch
|
27
|
+
Requires-Dist: dill<0.3.9
|
28
|
+
Requires-Dist: scikit-learn>=0.22.1
|
29
|
+
Requires-Dist: transformers==4.42.2
|
30
|
+
Requires-Dist: protobuf
|
31
|
+
Requires-Dist: sentencepiece
|
32
|
+
Requires-Dist: pandas
|
33
|
+
Requires-Dist: bert_score
|
34
|
+
Requires-Dist: chardet
|
35
|
+
Requires-Dist: GPUtil
|
36
|
+
Requires-Dist: fasttext
|
37
|
+
Dynamic: classifier
|
38
|
+
Dynamic: home-page
|
39
|
+
Dynamic: keywords
|
40
|
+
Dynamic: license-file
|
41
|
+
Dynamic: requires-dist
|
42
|
+
Dynamic: requires-python
|
@@ -110,18 +110,13 @@ NLTKor는 한국어를 위한 NLTK이며 기존의 영어에서 사용하는 Wor
|
|
110
110
|
|
111
111
|
## 2. 사용 환경
|
112
112
|
|
113
|
-
- 운영체제 : ubuntu 18.04, ubuntu 22.04, MacOS
|
113
|
+
- 운영체제 : ubuntu 18.04, ubuntu 22.04, MacOS, Windows
|
114
114
|
- 언어 : `python3.8`, `python3.9`, `python3.10`, `python3.11`
|
115
115
|
- 라이브러리 : nltk>=1.1.3, numpy==1.23, faiss-cpu=1.7.3 **※ 해당 NLTKor는 영어 NLTK를 별도로 인스톨해야 함.**
|
116
116
|
|
117
117
|
**주의사항**
|
118
118
|
|
119
|
-
-
|
120
|
-
|
121
|
-
| OS | python | 아키텍처 |
|
122
|
-
| ------ | ----------------------------------------- | ------------- |
|
123
|
-
| Mac | python3.8 | arm64 |
|
124
|
-
| ubuntu | python3.8 python3.9 python3.10 python3.11 | arm64, x86_64 |
|
119
|
+
- Windows 환경에서 python 3.9~3.11을 사용할 경우 fasttext 라이브러리가 지원되지 않습니다. 대신 fasttext-wheel 라이브러리를 사용해야 합니다(pip install fasttext-wheel).
|
125
120
|
|
126
121
|
### 2.1 라이브러리 설치
|
127
122
|
|
@@ -1674,27 +1669,27 @@ Adding FAISS index...
|
|
1674
1669
|
|
1675
1670
|
```python
|
1676
1671
|
root = {}
|
1677
|
-
dict_file = '
|
1678
|
-
sc =
|
1672
|
+
dict_file = '텍스트파일 경로'
|
1673
|
+
sc = TRIESearch(root)
|
1679
1674
|
with open(dict_file, 'r') as f:
|
1680
1675
|
for line in f:
|
1681
1676
|
if ';;' in line[:2]: continue
|
1682
1677
|
k, v = line.strip().split('\t')
|
1683
|
-
sc.
|
1678
|
+
sc.build_trie_search(k, v)
|
1684
1679
|
# print(root)
|
1685
1680
|
word = '고용 노동부'
|
1686
|
-
values, value_data = sc.
|
1681
|
+
values, value_data = sc.trie_search(word, True)
|
1687
1682
|
print(values, value_data)
|
1688
1683
|
|
1689
1684
|
word = '2시뉴스외전'
|
1690
|
-
values, value_data = sc.
|
1685
|
+
values, value_data = sc.trie_search( word, True)
|
1691
1686
|
print(values, value_data)
|
1692
1687
|
word = '2시 뉴스외전'
|
1693
|
-
values, value_data = sc.
|
1688
|
+
values, value_data = sc.trie_search( word, True)
|
1694
1689
|
print(values, value_data)
|
1695
1690
|
|
1696
1691
|
word = 'gbc'
|
1697
|
-
values, value_data = sc.
|
1692
|
+
values, value_data = sc.trie_search( word, True)
|
1698
1693
|
print(values, value_data)
|
1699
1694
|
```
|
1700
1695
|
**결과**
|
@@ -0,0 +1,25 @@
|
|
1
|
+
from trie_search import TRIESearch
|
2
|
+
|
3
|
+
root = {}
|
4
|
+
dict_file = '/Users/chanhyeok/Downloads/lexicon.txt'
|
5
|
+
sc = TRIESearch(root)
|
6
|
+
with open(dict_file, 'r') as f:
|
7
|
+
for line in f:
|
8
|
+
if ';;' in line[:2]: continue
|
9
|
+
k, v = line.strip().split('\t')
|
10
|
+
sc.build_trie_search(k, v)
|
11
|
+
# print(root)
|
12
|
+
word = '고용 노동부'
|
13
|
+
values, value_data = sc.trie_search(word, True)
|
14
|
+
print(values, value_data)
|
15
|
+
|
16
|
+
word = '2시뉴스외전'
|
17
|
+
values, value_data = sc.trie_search( word, True)
|
18
|
+
print(values, value_data)
|
19
|
+
word = '2시 뉴스외전'
|
20
|
+
values, value_data = sc.trie_search( word, True)
|
21
|
+
print(values, value_data)
|
22
|
+
|
23
|
+
word = 'gbc'
|
24
|
+
values, value_data = sc.trie_search( word, True)
|
25
|
+
print(values, value_data)
|
@@ -4,11 +4,11 @@ import numpy as np
|
|
4
4
|
import json
|
5
5
|
import argparse
|
6
6
|
|
7
|
-
class
|
7
|
+
class TRIESearch :
|
8
8
|
def __init__ (self,root) :
|
9
9
|
self.root = root
|
10
10
|
|
11
|
-
def
|
11
|
+
def build_trie_search(self, word, data) -> dict:
|
12
12
|
current_dict = self.root
|
13
13
|
_end_word_ = '$$'
|
14
14
|
for letter in word:
|
@@ -19,7 +19,7 @@ class SearchDic :
|
|
19
19
|
|
20
20
|
|
21
21
|
|
22
|
-
def
|
22
|
+
def trie_search(self, word, space_flag=False):
|
23
23
|
'''
|
24
24
|
TRIE 탐색
|
25
25
|
space_flag: if True then including space, otherwise do not including space
|
@@ -69,27 +69,27 @@ class SearchDic :
|
|
69
69
|
return pickle.load(f)
|
70
70
|
if __name__ == "__main__":
|
71
71
|
root = {}
|
72
|
-
dict_file = '
|
73
|
-
sc =
|
72
|
+
dict_file = '텍스트파일 경로'
|
73
|
+
sc = TRIESearch(root)
|
74
74
|
with open(dict_file, 'r') as f:
|
75
75
|
for line in f:
|
76
76
|
if ';;' in line[:2]: continue
|
77
77
|
k, v = line.strip().split('\t')
|
78
|
-
sc.
|
78
|
+
sc.build_trie_search(k, v)
|
79
79
|
# print(root)
|
80
80
|
word = '고용 노동부'
|
81
|
-
values, value_data = sc.
|
81
|
+
values, value_data = sc.trie_search(word, True)
|
82
82
|
print(values, value_data)
|
83
83
|
|
84
84
|
word = '2시뉴스외전'
|
85
|
-
values, value_data = sc.
|
85
|
+
values, value_data = sc.trie_search( word, True)
|
86
86
|
print(values, value_data)
|
87
87
|
word = '2시 뉴스외전'
|
88
|
-
values, value_data = sc.
|
88
|
+
values, value_data = sc.trie_search( word, True)
|
89
89
|
print(values, value_data)
|
90
90
|
|
91
91
|
word = 'gbc'
|
92
|
-
values, value_data = sc.
|
92
|
+
values, value_data = sc.trie_search( word, True)
|
93
93
|
print(values, value_data)
|
94
94
|
|
95
95
|
|