nltkor 1.2.15__tar.gz → 1.2.16__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. {nltkor-1.2.15 → nltkor-1.2.16}/PKG-INFO +1 -1
  2. {nltkor-1.2.15 → nltkor-1.2.16}/README.md +10 -3
  3. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/__init__.py +1 -1
  4. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/__init__.py +1 -1
  5. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/entment.py +1 -1
  6. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/search/faiss_search.py +139 -29
  7. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor.egg-info/PKG-INFO +1 -1
  8. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor.egg-info/SOURCES.txt +1 -3
  9. {nltkor-1.2.15 → nltkor-1.2.16}/setup.py +1 -1
  10. nltkor-1.2.15/test/test.py +0 -282
  11. nltkor-1.2.15/test/testespresso.py +0 -19
  12. {nltkor-1.2.15 → nltkor-1.2.16}/LICENSE.txt +0 -0
  13. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/Kor_char.py +0 -0
  14. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/alignment/__init__.py +0 -0
  15. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/cider/__init__.py +0 -0
  16. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/cider/cider.py +0 -0
  17. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/cider/cider_scorer.py +0 -0
  18. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/distance/__init__.py +0 -0
  19. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/distance/wasserstein.py +0 -0
  20. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/etc.py +0 -0
  21. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/lazyimport.py +0 -0
  22. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/make_requirement.py +0 -0
  23. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/bartscore.py +0 -0
  24. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/bertscore.py +0 -0
  25. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/bleu_tensor.py +0 -0
  26. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/classical.py +0 -0
  27. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/eval.py +0 -0
  28. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/mauve.py +0 -0
  29. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/mauve_utils.py +0 -0
  30. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/misc/__init__.py +0 -0
  31. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/misc/string2string_basic_functions.py +0 -0
  32. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/misc/string2string_default_tokenizer.py +0 -0
  33. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/misc/string2string_hash_functions.py +0 -0
  34. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/misc/string2string_word_embeddings.py +0 -0
  35. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/search/__init__.py +0 -0
  36. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/search/classical.py +0 -0
  37. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/search/kobert_tokenizer.py +0 -0
  38. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/__init__.py +0 -0
  39. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/__pycache__/__init__.cpython-38.pyc +0 -0
  40. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/__pycache__/__init__.cpython-39.pyc +0 -0
  41. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/__pycache__/sejong_download.cpython-38.pyc +0 -0
  42. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/__pycache__/sejong_download.cpython-39.pyc +0 -0
  43. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/__pycache__/ssem.cpython-38.pyc +0 -0
  44. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/__pycache__/ssem.cpython-39.pyc +0 -0
  45. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/ch.py +0 -0
  46. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/dict_semClassNum.txt +0 -0
  47. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/layer.txt +0 -0
  48. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/sejong_download.py +0 -0
  49. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/sejong/ssem.py +0 -0
  50. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/similarity/__init__.py +0 -0
  51. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/similarity/bartscore____.py +0 -0
  52. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/similarity/bertscore____.py +0 -0
  53. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/similarity/classical.py +0 -0
  54. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/similarity/cosine_similarity.py +0 -0
  55. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/__init__.py +0 -0
  56. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/__pycache__/__init__.cpython-38.pyc +0 -0
  57. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/__pycache__/__init__.cpython-39.pyc +0 -0
  58. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/__pycache__/espresso_tag.cpython-38.pyc +0 -0
  59. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/__pycache__/espresso_tag.cpython-39.pyc +0 -0
  60. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/espresso_tag.py +0 -0
  61. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__init__.py +0 -0
  62. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/__init__.cpython-38.pyc +0 -0
  63. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/__init__.cpython-39.pyc +0 -0
  64. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/attributes.cpython-38.pyc +0 -0
  65. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/attributes.cpython-39.pyc +0 -0
  66. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/config.cpython-38.pyc +0 -0
  67. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/config.cpython-39.pyc +0 -0
  68. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/metadata.cpython-38.pyc +0 -0
  69. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/metadata.cpython-39.pyc +0 -0
  70. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/reader.cpython-38.pyc +0 -0
  71. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/reader.cpython-39.pyc +0 -0
  72. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/taggers.cpython-38.pyc +0 -0
  73. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/taggers.cpython-39.pyc +0 -0
  74. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/utils.cpython-38.pyc +0 -0
  75. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/utils.cpython-39.pyc +0 -0
  76. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/word_dictionary.cpython-38.pyc +0 -0
  77. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/__pycache__/word_dictionary.cpython-39.pyc +0 -0
  78. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/arguments.py +0 -0
  79. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/attributes.py +0 -0
  80. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/config.py +0 -0
  81. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/metadata.py +0 -0
  82. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/ner/__init__.py +0 -0
  83. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/ner/__pycache__/__init__.cpython-38.pyc +0 -0
  84. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/ner/__pycache__/__init__.cpython-39.pyc +0 -0
  85. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-38.pyc +0 -0
  86. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-39.pyc +0 -0
  87. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/ner/macmorphoreader.py +0 -0
  88. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/ner/ner_reader.py +0 -0
  89. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/network.c +0 -0
  90. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/network.pyx +0 -0
  91. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/networkconv.pyx +0 -0
  92. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/networkdependencyconv.pyx +0 -0
  93. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/parse/__init__.py +0 -0
  94. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/parse/__pycache__/__init__.cpython-38.pyc +0 -0
  95. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/parse/__pycache__/__init__.cpython-39.pyc +0 -0
  96. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-38.pyc +0 -0
  97. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-39.pyc +0 -0
  98. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/parse/parse_reader.py +0 -0
  99. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/pos/__init__.py +0 -0
  100. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/pos/__pycache__/__init__.cpython-38.pyc +0 -0
  101. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/pos/__pycache__/__init__.cpython-39.pyc +0 -0
  102. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-38.pyc +0 -0
  103. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-39.pyc +0 -0
  104. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/pos/macmorphoreader.py +0 -0
  105. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/pos/pos_reader.py +0 -0
  106. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/reader.py +0 -0
  107. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/__init__.py +0 -0
  108. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/__pycache__/__init__.cpython-38.pyc +0 -0
  109. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/__pycache__/__init__.cpython-39.pyc +0 -0
  110. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-38.pyc +0 -0
  111. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-39.pyc +0 -0
  112. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/__pycache__/train_srl.cpython-38.pyc +0 -0
  113. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/__pycache__/train_srl.cpython-39.pyc +0 -0
  114. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/__srl_reader_.py +0 -0
  115. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/srl_reader.py +0 -0
  116. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/srl/train_srl.py +0 -0
  117. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/taggers.py +0 -0
  118. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/utils.py +0 -0
  119. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/word_dictionary.py +0 -0
  120. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/wsd/__init__.py +0 -0
  121. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/wsd/__pycache__/__init__.cpython-38.pyc +0 -0
  122. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/wsd/__pycache__/__init__.cpython-39.pyc +0 -0
  123. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-38.pyc +0 -0
  124. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-39.pyc +0 -0
  125. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/wsd/macmorphoreader.py +0 -0
  126. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tag/libs/wsd/wsd_reader.py +0 -0
  127. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tokenize/__init__.py +0 -0
  128. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/tokenize/ko_tokenize.py +0 -0
  129. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor/trans.py +0 -0
  130. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor.egg-info/dependency_links.txt +0 -0
  131. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor.egg-info/requires.txt +0 -0
  132. {nltkor-1.2.15 → nltkor-1.2.16}/nltkor.egg-info/top_level.txt +0 -0
  133. {nltkor-1.2.15 → nltkor-1.2.16}/setup.cfg +0 -0
{nltkor-1.2.15 → nltkor-1.2.16}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nltkor
-Version: 1.2.15
+Version: 1.2.16
 Home-page: https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git
 Keywords: string matching,pattern matching,edit distance,string to string correction,string to string matching,Levenshtein edit distance,Hamming distance,Damerau-Levenshtein distance,Jaro-Winkler distance,longest common subsequence,longest common substring,dynamic programming,approximate string matching,semantic similarity,natural language processing,NLP,information retrieval,rouge,sacrebleu,bertscore,bartscore,fasttext,glove,cosine similarity,Smith-Waterman,Needleman-Wunsch,Hirschberg,Karp-Rabin,Knuth-Morris-Pratt,Boyer-Moore
 Classifier: Programming Language :: Python :: 3.7
{nltkor-1.2.15 → nltkor-1.2.16}/README.md
@@ -910,11 +910,18 @@ TF-IDF를 n-gram에 대한 가중치로 계산하고 참조 캡션과 생성 캡
 0.6303797468354431
 ```
 
-#### 5.14 EntMent
+#### 5.14 EMR(Entity Mention Recall)
 
-- EntMent (Entity Mention Recall)
 
-: 요약된 텍스트에 포함된 고유 엔터티의 참조 비율
+요약된 텍스트가 참조 문서에 등장하는 중요 개체를 얼마나 잘 유지하고 있는지에 대한 평가 지표이다.
+
+```python
+>>> # -*- coding: utf-8 -*-
+>>> from nltkor.metrics import EntMent
+>>> EntMent().entity("국립창원대학교(총장 박민원)가 사천우주항공캠퍼스 개교와 함께 2025학년도 사천우주항공공학부 입학식을 7일 오전 11시 사천우주항공캠퍼스에서 열었다.이날 행사에는 박민원 총장을 비롯해 국민의힘 서천호 국회의원(사천·남해·하동), 윤영빈 우주항공청장, 박동식 사천시장, 김규헌 사천시의회 의장, 지역 유관기관 관계자들과 신입생 및 가족들이 참석했다. 글로컬대학30사업 선정에 따라 국립창원대와 통합을 추진 중인 경남도립거창대학, 경남도립남해대학 관계자도 함께 자리했다.행사는 1부 현판 제막식과 2부 입학식으로 진행됐으며, 박동식 사천시장은 신입생들에게 축하 선물로 금배지를 전달했고, 박민원 총장은 캠퍼스 설립에 기여한 유공자들에게 표창장을 수여했다.","국립창원대학교는 4월 7일 사천우주항공캠퍼스에서 2025학년도 사천우주항공공학부 입학식을 개최했다. 이날 행사에는 박민원 총장, 서천호 국회의원, 윤영빈 우주항공청장, 박동식 사천시장 등 주요 인사와 신입생 및 가족들이 참석했으며, 글로컬대학30사업과 관련된 거창대학·남해대학 관계자들도 함께했다. 행사는 현판 제막식과 입학식으로 나뉘어 진행되었고, 신입생들에게는 금배지가, 캠퍼스 설립 유공자들에게는 표창장이 수여되었다.")
+Downloading Espresso5 model...
+0.8888888888888888
+```
 
 
 ### 6 확장 평가 함수
{nltkor-1.2.15 → nltkor-1.2.16}/nltkor/__init__.py
@@ -13,4 +13,4 @@ from nltkor import trans
 from nltkor import Kor_char
 from nltkor import etc
 
-__version__ = '1.2.15'
+__version__ = '1.2.16'
{nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/__init__.py
@@ -52,7 +52,7 @@ from nltk.metrics.aline import align
 from nltkor.metrics.eval import StringMetric
 """
 from nltkor.metrics.classical import DefaultMetric
-from nltkor.metrics.entment import EntMent
+from nltkor.metrics.entment import EMR
 from nltkor.metrics.bleu_tensor import *
 #DefaultMetric = lazy_import.lazy_callable("nltkor.metrics.classical.DefaultMetric")
 #Mauve = lazy_import.lazy_callable("nltkor.metrics.mauve.Mauve")
{nltkor-1.2.15 → nltkor-1.2.16}/nltkor/metrics/entment.py
@@ -1,6 +1,6 @@
 from nltkor.tag import EspressoTagger
 
-class EntMent :
+class EMR :
 
     def __init__(self):
         self.entity_list = []
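For orientation, here is a minimal sketch of how the renamed metric might be called against 1.2.16. It is an illustration under assumptions, not package documentation: the `entity()` method and its two-string argument order (source text first, summary second) are taken from the README example above, the placeholder strings are ours, and note that the README snippet itself still imports the old `EntMent` name even though the class is now `EMR`.

```python
# Hypothetical usage sketch for nltkor 1.2.16 (not from the package docs).
# Argument order mirrors the README example: source text first, summary second.
from nltkor.metrics import EMR   # exported as EntMent in 1.2.15

source_text = "..."   # placeholder: the reference document containing the entities
summary_text = "..."  # placeholder: the summary whose entity coverage is measured

recall = EMR().entity(source_text, summary_text)
print(recall)  # proportion of reference entities retained in the summary
```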
{nltkor-1.2.15 → nltkor-1.2.16}/nltkor/search/faiss_search.py
@@ -33,11 +33,12 @@ SOFTWARE.
 This module contains a wrapper for the Faiss library by Facebook AI Research.
 """
 
-from collections import Counter
+from collections import Counter
 from typing import List, Union, Optional, Dict, Any
 import os
 import copy
 import logging
+import transformers
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 from nltkor.make_requirement import make_requirement
@@ -70,24 +71,28 @@ class FaissSearch:
                 mode = None,
                 model_name_or_path: str = 'klue/bert-base',
                 tokenizer_name_or_path: str = 'klue/bert-base',
+                embedding_type: str = 'last_hidden_state',
                 device: str = 'cpu'
                 ) -> None:
         if mode == 'sentence':
-            return FaissSearch_SenEmbed(model_name_or_path)
+            return FaissSearch_SenEmbed(model_name_or_path=model_name_or_path, embedding_type=embedding_type)
         elif mode == 'word':
-            return FaissSearch_WordEmbed(model_name_or_path)
+            return FaissSearch_WordEmbed(model_name_or_path=model_name_or_path, embedding_type=embedding_type)
+        elif mode == 'splade':
+            return FaissSearch_Splade(model_name_or_path=model_name_or_path, embedding_type=embedding_type)
         else:
-            raise ValueError("choice 'sentence' or 'word'")
+            raise ValueError("choice 'sentence' or 'word' or 'splade'")
+
 
 
-# FAISS original library wrapper class
 class FaissSearch_SenEmbed:
     def __init__(self,
                  model_name_or_path: str = 'klue/bert-base',
                  tokenizer_name_or_path: str = 'klue/bert-base',
+                 embedding_type: str = 'last_hidden_state',
                  device: str = 'cpu',
                  ) -> None:
-        r"""
+        """
         This function initializes the wrapper for the FAISS library, which is used to perform semantic search.
 
 
@@ -143,8 +148,7 @@ class FaissSearch_SenEmbed:
         # Initialize the dataset
         self.dataset = None
 
-
-
+
     # Auxiliary function to get the last hidden state
     def get_last_hidden_state(self,
                               embeddings: torch.Tensor,
@@ -166,7 +170,6 @@ class FaissSearch_SenEmbed:
         return last_hidden_state[:, 0, :]
 
 
-
     # Auxiliary function to get the mean pooling
     def get_mean_pooling(self,
                          embeddings: torch.Tensor,
@@ -244,7 +247,6 @@ class FaissSearch_SenEmbed:
         return embeddings
 
 
-
     # Add FAISS index
    def add_faiss_index(self,
                        column_name: str = 'embeddings',
@@ -309,7 +311,6 @@ class FaissSearch_SenEmbed:
        self.dataset.save_faiss_index(index_name=index_name, file=file_path)
 
 
-
    def load_faiss_index(self,
                         index_name: str,
                         file_path: str,
@@ -339,7 +340,6 @@ class FaissSearch_SenEmbed:
        self.dataset.load_faiss_index(index_name=index_name, file=file_path, device=device)
 
 
-
    # Initialize the corpus using a dictionary or pandas DataFrame or HuggingFace Datasets object
    def initialize_corpus(self,
                          corpus: Union[Dict[str, List[str]], pd.DataFrame, Dataset],
@@ -407,7 +407,6 @@ class FaissSearch_SenEmbed:
        return self.dataset
 
 
-
    # Initialize the dataset using a JSON file
    def load_dataset_from_json(self,
                               json_path: str,
@@ -429,7 +428,6 @@ class FaissSearch_SenEmbed:
        return self.dataset
 
 
-
    # Search for the most similar elements in the dataset, given a query
    def search(self,
               query: str,
@@ -475,12 +473,132 @@ class FaissSearch_SenEmbed:
 
 
 
+# FAISS Splade + ICT library wrapper class
+class FaissSearch_Splade(FaissSearch_SenEmbed):
+    def __init__(self,
+                 model_name_or_path: str = 'klue/bert-base',
+                 tokenizer_name_or_path: str = 'klue/bert-base',
+                 embedding_type: str = 'last_hidden_state',
+                 device: str = 'cpu',
+                 ) -> None:
+        r"""
+        This function initializes the wrapper for the FAISS library, which is used to perform semantic search.
+
+
+        .. attention::
+
+            * If you use this class, please make sure to cite the following paper:
+
+                .. code-block:: latex
+
+                    @article{johnson2019billion,
+                        title={Billion-scale similarity search with {GPUs}},
+                        author={Johnson, Jeff and Douze, Matthijs and J{\'e}gou, Herv{\'e}},
+                        journal={IEEE Transactions on Big Data},
+                        volume={7},
+                        number={3},
+                        pages={535--547},
+                        year={2019},
+                        publisher={IEEE}
+                    }
+
+            * The code is based on the following GitHub repository:
+                https://github.com/facebookresearch/faiss
+
+        Arguments:
+            model_name_or_path (str, optional): The name or path of the model to use. Defaults to 'facebook/bart-large'.
+            tokenizer_name_or_path (str, optional): The name or path of the tokenizer to use. Defaults to 'facebook/bart-large'.
+            device (str, optional): The device to use. Defaults to 'cpu'.
+
+        Returns:
+            None
+        """
+
+        # Set the device
+        self.device = device
+
+        # If the tokenizer is not specified, use the model name or path
+        if tokenizer_name_or_path is None:
+            tokenizer_name_or_path = model_name_or_path
+
+        # Load the tokenizer
+        if tokenizer_name_or_path == 'skt/kobert-base-v1':
+            # self.tokenizer = KoBERTTokenizer.from_pretrained(tokenizer_name_or_path)
+            self.tokenizer = XLNetTokenizer.from_pretrained(tokenizer_name_or_path)
+        else:
+            self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path)
+
+        # Load the model
+        self.model = transformers.BertForMaskedLM.from_pretrained(model_name_or_path).to(self.device)
+
+        # Set the model to evaluation mode (since we do not need the gradients)
+        self.model.eval()
+
+        # Initialize the dataset
+        self.dataset = None
+
+
+    # Get the embeddings
+    def get_embeddings(self,
+                       text: Union[str, List[str]],
+                       embedding_type: str = 'last_hidden_state',
+                       batch_size: int = 8,
+                       num_workers: int = 4,
+                       ) -> torch.Tensor:
+        """
+        This function returns the embeddings of the input text.
+
+        Arguments:
+            text (Union[str, List[str]]): The input text.
+            embedding_type (str, optional): The type of embedding to use. Defaults to 'last_hidden_state'.
+            batch_size (int, optional): The batch size to use. Defaults to 8.
+            num_workers (int, optional): The number of workers to use. Defaults to 4.
+
+        Returns:
+            torch.Tensor: The embeddings.
+
+        Raises:
+            ValueError: If the embedding type is invalid.
+        """
+
+        # Check if the embedding type is valid
+        if embedding_type not in ['last_hidden_state', 'mean_pooling']:
+            raise ValueError(f'Invalid embedding type: {embedding_type}. Only "last_hidden_state" and "mean_pooling" are supported.')
+
+        # Tokenize the input text
+        encoded_text = self.tokenizer(
+            text,
+            padding=True,
+            truncation=True,
+            return_tensors='pt',
+        )
+
+        # Move the input text to the device
+        encoded_text = encoded_text.to(self.device)
+
+        # encoded_inputs = {k: v.to(self.device) for k, v in encoded_inputs.items()}
+
+        # Get the embeddings
+        with torch.no_grad():
+            embeddings = self.model(**encoded_text)
+
+        # Get the last hidden state
+        embeddings = embeddings['logits']
+
+        embeddings = torch.sum(torch.log(1+torch.relu(embeddings)) * encoded_text['attention_mask'].unsqueeze(-1), dim=1)
+        e_norm = torch.nn.functional.normalize(embeddings, p=2, dim=1, eps=1e-8)
+
+        # Return the embeddings
+        return e_norm
+
+
 
 # FAISS word embedding library wrapper class
 class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
     def __init__(self,
                  model_name_or_path: str = 'klue/bert-base',
                  tokenizer_name_or_path: str = 'klue/bert-base',
+                 embedding_type: str = 'last_hidden_state',
                  device: str = 'cpu',
                  ) -> None:
         r"""
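The heart of the new class is the pooling at the end of `get_embeddings`: the masked-LM logits are passed through ReLU, log-saturated, summed over the non-padding tokens, and L2-normalised, the usual SPLADE-style way of collapsing per-token vocabulary scores into one vector per text. A self-contained sketch of just that step on dummy tensors (shapes and values are illustrative, not taken from the package):

```python
import torch

# Stand-ins for model(**encoded_text)['logits'] and encoded_text['attention_mask']:
# a batch of 2 texts, 5 tokens each, over a vocabulary of 10.
logits = torch.randn(2, 5, 10)
attention_mask = torch.ones(2, 5)

# Log-saturated activations, zeroed on padding positions, summed over tokens.
weights = torch.log(1 + torch.relu(logits)) * attention_mask.unsqueeze(-1)
embeddings = torch.sum(weights, dim=1)    # shape (2, 10): one vector per text

# Unit-length vectors, so inner-product search scores behave like cosine similarity.
e_norm = torch.nn.functional.normalize(embeddings, p=2, dim=1, eps=1e-8)
print(e_norm.shape)                       # torch.Size([2, 10])
```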
@@ -533,6 +651,7 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
         # Load the model
         self.model = AutoModel.from_pretrained(model_name_or_path).to(self.device)
 
+
         # Set the model to evaluation mode (since we do not need the gradients)
         self.model.eval()
 
@@ -540,7 +659,6 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
         self.dataset = None
 
 
-
     # Get the embeddings (new code)
     def get_doc_embeddings(self,
                            #text: Union[str, List[str]],
@@ -564,7 +682,7 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
        Raises:
            ValueError: If the embedding type is invalid.
        """
-
+
        # Check if the embedding type is valid
        if embedding_type not in ['last_hidden_state', 'mean_pooling']:
            raise ValueError(f'Invalid embedding type: {embedding_type}. Only "last_hidden_state" and "mean_pooling" are supported.')
@@ -577,12 +695,10 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
            padding=False,
            truncation=True,
            return_tensors='pt',
-            add_special_tokens=False,
+            add_special_tokens=False
        )
-
        # Move the input text to the device
        encoded_text = encoded_text.to(self.device)
-
        token_ids_list = encoded_text['input_ids'].tolist()
        token_ids_list = token_ids_list[0]
        for ids in token_ids_list:
@@ -591,19 +707,17 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
            else:
                if text not in ids_dict[ids]:
                    ids_dict[ids].append(sentence)
-
        # Get the embeddings
        embedding_dict = {}
        self.model.eval()
        for key, value in ids_dict.items():
            embed = self.model(torch.tensor([[key]]), output_hidden_states=True).hidden_states[-1][:,0,:].detach()
            embedding_dict[embed] = value
-
+
        # Return the embeddings
        return embedding_dict
 
 
-
    # Get the embeddings (new code)
    def get_query_embeddings(self,
                             text: Union[str, List[str]],
@@ -657,7 +771,6 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
        # Return the embeddings
        return embeds
 
-
 
    # Initialize the corpus using a dictionary or pandas DataFrame or HuggingFace Datasets object
    def initialize_corpus(self,
@@ -693,7 +806,7 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
 
        # Set the embedding_type
        self.embedding_type = embedding_type
-
+
        # get embedding dict
        embedding_dict = self.get_doc_embeddings(text=corpus, embedding_type=self.embedding_type)
 
@@ -729,7 +842,6 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
        return self.dataset
 
 
-
    # Search for the most similar elements in the dataset, given a query
    def search(self,
               query: str,
@@ -751,7 +863,6 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
        The returned elements are dictionaries containing the text and the score.
        """
 
-
        # Get the embeddings of the query
        query_embeddings = self.get_query_embeddings([query], embedding_type=self.embedding_type)
 
@@ -768,6 +879,7 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
            scores.append(score)
            similar_elts.append(similar_elt)
 
+
        text_list = []
        for item in similar_elts:
            for text in item['text']:
@@ -776,12 +888,10 @@ class FaissSearch_WordEmbed(FaissSearch_SenEmbed):
        flat_list = [sentence for sublist in text_list for sentence in sublist]
        count = Counter(flat_list)
        count = dict(count.most_common(5))
-
+
        sorted_dict = dict(sorted(count.items(), key=lambda x: x[1], reverse=True))
-
        # Convert the results to a pandas DataFrame
        results_df = pd.DataFrame({'text': sorted_dict.keys() , 'freq': sorted_dict.values()})
 
-
        # Return the most similar elements
        return results_df
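Taken together, the faiss_search.py changes add a third retrieval mode alongside 'sentence' and 'word'. A rough usage sketch follows; it rests on assumptions rather than the package docs: the import path `nltkor.search` is assumed to export `FaissSearch`, and the `initialize_corpus()`/`search()` calls are assumed to behave as in the existing sentence mode (their argument names are copied from the removed test/test.py shown further down).

```python
# Hypothetical sketch of the mode added in 1.2.16; import path and downstream
# behaviour are assumptions, and 'klue/bert-base' is just the signature default.
from nltkor.search import FaissSearch

searcher = FaissSearch(mode='splade',   # new; anything else raises
                       # ValueError("choice 'sentence' or 'word' or 'splade'")
                       model_name_or_path='klue/bert-base',
                       embedding_type='last_hidden_state')

corpus = {'text': ["오늘은 날씨가 매우 덥습니다.",
                   "저는 음악을 듣는 것을 좋아합니다."]}
searcher.initialize_corpus(corpus=corpus, section='text')
print(searcher.search("오늘 날씨", 2))   # top-2 most similar corpus entries
```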
{nltkor-1.2.15 → nltkor-1.2.16}/nltkor.egg-info/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: nltkor
-Version: 1.2.15
+Version: 1.2.16
 Home-page: https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git
 Keywords: string matching,pattern matching,edit distance,string to string correction,string to string matching,Levenshtein edit distance,Hamming distance,Damerau-Levenshtein distance,Jaro-Winkler distance,longest common subsequence,longest common substring,dynamic programming,approximate string matching,semantic similarity,natural language processing,NLP,information retrieval,rouge,sacrebleu,bertscore,bartscore,fasttext,glove,cosine similarity,Smith-Waterman,Needleman-Wunsch,Hirschberg,Karp-Rabin,Knuth-Morris-Pratt,Boyer-Moore
 Classifier: Programming Language :: Python :: 3.7
{nltkor-1.2.15 → nltkor-1.2.16}/nltkor.egg-info/SOURCES.txt
@@ -126,6 +126,4 @@ nltkor/tag/libs/wsd/__pycache__/__init__.cpython-39.pyc
 nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-38.pyc
 nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-39.pyc
 nltkor/tokenize/__init__.py
-nltkor/tokenize/ko_tokenize.py
-test/test.py
-test/testespresso.py
+nltkor/tokenize/ko_tokenize.py
{nltkor-1.2.15 → nltkor-1.2.16}/setup.py
@@ -66,7 +66,7 @@ module1 = cythonize([
 
 setup(
     name='nltkor',
-    version='1.2.15',
+    version='1.2.16',
     url='https://modi.changwon.ac.kr/air_cwnu/nlp_tool/nltk_ko.git',
     packages=find_packages(exclude=[]),
     python_requires='>=3.7',
nltkor-1.2.15/test/test.py
@@ -1,282 +0,0 @@
-from nltk.alignment import NeedlemanWunsch, SmithWaterman, Hirschberg, LongestCommonSubsequence, LongestCommonSubstring, DTW
-from nltk.distance import LevenshteinEditDistance, HammingDistance, DamerauLevenshteinDistance, WassersteinDistance
-from nltk.similarity import CosineSimilarity, LCSubstringSimilarity, LCSubsequenceSimilarity, JaroSimilarity
-from nltk.tokenize import sent_tokenize, word_tokenize, syllable_tokenize
-from nltk.search import NaiveSearch, RabinKarpSearch, KMPSearch, BoyerMooreSearch, FaissSearch
-from nltk.metrics import BERTScore, BARTScore, DefaultMetric
-from nltk import pos_tag, nouns, word_segmentor, pos_tag_with_verb_form
-import numpy as np
-from typing import List
-import torch
-
-def demo():
-    str1 = '기존에 제품이 장기간 사용으로 손상'
-    str2 = '장기간 사용으로 제품이 손상'
-
-    # result1, result2 = NeedlemanWunsch().get_alignment(str1, str2)
-    # print(result1, '\n', result2)
-
-    result1, result2 = SmithWaterman().get_alignment(str1, str2)
-    print(f"{result1}\n{result2}")
-
-    # result1, result2 = Hirschberg().get_alignment(str1, str2)
-    # print(f"{result1}\n{result2}")
-
-    # result = DTW().get_alignment_path(str1, str2)
-    # print(result)
-
-    # result = LongestCommonSubsequence().compute(str1, str2)
-    # print(result)
-
-    # result = LongestCommonSubstring().compute(str1, str2)
-    # print("-------LongestCommonSubstring-------")
-    # print(result)
-    # print("------------------------------------")
-    # print()
-
-def demo2():
-    str1 = '나는 학생이다.'
-    str2 = '그는 선생님이다.'
-
-    result = BARTScore().compute([str1], [str2])
-    print("-------BARTScore-------")
-    print(result)
-    print("-----------------------")
-    print()
-
-def demo3():
-    str1 = '나는 학생이다.'
-    str2 = '그는 선생님이다.'
-    model_name = 'bert-base-uncased'
-    result = BERTScore(model_name_or_path=model_name, lang='kor', num_layers=12).compute([str1], [str2])
-
-    print("model name: ", model_name)
-    print("-------BERTScore-------")
-    print(result)
-    print("-----------------------")
-    print()
-
-def demo4():
-    demo_setences = ['제가 나와 있는 곳은 경남 거제시 옥포동 덕포 해수욕장에 나와 있습니다.']
-    for sen in demo_setences:
-        print(word_tokenize(sen, "korean"))
-        print(pos_tag(sen, lang='kor'))
-
-def demo5():
-    str1 = '나는 학생이다.'
-    str2 = '그는 선생님이다.'
-
-    # result = LevenshteinEditDistance().compute(str1, str2)
-
-    # result = HammingDistance().compute(str1, str2)
-
-
-    result = DamerauLevenshteinDistance().compute(str1, str2)
-
-    print("-------DamerauLevenshteinDistance-------")
-    print(result)
-    print("----------------------------------------")
-    print()
-
-def demo6():
-    x1 = np.array([1, 2, 3, 4, 5])
-    x2 = np.array([3, 7, 8, 3, 1])
-
-    result = CosineSimilarity().compute(x1, x2)
-
-    print("-------CosineSimilarity-------")
-    print(result)
-    print("------------------------------")
-    print()
-
-def demo7():
-    str1 = '나는 학생이다.'
-    str2 = '그는 선생님이다.'
-
-    result = LCSubstringSimilarity().compute(str1, str2)
-
-    print("-------LCSubstringSimilarity-------")
-    print(result)
-    print("-----------------------------------")
-    print()
-
-    result = LCSubsequenceSimilarity().compute(str1, str2)
-
-    print("-------LCSubsequenceSimilarity-------")
-    print(result)
-    print("--------------------------------------")
-    print()
-
-    result = JaroSimilarity().compute(str1, str2)
-
-    print("-------JaroSimilarity-------")
-    print(result)
-    print("----------------------------")
-    print()
-
-
-def demo8():
-    pattern = "학생"
-    str1 = '나는 학생이다.'
-
-    result = NaiveSearch().search(pattern, str1)
-    print(result)
-
-    result = RabinKarpSearch().search(pattern, str1)
-    print(result)
-
-    result = KMPSearch().search(pattern, str1)
-    print(result)
-
-    result = BoyerMooreSearch().search(pattern, str1)
-    print(result)
-
-def demo9():
-    faiss = FaissSearch(model_name_or_path = 'skt/kobert-base-v1', tokenizer_name_or_path = 'skt/kobert-base-v1')
-    corpus = {
-        'text': [
-            "오늘은 날씨가 매우 덥습니다.",
-            "저는 음악을 듣는 것을 좋아합니다.",
-            "한국 음식 중에서 떡볶이가 제일 맛있습니다.",
-            "도서관에서 책을 읽는 건 좋은 취미입니다.",
-            "내일은 친구와 영화를 보러 갈 거예요.",
-            "여름 휴가 때 해변에 가서 수영하고 싶어요.",
-            "한국의 문화는 다양하고 흥미로워요.",
-            "피아노 연주는 나를 편안하게 해줍니다.",
-            "공원에서 산책하면 스트레스가 풀립니다.",
-            "요즘 드라마를 많이 시청하고 있어요.",
-            "커피가 일상에서 필수입니다.",
-            "새로운 언어를 배우는 것은 어려운 일이에요.",
-            "가을에 단풍 구경을 가고 싶어요.",
-            "요리를 만들면 집안이 좋아보입니다.",
-            "휴대폰 없이 하루를 보내는 것이 쉽지 않아요.",
-            "스포츠를 하면 건강에 좋습니다.",
-            "고양이와 개 중에 어떤 동물을 좋아하세요?"
-            "천천히 걸어가면서 풍경을 감상하는 것이 좋아요.",
-            "일주일에 한 번은 가족과 모임을 가요.",
-            "공부할 때 집중력을 높이는 방법이 있을까요?",
-            "봄에 꽃들이 피어날 때가 기대되요.",
-            "여행 가방을 챙기고 싶어서 설레여요.",
-            "사진 찍는 걸 좋아하는데, 카메라가 필요해요.",
-            "다음 주에 시험이 있어서 공부해야 해요.",
-            "운동을 하면 몸이 가벼워집니다.",
-            "좋은 책을 읽으면 마음이 풍요로워져요.",
-            "새로운 음악을 발견하면 기분이 좋아져요.",
-            "미술 전시회에 가면 예술을 감상할 수 있어요.",
-            "친구들과 함께 시간을 보내는 건 즐거워요.",
-            "자전거 타면 바람을 맞으면서 즐거워집니다."
-        ],
-    }
-    print(faiss.initialize_corpus(corpus=corpus, section='text', embedding_type='mean_pooling', save_path='/Users/dowon/Test/test.json'))
-    query = "오늘은 날씨가 매우 춥다."
-    top_k = 5
-    result = faiss.search(query, top_k)
-    print(result)
-
-def faiss_test():
-    faiss = FaissSearch(model_name_or_path = 'klue/bert-base')
-    result = TextReader("/Users/dowon/Test/sentence1.txt").read()
-    id = 0
-
-    for i in result:
-        print(i)
-        i = i.replace('\n', '')
-        print(i)
-        i = "i am test"
-        print(faiss.get_embeddings(text=i, num_workers=10).detach().cpu().numpy())
-        id += 1
-        if id ==3:
-            break
-
-def faiss_save_test():
-    faiss = FaissSearch(model_name_or_path = '/Users/dowon/test_model/trained_model/', tokenizer_name_or_path = '/Users/dowon/test_model/trained_model/')
-    faiss.load_dataset_from_json('/Users/dowon/Test/test.json')
-    faiss.embedding_type = 'mean_pooling'
-    # faiss.load_faiss_index(index_name='embeddings',file_path='/Users/dowon/Test/test_index.json')
-    faiss.add_faiss_index(column_name='embeddings')
-    query = "오늘은 날시가 매우 춥다."
-    top_k = 5
-    result = faiss.search(query, top_k)
-    print(result)
-
-
-def demo10():
-    metric = DefaultMetric()
-    y_true = [1, 3, 3, 5, 5,1]
-    y_pred = [1, 2, 3, 4, 5,2]
-    str1 = "i am teacher"
-    str2 = "he is student"
-    print(metric.precision_score(y_true, y_pred, "macro"))
-
-def demo11():
-    print("\nBegin Wasserstein distance demo ")
-
-    P = np.array([0.6, 0.1, 0.1, 0.1, 0.1])
-    Q1 = np.array([0.1, 0.1, 0.6, 0.1, 0.1])
-    Q2 = np.array([0.1, 0.1, 0.1, 0.1, 0.6])
-
-    P = torch.from_numpy(P)
-    Q1 = torch.from_numpy(Q1)
-    Q2 = torch.from_numpy(Q2)
-    kl_p_q1 = WassersteinDistance().compute_kullback(P, Q1)
-    kl_p_q2 = WassersteinDistance().compute_kullback(P, Q2)
-
-    wass_p_q1 = WassersteinDistance().compute_wasserstein(P, Q1)
-    wass_p_q2 = WassersteinDistance().compute_wasserstein(P, Q2)
-
-    jesson_p_q1 = WassersteinDistance().compute_jesson_shannon(P, Q1)
-    jesson_p_q2 = WassersteinDistance().compute_jesson_shannon(P, Q2)
-
-
-    print("\nKullback-Leibler distances: ")
-    print("P to Q1 : %0.4f " % kl_p_q1)
-    print("P to Q2 : %0.4f " % kl_p_q2)
-
-    print("\nWasserstein distances: ")
-    print("P to Q1 : %0.4f " % wass_p_q1)
-    print("P to Q2 : %0.4f " % wass_p_q2)
-
-    print("\nJesson-Shannon distances: ")
-    print("P to Q1 : %0.4f " % jesson_p_q1)
-    print("P to Q2 : %0.4f " % jesson_p_q2)
-
-    print("\nEnd demo ")
-
-def demo12():
-    y_pred = [5, 2, 4, 1, 3, 2, 5, 6, 7]
-    y_true = [1, 3, 6, 7, 1, 5]
-
-    user = [[5, 3, 2], [9, 1, 2], [3, 5, 6], [7, 2, 1]]
-    h_pred = [[15, 6, 21, 3], [15, 77, 23, 14], [51, 23, 21, 2], [53, 2, 1, 5]]
-
-    metric = DefaultMetric()
-    print(metric.precision_at_k(y_true, y_pred, 3))
-    print(metric.recall_at_k(y_true,y_pred, 3))
-    print(metric.hit_rate_at_k(user, h_pred, 1))
-
-
-
-class TextReader:
-    def __init__(self, path: str):
-        self.path = path
-
-    def read(self) -> List[str]:
-        with open(self.path, 'r') as f:
-            return f.readlines()
-
-
-if __name__=="__main__":
-    # demo()
-    # demo2()
-    # demo3()
-    #demo4()
-    # demo5()
-    # demo6()
-    # demo7()
-    # demo8()
-    # demo9()
-    # faiss_test()
-    # faiss_save_test()
-    # demo10()
-    demo11()
-    #demo12()
nltkor-1.2.15/test/testespresso.py
@@ -1,19 +0,0 @@
-from nltk.tag import EspressoTagger
-
-if __name__ == '__main__':
-    sent = "나는 배가 고프다. 나는 아름다운 강산에 살고있다."
-    tagger = EspressoTagger()
-    print()
-    print(tagger.tag('pos', sent))
-    print("dependency :")
-    print(tagger.tag('dependency', sent))
-    print('ner :')
-    ner = tagger.tag('ner', sent)
-    print(ner)
-    print()
-    print()
-    print('wsd :')
-    print(tagger.tag('wsd', sent))
-    print()
-    #print('srl :')
-    #print(tagger.tag('srl', sent))