nltkor-1.2.14-cp311-cp311-macosx_13_0_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (127)
  1. nltkor/Kor_char.py +193 -0
  2. nltkor/__init__.py +16 -0
  3. nltkor/alignment/__init__.py +1315 -0
  4. nltkor/cider/__init__.py +2 -0
  5. nltkor/cider/cider.py +55 -0
  6. nltkor/cider/cider_scorer.py +207 -0
  7. nltkor/distance/__init__.py +441 -0
  8. nltkor/distance/wasserstein.py +126 -0
  9. nltkor/etc.py +22 -0
  10. nltkor/lazyimport.py +144 -0
  11. nltkor/make_requirement.py +11 -0
  12. nltkor/metrics/__init__.py +63 -0
  13. nltkor/metrics/bartscore.py +301 -0
  14. nltkor/metrics/bertscore.py +331 -0
  15. nltkor/metrics/bleu_tensor.py +20 -0
  16. nltkor/metrics/classical.py +847 -0
  17. nltkor/metrics/entment.py +24 -0
  18. nltkor/metrics/eval.py +517 -0
  19. nltkor/metrics/mauve.py +273 -0
  20. nltkor/metrics/mauve_utils.py +131 -0
  21. nltkor/misc/__init__.py +11 -0
  22. nltkor/misc/string2string_basic_functions.py +59 -0
  23. nltkor/misc/string2string_default_tokenizer.py +83 -0
  24. nltkor/misc/string2string_hash_functions.py +159 -0
  25. nltkor/misc/string2string_word_embeddings.py +503 -0
  26. nltkor/search/__init__.py +10 -0
  27. nltkor/search/classical.py +569 -0
  28. nltkor/search/faiss_search.py +787 -0
  29. nltkor/search/kobert_tokenizer.py +181 -0
  30. nltkor/sejong/__init__.py +3 -0
  31. nltkor/sejong/__pycache__/__init__.cpython-38.pyc +0 -0
  32. nltkor/sejong/__pycache__/__init__.cpython-39.pyc +0 -0
  33. nltkor/sejong/__pycache__/sejong_download.cpython-38.pyc +0 -0
  34. nltkor/sejong/__pycache__/sejong_download.cpython-39.pyc +0 -0
  35. nltkor/sejong/__pycache__/ssem.cpython-38.pyc +0 -0
  36. nltkor/sejong/__pycache__/ssem.cpython-39.pyc +0 -0
  37. nltkor/sejong/ch.py +12 -0
  38. nltkor/sejong/dict_semClassNum.txt +491 -0
  39. nltkor/sejong/layer.txt +630 -0
  40. nltkor/sejong/sejong_download.py +87 -0
  41. nltkor/sejong/ssem.py +684 -0
  42. nltkor/similarity/__init__.py +3 -0
  43. nltkor/similarity/bartscore____.py +337 -0
  44. nltkor/similarity/bertscore____.py +339 -0
  45. nltkor/similarity/classical.py +245 -0
  46. nltkor/similarity/cosine_similarity.py +175 -0
  47. nltkor/tag/__init__.py +71 -0
  48. nltkor/tag/__pycache__/__init__.cpython-38.pyc +0 -0
  49. nltkor/tag/__pycache__/__init__.cpython-39.pyc +0 -0
  50. nltkor/tag/__pycache__/espresso_tag.cpython-38.pyc +0 -0
  51. nltkor/tag/__pycache__/espresso_tag.cpython-39.pyc +0 -0
  52. nltkor/tag/espresso_tag.py +220 -0
  53. nltkor/tag/libs/__init__.py +10 -0
  54. nltkor/tag/libs/__pycache__/__init__.cpython-38.pyc +0 -0
  55. nltkor/tag/libs/__pycache__/__init__.cpython-39.pyc +0 -0
  56. nltkor/tag/libs/__pycache__/attributes.cpython-38.pyc +0 -0
  57. nltkor/tag/libs/__pycache__/attributes.cpython-39.pyc +0 -0
  58. nltkor/tag/libs/__pycache__/config.cpython-38.pyc +0 -0
  59. nltkor/tag/libs/__pycache__/config.cpython-39.pyc +0 -0
  60. nltkor/tag/libs/__pycache__/metadata.cpython-38.pyc +0 -0
  61. nltkor/tag/libs/__pycache__/metadata.cpython-39.pyc +0 -0
  62. nltkor/tag/libs/__pycache__/reader.cpython-38.pyc +0 -0
  63. nltkor/tag/libs/__pycache__/reader.cpython-39.pyc +0 -0
  64. nltkor/tag/libs/__pycache__/taggers.cpython-38.pyc +0 -0
  65. nltkor/tag/libs/__pycache__/taggers.cpython-39.pyc +0 -0
  66. nltkor/tag/libs/__pycache__/utils.cpython-38.pyc +0 -0
  67. nltkor/tag/libs/__pycache__/utils.cpython-39.pyc +0 -0
  68. nltkor/tag/libs/__pycache__/word_dictionary.cpython-38.pyc +0 -0
  69. nltkor/tag/libs/__pycache__/word_dictionary.cpython-39.pyc +0 -0
  70. nltkor/tag/libs/arguments.py +280 -0
  71. nltkor/tag/libs/attributes.py +231 -0
  72. nltkor/tag/libs/config.py +159 -0
  73. nltkor/tag/libs/metadata.py +129 -0
  74. nltkor/tag/libs/ner/__init__.py +2 -0
  75. nltkor/tag/libs/ner/__pycache__/__init__.cpython-38.pyc +0 -0
  76. nltkor/tag/libs/ner/__pycache__/__init__.cpython-39.pyc +0 -0
  77. nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-38.pyc +0 -0
  78. nltkor/tag/libs/ner/__pycache__/ner_reader.cpython-39.pyc +0 -0
  79. nltkor/tag/libs/ner/macmorphoreader.py +7 -0
  80. nltkor/tag/libs/ner/ner_reader.py +92 -0
  81. nltkor/tag/libs/network.c +72325 -0
  82. nltkor/tag/libs/network.cpython-311-darwin.so +0 -0
  83. nltkor/tag/libs/network.pyx +878 -0
  84. nltkor/tag/libs/networkconv.pyx +1028 -0
  85. nltkor/tag/libs/networkdependencyconv.pyx +451 -0
  86. nltkor/tag/libs/parse/__init__.py +1 -0
  87. nltkor/tag/libs/parse/__pycache__/__init__.cpython-38.pyc +0 -0
  88. nltkor/tag/libs/parse/__pycache__/__init__.cpython-39.pyc +0 -0
  89. nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-38.pyc +0 -0
  90. nltkor/tag/libs/parse/__pycache__/parse_reader.cpython-39.pyc +0 -0
  91. nltkor/tag/libs/parse/parse_reader.py +283 -0
  92. nltkor/tag/libs/pos/__init__.py +2 -0
  93. nltkor/tag/libs/pos/__pycache__/__init__.cpython-38.pyc +0 -0
  94. nltkor/tag/libs/pos/__pycache__/__init__.cpython-39.pyc +0 -0
  95. nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-38.pyc +0 -0
  96. nltkor/tag/libs/pos/__pycache__/pos_reader.cpython-39.pyc +0 -0
  97. nltkor/tag/libs/pos/macmorphoreader.py +7 -0
  98. nltkor/tag/libs/pos/pos_reader.py +97 -0
  99. nltkor/tag/libs/reader.py +485 -0
  100. nltkor/tag/libs/srl/__init__.py +3 -0
  101. nltkor/tag/libs/srl/__pycache__/__init__.cpython-38.pyc +0 -0
  102. nltkor/tag/libs/srl/__pycache__/__init__.cpython-39.pyc +0 -0
  103. nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-38.pyc +0 -0
  104. nltkor/tag/libs/srl/__pycache__/srl_reader.cpython-39.pyc +0 -0
  105. nltkor/tag/libs/srl/__pycache__/train_srl.cpython-38.pyc +0 -0
  106. nltkor/tag/libs/srl/__pycache__/train_srl.cpython-39.pyc +0 -0
  107. nltkor/tag/libs/srl/__srl_reader_.py +535 -0
  108. nltkor/tag/libs/srl/srl_reader.py +436 -0
  109. nltkor/tag/libs/srl/train_srl.py +87 -0
  110. nltkor/tag/libs/taggers.py +926 -0
  111. nltkor/tag/libs/utils.py +384 -0
  112. nltkor/tag/libs/word_dictionary.py +239 -0
  113. nltkor/tag/libs/wsd/__init__.py +2 -0
  114. nltkor/tag/libs/wsd/__pycache__/__init__.cpython-38.pyc +0 -0
  115. nltkor/tag/libs/wsd/__pycache__/__init__.cpython-39.pyc +0 -0
  116. nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-38.pyc +0 -0
  117. nltkor/tag/libs/wsd/__pycache__/wsd_reader.cpython-39.pyc +0 -0
  118. nltkor/tag/libs/wsd/macmorphoreader.py +7 -0
  119. nltkor/tag/libs/wsd/wsd_reader.py +93 -0
  120. nltkor/tokenize/__init__.py +62 -0
  121. nltkor/tokenize/ko_tokenize.py +115 -0
  122. nltkor/trans.py +121 -0
  123. nltkor-1.2.14.dist-info/LICENSE.txt +1093 -0
  124. nltkor-1.2.14.dist-info/METADATA +41 -0
  125. nltkor-1.2.14.dist-info/RECORD +127 -0
  126. nltkor-1.2.14.dist-info/WHEEL +5 -0
  127. nltkor-1.2.14.dist-info/top_level.txt +1 -0
nltkor/tag/libs/wsd/wsd_reader.py ADDED
@@ -0,0 +1,93 @@
+ # -*- coding: utf-8 -*-
+
+ """
+ Class for dealing with WSD data.
+ """
+
+ from ..reader import TaggerReader
+
+ class ConllWSD(object):
+     """
+     Dummy class for storing column positions in a CoNLL file.
+     """
+     id = 0
+     word = 1
+     pos = 2
+     wsd = 3
+     SEP = '\t'
+
+ class WSDReader(TaggerReader):
+     """
+     This class reads data from a WSD corpus and turns it into a format
+     readable by the neural network for the WSD task.
+     """
+
+     def __init__(self, md=None, filename=None, load_dictionaries=True):
+         """
+         Constructor
+         """
+         self.rare_tag = None
+         self.sentences = []
+         if filename is not None:
+             try:
+                 self._read_plain(filename)
+             except Exception:
+                 self._read_conll(filename)
+
+         super(WSDReader, self).__init__(md, load_dictionaries=load_dictionaries)
+
+     @property
+     def task(self):
+         """
+         Abstract Base Class (ABC) attribute.
+         """
+         return 'wsd'
+
+     def _read_plain(self, filename):
+         """
+         Read data from a "plain" file, with one sentence per line and each
+         token given as token_tag.
+         """
+         self.sentences = []
+         with open(filename, 'rt') as f:
+             for line in f:
+                 #line = unicode(line, 'utf-8')
+                 items = line.strip().split()
+                 sentence = []
+                 for item in items:
+                     token, tag = item.rsplit('_', 1)
+                     sentence.append((token, tag))
+
+                 self.sentences.append(sentence)
+
+     def _read_conll(self, filename):
+         """
+         Read data from a CoNLL-formatted file. It expects at least four
+         tab-separated columns: id, surface word, POS tag and WSD tag.
+         """
+         self.sentences = []
+         sentence = []
+         with open(filename, 'rt') as f:
+             for line in f:
+                 line = line.strip()
+                 if line == '':
+                     if len(sentence) > 0:
+                         self.sentences.append(sentence)
+                         sentence = []
+                     continue
+
+                 fields = line.split(ConllWSD.SEP)
+                 try:
+                     word = fields[ConllWSD.word]
+                     pos = fields[ConllWSD.pos]
+                     wsd = fields[ConllWSD.wsd]
+                 except IndexError:
+                     continue
+                 sentence.append((word, wsd))
+                 #sentence.append((word, pos, ner))
+
+         if len(sentence) > 0:
+             self.sentences.append(sentence)
+
+ # backwards compatibility
+ MacMorphoReader = WSDReader
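For orientation, the sketch below shows how WSDReader might be driven against a plain-format file (one sentence per line, tokens written as token_tag). It is a minimal sketch, not part of the package: the corpus path is hypothetical, and whether the reader can be constructed without a metadata (md) object depends on TaggerReader in reader.py, which is not shown in this diff.

    # Hypothetical usage sketch; 'wsd_corpus.txt' and the md-less construction
    # are assumptions, not something this diff documents.
    from nltkor.tag.libs.wsd.wsd_reader import WSDReader

    reader = WSDReader(filename='wsd_corpus.txt', load_dictionaries=False)
    for sentence in reader.sentences[:3]:
        # each sentence is a list of (token, wsd_tag) pairs
        print(sentence)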
nltkor/tokenize/__init__.py ADDED
@@ -0,0 +1,62 @@
+ # -*- coding: utf-8 -*-
+ # Natural Language Toolkit: Tokenizers
+ #
+ # Copyright (C) 2001-2020 NLTK Project
+ # Author: Edward Loper <edloper@gmail.com>
+ #         Steven Bird <stevenbird1@gmail.com> (minor additions)
+ # Contributors: matthewmc, clouds56
+ # URL: <http://nltk.org/>
+ # For license information, see LICENSE.TXT
+
+ r"""
+ NLTK Tokenizer Package
+
+ Tokenizers divide strings into lists of substrings. For example,
+ tokenizers can be used to find the words and punctuation in a string:
+
+     >>> from nltk.tokenize import word_tokenize
+     >>> s = '''Good muffins cost $3.88\nin New York. Please buy me
+     ... two of them.\n\nThanks.'''
+     >>> word_tokenize(s)
+     ['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.',
+     'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
+
+ This particular tokenizer requires the Punkt sentence tokenization
+ models to be installed. NLTK also provides a simpler,
+ regular-expression based tokenizer, which splits text on whitespace
+ and punctuation:
+
+     >>> from nltk.tokenize import wordpunct_tokenize
+     >>> wordpunct_tokenize(s)
+     ['Good', 'muffins', 'cost', '$', '3', '.', '88', 'in', 'New', 'York', '.',
+     'Please', 'buy', 'me', 'two', 'of', 'them', '.', 'Thanks', '.']
+
+ We can also operate at the level of sentences, using the sentence
+ tokenizer directly as follows:
+
+     >>> from nltk.tokenize import sent_tokenize, word_tokenize
+     >>> sent_tokenize(s)
+     ['Good muffins cost $3.88\nin New York.', 'Please buy me\ntwo of them.', 'Thanks.']
+     >>> [word_tokenize(t) for t in sent_tokenize(s)]
+     [['Good', 'muffins', 'cost', '$', '3.88', 'in', 'New', 'York', '.'],
+     ['Please', 'buy', 'me', 'two', 'of', 'them', '.'], ['Thanks', '.']]
+
+ Caution: when tokenizing a Unicode string, make sure you are not
+ using an encoded version of the string (it may be necessary to
+ decode it first, e.g. with ``s.decode("utf8")``).
+
+ NLTK tokenizers can produce token-spans, represented as tuples of integers
+ having the same semantics as string slices, to support efficient comparison
+ of tokenizers. (These methods are implemented as generators.)
+
+     >>> from nltk.tokenize import WhitespaceTokenizer
+     >>> list(WhitespaceTokenizer().span_tokenize(s))
+     [(0, 4), (5, 12), (13, 17), (18, 23), (24, 26), (27, 30), (31, 36), (38, 44),
+     (45, 48), (49, 51), (52, 55), (56, 58), (59, 64), (66, 73)]
+
+ There are numerous ways to tokenize text. If you need more control over
+ tokenization, see the other methods provided in this package.
+
+ For further information, please see Chapter 3 of the NLTK book.
+ """
+ from nltkor.tokenize.ko_tokenize import Ko_tokenize
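Note that the doctests above are carried over verbatim from NLTK's own nltk.tokenize documentation; this __init__ itself only re-exports Ko_tokenize. Under that assumption, the import this module actually enables is the line below; a fuller usage sketch follows the ko_tokenize.py diff.

    # The re-export provided by this __init__ (usage shown after ko_tokenize.py).
    from nltkor.tokenize import Ko_tokenize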
nltkor/tokenize/ko_tokenize.py ADDED
@@ -0,0 +1,115 @@
+ # Natural Language Toolkit for Korean: NLTKor's very own tokenizer.
+ #
+ # Copyright (C) 2001-2020 NLTKor Project
+ # Author:
+ # URL: <http://>
+ # For license information, see LICENSE.TXT
+
+
+ import re
+ import unicodedata
+
+ # for Korean
+
+ class Enum(object):
+     def __init__(self, names):
+         for value, name in enumerate(names.split()):
+             setattr(self, name, value)
+
+ class Ko_tokenize():
+
+     def word(target, encoding='utf8'):
+         """Word tokenizer.
+
+         Tokenizes the input at the word level.
+
+         Arguments (all required):
+
+             target : the string to tokenize
+
+         Returns: the tokenization result as a list
+         """
+         isHangulSyllables = lambda x: unicodedata.name(x).find("HANGUL SYLLABLE") == 0
+         isHanjaSyllables = lambda x: unicodedata.name(x).find("CJK") == 0
+         isNumber = lambda x: unicodedata.name(x).find("FULLWIDTH DIGIT") == 0 or unicodedata.name(x).find("DIGIT") == 0
+         isAlphabet = lambda x: unicodedata.name(x).find("FULLWIDTH LATIN") == 0 or unicodedata.name(x).find("LATIN") == 0
+         isAlphabet_Connection = lambda x: x in (".", "-", "_", "|")
+         isNumber_Connection = lambda x: x in (".", ",")
+         isPunctuation = lambda x: unicodedata.category(x)[0] == "P"
+         isSymbol = lambda x: unicodedata.category(x)[0] == "S"
+         getCategory = lambda x: unicodedata.category(x)
+
+         TYPE = Enum("UNKNOWN SYMBOL NUMBER PUNCTUATION ALPHABET HANJA HANGUL")
+
+         buf = str()
+         type_prev = 0
+         type_cur = 0
+
+         if type(target) == str:
+             target = target
+
+         for i in range(len(target)):
+             ch = target[i]
+             ca = str()
+             try:
+                 if isHangulSyllables(ch): type_cur = TYPE.HANGUL
+                 elif isHanjaSyllables(ch): type_cur = TYPE.HANJA
+                 elif isNumber(ch): type_cur = TYPE.NUMBER
+                 elif isAlphabet(ch): type_cur = TYPE.ALPHABET
+                 elif isAlphabet_Connection(ch) and type_prev == TYPE.ALPHABET:
+                     if i+1 < len(target) and not isAlphabet(target[i+1]): type_cur = TYPE.SYMBOL
+                     else: type_cur = TYPE.ALPHABET
+                 elif isNumber_Connection(ch) and type_prev == TYPE.NUMBER:
+                     if i+1 < len(target) and not isNumber(target[i+1]): type_cur = TYPE.SYMBOL
+                     elif i+1 == len(target): type_cur = TYPE.SYMBOL
+                     else: type_cur = TYPE.NUMBER
+                 elif isPunctuation(ch): type_cur = TYPE.PUNCTUATION
+                 elif isSymbol(ch): type_cur = TYPE.SYMBOL
+                 else: type_cur = TYPE.UNKNOWN
+                 ca = getCategory(ch)
+             except ValueError:
+                 type_cur = TYPE.UNKNOWN
+             if type_cur == TYPE.PUNCTUATION:
+                 if ca in ("Ps", "Pe"): buf += " "
+                 elif i >= 0 and i < len(target) and target[i-1] != target[i]: buf += " "
+             elif type_cur != type_prev: buf += " "
+             buf += ch
+             type_prev = type_cur
+         return buf.split()
+
+     def syllable(text, blank=False):
+         """Syllable tokenizer.
+
+         Tokenizes the input at the syllable level.
+
+         Author: 박찬양
+         """
+         emjeol_list = list()
+         for emjeol in text:
+
+             if blank and (emjeol not in ['\n']):
+                 emjeol_list.append(emjeol)
+
+             elif emjeol not in [' ', '\n']:
+                 emjeol_list.append(emjeol)
+
+         return emjeol_list
+
+     def sentence(text):
+         """Sentence tokenizer.
+
+         Tokenizes the input at the sentence level.
+         """
+         txt = text.replace("\n", " ")
+         p = re.compile(r'(?<!\w\.\w.)(?<=\.|\?|\!)\s').split(txt)
+         result = []
+         for tmp in p:
+             if (tmp == ' ' or tmp == ''):
+                 continue
+             else:
+                 result.append(tmp.strip(" "))
+
+         return result
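A minimal usage sketch for the three tokenizers above; the example sentence and the printed results are illustrative, not verified output of this wheel.

    # Illustrative only: word-, syllable- and sentence-level tokenization.
    from nltkor.tokenize.ko_tokenize import Ko_tokenize

    s = "NLTKor는 한국어 토크나이저를 제공한다. 예시 문장이다."
    print(Ko_tokenize.word(s))      # word-level tokens
    print(Ko_tokenize.syllable(s))  # syllables, whitespace dropped
    print(Ko_tokenize.sentence(s))  # sentences split on . ? !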
nltkor/trans.py ADDED
@@ -0,0 +1,121 @@
+ import requests
+ import json
+ from datetime import datetime
+ import hmac
+ import base64
+ import uuid
+ import time
+ import re
+ from bs4 import BeautifulSoup as bs
+
+
+ class papago:
+
+     def __init__(self):
+
+         response = requests.get('https://papago.naver.com')
+         html = bs(response.text, 'html.parser')
+         pattern1 = r'/vendors~main.*chunk.js'
+
+         for tmp in html.find_all('script'):
+             tmp = str(tmp)
+             m = re.search(pattern1, tmp)
+             if m is not None:
+                 a = m.group()
+
+         js_url = 'https://papago.naver.com' + str(a)
+         rest = requests.get(js_url)
+         org = rest.text
+         pattern2 = r'AUTH_KEY:[\s]*"[\w.]+"'
+         self.match = str(re.findall(pattern2, org)).split('"')[1]
+
+     # generate the security key for the request headers
+     def hmac_md5(self, key, s):
+         return base64.b64encode(hmac.new(key.encode('utf-8'), s.encode('utf-8'), 'MD5').digest()).decode()
+
+     def translate(self, data, source, target):
+
+         url = 'https://papago.naver.com/apis/n2mt/translate'
+         AUTH_KEY = self.match
+
+         dt = datetime.now()
+         timestamp = str(round(dt.timestamp()*1000))
+
+         # use a fresh device ID each time; a fixed value risks being blocked by the server
+         deviceId = str(uuid.uuid4())
+
+         headers = {
+             'authorization': 'PPG ' + deviceId + ':' + self.hmac_md5(AUTH_KEY, deviceId + '\n' + url + '\n' + timestamp),
+             'timestamp': timestamp
+         }
+
+         form_data = {
+             'deviceId': deviceId,
+             'locale': 'ko',
+             'dict': 'true',
+             'dictDisplay': 30,
+             'honorific': 'false',
+             'instant': 'false',
+             'paging': 'false',
+             'source': source,
+             'target': target,
+             'text': data
+         }
+
+         res_data = requests.post(url, data=form_data, headers=headers)
+
+         # inspect the full Papago response if needed
+         #print("\n\n\n", res_data.json())
+
+         return res_data.json()['translatedText']
+
+     def e2k(self, sent_list):
+
+         patient = 0
+         return_list = []
+
+         for line in sent_list:
+             line = line.strip()
+             try:
+                 text = self.translate(line, 'en', 'ko')  ## translation
+             except (KeyError, requests.exceptions.ConnectionError) as e:
+                 if patient > 5:
+                     exit()  ## stop once more than 5 errors accumulate
+                 patient += 1
+                 time.sleep(30)  ## wait 30 seconds after an error
+                 continue
+
+             return_list.append(text)
+
+         #print(json.dumps(result, ensure_ascii=False), flush=True, file=ofp)  ## save in JSON-lines format
+         return return_list
+
+     def k2e(self, sent_list):
+
+         patient = 0
+         return_list = []
+
+         for line in sent_list:
+             line = line.strip()
+             try:
+                 text = self.translate(line, 'ko', 'en')  ## translation
+             except (KeyError, requests.exceptions.ConnectionError) as e:
+                 if patient > 5:
+                     exit()  ## stop once more than 5 errors accumulate
+                 patient += 1
+                 time.sleep(30)  ## wait 30 seconds after an error
+                 continue
+
+             return_list.append(text)
+
+         #print(json.dumps(result, ensure_ascii=False), flush=True, file=ofp)  ## save in JSON-lines format
+         return return_list
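As a rough usage sketch of the papago class above: it scrapes papago.naver.com for an AUTH_KEY at construction time, so it only works with network access and while Naver's page layout and this unofficial endpoint behave as the code expects; the example strings below are illustrative.

    # Hypothetical usage; depends on the live, unofficial Papago endpoint.
    from nltkor.trans import papago

    translator = papago()                     # scrapes the current AUTH_KEY
    print(translator.k2e(["안녕하세요."]))     # Korean -> English
    print(translator.e2k(["Good morning."]))  # English -> Korean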