pythainlp 5.0.0.dev2__tar.gz → 5.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (228) hide show
  1. {pythainlp-5.0.0.dev2/pythainlp.egg-info → pythainlp-5.0.2}/PKG-INFO +3 -3
  2. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/README.md +29 -39
  3. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/README_TH.md +14 -20
  4. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pyproject.toml +3 -0
  5. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/__init__.py +2 -2
  6. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/__main__.py +1 -1
  7. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/ancient/__init__.py +1 -1
  8. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/ancient/aksonhan.py +1 -1
  9. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/__init__.py +1 -1
  10. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/lm/__init__.py +1 -1
  11. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/lm/fasttext.py +1 -1
  12. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/lm/phayathaibert.py +1 -1
  13. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/lm/wangchanberta.py +1 -1
  14. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/word2vec/__init__.py +1 -1
  15. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/word2vec/bpemb_wv.py +1 -1
  16. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/word2vec/core.py +1 -1
  17. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/word2vec/ltw2v.py +1 -1
  18. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/word2vec/thai2fit.py +1 -1
  19. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/augment/wordnet.py +1 -1
  20. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/benchmarks/__init__.py +1 -1
  21. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/benchmarks/word_tokenization.py +1 -1
  22. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/chat/__init__.py +1 -1
  23. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/chat/core.py +1 -1
  24. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/classify/__init__.py +1 -1
  25. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/classify/param_free.py +1 -1
  26. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/cli/__init__.py +1 -1
  27. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/cli/benchmark.py +1 -1
  28. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/cli/data.py +1 -1
  29. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/cli/soundex.py +1 -1
  30. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/cli/tag.py +1 -1
  31. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/cli/tokenize.py +1 -1
  32. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/cls/__init__.py +1 -1
  33. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/coref/__init__.py +1 -1
  34. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/coref/_fastcoref.py +1 -1
  35. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/coref/core.py +1 -1
  36. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/coref/han_coref.py +1 -1
  37. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/__init__.py +1 -1
  38. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/common.py +1 -1
  39. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/conceptnet.py +1 -1
  40. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/core.py +1 -1
  41. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/icu.py +1 -1
  42. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/oscar.py +1 -1
  43. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/th_en_translit.py +1 -1
  44. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/tnc.py +1 -1
  45. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/ttc.py +1 -1
  46. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/util.py +1 -1
  47. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/volubilis.py +1 -1
  48. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/wikipedia.py +1 -1
  49. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/corpus/wordnet.py +1 -1
  50. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/el/__init__.py +1 -1
  51. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/el/_multiel.py +1 -1
  52. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/el/core.py +1 -1
  53. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/generate/__init__.py +1 -1
  54. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/generate/core.py +1 -1
  55. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/generate/thai2fit.py +1 -1
  56. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/generate/wangchanglm.py +1 -1
  57. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/khavee/__init__.py +1 -1
  58. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/khavee/core.py +1 -1
  59. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/khavee/example.py +1 -1
  60. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/morpheme/__init__.py +1 -1
  61. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/morpheme/thaiwordcheck.py +1 -1
  62. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/morpheme/word_formation.py +3 -1
  63. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/parse/__init__.py +1 -1
  64. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/parse/core.py +1 -1
  65. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/phayathaibert/__init__.py +1 -1
  66. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/phayathaibert/core.py +1 -1
  67. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/soundex/__init__.py +1 -1
  68. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/soundex/core.py +1 -1
  69. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/soundex/lk82.py +1 -1
  70. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/soundex/metasound.py +1 -1
  71. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/soundex/prayut_and_somchaip.py +1 -1
  72. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/soundex/sound.py +1 -1
  73. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/soundex/udom83.py +1 -1
  74. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/spell/__init__.py +1 -1
  75. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/spell/core.py +1 -1
  76. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/spell/phunspell.py +1 -1
  77. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/spell/symspellpy.py +1 -1
  78. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/spell/tltk.py +1 -1
  79. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/spell/wanchanberta_thai_grammarly.py +1 -1
  80. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/summarize/__init__.py +1 -1
  81. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/summarize/core.py +1 -1
  82. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/summarize/freq.py +1 -1
  83. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/summarize/keybert.py +1 -1
  84. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/summarize/mt5.py +1 -1
  85. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/__init__.py +1 -1
  86. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/_tag_perceptron.py +1 -1
  87. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/blackboard.py +1 -1
  88. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/chunk.py +1 -1
  89. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/crfchunk.py +1 -1
  90. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/locations.py +1 -1
  91. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/named_entity.py +1 -1
  92. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/orchid.py +1 -1
  93. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/perceptron.py +1 -1
  94. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/pos_tag.py +1 -1
  95. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/thai_nner.py +1 -1
  96. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/thainer.py +1 -1
  97. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/tltk.py +1 -1
  98. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/unigram.py +1 -1
  99. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tag/wangchanberta_onnx.py +1 -1
  100. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/__init__.py +1 -1
  101. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/_utils.py +1 -1
  102. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/attacut.py +1 -1
  103. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/core.py +1 -1
  104. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/crfcls.py +1 -1
  105. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/crfcut.py +11 -2
  106. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/deepcut.py +1 -1
  107. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/etcc.py +1 -1
  108. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/han_solo.py +1 -1
  109. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/longest.py +1 -1
  110. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/multi_cut.py +1 -1
  111. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/nercut.py +1 -1
  112. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/newmm.py +1 -1
  113. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/nlpo3.py +1 -1
  114. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/oskut.py +1 -1
  115. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/pyicu.py +1 -1
  116. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/sefr_cut.py +1 -1
  117. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/ssg.py +1 -1
  118. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/tcc.py +1 -1
  119. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/tcc_p.py +1 -1
  120. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/thaisumcut.py +1 -1
  121. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/tltk.py +1 -1
  122. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tokenize/wtsplit.py +1 -1
  123. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tools/__init__.py +1 -1
  124. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tools/misspell.py +1 -1
  125. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/tools/path.py +1 -1
  126. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/translate/__init__.py +1 -1
  127. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/translate/core.py +1 -1
  128. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/translate/en_th.py +1 -1
  129. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/translate/th_fr.py +1 -1
  130. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/translate/zh_th.py +1 -1
  131. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/__init__.py +1 -1
  132. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/core.py +1 -1
  133. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/ipa.py +1 -1
  134. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/iso_11940.py +1 -1
  135. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/lookup.py +1 -1
  136. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/pyicu.py +1 -1
  137. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/royin.py +1 -1
  138. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/spoonerism.py +1 -1
  139. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/thai2rom.py +1 -1
  140. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/thai2rom_onnx.py +1 -1
  141. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/thaig2p.py +1 -1
  142. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/tltk.py +1 -1
  143. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/w2p.py +1 -1
  144. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/transliterate/wunsen.py +1 -1
  145. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/ulmfit/__init__.py +1 -1
  146. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/ulmfit/core.py +1 -1
  147. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/ulmfit/preprocess.py +1 -1
  148. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/ulmfit/tokenizer.py +1 -1
  149. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/__init__.py +1 -1
  150. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/abbreviation.py +1 -1
  151. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/collate.py +1 -1
  152. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/date.py +1 -1
  153. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/digitconv.py +1 -1
  154. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/emojiconv.py +1 -1
  155. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/encoding.py +1 -1
  156. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/keyboard.py +1 -1
  157. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/keywords.py +1 -1
  158. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/morse.py +1 -1
  159. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/normalize.py +1 -1
  160. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/numtoword.py +1 -1
  161. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/phoneme.py +1 -1
  162. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/pronounce.py +7 -4
  163. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/remove_trailing_repeat_consonants.py +1 -1
  164. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/spell_words.py +1 -1
  165. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/strftime.py +1 -1
  166. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/syllable.py +1 -1
  167. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/thai.py +1 -1
  168. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/thaiwordcheck.py +1 -1
  169. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/time.py +1 -1
  170. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/trie.py +1 -1
  171. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/util/wordtonum.py +1 -1
  172. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/wangchanberta/__init__.py +1 -1
  173. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/wangchanberta/core.py +1 -1
  174. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/word_vector/__init__.py +1 -1
  175. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/word_vector/core.py +1 -1
  176. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/wsd/__init__.py +1 -1
  177. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/wsd/core.py +1 -1
  178. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2/pythainlp.egg-info}/PKG-INFO +3 -3
  179. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/setup.cfg +1 -1
  180. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/setup.py +4 -4
  181. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/__init__.py +1 -1
  182. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_ancient.py +1 -1
  183. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_augment.py +1 -1
  184. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_benchmarks.py +1 -1
  185. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_classify.py +1 -1
  186. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_cli.py +1 -1
  187. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_coref.py +1 -1
  188. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_corpus.py +1 -1
  189. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_el.py +1 -1
  190. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_generate.py +3 -1
  191. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_khavee.py +1 -1
  192. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_misspell.py +1 -1
  193. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_morpheme.py +1 -1
  194. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_parse.py +1 -1
  195. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_soundex.py +1 -1
  196. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_spell.py +1 -1
  197. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_summarize.py +1 -1
  198. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_tag.py +1 -1
  199. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_tokenize.py +1 -1
  200. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_tools.py +1 -1
  201. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_util.py +1 -1
  202. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_wsd.py +1 -1
  203. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/CONTRIBUTING.md +0 -0
  204. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/LICENSE +0 -0
  205. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/MANIFEST.in +0 -0
  206. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/parse/esupar_engine.py +0 -0
  207. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/parse/spacy_thai_engine.py +0 -0
  208. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/parse/transformers_ud.py +0 -0
  209. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/parse/ud_goeswith.py +0 -0
  210. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/spell/pn.py +0 -0
  211. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/translate/small100.py +0 -0
  212. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp/translate/tokenization_small100.py +0 -0
  213. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp.egg-info/SOURCES.txt +0 -0
  214. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp.egg-info/dependency_links.txt +0 -0
  215. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp.egg-info/entry_points.txt +0 -0
  216. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp.egg-info/not-zip-safe +0 -0
  217. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp.egg-info/requires.txt +1 -1
  218. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/pythainlp.egg-info/top_level.txt +0 -0
  219. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/data/eval-details-input.json +0 -0
  220. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/data/eval-input.yml +0 -0
  221. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/data/input.txt +0 -0
  222. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/data/sentences.yml +0 -0
  223. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/data/test.txt +0 -0
  224. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_translate.py +0 -0
  225. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_transliterate.py +0 -0
  226. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_ulmfit.py +0 -0
  227. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_wangchanberta.py +0 -0
  228. {pythainlp-5.0.0.dev2 → pythainlp-5.0.2}/tests/test_word_vector.py +0 -0
@@ -1,12 +1,12 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pythainlp
3
- Version: 5.0.0.dev2
3
+ Version: 5.0.2
4
4
  Summary: Thai Natural Language Processing library
5
5
  Home-page: https://github.com/PyThaiNLP/pythainlp
6
6
  Author: PyThaiNLP
7
7
  Author-email: email@wannaphong.com
8
8
  License: Apache Software License 2.0
9
- Project-URL: Documentation, https://pythainlp.github.io/docs/4.0/
9
+ Project-URL: Documentation, https://pythainlp.github.io/docs/5.0/
10
10
  Project-URL: Tutorials, https://pythainlp.github.io/tutorials/
11
11
  Project-URL: Source Code, https://github.com/PyThaiNLP/pythainlp
12
12
  Project-URL: Bug Tracker, https://github.com/PyThaiNLP/pythainlp/issues
@@ -39,7 +39,7 @@ Description:
39
39
 
40
40
  Keywords: pythainlp,NLP,natural language processing,text analytics,text processing,localization,computational linguistics,ThaiNLP,Thai NLP,Thai language
41
41
  Platform: UNKNOWN
42
- Classifier: Development Status :: 4 - Beta
42
+ Classifier: Development Status :: 5 - Production/Stable
43
43
  Classifier: Programming Language :: Python :: 3
44
44
  Classifier: Intended Audience :: Developers
45
45
  Classifier: License :: OSI Approved :: Apache Software License
@@ -1,31 +1,30 @@
1
1
  <div align="center">
2
2
  <img src="https://avatars0.githubusercontent.com/u/32934255?s=200&v=4"/>
3
3
  <h1>PyThaiNLP: Thai Natural Language Processing in Python</h1>
4
+ <a href="https://www.repostatus.org/#active"><img alt="Project Status: Active – The project has reached a stable, usable state and is being actively developed." src="https://www.repostatus.org/badges/latest/active.svg"/></a>
4
5
  <a href="https://pypi.python.org/pypi/pythainlp"><img alt="pypi" src="https://img.shields.io/pypi/v/pythainlp.svg"/></a>
5
- <a href="https://www.python.org/downloads/release/python-370/"><img alt="Python 3.7" src="https://img.shields.io/badge/python-3.7-blue.svg"/></a>
6
+ <a href="https://www.python.org/downloads/release/python-380/"><img alt="Python 3.8" src="https://img.shields.io/badge/python-3.8-blue.svg"/></a>
6
7
  <a href="https://opensource.org/licenses/Apache-2.0"><img alt="License" src="https://img.shields.io/badge/License-Apache%202.0-blue.svg"/></a>
7
- <a href="https://pepy.tech/project/pythainlp"><img alt="Download" src="https://pepy.tech/badge/pythainlp/month"/></a>
8
8
  <a href="https://github.com/PyThaiNLP/pythainlp/actions/workflows/test.yml"><img alt="Unit test and code coverage" src="https://github.com/PyThaiNLP/pythainlp/actions/workflows/test.yml/badge.svg"/></a>
9
- <a href="https://coveralls.io/github/PyThaiNLP/pythainlp?branch=dev"><img alt="Coverage Status" src="https://coveralls.io/repos/github/PyThaiNLP/pythainlp/badge.svg?branch=dev"/></a>
10
9
  <a href="https://www.codacy.com/gh/PyThaiNLP/pythainlp/dashboard?utm_source=github.com&amp;utm_medium=referral&amp;utm_content=PyThaiNLP/pythainlp&amp;utm_campaign=Badge_Grade"><img src="https://app.codacy.com/project/badge/Grade/5821a0de122041c79999bbb280230ffb"/></a>
10
+ <a href="https://coveralls.io/github/PyThaiNLP/pythainlp?branch=dev"><img alt="Coverage Status" src="https://coveralls.io/repos/github/PyThaiNLP/pythainlp/badge.svg?branch=dev"/></a>
11
11
  <a href="https://colab.research.google.com/github/PyThaiNLP/tutorials/blob/master/source/notebooks/pythainlp_get_started.ipynb"><img alt="Google Colab Badge" src="https://badgen.net/badge/Launch%20Quick%20Start%20Guide/on%20Google%20Colab/blue?icon=terminal"/></a>
12
12
  <a href="https://zenodo.org/badge/latestdoi/61813823"><img alt="DOI" src="https://zenodo.org/badge/61813823.svg"/></a>
13
13
  <a href="https://matrix.to/#/#thainlp:matrix.org" rel="noopener" target="_blank"><img src="https://matrix.to/img/matrix-badge.svg" alt="Chat on Matrix"></a>
14
14
  </div>
15
15
 
16
- PyThaiNLP is a Python package for text processing and linguistic analysis, similar to [NLTK](https://www.nltk.org/) with a focus on the Thai language.
16
+ PyThaiNLP is a Python package for text processing and linguistic analysis, similar to [NLTK](https://www.nltk.org/) with a focus on Thai language.
17
17
 
18
18
  PyThaiNLP เป็นไลบรารีภาษาไพทอนสำหรับประมวลผลภาษาธรรมชาติ คล้ายกับ NLTK โดยเน้นภาษาไทย [ดูรายละเอียดภาษาไทยได้ที่ README_TH.MD](https://github.com/PyThaiNLP/pythainlp/blob/dev/README_TH.md)
19
19
 
20
- **News**
20
+ ## News
21
21
 
22
22
  > You can now contact the PyThaiNLP team or ask them any questions. <a href="https://matrix.to/#/#thainlp:matrix.org" rel="noopener" target="_blank"><img src="https://matrix.to/img/matrix-badge.svg" alt="Chat on Matrix"></a>
23
23
 
24
24
  | Version | Description | Status |
25
25
  |:------:|:--:|:------:|
26
- | [4.0](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/714) |
27
- | [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.0 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
28
-
26
+ | [5.0.2](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
27
+ | [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) |
29
28
 
30
29
  ## Getting Started
31
30
 
@@ -37,24 +36,20 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร
37
36
 
38
37
  ## Capabilities
39
38
 
40
- PyThaiNLP provides standard NLP functions for Thai, for example part-of-speech tagging, linguistic unit segmentation (syllable, word, or sentence). Some of these functions are also available via the command-line interface.
39
+ PyThaiNLP provides standard linguistic analysis for Thai language and standard Thai locale utility functions.
40
+ Some of these functions are also available via the command-line interface (run `thainlp` in your shell).
41
41
 
42
- <details>
43
- <summary>List of Features</summary>
42
+ Partial list of features:
44
43
 
45
44
  - Convenient character and word classes, like Thai consonants (`pythainlp.thai_consonants`), vowels (`pythainlp.thai_vowels`), digits (`pythainlp.thai_digits`), and stop words (`pythainlp.corpus.thai_stopwords`) -- comparable to constants like `string.letters`, `string.digits`, and `string.punctuation`
46
- - Thai linguistic unit segmentation/tokenization, including sentence (`sent_tokenize`), word (`word_tokenize`), and subword segmentations based on Thai Character Cluster (`subword_tokenize`)
47
- - Thai part-of-speech tagging (`pos_tag`)
48
- - Thai spelling suggestion and correction (`spell` and `correct`)
49
- - Thai transliteration (`transliterate`)
50
- - Thai soundex (`soundex`) with three engines (`lk82`, `udom83`, `metasound`)
51
- - Thai collation (sorted by dictionary order) (`collate`)
52
- - Read out number to Thai words (`bahttext`, `num_to_thaiword`)
53
- - Thai datetime formatting (`thai_strftime`)
45
+ - Linguistic unit segmentation at different levels: sentence (`sent_tokenize`), word (`word_tokenize`), and subword (`subword_tokenize`)
46
+ - Part-of-speech tagging (`pos_tag`)
47
+ - Spelling suggestion and correction (`spell` and `correct`)
48
+ - Phonetic algorithm and transliteration (`soundex` and `transliterate`)
49
+ - Collation (sorted by dictionary order) (`collate`)
50
+ - Number read out (`num_to_thaiword` and `bahttext`)
51
+ - Datetime formatting (`thai_strftime`)
54
52
  - Thai-English keyboard misswitched fix (`eng_to_thai`, `thai_to_eng`)
55
- - Command-line interface for basic functions, like tokenization and POS tagging (run `thainlp` in your shell)
56
- </details>
57
-
58
53
 
59
54
  ## Installation
60
55
 
@@ -78,23 +73,20 @@ Some functionalities, like Thai WordNet, may require extra packages. To install
78
73
  pip install pythainlp[extra1,extra2,...]
79
74
  ```
80
75
 
81
- <details>
82
- <summary>List of possible <code>extras</code></summary>
76
+ Possible `extras`:
83
77
 
84
- - `full` (install everything)
85
- - `attacut` (to support attacut, a fast and accurate tokenizer)
86
- - `benchmarks` (for [word tokenization benchmarking](tokenization-benchmark.md))
87
- - `icu` (for ICU, International Components for Unicode, support in transliteration and tokenization)
88
- - `ipa` (for IPA, International Phonetic Alphabet, support in transliteration)
89
- - `ml` (to support ULMFiT models for classification)
90
- - `thai2fit` (for Thai word vector)
91
- - `thai2rom` (for machine-learnt romanization)
92
- - `wordnet` (for Thai WordNet API)
93
- </details>
78
+ - `full` (install everything)
79
+ - `attacut` (to support attacut, a fast and accurate tokenizer)
80
+ - `benchmarks` (for [word tokenization benchmarking](tokenization-benchmark.md))
81
+ - `icu` (for ICU, International Components for Unicode, support in transliteration and tokenization)
82
+ - `ipa` (for IPA, International Phonetic Alphabet, support in transliteration)
83
+ - `ml` (to support ULMFiT models for classification)
84
+ - `thai2fit` (for Thai word vector)
85
+ - `thai2rom` (for machine-learnt romanization)
86
+ - `wordnet` (for Thai WordNet API)
94
87
 
95
88
  For dependency details, look at the `extras` variable in [`setup.py`](https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py).
96
89
 
97
-
98
90
  ## Data Directory
99
91
 
100
92
  - Some additional data, like word lists and language models, may be automatically downloaded during runtime.
@@ -102,22 +94,22 @@ For dependency details, look at the `extras` variable in [`setup.py`](https://gi
102
94
  - The data directory can be changed by specifying the environment variable `PYTHAINLP_DATA_DIR`.
103
95
  - See the data catalog (`db.json`) at https://github.com/PyThaiNLP/pythainlp-corpus
104
96
 
105
-
106
97
  ## Command-Line Interface
107
98
 
108
99
  Some of PyThaiNLP's functionalities can be used via the command line with the `thainlp` command.
109
100
 
110
101
  For example, to display a catalog of datasets:
102
+
111
103
  ```sh
112
104
  thainlp data catalog
113
105
  ```
114
106
 
115
107
  To show how to use:
108
+
116
109
  ```sh
117
110
  thainlp help
118
111
  ```
119
112
 
120
-
121
113
  ## Licenses
122
114
 
123
115
  | | License |
@@ -127,7 +119,6 @@ thainlp help
127
119
  | Language models created by PyThaiNLP | [Creative Commons Attribution 4.0 International Public License (CC-by)](https://creativecommons.org/licenses/by/4.0/) |
128
120
  | Other corpora and models that may be included in PyThaiNLP | See [Corpus License](https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/corpus/corpus_license.md) |
129
121
 
130
-
131
122
  ## Contribute to PyThaiNLP
132
123
 
133
124
  - Please fork and create a pull request :)
@@ -137,7 +128,6 @@ thainlp help
137
128
 
138
129
  You can read [INTHEWILD.md](https://github.com/PyThaiNLP/pythainlp/blob/dev/INTHEWILD.md).
139
130
 
140
-
141
131
  ## Citations
142
132
 
143
133
  If you use `PyThaiNLP` in your project or publication, please cite the library as follows:
@@ -14,18 +14,17 @@
14
14
  </div>
15
15
  PyThaiNLP เป็นไลบรารีภาษาไพทอนสำหรับประมวลผลภาษาธรรมชาติ โดยเน้นภาษาไทย
16
16
 
17
- **ข่าวสาร**
17
+ ## ข่าวสาร
18
18
 
19
19
  > คุณสามารถพูดคุยหรือแชทกับทีม PyThaiNLP หรือผู้สนับสนุนคนอื่น ๆ ได้ที่ <a href="https://matrix.to/#/#thainlp:matrix.org" rel="noopener" target="_blank"><img src="https://matrix.to/img/matrix-badge.svg" alt="Chat on Matrix"></a>
20
20
 
21
21
  | รุ่น | คำอธิบาย | สถานะ |
22
22
  |:------:|:--:|:------:|
23
- | [4.0](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/714) |
24
- | [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.0 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
23
+ | [5.0.2](https://github.com/PyThaiNLP/pythainlp/releases) | Stable | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/788) |
24
+ | [`dev`](https://github.com/PyThaiNLP/pythainlp/tree/dev) | Release Candidate for 5.1 | [Change Log](https://github.com/PyThaiNLP/pythainlp/issues/900) |
25
25
 
26
26
  ติดตามพวกเราบน [PyThaiNLP Facebook page](https://www.facebook.com/pythainlp/) เพื่อรับข่าวสารเพิ่มเติม
27
27
 
28
-
29
28
  ## เริ่มต้นกับ PyThaiNLP
30
29
 
31
30
  พวกเราได้จัดทำ [PyThaiNLP Get Started Tutorial](https://pythainlp.github.io/tutorials/notebooks/pythainlp_get_started.html) สำหรับสำรวจความสามารถของ PyThaiNLP; พวกเรามีเอกสารสอนใช้งาน สามารถศึกษาได้ที่ [หน้า tutorial](https://pythainlp.github.io/tutorials).
@@ -34,7 +33,6 @@ PyThaiNLP เป็นไลบารีภาษาไพทอนสำหร
34
33
 
35
34
  พวกเราพยายามทำให้โมดูลใช้งานได้ง่ายที่สุดเท่าที่จะเป็นไปได้; ตัวอย่างเช่น บางชุดข้อมูล (เช่น รายการคำและตัวแบบภาษา) จะถูกดาวน์โหลดอัตโนมัติเมื่อมีการเรียกใช้งาน โดย PyThaiNLP จะจัดเก็บข้อมูลเหล่านั้นไว้ในโฟลเดอร์ `~/pythainlp-data` เป็นค่าเริ่มต้น แต่ผู้ใช้งานสามารถระบุตำแหน่งที่ต้องการได้เองผ่านค่า environment variable `PYTHAINLP_DATA_DIR` อ่านรายละเอียดคลังข้อมูลเพิ่มเติมได้ที่ [PyThaiNLP/pythainlp-corpus](https://github.com/PyThaiNLP/pythainlp-corpus).
36
35
 
37
-
38
36
  ## ความสามารถ
39
37
 
40
38
  PyThaiNLP มีความสามารถพื้นฐานสำหรับการประมวลผลภาษาไทย ตัวอย่างเช่นการกำกับหน้าที่ของคำ (part-of-speech tagging) การแบ่งหน่วยของข้อความตามหลักภาษาศาสตร์ (พยางค์ คำ และประโยค) บางความสามารถสามารถใช้งานได้ผ่านทางคอมมานด์ไลน์
@@ -84,35 +82,35 @@ pip install pythainlp[extra1,extra2,...]
84
82
  <details>
85
83
  <summary>รายการสำหรับติดตั้งผ่าน <code>extras</code></summary>
86
84
 
87
- - `full` (ติดตั้งทุกอย่าง)
88
- - `attacut` (เพื่อสนับสนุน attacut ซึ่งเป็นตัวตัดคำที่ทำงานได้รวดเร็วและมีประสิทธิภาพ)
89
- - `benchmarks` (สำหรับ [word tokenization benchmarking](tokenization-benchmark.md))
90
- - `icu` (สำหรับการรองรับ ICU หรือ International Components for Unicode ในการถอดเสียงเป็นอักษรและการตัดแบ่งคำ)
91
- - `ipa` (สำหรับการรองรับ IPA หรือ International Phonetic Alphabet ในการถอดเสียงเป็นอักษร)
92
- - `ml` (เพื่อให้สนับสนุนตัวแบบภาษา ULMFiT สำหรับการจำแนกข้อความ)
93
- - `thai2fit` (สำหรับ Thai word vector)
94
- - `thai2rom` (สำหรับการถอดอักษรไทยเป็นอักษรโรมัน)
95
- - `wordnet` (สำหรับ Thai WordNet API)
85
+ - `full` (ติดตั้งทุกอย่าง)
86
+ - `attacut` (เพื่อสนับสนุน attacut ซึ่งเป็นตัวตัดคำที่ทำงานได้รวดเร็วและมีประสิทธิภาพ)
87
+ - `benchmarks` (สำหรับ [word tokenization benchmarking](tokenization-benchmark.md))
88
+ - `icu` (สำหรับการรองรับ ICU หรือ International Components for Unicode ในการถอดเสียงเป็นอักษรและการตัดแบ่งคำ)
89
+ - `ipa` (สำหรับการรองรับ IPA หรือ International Phonetic Alphabet ในการถอดเสียงเป็นอักษร)
90
+ - `ml` (เพื่อให้สนับสนุนตัวแบบภาษา ULMFiT สำหรับการจำแนกข้อความ)
91
+ - `thai2fit` (สำหรับ Thai word vector)
92
+ - `thai2rom` (สำหรับการถอดอักษรไทยเป็นอักษรโรมัน)
93
+ - `wordnet` (สำหรับ Thai WordNet API)
96
94
  </details>
97
95
 
98
96
  สำหรับโมดูลที่ต้องการ สามารถดูรายละเอียดได้ที่ตัวแปร `extras` ใน [`setup.py`](https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py).
99
97
 
100
-
101
98
  ## Command-line
102
99
 
103
100
  บางความสามารถของ PyThaiNLP สามารถใช้งานผ่าน command line ได้โดยใช้ `thainlp`
104
101
 
105
102
  ตัวอย่าง, แสดงรายละเอียดของชุดข้อมูล:
103
+
106
104
  ```sh
107
105
  thainlp data catalog
108
106
  ```
109
107
 
110
108
  แสดงวิธีใช้งาน:
109
+
111
110
  ```sh
112
111
  thainlp help
113
112
  ```
114
113
 
115
-
116
114
  ## ผู้ใช้งาน Python 2
117
115
 
118
116
  - PyThaiNLP 2 สนับสนุน Python 3.6 ขึ้นไป บางความสามารถ สามารถใช้งานกับ Python 3 รุ่นก่อนหน้าได้ แต่ไม่ได้มีการทดสอบว่าใช้งานได้หรือไม่ อ่านเพิ่มเติม [1.7 -> 2.0 change log](https://github.com/PyThaiNLP/pythainlp/issues/118).
@@ -120,7 +118,6 @@ thainlp help
120
118
  - [Upgrade ThaiNER from 1.7](https://github.com/PyThaiNLP/pythainlp/wiki/Upgrade-ThaiNER-from-PyThaiNLP-1.7-to-PyThaiNLP-2.0)
121
119
  - ผู้ใช้งาน Python 2.7 สามารถใช้งาน PyThaiNLP 1.6
122
120
 
123
-
124
121
  ## การอ้างอิง
125
122
 
126
123
  หากคุณใช้ซอฟต์แวร์ `PyThaiNLP` ในโครงงานหรืองานวิจัยของคุณ คุณสามารถอ้างอิงได้ตามนี้
@@ -184,7 +181,6 @@ Wannaphong Phatthiyaphaibun, Korakot Chaovavanich, Charin Polpanumas, Arthit Sur
184
181
 
185
182
  คุณสามารถอ่านได้ที่ [INTHEWILD.md](https://github.com/PyThaiNLP/pythainlp/blob/dev/INTHEWILD.md)
186
183
 
187
-
188
184
  ## สัญญาอนุญาต
189
185
 
190
186
  | | สัญญาอนุญาต |
@@ -194,12 +190,10 @@ Wannaphong Phatthiyaphaibun, Korakot Chaovavanich, Charin Polpanumas, Arthit Sur
194
190
  | Language models created by PyThaiNLP | [Creative Commons Attribution 4.0 International Public License (CC-by)](https://creativecommons.org/licenses/by/4.0/) |
195
191
  | สำหรับฐานข้อมูลภาษาและโมเดลอื่นที่อาจมาพร้อมกับซอฟต์แวร์ PyThaiNLP | ดู [Corpus License](https://github.com/PyThaiNLP/pythainlp/blob/dev/pythainlp/corpus/corpus_license.md) |
196
192
 
197
-
198
193
  ## บัตรโมเดล
199
194
 
200
195
  สำหรับรายละเอียดทางเทคนิค ข้อควรระวัง และข้อคำนึงทางจริยธรรมของตัวแบบ (โมเดล) ที่ใช้ใน PyThaiNLP กรุณาดูที่ [Model cards](https://github.com/PyThaiNLP/pythainlp/wiki/Model-Cards)
201
196
 
202
-
203
197
  ## ผู้สนับสนุน
204
198
 
205
199
  [![VISTEC-depa Thailand Artificial Intelligence Research Institute](https://airesearch.in.th/assets/img/logo/airesearch-logo.svg)](https://airesearch.in.th/)
@@ -1,3 +1,6 @@
1
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
1
4
  [tool.ruff]
2
5
  line-length = 79
3
6
  indent-width = 4
@@ -1,7 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
- __version__ = "5.0.0dev2"
4
+ __version__ = "5.0.2"
5
5
 
6
6
  thai_consonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ" # 44 chars
7
7
 
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  import argparse
5
5
  import sys
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Ancient versions of the Thai language
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  from pythainlp.util import Trie
5
5
  from pythainlp import thai_consonants, thai_tonemarks
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Thai text augment
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Language Models
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  import itertools
5
5
  from typing import List, Tuple
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  from typing import List
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  from typing import List
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Word2Vec
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  from typing import List, Tuple
5
5
  from pythainlp.augment.word2vec.core import Word2VecAug
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  from typing import List, Tuple
5
5
  import itertools
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  from typing import List, Tuple
5
5
  from pythainlp.augment.word2vec.core import Word2VecAug
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  from typing import List, Tuple
5
5
  from pythainlp.augment.word2vec.core import Word2VecAug
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Thank https://dev.to/ton_ami/text-data-augmentation-synonym-replacement-4h8l
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Performance benchmarking.
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  import re
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  pythainlp.chat
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  import torch
5
5
 
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  pythainlp.classify
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  import gzip
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """Command line helpers."""
5
5
  import sys
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env python3
2
2
  # -*- coding: utf-8 -*-
3
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
3
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
4
4
  # SPDX-License-Identifier: Apache-2.0
5
5
 
6
6
  import argparse
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Command line for PyThaiNLP's dataset/corpus management.
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Command line for PyThaiNLP's soundex.
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Command line for PyThaiNLP's taggers.
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Command line for PyThaiNLP's tokenizers.
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  pythainlp.cls
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  PyThaiNLP Coreference Resolution
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  from typing import List
5
5
  import spacy
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  from typing import List
5
5
 
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  import spacy
5
5
  from pythainlp.coref._fastcoref import FastCoref
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Corpus related functions.
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
 
5
5
  """
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Get data from ConceptNet API at http://conceptnet.io
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Corpus related functions.
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Provides an optional word list from International Components for Unicode (ICU) dictionary.
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Thai unigram word frequency from OSCAR Corpus (words tokenized using ICU)
@@ -1,5 +1,5 @@
1
1
  # -*- coding: utf-8 -*-
2
- # SPDX-FileCopyrightText: Copyright 2016-2024 PyThaiNLP Project
2
+ # SPDX-FileCopyrightText: 2016-2024 PyThaiNLP Project
3
3
  # SPDX-License-Identifier: Apache-2.0
4
4
  """
5
5
  Thai-English Transliteration Dictionary v1.4