cntext 0.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/__init__.py +6 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/__pycache__/__init__.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/description/__init__.py +1 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/description/__pycache__/__init__.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/description/__pycache__/description.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/description/description.py +65 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/dictionary/__init__.py +1 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/dictionary/__pycache__/__init__.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/dictionary/__pycache__/dictionary.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/dictionary/dictionary.py +411 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/sentiment/__init__.py +1 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/sentiment/__pycache__/__init__.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/sentiment/__pycache__/sentiment.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/sentiment/sentiment.py +295 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/similarity/__init__.py +1 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/similarity/__pycache__/__init__.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/similarity/__pycache__/similarity.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/similarity/similarity.py +97 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/stats/__init__.py +1 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/stats/__pycache__/__init__.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/stats/__pycache__/stats.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/stats/stats.py +65 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/visualization/__init__.py +1 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/visualization/__pycache__/__init__.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/visualization/__pycache__/visualization.cpython-37.pyc +0 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/visualization/visualization.py +54 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext-0.9-py3.7.egg-info/PKG-INFO +370 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext-0.9-py3.7.egg-info/SOURCES.txt +19 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext-0.9-py3.7.egg-info/dependency_links.txt +1 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext-0.9-py3.7.egg-info/requires.txt +7 -0
- Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext-0.9-py3.7.egg-info/top_level.txt +1 -0
Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/cntext/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
from cntext.dictionary import *
|
|
2
|
+
from cntext.sentiment import senti_by_dutir, senti_by_hownet, senti_by_diydict, init_jieba
|
|
3
|
+
from cntext.similarity import similarity_score
|
|
4
|
+
from cntext.stats import term_freq, readability
|
|
5
|
+
from cntext.visualization import wordcloud, wordshiftor
|
|
6
|
+
|
|
Binary file
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from cntext.description.description import term_freq, readability
|
|
Binary file
|
|
Binary file
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from cntext.dictionary.dictionary import ADV_words, CONJ_words, STOPWORDS_zh
|
|
2
|
+
import re
|
|
3
|
+
import jieba
|
|
4
|
+
from collections import Counter
|
|
5
|
+
import numpy as np
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def term_freq(text):
    """
    Count how often each segmented word occurs in ``text``.

    Only characters in the range U+4E00..U+9FA5 are kept; everything else
    (Latin letters, digits, punctuation, whitespace) is discarded before
    segmentation, so the remaining runs are joined into one string.

    Args:
        text: the input string.

    Returns:
        A ``collections.Counter`` mapping each jieba token that is not in
        ``STOPWORDS_zh`` to its number of occurrences.
    """
    chinese_only = ''.join(re.findall('[\u4e00-\u9fa5]+', text))
    return Counter(
        token
        for token in jieba.lcut(chinese_only)
        if token not in STOPWORDS_zh
    )
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def readability(text, language='chinese'):
    """
    Compute readability metrics for ``text``.

    The larger a metric, the more complex the text and the worse its
    readability.

    English (``language='english'``):
        ``4.71 * (characters / words) + 0.5 * (words / sentences) - 21.43``

    Chinese (``language='chinese'``), following Xu Wei, Yao Zhenye and
    Chen Donghua, "Readability of Chinese annual reports: measurement and
    test", Accounting Research, 2021(03): 28-44:
        readability1 -- average number of characters per clause
        readability2 -- average share of adverbs and conjunctions among the
                        words of each sentence
        readability3 -- modelled on the Fog Index,
                        ``(readability1 + readability2) * 0.5``

    Args:
        text: the input string.
        language: ``'english'`` or ``'chinese'`` (the default).

    Returns:
        ``{"readability": score}`` for English; a dict with the keys
        ``'readability1'``, ``'readability2'`` and ``'readability3'`` for
        Chinese; ``None`` for any other ``language`` value.
    """
    if language == 'english':
        text = text.lower()
        # Every character is counted, whitespace and punctuation included.
        num_of_characters = len(text)
        # split() without a separator ignores runs of whitespace, so repeated
        # spaces no longer count as extra words. Fall back to 1 so that empty
        # input cannot divide by zero.
        num_of_words = len(text.split()) or 1
        # re.split leaves an empty segment after a closing terminator; only
        # segments with real content count as sentences.
        segments = re.split(r'[.!?\n;]+', text)
        num_of_sentences = sum(1 for seg in segments if seg.strip()) or 1
        ari = (
            4.71 * (num_of_characters / num_of_words)
            + 0.5 * (num_of_words / num_of_sentences)
            - 21.43
        )
        return {"readability": ari}

    if language == 'chinese':
        adv_conj_words = set(ADV_words + CONJ_words)
        zi_num_per_sent = []
        adv_conj_ratio_per_sent = []
        # Split on both ASCII and full-width sentence terminators.
        # NOTE(review): the full-width forms were restored on the assumption
        # that the duplicated ASCII marks in the previous pattern stood for
        # them -- confirm against the upstream source.
        for sent in re.split(r'[.。!！?？\n;；]+', text):
            # Empty segments (e.g. after the final terminator) carry no words
            # and used to trigger a ZeroDivisionError below.
            if not sent.strip():
                continue
            words = jieba.lcut(sent)
            if not words:
                continue
            zi_num_per_sent.append(len(sent))
            adv_conj_num = sum(1 for w in words if w in adv_conj_words)
            adv_conj_ratio_per_sent.append(adv_conj_num / len(words))

        if zi_num_per_sent:
            readability1 = np.mean(zi_num_per_sent)
            readability2 = np.mean(adv_conj_ratio_per_sent)
        else:
            # No sentence content at all: report zeros instead of NaN.
            readability1 = 0.0
            readability2 = 0.0
        readability3 = (readability1 + readability2) * 0.5
        return {'readability1': readability1,
                'readability2': readability2,
                'readability3': readability3}
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from cntext.dictionary.dictionary import *
|
|
Binary file
|