nlpertools 1.0.5__py3-none-any.whl → 1.0.8__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- nlpertools/__init__.py +23 -20
- nlpertools/algo/ac.py +18 -0
- nlpertools/algo/bit_ops.py +28 -0
- nlpertools/algo/kmp.py +94 -55
- nlpertools/algo/num_ops.py +12 -0
- nlpertools/algo/template.py +116 -0
- nlpertools/algo/union.py +13 -0
- nlpertools/cli.py +87 -0
- nlpertools/data_client.py +426 -257
- nlpertools/data_structure/base_structure.py +109 -13
- nlpertools/dataprocess.py +627 -3
- nlpertools/default_db_config.yml +41 -0
- nlpertools/draw/__init__.py +0 -0
- nlpertools/draw/draw.py +83 -0
- nlpertools/draw/math_func.py +33 -0
- nlpertools/get_2fa.py +0 -0
- nlpertools/io/__init__.py +3 -3
- nlpertools/io/dir.py +86 -36
- nlpertools/io/file.py +283 -222
- nlpertools/ml.py +511 -460
- nlpertools/monitor/__init__.py +0 -0
- nlpertools/monitor/gpu.py +18 -0
- nlpertools/monitor/memory.py +24 -0
- nlpertools/movie.py +36 -0
- nlpertools/nlpertools_config.yml +1 -0
- nlpertools/{openApi.py → open_api.py} +65 -65
- nlpertools/other.py +475 -249
- nlpertools/pic.py +288 -0
- nlpertools/plugin.py +43 -43
- nlpertools/reminder.py +98 -87
- nlpertools/utils/__init__.py +3 -3
- nlpertools/utils/lazy.py +727 -0
- nlpertools/utils/log_util.py +20 -0
- nlpertools/utils/package.py +89 -76
- nlpertools/utils/package_v1.py +94 -0
- nlpertools/utils/package_v2.py +117 -0
- nlpertools/utils_for_nlpertools.py +93 -93
- nlpertools/vector_index_demo.py +108 -0
- nlpertools/wrapper.py +161 -96
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/LICENSE +200 -200
- nlpertools-1.0.8.dist-info/METADATA +132 -0
- nlpertools-1.0.8.dist-info/RECORD +49 -0
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/WHEEL +1 -1
- nlpertools-1.0.8.dist-info/entry_points.txt +2 -0
- nlpertools-1.0.8.dist-info/top_level.txt +2 -0
- nlpertools_helper/__init__.py +10 -0
- nlpertools-1.0.5.dist-info/METADATA +0 -85
- nlpertools-1.0.5.dist-info/RECORD +0 -25
- nlpertools-1.0.5.dist-info/top_level.txt +0 -1
@@ -0,0 +1,108 @@
|
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
import math
|
5
|
+
|
6
|
+
import faiss
|
7
|
+
import gensim
|
8
|
+
import numpy as np
|
9
|
+
import pandas as pd
|
10
|
+
|
11
|
+
|
12
|
+
def build_index_use(vectors, nlist=100):
    """Build, train and fill a faiss IVF-Flat index that uses the L2 metric.

    Args:
        vectors: 2-D array-like of shape (number of vectors, dimension).
            It is converted to a C-contiguous float32 array before being
            passed to faiss, the same dtype ``build_index`` casts to.
        nlist(int): number of inverted lists (coarse clusters) of the IVF
            index. Defaults to 100, the value that used to be hard-coded.

    Return: the trained faiss index holding every row of ``vectors``.
    """
    # faiss works on float32 matrices; cast once up front so float64 input
    # (numpy's default dtype) does not fail inside train()/add().
    vectors = np.ascontiguousarray(vectors, dtype=np.float32)
    d = vectors.shape[1]
    quantizer = faiss.IndexFlatL2(d)
    index = faiss.IndexIVFFlat(quantizer, d, nlist, faiss.METRIC_L2)
    index.train(vectors)
    index.add(vectors)
    return index
|
20
|
+
|
21
|
+
|
22
|
+
def build_index(vectors, distances="L2", nprobe=10):
    """Build a faiss index through ``faiss.index_factory``.

    The factory string is assembled from up to three stages: an optional
    ``L2norm`` stage (only for "COS"), an optional ``IVF<nlist>`` stage
    (only when there are enough vectors) and a final ``Flat`` stage.

    Args:
        vectors(numpy.array): vector matrix, shape=(number of vectors, dimension).
        distances(str): distance measure, one of "L2", "COS" or "INNER_PRODUCT".
        nprobe(int): number of clusters visited per search; only applied
            when the IVF stage is used.

    Return: the populated faiss index object.

    Raises:
        NotImplementedError: if ``distances`` is not a supported measure.
    """
    if distances == "L2":
        metric_type = faiss.METRIC_L2
    elif distances in ("COS", "INNER_PRODUCT"):
        metric_type = faiss.METRIC_INNER_PRODUCT
    else:
        raise NotImplementedError(f"unsupported distance measure: {distances!r}")

    vectors = np.asarray(vectors, dtype=np.float32)
    n_vectors, dim = vectors.shape

    index_pipes = []

    if distances == "COS":
        # Inner product over L2-normalised vectors.
        index_pipes.append("L2norm")

    # The cluster count must be an integer: formatting the raw float result
    # of math.sqrt gave factory strings such as "IVF480.0".
    nlist = int(4 * math.sqrt(n_vectors))
    use_ivf = nlist > 0 and n_vectors >= 30 * nlist
    if use_ivf:
        index_pipes.append(f"IVF{nlist}")

    index_pipes.append("Flat")

    index = faiss.index_factory(dim, ",".join(index_pipes), metric_type)

    if not index.is_trained:
        index.train(vectors)

    index.add(vectors)

    # An IVF index needs a direct map before reconstruct() can be used.
    if use_ivf:
        ivf_index = faiss.extract_index_ivf(index)
        ivf_index.make_direct_map()
        ivf_index.nprobe = nprobe

    return index
|
70
|
+
|
71
|
+
|
72
|
+
def read_index_from_file(filename):
    """Load a faiss index object from the file ``filename``."""
    loaded_index = faiss.read_index(filename)
    return loaded_index
|
75
|
+
|
76
|
+
|
77
|
+
def write_index_to_file(index, filename):
    """Write the faiss index object ``index`` to the file ``filename``."""
    faiss.write_index(index, filename)
    return None
|
80
|
+
|
81
|
+
|
82
|
+
# Demo: load text-format word vectors, index them with faiss, and write the
# index plus a companion CSV of placeholder names to the working directory.
word2vec_path = "glove_vector_path"
wv_from_text = gensim.models.KeyedVectors.load_word2vec_format(
    word2vec_path,
    binary=False,
    no_header=True,
)
vectors = wv_from_text.vectors

# One placeholder file name per vector, cycling through 1.jpg .. 9.jpg.
name_example = ["{}.jpg".format(i % 9 + 1) for i, _ in enumerate(vectors)]
df = pd.DataFrame(
    {
        "name": name_example,
        # "vector": str(vectors[0]),
        # "text": list(wv_from_text.key_to_index.keys()),
    }
)

test_index = build_index_use(vectors)
write_index_to_file(test_index, "test.index")

df.to_csv("test.csv", index=False)
|
96
|
+
|
97
|
+
import gensim
|
98
|
+
import hnswlib
|
99
|
+
|
100
|
+
# Demo: index the same word vectors with hnswlib and save the index to disk.
word2vec_path = "glove_vector_path"
wv_from_text = gensim.models.KeyedVectors.load_word2vec_format(
    word2vec_path,
    binary=False,
    no_header=True,
)
vectors = wv_from_text.vectors

# Each vector is labelled with its own row position.
labels = list(range(len(vectors)))
index = hnswlib.Index(space="l2", dim=len(vectors[0]))
index.init_index(
    max_elements=len(vectors),
    ef_construction=200,
    M=16,
)
index.add_items(vectors, labels)
index.save_index("hnswlib.index")
|
nlpertools/wrapper.py
CHANGED
@@ -1,96 +1,161 @@
|
|
1
|
-
#!/usr/bin/python3.8
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# @Author : youshu.Ji
|
4
|
-
# 定义装饰器
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
return
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
1
|
+
#!/usr/bin/python3.8
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# @Author : youshu.Ji
|
4
|
+
# 定义装饰器
|
5
|
+
import logging
|
6
|
+
import time
|
7
|
+
from functools import wraps
|
8
|
+
import asyncio
|
9
|
+
|
10
|
+
def fn_async_timer(function):
    """
    Timing decorator for coroutine functions.

    Awaits the wrapped coroutine, prints how long the await took, and
    returns the coroutine's result unchanged.
    """

    @wraps(function)
    async def timed(*args, **kwargs):
        started = time.time()
        outcome = await function(*args, **kwargs)
        elapsed = time.time() - started
        report = '[finished {func_name} in {time:.2f}s]'.format(func_name=function.__name__, time=elapsed)
        print(report)
        return outcome

    return timed
|
23
|
+
|
24
|
+
|
25
|
+
def fn_timer(async_func=False, analyse=False):
    """
    Decorator factory that reports how long the wrapped function takes.

    Usage::

        @fn_timer()
        def example():
            time.sleep(2)

    :param async_func: pass ``True`` when decorating a coroutine function;
        the returned wrapper is then a coroutine function as well. In this
        mode ``analyse`` is ignored.
    :param analyse: when true (and ``async_func`` is not ``True``), profile
        the call with ``pyinstrument`` and print the profile instead of the
        one-line timing message.
    :return: a decorator; every wrapper it produces keeps the wrapped
        function's metadata (``__name__``, ``__doc__``, ...).
    """

    def wrapper(func):
        @wraps(func)
        async def func_time_async(*args, **kwargs):
            t0 = time.time()
            # The coroutine is scheduled as its own task and awaited.
            result = await asyncio.create_task(func(*args, **kwargs))
            t1 = time.time()
            print('[finished {func_name} in {time:.2f}s]'.format(func_name=func.__name__, time=t1 - t0))
            return result

        @wraps(func)
        def func_time(*args, **kwargs):
            t0 = time.time()
            result = func(*args, **kwargs)
            t1 = time.time()
            print('[finished {func_name} in {time:.2f}s]'.format(func_name=func.__name__, time=t1 - t0))
            return result

        @wraps(func)
        def func_time_analyse(*args, **kwargs):
            # Imported lazily so pyinstrument is only needed when profiling.
            from pyinstrument import Profiler

            profiler = Profiler()
            profiler.start()

            result = func(*args, **kwargs)

            profiler.stop()
            profiler.print()
            return result

        if async_func is True:
            return func_time_async
        if analyse:
            return func_time_analyse
        return func_time

    return wrapper
|
70
|
+
|
71
|
+
|
72
|
+
def fn_timeout_checker(wait_time, callback):
    """
    Decorator factory that falls back to ``callback`` when a call times out.

    The wrapped function runs inside ``eventlet.Timeout(wait_time, False)``.
    If it finishes, its result is returned; if it does not, the result of
    ``callback()`` is returned instead.

    Two packages were tried for this (gevent and eventlet); the original
    author notes that gevent produced bugs, so eventlet is used.

    :param wait_time: timeout handed to ``eventlet.Timeout``.
    :param callback: zero-argument callable invoked when the wrapped call
        did not finish.
    :return: a decorator that preserves the wrapped function's metadata.
    """
    # gevent alternative kept for reference (original author's note: calling
    # patch_all(thread=False) together with a Flask app run with
    # threaded=True raised errors, while the default thread=True worked):
    # from gevent import Timeout
    # from gevent.monkey import patch_all
    # patch_all()

    from eventlet import Timeout
    from eventlet import monkey_patch

    # NOTE: this patches the time module process-wide, and it happens every
    # time the decorator factory is called.
    monkey_patch(time=True)

    def wrapper(func):
        @wraps(func)
        def inner(*args, **kwargs):
            finish_flag = False
            with Timeout(wait_time, False):
                res = func(*args, **kwargs)
                finish_flag = True
            if not finish_flag:
                # The with-block was left before the flag was set.
                res = callback()
            return res

        return inner

    return wrapper
|
100
|
+
|
101
|
+
|
102
|
+
def fn_try(parameter):
    """
    Wrap try/except into a decorator.

    When the wrapped function raises, a copy of ``parameter`` is returned
    whose ``msg`` field carries the concrete error message. When it does not
    raise, the function's own result is returned.

    :param parameter: dict such as ``{"msg": "", ...}``. ``msg`` is used as a
        ``str.format`` template receiving the error text as its single
        positional argument; an empty ``msg`` is replaced by the error text
        itself.
    :return: a decorator. On failure the decorated function returns a new
        dict; ``parameter`` itself is never modified, so each failure is
        formatted from the original template rather than from the previous
        failure's message.
    """

    def wrapper(function):
        @wraps(function)
        def inner(*args, **kwargs):
            try:
                return function(*args, **kwargs)
            except Exception as e:
                msg = "报错!"
                print('[func_name: {func_name} {msg}]'.format(func_name=function.__name__, msg=msg))
                template = parameter["msg"]
                # Work on a copy: the dict is shared by every call of the
                # decorated function.
                failure = dict(parameter)
                failure["msg"] = template.format(str(e)) if template else str(e)
                return failure

        return inner

    return wrapper
|
126
|
+
|
127
|
+
|
128
|
+
def try_log(function):
    """
    Decorator that logs any exception raised by ``function`` instead of
    propagating it.

    On failure four error records are written through the root ``logging``
    module: the call arguments, the file and the line number where the
    exception was raised, and ``repr`` of the exception. The decorated
    function then returns ``None``.
    """

    @wraps(function)
    def inner(*args, **kwargs):
        try:
            return function(*args, **kwargs)
        except Exception as e:
            # The first traceback entry is this wrapper; walk to the last
            # one to report where the exception was actually raised.
            tb = e.__traceback__
            while tb.tb_next is not None:
                tb = tb.tb_next
            # Arguments are passed as log parameters, never as the format
            # string, so calls without positional arguments log cleanly.
            logging.error("args=%r kwargs=%r", args, kwargs)
            logging.error("%s", tb.tb_frame.f_code.co_filename)
            logging.error("%s", tb.tb_lineno)
            logging.error("%s", repr(e))
            return None

    return inner
|
141
|
+
|
142
|
+
|
143
|
+
def example(function):
    """
    Template decorator: prints a notice, then calls ``function`` with the
    given arguments and returns its result.
    """

    @wraps(function)
    def _demo(*args, **kwargs):
        print("此方法仅仅用于提示该方法怎么写")
        return function(*args, **kwargs)

    return _demo
|
151
|
+
|
152
|
+
|
153
|
+
def singleton(cls):
    """
    Class decorator that creates ``cls`` at most once.

    The first call constructs the instance with the arguments given; every
    later call returns that same instance and ignores its arguments.
    """
    created = {}

    def _singleton(*args, **kwargs):
        if cls in created:
            return created[cls]
        created[cls] = cls(*args, **kwargs)
        return created[cls]

    return _singleton
|