nlpertools 1.0.5__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that public registry.
Files changed (43)
  1. nlpertools/__init__.py +24 -20
  2. nlpertools/algo/ac.py +18 -0
  3. nlpertools/algo/bit_ops.py +28 -0
  4. nlpertools/algo/kmp.py +94 -55
  5. nlpertools/algo/num_ops.py +12 -0
  6. nlpertools/algo/template.py +116 -0
  7. nlpertools/algo/union.py +13 -0
  8. nlpertools/data_client.py +387 -257
  9. nlpertools/data_structure/base_structure.py +109 -13
  10. nlpertools/dataprocess.py +611 -3
  11. nlpertools/default_db_config.yml +41 -0
  12. nlpertools/io/__init__.py +3 -3
  13. nlpertools/io/dir.py +54 -36
  14. nlpertools/io/file.py +277 -222
  15. nlpertools/ml.py +483 -460
  16. nlpertools/monitor/__init__.py +0 -0
  17. nlpertools/monitor/gpu.py +18 -0
  18. nlpertools/monitor/memory.py +24 -0
  19. nlpertools/movie.py +36 -0
  20. nlpertools/nlpertools_config.yml +1 -0
  21. nlpertools/{openApi.py → open_api.py} +65 -65
  22. nlpertools/other.py +364 -249
  23. nlpertools/pic.py +288 -0
  24. nlpertools/plugin.py +43 -43
  25. nlpertools/reminder.py +98 -87
  26. nlpertools/utils/__init__.py +3 -3
  27. nlpertools/utils/lazy.py +727 -0
  28. nlpertools/utils/log_util.py +20 -0
  29. nlpertools/utils/package.py +89 -76
  30. nlpertools/utils/package_v1.py +94 -0
  31. nlpertools/utils/package_v2.py +117 -0
  32. nlpertools/utils_for_nlpertools.py +93 -93
  33. nlpertools/vector_index_demo.py +108 -0
  34. nlpertools/wrapper.py +161 -96
  35. {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
  36. nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
  37. nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
  38. {nlpertools-1.0.5.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
  39. nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
  40. nlpertools_helper/__init__.py +10 -0
  41. nlpertools-1.0.5.dist-info/METADATA +0 -85
  42. nlpertools-1.0.5.dist-info/RECORD +0 -25
  43. nlpertools-1.0.5.dist-info/top_level.txt +0 -1
nlpertools/other.py CHANGED
@@ -1,249 +1,364 @@
- #!/usr/bin/python3.8
- # -*- coding: utf-8 -*-
- # @Author : youshu.Ji
- import os
- import re
- import string
- from concurrent.futures import ThreadPoolExecutor
- from functools import reduce
-
- from .io.file import writetxt_w_list, writetxt_a
- # import numpy as np
- # import psutil
- # import pyquery as pq
- # import requests
- # import torch
- # from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
- # from sklearn.metrics import precision_recall_fscore_support
- # from tqdm import tqdm
- # from win32evtlogutil import langid
- from .utils.package import *
-
- CHINESE_PUNCTUATION = list(',。;:‘’“”!?《》「」【】<>()、')
- ENGLISH_PUNCTUATION = list(',.;:\'"!?<>()')
-
-
- def seed_everything():
-     # seed everything
-     seed = 7777777
-     np.random.seed(seed)
-     torch.manual_seed(seed) # CPU随机种子确定
-     torch.cuda.manual_seed(seed)
-     torch.cuda.manual_seed_all(seed)
-
-
- def convert_np_to_py(res):
-     np2py = {
-         np.float64: float,
-         np.int32: int
-     }
-     news_dict = {}
-     for k, v in res.best_params_.items():
-         if type(v) in np2py:
-             v = np2py[type(v)](v)
-         news_dict[k] = v
-     return news_dict
-
-
- def git_push():
-     """
-     针对国内提交github经常失败,自动提交
-     """
-     num = -1
-     while 1:
-         num += 1
-         print("retry num: {}".format(num))
-         res = os.system("git push --set-upstream origin main")
-         print(str(res))
-         if not str(res).startswith("fatal"):
-             print("scucess")
-             break
-
-
- def snake_to_camel(s: str) -> str:
-     """
-     author: u
-     snake case 转换到 camel case.
-     :param s: snake case variable
-     :return:
-     """
-     return s.title().replace("_", "")
-
-
- def camel_to_snake(s: str) -> str:
-     """
-     camel case 转换到 snake case.
-     :param s: camel case variable
-     :return:
-     """
-     return reduce(lambda x, y: x + ('_' if y.isupper() else '') + y, s).lower()
-
-
- def identify_language(text):
-     language = langid.classify(text[:200])[0]
-     # print(language)
-     if language == 'zh':
-         return 'zh'
-     elif language == 'en':
-         return 'en'
-     else:
-         return 'other'
-     # return 'en'
-
-
- # other ----------------------------------------------------------------------
- # 统计词频
- def calc_word_count(list_word, mode, path='tempcount.txt', sort_id=1, is_reverse=True):
-     word_count = {}
-     for key in list_word:
-         if key not in word_count:
-             word_count[key] = 1
-         else:
-             word_count[key] += 1
-     word_dict_sort = sorted(word_count.items(), key=lambda x: x[sort_id], reverse=is_reverse)
-     if mode == 'w':
-         for key in word_dict_sort:
-             writetxt_a(str(key[0]) + '\t' + str(key[1]) + '\n', path)
-     elif mode == 'p':
-         for key in word_dict_sort:
-             print(str(key[0]) + '\t' + str(key[1]))
-     elif mode == 'u':
-         return word_dict_sort
-
-
- # 字典去重
- def dupl_dict(dict_list, key):
-     new_dict_list, value_set = [], []
-     print('去重中...')
-     for i in tqdm(dict_list):
-         if i[key] not in value_set:
-             new_dict_list.append(i)
-             value_set.append(i[key])
-     return new_dict_list
-
-
- def multi_thread_run(_task, data):
-     with ThreadPoolExecutor() as executor:
-         result = list(tqdm(executor.map(_task, data), total=len(ata)))
-     return result
-
-
- def del_special_char(sentence):
-     special_chars = ['\ufeff', '\xa0', '\u3000', '\xa0', '\ue627']
-     for i in special_chars:
-         sentence = sentence.replace(i, '')
-     return sentence
-
-
- def en_pun_2_zh_pun(sentence):
-     # TODO 因为引号的问题,所以我没有写
-     for i in ENGLISH_PUNCTUATION:
-         pass
-
-
- def spider(url):
-     """
-
-     :param url:
-     :return:
-     """
-     if 'baijiahao' in url:
-         content = requests.get(url)
-         # print(content.text)
-         html = pq.PyQuery(content.text)
-         title = html('.index-module_articleTitle_28fPT').text()
-         res = html('.index-module_articleWrap_2Zphx').text().rstrip('举报/反馈')
-         return '{}\n{}'.format(title, res)
-
-
- def eda(sentence):
-     url = 'http://x.x.x.x:x/eda'
-     json_data = dict({"sentence": sentence})
-     res = requests.post(url, json=json_data)
-     return res.json()['eda']
-
-
- def find_language(text):
-     # TODO 替换为开源包
-     letters = list(string.ascii_letters)
-     if len(text) > 50:
-         passage = text[:50]
-         len_passage = 50
-     else:
-         len_passage = len(text)
-     count = 0
-     for c in passage:
-         if c in letters:
-             count += 1
-     if count / len_passage > 0.5:
-         return "en"
-     else:
-         return "not en"
-
-
- def print_prf(y_true, y_pred, label=None):
-     # y_true = [0, 1, 2, 1, 1, 2, 3, 1, 1, 1]
-     # y_pred = [0, 1, 2, 1, 1, 2, 3, 1, 1, 1]
-     # p, r, f, s = precision_recall_fscore_support(y_true=y_true, y_pred=y_pred)
-     # print("p\t{}".format(p))
-     # print("r\t{}".format(r))
-     # print("f\t{}".format(f))
-     # print("s\t{}".format(s))
-     result = precision_recall_fscore_support(y_true=y_true, y_pred=y_pred, labels=label)
-
-     for i in range(len(label)):
-         res = []
-         for k in result:
-             res.append('%.5f' % k[i])
-         print('{}: {} {} {}'.format(label[i], *res[:3]))
-
-
- def print_cpu():
-     p = psutil.Process()
-     # pro_info = p.as_dict(attrs=['pid', 'name', 'username'])
-     print(psutil.cpu_count())
-
-
- def stress_test(func, ipts):
-     with ThreadPoolExecutor() as executor:
-         results = list(tqdm(executor.map(func, ipts), total=len(ipts)))
-     return results
-
-
- def get_substring_loc(text, subtext):
-     res = re.finditer(
-         subtext.replace('\\', '\\\\').replace('?', '\?').replace('(', '\(').replace(')', '\)').replace(']',
-                                                                                                         '\]').replace(
-             '[', '\[').replace('+', '\+'), text)
-     l, r = [i for i in res][0].regs[0]
-     return l, r
-
-
- def tf_idf(corpus, save_path):
-     tfidfdict = {}
-     vectorizer = CountVectorizer() # 该类会将文本中的词语转换为词频矩阵,矩阵元素a[i][j] 表示j词在i类文本下的词频
-     transformer = TfidfTransformer() # 该类会统计每个词语的tf-idf权值
-     tfidf = transformer.fit_transform(
-         vectorizer.fit_transform(corpus)) # 第一个fit_transform是计算tf-idf,第二个fit_transform是将文本转为词频矩阵
-     word = vectorizer.get_feature_names() # 获取词袋模型中的所有词语
-     weight = tfidf.toarray() # 将tf-idf矩阵抽取出来,元素a[i][j]表示j词在i类文本中的tf-idf权重
-     for i in range(len(weight)): # 打印每类文本的tf-idf词语权重,第一个for遍历所有文本,第二个for便利某一类文本下的词语权重
-         for j in range(len(word)):
-             getword = word[j]
-             getvalue = weight[i][j]
-             if getvalue != 0: # 去掉值为0的项
-                 if getword in tfidfdict: # 更新全局TFIDF值
-                     tfidfdict[getword] += float(getvalue)
-                 else:
-                     tfidfdict.update({getword: getvalue})
-     sorted_tfidf = sorted(tfidfdict.items(), key=lambda d: d[1], reverse=True)
-     to_write = ['{} {}'.format(i[0], i[1]) for i in sorted_tfidf]
-     writetxt_w_list(to_write, save_path, num_lf=1)
-
- # 常用函数参考
- # import tensorflow as tf
- #
- # gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
- # sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
- # for gpu in tf.config.experimental.list_physical_devices('GPU'):
- #     tf.config.experimental.set_memory_growth()
+ #!/usr/bin/python3.8
+ # -*- coding: utf-8 -*-
+ # @Author : youshu.Ji
+ import itertools
+ import os
+ import re
+ import string
+ from concurrent.futures import ThreadPoolExecutor
+ from functools import reduce
+ import math
+ import datetime
+ import psutil
+ from .io.file import writetxt_w_list, writetxt_a
+ # import numpy as np
+ # import psutil
+ # import pyquery as pq
+ # import requests
+ # import torch
+ # from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
+ # from sklearn.metrics import precision_recall_fscore_support
+ # from tqdm import tqdm
+ # from win32evtlogutil import langid
+ from .utils.package import *
+
+ CHINESE_PUNCTUATION = list(',。;:‘’“”!?《》「」【】<>()、')
+ ENGLISH_PUNCTUATION = list(',.;:\'"!?<>()')
+ OTHER_PUNCTUATION = list('!@#$%^&*')
+
+
+ def seed_everything():
+     import torch
+     # seed everything
+     seed = 7777777
+     np.random.seed(seed)
+     torch.manual_seed(seed) # CPU随机种子确定
+     torch.cuda.manual_seed(seed)
+     torch.cuda.manual_seed_all(seed)
+
+
+ def sent_email(mail_user, mail_pass, receiver, title, content, attach_path=None):
+     import smtplib
+     from email.mime.multipart import MIMEMultipart
+     from email.mime.text import MIMEText
+     from email.mime.application import MIMEApplication
+
+     mail_host = 'smtp.qq.com'
+     mail_user = mail_user
+     mail_pass = mail_pass
+     sender = mail_user
+
+     message = MIMEMultipart()
+     message.attach(MIMEText(content, 'plain', 'utf-8'))
+     if attach_path:
+         attachment = MIMEApplication(open(attach_path, 'rb').read())
+         attachment["Content-Type"] = 'application/octet-stream'
+         attachment.add_header('Content-Dispositon', 'attachment',
+                               filename=('utf-8', '', attach_path)) # 注意:此处basename要转换为gbk编码,否则中文会有乱码。
+         message.attach(attachment)
+     message['Subject'] = title
+     message['From'] = sender
+     message['To'] = receiver
+
+     try:
+         smtp_obj = smtplib.SMTP()
+         smtp_obj.connect(mail_host, 25)
+         smtp_obj.login(mail_user, mail_pass)
+         smtp_obj.sendmail(sender, receiver, message.as_string())
+         smtp_obj.quit()
+         print('send email success')
+     except smtplib.SMTPException as e:
+         print('send failed', e)
+
+
+ def convert_np_to_py(obj):
+     if isinstance(obj, dict):
+         return {k: convert_np_to_py(v) for k, v in obj.items()}
+     elif isinstance(obj, list):
+         return [convert_np_to_py(v) for v in obj]
+     elif isinstance(obj, np.float64) or isinstance(obj, np.float32):
+         return float(obj)
+     else:
+         return obj
+
+
+ def git_push():
+     """
+     针对国内提交github经常失败,自动提交
+     """
+     num = -1
+     while 1:
+         num += 1
+         print("retry num: {}".format(num))
+         info = os.system("git push --set-upstream origin main")
+         print(str(info))
+         if not str(info).startswith("fatal"):
+             print("scucess")
+             break
+
+
+ def snake_to_camel(s: str) -> str:
+     """
+     author: u
+     snake case 转换到 camel case.
+     :param s: snake case variable
+     :return:
+     """
+     return s.title().replace("_", "")
+
+
+ def camel_to_snake(s: str) -> str:
+     """
+     将 camel case 转换到 snake case.
+     :param s: camel case variable
+     :return:
+     """
+     return reduce(lambda x, y: x + ('_' if y.isupper() else '') + y, s).lower()
+
+
+ # other ----------------------------------------------------------------------
+ # 统计词频
+ def calc_word_count(list_word, mode, path='tempcount.txt', sort_id=1, is_reverse=True):
+     word_count = {}
+     for key in list_word:
+         if key not in word_count:
+             word_count[key] = 1
+         else:
+             word_count[key] += 1
+     word_dict_sort = sorted(word_count.items(), key=lambda x: x[sort_id], reverse=is_reverse)
+     if mode == 'w':
+         for key in word_dict_sort:
+             writetxt_a(str(key[0]) + '\t' + str(key[1]) + '\n', path)
+     elif mode == 'p':
+         for key in word_dict_sort:
+             print(str(key[0]) + '\t' + str(key[1]))
+     elif mode == 'u':
+         return word_dict_sort
+
+
+ # 字典去重
+ def dupl_dict(dict_list, key):
+     new_dict_list, value_set = [], []
+     print('去重中...')
+     for i in tqdm(dict_list):
+         if i[key] not in value_set:
+             new_dict_list.append(i)
+             value_set.append(i[key])
+     return new_dict_list
+
+
+ def multi_thread_run(_task, data):
+     with ThreadPoolExecutor() as executor:
+         result = list(tqdm(executor.map(_task, data), total=len(data)))
+     return result
+
+
+ def del_special_char(sentence):
+     special_chars = ['\ufeff', '\xa0', '\u3000', '\xa0', '\ue627']
+     for i in special_chars:
+         sentence = sentence.replace(i, '')
+     return sentence
+
+
+ def en_pun_2_zh_pun(sentence):
+     # TODO 因为引号的问题,所以我没有写
+     for i in ENGLISH_PUNCTUATION:
+         pass
+
+
+ def spider(url):
+     """
+
+     :param url:
+     :return:
+     """
+     if 'baijiahao' in url:
+         content = requests.get(url)
+         # print(content.text)
+         html = pq.PyQuery(content.text)
+         title = html('.index-module_articleTitle_28fPT').text()
+         res = html('.index-module_articleWrap_2Zphx').text().rstrip('举报/反馈')
+         return '{}\n{}'.format(title, res)
+
+
+ def eda(sentence):
+     url = 'https://x.x.x.x:x/eda'
+     json_data = dict({"sentence": sentence})
+     res = requests.post(url, json=json_data)
+     return res.json()['eda']
+
+
+ def find_language(text):
+     # TODO 替换为开源包
+     letters = list(string.ascii_letters)
+     if len(text) > 50:
+         passage = text[:50]
+         len_passage = 50
+     else:
+         len_passage = len(text)
+     count = 0
+     for c in passage:
+         if c in letters:
+             count += 1
+     if count / len_passage > 0.5:
+         return "en"
+     else:
+         return "not en"
+
+
+ def print_prf(y_true, y_pred, label=None):
+     # y_true = [0, 1, 2, 1, 1, 2, 3, 1, 1, 1]
+     # y_pred = [0, 1, 2, 1, 1, 2, 3, 1, 1, 1]
+     # p, r, f, s = precision_recall_fscore_support(y_true=y_true, y_pred=y_pred)
+     # print("p\t{}".format(p))
+     # print("r\t{}".format(r))
+     # print("f\t{}".format(f))
+     # print("s\t{}".format(s))
+     result = precision_recall_fscore_support(y_true=y_true, y_pred=y_pred, labels=label)
+
+     for i in range(len(label)):
+         res = []
+         for k in result:
+             res.append('%.5f' % k[i])
+         print('{}: {} {} {}'.format(label[i], *res[:3]))
+
+
+ def print_cpu():
+     p = psutil.Process()
+     # pro_info = p.as_dict(attrs=['pid', 'name', 'username'])
+     print(psutil.cpu_count())
+
+
+ def stress_test(func, ipts):
+     with ThreadPoolExecutor() as executor:
+         results = list(tqdm(executor.map(func, ipts), total=len(ipts)))
+     return results
+
+
+ def get_substring_loc(text, subtext):
+     res = re.finditer(
+         subtext.replace('\\', '\\\\').replace('?', '\?').replace('(', '\(').replace(')', '\)').replace(']',
+                                                                                                         '\]').replace(
+             '[', '\[').replace('+', '\+'), text)
+     l, r = [i for i in res][0].regs[0]
+     return l, r
+
+
+ def squeeze_list(high_dim_list):
+     return list(itertools.chain.from_iterable(high_dim_list))
+
+
+ def unsqueeze_list(flatten_list, each_element_len):
+     two_dim_list = [flatten_list[i * each_element_len:(i + 1) * each_element_len] for i in
+                     range(len(flatten_list) // each_element_len)]
+     return two_dim_list
+
+
+ def auto_close():
+     """
+     针对企业微信15分钟会显示离开的机制,假装自己还在上班
+     """
+     import pyautogui as pg
+     import time
+     import os
+     cmd = 'schtasks /create /tn shut /tr "shutdown -s -f" /sc once /st 23:30'
+     os.system(cmd)
+     while 1:
+         pg.moveTo(970, 17, 2)
+         pg.click()
+         time.sleep(840)
+
+
+ def tf_idf(corpus, save_path):
+     tfidfdict = {}
+     vectorizer = CountVectorizer() # 该类会将文本中的词语转换为词频矩阵,矩阵元素a[i][j] 表示j词在i类文本下的词频
+     transformer = TfidfTransformer() # 该类会统计每个词语的tf-idf权值
+     tfidf = transformer.fit_transform(
+         vectorizer.fit_transform(corpus)) # 第一个fit_transform是计算tf-idf,第二个fit_transform是将文本转为词频矩阵
+     word = vectorizer.get_feature_names() # 获取词袋模型中的所有词语
+     weight = tfidf.toarray() # 将tf-idf矩阵抽取出来,元素a[i][j]表示j词在i类文本中的tf-idf权重
+     for i in range(len(weight)): # 打印每类文本的tf-idf词语权重,第一个for遍历所有文本,第二个for便利某一类文本下的词语权重
+         for j in range(len(word)):
+             getword = word[j]
+             getvalue = weight[i][j]
+             if getvalue != 0: # 去掉值为0的项
+                 if getword in tfidfdict: # 更新全局TFIDF值
+                     tfidfdict[getword] += float(getvalue)
+                 else:
+                     tfidfdict.update({getword: getvalue})
+     sorted_tfidf = sorted(tfidfdict.items(), key=lambda d: d[1], reverse=True)
+     to_write = ['{} {}'.format(i[0], i[1]) for i in sorted_tfidf]
+     writetxt_w_list(to_write, save_path, num_lf=1)
+
+
+ class GaussDecay(object):
+     """
+     当前只实现了时间的,全部使用默认值
+     """
+
+     def __init__(self, origin='2022-08-02', scale='90d', offset='5d', decay=0.5, task="time"):
+         self.origin = origin
+         self.task = task
+         self.scale, self.offset = self.translate(scale, offset)
+         self.decay = decay
+         self.time_coefficient = 0.6
+         self.related_coefficient = 0.4
+
+     def translate(self, scale, offset):
+         """
+         将领域的输入转化为标准
+         :return:
+         """
+         if self.task == "time":
+             scale = 180
+             offset = 5
+         else:
+             scale = 180
+             offset = 5
+         return scale, offset
+
+     @staticmethod
+     def translated_minus(field_value):
+         origin = datetime.datetime.now()
+         field_value = datetime.datetime.strptime(field_value, '%Y-%m-%d %H:%M:%S')
+         return (origin - field_value).days
+
+     def calc_exp(self):
+         pass
+
+     def calc_liner(self):
+         pass
+
+     def calc_gauss(self, raw_score, field_value):
+         """
+         $$S(doc)=exp(-\frac{max(0,|fieldvalues_{doc}-origin|-offset)^2}{2σ^2})$$ -
+         $$σ^2=-scale^2/(2·ln(decay))$$
+         :param raw_score:
+         :param field_value:
+         :return:
+         """
+         numerator = max(0, (abs(self.translated_minus(field_value)) - self.offset)) ** 2
+         sigma_square = -1 * self.scale ** 2 / (2 * math.log(self.decay, math.e))
+         denominator = 2 * sigma_square
+         s = math.exp(-1 * numerator / denominator)
+         return round(self.time_coefficient * s + self.related_coefficient * raw_score, 7)
+
+
+ if __name__ == '__main__':
+     gauss_decay = GaussDecay()
+     res = gauss_decay.calc_gauss(raw_score=1, field_value="2021-05-29 14:31:13")
+     print(res)
+     # res = gauss_decay.calc_gauss(raw_score=1, field_value="2022-05-29 14:31:13")
+     # print(res)
+     # res = gauss_decay.calc_gauss(raw_score=1, field_value="2022-05-29 14:31:13")
+     # print(res)
+     # res = gauss_decay.calc_gauss(raw_score=1, field_value="2022-05-29 14:31:13")
+     # print(res)
+
+ # 常用函数参考
+ # import tensorflow as tf
+ #
+ # gpu_options = tf.compat.v1.GPUOptions(allow_growth=True)
+ # sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(gpu_options=gpu_options))
+ # for gpu in tf.config.experimental.list_physical_devices('GPU'):
+ #     tf.config.experimental.set_memory_growth()