nlpertools 1.0.5__py3-none-any.whl → 1.0.8__py3-none-any.whl
- nlpertools/__init__.py +23 -20
- nlpertools/algo/ac.py +18 -0
- nlpertools/algo/bit_ops.py +28 -0
- nlpertools/algo/kmp.py +94 -55
- nlpertools/algo/num_ops.py +12 -0
- nlpertools/algo/template.py +116 -0
- nlpertools/algo/union.py +13 -0
- nlpertools/cli.py +87 -0
- nlpertools/data_client.py +426 -257
- nlpertools/data_structure/base_structure.py +109 -13
- nlpertools/dataprocess.py +627 -3
- nlpertools/default_db_config.yml +41 -0
- nlpertools/draw/__init__.py +0 -0
- nlpertools/draw/draw.py +83 -0
- nlpertools/draw/math_func.py +33 -0
- nlpertools/get_2fa.py +0 -0
- nlpertools/io/__init__.py +3 -3
- nlpertools/io/dir.py +86 -36
- nlpertools/io/file.py +283 -222
- nlpertools/ml.py +511 -460
- nlpertools/monitor/__init__.py +0 -0
- nlpertools/monitor/gpu.py +18 -0
- nlpertools/monitor/memory.py +24 -0
- nlpertools/movie.py +36 -0
- nlpertools/nlpertools_config.yml +1 -0
- nlpertools/{openApi.py → open_api.py} +65 -65
- nlpertools/other.py +475 -249
- nlpertools/pic.py +288 -0
- nlpertools/plugin.py +43 -43
- nlpertools/reminder.py +98 -87
- nlpertools/utils/__init__.py +3 -3
- nlpertools/utils/lazy.py +727 -0
- nlpertools/utils/log_util.py +20 -0
- nlpertools/utils/package.py +89 -76
- nlpertools/utils/package_v1.py +94 -0
- nlpertools/utils/package_v2.py +117 -0
- nlpertools/utils_for_nlpertools.py +93 -93
- nlpertools/vector_index_demo.py +108 -0
- nlpertools/wrapper.py +161 -96
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/LICENSE +200 -200
- nlpertools-1.0.8.dist-info/METADATA +132 -0
- nlpertools-1.0.8.dist-info/RECORD +49 -0
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/WHEEL +1 -1
- nlpertools-1.0.8.dist-info/entry_points.txt +2 -0
- nlpertools-1.0.8.dist-info/top_level.txt +2 -0
- nlpertools_helper/__init__.py +10 -0
- nlpertools-1.0.5.dist-info/METADATA +0 -85
- nlpertools-1.0.5.dist-info/RECORD +0 -25
- nlpertools-1.0.5.dist-info/top_level.txt +0 -1
nlpertools/data_client.py
CHANGED
@@ -1,257 +1,426 @@
+#encoding=utf-8
+# !/usr/bin/python3.8
+# -*- coding: utf-8 -*-
+# @Author : youshu.Ji
+import datetime
+import json
+import logging
+
+from .io.file import read_yaml
+from .utils.package import *
+import os
+
+DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_db_config.yml")
+
+# import aioredis
+# import happybase
+# import pandas as pd
+# import pymysql
+# from elasticsearch import Elasticsearch, helpers
+# from kafka import KafkaProducer, KafkaConsumer
+# from pymongo import MongoClient
+
+logger = logging.getLogger(__name__)
+
+global_db_config = read_yaml(DB_CONFIG_FILE)
+
+
+class Neo4jOps(object):
+    # neo4j connection timeout, in seconds
+    # py2neo retries 3 times internally...
+    NEO4J_TIMEOUT = 0.3
+    pass
+
+
+class SqliteOps(object):
+    pass
+    # import sqlite3
+    # database_path = r'xx.db'
+    # conn = sqlite3.connect(database_path)
+    # c = conn.cursor()
+    # sql = "select name from sqlite_master where type='table' order by name"
+    # c.execute(sql)
+    # print(c.fetchall())
+    # sql = "select * from typecho_contents"
+    # c.execute(sql)
+    # res = c.fetchall()
+    # print(res[3])
+    #
+    # conn.commit()
+    # conn.close()
+
+
+class MysqlOps(object):
+    import pandas as pd
+
+    def __init__(self, config=global_db_config["mysql"]):
+        self.db = pymysql.connect(host=config["host"],
+                                  port=config["port"],
+                                  user=config["user"],
+                                  password=config["password"],
+                                  database=config["database"])
+
+    def query(self, sql):
+        df = pd.read_sql(sql, self.db)
+        return df
+
+
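A minimal usage sketch for the MysqlOps wrapper above, assuming a reachable MySQL server described by the `mysql` block of default_db_config.yml; the table name is illustrative:

from nlpertools.data_client import MysqlOps

db = MysqlOps()  # connection parameters come from default_db_config.yml
df = db.query("SELECT id, text FROM example_table LIMIT 10")  # example_table is hypothetical
print(df.head())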
+class EsOps(object):
+    from elasticsearch import Elasticsearch, helpers
+    def __init__(self, config=global_db_config["es"]):
+        self.es = Elasticsearch(
+            host=config["host"], timeout=config["timeout"])
+
+    def search_roll(self, index, body):
+        all_data = []
+        data = self.es.search(index=index, body=body, scroll="5m")
+        all_data.extend(data["hits"]["hits"])
+        scroll_id = data["_scroll_id"]
+        while data["hits"]["hits"]:
+            print(scroll_id[:5])
+            data = self.es.scroll(scroll_id=scroll_id, scroll="5m")
+            scroll_id = data["_scroll_id"]
+            all_data.extend(data["hits"]["hits"])
+        all_data = [i["_source"] for i in all_data]
+        return all_data
+
+    def search_roll_iter(self, index, body):
+        data = self.es.search(index=index, body=body, scroll="5m")
+        scroll_id = data["_scroll_id"]
+        while data["hits"]["hits"]:
+            yield data["hits"]["hits"]
+            data = self.es.scroll(scroll_id=scroll_id, scroll="5m")
+            scroll_id = data["_scroll_id"]
+
+    def search(self, index, body):
+        return self.es.search(index=index, body=body)
+
+    def delete(self, index, body):
+        self.es.delete_by_query(index=index, body=body)
+
+    def save(self, data):
+        # each item in data carries its target index
+        helpers.bulk(self.es, data)
+
+    def delete_data_by_query(self, index, _project_id, _source_ids):
+        _query = {
+            "query": {
+                "bool": {
+                    "must": [
+                        {"terms": {"source_id": _source_ids}},
+                        {"term": {"project_id": _project_id}},
+                    ]
+                }
+            }
+        }
+        _res = self.es.delete_by_query(index=index, body=_query)
+        print(f"delete_data_by_query: {_res}")
+
+    def batch_re_save(self, index, _data, _project_id, _source_ids):
+        self.delete_data_by_query(index, _project_id, _source_ids)
+        _action = [{"_index": index, "_source": i} for i in _data]
+        _res = helpers.bulk(self.es, _action)
+        print(f"batch save result: {_res}")
+
+
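The scroll helpers above page through results server-side: search_roll accumulates every page in memory, while search_roll_iter yields one page at a time. A sketch of the iterator variant, assuming an Elasticsearch server configured in the `es` block of default_db_config.yml and an illustrative index name:

from nlpertools.data_client import EsOps

es = EsOps()
body = {"query": {"match_all": {}}, "size": 1000}
for page in es.search_roll_iter(index="example_index", body=body):
    for hit in page:
        print(hit["_source"])  # process each document here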
+class MongoDB_BETA:
+    def __init__(self, host='localhost', port=27017, db_name=None, collection_name=None):
+        self.host = host
+        self.port = port
+        self.db_name = db_name
+        self.collection_name = collection_name
+        self.client = None
+        self.db = None
+        self.collection = None
+
+    def connect(self):
+        self.client = MongoClient(self.host, self.port)
+        self.db = self.client[self.db_name]
+        self.collection = self.db[self.collection_name]
+
+    def close(self):
+        if self.client:
+            self.client.close()
+
+    def insert_data(self, data):
+        if isinstance(data, list):
+            self.collection.insert_many(data)
+        else:
+            self.collection.insert_one(data)
+
+    def check_data_exists(self, query):
+        """
+        Check whether a given record exists in the database.
+        :param query: query condition
+        :return: bool indicating whether the record exists
+        """
+        return self.collection.count_documents(query) > 0
+
+
+class MongoOps(object):
+    from pymongo import MongoClient
+    def __init__(self, config=global_db_config["mongo"]):
+        mongo_client = MongoClient(config["uri"])
+        db = mongo_client[config["db"]]
+        self.collection = db[config["col"]]
+
+    def fetch_all(self):
+        """
+        Fetch all records.
+        :return:
+        """
+        ans = []
+        print('Fetching all records.')
+        for record in self.collection.find({}):
+            record['_id'] = str(record['_id'])
+            ans.append(record)
+        return ans
+
+    def load_from_mongo(self, special_value):
+        """
+        Fetch the mongodb record whose field named special_value has the value special_value.
+        :param special_value:
+        :return:
+        """
+        record = self.collection.find({"{}".format(special_value): special_value})
+        record = list(record)
+        if not record:
+            return None
+        else:
+            record = sorted(record, key=lambda x: len(x.get("another_value", [])))[0]
+            return record
+
+    def delete_all(self):
+        query = {}
+        deleted = self.collection.delete_many(query)
+        return deleted
+
+    def delete_by_time(self, time):
+        # NOTE: placeholder query; the time argument is not used yet
+        query = {"name": {"$regex": "^F"}}
+        deleted = self.collection.delete_many(query)
+
+    def fetch_by_time(self, year=2022, month=7, day=7, hour=7, minute=7, second=7):
+        query = {"query_time": {"$gte": datetime.datetime(year, month, day, hour, minute, second)}}
+        sort_sql = [("query_time", -1)]
+        ans = []
+        print('Fetching all records.')
+        for record in self.collection.find(query).sort(sort_sql):
+            record['_id'] = str(record['_id'])
+            ans.append(record)
+        return ans
+
+    def save_to_mongo(self, special_value, each_item):
+        """
+        Save data to mongo: push each_item onto the matching record, or insert a new one.
+        :param special_value:
+        :param each_item:
+        :return:
+        """
+        query = self.collection.find({"{}".format(special_value): special_value})
+        if list(query):
+            self.collection.update_one({"{}".format(special_value): special_value},
+                                       {"$push": {'each_item': each_item}})
+        else:
+            insert_item = {
+                "special_value": special_value,
+                "each_item": [each_item]
+            }
+            self.collection.insert_one(insert_item)
+        print("update success")
+
+    def insert_one(self, data):
+        self.collection.insert_one(data)
+
+    def update_to_mongo(self, condition_term, condition_value, new_value):
+        """
+        Look up the record matching the given field and value, then update it in mongo.
+        Similar to an update.
+        :param condition_term: name of the condition field
+        :param condition_value: value of the condition field
+        :param new_value: the new value; preferably a dict (unclear whether non-dict values work)
+        :return:
+        """
+        query = self.collection.find({condition_term: condition_value})
+        if list(query):
+            self.collection.update_one({condition_term: condition_value},
+                                       {"$push": new_value})
+        else:
+            insert_item = {
+                condition_term: condition_value,
+                "processed_data": new_value
+            }
+            self.collection.insert_one(insert_item)
+        print("update success")
+
+
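A usage sketch for MongoOps with illustrative values, assuming the `mongo` block of default_db_config.yml points at a reachable instance; save_to_mongo pushes onto an existing record's each_item array or inserts a fresh document, and fetch_by_time filters on the query_time field:

from nlpertools.data_client import MongoOps

mongo = MongoOps()
mongo.save_to_mongo("some_key", {"text": "hello"})  # field and payload are hypothetical
recent = mongo.fetch_by_time(year=2024, month=1, day=1, hour=0, minute=0, second=0)
print(len(recent))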
+class RedisOps(object):
+    def __init__(self, config=global_db_config["redis"]):
+        redis_max_connections = 1024
+        REDIS_GET_TIMEOUT = 0.1
+        self.redis = aioredis.from_url(config["uri"], max_connections=redis_max_connections)
+
+
+class HBaseOps(object):
+    import happybase
+    """
+    demo
+    key = 'test'
+    db = HBaseOps()
+    data = db.query_single_line(table='table', row_key=key)
+    print(data)
+    """
+
+    def __init__(self, config=global_db_config["hbase"]):
+        self.host = config["DEFAULT_HOST"]
+        self.port = config["DEFAULT_PORT"]
+        self.compat = config["DEFAULT_COMPAT"]
+        self.table_prefix = None  # namespace
+        self.transport = config["DEFAULT_TRANSPORT"]
+        self.protocol = config["DEFAULT_PROTOCOL"]
+        self.conn = self.connect()
+
+    def connect(self):
+        conn = happybase.Connection(host=self.host, port=self.port, timeout=None, autoconnect=True,
+                                    table_prefix=self.table_prefix, compat=self.compat,
+                                    transport=self.transport, protocol=self.protocol)
+        return conn
+
+    def create_hb_table(self, table_name, **families):
+        self.conn.create_table(table_name, families)
+
+    def single_put(self, table_name, row_key, column, data):
+        hb = happybase.Table(table_name, self.conn)
+        hb.put(row_key,
+               data={'{column}:{k}'.format(column=column, k=k): str(v).encode("utf-8") for k, v in data.items()})
+
+    def batch_put(self, table, row_key_name, column, datas, batch_size=1):
+        hb = happybase.Table(table, self.conn)
+        datas_new = [datas[i:i + batch_size] for i in range(0, len(datas), batch_size)]
+        for x in datas_new:
+            with hb.batch(batch_size=batch_size) as batch:
+                for da in x:
+                    da_nw = {'{column}:{k}'.format(column=column, k=k): v for k, v in da.items()}
+                    row_key = da_nw.pop('{column}:{k}'.format(column=column, k=row_key_name))
+                    batch.put(row_key, da_nw)
+        return batch
+
+    def single_put_self(self, table_name, row_keys, datas):
+        hb = happybase.Table(table_name, self.conn)
+        for row_key, (_, val) in zip(row_keys, datas.items()):
+            hb.put(row_key, {'maybe_table_name:maybe_column_name': "%s" % val[0],
+                             'maybe_table_name:maybe_column_name2': "%s" % val[1]})
+
+    def scan_table(self, table, row_start=None, row_stop=None, include_timestamp=False, limit=None, timestamps=None,
+                   filter=None):
+        hb = happybase.Table(table, self.conn)
+        scan = hb.scan(row_start=row_start, row_stop=row_stop, limit=limit, timestamp=timestamps, filter=filter)
+        hb_dict = dict(scan)
+        if hb_dict:
+            return {k1.decode('utf-8'): {k2.decode('utf-8'): v2.decode('utf-8') for k2, v2 in v1.items()}
+                    for k1, v1 in
+                    hb_dict.items()}
+        else:
+            return {}
+
+    def query_single_line(self, table, row_key):
+        conn = self.connect()
+        hb = happybase.Table(table, conn)
+        hb_dict = hb.row(row_key)
+        if hb_dict:
+            return {k.decode('utf-8'): v.decode('utf-8') for k, v in hb_dict.items()}
+        else:
+            return {}
+
+    def query_multi_lines(self, table, row_keys):
+        hb = happybase.Table(table, self.conn)
+        hb_dict = dict(hb.rows(row_keys))
+        if hb_dict:
+            return {k1.decode('utf-8'): {k2.decode('utf-8'): v2.decode('utf-8') for k2, v2 in v1.items()} for k1, v1 in
+                    hb_dict.items()}
+        else:
+            return {}
+
+    def single_delete(self, table, row_key):
+        hb = happybase.Table(table, self.conn)
+        hb.delete(row_key)
+
+    def test_scan(self, table):
+        hb = happybase.Table(table, self.conn)
+        filter = "SingleColumnValueFilter ('maybe_column_name', 'lang', =, 'regexstring:[regex_string]')"
+        scan = hb.scan(limit=1000, filter=filter)
+
+        hb_dict = dict(scan)
+        if hb_dict:
+            return {k1.decode('utf-8'): {k2.decode('utf-8'): v2.decode('utf-8') for k2, v2 in v1.items()}
+                    for k1, v1 in
+                    hb_dict.items()}
+        else:
+            return {}
+
+    def close(self):
+        self.conn.close()
+
+
+class KafkaConfig():
+    pass
+
+
+class KafkaOps(object):
+    def __init__(self, config=global_db_config["kafka"]):
+        self.bootstrap_server = config["bootstrap_server"]
+        self.topic = config["topic"]
+        # the default request timeout is 30s; raise it to 60s
+        self.producer = KafkaProducer(value_serializer=lambda v: json.dumps(v).encode('utf-8'),
+                                      bootstrap_servers=self.bootstrap_server,
+                                      acks='all',
+                                      request_timeout_ms=60000)
+
+    def send_data_to_kafka(self, data):
+        try:
+            self.producer.send(self.topic, data)
+            logger.info(f"data send successful! ---- {data}")
+        except Exception as e:
+            logger.exception(f'kafka occur error ---- {e}')
+
+    def consumer_msg(self):
+        consumer = KafkaConsumer(self.topic, group_id='test-group_id', bootstrap_servers=self.bootstrap_server)
+        for msg in consumer:
+            recv = "%s:%d:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value)
+            print(recv)
+
+
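A producer/consumer sketch for KafkaOps, assuming a broker and topic configured in the `kafka` block of default_db_config.yml; the payload is illustrative:

from nlpertools.data_client import KafkaOps

ops = KafkaOps()
ops.send_data_to_kafka({"id": 1, "text": "hello"})  # serialized to JSON by the producer
ops.consumer_msg()  # blocks, printing every message consumed from the topic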
+class MilvusOps(object):
+    def __init__(self, config=global_db_config["milvus"]):
+        from pymilvus import connections, Collection
+
+        connections.connect("default", host=config["host"], port=config["port"])
+        self.collection = Collection(config["collection"])
+        self.collection.load()
+
+    def get_similarity(self, embedding):
+        search_params = {
+            "metric_type": "L2",
+            "params": {"nprobe": 1},
+        }
+        logger.debug(embedding)
+        result = self.collection.search(
+            [list(embedding)],
+            "vec",
+            search_params,
+            limit=3,
+            output_fields=["pk", "entity_name", "standard_entity_name"],
+        )
+        hits = result[0]
+        entities = []
+        for hit in hits:
+            entities.append(
+                {
+                    "name": hit.entity.get("entity_name"),
+                    "standard_name": hit.entity.get("standard_entity_name"),
+                }
+            )
+        return entities
+
+    # def insert(self, collection, entities):
+    #     collection.insert(entities)
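A sketch of the similarity lookup above, assuming a loaded Milvus collection whose vector field is named "vec" and whose schema includes the entity_name / standard_entity_name output fields; the embedding below is illustrative and its dimension must match the collection schema:

from nlpertools.data_client import MilvusOps

milvus = MilvusOps()
embedding = [0.1] * 768  # dimension is hypothetical
for entity in milvus.get_similarity(embedding):
    print(entity["name"], entity["standard_name"])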