nlpertools 1.0.5__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- nlpertools/__init__.py +23 -20
- nlpertools/algo/ac.py +18 -0
- nlpertools/algo/bit_ops.py +28 -0
- nlpertools/algo/kmp.py +94 -55
- nlpertools/algo/num_ops.py +12 -0
- nlpertools/algo/template.py +116 -0
- nlpertools/algo/union.py +13 -0
- nlpertools/cli.py +87 -0
- nlpertools/data_client.py +426 -257
- nlpertools/data_structure/base_structure.py +109 -13
- nlpertools/dataprocess.py +627 -3
- nlpertools/default_db_config.yml +41 -0
- nlpertools/draw/__init__.py +0 -0
- nlpertools/draw/draw.py +83 -0
- nlpertools/draw/math_func.py +33 -0
- nlpertools/get_2fa.py +0 -0
- nlpertools/io/__init__.py +3 -3
- nlpertools/io/dir.py +86 -36
- nlpertools/io/file.py +283 -222
- nlpertools/ml.py +511 -460
- nlpertools/monitor/__init__.py +0 -0
- nlpertools/monitor/gpu.py +18 -0
- nlpertools/monitor/memory.py +24 -0
- nlpertools/movie.py +36 -0
- nlpertools/nlpertools_config.yml +1 -0
- nlpertools/{openApi.py → open_api.py} +65 -65
- nlpertools/other.py +475 -249
- nlpertools/pic.py +288 -0
- nlpertools/plugin.py +43 -43
- nlpertools/reminder.py +98 -87
- nlpertools/utils/__init__.py +3 -3
- nlpertools/utils/lazy.py +727 -0
- nlpertools/utils/log_util.py +20 -0
- nlpertools/utils/package.py +89 -76
- nlpertools/utils/package_v1.py +94 -0
- nlpertools/utils/package_v2.py +117 -0
- nlpertools/utils_for_nlpertools.py +93 -93
- nlpertools/vector_index_demo.py +108 -0
- nlpertools/wrapper.py +161 -96
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/LICENSE +200 -200
- nlpertools-1.0.8.dist-info/METADATA +132 -0
- nlpertools-1.0.8.dist-info/RECORD +49 -0
- {nlpertools-1.0.5.dist-info → nlpertools-1.0.8.dist-info}/WHEEL +1 -1
- nlpertools-1.0.8.dist-info/entry_points.txt +2 -0
- nlpertools-1.0.8.dist-info/top_level.txt +2 -0
- nlpertools_helper/__init__.py +10 -0
- nlpertools-1.0.5.dist-info/METADATA +0 -85
- nlpertools-1.0.5.dist-info/RECORD +0 -25
- nlpertools-1.0.5.dist-info/top_level.txt +0 -1
nlpertools/data_client.py
CHANGED
@@ -1,257 +1,426 @@
- [257 lines removed — the old file body is collapsed in the source diff view; nothing beyond stray fragments is recoverable, so the removed content is not reproduced here]
+ #encoding=utf-8
+ # !/usr/bin/python3.8
+ # -*- coding: utf-8 -*-
+ # @Author : youshu.Ji
+ import datetime
+ import json
+ import logging
+
+ from .io.file import read_yaml
+ from .utils.package import *
+ import os
+
+ DB_CONFIG_FILE = os.path.join(os.path.dirname(__file__), "default_db_config.yml")
+
+ # import aioredis
+ # import happybase
+ # import pandas as pd
+ # import pymysql
+ # from elasticsearch import Elasticsearch, helpers
+ # from kafka import KafkaProducer, KafkaConsumer
+ # from pymongo import MongoClient
+
+ logger = logging.getLogger(__name__)
+
+ global_db_config = read_yaml(DB_CONFIG_FILE)
+
+
+ class Neo4jOps(object):
+     # neo4j connection timeout in seconds
+     # py2neo retries 3 times internally...
+     NEO4J_TIMEOUT = 0.3
+     pass
+
+
+ class SqliteOps(object):
+     pass
+     # import sqlite3
+     # database_path = r'xx.db'
+     # conn = sqlite3.connect(database_path)
+     # c = conn.cursor()
+     # sql = "select name from sqlite_master where type='table' order by name"
+     # c.execute(sql)
+     # print(c.fetchall())
+     # sql = "select * from typecho_contents"
+     # c.execute(sql)
+     # res = c.fetchall()
+     # print(res[3])
+     #
+     # conn.commit()
+     # conn.close()
+
+
+ class MysqlOps(object):
+     import pandas as pd
+     def __init__(self, config=global_db_config["mysql"]):
+         self.db = pymysql.connect(host=config["host"],
+                                   port=config["port"],
+                                   user=config["user"],
+                                   password=config["password"],
+                                   database=config["database"])
+
+     def query(self, sql):
+         df = pd.read_sql(sql, self.db)
+         return df
+
+
+ class EsOps(object):
+     from elasticsearch import Elasticsearch, helpers
+     def __init__(self, config=global_db_config["es"]):
+         self.es = Elasticsearch(
+             host=config["host"], timeout=config["timeout"])
+
+     def search_roll(self, index, body):
+         all_data = []
+         data = self.es.search(index=index, body=body, scroll="5m")
+         all_data.extend(data["hits"]["hits"])
+         scroll_id = data["_scroll_id"]
+         while data["hits"]["hits"]:
+             print(scroll_id[:5])
+             data = self.es.scroll(scroll_id=scroll_id, scroll="5m")
+             scroll_id = data["_scroll_id"]
+             all_data.extend(data["hits"]["hits"])
+         all_data = [i["_source"] for i in all_data]
+         return all_data
+
+     def search_roll_iter(self, index, body):
+         data = self.es.search(index=index, body=body, scroll="5m")
+         scroll_id = data["_scroll_id"]
+         while data["hits"]["hits"]:
+             yield data["hits"]["hits"]
+             data = self.es.scroll(scroll_id=scroll_id, scroll="5m")
+             scroll_id = data["_scroll_id"]
+
+     def search(self, index, body):
+         return self.es.search(index=index, body=body)
+
+     def delete(self, index, body):
+         self.es.delete_by_query(index=index, body=body)
+
+     def save(self, data):
+         # each record in data carries its own index
+         helpers.bulk(self.es, data)
+
+     def delete_data_by_query(self, index, _project_id, _source_ids):
+         _query = {
+             "query": {
+                 "bool": {
+                     "must": [
+                         {"terms": {"source_id": _source_ids}},
+                         {"term": {"project_id": _project_id}},
+                     ]
+                 }
+             }
+         }
+         _res = self.es.delete_by_query(index=index, body=_query)
+         print(f"delete_data_by_query: {_res}")
+
+     def batch_re_save(self, index, _data, _project_id, _source_ids):
+         self.delete_data_by_query(_project_id, _source_ids)
+         _action = [{"_index": index, "_source": i} for i in _data]
+         _res = helpers.bulk(self.es, _action)
+         print(f"batch saved: {_res}")
+
+
+ class MongoDB_BETA:
+     def __init__(self, host='localhost', port=27017, db_name=None, collection_name=None):
+         self.host = host
+         self.port = port
+         self.db_name = db_name
+         self.collection_name = collection_name
+         self.client = None
+         self.db = None
+         self.collection = None
+
+     def connect(self):
+         self.client = MongoClient(self.host, self.port)
+         self.db = self.client[self.db_name]
+         self.collection = self.db[self.collection_name]
+
+     def close(self):
+         if self.client:
+             self.client.close()
+
+     def insert_data(self, data):
+         if isinstance(data, list):
+             self.collection.insert_many(data)
+         else:
+             self.collection.insert_one(data)
+
+     def check_data_exists(self, query):
+         """
+         Check whether a record exists in the database
+         :param query: query condition
+         :return: bool indicating whether the data exists
+         """
+         return self.collection.count_documents(query) > 0
+
+
+
+ class MongoOps(object):
+     from pymongo import MongoClient
+     def __init__(self, config=global_db_config["mongo"]):
+         mongo_client = MongoClient(config["uri"])
+         db = mongo_client[config["db"]]
+         self.collection = db[config["col"]]
+
+     def fetch_all(self):
+         """
+         Fetch all records
+         :return:
+         """
+         ans = []
+         print('Fetching all data.')
+         for record in self.collection.find({}):
+             record['_id'] = str(record['_id'])
+             ans.append(record)
+         return ans
+
+     def load_from_mongo(self, special_value):
+         """
+         Fetch the mongodb records whose special_value field equals special_value
+         :param
+         :return:
+         """
+         record = self.collection.find({"{}".format(special_value): special_value})
+         record = list(record)
+         if not record:
+             return None
+         else:
+             record = sorted(record, key=lambda x: len(x.get("another_value", [])))[0]
+             return record
+
+     def delete_all(self):
+         query = {}
+         deleted = self.collection.delete_many(query)
+         return deleted
+
+     def delete_by_time(self, time):
+         query = {"name": {"$regex": "^F"}}
+         deleted = self.collection.delete_many(query)
+
+     def fetch_by_time(self, year=2022, month=7, day=7, hour=7, minute=7, second=7):
+         query = {"query_time": {"$gte": datetime.datetime(year, month, day, hour, minute, second)}}
+         sort_sql = [("query_time", -1)]
+         ans = []
+         print('Fetching all data.')
+         for record in self.collection.find(query).sort(sort_sql):
+             record['_id'] = str(record['_id'])
+             ans.append(record)
+         return ans
+
+     def save_to_mongo(self, special_value, each_item):
+         """
+         Save data to mongo
+         :param special_value:
+         :param each_item:
+         :return:
+         """
+         query = self.collection.find({"{}".format(special_value): special_value})
+         if list(query):
+             self.collection.update_one({"{}".format(special_value): special_value},
+                                        {"$push": {'each_item': each_item}})
+         else:
+             insert_item = {
+                 "special_value": special_value,
+                 "each_item": [each_item]
+             }
+             self.collection.insert_one(insert_item)
+         print("update success")
+
+     def insert_one(self, data):
+         self.collection.insert_one(data)
+
+     def update_to_mongo(self, condition_term, condition_value, new_value):
+         """
+         Query the matching record by the given field and value, then write the updated data back to mongo
+         (similar to update)
+         :param condition_term: the condition field name
+         :param condition_value: the condition field value
+         :param new_value: the new value; ideally a dict (untested with non-dict values)
+         :return:
+         """
+         query = self.collection.find({condition_term: condition_value})
+         if list(query):
+             self.collection.update_one({condition_term: condition_value},
+                                        {"$push": new_value})
+         else:
+             insert_item = {
+                 condition_term: condition_value,
+                 "processed_data": new_value
+             }
+             self.collection.insert_one(insert_item)
+         print("update success")
+
+
+ class RedisOps(object):
+     def __init__(self, config=global_db_config["redis"]):
+         redis_max_connections = 1024
+         REDIS_GET_TIMEOUT = 0.1
+         self.redis = aioredis.from_url(config["uri"], max_connections=redis_max_connections)
+
+
+ class HBaseOps(object):
+     import happybase
+     """
+     demo
+     key = 'test'
+     db = HBaseHelper(host=hbase_host)
+     data = db.query_single_line(table='table', row_key=key)
+     print(data)
+     """
+
+     def __init__(self, config=global_db_config["hbase"]):
+         self.host = config["DEFAULT_HOST"]
+         self.port = config["DEFAULT_PORT"]
+         self.compat = config["DEFAULT_COMPAT"]
+         self.table_prefix = None  # namespace
+         self.transport = config["DEFAULT_TRANSPORT"]
+         self.protocol = config["DEFAULT_PROTOCOL"]
+         self.conn = self.connect()
+
+     def connect(self):
+         conn = happybase.Connection(host=self.host, port=self.port, timeout=None, autoconnect=True,
+                                     table_prefix=self.table_prefix, compat=self.compat,
+                                     transport=self.transport, protocol=self.protocol)
+         return conn
+
+     def create_hb_table(self, table_name, **families):
+         self.conn.create_table(table_name, families)
+
+     def single_put(self, table_name, row_key, column, data):
+         hb = happybase.Table(table_name, self.conn)
+         hb.put(row_key,
+                data={'{column}:{k}'.format(column=column, k=k): str(v).encode("utf-8") for k, v in data.items()})
+
+     def batch_put(self, table, row_key_name, column, datas, batch_size=1):
+         hb = happybase.Table(table, self.conn)
+         datas_new = [datas[i:i + batch_size] for i in range(0, len(datas), batch_size)]
+         for x in datas_new:
+             with hb.batch(batch_size=batch_size) as batch:
+                 for da in x:
+                     da_nw = {'{column}:{k}'.format(column=column, k=k): v for k, v in da.items()}
+                     row_key = da_nw.pop('{column}:{k}'.format(column=column, k=row_key_name))
+                     batch.put(row_key, da_nw)
+         return batch
+
+     def single_put_self(self, table_name, row_keys, datas):
+         hb = happybase.Table(table_name, self.conn)
+         for row_key, (_, val) in zip(row_keys, datas.items()):
+             hb.put(row_key, {'maybe_table_name:maybe_column_name': "%s" % val[0],
+                              'maybe_table_name:maybe_column_name2': "%s" % val[1]})
+
+     def scan_table(self, table, row_start=None, row_stop=None, include_timestamp=False, limit=None, timestamps=None,
+                    filter=None):
+         hb = happybase.Table(table, self.conn)
+         scan = hb.scan(row_start=row_start, row_stop=row_stop, limit=limit, timestamp=timestamps, filter=filter)
+         hb_dict = dict(scan)
+         if hb_dict:
+             return {str(k1).decode('utf-8'): {str(k2).decode('utf-8'): str(v2).decode('utf-8') for k2, v2 in v1.items()}
+                     for k1, v1 in
+                     hb_dict.items()}
+         else:
+             return {}
+
+     def query_single_line(self, table, row_key):
+         conn = self.connect()
+         hb = happybase.Table(table, conn)
+         hb_dict = hb.row(row_key)
+         if hb_dict:
+             return {k.decode('utf-8'): v.decode('utf-8') for k, v in hb_dict.items()}
+         else:
+             return {}
+
+     def query_multi_lines(self, table, row_keys):
+         hb = happybase.Table(table, self.conn)
+         hb_dict = dict(hb.rows(row_keys))
+         if hb_dict:
+             return {k1.decode('utf-8'): {k2.decode('utf-8'): v2.decode('utf-8') for k2, v2 in v1.items()} for k1, v1 in
+                     hb_dict.items()}
+         else:
+             return {}
+
+     def single_delete(self, table, row_key):
+         hb = happybase.Table(table, self.conn)
+         hb.delete(row_key)
+
+     def test_scan(self, table):
+         hb = happybase.Table(table, self.conn)
+         filter = "SingleColumnValueFilter ('maybe_column_name', 'lang', =, 'regexstring:[regex_string]')"
+         scan = hb.scan(limit=1000, filter=filter)
+
+         hb_dict = dict(scan)
+         if hb_dict:
+             return {str(k1).decode('utf-8'): {str(k2).decode('utf-8'): str(v2).decode('utf-8') for k2, v2 in v1.items()}
+                     for k1, v1 in
+                     hb_dict.items()}
+         else:
+             return {}
+
+     def close(self):
+         self.conn.close()
+
+
+ class KafkaConfig():
+     pass
+
+
+ class KafkaOps(object):
+     def __init__(self, config=global_db_config["kafka"]):
+         self.bootstrap_server = config["bootstrap_server"]
+         self.topic = config["topic"]
+         # the default timeout is 30s; raised to 60s
+         self.producer = KafkaProducer(value_serializer=lambda v: json.dumps(v).encode('utf-8'),
+                                       bootstrap_servers=self.bootstrap_server,
+                                       acks='all',
+                                       request_timeout_ms=60000)
+
+     def send_data_to_kafka(self, data):
+         try:
+             self.producer.send(self.topic, data)
+             logger.info(f"data send successful! ---- {data}")
+         except Exception as e:
+             logger.exception(f'kafka occur error ---- {e}')
+
+     def consumer_msg(self):
+         consumer = KafkaConsumer(self.topic, group_id='test-group_id', bootstrap_servers=self.bootstrap_server)
+         for msg in consumer:
+             recv = "%s:%d:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value)
+             print(recv)
+
+
+ class MilvusOps(object):
+     def __init__(self, config=global_db_config.milvus):
+         from pymilvus import connections, Collection
+
+         connections.connect("default", host=config.host, port=config.port)
+         self.collection = Collection(config.collection)
+         self.collection.load()
+
+     def get_similarity(self, embedding):
+         search_params = {
+             "metric_type": "L2",
+             "params": {"nprobe": 1},
+         }
+         # # %%
+         logger.debug(embedding)
+         result = self.collection.search(
+             [list(embedding)],
+             "vec",
+             search_params,
+             limit=3,
+             output_fields=["pk", "entity_name", "standard_entity_name"],
+         )
+         hits = result[0]
+         entities = []
+         for hit in hits:
+             entities.append(
+                 {
+                     "name": hit.entity.get("entity_name"),
+                     "standard_name": hit.entity.get("standard_entity_name"),
+                 }
+             )
+         return entities
+
+     # def insert(self, collection, entities):
+     #     collection.insert(entities)
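
Usage sketch (illustrative, not from the package itself): the rewritten data_client.py wraps each backend in a thin Ops class whose constructor reads its connection block from default_db_config.yml. A minimal sketch of how these classes are driven, assuming the config file defines the mysql, es, and mongo blocks the constructors read; the table, index, and query values below are invented for illustration.

    # Hypothetical usage; class and method names come from the diff above,
    # while hosts, table/index names, and queries are placeholders.
    from nlpertools.data_client import MysqlOps, EsOps, MongoOps

    mysql = MysqlOps()                                         # reads global_db_config["mysql"]
    df = mysql.query("SELECT id, text FROM corpus LIMIT 10")   # returns a pandas DataFrame

    es = EsOps()                                               # reads global_db_config["es"]
    body = {"query": {"match_all": {}}}
    for page in es.search_roll_iter(index="demo_index", body=body):
        for hit in page:                                       # each page is a list of raw ES hits
            print(hit["_source"])

    mongo = MongoOps()                                         # reads global_db_config["mongo"]
    records = mongo.fetch_all()                                # every document, with _id stringified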