nlpertools 1.0.4__py3-none-any.whl → 1.0.6.dev0__py3-none-any.whl

Files changed (46)
  1. nlpertools/__init__.py +24 -11
  2. nlpertools/algo/__init__.py +0 -0
  3. nlpertools/algo/ac.py +18 -0
  4. nlpertools/algo/bit_ops.py +28 -0
  5. nlpertools/algo/kmp.py +94 -0
  6. nlpertools/algo/num_ops.py +12 -0
  7. nlpertools/algo/template.py +116 -0
  8. nlpertools/algo/union.py +13 -0
  9. nlpertools/data_client.py +387 -0
  10. nlpertools/data_structure/__init__.py +0 -0
  11. nlpertools/data_structure/base_structure.py +109 -0
  12. nlpertools/dataprocess.py +611 -3
  13. nlpertools/default_db_config.yml +41 -0
  14. nlpertools/io/__init__.py +3 -3
  15. nlpertools/io/dir.py +54 -47
  16. nlpertools/io/file.py +277 -205
  17. nlpertools/ml.py +483 -317
  18. nlpertools/monitor/__init__.py +0 -0
  19. nlpertools/monitor/gpu.py +18 -0
  20. nlpertools/monitor/memory.py +24 -0
  21. nlpertools/movie.py +36 -0
  22. nlpertools/nlpertools_config.yml +1 -0
  23. nlpertools/{openApi.py → open_api.py} +65 -62
  24. nlpertools/other.py +364 -188
  25. nlpertools/pic.py +288 -0
  26. nlpertools/plugin.py +43 -34
  27. nlpertools/reminder.py +98 -15
  28. nlpertools/template/__init__.py +0 -0
  29. nlpertools/utils/__init__.py +3 -0
  30. nlpertools/utils/lazy.py +727 -0
  31. nlpertools/utils/log_util.py +20 -0
  32. nlpertools/utils/package.py +89 -0
  33. nlpertools/utils/package_v1.py +94 -0
  34. nlpertools/utils/package_v2.py +117 -0
  35. nlpertools/utils_for_nlpertools.py +93 -0
  36. nlpertools/vector_index_demo.py +108 -0
  37. nlpertools/wrapper.py +161 -0
  38. {nlpertools-1.0.4.dist-info → nlpertools-1.0.6.dev0.dist-info}/LICENSE +200 -200
  39. nlpertools-1.0.6.dev0.dist-info/METADATA +111 -0
  40. nlpertools-1.0.6.dev0.dist-info/RECORD +43 -0
  41. {nlpertools-1.0.4.dist-info → nlpertools-1.0.6.dev0.dist-info}/WHEEL +1 -1
  42. nlpertools-1.0.6.dev0.dist-info/top_level.txt +2 -0
  43. nlpertools_helper/__init__.py +10 -0
  44. nlpertools-1.0.4.dist-info/METADATA +0 -42
  45. nlpertools-1.0.4.dist-info/RECORD +0 -15
  46. nlpertools-1.0.4.dist-info/top_level.txt +0 -1
nlpertools/data_client.py
@@ -0,0 +1,387 @@
+ #!/usr/bin/python3.8
+ # -*- coding: utf-8 -*-
+ # @Author : youshu.Ji
+ import datetime
+ import json
+ import logging
+
+ from . import DB_CONFIG_FILE
+ from .io.file import read_yaml
+ from .utils.package import *
+
+ # these clients are expected to come from the wildcard .utils.package import:
+ # import aioredis
+ # import happybase
+ # import pandas as pd
+ # import pymysql
+ # from elasticsearch import Elasticsearch, helpers
+ # from kafka import KafkaProducer, KafkaConsumer
+ # from pymongo import MongoClient
+
+ logger = logging.getLogger(__name__)
+
+ global_db_config = read_yaml(DB_CONFIG_FILE)
+
+
+ class Neo4jOps(object):
+     # neo4j connection timeout in seconds
+     # py2neo itself retries 3 times internally...
+     NEO4J_TIMEOUT = 0.3
+
+
+ class SqliteOps(object):
+     # demo queries; kept inside a method so nothing runs at import time
+     @staticmethod
+     def demo(database_path=r'xx.db'):
+         import sqlite3
+         conn = sqlite3.connect(database_path)
+         c = conn.cursor()
+         sql = "select name from sqlite_master where type='table' order by name"
+         c.execute(sql)
+         print(c.fetchall())
+         sql = "select * from typecho_contents"
+         c.execute(sql)
+         res = c.fetchall()
+         print(res[3])
+         conn.commit()
+         conn.close()
+
+
+ class MysqlOps(object):
+     def __init__(self, config=global_db_config["mysql"]):
+         # pymysql is expected to come from the wildcard .utils.package import
+         self.db = pymysql.connect(host=config["host"],
+                                   port=config["port"],
+                                   user=config["user"],
+                                   password=config["password"],
+                                   database=config["database"])
+
+     def query(self, sql):
+         # imported locally; a class-level import would not be visible inside methods
+         import pandas as pd
+         df = pd.read_sql(sql, self.db)
+         return df
+
+
+ class EsOps(object):
+     def __init__(self, config=global_db_config["es"]):
+         # Elasticsearch and helpers are expected to come from the wildcard
+         # .utils.package import
+         self.es = Elasticsearch(
+             host=config["host"], timeout=config["timeout"])
+
+     def search_roll(self, index, body):
+         # page through all hits with the scroll API and collect them
+         all_data = []
+         data = self.es.search(index=index, body=body, scroll="5m")
+         all_data.extend(data["hits"]["hits"])
+         scroll_id = data["_scroll_id"]
+         while data["hits"]["hits"]:
+             print(scroll_id[:5])
+             data = self.es.scroll(scroll_id=scroll_id, scroll="5m")
+             scroll_id = data["_scroll_id"]
+             all_data.extend(data["hits"]["hits"])
+         all_data = [i["_source"] for i in all_data]
+         return all_data
+
+     def search_roll_iter(self, index, body):
+         # generator variant: yield one page of hits at a time
+         data = self.es.search(index=index, body=body, scroll="5m")
+         scroll_id = data["_scroll_id"]
+         while data["hits"]["hits"]:
+             yield data["hits"]["hits"]
+             data = self.es.scroll(scroll_id=scroll_id, scroll="5m")
+             scroll_id = data["_scroll_id"]
+
+     def search(self, index, body):
+         return self.es.search(index=index, body=body)
+
+     def delete(self, index, body):
+         self.es.delete_by_query(index=index, body=body)
+
+     def save(self, data):
+         # each item in data already carries its "_index"
+         helpers.bulk(self.es, data)
+
+     def delete_data_by_query(self, index, _project_id, _source_ids):
+         _query = {
+             "query": {
+                 "bool": {
+                     "must": [
+                         {"terms": {"source_id": _source_ids}},
+                         {"term": {"project_id": _project_id}},
+                     ]
+                 }
+             }
+         }
+         _res = self.es.delete_by_query(index=index, body=_query)
+         print(f"delete_data_by_query: {_res}")
+
+     def batch_re_save(self, index, _data, _project_id, _source_ids):
+         self.delete_data_by_query(index, _project_id, _source_ids)
+         _action = [{"_index": index, "_source": i} for i in _data]
+         _res = helpers.bulk(self.es, _action)
+         print(f"bulk save result: {_res}")
+
+
+ class MongoOps(object):
+     def __init__(self, config=global_db_config["mongo"]):
+         from pymongo import MongoClient
+         mongo_client = MongoClient(config["uri"])
+         db = mongo_client[config["db"]]
+         self.collection = db[config["col"]]
+
+     def fetch_all(self):
+         """
+         Read every document in the collection.
+         :return: list of documents with _id stringified
+         """
+         ans = []
+         print('Fetching all documents.')
+         for record in self.collection.find({}):
+             record['_id'] = str(record['_id'])
+             ans.append(record)
+         return ans
+
+     def load_from_mongo(self, special_value):
+         """
+         Read the documents whose field named special_value equals special_value,
+         returning the one with the shortest "another_value" list.
+         :param special_value:
+         :return: the matching document, or None
+         """
+         record = self.collection.find({"{}".format(special_value): special_value})
+         record = list(record)
+         if not record:
+             return None
+         else:
+             record = sorted(record, key=lambda x: len(x.get("another_value", [])))[0]
+             return record
+
+     def delete_all(self):
+         query = {}
+         deleted = self.collection.delete_many(query)
+         return deleted
+
+     def delete_by_time(self, time):
+         # placeholder: the time parameter is unused and the regex query is a stub
+         query = {"name": {"$regex": "^F"}}
+         deleted = self.collection.delete_many(query)
+
+     def fetch_by_time(self, year=2022, month=7, day=7, hour=7, minute=7, second=7):
+         query = {"query_time": {"$gte": datetime.datetime(year, month, day, hour, minute, second)}}
+         sort_sql = [("query_time", -1)]
+         ans = []
+         print('Fetching all documents.')
+         for record in self.collection.find(query).sort(sort_sql):
+             record['_id'] = str(record['_id'])
+             ans.append(record)
+         return ans
+
+     def save_to_mongo(self, special_value, each_item):
+         """
+         Save data into mongo: push each_item onto the matching document if one
+         exists, otherwise insert a new document.
+         :param special_value:
+         :param each_item:
+         :return:
+         """
+         query = self.collection.find({"{}".format(special_value): special_value})
+         if list(query):
+             self.collection.update_one({"{}".format(special_value): special_value},
+                                        {"$push": {'each_item': each_item}})
+         else:
+             insert_item = {
+                 "special_value": special_value,
+                 "each_item": [each_item]
+             }
+             self.collection.insert_one(insert_item)
+         print("update success")
+
+     def insert_one(self, data):
+         self.collection.insert_one(data)
+
+     def update_to_mongo(self, condition_term, condition_value, new_value):
+         """
+         Query by the given field and value, then update the matching document,
+         inserting it if absent; similar to an update/upsert.
+         :param condition_term: name of the condition field
+         :param condition_value: value of the condition field
+         :param new_value: the new value; should be a dict (non-dict values are untested)
+         :return:
+         """
+         query = self.collection.find({condition_term: condition_value})
+         if list(query):
+             self.collection.update_one({condition_term: condition_value},
+                                        {"$push": new_value})
+         else:
+             insert_item = {
+                 condition_term: condition_value,
+                 "processed_data": new_value
+             }
+             self.collection.insert_one(insert_item)
+         print("update success")
+
+
+ class RedisOps(object):
+     def __init__(self, config=global_db_config["redis"]):
+         # aioredis is expected to come from the wildcard .utils.package import
+         redis_max_connections = 1024
+         REDIS_GET_TIMEOUT = 0.1  # intended read timeout; currently unused
+         self.redis = aioredis.from_url(config["uri"], max_connections=redis_max_connections)
+
+
+ class HBaseOps(object):
+     """
+     demo:
+         key = 'test'
+         db = HBaseOps()
+         data = db.query_single_line(table='table', row_key=key)
+         print(data)
+     """
+
+     def __init__(self, config=global_db_config["hbase"]):
+         # happybase is expected to come from the wildcard .utils.package import
+         self.host = config["DEFAULT_HOST"]
+         self.port = config["DEFAULT_PORT"]
+         self.compat = config["DEFAULT_COMPAT"]
+         self.table_prefix = None  # namespace
+         self.transport = config["DEFAULT_TRANSPORT"]
+         self.protocol = config["DEFAULT_PROTOCOL"]
+         self.conn = self.connect()
+
+     def connect(self):
+         conn = happybase.Connection(host=self.host, port=self.port, timeout=None, autoconnect=True,
+                                     table_prefix=self.table_prefix, compat=self.compat,
+                                     transport=self.transport, protocol=self.protocol)
+         return conn
+
+     def create_hb_table(self, table_name, **families):
+         self.conn.create_table(table_name, families)
+
+     def single_put(self, table_name, row_key, column, data):
+         hb = happybase.Table(table_name, self.conn)
+         hb.put(row_key,
+                data={'{column}:{k}'.format(column=column, k=k): str(v).encode("utf-8") for k, v in data.items()})
+
+     def batch_put(self, table, row_key_name, column, datas, batch_size=1):
+         hb = happybase.Table(table, self.conn)
+         datas_new = [datas[i:i + batch_size] for i in range(0, len(datas), batch_size)]
+         for x in datas_new:
+             with hb.batch(batch_size=batch_size) as batch:
+                 for da in x:
+                     da_nw = {'{column}:{k}'.format(column=column, k=k): v for k, v in da.items()}
+                     row_key = da_nw.pop('{column}:{k}'.format(column=column, k=row_key_name))
+                     batch.put(row_key, da_nw)
+         return batch
+
+     def single_put_self(self, table_name, row_keys, datas):
+         hb = happybase.Table(table_name, self.conn)
+         for row_key, (_, val) in zip(row_keys, datas.items()):
+             hb.put(row_key, {'maybe_table_name:maybe_column_name': "%s" % val[0],
+                              'maybe_table_name:maybe_column_name2': "%s" % val[1]})
+
+     def scan_table(self, table, row_start=None, row_stop=None, include_timestamp=False, limit=None, timestamps=None,
+                    filter=None):
+         hb = happybase.Table(table, self.conn)
+         scan = hb.scan(row_start=row_start, row_stop=row_stop, limit=limit, timestamp=timestamps, filter=filter)
+         hb_dict = dict(scan)
+         if hb_dict:
+             return {k1.decode('utf-8'): {k2.decode('utf-8'): v2.decode('utf-8') for k2, v2 in v1.items()}
+                     for k1, v1 in
+                     hb_dict.items()}
+         else:
+             return {}
+
+     def query_single_line(self, table, row_key):
+         conn = self.connect()
+         hb = happybase.Table(table, conn)
+         hb_dict = hb.row(row_key)
+         if hb_dict:
+             return {k.decode('utf-8'): v.decode('utf-8') for k, v in hb_dict.items()}
+         else:
+             return {}
+
+     def query_multi_lines(self, table, row_keys):
+         hb = happybase.Table(table, self.conn)
+         hb_dict = dict(hb.rows(row_keys))
+         if hb_dict:
+             return {k1.decode('utf-8'): {k2.decode('utf-8'): v2.decode('utf-8') for k2, v2 in v1.items()} for k1, v1 in
+                     hb_dict.items()}
+         else:
+             return {}
+
+     def single_delete(self, table, row_key):
+         hb = happybase.Table(table, self.conn)
+         hb.delete(row_key)
+
+     def test_scan(self, table):
+         hb = happybase.Table(table, self.conn)
+         filter = "SingleColumnValueFilter ('maybe_column_name', 'lang', =, 'regexstring:[regex_string]')"
+         scan = hb.scan(limit=1000, filter=filter)
+         hb_dict = dict(scan)
+         if hb_dict:
+             return {k1.decode('utf-8'): {k2.decode('utf-8'): v2.decode('utf-8') for k2, v2 in v1.items()}
+                     for k1, v1 in
+                     hb_dict.items()}
+         else:
+             return {}
+
+     def close(self):
+         self.conn.close()
+
+
+ class KafkaConfig:
+     pass
+
+
+ class KafkaOps(object):
+     def __init__(self, config=global_db_config["kafka"]):
+         self.bootstrap_server = config["bootstrap_server"]
+         self.topic = config["topic"]
+         # default timeout is 30s; raised to 60s here
+         self.producer = KafkaProducer(value_serializer=lambda v: json.dumps(v).encode('utf-8'),
+                                       bootstrap_servers=self.bootstrap_server,
+                                       acks='all',
+                                       request_timeout_ms=60000)
+
+     def send_data_to_kafka(self, data):
+         try:
+             self.producer.send(self.topic, data)
+             logger.info(f"data sent successfully! ---- {data}")
+         except Exception as e:
+             logger.exception(f'kafka error occurred ---- {e}')
+
+     def consumer_msg(self):
+         consumer = KafkaConsumer(self.topic, group_id='test-group_id', bootstrap_servers=self.bootstrap_server)
+         for msg in consumer:
+             recv = "%s:%d:%d: key=%s value=%s" % (msg.topic, msg.partition, msg.offset, msg.key, msg.value)
+             print(recv)
+
+
+ class MilvusOps(object):
+     def __init__(self, config=global_db_config["milvus"]):
+         from pymilvus import connections, Collection
+
+         connections.connect("default", host=config["host"], port=config["port"])
+         self.collection = Collection(config["collection"])
+         self.collection.load()
+
+     def get_similarity(self, embedding):
+         search_params = {
+             "metric_type": "L2",
+             "params": {"nprobe": 1},
+         }
+         logger.debug(embedding)
+         result = self.collection.search(
+             [list(embedding)],
+             "vec",
+             search_params,
+             limit=3,
+             output_fields=["pk", "entity_name", "standard_entity_name"],
+         )
+         hits = result[0]
+         entities = []
+         for hit in hits:
+             entities.append(
+                 {
+                     "name": hit.entity.get("entity_name"),
+                     "standard_name": hit.entity.get("standard_entity_name"),
+                 }
+             )
+         return entities
+
+     # def insert(self, collection, entities):
+     #     collection.insert(entities)
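Usage note: the scroll helpers in EsOps follow Elasticsearch's scroll protocol: issue one search with a scroll window, then keep calling scroll with the returned _scroll_id until an empty page comes back. A minimal sketch of how the generator variant might be consumed; the import path, index name, query body, and process() are hypothetical:

    from nlpertools.data_client import EsOps  # hypothetical import path

    es = EsOps()  # reads the "es" section of the default db config
    query = {"query": {"term": {"project_id": 42}}}  # hypothetical query body

    # search_roll_iter yields one page of hits per loop iteration, so memory
    # stays bounded even for very large result sets
    for page in es.search_roll_iter(index="my-index", body=query):
        for hit in page:
            process(hit["_source"])  # process() is a placeholder

The generator form is preferable to search_roll for large result sets, since the non-generator version accumulates every hit in memory before returning.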
nlpertools/data_structure/__init__.py
File without changes
nlpertools/data_structure/base_structure.py
@@ -0,0 +1,109 @@
+ #!/usr/bin/python3.8
+ # -*- coding: utf-8 -*-
+ # @Author : youshu.Ji
+
+
+ class ListNode:
+     def __init__(self, x):
+         self.length = 1
+         if isinstance(x, int):
+             self.val = x
+             self.next = None
+         else:
+             # build from a list; recursion might be cleaner here
+             x = list(x)  # copy so the caller's list is not consumed
+             pre = ListNode(x.pop(0))
+             head = pre
+             while x:
+                 self.length += 1
+                 pre.next = ListNode(x.pop(0))
+                 pre = pre.next
+             self.val = head.val
+             self.next = head.next
+
+     def add(self):
+         pass
+
+     def __str__(self):
+         # TODO mark cyclic lists explicitly
+         print_string = [self.val]
+         tmp = self.next
+         # guard against cyclic lists
+         recurrent_num = 0
+         while tmp and recurrent_num <= self.length + 10:
+             recurrent_num += 1
+             print_string.append(tmp.val)
+             tmp = tmp.next
+         return str(print_string)
+
+
+ class TreeNode:
+     def __init__(self, val=0, left=None, right=None):
+         if isinstance(val, list):
+             pass
+         else:
+             self.val = val
+             self.left = left
+             self.right = right
+
+     def build_from_list(self):
+         pass
+
+     def __str__(self):
+         pass
+
+     @staticmethod
+     def pre_order(node):
+         # iterative pre-order traversal, collecting values
+         stack = []
+         res = []
+         while stack or node:
+             while node:
+                 res.append(node.val)
+                 stack.append(node)
+                 node = node.left
+             node = stack.pop()
+             node = node.right
+         return res
+
+     def level_order(self, node):
+         # level-order traversal; recursion is awkward here, so iterate
+         # "deque" is a plain list holding the level currently being visited
+         deque = [node]
+         nxt_deque = []
+         res = []
+         while deque:
+             while deque:
+                 node = deque.pop(0)
+                 res.append(node.val)
+                 if node.left:
+                     nxt_deque.append(node.left)
+                 if node.right:
+                     nxt_deque.append(node.right)
+             deque, nxt_deque = nxt_deque, []
+         return res
+
+     def bfs(self):
+         # how to write BFS iteratively depends on the use case;
+         # put (node, depth) tuples in the queue
+         pass
+
+     def mid_order(self, node):
+         # recursive in-order traversal, printing values
+         if node.left:
+             self.mid_order(node.left)
+         print(node.val)
+         if node.right:
+             self.mid_order(node.right)
+
+     def post_order(self, node=None):
+         pass
+
+     def in_order(self, node):
+         # how a BST is read: in-order yields sorted values
+         pass
+
+
+ if __name__ == '__main__':
+     a = ListNode([1, 2, 3, 4])
+     print(a)
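As a quick sanity check of the traversal helpers, here is a small hand-built tree walked with the methods above; a sketch using TreeNode exactly as defined in base_structure.py:

    root = TreeNode(1, left=TreeNode(2, left=TreeNode(4)), right=TreeNode(3))
    print(TreeNode.pre_order(root))   # [1, 2, 4, 3]
    print(root.level_order(root))     # [1, 2, 3, 4]
    root.mid_order(root)              # prints 4, 2, 1, 3 (in-order)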