gomyck-tools 1.3.1__py3-none-any.whl → 1.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctools/__init__.py +0 -0
- ctools/aes_tools.py +35 -0
- ctools/api_result.py +55 -0
- ctools/application.py +386 -0
- ctools/b64.py +7 -0
- ctools/bashPath.py +13 -0
- ctools/bottle_web_base.py +169 -0
- ctools/bottle_webserver.py +143 -0
- ctools/bottle_websocket.py +75 -0
- ctools/browser_element_tools.py +314 -0
- ctools/call.py +71 -0
- ctools/cdebug.py +143 -0
- ctools/cftp.py +74 -0
- ctools/cjson.py +54 -0
- ctools/ckafka.py +159 -0
- ctools/compile_tools.py +18 -0
- ctools/console.py +55 -0
- ctools/coord_trans.py +127 -0
- ctools/credis.py +111 -0
- ctools/cron_lite.py +245 -0
- ctools/ctoken.py +34 -0
- ctools/cword.py +30 -0
- ctools/czip.py +130 -0
- ctools/database.py +185 -0
- ctools/date_utils.py +43 -0
- ctools/dict_wrapper.py +20 -0
- ctools/douglas_rarefy.py +136 -0
- ctools/download_tools.py +57 -0
- ctools/enums.py +4 -0
- ctools/ex.py +31 -0
- ctools/excelOpt.py +36 -0
- ctools/html_soup.py +35 -0
- ctools/http_utils.py +24 -0
- ctools/images_tools.py +27 -0
- ctools/imgDialog.py +44 -0
- ctools/metrics.py +131 -0
- ctools/mqtt_utils.py +289 -0
- ctools/obj.py +20 -0
- ctools/pacth.py +74 -0
- ctools/plan_area_tools.py +97 -0
- ctools/process_pool.py +36 -0
- ctools/pty_tools.py +72 -0
- ctools/resource_bundle_tools.py +121 -0
- ctools/rsa.py +70 -0
- ctools/screenshot_tools.py +127 -0
- ctools/sign.py +20 -0
- ctools/sm_tools.py +49 -0
- ctools/snow_id.py +76 -0
- ctools/str_diff.py +20 -0
- ctools/string_tools.py +85 -0
- ctools/sys_info.py +157 -0
- ctools/sys_log.py +89 -0
- ctools/thread_pool.py +35 -0
- ctools/upload_tools.py +40 -0
- ctools/win_canvas.py +83 -0
- ctools/win_control.py +106 -0
- ctools/word_fill.py +562 -0
- ctools/word_fill_entity.py +46 -0
- ctools/work_path.py +69 -0
- {gomyck_tools-1.3.1.dist-info → gomyck_tools-1.3.3.dist-info}/METADATA +4 -2
- gomyck_tools-1.3.3.dist-info/RECORD +64 -0
- {gomyck_tools-1.3.1.dist-info → gomyck_tools-1.3.3.dist-info}/WHEEL +1 -1
- gomyck_tools-1.3.3.dist-info/licenses/LICENSE +13 -0
- gomyck_tools-1.3.3.dist-info/top_level.txt +1 -0
- gomyck_tools-1.3.1.dist-info/RECORD +0 -4
- gomyck_tools-1.3.1.dist-info/top_level.txt +0 -1
ctools/credis.py
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: UTF-8 -*-
|
3
|
+
__author__ = 'haoyang'
|
4
|
+
__date__ = '2025/2/14 11:09'
|
5
|
+
|
6
|
+
import redis
|
7
|
+
from redis import Redis
|
8
|
+
|
9
|
+
from ctools import date_utils, thread_pool, string_tools
|
10
|
+
|
11
|
+
def init_pool(host: str = 'localhost', port: int = 6379, db: int = 0, password: str = None,
              username: str = None, decode_responses: bool = True, max_connections: int = 75,
              health_check_interval: int = 30, retry_count: int = 3) -> Redis:
  """
  Create a StrictRedis client and verify connectivity with PING, retrying on failure.

  :param host: Redis server host
  :param port: Redis server port
  :param db: database index
  :param password: optional password
  :param username: optional username (Redis 6+ ACL)
  :param decode_responses: decode reply bytes to str automatically
  :param max_connections: connection-pool upper bound
  :param health_check_interval: seconds between automatic health checks
  :param retry_count: how many connection attempts before giving up
  :return: a connected, ping-verified Redis client
  :raise Exception: when no healthy connection could be made after retry_count attempts
  """
  last_error = None
  for attempt in range(retry_count):
    try:
      r: Redis = redis.StrictRedis(
        host=host, port=port, db=db,
        username=username, password=password,
        retry_on_timeout=True,
        max_connections=max_connections,
        decode_responses=decode_responses,
        health_check_interval=health_check_interval,
        socket_connect_timeout=5,
        socket_timeout=5
      )
      if r.ping():
        print('CRedis connect {} {} success!'.format(host, port))
        return r
      # server answered but PING was falsy: treat as a failed attempt
      last_error = Exception('ping returned a falsy value')
    except redis.ConnectionError as e:
      last_error = e
      print(f"Connection attempt {attempt + 1} failed, retrying...")
  # BUGFIX: previously the function could fall through and silently return None
  # (e.g. when ping() was falsy on every attempt) despite the -> Redis annotation.
  raise Exception(f"Failed to connect to Redis after {retry_count} attempts: {str(last_error)}")
|
33
|
+
|
34
|
+
def add_lock(r: Redis, key: str, timeout: int = 30):
  """
  Acquire a simple expiring lock stored at *key*.

  The value stored under the key is the lock's expiry timestamp. Returns True
  when the lock is held after the call.
  NOTE(review): when an unexpired lock record already exists this returns True
  without re-acquiring — confirm this re-entrant behaviour is intended.
  """
  if r.exists(key):
    stored_expiry = r.get(key)
    still_valid = date_utils.time_diff_in_seconds(stored_expiry, date_utils.get_date_time()) > 0
    if still_valid:
      return True
    # stale lock record: clear it so the SET NX below can succeed
    r.delete(key)
  acquired = r.set(key, date_utils.opt_time(seconds=timeout), nx=True, ex=timeout)
  return acquired is not None
|
42
|
+
|
43
|
+
def remove_lock(r: Redis, key: str):
  """Release the lock held at *key* by deleting its record."""
  r.delete(key)
|
45
|
+
|
46
|
+
def subscribe(r: Redis, channel_name, callback):
  """
  Subscribe to a pub/sub channel and dispatch every received message to
  *callback* on a background thread from the shared thread pool.
  """
  def _listen():
    channel = r.pubsub()
    channel.subscribe(channel_name)
    for msg in channel.listen():
      callback(msg)
  thread_pool.submit(_listen)
|
53
|
+
|
54
|
+
def _process_pending_messages(r: Redis, stream_name: str, group_name: str, consumer_name: str, callback):
  """
  Re-deliver and process messages that were read by the group but never acked.

  :param r: Redis connection
  :param stream_name: stream key
  :param group_name: consumer group name
  :param consumer_name: consumer claiming the pending messages
  :param callback: handler invoked as callback(message_id, data); a truthy
                   return value acknowledges the message
  """
  summary = r.xpending(stream_name, group_name)
  pending_count = summary['pending']
  if pending_count <= 0:
    print("No pending messages found.")
    return
  print(f"Found {pending_count} pending messages.")
  # list the individual pending entries, then claim them for this consumer
  entries = r.xpending_range(stream_name, group_name, min='-', max='+', count=pending_count)
  for entry in entries:
    claimed = r.xclaim(stream_name, group_name, consumer_name,
                       min_idle_time=0, message_ids=[entry['message_id']])
    for message_id, data in claimed:
      print(f"Processing pending message: {message_id}, data: {data}")
      try:
        if callback(message_id, data):
          r.xack(stream_name, group_name, message_id)
      except Exception as e:
        print(f"Error processing message {message_id}: {e}")
|
84
|
+
|
85
|
+
def stream_subscribe(r: Redis, stream_name, group_name, callback, from_id: str='$', noack: bool = False):
  """
  Consume a Redis stream through a consumer group on a background thread.

  :param r: Redis connection
  :param stream_name: stream key to read from
  :param group_name: consumer group (created when missing)
  :param callback: handler invoked as callback(message_id, data); a truthy
                   return value acknowledges the message
  :param from_id: group start position ('$' = tail only, '0' = from the beginning)
  :param noack: when True, messages are auto-acked by XREADGROUP
  """
  def thread_func():
    try:
      # '$' means consume from the tail, '0' means from the very beginning
      r.xgroup_create(name=stream_name, groupname=group_name, id=from_id, mkstream=True)
      print(f"Consumer group '{group_name}' created successfully.")
    except Exception as e:
      if "already exists" in str(e):
        print(f"Consumer group '{group_name}' already exists.")
      else:
        print(f"Error creating consumer group '{group_name}': {e}")
    consumer_name = 'consumer-{}'.format(string_tools.get_uuid())
    # first drain messages a previous consumer read but never acknowledged
    _process_pending_messages(r, stream_name, group_name, consumer_name, callback)
    while True:
      messages = r.xreadgroup(group_name, consumer_name, {stream_name: '>'}, block=1000, noack=noack)
      for stream_entry in messages:
        # BUGFIX: XREADGROUP can deliver several messages per stream in one
        # reply; the previous code only processed stream_entry[1][0] and
        # silently dropped the rest.
        for message_id, data in stream_entry[1]:
          try:
            if callback(message_id, data):
              r.xack(stream_name, group_name, message_id)
          except Exception as e:
            print('stream_subscribe error: ', e)
  thread_pool.submit(thread_func)
|
109
|
+
|
110
|
+
def stream_publish(r: Redis, stream_name, message):
  """Append *message* (a mapping of field -> value) to the stream via XADD."""
  r.xadd(stream_name, message)
|
ctools/cron_lite.py
ADDED
@@ -0,0 +1,245 @@
|
|
1
|
+
#!/usr/bin/env python3.6
|
2
|
+
# coding: utf-8
|
3
|
+
import sched
|
4
|
+
import threading
|
5
|
+
import time
|
6
|
+
import traceback
|
7
|
+
from datetime import datetime
|
8
|
+
from functools import wraps
|
9
|
+
from typing import Optional, Dict
|
10
|
+
|
11
|
+
import pytz
|
12
|
+
from croniter import croniter
|
13
|
+
|
14
|
+
"""
|
15
|
+
@cron_lite.cron_task('0/1 * * * * ? *')
|
16
|
+
def demo():
|
17
|
+
print('hello world')
|
18
|
+
|
19
|
+
@cron_lite.cron_task('0/1 * * * * ? *')
|
20
|
+
def demo1():
|
21
|
+
print('hello world111')
|
22
|
+
|
23
|
+
def demo2(xx, fff):
|
24
|
+
print('hello world222', xx, fff)
|
25
|
+
|
26
|
+
cron_lite.reg_cron_task('0/1 * * * * ? *', demo2, (123123123, 34534534))
|
27
|
+
print(123123)
|
28
|
+
|
29
|
+
cron_lite.start_all()
|
30
|
+
"""
|
31
|
+
|
32
|
+
class SchedulerMeta:
  """Per-task bookkeeping record kept in scheduler_map.

  The class-level values below act only as defaults; _register_next assigns
  instance attributes for every task it creates.
  """
  # Task name; also the key of this record in scheduler_map.
  timer_task_name: str = None
  # Per-task on/off switch; setting it to False asks the task loop to exit.
  switch: bool = True
  # True while the task's scheduler loop is actively running.
  status: bool = False
  # The sched event for the next scheduled run (cancelled on inactive()).
  event: sched.Event = None
  # Dedicated sched.scheduler instance driving this task.
  scheduler: sched.scheduler = None
  # Normalized (croniter-format) cron expression for this task.
  cron_str: str = None
|
39
|
+
|
40
|
+
|
41
|
+
# Registry of every registered task: {timer_task_name: SchedulerMeta}
scheduler_map: Dict[str, SchedulerMeta] = {}  # {timer_task_name: SchedulerMeta}
# Global master switch: True while _start() is driving the registered tasks.
_switch = False
# Output handlers; replaceable through start_all(info_handler=..., error_handler=...).
_info_handler = print
_error_handler = print
# Timezone used when computing next fire times; change via set_time_zone().
_time_zone: Optional[pytz.BaseTzInfo] = pytz.timezone("Asia/Shanghai")
|
46
|
+
|
47
|
+
|
48
|
+
def set_time_zone(time_zone_name: str):
  """Switch the timezone used for cron fire-time computation (default Asia/Shanghai).

  :param time_zone_name: an IANA timezone name accepted by pytz, e.g. "UTC".
  """
  global _time_zone
  _time_zone = pytz.timezone(time_zone_name)
|
51
|
+
|
52
|
+
# @annotation
|
53
|
+
def cron_task(cron_expr: str, task_name: str = None, till_time_stamp: int = None):
  """
  Decorator registering the wrapped function as a crontab task.

  :param cron_expr: croniter-accepted cron expression, `min hour day month weekday [sec]`
                    (a 7-field Quartz-style expression is normalized first).
                    NOTE: fire times are computed in the module timezone
                    (default Asia/Shanghai, see `set_time_zone`).
  :param task_name: explicit task name; defaults to the function name
  :param till_time_stamp: run this job until this timestamp; None means forever
  :return: the real decorator
  """
  cron_expr = _convert_cron(cron_expr)
  assert len(cron_expr.split(" ")) in (5, 6), \
    "only supported <min hour day month weekday> and <min hour day month weekday sec>"

  def deco(func):
    @wraps(func)
    def inner():
      try:
        func()
      except Exception:
        try:
          _error_handler(f"run {func.__name__} failed\n" + traceback.format_exc())
        except Exception:
          _error_handler(f"run {func.__name__} failed\n")
      # each completed run schedules the next occurrence
      _register_next(resolved_name, inner, cron_expr, till_time_stamp)

    resolved_name = inner.__name__ if task_name is None else task_name
    _register_next(resolved_name, inner, cron_expr, till_time_stamp, init=True)
    return inner

  return deco
|
81
|
+
|
82
|
+
|
83
|
+
def reg_cron_task(cron_expr, func, params, timer_task_name=None, till_time_stamp=None):
  """
  Register *func* as a crontab task programmatically (non-decorator variant).

  :param cron_expr: the croniter accepted cron_expression, format
                    `min hour day month weekday [sec]` (a 7-field Quartz-style
                    expression is normalized by _convert_cron).
                    NOTE(review): fire times use the module timezone, which
                    defaults to Asia/Shanghai (see _time_zone), not UTC.
  :param func: task callback function
  :param params: positional parameters unpacked into *func* on every run
  :param timer_task_name: task name; defaults to func.__name__
  :param till_time_stamp: run this job till when. None means forever
  """
  cron_expr = _convert_cron(cron_expr)
  assert len(cron_expr.split(" ")) in (5, 6), "Only supported <minute hour day month weekday> and <minute hour day month weekday second>"
  task_name = func.__name__ if timer_task_name is None else timer_task_name
  @wraps(func)
  def wrapper(*args, **kwargs):
    try:
      nonlocal params
      # expose the resolved task name to the callback
      func.__taskName__ = task_name
      func(*params, *args, **kwargs)
    except Exception as exc:
      _error_handler(f"Run {func.__name__} failed with error: {str(exc)}")
    finally:
      # reschedule the next occurrence even when this run raised
      _register_next(task_name, wrapper, cron_expr, till_time_stamp)

  # initial registration; raises if task_name already exists
  _register_next(task_name, wrapper, cron_expr, till_time_stamp, init=True)
|
109
|
+
|
110
|
+
def start_all(spawn: bool = True, daemon: bool = True, info_handler=None, error_handler=None) -> Optional[threading.Thread]:
  """
  Start every cron task registered so far.

  :param spawn: run the scheduler in a new thread; otherwise block this thread
  :param daemon: make the spawned thread a daemon thread
  :param info_handler: handles info output (start/stop); default print, can use logging.info
  :param error_handler: handles error output (task exceptions); default print, can use logging.error
  :raise RuntimeError: if the tasks are already running — starting twice is not
                       supported (the feature is not concurrent-safe)
  :return: the spawned thread when spawn is True, otherwise None
  """
  global _switch, _info_handler, _error_handler
  if _switch:
    raise RuntimeError("the crontab was already started...")
  if info_handler:
    _info_handler = info_handler
  if error_handler:
    _error_handler = error_handler
  if not spawn:
    _start()
    return None
  worker = threading.Thread(target=_start, daemon=daemon)
  worker.start()
  return worker
|
134
|
+
|
135
|
+
|
136
|
+
def is_active(timer_task_name):
  """Return True when the task is switched on or its loop is still running."""
  meta = scheduler_map.get(timer_task_name)
  if meta is None:
    return False
  return meta.switch or meta.status
|
141
|
+
|
142
|
+
|
143
|
+
def active(timer_task_name):
  """Mark the task's scheduler loop as running."""
  meta = scheduler_map.get(timer_task_name)
  if meta is not None:
    meta.status = True
|
146
|
+
|
147
|
+
def get_switch(timer_task_name):
  """Return the task's switch flag; unknown task names default to True."""
  meta = scheduler_map.get(timer_task_name)
  return True if meta is None else meta.switch
|
152
|
+
|
153
|
+
|
154
|
+
def inactive(timer_task_name):
  """Mark the task's loop as stopped; cancel its pending event when switched off."""
  meta = scheduler_map.get(timer_task_name)
  if meta is None:
    return
  meta.status = False
  if not meta.switch:
    # the task was explicitly stopped: drop its queued next-run event
    meta.scheduler.cancel(scheduler_map[timer_task_name].event)
|
159
|
+
|
160
|
+
|
161
|
+
def stop(timer_task_name):
  """Switch one task off, then wait briefly so its loop can notice the flag."""
  meta = scheduler_map.get(timer_task_name)
  if meta is not None:
    meta.switch = False
    # the task loop polls its switch every 0.5s; give it time to exit
    time.sleep(1)
|
165
|
+
|
166
|
+
|
167
|
+
def stop_all(wait_thread: Optional[threading.Thread] = None):
  """
  Turn off every task's switch; jobs that are mid-run finish their current execution.

  :param wait_thread: optionally join the spawned scheduler thread (if you
                      started with spawn=True) to wait until all jobs finish
  :return:
  """
  for name in scheduler_map:
    scheduler_map[name].switch = False
  if wait_thread:
    wait_thread.join()
|
178
|
+
|
179
|
+
|
180
|
+
def _register_next(timer_task_name, base_func, cron_expr, till_time_stamp, init: bool = False):
  """Compute the next fire time from *cron_expr* and enqueue *base_func* on the task's scheduler.

  :param timer_task_name: key of the task in scheduler_map
  :param base_func: callable to execute at the next fire time
  :param cron_expr: normalized (croniter-format) cron expression
  :param till_time_stamp: do not schedule runs after this timestamp; None = forever
  :param init: True on first registration; a duplicate name then raises ValueError
  """
  cron_obj = croniter(cron_expr)
  if _time_zone:
    # anchor "now" in the configured timezone so get_next() is relative to it
    cron_obj.set_current(datetime.now(tz=_time_zone))
  next_time = int(cron_obj.get_next())
  if scheduler_map.get(timer_task_name) is None:
    # first time this task is seen: create its meta + dedicated scheduler
    scheduler_meta = SchedulerMeta()
    scheduler_meta.timer_task_name = timer_task_name
    scheduler_meta.switch = True
    scheduler_meta.scheduler = sched.scheduler(time.time, time.sleep)
    scheduler_meta.cron_str = cron_expr
    scheduler_map[timer_task_name] = scheduler_meta
  elif init:
    raise ValueError(f"task name: {timer_task_name} already exists!!!!!")
  # only schedule when the next fire time is still within the allowed window
  if till_time_stamp is None or next_time <= till_time_stamp:
    scheduler_map[timer_task_name].event = scheduler_map[timer_task_name].scheduler.enterabs(next_time, 0, base_func)
|
196
|
+
|
197
|
+
|
198
|
+
def _run_sched(scheduler_meta: SchedulerMeta):
  """Drive one task's scheduler loop until its switch (or the global switch) turns off.

  scheduler.run(False) is used non-blockingly: it returns the delay until the
  next event; we then sleep that delay in 0.5s slices so a switch flip is
  noticed promptly.
  """
  active(scheduler_meta.timer_task_name)
  while True:
    scheduler = scheduler_meta.scheduler
    if not _switch or not scheduler_meta.switch:
      # NOTE(review): sched.scheduler.empty() only *reports* whether the queue
      # is empty — it does not clear it; the pending event is actually
      # cancelled inside inactive() when the task switch is off.
      scheduler.empty()
      inactive(scheduler_meta.timer_task_name)
      return
    # non-blocking run: returns the delay until the next event, or None
    t = scheduler.run(False)
    if t is None:
      # nothing scheduled (e.g. till_time_stamp passed) -> task loop ends
      inactive(scheduler_meta.timer_task_name)
      return
    st = time.time()
    while time.time() - st < t:
      # sleep in small slices so stop()/stop_all() take effect within ~0.5s
      if not _switch or not scheduler_meta.switch:
        scheduler.empty()
        inactive(scheduler_meta.timer_task_name)
        return
      time.sleep(0.5)
|
217
|
+
|
218
|
+
def _start(taskName: str = None):
  """Run every registered task (or just *taskName*) until all task loops exit,
  then reset the module state."""
  global _switch
  _switch = True
  _info_handler("cron job begin start...")
  workers = []
  for timer_task_name, scheduler_meta in scheduler_map.items():
    if taskName is not None and timer_task_name != taskName: continue
    print("register job: ", timer_task_name, ", cron: ", scheduler_meta.cron_str)
    worker = threading.Thread(target=_run_sched, args=(scheduler_meta, ), daemon=True)
    worker.start()
    workers.append(worker)
  # block until every task loop has finished
  for worker in workers:
    worker.join()
  _info_handler("cron job execute finished...")
  _switch = False
  scheduler_map.clear()
|
233
|
+
|
234
|
+
def _convert_cron(cron_expr):
|
235
|
+
res_cron = ""
|
236
|
+
cron_list = cron_expr.split(" ")
|
237
|
+
if len(cron_list) > 6:
|
238
|
+
for cron in cron_list[1:]:
|
239
|
+
if cron != "?":
|
240
|
+
res_cron += "%s " % cron
|
241
|
+
res_cron += "%s" % cron_list[0]
|
242
|
+
else:
|
243
|
+
res_cron = cron_expr
|
244
|
+
return res_cron
|
245
|
+
|
ctools/ctoken.py
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: UTF-8 -*-
|
3
|
+
__author__ = 'haoyang'
|
4
|
+
__date__ = '2025/1/21 16:01'
|
5
|
+
|
6
|
+
import time
|
7
|
+
import jwt
|
8
|
+
from bottle import request
|
9
|
+
|
10
|
+
from ctools.dict_wrapper import DictWrapper
|
11
|
+
|
12
|
+
token_header = 'Authorization'
|
13
|
+
|
14
|
+
def gen_token(payload: {}, secret_key, expired: int=3600) -> str:
  """
  Generate an HS256-signed JWT carrying *payload* plus an `exp` expiry claim.

  :param payload: claims dict; left unmodified (previously this function
                  mutated the caller's dict by inserting 'exp' into it)
  :param secret_key: HMAC signing secret
  :param expired: validity window in seconds from now
  :return: the encoded JWT string
  """
  # BUGFIX: build the claims on a copy instead of mutating the caller's dict
  claims = {**payload, 'exp': time.time() + expired}
  return jwt.encode(claims, secret_key, algorithm='HS256')
|
17
|
+
|
18
|
+
def get_payload(token, secret_key):
  """Decode an HS256 JWT and wrap its claims; return None when the token is
  missing, malformed, expired, or fails signature verification."""
  try:
    return DictWrapper(jwt.decode(token, secret_key, algorithms=['HS256']))
  except Exception:
    return None
|
24
|
+
|
25
|
+
def get_token(key):
  """Return the decoded payload of the JWT in the current request's
  Authorization header, or None when absent/invalid."""
  return get_payload(request.get_header(token_header), key)

def is_valid(key):
  """Return True when the current request carries a JWT that decodes successfully."""
  # CONSISTENCY: reuse get_token instead of duplicating the header lookup.
  return get_token(key) is not None
|
30
|
+
|
31
|
+
# if __name__ == '__main__':
|
32
|
+
# token = gen_token({"xx": 123}, '123')
|
33
|
+
# xx = get_payload(token, '123')
|
34
|
+
# print(xx.xx)
|
ctools/cword.py
ADDED
@@ -0,0 +1,30 @@
|
|
1
|
+
# from docxtpl import DocxTemplate
|
2
|
+
#
|
3
|
+
# # tpl = DocxTemplate('/Users/haoyang/Desktop/xxx.docx')
|
4
|
+
# tpl = DocxTemplate('/Users/haoyang/Desktop/123.doc')
|
5
|
+
#
|
6
|
+
# # 设置好各标签需要填写的内容
|
7
|
+
# context = {'xxxx': '计算机科学与技术', 'cccc': '2022050513'}
|
8
|
+
# # 将标签内容填入模板中
|
9
|
+
# tpl.render(context)
|
10
|
+
# # 保存
|
11
|
+
# tpl.save('/Users/haoyang/Desktop/new_test2.docx')
|
12
|
+
|
13
|
+
from docx import Document
|
14
|
+
|
15
|
+
def merge_word_files(input_files: [], output_file: str):
  """
  Concatenate the document bodies of several .docx files into one output file.

  NOTE(review): the target starts from a fresh Document(), whose default
  template begins with an empty paragraph — confirm the leading blank
  paragraph in the merged output is acceptable.
  """
  target = Document()
  target_body = target.element.body
  for path in input_files:
    source = Document(path)
    for element in source.element.body:
      target_body.append(element)
  target.save(output_file)
|
22
|
+
|
23
|
+
|
24
|
+
def read_word_file(input_file: str):
  """Return the plain text of a .docx file: paragraph texts joined by newlines."""
  document = Document(input_file)
  return "\n".join(paragraph.text for paragraph in document.paragraphs)
|
30
|
+
|
ctools/czip.py
ADDED
@@ -0,0 +1,130 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: UTF-8 -*-
|
3
|
+
__author__ = 'haoyang'
|
4
|
+
__date__ = '2025/1/24 08:48'
|
5
|
+
|
6
|
+
import io
|
7
|
+
import os
|
8
|
+
import time
|
9
|
+
import pyzipper
|
10
|
+
|
11
|
+
"""
|
12
|
+
target_directory = '/Users/haoyang/Desktop/知识库文件'
|
13
|
+
zip_password = None
|
14
|
+
process_directory_to_single_zip(target_directory, zip_password, "knowledge_base")
|
15
|
+
|
16
|
+
files_to_compress = [
|
17
|
+
'/path/to/file1.txt',
|
18
|
+
'/path/to/file2.pdf',
|
19
|
+
'/path/to/file3.jpg'
|
20
|
+
]
|
21
|
+
output_directory = '/Users/haoyang/Desktop'
|
22
|
+
compress_specific_files(files_to_compress, output_directory, zip_password, "my_files")
|
23
|
+
"""
|
24
|
+
def create_zip_with_files(file_dict, password=None) -> io.BytesIO:
  """Build an in-memory ZIP archive from {archive_name: content} pairs.

  Args:
    file_dict: mapping of in-archive filename -> file content
               (use os.path.relpath(file_path, start=root_dir) as the name to
               control the entry's path inside the ZIP)
    password: optional password; when given the archive uses WZ_AES encryption
  Returns:
    BytesIO positioned at offset 0 containing the finished ZIP
  """
  buffer = io.BytesIO()
  try:
    if password:
      with pyzipper.AESZipFile(buffer, 'w', compression=pyzipper.ZIP_DEFLATED, encryption=pyzipper.WZ_AES) as archive:
        archive.setpassword(password.encode('utf-8'))
        for name, content in file_dict.items():
          archive.writestr(name, content)
    else:
      with pyzipper.ZipFile(buffer, 'w', compression=pyzipper.ZIP_DEFLATED) as archive:
        for name, content in file_dict.items():
          archive.writestr(name, content)
    buffer.seek(0)
    return buffer
  except Exception:
    # never leak the buffer when archiving fails
    buffer.close()
    raise
|
49
|
+
|
50
|
+
|
51
|
+
def process_directory_to_single_zip(root_dir, password=None, zip_name=None):
  """Recursively collect every file under *root_dir* into a single ZIP.

  The archive is written inside *root_dir* itself and named
  <zip_name|archive>_<timestamp>.zip.

  Args:
    root_dir: directory tree to scan and compress
    password: optional ZIP password
    zip_name: optional base name for the ZIP file (without extension)
  """
  contents = {}
  for dirpath, _, filenames in os.walk(root_dir):
    for filename in filenames:
      file_path = os.path.join(dirpath, filename)
      try:
        with open(file_path, 'rb') as f:
          # keyed by path relative to root_dir so the ZIP mirrors the tree
          contents[os.path.relpath(file_path, start=root_dir)] = f.read()
      except Exception as e:
        print(f"Error reading {file_path}: {str(e)}")
  if not contents:
    print("No files found to compress.")
    return

  zip_buffer = None
  try:
    zip_buffer = create_zip_with_files(contents, password)
    stamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    base_name = f"{zip_name}_{stamp}.zip" if zip_name else f"archive_{stamp}.zip"
    output_path = os.path.join(root_dir, base_name)
    with open(output_path, 'wb') as out_file:
      out_file.write(zip_buffer.read())
    print(f"Created single archive: {output_path}")
  except Exception as e:
    print(f"Error creating ZIP archive: {str(e)}")
  finally:
    # explicit sentinel instead of the previous `'zip_buffer' in locals()` probe
    if zip_buffer is not None: zip_buffer.close()
|
87
|
+
|
88
|
+
|
89
|
+
def compress_specific_files(file_paths:[], output_dir:str, password=None, zip_name=None):
  """Compress an explicit list of files into one ZIP archive under *output_dir*.

  Entries are stored flat under their basenames. The archive is named
  <zip_name|first-file-stem>_<timestamp>.zip.

  Args:
    file_paths: absolute paths of the files to include
    output_dir: directory the ZIP is written to (created when missing)
    password: optional ZIP password
    zip_name: optional base name for the ZIP file (without extension)
  """
  if not file_paths:
    print("No files specified to compress.")
    return
  contents = {}
  for file_path in file_paths:
    if not os.path.isfile(file_path):
      print(f"Warning: {file_path} is not a file or doesn't exist. Skipping.")
      continue
    try:
      with open(file_path, 'rb') as f:
        # flatten: only the basename is kept inside the archive
        contents[os.path.basename(file_path)] = f.read()
    except Exception as e:
      print(f"Error reading {file_path}: {str(e)}")
  if not contents:
    print("No valid files found to compress.")
    return
  zip_buffer = None
  try:
    zip_buffer = create_zip_with_files(contents, password)
    stamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    if zip_name:
      base_name = f"{zip_name}_{stamp}.zip"
    else:
      first_file = os.path.basename(file_paths[0])
      base_name = f"{os.path.splitext(first_file)[0]}_{stamp}.zip"
    output_path = os.path.join(output_dir, base_name)
    os.makedirs(output_dir, exist_ok=True)
    with open(output_path, 'wb') as out_file:
      out_file.write(zip_buffer.read())
    print(f"Created archive: {output_path}")
  except Exception as e:
    print(f"Error creating ZIP archive: {str(e)}")
  finally:
    # explicit sentinel instead of the previous `'zip_buffer' in locals()` probe
    if zip_buffer is not None: zip_buffer.close()
|