funboost 49.7-py3-none-any.whl → 49.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- funboost/__init__.py +1 -1
- funboost/assist/celery_helper.py +1 -1
- funboost/concurrent_pool/async_pool_executor.py +23 -17
- funboost/constant.py +23 -0
- funboost/consumers/base_consumer.py +15 -8
- funboost/consumers/grpc_consumer.py +102 -0
- funboost/consumers/kafka_consumer.py +4 -2
- funboost/consumers/kafka_consumer_manually_commit.py +7 -2
- funboost/consumers/mysql_cdc_consumer.py +95 -0
- funboost/contrib/cdc/__init__.py +0 -0
- funboost/contrib/cdc/mysql2mysql.py +44 -0
- funboost/core/booster.py +25 -2
- funboost/core/exceptions.py +3 -0
- funboost/core/func_params_model.py +7 -4
- funboost/core/msg_result_getter.py +8 -7
- funboost/factories/broker_kind__publsiher_consumer_type_map.py +10 -1
- funboost/publishers/base_publisher.py +5 -6
- funboost/publishers/grpc_publisher.py +53 -0
- funboost/publishers/kafka_publisher.py +3 -1
- funboost/publishers/mysql_cdc_publisher.py +24 -0
- funboost/timing_job/timing_push.py +3 -1
- {funboost-49.7.dist-info → funboost-49.9.dist-info}/METADATA +69 -33
- {funboost-49.7.dist-info → funboost-49.9.dist-info}/RECORD +27 -30
- funboost/utils/class_utils2.py +0 -94
- funboost/utils/custom_pysnooper.py +0 -149
- funboost/utils/pysnooper_ydf/__init__.py +0 -32
- funboost/utils/pysnooper_ydf/pycompat.py +0 -82
- funboost/utils/pysnooper_ydf/tracer.py +0 -479
- funboost/utils/pysnooper_ydf/utils.py +0 -101
- funboost/utils/pysnooper_ydf/variables.py +0 -133
- funboost/utils/times/__init__.py +0 -85
- funboost/utils/times/version.py +0 -1
- {funboost-49.7.dist-info → funboost-49.9.dist-info}/LICENSE +0 -0
- {funboost-49.7.dist-info → funboost-49.9.dist-info}/WHEEL +0 -0
- {funboost-49.7.dist-info → funboost-49.9.dist-info}/entry_points.txt +0 -0
- {funboost-49.7.dist-info → funboost-49.9.dist-info}/top_level.txt +0 -0
funboost/__init__.py
CHANGED
funboost/assist/celery_helper.py
CHANGED
funboost/concurrent_pool/async_pool_executor.py
CHANGED

@@ -6,6 +6,7 @@ import threading
 import time
 import traceback
 from threading import Thread
+import traceback

 from funboost.concurrent_pool.base_pool_type import FunboostBaseConcurrentPool
 from funboost.core.loggers import FunboostFileLoggerMixin
@@ -53,14 +54,17 @@ class AsyncPoolExecutor(FunboostFileLoggerMixin,FunboostBaseConcurrentPool):
    to make the api behave like a thread pool. The best-performing approach would be to make submit an async def too, with production and consumption running on the same thread and the same loop, but that would break compatibility along the call chain, so the calling style would no longer match a thread pool.
    """

-    def __init__(self, size, loop=None):
+    def __init__(self, size, specify_async_loop=None,
+                 is_auto_start_specify_async_loop_in_child_thread=True):
        """

        :param size: the number of coroutine tasks running concurrently.
-        :param loop:
+        :param specify_async_loop: a loop can be specified; the connection pool of an async third-party package cannot be used from a different loop than the one it was created on.
        """
        self._size = size
-        self.loop = loop or asyncio.new_event_loop()
+        self._specify_async_loop = specify_async_loop
+        self._is_auto_start_specify_async_loop_in_child_thread = is_auto_start_specify_async_loop_in_child_thread
+        self.loop = specify_async_loop or asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)
        self._diff_init()
        # self._lock = threading.Lock()
@@ -90,7 +94,7 @@ class AsyncPoolExecutor(FunboostFileLoggerMixin,FunboostBaseConcurrentPool):

    def submit(self, func, *args, **kwargs):
        future = asyncio.run_coroutine_threadsafe(self._produce(func, *args, **kwargs), self.loop)  # this run_coroutine_threadsafe call also has a drawback: it costs a great deal of performance.
-        future.result()  # prevents putting items in too fast; submit blocks once the queue size is exceeded.
+        future.result()  # prevents putting items in too fast; submit blocks once the queue size is exceeded. The backpressure is there to stop tens of millions of queued messages from being drained into memory in one burst.

    async def _produce(self, func, *args, **kwargs):
        await self._queue.put((func, args, kwargs))
@@ -119,12 +123,22 @@ class AsyncPoolExecutor(FunboostFileLoggerMixin,FunboostBaseConcurrentPool):
        # asyncio.set_event_loop(self.loop)
        # self.loop.run_until_complete(asyncio.wait([self._consume() for _ in range(self._size)], loop=self.loop))
        # self._can_be_closed_flag = True
-
-
+        if self._specify_async_loop is None:
+            for _ in range(self._size):
+                self.loop.create_task(self._consume())
+        else:
+            for _ in range(self._size):
+                asyncio.run_coroutine_threadsafe(self._consume(), self.loop)  # the function asyncio provides specifically for submitting tasks safely to an event loop from another thread.
+        if self._specify_async_loop is None:
            self.loop.run_forever()
-
-        self.
-
+        else:
+            if self._is_auto_start_specify_async_loop_in_child_thread:
+                try:
+                    self.loop.run_forever()  # a user-specified loop must not be started more than once.
+                except Exception as e:
+                    self.logger.warning(f'{e} {traceback.format_exc()}')  # when several threads share one loop, the loop must not be started repeatedly, otherwise an error is raised.
+            else:
+                pass  # the user has to start loop.run_forever() manually in their own business code.


        # def shutdown(self):
@@ -139,14 +153,6 @@ class AsyncPoolExecutor(FunboostFileLoggerMixin,FunboostBaseConcurrentPool):



-
-
-
-
-
-
-
-
 if __name__ == '__main__':
    def test_async_pool_executor():
        from funboost.concurrent_pool import CustomThreadPoolExecutor as ThreadPoolExecutor
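Taken together these hunks let a caller hand AsyncPoolExecutor an existing event loop instead of a private one, so async clients whose connection pools are bound to a single loop (aiohttp, asyncpg and the like) can share that loop with funboost. A hedged sketch of direct use, assuming the pool's consuming thread is spun up by _diff_init exactly as before; the work coroutine is illustrative:

import asyncio
from funboost.concurrent_pool.async_pool_executor import AsyncPoolExecutor

my_loop = asyncio.new_event_loop()  # the same loop your aiohttp/asyncpg clients run on

async def work(x):
    await asyncio.sleep(0.1)
    print('done', x)

# with the defaults the pool calls my_loop.run_forever() in its own child thread;
# pass is_auto_start_specify_async_loop_in_child_thread=False to start the loop yourself
pool = AsyncPoolExecutor(size=8, specify_async_loop=my_loop)
for i in range(20):
    pool.submit(work, i)  # blocks when the internal queue is full (backpressure)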
funboost/constant.py
CHANGED
@@ -101,6 +101,8 @@ class BrokerEnum:

    HTTP = 'HTTP'  # implemented over http; publishing uses urllib3 and the consuming server is implemented with aiohttp.server. Distributed use is supported but persistence is not; the advantage is that no message-queue middleware needs to be installed.

+    GRPC = 'GRPC'  # uses the well-known grpc as the broker; the sync_call method can fetch the grpc result synchronously. Vastly simpler than hand-writing a native grpc client and server.
+
    NATS = 'NATS'  # the high-performance middleware nats; the middleware server side performs very well.

    TXT_FILE = 'TXT_FILE'  # a txt file on disk as the message queue; supports single-machine persistence but not multi-machine distribution. Not recommended; use sqlite instead.
@@ -119,6 +121,27 @@ class BrokerEnum:

    NAMEKO = 'NAMEKO'  # funboost supports the python microservice framework nameko; users can run python nameko microservices without having to master the nameko api syntax.

+
+    """
+    MYSQL_CDC is funboost's magical, one-of-a-kind broker middleware.
+    mysql binlog cdc records automatically become messages; users never publish messages by hand and only write the logic that handles the binlog content,
+    so one line of code gives a lightweight mysql2mysql, mysql2kafka, mysql2rabbitmq and so on.
+    Unlike the other middlewares, no manual publishing is needed: every insert, update and delete against the database automatically becomes a funboost message.
+    It is close to a lightweight replacement for canal and flink-cdc.
+
+    By the same reasoning, a log file can be turned into a broker too: as soon as another program writes to the log file, funboost consumption is triggered,
+    and the function logic can then forward the message to kafka (the famous elk stack already exists, of course; this is just an example scenario showing the flexibility of funboost brokers).
+
+    Log files, filesystem changes (inotify), and even hardware sensor signals can all be wrapped into a funboost Broker by following the chapter 4.21 docs.
+
+    This fully shows that funboost can act as a general, event-driven function scheduling platform, rather than merely the traditional message-driven kind like celery.
+    """
+    """
+    funboost can consume the binlog messages that canal sends to kafka, and can also capture cdc data itself without depending on canal.
+    """
+    MYSQL_CDC = 'MYSQL_CDC'
+
+

 class ConcurrentModeEnum:
    THREADING = 'threading'  # runs in threads; also compatible with async def coroutine functions.
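Every BrokerEnum value, the new GRPC and MYSQL_CDC included, is selected the same way: by passing the constant as broker_kind in BoosterParams. A minimal sketch using funboost's documented decorator API; the queue name is a placeholder:

from funboost import boost, BoosterParams, BrokerEnum

@boost(BoosterParams(queue_name='demo_queue', broker_kind=BrokerEnum.REDIS))
def f(x):
    print(x * 2)

if __name__ == '__main__':
    f.consume()
    f.push(3)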
funboost/consumers/base_consumer.py
CHANGED

@@ -637,10 +637,13 @@ class AbstractConsumer(LoggerLevelSetterMixin, metaclass=abc.ABCMeta, ):
        """
        self._do_not_delete_extra_from_msg = True

-    def
+    def _frame_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus, kw: dict):
        pass

-
+    def user_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus, ):  # this one can be overridden by subclassing
+        pass
+
+    async def aio_user_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus, ):  # this one can be overridden by subclassing
        pass

    def _convert_real_function_only_params_by_conusuming_function_kind(self, function_only_params: dict, extra_params: dict):
@@ -729,9 +732,10 @@ class AbstractConsumer(LoggerLevelSetterMixin, metaclass=abc.ABCMeta, ):

                with self._lock_for_count_execute_task_times_every_unit_time:
                    self.metric_calculation.cal(t_start_run_fun, current_function_result_status)
-                self.
+                self._frame_custom_record_process_info_func(current_function_result_status, kw)
+                self.user_custom_record_process_info_func(current_function_result_status, )  # results can be recorded via either customization route; subclassing is recommended over specifying user_custom_record_process_info_func in boost
                if self.consumer_params.user_custom_record_process_info_func:
-                    self.consumer_params.user_custom_record_process_info_func(current_function_result_status)
+                    self.consumer_params.user_custom_record_process_info_func(current_function_result_status, )
        except BaseException as e:
            log_msg = f' error 严重错误 {type(e)} {e} '
            # self.logger.critical(msg=f'{log_msg} \n', exc_info=True)
@@ -888,10 +892,11 @@ class AbstractConsumer(LoggerLevelSetterMixin, metaclass=abc.ABCMeta, ):
                async with self._async_lock_for_count_execute_task_times_every_unit_time:
                    self.metric_calculation.cal(t_start_run_fun, current_function_result_status)

-                self.
-
+                self._frame_custom_record_process_info_func(current_function_result_status)
+                self.user_custom_record_process_info_func(current_function_result_status, )  # results can be recorded via either customization route; overriding by subclassing as in docs section 4.21.b is recommended
+                await self.aio_user_custom_record_process_info_func(current_function_result_status, )
                if self.consumer_params.user_custom_record_process_info_func:
-                    self.consumer_params.user_custom_record_process_info_func(current_function_result_status)
+                    self.consumer_params.user_custom_record_process_info_func(current_function_result_status, )

        except BaseException as e:
            log_msg = f' error 严重错误 {type(e)} {e} '
@@ -1126,7 +1131,9 @@ class ConcurrentModeDispatcher(FunboostFileLoggerMixin):
        # pool_type = ProcessPoolExecutor
        if self._concurrent_mode == ConcurrentModeEnum.ASYNC:
            self.consumer._concurrent_pool = self.consumer.consumer_params.specify_concurrent_pool or pool_type(
-                self.consumer.consumer_params.concurrent_num,
+                self.consumer.consumer_params.concurrent_num,
+                specify_async_loop=self.consumer.consumer_params.specify_async_loop,
+                is_auto_start_specify_async_loop_in_child_thread=self.consumer.consumer_params.is_auto_start_specify_async_loop_in_child_thread)
        else:
            # print(pool_type)
            self.consumer._concurrent_pool = self.consumer.consumer_params.specify_concurrent_pool or pool_type(self.consumer.consumer_params.concurrent_num)
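These hunks give three hooks with one role: the frame-internal _frame_custom_record_process_info_func (used by the new grpc consumer further down), the overridable user_custom_record_process_info_func / aio_user_custom_record_process_info_func pair, and the existing BoosterParams callback. A hedged sketch of the callback route, which needs no subclassing; the queue name and the persistence target are placeholders:

from funboost import boost, BoosterParams, FunctionResultStatus

def record_status(status: FunctionResultStatus):
    # persist the run record wherever you like, e.g. a mysql table or elasticsearch
    print(status.get_status_dict())

@boost(BoosterParams(queue_name='record_demo_queue',
                     user_custom_record_process_info_func=record_status))
def f(x):
    return x * 2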
funboost/consumers/grpc_consumer.py
ADDED

@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+# @Author : ydf
+# @Time : 2023/8/8 0008 13:32
+
+import abc
+import threading
+import grpc
+import time
+
+from funboost import FunctionResultStatus
+from funboost.assist.grpc_helper import funboost_grpc_pb2_grpc, funboost_grpc_pb2
+from funboost.consumers.base_consumer import AbstractConsumer
+from funboost.core.serialization import Serialization
+from funboost.core.exceptions import FunboostWaitRpcResultTimeout
+from funboost.concurrent_pool.flexible_thread_pool import FlexibleThreadPool
+
+
+class FutureStatusResult:
+    def __init__(self, call_type: str):
+        self.execute_finish_event = threading.Event()
+        self.staus_result_obj: FunctionResultStatus = None
+        self.call_type = call_type  # sync_call or publish
+
+    def set_finish(self):
+        self.execute_finish_event.set()
+
+    def wait_finish(self, rpc_timeout):
+        return self.execute_finish_event.wait(rpc_timeout)
+
+    def set_staus_result_obj(self, staus_result_obj: FunctionResultStatus):
+        self.staus_result_obj = staus_result_obj
+
+    def get_staus_result_obj(self):
+        return self.staus_result_obj
+
+
+
+
+class GrpcConsumer(AbstractConsumer, ):
+    """
+    grpc as broker
+    """
+    BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'host': '127.0.0.1', 'port': None}
+
+    def custom_init(self):
+        class FunboostGrpcServicer(funboost_grpc_pb2_grpc.FunboostBrokerServiceServicer):
+            """
+            implementation class of HelloService
+            """
+
+            def Call(this, request, context):
+                """
+                implements the SayHello method
+                """
+                future_status_result = FutureStatusResult(call_type=request.call_type)
+                kw = {'body': request.json_req, 'future_status_result': future_status_result, }
+                self._submit_task(kw)
+                if request.call_type == "sync_call":
+                    if future_status_result.wait_finish(self.consumer_params.rpc_timeout):  # wait for the consumption result from the concurrent pool
+                        return funboost_grpc_pb2.FunboostGrpcResponse(json_resp=Serialization.to_json_str(
+                            future_status_result.get_staus_result_obj().get_status_dict(without_datetime_obj=True)))
+                    else:
+                        self.logger.error(f'wait rpc data timeout')
+                        context.set_code(grpc.StatusCode.DEADLINE_EXCEEDED)
+                        context.set_details(f'wait rpc data timeout')
+                        # raise FunboostWaitRpcResultTimeout(f'wait rpc data timeout')
+                else:
+                    return funboost_grpc_pb2.FunboostGrpcResponse(json_resp='{"publish_status":"ok"}')
+
+        self.GRPC_SERVICER_CLS = FunboostGrpcServicer
+
+    def _shedual_task(self):
+        server = grpc.server(self.concurrent_pool)
+
+        # register the service
+        funboost_grpc_pb2_grpc.add_FunboostBrokerServiceServicer_to_server(self.GRPC_SERVICER_CLS(), server)
+
+        # bind the port
+        port = self.consumer_params.broker_exclusive_config['port']
+        if port is None:
+            raise ValueError('please specify port')
+        listen_addr = f'[::]:{port}'
+        server.add_insecure_port(listen_addr)
+
+        # start the server
+        server.start()
+        print(f"GRPC Has started. listening on: {listen_addr}")
+
+        while True:
+            time.sleep(100)  # keep the server running
+
+    def _confirm_consume(self, kw):
+        pass
+
+    def _requeue(self, kw):
+        pass
+
+    def _frame_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus, kw):
+        future_status_result: FutureStatusResult = kw['future_status_result']
+        if future_status_result.call_type == "sync_call":
+            future_status_result.set_staus_result_obj(current_function_result_status)
+            future_status_result.set_finish()  # this is the most important, core step: the moment the concurrent pool finishes processing the function, the grpc server side is told immediately that processing is complete.
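For orientation, a hedged sketch of standing this consumer up from user code. The consumer side below follows what this diff shows (the port comes from broker_exclusive_config and must be set); the publisher-side sync_call method is only named in constant.py, so nothing about its signature is assumed here:

from funboost import boost, BoosterParams, BrokerEnum

@boost(BoosterParams(queue_name='grpc_demo_queue',
                     broker_kind=BrokerEnum.GRPC,
                     broker_exclusive_config={'host': '127.0.0.1', 'port': 50051}))
def add(a, b):
    return a + b

if __name__ == '__main__':
    add.consume()  # starts a grpc server listening on [::]:50051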
funboost/consumers/kafka_consumer.py
CHANGED

@@ -22,7 +22,7 @@ class KafkaConsumer(AbstractConsumer):
    you can make the consuming function sleep 60 seconds internally, abruptly stop the consuming code, and use kafka-consumer-groups.sh --bootstrap-server 127.0.0.1:9092 --describe --group funboost to verify the difference between auto-ack and manual-ack consumption.
    """

-    BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'group_id': 'funboost_kafka', 'auto_offset_reset': 'earliest'}
+    BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'group_id': 'funboost_kafka', 'auto_offset_reset': 'earliest','num_partitions':10,'replication_factor':1,}
    # not_all_brokers_general_settings config; the broker-specific middleware parameters supported are group_id and auto_offset_reset
    """
    introduction to auto_offset_reset
@@ -35,7 +35,9 @@ class KafkaConsumer(AbstractConsumer):
    def _shedual_task(self):
        try:
            admin_client = KafkaPythonImporter().KafkaAdminClient(bootstrap_servers=BrokerConnConfig.KAFKA_BOOTSTRAP_SERVERS)
-            admin_client.create_topics([KafkaPythonImporter().NewTopic(self._queue_name,
+            admin_client.create_topics([KafkaPythonImporter().NewTopic(self._queue_name,
+                                                                       self.consumer_params.broker_exclusive_config['num_partitions'],
+                                                                       self.consumer_params.broker_exclusive_config['replication_factor'])])
            # admin_client.create_partitions({self._queue_name: NewPartitions(total_count=16)})
        except KafkaPythonImporter().TopicAlreadyExistsError:
            pass
funboost/consumers/kafka_consumer_manually_commit.py
CHANGED

@@ -2,6 +2,9 @@
 # @Author : ydf
 # @Time : 2021/4/18 0008 13:32

+"""
+this makes it possible for a kafka topic with a single partition to be consumed by funboost with 200 threads, with the consuming process force-restartable at will, without losing messages
+"""

 import json
 import threading
@@ -28,7 +31,7 @@ class KafkaConsumerManuallyCommit(AbstractConsumer):
    you can make the consuming function sleep 60 seconds internally, abruptly stop the consuming code, and use kafka-consumer-groups.sh --bootstrap-server 127.0.0.1:9092 --describe --group frame_group to verify the difference between auto-ack and manual-ack consumption.
    """

-    BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'group_id': '
+    BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'group_id': 'funboost_kafka', 'auto_offset_reset': 'earliest','num_partitions':10,'replication_factor':1,}

    def custom_init(self):
        self._lock_for_operate_offset_dict = threading.Lock()
@@ -37,7 +40,9 @@ class KafkaConsumerManuallyCommit(AbstractConsumer):

        try:
            admin_client = KafkaPythonImporter().KafkaAdminClient(bootstrap_servers=BrokerConnConfig.KAFKA_BOOTSTRAP_SERVERS)
-            admin_client.create_topics([KafkaPythonImporter().NewTopic(self._queue_name,
+            admin_client.create_topics([KafkaPythonImporter().NewTopic(self._queue_name,
+                                                                       self.consumer_params.broker_exclusive_config['num_partitions'],
+                                                                       self.consumer_params.broker_exclusive_config['replication_factor'])])
            # admin_client.create_partitions({self._queue_name: NewPartitions(total_count=16)})
        except KafkaPythonImporter().TopicAlreadyExistsError:
            pass
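Both kafka consumers now create a missing topic with per-queue partition and replication settings instead of library defaults. A hedged sketch of overriding them through broker_exclusive_config; the values are placeholders:

from funboost import boost, BoosterParams, BrokerEnum

@boost(BoosterParams(queue_name='kafka_demo_topic',
                     broker_kind=BrokerEnum.KAFKA,
                     broker_exclusive_config={'group_id': 'my_group',
                                              'auto_offset_reset': 'earliest',
                                              'num_partitions': 32,
                                              'replication_factor': 2}))
def handle_msg(x):
    print(x)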
funboost/consumers/mysql_cdc_consumer.py
ADDED

@@ -0,0 +1,95 @@
+# funboost/consumers/cdc_consumer.py
+
+import time
+import typing
+from funboost.consumers.base_consumer import AbstractConsumer
+from funboost.core.loggers import develop_logger
+
+# pip install mysql-replication==1.0.9
+from pymysqlreplication import BinLogStreamReader
+from pymysqlreplication.row_event import (
+    DeleteRowsEvent,
+    UpdateRowsEvent,
+    WriteRowsEvent,
+)
+
+
+
+class MysqlCdcConsumer(AbstractConsumer):
+    """
+    A consumer that listens to MySQL binlog events (CDC) and treats them as tasks.
+    This broker is consumer-driven; it automatically generates tasks from database changes.
+    """
+
+    BROKER_EXCLUSIVE_CONFIG_DEFAULT = {
+        'BinLogStreamReaderConfig': {},
+    }
+
+    def custom_init(self):
+        """Validates the essential configuration."""
+        config = self.consumer_params.broker_exclusive_config['BinLogStreamReaderConfig']
+        if not config.get('connection_settings') or not config.get('server_id'):
+            raise ValueError("For 'funboost_cdc' broker, 'connection_settings' and 'server_id' must be provided in 'broker_exclusive_config'.")
+        self.logger.info("FunboostCdcConsumer initialized. Ready to listen for binlog events.")
+        self._bin_log_stream_reader_config = config
+
+    def _shedual_task(self):
+        """
+        This is the main loop that connects to MySQL, reads binlog events,
+        and submits them as tasks to the funboost engine.
+        """
+        # Prepare the arguments for BinLogStreamReader by filtering out None values
+
+
+        stream = BinLogStreamReader(**self._bin_log_stream_reader_config)
+
+        try:
+            for binlogevent in stream:
+                event_type = None
+                if isinstance(binlogevent, WriteRowsEvent):
+                    event_type = 'INSERT'
+                elif isinstance(binlogevent, UpdateRowsEvent):
+                    event_type = 'UPDATE'
+                elif isinstance(binlogevent, DeleteRowsEvent):
+                    event_type = 'DELETE'
+
+                if event_type:
+                    for row in binlogevent.rows:
+                        # Construct a clear, flat dictionary to be used as function kwargs
+                        task_body = {
+                            'event_type': event_type,
+                            'schema': binlogevent.schema,
+                            'table': binlogevent.table,
+                            'timestamp': binlogevent.timestamp,
+                        }
+                        # Unpack row data ('values' or 'before_values'/'after_values')
+                        task_body.update(row)
+
+                        # Submit the structured data as a task to the funboost engine
+                        self._submit_task({'body': task_body})
+        except Exception as e:
+            self.logger.critical(f"An error occurred in the binlog stream: {e}", exc_info=True)
+            # A small delay before potentially restarting or exiting, depending on supervisor.
+            time.sleep(10)
+        finally:
+            self.logger.info("Closing binlog stream.")
+            stream.close()
+
+    def _confirm_consume(self, kw: dict):
+        """
+        Confirmation is implicitly handled by the BinLogStreamReader's position management.
+        When resume_stream=True, the library automatically saves its position.
+        Funboost's ACK here confirms that the *processing* of the event is complete.
+        """
+        pass
+
+    def _requeue(self, kw: dict):
+        """
+        Requeuing a binlog event is not a standard operation.
+        Funboost's built-in retry mechanism should be used for transient processing errors.
+        If a task fails permanently, it will be ACK'd after exhausting retries,
+        and the binlog position will eventually advance.
+        """
+        self.logger.warning(f"Requeuing a CDC event is not supported. "
+                            f"Use funboost's retry mechanism for processing failures. Task: {kw.get('body')}")
+        pass
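Since _shedual_task builds the task body from event_type, schema, table and timestamp plus the unpacked row dict, a consuming function bound to this broker receives those fields as keyword arguments. A hedged sketch; the row keys ('values', 'before_values', 'after_values') follow the mysql-replication library, and all connection values are placeholders:

from funboost import boost, BoosterParams, BrokerEnum

@boost(BoosterParams(
    queue_name='cdc_demo_queue',
    broker_kind=BrokerEnum.MYSQL_CDC,
    broker_exclusive_config={'BinLogStreamReaderConfig': {
        'connection_settings': {'host': '127.0.0.1', 'port': 3306, 'user': 'repl', 'passwd': 'xx'},
        'server_id': 101,  # must be unique among replicas of the source mysql
    }}))
def handle_change(event_type, schema, table, timestamp, values=None, before_values=None, after_values=None):
    print(event_type, schema, table, values or after_values)

if __name__ == '__main__':
    handle_change.consume()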
funboost/contrib/cdc/__init__.py
File without changes
funboost/contrib/cdc/mysql2mysql.py
ADDED

@@ -0,0 +1,44 @@
+import dataset
+from typing import Dict
+
+class MySql2Mysql:
+    """
+    saves the mysql binlog message data, wrapped by dataset, into the target database
+    with this contrib class, a single line of user code implements mysql2mysql over cdc; it is very convenient for automatically syncing source table a of database instance 1 into target table a of database instance 2 in real time
+
+    this is only a contrib class; users may insert into tables and clean data however they like, using this as a reference example. dataset saving a dict as one mysql row really is convenient.
+    users can also do their own batch inserts into the target table; anything goes. this class is not mandatory, it is a demonstration.
+    """
+    def __init__(self, primary_key: str,
+                 target_table_name: str,
+                 target_sink_db: dataset.Database, ):
+        self.primary_key = primary_key
+        self.target_table_name = target_table_name
+        self.target_sink_db = target_sink_db
+
+    def sync_data(self, event_type: str,
+                  schema: str,
+                  table: str,
+                  timestamp: int,
+                  row_data: Dict, ):
+        # e.g. insert the data from this table unchanged into the testdb7.users table
+        target_table: dataset.Table = self.target_sink_db[self.target_table_name]  # dataset automatically gets or creates the table by its name
+        print(f"接收到事件: {event_type} on schema: {schema}, table: {table}, timestamp: {timestamp}")
+
+        if event_type == 'INSERT':
+            # `row_data` contains a 'values' dict
+            data_to_insert = row_data['values']
+            target_table.upsert(data_to_insert, [self.primary_key])
+            print(f" [INSERT] 成功同步数据: {data_to_insert}")
+
+        elif event_type == 'UPDATE':
+            # `row_data` contains 'before_values' and 'after_values'
+            data_to_update = row_data['after_values']
+            target_table.upsert(data_to_update, [self.primary_key])
+            print(f" [UPDATE] 成功同步数据: {data_to_update}")
+
+        elif event_type == 'DELETE':
+            # `row_data` contains a 'values' dict, i.e. the data of the deleted row
+            data_to_delete = row_data['values']
+            target_table.delete(**{self.primary_key: data_to_delete[self.primary_key]})
+            print(f" [DELETE] 成功同步数据: {data_to_delete}")
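Combined with a consuming function like the handle_change sketch earlier, MySql2Mysql reduces the function body to a single sync_data call. A hedged sketch; the dataset connection URL and table names are placeholders:

import dataset
from funboost.contrib.cdc.mysql2mysql import MySql2Mysql

syncer = MySql2Mysql(primary_key='id', target_table_name='users',
                     target_sink_db=dataset.connect('mysql+pymysql://user:pwd@target-host:3306/testdb7'))

def handle_change(event_type, schema, table, timestamp, values=None, before_values=None, after_values=None):
    # rebuild the row dict in the shape sync_data expects, dropping absent keys
    row_data = {k: v for k, v in (('values', values), ('before_values', before_values),
                                  ('after_values', after_values)) if v is not None}
    syncer.sync_data(event_type, schema, table, timestamp, row_data)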
funboost/core/booster.py
CHANGED
@@ -142,6 +142,7 @@ class Booster:

    def _safe_push(self, *func_args, **func_kwargs) -> AsyncResult:
        """ multi-process safe; under fork multiprocessing (as opposed to spawn), some packages cannot share one connection across processes, e.g. kafka"""
+        # print( self.__dict__)
        consumer = BoostersManager.get_or_create_booster_by_queue_name(self.queue_name).consumer
        return consumer.publisher_of_same_queue.push(*func_args, **func_kwargs)

@@ -155,14 +156,14 @@ class Booster:
        """publishes a message in the asyncio ecosystem; since a synchronous push costs less than 1 millisecond, the synchronous push method can basically be called directly inside asyncio code,
        but to better guard against network jitter (e.g. publishing to a message queue over the public internet taking 10 milliseconds), aio_push can be used"""
        async_result = await simple_run_in_executor(self.push, *func_args, **func_kwargs)
-        return AioAsyncResult(async_result.task_id, )
+        return AioAsyncResult(async_result.task_id, timeout=async_result.timeout)

    async def aio_publish(self, msg: typing.Union[str, dict], task_id=None,
                          priority_control_config: PriorityConsumingControlConfig = None) -> AioAsyncResult:
        """publishes a message in the asyncio ecosystem; since a synchronous push costs less than 1 millisecond, the synchronous push method can basically be called directly inside asyncio code,
        but to better guard against network jitter (e.g. publishing to a message queue over the public internet taking 10 milliseconds), aio_push can be used"""
        async_result = await simple_run_in_executor(self.publish, msg, task_id, priority_control_config)
-        return AioAsyncResult(async_result.task_id, )
+        return AioAsyncResult(async_result.task_id, timeout=async_result.timeout)

    # noinspection PyMethodMayBeStatic
    def multi_process_consume(self, process_num=1):
@@ -207,6 +208,23 @@ class Booster:
        from funboost.core.fabric_deploy_helper import fabric_deploy
        fabric_deploy(self, **params)

+    def __getstate__(self):
+        state = {}
+        state['queue_name'] = self.boost_params.queue_name
+        return state
+
+    def __setstate__(self, state):
+        """a very advanced trick: it gives the booster object pickle serialization and deserialization; the design is extremely clever, a stroke of genius.
+        this way, when redis is used as the apscheduler jobstore, aps_obj.add_job(booster.push, ...) works normally,
+        so no "booster object cannot be pickle-serialized" error is raised.
+
+        this deserialization does not obsess over how to deserialize a socket or threading.Lock; it swaps the concept and sidesteps the hard problem: identifier-based proxy deserialization
+        """
+        _booster = BoostersManager.get_or_create_booster_by_queue_name(state['queue_name'])
+        self.__dict__.update(_booster.__dict__)
+
+
+

boost = Booster  # the consuming function after @boost. If the methods do not autocomplete, just use Booster. The 2024 pycharm versions act up and stop autocompleting .consume .push and the like on @boost functions.
task_deco = boost  # both decorator names work; task_deco is the original name, kept for compatibility.
@@ -214,8 +232,13 @@ task_deco = boost  # both decorator names work; task_deco is the original name, kept for compatibility.

class BoostersManager:
    """
+    the BoostersManager class was added later; it was not planned from the very beginning.
+
    when a consuming function generates a Booster object, BoostersManager.regist_booster is called automatically, saving the queue name and input-parameter info into the pid_queue_name__booster_map dict.
    with this class, booster objects can be created without having to use the decorator.
+
+    if you want to start consuming on all functions at once, instead of starting f1.consume() f2.consume() f3.consume() one by one,
+    you can call BoostersManager.consume_all_queues()
    """

    # the pid_queue_name__booster_map dict stores {(process id, queue_name): Booster object}
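What __getstate__/__setstate__ buy in practice: a bound method such as booster.push can now be pickled, which serialized jobstores require. A hedged sketch with apscheduler's redis jobstore, which is exactly the scenario the docstring names; my_boosted_func and the connection values are placeholders:

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.jobstores.redis import RedisJobStore

scheduler = BackgroundScheduler(jobstores={'default': RedisJobStore(host='127.0.0.1', port=6379)})
# pickling my_boosted_func.push pickles the Booster, which now reduces itself to its queue_name
scheduler.add_job(my_boosted_func.push, 'interval', seconds=30, args=(1,))
scheduler.start()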
funboost/core/exceptions.py
CHANGED
@@ -17,6 +17,9 @@ class FunboostWaitRpcResultTimeout(FunboostException):
 class FunboostRpcResultError(FunboostException):
    """the rpc result is in an error state"""

+class HasNotAsyncResult(FunboostException):
+    pass
+
 class ExceptionForPushToDlxqueue(FunboostException):
    """the framework detected an ExceptionForPushToDlxqueue error; the message is published to the dead-letter queue"""

funboost/core/func_params_model.py
CHANGED

@@ -57,7 +57,7 @@ class BaseJsonAbleModel(BaseModel):

    def json_str_value(self):
        try:
-            return json.dumps(self.get_str_dict(), ensure_ascii=False, )
+            return json.dumps(dict(self.get_str_dict()), ensure_ascii=False, )
        except TypeError as e:
            return str(self.get_str_dict())

@@ -134,8 +134,10 @@ class BoosterParams(BaseJsonAbleModel):
    concurrent_mode: str = ConcurrentModeEnum.THREADING  # concurrency mode; supports THREADING, GEVENT, EVENTLET, ASYNC and SINGLE_THREAD concurrency; multi_process_consume stacks coroutine/thread concurrency on top of multiple processes for explosive performance.
    concurrent_num: int = 50  # concurrency amount; the kind of concurrency is decided by concurrent_mode
    specify_concurrent_pool: typing.Optional[FunboostBaseConcurrentPool] = None  # use a specified thread/coroutine pool; several consumers can share one pool to save threads. When it is not None, threads_num has no effect.
+
    specify_async_loop: typing.Optional[asyncio.AbstractEventLoop] = None  # the specified async event loop; only takes effect when the concurrency mode is async. Some packages such as aiohttp cannot send requests and instantiate the http client on two different loops, so the loop can be passed in.
-
+    is_auto_start_specify_async_loop_in_child_thread: bool = True  # whether to automatically start the specified async loop in the child thread of funboost's asyncio concurrent pool; only takes effect when the concurrency mode is async. If False, the user starts their own loop.run_forever() manually in their own code.
+
    """qps:
    powerful rate control: set the number of function executions within 1 second; it can be a decimal such as 0.01, meaning once every 100 seconds, or 50, meaning 50 executions per second. None disables rate limiting. When qps is set there is no need to specify the concurrency amount; funboost adaptively and dynamically resizes the concurrent pool."""
    qps: typing.Union[float, int, None] = None
@@ -186,7 +188,8 @@ class BoosterParams(BaseJsonAbleModel):
    user_custom_record_process_info_func: typing.Optional[typing.Callable] = None  # a user-defined function for saving the message-processing record somewhere, e.g. a mysql database; the function accepts exactly one argument of type FunctionResultStatus, which the user can print

    is_using_rpc_mode: bool = False  # whether to use rpc mode; the publisher side can then obtain the consumer side's result callback, at some performance cost; async_result.result waits and blocks the current thread.
-    rpc_result_expire_seconds: int =
+    rpc_result_expire_seconds: int = 1800  # expiry time of the rpc result kept in redis.
+    rpc_timeout: int = 1800  # in rpc mode, the timeout for waiting for the rpc result to come back

    delay_task_apscheduler_jobstores_kind: Literal['redis', 'memory'] = 'redis'  # which jobstore the apscheduler object for delayed tasks uses; redis and memory are both supported as jobstores

@@ -354,7 +357,7 @@ class PublisherParams(BaseJsonAbleModel):
    # func_params_is_pydantic_model: bool = False  # funboost also supports function params being a pydantic model type; funboost converts it itself before publishing and when taking the message out.
    publish_msg_log_use_full_msg: bool = False  # for the log of message content published to the queue, whether to show the full message body or only the function arguments.
    consuming_function_kind: typing.Optional[str] = None  # automatically generated information; users do not need to pass it.
-
+    rpc_timeout: int = 1800  # in rpc mode, the timeout for waiting for the rpc result to come back

 if __name__ == '__main__':
    from funboost.concurrent_pool import FlexibleThreadPool