funboost 49.8-py3-none-any.whl → 50.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (36)
  1. funboost/__init__.py +1 -1
  2. funboost/assist/celery_helper.py +1 -1
  3. funboost/concurrent_pool/async_pool_executor.py +7 -2
  4. funboost/constant.py +23 -0
  5. funboost/consumers/base_consumer.py +12 -7
  6. funboost/consumers/grpc_consumer.py +102 -0
  7. funboost/consumers/kafka_consumer.py +4 -2
  8. funboost/consumers/kafka_consumer_manually_commit.py +7 -2
  9. funboost/consumers/mysql_cdc_consumer.py +95 -0
  10. funboost/contrib/cdc/__init__.py +0 -0
  11. funboost/contrib/cdc/mysql2mysql.py +44 -0
  12. funboost/core/booster.py +25 -2
  13. funboost/core/exceptions.py +3 -0
  14. funboost/core/func_params_model.py +6 -5
  15. funboost/core/msg_result_getter.py +8 -7
  16. funboost/factories/broker_kind__publsiher_consumer_type_map.py +10 -1
  17. funboost/publishers/base_publisher.py +5 -6
  18. funboost/publishers/grpc_publisher.py +53 -0
  19. funboost/publishers/kafka_publisher.py +3 -1
  20. funboost/publishers/mysql_cdc_publisher.py +24 -0
  21. funboost/timing_job/timing_push.py +3 -1
  22. {funboost-49.8.dist-info → funboost-50.0.dist-info}/METADATA +69 -33
  23. {funboost-49.8.dist-info → funboost-50.0.dist-info}/RECORD +27 -30
  24. funboost/utils/class_utils2.py +0 -94
  25. funboost/utils/custom_pysnooper.py +0 -149
  26. funboost/utils/pysnooper_ydf/__init__.py +0 -32
  27. funboost/utils/pysnooper_ydf/pycompat.py +0 -82
  28. funboost/utils/pysnooper_ydf/tracer.py +0 -479
  29. funboost/utils/pysnooper_ydf/utils.py +0 -101
  30. funboost/utils/pysnooper_ydf/variables.py +0 -133
  31. funboost/utils/times/__init__.py +0 -85
  32. funboost/utils/times/version.py +0 -1
  33. {funboost-49.8.dist-info → funboost-50.0.dist-info}/LICENSE +0 -0
  34. {funboost-49.8.dist-info → funboost-50.0.dist-info}/WHEEL +0 -0
  35. {funboost-49.8.dist-info → funboost-50.0.dist-info}/entry_points.txt +0 -0
  36. {funboost-49.8.dist-info → funboost-50.0.dist-info}/top_level.txt +0 -0
funboost/__init__.py CHANGED
@@ -13,7 +13,7 @@ the use_config_form_funboost_config_module() in the set_frame_config module is the core
  This comment only concerns framework developers, not users of the framework.
  '''
 
- __version__ = "49.8"
+ __version__ = "50.0"
 
 
  from funboost.set_frame_config import show_frame_config
 
funboost/assist/celery_helper.py CHANGED
@@ -8,7 +8,7 @@ import threading
  from functools import partial
 
  import celery
-
+ from celery.app.task import Task
  import nb_log
  from funboost.funboost_config_deafult import BrokerConnConfig,FunboostCommonConfig
  from funboost import ConcurrentModeEnum
funboost/concurrent_pool/async_pool_executor.py CHANGED
@@ -94,7 +94,7 @@ class AsyncPoolExecutor(FunboostFileLoggerMixin,FunboostBaseConcurrentPool):
 
      def submit(self, func, *args, **kwargs):
          future = asyncio.run_coroutine_threadsafe(self._produce(func, *args, **kwargs), self.loop)  # run_coroutine_threadsafe also has a drawback: it is very expensive.
-         future.result()  # Blocks submit when the queue is full, preventing items from being enqueued too fast.
+         future.result()  # Blocks submit when the queue is full, preventing items from being enqueued too fast. The backpressure prevents tens of millions of messages from being rapidly drained out of the message queue into memory.
 
      async def _produce(self, func, *args, **kwargs):
          await self._queue.put((func, args, kwargs))
@@ -123,7 +123,12 @@ class AsyncPoolExecutor(FunboostFileLoggerMixin,FunboostBaseConcurrentPool):
          # asyncio.set_event_loop(self.loop)
          # self.loop.run_until_complete(asyncio.wait([self._consume() for _ in range(self._size)], loop=self.loop))
          # self._can_be_closed_flag = True
-         [self.loop.create_task(self._consume()) for _ in range(self._size)]
+         if self._specify_async_loop is None:
+             for _ in range(self._size):
+                 self.loop.create_task(self._consume())
+         else:
+             for _ in range(self._size):
+                 asyncio.run_coroutine_threadsafe(self._consume(), self.loop)  # The function asyncio provides specifically for safely submitting tasks to an event loop from another thread.
          if self._specify_async_loop is None:
              self.loop.run_forever()
          else:
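A minimal standalone sketch of the pattern both hunks above rely on, using nothing beyond the asyncio standard library (all names here are illustrative, not funboost's): a bounded asyncio.Queue supplies the backpressure, and asyncio.run_coroutine_threadsafe is the sanctioned way to hand a coroutine to a loop running in another thread.

    import asyncio
    import threading

    loop = asyncio.new_event_loop()
    threading.Thread(target=loop.run_forever, daemon=True).start()

    queue: asyncio.Queue = asyncio.Queue(maxsize=100)  # bounded queue => backpressure

    async def _produce(item):
        await queue.put(item)  # suspends while the queue is full

    def submit(item):
        # Blocks the submitting thread until the put succeeds, so a fast
        # publisher cannot drain millions of queued messages into memory.
        fut = asyncio.run_coroutine_threadsafe(_produce(item), loop)
        fut.result()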
funboost/constant.py CHANGED
@@ -101,6 +101,8 @@ class BrokerEnum:
 
      HTTP = 'HTTP'  # Implemented over http: publishing uses urllib3, the consuming server uses aiohttp.server. Distributed but not persistent; the upside is no message-queue middleware needs to be installed.
 
+     GRPC = 'GRPC'  # Uses the well-known grpc as the broker; the sync_call method fetches the grpc result synchronously, vastly simpler than hand-writing a native grpc client and server.
+
      NATS = 'NATS'  # High-performance middleware nats; the middleware server performs very well.
 
      TXT_FILE = 'TXT_FILE'  # A txt file on disk as the message queue; single-machine persistence, no multi-machine distribution. Not recommended; use sqlite instead.
@@ -119,6 +121,27 @@ class BrokerEnum:
 
      NAMEKO = 'NAMEKO'  # funboost supports the python microservice framework nameko; users can run python nameko microservices without learning the nameko api.
 
+
+     """
+     MYSQL_CDC is funboost's magical, one-of-a-kind broker middleware.
+     MySQL binlog cdc events automatically become messages: users never publish messages manually and only write the logic that handles the binlog content.
+     One line of code gives a lightweight mysql2mysql, mysql2kafka, mysql2rabbitmq, and so on.
+     Unlike the other middlewares, no manual publishing is needed: every insert/update/delete against the database automatically becomes a funboost message.
+     It is almost a lightweight stand-in for canal and flink cdc.
+
+     By the same logic, log files could also be wrapped as a broker: as soon as another program writes to the log file, funboost consumption is triggered,
+     and the function logic can then forward the message to kafka. (The famous ELK already exists, of course; this scenario only illustrates how flexible funboost brokers are.)
+
+     Log files, filesystem changes (inotify), even hardware sensor signals can all be wrapped as a funboost Broker, following section 4.21 of the documentation.
+
+     This fully demonstrates that funboost can act as a general, event-driven function scheduling platform, rather than merely message-driven in the traditional celery sense.
+     """
+     """
+     funboost can consume the binlog messages that canal publishes to kafka, and it can also capture cdc data itself without depending on canal.
+     """
+     MYSQL_CDC = 'MYSQL_CDC'
+
+
 
 class ConcurrentModeEnum:
      THREADING = 'threading'  # Runs in threads; also compatible with async def coroutine functions.
funboost/consumers/base_consumer.py CHANGED
@@ -637,10 +637,13 @@ class AbstractConsumer(LoggerLevelSetterMixin, metaclass=abc.ABCMeta, ):
          """
          self._do_not_delete_extra_from_msg = True
 
-     def user_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus):  # may be overridden in a subclass
+     def _frame_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus, kw: dict):
          pass
 
-     async def aio_user_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus):  # may be overridden in a subclass
+     def user_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus, ):  # may be overridden in a subclass
+         pass
+
+     async def aio_user_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus, ):  # may be overridden in a subclass
          pass
 
      def _convert_real_function_only_params_by_conusuming_function_kind(self, function_only_params: dict, extra_params: dict):
@@ -729,9 +732,10 @@ class AbstractConsumer(LoggerLevelSetterMixin, metaclass=abc.ABCMeta, ):
 
              with self._lock_for_count_execute_task_times_every_unit_time:
                  self.metric_calculation.cal(t_start_run_fun, current_function_result_status)
-                 self.user_custom_record_process_info_func(current_function_result_status)  # Results can be recorded either way; subclassing is recommended over passing user_custom_record_process_info_func to boost
+                 self._frame_custom_record_process_info_func(current_function_result_status, kw)
+                 self.user_custom_record_process_info_func(current_function_result_status, )  # Results can be recorded either way; subclassing is recommended over passing user_custom_record_process_info_func to boost
                  if self.consumer_params.user_custom_record_process_info_func:
-                     self.consumer_params.user_custom_record_process_info_func(current_function_result_status)
+                     self.consumer_params.user_custom_record_process_info_func(current_function_result_status, )
          except BaseException as e:
              log_msg = f' error 严重错误 {type(e)} {e} '
              # self.logger.critical(msg=f'{log_msg} \n', exc_info=True)
@@ -888,10 +892,11 @@ class AbstractConsumer(LoggerLevelSetterMixin, metaclass=abc.ABCMeta, ):
              async with self._async_lock_for_count_execute_task_times_every_unit_time:
                  self.metric_calculation.cal(t_start_run_fun, current_function_result_status)
 
-                 self.user_custom_record_process_info_func(current_function_result_status)  # Results can be recorded either way; overriding via subclassing as in docs section 4.21.b is recommended
-                 await self.aio_user_custom_record_process_info_func(current_function_result_status)
+                 self._frame_custom_record_process_info_func(current_function_result_status)
+                 self.user_custom_record_process_info_func(current_function_result_status, )  # Results can be recorded either way; overriding via subclassing as in docs section 4.21.b is recommended
+                 await self.aio_user_custom_record_process_info_func(current_function_result_status, )
                  if self.consumer_params.user_custom_record_process_info_func:
-                     self.consumer_params.user_custom_record_process_info_func(current_function_result_status)
+                     self.consumer_params.user_custom_record_process_info_func(current_function_result_status, )
 
          except BaseException as e:
              log_msg = f' error 严重错误 {type(e)} {e} '
funboost/consumers/grpc_consumer.py ADDED
@@ -0,0 +1,102 @@
+ # -*- coding: utf-8 -*-
+ # @Author : ydf
+ # @Time : 2023/8/8 0008 13:32
+
+ import abc
+ import threading
+ import grpc
+ import time
+
+ from funboost import FunctionResultStatus
+ from funboost.assist.grpc_helper import funboost_grpc_pb2_grpc, funboost_grpc_pb2
+ from funboost.consumers.base_consumer import AbstractConsumer
+ from funboost.core.serialization import Serialization
+ from funboost.core.exceptions import FunboostWaitRpcResultTimeout
+ from funboost.concurrent_pool.flexible_thread_pool import FlexibleThreadPool
+
+
+ class FutureStatusResult:
+     def __init__(self, call_type: str):
+         self.execute_finish_event = threading.Event()
+         self.staus_result_obj: FunctionResultStatus = None
+         self.call_type = call_type  # sync_call or publish
+
+     def set_finish(self):
+         self.execute_finish_event.set()
+
+     def wait_finish(self, rpc_timeout):
+         return self.execute_finish_event.wait(rpc_timeout)
+
+     def set_staus_result_obj(self, staus_result_obj: FunctionResultStatus):
+         self.staus_result_obj = staus_result_obj
+
+     def get_staus_result_obj(self):
+         return self.staus_result_obj
+
+
+ class GrpcConsumer(AbstractConsumer, ):
+     """
+     grpc as broker
+     """
+     BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'host': '127.0.0.1', 'port': None}
+
+     def custom_init(self):
+         class FunboostGrpcServicer(funboost_grpc_pb2_grpc.FunboostBrokerServiceServicer):
+             """
+             HelloService implementation class
+             """
+
+             def Call(this, request, context):
+                 """
+                 implements the SayHello method
+                 """
+                 future_status_result = FutureStatusResult(call_type=request.call_type)
+                 kw = {'body': request.json_req, 'future_status_result': future_status_result, }
+                 self._submit_task(kw)
+                 if request.call_type == "sync_call":
+                     if future_status_result.wait_finish(self.consumer_params.rpc_timeout):  # wait for the consume result of the concurrently executed function
+                         return funboost_grpc_pb2.FunboostGrpcResponse(json_resp=Serialization.to_json_str(
+                             future_status_result.get_staus_result_obj().get_status_dict(without_datetime_obj=True)))
+                     else:
+                         self.logger.error(f'wait rpc data timeout')
+                         context.set_code(grpc.StatusCode.DEADLINE_EXCEEDED)
+                         context.set_details(f'wait rpc data timeout')
+                         # raise FunboostWaitRpcResultTimeout(f'wait rpc data timeout')
+                 else:
+                     return funboost_grpc_pb2.FunboostGrpcResponse(json_resp='{"publish_status":"ok"}')
+
+         self.GRPC_SERVICER_CLS = FunboostGrpcServicer
+
+     def _shedual_task(self):
+         server = grpc.server(self.concurrent_pool)
+
+         # register the service
+         funboost_grpc_pb2_grpc.add_FunboostBrokerServiceServicer_to_server(self.GRPC_SERVICER_CLS(), server)
+
+         # bind the port
+         port = self.consumer_params.broker_exclusive_config['port']
+         if port is None:
+             raise ValueError('please specify port')
+         listen_addr = f'[::]:{port}'
+         server.add_insecure_port(listen_addr)
+
+         # start the server
+         server.start()
+         print(f"GRPC Has started. listening on: {listen_addr}")
+
+         while True:
+             time.sleep(100)  # keep the server running
+
+     def _confirm_consume(self, kw):
+         pass
+
+     def _requeue(self, kw):
+         pass
+
+     def _frame_custom_record_process_info_func(self, current_function_result_status: FunctionResultStatus, kw):
+         future_status_result: FutureStatusResult = kw['future_status_result']
+         if future_status_result.call_type == "sync_call":
+             future_status_result.set_staus_result_obj(current_function_result_status)
+             future_status_result.set_finish()  # This is the crucial core: as soon as the concurrent pool has finished running the function, tell the grpc server side that processing is complete.
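A hedged usage sketch for this new broker. Only BrokerEnum.GRPC, the broker_exclusive_config keys and the sync_call/publish call types are taken from this diff; the queue name, port and function are illustrative, and the publisher-side API lives in funboost/publishers/grpc_publisher.py, which this page lists but does not display.

    from funboost import boost, BoosterParams, BrokerEnum

    @boost(BoosterParams(
        queue_name='grpc_add',  # illustrative
        broker_kind=BrokerEnum.GRPC,
        broker_exclusive_config={'host': '127.0.0.1', 'port': 50051},  # port is mandatory, see _shedual_task above
    ))
    def add(a, b):
        return a + b

    if __name__ == '__main__':
        # Starts the grpc server. A request with call_type "sync_call" blocks in
        # Call() until _frame_custom_record_process_info_func sets the finish
        # event, then the result status dict is returned as json.
        add.consume()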
funboost/consumers/kafka_consumer.py CHANGED
@@ -22,7 +22,7 @@ class KafkaConsumer(AbstractConsumer):
      You can make the consuming function sleep 60 seconds, abruptly stop the consuming code, and run kafka-consumer-groups.sh --bootstrap-server 127.0.0.1:9092 --describe --group funboost to verify the difference between automatic and manual consume acknowledgement.
      """
 
-     BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'group_id': 'funboost_kafka', 'auto_offset_reset': 'earliest'}
+     BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'group_id': 'funboost_kafka', 'auto_offset_reset': 'earliest', 'num_partitions': 10, 'replication_factor': 1, }
      # not_all_brokers_general_settings config: the broker-specific middleware settings supported are group_id and auto_offset_reset
      """
      about auto_offset_reset
@@ -35,7 +35,9 @@ class KafkaConsumer(AbstractConsumer):
      def _shedual_task(self):
          try:
              admin_client = KafkaPythonImporter().KafkaAdminClient(bootstrap_servers=BrokerConnConfig.KAFKA_BOOTSTRAP_SERVERS)
-             admin_client.create_topics([KafkaPythonImporter().NewTopic(self._queue_name, 10, 1)])
+             admin_client.create_topics([KafkaPythonImporter().NewTopic(self._queue_name,
+                                                                        self.consumer_params.broker_exclusive_config['num_partitions'],
+                                                                        self.consumer_params.broker_exclusive_config['replication_factor'])])
              # admin_client.create_partitions({self._queue_name: NewPartitions(total_count=16)})
          except KafkaPythonImporter().TopicAlreadyExistsError:
              pass
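Since num_partitions and replication_factor are now read from broker_exclusive_config rather than hard-coded to 10 and 1, a consumer can size its own topic at declaration time. A sketch (queue name and values illustrative):

    from funboost import boost, BoosterParams, BrokerEnum

    @boost(BoosterParams(
        queue_name='my_kafka_topic',
        broker_kind=BrokerEnum.KAFKA,
        broker_exclusive_config={'num_partitions': 16, 'replication_factor': 3},
    ))
    def handle(x):
        print(x)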
funboost/consumers/kafka_consumer_manually_commit.py CHANGED
@@ -2,6 +2,9 @@
 # @Author : ydf
 # @Time : 2021/4/18 0008 13:32
 
+ """
+ This makes it possible for a single-partition kafka topic to be consumed by funboost with 200 threads, with the consuming process force-restartable at will, without losing messages.
+ """
 
 import json
 import threading
@@ -28,7 +31,7 @@ class KafkaConsumerManuallyCommit(AbstractConsumer):
      You can make the consuming function sleep 60 seconds, abruptly stop the consuming code, and run kafka-consumer-groups.sh --bootstrap-server 127.0.0.1:9092 --describe --group frame_group to verify the difference between automatic and manual consume acknowledgement.
      """
 
-     BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'group_id': 'funboost_confluent_kafka', 'auto_offset_reset': 'earliest'}
+     BROKER_EXCLUSIVE_CONFIG_DEFAULT = {'group_id': 'funboost_kafka', 'auto_offset_reset': 'earliest', 'num_partitions': 10, 'replication_factor': 1, }
 
      def custom_init(self):
          self._lock_for_operate_offset_dict = threading.Lock()
@@ -37,7 +40,9 @@ class KafkaConsumerManuallyCommit(AbstractConsumer):
 
          try:
              admin_client = KafkaPythonImporter().KafkaAdminClient(bootstrap_servers=BrokerConnConfig.KAFKA_BOOTSTRAP_SERVERS)
-             admin_client.create_topics([KafkaPythonImporter().NewTopic(self._queue_name, 10, 1)])
+             admin_client.create_topics([KafkaPythonImporter().NewTopic(self._queue_name,
+                                                                        self.consumer_params.broker_exclusive_config['num_partitions'],
+                                                                        self.consumer_params.broker_exclusive_config['replication_factor'])])
              # admin_client.create_partitions({self._queue_name: NewPartitions(total_count=16)})
          except KafkaPythonImporter().TopicAlreadyExistsError:
              pass
funboost/consumers/mysql_cdc_consumer.py ADDED
@@ -0,0 +1,95 @@
+ # funboost/consumers/cdc_consumer.py
+
+ import time
+ import typing
+ from funboost.consumers.base_consumer import AbstractConsumer
+ from funboost.core.loggers import develop_logger
+
+ # pip install mysql-replication==1.0.9
+ from pymysqlreplication import BinLogStreamReader
+ from pymysqlreplication.row_event import (
+     DeleteRowsEvent,
+     UpdateRowsEvent,
+     WriteRowsEvent,
+ )
+
+
+ class MysqlCdcConsumer(AbstractConsumer):
+     """
+     A consumer that listens to MySQL binlog events (CDC) and treats them as tasks.
+     This broker is consumer-driven; it automatically generates tasks from database changes.
+     """
+
+     BROKER_EXCLUSIVE_CONFIG_DEFAULT = {
+         'BinLogStreamReaderConfig': {},
+     }
+
+     def custom_init(self):
+         """Validates the essential configuration."""
+         config = self.consumer_params.broker_exclusive_config['BinLogStreamReaderConfig']
+         if not config.get('connection_settings') or not config.get('server_id'):
+             raise ValueError("For 'funboost_cdc' broker, 'connection_settings' and 'server_id' must be provided in 'broker_exclusive_config'.")
+         self.logger.info("FunboostCdcConsumer initialized. Ready to listen for binlog events.")
+         self._bin_log_stream_reader_config = config
+
+     def _shedual_task(self):
+         """
+         This is the main loop that connects to MySQL, reads binlog events,
+         and submits them as tasks to the funboost engine.
+         """
+         # Prepare the arguments for BinLogStreamReader by filtering out None values
+
+         stream = BinLogStreamReader(**self._bin_log_stream_reader_config)
+
+         try:
+             for binlogevent in stream:
+                 event_type = None
+                 if isinstance(binlogevent, WriteRowsEvent):
+                     event_type = 'INSERT'
+                 elif isinstance(binlogevent, UpdateRowsEvent):
+                     event_type = 'UPDATE'
+                 elif isinstance(binlogevent, DeleteRowsEvent):
+                     event_type = 'DELETE'
+
+                 if event_type:
+                     for row in binlogevent.rows:
+                         # Construct a clear, flat dictionary to be used as function kwargs
+                         task_body = {
+                             'event_type': event_type,
+                             'schema': binlogevent.schema,
+                             'table': binlogevent.table,
+                             'timestamp': binlogevent.timestamp,
+                         }
+                         # Unpack row data ('values' or 'before_values'/'after_values')
+                         task_body.update(row)
+
+                         # Submit the structured data as a task to the funboost engine
+                         self._submit_task({'body': task_body})
+         except Exception as e:
+             self.logger.critical(f"An error occurred in the binlog stream: {e}", exc_info=True)
+             # A small delay before potentially restarting or exiting, depending on supervisor.
+             time.sleep(10)
+         finally:
+             self.logger.info("Closing binlog stream.")
+             stream.close()
+
+     def _confirm_consume(self, kw: dict):
+         """
+         Confirmation is implicitly handled by the BinLogStreamReader's position management.
+         When resume_stream=True, the library automatically saves its position.
+         Funboost's ACK here confirms that the *processing* of the event is complete.
+         """
+         pass
+
+     def _requeue(self, kw: dict):
+         """
+         Requeuing a binlog event is not a standard operation.
+         Funboost's built-in retry mechanism should be used for transient processing errors.
+         If a task fails permanently, it will be ACK'd after exhausting retries,
+         and the binlog position will eventually advance.
+         """
+         self.logger.warning(f"Requeuing a CDC event is not supported. "
+                             f"Use funboost's retry mechanism for processing failures. Task: {kw.get('body')}")
+         pass
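A hedged sketch of declaring a consumer on this broker. The BinLogStreamReaderConfig dict is passed straight through to pymysqlreplication.BinLogStreamReader, so connection_settings and server_id (which custom_init validates) plus any other BinLogStreamReader argument belong there; everything else below is illustrative.

    from funboost import boost, BoosterParams, BrokerEnum

    @boost(BoosterParams(
        queue_name='mysql_binlog_demo',
        broker_kind=BrokerEnum.MYSQL_CDC,
        broker_exclusive_config={
            'BinLogStreamReaderConfig': {
                'connection_settings': {'host': '127.0.0.1', 'port': 3306,
                                        'user': 'root', 'passwd': '123456'},
                'server_id': 100,            # must be unique among replication clients
                'resume_stream': True,       # continue from the saved binlog position
                'only_schemas': ['testdb'],  # optional BinLogStreamReader filter
            },
        },
    ))
    def handle_binlog(event_type, schema, table, timestamp,
                      values=None, before_values=None, after_values=None):
        # values is set for INSERT/DELETE, before_values/after_values for UPDATE,
        # exactly as _shedual_task flattens the row into the task body above.
        print(event_type, schema, table, values, before_values, after_values)

    # handle_binlog.consume()  # no push() needed: database writes drive the queue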
funboost/contrib/cdc/__init__.py ADDED
File without changes
funboost/contrib/cdc/mysql2mysql.py ADDED
@@ -0,0 +1,44 @@
+ import dataset
+ from typing import Dict
+
+ class MySql2Mysql:
+     """
+     Saves mysql binlog message data, wrapped via dataset, into a target database.
+     With this contrib class, one line of user code implements mysql2mysql over cdc; it is very convenient to sync source table a of database instance 1 to target table a of database instance 2 automatically and in real time.
+
+     This is only a contrib class; users can insert into the target table and clean the data however they like, using this as a reference example. dataset saves a dict as one mysql row, which is really convenient.
+     Users can also do their own batch inserts into the target table. This class is not mandatory; it is a demonstration.
+     """
+     def __init__(self, primary_key: str,
+                  target_table_name: str,
+                  target_sink_db: dataset.Database, ):
+         self.primary_key = primary_key
+         self.target_table_name = target_table_name
+         self.target_sink_db = target_sink_db
+
+     def sync_data(self, event_type: str,
+                   schema: str,
+                   table: str,
+                   timestamp: int,
+                   row_data: Dict, ):
+         # e.g. insert the data from this table unchanged into the testdb7.users table
+         target_table: dataset.Table = self.target_sink_db[self.target_table_name]  # dataset fetches or creates the table automatically from its name
+         print(f"接收到事件: {event_type} on schema: {schema}, table: {table}, timestamp: {timestamp}")
+
+         if event_type == 'INSERT':
+             # `row_data` contains a 'values' dict
+             data_to_insert = row_data['values']
+             target_table.upsert(data_to_insert, [self.primary_key])
+             print(f" [INSERT] 成功同步数据: {data_to_insert}")
+
+         elif event_type == 'UPDATE':
+             # `row_data` contains 'before_values' and 'after_values'
+             data_to_update = row_data['after_values']
+             target_table.upsert(data_to_update, [self.primary_key])
+             print(f" [UPDATE] 成功同步数据: {data_to_update}")
+
+         elif event_type == 'DELETE':
+             # `row_data` contains a 'values' dict, i.e. the data of the deleted row
+             data_to_delete = row_data['values']
+             target_table.delete(**{self.primary_key: data_to_delete[self.primary_key]})
+             print(f" [DELETE] 成功同步数据: {data_to_delete}")
funboost/core/booster.py CHANGED
@@ -142,6 +142,7 @@ class Booster:
 
      def _safe_push(self, *func_args, **func_kwargs) -> AsyncResult:
          """ Multiprocess-safe. Under fork-based (not spawn-based) multiprocessing, some packages cannot share one connection across processes, e.g. kafka"""
+         # print( self.__dict__)
          consumer = BoostersManager.get_or_create_booster_by_queue_name(self.queue_name).consumer
          return consumer.publisher_of_same_queue.push(*func_args, **func_kwargs)
 
@@ -155,14 +156,14 @@ class Booster:
          """Publishes a message in the asyncio ecosystem. Since a synchronous push takes under 1 millisecond, the synchronous push can usually be called directly from asyncio code,
          but to better guard against network jitter (e.g. publishing to a message queue over the public internet taking 10 milliseconds), aio_push can be used"""
          async_result = await simple_run_in_executor(self.push, *func_args, **func_kwargs)
-         return AioAsyncResult(async_result.task_id, )
+         return AioAsyncResult(async_result.task_id, timeout=async_result.timeout)
 
      async def aio_publish(self, msg: typing.Union[str, dict], task_id=None,
                            priority_control_config: PriorityConsumingControlConfig = None) -> AioAsyncResult:
          """Publishes a message in the asyncio ecosystem. Since a synchronous push takes under 1 millisecond, the synchronous push can usually be called directly from asyncio code,
          but to better guard against network jitter (e.g. publishing to a message queue over the public internet taking 10 milliseconds), aio_push can be used"""
          async_result = await simple_run_in_executor(self.publish, msg, task_id, priority_control_config)
-         return AioAsyncResult(async_result.task_id, )
+         return AioAsyncResult(async_result.task_id, timeout=async_result.timeout)
 
      # noinspection PyMethodMayBeStatic
      def multi_process_consume(self, process_num=1):
@@ -207,6 +208,23 @@ class Booster:
          from funboost.core.fabric_deploy_helper import fabric_deploy
          fabric_deploy(self, **params)
 
+     def __getstate__(self):
+         state = {}
+         state['queue_name'] = self.boost_params.queue_name
+         return state
+
+     def __setstate__(self, state):
+         """A very advanced trick that lets booster objects be pickled and unpickled; an exceptionally clever design.
+         With it, when redis is used as the apscheduler jobstore, aps_obj.add_job(booster.push, ...) works correctly
+         instead of raising an error that the booster object cannot be pickled.
+
+         The deserialization does not obsess over how to deserialize sockets or threading.Lock; it sidesteps the hard problem with identity-based proxy deserialization.
+         """
+         _booster = BoostersManager.get_or_create_booster_by_queue_name(state['queue_name'])
+         self.__dict__.update(_booster.__dict__)
+
+
+
 
 boost = Booster  # The decorator for consuming functions. If methods do not autocomplete, just use Booster. The 2024 PyCharm versions misbehave and stop autocompleting .consume, .push and other methods on @boost functions.
 task_deco = boost  # Both decorator names work; task_deco is the original name, kept for compatibility.
@@ -214,8 +232,13 @@ task_deco = boost  # Both decorator names work; task_deco is the original name
 
 class BoostersManager:
      """
+     The BoostersManager class was added later; it was not planned from the start.
+
      When a consuming function creates a Booster object, BoostersManager.regist_booster is called automatically, saving the queue name and input parameters into the pid_queue_name__booster_map dict.
      With this class, booster objects can be created without using the decorator.
+
+     If you want to start consuming all functions at once, instead of calling f1.consume() f2.consume() f3.consume() one by one,
+     you can call BoostersManager.consume_all_queues()
      """
 
      # the pid_queue_name__booster_map dict stores {(pid, queue_name): Booster object}
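What the new __getstate__/__setstate__ pair buys, as a sketch: only the queue name crosses the pickle boundary, and the live object with its sockets and locks is looked up (or rebuilt) on the other side, which is what lets an apscheduler redis jobstore store booster.push as a job (queue name illustrative):

    import pickle
    from funboost import boost, BoosterParams

    @boost(BoosterParams(queue_name='pickle_demo'))
    def f(x):
        return x * 2

    blob = pickle.dumps(f)   # serializes only {'queue_name': 'pickle_demo'}
    f2 = pickle.loads(blob)  # __setstate__ fetches/recreates the booster by queue name
    f2.push(1)               # usable exactly like the original booster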
funboost/core/exceptions.py CHANGED
@@ -17,6 +17,9 @@ class FunboostWaitRpcResultTimeout(FunboostException):
 class FunboostRpcResultError(FunboostException):
      """the rpc result is in an error state"""
 
+ class HasNotAsyncResult(FunboostException):
+     pass
+
 class ExceptionForPushToDlxqueue(FunboostException):
      """when the framework detects an ExceptionForPushToDlxqueue error, the message is published to the dead-letter queue"""
 
funboost/core/func_params_model.py CHANGED
@@ -57,7 +57,7 @@ class BaseJsonAbleModel(BaseModel):
 
      def json_str_value(self):
          try:
-             return json.dumps(self.get_str_dict(), ensure_ascii=False, )
+             return json.dumps(dict(self.get_str_dict()), ensure_ascii=False, )
          except TypeError as e:
              return str(self.get_str_dict())
 
@@ -136,8 +136,8 @@ class BoosterParams(BaseJsonAbleModel):
      specify_concurrent_pool: typing.Optional[FunboostBaseConcurrentPool] = None  # Use the specified thread/coroutine pool; multiple consumers can share one pool to save threads. When not None, threads_num has no effect.
 
      specify_async_loop: typing.Optional[asyncio.AbstractEventLoop] = None  # The specified asyncio loop; only takes effect when the concurrency mode is async. Some packages, e.g. aiohttp, cannot have the request and the httpclient instantiation in two different loops, so the loop can be passed in.
-     is_auto_start_specify_async_loop_in_child_thread: bool = True  # Whether to automatically start the specified async loop in a child thread; only takes effect when the concurrency mode is async. If False, users start their own loop.run_forever() in their own code.
-
+     is_auto_start_specify_async_loop_in_child_thread: bool = True  # Whether to automatically start the specified async loop in the child thread of funboost's asyncio concurrent pool; only takes effect when the concurrency mode is async. If False, users start their own loop.run_forever() in their own code.
+
      """qps:
      Powerful rate control: the number of function executions within 1 second. It can be a decimal such as 0.01, meaning once every 100 seconds, or 50, meaning 50 executions per second. None disables rate limiting. When qps is set, there is no need to specify the concurrency; funboost adaptively and dynamically resizes the concurrent pool."""
      qps: typing.Union[float, int, None] = None
@@ -188,7 +188,8 @@ class BoosterParams(BaseJsonAbleModel):
      user_custom_record_process_info_func: typing.Optional[typing.Callable] = None  # A user-defined function that saves the processing record of a message somewhere, e.g. a mysql database. The function takes a single argument of type FunctionResultStatus, which the user may print or persist.
 
      is_using_rpc_mode: bool = False  # Whether to use rpc mode, in which the publishing side can get the consuming side's result via callback, at some performance cost. async_result.result blocks the current thread while waiting.
-     rpc_result_expire_seconds: int = 600  # expiry time of the rpc result.
+     rpc_result_expire_seconds: int = 1800  # expiry time of the rpc result stored in redis.
+     rpc_timeout: int = 1800  # in rpc mode, the timeout for waiting for the rpc result to come back
 
      delay_task_apscheduler_jobstores_kind: Literal['redis', 'memory'] = 'redis'  # which kind of jobstore the apscheduler object for delayed tasks uses; redis or memory
 
@@ -356,7 +357,7 @@ class PublisherParams(BaseJsonAbleModel):
      # func_params_is_pydantic_model: bool = False  # funboost also supports function input parameters being a pydantic model type; funboost converts them itself before publishing and when taking them out.
      publish_msg_log_use_full_msg: bool = False  # Whether the log for a message published to the queue shows the full message body or only the function arguments.
      consuming_function_kind: typing.Optional[str] = None  # Generated automatically; users do not need to pass it.
-
+     rpc_timeout: int = 1800  # in rpc mode, the timeout for waiting for the rpc result to come back
 
 if __name__ == '__main__':
      from funboost.concurrent_pool import FlexibleThreadPool
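A sketch of the new rpc_timeout knob in context. With this release both rpc_result_expire_seconds and the wait timeout default to 1800 seconds, and the timeout now also flows into AioAsyncResult via aio_push/aio_publish; how the publisher threads it into AsyncResult presumably lives in base_publisher.py, whose hunk this page lists but does not display (queue name illustrative):

    from funboost import boost, BoosterParams

    @boost(BoosterParams(queue_name='rpc_demo', is_using_rpc_mode=True, rpc_timeout=60))
    def square(x):
        return x * x

    if __name__ == '__main__':
        square.consume()
        async_result = square.push(9)
        print(async_result.result)  # blocks up to the timeout waiting for the consumer's result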
funboost/core/msg_result_getter.py CHANGED
@@ -4,6 +4,7 @@ import time
 import typing
 import json
 
+ from funboost.core.exceptions import FunboostWaitRpcResultTimeout, FunboostRpcResultError, HasNotAsyncResult
 from funboost.utils.mongo_util import MongoMixin
 
 from funboost.concurrent_pool import CustomThreadPoolExecutor
@@ -14,15 +15,14 @@ from funboost.core.serialization import Serialization
 
 from funboost.core.function_result_status_saver import FunctionResultStatus
 
- class HasNotAsyncResult(Exception):
-     pass
+
 
 
 NO_RESULT = 'no_result'
 
 def _judge_rpc_function_result_status_obj(status_and_result_obj: FunctionResultStatus, raise_exception: bool):
      if status_and_result_obj is None:
-         raise FunboostWaitRpcResultTimeout(f'等待 {status_and_result_obj.task_id} rpc结果超过了指定时间')
+         raise FunboostWaitRpcResultTimeout(f'wait rpc data timeout for task_id:{status_and_result_obj.task_id}')
      if status_and_result_obj.success is True:
          return status_and_result_obj
      else:
@@ -35,6 +35,7 @@ def _judge_rpc_function_result_status_obj(status_and_result_obj:FunctionResultSt
      else:
          status_and_result_obj.rpc_chain_error_msg_dict = error_msg_dict
          return status_and_result_obj
+
 class AsyncResult(RedisMixin):
      default_callback_run_executor = FlexibleThreadPoolMinWorkers0(200, work_queue_maxsize=50)
 
@@ -51,14 +52,14 @@ class AsyncResult(RedisMixin):
      """
          self._callback_run_executor = thread_pool_executor
 
-     def __init__(self, task_id, timeout=120):
+     def __init__(self, task_id, timeout=1800):
          self.task_id = task_id
          self.timeout = timeout
          self._has_pop = False
          self._status_and_result = None
          self._callback_run_executor = None
 
-     def set_timeout(self, timeout=60):
+     def set_timeout(self, timeout=1800):
          self.timeout = timeout
          return self
 
@@ -172,13 +173,13 @@ if __name__ == '__main__':
 
      '''
 
-     def __init__(self, task_id, timeout=120):
+     def __init__(self, task_id, timeout=1800):
          self.task_id = task_id
          self.timeout = timeout
          self._has_pop = False
          self._status_and_result = None
 
-     def set_timeout(self, timeout=60):
+     def set_timeout(self, timeout=1800):
          self.timeout = timeout
          return self
 
funboost/factories/broker_kind__publsiher_consumer_type_map.py CHANGED
@@ -106,7 +106,7 @@ def register_custom_broker(broker_kind, publisher_class: typing.Type[AbstractPub
 
 def regist_to_funboost(broker_kind: str):
      """
-     The imports are deferred because funboost does not pip-install these third-party packages automatically, to avoid errors right at startup.
+     These are not defined directly in broker_kind__publsiher_consumer_type_map; the imports are deferred because funboost does not pip-install these third-party packages automatically, to avoid errors right at startup.
      So when a user wants one of these third-party middlewares as the message queue, they pip install it themselves following the import error message, or run pip install funboost[all] to install all middlewares at once.
      It is recommended to install the third-party package versions listed in extra_brokers and install_requires of https://github.com/ydf0509/funboost/blob/master/setup.py
      """
@@ -175,6 +175,15 @@ def regist_to_funboost(broker_kind: str):
          from funboost.consumers.nsq_consumer import NsqConsumer
          register_custom_broker(broker_kind, NsqPublisher, NsqConsumer)
 
+     if broker_kind == BrokerEnum.GRPC:
+         from funboost.consumers.grpc_consumer import GrpcConsumer
+         from funboost.publishers.grpc_publisher import GrpcPublisher
+         register_custom_broker(broker_kind, GrpcPublisher, GrpcConsumer)
+
+     if broker_kind == BrokerEnum.MYSQL_CDC:
+         from funboost.consumers.mysql_cdc_consumer import MysqlCdcConsumer
+         from funboost.publishers.mysql_cdc_publisher import MysqlCdcPublisher
+         register_custom_broker(broker_kind, MysqlCdcPublisher, MysqlCdcConsumer)
 
 if __name__ == '__main__':
      import sys
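The same register_custom_broker hook that wires in GRPC and MYSQL_CDC above is how a user-defined broker plugs in. A hedged sketch: the consumer abstract methods are exactly the ones the new consumers in this release implement, while the publisher stub assumes funboost's AbstractPublisher API, since the publisher files are not displayed on this page.

    from funboost.factories.broker_kind__publsiher_consumer_type_map import register_custom_broker
    from funboost.publishers.base_publisher import AbstractPublisher
    from funboost.consumers.base_consumer import AbstractConsumer

    class MyPublisher(AbstractPublisher):
        def concrete_realization_of_publish(self, msg):
            ...  # deliver msg to your transport; AbstractPublisher's other abstract methods omitted in this sketch

    class MyConsumer(AbstractConsumer):
        def _shedual_task(self):
            ...  # pull (or generate) messages and call self._submit_task({'body': ...})

        def _confirm_consume(self, kw):
            ...  # ack

        def _requeue(self, kw):
            ...  # nack / requeue

    register_custom_broker('MY_BROKER', MyPublisher, MyConsumer)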