recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff shows the contents of publicly released package versions as they appear in the supported public registries; it is provided for informational purposes only.
Potentially problematic release: this version of recurvedata-lib might be problematic.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
recurvedata/utils/imports.py
ADDED
@@ -0,0 +1,132 @@
+"""
+This module is a copy of the import_string function from Sentry.
+"""
+
+from __future__ import annotations
+
+from typing import Callable, Type
+
+
+class ModuleProxyCache(dict):
+    def __missing__(self, key):
+        if "." not in key:
+            return __import__(key)
+
+        module_name, class_name = key.rsplit(".", 1)
+
+        module = __import__(module_name, {}, {}, [class_name])
+        handler = getattr(module, class_name)
+
+        # We cache a NoneType for missing imports to avoid repeated lookups
+        self[key] = handler
+
+        return handler
+
+
+_cache = ModuleProxyCache()
+
+
+def import_string(path: str) -> Type:
+    """
+    Path must be module.path.ClassName
+
+    >>> cls = import_string('sentry.models.Group')
+    """
+    result = _cache[path]
+    return result
+
+
+# This is a copy of the qualname function from Apache Airflow.
+def qualname(o: object | Callable) -> str:
+    """Convert an attribute/class/function to a string importable by ``import_string``."""
+    if callable(o) and hasattr(o, "__module__") and hasattr(o, "__name__"):
+        return f"{o.__module__}.{o.__name__}"
+
+    cls = o
+
+    if not isinstance(cls, type):  # instance or class
+        cls = type(cls)
+
+    name = cls.__qualname__
+    module = cls.__module__
+
+    if module and module != "__builtin__":
+        return f"{module}.{name}"
+
+    return name
+
+
+class MockModule:
+    """
+    A placeholder object for handling failed module imports.
+    When attempting to access any attribute or call any method on this object,
+    it will raise an ImportError exception to notify the user that the module
+    was not successfully imported.
+
+    Usage example:
+    ```
+    try:
+        import some_module
+    except ImportError:
+        some_module = MockModule("some_module")
+
+    # When trying to use some_module, it will raise an import error
+    some_module.some_function()  # Will raise ImportError
+    ```
+    """
+
+    def __init__(self, module_name):
+        self.__module_name = module_name
+
+    def __getattr__(self, name):
+        """Triggered when accessing any attribute"""
+        self.__raise_import_error()
+
+    def __call__(self, *args, **kwargs):
+        """Triggered when attempting to call as a function"""
+        self.__raise_import_error()
+
+    def __getitem__(self, key):
+        """Triggered when attempting to access as dictionary or list"""
+        self.__raise_import_error()
+
+    def __raise_import_error(self):
+        """Raise standard import error"""
+        raise ImportError(
+            f"Module '{self.__module_name}' was not successfully imported. Please install the module before using it."
+        )
+
+
+class MockDecorator:
+    """
+    A decorator that raises ImportError when the decorated function is called.
+
+    This is useful for creating placeholder decorators for optional dependencies.
+    The ImportError is only raised when the decorated function is actually called,
+    not when the module is imported or the function is defined.
+
+    Usage example:
+    ```
+    try:
+        from optional_package import some_decorator
+    except ImportError:
+        some_decorator = MockDecorator("optional_package")
+
+    @some_decorator
+    def my_function():
+        pass
+
+    # The ImportError will only be raised when my_function is called, not when it's defined
+    ```
+    """
+
+    def __init__(self, module_name):
+        self.__module_name = module_name
+
+    def __call__(self, func, *args, **kwargs):
+        def wrapper(*args, **kwargs):
+            raise ImportError(
+                f"Module '{self.__module_name}' was not successfully imported. Please install the module before using it."
+            )
+
+        return wrapper
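Taken together, this module (recurvedata/utils/imports.py, matching the +132-line entry in the file list above) gives the library a cached import_string/qualname round trip plus placeholders for optional dependencies. A minimal usage sketch, assuming the wheel is installed; the module paths below are illustrative only:

```python
from recurvedata.utils.imports import MockModule, import_string, qualname

# Resolve a dotted path to an object; results are memoized in ModuleProxyCache.
cls = import_string("collections.OrderedDict")
# qualname() produces a string that import_string() can resolve back.
assert qualname(cls) == "collections.OrderedDict"

try:
    import some_optional_extra  # hypothetical optional dependency
except ImportError:
    some_optional_extra = MockModule("some_optional_extra")

# Any attribute access, call, or indexing on the placeholder raises
# ImportError with an install hint, e.g.:
# some_optional_extra.do_something()  # ImportError
```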
recurvedata/utils/json.py
ADDED
@@ -0,0 +1,80 @@
+import datetime
+import decimal
+import json
+import uuid
+from typing import Any
+
+try:
+    import orjson
+except ImportError:
+    orjson = None
+
+
+def _json_default(obj: Any) -> str:
+    if isinstance(obj, datetime.date):
+        return obj.isoformat()
+    if isinstance(obj, datetime.timedelta):
+        return str(obj)
+    if isinstance(obj, uuid.UUID):
+        return str(obj)
+    if isinstance(obj, decimal.Decimal):
+        return str(obj)
+    raise TypeError(f"Object of type '{obj.__class__.__name__}' is not JSON serializable")
+
+
+def _orjson_default(obj: Any) -> str:
+    if isinstance(obj, datetime.timedelta):
+        return str(obj)
+    if isinstance(obj, decimal.Decimal):
+        return str(obj)
+    raise TypeError(f"Object of type '{obj.__class__.__name__}' is not JSON serializable")
+
+
+def pretty_print(v):
+    print(dumps(v, indent=2, ensure_ascii=False))
+
+
+def dumps(data: Any, **kwargs) -> str:
+    """Serialize ``data`` to JSON. Uses orjson if available."""
+
+    if orjson is None:
+        if not kwargs.get("indent", False):
+            kwargs.setdefault("separators", (",", ":"))
+        return json.dumps(data, default=_json_default, **kwargs)
+
+    # orjson does not support all the same kwargs as json.dumps
+    option = orjson.OPT_NON_STR_KEYS
+    if kwargs.pop("indent", False):
+        option |= orjson.OPT_INDENT_2
+    if kwargs.pop("sort_keys", False):
+        option |= orjson.OPT_SORT_KEYS
+
+    return orjson.dumps(data, default=_orjson_default, option=option).decode()
+
+
+def loads(data: str) -> Any:
+    """Deserialize ``data`` from JSON. Uses orjson if available."""
+
+    if orjson is None:
+        return json.loads(data)
+
+    return orjson.loads(data)
+
+
+class JSONEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, datetime.date):
+            return obj.isoformat()
+
+        if isinstance(obj, datetime.timedelta):
+            return str(obj)
+
+        super(JSONEncoder, self).default(obj)
+
+
+def json_dumps(data, **kwargs):
+    return json.dumps(data, cls=JSONEncoder, **kwargs)
+
+
+def json_loads(content: str, **kwargs):
+    return json.loads(content, **kwargs)
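This module (recurvedata/utils/json.py, matching the +80-line entry in the file list) keeps one `dumps`/`loads` API over two backends: with orjson installed, `indent` and `sort_keys` kwargs are translated into orjson options; without it, the stdlib path uses compact separators and `_json_default` for dates, timedeltas, UUIDs, and Decimals. A minimal round-trip sketch, assuming the wheel is installed:

```python
import datetime
import decimal

from recurvedata.utils.json import dumps, loads

payload = {
    "when": datetime.date(2024, 1, 1),   # serialized via isoformat()
    "amount": decimal.Decimal("1.50"),   # serialized as a string
}

text = dumps(payload, indent=2)  # orjson: OPT_INDENT_2; stdlib: indent=2
assert loads(text) == {"when": "2024-01-01", "amount": "1.50"}
```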
recurvedata/utils/log.py
ADDED
@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+import datetime
+import inspect
+import logging
+from typing import Optional, Type, TypeVar, Union
+
+import pendulum
+
+_T = TypeVar("_T")
+
+
+# This is a copy of the airflow.utils.log.logging_mixin.LoggingMixin class
+class LoggingMixin:
+    """Convenience super-class to have a logger configured with the class name."""
+
+    _log: Optional[logging.Logger] = None
+
+    # Parent logger used by this class. It should match one of the loggers defined in the
+    # `logging_config_class`. By default, this attribute is used to create the final name of the logger, and
+    # will prefix the `_logger_name` with a separating dot.
+    _log_config_logger_name: Optional[str] = None  # noqa: UP007
+
+    _logger_name: Optional[str] = None  # noqa: UP007
+
+    @staticmethod
+    def _create_logger_name(
+        logged_class: Type[_T],
+        log_config_logger_name: str = None,
+        class_logger_name: str = None,
+    ) -> str:
+        """Generate a logger name for the given `logged_class`.
+
+        By default, this function returns the `class_logger_name` as logger name. If it is not provided,
+        the {class.__module__}.{class.__name__} is returned instead. When a `parent_logger_name` is provided,
+        it will prefix the logger name with a separating dot.
+        """
+        logger_name: str = (
+            class_logger_name if class_logger_name is not None else f"{logged_class.__module__}.{logged_class.__name__}"
+        )
+
+        if log_config_logger_name:
+            return f"{log_config_logger_name}.{logger_name}" if logger_name else log_config_logger_name
+        return logger_name
+
+    @classmethod
+    def _get_log(cls, obj: Union["LoggingMixin", Type["LoggingMixin"]], clazz: Type[_T]) -> logging.Logger:
+        if obj._log is None:
+            logger_name: str = cls._create_logger_name(
+                logged_class=clazz,
+                log_config_logger_name=obj._log_config_logger_name,
+                class_logger_name=obj._logger_name,
+            )
+            obj._log = logging.getLogger(logger_name)
+        return obj._log
+
+    @classmethod
+    def logger(cls) -> logging.Logger:
+        """Return a logger."""
+        return LoggingMixin._get_log(cls, cls)
+
+    @property
+    def log(self) -> logging.Logger:
+        """Return a logger."""
+        return LoggingMixin._get_log(self, self.__class__)
+
+
+class AwareFormatter(logging.Formatter):
+    _local_tz = pendulum.local_timezone()
+
+    def formatTime(self, record, datefmt=None):
+        # Use dateutil to get a timezone-aware datetime
+        dt = datetime.datetime.fromtimestamp(record.created, tz=self._local_tz)
+        if datefmt:
+            return dt.strftime(datefmt)
+        return dt.isoformat()
+
+
+def init_logging(
+    level=logging.INFO,
+    fmt="%(asctime)s - %(levelname)s - %(name)s - %(filename)s:%(lineno)d - [%(process)d:%(threadName)s] - %(message)s",
+):
+    logging.basicConfig(level=level, format=fmt)
+
+    logging.getLogger("httpx").setLevel(logging.WARNING)
+
+
+def setup_loguru():
+    class InterceptHandler(logging.Handler):
+        """Intercept standard logging messages and redirect them to loguru."""
+
+        def emit(self, record):
+            from loguru import logger
+
+            level: str | int
+            try:
+                level = logger.level(record.levelname).name
+            except ValueError:
+                level = record.levelno
+
+            # Find caller from where originated the logged message.
+            frame, depth = inspect.currentframe(), 0
+            while frame and (depth == 0 or frame.f_code.co_filename == logging.__file__):
+                frame = frame.f_back
+                depth += 1
+
+            logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())
+
+    # intercept everything at the root logger
+    logging.root.handlers = [InterceptHandler()]
+    # logging.root.setLevel(logging.INFO)
+
+    # remove every other logger's handlers
+    # and propagate to root logger
+    for name in logging.root.manager.loggerDict.keys():
+        logging.getLogger(name).handlers = []
+        logging.getLogger(name).propagate = True
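A minimal sketch of how the LoggingMixin and init_logging pieces compose, assuming the wheel is installed; Worker is a hypothetical consumer class:

```python
import logging

from recurvedata.utils.log import LoggingMixin, init_logging


class Worker(LoggingMixin):  # hypothetical consumer class
    def run(self):
        # self.log is lazily bound to a logger named "<module>.Worker"
        self.log.info("running")


init_logging(level=logging.INFO)  # basicConfig, plus quieting the httpx logger
Worker().run()
```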
recurvedata/utils/log_capture.py
ADDED
@@ -0,0 +1,153 @@
+import logging
+import sys
+import threading
+import traceback
+from queue import Empty, Queue
+from typing import Optional, Protocol, Type
+
+from recurvedata.utils.log import AwareFormatter
+
+
+class MessageHandler(Protocol):
+    def __call__(self, message: str) -> None:
+        ...
+
+
+class OutputInterceptor:
+    def __init__(self, handler: MessageHandler, flush_interval_seconds: int = 5, batch_size: int = 10) -> None:
+        """
+        Initialize the OutputInterceptor object.
+
+        Args:
+            handler: The handler to call with processed messages.
+            flush_interval_seconds: Time interval (in seconds) between flushes.
+            batch_size: Number of messages to accumulate before triggering a flush.
+        """
+        self.handler = handler
+        self.flush_interval_seconds = flush_interval_seconds
+        self.batch_size = batch_size
+
+        self.queue: Queue[str] = Queue()
+        self._stop_event = threading.Event()
+        self._flusher_thread = threading.Thread(target=self._periodic_flush, daemon=True)
+        self._flusher_thread.start()
+
+        # Create a dedicated logger for internal use
+        self._logger = logging.getLogger(self.logger_name)
+
+        self._original_stdout = sys.stdout
+        self._original_stderr = sys.stderr
+
+    @property
+    def logger_name(self) -> str:
+        return f"{__name__}.{self.__class__.__name__}"
+
+    def __enter__(self) -> "OutputInterceptor":
+        """Support for context management, starts the interceptor."""
+        self._original_stdout = sys.stdout
+        self._original_stderr = sys.stderr
+        sys.stdout = self
+        sys.stderr = self
+        return self
+
+    def __exit__(
+        self,
+        exc_type: Optional[type],
+        exc_value: Optional[BaseException],
+        exc_traceback: Optional[BaseException],
+    ) -> None:
+        """Ensure all remaining data is flushed when the context exits."""
+        if exc_type is not None:
+            tb_lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
+            self._write("".join(tb_lines))
+
+        self.stop()
+
+        sys.stdout = self._original_stdout
+        sys.stderr = self._original_stderr
+
+    def write(self, s: str) -> None:
+        """
+        Write a string to the queue. This method is called when sys.stdout or sys.stderr is written to.
+        """
+        # Write to the original output stream
+        if sys.stdout is self:
+            self._original_stdout.write(s)
+        elif sys.stderr is self:
+            self._original_stderr.write(s)
+
+        self._write(s)
+
+    def flush(self) -> None:
+        """No-op flush method to maintain compatibility with sys.stdout and sys.stderr."""
+        pass
+
+    def write_log(self, s: str) -> None:
+        self._write(s + "\n")
+
+    def _write(self, s: str) -> None:
+        if isinstance(s, bytes):
+            # when an error raised, the last line of s could be bytes
+            s = s.decode("utf-8", errors="replace")
+        # if s.strip():  # Ignore empty lines
+        #     self.queue.put(s)
+        self.queue.put(s)
+
+        if self.queue.qsize() >= self.batch_size:
+            self.flush_messages()
+        elif self._stop_event.is_set():
+            # when Exception, because we Propagate the exception, the exception will write to stderr after __exit__
+            self.flush_messages()
+
+    def _periodic_flush(self) -> None:
+        """Periodically flush the queue and process each message using the processor."""
+        while not self._stop_event.is_set():
+            try:
+                self.flush_messages()
+            except Exception as e:
+                self._logger.error(f"Error during message flush: {e}", exc_info=True)
+            self._stop_event.wait(self.flush_interval_seconds)
+
+    def flush_messages(self) -> None:
+        """Flush all queued messages using the provided processor."""
+        messages = []
+        while True:
+            try:
+                messages.append(self.queue.get_nowait())
+            except Empty:
+                break
+        if messages:
+            try:
+                self.handler("".join(messages))
+            except Exception as e:
+                self._logger.error(f"Failed to process messages: {e}", exc_info=True)
+
+    def stop(self) -> None:
+        """Stop the periodic flush thread and ensure any remaining data is processed."""
+        self._stop_event.set()
+        self._flusher_thread.join()
+        self.flush_messages()  # Ensure all remaining data is processed
+
+
+class LoggingHandler(logging.Handler):
+    def __init__(self, interceptor: OutputInterceptor) -> None:
+        super().__init__()
+        self.interceptor = interceptor
+
+    def emit(self, record: logging.LogRecord) -> None:
+        # Filter out logs from the OutputInterceptor's own logger
+        if record.name == self.interceptor.logger_name:
+            return
+
+        log_entry = self.format(record)
+        self.interceptor.write_log(log_entry)
+
+
+def setup_log_handler(
+    interceptor: OutputInterceptor,
+    fmt="[%(asctime)s] - %(levelname)s - %(name)s - %(filename)s:%(lineno)d - [%(process)d:%(threadName)s] - %(message)s",
+    formatter_cls: Type[logging.Formatter] = AwareFormatter,
+):
+    handler = LoggingHandler(interceptor)
+    handler.setFormatter(formatter_cls(fmt))
+    logging.getLogger().addHandler(handler)
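This module (recurvedata/utils/log_capture.py, matching the +153-line entry in the file list) tees sys.stdout/sys.stderr into a batching queue while a daemon thread flushes batches to a caller-supplied handler; setup_log_handler additionally routes logging records through the same interceptor. A minimal sketch, assuming the wheel is installed, where the handler just accumulates batches in memory:

```python
from recurvedata.utils.log_capture import OutputInterceptor

batches: list[str] = []

with OutputInterceptor(handler=batches.append, flush_interval_seconds=1, batch_size=2):
    print("hello")  # still written to the real stdout, and also queued
    print("world")  # reaching batch_size flushes queued text to the handler

# __exit__ calls stop(), so any remaining queued output has been flushed.
assert "hello" in "".join(batches)
```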
recurvedata/utils/mp.py
ADDED
@@ -0,0 +1,178 @@
+import logging
+import subprocess
+import sys
+import time
+from multiprocessing import Process
+from multiprocessing.queues import Queue
+from queue import Empty, Full
+from typing import Any, Optional, Union
+
+logger = logging.getLogger(__name__)
+
+
+def get_qsize(queue: Queue) -> Optional[int]:
+    if sys.platform.lower() == "darwin":
+        # queue.qsize() Raises NotImplementedError on Mac OSX because of broken sem_getvalue()
+        return None
+    return queue.qsize()
+
+
+def safe_join_subprocesses(workers: list[Process], result_queue: Queue) -> list[Any]:
+    result = []
+    live_workers = list(workers)
+    while live_workers:
+        try:
+            while 1:
+                result.append(result_queue.get(False))
+        except Empty:
+            pass
+
+        time.sleep(0.5)  # Give tasks a chance to put more data in
+        if not result_queue.empty():
+            continue
+        live_workers = [p for p in live_workers if p.is_alive()]
+    return result
+
+
+def has_process_fail(workers: list[Process], log: bool = True) -> bool:
+    for p in workers:
+        if p.is_alive():
+            continue
+        if p.exitcode != 0:
+            if log:
+                logger.info(f"found process {p.pid} fail, exitcode {p.exitcode}")
+            return True
+    return False
+
+
+def terminate_processes(workers: list[Process]):
+    for p in workers:
+        if p.is_alive():
+            logger.info(f"start terminate process {p.pid}")
+            p.terminate()
+            logger.info(f"finish terminate process {p.pid}")
+
+
+def master_safe_put_queue(
+    queue: Queue,
+    obj: Any,
+    workers: list[Process],
+    block: bool = True,
+    timeout: Optional[int] = None,
+) -> Optional[bool]:
+    """
+    A scenario where queue.put is called involves the master putting data into the queue, while workers consume the data.
+    Under the default settings of timeout=None and block=True,
+    if queue.maxsize is small and workers encounter errors that prevent them from consuming data promptly,
+    this can cause the master to become stuck when calling queue.put.
+    This master_safe_put_queue function can address this issue.
+    When operating under timeout=None and block=True,
+    it uses a small timeout (10 seconds) and continuously attempts queue.put(timeout=10) in a loop.
+    If queue.put becomes stuck for 10 seconds, it raises a queue.Full error.
+
+    Args:
+        queue: multiprocessing.Queue
+        obj: the object to be placed into the queue
+        workers: subprocesses
+        block: whether to block when the queue has no free slots
+        timeout: the timeout for queue.put
+
+    Returns:
+        If there are any worker processes that have exited abnormally, it returns True,
+        indicating that an abnormal worker exit caused the master's queue.put to become stuck.
+        If all workers are functioning normally, it indicates that the workers are simply consuming data slowly,
+        and it will continue to call queue.put(timeout=10) in a loop.
+        In other cases, it behaves the same as queue.put.
+    """
+    if timeout is None and block:
+        while True:
+            try:
+                return queue.put(obj, timeout=10)
+            except Full:
+                if has_process_fail(workers):
+                    return True
+    else:
+        return queue.put(obj, block=block, timeout=timeout)
+
+
+def safe_join_subprocesses_early_stop(workers: list[Process], result_queue: Queue) -> tuple[list, bool]:
+    """
+    wait and read the sub workers' result from result_queue,
+    exit when
+    1) one sub worker fail, or
+    2) all sub workers success
+
+    Args:
+        workers: sub progresses
+        result_queue: queue which sub progresses put result into
+
+    Returns:
+        result got from sub workers, and early_stop flag
+    """
+    result = []
+    early_stop = False
+    live_workers = list(workers)
+    last_check_early_stop_time = time.time()
+    while live_workers:
+        try:
+            while 1:
+                result.append(result_queue.get(False))
+
+                if time.time() - last_check_early_stop_time > 10:
+                    if has_process_fail(live_workers):
+                        early_stop = True
+                        return result, early_stop
+                    last_check_early_stop_time = time.time()
+
+        except Empty:
+            pass
+
+        time.sleep(0.5)  # Give tasks a chance to put more data in
+        if not result_queue.empty():
+            continue
+
+        if has_process_fail(live_workers):
+            early_stop = True
+            return result, early_stop
+        last_check_early_stop_time = time.time()
+        live_workers = [p for p in live_workers if p.is_alive()]
+    return result, early_stop
+
+
+def run_subprocess(
+    cmd: Union[str, list],
+    return_output=False,
+    _logger: logging.Logger = logger,
+    **kwargs,
+) -> Optional[str]:
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, **kwargs)
+    logger.info(f"started sub process: {cmd}, pid: {p.pid}")
+    lines: list[str] = []
+    for raw_line in iter(p.stdout.readline, ""):
+        line = raw_line.rstrip()
+        _logger.info(line)
+        if return_output:
+            lines.append(line)
+    p.wait()
+    logger.info("sub process exited with return code %s", p.returncode)
+    if p.returncode:
+        raise subprocess.CalledProcessError(p.returncode, p.args)
+    return "\n".join(lines)
+
+
+def robust_run_subprocess(
+    cmd: Union[str, list],
+    _logger: logging.Logger = logger,
+    **kwargs,
+) -> tuple[str, int]:
+    p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, **kwargs)
+    logger.info(f"started sub process: {cmd}, pid: {p.pid}")
+    lines: list[str] = []
+    for raw_line in iter(p.stdout.readline, ""):
+        line = raw_line.rstrip()
+        _logger.info(line)
+        lines.append(line)
+    p.wait()
+    logger.info("sub process exited with return code %s", p.returncode)
+    output = "\n".join(lines)
+    return output, p.returncode
+
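The two subprocess helpers at the end of this module differ only in failure handling, which a short sketch makes concrete (assuming the wheel is installed; the commands are placeholders):

```python
import sys

from recurvedata.utils.mp import robust_run_subprocess, run_subprocess

# Streams the command's combined stdout/stderr through the logger and
# collects it; raises CalledProcessError on a non-zero exit code.
output = run_subprocess([sys.executable, "-c", "print('ok')"], return_output=True)
assert output == "ok"

# The "robust" variant never raises: it returns (output, returncode)
# and leaves failure handling to the caller.
out, code = robust_run_subprocess([sys.executable, "-c", "import sys; sys.exit(3)"])
assert code == 3
```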