recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic; review the details below before using it.

Files changed (333)
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,132 @@
1
+ """
2
+ This module is a copy of the import_string function from Sentry.
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ from typing import Callable, Type
8
+
9
+
10
class ModuleProxyCache(dict):
    """Dict subclass that lazily resolves dotted import paths on missing keys."""

    def __missing__(self, key):
        # A bare module name (no dot) is imported directly and never cached.
        if "." not in key:
            return __import__(key)

        module_path, attr_name = key.rsplit(".", 1)
        module = __import__(module_path, {}, {}, [attr_name])
        resolved = getattr(module, attr_name)

        # Memoize the successful lookup so later accesses skip the import machinery.
        self[key] = resolved
        return resolved
24
+
25
+
26
_cache = ModuleProxyCache()


def import_string(path: str) -> Type:
    """Resolve a dotted ``module.path.ClassName`` string to the object it names.

    >>> cls = import_string('sentry.models.Group')
    """
    return _cache[path]
37
+
38
+
39
# Ported from Apache Airflow's qualname helper.
def qualname(o: object | Callable) -> str:
    """Convert an attribute/class/function to a string importable by ``import_string``."""
    # Plain callables (functions, builtins, classes) carry their own module/name pair.
    if callable(o) and hasattr(o, "__module__") and hasattr(o, "__name__"):
        return f"{o.__module__}.{o.__name__}"

    # Otherwise describe the object's type (or the type itself if one was given).
    klass = o if isinstance(o, type) else type(o)
    module, name = klass.__module__, klass.__qualname__

    if module and module != "__builtin__":
        return f"{module}.{name}"
    return name
57
+
58
+
59
class MockModule:
    """
    Stand-in for a module that failed to import.

    Every attribute access, call, or subscript on the instance raises
    ImportError, so the missing dependency surfaces at the point of use
    instead of at import time.

    Usage example:
    ```
    try:
        import some_module
    except ImportError:
        some_module = MockModule("some_module")

    # When trying to use some_module, it will raise an import error
    some_module.some_function()  # Will raise ImportError
    ```
    """

    def __init__(self, module_name):
        self.__module_name = module_name

    def __getattr__(self, name):
        """Any attribute access fails with ImportError."""
        self.__fail()

    def __call__(self, *args, **kwargs):
        """Calling the placeholder fails with ImportError."""
        self.__fail()

    def __getitem__(self, key):
        """Indexing the placeholder fails with ImportError."""
        self.__fail()

    def __fail(self):
        raise ImportError(
            f"Module '{self.__module_name}' was not successfully imported. Please install the module before using it."
        )
98
+
99
+
100
class MockDecorator:
    """
    Decorator placeholder for an optional dependency that failed to import.

    Decorating a function always succeeds; the ImportError is raised only
    when the decorated function is eventually called, never at definition
    or import time.

    Usage example:
    ```
    try:
        from optional_package import some_decorator
    except ImportError:
        some_decorator = MockDecorator("optional_package")

    @some_decorator
    def my_function():
        pass

    # The ImportError will only be raised when my_function is called, not when it's defined
    ```
    """

    def __init__(self, module_name):
        self.__module_name = module_name

    def __call__(self, func, *args, **kwargs):
        module_name = self.__module_name

        def wrapper(*args, **kwargs):
            raise ImportError(
                f"Module '{module_name}' was not successfully imported. Please install the module before using it."
            )

        return wrapper
@@ -0,0 +1,80 @@
1
+ import datetime
2
+ import decimal
3
+ import json
4
+ import uuid
5
+ from typing import Any
6
+
7
+ try:
8
+ import orjson
9
+ except ImportError:
10
+ orjson = None
11
+
12
+
13
+ def _json_default(obj: Any) -> str:
14
+ if isinstance(obj, datetime.date):
15
+ return obj.isoformat()
16
+ if isinstance(obj, datetime.timedelta):
17
+ return str(obj)
18
+ if isinstance(obj, uuid.UUID):
19
+ return str(obj)
20
+ if isinstance(obj, decimal.Decimal):
21
+ return str(obj)
22
+ raise TypeError(f"Object of type '{obj.__class__.__name__}' is not JSON serializable")
23
+
24
+
25
+ def _orjson_default(obj: Any) -> str:
26
+ if isinstance(obj, datetime.timedelta):
27
+ return str(obj)
28
+ if isinstance(obj, decimal.Decimal):
29
+ return str(obj)
30
+ raise TypeError(f"Object of type '{obj.__class__.__name__}' is not JSON serializable")
31
+
32
+
33
def pretty_print(v):
    """Dump ``v`` as indented, non-ASCII-escaped JSON and print it."""
    rendered = dumps(v, indent=2, ensure_ascii=False)
    print(rendered)
35
+
36
+
37
def dumps(data: Any, **kwargs) -> str:
    """Serialize ``data`` to JSON, preferring orjson when installed."""

    if orjson is not None:
        # Translate the json.dumps-style kwargs into orjson option flags;
        # any other kwargs are unsupported by orjson and are dropped.
        flags = orjson.OPT_NON_STR_KEYS
        if kwargs.pop("indent", False):
            flags |= orjson.OPT_INDENT_2
        if kwargs.pop("sort_keys", False):
            flags |= orjson.OPT_SORT_KEYS
        return orjson.dumps(data, default=_orjson_default, option=flags).decode()

    # Stdlib fallback: compact separators unless indentation was requested.
    if not kwargs.get("indent", False):
        kwargs.setdefault("separators", (",", ":"))
    return json.dumps(data, default=_json_default, **kwargs)
53
+
54
+
55
def loads(data: str) -> Any:
    """Deserialize JSON ``data``, preferring orjson when installed."""
    return json.loads(data) if orjson is None else orjson.loads(data)
62
+
63
+
64
class JSONEncoder(json.JSONEncoder):
    """``json.JSONEncoder`` that also serializes dates, timedeltas, UUIDs and Decimals.

    Handles the same extra types as ``_json_default`` so ``json_dumps`` and
    ``dumps`` agree on what is serializable.
    """

    def default(self, obj):
        # datetime.datetime is a subclass of date, so this covers both.
        if isinstance(obj, datetime.date):
            return obj.isoformat()
        if isinstance(obj, (datetime.timedelta, uuid.UUID, decimal.Decimal)):
            return str(obj)
        # Bug fix: the original dropped the result of super().default(obj),
        # implicitly returning None; propagate its return/TypeError instead.
        return super().default(obj)
73
+
74
+
75
def json_dumps(data, **kwargs):
    """Serialize ``data`` with the module's ``JSONEncoder`` as the encoder class."""
    return json.dumps(data, cls=JSONEncoder, **kwargs)
77
+
78
+
79
def json_loads(content: str, **kwargs):
    """Thin wrapper over ``json.loads``, kept for API symmetry with ``json_dumps``."""
    return json.loads(content, **kwargs)
@@ -0,0 +1,117 @@
1
+ from __future__ import annotations
2
+
3
+ import datetime
4
+ import inspect
5
+ import logging
6
+ from typing import Optional, Type, TypeVar, Union
7
+
8
+ import pendulum
9
+
10
+ _T = TypeVar("_T")
11
+
12
+
13
# This is a copy of the airflow.utils.log.logging_mixin.LoggingMixin class
class LoggingMixin:
    """Convenience super-class to have a logger configured with the class name."""

    # Lazily-created cached logger; populated on first access via `log`/`logger()`.
    _log: Optional[logging.Logger] = None

    # Parent logger used by this class. It should match one of the loggers defined in the
    # `logging_config_class`. By default, this attribute is used to create the final name of the logger, and
    # will prefix the `_logger_name` with a separating dot.
    _log_config_logger_name: Optional[str] = None  # noqa: UP007

    _logger_name: Optional[str] = None  # noqa: UP007

    @staticmethod
    def _create_logger_name(
        logged_class: Type[_T],
        # Fix: these defaulted to `None` while annotated plain `str`
        # (implicit Optional, rejected by modern type checkers).
        log_config_logger_name: Optional[str] = None,
        class_logger_name: Optional[str] = None,
    ) -> str:
        """Generate a logger name for the given `logged_class`.

        By default, this function returns the `class_logger_name` as logger name. If it is not provided,
        the {class.__module__}.{class.__name__} is returned instead. When a `log_config_logger_name` is
        provided, it will prefix the logger name with a separating dot.
        """
        logger_name: str = (
            class_logger_name if class_logger_name is not None else f"{logged_class.__module__}.{logged_class.__name__}"
        )

        if log_config_logger_name:
            return f"{log_config_logger_name}.{logger_name}" if logger_name else log_config_logger_name
        return logger_name

    @classmethod
    def _get_log(cls, obj: Union["LoggingMixin", Type["LoggingMixin"]], clazz: Type[_T]) -> logging.Logger:
        """Create (once) and return the logger for ``obj``, an instance or a class."""
        if obj._log is None:
            logger_name: str = cls._create_logger_name(
                logged_class=clazz,
                log_config_logger_name=obj._log_config_logger_name,
                class_logger_name=obj._logger_name,
            )
            obj._log = logging.getLogger(logger_name)
        return obj._log

    @classmethod
    def logger(cls) -> logging.Logger:
        """Return a logger named after the class."""
        return LoggingMixin._get_log(cls, cls)

    @property
    def log(self) -> logging.Logger:
        """Return a logger named after the instance's class."""
        return LoggingMixin._get_log(self, self.__class__)
66
+
67
+
68
class AwareFormatter(logging.Formatter):
    """Formatter whose ``asctime`` is timezone-aware (local timezone via pendulum)."""

    _local_tz = pendulum.local_timezone()

    def formatTime(self, record, datefmt=None):
        # Attach the local timezone so emitted timestamps are unambiguous.
        aware = datetime.datetime.fromtimestamp(record.created, tz=self._local_tz)
        return aware.strftime(datefmt) if datefmt else aware.isoformat()
77
+
78
+
79
def init_logging(
    level=logging.INFO,
    fmt="%(asctime)s - %(levelname)s - %(name)s - %(filename)s:%(lineno)d - [%(process)d:%(threadName)s] - %(message)s",
):
    """Configure root logging and quiet down noisy third-party loggers."""
    logging.basicConfig(format=fmt, level=level)

    # httpx logs every request at INFO; keep only warnings and above.
    logging.getLogger("httpx").setLevel(logging.WARNING)
86
+
87
+
88
def setup_loguru():
    """Route all stdlib ``logging`` records through loguru.

    Installs an intercepting handler on the root logger, then strips every
    other logger's handlers and forces propagation so each record funnels
    through loguru exactly once.
    """

    class InterceptHandler(logging.Handler):
        # Fix: the original had a stray empty string literal ("" ) concatenated
        # onto this docstring — a typo with no effect other than confusion.
        """Intercept standard logging messages and redirect them to loguru."""

        def emit(self, record):
            # Imported lazily so loguru stays an optional dependency.
            from loguru import logger

            level: str | int
            try:
                level = logger.level(record.levelname).name
            except ValueError:
                level = record.levelno

            # Find caller from where originated the logged message.
            frame, depth = inspect.currentframe(), 0
            while frame and (depth == 0 or frame.f_code.co_filename == logging.__file__):
                frame = frame.f_back
                depth += 1

            logger.opt(depth=depth, exception=record.exc_info).log(level, record.getMessage())

    # intercept everything at the root logger
    logging.root.handlers = [InterceptHandler()]
    # logging.root.setLevel(logging.INFO)

    # remove every other logger's handlers
    # and propagate to root logger
    for name in logging.root.manager.loggerDict.keys():
        logging.getLogger(name).handlers = []
        logging.getLogger(name).propagate = True
@@ -0,0 +1,153 @@
1
+ import logging
2
+ import sys
3
+ import threading
4
+ import traceback
5
+ from queue import Empty, Queue
6
+ from typing import Optional, Protocol, Type
7
+
8
+ from recurvedata.utils.log import AwareFormatter
9
+
10
+
11
class MessageHandler(Protocol):
    """Callable that consumes a batch of intercepted output as one string."""

    def __call__(self, message: str) -> None:
        ...
14
+
15
+
16
class OutputInterceptor:
    """Capture sys.stdout/sys.stderr writes into a queue and hand them to a
    handler in batches.

    A daemon thread flushes the queue every ``flush_interval_seconds``; a flush
    is also triggered whenever the queue reaches ``batch_size`` entries.
    Intended for use as a context manager; captured text is still echoed to the
    original streams.
    """

    def __init__(self, handler: MessageHandler, flush_interval_seconds: int = 5, batch_size: int = 10) -> None:
        """
        Initialize the OutputInterceptor object.

        Args:
            handler: The handler to call with processed messages.
            flush_interval_seconds: Time interval (in seconds) between flushes.
            batch_size: Number of messages to accumulate before triggering a flush.
        """
        self.handler = handler
        self.flush_interval_seconds = flush_interval_seconds
        self.batch_size = batch_size

        self.queue: Queue[str] = Queue()
        self._stop_event = threading.Event()
        # NOTE: the flusher thread starts immediately on construction, before
        # __enter__ is called.
        self._flusher_thread = threading.Thread(target=self._periodic_flush, daemon=True)
        self._flusher_thread.start()

        # Create a dedicated logger for internal use
        self._logger = logging.getLogger(self.logger_name)

        # Saved here as well as in __enter__ so write() is safe even when the
        # instance is used without the context-manager protocol.
        self._original_stdout = sys.stdout
        self._original_stderr = sys.stderr

    @property
    def logger_name(self) -> str:
        # Used by LoggingHandler to filter out this class's own log records.
        return f"{__name__}.{self.__class__.__name__}"

    def __enter__(self) -> "OutputInterceptor":
        """Support for context management, starts the interceptor."""
        self._original_stdout = sys.stdout
        self._original_stderr = sys.stderr
        sys.stdout = self
        sys.stderr = self
        return self

    def __exit__(
        self,
        exc_type: Optional[type],
        exc_value: Optional[BaseException],
        exc_traceback: Optional[BaseException],  # NOTE(review): at runtime this is a traceback object, not BaseException
    ) -> None:
        """Ensure all remaining data is flushed when the context exits."""
        if exc_type is not None:
            # Capture the formatted traceback too; returning None (falsy) lets
            # the exception propagate after the streams are restored below.
            tb_lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
            self._write("".join(tb_lines))

        self.stop()

        sys.stdout = self._original_stdout
        sys.stderr = self._original_stderr

    def write(self, s: str) -> None:
        """
        Write a string to the queue. This method is called when sys.stdout or sys.stderr is written to.
        """
        # Write to the original output stream
        if sys.stdout is self:
            self._original_stdout.write(s)
        elif sys.stderr is self:
            self._original_stderr.write(s)

        self._write(s)

    def flush(self) -> None:
        """No-op flush method to maintain compatibility with sys.stdout and sys.stderr."""
        pass

    def write_log(self, s: str) -> None:
        # Entry point for LoggingHandler: formatted log records arrive without
        # a trailing newline, so append one.
        self._write(s + "\n")

    def _write(self, s: str) -> None:
        # Normalize to str and enqueue; may trigger an immediate flush.
        if isinstance(s, bytes):
            # when an error raised, the last line of s could be bytes
            s = s.decode("utf-8", errors="replace")
        # if s.strip():  # Ignore empty lines
        #     self.queue.put(s)
        self.queue.put(s)

        if self.queue.qsize() >= self.batch_size:
            self.flush_messages()
        elif self._stop_event.is_set():
            # when Exception, because we Propagate the exception, the exception will write to stderr after __exit__
            self.flush_messages()

    def _periodic_flush(self) -> None:
        """Periodically flush the queue and process each message using the processor."""
        while not self._stop_event.is_set():
            try:
                self.flush_messages()
            except Exception as e:
                self._logger.error(f"Error during message flush: {e}", exc_info=True)
            self._stop_event.wait(self.flush_interval_seconds)

    def flush_messages(self) -> None:
        """Flush all queued messages using the provided processor."""
        messages = []
        # Drain without blocking; anything enqueued mid-drain is picked up on
        # the next flush.
        while True:
            try:
                messages.append(self.queue.get_nowait())
            except Empty:
                break
        if messages:
            try:
                self.handler("".join(messages))
            except Exception as e:
                self._logger.error(f"Failed to process messages: {e}", exc_info=True)

    def stop(self) -> None:
        """Stop the periodic flush thread and ensure any remaining data is processed."""
        self._stop_event.set()
        self._flusher_thread.join()
        self.flush_messages()  # Ensure all remaining data is processed
130
+
131
+
132
class LoggingHandler(logging.Handler):
    """Forward formatted stdlib log records into an ``OutputInterceptor``."""

    def __init__(self, interceptor: OutputInterceptor) -> None:
        super().__init__()
        self.interceptor = interceptor

    def emit(self, record: logging.LogRecord) -> None:
        # Skip records produced by the interceptor itself to avoid a feedback loop.
        if record.name == self.interceptor.logger_name:
            return
        self.interceptor.write_log(self.format(record))
144
+
145
+
146
def setup_log_handler(
    interceptor: OutputInterceptor,
    fmt="[%(asctime)s] - %(levelname)s - %(name)s - %(filename)s:%(lineno)d - [%(process)d:%(threadName)s] - %(message)s",
    formatter_cls: Type[logging.Formatter] = AwareFormatter,
):
    """Attach a ``LoggingHandler`` feeding ``interceptor`` to the root logger."""
    log_handler = LoggingHandler(interceptor)
    log_handler.setFormatter(formatter_cls(fmt))
    logging.getLogger().addHandler(log_handler)
@@ -0,0 +1,178 @@
1
+ import logging
2
+ import subprocess
3
+ import sys
4
+ import time
5
+ from multiprocessing import Process
6
+ from multiprocessing.queues import Queue
7
+ from queue import Empty, Full
8
+ from typing import Any, Optional, Union
9
+
10
+ logger = logging.getLogger(__name__)
11
+
12
+
13
def get_qsize(queue: Queue) -> Optional[int]:
    """Return queue.qsize(), or None where it is unavailable.

    On macOS, multiprocessing's qsize() raises NotImplementedError because
    of the platform's broken sem_getvalue(), so the size is reported as
    unknown (None) instead of raising.
    """
    return None if sys.platform.lower() == "darwin" else queue.qsize()
18
+
19
+
20
def safe_join_subprocesses(workers: list[Process], result_queue: Queue) -> list[Any]:
    """Collect items from *result_queue* until every worker has exited.

    The queue is drained between aliveness checks so a worker can never
    deadlock on a full queue while the master is waiting for it to finish.
    """
    collected: list[Any] = []
    remaining = list(workers)
    while remaining:
        # Grab everything currently available without blocking.
        try:
            while True:
                collected.append(result_queue.get(False))
        except Empty:
            pass

        time.sleep(0.5)  # Give tasks a chance to put more data in
        # Only re-evaluate worker liveness once the queue looks drained.
        if result_queue.empty():
            remaining = [w for w in remaining if w.is_alive()]
    return collected
35
+
36
+
37
def has_process_fail(workers: list[Process], log: bool = True) -> bool:
    """Return True if any finished worker exited with a nonzero code.

    Workers that are still running are ignored. When *log* is true, the
    first failing worker found is reported via the module logger.
    """
    for p in workers:
        if not p.is_alive() and p.exitcode != 0:
            if log:
                logger.info(f"found process {p.pid} fail, exitcode {p.exitcode}")
            return True
    return False
46
+
47
+
48
def terminate_processes(workers: list[Process]):
    """Send SIGTERM to every worker that is still running.

    Already-exited workers are skipped; each termination is logged before
    and after for debuggability.
    """
    for p in workers:
        if not p.is_alive():
            continue
        logger.info(f"start terminate process {p.pid}")
        p.terminate()
        logger.info(f"finish terminate process {p.pid}")
54
+
55
+
56
def master_safe_put_queue(
    queue: Queue,
    obj: Any,
    workers: list[Process],
    block: bool = True,
    timeout: Optional[int] = None,
) -> Optional[bool]:
    """Put *obj* into *queue* without letting the master hang on dead workers.

    With the default block=True and timeout=None, a plain queue.put can block
    forever when the queue is full and the consuming workers have crashed.
    In that mode this wrapper instead retries queue.put with a 10-second
    timeout in a loop: whenever the put times out (raises queue.Full) it
    checks the workers and returns True if any of them exited abnormally —
    the likely cause of the stall. If all workers are alive, they are assumed
    to be merely slow consumers and the put is retried. Any other
    block/timeout combination is delegated directly to queue.put.

    Args:
        queue: multiprocessing.Queue
        obj: the object to be placed into the queue
        workers: subprocesses consuming from the queue
        block: whether to block when the queue has no free slots
        timeout: the timeout for queue.put

    Returns:
        True when a dead worker was detected while the put was stuck;
        otherwise whatever queue.put returns (None).
    """
    if timeout is not None or not block:
        # Caller opted out of the indefinite-blocking mode: plain put.
        return queue.put(obj, block=block, timeout=timeout)
    while True:
        try:
            return queue.put(obj, timeout=10)
        except Full:
            # Stuck for 10s: bail out only if a worker died; otherwise retry.
            if has_process_fail(workers):
                return True
96
+
97
+
98
def safe_join_subprocesses_early_stop(workers: list[Process], result_queue: Queue) -> tuple[list, bool]:
    """
    Wait for the sub workers and read their results from result_queue,
    exiting when
    1) one sub worker fails (nonzero exitcode), or
    2) all sub workers finish successfully.

    Args:
        workers: subprocesses producing results
        result_queue: queue which the subprocesses put results into

    Returns:
        results collected from the sub workers, and an early_stop flag that
        is True when a worker failure cut the wait short
    """
    result = []
    early_stop = False
    live_workers = list(workers)
    # Timestamp of the last worker-failure check; used to rate-limit the
    # (comparatively expensive) has_process_fail scan to once per 10 seconds
    # while we are busy draining a steady stream of results.
    last_check_early_stop_time = time.time()
    while live_workers:
        try:
            # Drain everything currently available without blocking.
            while 1:
                result.append(result_queue.get(False))

                # A long drain can keep us inside this inner loop for a long
                # time, so also check for failed workers here, at most every
                # 10 seconds.
                if time.time() - last_check_early_stop_time > 10:
                    if has_process_fail(live_workers):
                        early_stop = True
                        return result, early_stop
                    last_check_early_stop_time = time.time()

        except Empty:
            pass

        time.sleep(0.5)  # Give tasks a chance to put more data in
        if not result_queue.empty():
            # More data arrived during the sleep; go back and drain it first.
            continue

        # Queue looks drained: check for failures, then drop finished workers.
        if has_process_fail(live_workers):
            early_stop = True
            return result, early_stop
        last_check_early_stop_time = time.time()
        live_workers = [p for p in live_workers if p.is_alive()]
    return result, early_stop
140
+
141
+
142
def run_subprocess(
    cmd: Union[str, list],
    return_output=False,
    _logger: Optional[logging.Logger] = None,
    **kwargs,
) -> Optional[str]:
    """Run *cmd*, streaming its combined stdout/stderr to a logger line by line.

    Args:
        cmd: command to execute, as a string or argv list (passed to Popen).
        return_output: when True, the captured output lines are kept and
            returned joined with newlines; otherwise "" is returned.
        _logger: logger that receives each output line and the start/exit
            messages; defaults to the module logger. (None is a
            backward-compatible sentinel for the previous `= logger` default.)
        **kwargs: forwarded to subprocess.Popen.

    Returns:
        The captured output, or "" when return_output is False.

    Raises:
        subprocess.CalledProcessError: if the process exits with a nonzero
            return code.
    """
    if _logger is None:
        _logger = logger
    lines: list[str] = []
    # Context manager guarantees the stdout pipe is closed and the child is
    # reaped even if logging raises mid-stream (previously the pipe leaked).
    with subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, **kwargs
    ) as p:
        # Use _logger consistently (previously the start/exit messages went to
        # the module logger even when a caller supplied its own _logger).
        _logger.info(f"started sub process: {cmd}, pid: {p.pid}")
        for raw_line in iter(p.stdout.readline, ""):
            line = raw_line.rstrip()
            _logger.info(line)
            if return_output:
                lines.append(line)
        p.wait()
    _logger.info("sub process exited with return code %s", p.returncode)
    if p.returncode:
        raise subprocess.CalledProcessError(p.returncode, p.args)
    return "\n".join(lines)
161
+
162
+
163
def robust_run_subprocess(
    cmd: Union[str, list],
    _logger: Optional[logging.Logger] = None,
    **kwargs,
) -> tuple[str, int]:
    """Like run_subprocess, but never raises on a nonzero exit code.

    Runs *cmd*, streams its combined stdout/stderr to the logger line by
    line, and always captures the output.

    Args:
        cmd: command to execute, as a string or argv list (passed to Popen).
        _logger: logger that receives each output line and the start/exit
            messages; defaults to the module logger. (None is a
            backward-compatible sentinel for the previous `= logger` default.)
        **kwargs: forwarded to subprocess.Popen.

    Returns:
        A (output, returncode) tuple; output is the newline-joined captured
        lines and returncode is the process exit code.
    """
    if _logger is None:
        _logger = logger
    lines: list[str] = []
    # Context manager guarantees the stdout pipe is closed and the child is
    # reaped even if logging raises mid-stream (previously the pipe leaked).
    with subprocess.Popen(
        cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, **kwargs
    ) as p:
        # Use _logger consistently (previously the start/exit messages went to
        # the module logger even when a caller supplied its own _logger).
        _logger.info(f"started sub process: {cmd}, pid: {p.pid}")
        for raw_line in iter(p.stdout.readline, ""):
            line = raw_line.rstrip()
            _logger.info(line)
            lines.append(line)
        p.wait()
    _logger.info("sub process exited with return code %s", p.returncode)
    return "\n".join(lines), p.returncode