recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,19 @@
1
+ from typing import Any, KeysView, Union
2
+
3
+
4
class AttrDict(dict):
    """A dict that allows for attribute-style access.

    Reading an attribute returns the value stored under that key, or ``None``
    when the key is absent. A value that is a ``dict`` is returned wrapped in
    a *new* ``AttrDict`` on every read, so chained reads such as
    ``cfg.db.host`` work. Because the wrapper is a copy, assigning through it
    (``cfg.db.host = "x"``) does not change the dict stored in the parent.
    """

    def __getattr__(self, item: str) -> Union[Any, "AttrDict"]:
        """Return ``self[item]``, or ``None`` when the key is missing.

        Raises:
            AttributeError: for a missing dunder name (``__x__``). Protocol
                probes such as ``hasattr(obj, "__deepcopy__")`` must see the
                attribute as absent rather than receive a non-callable None.
        """
        if item not in self:
            if item.startswith("__") and item.endswith("__"):
                raise AttributeError(item)
            return None
        value = self[item]
        if isinstance(value, dict):
            return AttrDict(value)
        return value

    def __setattr__(self, key: str, value: Any) -> None:
        """Store *value* under *key* in the mapping itself."""
        self[key] = value

    def __dir__(self) -> "list[str]":
        """List the string keys, so ``dir()`` only reports valid attribute names."""
        # dir() sorts the result; non-str keys cannot be attributes and would
        # make sorting fail when mixed with str keys.
        return [key for key in self if isinstance(key, str)]
@@ -0,0 +1,20 @@
1
+ import contextlib
2
+ import json
3
+ import os
4
+ import traceback
5
+
6
+
7
@contextlib.contextmanager
def capture_error_to_file(filename: str, suppress_error: bool = False):
    """Run the wrapped block and, if it raises, dump the error to a JSON file.

    The JSON document has the keys ``traceback``, ``error_message`` and
    ``is_success`` (always ``False``).

    Args:
        filename: path of the JSON file to write; when empty, nothing is
            captured and exceptions propagate untouched.
        suppress_error: swallow the exception after writing the file when
            True, otherwise re-raise it.
    """
    if not filename:
        yield
    else:
        try:
            yield
        except Exception as e:
            error_info = {"traceback": traceback.format_exc(), "error_message": str(e), "is_success": False}
            # A bare filename has an empty dirname, and os.makedirs("") raises
            # FileNotFoundError, which would mask the error being reported.
            parent_dir = os.path.dirname(filename)
            if parent_dir:
                os.makedirs(parent_dir, exist_ok=True)
            with open(filename, "w") as f:
                json.dump(error_info, f, indent=2)
            if not suppress_error:
                raise
@@ -0,0 +1,12 @@
1
+ from __future__ import annotations
2
+
3
+ import sys
4
+
5
# Interpreter version flags: each one is True when the running Python is at
# least the version named in the flag.
PY38 = sys.version_info >= (3, 8)
PY39 = sys.version_info >= (3, 9)
PY310 = sys.version_info >= (3, 10)
PY311 = sys.version_info >= (3, 11)
PY312 = sys.version_info >= (3, 12)

# Platform flags derived from sys.platform ("darwin" is macOS, "win32" is Windows).
is_osx = sys.platform == "darwin"
is_win = sys.platform == "win32"
@@ -0,0 +1,203 @@
1
+ import bz2
2
+ import gzip
3
+ import logging
4
+ import os
5
+ import shutil
6
+ import struct
7
+ import tarfile
8
+ import tempfile
9
+ import zipfile
10
+ import zlib
11
+ from typing import Callable, NamedTuple, Optional
12
+
13
+ from recurvedata.utils import files, shell
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class _Config(NamedTuple):
    """Settings describing one single-file compression format."""

    # Callable used to open the compressed file from Python; it is invoked as
    # opener(path, mode) with mode "rb" or "wb".
    opener: Callable
    # File extension without the leading dot; used as the temp-file suffix.
    ext: str
    # Command-line prefix used to compress when the shell path is requested.
    compress_cmd: str
    # Command-line prefix used to decompress when the shell path is requested.
    decompress_cmd: str


# Ready-made settings for the two supported single-file formats.
_gzip_cfg = _Config(gzip.open, "gz", "gzip", "gzip -d")
_bzip2_cfg = _Config(bz2.open, "bz2", "bzip2", "bzip2 -d")
27
+
28
+
29
def gzip_compress(src_file: str, dst_file: Optional[str] = None, using_cmd: bool = False, inplace: bool = False) -> str:
    """Compress a file using gzip.

    Thin wrapper that delegates to ``_compress_file`` with the gzip settings.

    Args:
        src_file: the path of the input file
        dst_file: the path of the output file; a temporary filename is made when omitted
        using_cmd: use the gzip command line instead of Python gzip to speed up
        inplace: replace src_file with dst_file if True

    Returns:
        the dst_file if inplace is False, otherwise the src_file
    """
    return _compress_file(_gzip_cfg, src_file, dst_file, using_cmd, inplace)
42
+
43
+
44
def gzip_decompress(src_file: str, dst_file: Optional[str] = None, using_cmd: bool = False, inplace: bool = False) -> str:
    """Decompress a .gz file.

    Thin wrapper that delegates to ``_decompress_file`` with the gzip settings.

    Args:
        src_file: the path of the input file
        dst_file: the path of the output file; a temporary filename is made when omitted
        using_cmd: use the gzip command line instead of Python gzip to speed up
        inplace: replace src_file with dst_file if True

    Returns:
        the dst_file if inplace is False, otherwise the src_file
    """
    return _decompress_file(_gzip_cfg, src_file, dst_file, using_cmd, inplace)
57
+
58
+
59
def bzip2_compress(src_file: str, dst_file: Optional[str] = None, using_cmd: bool = False, inplace: bool = False) -> str:
    """Compress a file using bzip2.

    Thin wrapper that delegates to ``_compress_file`` with the bzip2 settings.

    Args:
        src_file: the path of the input file
        dst_file: the path of the output file; a temporary filename is made when omitted
        using_cmd: use the bzip2 command line instead of Python bz2 to speed up
        inplace: replace src_file with dst_file if True

    Returns:
        the dst_file if inplace is False, otherwise the src_file
    """
    return _compress_file(_bzip2_cfg, src_file, dst_file, using_cmd, inplace)
72
+
73
+
74
def bzip2_decompress(
    src_file: str, dst_file: Optional[str] = None, using_cmd: bool = False, inplace: bool = False
) -> str:
    """Decompress a .bz2 file.

    Thin wrapper that delegates to ``_decompress_file`` with the bzip2
    settings. ``inplace`` defaults to False, which is what this function
    always used before the parameter existed, so existing callers are
    unaffected; it now mirrors ``gzip_decompress``.

    Args:
        src_file: the path of the input file
        dst_file: the path of the output file; a temporary filename is made when omitted
        using_cmd: use the bzip2 command line instead of Python bz2 to speed up
        inplace: replace src_file with dst_file if True

    Returns:
        the dst_file if inplace is False, otherwise the src_file
    """
    return _decompress_file(_bzip2_cfg, src_file, dst_file, using_cmd, inplace)
86
+
87
+
88
def _compress_file(
    cfg: _Config, src_file: str, dst_file: str = None, using_cmd: bool = False, inplace: bool = False
) -> str:
    """Compress ``src_file`` into ``dst_file`` using the format described by ``cfg``.

    Args:
        cfg: format settings (Python opener, extension, shell commands)
        src_file: the path of the input file
        dst_file: the path of the output file; a temp file with the format's
            extension is created when omitted
        using_cmd: run ``cfg.compress_cmd`` through the shell instead of
            streaming the data through ``cfg.opener``
        inplace: forwarded to ``files.replace_file_with_temp``

    Returns:
        whatever ``files.replace_file_with_temp(dst_file, src_file, inplace)`` returns
    """
    # Function-scope import: only the command-line branch needs it.
    import shlex

    if dst_file is None:
        dst_file = files.new_tempfile(suffix=f".{cfg.ext}")

    if using_cmd:
        # Quote both paths so spaces or shell metacharacters in a filename
        # cannot split the command or inject extra ones.
        quoted_src = shlex.quote(str(src_file))
        quoted_dst = shlex.quote(str(dst_file))
        shell.run(f"{cfg.compress_cmd} {quoted_src} -c > {quoted_dst}", logger)
    else:
        with open(src_file, "rb") as f_in, cfg.opener(dst_file, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)

    return files.replace_file_with_temp(dst_file, src_file, inplace)
101
+
102
+
103
def _decompress_file(
    cfg: _Config, src_file: str, dst_file: str = None, using_cmd: bool = False, inplace: bool = False
) -> str:
    """Decompress ``src_file`` into ``dst_file`` using the format described by ``cfg``.

    Args:
        cfg: format settings (Python opener, extension, shell commands)
        src_file: the path of the compressed input file
        dst_file: the path of the output file; a temp file is created when omitted
        using_cmd: run ``cfg.decompress_cmd`` through the shell instead of
            streaming the data through ``cfg.opener``
        inplace: forwarded to ``files.replace_file_with_temp``

    Returns:
        whatever ``files.replace_file_with_temp(dst_file, src_file, inplace)`` returns
    """
    # Function-scope import: only the command-line branch needs it.
    import shlex

    if dst_file is None:
        dst_file = files.new_tempfile()

    if using_cmd:
        # Quote both paths so spaces or shell metacharacters in a filename
        # cannot split the command or inject extra ones.
        quoted_src = shlex.quote(str(src_file))
        quoted_dst = shlex.quote(str(dst_file))
        shell.run(f"{cfg.decompress_cmd} {quoted_src} -c > {quoted_dst}", logger)
    else:
        with cfg.opener(src_file, "rb") as f_in, open(dst_file, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)

    return files.replace_file_with_temp(dst_file, src_file, inplace)
116
+
117
+
118
def zip_compress(src_file: str, dst_file: str = None, using_cmd: bool = False, arcname: str = None) -> str:
    """Compress a file using zip.

    Args:
        src_file: the path of the input file
        dst_file: the path of the output file; a temporary filename is made when omitted
        using_cmd: use the zip command line instead of Python ZipFile to speed up
        arcname: filename inside the archive, only supported with using_cmd=False

    Returns:
        the dst_file
    """
    # Function-scope import: only the command-line branch needs it.
    import shlex

    if dst_file is None:
        dst_file = files.new_tempfile(suffix=".zip")

    directory, basename = os.path.split(src_file.rstrip("/"))

    if using_cmd:
        # Remove the pre-created temp file first and reuse only its name;
        # otherwise zip treats it as an existing archive and fails with:
        # zip warning: missing end signature--probably not a zip file (did you
        # zip warning: remember to use binary mode when you transferred it?)
        # zip warning: (if you are trying to read a damaged archive try -F)
        files.remove_files_safely(dst_file)
        if arcname is not None:
            logger.warning("arcname is not supported while using cmd")
        # A src_file without a directory part gives directory == "", and a
        # bare `cd` would jump to $HOME; stay in the current directory instead.
        workdir = directory or "."
        # Resolve the archive path before `cd`, so a relative dst_file is not
        # re-interpreted relative to the source directory.
        archive_path = os.path.abspath(dst_file)
        shell.run(
            f"cd {shlex.quote(workdir)} && zip -r {shlex.quote(archive_path)} {shlex.quote(basename)}",
            logger,
        )
        return dst_file

    # NOTE(review): ZipFile.write adds a single entry, so a directory src_file
    # is not archived recursively here, unlike `zip -r` above -- confirm
    # whether directories are expected on this path.
    with zipfile.ZipFile(dst_file, "w", compression=zipfile.ZIP_DEFLATED) as zf:
        zf.write(src_file, arcname=arcname or basename)
    return dst_file
149
+
150
+
151
def zip_decompress(src_file: str, target_directory: str = None, using_cmd: bool = False) -> str:
    """Decompress a .zip file into a directory.

    Args:
        src_file: the path of the input file
        target_directory: the path of the output directory; a temporary directory is made when omitted
        using_cmd: use the unzip command line instead of Python ZipFile to speed up

    Returns:
        the output directory
    """
    # Function-scope import: only the command-line branch needs it.
    import shlex

    if not target_directory:
        target_directory = tempfile.mkdtemp()

    if using_cmd:
        # Quote both paths so spaces or shell metacharacters in a name cannot
        # split the command or inject extra ones.
        quoted_src = shlex.quote(str(src_file))
        quoted_target = shlex.quote(str(target_directory))
        shell.run(f"unzip {quoted_src} -d {quoted_target}", logger)
        return target_directory

    with zipfile.ZipFile(src_file, "r") as zf:
        zf.extractall(target_directory)
    return target_directory
172
+
173
+
174
def mysql_compress(value: bytes) -> Optional[bytes]:
    """A Python implementation of the COMPRESS function of MySQL.

    ``None`` and the empty string are returned unchanged. Any other value
    becomes a 4-byte length of the uncompressed data followed by the zlib
    stream.

    NOTE(review): the MySQL manual also mentions appending a ``.`` when the
    result ends with a space; that rule is not applied here.

    Args:
        value: the raw bytes to compress, or None

    Returns:
        the compressed blob, ``b""`` for empty input, or None for None
    """
    if value is None:
        return None
    if value == b"":
        return b""
    # MySQL stores the length low byte first; "<I" pins little-endian so the
    # header is identical on every host ("I" alone follows the native order).
    size: bytes = struct.pack("<I", len(value))
    data: bytes = zlib.compress(value)
    return size + data
183
+
184
+
185
def mysql_uncompress(value: bytes) -> Optional[bytes]:
    """A Python implementation of the UNCOMPRESS function of MySQL.

    Used to decompress the result of the COMPRESS function.

    https://dev.mysql.com/doc/refman/5.7/en/encryption-functions.html#function_compress

    Args:
        value: a blob made of a 4-byte length header followed by a zlib stream

    Returns:
        the decompressed bytes; falsy input, or input too short to carry any
        payload after the header, is returned unchanged

    Raises:
        zlib.error: if the payload after the header is not a valid zlib stream
    """
    size_byte: int = 4
    # A value of exactly 4 bytes is a bare header with nothing to inflate;
    # treat it like the other too-short inputs rather than handing an empty
    # buffer to zlib, which raises.
    if not value or len(value) <= size_byte:
        return value

    return zlib.decompress(value[size_byte:])
197
+
198
+
199
def tar_gzip_uncompress(tar_gz_path, extract_path):
    """Extract a .tar.gz archive into ``extract_path``, creating it if needed.

    Members that would land outside ``extract_path`` are refused instead of
    being written.

    Args:
        tar_gz_path: the path of the .tar.gz archive
        extract_path: the directory to extract into

    Raises:
        tarfile.TarError: if a member would be extracted outside ``extract_path``
    """
    logger.info(f"extract tar.gz {tar_gz_path} to {extract_path}")
    os.makedirs(extract_path, exist_ok=True)
    with tarfile.open(tar_gz_path, "r:gz") as tar:
        if hasattr(tarfile, "data_filter"):
            # Interpreters with extraction filters: let the stdlib reject
            # absolute paths, ".." escapes and links pointing outside.
            tar.extractall(path=extract_path, filter="data")
            return

        # Older interpreters: check every member name before writing anything.
        root = os.path.realpath(extract_path)
        for member in tar.getmembers():
            target = os.path.realpath(os.path.join(root, member.name))
            if os.path.commonpath([root, target]) != root:
                raise tarfile.TarError(f"refusing to extract {member.name!r} outside of {extract_path}")
        tar.extractall(path=extract_path)
@@ -0,0 +1,42 @@
1
+ import datetime
2
+ from typing import Optional
3
+
4
+ import croniter
5
+
6
+ from recurvedata.utils.date_time import DATELIKE
7
+
8
+
9
# Preset aliases and the cron expression each one stands for. "@once" has no
# cron equivalent and maps to None.
_SCHEDULE_PRESETS = {
    "@once": None,
    "@hourly": "0 * * * *",
    "@daily": "0 0 * * *",
    "@weekly": "0 0 * * 0",
    "@monthly": "0 0 1 * *",
    "@yearly": "0 0 1 1 *",
}


def normalize_schedule_interval(schedule_interval: str) -> Optional[str]:
    """Translate a preset alias into its cron expression.

    Args:
        schedule_interval: a preset such as ``@daily`` or any other string

    Returns:
        the cron expression for a known preset, None for ``@once``, and the
        input unchanged when it is not a known preset
    """
    return _SCHEDULE_PRESETS.get(schedule_interval, schedule_interval)
19
+
20
+
21
def _get_schedule(schedule_interval: str, dttm: DATELIKE, is_next: bool = False) -> Optional[DATELIKE]:
    """Return the schedule time adjacent to ``dttm`` for the given interval.

    Args:
        schedule_interval: a preset alias or a cron expression
        dttm: the reference time handed to croniter as its start time
        is_next: pick the following run when True, the preceding one otherwise

    Returns:
        the adjacent run time requested from croniter as a
        ``datetime.datetime``, or None when the interval normalizes to an
        empty value (e.g. ``@once``)
    """
    expression = normalize_schedule_interval(schedule_interval)
    if not expression:
        return None
    iterator = croniter.croniter(expression, dttm)
    step = iterator.get_next if is_next else iterator.get_prev
    return step(datetime.datetime)
32
+
33
+
34
def next_schedule(schedule_interval: str, dttm: DATELIKE) -> Optional[DATELIKE]:
    """Return the run time following ``dttm``; delegates to ``_get_schedule`` with is_next=True."""
    return _get_schedule(schedule_interval, dttm, is_next=True)
36
+
37
+
38
def previous_schedule(schedule_interval: str, dttm: DATELIKE) -> Optional[DATELIKE]:
    """Return the run time preceding ``dttm``; delegates to ``_get_schedule`` with is_next=False."""
    return _get_schedule(schedule_interval, dttm, is_next=False)
40
+
41
+
42
+ get_schedule = _get_schedule
@@ -0,0 +1,305 @@
1
+ import base64
2
+ import hashlib
3
+ import json
4
+ import logging
5
+ import os
6
+ from enum import Enum
7
+ from functools import lru_cache
8
+ from typing import Literal, NamedTuple
9
+
10
+ from recurvedata.client.client import Client
11
+ from recurvedata.utils.imports import MockModule
12
+ from recurvedata.utils.registry import register_func
13
+
14
+ try:
15
+ from cryptography.hazmat.primitives import hashes, padding, serialization
16
+ from cryptography.hazmat.primitives.asymmetric import padding as asymmetric_padding
17
+ from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
18
+ except ImportError:
19
+ mock_module = MockModule("cryptography")
20
+ hashes = mock_module
21
+ padding = mock_module
22
+ serialization = mock_module
23
+ asymmetric_padding = mock_module
24
+ Cipher = mock_module
25
+ algorithms = mock_module
26
+ modes = mock_module
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class CryptoMethod(str, Enum):
    """Encryption method identifiers.

    Members are also ``str`` instances, so they compare equal to their plain
    string values (e.g. ``CryptoMethod.AES_128 == "AES-128"``).
    """

    # Symmetric (AES) methods.
    AES_128 = "AES-128"
    AES_256 = "AES-256"
    # Asymmetric (RSA) methods.
    RSA_2048 = "RSA-2048"
    RSA_4096 = "RSA-4096"
36
+
37
+
38
class RSAKeyPair(NamedTuple):
    """Public/private halves of an RSA key, each held as bytes."""

    # Passed to serialization.load_pem_public_key when encrypting.
    public_key: bytes
    # Passed to serialization.load_pem_private_key when decrypting.
    private_key: bytes
41
+
42
+
43
+ class CryptoUtil:
44
+ """Utility class for encryption operations"""
45
+
46
+ @staticmethod
47
+ @lru_cache
48
+ def fetch_key_data(key_name: str) -> tuple[str, str]:
49
+ """Fetch key data from server using key name"""
50
+ client = Client()
51
+ response = client.request("GET", f"/api/executor/keys/{key_name}")
52
+ key_data = response["key_data"]
53
+ encryption_method = response["encryption_method"]
54
+ return key_data, encryption_method
55
+
56
+ @staticmethod
57
+ @lru_cache
58
+ def get_key_data(key_name: str) -> bytes | RSAKeyPair:
59
+ """Get and process key data from server using key name
60
+
61
+ Args:
62
+ key_name: Name of the key to fetch
63
+
64
+ Returns:
65
+ Processed key data based on encryption method from server
66
+ """
67
+ key_data, encryption_method = CryptoUtil.fetch_key_data(key_name)
68
+ return CryptoUtil._decode_key_data(key_data, encryption_method)
69
+
70
+ @staticmethod
71
+ def _decode_key_data(key_data: str, encryption_method: str) -> bytes | RSAKeyPair:
72
+ if encryption_method in (CryptoMethod.AES_128, CryptoMethod.AES_256):
73
+ try:
74
+ key = base64.urlsafe_b64decode(key_data)
75
+ except Exception as e:
76
+ raise ValueError("Invalid base64 encoding") from e
77
+ if len(key) not in (16, 24, 32):
78
+ raise ValueError("AES key must be 16, 24, or 32 bytes long")
79
+ return key
80
+ elif encryption_method in (CryptoMethod.RSA_2048, CryptoMethod.RSA_4096):
81
+ try:
82
+ key_dict = json.loads(key_data)
83
+ except Exception as e:
84
+ raise ValueError("Invalid RSA key data") from e
85
+ if not isinstance(key_dict, dict):
86
+ raise ValueError("Invalid RSA key data")
87
+ if "public_key" not in key_dict or "private_key" not in key_dict:
88
+ raise ValueError("Public key or private key is missing")
89
+ return RSAKeyPair(
90
+ public_key=key_dict["public_key"].encode("utf-8"),
91
+ private_key=key_dict["private_key"].encode("utf-8"),
92
+ )
93
+ else:
94
+ raise ValueError(f"Unsupported encryption method: {encryption_method}")
95
+
96
+ @staticmethod
97
+ def aes_encrypt_with_key(
98
+ key: bytes, data: str | bytes | None, mode: Literal["ECB", "CBC"] = "ECB", iv: str | bytes | None = None
99
+ ) -> bytes | None:
100
+ """Encrypt data using AES with provided key
101
+
102
+ Args:
103
+ key: AES key bytes
104
+ data: Data to encrypt
105
+ mode: AES mode ('ECB' or 'CBC')
106
+ iv: Initialization vector for CBC mode
107
+
108
+ Returns:
109
+ Encrypted bytes (with IV prepended for CBC mode)
110
+ """
111
+ if data is None:
112
+ return None
113
+
114
+ if isinstance(data, str):
115
+ data = data.encode("utf-8")
116
+
117
+ padder = padding.PKCS7(algorithms.AES.block_size).padder()
118
+ padded_data = padder.update(data) + padder.finalize()
119
+
120
+ if mode.upper() == "ECB":
121
+ cipher = Cipher(algorithms.AES(key), modes.ECB()).encryptor()
122
+ return cipher.update(padded_data) + cipher.finalize()
123
+ elif mode.upper() == "CBC":
124
+ if iv is None:
125
+ processed_iv = os.urandom(16)
126
+ else:
127
+ if isinstance(iv, str):
128
+ iv = iv.encode("utf-8")
129
+ # Hash the IV to get exactly 16 bytes
130
+ processed_iv = hashlib.md5(iv).digest()
131
+
132
+ cipher = Cipher(algorithms.AES(key), modes.CBC(processed_iv)).encryptor()
133
+ return processed_iv + cipher.update(padded_data) + cipher.finalize()
134
+ else:
135
+ raise ValueError(f"Unsupported AES mode: {mode}")
136
+
137
+ @staticmethod
138
+ def aes_decrypt_with_key(key: bytes, data: bytes | None, mode: Literal["ECB", "CBC"] = "ECB") -> bytes | None:
139
+ """Decrypt data using AES with provided key"""
140
+ if data is None:
141
+ return None
142
+
143
+ if mode.upper() == "ECB":
144
+ cipher = Cipher(algorithms.AES(key), modes.ECB()).decryptor()
145
+ padded_data = cipher.update(data) + cipher.finalize()
146
+ elif mode.upper() == "CBC":
147
+ iv = data[:16]
148
+ cipher = Cipher(algorithms.AES(key), modes.CBC(iv)).decryptor()
149
+ padded_data = cipher.update(data[16:]) + cipher.finalize()
150
+ else:
151
+ raise ValueError(f"Unsupported AES mode: {mode}")
152
+
153
+ unpadder = padding.PKCS7(algorithms.AES.block_size).unpadder()
154
+ return unpadder.update(padded_data) + unpadder.finalize()
155
+
156
+ @staticmethod
157
+ def aes_encrypt(
158
+ key_name: str, data: str | bytes | None, mode: Literal["ECB", "CBC"] = "ECB", iv: str | bytes | None = None
159
+ ) -> bytes | None:
160
+ """Encrypt data using AES with key from key store"""
161
+ key: bytes = CryptoUtil.get_key_data(key_name)
162
+ return CryptoUtil.aes_encrypt_with_key(key, data, mode, iv)
163
+
164
+ @staticmethod
165
+ def aes_decrypt(key_name: str, data: bytes | None, mode: Literal["ECB", "CBC"] = "ECB") -> bytes | None:
166
+ """Decrypt data using AES with key from key store"""
167
+ key: bytes = CryptoUtil.get_key_data(key_name)
168
+ return CryptoUtil.aes_decrypt_with_key(key, data, mode)
169
+
170
+ @staticmethod
171
+ def rsa_encrypt_with_key(public_key: bytes, data: str | bytes) -> bytes:
172
+ """Encrypt data using RSA with provided public key"""
173
+ public_key_obj = serialization.load_pem_public_key(public_key)
174
+ if isinstance(data, str):
175
+ data = data.encode("utf-8")
176
+
177
+ return public_key_obj.encrypt(
178
+ data,
179
+ asymmetric_padding.OAEP(
180
+ mgf=asymmetric_padding.MGF1(algorithm=hashes.SHA256()),
181
+ algorithm=hashes.SHA256(),
182
+ label=None,
183
+ ),
184
+ )
185
+
186
+ @staticmethod
187
+ def rsa_decrypt_with_key(private_key: bytes, data: bytes) -> bytes:
188
+ """Decrypt data using RSA with provided private key"""
189
+ private_key_obj = serialization.load_pem_private_key(private_key, password=None)
190
+ return private_key_obj.decrypt(
191
+ data,
192
+ asymmetric_padding.OAEP(
193
+ mgf=asymmetric_padding.MGF1(algorithm=hashes.SHA256()),
194
+ algorithm=hashes.SHA256(),
195
+ label=None,
196
+ ),
197
+ )
198
+
199
+ @staticmethod
200
+ def rsa_encrypt(key_name: str, data: str | bytes | None) -> bytes | None:
201
+ """Encrypt data using RSA with key from key store"""
202
+ if data is None:
203
+ return None
204
+ key_pair: RSAKeyPair = CryptoUtil.get_key_data(key_name)
205
+ return CryptoUtil.rsa_encrypt_with_key(key_pair.public_key, data)
206
+
207
+ @staticmethod
208
+ def rsa_decrypt(key_name: str, data: bytes | None) -> bytes | None:
209
+ """Decrypt data using RSA with key from key store"""
210
+ if data is None:
211
+ return None
212
+ key_pair: RSAKeyPair = CryptoUtil.get_key_data(key_name)
213
+ return CryptoUtil.rsa_decrypt_with_key(key_pair.private_key, data)
214
+
215
+ @staticmethod
216
+ def base64_encode(data: bytes | None) -> str | None:
217
+ """Convert bytes data to base64 encoded string.
218
+
219
+ Args:
220
+ data: The bytes data to encode. Can be None.
221
+
222
+ Returns:
223
+ Base64 encoded string if input is not None, otherwise None.
224
+ """
225
+ if data is None:
226
+ return None
227
+ return base64.b64encode(data).decode("utf-8")
228
+
229
+ @staticmethod
230
+ def base64_decode(data: str | None) -> bytes | None:
231
+ """Convert base64 encoded encrypted data back to bytes
232
+
233
+ Args:
234
+ data: Base64 encoded encrypted data string. Can be None.
235
+
236
+ Returns:
237
+ Decoded bytes data if input is not None, otherwise None.
238
+ """
239
+ if data is None:
240
+ return None
241
+ return base64.b64decode(data.encode("utf-8"))
242
+
243
+ @staticmethod
244
+ def md5(data: str | bytes | None) -> str | None:
245
+ """Calculate MD5 hash of data
246
+
247
+ Args:
248
+ data: Input data as string or bytes
249
+
250
+ Returns:
251
+ MD5 hash as hex string
252
+ """
253
+ return CryptoUtil._hash(data, "md5")
254
+
255
+ @staticmethod
256
+ def sha1(data: str | bytes | None) -> str | None:
257
+ """Calculate SHA1 hash of data
258
+
259
+ Args:
260
+ data: Input data as string or bytes
261
+
262
+ Returns:
263
+ SHA1 hash as hex string
264
+ """
265
+ return CryptoUtil._hash(data, "sha1")
266
+
267
+ @staticmethod
268
+ def sha256(data: str | bytes | None) -> str | None:
269
+ """Calculate SHA256 hash of data
270
+
271
+ Args:
272
+ data: Input data as string or bytes
273
+
274
+ Returns:
275
+ SHA256 hash as hex string
276
+ """
277
+ return CryptoUtil._hash(data, "sha256")
278
+
279
+ @staticmethod
280
+ def _hash(data: str | bytes | None, algorithm: Literal["md5", "sha1", "sha256"]) -> str | None:
281
+ if data is None:
282
+ return None
283
+ if isinstance(data, str):
284
+ data = data.encode("utf-8")
285
+ return hashlib.new(algorithm, data).hexdigest()
286
+
287
+
288
@register_func
def secret(key_name: str) -> str:
    """Look up the secret stored on the server under ``key_name``.

    This is best-effort: if the lookup raises for any reason, a warning is
    logged and ``key_name`` itself is returned instead.

    Args:
        key_name: Name of the key to fetch.

    Returns:
        The key data from the server, or ``key_name`` when the lookup fails.
    """
    try:
        key_data, _ = CryptoUtil.fetch_key_data(key_name)
    except Exception as e:
        logger.warning(f"Failed to get secret for key {key_name}: {e}")
        return key_name
    return key_data
298
+
299
+
300
# Module-level shortcuts bound to the corresponding CryptoUtil static methods,
# so they can be used as plain functions.
aes_encrypt = CryptoUtil.aes_encrypt
aes_decrypt = CryptoUtil.aes_decrypt
rsa_encrypt = CryptoUtil.rsa_encrypt
rsa_decrypt = CryptoUtil.rsa_decrypt
base64_encode = CryptoUtil.base64_encode
base64_decode = CryptoUtil.base64_decode
@@ -0,0 +1,11 @@
1
+ from dataclasses import fields
2
+ from typing import Any
3
+
4
+ from recurvedata.utils.helpers import extract_dict
5
+
6
+
7
def init_dataclass_from_dict(dc_cls, arg_dct: dict[str, Any], **kwargs):
    """Instantiate a dataclass from a dict plus explicit keyword arguments.

    Args:
        dc_cls: The dataclass type to instantiate.
        arg_dct: Candidate constructor arguments. It is narrowed with
            ``extract_dict`` using the names of the dataclass fields that have
            ``init=True`` (presumably dropping every other key; confirm
            against ``extract_dict``).
        **kwargs: Extra keyword arguments passed to the constructor as-is.

    Returns:
        A new ``dc_cls`` instance.
    """
    init_names = [field.name for field in fields(dc_cls) if field.init]
    return dc_cls(**extract_dict(arg_dct, init_names), **kwargs)