recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,102 @@
1
+ class ColumnTypeNormalizer:
2
+ """
3
+ A class to normalize database column types to a standard set of types.
4
+
5
+ Attributes:
6
+ - database (str): The name of the database for which normalization is needed.
7
+ - _normalized_types (dict[str, list[str]]): A dictionary mapping normalized types to their corresponding database-specific types.
8
+
9
+ Usage examples:
10
+ >>> mysql_normalizer = ColumnTypeNormalizer(database='mysql')
11
+ >>> mysql_normalizer.normalize('varchar')
12
+ "string"
13
+ >>> mysql_normalizer.normalize('tinyint(1)')
14
+ "boolean"
15
+ >>> postgres_normalizer = ColumnTypeNormalizer(database='postgresql')
16
+ >>> postgres_normalizer.normalize('int4')
17
+ "integer"
18
+ >>> postgres_normalizer.normalize('jsonb')
19
+ "json"
20
+ >>> snowflake_normalizer = ColumnTypeNormalizer(database='snowflake')
21
+ >>> snowflake_normalizer.normalize('timestamp_ntz')
22
+ "datetime"
23
+ >>> snowflake_normalizer.normalize('variant')
24
+ "json"
25
+ """
26
+
27
+ _COMMON_NORMALIZED_TYPES: dict[str, list[str]] = {
28
+ "integer": [
29
+ "int",
30
+ "integer",
31
+ "smallint",
32
+ "bigint",
33
+ "tinyint",
34
+ "int2",
35
+ "int4",
36
+ "int8",
37
+ "int16",
38
+ "int32",
39
+ "int64",
40
+ ],
41
+ "float": ["float", "double", "real", "decimal", "numeric"],
42
+ "string": ["varchar", "char", "text", "character varying", "nvarchar", "nchar", "clob", "string"],
43
+ "boolean": ["bool", "boolean"],
44
+ "date": ["date"],
45
+ "datetime": ["datetime", "timestamp", "timestamp with time zone", "timestamp without time zone"],
46
+ "time": ["time"],
47
+ "binary": ["binary", "blob", "varbinary"],
48
+ "json": ["json", "jsonb", "object", "variant", "array"],
49
+ }
50
+
51
+ def __init__(self, database: str, custom_mappings: dict[str, list[str]] | None = None):
52
+ """
53
+ Initializes the normalizer for a specific database with optional custom mappings.
54
+
55
+ Args:
56
+ - database: The name of the database for which normalization is needed.
57
+ - custom_mappings: A dictionary mapping database-specific types to normalized types.
58
+ """
59
+ self.database = database.lower()
60
+ # Convert common types and their mappings to lowercase
61
+ self._normalized_types = {
62
+ k.lower(): [v.lower() for v in values] for k, values in self._COMMON_NORMALIZED_TYPES.items()
63
+ }
64
+
65
+ # Convert custom mappings to lowercase, if provided
66
+ if custom_mappings:
67
+ for key, values in custom_mappings.items():
68
+ key = key.lower()
69
+ values = [v.lower() for v in values]
70
+ if key in self._normalized_types:
71
+ self._normalized_types[key].extend(values)
72
+ else:
73
+ self._normalized_types[key] = values
74
+
75
+ def normalize(self, column_type: str) -> str:
76
+ """
77
+ Normalizes a given database column type to a standard type.
78
+
79
+ Args:
80
+ - column_type: The database column type to normalize.
81
+
82
+ Returns:
83
+
84
+ Usage example:
85
+ >>> normalizer = ColumnTypeNormalizer('postgresql')
86
+ >>> normalizer.normalize('int4')
87
+ "integer"
88
+ """
89
+ column_type = column_type.lower()
90
+
91
+ for normalized, column_types in self._normalized_types.items():
92
+ for _type in column_types:
93
+ if _type.startswith(column_type):
94
+ return normalized
95
+
96
+ return column_type
97
+
98
+
99
if __name__ == "__main__":
    # Only when executed as a script: run the examples embedded in the docstrings.
    from doctest import testmod

    testmod()
@@ -0,0 +1,474 @@
1
+ import asyncio
2
+ import threading
3
+ import time
4
+ from typing import Optional
5
+
6
+ import redis
7
+ from redis.asyncio import Redis as AsyncRedis
8
+ from redis.asyncio.lock import Lock as NativeAsyncRedisLock
9
+ from redis.client import Lock as NativeRedisLock
10
+ from redis.client import Redis
11
+ from redis.exceptions import LockError
12
+
13
+ from recurvedata.config import REDIS_LOCK_URL
14
+
15
+
16
+ class RedisLock:
17
+ """
18
+ Redis-based distributed lock implementation using redis.lock
19
+
20
+ Usage:
21
+ ```
22
+ with RedisLock("my_lock_name", expire=60) as lock:
23
+ if lock.acquired:
24
+ # Execute code that needs lock protection
25
+ pass
26
+ else:
27
+ # Failed to acquire lock
28
+ pass
29
+ ```
30
+
31
+ Or manage manually:
32
+ ```
33
+ lock = RedisLock("my_lock_name")
34
+ if lock.acquire():
35
+ try:
36
+ # Execute code that needs lock protection
37
+ pass
38
+ finally:
39
+ lock.release()
40
+ ```
41
+
42
+ You can also extend the lock expiration time:
43
+ ```
44
+ lock = RedisLock("my_lock_name", expire=60)
45
+ if lock.acquire():
46
+ try:
47
+ # Start some long operation
48
+ # ...
49
+ # Extend the lock if operation takes longer than expected
50
+ lock.extend(additional_time=60)
51
+ # Continue operation
52
+ # ...
53
+ finally:
54
+ lock.release()
55
+ ```
56
+
57
+ For long-running operations with unknown duration, you can use auto-extend:
58
+ ```
59
+ with RedisLock("my_lock_name", expire=60, auto_extend=True) as lock:
60
+ if lock.acquired:
61
+ # Execute long-running code, lock will be automatically extended
62
+ # until the operation completes or the lock is released
63
+ pass
64
+ ```
65
+ """
66
+
67
+ def __init__(
68
+ self,
69
+ name: str,
70
+ expire: int = 60,
71
+ timeout: int = 0,
72
+ sleep_interval: float = 0.1,
73
+ redis_client: Optional[Redis] = None,
74
+ redis_url: Optional[str] = REDIS_LOCK_URL,
75
+ auto_extend: bool = False,
76
+ extend_interval: Optional[int] = None,
77
+ ):
78
+ """
79
+ Initialize Redis lock
80
+
81
+ Args:
82
+ name: Lock name, must be unique
83
+ expire: Lock expiration time (seconds), prevents deadlock
84
+ timeout: Lock acquisition timeout (seconds), 0 means try only once
85
+ sleep_interval: Sleep interval (seconds)
86
+ redis_client: Optional Redis client, if not provided will create from REDIS_URL
87
+ auto_extend: Whether to automatically extend the lock while it's held
88
+ extend_interval: Interval (seconds) to extend the lock, defaults to expire/3
89
+ """
90
+ self.name = f"recurve:lock:{name}"
91
+ self.expire = expire
92
+ self.timeout = timeout
93
+ self.sleep_interval = sleep_interval
94
+ self.acquired = False
95
+ self.auto_extend = auto_extend
96
+ self.extend_interval = extend_interval or max(1, int(expire / 3))
97
+ self._extend_thread = None
98
+ self._stop_extend = threading.Event()
99
+ self._lock_token = None # Store the lock token for cross-thread access
100
+
101
+ if redis_client is not None:
102
+ self.redis = redis_client
103
+ else:
104
+ self.redis = redis.from_url(redis_url)
105
+
106
+ # Create the native Redis lock
107
+ self.lock: NativeRedisLock = self.redis.lock(self.name, timeout=self.expire, sleep=self.sleep_interval)
108
+
109
+ def _extend_lock_periodically(self):
110
+ """Background thread that periodically extends the lock"""
111
+ while not self._stop_extend.is_set():
112
+ # Sleep for the extend interval
113
+ for _ in range(int(self.extend_interval / self.sleep_interval)): # Check stop flag more frequently
114
+ if self._stop_extend.is_set():
115
+ return
116
+ time.sleep(self.sleep_interval)
117
+
118
+ # Extend the lock if we still have it
119
+ if self.acquired:
120
+ try:
121
+ # Directly extend the lock using Redis commands instead of the lock.extend method
122
+ # This avoids the thread-local token issue
123
+ success = self._extend_lock_directly()
124
+ if not success:
125
+ # If extension fails, stop the thread
126
+ self._stop_extend.set()
127
+ except Exception:
128
+ # If any exception occurs, stop the thread
129
+ self._stop_extend.set()
130
+
131
+ def _extend_lock_directly(self) -> bool:
132
+ """
133
+ Extend the lock directly using Redis commands
134
+
135
+ Returns:
136
+ bool: Whether successfully extended the lock
137
+ """
138
+ if not self.acquired or not self._lock_token:
139
+ return False
140
+
141
+ try:
142
+ # Use Redis PEXPIRE command to extend the lock if the token matches
143
+ script = """
144
+ if redis.call('get', KEYS[1]) == ARGV[1] then
145
+ return redis.call('pexpire', KEYS[1], ARGV[2])
146
+ else
147
+ return 0
148
+ end
149
+ """
150
+ # Convert seconds to milliseconds for pexpire
151
+ extend_time_ms = int(self.expire * 1000)
152
+ result = self.redis.eval(script, 1, self.name, self._lock_token, extend_time_ms)
153
+ return bool(result)
154
+ except Exception:
155
+ return False
156
+
157
+ def _start_extend_thread(self):
158
+ """Start the background thread to extend the lock periodically"""
159
+ if not self._extend_thread:
160
+ self._stop_extend.clear()
161
+ self._extend_thread = threading.Thread(
162
+ target=self._extend_lock_periodically, daemon=True # Make it a daemon so it doesn't block program exit
163
+ )
164
+ self._extend_thread.start()
165
+
166
+ def _stop_extend_thread(self):
167
+ """Stop the background thread that extends the lock"""
168
+ if self._extend_thread:
169
+ self._stop_extend.set()
170
+ if self._extend_thread.is_alive():
171
+ self._extend_thread.join(timeout=1.0) # Wait for thread to finish
172
+ self._extend_thread = None
173
+
174
+ def acquire(self) -> bool:
175
+ """
176
+ Try to acquire the lock
177
+
178
+ Returns:
179
+ bool: Whether successfully acquired the lock
180
+ """
181
+ if self.timeout > 0:
182
+ # With timeout
183
+ try:
184
+ self.acquired = self.lock.acquire(blocking=True, blocking_timeout=self.timeout)
185
+ except redis.exceptions.LockError:
186
+ self.acquired = False
187
+ else:
188
+ # Without timeout (try once)
189
+ try:
190
+ self.acquired = self.lock.acquire(blocking=False)
191
+ except redis.exceptions.LockError:
192
+ self.acquired = False
193
+
194
+ # Store the lock token for cross-thread access if acquired
195
+ if self.acquired:
196
+ # Access the thread-local token from the lock
197
+ self._lock_token = self.lock.local.token
198
+
199
+ # If auto_extend is enabled, start the extend thread
200
+ if self.auto_extend:
201
+ self._start_extend_thread()
202
+
203
+ return self.acquired
204
+
205
+ def release(self) -> bool:
206
+ """
207
+ Release the lock
208
+
209
+ Returns:
210
+ bool: Whether successfully released the lock
211
+ """
212
+ if not self.acquired:
213
+ return False
214
+
215
+ # Stop the extend thread if it's running
216
+ self._stop_extend_thread()
217
+
218
+ try:
219
+ self.lock.release()
220
+ self.acquired = False
221
+ self._lock_token = None # Clear the token
222
+ return True
223
+ except redis.exceptions.LockError:
224
+ return False
225
+
226
+ def extend(self, additional_time: Optional[int] = None) -> bool:
227
+ """
228
+ Extend the lock's expiration time
229
+
230
+ Args:
231
+ additional_time: Additional seconds to extend the lock.
232
+ If None, uses the original expire time.
233
+
234
+ Returns:
235
+ bool: Whether successfully extended the lock
236
+ """
237
+ if not self.acquired:
238
+ return False
239
+
240
+ try:
241
+ # If additional_time is not provided, use the original expire time
242
+ extend_time = additional_time if additional_time is not None else self.expire
243
+
244
+ # Try to use the lock's extend method first
245
+ try:
246
+ success = self.lock.extend(additional_time=extend_time)
247
+ return success
248
+ except AttributeError:
249
+ # Fallback for older redis-py versions that don't support extend
250
+ # or if we're in a different thread
251
+ return self._extend_lock_directly()
252
+
253
+ except redis.exceptions.LockError:
254
+ return False
255
+
256
+ def __enter__(self) -> "RedisLock":
257
+ self.acquire()
258
+ return self
259
+
260
+ def __exit__(self, exc_type, exc_val, exc_tb) -> None:
261
+ if self.acquired:
262
+ self.release()
263
+
264
+
265
class AsyncRedisLock:
    """
    Redis-based distributed lock implementation using redis.lock (async version)

    Usage:
    ```
    async with AsyncRedisLock("my_lock_name", expire=60) as lock:
        if lock.acquired:
            # Execute code that needs lock protection
            pass
        else:
            # Failed to acquire lock
            pass
    ```

    Or manage manually:
    ```
    lock = AsyncRedisLock("my_lock_name")
    if await lock.acquire():
        try:
            # Execute code that needs lock protection
            pass
        finally:
            await lock.release()
    ```

    You can also extend the lock expiration time:
    ```
    lock = AsyncRedisLock("my_lock_name", expire=60)
    if await lock.acquire():
        try:
            # Start some long operation
            # ...
            # Extend the lock if operation takes longer than expected
            await lock.extend(additional_time=60)
            # Continue operation
            # ...
        finally:
            await lock.release()
    ```

    For long-running operations with unknown duration, you can use auto-extend:
    ```
    async with AsyncRedisLock("my_lock_name", expire=60, auto_extend=True) as lock:
        if lock.acquired:
            # Execute long-running code, lock will be automatically extended
            # until the operation completes or the lock is released
            pass
    ```
    """

    def __init__(
        self,
        name: str,
        expire: int = 60,
        timeout: int = 0,
        sleep_interval: float = 0.1,
        redis_client: Optional[AsyncRedis] = None,
        redis_url: Optional[str] = REDIS_LOCK_URL,
        auto_extend: bool = False,
        extend_interval: Optional[int] = None,
    ):
        """
        Initialize Redis lock

        Args:
            name: Lock name, must be unique (stored under the ``recurve:lock:`` prefix)
            expire: Lock expiration time (seconds), prevents deadlock
            timeout: Lock acquisition timeout (seconds), 0 means try only once
            sleep_interval: Sleep interval (seconds) passed to the native lock as ``sleep``
            redis_client: Optional Redis client, if not provided one is created from ``redis_url``
            redis_url: Redis URL used to create a client when ``redis_client`` is not given
            auto_extend: Whether to automatically extend the lock while it's held
            extend_interval: Interval (seconds) to extend the lock, defaults to expire/3 (at least 1)
        """
        self.name = f"recurve:lock:{name}"
        self.expire = expire
        self.timeout = timeout
        self.sleep_interval = sleep_interval
        self.acquired = False
        self.auto_extend = auto_extend
        self.extend_interval = extend_interval or max(1, int(expire / 3))
        self._extend_task = None
        self._stop_extend = asyncio.Event()
        self._lock_token = None

        self.redis = redis_client if redis_client is not None else AsyncRedis.from_url(redis_url)
        self.lock: NativeAsyncRedisLock = self.redis.lock(self.name, timeout=self.expire, sleep=self.sleep_interval)

    async def _extend_lock_periodically(self):
        """Background task that periodically extends the lock until asked to stop"""
        while not self._stop_extend.is_set():
            try:
                # Wait on the stop event instead of sleeping: a plain sleep would make
                # release() block for up to ``extend_interval`` seconds (and extend the
                # lock one more time) before this task notices the stop request.
                await asyncio.wait_for(self._stop_extend.wait(), timeout=self.extend_interval)
                return  # stop was requested while waiting
            except asyncio.TimeoutError:
                pass  # interval elapsed without a stop request -> time to extend

            try:
                if self.acquired:
                    success = await self._extend_lock_directly()
                    if not success:
                        # The lock could not be refreshed; stop trying.
                        self._stop_extend.set()
            except Exception:
                self._stop_extend.set()

    async def _extend_lock_directly(self) -> bool:
        """
        Extend the lock directly using Redis commands

        Resets the key's TTL to ``self.expire`` seconds, but only if the key still
        holds the token recorded when the lock was acquired.

        Returns:
            bool: Whether successfully extended the lock
        """
        if not self.acquired or not self._lock_token:
            return False

        try:
            # Compare-and-expire in one Lua script so the check and the TTL update
            # are applied together on the server.
            script = """
            if redis.call('get', KEYS[1]) == ARGV[1] then
                return redis.call('pexpire', KEYS[1], ARGV[2])
            else
                return 0
            end
            """
            extend_time_ms = int(self.expire * 1000)
            result = await self.redis.eval(script, 1, self.name, self._lock_token, extend_time_ms)
            return bool(result)
        except Exception:
            return False

    def _start_extend_task(self):
        """Start the background task to extend the lock periodically"""
        if not self._extend_task:
            # Use a fresh (unset) event created while the loop is running: the extend
            # task awaits it, and on older Python versions an Event is bound to the
            # loop that was current when it was constructed.
            self._stop_extend = asyncio.Event()
            self._extend_task = asyncio.create_task(self._extend_lock_periodically())

    async def _stop_extend_task(self):
        """Stop the background task that extends the lock and wait for it to finish"""
        if self._extend_task:
            self._stop_extend.set()
            await self._extend_task
            self._extend_task = None

    async def acquire(self) -> bool:
        """
        Try to acquire the lock

        Blocks for up to ``timeout`` seconds when ``timeout`` is positive, otherwise
        makes a single non-blocking attempt. On success the lock token is remembered
        and, if ``auto_extend`` is enabled, the background extend task is started.

        Returns:
            bool: Whether successfully acquired the lock
        """
        try:
            if self.timeout > 0:
                self.acquired = await self.lock.acquire(blocking=True, blocking_timeout=self.timeout)
            else:
                self.acquired = await self.lock.acquire(blocking=False)

            if self.acquired:
                self._lock_token = self.lock.local.token
                if self.auto_extend:
                    self._start_extend_task()

            return self.acquired
        except LockError:
            self.acquired = False
            return False

    async def release(self) -> bool:
        """
        Release the lock

        Stops the auto-extend task (if any) before releasing.

        Returns:
            bool: Whether successfully released the lock
        """
        if not self.acquired:
            return False

        await self._stop_extend_task()

        try:
            await self.lock.release()
            released = True
        except LockError:
            # redis-py reports an unlocked or no-longer-owned lock through LockError;
            # in both cases this instance does not hold the lock any more.
            released = False

        # Clear local state on either outcome so ``acquired`` never stays stale.
        self.acquired = False
        self._lock_token = None
        return released

    async def extend(self, additional_time: Optional[int] = None) -> bool:
        """
        Extend the lock's expiration time

        Args:
            additional_time: Additional seconds to extend the lock.
                            If None, uses the original expire time.

        Returns:
            bool: Whether successfully extended the lock
        """
        if not self.acquired:
            return False

        try:
            extend_time = additional_time if additional_time is not None else self.expire
            try:
                return await self.lock.extend(additional_time=extend_time)
            except AttributeError:
                # Fallback path; note it resets the TTL to ``self.expire`` rather
                # than using ``additional_time``.
                return await self._extend_lock_directly()
        except LockError:
            return False

    async def __aenter__(self) -> "AsyncRedisLock":
        """Try to acquire the lock; callers must check ``acquired`` inside the block"""
        await self.acquire()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        """Release the lock on exit if it is currently held"""
        if self.acquired:
            await self.release()
@@ -0,0 +1,54 @@
1
from typing import Callable, Generic, ItemsView, Iterable, Iterator, KeysView, TypeVar, Union, ValuesView
2
+
3
+ _KT = TypeVar("_KT")
4
+ _VT = TypeVar("_VT")
5
+
6
+
7
class GenericRegistry(Generic[_KT, _VT]):
    """Dictionary-backed registry that stores a target under one or more keys."""

    def __init__(self):
        self._registry: dict[_KT, _VT] = {}

    def add(self, *keys: _KT) -> Callable[[_VT], _VT]:
        """Return a decorator that registers its argument under every key in ``keys``.

        The decorator returns the target unchanged; an existing entry with the
        same key is overwritten.
        """

        def inner(target: _VT) -> _VT:
            for k in keys:
                self._registry[k] = target
            return target

        return inner

    def get(self, key: _KT, default: Union[_VT, None] = None) -> Union[_VT, None]:
        """Return the target registered under ``key``, or ``default`` if there is none."""
        return self._registry.get(key, default)

    def __len__(self) -> int:
        """Return the number of registered keys."""
        return len(self._registry)

    def __contains__(self, key: object) -> bool:
        """Return whether ``key`` is registered.

        Defined explicitly: without it ``key in registry`` would fall back to
        calling ``__getitem__`` with 0, 1, 2, ... and, because ``__getitem__``
        never raises, would not terminate for an unknown key.
        """
        return key in self._registry

    def __iter__(self) -> Iterator[_KT]:
        """Iterate over the registered keys, like a dict.

        Defined explicitly for the same reason as ``__contains__``: the legacy
        ``__getitem__``-based iteration would never stop.
        """
        return iter(self._registry)

    def keys(self) -> KeysView[_KT]:
        """Return a view of the registered keys."""
        return self._registry.keys()

    def values(self) -> ValuesView[_VT]:
        """Return a view of the registered targets."""
        return self._registry.values()

    def items(self) -> ItemsView[_KT, _VT]:
        """Return a view of the ``(key, target)`` pairs."""
        return self._registry.items()

    def __getitem__(self, key):
        """Return the target registered under ``key``, or ``None`` (no ``KeyError``) if missing."""
        return self._registry.get(key)
36
+
37
+
38
class Registry(GenericRegistry[str, _VT]):
    """String-keyed registry whose keys are computed from the target itself."""

    def __init__(self, key_callback: Callable[[_VT], Union[str, Iterable[str]]]):
        """Remember ``key_callback``, which maps a target to one key or an iterable of keys."""
        super().__init__()
        self.key_callback = key_callback

    def add(self, target: _VT) -> _VT:
        """Register ``target`` under the key(s) produced by ``key_callback`` and return it.

        Unlike the parent method this takes the target directly, so it can be
        used as a plain decorator.
        """
        derived: Union[str, Iterable[str]] = self.key_callback(target)
        # A lone string is a single key, not an iterable of one-character keys.
        names = [derived] if isinstance(derived, str) else derived
        decorator = super().add(*names)
        return decorator(target)

    def register(self, target: _VT) -> _VT:  # Compatibility with recurvedata.operator
        """Alias of ``add``."""
        return self.add(target)
51
+
52
+
53
def _template_func_key(func):
    """Return ``func.__name__``, the key under which ``func`` is registered."""
    return func.__name__


# Module-level registry keyed by each registered object's ``__name__``
# (presumably the functions made available to Jinja2 templates - confirm with callers).
jinja2_template_funcs_registry = Registry(key_callback=_template_func_key)
# Decorator-friendly alias: ``@register_func`` adds the decorated object to the registry.
register_func = jinja2_template_funcs_registry.add
@@ -0,0 +1,15 @@
1
+ import logging
2
+ import subprocess
3
+ from typing import Optional
4
+
5
+ _logger = logging.getLogger(__name__)
6
+
7
+
8
def run(cmd: str, logger: Optional[logging.Logger] = _logger) -> None:
    """Run ``cmd`` through the system shell and raise if it fails.

    Args:
        cmd: Command line to execute. It is handed to the shell verbatim
            (``shell=True``), so it must never be built from untrusted input.
        logger: Logger that receives the debug trace. ``None`` falls back to
            this module's logger instead of failing.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    if logger is None:
        # The signature advertises Optional, so tolerate an explicit None.
        logger = _logger
    logger.debug("Running command: %s", cmd)
    subprocess.check_call(cmd, shell=True)
11
+
12
+
13
def run_output(cmd: str, logger: Optional[logging.Logger] = _logger) -> str:
    """Run ``cmd`` through the system shell and return its standard output.

    Args:
        cmd: Command line to execute. It is handed to the shell verbatim
            (``shell=True``), so it must never be built from untrusted input.
        logger: Logger that receives the debug trace. ``None`` falls back to
            this module's logger instead of failing.

    Returns:
        The command's stdout decoded as UTF-8 with surrounding whitespace stripped.

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero status.
    """
    if logger is None:
        # The signature advertises Optional, so tolerate an explicit None.
        logger = _logger
    logger.debug("Running command: %s", cmd)
    raw_output = subprocess.check_output(cmd, shell=True)
    return raw_output.decode("utf-8").strip()
@@ -0,0 +1,33 @@
1
+ #
2
+ # Licensed to the Apache Software Foundation (ASF) under one
3
+ # or more contributor license agreements. See the NOTICE file
4
+ # distributed with this work for additional information
5
+ # regarding copyright ownership. The ASF licenses this file
6
+ # to you under the Apache License, Version 2.0 (the
7
+ # "License"); you may not use this file except in compliance
8
+ # with the License. You may obtain a copy of the License at
9
+ #
10
+ # http://www.apache.org/licenses/LICENSE-2.0
11
+ #
12
+ # Unless required by applicable law or agreed to in writing,
13
+ # software distributed under the License is distributed on an
14
+ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15
+ # KIND, either express or implied. See the License for the
16
+ # specific language governing permissions and limitations
17
+ # under the License.
18
+ from __future__ import annotations
19
+
20
+ from typing import Generic, TypeVar
21
+
22
+ T = TypeVar("T")
23
+
24
+
25
class Singleton(type, Generic[T]):
    """Metaclass that makes every class using it a singleton.

    The first call to such a class builds the instance; later calls return
    that same object and ignore their arguments.
    """

    # One shared cache on the metaclass: class object -> its sole instance.
    _instances: dict[Singleton[T], T] = {}

    def __call__(cls: Singleton[T], *args, **kwargs) -> T:
        """Return the cached instance of ``cls``, creating it on first use."""
        cache = cls._instances
        if cls in cache:
            return cache[cls]
        cache[cls] = super().__call__(*args, **kwargs)
        return cache[cls]
@@ -0,0 +1,6 @@
1
def trim_replace_special_character(sql: str, strip_sufix: bool = False) -> str:
    """Turn literal backslash-n sequences in ``sql`` into real newlines.

    Args:
        sql: SQL text that may contain the two-character sequence ``\\n``.
        strip_sufix: When true, also remove ``;`` characters from both ends
            of the result.

    Returns:
        The converted SQL text.
    """
    # todo: may cause error if \\n is in `like '%\\n%'` format
    unescaped = "\n".join(sql.split("\\n"))
    return unescaped.strip(";") if strip_sufix else unescaped