recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,328 @@
1
+ from typing import Callable
2
+
3
+ import sqlparse
4
+ import sqlparse.keywords
5
+ import sqlparse.sql
6
+ import sqlparse.tokens
7
+
8
+
9
def staging_table_of(table: str) -> str:
    """Return the staging-table name for *table*, i.e. ``z_<table>_staging``."""
    return "z_{}_staging".format(table)
11
+
12
+
13
def reconcile_table_of(table: str) -> str:
    """Return the reconcile-table name for *table*, i.e. ``z_<table>_reconcile``."""
    return "z_{}_reconcile".format(table)
15
+
16
+
17
def bak_table_of(table: str) -> str:
    """Return the backup-table name for *table*, i.e. ``z_<table>_bak``."""
    return "z_{}_bak".format(table)
19
+
20
+
21
def trim_prefix(s: str, sub: str, ignore_case: bool = True) -> str:
    """Remove a leading *sub* from *s* when present.

    Args:
        s: The string to trim.
        sub: The prefix to remove.
        ignore_case: Compare the prefix case-insensitively (the default).

    Returns:
        *s* without its first ``len(sub)`` characters when they match *sub*,
        otherwise *s* unchanged.
    """
    width = len(sub)
    candidate, wanted = s[:width], sub
    if ignore_case:
        candidate, wanted = candidate.lower(), wanted.lower()
    return s[width:] if candidate == wanted else s
30
+
31
+
32
def apply_where_naively(query: str, where: str) -> str:
    """Append *where* to *query* using plain string handling (no SQL parsing).

    A leading ``where`` on the condition is dropped. The condition is joined
    with ``AND`` when the lower-cased query contains the substring ``where``
    anywhere, and with ``WHERE`` otherwise.

    Args:
        query: The SQL text to extend.
        where: The condition to add; an empty value returns *query* unchanged.

    Returns:
        The query with the condition appended at the very end.
    """
    if not where:
        return query

    condition = trim_prefix(where, "where")
    # Substring test only: any occurrence of "where" counts as an existing clause.
    joiner = "AND" if "where" in query.lower() else "WHERE"
    return f"{query} {joiner} {condition}"
42
+
43
+
44
def apply_where_safely(query: str, where: str) -> str:
    """Add a condition to *query* by editing its parsed token list.

    Only the first statement returned by ``sqlparse.parse`` is considered.
    When it already has a top-level WHERE clause the condition is AND-ed onto
    it; otherwise a new WHERE clause is inserted before the first top-level
    ORDER BY / GROUP BY / LIMIT / HAVING keyword, or appended at the end when
    none of those is present.

    Args:
        query: The SQL text to extend.
        where: The condition to add, with or without a leading ``where``.
            An empty value returns *query* unchanged.

    Returns:
        The rewritten SQL text.
    """
    if not where:
        return query

    where = trim_prefix(where, "where")

    parsed = sqlparse.parse(query)[0]

    idx, old_where_token = parsed.token_next_by(i=sqlparse.sql.Where)
    # there is already a WHERE clause, replace it
    if idx is not None:
        # add the new condition on a new line, see https://gitlab.yimian.com.cn/etl/pigeon/issues/4
        new_where = "{}\nAND {}\n".format(old_where_token.value, where)
        parsed.tokens[idx] = sqlparse.sql.Where([sqlparse.sql.Token(None, new_where)])
        return str(parsed)

    # there is no WHERE clause, so create one and insert it in the right place
    clause_starters = ("ORDER", "GROUP", "LIMIT", "HAVING")
    next_idx = len(parsed.tokens)
    for i, token in enumerate(parsed.tokens):
        if not token.is_keyword:
            continue
        # sqlparse may emit "ORDER BY" / "GROUP BY" as a single keyword token,
        # so compare only the first word instead of the whole token value.
        words = token.value.upper().split(None, 1)
        if words and words[0] in clause_starters:
            next_idx = i
            break

    # add the WHERE clause on a new line, see https://gitlab.yimian.com.cn/etl/pigeon/issues/4
    new_where = "\nWHERE {}\n".format(where)
    # sqlparse.sql.Where.ttype is None
    new_where_token = sqlparse.sql.Where([sqlparse.sql.Token(None, new_where)])
    parsed.insert_before(next_idx, new_where_token)
    return str(parsed)
76
+
77
+
78
def extract_from_clause(query: str) -> str:
    """Extract the FROM clause from a SQL query.

    Only top-level tokens of the first parsed statement are inspected.

    Args:
        query (str): The SQL query

    Returns:
        str: The text after the first top-level FROM, up to (but excluding)
        the WHERE clause or the first GROUP BY, ORDER BY, HAVING, or LIMIT
        keyword, stripped of surrounding whitespace. Empty when there is no
        top-level FROM.
    """
    parsed = sqlparse.parse(query)[0]
    tokens = parsed.tokens

    # Find FROM token
    start_idx = None
    for i, t in enumerate(tokens):
        if t.value.upper() == "FROM":
            start_idx = i + 1
            break

    if start_idx is None:
        return ""

    # Find the end of the FROM clause
    clause_starters = ("GROUP", "ORDER", "LIMIT", "HAVING")
    end_idx = len(tokens)
    for i, t in enumerate(tokens[start_idx:], start=start_idx):
        if isinstance(t, sqlparse.sql.Where):
            end_idx = i
            break
        if t.is_keyword:
            # sqlparse may emit "GROUP BY" / "ORDER BY" as a single keyword
            # token, so compare only the first word of the token value.
            words = t.value.upper().split(None, 1)
            if words and words[0] in clause_starters:
                end_idx = i
                break

    return str(sqlparse.sql.TokenList(tokens[start_idx:end_idx])).strip()
116
+
117
+
118
def extract_where_clause(query: str) -> str:
    """Return the text of the first top-level WHERE clause of *query*.

    Only the first statement returned by ``sqlparse.parse`` is inspected.

    Returns:
        The WHERE group's ``value``, or an empty string when the statement
        has no top-level WHERE clause.
    """
    statement = sqlparse.parse(query)[0]
    position, clause = statement.token_next_by(i=sqlparse.sql.Where)
    return "" if position is None else clause.value
125
+
126
+
127
def extract_limit_count(query: str) -> int | None:
    """Return the integer that follows the first top-level LIMIT token.

    Only top-level tokens of the first parsed statement are inspected.

    Returns:
        The LIMIT value as an int, or None when no top-level token equals
        ``LIMIT`` (case-insensitively).

    Raises:
        ValueError: If the token two positions after LIMIT is not an integer literal.
        IndexError: If there is no token two positions after LIMIT.
    """
    tokens = sqlparse.parse(query)[0].tokens
    for position, token in enumerate(tokens):
        if token.value.upper() == "LIMIT":
            # The value is read two tokens after LIMIT (one token, presumably
            # whitespace, sits in between).
            return int(tokens[position + 2].value)
    return None
137
+
138
+
139
def mssql_extract_limit_count(query: str) -> int | None:
    """Extract the TOP limit from a SQL Server query.

    As a side effect, ``TOP`` is registered in ``sqlparse.keywords.KEYWORDS``
    when it is not already there.

    Args:
        query (str): The SQL query

    Returns:
        int | None: The TOP limit value, or None if no top-level token equals
        ``TOP`` (case-insensitively)
    """

    def _leading_token(token):
        # Descend into identifier groups until reaching their first token
        # (comments skipped), which is where the TOP value is read from.
        while isinstance(token, (sqlparse.sql.IdentifierList, sqlparse.sql.Identifier)):
            token = token.token_first(skip_cm=True)
        return token

    if "TOP" not in sqlparse.keywords.KEYWORDS:
        sqlparse.keywords.KEYWORDS["TOP"] = sqlparse.tokens.Keyword

    tokens = sqlparse.parse(query)[0].tokens
    for position, token in enumerate(tokens):
        if token.value.upper() == "TOP":
            # The value is read two tokens after TOP (one token in between).
            value_token = _leading_token(tokens[position + 2])
            # Remove parentheses if present, e.g. TOP (10)
            return int(value_token.value.strip("()"))
    return None
170
+
171
+
172
def apply_limit(query: str, count: int) -> str:
    """Set the LIMIT of *query* to *count*.

    When the first parsed statement has a top-level LIMIT token, the value of
    the token two positions after it is overwritten with *count* and the
    statement is re-rendered. Otherwise `` LIMIT <count>`` is appended to the
    original query text.
    """
    statement = sqlparse.parse(query)[0]
    for position, token in enumerate(statement.tokens):
        if token.value.upper() == "LIMIT":
            # Overwrite the existing value in place (one token sits in between).
            statement.tokens[position + 2].value = str(count)
            return str(statement)
    return f"{query} LIMIT {count}"
183
+
184
+
185
def mssql_apply_limit(query: str, count: int) -> str:
    """Set the ``TOP`` row limit of a SQL Server SELECT statement.

    Only top-level tokens of the first parsed statement are inspected.

    * If a ``TOP`` token follows SELECT, its numeric value is replaced by
      *count*. The value is looked up inside grouped tokens as well
      (e.g. ``TOP 10 a, b`` or ``TOP (10)``), because assigning to a group's
      ``value`` does not change the rendered SQL.
    * Otherwise ``TOP <count>`` is inserted in front of the select list, i.e.
      before the first token after SELECT that is not whitespace, a comment,
      or ``DISTINCT`` / ``ALL``.

    Args:
        query: The SQL query.
        count: The row limit to apply.

    Returns:
        The rewritten query. The original text is returned unchanged when no
        insertion point can be found (no top-level SELECT, or nothing after it).
    """
    parsed = sqlparse.parse(query)[0]
    tokens = parsed.tokens

    select_idx = insert_idx = top_idx = None
    for i, t in enumerate(tokens):
        upper = t.value.upper()
        if select_idx is None:
            if upper == "SELECT":
                select_idx = i
            continue
        if upper == "TOP":
            # The limit value is expected two tokens after TOP.
            top_idx = i + 2
            break
        if insert_idx is None:
            skippable = (
                t.is_whitespace
                or isinstance(t, sqlparse.sql.Comment)
                or t.ttype in sqlparse.tokens.Comment
                or upper in ("DISTINCT", "ALL")
            )
            if not skippable:
                insert_idx = i

    if top_idx is not None:
        if top_idx < len(tokens):
            target = tokens[top_idx]
            # Rendering walks leaf tokens, so update the numeric leaf itself.
            number = next(
                (leaf for leaf in target.flatten() if leaf.ttype in sqlparse.tokens.Number),
                None,
            )
            if number is None and not target.is_group:
                number = target
            if number is not None:
                number.value = str(count)
        return str(parsed)

    if insert_idx is None:
        # Nothing to attach TOP to; leave the query untouched rather than
        # splicing tokens at an undefined position.
        return query

    add_tokens = [
        sqlparse.sql.Token(sqlparse.tokens.Keyword, "TOP"),
        sqlparse.sql.Token(sqlparse.tokens.Whitespace, " "),
        sqlparse.sql.Token(sqlparse.tokens.Number, str(count)),
        sqlparse.sql.Token(sqlparse.tokens.Whitespace, " "),
    ]
    parsed.tokens = tokens[:insert_idx] + add_tokens + tokens[insert_idx:]
    return str(parsed)
212
+
213
+
214
def apply_sql_no_cache(query: str) -> str:
    """Add SQL_NO_CACHE hint to a SELECT query.

    The hint is the comment ``/*!40001 SQL_NO_CACHE*/`` surrounded by single
    spaces, placed directly after the first top-level SELECT token.

    Args:
        query (str): The SQL query

    Returns:
        str: Query with the hint added. The query is returned unchanged when
        it already contains ``/*``; when no top-level SELECT token exists the
        re-rendered statement is returned without a hint.
    """
    statement = sqlparse.parse(query)[0]
    if "/*" in query:
        return query

    hint = sqlparse.sql.Comment([sqlparse.sql.Token(None, "/*!40001 SQL_NO_CACHE*/")])

    for position, current in enumerate(statement.tokens):
        if current.value.upper() != "SELECT":
            continue
        # Splice "<space><hint><space>" right behind the SELECT token.
        statement.tokens[position + 1 : position + 1] = [
            sqlparse.sql.Token(sqlparse.tokens.Whitespace, " "),
            hint,
            sqlparse.sql.Token(sqlparse.tokens.Whitespace, " "),
        ]
        break
    return str(statement)
244
+
245
+
246
def sqlformat(query: str, reindent: bool = False, **kwargs) -> str:
    """Format *query* with ``sqlparse.format`` and upper-cased keywords.

    ``reindent=True`` and ``keyword_case="upper"`` are always passed to
    sqlparse, overriding the same keys in *kwargs*.

    Args:
        query: The SQL text; surrounding whitespace is stripped first.
        reindent: Keep the multi-line, re-indented output. When false (the
            default) the formatted lines are stripped and joined with single
            spaces into one line.
        **kwargs: Extra options forwarded to ``sqlparse.format``.

    Returns:
        The formatted SQL text.
    """
    options = {**kwargs, "reindent": True, "keyword_case": "upper"}
    formatted = sqlparse.format(query.strip(), **options)
    if reindent:
        return formatted
    return " ".join(line.strip() for line in formatted.splitlines())
252
+
253
+
254
+ def add_schema_to_create_table(
255
+ create_table_ddl: str, schema: str, quote_callback: Callable[[str], str] | None = None
256
+ ) -> str:
257
+ """Add schema to a CREATE TABLE statement if the table name doesn't already have a schema.
258
+
259
+ Args:
260
+ create_table_ddl (str): The CREATE TABLE DDL statement
261
+ schema (str): The schema name to add
262
+ quote_callback (Optional[QuoteCallback]): Optional callback function to quote table names.
263
+ The callback should accept a string (either 'table' or 'schema.table')
264
+ and return the properly quoted string.
265
+
266
+ Returns:
267
+ str: Modified CREATE TABLE statement with schema added, or original if no modification needed
268
+ """
269
+ if not schema or not create_table_ddl or "CREATE TABLE" not in create_table_ddl.upper():
270
+ return create_table_ddl
271
+
272
+ parsed = sqlparse.parse(create_table_ddl)[0]
273
+
274
+ # Find the CREATE TABLE tokens
275
+ for token in parsed.tokens:
276
+ if isinstance(token, sqlparse.sql.Identifier):
277
+ token_str = str(token)
278
+
279
+ # Check if it already has a schema
280
+ if "." in token_str:
281
+ # Already has schema, if we have a quote callback, apply it
282
+ if quote_callback:
283
+ parts = token_str.split(".")
284
+ schema_part = parts[0].strip()
285
+ table_part = ".".join(parts[1:]).strip()
286
+
287
+ # Extract actual names without quotes
288
+ quote_chars = ["`", '"', "[", "]"]
289
+ clean_schema = schema_part
290
+ clean_table = table_part
291
+
292
+ for char in quote_chars:
293
+ clean_schema = clean_schema.replace(char, "")
294
+ clean_table = clean_table.replace(char, "")
295
+
296
+ # Apply quote callback to the extracted schema.table
297
+ qualified_name = f"{clean_schema}.{clean_table}"
298
+ new_name = quote_callback(qualified_name)
299
+
300
+ # Replace the token
301
+ token.tokens = [sqlparse.sql.Token(sqlparse.tokens.Name, new_name)]
302
+ return str(parsed)
303
+
304
+ # No schema, need to add one
305
+ table_name = token.get_real_name()
306
+ original = token.value
307
+
308
+ # Special case for SQL Server bracket style
309
+ if original.startswith("[") and original.endswith("]"):
310
+ new_name = f"{schema}.{table_name}"
311
+ # Handle other quoted identifiers
312
+ elif "`" in original:
313
+ new_name = f"{schema}.`{table_name}`"
314
+ elif '"' in original:
315
+ new_name = f'{schema}."{table_name}"'
316
+ else:
317
+ new_name = f"{schema}.{table_name}"
318
+
319
+ if quote_callback:
320
+ # Apply quote callback to new schema.table
321
+ qualified_name = f"{schema}.{table_name}"
322
+ new_name = quote_callback(qualified_name)
323
+
324
+ # Replace the token
325
+ token.tokens = [sqlparse.sql.Token(sqlparse.tokens.Name, new_name)]
326
+ break
327
+
328
+ return str(parsed)
@@ -0,0 +1,155 @@
1
+ import datetime
2
+ import functools
3
+ import logging
4
+ import time
5
+
6
+ from recurvedata.utils.imports import MockModule
7
+
8
+ try:
9
+ import humanize
10
+ except ImportError:
11
+ humanize = MockModule("humanize")
12
+
13
+ _logger = logging.getLogger(__name__)
14
+
15
+
16
def time_since(dt):
    """Return the ``timedelta`` between *dt* and the current local time (naive ``datetime.now()``)."""
    current = datetime.datetime.now()
    return current - dt
18
+
19
+
20
class Timer:
    """Log messages suffixed with the time elapsed since the timer was started.

    The clock starts at construction unless ``delay=True``; in that case
    ``reset()`` has to be called before any logging method, because the
    elapsed time is computed from ``start_dttm``.
    """

    def __init__(self, delay=False, logger=None):
        # Fall back to this module's logger when none is supplied.
        self.logger = logger or _logger
        self.start_dttm = None
        if not delay:
            self.reset()

    def reset(self):
        """Restart the clock at the current local time."""
        self.start_dttm = datetime.datetime.now()

    def debug(self, message, *args):
        """Log *message* plus the elapsed time at DEBUG level."""
        self._log(self.logger.debug, message, *args)

    def info(self, message, *args):
        """Log *message* plus the elapsed time at INFO level."""
        self._log(self.logger.info, message, *args)

    def warning(self, message, *args):
        """Log *message* plus the elapsed time at WARNING level."""
        self._log(self.logger.warning, message, *args)

    def error(self, message, *args):
        """Log *message* plus the elapsed time at ERROR level."""
        self._log(self.logger.error, message, *args)

    def _log(self, func, message, *args):
        # Append the elapsed time as one extra %-style argument.
        # TODO: humanize timedelta
        template = message.rstrip() + " took %s"
        elapsed = time_since(self.start_dttm)
        func(template, *args, elapsed)
47
+
48
+
49
class timing:
    """Log how long an operation took; usable as a decorator or a context manager.

    One ``Timer`` is shared by every use of the same instance and is reset at
    the start of each call / ``with`` block.
    """

    def __init__(self, operation="", logger=None):
        self.operation = operation
        self._timer = Timer(delay=True, logger=logger)

    def __call__(self, func):
        """Wrap *func* so that each successful call logs its duration at INFO level."""
        # Derive a label from the function when none was given.
        if not self.operation:
            self.operation = f"calling {func}"

        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            self._timer.reset()
            result = func(*args, **kwargs)
            # Reached only when func returned; an exception skips the log line.
            self._timer.info(self.operation)
            return result

        return wrapper

    def __enter__(self):
        self._timer.reset()

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Logs whether or not the block raised; exceptions are not suppressed.
        self._timer.info(self.operation or "operation finished")
72
+
73
+
74
class TimeCounter(object):
    """Count processed items and periodically log throughput.

    Args:
        name: Label included in every log line.
        log_threshold: Log statistics whenever the running count is a multiple
            of this value; a falsy value disables automatic logging.
        total: Expected number of items; when set, progress (percent of
            *total*) is logged as well.
        logger: Object with an ``info`` method; defaults to the ``logging`` module.
    """

    def __init__(self, name="", log_threshold=5000, total=None, logger=None):
        self.name = name
        self.log_threshold = log_threshold
        self.total = total

        self._logger = logger or logging
        self._count = 0
        self._start_time = datetime.datetime.now()

    @property
    def count(self):
        """Number of items counted so far."""
        return self._count

    def incr(self, by=1):
        """Add *by* to the count and log statistics when the threshold is hit."""
        self._count += by
        if self.log_threshold and self._count % self.log_threshold == 0:
            self.show_stat()

    def show_stat(self):
        """Log the count, elapsed time, average speed and (if known) progress."""
        d = datetime.datetime.now() - self._start_time
        elapsed = d.total_seconds()
        # Guard against a zero (coarse clock) or negative (clock adjusted)
        # interval, which would otherwise raise or report a negative speed.
        speed = self._count / elapsed if elapsed > 0 else 0.0
        if not self.total:
            self._logger.info("<%s> finished %d in %s, speed: %.2f/s", self.name, self._count, d, speed)
        else:
            progress = 100.0 * self._count / self.total
            self._logger.info(
                "<%s> finished %d in %s, speed: %.2f/s, progress: %.2f", self.name, self._count, d, speed, progress
            )
103
+
104
+
105
+ class DisplayProgress:
106
+ def __init__(self, total_amount: int = None, display_interval: float = 1024 * 1024, stream: bool = True):
107
+ self._stream = stream
108
+ self._seen_so_far = 0
109
+ self._interval = display_interval
110
+ self._start_time = datetime.datetime.now()
111
+ self._size = total_amount
112
+
113
+ def __call__(self, bytes_amount: int, total_amount: int = None):
114
+ if self._stream:
115
+ self._seen_so_far += bytes_amount
116
+ else:
117
+ self._seen_so_far = bytes_amount
118
+
119
+ total_amount = total_amount or self._size
120
+ if total_amount != 0:
121
+ progress = (self._seen_so_far / total_amount) * 100
122
+ else:
123
+ progress = 0
124
+
125
+ if not self._seen_so_far or (self._seen_so_far < total_amount and self._seen_so_far % self._interval != 0):
126
+ return None
127
+
128
+ duration = datetime.datetime.now() - self._start_time
129
+ speed = self._seen_so_far / duration.total_seconds()
130
+ _logger.info(
131
+ "transferred %s in %s, average speed: %s/s, progress: %.2f%%",
132
+ humanize.naturalsize(self._seen_so_far, gnu=True),
133
+ duration,
134
+ humanize.naturalsize(speed, gnu=True),
135
+ progress,
136
+ )
137
+
138
+
139
class ProgressCallback:
    """Progress callback that logs bytes transferred, average speed and percent done.

    The clock starts when the instance is created.
    """

    def __init__(self):
        self._start_time = time.time()

    def __call__(self, consumed_bytes, total_bytes):
        """Log one progress line; does nothing when *total_bytes* is falsy.

        Args:
            consumed_bytes: Number of bytes transferred so far.
            total_bytes: Total number of bytes to transfer.
        """
        if not total_bytes:
            return
        duration = time.time() - self._start_time
        # The first callback can arrive before the clock ticks; avoid
        # dividing by a zero (or negative, after a clock adjustment) interval.
        speed = consumed_bytes / duration if duration > 0 else 0.0
        progress = 100 * (float(consumed_bytes) / float(total_bytes))
        logging.info(
            "transferred %s of %s, avg speed: %s/s, progress: %.2f%%",
            humanize.naturalsize(consumed_bytes, gnu=True),
            humanize.naturalsize(total_bytes, gnu=True),
            humanize.naturalsize(speed, gnu=True),
            progress,
        )
File without changes
File without changes
File without changes
File without changes
File without changes
@@ -0,0 +1 @@
1
+ # schedulers package