recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333) hide show
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,56 @@
1
+ import datetime
2
+ from collections import OrderedDict
3
+
4
+ import bson
5
+ import pymongo
6
+
7
+ from recurvedata.pigeon.connector._registry import register_connector_class
8
+ from recurvedata.pigeon.schema import Schema, types
9
+ from recurvedata.pigeon.utils import LoggingMixin
10
+
11
+
12
@register_connector_class(['mongodb'])
class MongoDBConnector(LoggingMixin):
    """Thin wrapper around :class:`pymongo.MongoClient` that can also infer a
    pigeon :class:`Schema` from a sample document."""

    def __init__(self, host=None, port=None, **kwargs):
        """
        Args:
            host: MongoDB host, passed straight to ``MongoClient``.
            port: MongoDB port, passed straight to ``MongoClient``.
            **kwargs: extra ``MongoClient`` options; validated lazily in
                :meth:`connect`.
        """
        self.host = host
        self.port = port

        # OrderedDict preserves the field order of returned documents, so
        # the schema inferred from a document keeps the document's layout.
        kwargs.setdefault('document_class', OrderedDict)
        self.kwargs = kwargs

    def connect(self, **kwargs):
        """Create a :class:`pymongo.MongoClient`.

        Keyword arguments override the options given to ``__init__``.
        Options that pymongo rejects are silently dropped, since stored
        connection configs may carry extraneous keys.
        """
        opts = self.kwargs.copy()
        opts.update(kwargs)

        # Iterate over a copy because we mutate ``opts`` while validating.
        for k, v in opts.copy().items():
            try:
                pymongo.common.validate(k, v)
            except pymongo.errors.ConfigurationError:
                opts.pop(k)

        return pymongo.MongoClient(host=self.host, port=self.port, **opts)

    def infer_schema(self, doc: dict):
        """Build a :class:`Schema` by inferring a canonical type per field."""
        schema = Schema()
        for field, value in doc.items():
            schema.add_field_by_attrs(field, self._infer_data_type(value))
        return schema

    def _infer_data_type(self, value):
        """Map a BSON/Python value to a canonical pigeon type.

        ``bool`` must be checked before ``int``: bool is a subclass of int,
        so with the int check first every boolean would be misclassified as
        INT64 and the BOOLEAN branch would be unreachable.
        """
        if isinstance(value, bool):
            return types.BOOLEAN
        if isinstance(value, float):
            return types.FLOAT64
        if isinstance(value, int):
            return types.INT64
        if isinstance(value, (str, bson.ObjectId)):
            return types.STRING
        if isinstance(value, datetime.datetime):
            return types.DATETIME

        if isinstance(value, (list, dict)):
            # will be JSON-serialized downstream
            return types.JSON

        # every other type is treated as a string
        return types.STRING
@@ -0,0 +1,467 @@
1
+ import datetime
2
+ import os
3
+ import urllib
4
+ from collections import OrderedDict
5
+
6
+ import cytoolz as toolz
7
+ import pyodbc
8
+
9
+ from recurvedata.pigeon.connector._registry import register_connector_class
10
+ from recurvedata.pigeon.connector.azure_blob import AzureBlobConnector
11
+ from recurvedata.pigeon.connector.dbapi import DBAPIConnector
12
+ from recurvedata.pigeon.schema import types
13
+ from recurvedata.pigeon.utils import fs, md5hash, safe_int
14
+
15
# https://github.com/mkleehammer/pyodbc/wiki/Cursor#description
# The 'type code' value is the class type used to create the Python objects when reading rows.
# For example, a varchar column's type will be str.
# Maps pyodbc type codes (plain Python classes) to pigeon's canonical types;
# anything unlisted falls back to STRING (see to_canonical_type below).
_mssql_type_to_canonical_type = {
    int: types.INT64,
    float: types.FLOAT64,
    bool: types.BOOLEAN,
    datetime.datetime: types.DATETIME,
    str: types.STRING,
}

# Maps canonical types to SQL Server column type names, used when emitting DDL.
_canonical_type_to_mssql_type = {
    types.BOOLEAN: "BIT",
    types.INT8: "TINYINT",
    types.INT16: "SMALLINT",
    types.INT32: "INT",
    types.INT64: "BIGINT",
    types.FLOAT32: "REAL",
    types.FLOAT64: "DOUBLE PRECISION",
    types.DATE: "DATE",
    types.DATETIME: "DATETIME",
    # Use NVARCHAR (national character varying) to support unicode
    types.STRING: "NVARCHAR",
    types.JSON: "NVARCHAR",
}
40
+
41
+
42
@register_connector_class("mssql")
class SQLServerConnector(DBAPIConnector):
    """DBAPI connector for Microsoft SQL Server, backed by pyodbc."""

    _sqla_driver = "mssql+pyodbc"
    _identifier_start_quote = "["
    _identifier_end_quote = "]"
    _param_placeholder = "?"
    _default_port = 1433
    _autocommit = False

    def __init__(
        self,
        host=None,
        port=None,
        database=None,
        user=None,
        password=None,
        conn_string=None,
        schema=None,
        odbc_driver: str = "ODBC Driver 18 for SQL Server",
        encrypt: bool = True,
        trust_server_certificate: bool = False,
        *args,
        **kwargs,
    ):
        """
        Args:
            conn_string: a full ODBC connection string; when given, the
                host/port/user/password/database parsed from it override the
                individual arguments.
            odbc_driver: name of the installed ODBC driver to use.
            encrypt: whether to request an encrypted connection.
            trust_server_certificate: skip server certificate validation.
        """
        super().__init__(host, port, database, user, password, schema, *args, **kwargs)
        self.odbc_driver = odbc_driver
        self.encrypt = encrypt
        self.trust_server_certificate = trust_server_certificate
        if conn_string:
            attrs = self.parse_conn_string(conn_string)
            for k, v in attrs.items():
                setattr(self, k, v)

    @property
    def conn_string(self):
        """Assemble the ODBC connection string from the instance attributes."""
        # TODO: reuse the corresponding parameters from a caller-supplied conn string
        options = OrderedDict(
            {
                "Driver": f"{self.odbc_driver}",
                "Server": f"tcp:{self.host},{self.port}",
                "Database": self.database,
                "Uid": self.user,
                # Braces protect special characters in the password.
                "Pwd": "{%s}" % self.password,
                "Encrypt": "yes" if self.encrypt else "no",
                "TrustServerCertificate": "yes" if self.trust_server_certificate else "no",
                "Connection Timeout": 30,
            }
        )
        options.update(self.kwargs.get("odbc_options", {}))
        return ";".join([f"{k}={v}" for k, v in options.items()])

    @staticmethod
    def parse_conn_string(conn_string: str):
        """Parse an ODBC connection string into connection attributes.

        Only the first ``=`` of each segment separates key and value, so
        values that themselves contain ``=`` (e.g. passwords) survive.
        Empty segments (``;;`` or a trailing ``;``) are ignored.

        Returns:
            dict with ``host``, ``port``, ``user``, ``password``, ``database``.
        """
        kvs = {}
        for p in conn_string.strip(";").split(";"):
            if not p:
                continue
            k, _, v = p.partition("=")
            kvs[k.lower()] = v

        # "Server" has the form "tcp:host,port"
        server = kvs["server"].split(":")[1].split(",")
        return {
            "host": server[0],
            "port": int(server[1]),
            "user": kvs["uid"],
            "password": kvs["pwd"][1:-1],  # remove leading and trailing {}
            "database": kvs["database"],
        }

    def connect_impl(self, autocommit=None, *args, **kwargs):
        """Open a raw pyodbc connection; ``None`` autocommit means class default."""
        if autocommit is None:
            autocommit = self._autocommit
        return pyodbc.connect(self.conn_string, autocommit=autocommit)

    def cursor(self, autocommit=None, dryrun=False, commit_on_close=True, **kwargs):
        """Cursor factory; only normalizes the autocommit default."""
        if autocommit is None:
            autocommit = self._autocommit
        return super().cursor(autocommit, dryrun, commit_on_close, **kwargs)

    def has_schema(self, schema):
        """Return True if *schema* exists in the current database."""
        rv = self.fetchone(f"SELECT * FROM sys.schemas WHERE name='{schema}'")
        return bool(rv)

    def has_table(self, table, schema=None, **kwargs):
        """Return True if *table* exists; a dotted table name overrides *schema*."""
        schema, table = self._get_schema_table(table, schema)
        schema = schema or "dbo"
        query = f"""
            SELECT name FROM sys.tables
            WHERE schema_name(schema_id) = '{schema}' AND name = '{table}'
        """
        return bool(self.fetchall(query))

    def create_schema(self, schema):
        """Create *schema* if it does not exist (check + create in one cursor)."""
        with self.cursor() as cursor:
            cursor.execute(f"SELECT * FROM sys.schemas WHERE name='{schema}'")
            exists = bool(cursor.fetchall())
            if not exists:
                cursor.execute(f"CREATE SCHEMA {self.quote_identifier(schema)}")

    def create_master_key(self):
        """Create the database master key when no symmetric key exists yet."""
        queries = """
        IF NOT EXISTS (SELECT * FROM sys.symmetric_keys)
            CREATE MASTER KEY
        """
        self.execute(queries)

    def get_columns(self, table, schema=None, exclude=None):
        """Return ordered column names of *table*, minus any in *exclude*.

        Raises:
            ValueError: if the table does not exist.
        """
        schema, table = self._get_schema_table(table, schema)
        if not self.has_table(table=table, schema=schema):
            raise ValueError(f"Table {schema}.{table} not exists")

        # the table/view name may be case-sensitive
        query = f"""
            SELECT column_name FROM INFORMATION_SCHEMA.COLUMNS
            WHERE table_schema='{schema}' AND table_name = '{table}'
            ORDER BY ordinal_position
        """
        rv = self.fetchall(query)
        cols = [x[0] for x in rv]
        if exclude:
            cols = [x for x in cols if x not in exclude]
        return cols

    def drop_table_if_exists(self, schema, table, external_table=False):
        """Drop a (possibly EXTERNAL) table when it exists."""
        schema, table = self._get_schema_table(table, schema)
        external = " EXTERNAL " if external_table else " "
        queries = f"""
        IF EXISTS (
            SELECT * FROM sys.tables WHERE SCHEMA_NAME(schema_id) = '{schema}' AND name = '{table}'
        )
        DROP {external} table {schema}.{table}
        """
        self.execute(queries)

    def load_csv(
        self,
        table,
        filename,
        schema="dbo",
        columns=None,
        delimiter=",",
        quotechar='"',
        lineterminator="\r\n",
        escapechar=None,
        skiprows=0,
        using_insert=None,
        **kwargs,
    ):
        """Load a CSV file into *table*.

        Tries the bulk path first (subclasses implement ``load_csv_bulk``);
        on failure, falls back to batched INSERT statements.
        """
        if not using_insert:
            try:
                options = dict(
                    columns=columns,
                    delimiter=delimiter,
                    quotechar=quotechar,
                    lineterminator=lineterminator,
                    escapechar=escapechar,
                    skiprows=skiprows,
                )
                options.update(**kwargs)
                self.load_csv_bulk(table, filename, schema, **options)
            except Exception as e:
                self.logger.warning("bulk load local file is not supported, apply INSERT instead. error: %s", e)
            else:
                # bulk load succeeded, nothing more to do
                return

        # SQL Server caps the number of parameters per statement
        # https://docs.microsoft.com/en-us/sql/sql-server/maximum-capacity-specifications-for-sql-server
        num_params_limit = 2100 - 1
        if not columns:
            columns = self.get_columns(table=table, schema=schema)

        # Shrink the batch so rows * columns stays under the parameter limit.
        batch_size = kwargs.get("batch_size", 1000)
        new_batch_size = int(min(num_params_limit / len(columns), batch_size))
        self.logger.info(
            "table has %s columns, adjust batch_size from %s to %s", len(columns), batch_size, new_batch_size
        )
        kwargs["batch_size"] = new_batch_size

        table = self._format_table_name(table, schema)
        self.load_csv_by_inserting(
            table, filename, columns, delimiter, quotechar, lineterminator, escapechar, skiprows, **kwargs
        )

    def load_csv_bulk(
        self,
        table,
        filename,
        schema="dbo",
        columns=None,
        delimiter=",",
        quotechar='"',
        lineterminator="\r\n",
        escapechar=None,
        skiprows=0,
        **kwargs,
    ):
        """Bulk-load hook; plain SQL Server has no local bulk path."""
        raise NotImplementedError

    def _format_table_name(self, table, schema):
        # NOTE(review): this quotes the dotted name as a single identifier
        # ("[schema.table]"); verify quote_identifier handles the dot as
        # intended by callers.
        if schema and "." not in table:
            table = self.quote_identifier(f"{schema}.{table}")
        return table

    def _get_schema_table(self, table, schema):
        """Split a possibly dotted table name; default schema is ``dbo``."""
        if "." in table:
            schema, table = table.split(".")
        if not schema:
            schema = "dbo"
        return schema, table

    @staticmethod
    def to_canonical_type(type_code, size):
        """Map a pyodbc type code to a canonical type (STRING as fallback)."""
        return _mssql_type_to_canonical_type.get(type_code, types.STRING)

    @staticmethod
    def from_canonical_type(canonical_type, size):
        """Map a canonical type (and size) to a SQL Server column type."""
        if canonical_type == types.STRING:
            # Reserving 4 bytes per character is the safe choice
            # https://docs.microsoft.com/en-us/sql/t-sql/data-types/nchar-and-nvarchar-transact-sql?view=sql-server-2017#arguments
            # max indicates that the maximum storage size is 2^30-1 characters
            size = safe_int(size) * 4
            if size > 4000:
                size = "max"
            elif size == 0:
                size = "max"
            mssql_type = f"NVARCHAR({size})"
        else:
            mssql_type = _canonical_type_to_mssql_type.get(canonical_type, "NVARCHAR(200)")
        return mssql_type

    def generate_ddl(self, table, schema="dbo", database=None, if_exists=True):
        """Reconstruct a CREATE TABLE statement from INFORMATION_SCHEMA.

        Args:
            if_exists: wrap the DDL in an IF NOT EXISTS guard.

        Raises:
            ValueError: if the table does not exist.
        """
        schema, table = self._get_schema_table(table, schema)
        if not self.has_table(table, schema):
            raise ValueError(f"Table {table!r} not exists in {database!r}")

        query = f"""
            SELECT column_name, data_type, character_maximum_length, is_nullable
            FROM INFORMATION_SCHEMA.COLUMNS
            WHERE table_schema = '{schema}' AND table_name = '{table}'
            ORDER BY ordinal_position
        """
        with self.cursor() as cursor:
            cursor.execute(query)
            columns = cursor.fetchall()

        col_definitions = []
        # column_name, data_type, character_maximum_length, is_nullable
        for col in columns:
            dtype = col.data_type
            if col.character_maximum_length:
                dtype = f"{dtype}({col.character_maximum_length})"
            # "DEFAULT NULL" for nullable columns, "NOT NULL" otherwise
            null_modifier = "DEFAULT" if col.is_nullable == "YES" else "NOT"
            definition = f"[{col.column_name}] {dtype.upper()} {null_modifier} NULL"
            col_definitions.append(definition)

        body = ",\n\t\t\t\t".join(col_definitions)
        ddl = f"""
            CREATE TABLE [{schema}].[{table}] (
                {body}
            )
        """
        if if_exists:
            ddl = f"""
            IF NOT EXISTS (
                SELECT * FROM sys.tables
                WHERE schema_name(schema_id) = '{schema}' AND name = '{table}'
            )
            {ddl}
            """
        return ddl

    def is_mssql(self):
        return True

    def _get_sqlalchemy_uri(self):
        """Return a SQLAlchemy URI that tunnels the whole ODBC conn string."""
        # Local import: the module-level ``import urllib`` does not load the
        # ``urllib.parse`` submodule, so relying on ``urllib.parse`` here
        # would raise AttributeError unless another module imported it first.
        from urllib.parse import quote_plus

        return "mssql+pyodbc:///?odbc_connect=%s" % quote_plus(self.conn_string)
317
+
318
+
319
# Backward compatibility: keep the old class name importable for legacy code.
MSSQLConnector = SQLServerConnector
321
+
322
+
323
class BaseAzureSQLConnector(SQLServerConnector):
    """Base class for Azure SQL based connectors (Synapse and Fabric)
    Provides common functionality for Azure SQL services
    reference:
    - https://learn.microsoft.com/en-us/sql/t-sql/statements/copy-into-transact-sql?view=fabric
    - https://learn.microsoft.com/en-us/sql/t-sql/statements/copy-into-transact-sql?view=azure-sqldw-latest
    """

    def _get_credential(self, blob: AzureBlobConnector) -> str:
        """Get Azure Blob Storage credential for COPY INTO command.

        Args:
            blob: Azure Blob Storage connector instance

        Returns:
            str: Credential string for COPY INTO command
        """
        # Prefer the account key, then a SAS token; an empty string leaves
        # the COPY statement without an explicit CREDENTIAL clause.
        if blob.account_key:
            return f"CREDENTIAL=(IDENTITY= 'Storage Account Key', SECRET='{blob.account_key}'),"
        elif blob.sas_token:
            return f"CREDENTIAL=(IDENTITY= 'Shared Access Signature', SECRET='{blob.sas_token}'),"
        else:
            return ""

    def load_csv_bulk(
        self,
        table: str,
        filename: str,
        schema="dbo",
        columns=None,
        delimiter=",",
        quotechar='"',
        lineterminator="\r\n",
        escapechar=None,
        skiprows=0,
        **kwargs,
    ):
        """
        Bulk load data using COPY command for Azure SQL services

        The flow is: gzip the file if needed, upload it to a blob container,
        run COPY INTO against the blob URL, then delete the temporary gzip
        file and the uploaded blob (best-effort).

        Args:
            table: Target table name
            filename: Source file path
            schema: Schema name
            columns: List of column names
            delimiter: Field delimiter
            quotechar: Quote character
            lineterminator: Line terminator
            escapechar: Escape character
            skiprows: Number of rows to skip
            **kwargs: Additional arguments

        Raises:
            RuntimeError: when no blob storage options are configured.
        """
        blob = self.create_blob_connector()
        if not blob:
            raise RuntimeError("blob storage is not configured")

        # upload
        # Only compress when not already gzipped; COPY is told COMPRESSION='Gzip'.
        if filename.endswith(".gz"):
            file_to_upload = filename
        else:
            self.logger.info("compressing file %s", filename)
            file_to_upload = fs.gzip_compress(filename, using_cmd=True)

        # A dotted table name overrides the schema argument.
        if "." in table:
            schema, table = table.split(".")

        container = self.kwargs.get("blob_options", {}).get("container_name", self._generate_blob_container_name())
        blob.create_container(container)
        blob_name = f"{self.database}/{schema}/{table}/{os.path.basename(file_to_upload)}"
        self.logger.info(f"uploading {file_to_upload} to {container}/{blob_name}")
        blob_path = blob.upload(container, file_to_upload, blob_name)

        if columns:
            column_list = f'({", ".join(columns)})'
        else:
            column_list = ""

        # FIRSTROW is 1-based, hence skiprows + 1.
        # NOTE(review): the OPTION (LABEL = ...) clause is accepted by
        # Synapse; confirm it is valid for every Azure target this base
        # class serves.
        query = f"""
        COPY INTO {self.quote_identifier(schema)}.{self.quote_identifier(table)} {column_list}
        FROM '{blob.get_url(container, blob_name)}'
        WITH (
            FILE_TYPE = 'CSV',
            {self._get_credential(blob)}
            COMPRESSION = 'Gzip',
            FIELDQUOTE = '{quotechar}',
            FIELDTERMINATOR = '{delimiter}',
            ROWTERMINATOR = '{lineterminator}',
            FIRSTROW = {skiprows + 1}
        )
        OPTION (LABEL = 'COPY {schema}.{table}')
        """
        try:
            self.logger.info("running COPY command")
            self.execute(query, autocommit=False, commit_on_close=True)
            self.logger.info("COPY finished")
        except Exception as e:
            self.logger.exception("failed to copy data to database")
            raise e
        finally:
            # Clean up the temporary gzip file (if we created one) and the
            # uploaded blob; blob deletion failures are logged, not raised.
            if file_to_upload != filename:
                self.logger.info("delete %s", file_to_upload)
                fs.remove_files_safely(file_to_upload)

            self.logger.info(f"delete blob: {blob_path}")
            try:
                blob.delete_blob(container, blob_name)
            except Exception as e:
                self.logger.error(f"operation on blob storage fails: {e}")

    # NOTE(review): toolz.memoize on an instance method caches per (self,)
    # and keeps instances alive for the cache's lifetime — confirm intended.
    @toolz.memoize
    def create_blob_connector(self):
        """Create blob connector from ``blob_options``; None when unconfigured."""
        blob_options = self.kwargs.get("blob_options")
        if not blob_options:
            return None
        return AzureBlobConnector(**blob_options)

    def _generate_blob_container_name(self):
        """Generate blob container name that follows Azure naming rules:
        - 3-63 characters long
        - Lowercase letters, numbers, and hyphens only
        - Must start and end with a letter or number
        - No consecutive hyphens
        """
        # Get instance name and limit its length to 20 characters
        instance = self.host.split(".", 1)[0][:20]
        # Remove any non-alphanumeric characters and convert to lowercase
        instance = "".join(c for c in instance if c.isalnum()).lower()
        # Ensure instance is not empty
        if not instance:
            instance = "default"
        # Generate container name with fixed prefix and limited length
        container_name = f"pigeon-{instance}-{md5hash(self.host)[:8]}"
        # Ensure total length is within limits (63 chars)
        if len(container_name) > 63:
            container_name = container_name[:63]
        # Ensure name ends with alphanumeric
        while not container_name[-1].isalnum():
            container_name = container_name[:-1]
        return container_name
463
+
464
+
465
@register_connector_class("azure_mssql")
class AzureSQLServerConnector(BaseAzureSQLConnector):
    """Azure SQL connector; all behavior (including COPY INTO bulk loading)
    is inherited from BaseAzureSQLConnector."""

    pass
@@ -0,0 +1,175 @@
1
+ import re
2
+
3
+ import cytoolz as toolz
4
+ import pymysql
5
+ import sqlalchemy
6
+ import sqlalchemy.engine.url
7
+ from pymysql.constants import FIELD_TYPE
8
+ from pymysql.converters import escape_string
9
+
10
+ from recurvedata.pigeon.connector._registry import register_connector_class
11
+ from recurvedata.pigeon.connector.dbapi import DBAPIConnector, _ShowTableLikeMixin
12
+ from recurvedata.pigeon.schema import types
13
+ from recurvedata.pigeon.utils import fs, safe_int
14
+
15
# Maps pymysql FIELD_TYPE codes to pigeon canonical schema types. Any code
# absent from this table falls back to types.STRING
# (see MySQLConnector.to_canonical_type).
_mysql_type_to_canonical_type = {
    # integer types, by width (INT24/mediumint is widened to INT64)
    FIELD_TYPE.TINY: types.INT8,
    FIELD_TYPE.SHORT: types.INT16,
    FIELD_TYPE.LONG: types.INT32,
    FIELD_TYPE.LONGLONG: types.INT64,
    FIELD_TYPE.INT24: types.INT64,
    # floating point; DECIMAL/NEWDECIMAL are mapped to FLOAT64
    FIELD_TYPE.FLOAT: types.FLOAT32,
    FIELD_TYPE.DOUBLE: types.FLOAT64,
    FIELD_TYPE.DECIMAL: types.FLOAT64,
    FIELD_TYPE.NEWDECIMAL: types.FLOAT64,

    # temporal types
    FIELD_TYPE.TIMESTAMP: types.DATETIME,
    FIELD_TYPE.DATETIME: types.DATETIME,
    FIELD_TYPE.DATE: types.DATE,

    # others: types.STRING
}
32
+
33
# Maps pigeon canonical types to MySQL column type names; consulted by
# MySQLConnector.from_canonical_type, which additionally refines STRING
# sizing (VARCHAR/TEXT/MEDIUMTEXT) based on the column size.
_canonical_type_to_mysql_type = {
    # MySQL has no native BOOLEAN column type; TINYINT is the convention
    types.BOOLEAN: 'TINYINT',
    types.INT8: 'TINYINT',
    types.INT16: 'SMALLINT',
    types.INT32: 'INT',
    types.INT64: 'BIGINT',
    types.FLOAT32: 'FLOAT',
    types.FLOAT64: 'DOUBLE',

    types.DATE: 'DATE',
    types.DATETIME: 'DATETIME',

    # JSON is stored as plain TEXT rather than MySQL's JSON type
    types.STRING: 'TEXT',
    types.JSON: 'TEXT',
}
48
+
49
+
50
@register_connector_class(['mysql', 'tidb'])
class MySQLConnector(_ShowTableLikeMixin, DBAPIConnector):
    """Connector for MySQL-protocol databases (MySQL and TiDB)."""

    _sqla_driver = 'mysql+pymysql'
    _sqla_url_query = {'charset': 'utf8mb4'}
    _default_port = 3306

    def connect_impl(self, autocommit=False, *args, **kwargs):
        """Open a pymysql connection to the configured server.

        Defaults to a server-side streaming cursor (``SSCursor``) so large
        result sets are not buffered entirely in client memory.
        """
        kwargs.setdefault('cursorclass', pymysql.cursors.SSCursor)
        return pymysql.connect(host=self.host,
                               port=self.port or self._default_port,
                               user=self.user,
                               password=self.password,
                               database=self.database,
                               charset='utf8mb4',
                               autocommit=autocommit,
                               *args, **kwargs)

    def _get_sqlalchemy_uri(self):
        """Render the SQLAlchemy connection URI with the password visible."""
        url = sqlalchemy.engine.url.URL(drivername=self._sqla_driver, host=self.host, port=self.port,
                                        username=self.user, password=self.password,
                                        database=self.database or '',
                                        query=self._sqla_url_query)
        return url.__to_string__(hide_password=False)

    @classmethod
    def escape_string(cls, v):
        """Escape *v* for safe embedding in a MySQL string literal."""
        return escape_string(v)

    def load_csv(self, table, filename, columns=None, delimiter=',', quotechar='"',
                 lineterminator='\r\n', escapechar=None, skiprows=0, using_insert=False, **kwargs):
        """Load a local CSV file into *table*.

        Dispatches to plain INSERT statements when *using_insert* is true,
        otherwise to ``LOAD DATA LOCAL INFILE`` — with a TiDB-specific
        pre-processing step for *skiprows* (see ``_load_csv_tidb``).
        """
        table = self.quote_identifier(table)
        if using_insert:
            method = self.load_csv_by_inserting
        elif self.is_tidb():
            method = self._load_csv_tidb
        else:
            method = self._load_csv_mysql
        return method(table, filename, columns,
                      delimiter, quotechar, lineterminator, escapechar,
                      skiprows=skiprows, **kwargs)

    def _load_csv_mysql(self, table, filename, columns=None, delimiter=',', quotechar='"',
                        lineterminator='\r\n', escapechar=None, skiprows=0, **kwargs):
        """Bulk-load *filename* into *table* via LOAD DATA LOCAL INFILE.

        *table* is expected to be already quoted by the caller (load_csv).
        """
        cols = '({})'.format(', '.join(columns)) if columns else ''
        escape = "ESCAPED BY '{}'".format(escape_string(escapechar)) if escapechar else ''
        lineterminator = escape_string(lineterminator)
        ignore_lines = f'IGNORE {skiprows} LINES' if skiprows else ''
        # Fix: interpolate the actual file path into the statement — the
        # previous statement referenced a hard-coded bogus path and the
        # *filename* argument was never used.
        query = f'''
        LOAD DATA LOCAL INFILE '{filename}'
        INTO TABLE {table}
        FIELDS TERMINATED BY '{delimiter}' ENCLOSED BY '{quotechar}' {escape}
        LINES TERMINATED BY '{lineterminator}'
        {ignore_lines}
        {cols}
        '''.strip()

        self._log(query)
        # local_infile must be enabled on the connection for LOAD DATA LOCAL
        with self.cursor(local_infile=True) as cursor:
            cursor.execute(query)

    def _load_csv_tidb(self, table, filename, columns=None, delimiter=',', quotechar='"',
                       lineterminator='\r\n', escapechar=None, skiprows=0, **kwargs):
        """TiDB variant of the CSV load.

        TiDB's LOAD DATA lacks the IGNORE n LINES clause, so header rows
        are stripped into a temporary file first, which is removed after
        the load completes.
        """
        infile = filename
        if skiprows:
            infile = fs.skip_lines(filename, skiprows)
        self._load_csv_mysql(table, infile, columns,
                             delimiter, quotechar, lineterminator, escapechar,
                             skiprows=0, **kwargs)
        if infile != filename:
            fs.remove_files_safely(infile)

    def is_mysql(self):
        """This connector always speaks the MySQL protocol."""
        return True

    @toolz.memoize
    def is_tidb(self):
        """Return True when the server answers TiDB's ``tidb_version()``.

        Memoized per instance, so the probe query runs at most once.
        """
        with self.cursor() as cursor:
            try:
                cursor.execute('SELECT tidb_version()')
                cursor.fetchall()
                return True
            except Exception:
                # Plain MySQL rejects the unknown function — treat any
                # failure as "not TiDB".
                return False

    @staticmethod
    def to_canonical_type(type_code, size):
        """Map a pymysql FIELD_TYPE code to a canonical type (STRING default)."""
        return _mysql_type_to_canonical_type.get(type_code, types.STRING)

    @staticmethod
    def from_canonical_type(canonical_type, size):
        """Map a canonical type (plus optional size) to a MySQL column type.

        Strings are sized for the utf8mb4 charset, where one rune may take
        four bytes: short columns become VARCHAR(255), very long ones
        MEDIUMTEXT, everything in between TEXT.
        """
        if canonical_type == types.STRING:
            mysql_type = 'TEXT'
            size = safe_int(size)

            # utf8mb4 uses 4 bytes for one rune
            # 255 / 4 = 63
            if 0 < size < 63:
                mysql_type = 'VARCHAR(255)'
            # 65535 / 4 = 16383
            elif size >= 16383:
                # MEDIUMTEXT is enough
                mysql_type = 'MEDIUMTEXT'
        else:
            mysql_type = _canonical_type_to_mysql_type.get(canonical_type, 'TEXT')
        return mysql_type

    def generate_ddl(self, table, database=None, if_exists=True):
        """Return a CREATE TABLE statement for an existing table.

        Args:
            table: table name (unquoted).
            database: schema to look in; defaults to the connector's database.
            if_exists: when True, emit ``CREATE TABLE IF NOT EXISTS``.

        Raises:
            ValueError: if *table* does not exist in *database*.
        """
        if database is None:
            database = self.database
        if not self.has_table(table, database):
            raise ValueError(f'Table {table!r} not exists in {database!r}')

        with self.cursor() as cursor:
            cursor.execute(f'USE {self.quote_identifier(database)}')
            cursor.execute(f'SHOW CREATE TABLE {self.quote_identifier(table)}')
            if_exists_stmt = ' IF NOT EXISTS ' if if_exists else ' '
            # SHOW CREATE TABLE yields (name, ddl); keep everything after the
            # CREATE TABLE keyword — re.S lets '.*' span the multi-line body.
            body = re.search(r'CREATE TABLE (.*)', cursor.fetchall()[0][1], flags=re.S).group(1)
            return f'CREATE TABLE{if_exists_stmt}{body}'
173
+
174
+
175
+ TiDBConnector = MySQLConnector