recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic; see the registry's advisory page for details.

Files changed (333)
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,163 @@
1
+ import datetime
2
+ import ftplib
3
+ import logging
4
+ import os
5
+ import shutil
6
+ import time
7
+
8
+ import humanize
9
+
10
+ from recurvedata.pigeon.connector._registry import register_connector_class
11
+ from recurvedata.pigeon.utils import LoggingMixin
12
+
13
+
14
@register_connector_class("ftp")
class FtpConnector(LoggingMixin):
    """Connector wrapping ``ftplib.FTP`` with convenience helpers and
    progress-reporting file transfers (via ``StatsReaderWriter``)."""

    def __init__(self, host, user, password, **kwargs):
        self.host = host
        self.username = user
        self.password = password
        self.port = kwargs.pop("port", ftplib.FTP_PORT)

        # ftplib.FTP does not accept a port parameter in its constructor,
        # so for non-default ports we must connect() explicitly and then
        # log in ourselves.
        if self.port == ftplib.FTP_PORT:
            self.ftp = ftplib.FTP(host=self.host, user=self.username, passwd=self.password, **kwargs)
        else:
            self.ftp = ftplib.FTP(**kwargs)
            self.ftp.connect(self.host, self.port)
            if user:
                self.ftp.login(user=user, passwd=password, acct=kwargs.get("acct", ""))

    def close(self):
        """Politely end the FTP session and drop the connection object."""
        ftp = self.ftp
        ftp.quit()
        self.ftp = None

    def list_dir(self, path):
        """Return the entry names under *path*, or [] if the listing fails."""
        try:
            return self.ftp.nlst(path)
        except ftplib.all_errors:
            return []

    def rename(self, from_name, to_name):
        """Rename a remote file or directory."""
        return self.ftp.rename(from_name, to_name)

    def makedir(self, path):
        """Create a remote directory."""
        self.ftp.mkd(path)

    def rmdir(self, path):
        """Remove a remote directory."""
        self.ftp.rmd(path)

    def rm(self, name):
        """Delete a remote file."""
        self.ftp.delete(name)

    def pwd(self):
        """Return the current remote working directory."""
        return self.ftp.pwd()

    def size(self, name):
        """Return the size of a remote file in bytes."""
        return self.ftp.size(name)

    def is_ftp_dir(self, path):
        """Return True if *path* is a remote directory.

        Probed by attempting to ``cwd`` into it and back; any FTP error is
        taken to mean "not a directory".
        """
        original_cwd = self.pwd()
        try:
            self.ftp.cwd(path)
            self.ftp.cwd(original_cwd)
            return True
        except ftplib.all_errors:
            return False

    def download_file(self, src_file, dst_file):
        """Download remote *src_file* to local *dst_file*.

        On failure, removes the partial local file and any directory this
        call created, then re-raises the FTP error.
        """
        local_dir = os.path.dirname(dst_file)
        created_dir = False
        # Guard against dst_file having no directory component: makedirs("")
        # would raise. Only create (and later clean up) what is missing.
        if local_dir and not os.path.exists(local_dir):
            os.makedirs(local_dir)
            created_dir = True
        try:
            total_bytes = self.size(src_file)
            with open(dst_file, "wb") as f:
                writer = StatsReaderWriter(f, total_bytes)
                self.ftp.retrbinary(f"RETR {src_file}", writer.write)
                writer.show_stat()
            self.logger.info(f"successfully downloaded {src_file} to {dst_file}")
        except ftplib.all_errors as e:
            # Clean up only what we actually created: size() may fail before
            # dst_file exists, and an unconditional unlink would then raise
            # FileNotFoundError and mask the original FTP error.
            if os.path.exists(dst_file):
                os.unlink(dst_file)
            if created_dir:
                shutil.rmtree(local_dir)
            self.logger.exception(f"failed to download {src_file}")
            raise e

    def upload_file(self, src_file, dst_file):
        """Upload local *src_file* to remote *dst_file* with progress logging.

        Raises ValueError if *src_file* is not a regular file; re-raises any
        FTP error after logging it.
        """
        if not os.path.isfile(src_file):
            raise ValueError(f"{src_file} is not a file")
        try:
            total_bytes = os.stat(src_file).st_size
            with open(src_file, "rb") as f:
                reader = StatsReaderWriter(f, total_bytes)
                self.ftp.storbinary(f"STOR {dst_file}", reader)
                reader.show_stat()
            self.logger.info(f"successfully uploaded {src_file} to {dst_file}")
        except ftplib.all_errors as e:
            self.logger.exception(f"failed to upload {src_file}")
            raise e
103
+
104
+
105
class StatsReaderWriter(object):
    """File-object proxy that counts transferred bytes and periodically logs
    transfer speed and progress.

    Wraps a readable or writable file object *fp*; *total_bytes* is the
    expected transfer size (used only for the progress percentage), and a
    progress line is logged every *show_stats_bytes* bytes.
    """

    def __init__(self, fp, total_bytes, show_stats_bytes=1024 * 1024):
        self.fp = fp
        self.total_bytes = total_bytes
        self.show_stats_bytes = show_stats_bytes

        self._transferred_bytes = 0
        self._start_time = time.time()
        self._end_time = 0

    def read(self, n):
        """Read up to *n* bytes, counting what was actually returned.

        At EOF the result may be shorter than *n*; counting ``len(rv)``
        (rather than the requested *n*, as the original did) keeps the
        statistics accurate.
        """
        rv = self.fp.read(n)
        self._incr_transferred_bytes(len(rv))
        return rv

    def write(self, data):
        """Write *data* to the underlying file, counting its length."""
        rv = self.fp.write(data)
        self._incr_transferred_bytes(len(data))
        return rv

    def close(self):
        """Close the underlying file (best-effort) and record the end time."""
        if self.fp.closed:
            if self._end_time == 0:
                self._end_time = time.time()
            return
        try:
            self.fp.close()
        except Exception:
            pass
        self._end_time = time.time()

    def _incr_transferred_bytes(self, n):
        """Add *n* bytes to the running total in O(1).

        Logs progress once for every ``show_stats_bytes`` boundary crossed,
        matching the original byte-at-a-time loop's behavior without its
        O(n) cost (millions of Python iterations per megabyte).
        """
        if n <= 0:
            return
        before = self._transferred_bytes
        self._transferred_bytes = before + n
        crossings = self._transferred_bytes // self.show_stats_bytes - before // self.show_stats_bytes
        for _ in range(crossings):
            self.show_stat()

    def show_stat(self):
        """Log bytes transferred, elapsed time, average speed, and progress."""
        if self._end_time == 0:
            end_time = time.time()
        else:
            end_time = self._end_time
        duration = end_time - self._start_time
        if duration == 0:
            speed = 0
        else:
            speed = self._transferred_bytes / duration

        if self.total_bytes == 0:
            progress = 0
        else:
            progress = 100 * self._transferred_bytes / self.total_bytes
        logging.info(
            "transferred %s in %s, average speed: %s/s, progress: %.2f%%",
            humanize.naturalsize(self._transferred_bytes, gnu=True),
            datetime.timedelta(seconds=duration),
            humanize.naturalsize(speed, gnu=True),
            progress,
        )
@@ -0,0 +1,283 @@
1
+ import copy
2
+ import os
3
+ from urllib import parse
4
+
5
+ import cytoolz as toolz
6
+ from google import auth
7
+ from google.cloud.bigquery import Client, LoadJobConfig, SourceFormat, dbapi, enums, job
8
+ from google.cloud.bigquery.dataset import DatasetReference
9
+ from google.cloud.bigquery.table import TableReference
10
+ from google.cloud.exceptions import BadRequest, NotFound
11
+ from google.oauth2 import service_account
12
+ from requests import Session
13
+
14
+ from recurvedata.pigeon.connector._registry import register_connector_class
15
+ from recurvedata.pigeon.connector.dbapi import ClosingCursor, DBAPIConnector, NullCursor
16
+ from recurvedata.pigeon.schema import types
17
+
18
# Mapping from BigQuery SQL type names to pigeon's canonical schema types.
# Types not listed here fall back to types.STRING (see
# GoogleBigqueryConnector.to_canonical_type, which uses .get with that default).
_bigquery_type_to_canonical_type = {
    enums.SqlTypeNames.STRING: types.STRING,
    enums.SqlTypeNames.INT64: types.INT64,
    enums.SqlTypeNames.INTEGER: types.INT64,
    enums.SqlTypeNames.FLOAT: types.FLOAT64,
    enums.SqlTypeNames.FLOAT64: types.FLOAT64,
    enums.SqlTypeNames.NUMERIC: types.FLOAT64,
    enums.SqlTypeNames.BOOLEAN: types.BOOLEAN,
    enums.SqlTypeNames.BOOL: types.BOOLEAN,
    enums.SqlTypeNames.TIMESTAMP: types.DATETIME,
    enums.SqlTypeNames.DATETIME: types.DATETIME,
    enums.SqlTypeNames.DATE: types.DATE,
}

GOOGLE_DRIVE_API = (
    "https://www.googleapis.com/auth/drive"  # external table linked with google sheet, need google drive api enabled
)
35
+
36
+
37
class IterCursor(ClosingCursor):
    """Closing cursor that exposes the BigQuery query results as an iterator.

    ``pagesize`` is forwarded to the DB-API cursor's ``arraysize`` and
    controls how many rows are fetched per page.
    """

    def __init__(self, connection, commit_on_close=True, pagesize=None):
        super().__init__(connection, commit_on_close)
        self._cursor.arraysize = pagesize

    def __iter__(self):
        # Kick off the fetch on the underlying cursor, then hand back the
        # row iterator it maintains internally.
        inner = self._cursor
        inner._try_fetch()
        return inner._query_data
45
+
46
+
47
@register_connector_class(["google_bigquery", "gbq"])
class GoogleBigqueryConnector(DBAPIConnector):
    """DB-API style connector for Google BigQuery.

    Credentials are resolved from, in priority order: a service-account key
    file (``key_path``), an in-memory service-account dict (``key_dict``),
    or application-default credentials via ``google.auth.default``.
    """

    _sqla_driver = "bigquery"

    # Ensure the Google Drive scope is present exactly once: external tables
    # backed by Google Sheets require the Drive API.
    if GOOGLE_DRIVE_API in Client.SCOPE:
        _scopes = Client.SCOPE
    else:
        _scopes = Client.SCOPE + (GOOGLE_DRIVE_API,)

    def __init__(
        self,
        key_path: str = None,
        key_dict: dict = None,
        project: str = None,
        http: Session = None,
        proxies: dict = None,
        location: str = None,
        dataset: str = None,
        pagesize: int = None,
        *args,
        **kwargs,
    ):
        """
        instance of gbq
        :param project: project_id
        :param key_path: path to json key file
        :param key_dict: dict of key
        :param http: requests session
        :param proxies: proxy
        :param location: location
        :param dataset: dataset_id
        :param pagesize: cursor arraysize used when iterating query results
        """
        super().__init__(host=None, database=dataset, *args, **kwargs)
        self._project_id = project
        self._key_path = key_path
        self._key_dict = key_dict
        self._http = http
        self._proxies = proxies
        self._location = location
        self.dataset = dataset
        self.pagesize = pagesize

    def is_google_bigquery(self):
        return True

    def get_credentials(self):
        """Resolve Google credentials; also fills in the project id when it
        was not passed explicitly."""
        if not any([self._key_path, self._key_dict]):
            credentials, project_id = auth.default(scopes=self._scopes, request=self._http)
            self._project_id = self._project_id or project_id
        elif self._key_path:
            credentials = service_account.Credentials.from_service_account_file(
                filename=self._key_path, scopes=self._scopes
            )
        else:
            _key_dict = copy.deepcopy(self._key_dict)
            # Fix private key format with robust conversion
            _key_dict["private_key"] = self._convert_private_key(_key_dict["private_key"])
            credentials = service_account.Credentials.from_service_account_info(info=_key_dict, scopes=self._scopes)
            self._project_id = self._project_id or credentials.project_id

        return credentials

    @staticmethod
    def _convert_private_key(private_key: str) -> str:
        """
        Convert private key from various escape formats to proper PEM format.
        Handles multiple levels of escaping that can occur during transmission/storage.
        """
        if not private_key:
            return private_key

        # Remove any leading/trailing whitespace
        private_key = private_key.strip()

        # Handle various escape sequence patterns
        # Multiple replacement passes to handle nested escaping

        # Replace quadruple-escaped newlines (\\\\n -> \\n)
        private_key = private_key.replace("\\\\n", "\\n")

        # Replace double-escaped newlines (\\n -> \n)
        private_key = private_key.replace("\\n", "\n")

        # Handle edge case where literal \n strings need to become actual newlines
        # This covers cases where the key was stored as a literal string
        if "-----BEGIN PRIVATE KEY-----" in private_key and "\n" not in private_key:
            # If we have the BEGIN marker but no actual newlines, it's likely escaped
            private_key = private_key.replace("-----BEGIN PRIVATE KEY-----", "-----BEGIN PRIVATE KEY-----\n")
            private_key = private_key.replace("-----END PRIVATE KEY-----", "\n-----END PRIVATE KEY-----")

            # Split the key content and add newlines every 64 characters (standard PEM format)
            lines = private_key.split('\n')
            if len(lines) >= 2:
                # Extract the key content between BEGIN and END
                begin_line = lines[0]
                end_line = lines[-1]
                key_content = ''.join(lines[1:-1])

                # Split key content into 64-character lines
                formatted_lines = [begin_line]
                for i in range(0, len(key_content), 64):
                    formatted_lines.append(key_content[i:i+64])
                formatted_lines.append(end_line)

                private_key = '\n'.join(formatted_lines)

        return private_key

    @toolz.memoize
    def connect_impl(self, *args, **kwargs):
        # Memoized so repeated connect() calls share one DB-API connection.
        return dbapi.connect(client=self.client())

    def client(self):
        """Build a BigQuery Client, exporting proxy env vars if configured."""
        if self._proxies:
            for scheme in ["http", "https"]:
                os.environ[f"{scheme}_proxy"] = self._proxies[scheme]

        client = Client(
            project=self._project_id, credentials=self.get_credentials(), location=self._location, _http=self._http
        )
        return client

    def cursor(self, autocommit=False, dryrun=False, commit_on_close=True, **kwargs):
        """Return an IterCursor; a NullCursor when dryrun is requested."""
        if dryrun:
            return NullCursor()
        conn = self.connect(autocommit, **kwargs)
        return IterCursor(conn, commit_on_close=commit_on_close, pagesize=self.pagesize)

    def _get_sqlalchemy_uri(self):
        """Compose a SQLAlchemy URI such as bigquery://project/dataset?..."""
        params = {"location": self._location}
        if self._key_path:
            params.update({"credentials_path": self._key_path})
        uri = f"{self._sqla_driver}://{self._project_id}"
        if self.dataset:
            uri = os.path.join(uri, self.dataset)
        return f"{uri}?{parse.urlencode(params)}"

    def get_pandas_df(self, query, parameters=None, **kwargs):
        """Get pandas dataframe
        Note: pd.read_gbq does not work properly; use the client's
        to_dataframe() instead.
        """
        format_operation = dbapi.cursor._format_operation(query, parameters=parameters)
        query_parameters = dbapi._helpers.to_query_parameters(parameters)
        config = job.QueryJobConfig(use_legacy_sql=False)
        config.query_parameters = query_parameters
        result = self.client().query(format_operation, job_config=config).result()
        return result.to_dataframe()

    def table_ref(self, table, dataset):
        """Build a TableReference within this connector's project."""
        return TableReference(DatasetReference(self._project_id, dataset), table)

    def has_table(self, table, dataset=None, **kwargs):
        """Return True if the table exists (defaults to self.dataset)."""
        if dataset is None:
            dataset = self.dataset
        try:
            self.client().get_table(self.table_ref(table, dataset))
            return True
        except NotFound:
            return False

    def list_partitions(self, table, dataset):
        """List the table's partition keys; [] if the table is unpartitioned
        (BigQuery answers BadRequest in that case)."""
        try:
            return self.client().list_partitions(self.table_ref(table, dataset))
        except BadRequest:
            return []

    @staticmethod
    def to_canonical_type(type_code, size):
        """Map a BigQuery type to a canonical type; unknown types -> STRING."""
        return _bigquery_type_to_canonical_type.get(type_code, types.STRING)

    @staticmethod
    def from_canonical_type(canonical_type, size):
        """Map a canonical type back to a BigQuery type; unknown -> "STRING"."""
        # NOTE: the reverse map is rebuilt per call; cheap for this dict size.
        _canonical_type_to_bigquery_type = {v: k for k, v in _bigquery_type_to_canonical_type.items()}
        return _canonical_type_to_bigquery_type.get(canonical_type, "STRING")

    def generate_ddl(self, table, dataset=None, if_exists=True):
        """Render a CREATE TABLE statement mirroring the table's live schema."""
        cols = [f"{col.name} {col.field_type}" for col in self.get_schema(table, dataset)]
        if_exists_stmt = " IF NOT EXISTS " if if_exists else " "
        full_table_name = f"{self.quote_identifier(dataset)}.{self.quote_identifier(table)}"
        return f'CREATE TABLE{if_exists_stmt}{full_table_name} ({", ".join(cols)})'

    def get_columns(self, table, dataset=None, exclude=()):
        """Return the table's column names, skipping any listed in *exclude*."""
        cols = []
        for col in self.get_schema(table, dataset):
            if col.name in exclude:
                continue
            cols.append(col.name)
        return cols

    def get_schema(self, table, dataset):
        """Return the live table schema; raises ValueError if it is missing."""
        if dataset is None:
            dataset = self.dataset
        if not self.has_table(table, dataset):
            raise ValueError(f"Table {table} not exists in {dataset!r}")
        table = self.client().get_table(self.table_ref(table, dataset))
        return table.schema

    def load_csv(
        self,
        table,
        filename=None,
        gcs_uri=None,
        delimiter=",",
        quotechar='"',
        skiprows=0,
        write_disposition="WRITE_APPEND",
        schema=None,
        **kwargs,
    ):
        """Load a CSV into *table* ("dataset.table") from a local file or a
        GCS URI.

        :raises ValueError: if neither *filename* nor *gcs_uri* is given
            (the original logged and then crashed on an undefined load_job).
        """
        dataset, table = table.split(".")
        job_config = LoadJobConfig(
            source_format=SourceFormat.CSV,
            skip_leading_rows=skiprows,
            # autodetect=True,
            field_delimiter=delimiter,
            quote_character=quotechar,
            write_disposition=write_disposition,
            schema=schema,
            **kwargs,
        )

        if filename:
            with open(filename, "rb") as file:
                load_job = self.client().load_table_from_file(
                    file_obj=file, destination=self.table_ref(table, dataset), job_config=job_config
                )
        elif gcs_uri:
            load_job = self.client().load_table_from_uri(
                source_uris=gcs_uri, destination=self.table_ref(table, dataset), job_config=job_config
            )
        else:
            # Fail fast instead of logging and then hitting a NameError on
            # the undefined load_job below.
            raise ValueError("no file or gcs uri is provided")

        self.logger.info("start loading csv to bigquery")
        load_job.result()
        self.logger.info("finish loading csv to bigquery")
@@ -0,0 +1,130 @@
1
+ import copy
2
+ import logging
3
+ import os
4
+
5
+ from google import auth
6
+ from google.cloud import storage
7
+ from google.oauth2 import service_account
8
+
9
+ from recurvedata.pigeon.connector._registry import register_connector_class
10
+
11
+
12
@register_connector_class(["google_cloud_storage", "gcs"])
class GoogleCloudStorageConnector(object):
    """Connector for Google Cloud Storage.

    Credentials are resolved in order: explicit key file path, explicit key
    dict, then Application Default Credentials (via the
    ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable).
    """

    def __init__(
        self,
        key_path: str = None,
        key_dict: dict = None,
        project: str = None,
        proxies: dict = None,
        bucket_name: str = None,
        **kwargs,
    ):
        """
        instance of gcs
        :param project: project_id
        :param key_path: path to json key file
        :param key_dict: dict of key
        :param proxies: proxy
        :param bucket_name: bucket's name
        """
        self._project_id = project
        self._key_path = key_path
        self._key_dict = key_dict
        self._proxies = proxies
        self.bucket_name = bucket_name

        if not any([self._key_path, self._key_dict]):
            # Fall back to Application Default Credentials; requires the
            # GOOGLE_APPLICATION_CREDENTIALS environment variable to point at
            # a service account key file:
            # export GOOGLE_APPLICATION_CREDENTIALS='{path to key file}'
            self._credentials, auth_project_id = auth.default()
            self._project_id = self._project_id or auth_project_id
        elif self._key_path:
            # A service account key file path was supplied.
            self._credentials = service_account.Credentials.from_service_account_file(filename=self._key_path)
        else:
            # A service account key dict was supplied. Normalize escaped
            # newlines in the private key, which appear when the key passes
            # through JSON/env-var serialization.
            _key_dict = copy.deepcopy(self._key_dict)
            _key_dict["private_key"] = _key_dict["private_key"].replace("\\n", "\n")
            self._credentials = service_account.Credentials.from_service_account_info(info=_key_dict)
            self._project_id = self._project_id or self._credentials.project_id

        if self._proxies:
            # NOTE(review): assumes both "http" and "https" keys are present
            # in the proxies dict; a partial dict raises KeyError — confirm.
            for scheme in ["http", "https"]:
                os.environ[f"{scheme}_proxy"] = self._proxies[scheme]

        self.client = storage.Client(project=self._project_id, credentials=self._credentials, **kwargs)

    def create_bucket(self, bucket_name, location=None):
        """Create a new bucket and return the Bucket object."""
        logging.info(f"Start creating bucket {bucket_name} at location {location}")
        new_bucket = self.client.create_bucket(bucket_name, location=location)
        logging.info(f"Successfully created bucket {bucket_name} at location {location}")
        return new_bucket

    def get_buckets(self):
        """Return the names of all buckets visible to the current project."""
        buckets = self.client.list_buckets()
        return [bucket.name for bucket in buckets]

    def has_key(self, key, bucket_name=None):
        """Return True if object *key* exists in the bucket."""
        if not bucket_name:
            bucket_name = self.bucket_name

        bucket = self.client.bucket(bucket_name)
        return bucket.blob(key).exists()

    def get_keys(self, bucket_name=None, prefix=""):
        """Return object names in the bucket, optionally filtered by *prefix*."""
        if not bucket_name:
            bucket_name = self.bucket_name

        keys = self.client.list_blobs(bucket_name, prefix=prefix)
        return [key.name for key in keys]

    def delete_key(self, key, bucket_name=None):
        """Delete object *key* from the bucket."""
        if not bucket_name:
            bucket_name = self.bucket_name

        bucket = self.client.bucket(bucket_name)
        blob = bucket.blob(key)
        logging.info(f"Start deleting storage object {key}")
        blob.delete()
        logging.info(f"Successfully deleted storage object {key}")

    def upload(self, filename, bucket_name=None, key=None, folder=None, overwrite=True, **kwargs):
        """Upload a local file and return the object key it was stored under.

        :param filename: local file path to upload.
        :param key: target object name; defaults to the file's basename.
        :param folder: optional prefix joined in front of *key*.
        :param overwrite: when False, keep an existing object untouched.
        """
        if not bucket_name:
            bucket_name = self.bucket_name
        if not key:
            key = os.path.basename(filename)
        if folder:
            key = os.path.join(folder, key)

        bucket = self.client.bucket(bucket_name)
        blob = bucket.blob(key)

        if not overwrite and blob.exists():
            return key

        logging.info(f"Start uploading file (unknown) to {key}.")
        blob.upload_from_filename(filename, **kwargs)
        logging.info(f"Successfully uploaded file (unknown) to {key}.")
        return key

    def download(self, key, bucket_name=None, folder=None, filename=None, overwrite=True, **kwargs):
        """Download object *key* and return the local filename written.

        :param filename: local target path; defaults to the key's basename.
        :param folder: optional local directory joined in front of *filename*.
        :param overwrite: when False, keep an existing local file untouched.
        :raises FileNotFoundError: if the object does not exist in the bucket.
        """
        if not bucket_name:
            bucket_name = self.bucket_name
        if not filename:
            filename = os.path.basename(key)
        if folder:
            filename = os.path.join(folder, filename)

        if not overwrite and os.path.exists(filename):
            return filename

        bucket = self.client.bucket(bucket_name)
        blob = bucket.get_blob(key)
        if blob is None:
            # get_blob returns None for a missing object; previously this
            # crashed below with a confusing AttributeError on `blob.size`.
            raise FileNotFoundError(f"Storage object {key} not found in bucket {bucket_name}")
        logging.info(f"Start downloading storage object {key} from bucket {bucket_name} to local file (unknown).")
        logging.info(f"Size: {round(blob.size / 1024 / 1024, 2)} MB")
        blob.download_to_filename(filename, **kwargs)
        logging.info(
            f"Successfully downloaded storage object {key} from bucket {bucket_name} to local file (unknown)."
        )
        return filename
@@ -0,0 +1,108 @@
1
+ import phoenixdb
2
+ from phoenixdb.cursor import Cursor
3
+
4
+ from recurvedata.pigeon.connector._registry import register_connector_class
5
+ from recurvedata.pigeon.connector.dbapi import DBAPIConnector
6
+ from recurvedata.pigeon.schema import types
7
+
8
# Phoenix Data Types: http://phoenix.apache.org/language/datatypes.html
# Mapping from Phoenix SQL type names to canonical pigeon schema types.
_phoenix_type_to_canonical_type = {
    'INTEGER': types.INT32,
    'UNSIGNED_INT': types.INT32,
    'BIGINT': types.INT64,
    'UNSIGNED_LONG': types.INT64,
    'TINYINT': types.INT8,
    'UNSIGNED_TINYINT': types.INT8,
    'SMALLINT': types.INT16,
    'UNSIGNED_SMALLINT': types.INT16,
    'FLOAT': types.FLOAT32,
    'UNSIGNED_FLOAT': types.FLOAT32,
    'DOUBLE': types.FLOAT64,
    'UNSIGNED_DOUBLE': types.FLOAT64,
    'DECIMAL': types.FLOAT64,
    'BOOLEAN': types.BOOLEAN,

    'TIME': types.STRING,
    'UNSIGNED_TIME': types.STRING,
    'DATE': types.DATE,
    'UNSIGNED_DATE': types.DATE,
    'TIMESTAMP': types.DATETIME,
    'UNSIGNED_TIMESTAMP': types.DATETIME,

    'VARCHAR': types.STRING,
    'CHAR': types.STRING,

    # default: types.STRING
}

# Reverse mapping, used when emitting Phoenix DDL from canonical types.
_canonical_type_to_phoenix_type = {
    types.BOOLEAN: 'BOOLEAN',
    types.INT8: 'TINYINT',
    types.INT16: 'SMALLINT',
    types.INT32: 'INTEGER',
    types.INT64: 'BIGINT',
    types.FLOAT32: 'FLOAT',
    types.FLOAT64: 'DOUBLE',

    types.DATE: 'DATE',
    # Phoenix has no DATETIME type; TIMESTAMP is the correct counterpart
    # (see the datatypes reference above). 'DATETIME' would be rejected
    # by Phoenix DDL.
    types.DATETIME: 'TIMESTAMP',

    types.STRING: 'VARCHAR',
    types.JSON: 'VARCHAR',
}
54
+
55
class PhoenixCursor(Cursor):
    # Number of rows phoenixdb fetches from the query server per round trip;
    # overrides the library default to bound memory while iterating results.
    itersize = 1000
58
+
59
@register_connector_class(['phoenix'])
class PhoenixConnector(DBAPIConnector):
    """Connector for Apache Phoenix via the phoenixdb query server."""

    _default_port = 8765
    # Phoenix quotes identifiers with double quotes (SQL standard).
    _identifier_start_quote = '"'
    _identifier_end_quote = '"'

    def connect_impl(self, autocommit=False, *args, **kwargs):
        """Open a phoenixdb connection to the Phoenix query server."""
        url = f'http://{self.host}:{self.port}'
        return phoenixdb.connect(url=url, autocommit=autocommit, cursor_factory=PhoenixCursor, *args, **kwargs)

    def create_engine(self, engine_kwargs=None):
        # No SQLAlchemy engine support is wired up for Phoenix.
        raise NotImplementedError

    def has_table(self, table, database=None, **kwargs):
        """Return True if *table* exists (restricted to schema *database* when given)."""
        if database is None:
            query = 'SELECT 1 FROM system.catalog WHERE table_name = ? LIMIT 1'
            params = [table]
        else:
            query = 'SELECT 1 FROM system.catalog WHERE table_name = ? AND table_schem = ? LIMIT 1'
            params = [table, database]
        return self.fetchone(query, params) is not None

    def get_columns(self, table, database=None, exclude=None):
        """Return the ordered column names of *table*.

        :param database: Phoenix schema name; None means the default schema.
        :param exclude: optional collection of column names to drop.
        :raises ValueError: if the table does not exist.
        """
        # Bind *database* as a query parameter instead of interpolating it
        # into the SQL text (avoids quoting issues and SQL injection).
        if database:
            clause = 'table_schem = ?'
            params = [database, table]
        else:
            clause = 'table_schem IS NULL'
            params = [table]
        query = f'''
            SELECT column_name FROM system.catalog
            WHERE {clause} AND table_name = ? AND ordinal_position IS NOT NULL
            ORDER BY ordinal_position
        '''
        cols = [x[0] for x in self.fetchall(query, params)]
        if not cols:
            raise ValueError('Table {!r} not exists in {!r}'.format(table, database))

        if exclude:
            cols = [x for x in cols if x not in exclude]
        return cols

    def is_phoenix(self):
        return True

    @staticmethod
    def to_canonical_type(type_code, size):
        # Unmapped Phoenix types fall back to STRING.
        return _phoenix_type_to_canonical_type.get(type_code, types.STRING)

    @staticmethod
    def from_canonical_type(canonical_type, size):
        # Unmapped canonical types fall back to VARCHAR.
        return _canonical_type_to_phoenix_type.get(canonical_type, 'VARCHAR')