recurvedata_lib-0.1.487-py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of recurvedata-lib might be problematic.

Files changed (333)
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
recurvedata/pigeon/dumper/ftp.py
@@ -0,0 +1,64 @@
+ import os
+
+ import humanize
+
+ from recurvedata.pigeon.connector.ftp import FtpConnector
+ from recurvedata.pigeon.meta import DumperMeta
+ from recurvedata.pigeon.utils import LoggingMixin
+
+
+ class FtpDumperMeta(DumperMeta):
+     def __init__(self):
+         super().__init__()
+         self.total_size = 0
+         self.dumped_files = []
+
+     def to_dict(self):
+         speed = self.total_size / self.duration.total_seconds()
+         return {
+             "time_start": self.time_start,
+             "time_finish": self.time_finish,
+             "time_duration": self.duration,
+             "total_size": self.total_size,
+             "total_size_human": humanize.naturalsize(self.total_size, gnu=True),
+             "download_speed": f"{humanize.naturalsize(speed, gnu=True)}/s",
+             "num_dumped_files": len(self.dumped_files),
+             "dumped_files": self.dumped_files,
+         }
+
+
+ class FtpDumper(LoggingMixin):
+     def __init__(self, connector, src, dst):
+         assert isinstance(connector, FtpConnector)
+         self.connector = connector
+         self.src = src
+         self.dst = dst
+         self.meta = FtpDumperMeta()
+
+     def execute(self):
+         self.meta.mark_start()
+         self.execute_impl()
+         self.meta.mark_finish()
+         self.logger.info(f"dumper meta: {self.meta.to_json(indent=2)}")
+         return self.meta
+
+     def execute_impl(self):
+         if self.connector.is_ftp_dir(self.src):
+             for item in self.connector.list_dir(self.src):
+                 if self.connector.is_ftp_dir(item):
+                     self.logger.warning(f"{item} may be a directory, skipping")
+                 else:
+                     _, remote_file = os.path.split(item)
+                     dst = os.path.join(self.dst, remote_file)
+                     self.connector.download_file(item, dst)
+                     self.collect_meta(dst)
+         else:
+             self.connector.download_file(self.src, self.dst)
+             self.collect_meta(self.dst)
+
+     def collect_meta(self, filepath):
+         if not os.path.exists(filepath):
+             return None
+         file_size = os.stat(filepath).st_size
+         self.meta.dumped_files.append({"filepath": filepath, "size": file_size})
+         self.meta.total_size += file_size
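
A minimal usage sketch for the dumper above. Only FtpDumper(connector, src, dst) and execute() come from the code in this diff; the FtpConnector constructor arguments are assumptions for illustration.

    from recurvedata.pigeon.connector.ftp import FtpConnector
    from recurvedata.pigeon.dumper.ftp import FtpDumper

    # Hypothetical connector settings; the real FtpConnector signature is not shown in this diff.
    connector = FtpConnector(host="ftp.example.com", username="user", password="secret")

    # Download a remote directory (non-recursively) into a local folder.
    dumper = FtpDumper(connector, src="/remote/exports", dst="/tmp/exports")
    meta = dumper.execute()
    print(meta.to_dict()["total_size_human"])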
recurvedata/pigeon/dumper/mongodb.py
@@ -0,0 +1,103 @@
+ from bson import json_util
+
+ from recurvedata.pigeon.connector.mongodb import MongoDBConnector
+ from recurvedata.pigeon.dumper.base import BaseDumper
+ from recurvedata.pigeon.row_factory import ordered_dict_factory
+ from recurvedata.pigeon.schema import Schema
+ from recurvedata.pigeon.utils.timing import TimeCounter
+
+
+ class MongoDBDumper(BaseDumper):
+     _row_factory = staticmethod(ordered_dict_factory)
+
+     def __init__(self, connector, database, collection, filter=None, projection=None, handler_factories=None):
+         """MongoDBDumper exports data from MongoDB.
+
+         :param connector: a MongoDBConnector instance
+         :param database: database name
+         :param collection: collection name
+         :param filter: query condition passed to find().
+             Note: a string filter is treated as JSON and deserialized with bson.json_util.loads.
+         :param projection: controls the returned fields, passed to find()
+         :param handler_factories:
+         """
+         super().__init__(handler_factories=handler_factories)
+
+         if not isinstance(connector, MongoDBConnector):
+             raise TypeError("connector should be an instance of MongoDBConnector")
+
+         self.connector = connector
+         self.database = database
+         self.collection = collection
+
+         self.filter = filter or {}
+         if isinstance(self.filter, str):
+             self.filter = json_util.loads(self.filter)
+
+         self.projection = projection
+
+         self.meta.context = {
+             "database": database,
+             "collection": collection,
+             "filter": filter,
+             "projection": projection,
+         }
+
+     @property
+     def row_factory(self):
+         return ordered_dict_factory
+
+     @row_factory.setter
+     def row_factory(self, factory):
+         raise ValueError(f"{self.__class__.__name__}.row_factory is fixed to ordered_dict_factory and is read-only")
+
+     def execute(self):
+         self.meta.mark_start()
+         self.execute_impl()
+         self.meta.mark_finish()
+         self.logger.info("dumper meta: %s", self.meta.to_json(indent=2))
+         self.handle_schema()
+         return self.meta
+
+     def execute_impl(self):
+         handlers = self.create_handlers()
+
+         self.logger.info("execute with context")
+         self.logger.info("  filter: %s", self.filter)
+         self.logger.info("  projection: %s", self.projection)
+
+         # MongoDB is schemaless; the schema is inferred from the fields and values of the first result
+         schema = Schema()
+         client = self.connector.connect()
+         if self.collection not in client[self.database].list_collection_names():
+             raise RuntimeError(f"collection '{self.collection}' does not exist")
+         total_count = client[self.database][self.collection].count_documents(self.filter)
+         cursor = client[self.database][self.collection].find(self.filter, self.projection)
+
+         counter = TimeCounter(name="", log_threshold=10000, logger=self.logger, total=total_count)
+         # Use the projection field order as the base order
+         field_names = list(self.projection.keys()) if self.projection else []
+
+         for doc in cursor:
+             counter.incr(1)
+
+             fixed_doc = doc
+             # if a projection is given, use it to filter the fields and fill missing fields with None
+             if field_names:
+                 fixed_doc = self.row_factory(field_names, [doc.get(x, None) for x in field_names])
+
+             for h in handlers:
+                 h.handle(fixed_doc)
+
+         counter.show_stat()
+         self.meta.schema = schema
+         self.meta.num_dumped_rows = counter.count
+
+         for hf, h in zip(self.handler_factories, handlers):
+             hf.meta.update(h.meta)
+         self.meta.handlers_meta = [x.meta for x in self.handler_factories]
+
+         for h in handlers:
+             h.close()
+         self.join_handlers()
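
A usage sketch for MongoDBDumper, wiring it to the CSV handler factory defined later in this diff. The MongoDBConnector constructor arguments are assumed for illustration.

    from recurvedata.pigeon.connector.mongodb import MongoDBConnector
    from recurvedata.pigeon.dumper.mongodb import MongoDBDumper
    from recurvedata.pigeon.handler.csv_handler import create_csv_file_handler_factory

    connector = MongoDBConnector(host="localhost", port=27017)  # assumed signature
    factory = create_csv_file_handler_factory(filename="/tmp/users.csv", write_header=True)

    dumper = MongoDBDumper(
        connector,
        database="app",
        collection="users",
        filter='{"active": true}',  # string filters are parsed with bson.json_util.loads
        projection={"name": 1, "email": 1},  # also fixes the output column order
        handler_factories=[factory],
    )
    dumper.execute()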
recurvedata/pigeon/handler/__init__.py
@@ -0,0 +1,4 @@
+ from recurvedata.pigeon.handler.base import Handler, HandlerFactory, NullHandler
+ from recurvedata.pigeon.handler.csv_handler import CSVFileHandler, HiveCSVFileHandler
+
+ null_factory = HandlerFactory(NullHandler)
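
As a usage note, null_factory gives a ready-made no-op sink: NullHandler discards every row, so the factory is handy wherever a handler_factories argument is required but the rows themselves should be thrown away.

    from recurvedata.pigeon.handler import null_factory

    handler = null_factory.create_handler()
    handler.handle({"id": 1})  # transform and emit are no-ops; nothing is written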
recurvedata/pigeon/handler/base.py
@@ -0,0 +1,153 @@
+ import cytoolz as toolz
+
+ from recurvedata.pigeon.meta import HandlerFactoryMeta, HandlerMeta
+ from recurvedata.pigeon.transformer import Transformer
+ from recurvedata.pigeon.utils import LoggingMixin
+
+ _default_transformer = Transformer()
+
+
+ class Handler(LoggingMixin):
+     ERROR_HANDLE_PARAMS = [
+         "max_error_rate",
+         "min_sample_rows",
+         "check_error_rate_on_finish",
+         "max_continuous_error_log_rows",
+         "max_continuous_error_log_size",
+         "error_log_cycle_interval",
+     ]
+
+     def __init__(
+         self,
+         transformer=_default_transformer,
+         max_error_rate=0.2,
+         min_sample_rows=1000,
+         check_error_rate_on_finish=True,
+         max_continuous_error_log_rows=10,
+         max_continuous_error_log_size=500000,
+         error_log_cycle_interval=100,
+         *args,
+         **kwargs,
+     ):
+         """Row handling logic: receive an input row, apply the transform, and emit the result.
+
+         :param transformer: the data transformation logic, a Transformer (or subclass) instance
+         :param max_error_rate: maximum error rate; exceeding it (subject to the sample size) may raise an exception and abort the run
+         :param min_sample_rows: minimum sample size, to avoid false alarms when the sample is too small
+         :param check_error_rate_on_finish: check the error rate when finishing and raise if the threshold is reached, so errors are not drowned out when the data volume never reaches the sample size
+         :param max_continuous_error_log_rows: maximum number of failed rows a single handler logs consecutively, to keep error logs from growing too large
+         :param max_continuous_error_log_size: maximum number of characters of failed rows a single handler logs consecutively (default 500,000), to keep error logs from growing too large
+         :param error_log_cycle_interval: once the consecutive row or size limit is reached, still log one failed row every error_log_cycle_interval errors
+         """
+         self.transformer = transformer
+         self.max_error_rate = max_error_rate
+         self.min_sample_rows = min_sample_rows
+         self.check_error_rate_on_finish = check_error_rate_on_finish
+         self.max_continuous_error_log_rows = max_continuous_error_log_rows
+         self.max_continuous_error_log_size = max_continuous_error_log_size
+         self.error_log_cycle_interval = error_log_cycle_interval
+
+         self.meta = HandlerMeta()
+         self.meta.schema = self.transformer.output_schema
+
+     def set_transformer(self, transformer):
+         self.transformer = transformer
+
+     def transform(self, row):
+         return self.transformer.transform(row)
+
+     def set_input_schema(self, schema):
+         self.transformer.input_schema = schema
+
+     def close(self):
+         if self.check_error_rate_on_finish:
+             self.check_error_rate()
+
+     def emit(self, row):
+         raise NotImplementedError("emit must be implemented by subclasses")
+
+     def handle(self, row):
+         self.meta.num_input_rows += 1
+         try:
+             rv = self.transform(row)
+             if rv:
+                 num_rows = self.emit(rv)
+                 self.meta.num_output_rows += num_rows
+         except (KeyboardInterrupt, SystemExit):
+             raise
+         except Exception:
+             self.handle_error(row)
+
+         if self.meta.num_input_rows >= self.min_sample_rows:
+             self.check_error_rate()
+
+     @property
+     def error_rate(self):
+         if self.meta.num_input_rows == 0:
+             return 0
+         return self.meta.num_error_rows / self.meta.num_input_rows
+
+     def check_error_rate(self):
+         if self.error_rate > self.max_error_rate:
+             raise RuntimeError(
+                 f"max_error_rate reached,"
+                 f" #input: {self.meta.num_input_rows},"
+                 f" #error: {self.meta.num_error_rows},"
+                 f" error_rate: {self.error_rate},"
+                 f" threshold: {self.max_error_rate}"
+             )
+
+     def handle_error(self, row):
+         self.meta.num_error_rows += 1
+         self.meta.error_log_size += len(str(row))
+         if (
+             self.meta.num_error_rows <= self.max_continuous_error_log_rows
+             and self.meta.error_log_size <= self.max_continuous_error_log_size
+         ):
+             self.logger.exception("failed to handle row: %s", row)
+         elif self.meta.num_error_rows % self.error_log_cycle_interval == 0:
+             self.logger.exception(
+                 "current handler total %s error rows, failed to handle row: %s", self.meta.num_error_rows, row
+             )
+
+     def reset(self):
+         """reset all states"""
+         self.meta.reset()
+
+
+ class NullHandler(Handler):
+     def transform(self, row):
+         pass
+
+     def emit(self, row):
+         pass
+
+     def handle(self, row):
+         return 0
+
+
+ class HandlerFactory(LoggingMixin):
+     def __init__(self, handler_class, transformer=_default_transformer, **handler_options):
+         self.handler_class = handler_class
+         self.transformer = transformer
+         self.handler_options = handler_options
+         self.handlers = []
+
+         self.meta = HandlerFactoryMeta(name=self.meta_name())
+
+     def set_transformer(self, transformer):
+         self.transformer = transformer
+
+     def create_handler(self, **kwargs):
+         h = self.handler_class(**toolz.merge(self.handler_options, kwargs))
+         h.set_transformer(self.transformer)
+         self.handlers.append(h)
+         return h
+
+     def join(self):
+         pass
+
+     def meta_name(self):
+         return f"<{self.__class__.__name__}>"
+
+     def handle_dumper_schema(self, schema):
+         pass
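
To implement a concrete handler, only emit() needs to be overridden; Handler.handle() counts inputs, applies the transformer, and tracks the error rate. A minimal in-memory sketch (not part of the package, and assuming the default Transformer passes rows through unchanged):

    from recurvedata.pigeon.handler.base import Handler, HandlerFactory

    class ListHandler(Handler):
        """Collects transformed rows in memory; emit() must return the number of rows written."""

        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            self.rows = []

        def emit(self, row):
            # handle() may pass a single row or a list of rows, depending on the transformer.
            rows = row if isinstance(row, list) else [row]
            self.rows.extend(rows)
            return len(rows)

    factory = HandlerFactory(ListHandler, max_error_rate=0.1)
    handler = factory.create_handler()
    handler.handle({"id": 1})
    handler.close()  # re-checks the error rate because check_error_rate_on_finish defaults to True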
recurvedata/pigeon/handler/csv_handler.py
@@ -0,0 +1,290 @@
+ import csv
+ import glob
+ import logging
+ import os
+
+ import cytoolz as toolz
+
+ from recurvedata.pigeon import const
+ from recurvedata.pigeon.csv import CSV
+ from recurvedata.pigeon.handler.base import Handler, HandlerFactory
+ from recurvedata.pigeon.row_factory import get_row_keys
+ from recurvedata.pigeon.schema import Schema
+ from recurvedata.pigeon.utils import escape, extract_dict, fs
+ from recurvedata.pigeon.utils.json import json_dumps
+
+ logger = logging.getLogger(__name__)
+
+
+ class CSVFileHandler(Handler):
+     def __init__(
+         self,
+         filename=None,
+         encoding=None,
+         write_header=False,
+         null=None,
+         delimiter=",",
+         quoting=csv.QUOTE_ALL,
+         escapechar=None,
+         doublequote=True,
+         **kwargs,
+     ):
+         """Transform rows and write the result as a CSV file.
+
+         :param filename: result filename
+         :param encoding: file encoding
+         :param write_header: whether to write a header row
+         """
+         if filename is None:
+             filename = fs.new_tempfile(suffix=".csv")
+         self.filename = os.path.abspath(filename)
+         if os.path.exists(self.filename):
+             logger.info(f"found {filename}, removing before writing")
+             os.unlink(self.filename)
+
+         self.encoding = encoding
+         self.null = null
+         self._fd = None
+         self._writer = None
+         self.write_header = write_header
+         self._field_names = None
+
+         self.csv_options = {k: v for k, v in kwargs.items() if k not in self.ERROR_HANDLE_PARAMS}
+         self.csv_options.update(
+             {"delimiter": delimiter, "quoting": quoting, "escapechar": escapechar, "doublequote": doublequote}
+         )
+         super().__init__(**extract_dict(kwargs, self.ERROR_HANDLE_PARAMS))
+
+     def flush(self):
+         if self._fd is not None:
+             self._fd.flush()
+
+     def close(self):
+         super().close()
+
+         if self._fd is not None:
+             self._fd.close()
+
+     def reset(self):
+         super().reset()
+         if self._fd is not None:
+             self._fd.seek(0)
+             self._fd.truncate(0)
+             self._fd.close()
+             self._fd = self._writer = None
+
+     def _open_writer(self, row):
+         self._fd = open(self.filename, "w", newline="", encoding=self.encoding)
+
+         self._determine_header(row)
+
+         if isinstance(row, dict):
+             self._writer = csv.DictWriter(self._fd, fieldnames=self._field_names, **self.csv_options)
+             if self.write_header:
+                 self._writer.writeheader()
+         else:
+             self._writer = csv.writer(self._fd, **self.csv_options)
+             if self.write_header:
+                 self._writer.writerow(self._field_names)
+
+     def _determine_header(self, row):
+         logger.info("try to get schema from row (%s)", type(row))
+         field_names = get_row_keys(row)
+         if not field_names:
+             logger.info("try to get schema from transformer")
+             if self.transformer.output_schema is not None:
+                 field_names = [x.name for x in self.transformer.output_schema]
+         logger.info("header: %s", field_names)
+         self._field_names = field_names
+
+     def emit(self, row):
+         if not isinstance(row, list):
+             row = [row]
+
+         if self._fd is None:
+             self._open_writer(row[0])
+
+         for r in row:
+             self._writerow(r)
+
+         return len(row)
+
+     def _writerow(self, row):
+         if isinstance(row, dict):
+             row = toolz.valmap(self._escape_item, row)
+         else:
+             row = [self._escape_item(x) for x in row]
+         self._writer.writerow(row)
+
+     def _escape_item(self, v):
+         if v is None:
+             return self.null
+
+         # Serialize dict, tuple, set and list values as JSON
+         if isinstance(v, (dict, tuple, set, list)):
+             v = json_dumps(v, ensure_ascii=False)
+
+         if isinstance(v, str):
+             return escape.escape_string(v)
+         return v
+
+     def __str__(self):
+         return "<{} (filename={})>".format(self.__class__.__name__, self.filename)
+
+
+ class HiveCSVFileHandler(CSVFileHandler):
+     """
+     Hive's default file format is not CSV but a plain delimited text file.
+     """
+
+     def __init__(
+         self,
+         filename=None,
+         encoding=None,
+         write_header=False,
+         null=const.HIVE_NULL,
+         delimiter=const.HIVE_FIELD_DELIMITER,
+         quoting=csv.QUOTE_NONE,
+         **csv_options,
+     ):
+         super().__init__(filename, encoding, write_header, null, delimiter, quoting, **csv_options)
+         self.delimiter = delimiter
+
+     def _writerow(self, row):
+         if isinstance(row, dict):
+             line = self.format_line(row.values())
+         else:
+             line = self.format_line(row)
+
+         self._fd.write(line)
+         self._fd.write("\n")
+
+     def _escape_item(self, v):
+         v = super()._escape_item(v)
+         return str(v)
+
+     def format_line(self, row):
+         values = map(self._escape_item, row)
+         return self.delimiter.join(values)
+
+
+ class CSVFileHandlerFactory(HandlerFactory):
+     def __init__(
+         self, handler_class=CSVFileHandler, filename=None, encoding=None, write_header=False, merge_files=True, **kwargs
+     ):
+         self.filename = filename or fs.new_tempfile(".csv")
+         self.merge_files = merge_files
+         options = {"filename": self.filename, "encoding": encoding, "write_header": write_header}
+         options.update(kwargs)
+         super().__init__(handler_class=handler_class, **options)
+
+         self._saved_schema = False
+
+     def meta_name(self):
+         return f"<{self.__class__.__name__} ({self.filename})>"
+
+     def create_handler(self, **kwargs):
+         # Each handler writes to its own numbered part file, e.g. out.csv.0, out.csv.1
+         filename = f'{self.handler_options["filename"]}.{len(self.handlers)}'
+         return super().create_handler(filename=filename, **kwargs)
+
+     def join(self):
+         if not self.handlers:
+             return
+
+         files = [h.filename for h in self.handlers if not fs.is_file_empty(h.filename)]
+         if not files:
+             return
+
+         if self.merge_files:
+             has_header = self.handler_options.get("write_header", False)
+             logger.info("files have header: %s", has_header)
+             logger.info("merging files %s into %s", files, self.filename)
+             if not has_header:
+                 fs.merge_files(files, self.filename)
+             else:
+                 base_file = files[0]  # keep its header
+                 target = fs.merge_files(files[1:], num_skip_lines=1)
+                 fs.merge_files([base_file, target], self.filename)
+
+         self.save_output_schema()
+
+     def _determine_output_schema(self):
+         if self.transformer.output_schema is not None:
+             # may be a list of Fields
+             if isinstance(self.transformer.output_schema, list):
+                 return Schema(self.transformer.output_schema)
+             return self.transformer.output_schema
+
+         if self.handler_options.get("write_header", False):
+             csv_options = self.handlers[0].csv_options
+             if self.merge_files:
+                 f = self.filename
+             else:
+                 # use the first non-empty part file
+                 f = toolz.first(x for x in glob.glob(f"{self.filename}.[0-9]*") if os.path.getsize(x))
+             csv_proxy = CSV(path=f, encoding=self.handler_options["encoding"], **csv_options)
+             return csv_proxy.infer_schema()
+         return None
+
+     def save_output_schema(self):
+         schema = self._determine_output_schema()
+         if not schema:
+             logger.warning("could not infer output schema")
+             return
+
+         filename = fs.schema_filename(self.filename)
+         logger.info("saving output schema to %s", filename)
+         schema.dump(filename)
+         self._saved_schema = True
+         return filename
+
+     def handle_dumper_schema(self, schema):
+         filename = fs.schema_filename(self.filename)
+         if self._saved_schema and os.path.exists(filename):
+             logger.info("file %s already exists, skipping", filename)
+             return
+
+         if not isinstance(schema, Schema):
+             raise TypeError(f"got {type(schema)}")
+
+         logger.info("saving dumper schema to %s", filename)
+         schema.dump(filename)
+         return filename
+
+
+ def create_csv_file_handler_factory(
+     filename=None, encoding=None, write_header=False, hive=False, transformer=None, merge_files=True, **kwargs
+ ):
+     if hive:
+         handler_class = HiveCSVFileHandler
+     else:
+         handler_class = CSVFileHandler
+
+     factory = CSVFileHandlerFactory(
+         handler_class=handler_class,
+         filename=filename,
+         encoding=encoding,
+         write_header=write_header,
+         merge_files=merge_files,
+         **kwargs,
+     )
+     if transformer is not None:
+         factory.set_transformer(transformer)
+     return factory
+
+
+ def convert_csv_to_hive_textfile(filename, folder=None, replace=False, has_header=False, **csv_options):
+     new_name = fs.new_tempfile(prefix=os.path.basename(filename), dir=folder)
+     handler = HiveCSVFileHandler(filename=new_name)
+     with open(filename, newline="") as fd:
+         if has_header:
+             fd.readline()
+
+         reader = csv.reader(fd, **csv_options)
+         for row in reader:
+             handler.handle(tuple(row))
+     handler.close()
+
+     if replace:
+         os.rename(new_name, filename)
+         return filename
+     return new_name
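
A sketch of the factory workflow: create_csv_file_handler_factory() hands out per-worker part files (out.csv.0, out.csv.1, ...) and join() merges them back into one file, keeping a single header. It assumes the default Transformer passes rows through unchanged.

    from recurvedata.pigeon.handler.csv_handler import create_csv_file_handler_factory

    factory = create_csv_file_handler_factory(filename="/tmp/out.csv", write_header=True)
    handler = factory.create_handler()  # writes to /tmp/out.csv.0
    handler.handle({"id": 1, "name": "alice"})
    handler.handle({"id": 2, "name": "bob"})
    handler.close()
    factory.join()  # merges the part files into /tmp/out.csv and saves the inferred schema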
recurvedata/pigeon/loader/__init__.py
@@ -0,0 +1,87 @@
+ from recurvedata.pigeon.connector import (
+     new_azure_synapse_connector,
+     new_elasticsearch_connector,
+     new_google_bigquery_connector,
+     new_postgresql_connector,
+     new_redshift_connector,
+     new_tidb_connector,
+ )
+ from recurvedata.pigeon.loader.csv_to_azure_synapse import CSVToAzureSynapseLoader
+ from recurvedata.pigeon.loader.csv_to_clickhouse import CSVToClickHouseLoader
+ from recurvedata.pigeon.loader.csv_to_es import CSVToElasticSearchLoader
+ from recurvedata.pigeon.loader.csv_to_google_bigquery import CSVToGoogleBigqueryLoader
+ from recurvedata.pigeon.loader.csv_to_hive import CSVToHiveLoader
+ from recurvedata.pigeon.loader.csv_to_mysql import CSVToMySQLLoader
+ from recurvedata.pigeon.loader.csv_to_postgresql import CSVToPostgresqlLoader
+ from recurvedata.pigeon.loader.csv_to_redshift import CSVToRedshiftLoader
+ from recurvedata.pigeon.loader.csv_to_starrocks import CSVToStarRocksLoader
+
+
+ def new_csv_to_hive_loader(table, filename, database, **kwargs):
+     params = kwargs.copy()
+     params.update(table=table, filename=filename, database=database)
+     loader = CSVToHiveLoader(**params)
+     return loader
+
+
+ def new_csv_to_mysql_loader(table, filename, database, **kwargs):
+     params = kwargs.copy()
+     params.update(table=table, filename=filename, database=database)
+     loader = CSVToMySQLLoader(**params)
+     return loader
+
+
+ def new_csv_to_starrocks_loader(table, filename, database, **kwargs):
+     params = kwargs.copy()
+     params.update(table=table, filename=filename, database=database)
+     loader = CSVToStarRocksLoader(**params)
+     return loader
+
+
+ def new_csv_to_tidb_loader(table, filename, database, **kwargs):
+     # TiDB is MySQL-compatible, so the MySQL loader is reused
+     params = kwargs.copy()
+     params.update(table=table, filename=filename, database=database)
+     loader = CSVToMySQLLoader(**params)
+     return loader
+
+
+ def new_csv_to_redshift_loader(table, filename, database, **kwargs):
+     params = kwargs.copy()
+     params.update(table=table, filename=filename, database=database)
+     loader = CSVToRedshiftLoader(**params)
+     return loader
+
+
+ def new_csv_to_postgresql_loader(table, filename, database, **kwargs):
+     params = kwargs.copy()
+     params.update(table=table, filename=filename, database=database)
+     loader = CSVToPostgresqlLoader(**params)
+     return loader
+
+
+ def new_csv_to_azure_synapse_loader(table, filename, **kwargs):
+     params = kwargs.copy()
+     params.update(table=table, filename=filename)
+     loader = CSVToAzureSynapseLoader(**params)
+     return loader
+
+
+ def new_csv_to_clickhouse_loader(table, filename, database, **kwargs):
+     params = kwargs.copy()
+     params.update(table=table, filename=filename, database=database)
+     loader = CSVToClickHouseLoader(**params)
+     return loader
+
+
+ def new_csv_to_elasticsearch_loader(index, filename, **kwargs):
+     params = kwargs.copy()
+     params.update(index=index, filename=filename)
+     loader = CSVToElasticSearchLoader(**params)
+     return loader
+
+
+ def new_csv_to_google_bigquery_loader(table, filename, **kwargs):
+     params = kwargs.copy()
+     params.update(table=table, filename=filename)
+     loader = CSVToGoogleBigqueryLoader(**params)
+     return loader
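
A sketch of the loader helpers above. The extra keyword arguments a concrete loader accepts (connection settings and so on) are not shown in this diff, and execute() is assumed to be the entry point by analogy with the dumpers.

    from recurvedata.pigeon.loader import new_csv_to_mysql_loader

    loader = new_csv_to_mysql_loader(
        table="users",
        filename="/tmp/users.csv",
        database="analytics",
        # connector/credential kwargs would go here; they are defined by CSVToMySQLLoader
    )
    loader.execute()  # assumed entry point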