recurvedata-lib 0.1.487__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of recurvedata-lib might be problematic. Click here for more details.

Files changed (333):
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
@@ -0,0 +1,92 @@
1
+ import logging
2
+ import os
3
+ import traceback
4
+
5
+ import pandas as pd
6
+ from owncloud import Client, HTTPResponseError
7
+
8
+ from recurvedata.pigeon.connector._registry import register_connector_class
9
+ from recurvedata.pigeon.utils.fs import new_tempfile, remove_files_safely
10
+
11
+
12
+ class OwncloudException(Exception):
13
+ pass
14
+
15
+
16
+ class OwncloudDownloadException(OwncloudException):
17
+ pass
18
+
19
+
20
+ class OwncloudUploadException(OwncloudException):
21
+ pass
22
+
23
+
24
+ class NewOwncloudClient(Client):
25
+ def get_webdav_url(self):
26
+ return self._webdav_url
27
+
28
+
29
+ @register_connector_class('owncloud')
30
+ class OwncloudConnector(object):
31
+ def __init__(self, url: str = None, user: str = None, password: str = None, **kwargs):
32
+ self.url = url
33
+ self.user = user
34
+ self.password = password
35
+ self.oc = NewOwncloudClient(url, **kwargs)
36
+ self.oc.login(user, password)
37
+
38
+ def download_file(self, remote_path: str, local_path: str):
39
+ logging.info(f'Downloading remote file {remote_path} to {local_path}.')
40
+ try:
41
+ status = self.oc.get_file(remote_path, local_path)
42
+ if status:
43
+ logging.info(f'Successfully download remote file {remote_path} to {local_path}.')
44
+ else:
45
+ raise OwncloudDownloadException(f'Failed to download remote file {remote_path}, unknown error.')
46
+ except HTTPResponseError as e:
47
+ logging.error(traceback.format_exc())
48
+ raise OwncloudDownloadException(
49
+ f'Failed to download remote file {remote_path}, HTTPResponseError {e.res}.'
50
+ )
51
+ return status
52
+
53
+ def upload_file(self, remote_path: str, local_source_file: str, **kwargs):
54
+ logging.info(f'Uploading local file {local_source_file} to {remote_path}.')
55
+ try:
56
+ res = self.oc.put_file(remote_path, local_source_file, **kwargs)
57
+ logging.info(f'Successfully upload local file {local_source_file} to remote {remote_path}.')
58
+ except Exception as e:
59
+ logging.error(traceback.format_exc())
60
+ raise OwncloudUploadException(
61
+ f'Failed to upload local file {local_source_file} to remote {remote_path}, {e.args}.'
62
+ )
63
+ return res
64
+
65
+ def get_pandas_df(self, remote_path: str) -> pd.DataFrame:
66
+ temp_file_path = new_tempfile()
67
+ if self.download_file(remote_path, temp_file_path):
68
+ file_type = os.path.splitext(remote_path)[-1]
69
+ try:
70
+ if file_type and file_type.lower() in ('.xlsx', '.xls'):
71
+ df = pd.read_excel(temp_file_path)
72
+ elif file_type and file_type.lower() in ('.parquet', '.parq'):
73
+ df = pd.read_parquet(temp_file_path)
74
+ elif file_type and file_type.lower() == '.json':
75
+ df = pd.read_json(temp_file_path)
76
+ else:
77
+ df = pd.read_csv(temp_file_path)
78
+ except Exception as e:
79
+ logging.error(traceback.format_exc())
80
+ raise ValueError(f'Failed to load remote file {remote_path} to pandas df, {e.args}.')
81
+ finally:
82
+ remove_files_safely(temp_file_path)
83
+ logging.info(f'Successfully load remote file {remote_path} to pandas df, {len(df)} rows.')
84
+ return df
85
+
86
+ @property
87
+ def webdav_url(self):
88
+ return self.oc.get_webdav_url()
89
+
90
+ @property
91
+ def http_auth_conf(self):
92
+ return {'username': f'{self.user}', 'password': f'{self.password}'}
@@ -0,0 +1,267 @@
1
+ import psycopg2
2
+
3
+ from recurvedata.pigeon.connector._registry import register_connector_class
4
+ from recurvedata.pigeon.connector.dbapi import ClosingCursor, DBAPIConnector, NullCursor
5
+ from recurvedata.pigeon.schema import types
6
+
7
# Maps PostgreSQL type OIDs (the ``type_code`` reported in
# ``cursor.description``) to pigeon canonical types.
# Assumes the standard pg_catalog.pg_type OIDs (e.g. 16=bool, 23=int4) —
# verify against the target server's catalog if in doubt.
# Any OID not listed here falls back to STRING (see ``to_canonical_type``).
_pg_type_to_canonical_type = {
    16: types.BOOLEAN,
    21: types.INT16,
    23: types.INT32,
    20: types.INT64,
    114: types.JSON,
    700: types.FLOAT32,
    701: types.FLOAT64,
    1700: types.FLOAT64,  # numeric is narrowed to float64
    1114: types.DATETIME,
    1184: types.DATETIME,
    1082: types.DATE,
    1043: types.STRING,
    1014: types.STRING,
    1015: types.STRING,
    1008: types.STRING,
    1009: types.STRING,
    2951: types.STRING,
}

# Reverse direction: canonical type -> PostgreSQL column type used when
# emitting DDL.  Unknown canonical types fall back to TEXT
# (see ``from_canonical_type``).
canonical_type_to_pg_type = {
    types.BOOLEAN: "BOOLEAN",
    types.INT8: "INT2",  # PostgreSQL has no 1-byte integer; promote to INT2
    types.INT16: "INT2",
    types.INT32: "INT4",
    types.INT64: "INT8",
    types.FLOAT32: "FLOAT4",
    types.FLOAT64: "FLOAT8",
    types.DATETIME: "TIMESTAMP",
    types.DATE: "DATE",
    types.STRING: "TEXT",
    types.JSON: "JSON",
}
40
+
41
+
42
+ class NamedCursor(ClosingCursor):
43
+ """NamedCursor is a server side cursor, using DECLARE and FETCH internally
44
+ http://initd.org/psycopg/docs/usage.html#server-side-cursors
45
+ """
46
+
47
+ def __init__(self, connection, commit_on_close=True, name=None):
48
+ self.connection = connection
49
+ self._commit_on_close = commit_on_close
50
+ if name is not None:
51
+ self._cursor = connection.cursor(name, withhold=True)
52
+ self._cursor.itersize = 1000
53
+ else:
54
+ self._cursor = connection.cursor()
55
+
56
+
57
@register_connector_class(["postgres", "postgresql"])
class PostgresConnector(DBAPIConnector):
    """psycopg2-based connector for PostgreSQL.

    Adds schema-aware helpers (``has_table``, ``get_columns``), DDL
    generation including table/column comments, and CSV loading (currently
    always via bulk INSERT).  Server-side (named) cursors are available
    through ``cursor(cursor_name=...)``.
    """

    _sqla_driver = "postgresql+psycopg2"
    _identifier_start_quote = '"'
    _identifier_end_quote = '"'
    _default_port = 5432

    def connect_impl(self, autocommit=False, *args, **kwargs):
        """Open a raw psycopg2 connection.

        When ``self.schema`` is set, the session ``search_path`` is pointed
        at that schema with ``public`` as fallback.
        """
        conn = psycopg2.connect(
            host=self.host,
            port=self.port,
            user=self.user,
            password=self.password,
            database=self.database,
            *args,
            **kwargs,
        )
        conn.autocommit = autocommit

        if self.schema:
            with conn.cursor() as cursor:
                # NOTE(review): the schema name is interpolated directly into
                # SQL.  It comes from connector configuration rather than end
                # users, but quoting it would be safer.
                cursor.execute(f"SET search_path TO {self.schema}, public")

        return conn

    def cursor(self, autocommit=False, dryrun=False, commit_on_close=True, **kwargs):
        """Returns a DBAPI cursor.

        Pass ``cursor_name`` in ``kwargs`` to obtain a server-side (named)
        cursor; ``dryrun=True`` yields a no-op ``NullCursor``.
        """
        if dryrun:
            return NullCursor()
        cursor_name = kwargs.pop("cursor_name", None)
        conn = self.connect(autocommit, **kwargs)
        return NamedCursor(conn, commit_on_close=commit_on_close, name=cursor_name)

    def has_table(self, table, database=None, schema="public", **kwargs):
        """Return True when ``schema.table`` exists in ``database``.

        ``table`` may be schema-qualified (``schema.table``), which overrides
        the ``schema`` argument.  A different ``database`` is reached through
        a cloned connector.
        """
        schema, table = self._get_schema_table(table, schema)

        if database is not None and database != self.database:
            conn = self.clone()
            conn.database = database
        else:
            conn = self
        with conn.cursor() as cursor:
            cursor.execute(
                """
                SELECT EXISTS (
                    SELECT 1 FROM information_schema.tables
                    WHERE table_name = %s AND table_schema = %s
                )
                """,
                (table, schema),
            )
            return bool(cursor.fetchone()[0])

    def get_columns(self, table, schema="public", database=None):
        """Return the column names of ``schema.table``.

        Raises:
            ValueError: when the table does not exist.
        """
        schema, table = self._get_schema_table(table, schema)
        if database is None:
            database = self.database
        if not self.has_table(table, database, schema=schema):
            raise ValueError("Table {!r}.{!r} not exists in {!r}".format(schema, table, database))
        with self.cursor() as cursor:
            # LIMIT 0 returns no rows but still fills cursor.description.
            cursor.execute('SELECT * FROM "{}"."{}" LIMIT 0'.format(schema, table))
            cursor.fetchall()
            return [x[0] for x in cursor.description]

    def generate_ddl(self, table, schema="public", database=None, field_filter=(), if_exists=True):
        """Build a CREATE TABLE statement (plus COMMENT ON statements) that
        reproduces ``schema.table``.

        Args:
            field_filter: iterable of column *names* to exclude from the DDL.
            if_exists: emit ``IF NOT EXISTS`` when True.

        Raises:
            ValueError: when the source table does not exist.
        """
        schema, table = self._get_schema_table(table, schema)
        if database is None:
            database = self.database
        if not self.has_table(table, database, schema=schema):
            raise ValueError(f"Table {schema!r}.{table!r} not exists in {database!r}")

        with self.cursor() as cursor:
            # Fetch the table-level comment (objsubid = 0 means the comment
            # is on the relation itself, not on a column).
            tbl_comment_sql = f"""
                SELECT pgd.description AS table_comment
                FROM pg_catalog.pg_description pgd
                WHERE pgd.objsubid = 0 AND pgd.objoid = (SELECT c.oid
                    FROM pg_catalog.pg_class c
                    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                    WHERE n.nspname = {schema!r}
                        AND c.relname = {table!r} AND
                        c.relkind IN ('r', 'v', 'm', 'f'));
            """
            cursor.execute(tbl_comment_sql)
            t_comment = cursor.fetchall()
            # Fetch per-column name, type, default, NOT NULL flag and comment.
            col_comment_sql = f"""
                SELECT
                    a.attname AS "field",
                    pg_catalog.format_type(a.atttypid, a.atttypmod) AS "type",
                    (SELECT pg_catalog.pg_get_expr(d.adbin, d.adrelid)
                        FROM pg_catalog.pg_attrdef d
                        WHERE d.adrelid = a.attrelid AND d.adnum = a.attnum
                            AND a.atthasdef)
                        AS "default",
                    a.attnotnull AS "isnull",
                    pgd.description AS "comment"
                FROM pg_catalog.pg_attribute a
                LEFT JOIN pg_catalog.pg_description pgd ON (
                    pgd.objoid = a.attrelid AND pgd.objsubid = a.attnum)
                WHERE a.attrelid = (SELECT c.oid
                    FROM pg_catalog.pg_class c
                    LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
                    WHERE n.nspname = {schema!r}
                        AND c.relname = {table!r} AND c.relkind IN ('r', 'v', 'm', 'f'))
                    AND a.attnum > 0 AND NOT a.attisdropped
                ORDER BY a.attnum;
            """
            cursor.execute(col_comment_sql)
            col_info = cursor.fetchall()

        if t_comment:
            comments = [f"COMMENT ON TABLE {self.quote_identifier(table)} IS {t_comment[0][0]!r};"]
        else:
            comments = []

        cols = []
        for col in col_info:
            # BUGFIX: compare the column *name* against field_filter; the
            # original compared the whole result-row tuple, so the filter
            # never matched anything.
            if col[0] in field_filter:
                continue
            # BUGFIX: the f-prefix was missing, so the literal text
            # " DEFAULT {col[2]}" was emitted into the DDL.
            default = f" DEFAULT {col[2]}" if col[2] else ""
            isnull = " NOT NULL " if col[3] else ""
            if "character varying" in col[1]:
                ctype = col[1].replace("character varying", "varchar")
                cols.append(f"{self.quote_identifier(col[0])} {ctype}{isnull}{default}")
            else:
                cols.append(f"{self.quote_identifier(col[0])} {col[1]}{isnull}{default}")
            if col[4]:
                comments.append(
                    f"COMMENT ON COLUMN {self.quote_identifier(table)}.{self.quote_identifier(col[0])} IS {col[4]!r};"
                )
        if_exists_stmt = " IF NOT EXISTS " if if_exists else " "
        cols_stmt = ", ".join(cols)
        comments_stmt = " ".join(comments)
        return f"CREATE TABLE{if_exists_stmt}{self.quote_identifier(table)} ({cols_stmt}); {comments_stmt}"

    def is_postgres(self):
        """Dialect probe used by callers to branch on PostgreSQL behavior."""
        return True

    @staticmethod
    def to_canonical_type(type_code, size):
        """Map a psycopg2 ``type_code`` (pg_type OID) to a canonical type."""
        return _pg_type_to_canonical_type.get(type_code, types.STRING)

    @staticmethod
    def from_canonical_type(canonical_type, size):
        """Map a canonical type to the PostgreSQL column type used in DDL."""
        return canonical_type_to_pg_type.get(canonical_type, "TEXT")

    def load_csv(
        self,
        table,
        filename,
        schema="public",
        columns=None,
        delimiter=",",
        quotechar='"',
        lineterminator="\r\n",
        escapechar=None,
        skiprows=0,
        using_insert=True,
        **kwargs,
    ):
        """Load a CSV file into ``schema.table``.

        Currently always uses bulk INSERT; ``using_insert=False`` (COPY)
        is not implemented yet and falls back with a warning.
        """
        if not using_insert:
            self.logger.warning("load file directly is not implemented yet, fallback to using bulk INSERT")

        method = self.load_csv_by_inserting
        schema, table = self._get_schema_table(table, schema)
        table = self._format_table_name(table, schema)

        return method(
            table, filename, columns, delimiter, quotechar, lineterminator, escapechar, skiprows=skiprows, **kwargs
        )

    def _copy_csv(
        self,
        table,
        filename,
        columns=None,
        delimiter=",",
        quotechar='"',
        lineterminator="\r\n",
        escapechar=None,
        skiprows=0,
        **kwargs,
    ):
        """Load a file via psycopg2 ``copy_from`` (currently unused).

        NOTE(review): ``copy_from`` uses PostgreSQL's text COPY format, which
        does not handle standard CSV quoting — presumably why ``load_csv``
        falls back to bulk INSERT instead of calling this.
        """
        conn = self.connect()
        cursor = conn.cursor()
        self.logger.info("copy file %s into %s", filename, table)
        with open(filename, "r") as f:
            if skiprows:
                for _ in range(skiprows):
                    f.readline()
            cursor.copy_from(f, table, sep=delimiter, columns=columns)
        conn.commit()
        conn.close()

    def _get_schema_table(self, table, schema):
        """Split an optionally schema-qualified table name.

        A ``schema.table`` argument overrides the ``schema`` parameter;
        an empty schema defaults to ``public``.
        """
        if "." in table:
            schema, table = table.split(".")
        if not schema:
            schema = "public"
        return schema, table

    def _format_table_name(self, table, schema):
        """Qualify ``table`` with ``schema`` for use in SQL statements.

        NOTE(review): this quotes the combined ``schema.table`` string as a
        single identifier; with plain double-quoting that yields
        ``"schema.table"`` rather than ``"schema"."table"`` — confirm
        ``quote_identifier`` handles the dot before relying on this.
        """
        if schema and "." not in table:
            table = self.quote_identifier(f"{schema}.{table}")
        return table
@@ -0,0 +1,179 @@
1
+ import copy
2
+ import logging
3
+ import time
4
+ from typing import Dict, List, Union
5
+
6
+ import msal
7
+ import pandas as pd
8
+ import requests
9
+
10
# Default Power BI client configuration. PowerBI.__init__ deep-copies this
# dict and overwrites the tenant/client entries, so the module-level values
# act only as a template. The literal string 'tenant_id' in the authority
# URLs is replaced with the real tenant ID at runtime.
config = dict(
    # Can be set to 'MasterUser' or 'ServicePrincipal'
    AUTHENTICATION_MODE='ServicePrincipal',
    POWER_BI_TENANT_ID='',
    POWER_BI_CLIENT_ID='',
    # Client Secret (App Secret) of the AAD app. Required only for ServicePrincipal authentication mode.
    POWER_BI_CLIENT_SECRET='',
    # Scope of AAD app. Use the below configuration to use all the permissions provided in the AAD(Azure Active Directory) app through Azure portal.
    POWER_BI_SCOPE=['https://analysis.windows.net/powerbi/api/.default'],  # global (public) cloud
    POWER_BI_SCOPE_CN=['https://analysis.chinacloudapi.cn/powerbi/api/.default'],  # China cloud
    # URL used for initiating authorization request
    POWER_BI_AUTHORITY='https://login.microsoftonline.com/tenant_id',
    POWER_BI_AUTHORITY_CN='https://login.chinacloudapi.cn/tenant_id',
    POWER_BI_API_URL_PREFIX='https://api.powerbi.com/v1.0/myorg',
    POWER_BI_API_URL_PREFIX_CN='https://api.powerbi.cn/v1.0/myorg'
)
26
+
27
+
28
class PBIRefreshFailedException(Exception):
    """Raised when a Power BI dataset refresh reports status 'Failed'."""
    pass
30
+
31
+
32
class PBIRefreshTimeoutException(Exception):
    """Raised when a dataset refresh does not complete within the timeout."""
    pass
34
+
35
+
36
class PowerBI:
    """Minimal Power BI REST API client using the China cloud endpoints.

    Authenticates with an Azure AD service principal through the MSAL
    client-credentials flow; the token response is cached until one minute
    before its reported expiry.
    """

    def __init__(self, tenant_id: str, client_id: str, client_secret: str, **kwargs):
        """
        Parameters:
            tenant_id: Azure AD tenant ID
            client_id: AAD application (client) ID
            client_secret: AAD application secret
            kwargs: overrides for keys already present in the default config;
                unknown keys are silently ignored
        """
        self.config = copy.deepcopy(config)
        self.config["POWER_BI_TENANT_ID"] = tenant_id
        self.config["POWER_BI_CLIENT_ID"] = client_id
        self.config["POWER_BI_CLIENT_SECRET"] = client_secret
        for k, v in kwargs.items():
            if k in self.config:
                self.config[k] = v
        self.access_token = None  # cached MSAL token response dict
        self.token_abort_time = None  # epoch seconds after which the token is refreshed

    def get_access_token(self):
        """Return a (possibly cached) MSAL token response dict.

        Raises:
            Exception: when token acquisition fails; the original error is
                chained as ``__cause__``.
        """
        if self.access_token is not None and time.time() < self.token_abort_time:
            return self.access_token
        try:
            # Service Principal auth is the recommended by Microsoft to achieve App Owns Data Power BI embedding
            authority = self.config['POWER_BI_AUTHORITY_CN'].replace('tenant_id', self.config['POWER_BI_TENANT_ID'])
            client_app = msal.ConfidentialClientApplication(
                client_id=self.config['POWER_BI_CLIENT_ID'],
                client_credential=self.config['POWER_BI_CLIENT_SECRET'],
                authority=authority
            )
            # Make a client call if Access token is not available in cache
            response = client_app.acquire_token_for_client(scopes=self.config['POWER_BI_SCOPE_CN'])
            self.access_token = response
            # refresh one minute before the reported expiry
            self.token_abort_time = time.time() + (response["expires_in"] - 60)

            return response

        except Exception as ex:
            # chain the original exception so the root cause is not lost
            raise Exception('Error retrieving Access token\n' + str(ex)) from ex

    @property
    def request_header(self):
        """
        Get Power BI API request header
        """
        access_token = self.get_access_token()
        return {
            'Content-Type': 'application/json',
            'Authorization': f"{access_token['token_type']} {access_token['access_token']}"
        }

    def get_refresh_job_info(self, group_id: str, dataset_id: str, request_id: str = None, limit: int = 10) -> Union[Dict, List[Dict]]:
        """
        Parameters:
            group_id: The workspace ID
            dataset_id: The dataset ID
            request_id: when given, return the matching request Dict (or None
                if the request is not in the listed page); otherwise return
                the full List[Dict]
            limit: numbers of recently requests (Descending), default 10
        """
        url = f"{self.config['POWER_BI_API_URL_PREFIX_CN']}/groups/{group_id}/datasets/{dataset_id}/refreshes/?$top={limit}"
        r = requests.get(url, headers=self.request_header)
        r.raise_for_status()
        ret = r.json()["value"]
        if request_id:
            return next(filter(lambda x: x["requestId"] == request_id, ret), None)
        return ret

    def refresh_dataset_in_group(self, group_id: str, dataset_id: str, is_wait: bool = True, timeout: int = 300, check_interval: int = 20, limit: int = 10):
        """
        Prefer refresh_datasets() for refreshing multiple datasets.

        https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/refresh-dataset-in-group \n
        Limitation:
            For Shared capacities, a maximum of 8 requests per day, including refreshes executed by using scheduled refresh, can be initiated.

        Parameters:
            group_id: The workspace ID
            dataset_id: The dataset ID
            is_wait: Wait until refresh finish, default wait for refreshing
            timeout: Default timeout is 5 minutes if is_wait is True
            check_interval: Default 20 seconds
            limit: numbers of recently requests (Descending)

        Returns:
            The refreshing job information

        Raises:
            PBIRefreshFailedException: refresh failed or its job info could
                not be located
            PBIRefreshTimeoutException: refresh did not finish within *timeout*
        """

        logging.info(f"Start refreshing dataset {dataset_id} in group {group_id}")
        url = f"{self.config['POWER_BI_API_URL_PREFIX_CN']}/groups/{group_id}/datasets/{dataset_id}/refreshes"
        r = requests.post(url, headers=self.request_header)
        r.raise_for_status()
        request_id = r.headers.get("RequestId")
        # get refresh job information
        job_info = self.get_refresh_job_info(group_id, dataset_id, request_id, limit)
        if not job_info:  # if don't receive specific job, wait a second
            time.sleep(5)
            job_info = self.get_refresh_job_info(group_id, dataset_id, request_id, limit)
        if not job_info:
            # previously this fell through and crashed with TypeError on
            # job_info["status"]; fail with a descriptive exception instead
            raise PBIRefreshFailedException(
                f"Refresh request {request_id} for dataset {dataset_id} was not found in recent jobs"
            )
        logging.info(f"Refresh detail: request_id -> {request_id}, job_info -> {job_info}")
        if not is_wait:
            if job_info["status"] == "Failed":
                logging.info(f"Refresh failed: {dataset_id}")
                raise PBIRefreshFailedException(job_info)
            return job_info
        abort_time = time.time() + timeout
        while job_info["status"] != "Completed":
            if job_info["status"] == "Failed":
                logging.info(f"Refresh failed: {dataset_id}")
                raise PBIRefreshFailedException(job_info)
            time.sleep(check_interval)
            if time.time() > abort_time:
                logging.info(f"Refresh timeout: {dataset_id}")
                raise PBIRefreshTimeoutException(job_info)
            job_info = self.get_refresh_job_info(group_id, dataset_id, request_id, limit)
            logging.info(f"Retry: {job_info}")
        logging.info(f"Refresh completed: {dataset_id}")
        return job_info

    def refresh_datasets(self, refresh_list: pd.DataFrame, is_wait: bool = True, timeout: int = 300, check_interval: int = 20, limit: int = 10):
        """
        Refresh every dataset in *refresh_list*, a DataFrame with "group_id"
        and "dataset_id" columns, and return a per-status summary dict with
        keys "Completed", "Failed", "Timeout" and "Error".
        """
        summary = {
            "Completed": [], "Failed": [], "Timeout": [], "Error": []
        }
        if not {"group_id", "dataset_id"}.issubset(refresh_list.columns):
            raise Exception(f"Contain wrong columns, input must include group_id and dataset_id, while target dataframe has {refresh_list.columns.to_list()} columns.")
        for _, row in refresh_list.iterrows():
            group_id, dataset_id = row["group_id"], row["dataset_id"]
            try:
                job_info = self.refresh_dataset_in_group(group_id, dataset_id, is_wait, timeout, check_interval, limit)
                summary["Completed"].append({"group_id": group_id, "dataset_id": dataset_id, "job_info": job_info})
            except PBIRefreshFailedException as e:
                summary["Failed"].append({"group_id": group_id, "dataset_id": dataset_id, "job_info": e.args[0]})
            except PBIRefreshTimeoutException as e:
                summary["Timeout"].append({"group_id": group_id, "dataset_id": dataset_id, "job_info": e.args[0]})
            except Exception as e:
                summary["Error"].append({"group_id": group_id, "dataset_id": dataset_id, "reason": repr(e)})
        return summary

    def get_datasets_in_group(self, group_id: str) -> pd.DataFrame:
        """
        https://learn.microsoft.com/en-us/rest/api/power-bi/datasets/get-datasets-in-group \n
        Returns a list of datasets from the specified workspace.
        """
        url = f"{self.config['POWER_BI_API_URL_PREFIX_CN']}/groups/{group_id}/datasets"
        r = requests.get(url, headers=self.request_header)
        r.raise_for_status()
        df = pd.DataFrame(r.json()["value"]).rename(columns={"id": "dataset_id"})
        df["group_id"] = group_id
        return df
@@ -0,0 +1,79 @@
1
+ import os
2
+
3
+ import qcloud_cos
4
+
5
+ from recurvedata.pigeon.connector._registry import register_connector_class
6
+ from recurvedata.pigeon.utils.timing import ProgressCallback
7
+
8
+
9
@register_connector_class("cos")
class COSConnector(object):
    """Tencent Cloud COS connector wrapping ``qcloud_cos.CosS3Client``."""

    def __init__(self, secret_id, secret_key, region, proxies=None, endpoint=None, **kwargs):
        self.secret_id = secret_id
        self.secret_key = secret_key
        self.region = region

        self.config = qcloud_cos.CosConfig(
            Region=region, SecretId=secret_id, SecretKey=secret_key, Endpoint=endpoint, Proxies=proxies
        )
        self.cos = qcloud_cos.CosS3Client(self.config)

    def has_bucket(self, bucket_name):
        """Return True when *bucket_name* exists."""
        return self.cos.bucket_exists(bucket_name)

    def create_bucket(self, bucket_name):
        """Create the bucket if it does not already exist."""
        if not self.has_bucket(bucket_name):
            self.cos.create_bucket(bucket_name)

    def delete_bucket(self, bucket_name):
        """Delete the bucket if it exists."""
        if self.has_bucket(bucket_name):
            self.cos.delete_bucket(bucket_name)

    def has_object(self, bucket_name, key):
        """Return True when *key* exists in *bucket_name*."""
        return self.cos.object_exists(bucket_name, key)

    def delete_object(self, bucket_name, key):
        """Delete a single object."""
        self.cos.delete_object(bucket_name, key)

    def list_objects(self, bucket_name, prefix=""):
        """Return ALL object keys under *prefix*.

        The COS API returns at most 1000 keys per call; this follows the
        IsTruncated/NextMarker pagination so results beyond the first page
        are not silently dropped (the previous implementation returned only
        the first page).
        """
        keys = []
        marker = ""
        while True:
            res = self.cos.list_objects(Bucket=bucket_name, Prefix=prefix, Marker=marker)
            keys.extend(item["Key"] for item in res.get("Contents", []))
            # COS returns IsTruncated as the string "true"/"false"
            if res.get("IsTruncated") != "true":
                break
            # NextMarker may be absent when no delimiter is given; fall back
            # to the last key of the current page
            marker = res.get("NextMarker") or (keys[-1] if keys else "")
            if not marker:
                break
        return keys

    def delete_keys_by_prefix(self, bucket_name, prefix):
        """Delete every object whose key starts with *prefix*."""
        for key in self.list_objects(bucket_name, prefix):
            self.delete_object(bucket_name, key)

    def upload(self, bucket_name, filename, key=None, folder=None, overwrite=True, num_threads=4, **kwargs):
        """Upload a local file and return the object key used.

        Parameters:
            bucket_name: target bucket
            filename: local file path; its basename is used when *key* is None
            key: explicit object key
            folder: optional key prefix joined with "/"
            overwrite: when False, skip the upload if the key already exists
            num_threads: parallel upload threads
        """
        if not key:
            key = os.path.basename(filename)
        if folder:
            # object keys always use "/" separators; os.path.join would
            # produce backslashes on Windows
            key = f"{folder.rstrip('/')}/{key}"

        if not overwrite and self.has_object(bucket_name=bucket_name, key=key):
            return key

        self.cos.upload_file(
            Bucket=bucket_name,
            LocalFilePath=filename,
            Key=key,
            MAXThread=num_threads,
            progress_callback=ProgressCallback(),
        )
        return key

    def download(self, bucket_name, key, folder=None, filename=None, overwrite=True, num_threads=4, **kwargs):
        """Download an object to a local file and return the local path.

        Parameters:
            bucket_name: source bucket
            key: object key; its basename is used when *filename* is None
            folder: optional local directory for the file
            filename: explicit local file name
            overwrite: when False, skip the download if the file already exists

        Raises:
            ValueError: when *key* does not exist in the bucket
        """
        if not self.has_object(bucket_name, key):
            raise ValueError(f"{key} not exists in {bucket_name}")

        if not filename:
            filename = os.path.basename(key)
        if folder:
            filename = os.path.join(folder, filename)

        if not overwrite and os.path.exists(filename):
            return filename

        self.cos.download_file(Bucket=bucket_name, Key=key, DestFilePath=filename, MAXThread=num_threads)
        return filename