recurvedata-lib 0.1.487 (py2.py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of recurvedata-lib might be problematic.

Files changed (333)
  1. recurvedata/__init__.py +0 -0
  2. recurvedata/__version__.py +1 -0
  3. recurvedata/client/__init__.py +3 -0
  4. recurvedata/client/client.py +150 -0
  5. recurvedata/client/server_client.py +91 -0
  6. recurvedata/config.py +99 -0
  7. recurvedata/connectors/__init__.py +20 -0
  8. recurvedata/connectors/_register.py +46 -0
  9. recurvedata/connectors/base.py +111 -0
  10. recurvedata/connectors/config_schema.py +1575 -0
  11. recurvedata/connectors/connectors/__init__.py +0 -0
  12. recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
  13. recurvedata/connectors/connectors/auth.py +44 -0
  14. recurvedata/connectors/connectors/azure_blob.py +89 -0
  15. recurvedata/connectors/connectors/azure_synapse.py +79 -0
  16. recurvedata/connectors/connectors/bigquery.py +359 -0
  17. recurvedata/connectors/connectors/clickhouse.py +219 -0
  18. recurvedata/connectors/connectors/dingtalk.py +61 -0
  19. recurvedata/connectors/connectors/doris.py +215 -0
  20. recurvedata/connectors/connectors/es.py +62 -0
  21. recurvedata/connectors/connectors/feishu.py +65 -0
  22. recurvedata/connectors/connectors/ftp.py +50 -0
  23. recurvedata/connectors/connectors/generic.py +49 -0
  24. recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
  25. recurvedata/connectors/connectors/google_service_account.py +225 -0
  26. recurvedata/connectors/connectors/hive.py +207 -0
  27. recurvedata/connectors/connectors/impala.py +210 -0
  28. recurvedata/connectors/connectors/jenkins.py +51 -0
  29. recurvedata/connectors/connectors/mail.py +89 -0
  30. recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
  31. recurvedata/connectors/connectors/mongo.py +79 -0
  32. recurvedata/connectors/connectors/mssql.py +131 -0
  33. recurvedata/connectors/connectors/mysql.py +191 -0
  34. recurvedata/connectors/connectors/n8n.py +141 -0
  35. recurvedata/connectors/connectors/oss.py +74 -0
  36. recurvedata/connectors/connectors/owncloud.py +36 -0
  37. recurvedata/connectors/connectors/phoenix.py +36 -0
  38. recurvedata/connectors/connectors/postgres.py +230 -0
  39. recurvedata/connectors/connectors/python.py +50 -0
  40. recurvedata/connectors/connectors/redshift.py +187 -0
  41. recurvedata/connectors/connectors/s3.py +93 -0
  42. recurvedata/connectors/connectors/sftp.py +87 -0
  43. recurvedata/connectors/connectors/slack.py +35 -0
  44. recurvedata/connectors/connectors/spark.py +99 -0
  45. recurvedata/connectors/connectors/starrocks.py +175 -0
  46. recurvedata/connectors/connectors/tencent_cos.py +40 -0
  47. recurvedata/connectors/connectors/tidb.py +49 -0
  48. recurvedata/connectors/const.py +315 -0
  49. recurvedata/connectors/datasource.py +189 -0
  50. recurvedata/connectors/dbapi.py +469 -0
  51. recurvedata/connectors/fs.py +66 -0
  52. recurvedata/connectors/ftp.py +40 -0
  53. recurvedata/connectors/object_store.py +60 -0
  54. recurvedata/connectors/pigeon.py +172 -0
  55. recurvedata/connectors/proxy.py +104 -0
  56. recurvedata/connectors/service.py +223 -0
  57. recurvedata/connectors/utils.py +47 -0
  58. recurvedata/consts.py +49 -0
  59. recurvedata/core/__init__.py +0 -0
  60. recurvedata/core/config.py +46 -0
  61. recurvedata/core/configurable.py +27 -0
  62. recurvedata/core/consts.py +2 -0
  63. recurvedata/core/templating.py +206 -0
  64. recurvedata/core/tracing.py +223 -0
  65. recurvedata/core/transformer.py +186 -0
  66. recurvedata/core/translation.py +91 -0
  67. recurvedata/dbt/client.py +97 -0
  68. recurvedata/dbt/consts.py +99 -0
  69. recurvedata/dbt/cosmos_utils.py +275 -0
  70. recurvedata/dbt/error_codes.py +18 -0
  71. recurvedata/dbt/schemas.py +98 -0
  72. recurvedata/dbt/service.py +451 -0
  73. recurvedata/dbt/utils.py +246 -0
  74. recurvedata/error_codes.py +71 -0
  75. recurvedata/exceptions.py +72 -0
  76. recurvedata/executors/__init__.py +4 -0
  77. recurvedata/executors/cli/__init__.py +7 -0
  78. recurvedata/executors/cli/connector.py +117 -0
  79. recurvedata/executors/cli/dbt.py +118 -0
  80. recurvedata/executors/cli/main.py +82 -0
  81. recurvedata/executors/cli/parameters.py +18 -0
  82. recurvedata/executors/client.py +190 -0
  83. recurvedata/executors/consts.py +50 -0
  84. recurvedata/executors/debug_executor.py +100 -0
  85. recurvedata/executors/executor.py +300 -0
  86. recurvedata/executors/link_executor.py +189 -0
  87. recurvedata/executors/models.py +34 -0
  88. recurvedata/executors/schemas.py +222 -0
  89. recurvedata/executors/service/__init__.py +0 -0
  90. recurvedata/executors/service/connector.py +380 -0
  91. recurvedata/executors/utils.py +172 -0
  92. recurvedata/filestorage/__init__.py +11 -0
  93. recurvedata/filestorage/_factory.py +33 -0
  94. recurvedata/filestorage/backends/__init__.py +0 -0
  95. recurvedata/filestorage/backends/fsspec.py +45 -0
  96. recurvedata/filestorage/backends/local.py +67 -0
  97. recurvedata/filestorage/backends/oss.py +56 -0
  98. recurvedata/filestorage/interface.py +84 -0
  99. recurvedata/operators/__init__.py +10 -0
  100. recurvedata/operators/base.py +28 -0
  101. recurvedata/operators/config.py +21 -0
  102. recurvedata/operators/context.py +255 -0
  103. recurvedata/operators/dbt_operator/__init__.py +2 -0
  104. recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
  105. recurvedata/operators/dbt_operator/operator.py +353 -0
  106. recurvedata/operators/link_operator/__init__.py +1 -0
  107. recurvedata/operators/link_operator/operator.py +120 -0
  108. recurvedata/operators/models.py +55 -0
  109. recurvedata/operators/notify_operator/__init__.py +1 -0
  110. recurvedata/operators/notify_operator/operator.py +180 -0
  111. recurvedata/operators/operator.py +119 -0
  112. recurvedata/operators/python_operator/__init__.py +1 -0
  113. recurvedata/operators/python_operator/operator.py +132 -0
  114. recurvedata/operators/sensor_operator/__init__.py +1 -0
  115. recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
  116. recurvedata/operators/sensor_operator/operator.py +172 -0
  117. recurvedata/operators/spark_operator/__init__.py +1 -0
  118. recurvedata/operators/spark_operator/operator.py +200 -0
  119. recurvedata/operators/spark_operator/spark_sample.py +47 -0
  120. recurvedata/operators/sql_operator/__init__.py +1 -0
  121. recurvedata/operators/sql_operator/operator.py +90 -0
  122. recurvedata/operators/task.py +211 -0
  123. recurvedata/operators/transfer_operator/__init__.py +40 -0
  124. recurvedata/operators/transfer_operator/const.py +10 -0
  125. recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
  126. recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
  127. recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
  128. recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
  129. recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
  130. recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
  131. recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
  132. recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
  133. recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
  134. recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
  135. recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
  136. recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
  137. recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
  138. recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
  139. recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
  140. recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
  141. recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
  142. recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
  143. recurvedata/operators/transfer_operator/load_task_email.py +188 -0
  144. recurvedata/operators/transfer_operator/load_task_es.py +86 -0
  145. recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
  146. recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
  147. recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
  148. recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
  149. recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
  150. recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
  151. recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
  152. recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
  153. recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
  154. recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
  155. recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
  156. recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
  157. recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
  158. recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
  159. recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
  160. recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
  161. recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
  162. recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
  163. recurvedata/operators/transfer_operator/mixin.py +31 -0
  164. recurvedata/operators/transfer_operator/operator.py +231 -0
  165. recurvedata/operators/transfer_operator/task.py +223 -0
  166. recurvedata/operators/transfer_operator/utils.py +134 -0
  167. recurvedata/operators/ui.py +80 -0
  168. recurvedata/operators/utils/__init__.py +51 -0
  169. recurvedata/operators/utils/file_factory.py +150 -0
  170. recurvedata/operators/utils/fs.py +10 -0
  171. recurvedata/operators/utils/lineage.py +265 -0
  172. recurvedata/operators/web_init.py +15 -0
  173. recurvedata/pigeon/connector/__init__.py +294 -0
  174. recurvedata/pigeon/connector/_registry.py +17 -0
  175. recurvedata/pigeon/connector/aliyun_oss.py +80 -0
  176. recurvedata/pigeon/connector/awss3.py +123 -0
  177. recurvedata/pigeon/connector/azure_blob.py +176 -0
  178. recurvedata/pigeon/connector/azure_synapse.py +51 -0
  179. recurvedata/pigeon/connector/cass.py +151 -0
  180. recurvedata/pigeon/connector/clickhouse.py +403 -0
  181. recurvedata/pigeon/connector/clickhouse_native.py +351 -0
  182. recurvedata/pigeon/connector/dbapi.py +571 -0
  183. recurvedata/pigeon/connector/doris.py +166 -0
  184. recurvedata/pigeon/connector/es.py +176 -0
  185. recurvedata/pigeon/connector/feishu.py +1135 -0
  186. recurvedata/pigeon/connector/ftp.py +163 -0
  187. recurvedata/pigeon/connector/google_bigquery.py +283 -0
  188. recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
  189. recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
  190. recurvedata/pigeon/connector/hdfs.py +204 -0
  191. recurvedata/pigeon/connector/hive_impala.py +383 -0
  192. recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
  193. recurvedata/pigeon/connector/mongodb.py +56 -0
  194. recurvedata/pigeon/connector/mssql.py +467 -0
  195. recurvedata/pigeon/connector/mysql.py +175 -0
  196. recurvedata/pigeon/connector/owncloud.py +92 -0
  197. recurvedata/pigeon/connector/postgresql.py +267 -0
  198. recurvedata/pigeon/connector/power_bi.py +179 -0
  199. recurvedata/pigeon/connector/qcloud_cos.py +79 -0
  200. recurvedata/pigeon/connector/redshift.py +123 -0
  201. recurvedata/pigeon/connector/sftp.py +73 -0
  202. recurvedata/pigeon/connector/sqlite.py +42 -0
  203. recurvedata/pigeon/connector/starrocks.py +144 -0
  204. recurvedata/pigeon/connector/tableau.py +162 -0
  205. recurvedata/pigeon/const.py +21 -0
  206. recurvedata/pigeon/csv.py +172 -0
  207. recurvedata/pigeon/docs/datasources-example.json +82 -0
  208. recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
  209. recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
  210. recurvedata/pigeon/dumper/__init__.py +171 -0
  211. recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
  212. recurvedata/pigeon/dumper/base.py +141 -0
  213. recurvedata/pigeon/dumper/cass.py +213 -0
  214. recurvedata/pigeon/dumper/dbapi.py +346 -0
  215. recurvedata/pigeon/dumper/es.py +112 -0
  216. recurvedata/pigeon/dumper/ftp.py +64 -0
  217. recurvedata/pigeon/dumper/mongodb.py +103 -0
  218. recurvedata/pigeon/handler/__init__.py +4 -0
  219. recurvedata/pigeon/handler/base.py +153 -0
  220. recurvedata/pigeon/handler/csv_handler.py +290 -0
  221. recurvedata/pigeon/loader/__init__.py +87 -0
  222. recurvedata/pigeon/loader/base.py +83 -0
  223. recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
  224. recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
  225. recurvedata/pigeon/loader/csv_to_doris.py +215 -0
  226. recurvedata/pigeon/loader/csv_to_es.py +51 -0
  227. recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
  228. recurvedata/pigeon/loader/csv_to_hive.py +468 -0
  229. recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
  230. recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
  231. recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
  232. recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
  233. recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
  234. recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
  235. recurvedata/pigeon/meta.py +116 -0
  236. recurvedata/pigeon/row_factory.py +42 -0
  237. recurvedata/pigeon/schema/__init__.py +124 -0
  238. recurvedata/pigeon/schema/types.py +13 -0
  239. recurvedata/pigeon/sync.py +283 -0
  240. recurvedata/pigeon/transformer.py +146 -0
  241. recurvedata/pigeon/utils/__init__.py +134 -0
  242. recurvedata/pigeon/utils/bloomfilter.py +181 -0
  243. recurvedata/pigeon/utils/date_time.py +323 -0
  244. recurvedata/pigeon/utils/escape.py +15 -0
  245. recurvedata/pigeon/utils/fs.py +266 -0
  246. recurvedata/pigeon/utils/json.py +44 -0
  247. recurvedata/pigeon/utils/keyed_tuple.py +85 -0
  248. recurvedata/pigeon/utils/mp.py +156 -0
  249. recurvedata/pigeon/utils/sql.py +328 -0
  250. recurvedata/pigeon/utils/timing.py +155 -0
  251. recurvedata/provider_manager.py +0 -0
  252. recurvedata/providers/__init__.py +0 -0
  253. recurvedata/providers/dbapi/__init__.py +0 -0
  254. recurvedata/providers/flywheel/__init__.py +0 -0
  255. recurvedata/providers/mysql/__init__.py +0 -0
  256. recurvedata/schedulers/__init__.py +1 -0
  257. recurvedata/schedulers/airflow.py +974 -0
  258. recurvedata/schedulers/airflow_db_process.py +331 -0
  259. recurvedata/schedulers/airflow_operators.py +61 -0
  260. recurvedata/schedulers/airflow_plugin.py +9 -0
  261. recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
  262. recurvedata/schedulers/base.py +99 -0
  263. recurvedata/schedulers/cli.py +228 -0
  264. recurvedata/schedulers/client.py +56 -0
  265. recurvedata/schedulers/consts.py +52 -0
  266. recurvedata/schedulers/debug_celery.py +62 -0
  267. recurvedata/schedulers/model.py +63 -0
  268. recurvedata/schedulers/schemas.py +97 -0
  269. recurvedata/schedulers/service.py +20 -0
  270. recurvedata/schedulers/system_dags.py +59 -0
  271. recurvedata/schedulers/task_status.py +279 -0
  272. recurvedata/schedulers/utils.py +73 -0
  273. recurvedata/schema/__init__.py +0 -0
  274. recurvedata/schema/field.py +88 -0
  275. recurvedata/schema/schema.py +55 -0
  276. recurvedata/schema/types.py +17 -0
  277. recurvedata/schema.py +0 -0
  278. recurvedata/server/__init__.py +0 -0
  279. recurvedata/server/app.py +7 -0
  280. recurvedata/server/connector/__init__.py +0 -0
  281. recurvedata/server/connector/api.py +79 -0
  282. recurvedata/server/connector/schemas.py +28 -0
  283. recurvedata/server/data_service/__init__.py +0 -0
  284. recurvedata/server/data_service/api.py +126 -0
  285. recurvedata/server/data_service/client.py +18 -0
  286. recurvedata/server/data_service/consts.py +1 -0
  287. recurvedata/server/data_service/schemas.py +68 -0
  288. recurvedata/server/data_service/service.py +218 -0
  289. recurvedata/server/dbt/__init__.py +0 -0
  290. recurvedata/server/dbt/api.py +116 -0
  291. recurvedata/server/error_code.py +49 -0
  292. recurvedata/server/exceptions.py +19 -0
  293. recurvedata/server/executor/__init__.py +0 -0
  294. recurvedata/server/executor/api.py +37 -0
  295. recurvedata/server/executor/schemas.py +30 -0
  296. recurvedata/server/executor/service.py +220 -0
  297. recurvedata/server/main.py +32 -0
  298. recurvedata/server/schedulers/__init__.py +0 -0
  299. recurvedata/server/schedulers/api.py +252 -0
  300. recurvedata/server/schedulers/schemas.py +50 -0
  301. recurvedata/server/schemas.py +50 -0
  302. recurvedata/utils/__init__.py +15 -0
  303. recurvedata/utils/_typer.py +61 -0
  304. recurvedata/utils/attrdict.py +19 -0
  305. recurvedata/utils/command_helper.py +20 -0
  306. recurvedata/utils/compat.py +12 -0
  307. recurvedata/utils/compression.py +203 -0
  308. recurvedata/utils/crontab.py +42 -0
  309. recurvedata/utils/crypto_util.py +305 -0
  310. recurvedata/utils/dataclass.py +11 -0
  311. recurvedata/utils/date_time.py +464 -0
  312. recurvedata/utils/dispatch.py +114 -0
  313. recurvedata/utils/email_util.py +104 -0
  314. recurvedata/utils/files.py +386 -0
  315. recurvedata/utils/helpers.py +170 -0
  316. recurvedata/utils/httputil.py +117 -0
  317. recurvedata/utils/imports.py +132 -0
  318. recurvedata/utils/json.py +80 -0
  319. recurvedata/utils/log.py +117 -0
  320. recurvedata/utils/log_capture.py +153 -0
  321. recurvedata/utils/mp.py +178 -0
  322. recurvedata/utils/normalizer.py +102 -0
  323. recurvedata/utils/redis_lock.py +474 -0
  324. recurvedata/utils/registry.py +54 -0
  325. recurvedata/utils/shell.py +15 -0
  326. recurvedata/utils/singleton.py +33 -0
  327. recurvedata/utils/sql.py +6 -0
  328. recurvedata/utils/timeout.py +28 -0
  329. recurvedata/utils/tracing.py +14 -0
  330. recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
  331. recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
  332. recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
  333. recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
recurvedata/pigeon/dumper/cass.py
@@ -0,0 +1,213 @@
+ import multiprocessing
+
+ from cassandra.query import FETCH_SIZE_UNSET, SimpleStatement, tuple_factory
+
+ from recurvedata.pigeon.connector.cass import CassandraConnector
+ from recurvedata.pigeon.dumper.base import BaseDumper, SQLBasedWorker
+ from recurvedata.pigeon.utils import ensure_list, ensure_str_list, mp
+ from recurvedata.pigeon.utils import sql as sqlutils
+
+
+ class CassandraDumperWorker(SQLBasedWorker):
+     def dump_query(self, query, parameters=None):
+         # if isinstance(query, str):
+         #     query = sqlutils.sqlformat(query)
+         self.logger.info("running query `%s` with parameters: %s", query, parameters)
+
+         with self.connector.closing_session() as session:
+             session.row_factory = tuple_factory
+
+             # Cassandra 2.0+ offers support for automatic query paging.
+             result_set = session.execute(query, parameters, timeout=30)
+             schema = self.connector.get_data_schema(result_set)
+             self.set_input_schema(schema)
+             col_names = result_set.column_names
+             # col_types = result_set.column_types
+             for row in result_set:
+                 yield self.row_factory(col_names, row)
+
+
+ class CassandraDumper(BaseDumper):
+     def __init__(
+         self,
+         connector,
+         table,
+         columns=None,
+         where=None,
+         partition_column=None,
+         partitions=None,
+         splits=1,
+         concurrency=1,
+         page_size=FETCH_SIZE_UNSET,
+         consistency_level=None,
+         retries=3,
+         handler_factories=None,
+     ):
+         """The base class that dumps rows from Cassandra.
+
+         :param connector: the connector object
+         :type connector: pigeon.connector.cass.CassandraConnector
+         :param table: the table name
+         :type table: str
+         :param columns: columns to query
+         :type columns: list | str
+         :param where: where clause
+         :type where: str
+         :param partition_column: the partition column name
+         :type partition_column: str
+         :param partitions: specific partitions
+         :type partitions: list
+         :param concurrency: number of workers to dump data from partitions.
+             This is used only if `partitions` is not None, and
+             would be adjusted to min(len(partitions), concurrency).
+         :type concurrency: int
+         :param splits: deprecated, use `concurrency` instead
+         :param retries: max number of retries
+         :param handler_factories: handler factories to create handlers
+         :type handler_factories: list
+         """
+         super().__init__(handler_factories=handler_factories)
+
+         self.concurrency = concurrency or splits or 1
+
+         assert isinstance(connector, CassandraConnector)
+         self.connector = connector
+
+         self.table = table
+         self.columns = ensure_str_list(columns)
+         self.where = where
+
+         if partitions is not None and partition_column is None:
+             raise ValueError("partition_column must not be None")
+
+         self.partitions = ensure_str_list(partitions)
+         self.partition_column = partition_column
+         if self.partitions:
+             self.concurrency = min(len(self.partitions), self.concurrency)
+         self.retries = retries
+
+         self.page_size = page_size
+         self.consistency_level = consistency_level
+
+         self._base_query = self.construct_query()
+         self.worker_cls = CassandraDumperWorker
+
+         self.meta.context = {
+             "table": self.table,
+             "columns": self.columns,
+             "base_query": self.base_query,
+             "where": self.where,
+             "partition_column": self.partition_column,
+             "partitions": self.partitions,
+             "concurrency": self.concurrency,
+         }
+
+     @property
+     def base_query(self):
+         return self._base_query
+
+     def construct_query(self):
+         project = "*"
+         if self.columns:
+             project = ", ".join(self.columns)
+         query = "SELECT {} FROM {}".format(project, self.table)
+
+         if self.partitions:
+             query += " WHERE {} = %s".format(self.partition_column)
+
+         query = sqlutils.apply_where_safely(query, self.where)
+         return query.strip(";")
+
+     def execute(self):
+         self.meta.mark_start()
+         if self.concurrency <= 1:
+             rv = self.execute_in_serial()
+         else:
+             rv = self.execute_in_parallel()
+         self.meta.mark_finish()
+         self.collect_meta(rv)
+         self.logger.info("dump meta: %s", self.meta.to_json(indent=2))
+         self.handle_schema()
+         return self.meta
+
+     def _create_worker(self, **kwargs):
+         query = SimpleStatement(self.base_query, fetch_size=self.page_size, consistency_level=self.consistency_level)
+         options = {
+             "row_factory": self.row_factory,
+             "query": query,
+             "connector": self.connector,
+             "retries": self.retries,
+         }
+         options.update(kwargs)
+         return self.worker_cls(**options)
+
+     def execute_in_serial(self):
+         workers_meta = []
+         if self.partitions:
+             for idx, partition in enumerate(self.partitions):
+                 handlers = self.create_handlers()
+                 worker = self._create_worker(worker_id=1, task_id=idx, parameters=(partition,), handlers=handlers)
+                 workers_meta.append(worker.execute())
+         else:
+             handlers = self.create_handlers()
+             worker = self._create_worker(worker_id=1, task_id=1, parameters=None, handlers=handlers)
+             workers_meta.append(worker.execute())
+         self.join_handlers()
+         return workers_meta
+
+     def execute_in_parallel(self):
+         if not self.partitions:
+             self.logger.info("there are no partitions, falling back to a single process")
+             return self.execute_in_serial()
+
+         workers = []
+         result_queue = multiprocessing.Queue()
+         task_queue = multiprocessing.Queue()
+         for idx in range(self.concurrency):
+             p = multiprocessing.Process(target=self.run_worker, args=(idx, task_queue, result_queue))
+             p.start()
+             workers.append(p)
+
+         for idx, p in enumerate(self.partitions):
+             self.logger.info("sending partition %d %s to task queue", idx, p)
+             handlers = self.create_handlers()
+             task_queue.put((idx, p, handlers))
+
+         self.logger.info("sending finish signal to workers")
+         for _ in workers:
+             task_queue.put(None)
+
+         self.logger.info("waiting for workers to finish")
+         workers_meta = mp.safe_join_subprocesses(workers, result_queue)
+         self.join_handlers()
+
+         # some workers failed
+         num_total_tasks = len(self.partitions)
+         num_success_tasks = len(workers_meta)
+         if num_success_tasks < num_total_tasks:
+             raise RuntimeError(f"only {num_success_tasks} of {num_total_tasks} tasks succeeded")
+
+         return workers_meta
+
+     def run_worker(self, worker_id, task_queue, result_queue):
+         while True:
+             task = task_queue.get()
+             if task is None:
+                 self.logger.info("got None partition, exiting.")
+                 break
+             task_id, partition, handlers = task
+             worker = self._create_worker(
+                 worker_id=worker_id, task_id=task_id, parameters=(partition,), handlers=handlers
+             )
+             n = worker.execute()
+             result_queue.put(n)
+
+     def collect_meta(self, workers_meta):
+         workers_meta = ensure_list(workers_meta)
+         for meta in workers_meta:
+             self.meta.num_dumped_rows += meta.num_dumped_rows
+             for i, hf in enumerate(self.handler_factories):
+                 hf.meta.update(meta.handlers_meta[i])
+
+         self.meta.schema = workers_meta[0].schema
+         self.meta.handlers_meta = [x.meta for x in self.handler_factories]
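
For orientation, a minimal usage sketch of the dumper above. Only CassandraDumper's own signature comes from this diff; the CassandraConnector constructor arguments and handler-factory setup are assumptions for illustration.

    from recurvedata.pigeon.connector.cass import CassandraConnector
    from recurvedata.pigeon.dumper.cass import CassandraDumper

    # Hypothetical connector setup; the real constructor arguments are not shown in this diff.
    connector = CassandraConnector(hosts=["127.0.0.1"], keyspace="events")

    dumper = CassandraDumper(
        connector,
        table="user_events",
        columns=["user_id", "event_type", "created_at"],
        partition_column="dt",
        partitions=["2024-01-01", "2024-01-02"],  # one dump task per partition
        concurrency=2,                            # capped at len(partitions)
        handler_factories=None,                   # e.g. handler factories from pigeon.handler
    )
    meta = dumper.execute()  # DumperMeta with num_dumped_rows, schema, handlers_meta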
recurvedata/pigeon/dumper/dbapi.py
@@ -0,0 +1,346 @@
+ import datetime
+ import multiprocessing
+ import os
+ from typing import TYPE_CHECKING, Dict, List, Tuple, TypeVar, Union
+
+ from recurvedata.pigeon.connector.dbapi import DBAPIConnector
+ from recurvedata.pigeon.const import CLICKHOUSE_MAX_ROW_BUFFER
+ from recurvedata.pigeon.dumper.base import BaseDumper, SQLBasedWorker
+ from recurvedata.pigeon.utils import ensure_list, mp
+ from recurvedata.pigeon.utils import sql as sqlutils
+
+ if TYPE_CHECKING:
+     from recurvedata.pigeon.handler import HandlerFactory
+     from recurvedata.pigeon.meta import DumperMeta, DumperWorkerMeta
+
+ DONE = 'TASK_DONE'
+
+ T = TypeVar('T')
+
+
+ class DBAPIDumperWorker(SQLBasedWorker):
+     def dump_query(self, sql: str, parameters: Union[List, Tuple, Dict] = None):
+         # sql = sqlutils.sqlformat(sql)
+         self.logger.info('running query:\n%s\nwith parameters: %s', sql, parameters)
+
+         cursor_options = {'commit_on_close': False}
+         if self.connector.is_postgres() or self.connector.is_redshift():
+             ts = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
+             cursor_options['cursor_name'] = f'pigeon_{self.worker_id}_{ts}'
+         elif self.connector.is_clickhouse_native():
+             cursor_options.update({
+                 'stream': True,
+                 'max_rows': CLICKHOUSE_MAX_ROW_BUFFER
+             })
+
+         with self.connector.cursor(**cursor_options) as cursor:
+             if parameters:
+                 cursor.execute(sql, parameters)
+             else:
+                 cursor.execute(sql)
+
+             # Postgres uses a server-side cursor, so a row must be fetched first
+             # before cursor.description becomes available.
+             if self.connector.is_postgres() or self.connector.is_redshift():
+                 row = cursor.fetchone()
+                 schema = self.connector.cursor_to_schema(cursor)
+                 self.set_input_schema(schema)
+
+                 if row is None:
+                     # No results; return right away, otherwise fetching again below would raise:
+                     # opening multiple cursors from within the same client connection is not allowed.
+                     return
+             else:
+                 row = None
+
+                 schema = self.connector.cursor_to_schema(cursor)
+                 self.set_input_schema(schema)
+             col_names = schema.field_names
+
+             if row:
+                 yield self.row_factory(col_names, row)
+
+             for row in cursor:
+                 yield self.row_factory(col_names, row)
+
+
+ class DBAPIDumper(BaseDumper):
+     def __init__(
+         self,
+         connector: DBAPIConnector,
+         table: str = None,
+         sql: str = None,
+         splitby: str = None,
+         splits: int = 1,
+         concurrency: int = None,
+         retries: int = 3,
+         handler_factories: List['HandlerFactory'] = None
+     ):
+         """The base class that performs a dumping operation against a DBMS over DBAPI.
+
+         :param connector: the connector object
+         :param table: the table name; this is equivalent to passing sql as 'SELECT * FROM table'
+         :param sql: the sql query to perform
+         :param splitby: the column used to split tasks
+         :param splits: split the dump into `splits` tasks; each failed task is retried independently
+         :param concurrency: number of worker processes executing tasks concurrently
+         :param retries: max number of retries per task
+         :param handler_factories: handler factories to create handlers
+         """
+         super().__init__(handler_factories=handler_factories)
+
+         self.splits = splits
+         self.concurrency = splits if concurrency is None else concurrency
+         self.retries = retries
+
+         assert isinstance(connector, DBAPIConnector)
+         self.connector = connector
+
+         self.table = table
+         self.sql = sql
+         self._base_query = self.construct_query()
+         self.splitby = splitby
+
+         if not self.splitby:
+             self.logger.warning('split column is not set, resetting concurrency and splits to 1')
+             self.concurrency = 1
+             self.splits = 1
+
+         self.worker_cls = DBAPIDumperWorker
+
+         self.meta.context = {
+             'table': self.table,
+             'sql': self.sql,
+             'base_query': self.base_query,
+             'splitby': self.splitby,
+             'splits': self.splits,
+             'concurrency': self.concurrency,
+         }
+
+     @property
+     def base_query(self) -> str:
+         return self._base_query
+
+     def construct_query(self) -> str:
+         if self.sql:
+             query = self.sql
+         elif self.table:
+             query = f'SELECT * FROM {self.connector.quote_identifier(self.table)}'
+         else:
+             raise ValueError('either table or sql is required')
+
+         # if self.connector.is_mysql():
+         #     query = sqlutils.apply_sql_no_cache(query)
+         return query.strip(';')
+
+     def _create_worker(self, **kwargs) -> DBAPIDumperWorker:
+         return self.worker_cls(
+             **kwargs,
+             row_factory=self.row_factory,
+             connector=self.connector,
+             retries=self.retries
+         )
+
+     def execute(self) -> 'DumperMeta':
+         self.meta.mark_start()
+         if self.splits <= 1:
+             rv = self.execute_in_serial()
+         else:
+             rv = self.execute_in_parallel()
+         self.meta.mark_finish()
+         self.collect_meta(rv)
+         self.logger.info('dumper meta: %s', self.meta.to_json(indent=2))
+         self.handle_schema()
+         return self.meta
+
+     def execute_in_serial(self) -> List['DumperWorkerMeta']:
+         handlers = self.create_handlers()
+         worker = self._create_worker(worker_id=0, task_id=0, query=self.base_query,
+                                      parameters=None, handlers=handlers)
+
+         worker_meta = worker.execute()
+         self.join_handlers()
+         return [worker_meta]
+
+     def execute_in_parallel(self) -> List['DumperWorkerMeta']:
+         lower, upper = self._determine_boundary()
+         self.logger.info('got boundary: (%s, %s)', lower, upper)
+         if lower is None and upper is None:
+             self.logger.info('bad boundary values, falling back to a single process')
+             return self.execute_in_serial()
+         if lower == upper:
+             self.logger.info('lower and upper boundary are the same, falling back to a single process')
+             return self.execute_in_serial()
+
+         ranges = self._split_ranges(lower, upper, self.splits)
+
+         split_col = self.connector.quote_identifier(self.splitby)
+         tasks = []
+         for idx, (start, end) in enumerate(ranges):
+             include_upper = (idx == len(ranges) - 1)  # the last split should include the upper bound
+             if self.connector.is_impala() or self.connector.is_clickhouse_native():
+                 # As of 2018-05-30, impyla has a bug when formatting parameters passed
+                 # as a list or tuple, see
+                 # https://github.com/cloudera/impyla/pull/156#issuecomment-159790585
+                 # Ideally we would file an issue or PR against impyla, but the project
+                 # looks inactive, so work around it at this layer for now.
+                 markers = ['%(start)s', '%(end)s']
+                 params = {'start': start, 'end': end}
+             elif self.connector.is_azure_synapse() or self.connector.is_mssql() or self.connector.is_phoenix():
+                 markers = ['?', '?']
+                 params = (start, end)
+             else:
+                 markers = ['%s', '%s']
+                 params = (start, end)
+
+             # Phoenix raises an error when dates are passed as parameters,
+             # so format them into the SQL directly.
+             less_than = f'<{"=" if include_upper else ""}'
+             if self.connector.is_phoenix() and isinstance(start, datetime.date):
+                 where = f"{split_col} >= TIMESTAMP '{params[0]}' AND {split_col} {less_than} TIMESTAMP '{params[1]}'"
+                 params = None
+             else:
+                 # The f-string fills in the column and operator; %-formatting then
+                 # inserts the dialect-specific parameter placeholders.
+                 where = f'{split_col} >= %s AND {split_col} {less_than} %s' % tuple(markers)
+             query = sqlutils.apply_where_safely(self.base_query, where)
+             handlers = self.create_handlers()
+             tasks.append((idx, query, params, handlers))
+
+         task_queue = multiprocessing.Queue()
+         for task in tasks:
+             task_queue.put(task)
+         for i in range(self.concurrency):
+             task_queue.put(DONE)
+
+         workers = []
+         result_queue = multiprocessing.Queue()
+         for i in range(self.concurrency):
+             p = multiprocessing.Process(target=self.run_worker, args=(i, task_queue, result_queue))
+             workers.append(p)
+             p.start()
+
+         self.logger.info('waiting for workers to finish')
+         workers_meta, is_early_stop = mp.safe_join_subprocesses_early_stop(workers, result_queue)
+         if is_early_stop:
+             self.logger.info('early stop because some task failed, terminating all workers')
+             mp.terminate_processes(workers)
+             raise RuntimeError('early stop because some task failed')
+         # Take the input_schema from one of the workers and propagate it to the
+         # input_schema of every handler_factory.
+         for wm in workers_meta:
+             if wm.schema is not None:
+                 self.set_input_schema(wm.schema)
+
+         self.join_handlers()
+
+         # some workers failed
+         num_total_tasks = len(tasks)
+         num_success_tasks = len(workers_meta)
+         if num_success_tasks < num_total_tasks:
+             raise RuntimeError(f'only {num_success_tasks} of {num_total_tasks} tasks succeeded')
+
+         return workers_meta
+
+     def run_worker(self, worker_id: int, task_queue: multiprocessing.Queue, result_queue: multiprocessing.Queue):
+         pid = os.getpid()
+         self.logger.info(f'Worker#{worker_id} pid={pid} started')
+         while True:
+             task = task_queue.get()
+             if task == DONE:
+                 break
+             task_id, query, parameters, handlers = task
+             worker = self._create_worker(
+                 worker_id=worker_id,
+                 task_id=task_id,
+                 query=query,
+                 parameters=parameters,
+                 handlers=handlers)
+             result = worker.execute()
+             result_queue.put(result)
+         self.logger.info(f'Worker#{worker_id} pid={pid} exited')
+
+     def collect_meta(self, workers_meta: Union['DumperWorkerMeta', List['DumperWorkerMeta']]):
+         workers_meta = ensure_list(workers_meta)
+         for meta in workers_meta:
+             self.meta.num_dumped_rows += meta.num_dumped_rows
+             for hf, hm in zip(self.handler_factories, meta.handlers_meta):
+                 hf.meta.update(hm)
+
+         self.meta.schema = [x.schema for x in workers_meta if x.schema is not None][0]
+         self.meta.handlers_meta = [x.meta for x in self.handler_factories]
+
+     def _determine_boundary(self) -> Tuple[T, T]:
+         lower = self._select_min_max(self.splitby, max_=False)
+         upper = self._select_min_max(self.splitby, max_=True)
+         return lower, upper
+
+     def _select_min_max(self, col: str, max_: bool = False) -> T:
+         from_clause = sqlutils.extract_from_clause(self.base_query)
+         where_clause = sqlutils.extract_where_clause(self.base_query)
+         ctx = {
+             'col': self.connector.quote_identifier(col),
+             'f': f'{from_clause}\n',
+             'w': where_clause and f'{where_clause}\n' or '',
+             'direction': 'DESC' if max_ else 'ASC'
+         }
+         if self.connector.is_azure_synapse() or self.connector.is_mssql():
+             sql = 'SELECT TOP 1 {col} FROM {f} {w} ORDER BY {col} {direction}'.format(**ctx)
+         else:
+             sql = 'SELECT {col} FROM {f} {w} ORDER BY {col} {direction} LIMIT 1'.format(**ctx)
+         row = self.connector.fetchall(sql)
+         if row:
+             return row[0][0]
+         return None
+
+     @staticmethod
+     def _split_ranges(start: T, end: T, splits: int) -> List[Tuple[T, T]]:
+         assert end > start, 'end "{}" must be greater than start "{}"'.format(end, start)
+
+         convert_str = False
+         if isinstance(start, str):
+             convert_str = True
+             # Treat as date/datetime; only the '%Y-%m-%d', '%Y-%m-%d %H:%M:%S'
+             # and '%Y-%m-%d %H:%M:%S.%f' formats are supported.
+             if len(start) == len('2018-04-18'):
+                 # date
+                 start = datetime.datetime.strptime(start, '%Y-%m-%d').date()
+                 end = datetime.datetime.strptime(end, '%Y-%m-%d').date()
+             elif len(start) == len('2023-01-01 00:00:00.000000'):
+                 # datetime with microseconds
+                 start = datetime.datetime.strptime(start, '%Y-%m-%d %H:%M:%S.%f')
+                 end = datetime.datetime.strptime(end, '%Y-%m-%d %H:%M:%S.%f')
+             else:
+                 # datetime
+                 start = datetime.datetime.strptime(start, '%Y-%m-%d %H:%M:%S')
+                 end = datetime.datetime.strptime(end, '%Y-%m-%d %H:%M:%S')
+
+         if isinstance(start, datetime.datetime):
+             size, remain = divmod((end - start).total_seconds(), splits)
+
+             def delta(x):
+                 return datetime.timedelta(seconds=x)
+         elif isinstance(start, datetime.date):
+             # adjust the number of splits according to the number of days
+             days = (end - start).days
+             splits = min(days, splits)
+             size, remain = divmod(days, splits)
+
+             def delta(x):
+                 return datetime.timedelta(days=x)
+         else:
+             size, remain = divmod(end - start, splits)
+
+             def delta(x):
+                 return x
+
+         ranges = []
+         if size == 0:
+             return [(start, end)]
+
+         range_start = start
+         for i in range(splits):
+             range_end = range_start + delta(size)
+             if remain > 0:
+                 range_end += delta(1)
+                 remain -= 1
+             if i == splits - 1:
+                 range_end = end
+             if convert_str:
+                 ranges.append((str(range_start), str(range_end)))
+             else:
+                 ranges.append((range_start, range_end))
+             range_start = range_end
+         return ranges
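
The parallel path above hinges on _split_ranges, which divides the splitby boundary into per-task ranges; each task then queries `col >= start AND col < end`, with the last range including the upper bound. Since it is a static method, it can be exercised in isolation; a worked example, assuming the package and its dependencies are importable:

    from recurvedata.pigeon.dumper.dbapi import DBAPIDumper

    # Integer boundary 0..100 in 3 splits: divmod(100, 3) = (33, 1), so the
    # single remainder unit widens the first range and the last range is
    # clamped to `end`.
    print(DBAPIDumper._split_ranges(0, 100, 3))
    # [(0, 34), (34, 67), (67, 100)]

    # Date strings are recognized by length, split by whole days, then
    # converted back to strings for use as query parameters.
    print(DBAPIDumper._split_ranges('2018-04-01', '2018-04-10', 3))
    # [('2018-04-01', '2018-04-04'), ('2018-04-04', '2018-04-07'), ('2018-04-07', '2018-04-10')]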
recurvedata/pigeon/dumper/es.py
@@ -0,0 +1,112 @@
+ from recurvedata.pigeon.connector.es import ElasticSearchConnector
+ from recurvedata.pigeon.dumper.base import BaseDumper
+ from recurvedata.pigeon.row_factory import ordered_dict_factory
+ from recurvedata.pigeon.utils import ensure_str_list, extract_dict
+
+
+ class ElasticSearchDumper(BaseDumper):
+     _row_factory = staticmethod(ordered_dict_factory)
+
+     def __init__(
+         self,
+         connector,
+         index,
+         query=None,
+         doc_type=None,
+         fields=None,
+         meta_fields=None,
+         search_kwargs=None,
+         handler_factories=None,
+     ):
+         super().__init__(handler_factories=handler_factories)
+
+         assert isinstance(connector, ElasticSearchConnector)
+         self.es = connector
+
+         self.index = index
+         self.doc_type = doc_type
+         self.query = query
+         self.fields = ensure_str_list(fields) or None
+         self.meta_fields = ensure_str_list(meta_fields) or None
+         self.search_kwargs = search_kwargs or {}
+
+         self.meta.context = {
+             "index": self.index,
+             "doc_type": self.doc_type,
+             "query": self.query,
+             "fields": self.fields,
+             "meta_fields": self.meta_fields,
+             "search_kwargs": self.search_kwargs,
+         }
+         self.meta.schema = self.get_result_schema()
+         self.result_fields = self.meta.schema.field_names
+
+     @property
+     def row_factory(self):
+         return ordered_dict_factory
+
+     @row_factory.setter
+     def row_factory(self, factory):
+         raise ValueError(f"{self.__class__.__name__}.row_factory is ordered_dict_factory and is read-only")
+
+     def execute(self):
+         self.meta.mark_start()
+         self.execute_impl()
+         self.meta.mark_finish()
+         self.logger.info("dumper meta: %s", self.meta.to_json(indent=2))
+         self.handle_schema()
+         return self.meta
+
+     def execute_impl(self):
+         handlers = self.create_handlers()
+         for i, h in enumerate(handlers):
+             h.set_input_schema(self.meta.schema)
+             self.logger.info("Handler #%s: %s", i, h)
+
+         for hit in self.iter_result():
+             doc = self.flat_hit(hit)
+
+             # keep field order and patch missing fields
+             values = [doc.get(k) for k in self.result_fields]
+             ordered_doc = self.row_factory(self.result_fields, values)
+             for h in handlers:
+                 h.handle(ordered_doc)
+
+         for hf, h in zip(self.handler_factories, handlers):
+             hf.meta.update(h.meta)
+         self.meta.handlers_meta = [x.meta for x in self.handler_factories]
+
+         for h in handlers:
+             h.close()
+         self.join_handlers()
+
+     def iter_result(self):
+         res = self.es.scan(self.query, self.index, self.doc_type, self.fields, **self.search_kwargs)
+         n = 0
+         t = self.start_timer()
+         for hit in res:
+             yield hit
+             n += 1
+             if n % 20000 == 0:
+                 t.info("dumped %d rows", n)
+         t.info("dumped %d rows in total", n)
+         self.meta.num_dumped_rows = n
+
+     def flat_hit(self, hit):
+         rv = hit["_source"]
+         if self.fields:
+             rv = extract_dict(rv, self.fields)
+         if self.meta_fields:
+             rv.update(extract_dict(hit, self.meta_fields))
+
+         return rv
+
+     def get_result_schema(self):
+         schema = self.es.get_schema(self.index, self.doc_type)
+         if self.fields:
+             schema.keep_fields(self.fields)
+
+         if self.meta_fields:
+             for name in self.meta_fields:
+                 schema.add_field_by_attrs(name, self.es.get_meta_field_type(name))
+         return schema
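
A sketch of how the pieces above fit together. The ElasticSearchConnector constructor arguments here are assumptions, not confirmed API; what the diff does show is that `fields` trims each hit's _source and `meta_fields` merges hit metadata such as _id into the row (see flat_hit).

    from recurvedata.pigeon.connector.es import ElasticSearchConnector
    from recurvedata.pigeon.dumper.es import ElasticSearchDumper

    # Hypothetical connector setup; real constructor arguments are not shown in this diff.
    es = ElasticSearchConnector(hosts=["http://localhost:9200"])

    dumper = ElasticSearchDumper(
        es,
        index="orders",
        query={"query": {"range": {"created_at": {"gte": "2024-01-01"}}}},
        fields=["user", "amount", "created_at"],  # keep only these _source fields
        meta_fields=["_id"],                      # merged into each row by flat_hit
    )
    meta = dumper.execute()
    print(meta.num_dumped_rows)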