recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of recurvedata-lib might be problematic.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0

recurvedata/pigeon/connector/feishu.py

@@ -0,0 +1,1135 @@
import datetime
import json
import logging
import os
import re
import time
from collections import namedtuple
from io import BytesIO
from typing import Dict, List, Tuple, Union
from urllib.parse import unquote

import pandas
import requests

try:
    # python 3.12: requests_toolbelt 0.10 conflicts with urllib3 2.0
    from requests_toolbelt import MultipartEncoder
except ImportError:
    pass


class ArgumentException(Exception):
    pass


class FeiShuException(Exception):
    pass


class FeiShuRenewTokenException(FeiShuException):
    pass


class FeiShuMessageException(FeiShuException):
    pass


class FeiShuDocumentException(FeiShuException):
    pass


class FeiShuUploadException(FeiShuDocumentException):
    pass


class FeiShuReadSheetException(FeiShuDocumentException):
    pass


class FeiShuWriteSheetException(FeiShuDocumentException):
    pass


class FeiShuReadExcelException(FeiShuDocumentException):
    pass


class FeiShuReadWikiException(FeiShuDocumentException):
    pass


class FeiShuWriteWikiException(FeiShuDocumentException):
    pass


class FeiShuCreateFolderException(FeiShuDocumentException):
    pass


class FeiShuListChildrenException(FeiShuDocumentException):
    pass


class FeiShuDeleteFileException(FeiShuDocumentException):
    pass


class FeiShuReadBitableException(FeiShuDocumentException):
    pass


Field = namedtuple("Field", ["field_id", "field_name", "type", "property"])
logger = logging.getLogger(__name__)


class FeishuBot:
    APP_ID: str = ""
    APP_SECRET: str = ""

    def __init__(self, app_id=APP_ID, app_secret=APP_SECRET):
        self._host = "https://open.feishu.cn/open-apis"
        self._app_id = app_id
        self._app_secret = app_secret
        self._tenant_access_token = None
        # self._renew_tenant_access_token()  # token valid for 2 hours
        self._tenant_access_token_expiration: int = 0
        self.type_mapping = {  # file_token prefix and corresponding type
            "boxcn": "file",
            "shtcn": "sheet",
            "doccn": "doc",
            "bascn": "bitable",
            "doxcn": "docx",
            "bmncn": "mindnote",
        }
        self._session: requests.Session | None = None

    @property
    def session(self) -> requests.Session:
        if self._session is None:
            self._session = requests.Session()

        if self._should_renew_access_token():
            self._renew_tenant_access_token()
        self._session.headers["Authorization"] = f"Bearer {self._tenant_access_token}"
        return self._session

    def _should_renew_access_token(self) -> bool:
        if self._tenant_access_token is None:
            return True
        if time.time() >= self._tenant_access_token_expiration:
            return True
        return False

    def _renew_tenant_access_token(self):
        logger.info("Attempting to renew tenant_access_token ...")
        url = "https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal/"
        payload = {"app_id": self._app_id, "app_secret": self._app_secret}
        # up to 3 attempts
        renew_status = False
        for i in range(3):
            resp = requests.post(url, json=payload, timeout=60)  # 60-second timeout
            resp.raise_for_status()
            data = resp.json()
            if data["code"] == 0:
                self._tenant_access_token = data["tenant_access_token"]
                self._tenant_access_token_expiration = (
                    time.time() + data["expire"] - 300
                )  # the server-side validity is treated as slightly shorter, so the token is renewed in advance
                renew_status = True
                break
            else:
                logger.warning(f"Failed to renew token, retry attempt {i + 1}")
        if renew_status:
            logger.info("Successfully renewed tenant_access_token")
        else:
            raise FeiShuRenewTokenException("Failed to renew token")
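
    # --- Illustrative note, not part of the released file: token lifecycle ---
    # Feishu's tenant_access_token response carries an `expire` field in seconds
    # (roughly 2 hours). The renewal above records the expiration 300 s early:
    #
    #     expiration = time.time() + expire - 300
    #
    # so a token reported as valid for 7200 s is refreshed after ~6900 s, which
    # avoids sending a request with a token that expires while in flight.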

    def _request(self, method: str, path: str, params=None, json=None, data=None, files=None, headers=None) -> dict:
        # TODO: add retry
        url = f'{self._host}/{path.lstrip("/")}'
        # 10-minute timeout
        resp = self.session.request(
            method, url, params=params, data=data, json=json, files=files, headers=headers, timeout=600
        )
        logger.info(f"{method} {url} {params}, duration: {resp.elapsed.total_seconds() * 1000:.2f}ms")
        resp_data = resp.json()  # keep the `data` argument intact for the retry below
        try:
            resp.raise_for_status()
            if resp_data.get("code") in (99991663, 99991668):
                self._renew_tenant_access_token()
        except Exception:
            if resp_data.get("code") in (99991663, 99991668):
                logger.info("tenant access token expired, try to renew and request again")
                self._renew_tenant_access_token()
                return self._request(method, path, params, json, data, files, headers)
        return resp_data

    def _iter_pages(self, path: str, params: dict = None, page_size=100, headers=None):
        has_more = True
        page_token = None
        while has_more:
            query = {
                "page_token": page_token,
                "page_size": page_size,
            }
            if params:
                query.update(params)
            resp = self._request("GET", path, params=query, headers=headers)
            data = resp["data"]
            has_more = data["has_more"]
            if has_more:
                page_token = data["page_token"]
            for item in data["items"]:
                yield item
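
    # --- Illustrative note, not part of the released file: pagination ---
    # `_iter_pages` wraps Feishu's cursor pagination (`page_token`/`has_more`)
    # in a generator, so callers stream items without tracking cursors.
    # Assuming valid app credentials, listing every chat the bot belongs to
    # could look like:
    #
    #     bot = FeishuBot(app_id="cli_xxx", app_secret="...")  # hypothetical credentials
    #     for chat in bot._iter_pages("/im/v1/chats", {"user_id_type": "open_id"}):
    #         print(chat["chat_id"], chat["name"])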

    def get_group_list(self):
        """
        Get all groups that the application has been added to
        """
        group_lst = self._iter_pages("/im/v1/chats", {"user_id_type": "open_id"})
        result = [(item["chat_id"], item["name"]) for item in group_lst]
        return result

    def get_user_email(self, open_id):
        """
        Get a user's email: https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/contact-v3/user/get
        """
        path = f"/contact/v3/users/{open_id}"
        params = {"user_id_type": "open_id", "department_id_type": "open_department_id"}
        resp = self._request("GET", path, params=params)
        if resp["code"] == 0:
            return resp["data"]["user"]["enterprise_email"]
        else:
            raise FeiShuException(f"code: {resp.get('code')}, msg: {resp.get('msg')}")

    def get_group_members(self, chat_id):
        """
        Get the list of group members: https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/im-v1/chat-members/get
        return [(username, email, open_id)]
        """
        path = f"/im/v1/chats/{chat_id}/members"
        member_lst = self._iter_pages(path, {"member_id_type": "open_id"})
        result = [(i["name"], self.get_user_email(i["member_id"]), i["member_id"]) for i in member_lst]
        return result

    def get_name_by_chat_id(self, chat_id):
        """
        Get the group name by chat_id: https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/im-v1/chat/get
        """
        path = f"/im/v1/chats/{chat_id}"
        params = {"user_id_type": "open_id"}
        resp = self._request("GET", path, params=params)
        if resp["code"] == 0:
            return resp["data"]["name"]
        else:
            raise FeiShuException(f"code: {resp.get('code')}, msg: {resp.get('msg')}")

    def get_chat_id_by_name(self, group_name):
        group_lst = self._iter_pages("/im/v1/chats", {"user_id_type": "open_id"})
        result = []
        for group in group_lst:
            if group["name"] == group_name:
                result.append(group["chat_id"])
        if not result:
            logger.info(f"Group {group_name} was not found in Feishu!")
        return result

    def get_open_id_by_email(self, email_lst):
        resp = self._request("GET", "/user/v1/batch_get_id", params={"emails": email_lst})
        email_users = resp["data"]["email_users"]
        open_id_dct = {k: email_users[k][0]["open_id"] for k in email_users}
        return open_id_dct

    def send_message(
        self,
        receiver_type="user",
        user_email="",
        chat_name="",
        chat_id="",
        msg_type="text",
        content='{"text":"Feishu notification"}',
    ):
        """
        API documentation: https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/im-v1/message/create
        :param receiver_type: if the receiver is a user, user_email must be given; if the receiver is a group, chat_name or chat_id must be given
        :param user_email: Feishu email
        :param chat_name: Feishu group name
        :param chat_id: Feishu group chat_id
        :param msg_type: message type: text, post, image, file, audio, media, sticker, interactive, share_chat, share_user.
            See https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/im-v1/message/create_json for how to build each type of message
        :param content: message content, a JSON string
        :return: response.json()
        """
        if receiver_type not in ("user", "group"):
            raise ValueError("receiver_type must be user or group.")

        if receiver_type == "user":
            if not user_email:
                raise ValueError("valid user_email is required for receiver_type=user.")
            params = {"receive_id_type": "email"}
            receive_id = user_email
        else:
            if not chat_id:
                if not chat_name:
                    raise ValueError("chat_id or chat_name is required for receiver_type=group.")
                # TODO: groups with the same name may conflict; needs handling
                chat_ids = self.get_chat_id_by_name(chat_name)
                if not chat_ids:
                    raise ValueError(f"Group {chat_name} was not found in Feishu.")
                chat_id = chat_ids[0]
            params = {"receive_id_type": "chat_id"}
            receive_id = chat_id

        body = {
            "receive_id": receive_id,
            "content": content,
            "msg_type": msg_type,
        }
        resp = self._request("POST", "/im/v1/messages", params=params, json=body)
        return resp
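
    # --- Illustrative note, not part of the released file: send_message ---
    # `content` is a JSON *string*, not a dict, so it is built with json.dumps.
    # Given a FeishuBot instance `bot`, a minimal text message to a group the
    # bot is a member of could look like:
    #
    #     bot.send_message(
    #         receiver_type="group",
    #         chat_name="data-alerts",                      # hypothetical group name
    #         msg_type="text",
    #         content=json.dumps({"text": "ETL finished"}),
    #     )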

    def send_card(
        self,
        receiver_type="user",
        user_email="",
        chat_name=None,
        chat_id=None,
        email_lst=None,
        subject="Data Refresh Notification",
        subject_bg_color="green",
        table="",
        table_row_num="",
        oneflow_url="",
        airflow_url="",
        log_url="",
        extra_info="",
        card=None,
        image_lst=None,
    ):
        """
        :param receiver_type: if the receiver is a user, user_email must be given; if the receiver is a group, chat_name or chat_id must be given
        :param user_email: Feishu email
        :param chat_name: Feishu group name
        :param chat_id: Feishu group chat_id
        :param email_lst: list of Feishu emails to @-mention, or "all"
        :param subject: card title
        :param subject_bg_color: card title background color, default green; for other colors see the Feishu API document
            https://open.feishu.cn/document/ukTMukTMukTM/ukTNwUjL5UDM14SO1ATN
        :param table: data table name shown on the card
        :param table_row_num: data table row count shown on the card
        :param oneflow_url: OneFlow link button
        :param airflow_url: Airflow link button
        :param log_url: update-log link button
        :param extra_info: additional text
        :param card: custom message card; if given, it is sent as-is
        :param image_lst: list of image paths
        :return: response.json()
        """
        msg_type = "interactive"  # message cards use msg_type=interactive
        if not card:
            # if no card is passed in, build one from the parameters
            if email_lst == "all":
                at_lst = "<at id=all></at>"
            elif email_lst:
                at_lst = "".join([f"<at email={email}></at>" for email in email_lst])
            else:
                at_lst = ""  # no recipients to mention
            card = {
                "config": {"wide_screen_mode": True},
                "elements": [
                    {
                        "fields": [
                            {
                                "is_short": True,
                                "text": {
                                    "content": f"**⏱ Time:**\n{datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
                                    "tag": "lark_md",
                                },
                            },
                            {"is_short": True, "text": {"content": f"**✅ Data Table:**\n{table}", "tag": "lark_md"}},
                            {"is_short": False, "text": {"content": "", "tag": "lark_md"}},
                            {
                                "is_short": True,
                                "text": {"content": f"**📊 Data Row Number:**\n{table_row_num}", "tag": "lark_md"},
                            },
                            {
                                "is_short": True,
                                "text": {"content": f"**👨‍💻 Notified Person:**\n{at_lst}", "tag": "lark_md"},
                            },
                        ],
                        "tag": "div",
                    },
                ],
                "header": {"template": subject_bg_color, "title": {"content": subject, "tag": "plain_text"}},
            }
            # add text
            if extra_info:
                card["elements"].extend(
                    [
                        {"tag": "hr"},
                        {
                            "fields": [{"is_short": False, "text": {"content": extra_info, "tag": "lark_md"}}],
                            "tag": "div",
                        },
                    ]
                )
            # add images
            if image_lst:
                image_key_lst = [self.upload_image(image) for image in image_lst]
                # Feishu card markdown embeds images as ![alt](image_key)
                image_content = "\n".join([f"![image]({k})" for k in image_key_lst])
                card["elements"].extend(
                    [
                        {"tag": "hr"},
                        {
                            "tag": "markdown",
                            "content": image_content,
                        },
                    ]
                )
            # add buttons
            card["elements"].extend(
                [
                    {"tag": "hr"},
                    {
                        "actions": [
                            {
                                "tag": "button",
                                "text": {"content": " Update Log ", "tag": "plain_text"},
                                "type": "default",
                                "url": log_url,
                            },
                            {
                                "tag": "button",
                                "text": {"content": " OneFlow ", "tag": "plain_text"},
                                "type": "default",
                                "url": oneflow_url,
                            },
                            {
                                "tag": "button",
                                "text": {"content": " AirFlow ", "tag": "plain_text"},
                                "type": "default",
                                "url": airflow_url,
                            },
                        ],
                        "tag": "action",
                    },
                ]
            )
        return self.send_message(
            receiver_type=receiver_type,
            user_email=user_email,
            chat_name=chat_name,
            chat_id=chat_id,
            msg_type=msg_type,
            content=json.dumps(card),
        )
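
    # --- Illustrative note, not part of the released file: card structure ---
    # The generated payload is a Feishu interactive card: a `header` (title plus
    # background color) and an `elements` list of `div`, `hr`, `markdown`, and
    # `action` blocks. A caller-supplied `card` dict of the same shape is sent
    # as-is, e.g. a minimal custom card might be:
    #
    #     card = {
    #         "config": {"wide_screen_mode": True},
    #         "header": {"template": "red", "title": {"content": "Job failed", "tag": "plain_text"}},
    #         "elements": [{"tag": "div", "text": {"content": "see logs", "tag": "lark_md"}}],
    #     }
    #     bot.send_card(receiver_type="group", chat_id="oc_xxx", card=card)  # hypothetical chat_id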

    def send_text(
        self,
        receiver_type: str = "user",
        user_email: str = "",
        chat_name: str = None,
        chat_id: str = None,
        email_lst: Union[str, List[str]] = None,
        subject: str = "Data Notification",
        content: str = "",
        image_lst: List = None,
    ):
        """
        :param receiver_type: if the recipient is a user, user_email must be given; if the recipient is a group, chat_name or chat_id must be given
        :param user_email: Feishu email
        :param chat_name: Feishu group name
        :param chat_id: Feishu group chat_id
        :param email_lst: list of Feishu emails to @-mention, or "all"
        :param subject: rich-text message title
        :param content: rich-text message body
        :param image_lst: list of image paths
        :return: response.json()
        """
        msg_type = "post"  # rich-text messages use msg_type=post
        content = {
            "zh_cn": {
                "title": subject,
                "content": [
                    [{"tag": "text", "text": content}],
                ],
            }
        }
        if email_lst == "all":
            at_lst = [{"tag": "at", "user_id": "all", "user_name": "all"}]
        else:
            at_lst = []
            if email_lst:
                open_id_dct = self.get_open_id_by_email(email_lst)
                for k in open_id_dct:
                    at_lst.append(
                        {
                            "tag": "at",
                            "user_id": open_id_dct[k],
                            "user_name": k,
                        }
                    )
        if at_lst:
            content["zh_cn"]["content"].append(at_lst)
        if image_lst:
            content["zh_cn"]["content"].append(
                [{"tag": "img", "image_key": self.upload_image(image)} for image in image_lst]
            )
        return self.send_message(
            receiver_type=receiver_type,
            user_email=user_email,
            chat_name=chat_name,
            chat_id=chat_id,
            msg_type=msg_type,
            content=json.dumps(content),
        )

    def upload_image(self, file_path, image_type="message") -> str:
        path = "/im/v1/images"
        with open(file_path, "rb") as image_file:
            form = {"image_type": image_type, "image": image_file}
            multi_form = MultipartEncoder(form)
            resp = self._request("POST", path, headers={"Content-Type": multi_form.content_type}, data=multi_form)
        if resp["code"] == 0:
            return resp["data"]["image_key"]
        else:
            raise FeiShuUploadException(f"code: {resp.get('code')}, msg: {resp.get('msg')}")

    def upload_file(self, file_path, parent_node="fldcn36aedZjP3L5Vj7QAoi5HQd", overwrite=True) -> str:
        """
        @param file_path: absolute file path
        @param parent_node: unique id of a shared folder; the default folder is "云文档/共享空间/feishu_loader_test"
        @param overwrite: default True; pre-existing files with the same name are deleted after uploading
        @return: the file url
        """
        file_size = os.path.getsize(file_path)
        file_upload_info = {
            "share_link": "https://yimiandata.feishu.cn/file/{file_token}",
            "file_path": file_path,
            "file_size": file_size,
            "parent_node": parent_node,
        }
        # 20971520 bytes is the 20 MiB limit: at or below it, use the small-file
        # upload; above it, use the chunked large-file upload
        if file_size <= 20971520:
            logger.info(f"Ready to upload file: {file_path}, size: {file_size}, use small file upload")
            rv = self._upload_small_file(file_upload_info)
        else:
            logger.info(f"Ready to upload file: {file_path}, size: {file_size}, use large file upload")
            rv = self._upload_large_file(file_upload_info)

        if rv and overwrite:
            # if the upload succeeded and mode is overwrite, delete pre-existing files with the same name
            file_shared_link = rv
            file_token = file_shared_link.split("/")[-1]
            file_name = os.path.basename(file_path)
            self.delete_file_by_title(title=file_name, parent_node=parent_node, keep_lst=[file_token])

        return rv
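
    # --- Illustrative note, not part of the released file: upload_file ---
    # The 20 MiB threshold selects between the single-request upload
    # (/drive/v1/files/upload_all) and the prepare/part/finish flow below.
    # Assuming the bot can write to the target folder:
    #
    #     link = bot.upload_file("/tmp/report.xlsx", parent_node="fldcnXXXX")  # hypothetical folder token
    #     print(link)  # e.g. https://yimiandata.feishu.cn/file/<file_token>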

    def _upload_small_file(self, file_upload_info: dict) -> str:
        file_path = file_upload_info["file_path"]
        parent_node = file_upload_info["parent_node"]
        file_size = os.path.getsize(file_path)
        file_name = os.path.basename(file_path)
        with open(file_path, "rb") as f:
            files = {
                "file": (file_name, f, "application/octet-stream"),
                "parent_type": (None, "explorer"),
                "parent_node": (None, parent_node),
                "size": (None, file_size),
                "file_name": (None, file_name),
            }

            logger.info(f"Start uploading process, file_name: {file_name}, file_size: {file_size}")
            logger.debug(f"small upload request body dict: {files}")
            resp = self._request("POST", "/drive/v1/files/upload_all", files=files)
        if resp["code"] == 0:
            # success
            file_token = resp["data"]["file_token"]
            file_shared_link = file_upload_info["share_link"].format(file_token=file_token)
            logger.info(f"upload succeeded, file token: {file_token}")
            logger.info(f"share link: {file_shared_link}")
            return file_shared_link
        else:
            raise FeiShuUploadException(f"upload small file failed, unknown error, response json: {resp}")

    def _upload_large_file(self, file_upload_info: dict):
        file_upload_info = self._upload_large_file_prepare(file_upload_info)
        file_upload_info = self._upload_large_file_multipart(file_upload_info)
        return self._upload_large_file_finish(file_upload_info)

    def _upload_large_file_prepare(self, file_upload_info):
        file_path = file_upload_info["file_path"]
        parent_node = file_upload_info["parent_node"]
        file_size = os.path.getsize(file_path)
        file_name = os.path.basename(file_path)
        body = {"file_name": file_name, "parent_type": "explorer", "parent_node": parent_node, "size": file_size}
        resp = self._request("POST", "/drive/v1/files/upload_prepare", json=body)
        if resp["code"] == 0:
            # success
            file_upload_info["prepare_resp"] = resp
            return file_upload_info
        else:
            raise FeiShuUploadException(f"upload_large_file_prepare failed, unknown error, response json: {resp}")

    def _upload_large_file_multipart(self, file_upload_info: dict) -> dict:
        file_path = file_upload_info["file_path"]
        prepare_resp = file_upload_info["prepare_resp"]
        file_size = os.path.getsize(file_path)
        file_name = os.path.basename(file_path)
        upload_id_str = prepare_resp["data"]["upload_id"]
        block_size = prepare_resp["data"]["block_size"]
        block_num = prepare_resp["data"]["block_num"]
        current_block_num = 0
        logger.info(
            f"Start uploading large file, file_size: {file_size}, block_size: {block_size}, "
            f"block_num in total: {block_num}"
        )

        with open(file_path, "rb") as f:
            while current_block_num < block_num:
                binary_data = f.read(block_size)
                files = {
                    "file": (file_name, binary_data, "application/octet-stream"),
                    "upload_id": (None, upload_id_str),
                    "seq": (None, f"{current_block_num}"),
                    "size": (None, len(binary_data)),
                }
                resp = self._request("POST", "/drive/v1/files/upload_part", files=files)
                if resp["code"] == 0:
                    logger.info(
                        f"upload_large_file_multipart: uploading block {current_block_num + 1} succeeded,"
                        f" progress: "
                        f"{current_block_num + 1}/{block_num}, {((current_block_num + 1) / block_num) * 100:.1f}%"
                    )
                else:
                    raise FeiShuUploadException(
                        f"upload_large_file_multipart failed, unknown error, response json: {resp}"
                    )
                current_block_num += 1

        file_upload_info["upload_id"] = upload_id_str
        file_upload_info["block_num"] = block_num
        return file_upload_info

    def _upload_large_file_finish(self, file_upload_info: dict):
        body = {"upload_id": file_upload_info["upload_id"], "block_num": file_upload_info["block_num"]}
        resp = self._request("POST", "/drive/v1/files/upload_finish", json=body)
        if resp["code"] == 0:
            file_token = resp["data"]["file_token"]
            file_shared_link = file_upload_info["share_link"].format(file_token=file_token)
            logger.info(f"upload large file succeeded, file token: {file_token}")
            logger.info(f"share link: {file_shared_link}")
            return file_shared_link
        else:
            raise FeiShuUploadException(f"upload_large_file_finish failed, unknown error, response json: {resp}")
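
    # --- Illustrative note, not part of the released file: chunking ---
    # upload_prepare returns the chunking plan and the loop above just follows
    # it; the server's block_num is effectively ceil(file_size / block_size).
    # For example, a 25 MiB file with block_size = 4 MiB uploads in
    # ceil(25 / 4) = 7 parts, the last part holding the remaining 1 MiB.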

    def read_feishusheet(
        self, file_token: str, sheet: str, use_raw=False, use_filter=False, **kwargs
    ) -> Union[pandas.DataFrame, List[List]]:
        """
        Read the content of one sheet in a Feishu spreadsheet document (not an uploaded excel or csv file).
        Example url: https://yimiandata.feishu.cn/sheets/shtcncglGS4VMi9CcU2GNaNhaVc?sheet=ep8dvw
        @param file_token: the file token of the Feishu document, `shtcncglGS4VMi9CcU2GNaNhaVc`
        @param sheet: the token of a sheet within the document, `ep8dvw`
        @param use_raw: default False; True returns a list of rows, False returns a pandas.DataFrame
        @param use_filter: default False; True reads only the cells within the sheet's filter range, False reads all cells
        @param kwargs: extra key-value pairs passed to `pandas.DataFrame()`
        @return: pandas.DataFrame
        """
        file_token = self.get_real_file_token(file_token)
        if use_filter:
            sheet = self.get_sheet_filter_range(file_token, sheet)
        path = f"/sheets/v2/spreadsheets/{file_token}/values/{sheet}"
        params = {"valueRenderOption": "ToString", "dateTimeRenderOption": "FormattedString"}
        resp = self._request("GET", path, params=params)
        if resp["code"] == 0:
            data_dict = resp["data"]["valueRange"]["values"]
            if use_raw:
                return data_dict
            if len(data_dict) == 0:
                return pandas.DataFrame()
            column_names = data_dict[0]
            data_rows = data_dict[1:]
            logger.info(f"Sheet header: {column_names}")
            logger.info(f"{len(data_rows)} rows are downloaded")
            return pandas.DataFrame(data_rows, columns=column_names, **kwargs)
        else:
            raise FeiShuReadSheetException(f"read_feishusheet: Unexpected error. response json: {resp}")
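
    # --- Illustrative note, not part of the released file: reading a sheet ---
    # Both identifiers come straight from the sheet url
    # https://.../sheets/<file_token>?sheet=<sheet_id>; the first row becomes
    # the DataFrame header:
    #
    #     df = bot.read_feishusheet("shtcncglGS4VMi9CcU2GNaNhaVc", "ep8dvw")
    #     rows = bot.read_feishusheet("shtcncglGS4VMi9CcU2GNaNhaVc", "ep8dvw", use_raw=True)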

    def get_sheet_filter_range(self, file_token: str, sheet: str) -> str:
        file_token = self.get_real_file_token(file_token)
        path = f"/sheets/v3/spreadsheets/{file_token}/sheets/{sheet}/filter"
        resp = self._request("GET", path)
        if resp["code"] == 0:
            data = resp["data"]
            if not data.get("sheet_filter_info"):
                raise ValueError("use_filter=True requires a filtered cell range in the feishu sheet.")
            cell_range = data["sheet_filter_info"]["range"]
            return cell_range
        else:
            raise FeiShuReadSheetException(f"get_sheet_filter_range: Unexpected error. response json: {resp}")

    def get_spreadsheets_metainfo(self, file_token: str) -> dict:
        """
        Get the metadata of a Feishu spreadsheet by its file_token
        """
        file_token = self.get_real_file_token(file_token)
        path = f"/sheets/v2/spreadsheets/{file_token}/metainfo"
        resp = self._request("GET", path)
        if resp["code"] == 0:
            return resp["data"]
        else:
            raise FeiShuReadSheetException(f"get_spreadsheets_metainfo: Unexpected error. response json: {resp}")

    def get_sheet_metainfo(self, file_token: str, sheet: str) -> dict:
        """
        Get the metadata of a single sheet in a Feishu spreadsheet by file_token and sheet id
        """
        file_token = self.get_real_file_token(file_token)
        spreadsheets_metainfo = self.get_spreadsheets_metainfo(file_token)
        for sheet_metainfo in spreadsheets_metainfo["sheets"]:
            if sheet == sheet_metainfo["sheetId"]:
                return sheet_metainfo
        raise FeiShuReadSheetException(f"get_sheet_metainfo: sheetId={sheet} does not exist.")

    def get_sheet_ids(self, file_token):
        """Get the sheet ids of a Feishu document

        Args:
            file_token (str): Feishu file token; the characters after https://yimiandata.feishu.cn/sheets/ are the token

        Returns:
            tuple: (sheet_ids, sheet_names)
        """
        file_token = self.get_real_file_token(file_token)
        # 1. Get the spreadsheet metadata, which maps each sheet title to its sheet_id
        path = f"/sheets/v2/spreadsheets/{file_token}/metainfo"
        resp = self._request("GET", path)
        sheet_ids = []
        sheet_names = []
        for sheet in resp["data"]["sheets"]:
            sheet_ids.append(sheet["sheetId"])
            sheet_names.append(sheet["title"])
        return sheet_ids, sheet_names

    def read_feishusheets(self, file_token: str, **kwargs) -> pandas.DataFrame:
        """
        Read and concatenate the content of all sheets in a Feishu spreadsheet document (not an uploaded excel or csv file).
        Example url: https://yimiandata.feishu.cn/sheets/shtcncglGS4VMi9CcU2GNaNhaVc?sheet=ep8dvw
        @param file_token: the file token of the Feishu document, `shtcncglGS4VMi9CcU2GNaNhaVc`
        @param kwargs: extra key-value pairs passed to `pandas.DataFrame()`
        @return: pandas.DataFrame
        """
        file_token = self.get_real_file_token(file_token)
        sheet_ids, sheet_names = self.get_sheet_ids(file_token)
        df_new = []
        for sheet in sheet_ids:
            _df = self.read_feishusheet(file_token, sheet, **kwargs)
            df_new.append(_df)
        return pandas.concat(df_new)

    def read_feishuexcel(self, file_token: str, is_excel: bool = True, **kwargs) -> pandas.DataFrame:
        """
        Read an Excel or CSV file uploaded to Feishu; the bot needs read permission on the document.
        Example Excel url: https://yimiandata.feishu.cn/file/boxcnJo72CHvjRdD2uTC3dvN5Oc
        Example Csv url: https://yimiandata.feishu.cn/file/boxcnZDcu7NSjfcHA7ioZwEA6Ye
        @param file_token: the token of the document, `boxcnJo72CHvjRdD2uTC3dvN5Oc`
        @param is_excel: kept for backward compatibility but no longer used; the format is detected from the response Content-Type
        @param kwargs: extra key-value pairs passed to `pandas.read_excel()` or `pandas.read_csv()`
        @return: pandas.DataFrame
        """
        url = f"https://open.feishu.cn/open-apis/drive/v1/files/{file_token}/download"
        resp = self.session.get(url)
        if resp.status_code == 200:
            # CSV vs Excel is determined from the Content-Type header, so is_excel has no effect
            logger.info(resp.headers)
            content_type = resp.headers.get("Content-Type", "").lower()
            if "csv" in content_type:
                return pandas.read_csv(BytesIO(resp.content), **kwargs)
            return pandas.read_excel(resp.content, **kwargs)

        elif resp.status_code == 404:
            raise FeiShuReadExcelException("file not found")
        elif resp.status_code == 403:
            raise FeiShuReadExcelException("Bot has no access to this file")
        elif resp.status_code == 400:
            response = resp.json()
            raise FeiShuReadExcelException(f"read_feishuexcel: Unexpected error. response json: {response}")
        else:
            raise FeiShuReadExcelException(
                f"read_feishuexcel: Unexpected error. response text: {resp.text}, status_code: {resp.status_code}"
            )

    def create_folder(self, title, parent_node):
        folder_token_lst = self.get_children_token(children_type="folder", children_name=title, parent_node=parent_node)
        if folder_token_lst:
            logger.warning(f"Folder {title} already exists in parent_node={parent_node}")
            return

        path = f"/drive/explorer/v2/folder/{parent_node}"
        resp = self._request("POST", path, json={"title": title})
        if resp["code"] == 0:
            url = resp["data"]["url"]
            logger.info(f"Folder {title} url: {url}")
            return url
        else:
            raise FeiShuCreateFolderException(f"create_folder: Unexpected error. response json: {resp}")

    def delete_file_by_title(self, title, parent_node, keep_lst=None):
        keep_lst = keep_lst or []  # avoid a shared mutable default argument
        file_token_lst = self.get_children_token(children_type="file", children_name=title, parent_node=parent_node)
        if not file_token_lst:
            logger.warning(f"File {title} does not exist in parent_node={parent_node}")
            return

        for file_token in file_token_lst:
            if file_token in keep_lst:
                continue
            self.delete_file_by_token(file_token)

    def delete_file_by_token(self, file_token):
        # can only delete files owned by the FeishuBot
        path = f"/drive/v1/files/{file_token}"
        resp = self._request("DELETE", path, params={"type": "file"})
        if resp["code"] == 0:
            logger.info(f"file_token={file_token} has been deleted")
        else:
            raise FeiShuDeleteFileException(f"delete_file: Unexpected error. response json: {resp}")

    def list_children(self, parent_node):
        path = f"/drive/explorer/v2/folder/{parent_node}/children"
        resp = self._request("GET", path)
        if resp["code"] == 0:
            children_lst = [v for k, v in resp["data"]["children"].items()]
            return children_lst
        else:
            raise FeiShuListChildrenException(f"list_children: Unexpected error. response json: {resp}")

    def get_children_token(self, children_type, children_name, parent_node):
        # children_type in ('doc', 'sheet', 'file', 'bitable', 'folder')
        token_lst = []
        children_lst = self.list_children(parent_node)
        if not children_lst:
            return token_lst
        for c in children_lst:
            if c["type"] == children_type and c["name"] == children_name:
                token_lst.append(c["token"])
        return token_lst

    def _get_bitable_tables(self, file_token: str) -> Dict[str, str]:
        """
        Get the list of data tables in a multi-dimensional table (bitable)
        :param file_token: the token of the bitable, i.e. the app_token in the Feishu developer documentation
        :return: Dict<table_name, table_id>
        """
        path = f"/bitable/v1/apps/{file_token}/tables"
        gen = self._iter_pages(path, page_size=10)
        table_dict = {}
        for item in gen:
            table_dict[item["name"]] = item["table_id"]
        return table_dict

    def _get_bitable_table_fields(self, file_token: str, table_id: str) -> List:
        """
        Get the fields of a data table in a multi-dimensional table (bitable)
        Field descriptions: `https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-field/guide`
        :param file_token: the token of the bitable, i.e. the app_token in the Feishu developer documentation
        :param table_id: data table id
        :return: List[(field_id, field_name, bitable field type, field property, which varies with the field type)]
        """
        path = f"/bitable/v1/apps/{file_token}/tables/{table_id}/fields"
        fields = []
        gen = self._iter_pages(path, page_size=10)
        for item in gen:
            fields.append(Field(item["field_id"], item["field_name"], item["type"], item["property"]))
        return fields
def _get_bitable_table_records(self, file_token: str, table_id: str, fields: List, **kwargs) -> pandas.DataFrame:
|
|
846
|
+
"""
|
|
847
|
+
Get multi-dimensional table-data table records
|
|
848
|
+
Field type (details refer to `https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/bitable-v1/app-table-field/guide`)
|
|
849
|
+
1: Multi-line text
|
|
850
|
+
2: Number
|
|
851
|
+
3: Single selection
|
|
852
|
+
4: Multiple selection
|
|
853
|
+
5: Date
|
|
854
|
+
7: Checkbox
|
|
855
|
+
11: Personnel
|
|
856
|
+
15: Hyperlink
|
|
857
|
+
17: Attachment
|
|
858
|
+
18: One-way association
|
|
859
|
+
19: Find reference
|
|
860
|
+
20: Formula
|
|
861
|
+
21: Two-way association
|
|
862
|
+
1001: Creation time
|
|
863
|
+
1002: Last update time
|
|
864
|
+
1003: Creator
|
|
865
|
+
1004: Modifier
|
|
866
|
+
1005: Automatic number
|
|
867
|
+
:param file_token: The token of the multi-dimensional table, which is the app_token in the Feishu development document
|
|
868
|
+
:param table_id: Data table id
|
|
869
|
+
:param fields: Header field list
|
|
870
|
+
:param pk: Primary key column name
|
|
871
|
+
:param kwargs: Parameters passed to pandas
|
|
872
|
+
:return: Data table record df
|
|
873
|
+
"""
|
|
874
|
+
path = f"/bitable/v1/apps/{file_token}/tables/{table_id}/records"
|
|
875
|
+
params = {
|
|
876
|
+
"display_formula_ref": True, # Control whether to display the complete original result of the formula and find reference, please refer to the Feishu development document for specific instructions
|
|
877
|
+
"automatic_fields": True, # Control whether to return automatically calculated fields, please refer to the Feishu development document for specific instructions
|
|
878
|
+
}
|
|
879
|
+
records = []
|
|
880
|
+
gen = self._iter_pages(path, params=params, page_size=100)
|
|
881
|
+
for item in gen:
|
|
882
|
+
record = item.get("fields", {})
|
|
883
|
+
bitable_raw_data = json.dumps(record, ensure_ascii=False)
|
|
884
|
+
new_record = []
|
|
885
|
+
for field in fields:
|
|
886
|
+
field_name = field.field_name
|
|
887
|
+
if record.get(field_name) is None:
|
|
888
|
+
value = None
|
|
889
|
+
elif field.type in (1, 2, 3, 7, 20, 1005):
|
|
890
|
+
value = record[field_name]
|
|
891
|
+
elif field.type in (4,):
|
|
892
|
+
value = str(record[field_name])
|
|
893
|
+
elif field.type in (11,):
|
|
894
|
+
value = str([x["name"] for x in record[field_name]])
|
|
895
|
+
elif field.type in (5, 1001, 1002):
|
|
896
|
+
value = datetime.datetime.fromtimestamp(record[field_name] / 1000)
|
|
897
|
+
elif field.type in (17,):
|
|
898
|
+
value = str([x["url"] for x in record[field_name]])
|
|
899
|
+
elif field.type in (15,):
|
|
900
|
+
value = record[field_name].get("link")
|
|
901
|
+
elif field.type in (19,):
|
|
902
|
+
# todo: It seems that the find reference type supports multiple types of data, but currently only text type data is used in the demand, and the format of other types of data returned cannot be determined temporarily
|
|
903
|
+
value = str([x.get("text") for x in record[field_name].get("value", [])])
|
|
904
|
+
elif field.type in (1003, 1004):
|
|
905
|
+
value = record.get(field.field_name, {}).get("name")
|
|
906
|
+
else:
|
|
907
|
+
value = record[field_name]
|
|
908
|
+
new_record.append(value)
|
|
909
|
+
new_record.append(bitable_raw_data)
|
|
910
|
+
records.append(new_record)
|
|
911
|
+
|
|
912
|
+
col_names = [field.field_name for field in fields]
|
|
913
|
+
col_names.append("bitable_raw_data")
|
|
914
|
+
df = pandas.DataFrame(records, columns=col_names, **kwargs)
|
|
915
|
+
# Filter out the automatic number field columns
|
|
916
|
+
df.drop(labels=[field.field_name for field in fields if field.type == 1005], axis=1, inplace=True)
|
|
917
|
+
return df
|
|
918
|
+
|
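To illustrate the per-type dispatch above, here is a self-contained sketch that converts one fabricated Bitable record the same way (field types 1, 5 and 11; `Field` is the hypothetical stand-in from the earlier sketch):

    import datetime
    from collections import namedtuple

    Field = namedtuple("Field", ["field_id", "field_name", "type", "property"])

    # Fabricated record, shaped like item["fields"] from the records API.
    record = {
        "Title": "hello",                            # type 1: multi-line text
        "Created": 1672531200000,                    # type 5: date, millisecond timestamp
        "Owner": [{"name": "Alice", "id": "ou_x"}],  # type 11: personnel
    }
    fields = [
        Field("f1", "Title", 1, None),
        Field("f2", "Created", 5, None),
        Field("f3", "Owner", 11, None),
    ]

    row = []
    for field in fields:
        raw = record.get(field.field_name)
        if raw is None:
            value = None
        elif field.type in (5, 1001, 1002):
            # millisecond timestamp -> datetime, as in the connector
            value = datetime.datetime.fromtimestamp(raw / 1000)
        elif field.type in (11,):
            value = str([x["name"] for x in raw])
        else:
            value = raw
        row.append(value)
    print(row)  # ['hello', <local datetime for 2023-01-01 UTC>, "['Alice']"]
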
+    def read_bitable(self, file_token: str, table_names: List[str] = None, **kwargs) -> Dict[str, pandas.DataFrame]:
+        """
+        Read the data of a Feishu multi-dimensional table (Bitable) in 3 steps:
+        1. Get the list of data tables in the multi-dimensional table
+        2. Get the header field information for each data table
+        3. Get the data table records and parse each value according to its header field type
+        :param file_token: The token of the multi-dimensional table, i.e. the app_token in the Feishu development documentation
+        :param table_names: List of data tables to read; all data tables are read by default
+        :param kwargs: Parameters passed through to pandas.DataFrame
+        :return: Map<data table name, data table DataFrame>
+        """
+        file_token = self.get_real_file_token(file_token)
+        table_dict = self._get_bitable_tables(file_token)
+        table_df_dict = {}
+        if table_names is None:
+            table_names = table_dict.keys()
+
+        # Validate the table names first
+        for table_name in table_names:
+            if table_dict.get(table_name) is None:
+                raise ArgumentException(f"read_bitable: Wrong table name error: {table_name}")
+
+        for table_name in table_names:
+            table_id = table_dict[table_name]
+            fields = self._get_bitable_table_fields(file_token, table_id)
+            table_df = self._get_bitable_table_records(file_token, table_id, fields, **kwargs)
+            table_df_dict[table_name] = table_df
+
+        return table_df_dict
+
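A usage sketch for `read_bitable` (not runnable as-is: `connector` stands for a configured instance of this Feishu connector, and the app_token is a placeholder):

    # Hypothetical configured connector instance and placeholder app_token.
    dfs = connector.read_bitable("bascnXXXXXXXXXXXXXXXXXXXXXX", table_names=["orders"])
    orders = dfs["orders"]
    print(orders.columns)  # the table's fields plus the "bitable_raw_data" column
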
+    def get_real_file_token(self, file_token) -> str:
+        """Resolve a file_token to the token of the underlying object.
+        The first 5 characters of a file_token are fixed values that identify the document type:
+        Knowledge base - wikcn; Multi-dimensional table - bascn; Spreadsheet - shtcn; File - boxcn; New document - doxcn;
+        Mind map - bmncn; Document - doccn
+        """
+        # A wiki token only points at the real object, so one extra request is
+        # needed to fetch the file_token of the actual object; any other token
+        # is returned unchanged.
+        try:
+            _, file_token = self.get_wiki_type_token(wiki_token=file_token)
+            return file_token
+        except Exception:
+            return file_token
+
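The prefix convention in the docstring can be checked without an API call; a standalone sketch (the mapping mirrors the docstring above and is not an exhaustive list of Feishu token types):

    # Prefixes as listed in the docstring above (illustrative, not exhaustive).
    TOKEN_PREFIXES = {
        "wikcn": "wiki node",
        "bascn": "bitable",
        "shtcn": "sheet",
        "boxcn": "file",
        "doxcn": "docx",
        "bmncn": "mindnote",
        "doccn": "doc",
    }

    def guess_token_type(token: str) -> str:
        return TOKEN_PREFIXES.get(token[:5], "unknown")

    assert guess_token_type("wikcn1MAs8sOhEUF1LhiKPHRPZe") == "wiki node"
    assert guess_token_type("shtcnx8YVo30ML6G5GB6wjk6Pfh") == "sheet"
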
+    def get_wiki_type_token(self, wiki_token: str) -> Tuple[str, str]:
+        """
+        Get the document type and token of a node in the Feishu knowledge base
+        @param wiki_token: The node token of a Feishu knowledge base node, e.g. sheet: `wikcn1MAs8sOhEUF1LhiKPHRPZe`, multi-dimensional table: `wikcnBJcryeMkPQP4gxmR6YXP8g`
+        @return: (obj_type, obj_token)
+        """
+        resp = self._request("GET", "/wiki/v2/spaces/get_node", params={"token": wiki_token})
+        if resp["code"] == 0:
+            obj_type = resp["data"]["node"]["obj_type"]
+            obj_token = resp["data"]["node"]["obj_token"]
+            return obj_type, obj_token
+        else:
+            raise FeiShuReadWikiException(f"get_wiki_type_token: Unexpected error. response json: {resp}")
+
+    def get_document_metadata(self, file_tokens: List, with_url=False) -> dict:
+        """
+        Get document metadata: `https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/drive-v1/meta/batch_query`
+        Note: if an error occurs, re-upload the file and confirm that the token carries a file-type prefix
+        @param file_tokens: file token list, e.g. ['boxcnlAUCgIesXRBgndlifeR7af', 'shtcnx8YVo30ML6G5GB6wjk6Pfh']
+        @param with_url: Also return the url corresponding to each file token
+        """
+        if not (1 <= len(file_tokens) <= 200):
+            raise ArgumentException("file_tokens must contain between 1 and 200 tokens")
+        file_tokens = [self.get_real_file_token(x) for x in file_tokens]
+        request_docs = []
+        for token in file_tokens:
+            doc_type = None
+            for prefix, t in self.type_mapping.items():
+                if token.startswith(prefix):
+                    doc_type = t
+                    break
+            if not doc_type:
+                raise FeiShuException(f"Unsupported doc-type with token: {token}")
+            request_docs.append({"doc_token": token, "doc_type": doc_type})
+
+        path = "/drive/v1/metas/batch_query"
+        body = {"request_docs": request_docs, "with_url": with_url}
+        resp = self._request("POST", path, json=body)
+        if resp["code"] == 0:
+            success_list = resp["data"]["metas"]
+            failed_list = []
+            if resp["data"].get("failed_list"):
+                reason_dict = {
+                    970002: "Unsupported doc-type",
+                    970003: "No permission to access meta",
+                    970005: "Record not found (does not exist or has been deleted)",
+                }
+                for item in resp["data"]["failed_list"]:
+                    failed_list.append(
+                        {"doc_token": item["token"], "reason": reason_dict.get(item["code"], "Unknown reason")}
+                    )
+            return {"success": success_list, "failed": failed_list}
+        else:
+            raise FeiShuException(f"get_document_metadata: Unexpected error. response json: {resp}")
+
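A usage sketch for `get_document_metadata`, again assuming a configured `connector` instance (the tokens are the docstring's own examples):

    meta = connector.get_document_metadata(
        ["boxcnlAUCgIesXRBgndlifeR7af", "shtcnx8YVo30ML6G5GB6wjk6Pfh"],
        with_url=True,
    )
    for ok in meta["success"]:
        print(ok)
    for failed in meta["failed"]:
        print(failed["doc_token"], failed["reason"])
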
+    def get_file_object(self, file_token: str, save_path: str = None):
+        """
+        Get the file name and binary object: `https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/reference/drive-v1/file/download`
+        Notes: `https://yimiandata.feishu.cn/wiki/wikcnObMT9VE0Rc8J4FpR7ncWJf#UOS8fZ`
+        @param file_token: file token, such as 'shtcnx8YVo30ML6G5GB6wjk6Pfh'
+        @param save_path: Save path, such as /tmp, optional
+        @return: File name and file binary object
+        """
+        file_token = self.get_real_file_token(file_token)
+        url = f"https://open.feishu.cn/open-apis/drive/v1/files/{file_token}/download"
+        resp = self.session.get(url)
+
+        if resp.status_code == 200:
+            content_disposition = resp.headers.get("Content-Disposition")
+            # The filename follows the UTF-8'' marker of the RFC 5987 filename* parameter
+            filename = unquote(re.findall(r"''(.*?)$", content_disposition)[-1])
+            file_object = BytesIO(resp.content)
+            if save_path:
+                save_file_path = os.path.join(save_path, file_token)
+                if not os.path.exists(save_file_path):
+                    os.makedirs(save_file_path)
+                    logger.info("Create directory: " + save_file_path)
+                full_path = os.path.join(save_file_path, filename)
+                try:
+                    with open(full_path, "wb") as file:
+                        logger.info(f"Start writing file {full_path}")
+                        file.write(file_object.getvalue())
+                        logger.info(f"File {filename} successfully written to {full_path}")
+                except Exception as e:
+                    raise Exception(f"Failed to write {full_path}: {e}")
+
+            return filename, file_object
+        elif resp.status_code == 404:
+            raise FeiShuException("file not found")
+        elif resp.status_code == 403:
+            raise FeiShuException("Bot has no access to this file")
+        else:
+            raise FeiShuException(
+                f"get_file_object: Unexpected error. response text: {resp.text}, status_code: {resp.status_code}"
+            )
+
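The filename extraction above expects the RFC 5987 `filename*=UTF-8''...` form of the Content-Disposition header; a standalone illustration of that parsing step:

    import re
    from urllib.parse import unquote

    # Example header in the RFC 5987 form expected by get_file_object.
    content_disposition = "attachment; filename*=UTF-8''%E6%8A%A5%E8%A1%A8.xlsx"
    filename = unquote(re.findall(r"''(.*?)$", content_disposition)[-1])
    print(filename)  # 报表.xlsx
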
+    def write_feishusheet(self, file_token: str, sheet: str, cell_range: str, values: List):
+        """
+        Write a specific cell range of one sheet in a Feishu spreadsheet (not excel or csv)
+        Example url: `https://yimiandata.feishu.cn/sheets/shtcncglGS4VMi9CcU2GNaNhaVc?sheet=ep8dvw`
+        @param file_token: The file token of the Feishu spreadsheet, `shtcncglGS4VMi9CcU2GNaNhaVc`
+        @param sheet: The token of a sheet within the spreadsheet, `ep8dvw`
+        @param cell_range: The cell range to write, e.g. `A1:B1`
+        @param values: Rows of cell values to write
+        """
+        file_token = self.get_real_file_token(file_token)
+        path = f"/sheets/v2/spreadsheets/{file_token}/values"
+        body = {"valueRange": {"range": f"{sheet}!{cell_range}", "values": values}}
+        resp = self._request("PUT", path, json=body)
+        if resp["code"] == 0:
+            logger.info(f"write_sheet: {resp['data']}")
+        else:
+            raise FeiShuWriteSheetException(f"write_sheet: Unexpected error. response json: {resp}")
+
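A usage sketch for `write_feishusheet`, writing a header row plus one data row (assuming a configured `connector`; the tokens are the docstring's own examples):

    connector.write_feishusheet(
        "shtcncglGS4VMi9CcU2GNaNhaVc",  # spreadsheet file token
        "ep8dvw",                       # sheet token within the spreadsheet
        "A1:B2",                        # target cell range
        [["name", "count"], ["apple", 3]],
    )
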
+    def clear_sheet_contents(self, file_token: str, sheet: str):
+        file_token = self.get_real_file_token(file_token)
+        sheet_metainfo = self.get_sheet_metainfo(file_token, sheet)
+        max_row = sheet_metainfo["rowCount"]
+        max_col = sheet_metainfo["columnCount"]
+        max_col_letter = self._get_column_letter(max_col)
+        cell_range = f"A1:{max_col_letter}{max_row}"
+
+        # Overwrite the whole used range with None values to blank it out
+        null_values = [[None] * max_col] * max_row
+        self.write_feishusheet(file_token, sheet, cell_range, null_values)
+
+    @staticmethod
+    def _get_column_letter(col_id):
+        if not (1 <= col_id <= 16384):
+            # 16384 (column XFD) is the excel maximum column number
+            raise ValueError("column_id should be between 1 and 16384")
+        letters = []
+        while col_id > 0:
+            col_id, remainder = divmod(col_id, 26)
+            if remainder == 0:
+                # bijective base-26 has no zero digit: 26 maps to "Z" and borrows from the quotient
+                remainder = 26
+                col_id -= 1
+            letters.append(chr(remainder + 64))
+        return "".join(reversed(letters))
+
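`_get_column_letter` implements bijective base-26: the digits A..Z stand for 1..26 with no zero, hence the `remainder == 0` borrow. A standalone copy with spot checks:

    def get_column_letter(col_id: int) -> str:
        # Bijective base-26: a remainder of 0 means "Z" and borrows one
        # from the quotient, because there is no zero digit.
        if not (1 <= col_id <= 16384):
            raise ValueError("column_id should be between 1 and 16384")
        letters = []
        while col_id > 0:
            col_id, remainder = divmod(col_id, 26)
            if remainder == 0:
                remainder = 26
                col_id -= 1
            letters.append(chr(remainder + 64))
        return "".join(reversed(letters))

    assert get_column_letter(1) == "A"
    assert get_column_letter(26) == "Z"
    assert get_column_letter(27) == "AA"
    assert get_column_letter(16384) == "XFD"  # last excel column
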
+    def create_sheet(self, file_token: str, sheet_name: str):
+        """
+        Create a new sheet in a Feishu spreadsheet
+        @param file_token: The file token of the Feishu spreadsheet, `shtcncglGS4VMi9CcU2GNaNhaVc`
+        @param sheet_name: The name of the sheet to create, `sheet1`
+        """
+        meta_info = self.get_spreadsheets_metainfo(file_token)
+        path = f"/sheets/v2/spreadsheets/{file_token}/sheets_batch_update"
+        param = {
+            "requests": [
+                {
+                    "addSheet": {
+                        # Append the new sheet after the existing ones
+                        "properties": {"title": sheet_name, "index": meta_info.get("properties").get("sheetCount")}
+                    }
+                }
+            ]
+        }
+        resp = self._request("POST", path, json=param)
+        if resp["code"] == 0:
+            return resp["data"]
+        else:
+            raise FeiShuException(f"code: {resp.get('code')}, msg: {resp.get('msg')}")
+
+    def get_employees(self, view="basic", status=None, user_id_type="open_id", user_ids=None):
+        """
+        Get employee information
+        https://open.feishu.cn/document/uAjLw4CM/ukTMukTMukTM/ehr/ehr-v1/employee/list
+        :param view: basic: overview, returns only id, name and other basic information; full: detail, returns system standard fields plus the custom field collection; default is basic
+        :param status: 1: To be hired, 2: In service, 3: Entry cancelled, 4: To be resigned, 5: Resigned. Separate multiple statuses with commas
+        :param user_id_type: open_id/union_id/user_id, default open_id
+        :param user_ids: User ids matching user_id_type
+        :return: List of employee information
+        """
+        path = "/ehr/v1/employees"
+        params = {"view": view, "status": status, "user_id_type": user_id_type, "user_ids": user_ids}
+        member_lst = self._iter_pages(path, params=params)
+        result = [i["system_fields"] for i in member_lst]
+        return result
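
A usage sketch for `get_employees`, pulling full profiles of in-service employees (status 2), once more assuming a configured `connector`; the keys available in each returned `system_fields` dict depend on the EHR schema and are not documented in this diff:

    employees = connector.get_employees(view="full", status=2)
    for emp in employees:
        # "name" is a representative system field; actual keys depend on the EHR schema
        print(emp.get("name"))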