recurvedata-lib 0.1.487__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of recurvedata-lib might be problematic. Click here for more details.
- recurvedata/__init__.py +0 -0
- recurvedata/__version__.py +1 -0
- recurvedata/client/__init__.py +3 -0
- recurvedata/client/client.py +150 -0
- recurvedata/client/server_client.py +91 -0
- recurvedata/config.py +99 -0
- recurvedata/connectors/__init__.py +20 -0
- recurvedata/connectors/_register.py +46 -0
- recurvedata/connectors/base.py +111 -0
- recurvedata/connectors/config_schema.py +1575 -0
- recurvedata/connectors/connectors/__init__.py +0 -0
- recurvedata/connectors/connectors/aliyun_access_key.py +30 -0
- recurvedata/connectors/connectors/auth.py +44 -0
- recurvedata/connectors/connectors/azure_blob.py +89 -0
- recurvedata/connectors/connectors/azure_synapse.py +79 -0
- recurvedata/connectors/connectors/bigquery.py +359 -0
- recurvedata/connectors/connectors/clickhouse.py +219 -0
- recurvedata/connectors/connectors/dingtalk.py +61 -0
- recurvedata/connectors/connectors/doris.py +215 -0
- recurvedata/connectors/connectors/es.py +62 -0
- recurvedata/connectors/connectors/feishu.py +65 -0
- recurvedata/connectors/connectors/ftp.py +50 -0
- recurvedata/connectors/connectors/generic.py +49 -0
- recurvedata/connectors/connectors/google_cloud_storage.py +115 -0
- recurvedata/connectors/connectors/google_service_account.py +225 -0
- recurvedata/connectors/connectors/hive.py +207 -0
- recurvedata/connectors/connectors/impala.py +210 -0
- recurvedata/connectors/connectors/jenkins.py +51 -0
- recurvedata/connectors/connectors/mail.py +89 -0
- recurvedata/connectors/connectors/microsoft_fabric.py +284 -0
- recurvedata/connectors/connectors/mongo.py +79 -0
- recurvedata/connectors/connectors/mssql.py +131 -0
- recurvedata/connectors/connectors/mysql.py +191 -0
- recurvedata/connectors/connectors/n8n.py +141 -0
- recurvedata/connectors/connectors/oss.py +74 -0
- recurvedata/connectors/connectors/owncloud.py +36 -0
- recurvedata/connectors/connectors/phoenix.py +36 -0
- recurvedata/connectors/connectors/postgres.py +230 -0
- recurvedata/connectors/connectors/python.py +50 -0
- recurvedata/connectors/connectors/redshift.py +187 -0
- recurvedata/connectors/connectors/s3.py +93 -0
- recurvedata/connectors/connectors/sftp.py +87 -0
- recurvedata/connectors/connectors/slack.py +35 -0
- recurvedata/connectors/connectors/spark.py +99 -0
- recurvedata/connectors/connectors/starrocks.py +175 -0
- recurvedata/connectors/connectors/tencent_cos.py +40 -0
- recurvedata/connectors/connectors/tidb.py +49 -0
- recurvedata/connectors/const.py +315 -0
- recurvedata/connectors/datasource.py +189 -0
- recurvedata/connectors/dbapi.py +469 -0
- recurvedata/connectors/fs.py +66 -0
- recurvedata/connectors/ftp.py +40 -0
- recurvedata/connectors/object_store.py +60 -0
- recurvedata/connectors/pigeon.py +172 -0
- recurvedata/connectors/proxy.py +104 -0
- recurvedata/connectors/service.py +223 -0
- recurvedata/connectors/utils.py +47 -0
- recurvedata/consts.py +49 -0
- recurvedata/core/__init__.py +0 -0
- recurvedata/core/config.py +46 -0
- recurvedata/core/configurable.py +27 -0
- recurvedata/core/consts.py +2 -0
- recurvedata/core/templating.py +206 -0
- recurvedata/core/tracing.py +223 -0
- recurvedata/core/transformer.py +186 -0
- recurvedata/core/translation.py +91 -0
- recurvedata/dbt/client.py +97 -0
- recurvedata/dbt/consts.py +99 -0
- recurvedata/dbt/cosmos_utils.py +275 -0
- recurvedata/dbt/error_codes.py +18 -0
- recurvedata/dbt/schemas.py +98 -0
- recurvedata/dbt/service.py +451 -0
- recurvedata/dbt/utils.py +246 -0
- recurvedata/error_codes.py +71 -0
- recurvedata/exceptions.py +72 -0
- recurvedata/executors/__init__.py +4 -0
- recurvedata/executors/cli/__init__.py +7 -0
- recurvedata/executors/cli/connector.py +117 -0
- recurvedata/executors/cli/dbt.py +118 -0
- recurvedata/executors/cli/main.py +82 -0
- recurvedata/executors/cli/parameters.py +18 -0
- recurvedata/executors/client.py +190 -0
- recurvedata/executors/consts.py +50 -0
- recurvedata/executors/debug_executor.py +100 -0
- recurvedata/executors/executor.py +300 -0
- recurvedata/executors/link_executor.py +189 -0
- recurvedata/executors/models.py +34 -0
- recurvedata/executors/schemas.py +222 -0
- recurvedata/executors/service/__init__.py +0 -0
- recurvedata/executors/service/connector.py +380 -0
- recurvedata/executors/utils.py +172 -0
- recurvedata/filestorage/__init__.py +11 -0
- recurvedata/filestorage/_factory.py +33 -0
- recurvedata/filestorage/backends/__init__.py +0 -0
- recurvedata/filestorage/backends/fsspec.py +45 -0
- recurvedata/filestorage/backends/local.py +67 -0
- recurvedata/filestorage/backends/oss.py +56 -0
- recurvedata/filestorage/interface.py +84 -0
- recurvedata/operators/__init__.py +10 -0
- recurvedata/operators/base.py +28 -0
- recurvedata/operators/config.py +21 -0
- recurvedata/operators/context.py +255 -0
- recurvedata/operators/dbt_operator/__init__.py +2 -0
- recurvedata/operators/dbt_operator/model_pipeline_link_operator.py +55 -0
- recurvedata/operators/dbt_operator/operator.py +353 -0
- recurvedata/operators/link_operator/__init__.py +1 -0
- recurvedata/operators/link_operator/operator.py +120 -0
- recurvedata/operators/models.py +55 -0
- recurvedata/operators/notify_operator/__init__.py +1 -0
- recurvedata/operators/notify_operator/operator.py +180 -0
- recurvedata/operators/operator.py +119 -0
- recurvedata/operators/python_operator/__init__.py +1 -0
- recurvedata/operators/python_operator/operator.py +132 -0
- recurvedata/operators/sensor_operator/__init__.py +1 -0
- recurvedata/operators/sensor_operator/airflow_utils.py +63 -0
- recurvedata/operators/sensor_operator/operator.py +172 -0
- recurvedata/operators/spark_operator/__init__.py +1 -0
- recurvedata/operators/spark_operator/operator.py +200 -0
- recurvedata/operators/spark_operator/spark_sample.py +47 -0
- recurvedata/operators/sql_operator/__init__.py +1 -0
- recurvedata/operators/sql_operator/operator.py +90 -0
- recurvedata/operators/task.py +211 -0
- recurvedata/operators/transfer_operator/__init__.py +40 -0
- recurvedata/operators/transfer_operator/const.py +10 -0
- recurvedata/operators/transfer_operator/dump_aliyun_sls.py +82 -0
- recurvedata/operators/transfer_operator/dump_sheet_task_base.py +292 -0
- recurvedata/operators/transfer_operator/dump_task_cass.py +155 -0
- recurvedata/operators/transfer_operator/dump_task_dbapi.py +209 -0
- recurvedata/operators/transfer_operator/dump_task_es.py +113 -0
- recurvedata/operators/transfer_operator/dump_task_feishu_sheet.py +114 -0
- recurvedata/operators/transfer_operator/dump_task_ftp.py +234 -0
- recurvedata/operators/transfer_operator/dump_task_google_sheet.py +66 -0
- recurvedata/operators/transfer_operator/dump_task_mongodb.py +168 -0
- recurvedata/operators/transfer_operator/dump_task_oss.py +285 -0
- recurvedata/operators/transfer_operator/dump_task_python.py +212 -0
- recurvedata/operators/transfer_operator/dump_task_s3.py +270 -0
- recurvedata/operators/transfer_operator/dump_task_sftp.py +229 -0
- recurvedata/operators/transfer_operator/load_task_aliyun_oss.py +107 -0
- recurvedata/operators/transfer_operator/load_task_azure_blob.py +115 -0
- recurvedata/operators/transfer_operator/load_task_azure_synapse.py +90 -0
- recurvedata/operators/transfer_operator/load_task_clickhouse.py +167 -0
- recurvedata/operators/transfer_operator/load_task_doris.py +164 -0
- recurvedata/operators/transfer_operator/load_task_email.py +188 -0
- recurvedata/operators/transfer_operator/load_task_es.py +86 -0
- recurvedata/operators/transfer_operator/load_task_filebrowser.py +151 -0
- recurvedata/operators/transfer_operator/load_task_ftp.py +19 -0
- recurvedata/operators/transfer_operator/load_task_google_bigquery.py +90 -0
- recurvedata/operators/transfer_operator/load_task_google_cloud_storage.py +127 -0
- recurvedata/operators/transfer_operator/load_task_google_sheet.py +130 -0
- recurvedata/operators/transfer_operator/load_task_hive.py +158 -0
- recurvedata/operators/transfer_operator/load_task_microsoft_fabric.py +105 -0
- recurvedata/operators/transfer_operator/load_task_mssql.py +153 -0
- recurvedata/operators/transfer_operator/load_task_mysql.py +157 -0
- recurvedata/operators/transfer_operator/load_task_owncloud.py +135 -0
- recurvedata/operators/transfer_operator/load_task_postgresql.py +109 -0
- recurvedata/operators/transfer_operator/load_task_qcloud_cos.py +119 -0
- recurvedata/operators/transfer_operator/load_task_recurve_data_prep.py +75 -0
- recurvedata/operators/transfer_operator/load_task_redshift.py +95 -0
- recurvedata/operators/transfer_operator/load_task_s3.py +150 -0
- recurvedata/operators/transfer_operator/load_task_sftp.py +90 -0
- recurvedata/operators/transfer_operator/load_task_starrocks.py +169 -0
- recurvedata/operators/transfer_operator/load_task_yicrowds.py +97 -0
- recurvedata/operators/transfer_operator/mixin.py +31 -0
- recurvedata/operators/transfer_operator/operator.py +231 -0
- recurvedata/operators/transfer_operator/task.py +223 -0
- recurvedata/operators/transfer_operator/utils.py +134 -0
- recurvedata/operators/ui.py +80 -0
- recurvedata/operators/utils/__init__.py +51 -0
- recurvedata/operators/utils/file_factory.py +150 -0
- recurvedata/operators/utils/fs.py +10 -0
- recurvedata/operators/utils/lineage.py +265 -0
- recurvedata/operators/web_init.py +15 -0
- recurvedata/pigeon/connector/__init__.py +294 -0
- recurvedata/pigeon/connector/_registry.py +17 -0
- recurvedata/pigeon/connector/aliyun_oss.py +80 -0
- recurvedata/pigeon/connector/awss3.py +123 -0
- recurvedata/pigeon/connector/azure_blob.py +176 -0
- recurvedata/pigeon/connector/azure_synapse.py +51 -0
- recurvedata/pigeon/connector/cass.py +151 -0
- recurvedata/pigeon/connector/clickhouse.py +403 -0
- recurvedata/pigeon/connector/clickhouse_native.py +351 -0
- recurvedata/pigeon/connector/dbapi.py +571 -0
- recurvedata/pigeon/connector/doris.py +166 -0
- recurvedata/pigeon/connector/es.py +176 -0
- recurvedata/pigeon/connector/feishu.py +1135 -0
- recurvedata/pigeon/connector/ftp.py +163 -0
- recurvedata/pigeon/connector/google_bigquery.py +283 -0
- recurvedata/pigeon/connector/google_cloud_storage.py +130 -0
- recurvedata/pigeon/connector/hbase_phoenix.py +108 -0
- recurvedata/pigeon/connector/hdfs.py +204 -0
- recurvedata/pigeon/connector/hive_impala.py +383 -0
- recurvedata/pigeon/connector/microsoft_fabric.py +95 -0
- recurvedata/pigeon/connector/mongodb.py +56 -0
- recurvedata/pigeon/connector/mssql.py +467 -0
- recurvedata/pigeon/connector/mysql.py +175 -0
- recurvedata/pigeon/connector/owncloud.py +92 -0
- recurvedata/pigeon/connector/postgresql.py +267 -0
- recurvedata/pigeon/connector/power_bi.py +179 -0
- recurvedata/pigeon/connector/qcloud_cos.py +79 -0
- recurvedata/pigeon/connector/redshift.py +123 -0
- recurvedata/pigeon/connector/sftp.py +73 -0
- recurvedata/pigeon/connector/sqlite.py +42 -0
- recurvedata/pigeon/connector/starrocks.py +144 -0
- recurvedata/pigeon/connector/tableau.py +162 -0
- recurvedata/pigeon/const.py +21 -0
- recurvedata/pigeon/csv.py +172 -0
- recurvedata/pigeon/docs/datasources-example.json +82 -0
- recurvedata/pigeon/docs/images/pigeon_design.png +0 -0
- recurvedata/pigeon/docs/lightweight-data-sync-solution.md +111 -0
- recurvedata/pigeon/dumper/__init__.py +171 -0
- recurvedata/pigeon/dumper/aliyun_sls.py +415 -0
- recurvedata/pigeon/dumper/base.py +141 -0
- recurvedata/pigeon/dumper/cass.py +213 -0
- recurvedata/pigeon/dumper/dbapi.py +346 -0
- recurvedata/pigeon/dumper/es.py +112 -0
- recurvedata/pigeon/dumper/ftp.py +64 -0
- recurvedata/pigeon/dumper/mongodb.py +103 -0
- recurvedata/pigeon/handler/__init__.py +4 -0
- recurvedata/pigeon/handler/base.py +153 -0
- recurvedata/pigeon/handler/csv_handler.py +290 -0
- recurvedata/pigeon/loader/__init__.py +87 -0
- recurvedata/pigeon/loader/base.py +83 -0
- recurvedata/pigeon/loader/csv_to_azure_synapse.py +214 -0
- recurvedata/pigeon/loader/csv_to_clickhouse.py +152 -0
- recurvedata/pigeon/loader/csv_to_doris.py +215 -0
- recurvedata/pigeon/loader/csv_to_es.py +51 -0
- recurvedata/pigeon/loader/csv_to_google_bigquery.py +169 -0
- recurvedata/pigeon/loader/csv_to_hive.py +468 -0
- recurvedata/pigeon/loader/csv_to_microsoft_fabric.py +242 -0
- recurvedata/pigeon/loader/csv_to_mssql.py +174 -0
- recurvedata/pigeon/loader/csv_to_mysql.py +180 -0
- recurvedata/pigeon/loader/csv_to_postgresql.py +248 -0
- recurvedata/pigeon/loader/csv_to_redshift.py +240 -0
- recurvedata/pigeon/loader/csv_to_starrocks.py +233 -0
- recurvedata/pigeon/meta.py +116 -0
- recurvedata/pigeon/row_factory.py +42 -0
- recurvedata/pigeon/schema/__init__.py +124 -0
- recurvedata/pigeon/schema/types.py +13 -0
- recurvedata/pigeon/sync.py +283 -0
- recurvedata/pigeon/transformer.py +146 -0
- recurvedata/pigeon/utils/__init__.py +134 -0
- recurvedata/pigeon/utils/bloomfilter.py +181 -0
- recurvedata/pigeon/utils/date_time.py +323 -0
- recurvedata/pigeon/utils/escape.py +15 -0
- recurvedata/pigeon/utils/fs.py +266 -0
- recurvedata/pigeon/utils/json.py +44 -0
- recurvedata/pigeon/utils/keyed_tuple.py +85 -0
- recurvedata/pigeon/utils/mp.py +156 -0
- recurvedata/pigeon/utils/sql.py +328 -0
- recurvedata/pigeon/utils/timing.py +155 -0
- recurvedata/provider_manager.py +0 -0
- recurvedata/providers/__init__.py +0 -0
- recurvedata/providers/dbapi/__init__.py +0 -0
- recurvedata/providers/flywheel/__init__.py +0 -0
- recurvedata/providers/mysql/__init__.py +0 -0
- recurvedata/schedulers/__init__.py +1 -0
- recurvedata/schedulers/airflow.py +974 -0
- recurvedata/schedulers/airflow_db_process.py +331 -0
- recurvedata/schedulers/airflow_operators.py +61 -0
- recurvedata/schedulers/airflow_plugin.py +9 -0
- recurvedata/schedulers/airflow_trigger_dag_patch.py +117 -0
- recurvedata/schedulers/base.py +99 -0
- recurvedata/schedulers/cli.py +228 -0
- recurvedata/schedulers/client.py +56 -0
- recurvedata/schedulers/consts.py +52 -0
- recurvedata/schedulers/debug_celery.py +62 -0
- recurvedata/schedulers/model.py +63 -0
- recurvedata/schedulers/schemas.py +97 -0
- recurvedata/schedulers/service.py +20 -0
- recurvedata/schedulers/system_dags.py +59 -0
- recurvedata/schedulers/task_status.py +279 -0
- recurvedata/schedulers/utils.py +73 -0
- recurvedata/schema/__init__.py +0 -0
- recurvedata/schema/field.py +88 -0
- recurvedata/schema/schema.py +55 -0
- recurvedata/schema/types.py +17 -0
- recurvedata/schema.py +0 -0
- recurvedata/server/__init__.py +0 -0
- recurvedata/server/app.py +7 -0
- recurvedata/server/connector/__init__.py +0 -0
- recurvedata/server/connector/api.py +79 -0
- recurvedata/server/connector/schemas.py +28 -0
- recurvedata/server/data_service/__init__.py +0 -0
- recurvedata/server/data_service/api.py +126 -0
- recurvedata/server/data_service/client.py +18 -0
- recurvedata/server/data_service/consts.py +1 -0
- recurvedata/server/data_service/schemas.py +68 -0
- recurvedata/server/data_service/service.py +218 -0
- recurvedata/server/dbt/__init__.py +0 -0
- recurvedata/server/dbt/api.py +116 -0
- recurvedata/server/error_code.py +49 -0
- recurvedata/server/exceptions.py +19 -0
- recurvedata/server/executor/__init__.py +0 -0
- recurvedata/server/executor/api.py +37 -0
- recurvedata/server/executor/schemas.py +30 -0
- recurvedata/server/executor/service.py +220 -0
- recurvedata/server/main.py +32 -0
- recurvedata/server/schedulers/__init__.py +0 -0
- recurvedata/server/schedulers/api.py +252 -0
- recurvedata/server/schedulers/schemas.py +50 -0
- recurvedata/server/schemas.py +50 -0
- recurvedata/utils/__init__.py +15 -0
- recurvedata/utils/_typer.py +61 -0
- recurvedata/utils/attrdict.py +19 -0
- recurvedata/utils/command_helper.py +20 -0
- recurvedata/utils/compat.py +12 -0
- recurvedata/utils/compression.py +203 -0
- recurvedata/utils/crontab.py +42 -0
- recurvedata/utils/crypto_util.py +305 -0
- recurvedata/utils/dataclass.py +11 -0
- recurvedata/utils/date_time.py +464 -0
- recurvedata/utils/dispatch.py +114 -0
- recurvedata/utils/email_util.py +104 -0
- recurvedata/utils/files.py +386 -0
- recurvedata/utils/helpers.py +170 -0
- recurvedata/utils/httputil.py +117 -0
- recurvedata/utils/imports.py +132 -0
- recurvedata/utils/json.py +80 -0
- recurvedata/utils/log.py +117 -0
- recurvedata/utils/log_capture.py +153 -0
- recurvedata/utils/mp.py +178 -0
- recurvedata/utils/normalizer.py +102 -0
- recurvedata/utils/redis_lock.py +474 -0
- recurvedata/utils/registry.py +54 -0
- recurvedata/utils/shell.py +15 -0
- recurvedata/utils/singleton.py +33 -0
- recurvedata/utils/sql.py +6 -0
- recurvedata/utils/timeout.py +28 -0
- recurvedata/utils/tracing.py +14 -0
- recurvedata_lib-0.1.487.dist-info/METADATA +605 -0
- recurvedata_lib-0.1.487.dist-info/RECORD +333 -0
- recurvedata_lib-0.1.487.dist-info/WHEEL +5 -0
- recurvedata_lib-0.1.487.dist-info/entry_points.txt +6 -0
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import threading
|
|
3
|
+
|
|
4
|
+
_default_error_rate = 1 / 10**8  # one in a hundred million
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class _BloomFilterInterface:
    """Common surface shared by the bloom filter engines in this module.

    Only ``filename`` is recorded here. The other constructor arguments are
    accepted so that every engine can be built with the same call.
    """

    def __init__(self, filename, capacity=100, error_rate=_default_error_rate, auto_scale=True, reuse=True):
        # capacity / error_rate / auto_scale / reuse are not used at this level.
        self.filename = filename

    def add(self, key):
        """Insert ``key``; must be provided by the engine."""
        raise NotImplementedError

    def save(self):
        """Persist the filter; must be provided by the engine."""
        raise NotImplementedError

    def close(self):
        """Release the filter. The default implementation does nothing."""
        return None

    def __contains__(self, item):
        raise NotImplementedError

    def __len__(self):
        raise NotImplementedError

    def __str__(self):
        raise NotImplementedError
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class PyBloomFilter(_BloomFilterInterface):
    """Bloom filter engine backed by ``pybloom_live`` and stored in ``filename``.

    Attributes that are not defined on this wrapper (for example ``capacity``
    and ``count`` used by ``__str__``) are looked up on the wrapped filter
    through ``__getattr__``.
    """

    def __init__(self, filename, capacity=100, error_rate=_default_error_rate, auto_scale=True, reuse=True):
        """Load the filter from ``filename`` or create a new one.

        :param filename: path used by ``save()`` and, when ``reuse`` is set, for loading
        :param capacity: first argument passed to the pybloom_live filter class
        :param error_rate: second argument passed to the pybloom_live filter class
        :param auto_scale: use ``ScalableBloomFilter`` when true, ``BloomFilter`` otherwise
        :param reuse: when true and ``filename`` exists, load it instead of creating a new filter
        """
        # Imported here rather than at module level, so pybloom_live is only
        # needed when this engine is actually instantiated.
        import pybloom_live

        super().__init__(filename, capacity, error_rate, auto_scale, reuse)

        if auto_scale:
            filter_cls = pybloom_live.ScalableBloomFilter
        else:
            filter_cls = pybloom_live.BloomFilter

        if reuse and os.path.exists(filename):
            with open(filename, "rb") as f:
                self._bf = filter_cls.fromfile(f)
        else:
            self._bf = filter_cls(capacity, error_rate)

        self._lock = threading.RLock()

    def __getattr__(self, name):
        """Delegate unknown attributes to the wrapped filter.

        ``__getattr__`` only runs when normal lookup fails. If ``_bf`` or
        ``_lock`` themselves are missing (instance created without
        ``__init__``, e.g. by ``copy`` or ``__new__``), delegating would call
        back into this method forever, so fail fast with AttributeError.
        """
        if name in ("_bf", "_lock"):
            raise AttributeError(name)
        # The lock covers only the attribute lookup, not any later call on
        # the returned object.
        with self._lock:
            return getattr(self._bf, name)

    def __len__(self):
        with self._lock:
            return len(self._bf)

    def __str__(self):
        return f"<{self.__class__.__name__} ({repr(self.filename)}, capacity={self.capacity}, count={self.count})>"

    def __contains__(self, item):
        with self._lock:
            return item in self._bf

    def add(self, key):
        """Add ``key`` under the lock and return whatever the wrapped ``add`` returns."""
        with self._lock:
            return self._bf.add(key)

    def save(self):
        """Write the wrapped filter to ``self.filename`` (overwriting it)."""
        with self._lock:
            with open(self.filename, "wb") as f:
                self._bf.tofile(f)

    def close(self):
        """Persist the filter; closing this engine is the same as ``save()``."""
        self.save()
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
class CBloomFilter(_BloomFilterInterface):
    """Bloom filter engine backed by the ``pybloomfilter`` package.

    The wrapped filter is opened from, or created at, ``filename``.
    ``auto_scale`` is accepted for signature compatibility but not used.
    """

    def __init__(self, filename, capacity, error_rate=_default_error_rate, auto_scale=True, reuse=True):
        """Open ``filename`` when ``reuse`` is set and it exists, else create a new filter.

        :param filename: path of the filter file
        :param capacity: capacity passed to ``pybloomfilter.BloomFilter`` (required)
        :param error_rate: error rate passed to ``pybloomfilter.BloomFilter``
        :param auto_scale: unused by this engine
        :param reuse: when true and ``filename`` exists, open it instead of creating a new filter
        """
        # Imported here rather than at module level, so pybloomfilter is only
        # needed when this engine is actually instantiated.
        import pybloomfilter

        super().__init__(filename, capacity, error_rate, auto_scale, reuse)

        # pybloomfilter does not support auto scaling, so a capacity must be given.
        if reuse and os.path.exists(filename):
            self._bf = pybloomfilter.BloomFilter.open(filename)
        else:
            self._bf = pybloomfilter.BloomFilter(capacity, error_rate, filename)
        self._lock = threading.RLock()

    @property
    def count(self):
        """Number of items, as reported by ``len(self)``."""
        return len(self)

    @property
    def capacity(self):
        """Capacity reported by the wrapped filter."""
        return self._bf.capacity

    def __contains__(self, item):
        with self._lock:
            return item in self._bf

    def __len__(self):
        with self._lock:
            return len(self._bf)

    def __str__(self):
        return f"<{self.__class__.__name__} ({repr(self.filename)}, capacity={self.capacity}, count={self.count})>"

    def add(self, key):
        """Add ``key`` under the lock and return whatever the wrapped ``add`` returns."""
        with self._lock:
            return self._bf.add(key)

    def save(self):
        """Flush the wrapped filter via its ``sync()`` method."""
        with self._lock:
            self._bf.sync()

    def close(self):
        """Close the wrapped filter.

        Taken under the same lock as ``add``/``save``/``__contains__`` so the
        filter is not closed while another thread is in one of those calls.
        """
        with self._lock:
            self._bf.close()
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
class DummyBloomFilter(_BloomFilterInterface):
    """No-op engine: always empty, never contains anything, stores nothing."""

    def add(self, key):
        """Ignore ``key`` and return False."""
        return False

    def save(self):
        """Nothing to persist."""
        return None

    def __contains__(self, item):
        return False

    def __len__(self):
        return 0

    def __str__(self):
        return type(self).__name__
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# Engine name -> filter class, as looked up by new_bloom_filter().
_engine_choices = {
    "C": CBloomFilter,
    "py": PyBloomFilter,
    # Both spellings select the no-op filter.
    **dict.fromkeys(("dummy", "None"), DummyBloomFilter),
}
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def new_bloom_filter(filename, capacity, error_rate=_default_error_rate, auto_scale=True, reuse=True, engine="py"):
    """Build a bloom filter using the engine registered under ``engine``.

    ``engine=None`` selects the ``"None"`` entry; any other falsy value falls
    back to ``"py"``. An unregistered name raises ``KeyError`` from the
    registry lookup.
    """
    if engine is None:
        engine_key = "None"
    elif not engine:
        engine_key = "py"
    else:
        engine_key = engine
    filter_cls = _engine_choices[engine_key]
    return filter_cls(filename, capacity, error_rate, auto_scale, reuse)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def __benchmark(filename, capacity, error_rate, engine):
    """Time ``capacity`` check-then-add operations plus one ``save()`` and print the result.

    Prints ``engine``, the filter, the elapsed time as a ``timedelta`` and the
    number of operations per second.
    """
    import datetime
    import time

    values = range(capacity)
    f = new_bloom_filter(filename, capacity, error_rate, reuse=False, engine=engine)
    # perf_counter() is monotonic, so the measurement is not disturbed by
    # wall-clock adjustments the way datetime.now() differences are.
    started = time.perf_counter()
    for i in values:
        if i not in f:
            f.add(i)
    f.save()
    elapsed = time.perf_counter() - started
    duration = datetime.timedelta(seconds=elapsed)
    # A zero-length interval would otherwise raise ZeroDivisionError.
    qps = len(values) / elapsed if elapsed > 0 else float("inf")
    print(engine, f, duration, qps)
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
if __name__ == "__main__":
    # Recorded output of earlier runs (the reprs show capacity=2000000):
    # C <CBloomFilter ('/tmp/bloom_C', capacity=2000000, count=1999740)> 0:00:03.047118 656357.9093425327
    # C <CBloomFilter ('/tmp/bloom_C', capacity=2000000, count=1999766)> 0:00:02.591953 771618.9298185576
    # C <CBloomFilter ('/tmp/bloom_C', capacity=2000000, count=1999737)> 0:00:02.756158 725647.8039357685
    # py <PyBloomFilter ('/tmp/bloom_py', capacity=2000000, count=1999770)> 0:00:33.104276 60415.15603603595
    # py <PyBloomFilter ('/tmp/bloom_py', capacity=2000000, count=1999770)> 0:00:33.406134 59869.244372904686
    # py <PyBloomFilter ('/tmp/bloom_py', capacity=2000000, count=1999770)> 0:00:34.070549 58701.725058789045
    # None DummyBloomFilter 0:00:00.431990 4629736.799462951
    # None DummyBloomFilter 0:00:00.432359 4625785.51620297
    # None DummyBloomFilter 0:00:00.432572 4623507.762869534
    for engine_name in ("C", "py", "None"):
        bloom_path = f"/tmp/bloom_{engine_name}"
        # Three runs per engine.
        for _ in range(3):
            __benchmark(bloom_path, capacity=10000000, error_rate=0.001, engine=engine_name)
|
|
@@ -0,0 +1,323 @@
|
|
|
1
|
+
import datetime
|
|
2
|
+
from typing import List, Union
|
|
3
|
+
|
|
4
|
+
import dateutil.parser
|
|
5
|
+
import pendulum
|
|
6
|
+
|
|
7
|
+
# Timezone objects shared by the helpers in this module; both are resolved
# once, when the module is imported.
_tz_utc = pendulum.timezone("utc")
_tz_local = pendulum.local_timezone()

# A timezone given either as a tzinfo object or as a name.
_TZ_TYPE = Union[datetime.tzinfo, str]
# Values accepted wherever a "date-like" argument is expected.
_DATELIKE = Union[str, datetime.datetime, datetime.date, pendulum.DateTime, pendulum.Date]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def utcnow() -> datetime.datetime:
    """Current datetime in UTC timezone, naive format (without timezone info).

    e.g. datetime.datetime(2022, 10, 8, 9, 52, 13, 489857)
    """
    # datetime.utcnow() is deprecated since Python 3.12; take an aware UTC
    # "now" and drop the tzinfo to get the same naive value.
    return datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def utcnow_aware() -> datetime.datetime:
    """Current datetime in UTC timezone, aware format (with timezone info).

    The attached tzinfo is the module's ``_tz_utc`` object,
    e.g. datetime.datetime(2022, 10, 8, 9, 52, 13, 489857, tzinfo=Timezone('UTC'))
    """
    # Ask for "now" directly in UTC instead of going through the deprecated
    # datetime.utcnow(); this mirrors how now_aware() is written.
    return datetime.datetime.now(tz=_tz_utc)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def now() -> datetime.datetime:
    """Return the current local time as a naive datetime (tzinfo is None).

    e.g. datetime.datetime(2022, 10, 8, 17, 52, 13, 489857)
    """
    local_naive = datetime.datetime.now(tz=None)
    return local_naive
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def now_aware() -> datetime.datetime:
    """Return the current time as an aware datetime in the local timezone.

    The attached tzinfo is the module's ``_tz_local`` object.
    """
    local_aware = datetime.datetime.now(tz=_tz_local)
    return local_aware
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _ensure_datetime(dttm: _DATELIKE) -> datetime.datetime:
    """Turn a date-like value into a ``datetime.datetime``, keeping its timezone info as-is.

    Strings are parsed with ``dateutil``; a plain date becomes midnight of
    that day; a ``pendulum.DateTime`` is rebuilt as a standard datetime.

    >>> _ensure_datetime('2022-09-10')
    datetime.datetime(2022, 9, 10, 0, 0)
    >>> _ensure_datetime('2022-09-10 08:00:00+00:00')
    datetime.datetime(2022, 9, 10, 8, 0, tzinfo=tzutc())
    >>> _ensure_datetime(datetime.datetime(2022, 9, 10))
    datetime.datetime(2022, 9, 10, 0, 0)
    >>> _ensure_datetime(pendulum.parse('2022-09-10 08:00:00+00:00'))
    datetime.datetime(2022, 9, 10, 8, 0, tzinfo=Timezone('+00:00'))

    :raises TypeError: for any other input type
    """
    if isinstance(dttm, str):
        return dateutil.parser.parse(dttm)
    # pendulum.DateTime must be tested before datetime.datetime so that it is
    # converted rather than returned unchanged.
    if isinstance(dttm, pendulum.DateTime):
        epoch_seconds = dttm.timestamp()
        return datetime.datetime.fromtimestamp(epoch_seconds, dttm.tz)
    if isinstance(dttm, datetime.datetime):
        return dttm
    if isinstance(dttm, datetime.date):
        midnight = datetime.time.min
        return datetime.datetime.combine(dttm, midnight)
    raise TypeError(f"unsupported type {type(dttm)}")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def to_pendulum(dttm: _DATELIKE) -> pendulum.DateTime:
    """Convert a date-like value into pendulum.DateTime

    Strings go through ``pendulum.parse``; everything else through
    ``pendulum.instance``. A plain date (not a datetime) is first promoted to
    midnight of that day so the result is a DateTime.

    >>> to_pendulum('2022-09-10')
    DateTime(2022, 9, 10, 0, 0, 0, tzinfo=Timezone('UTC'))
    >>> to_pendulum('2022-09-10 12:12:12')
    DateTime(2022, 9, 10, 12, 12, 12, tzinfo=Timezone('UTC'))
    >>> to_pendulum('2022-09-10 12:12:12+08:00')
    DateTime(2022, 9, 10, 12, 12, 12, tzinfo=Timezone('+08:00'))
    >>> to_pendulum(datetime.datetime(2022, 9, 10))
    DateTime(2022, 9, 10, 0, 0, 0, tzinfo=Timezone('UTC'))
    """
    if isinstance(dttm, str):
        return pendulum.parse(dttm)
    if isinstance(dttm, datetime.date) and not isinstance(dttm, datetime.datetime):
        # NOTE(review): pendulum.instance() is documented for datetime
        # objects; for a bare date, pendulum 2.x is understood to raise
        # ValueError and 3.x to return a Date — confirm against the pinned
        # version. Promoting to midnight gives a DateTime either way.
        dttm = datetime.datetime.combine(dttm, datetime.time.min)
    return pendulum.instance(dttm)
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def as_local_datetime(dt: _DATELIKE) -> datetime.datetime:
    """Label a date-like value with the local timezone, discarding any original timezone.

    The wall-clock fields are left untouched; only tzinfo is replaced.

    Note: the examples below only hold when the local timezone is Asia/Shanghai.
    >>> as_local_datetime('2022-09-10')
    datetime.datetime(2022, 9, 10, 0, 0, tzinfo=Timezone('Asia/Shanghai'))
    >>> as_local_datetime('2022-09-10 12:12:12+08:00')
    datetime.datetime(2022, 9, 10, 12, 12, 12, tzinfo=Timezone('Asia/Shanghai'))
    >>> as_local_datetime(pendulum.parse('2022-09-10 08:00:00+00:00'))
    datetime.datetime(2022, 9, 10, 8, 0, tzinfo=Timezone('Asia/Shanghai'))
    """
    moment = _ensure_datetime(dt)
    return moment.replace(tzinfo=_tz_local)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _ensure_tz(tz_or_name: _TZ_TYPE) -> datetime.tzinfo:
    """Resolve a timezone name through ``pendulum.timezone``; return anything else unchanged."""
    return pendulum.timezone(tz_or_name) if isinstance(tz_or_name, str) else tz_or_name
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def convert_tz(dt: _DATELIKE, source: _TZ_TYPE, to: _TZ_TYPE) -> datetime.datetime:
    """Interpret ``dt`` as wall-clock time in ``source`` and convert it to ``to``.

    Any timezone already attached to ``dt`` is overwritten by ``source``
    (without shifting the clock fields) before the conversion.

    >>> convert_tz('2022-09-10 08:00:00', 'Asia/Shanghai', 'UTC')
    datetime.datetime(2022, 9, 10, 0, 0, tzinfo=Timezone('UTC'))
    >>> convert_tz('2022-09-10 00:00:00', 'UTC', 'Asia/Shanghai')
    datetime.datetime(2022, 9, 10, 8, 0, tzinfo=Timezone('Asia/Shanghai'))
    >>> convert_tz('2022-09-10 00:00:00', 'Europe/Paris', 'Asia/Shanghai')
    datetime.datetime(2022, 9, 10, 6, 0, tzinfo=Timezone('Asia/Shanghai'))
    """
    moment = _ensure_datetime(dt)
    in_source = moment.replace(tzinfo=_ensure_tz(source))
    target_tz = _ensure_tz(to)
    return in_source.astimezone(target_tz)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def local_to_utc(dt: _DATELIKE) -> datetime.datetime:
    """Treat ``dt`` as local wall-clock time and convert it to UTC.

    The examples assume the local timezone is Asia/Shanghai.
    >>> local_to_utc('2022-09-10 08:00:00')
    datetime.datetime(2022, 9, 10, 0, 0, tzinfo=Timezone('UTC'))
    >>> local_to_utc('2022-09-10 08:00:00+08:00')
    datetime.datetime(2022, 9, 10, 0, 0, tzinfo=Timezone('UTC'))
    """
    return convert_tz(dt, _tz_local, _tz_utc)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def utc_to_local(dt: _DATELIKE) -> datetime.datetime:
    """Treat ``dt`` as UTC wall-clock time and convert it to the local timezone.

    The examples assume the local timezone is Asia/Shanghai.
    >>> utc_to_local('2022-09-10 08:00:00')
    datetime.datetime(2022, 9, 10, 16, 0, tzinfo=Timezone('Asia/Shanghai'))
    >>> utc_to_local('2022-09-10 08:00:00+00:00')
    datetime.datetime(2022, 9, 10, 16, 0, tzinfo=Timezone('Asia/Shanghai'))
    """
    return convert_tz(dt, _tz_utc, _tz_local)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def truncate_second(dttm: _DATELIKE) -> datetime.datetime:
    """Shortcut for ``truncate(dttm, "second")``: zero the microseconds."""
    return truncate(dttm, truncate_to="second")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def truncate_minute(dttm: _DATELIKE) -> datetime.datetime:
    """Shortcut for ``truncate(dttm, "minute")``: zero seconds and microseconds."""
    return truncate(dttm, truncate_to="minute")
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def truncate_hour(dttm: _DATELIKE) -> datetime.datetime:
    """Shortcut for ``truncate(dttm, "hour")``: zero minutes, seconds and microseconds."""
    return truncate(dttm, truncate_to="hour")
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
def truncate_day(dttm: _DATELIKE) -> datetime.datetime:
    """Shortcut for ``truncate(dttm, "day")``: midnight of the same day."""
    return truncate(dttm, truncate_to="day")
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def truncate_week(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight of the Monday of ``dttm``'s ISO week."""
    midnight = truncate(dttm, "day")
    # isoweekday() is 1 for Monday, so this is how many days to step back.
    days_since_monday = midnight.isoweekday() - 1
    return midnight - datetime.timedelta(days=days_since_monday)
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def truncate_month(dttm: _DATELIKE) -> datetime.datetime:
    """Shortcut for ``truncate(dttm, "month")``: midnight on the first of the month."""
    return truncate(dttm, truncate_to="month")
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def truncate_half_month(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight on day 15 when the day is 15 or later, otherwise on day 1."""
    midnight = truncate_day(dttm)
    anchor_day = 15 if midnight.day >= 15 else 1
    return midnight.replace(day=anchor_day)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def truncate_quarter(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight on the first day of ``dttm``'s quarter (January, April, July or October)."""
    first_of_month = truncate(dttm, "month")
    # Months 1-3 map to 1, 4-6 to 4, 7-9 to 7 and 10-12 to 10.
    offset_in_quarter = (first_of_month.month - 1) % 3
    return first_of_month.replace(month=first_of_month.month - offset_in_quarter)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def truncate_half_year(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight on January 1 for months 1-6, otherwise on July 1."""
    first_of_month = truncate(dttm, "month")
    half_start_month = 1 if first_of_month.month <= 6 else 7
    return first_of_month.replace(month=half_start_month)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def truncate_year(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight of January 1st of the year ``dttm`` falls in.

    Thin wrapper around :func:`truncate` with the ``"year"`` period.
    """
    return truncate(dttm, truncate_to="year")
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
# Periods that can be truncated with a single ``datetime.replace`` call:
# each entry lists the fields to reset and the value to reset them to.
_PERIODS = {
    "second": {"microsecond": 0},
    "minute": {"microsecond": 0, "second": 0},
    "hour": {"microsecond": 0, "second": 0, "minute": 0},
    "day": {"microsecond": 0, "second": 0, "minute": 0, "hour": 0},
    "month": {"microsecond": 0, "second": 0, "minute": 0, "hour": 0, "day": 1},
    "year": {"microsecond": 0, "second": 0, "minute": 0, "hour": 0, "day": 1, "month": 1},
}

# Periods that need custom logic; ``truncate`` dispatches to these callables.
# NOTE(review): truncate_half_month is not registered here - confirm whether
# that is intentional.
_ODD_PERIODS = {
    "week": truncate_week,
    "quarter": truncate_quarter,
    "half_year": truncate_half_year,
}
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def truncate(dttm: _DATELIKE, truncate_to="day") -> datetime.datetime:
    """Round ``dttm`` down to the start of the requested period.

    Args:
        dttm: date-like value; it is normalised with ``_ensure_datetime`` first.
        truncate_to: name of the period, one of the keys of ``_PERIODS``
            (second, minute, hour, day, month, year) or ``_ODD_PERIODS``
            (week, quarter, half_year).

    Returns:
        The truncated datetime.

    Raises:
        ValueError: if ``truncate_to`` is not a known period name.
    """
    moment = _ensure_datetime(dttm)

    # Simple periods: just reset the finer-grained fields.
    reset_fields = _PERIODS.get(truncate_to)
    if reset_fields is not None:
        return moment.replace(**reset_fields)

    # Remaining periods are handled by dedicated functions.
    handler = _ODD_PERIODS.get(truncate_to)
    if handler is None:
        valid_names = ", ".join(list(_PERIODS) + list(_ODD_PERIODS))
        raise ValueError("truncate_to not valid. Valid periods: {}".format(valid_names))
    return handler(moment)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def date_add(dttm: _DATELIKE, days: int) -> datetime.datetime:
    """Shift ``dttm`` by ``days`` days (negative values move backwards).

    >>> dttm = datetime.datetime(2022, 10, 8)
    >>> date_add(dttm, 6)
    datetime.datetime(2022, 10, 14, 0, 0)
    >>> date_add('2022-10-08', 6)
    datetime.datetime(2022, 10, 14, 0, 0)
    """
    base = _ensure_datetime(dttm)
    offset = datetime.timedelta(days=days)
    return base + offset
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def month_start(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight of the first day of ``dttm``'s month.

    >>> dttm = datetime.datetime(2022, 10, 8)
    >>> month_start(dttm)
    datetime.datetime(2022, 10, 1, 0, 0)
    """
    return truncate(dttm, "month")
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def month_end(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight of the last day of ``dttm``'s month as a plain datetime.

    Only year, month and day are carried over into the result.

    >>> dttm = datetime.datetime(2022, 10, 8)
    >>> month_end(dttm)
    datetime.datetime(2022, 10, 31, 0, 0)
    """
    last_day = to_pendulum(dttm).last_of("month")
    return datetime.datetime(year=last_day.year, month=last_day.month, day=last_day.day)
|
|
252
|
+
|
|
253
|
+
|
|
254
|
+
def _get_last_month(dttm: _DATELIKE) -> datetime.datetime:
    """Return a datetime inside the previous month: midnight of its last day."""
    one_day = datetime.timedelta(days=1)
    return month_start(dttm) - one_day
|
|
256
|
+
|
|
257
|
+
|
|
258
|
+
def last_month_start(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight of the first day of the month before ``dttm``'s month.

    >>> dttm = datetime.datetime(2022, 10, 8)
    >>> last_month_start(dttm)
    datetime.datetime(2022, 9, 1, 0, 0)
    """
    day_in_previous_month = _get_last_month(dttm)
    return month_start(day_in_previous_month)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def last_month_end(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight of the last day of the month before ``dttm``'s month.

    >>> dttm = datetime.datetime(2022, 10, 8)
    >>> last_month_end(dttm)
    datetime.datetime(2022, 9, 30, 0, 0)
    """
    # One day before the start of this month is the end of the previous one.
    return _get_last_month(dttm)
|
|
274
|
+
|
|
275
|
+
|
|
276
|
+
def _get_last_week(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight of the Monday one week before the start of ``dttm``'s week."""
    this_monday = truncate_week(dttm)
    return this_monday - datetime.timedelta(weeks=1)
|
|
278
|
+
|
|
279
|
+
|
|
280
|
+
def last_week_start(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight of the Monday of the week before ``dttm``'s week.

    >>> dttm = datetime.datetime(2022, 10, 8)
    >>> last_week_start(dttm)
    datetime.datetime(2022, 9, 26, 0, 0)
    """
    return _get_last_week(dttm)
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def last_week_end(dttm: _DATELIKE) -> datetime.datetime:
    """Return midnight of the Sunday that closes the week before ``dttm``'s week.

    >>> dttm = datetime.datetime(2022, 10, 8)
    >>> last_week_end(dttm)
    datetime.datetime(2022, 10, 2, 0, 0)
    """
    this_monday = truncate_week(dttm)
    one_day = datetime.timedelta(days=1)
    return this_monday - one_day
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def month_range(start_date: _DATELIKE, end_date: _DATELIKE) -> List[str]:
    """Get the first day of all months between start_date and end_date

    Both bounds are snapped to midnight of the first day of their month, so
    the month of ``end_date`` is always included regardless of the clock time
    carried by either argument.

    Args:
        start_date: date-like lower bound, converted with ``to_pendulum``.
        end_date: date-like upper bound, converted with ``to_pendulum``.

    Returns:
        ISO formatted dates (``YYYY-MM-DD``), one per month.

    >>> month_range('2022-01-02', '2022-05-20')
    ['2022-01-01', '2022-02-01', '2022-03-01', '2022-04-01', '2022-05-01']
    """
    # Clear the time-of-day as well as the day: the period range stops at the
    # first step past the end bound, so a start time later than the end time
    # would otherwise drop the final month.
    month_floor = dict(day=1, hour=0, minute=0, second=0, microsecond=0)
    first = to_pendulum(start_date).replace(**month_floor)
    last = to_pendulum(end_date).replace(**month_floor)
    return [x.date().isoformat() for x in pendulum.period(first, last).range("months")]
|
|
307
|
+
|
|
308
|
+
|
|
309
|
+
def day_range(start_date: _DATELIKE, end_date: _DATELIKE) -> List[str]:
    """Get all dates between start_date and end_date

    Both bounds are snapped to midnight, so the day of ``end_date`` is always
    included regardless of the clock time carried by either argument.

    Args:
        start_date: date-like lower bound, converted with ``to_pendulum``.
        end_date: date-like upper bound, converted with ``to_pendulum``.

    Returns:
        ISO formatted dates (``YYYY-MM-DD``), one per day.

    >>> day_range('2022-01-02', '2022-01-07')
    ['2022-01-02', '2022-01-03', '2022-01-04', '2022-01-05', '2022-01-06', '2022-01-07']
    """
    # Clear the time-of-day: the period range stops at the first step past the
    # end bound, so a start time later than the end time would otherwise drop
    # the final day.
    midnight = dict(hour=0, minute=0, second=0, microsecond=0)
    first = to_pendulum(start_date).replace(**midnight)
    last = to_pendulum(end_date).replace(**midnight)
    return [x.date().isoformat() for x in pendulum.period(first, last).range("days")]
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
if __name__ == "__main__":
    # When executed as a script, run the docstring examples as a self-test.
    from doctest import testmod

    testmod()
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# Translation table indexed by code point for the 128 ASCII characters.
# Every entry starts as the character itself; a few control characters are
# then replaced with backslash sequences.
_escape_table = list(map(chr, range(128)))
_escape_table[ord("\x00")] = "\\0"

# Hive delimiter \x01 is escaped; \x02, \x03 and the backslash itself are
# deliberately left untouched.
_escape_table[ord("\x01")] = "\\1"

# Line breaks.
_escape_table[ord("\n")] = "\\n"
_escape_table[ord("\r")] = "\\r"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def escape_string(v):
    """Return ``v`` with the characters listed in ``_escape_table`` escaped.

    The table is indexed by code point; characters whose code point falls
    outside it are left unchanged, because ``str.translate`` maps a character
    to itself when the lookup fails.
    """
    escaped = v.translate(_escape_table)
    return escaped
|